{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 76801,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 2.6360721588134766,
      "learning_rate": 4e-08,
      "loss": 3.842,
      "step": 1
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.8853049278259277,
      "learning_rate": 8e-08,
      "loss": 4.141,
      "step": 2
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.9480462074279785,
      "learning_rate": 1.2000000000000002e-07,
      "loss": 4.1192,
      "step": 3
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.734976053237915,
      "learning_rate": 1.6e-07,
      "loss": 4.239,
      "step": 4
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.9792237281799316,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 4.1071,
      "step": 5
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.669177293777466,
      "learning_rate": 2.4000000000000003e-07,
      "loss": 4.1591,
      "step": 6
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.6104001998901367,
      "learning_rate": 2.8e-07,
      "loss": 4.1089,
      "step": 7
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.851250410079956,
      "learning_rate": 3.2e-07,
      "loss": 4.1698,
      "step": 8
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.725309133529663,
      "learning_rate": 3.6e-07,
      "loss": 3.8417,
      "step": 9
    },
    {
      "epoch": 0.0,
      "grad_norm": 3.0180118083953857,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 4.0617,
      "step": 10
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.881134033203125,
      "learning_rate": 4.4e-07,
      "loss": 3.9367,
      "step": 11
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.743598222732544,
      "learning_rate": 4.800000000000001e-07,
      "loss": 4.362,
      "step": 12
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.865445613861084,
      "learning_rate": 5.2e-07,
      "loss": 3.7601,
      "step": 13
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.937363386154175,
      "learning_rate": 5.6e-07,
      "loss": 4.2377,
      "step": 14
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.578627824783325,
      "learning_rate": 6.000000000000001e-07,
      "loss": 3.8448,
      "step": 15
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.529411792755127,
      "learning_rate": 6.4e-07,
      "loss": 3.8423,
      "step": 16
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.6894795894622803,
      "learning_rate": 6.800000000000001e-07,
      "loss": 4.2214,
      "step": 17
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.872169256210327,
      "learning_rate": 7.2e-07,
      "loss": 4.3846,
      "step": 18
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.8828916549682617,
      "learning_rate": 7.6e-07,
      "loss": 4.2138,
      "step": 19
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.898146867752075,
      "learning_rate": 8.000000000000001e-07,
      "loss": 3.9896,
      "step": 20
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.7412266731262207,
      "learning_rate": 8.400000000000001e-07,
      "loss": 4.3825,
      "step": 21
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.9025230407714844,
      "learning_rate": 8.8e-07,
      "loss": 3.9096,
      "step": 22
    },
    {
      "epoch": 0.0,
      "grad_norm": 3.2570407390594482,
      "learning_rate": 9.200000000000001e-07,
      "loss": 4.3376,
      "step": 23
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.659943103790283,
      "learning_rate": 9.600000000000001e-07,
      "loss": 4.0545,
      "step": 24
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.9407384395599365,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 4.1586,
      "step": 25
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.756056308746338,
      "learning_rate": 1.04e-06,
      "loss": 4.2269,
      "step": 26
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.7252752780914307,
      "learning_rate": 1.08e-06,
      "loss": 4.1041,
      "step": 27
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.8850579261779785,
      "learning_rate": 1.12e-06,
      "loss": 3.9733,
      "step": 28
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.9056522846221924,
      "learning_rate": 1.1600000000000001e-06,
      "loss": 4.0159,
      "step": 29
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.906548023223877,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 3.9914,
      "step": 30
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.94385027885437,
      "learning_rate": 1.2400000000000002e-06,
      "loss": 3.7592,
      "step": 31
    },
    {
      "epoch": 0.0,
      "grad_norm": 3.0003395080566406,
      "learning_rate": 1.28e-06,
      "loss": 3.9656,
      "step": 32
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.632593870162964,
      "learning_rate": 1.32e-06,
      "loss": 4.0545,
      "step": 33
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.907654047012329,
      "learning_rate": 1.3600000000000001e-06,
      "loss": 4.1637,
      "step": 34
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.7286550998687744,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 4.279,
      "step": 35
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.5989270210266113,
      "learning_rate": 1.44e-06,
      "loss": 4.2473,
      "step": 36
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.9238083362579346,
      "learning_rate": 1.48e-06,
      "loss": 4.1756,
      "step": 37
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.853593111038208,
      "learning_rate": 1.52e-06,
      "loss": 4.1498,
      "step": 38
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.7109591960906982,
      "learning_rate": 1.56e-06,
      "loss": 4.0424,
      "step": 39
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.73944354057312,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 4.1563,
      "step": 40
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.8629908561706543,
      "learning_rate": 1.6400000000000002e-06,
      "loss": 4.0588,
      "step": 41
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.6956381797790527,
      "learning_rate": 1.6800000000000002e-06,
      "loss": 4.0294,
      "step": 42
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.677142858505249,
      "learning_rate": 1.72e-06,
      "loss": 4.0883,
      "step": 43
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.7257091999053955,
      "learning_rate": 1.76e-06,
      "loss": 4.0879,
      "step": 44
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.6246135234832764,
      "learning_rate": 1.8000000000000001e-06,
      "loss": 3.9316,
      "step": 45
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.7583272457122803,
      "learning_rate": 1.8400000000000002e-06,
      "loss": 4.0153,
      "step": 46
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.593121290206909,
      "learning_rate": 1.8800000000000002e-06,
      "loss": 3.9954,
      "step": 47
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.466965675354004,
      "learning_rate": 1.9200000000000003e-06,
      "loss": 3.9051,
      "step": 48
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.582454204559326,
      "learning_rate": 1.9600000000000003e-06,
      "loss": 4.0527,
      "step": 49
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.5882623195648193,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 3.9359,
      "step": 50
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.483936071395874,
      "learning_rate": 2.04e-06,
      "loss": 3.8511,
      "step": 51
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.491462469100952,
      "learning_rate": 2.08e-06,
      "loss": 3.8638,
      "step": 52
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.6357574462890625,
      "learning_rate": 2.12e-06,
      "loss": 4.0435,
      "step": 53
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.549468755722046,
      "learning_rate": 2.16e-06,
      "loss": 3.9889,
      "step": 54
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.4345016479492188,
      "learning_rate": 2.2e-06,
      "loss": 4.0762,
      "step": 55
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.6174962520599365,
      "learning_rate": 2.24e-06,
      "loss": 4.0602,
      "step": 56
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.7284035682678223,
      "learning_rate": 2.28e-06,
      "loss": 3.8259,
      "step": 57
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.7826550006866455,
      "learning_rate": 2.3200000000000002e-06,
      "loss": 4.2081,
      "step": 58
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.579955577850342,
      "learning_rate": 2.3600000000000003e-06,
      "loss": 4.1214,
      "step": 59
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.6836893558502197,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 4.0339,
      "step": 60
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.690777540206909,
      "learning_rate": 2.4400000000000004e-06,
      "loss": 4.0501,
      "step": 61
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.5354864597320557,
      "learning_rate": 2.4800000000000004e-06,
      "loss": 3.8497,
      "step": 62
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.463876724243164,
      "learning_rate": 2.52e-06,
      "loss": 4.0547,
      "step": 63
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.4772632122039795,
      "learning_rate": 2.56e-06,
      "loss": 4.0527,
      "step": 64
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.4833028316497803,
      "learning_rate": 2.6e-06,
      "loss": 3.9757,
      "step": 65
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.492631196975708,
      "learning_rate": 2.64e-06,
      "loss": 4.0916,
      "step": 66
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.555624008178711,
      "learning_rate": 2.68e-06,
      "loss": 3.7396,
      "step": 67
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.17523455619812,
      "learning_rate": 2.7200000000000002e-06,
      "loss": 4.05,
      "step": 68
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.682180881500244,
      "learning_rate": 2.7600000000000003e-06,
      "loss": 3.9177,
      "step": 69
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.464984893798828,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 4.0861,
      "step": 70
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.41684627532959,
      "learning_rate": 2.84e-06,
      "loss": 3.9772,
      "step": 71
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.4441537857055664,
      "learning_rate": 2.88e-06,
      "loss": 3.8837,
      "step": 72
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.4571757316589355,
      "learning_rate": 2.92e-06,
      "loss": 4.0703,
      "step": 73
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.703176736831665,
      "learning_rate": 2.96e-06,
      "loss": 3.9971,
      "step": 74
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.633815288543701,
      "learning_rate": 3e-06,
      "loss": 4.0716,
      "step": 75
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.3572745323181152,
      "learning_rate": 3.04e-06,
      "loss": 3.8928,
      "step": 76
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.466259717941284,
      "learning_rate": 3.08e-06,
      "loss": 3.9962,
      "step": 77
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.326463460922241,
      "learning_rate": 3.12e-06,
      "loss": 4.0832,
      "step": 78
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.3735435009002686,
      "learning_rate": 3.1600000000000002e-06,
      "loss": 4.1229,
      "step": 79
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.5681800842285156,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 3.9395,
      "step": 80
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.3849031925201416,
      "learning_rate": 3.2400000000000003e-06,
      "loss": 3.8178,
      "step": 81
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.292621612548828,
      "learning_rate": 3.2800000000000004e-06,
      "loss": 3.9561,
      "step": 82
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.207146644592285,
      "learning_rate": 3.3200000000000004e-06,
      "loss": 3.9433,
      "step": 83
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.237436532974243,
      "learning_rate": 3.3600000000000004e-06,
      "loss": 3.9075,
      "step": 84
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.3125154972076416,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 4.0242,
      "step": 85
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.4223341941833496,
      "learning_rate": 3.44e-06,
      "loss": 3.993,
      "step": 86
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.4205482006073,
      "learning_rate": 3.48e-06,
      "loss": 3.9571,
      "step": 87
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.19935941696167,
      "learning_rate": 3.52e-06,
      "loss": 4.2425,
      "step": 88
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.1447272300720215,
      "learning_rate": 3.5600000000000002e-06,
      "loss": 3.8625,
      "step": 89
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.1671953201293945,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 3.9637,
      "step": 90
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.247581720352173,
      "learning_rate": 3.6400000000000003e-06,
      "loss": 4.2464,
      "step": 91
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.240403175354004,
      "learning_rate": 3.6800000000000003e-06,
      "loss": 4.1943,
      "step": 92
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.0277981758117676,
      "learning_rate": 3.7200000000000004e-06,
      "loss": 3.9185,
      "step": 93
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.112370491027832,
      "learning_rate": 3.7600000000000004e-06,
      "loss": 3.9042,
      "step": 94
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.400146007537842,
      "learning_rate": 3.8000000000000005e-06,
      "loss": 3.804,
      "step": 95
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.070908546447754,
      "learning_rate": 3.8400000000000005e-06,
      "loss": 3.9537,
      "step": 96
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.5253829956054688,
      "learning_rate": 3.88e-06,
      "loss": 4.0379,
      "step": 97
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.060279130935669,
      "learning_rate": 3.920000000000001e-06,
      "loss": 3.8425,
      "step": 98
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.216923236846924,
      "learning_rate": 3.96e-06,
      "loss": 3.9074,
      "step": 99
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.1882646083831787,
      "learning_rate": 4.000000000000001e-06,
      "loss": 3.7111,
      "step": 100
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.1739585399627686,
      "learning_rate": 4.04e-06,
      "loss": 4.077,
      "step": 101
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.20379638671875,
      "learning_rate": 4.08e-06,
      "loss": 3.6502,
      "step": 102
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.1343164443969727,
      "learning_rate": 4.12e-06,
      "loss": 3.8796,
      "step": 103
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.066589832305908,
      "learning_rate": 4.16e-06,
      "loss": 3.9492,
      "step": 104
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.028674840927124,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 3.8716,
      "step": 105
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.0088679790496826,
      "learning_rate": 4.24e-06,
      "loss": 3.8033,
      "step": 106
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.9459502696990967,
      "learning_rate": 4.2800000000000005e-06,
      "loss": 3.991,
      "step": 107
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8849880695343018,
      "learning_rate": 4.32e-06,
      "loss": 3.9906,
      "step": 108
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.9695014953613281,
      "learning_rate": 4.360000000000001e-06,
      "loss": 3.9284,
      "step": 109
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.9652130603790283,
      "learning_rate": 4.4e-06,
      "loss": 4.0393,
      "step": 110
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.036538600921631,
      "learning_rate": 4.440000000000001e-06,
      "loss": 3.8126,
      "step": 111
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.1357288360595703,
      "learning_rate": 4.48e-06,
      "loss": 3.7248,
      "step": 112
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.9554420709609985,
      "learning_rate": 4.520000000000001e-06,
      "loss": 3.8211,
      "step": 113
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8781063556671143,
      "learning_rate": 4.56e-06,
      "loss": 3.7039,
      "step": 114
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.0145397186279297,
      "learning_rate": 4.600000000000001e-06,
      "loss": 3.8706,
      "step": 115
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.899590253829956,
      "learning_rate": 4.6400000000000005e-06,
      "loss": 4.1432,
      "step": 116
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8759679794311523,
      "learning_rate": 4.680000000000001e-06,
      "loss": 3.7444,
      "step": 117
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.0493576526641846,
      "learning_rate": 4.7200000000000005e-06,
      "loss": 3.753,
      "step": 118
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8805238008499146,
      "learning_rate": 4.76e-06,
      "loss": 4.0789,
      "step": 119
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.955185890197754,
      "learning_rate": 4.800000000000001e-06,
      "loss": 3.9794,
      "step": 120
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.007941961288452,
      "learning_rate": 4.84e-06,
      "loss": 3.8788,
      "step": 121
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.9280686378479004,
      "learning_rate": 4.880000000000001e-06,
      "loss": 4.0594,
      "step": 122
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.0928452014923096,
      "learning_rate": 4.92e-06,
      "loss": 3.9411,
      "step": 123
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.9408096075057983,
      "learning_rate": 4.960000000000001e-06,
      "loss": 3.9635,
      "step": 124
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8970637321472168,
      "learning_rate": 5e-06,
      "loss": 3.7759,
      "step": 125
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.0077576637268066,
      "learning_rate": 5.04e-06,
      "loss": 3.9182,
      "step": 126
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.788185954093933,
      "learning_rate": 5.0800000000000005e-06,
      "loss": 3.8802,
      "step": 127
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.7904188632965088,
      "learning_rate": 5.12e-06,
      "loss": 3.9659,
      "step": 128
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8854382038116455,
      "learning_rate": 5.1600000000000006e-06,
      "loss": 3.7629,
      "step": 129
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.7723218202590942,
      "learning_rate": 5.2e-06,
      "loss": 3.9029,
      "step": 130
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.7876309156417847,
      "learning_rate": 5.240000000000001e-06,
      "loss": 3.9002,
      "step": 131
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8020645380020142,
      "learning_rate": 5.28e-06,
      "loss": 3.8521,
      "step": 132
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.7023932933807373,
      "learning_rate": 5.320000000000001e-06,
      "loss": 3.8513,
      "step": 133
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8135490417480469,
      "learning_rate": 5.36e-06,
      "loss": 3.7779,
      "step": 134
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8681467771530151,
      "learning_rate": 5.400000000000001e-06,
      "loss": 3.8546,
      "step": 135
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8147190809249878,
      "learning_rate": 5.4400000000000004e-06,
      "loss": 3.8267,
      "step": 136
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8241143226623535,
      "learning_rate": 5.480000000000001e-06,
      "loss": 3.938,
      "step": 137
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.7781798839569092,
      "learning_rate": 5.5200000000000005e-06,
      "loss": 3.6809,
      "step": 138
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.6862727403640747,
      "learning_rate": 5.560000000000001e-06,
      "loss": 3.6451,
      "step": 139
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8438223600387573,
      "learning_rate": 5.600000000000001e-06,
      "loss": 3.7184,
      "step": 140
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.64910888671875,
      "learning_rate": 5.64e-06,
      "loss": 3.814,
      "step": 141
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.84051513671875,
      "learning_rate": 5.68e-06,
      "loss": 3.7076,
      "step": 142
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5885024070739746,
      "learning_rate": 5.72e-06,
      "loss": 3.9443,
      "step": 143
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.7579338550567627,
      "learning_rate": 5.76e-06,
      "loss": 4.038,
      "step": 144
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.7181975841522217,
      "learning_rate": 5.8e-06,
      "loss": 3.9224,
      "step": 145
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.7873283624649048,
      "learning_rate": 5.84e-06,
      "loss": 3.7275,
      "step": 146
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5855158567428589,
      "learning_rate": 5.8800000000000005e-06,
      "loss": 3.8149,
      "step": 147
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.681315302848816,
      "learning_rate": 5.92e-06,
      "loss": 3.6112,
      "step": 148
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5858527421951294,
      "learning_rate": 5.9600000000000005e-06,
      "loss": 3.5871,
      "step": 149
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.6723918914794922,
      "learning_rate": 6e-06,
      "loss": 3.643,
      "step": 150
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5923492908477783,
      "learning_rate": 6.040000000000001e-06,
      "loss": 3.8313,
      "step": 151
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.7056281566619873,
      "learning_rate": 6.08e-06,
      "loss": 3.8012,
      "step": 152
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4436019659042358,
      "learning_rate": 6.120000000000001e-06,
      "loss": 3.7778,
      "step": 153
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.7572412490844727,
      "learning_rate": 6.16e-06,
      "loss": 3.7243,
      "step": 154
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.6081651449203491,
      "learning_rate": 6.200000000000001e-06,
      "loss": 3.6158,
      "step": 155
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.6968644857406616,
      "learning_rate": 6.24e-06,
      "loss": 3.5865,
      "step": 156
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5402848720550537,
      "learning_rate": 6.280000000000001e-06,
      "loss": 3.8456,
      "step": 157
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.465819001197815,
      "learning_rate": 6.3200000000000005e-06,
      "loss": 3.8254,
      "step": 158
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5601025819778442,
      "learning_rate": 6.360000000000001e-06,
      "loss": 3.9109,
      "step": 159
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.660900592803955,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 3.8614,
      "step": 160
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.7481505870819092,
      "learning_rate": 6.440000000000001e-06,
      "loss": 3.7565,
      "step": 161
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4580985307693481,
      "learning_rate": 6.480000000000001e-06,
      "loss": 3.5254,
      "step": 162
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5330735445022583,
      "learning_rate": 6.520000000000001e-06,
      "loss": 3.8946,
      "step": 163
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5232447385787964,
      "learning_rate": 6.560000000000001e-06,
      "loss": 3.9182,
      "step": 164
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5081517696380615,
      "learning_rate": 6.600000000000001e-06,
      "loss": 3.6899,
      "step": 165
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4630393981933594,
      "learning_rate": 6.640000000000001e-06,
      "loss": 3.6609,
      "step": 166
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5021440982818604,
      "learning_rate": 6.680000000000001e-06,
      "loss": 3.8342,
      "step": 167
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5835685729980469,
      "learning_rate": 6.720000000000001e-06,
      "loss": 3.8312,
      "step": 168
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4751720428466797,
      "learning_rate": 6.760000000000001e-06,
      "loss": 3.7601,
      "step": 169
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.333896517753601,
      "learning_rate": 6.800000000000001e-06,
      "loss": 3.6224,
      "step": 170
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.6626546382904053,
      "learning_rate": 6.8400000000000014e-06,
      "loss": 3.8271,
      "step": 171
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4665930271148682,
      "learning_rate": 6.88e-06,
      "loss": 3.7951,
      "step": 172
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3348621129989624,
      "learning_rate": 6.92e-06,
      "loss": 3.6586,
      "step": 173
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4866670370101929,
      "learning_rate": 6.96e-06,
      "loss": 3.9059,
      "step": 174
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5790976285934448,
      "learning_rate": 7e-06,
      "loss": 3.8511,
      "step": 175
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5290911197662354,
      "learning_rate": 7.04e-06,
      "loss": 3.8839,
      "step": 176
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5688611268997192,
      "learning_rate": 7.08e-06,
      "loss": 3.7616,
      "step": 177
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5621362924575806,
      "learning_rate": 7.1200000000000004e-06,
      "loss": 3.6742,
      "step": 178
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.368998646736145,
      "learning_rate": 7.16e-06,
      "loss": 3.7848,
      "step": 179
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3867559432983398,
      "learning_rate": 7.2000000000000005e-06,
      "loss": 3.632,
      "step": 180
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.420078158378601,
      "learning_rate": 7.24e-06,
      "loss": 3.5555,
      "step": 181
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.515386939048767,
      "learning_rate": 7.280000000000001e-06,
      "loss": 3.6341,
      "step": 182
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3554686307907104,
      "learning_rate": 7.32e-06,
      "loss": 3.8689,
      "step": 183
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.590132236480713,
      "learning_rate": 7.360000000000001e-06,
      "loss": 3.6719,
      "step": 184
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4328200817108154,
      "learning_rate": 7.4e-06,
      "loss": 3.6513,
      "step": 185
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3275903463363647,
      "learning_rate": 7.440000000000001e-06,
      "loss": 3.7537,
      "step": 186
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.8551759719848633,
      "learning_rate": 7.48e-06,
      "loss": 3.5873,
      "step": 187
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5812379121780396,
      "learning_rate": 7.520000000000001e-06,
      "loss": 3.8094,
      "step": 188
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3696532249450684,
      "learning_rate": 7.5600000000000005e-06,
      "loss": 3.7793,
      "step": 189
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4765467643737793,
      "learning_rate": 7.600000000000001e-06,
      "loss": 3.6277,
      "step": 190
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3354705572128296,
      "learning_rate": 7.640000000000001e-06,
      "loss": 3.7158,
      "step": 191
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.380820870399475,
      "learning_rate": 7.680000000000001e-06,
      "loss": 3.746,
      "step": 192
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4038985967636108,
      "learning_rate": 7.72e-06,
      "loss": 4.0105,
      "step": 193
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3704357147216797,
      "learning_rate": 7.76e-06,
      "loss": 3.7911,
      "step": 194
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.501017451286316,
      "learning_rate": 7.800000000000002e-06,
      "loss": 3.6946,
      "step": 195
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4533863067626953,
      "learning_rate": 7.840000000000001e-06,
      "loss": 3.6744,
      "step": 196
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3890782594680786,
      "learning_rate": 7.88e-06,
      "loss": 3.7648,
      "step": 197
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3984930515289307,
      "learning_rate": 7.92e-06,
      "loss": 3.5688,
      "step": 198
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4697076082229614,
      "learning_rate": 7.960000000000002e-06,
      "loss": 3.678,
      "step": 199
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.353148341178894,
      "learning_rate": 8.000000000000001e-06,
      "loss": 3.5411,
      "step": 200
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.6426255702972412,
      "learning_rate": 8.040000000000001e-06,
      "loss": 3.5902,
      "step": 201
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3281501531600952,
      "learning_rate": 8.08e-06,
      "loss": 3.774,
      "step": 202
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5287785530090332,
      "learning_rate": 8.120000000000002e-06,
      "loss": 3.6569,
      "step": 203
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3118435144424438,
      "learning_rate": 8.16e-06,
      "loss": 3.589,
      "step": 204
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3367993831634521,
      "learning_rate": 8.2e-06,
      "loss": 3.5401,
      "step": 205
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.300559401512146,
      "learning_rate": 8.24e-06,
      "loss": 3.4121,
      "step": 206
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5026222467422485,
      "learning_rate": 8.28e-06,
      "loss": 3.5655,
      "step": 207
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.484371304512024,
      "learning_rate": 8.32e-06,
      "loss": 3.5509,
      "step": 208
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3178977966308594,
      "learning_rate": 8.36e-06,
      "loss": 3.4671,
      "step": 209
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.484305739402771,
      "learning_rate": 8.400000000000001e-06,
      "loss": 3.549,
      "step": 210
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4261474609375,
      "learning_rate": 8.44e-06,
      "loss": 3.6021,
      "step": 211
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3964322805404663,
      "learning_rate": 8.48e-06,
      "loss": 3.7123,
      "step": 212
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2391568422317505,
      "learning_rate": 8.52e-06,
      "loss": 3.5031,
      "step": 213
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.592327356338501,
      "learning_rate": 8.560000000000001e-06,
      "loss": 3.5855,
      "step": 214
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.520735263824463,
      "learning_rate": 8.6e-06,
      "loss": 3.5614,
      "step": 215
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3161176443099976,
      "learning_rate": 8.64e-06,
      "loss": 3.4054,
      "step": 216
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2075525522232056,
      "learning_rate": 8.68e-06,
      "loss": 3.4862,
      "step": 217
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3521463871002197,
      "learning_rate": 8.720000000000001e-06,
      "loss": 3.7837,
      "step": 218
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3691067695617676,
      "learning_rate": 8.76e-06,
      "loss": 3.6105,
      "step": 219
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3659659624099731,
      "learning_rate": 8.8e-06,
      "loss": 3.4509,
      "step": 220
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4890493154525757,
      "learning_rate": 8.84e-06,
      "loss": 3.5372,
      "step": 221
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4077739715576172,
      "learning_rate": 8.880000000000001e-06,
      "loss": 3.7311,
      "step": 222
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.349009394645691,
      "learning_rate": 8.920000000000001e-06,
      "loss": 3.5487,
      "step": 223
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3305162191390991,
      "learning_rate": 8.96e-06,
      "loss": 3.7071,
      "step": 224
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.898146629333496,
      "learning_rate": 9e-06,
      "loss": 3.2984,
      "step": 225
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4863876104354858,
      "learning_rate": 9.040000000000002e-06,
      "loss": 3.5949,
      "step": 226
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3922983407974243,
      "learning_rate": 9.080000000000001e-06,
      "loss": 3.7372,
      "step": 227
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2451083660125732,
      "learning_rate": 9.12e-06,
      "loss": 3.5154,
      "step": 228
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.242289662361145,
      "learning_rate": 9.16e-06,
      "loss": 3.5404,
      "step": 229
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4195979833602905,
      "learning_rate": 9.200000000000002e-06,
      "loss": 3.4923,
      "step": 230
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2903532981872559,
      "learning_rate": 9.240000000000001e-06,
      "loss": 3.7446,
      "step": 231
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2502385377883911,
      "learning_rate": 9.280000000000001e-06,
      "loss": 3.4483,
      "step": 232
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3172341585159302,
      "learning_rate": 9.32e-06,
      "loss": 3.6858,
      "step": 233
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.306687355041504,
      "learning_rate": 9.360000000000002e-06,
      "loss": 3.6573,
      "step": 234
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1721315383911133,
      "learning_rate": 9.4e-06,
      "loss": 3.7752,
      "step": 235
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4512488842010498,
      "learning_rate": 9.440000000000001e-06,
      "loss": 3.4563,
      "step": 236
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3754931688308716,
      "learning_rate": 9.48e-06,
      "loss": 3.7198,
      "step": 237
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2005563974380493,
      "learning_rate": 9.52e-06,
      "loss": 3.5439,
      "step": 238
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1567516326904297,
      "learning_rate": 9.56e-06,
      "loss": 3.7227,
      "step": 239
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3108162879943848,
      "learning_rate": 9.600000000000001e-06,
      "loss": 3.5601,
      "step": 240
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2817221879959106,
      "learning_rate": 9.640000000000001e-06,
      "loss": 3.8437,
      "step": 241
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2722257375717163,
      "learning_rate": 9.68e-06,
      "loss": 3.6864,
      "step": 242
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.373525857925415,
      "learning_rate": 9.72e-06,
      "loss": 3.5669,
      "step": 243
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1271272897720337,
      "learning_rate": 9.760000000000001e-06,
      "loss": 3.4869,
      "step": 244
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2747565507888794,
      "learning_rate": 9.800000000000001e-06,
      "loss": 3.5606,
      "step": 245
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2551965713500977,
      "learning_rate": 9.84e-06,
      "loss": 3.5614,
      "step": 246
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.235103964805603,
      "learning_rate": 9.88e-06,
      "loss": 3.535,
      "step": 247
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1262242794036865,
      "learning_rate": 9.920000000000002e-06,
      "loss": 3.7261,
      "step": 248
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.373874306678772,
      "learning_rate": 9.960000000000001e-06,
      "loss": 3.3544,
      "step": 249
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.332512617111206,
      "learning_rate": 1e-05,
      "loss": 3.5804,
      "step": 250
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2702279090881348,
      "learning_rate": 1.004e-05,
      "loss": 3.5657,
      "step": 251
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2593179941177368,
      "learning_rate": 1.008e-05,
      "loss": 3.8026,
      "step": 252
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.323883295059204,
      "learning_rate": 1.0120000000000001e-05,
      "loss": 3.5858,
      "step": 253
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.191678524017334,
      "learning_rate": 1.0160000000000001e-05,
      "loss": 3.6422,
      "step": 254
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4066582918167114,
      "learning_rate": 1.02e-05,
      "loss": 3.2823,
      "step": 255
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.172131061553955,
      "learning_rate": 1.024e-05,
      "loss": 3.678,
      "step": 256
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3282766342163086,
      "learning_rate": 1.0280000000000002e-05,
      "loss": 3.3545,
      "step": 257
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1334161758422852,
      "learning_rate": 1.0320000000000001e-05,
      "loss": 3.4833,
      "step": 258
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3432629108428955,
      "learning_rate": 1.036e-05,
      "loss": 3.412,
      "step": 259
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2830902338027954,
      "learning_rate": 1.04e-05,
      "loss": 3.2383,
      "step": 260
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5054943561553955,
      "learning_rate": 1.0440000000000002e-05,
      "loss": 3.4144,
      "step": 261
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.213753581047058,
      "learning_rate": 1.0480000000000001e-05,
      "loss": 3.6166,
      "step": 262
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4019075632095337,
      "learning_rate": 1.0520000000000001e-05,
      "loss": 3.7725,
      "step": 263
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2914060354232788,
      "learning_rate": 1.056e-05,
      "loss": 3.5746,
      "step": 264
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2282453775405884,
      "learning_rate": 1.0600000000000002e-05,
      "loss": 3.6194,
      "step": 265
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2999927997589111,
      "learning_rate": 1.0640000000000001e-05,
      "loss": 3.7348,
      "step": 266
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.196705937385559,
      "learning_rate": 1.0680000000000001e-05,
      "loss": 3.5353,
      "step": 267
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1487430334091187,
      "learning_rate": 1.072e-05,
      "loss": 3.5693,
      "step": 268
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3711289167404175,
      "learning_rate": 1.0760000000000002e-05,
      "loss": 3.555,
      "step": 269
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.194109559059143,
      "learning_rate": 1.0800000000000002e-05,
      "loss": 3.7503,
      "step": 270
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1945223808288574,
      "learning_rate": 1.0840000000000001e-05,
      "loss": 3.5252,
      "step": 271
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1687543392181396,
      "learning_rate": 1.0880000000000001e-05,
      "loss": 3.5277,
      "step": 272
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1445167064666748,
      "learning_rate": 1.0920000000000002e-05,
      "loss": 3.6064,
      "step": 273
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.211444616317749,
      "learning_rate": 1.0960000000000002e-05,
      "loss": 3.7435,
      "step": 274
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.319568395614624,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 3.4991,
      "step": 275
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2164995670318604,
      "learning_rate": 1.1040000000000001e-05,
      "loss": 3.7545,
      "step": 276
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.341293454170227,
      "learning_rate": 1.1080000000000002e-05,
      "loss": 3.5541,
      "step": 277
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2456176280975342,
      "learning_rate": 1.1120000000000002e-05,
      "loss": 3.5604,
      "step": 278
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2874537706375122,
      "learning_rate": 1.1160000000000002e-05,
      "loss": 3.2795,
      "step": 279
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2618956565856934,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 3.6139,
      "step": 280
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1764187812805176,
      "learning_rate": 1.1240000000000002e-05,
      "loss": 3.7308,
      "step": 281
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1646764278411865,
      "learning_rate": 1.128e-05,
      "loss": 3.5606,
      "step": 282
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.194098711013794,
      "learning_rate": 1.132e-05,
      "loss": 3.612,
      "step": 283
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1776366233825684,
      "learning_rate": 1.136e-05,
      "loss": 3.5603,
      "step": 284
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2851743698120117,
      "learning_rate": 1.14e-05,
      "loss": 3.5139,
      "step": 285
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1848849058151245,
      "learning_rate": 1.144e-05,
      "loss": 3.5644,
      "step": 286
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2475007772445679,
      "learning_rate": 1.148e-05,
      "loss": 3.373,
      "step": 287
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2679738998413086,
      "learning_rate": 1.152e-05,
      "loss": 3.5912,
      "step": 288
    },
    {
      "epoch": 0.0,
      "grad_norm": 2.3935728073120117,
      "learning_rate": 1.156e-05,
      "loss": 3.6157,
      "step": 289
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3543609380722046,
      "learning_rate": 1.16e-05,
      "loss": 3.5327,
      "step": 290
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1726360321044922,
      "learning_rate": 1.164e-05,
      "loss": 3.4925,
      "step": 291
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.201158881187439,
      "learning_rate": 1.168e-05,
      "loss": 3.5155,
      "step": 292
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2419703006744385,
      "learning_rate": 1.172e-05,
      "loss": 3.3765,
      "step": 293
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.35267972946167,
      "learning_rate": 1.1760000000000001e-05,
      "loss": 3.5558,
      "step": 294
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.151383638381958,
      "learning_rate": 1.18e-05,
      "loss": 3.4974,
      "step": 295
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2320599555969238,
      "learning_rate": 1.184e-05,
      "loss": 3.5415,
      "step": 296
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.549755334854126,
      "learning_rate": 1.188e-05,
      "loss": 3.1009,
      "step": 297
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.5196317434310913,
      "learning_rate": 1.1920000000000001e-05,
      "loss": 3.4773,
      "step": 298
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3132879734039307,
      "learning_rate": 1.196e-05,
      "loss": 3.5351,
      "step": 299
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3025786876678467,
      "learning_rate": 1.2e-05,
      "loss": 3.4774,
      "step": 300
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0775986909866333,
      "learning_rate": 1.204e-05,
      "loss": 3.5666,
      "step": 301
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3229408264160156,
      "learning_rate": 1.2080000000000001e-05,
      "loss": 3.5505,
      "step": 302
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2017590999603271,
      "learning_rate": 1.2120000000000001e-05,
      "loss": 3.6368,
      "step": 303
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1562576293945312,
      "learning_rate": 1.216e-05,
      "loss": 3.3456,
      "step": 304
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.099551796913147,
      "learning_rate": 1.22e-05,
      "loss": 3.4957,
      "step": 305
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3115863800048828,
      "learning_rate": 1.2240000000000001e-05,
      "loss": 3.7417,
      "step": 306
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.149437427520752,
      "learning_rate": 1.2280000000000001e-05,
      "loss": 3.4665,
      "step": 307
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1257636547088623,
      "learning_rate": 1.232e-05,
      "loss": 3.3928,
      "step": 308
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3049019575119019,
      "learning_rate": 1.236e-05,
      "loss": 3.6235,
      "step": 309
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2018177509307861,
      "learning_rate": 1.2400000000000002e-05,
      "loss": 3.6804,
      "step": 310
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3146343231201172,
      "learning_rate": 1.2440000000000001e-05,
      "loss": 3.3652,
      "step": 311
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1633341312408447,
      "learning_rate": 1.248e-05,
      "loss": 3.3469,
      "step": 312
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2480295896530151,
      "learning_rate": 1.252e-05,
      "loss": 3.4472,
      "step": 313
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1936359405517578,
      "learning_rate": 1.2560000000000002e-05,
      "loss": 3.5908,
      "step": 314
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.131730079650879,
      "learning_rate": 1.2600000000000001e-05,
      "loss": 3.6097,
      "step": 315
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1805363893508911,
      "learning_rate": 1.2640000000000001e-05,
      "loss": 3.5206,
      "step": 316
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2584612369537354,
      "learning_rate": 1.268e-05,
      "loss": 3.6433,
      "step": 317
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2794992923736572,
      "learning_rate": 1.2720000000000002e-05,
      "loss": 3.3382,
      "step": 318
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2125407457351685,
      "learning_rate": 1.2760000000000001e-05,
      "loss": 3.4305,
      "step": 319
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.05080246925354,
      "learning_rate": 1.2800000000000001e-05,
      "loss": 3.4517,
      "step": 320
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1467257738113403,
      "learning_rate": 1.284e-05,
      "loss": 3.5012,
      "step": 321
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1289032697677612,
      "learning_rate": 1.2880000000000002e-05,
      "loss": 3.3915,
      "step": 322
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.179397702217102,
      "learning_rate": 1.2920000000000002e-05,
      "loss": 3.4378,
      "step": 323
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1573898792266846,
      "learning_rate": 1.2960000000000001e-05,
      "loss": 3.7245,
      "step": 324
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1706581115722656,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 3.5699,
      "step": 325
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1088812351226807,
      "learning_rate": 1.3040000000000002e-05,
      "loss": 3.6058,
      "step": 326
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1518479585647583,
      "learning_rate": 1.3080000000000002e-05,
      "loss": 3.2983,
      "step": 327
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1431056261062622,
      "learning_rate": 1.3120000000000001e-05,
      "loss": 3.5188,
      "step": 328
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1246886253356934,
      "learning_rate": 1.3160000000000001e-05,
      "loss": 3.7236,
      "step": 329
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2479276657104492,
      "learning_rate": 1.3200000000000002e-05,
      "loss": 3.4976,
      "step": 330
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2542630434036255,
      "learning_rate": 1.3240000000000002e-05,
      "loss": 3.432,
      "step": 331
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1454205513000488,
      "learning_rate": 1.3280000000000002e-05,
      "loss": 3.3256,
      "step": 332
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1067277193069458,
      "learning_rate": 1.3320000000000001e-05,
      "loss": 3.5658,
      "step": 333
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3475713729858398,
      "learning_rate": 1.3360000000000003e-05,
      "loss": 3.4386,
      "step": 334
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2399545907974243,
      "learning_rate": 1.3400000000000002e-05,
      "loss": 3.2551,
      "step": 335
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2514928579330444,
      "learning_rate": 1.3440000000000002e-05,
      "loss": 3.5109,
      "step": 336
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2714629173278809,
      "learning_rate": 1.3480000000000001e-05,
      "loss": 3.2125,
      "step": 337
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.4666588306427002,
      "learning_rate": 1.3520000000000003e-05,
      "loss": 3.5677,
      "step": 338
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1658459901809692,
      "learning_rate": 1.3560000000000002e-05,
      "loss": 3.4602,
      "step": 339
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1025182008743286,
      "learning_rate": 1.3600000000000002e-05,
      "loss": 3.5978,
      "step": 340
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1045827865600586,
      "learning_rate": 1.3640000000000002e-05,
      "loss": 3.5085,
      "step": 341
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0693367719650269,
      "learning_rate": 1.3680000000000003e-05,
      "loss": 3.3534,
      "step": 342
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1799782514572144,
      "learning_rate": 1.3720000000000002e-05,
      "loss": 3.4717,
      "step": 343
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1575151681900024,
      "learning_rate": 1.376e-05,
      "loss": 3.3962,
      "step": 344
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1296716928482056,
      "learning_rate": 1.38e-05,
      "loss": 3.5393,
      "step": 345
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0438934564590454,
      "learning_rate": 1.384e-05,
      "loss": 3.1742,
      "step": 346
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.229446291923523,
      "learning_rate": 1.3880000000000001e-05,
      "loss": 3.4298,
      "step": 347
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.171051025390625,
      "learning_rate": 1.392e-05,
      "loss": 3.4423,
      "step": 348
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1685518026351929,
      "learning_rate": 1.396e-05,
      "loss": 3.5351,
      "step": 349
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2045828104019165,
      "learning_rate": 1.4e-05,
      "loss": 3.6064,
      "step": 350
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.294032096862793,
      "learning_rate": 1.4040000000000001e-05,
      "loss": 3.4982,
      "step": 351
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.211124300956726,
      "learning_rate": 1.408e-05,
      "loss": 3.307,
      "step": 352
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0914918184280396,
      "learning_rate": 1.412e-05,
      "loss": 3.6368,
      "step": 353
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0677165985107422,
      "learning_rate": 1.416e-05,
      "loss": 3.2992,
      "step": 354
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.092270016670227,
      "learning_rate": 1.4200000000000001e-05,
      "loss": 3.5705,
      "step": 355
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1621726751327515,
      "learning_rate": 1.4240000000000001e-05,
      "loss": 3.4804,
      "step": 356
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2131627798080444,
      "learning_rate": 1.428e-05,
      "loss": 3.5745,
      "step": 357
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1489454507827759,
      "learning_rate": 1.432e-05,
      "loss": 3.3016,
      "step": 358
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1206746101379395,
      "learning_rate": 1.4360000000000001e-05,
      "loss": 3.3722,
      "step": 359
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1582571268081665,
      "learning_rate": 1.4400000000000001e-05,
      "loss": 3.4463,
      "step": 360
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1361464262008667,
      "learning_rate": 1.444e-05,
      "loss": 3.1879,
      "step": 361
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2100480794906616,
      "learning_rate": 1.448e-05,
      "loss": 3.4777,
      "step": 362
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3528227806091309,
      "learning_rate": 1.4520000000000002e-05,
      "loss": 3.4809,
      "step": 363
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2759846448898315,
      "learning_rate": 1.4560000000000001e-05,
      "loss": 3.3756,
      "step": 364
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.288599967956543,
      "learning_rate": 1.46e-05,
      "loss": 3.2836,
      "step": 365
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1789629459381104,
      "learning_rate": 1.464e-05,
      "loss": 3.3971,
      "step": 366
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.3310807943344116,
      "learning_rate": 1.4680000000000002e-05,
      "loss": 3.4212,
      "step": 367
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.301321268081665,
      "learning_rate": 1.4720000000000001e-05,
      "loss": 3.3676,
      "step": 368
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1009836196899414,
      "learning_rate": 1.4760000000000001e-05,
      "loss": 3.3446,
      "step": 369
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1524156332015991,
      "learning_rate": 1.48e-05,
      "loss": 3.4525,
      "step": 370
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0535404682159424,
      "learning_rate": 1.4840000000000002e-05,
      "loss": 3.4978,
      "step": 371
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1284363269805908,
      "learning_rate": 1.4880000000000002e-05,
      "loss": 3.4155,
      "step": 372
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1958726644515991,
      "learning_rate": 1.4920000000000001e-05,
      "loss": 3.4987,
      "step": 373
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1061465740203857,
      "learning_rate": 1.496e-05,
      "loss": 3.3997,
      "step": 374
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0636059045791626,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 3.4195,
      "step": 375
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1445302963256836,
      "learning_rate": 1.5040000000000002e-05,
      "loss": 3.3929,
      "step": 376
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1013340950012207,
      "learning_rate": 1.5080000000000001e-05,
      "loss": 3.1795,
      "step": 377
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1197954416275024,
      "learning_rate": 1.5120000000000001e-05,
      "loss": 3.1795,
      "step": 378
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1252715587615967,
      "learning_rate": 1.516e-05,
      "loss": 3.4812,
      "step": 379
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.097214698791504,
      "learning_rate": 1.5200000000000002e-05,
      "loss": 3.415,
      "step": 380
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1290453672409058,
      "learning_rate": 1.5240000000000001e-05,
      "loss": 3.4602,
      "step": 381
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.159507155418396,
      "learning_rate": 1.5280000000000003e-05,
      "loss": 3.2774,
      "step": 382
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1913237571716309,
      "learning_rate": 1.5320000000000002e-05,
      "loss": 3.3021,
      "step": 383
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.2278084754943848,
      "learning_rate": 1.5360000000000002e-05,
      "loss": 3.4563,
      "step": 384
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1315604448318481,
      "learning_rate": 1.54e-05,
      "loss": 3.2088,
      "step": 385
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.134002447128296,
      "learning_rate": 1.544e-05,
      "loss": 3.6347,
      "step": 386
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1199214458465576,
      "learning_rate": 1.548e-05,
      "loss": 3.4524,
      "step": 387
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0860595703125,
      "learning_rate": 1.552e-05,
      "loss": 3.3122,
      "step": 388
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1202516555786133,
      "learning_rate": 1.556e-05,
      "loss": 3.6148,
      "step": 389
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1478224992752075,
      "learning_rate": 1.5600000000000003e-05,
      "loss": 3.2501,
      "step": 390
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1141929626464844,
      "learning_rate": 1.5640000000000003e-05,
      "loss": 3.4085,
      "step": 391
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2918083667755127,
      "learning_rate": 1.5680000000000002e-05,
      "loss": 3.5685,
      "step": 392
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2793824672698975,
      "learning_rate": 1.5720000000000002e-05,
      "loss": 3.3904,
      "step": 393
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2362834215164185,
      "learning_rate": 1.576e-05,
      "loss": 3.4781,
      "step": 394
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.308270812034607,
      "learning_rate": 1.58e-05,
      "loss": 3.5602,
      "step": 395
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1067708730697632,
      "learning_rate": 1.584e-05,
      "loss": 3.4321,
      "step": 396
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0692731142044067,
      "learning_rate": 1.588e-05,
      "loss": 3.4322,
      "step": 397
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1469955444335938,
      "learning_rate": 1.5920000000000003e-05,
      "loss": 3.4058,
      "step": 398
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2370024919509888,
      "learning_rate": 1.5960000000000003e-05,
      "loss": 3.4506,
      "step": 399
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1209419965744019,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 3.4313,
      "step": 400
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3568485975265503,
      "learning_rate": 1.6040000000000002e-05,
      "loss": 3.3941,
      "step": 401
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1107232570648193,
      "learning_rate": 1.6080000000000002e-05,
      "loss": 3.4417,
      "step": 402
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0312113761901855,
      "learning_rate": 1.612e-05,
      "loss": 3.2929,
      "step": 403
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1254969835281372,
      "learning_rate": 1.616e-05,
      "loss": 3.3561,
      "step": 404
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1603095531463623,
      "learning_rate": 1.62e-05,
      "loss": 3.4339,
      "step": 405
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0845942497253418,
      "learning_rate": 1.6240000000000004e-05,
      "loss": 3.3589,
      "step": 406
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.57513427734375,
      "learning_rate": 1.628e-05,
      "loss": 3.3769,
      "step": 407
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.111190915107727,
      "learning_rate": 1.632e-05,
      "loss": 3.4955,
      "step": 408
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0654323101043701,
      "learning_rate": 1.636e-05,
      "loss": 3.4944,
      "step": 409
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1829193830490112,
      "learning_rate": 1.64e-05,
      "loss": 3.6411,
      "step": 410
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0607624053955078,
      "learning_rate": 1.6440000000000002e-05,
      "loss": 3.3698,
      "step": 411
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1578700542449951,
      "learning_rate": 1.648e-05,
      "loss": 3.2558,
      "step": 412
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1147887706756592,
      "learning_rate": 1.652e-05,
      "loss": 3.3939,
      "step": 413
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.084524393081665,
      "learning_rate": 1.656e-05,
      "loss": 3.3477,
      "step": 414
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1199545860290527,
      "learning_rate": 1.66e-05,
      "loss": 3.4376,
      "step": 415
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1180952787399292,
      "learning_rate": 1.664e-05,
      "loss": 3.3142,
      "step": 416
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.10832941532135,
      "learning_rate": 1.668e-05,
      "loss": 3.3244,
      "step": 417
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1151961088180542,
      "learning_rate": 1.672e-05,
      "loss": 3.3293,
      "step": 418
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0395761728286743,
      "learning_rate": 1.6760000000000002e-05,
      "loss": 3.412,
      "step": 419
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1816532611846924,
      "learning_rate": 1.6800000000000002e-05,
      "loss": 3.3051,
      "step": 420
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0726028680801392,
      "learning_rate": 1.684e-05,
      "loss": 3.2693,
      "step": 421
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0358428955078125,
      "learning_rate": 1.688e-05,
      "loss": 3.3168,
      "step": 422
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.158468246459961,
      "learning_rate": 1.692e-05,
      "loss": 3.2606,
      "step": 423
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1571656465530396,
      "learning_rate": 1.696e-05,
      "loss": 3.3038,
      "step": 424
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1262303590774536,
      "learning_rate": 1.7e-05,
      "loss": 3.4081,
      "step": 425
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0895349979400635,
      "learning_rate": 1.704e-05,
      "loss": 3.3891,
      "step": 426
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1159723997116089,
      "learning_rate": 1.7080000000000002e-05,
      "loss": 3.2833,
      "step": 427
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0984688997268677,
      "learning_rate": 1.7120000000000002e-05,
      "loss": 3.3503,
      "step": 428
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.143836259841919,
      "learning_rate": 1.7160000000000002e-05,
      "loss": 3.2742,
      "step": 429
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1477305889129639,
      "learning_rate": 1.72e-05,
      "loss": 3.5775,
      "step": 430
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0219895839691162,
      "learning_rate": 1.724e-05,
      "loss": 3.2543,
      "step": 431
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0214216709136963,
      "learning_rate": 1.728e-05,
      "loss": 3.3119,
      "step": 432
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1090824604034424,
      "learning_rate": 1.732e-05,
      "loss": 3.3513,
      "step": 433
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0541256666183472,
      "learning_rate": 1.736e-05,
      "loss": 3.285,
      "step": 434
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1272515058517456,
      "learning_rate": 1.7400000000000003e-05,
      "loss": 3.2318,
      "step": 435
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1367864608764648,
      "learning_rate": 1.7440000000000002e-05,
      "loss": 3.4602,
      "step": 436
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.19173264503479,
      "learning_rate": 1.7480000000000002e-05,
      "loss": 3.3355,
      "step": 437
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0798397064208984,
      "learning_rate": 1.752e-05,
      "loss": 3.4888,
      "step": 438
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0993057489395142,
      "learning_rate": 1.756e-05,
      "loss": 3.2076,
      "step": 439
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1635149717330933,
      "learning_rate": 1.76e-05,
      "loss": 3.2317,
      "step": 440
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.141228437423706,
      "learning_rate": 1.764e-05,
      "loss": 3.3939,
      "step": 441
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.039370059967041,
      "learning_rate": 1.768e-05,
      "loss": 3.3116,
      "step": 442
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1230391263961792,
      "learning_rate": 1.7720000000000003e-05,
      "loss": 3.2864,
      "step": 443
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0903788805007935,
      "learning_rate": 1.7760000000000003e-05,
      "loss": 3.3757,
      "step": 444
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1756134033203125,
      "learning_rate": 1.7800000000000002e-05,
      "loss": 3.4535,
      "step": 445
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1282016038894653,
      "learning_rate": 1.7840000000000002e-05,
      "loss": 3.4401,
      "step": 446
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0586931705474854,
      "learning_rate": 1.788e-05,
      "loss": 3.4105,
      "step": 447
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1563504934310913,
      "learning_rate": 1.792e-05,
      "loss": 3.2943,
      "step": 448
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5392509698867798,
      "learning_rate": 1.796e-05,
      "loss": 3.2643,
      "step": 449
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4400266408920288,
      "learning_rate": 1.8e-05,
      "loss": 3.1495,
      "step": 450
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0123649835586548,
      "learning_rate": 1.8040000000000003e-05,
      "loss": 3.4314,
      "step": 451
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1417686939239502,
      "learning_rate": 1.8080000000000003e-05,
      "loss": 3.2944,
      "step": 452
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0597214698791504,
      "learning_rate": 1.8120000000000003e-05,
      "loss": 3.0377,
      "step": 453
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1433954238891602,
      "learning_rate": 1.8160000000000002e-05,
      "loss": 3.4398,
      "step": 454
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2238569259643555,
      "learning_rate": 1.8200000000000002e-05,
      "loss": 3.3742,
      "step": 455
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0212583541870117,
      "learning_rate": 1.824e-05,
      "loss": 3.1939,
      "step": 456
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1697381734848022,
      "learning_rate": 1.828e-05,
      "loss": 3.2122,
      "step": 457
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.139695167541504,
      "learning_rate": 1.832e-05,
      "loss": 3.2117,
      "step": 458
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0630030632019043,
      "learning_rate": 1.8360000000000004e-05,
      "loss": 3.1543,
      "step": 459
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.11666738986969,
      "learning_rate": 1.8400000000000003e-05,
      "loss": 3.369,
      "step": 460
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1261482238769531,
      "learning_rate": 1.8440000000000003e-05,
      "loss": 3.3025,
      "step": 461
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1504830121994019,
      "learning_rate": 1.8480000000000003e-05,
      "loss": 3.3077,
      "step": 462
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0904523134231567,
      "learning_rate": 1.8520000000000002e-05,
      "loss": 3.3276,
      "step": 463
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0720056295394897,
      "learning_rate": 1.8560000000000002e-05,
      "loss": 3.2472,
      "step": 464
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1152276992797852,
      "learning_rate": 1.86e-05,
      "loss": 3.0073,
      "step": 465
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3360005617141724,
      "learning_rate": 1.864e-05,
      "loss": 3.1854,
      "step": 466
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0795246362686157,
      "learning_rate": 1.8680000000000004e-05,
      "loss": 3.309,
      "step": 467
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0418883562088013,
      "learning_rate": 1.8720000000000004e-05,
      "loss": 3.2362,
      "step": 468
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0183978080749512,
      "learning_rate": 1.876e-05,
      "loss": 3.3526,
      "step": 469
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1067613363265991,
      "learning_rate": 1.88e-05,
      "loss": 3.4482,
      "step": 470
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.119295358657837,
      "learning_rate": 1.884e-05,
      "loss": 3.3289,
      "step": 471
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1077462434768677,
      "learning_rate": 1.8880000000000002e-05,
      "loss": 3.2743,
      "step": 472
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0801949501037598,
      "learning_rate": 1.8920000000000002e-05,
      "loss": 3.3375,
      "step": 473
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1203199625015259,
      "learning_rate": 1.896e-05,
      "loss": 3.2907,
      "step": 474
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0159072875976562,
      "learning_rate": 1.9e-05,
      "loss": 3.0605,
      "step": 475
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0944480895996094,
      "learning_rate": 1.904e-05,
      "loss": 3.1754,
      "step": 476
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0354666709899902,
      "learning_rate": 1.908e-05,
      "loss": 3.3133,
      "step": 477
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2041491270065308,
      "learning_rate": 1.912e-05,
      "loss": 3.6592,
      "step": 478
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9990301728248596,
      "learning_rate": 1.916e-05,
      "loss": 3.3773,
      "step": 479
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2765320539474487,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 3.3204,
      "step": 480
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1121737957000732,
      "learning_rate": 1.9240000000000002e-05,
      "loss": 3.1801,
      "step": 481
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1510838270187378,
      "learning_rate": 1.9280000000000002e-05,
      "loss": 3.3542,
      "step": 482
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.156065583229065,
      "learning_rate": 1.932e-05,
      "loss": 3.438,
      "step": 483
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.127637267112732,
      "learning_rate": 1.936e-05,
      "loss": 3.0956,
      "step": 484
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0707669258117676,
      "learning_rate": 1.94e-05,
      "loss": 3.253,
      "step": 485
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0935925245285034,
      "learning_rate": 1.944e-05,
      "loss": 3.2898,
      "step": 486
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0307819843292236,
      "learning_rate": 1.948e-05,
      "loss": 3.3425,
      "step": 487
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0521392822265625,
      "learning_rate": 1.9520000000000003e-05,
      "loss": 3.2153,
      "step": 488
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.038244366645813,
      "learning_rate": 1.9560000000000002e-05,
      "loss": 3.4251,
      "step": 489
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.084317684173584,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 3.2158,
      "step": 490
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1398887634277344,
      "learning_rate": 1.9640000000000002e-05,
      "loss": 2.9897,
      "step": 491
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1193300485610962,
      "learning_rate": 1.968e-05,
      "loss": 3.3137,
      "step": 492
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1132800579071045,
      "learning_rate": 1.972e-05,
      "loss": 3.3197,
      "step": 493
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9895418286323547,
      "learning_rate": 1.976e-05,
      "loss": 3.2383,
      "step": 494
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.149144172668457,
      "learning_rate": 1.98e-05,
      "loss": 3.1708,
      "step": 495
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0241221189498901,
      "learning_rate": 1.9840000000000003e-05,
      "loss": 3.3645,
      "step": 496
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.153150200843811,
      "learning_rate": 1.9880000000000003e-05,
      "loss": 3.2804,
      "step": 497
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1109392642974854,
      "learning_rate": 1.9920000000000002e-05,
      "loss": 3.1023,
      "step": 498
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1298779249191284,
      "learning_rate": 1.9960000000000002e-05,
      "loss": 3.3793,
      "step": 499
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.045403242111206,
      "learning_rate": 2e-05,
      "loss": 3.2814,
      "step": 500
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0525611639022827,
      "learning_rate": 1.9999999991523645e-05,
      "loss": 3.2729,
      "step": 501
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.074338674545288,
      "learning_rate": 1.999999996609457e-05,
      "loss": 3.4091,
      "step": 502
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1678229570388794,
      "learning_rate": 1.9999999923712782e-05,
      "loss": 3.2226,
      "step": 503
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0118802785873413,
      "learning_rate": 1.9999999864378273e-05,
      "loss": 2.9811,
      "step": 504
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1826244592666626,
      "learning_rate": 1.9999999788091054e-05,
      "loss": 3.3048,
      "step": 505
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9932262301445007,
      "learning_rate": 1.9999999694851114e-05,
      "loss": 3.2886,
      "step": 506
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.073855996131897,
      "learning_rate": 1.9999999584658463e-05,
      "loss": 3.2063,
      "step": 507
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0852198600769043,
      "learning_rate": 1.9999999457513095e-05,
      "loss": 3.5164,
      "step": 508
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0323177576065063,
      "learning_rate": 1.9999999313415013e-05,
      "loss": 3.4022,
      "step": 509
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2071292400360107,
      "learning_rate": 1.9999999152364214e-05,
      "loss": 3.3726,
      "step": 510
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3262149095535278,
      "learning_rate": 1.9999998974360705e-05,
      "loss": 3.2422,
      "step": 511
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.088922142982483,
      "learning_rate": 1.9999998779404478e-05,
      "loss": 3.3487,
      "step": 512
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1320501565933228,
      "learning_rate": 1.9999998567495537e-05,
      "loss": 3.3222,
      "step": 513
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0021700859069824,
      "learning_rate": 1.9999998338633882e-05,
      "loss": 3.1366,
      "step": 514
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1446888446807861,
      "learning_rate": 1.9999998092819516e-05,
      "loss": 3.4702,
      "step": 515
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2241650819778442,
      "learning_rate": 1.9999997830052437e-05,
      "loss": 3.1338,
      "step": 516
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1227467060089111,
      "learning_rate": 1.9999997550332644e-05,
      "loss": 3.377,
      "step": 517
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.059486746788025,
      "learning_rate": 1.999999725366014e-05,
      "loss": 3.0835,
      "step": 518
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0666310787200928,
      "learning_rate": 1.9999996940034926e-05,
      "loss": 3.2973,
      "step": 519
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0681586265563965,
      "learning_rate": 1.9999996609457e-05,
      "loss": 3.3442,
      "step": 520
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1896082162857056,
      "learning_rate": 1.9999996261926366e-05,
      "loss": 3.0124,
      "step": 521
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0241807699203491,
      "learning_rate": 1.999999589744302e-05,
      "loss": 3.3813,
      "step": 522
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1239187717437744,
      "learning_rate": 1.9999995516006964e-05,
      "loss": 3.2547,
      "step": 523
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0054688453674316,
      "learning_rate": 1.99999951176182e-05,
      "loss": 2.9686,
      "step": 524
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0719677209854126,
      "learning_rate": 1.999999470227673e-05,
      "loss": 3.3273,
      "step": 525
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0321543216705322,
      "learning_rate": 1.999999426998255e-05,
      "loss": 3.315,
      "step": 526
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0999255180358887,
      "learning_rate": 1.9999993820735667e-05,
      "loss": 3.2017,
      "step": 527
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1598730087280273,
      "learning_rate": 1.999999335453608e-05,
      "loss": 3.2216,
      "step": 528
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.065443515777588,
      "learning_rate": 1.9999992871383786e-05,
      "loss": 3.2549,
      "step": 529
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0682913064956665,
      "learning_rate": 1.9999992371278786e-05,
      "loss": 3.448,
      "step": 530
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0644968748092651,
      "learning_rate": 1.9999991854221086e-05,
      "loss": 3.306,
      "step": 531
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1199527978897095,
      "learning_rate": 1.9999991320210682e-05,
      "loss": 3.2489,
      "step": 532
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.97482830286026,
      "learning_rate": 1.9999990769247578e-05,
      "loss": 3.3267,
      "step": 533
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.170732855796814,
      "learning_rate": 1.9999990201331774e-05,
      "loss": 3.499,
      "step": 534
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1308046579360962,
      "learning_rate": 1.999998961646327e-05,
      "loss": 3.4177,
      "step": 535
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0981197357177734,
      "learning_rate": 1.9999989014642067e-05,
      "loss": 3.408,
      "step": 536
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3207072019577026,
      "learning_rate": 1.999998839586817e-05,
      "loss": 3.3908,
      "step": 537
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0052565336227417,
      "learning_rate": 1.9999987760141573e-05,
      "loss": 3.3053,
      "step": 538
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0265601873397827,
      "learning_rate": 1.999998710746228e-05,
      "loss": 3.3846,
      "step": 539
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2263798713684082,
      "learning_rate": 1.9999986437830294e-05,
      "loss": 3.1216,
      "step": 540
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1435977220535278,
      "learning_rate": 1.9999985751245617e-05,
      "loss": 3.4954,
      "step": 541
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1267924308776855,
      "learning_rate": 1.9999985047708247e-05,
      "loss": 3.0813,
      "step": 542
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1178089380264282,
      "learning_rate": 1.9999984327218187e-05,
      "loss": 3.4586,
      "step": 543
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0522702932357788,
      "learning_rate": 1.9999983589775436e-05,
      "loss": 3.03,
      "step": 544
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0870553255081177,
      "learning_rate": 1.9999982835379996e-05,
      "loss": 3.0628,
      "step": 545
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9993359446525574,
      "learning_rate": 1.9999982064031875e-05,
      "loss": 3.1897,
      "step": 546
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9643563628196716,
      "learning_rate": 1.999998127573106e-05,
      "loss": 3.4555,
      "step": 547
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1366262435913086,
      "learning_rate": 1.9999980470477566e-05,
      "loss": 3.2496,
      "step": 548
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9999822974205017,
      "learning_rate": 1.999997964827139e-05,
      "loss": 3.3616,
      "step": 549
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1958757638931274,
      "learning_rate": 1.999997880911253e-05,
      "loss": 3.3764,
      "step": 550
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.069819450378418,
      "learning_rate": 1.999997795300099e-05,
      "loss": 2.9006,
      "step": 551
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9946544766426086,
      "learning_rate": 1.999997707993677e-05,
      "loss": 3.1326,
      "step": 552
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9850538372993469,
      "learning_rate": 1.9999976189919877e-05,
      "loss": 3.1592,
      "step": 553
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1059260368347168,
      "learning_rate": 1.9999975282950306e-05,
      "loss": 3.0714,
      "step": 554
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0242140293121338,
      "learning_rate": 1.999997435902806e-05,
      "loss": 3.1901,
      "step": 555
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1954764127731323,
      "learning_rate": 1.9999973418153143e-05,
      "loss": 3.249,
      "step": 556
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1860547065734863,
      "learning_rate": 1.9999972460325556e-05,
      "loss": 3.1204,
      "step": 557
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0093539953231812,
      "learning_rate": 1.99999714855453e-05,
      "loss": 3.5447,
      "step": 558
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0134291648864746,
      "learning_rate": 1.9999970493812374e-05,
      "loss": 3.187,
      "step": 559
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9965076446533203,
      "learning_rate": 1.999996948512678e-05,
      "loss": 3.1801,
      "step": 560
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.072050929069519,
      "learning_rate": 1.9999968459488527e-05,
      "loss": 3.1613,
      "step": 561
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0976959466934204,
      "learning_rate": 1.999996741689761e-05,
      "loss": 3.1231,
      "step": 562
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0736546516418457,
      "learning_rate": 1.999996635735403e-05,
      "loss": 3.3562,
      "step": 563
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0208983421325684,
      "learning_rate": 1.9999965280857795e-05,
      "loss": 3.1421,
      "step": 564
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0851699113845825,
      "learning_rate": 1.99999641874089e-05,
      "loss": 3.2907,
      "step": 565
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0521636009216309,
      "learning_rate": 1.999996307700735e-05,
      "loss": 3.2571,
      "step": 566
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.5020689964294434,
      "learning_rate": 1.9999961949653145e-05,
      "loss": 3.4008,
      "step": 567
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3361762762069702,
      "learning_rate": 1.999996080534629e-05,
      "loss": 3.3132,
      "step": 568
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0630524158477783,
      "learning_rate": 1.999995964408679e-05,
      "loss": 3.2401,
      "step": 569
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1111615896224976,
      "learning_rate": 1.9999958465874636e-05,
      "loss": 3.3262,
      "step": 570
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0372880697250366,
      "learning_rate": 1.999995727070984e-05,
      "loss": 3.4492,
      "step": 571
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0686702728271484,
      "learning_rate": 1.99999560585924e-05,
      "loss": 3.2661,
      "step": 572
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.052327275276184,
      "learning_rate": 1.999995482952232e-05,
      "loss": 3.3876,
      "step": 573
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0939970016479492,
      "learning_rate": 1.9999953583499598e-05,
      "loss": 3.2001,
      "step": 574
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9712551832199097,
      "learning_rate": 1.9999952320524237e-05,
      "loss": 3.0456,
      "step": 575
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9818482995033264,
      "learning_rate": 1.9999951040596244e-05,
      "loss": 3.2708,
      "step": 576
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1945148706436157,
      "learning_rate": 1.9999949743715618e-05,
      "loss": 3.0096,
      "step": 577
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0726872682571411,
      "learning_rate": 1.999994842988236e-05,
      "loss": 3.2404,
      "step": 578
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.13429594039917,
      "learning_rate": 1.999994709909647e-05,
      "loss": 3.2786,
      "step": 579
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0736154317855835,
      "learning_rate": 1.999994575135796e-05,
      "loss": 3.3782,
      "step": 580
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1750836372375488,
      "learning_rate": 1.9999944386666822e-05,
      "loss": 3.2,
      "step": 581
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.284869909286499,
      "learning_rate": 1.9999943005023067e-05,
      "loss": 3.2165,
      "step": 582
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1151189804077148,
      "learning_rate": 1.9999941606426686e-05,
      "loss": 3.4826,
      "step": 583
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1217048168182373,
      "learning_rate": 1.9999940190877693e-05,
      "loss": 3.3347,
      "step": 584
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0561217069625854,
      "learning_rate": 1.9999938758376084e-05,
      "loss": 3.3899,
      "step": 585
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1472972631454468,
      "learning_rate": 1.9999937308921863e-05,
      "loss": 3.5072,
      "step": 586
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0017129182815552,
      "learning_rate": 1.9999935842515033e-05,
      "loss": 3.3393,
      "step": 587
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1268882751464844,
      "learning_rate": 1.9999934359155594e-05,
      "loss": 3.4718,
      "step": 588
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1063978672027588,
      "learning_rate": 1.9999932858843553e-05,
      "loss": 3.0354,
      "step": 589
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.028096318244934,
      "learning_rate": 1.9999931341578907e-05,
      "loss": 3.1288,
      "step": 590
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2268648147583008,
      "learning_rate": 1.9999929807361665e-05,
      "loss": 3.2388,
      "step": 591
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0897763967514038,
      "learning_rate": 1.999992825619182e-05,
      "loss": 3.4536,
      "step": 592
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1435543298721313,
      "learning_rate": 1.9999926688069386e-05,
      "loss": 3.0298,
      "step": 593
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0181866884231567,
      "learning_rate": 1.999992510299436e-05,
      "loss": 3.4919,
      "step": 594
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1434595584869385,
      "learning_rate": 1.999992350096674e-05,
      "loss": 3.0459,
      "step": 595
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1505793333053589,
      "learning_rate": 1.999992188198654e-05,
      "loss": 3.3024,
      "step": 596
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1466320753097534,
      "learning_rate": 1.9999920246053753e-05,
      "loss": 3.4036,
      "step": 597
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1233444213867188,
      "learning_rate": 1.9999918593168388e-05,
      "loss": 3.2819,
      "step": 598
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2227567434310913,
      "learning_rate": 1.999991692333044e-05,
      "loss": 3.2266,
      "step": 599
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0578944683074951,
      "learning_rate": 1.9999915236539923e-05,
      "loss": 3.3104,
      "step": 600
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1311055421829224,
      "learning_rate": 1.999991353279683e-05,
      "loss": 3.2573,
      "step": 601
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3867322206497192,
      "learning_rate": 1.9999911812101168e-05,
      "loss": 2.9592,
      "step": 602
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0795447826385498,
      "learning_rate": 1.999991007445294e-05,
      "loss": 3.2904,
      "step": 603
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.134522795677185,
      "learning_rate": 1.999990831985215e-05,
      "loss": 3.2018,
      "step": 604
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0512927770614624,
      "learning_rate": 1.9999906548298797e-05,
      "loss": 3.3608,
      "step": 605
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1605185270309448,
      "learning_rate": 1.9999904759792886e-05,
      "loss": 3.0721,
      "step": 606
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2929859161376953,
      "learning_rate": 1.9999902954334424e-05,
      "loss": 3.5451,
      "step": 607
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1354010105133057,
      "learning_rate": 1.9999901131923404e-05,
      "loss": 3.1919,
      "step": 608
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0596989393234253,
      "learning_rate": 1.9999899292559842e-05,
      "loss": 3.2485,
      "step": 609
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1600462198257446,
      "learning_rate": 1.9999897436243733e-05,
      "loss": 3.0267,
      "step": 610
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0292643308639526,
      "learning_rate": 1.999989556297508e-05,
      "loss": 2.9866,
      "step": 611
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0238713026046753,
      "learning_rate": 1.999989367275389e-05,
      "loss": 3.2794,
      "step": 612
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0457857847213745,
      "learning_rate": 1.9999891765580165e-05,
      "loss": 3.0135,
      "step": 613
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1550387144088745,
      "learning_rate": 1.999988984145391e-05,
      "loss": 3.1089,
      "step": 614
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0993770360946655,
      "learning_rate": 1.9999887900375117e-05,
      "loss": 3.3797,
      "step": 615
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.051396369934082,
      "learning_rate": 1.9999885942343805e-05,
      "loss": 3.0884,
      "step": 616
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.051438808441162,
      "learning_rate": 1.999988396735997e-05,
      "loss": 3.2412,
      "step": 617
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0866461992263794,
      "learning_rate": 1.9999881975423616e-05,
      "loss": 3.3713,
      "step": 618
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9957977533340454,
      "learning_rate": 1.999987996653474e-05,
      "loss": 3.3137,
      "step": 619
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.018767237663269,
      "learning_rate": 1.9999877940693356e-05,
      "loss": 3.1426,
      "step": 620
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0204949378967285,
      "learning_rate": 1.9999875897899464e-05,
      "loss": 3.2388,
      "step": 621
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0666790008544922,
      "learning_rate": 1.9999873838153068e-05,
      "loss": 3.2225,
      "step": 622
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1070890426635742,
      "learning_rate": 1.9999871761454165e-05,
      "loss": 3.3287,
      "step": 623
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.051900863647461,
      "learning_rate": 1.9999869667802768e-05,
      "loss": 2.9893,
      "step": 624
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2028177976608276,
      "learning_rate": 1.9999867557198874e-05,
      "loss": 3.1533,
      "step": 625
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9389963746070862,
      "learning_rate": 1.9999865429642486e-05,
      "loss": 3.1816,
      "step": 626
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0444281101226807,
      "learning_rate": 1.9999863285133615e-05,
      "loss": 3.1056,
      "step": 627
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0867947340011597,
      "learning_rate": 1.9999861123672256e-05,
      "loss": 3.034,
      "step": 628
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0968680381774902,
      "learning_rate": 1.9999858945258418e-05,
      "loss": 3.2053,
      "step": 629
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.149648904800415,
      "learning_rate": 1.9999856749892103e-05,
      "loss": 2.9877,
      "step": 630
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1193212270736694,
      "learning_rate": 1.9999854537573315e-05,
      "loss": 3.224,
      "step": 631
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0147778987884521,
      "learning_rate": 1.999985230830206e-05,
      "loss": 3.3027,
      "step": 632
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0418678522109985,
      "learning_rate": 1.9999850062078338e-05,
      "loss": 3.2711,
      "step": 633
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9845024943351746,
      "learning_rate": 1.9999847798902153e-05,
      "loss": 3.1072,
      "step": 634
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1423430442810059,
      "learning_rate": 1.999984551877351e-05,
      "loss": 3.3258,
      "step": 635
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0565850734710693,
      "learning_rate": 1.999984322169241e-05,
      "loss": 3.3047,
      "step": 636
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9639279246330261,
      "learning_rate": 1.9999840907658863e-05,
      "loss": 2.9918,
      "step": 637
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0998992919921875,
      "learning_rate": 1.999983857667287e-05,
      "loss": 3.2841,
      "step": 638
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0404384136199951,
      "learning_rate": 1.9999836228734434e-05,
      "loss": 3.1924,
      "step": 639
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1271649599075317,
      "learning_rate": 1.999983386384356e-05,
      "loss": 3.4376,
      "step": 640
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0350428819656372,
      "learning_rate": 1.9999831482000252e-05,
      "loss": 3.0736,
      "step": 641
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1325254440307617,
      "learning_rate": 1.9999829083204514e-05,
      "loss": 3.0862,
      "step": 642
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0738775730133057,
      "learning_rate": 1.999982666745635e-05,
      "loss": 3.2553,
      "step": 643
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.195133924484253,
      "learning_rate": 1.999982423475576e-05,
      "loss": 3.1313,
      "step": 644
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0121746063232422,
      "learning_rate": 1.999982178510276e-05,
      "loss": 3.184,
      "step": 645
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0663244724273682,
      "learning_rate": 1.999981931849734e-05,
      "loss": 3.3653,
      "step": 646
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4494984149932861,
      "learning_rate": 1.999981683493951e-05,
      "loss": 3.155,
      "step": 647
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.01862370967865,
      "learning_rate": 1.9999814334429278e-05,
      "loss": 3.0756,
      "step": 648
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0991846323013306,
      "learning_rate": 1.9999811816966643e-05,
      "loss": 3.3071,
      "step": 649
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3675397634506226,
      "learning_rate": 1.9999809282551613e-05,
      "loss": 3.2694,
      "step": 650
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9921035766601562,
      "learning_rate": 1.9999806731184188e-05,
      "loss": 3.2863,
      "step": 651
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0774097442626953,
      "learning_rate": 1.9999804162864373e-05,
      "loss": 3.2339,
      "step": 652
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1731749773025513,
      "learning_rate": 1.9999801577592177e-05,
      "loss": 3.1479,
      "step": 653
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.2828619480133057,
      "learning_rate": 1.99997989753676e-05,
      "loss": 3.3014,
      "step": 654
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.013906717300415,
      "learning_rate": 1.999979635619065e-05,
      "loss": 3.0598,
      "step": 655
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8976019024848938,
      "learning_rate": 1.9999793720061328e-05,
      "loss": 3.2067,
      "step": 656
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4842193126678467,
      "learning_rate": 1.999979106697964e-05,
      "loss": 3.313,
      "step": 657
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.141160488128662,
      "learning_rate": 1.999978839694559e-05,
      "loss": 3.2294,
      "step": 658
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0246342420578003,
      "learning_rate": 1.999978570995918e-05,
      "loss": 2.9725,
      "step": 659
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.164686918258667,
      "learning_rate": 1.999978300602042e-05,
      "loss": 3.238,
      "step": 660
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.046270728111267,
      "learning_rate": 1.9999780285129315e-05,
      "loss": 3.1885,
      "step": 661
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0866631269454956,
      "learning_rate": 1.9999777547285858e-05,
      "loss": 3.2071,
      "step": 662
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0068321228027344,
      "learning_rate": 1.999977479249007e-05,
      "loss": 3.2825,
      "step": 663
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0435471534729004,
      "learning_rate": 1.9999772020741944e-05,
      "loss": 3.1334,
      "step": 664
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0627686977386475,
      "learning_rate": 1.999976923204149e-05,
      "loss": 3.1973,
      "step": 665
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.084165334701538,
      "learning_rate": 1.9999766426388713e-05,
      "loss": 3.1185,
      "step": 666
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1331273317337036,
      "learning_rate": 1.9999763603783614e-05,
      "loss": 3.2842,
      "step": 667
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.047084093093872,
      "learning_rate": 1.99997607642262e-05,
      "loss": 3.1179,
      "step": 668
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.091280221939087,
      "learning_rate": 1.999975790771647e-05,
      "loss": 3.3016,
      "step": 669
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2936543226242065,
      "learning_rate": 1.999975503425444e-05,
      "loss": 3.2676,
      "step": 670
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.035278081893921,
      "learning_rate": 1.9999752143840113e-05,
      "loss": 3.2416,
      "step": 671
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0383696556091309,
      "learning_rate": 1.9999749236473484e-05,
      "loss": 3.2483,
      "step": 672
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9916897416114807,
      "learning_rate": 1.999974631215457e-05,
      "loss": 3.0668,
      "step": 673
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.087020993232727,
      "learning_rate": 1.9999743370883363e-05,
      "loss": 3.1491,
      "step": 674
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1170731782913208,
      "learning_rate": 1.999974041265988e-05,
      "loss": 3.1092,
      "step": 675
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.156657099723816,
      "learning_rate": 1.999973743748412e-05,
      "loss": 3.124,
      "step": 676
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1065747737884521,
      "learning_rate": 1.9999734445356087e-05,
      "loss": 3.0942,
      "step": 677
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.091972827911377,
      "learning_rate": 1.999973143627579e-05,
      "loss": 3.1236,
      "step": 678
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1447196006774902,
      "learning_rate": 1.9999728410243234e-05,
      "loss": 3.0617,
      "step": 679
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.045579433441162,
      "learning_rate": 1.999972536725842e-05,
      "loss": 3.2783,
      "step": 680
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0152943134307861,
      "learning_rate": 1.999972230732136e-05,
      "loss": 3.1109,
      "step": 681
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0348814725875854,
      "learning_rate": 1.999971923043205e-05,
      "loss": 3.1887,
      "step": 682
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0190691947937012,
      "learning_rate": 1.9999716136590498e-05,
      "loss": 3.027,
      "step": 683
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0831905603408813,
      "learning_rate": 1.9999713025796717e-05,
      "loss": 3.2599,
      "step": 684
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0086432695388794,
      "learning_rate": 1.9999709898050703e-05,
      "loss": 3.1559,
      "step": 685
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.066694974899292,
      "learning_rate": 1.999970675335247e-05,
      "loss": 3.0911,
      "step": 686
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.012952446937561,
      "learning_rate": 1.999970359170201e-05,
      "loss": 3.3999,
      "step": 687
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9881135821342468,
      "learning_rate": 1.999970041309934e-05,
      "loss": 3.0141,
      "step": 688
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0535900592803955,
      "learning_rate": 1.9999697217544464e-05,
      "loss": 3.1195,
      "step": 689
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1084853410720825,
      "learning_rate": 1.9999694005037384e-05,
      "loss": 3.212,
      "step": 690
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0511837005615234,
      "learning_rate": 1.9999690775578105e-05,
      "loss": 3.1908,
      "step": 691
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1158353090286255,
      "learning_rate": 1.9999687529166638e-05,
      "loss": 3.0226,
      "step": 692
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9728719592094421,
      "learning_rate": 1.9999684265802985e-05,
      "loss": 3.1472,
      "step": 693
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1255990266799927,
      "learning_rate": 1.9999680985487147e-05,
      "loss": 3.0651,
      "step": 694
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0154026746749878,
      "learning_rate": 1.9999677688219136e-05,
      "loss": 3.4195,
      "step": 695
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1079884767532349,
      "learning_rate": 1.9999674373998954e-05,
      "loss": 3.074,
      "step": 696
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2181472778320312,
      "learning_rate": 1.999967104282661e-05,
      "loss": 3.2411,
      "step": 697
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0310230255126953,
      "learning_rate": 1.999966769470211e-05,
      "loss": 2.9268,
      "step": 698
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.002172589302063,
      "learning_rate": 1.9999664329625452e-05,
      "loss": 3.0703,
      "step": 699
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9950087666511536,
      "learning_rate": 1.9999660947596647e-05,
      "loss": 3.056,
      "step": 700
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0166993141174316,
      "learning_rate": 1.9999657548615708e-05,
      "loss": 3.1863,
      "step": 701
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1089979410171509,
      "learning_rate": 1.9999654132682627e-05,
      "loss": 3.1128,
      "step": 702
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0699198246002197,
      "learning_rate": 1.999965069979742e-05,
      "loss": 3.1915,
      "step": 703
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0835212469100952,
      "learning_rate": 1.9999647249960086e-05,
      "loss": 3.4351,
      "step": 704
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0230737924575806,
      "learning_rate": 1.9999643783170636e-05,
      "loss": 3.2921,
      "step": 705
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0335521697998047,
      "learning_rate": 1.9999640299429074e-05,
      "loss": 3.2668,
      "step": 706
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.127454161643982,
      "learning_rate": 1.9999636798735406e-05,
      "loss": 3.2289,
      "step": 707
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1327290534973145,
      "learning_rate": 1.9999633281089636e-05,
      "loss": 3.2948,
      "step": 708
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.043753743171692,
      "learning_rate": 1.9999629746491773e-05,
      "loss": 3.0511,
      "step": 709
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0452519655227661,
      "learning_rate": 1.9999626194941823e-05,
      "loss": 3.0479,
      "step": 710
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0872607231140137,
      "learning_rate": 1.999962262643979e-05,
      "loss": 3.0245,
      "step": 711
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0052921772003174,
      "learning_rate": 1.9999619040985677e-05,
      "loss": 3.1917,
      "step": 712
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2169442176818848,
      "learning_rate": 1.99996154385795e-05,
      "loss": 3.3243,
      "step": 713
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1536070108413696,
      "learning_rate": 1.9999611819221255e-05,
      "loss": 3.2018,
      "step": 714
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.011647343635559,
      "learning_rate": 1.9999608182910954e-05,
      "loss": 3.3003,
      "step": 715
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1960150003433228,
      "learning_rate": 1.99996045296486e-05,
      "loss": 3.1585,
      "step": 716
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1061497926712036,
      "learning_rate": 1.9999600859434198e-05,
      "loss": 3.4127,
      "step": 717
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0174319744110107,
      "learning_rate": 1.999959717226776e-05,
      "loss": 3.0548,
      "step": 718
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.045460820198059,
      "learning_rate": 1.999959346814929e-05,
      "loss": 3.092,
      "step": 719
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9723655581474304,
      "learning_rate": 1.9999589747078788e-05,
      "loss": 3.2563,
      "step": 720
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9578402042388916,
      "learning_rate": 1.9999586009056272e-05,
      "loss": 3.2876,
      "step": 721
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1418770551681519,
      "learning_rate": 1.999958225408174e-05,
      "loss": 3.2541,
      "step": 722
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0234333276748657,
      "learning_rate": 1.9999578482155196e-05,
      "loss": 2.9924,
      "step": 723
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9806786179542542,
      "learning_rate": 1.9999574693276657e-05,
      "loss": 3.2373,
      "step": 724
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.085862159729004,
      "learning_rate": 1.9999570887446118e-05,
      "loss": 3.409,
      "step": 725
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2696027755737305,
      "learning_rate": 1.9999567064663593e-05,
      "loss": 3.2683,
      "step": 726
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0275641679763794,
      "learning_rate": 1.9999563224929087e-05,
      "loss": 3.2101,
      "step": 727
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.119676113128662,
      "learning_rate": 1.9999559368242602e-05,
      "loss": 3.3613,
      "step": 728
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.145599365234375,
      "learning_rate": 1.9999555494604152e-05,
      "loss": 3.0048,
      "step": 729
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.068324327468872,
      "learning_rate": 1.9999551604013736e-05,
      "loss": 3.2387,
      "step": 730
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0058895349502563,
      "learning_rate": 1.9999547696471366e-05,
      "loss": 3.2084,
      "step": 731
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.087689757347107,
      "learning_rate": 1.9999543771977044e-05,
      "loss": 3.0763,
      "step": 732
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.097083330154419,
      "learning_rate": 1.9999539830530784e-05,
      "loss": 3.3294,
      "step": 733
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0473312139511108,
      "learning_rate": 1.9999535872132586e-05,
      "loss": 3.1883,
      "step": 734
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0746433734893799,
      "learning_rate": 1.9999531896782457e-05,
      "loss": 3.3163,
      "step": 735
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.997627854347229,
      "learning_rate": 1.999952790448041e-05,
      "loss": 2.9839,
      "step": 736
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.996619462966919,
      "learning_rate": 1.9999523895226445e-05,
      "loss": 3.139,
      "step": 737
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9734403491020203,
      "learning_rate": 1.999951986902057e-05,
      "loss": 3.3516,
      "step": 738
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.029418706893921,
      "learning_rate": 1.9999515825862794e-05,
      "loss": 3.0458,
      "step": 739
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1283535957336426,
      "learning_rate": 1.9999511765753123e-05,
      "loss": 3.2091,
      "step": 740
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0963948965072632,
      "learning_rate": 1.9999507688691566e-05,
      "loss": 3.1561,
      "step": 741
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9968599081039429,
      "learning_rate": 1.999950359467812e-05,
      "loss": 2.9972,
      "step": 742
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0523761510849,
      "learning_rate": 1.999949948371281e-05,
      "loss": 3.0072,
      "step": 743
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0525438785552979,
      "learning_rate": 1.9999495355795627e-05,
      "loss": 3.1929,
      "step": 744
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.240771770477295,
      "learning_rate": 1.9999491210926582e-05,
      "loss": 3.311,
      "step": 745
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9531870484352112,
      "learning_rate": 1.9999487049105686e-05,
      "loss": 3.1124,
      "step": 746
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2000648975372314,
      "learning_rate": 1.9999482870332945e-05,
      "loss": 3.2944,
      "step": 747
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0814357995986938,
      "learning_rate": 1.9999478674608363e-05,
      "loss": 3.0457,
      "step": 748
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0234920978546143,
      "learning_rate": 1.9999474461931948e-05,
      "loss": 3.2032,
      "step": 749
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.029374122619629,
      "learning_rate": 1.9999470232303707e-05,
      "loss": 3.2403,
      "step": 750
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.113174557685852,
      "learning_rate": 1.999946598572365e-05,
      "loss": 3.0675,
      "step": 751
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0064685344696045,
      "learning_rate": 1.999946172219178e-05,
      "loss": 3.3013,
      "step": 752
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0686792135238647,
      "learning_rate": 1.9999457441708112e-05,
      "loss": 3.1323,
      "step": 753
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9999473094940186,
      "learning_rate": 1.9999453144272644e-05,
      "loss": 3.2899,
      "step": 754
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1043342351913452,
      "learning_rate": 1.9999448829885387e-05,
      "loss": 2.9958,
      "step": 755
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.127708077430725,
      "learning_rate": 1.999944449854635e-05,
      "loss": 2.9845,
      "step": 756
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.013487458229065,
      "learning_rate": 1.9999440150255537e-05,
      "loss": 2.9731,
      "step": 757
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.054702877998352,
      "learning_rate": 1.999943578501296e-05,
      "loss": 3.1173,
      "step": 758
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.081831932067871,
      "learning_rate": 1.999943140281862e-05,
      "loss": 3.325,
      "step": 759
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.05030357837677,
      "learning_rate": 1.999942700367253e-05,
      "loss": 3.1074,
      "step": 760
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.106073260307312,
      "learning_rate": 1.9999422587574694e-05,
      "loss": 3.1991,
      "step": 761
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0685596466064453,
      "learning_rate": 1.999941815452512e-05,
      "loss": 3.1449,
      "step": 762
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9924586415290833,
      "learning_rate": 1.999941370452382e-05,
      "loss": 3.0158,
      "step": 763
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0630189180374146,
      "learning_rate": 1.99994092375708e-05,
      "loss": 3.4044,
      "step": 764
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0867211818695068,
      "learning_rate": 1.999940475366606e-05,
      "loss": 3.347,
      "step": 765
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0051946640014648,
      "learning_rate": 1.9999400252809614e-05,
      "loss": 3.1418,
      "step": 766
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0404176712036133,
      "learning_rate": 1.999939573500147e-05,
      "loss": 3.1192,
      "step": 767
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0750337839126587,
      "learning_rate": 1.999939120024163e-05,
      "loss": 2.9981,
      "step": 768
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0580825805664062,
      "learning_rate": 1.9999386648530114e-05,
      "loss": 3.3083,
      "step": 769
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0019561052322388,
      "learning_rate": 1.9999382079866917e-05,
      "loss": 3.0852,
      "step": 770
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0631123781204224,
      "learning_rate": 1.9999377494252053e-05,
      "loss": 2.964,
      "step": 771
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.012953519821167,
      "learning_rate": 1.999937289168553e-05,
      "loss": 2.9653,
      "step": 772
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0415050983428955,
      "learning_rate": 1.999936827216735e-05,
      "loss": 3.2292,
      "step": 773
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.075244426727295,
      "learning_rate": 1.9999363635697525e-05,
      "loss": 3.1982,
      "step": 774
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0457950830459595,
      "learning_rate": 1.9999358982276067e-05,
      "loss": 3.1493,
      "step": 775
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9632299542427063,
      "learning_rate": 1.9999354311902977e-05,
      "loss": 3.1289,
      "step": 776
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9733619689941406,
      "learning_rate": 1.9999349624578267e-05,
      "loss": 3.1784,
      "step": 777
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3526334762573242,
      "learning_rate": 1.999934492030194e-05,
      "loss": 3.0358,
      "step": 778
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9875199198722839,
      "learning_rate": 1.9999340199074013e-05,
      "loss": 3.1378,
      "step": 779
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1030664443969727,
      "learning_rate": 1.999933546089449e-05,
      "loss": 3.1557,
      "step": 780
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.110348105430603,
      "learning_rate": 1.9999330705763368e-05,
      "loss": 3.1312,
      "step": 781
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0914689302444458,
      "learning_rate": 1.9999325933680672e-05,
      "loss": 3.3995,
      "step": 782
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9957778453826904,
      "learning_rate": 1.99993211446464e-05,
      "loss": 3.1872,
      "step": 783
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9999958276748657,
      "learning_rate": 1.9999316338660564e-05,
      "loss": 3.2151,
      "step": 784
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0259565114974976,
      "learning_rate": 1.9999311515723173e-05,
      "loss": 3.0721,
      "step": 785
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9353435039520264,
      "learning_rate": 1.999930667583423e-05,
      "loss": 3.0773,
      "step": 786
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9530938267707825,
      "learning_rate": 1.9999301818993747e-05,
      "loss": 3.0476,
      "step": 787
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9848134517669678,
      "learning_rate": 1.9999296945201734e-05,
      "loss": 3.1466,
      "step": 788
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0199733972549438,
      "learning_rate": 1.9999292054458196e-05,
      "loss": 3.0521,
      "step": 789
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0415960550308228,
      "learning_rate": 1.999928714676314e-05,
      "loss": 3.1217,
      "step": 790
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0888910293579102,
      "learning_rate": 1.999928222211658e-05,
      "loss": 3.014,
      "step": 791
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9574097394943237,
      "learning_rate": 1.9999277280518515e-05,
      "loss": 2.9564,
      "step": 792
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.219873070716858,
      "learning_rate": 1.9999272321968967e-05,
      "loss": 3.2313,
      "step": 793
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1316627264022827,
      "learning_rate": 1.999926734646793e-05,
      "loss": 2.9145,
      "step": 794
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.097689151763916,
      "learning_rate": 1.9999262354015424e-05,
      "loss": 3.1844,
      "step": 795
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0918803215026855,
      "learning_rate": 1.999925734461145e-05,
      "loss": 3.1341,
      "step": 796
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.019296407699585,
      "learning_rate": 1.9999252318256017e-05,
      "loss": 3.0551,
      "step": 797
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0578861236572266,
      "learning_rate": 1.999924727494914e-05,
      "loss": 3.1418,
      "step": 798
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9975650906562805,
      "learning_rate": 1.999924221469082e-05,
      "loss": 3.0727,
      "step": 799
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.968525767326355,
      "learning_rate": 1.999923713748107e-05,
      "loss": 3.0205,
      "step": 800
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9585459232330322,
      "learning_rate": 1.9999232043319895e-05,
      "loss": 3.2974,
      "step": 801
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0550745725631714,
      "learning_rate": 1.999922693220731e-05,
      "loss": 3.3901,
      "step": 802
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0038493871688843,
      "learning_rate": 1.999922180414332e-05,
      "loss": 3.1784,
      "step": 803
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0357584953308105,
      "learning_rate": 1.9999216659127928e-05,
      "loss": 3.1776,
      "step": 804
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1534866094589233,
      "learning_rate": 1.999921149716115e-05,
      "loss": 3.1713,
      "step": 805
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0014898777008057,
      "learning_rate": 1.9999206318242994e-05,
      "loss": 3.0527,
      "step": 806
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0488214492797852,
      "learning_rate": 1.9999201122373465e-05,
      "loss": 3.0102,
      "step": 807
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0452767610549927,
      "learning_rate": 1.999919590955258e-05,
      "loss": 3.2423,
      "step": 808
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1253819465637207,
      "learning_rate": 1.9999190679780338e-05,
      "loss": 3.2028,
      "step": 809
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0848350524902344,
      "learning_rate": 1.9999185433056754e-05,
      "loss": 3.3136,
      "step": 810
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0288218259811401,
      "learning_rate": 1.999918016938183e-05,
      "loss": 3.273,
      "step": 811
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0790340900421143,
      "learning_rate": 1.9999174888755587e-05,
      "loss": 3.1152,
      "step": 812
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.062048077583313,
      "learning_rate": 1.999916959117802e-05,
      "loss": 2.8146,
      "step": 813
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0196423530578613,
      "learning_rate": 1.999916427664915e-05,
      "loss": 2.9367,
      "step": 814
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.074269413948059,
      "learning_rate": 1.999915894516898e-05,
      "loss": 2.9399,
      "step": 815
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.071152687072754,
      "learning_rate": 1.9999153596737518e-05,
      "loss": 3.1402,
      "step": 816
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0123546123504639,
      "learning_rate": 1.9999148231354775e-05,
      "loss": 3.2035,
      "step": 817
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0399994850158691,
      "learning_rate": 1.9999142849020762e-05,
      "loss": 3.082,
      "step": 818
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9657137989997864,
      "learning_rate": 1.9999137449735485e-05,
      "loss": 2.6812,
      "step": 819
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0127472877502441,
      "learning_rate": 1.9999132033498954e-05,
      "loss": 3.3011,
      "step": 820
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0514609813690186,
      "learning_rate": 1.999912660031118e-05,
      "loss": 3.1889,
      "step": 821
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9774591326713562,
      "learning_rate": 1.9999121150172166e-05,
      "loss": 3.218,
      "step": 822
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.105299949645996,
      "learning_rate": 1.9999115683081932e-05,
      "loss": 3.0536,
      "step": 823
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9871382713317871,
      "learning_rate": 1.9999110199040478e-05,
      "loss": 3.1451,
      "step": 824
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0164860486984253,
      "learning_rate": 1.9999104698047818e-05,
      "loss": 2.8478,
      "step": 825
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0730077028274536,
      "learning_rate": 1.9999099180103955e-05,
      "loss": 2.875,
      "step": 826
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0586016178131104,
      "learning_rate": 1.999909364520891e-05,
      "loss": 3.2903,
      "step": 827
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0240336656570435,
      "learning_rate": 1.9999088093362682e-05,
      "loss": 3.2131,
      "step": 828
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.131431221961975,
      "learning_rate": 1.9999082524565282e-05,
      "loss": 3.031,
      "step": 829
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9808217287063599,
      "learning_rate": 1.9999076938816727e-05,
      "loss": 3.1685,
      "step": 830
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9978897571563721,
      "learning_rate": 1.9999071336117016e-05,
      "loss": 3.2465,
      "step": 831
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0984476804733276,
      "learning_rate": 1.9999065716466167e-05,
      "loss": 3.3265,
      "step": 832
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.989495038986206,
      "learning_rate": 1.9999060079864182e-05,
      "loss": 3.1452,
      "step": 833
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0807362794876099,
      "learning_rate": 1.9999054426311076e-05,
      "loss": 3.0966,
      "step": 834
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1280035972595215,
      "learning_rate": 1.999904875580686e-05,
      "loss": 3.0197,
      "step": 835
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.199709177017212,
      "learning_rate": 1.9999043068351538e-05,
      "loss": 2.9553,
      "step": 836
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1124334335327148,
      "learning_rate": 1.999903736394512e-05,
      "loss": 3.054,
      "step": 837
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9799936413764954,
      "learning_rate": 1.999903164258762e-05,
      "loss": 3.1409,
      "step": 838
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0560842752456665,
      "learning_rate": 1.9999025904279047e-05,
      "loss": 3.1497,
      "step": 839
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9796755313873291,
      "learning_rate": 1.9999020149019412e-05,
      "loss": 3.1218,
      "step": 840
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0646061897277832,
      "learning_rate": 1.9999014376808716e-05,
      "loss": 3.099,
      "step": 841
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1293495893478394,
      "learning_rate": 1.999900858764698e-05,
      "loss": 3.0704,
      "step": 842
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1241785287857056,
      "learning_rate": 1.9999002781534205e-05,
      "loss": 3.0529,
      "step": 843
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.123936414718628,
      "learning_rate": 1.9998996958470406e-05,
      "loss": 2.8113,
      "step": 844
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1695793867111206,
      "learning_rate": 1.999899111845559e-05,
      "loss": 3.1405,
      "step": 845
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0754872560501099,
      "learning_rate": 1.999898526148977e-05,
      "loss": 2.9694,
      "step": 846
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0474941730499268,
      "learning_rate": 1.9998979387572954e-05,
      "loss": 3.111,
      "step": 847
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9941449165344238,
      "learning_rate": 1.999897349670515e-05,
      "loss": 3.0259,
      "step": 848
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9725133180618286,
      "learning_rate": 1.9998967588886374e-05,
      "loss": 3.2122,
      "step": 849
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9824245572090149,
      "learning_rate": 1.9998961664116632e-05,
      "loss": 3.0915,
      "step": 850
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.993998646736145,
      "learning_rate": 1.9998955722395935e-05,
      "loss": 3.0253,
      "step": 851
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2694662809371948,
      "learning_rate": 1.999894976372429e-05,
      "loss": 3.2393,
      "step": 852
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.065221905708313,
      "learning_rate": 1.9998943788101708e-05,
      "loss": 3.0304,
      "step": 853
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1427961587905884,
      "learning_rate": 1.9998937795528205e-05,
      "loss": 3.0605,
      "step": 854
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.035929560661316,
      "learning_rate": 1.9998931786003784e-05,
      "loss": 2.9488,
      "step": 855
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0564244985580444,
      "learning_rate": 1.9998925759528455e-05,
      "loss": 2.9922,
      "step": 856
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1052207946777344,
      "learning_rate": 1.9998919716102238e-05,
      "loss": 3.1557,
      "step": 857
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1231622695922852,
      "learning_rate": 1.9998913655725133e-05,
      "loss": 2.9679,
      "step": 858
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0772508382797241,
      "learning_rate": 1.9998907578397155e-05,
      "loss": 2.9896,
      "step": 859
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0578254461288452,
      "learning_rate": 1.999890148411831e-05,
      "loss": 3.1562,
      "step": 860
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0543938875198364,
      "learning_rate": 1.9998895372888612e-05,
      "loss": 3.0572,
      "step": 861
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1150339841842651,
      "learning_rate": 1.9998889244708072e-05,
      "loss": 3.198,
      "step": 862
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4759308099746704,
      "learning_rate": 1.9998883099576698e-05,
      "loss": 3.3979,
      "step": 863
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0597894191741943,
      "learning_rate": 1.99988769374945e-05,
      "loss": 3.1409,
      "step": 864
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0274927616119385,
      "learning_rate": 1.9998870758461494e-05,
      "loss": 3.2428,
      "step": 865
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1657627820968628,
      "learning_rate": 1.9998864562477684e-05,
      "loss": 2.9295,
      "step": 866
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1455183029174805,
      "learning_rate": 1.9998858349543085e-05,
      "loss": 3.1528,
      "step": 867
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0377023220062256,
      "learning_rate": 1.9998852119657703e-05,
      "loss": 3.0227,
      "step": 868
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9950929284095764,
      "learning_rate": 1.9998845872821552e-05,
      "loss": 3.1391,
      "step": 869
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0403600931167603,
      "learning_rate": 1.9998839609034642e-05,
      "loss": 2.9875,
      "step": 870
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0606958866119385,
      "learning_rate": 1.9998833328296983e-05,
      "loss": 3.1479,
      "step": 871
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9720853567123413,
      "learning_rate": 1.9998827030608585e-05,
      "loss": 3.0621,
      "step": 872
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.09648597240448,
      "learning_rate": 1.999882071596946e-05,
      "loss": 3.228,
      "step": 873
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9491646885871887,
      "learning_rate": 1.9998814384379615e-05,
      "loss": 3.0685,
      "step": 874
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9981906414031982,
      "learning_rate": 1.999880803583907e-05,
      "loss": 2.9583,
      "step": 875
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.102733850479126,
      "learning_rate": 1.9998801670347826e-05,
      "loss": 3.1059,
      "step": 876
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0480657815933228,
      "learning_rate": 1.9998795287905898e-05,
      "loss": 3.0168,
      "step": 877
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9706751704216003,
      "learning_rate": 1.99987888885133e-05,
      "loss": 3.1535,
      "step": 878
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0096923112869263,
      "learning_rate": 1.9998782472170032e-05,
      "loss": 3.078,
      "step": 879
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1365385055541992,
      "learning_rate": 1.999877603887612e-05,
      "loss": 2.9824,
      "step": 880
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9797784686088562,
      "learning_rate": 1.999876958863156e-05,
      "loss": 3.0677,
      "step": 881
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9815528392791748,
      "learning_rate": 1.9998763121436374e-05,
      "loss": 2.9423,
      "step": 882
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0344301462173462,
      "learning_rate": 1.9998756637290566e-05,
      "loss": 3.1373,
      "step": 883
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.938645601272583,
      "learning_rate": 1.9998750136194155e-05,
      "loss": 2.8555,
      "step": 884
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9533089995384216,
      "learning_rate": 1.999874361814714e-05,
      "loss": 3.3437,
      "step": 885
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9755212068557739,
      "learning_rate": 1.9998737083149545e-05,
      "loss": 3.1386,
      "step": 886
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9770657420158386,
      "learning_rate": 1.9998730531201372e-05,
      "loss": 3.1297,
      "step": 887
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9919024705886841,
      "learning_rate": 1.9998723962302637e-05,
      "loss": 3.2915,
      "step": 888
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1359142065048218,
      "learning_rate": 1.9998717376453346e-05,
      "loss": 3.2807,
      "step": 889
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0236159563064575,
      "learning_rate": 1.9998710773653517e-05,
      "loss": 2.9235,
      "step": 890
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9844536185264587,
      "learning_rate": 1.9998704153903155e-05,
      "loss": 2.7594,
      "step": 891
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0131374597549438,
      "learning_rate": 1.9998697517202276e-05,
      "loss": 3.408,
      "step": 892
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0107240676879883,
      "learning_rate": 1.999869086355089e-05,
      "loss": 2.9963,
      "step": 893
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.995485246181488,
      "learning_rate": 1.9998684192949005e-05,
      "loss": 3.0053,
      "step": 894
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9991757869720459,
      "learning_rate": 1.9998677505396634e-05,
      "loss": 3.0326,
      "step": 895
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9663136005401611,
      "learning_rate": 1.999867080089379e-05,
      "loss": 3.2188,
      "step": 896
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0086252689361572,
      "learning_rate": 1.9998664079440483e-05,
      "loss": 3.0925,
      "step": 897
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2122036218643188,
      "learning_rate": 1.9998657341036726e-05,
      "loss": 3.2296,
      "step": 898
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2155429124832153,
      "learning_rate": 1.999865058568253e-05,
      "loss": 3.1625,
      "step": 899
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0298625230789185,
      "learning_rate": 1.9998643813377902e-05,
      "loss": 3.0535,
      "step": 900
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9818474054336548,
      "learning_rate": 1.999863702412286e-05,
      "loss": 3.187,
      "step": 901
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1612097024917603,
      "learning_rate": 1.999863021791741e-05,
      "loss": 3.1847,
      "step": 902
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.056449294090271,
      "learning_rate": 1.999862339476157e-05,
      "loss": 3.3241,
      "step": 903
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0618267059326172,
      "learning_rate": 1.9998616554655347e-05,
      "loss": 3.2622,
      "step": 904
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0239644050598145,
      "learning_rate": 1.999860969759875e-05,
      "loss": 3.289,
      "step": 905
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0209020376205444,
      "learning_rate": 1.9998602823591796e-05,
      "loss": 3.3375,
      "step": 906
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0252418518066406,
      "learning_rate": 1.9998595932634495e-05,
      "loss": 3.1272,
      "step": 907
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9600666165351868,
      "learning_rate": 1.999858902472686e-05,
      "loss": 3.1422,
      "step": 908
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0066412687301636,
      "learning_rate": 1.99985820998689e-05,
      "loss": 2.9859,
      "step": 909
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0506565570831299,
      "learning_rate": 1.9998575158060624e-05,
      "loss": 3.0529,
      "step": 910
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9152898788452148,
      "learning_rate": 1.999856819930205e-05,
      "loss": 3.0179,
      "step": 911
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.061489224433899,
      "learning_rate": 1.999856122359319e-05,
      "loss": 2.9662,
      "step": 912
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9449135065078735,
      "learning_rate": 1.999855423093405e-05,
      "loss": 3.1784,
      "step": 913
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1260766983032227,
      "learning_rate": 1.9998547221324648e-05,
      "loss": 3.1711,
      "step": 914
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0641288757324219,
      "learning_rate": 1.999854019476499e-05,
      "loss": 3.1782,
      "step": 915
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9510896801948547,
      "learning_rate": 1.9998533151255092e-05,
      "loss": 3.2776,
      "step": 916
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0585248470306396,
      "learning_rate": 1.9998526090794965e-05,
      "loss": 3.0682,
      "step": 917
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9522992968559265,
      "learning_rate": 1.999851901338462e-05,
      "loss": 2.9874,
      "step": 918
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9869092106819153,
      "learning_rate": 1.9998511919024068e-05,
      "loss": 3.0901,
      "step": 919
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0681264400482178,
      "learning_rate": 1.9998504807713325e-05,
      "loss": 3.1419,
      "step": 920
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.070059061050415,
      "learning_rate": 1.9998497679452402e-05,
      "loss": 3.1714,
      "step": 921
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0250158309936523,
      "learning_rate": 1.999849053424131e-05,
      "loss": 3.1688,
      "step": 922
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.998481035232544,
      "learning_rate": 1.9998483372080058e-05,
      "loss": 3.1019,
      "step": 923
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1057493686676025,
      "learning_rate": 1.999847619296866e-05,
      "loss": 3.0739,
      "step": 924
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9569240212440491,
      "learning_rate": 1.999846899690713e-05,
      "loss": 3.0738,
      "step": 925
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.065171718597412,
      "learning_rate": 1.9998461783895486e-05,
      "loss": 2.908,
      "step": 926
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1213998794555664,
      "learning_rate": 1.9998454553933726e-05,
      "loss": 3.0497,
      "step": 927
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0831985473632812,
      "learning_rate": 1.9998447307021874e-05,
      "loss": 3.2129,
      "step": 928
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.04285728931427,
      "learning_rate": 1.9998440043159938e-05,
      "loss": 3.1506,
      "step": 929
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.023958683013916,
      "learning_rate": 1.9998432762347927e-05,
      "loss": 2.9848,
      "step": 930
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1389471292495728,
      "learning_rate": 1.9998425464585862e-05,
      "loss": 2.9191,
      "step": 931
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0419585704803467,
      "learning_rate": 1.9998418149873746e-05,
      "loss": 2.9287,
      "step": 932
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.150151252746582,
      "learning_rate": 1.9998410818211596e-05,
      "loss": 3.0218,
      "step": 933
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0700383186340332,
      "learning_rate": 1.999840346959943e-05,
      "loss": 3.1991,
      "step": 934
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.01340913772583,
      "learning_rate": 1.999839610403725e-05,
      "loss": 3.2167,
      "step": 935
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0401850938796997,
      "learning_rate": 1.9998388721525072e-05,
      "loss": 3.1436,
      "step": 936
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9867053031921387,
      "learning_rate": 1.9998381322062912e-05,
      "loss": 3.0502,
      "step": 937
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9582052826881409,
      "learning_rate": 1.999837390565078e-05,
      "loss": 3.1255,
      "step": 938
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0463893413543701,
      "learning_rate": 1.9998366472288687e-05,
      "loss": 3.0312,
      "step": 939
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0714845657348633,
      "learning_rate": 1.999835902197665e-05,
      "loss": 3.226,
      "step": 940
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9688833951950073,
      "learning_rate": 1.9998351554714675e-05,
      "loss": 3.182,
      "step": 941
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0801643133163452,
      "learning_rate": 1.999834407050278e-05,
      "loss": 3.1719,
      "step": 942
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9893796443939209,
      "learning_rate": 1.9998336569340978e-05,
      "loss": 3.1574,
      "step": 943
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.02359938621521,
      "learning_rate": 1.999832905122928e-05,
      "loss": 3.0986,
      "step": 944
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0436512231826782,
      "learning_rate": 1.9998321516167697e-05,
      "loss": 3.2444,
      "step": 945
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.12213134765625,
      "learning_rate": 1.9998313964156245e-05,
      "loss": 3.1825,
      "step": 946
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0384093523025513,
      "learning_rate": 1.9998306395194932e-05,
      "loss": 3.0949,
      "step": 947
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0173896551132202,
      "learning_rate": 1.999829880928378e-05,
      "loss": 3.0838,
      "step": 948
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0076509714126587,
      "learning_rate": 1.999829120642279e-05,
      "loss": 3.0632,
      "step": 949
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.147940754890442,
      "learning_rate": 1.9998283586611985e-05,
      "loss": 2.9556,
      "step": 950
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0102530717849731,
      "learning_rate": 1.9998275949851373e-05,
      "loss": 3.0805,
      "step": 951
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0244728326797485,
      "learning_rate": 1.9998268296140967e-05,
      "loss": 2.9555,
      "step": 952
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0225794315338135,
      "learning_rate": 1.9998260625480782e-05,
      "loss": 3.0697,
      "step": 953
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0003045797348022,
      "learning_rate": 1.9998252937870827e-05,
      "loss": 2.9654,
      "step": 954
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9526498913764954,
      "learning_rate": 1.9998245233311123e-05,
      "loss": 2.8704,
      "step": 955
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0825307369232178,
      "learning_rate": 1.9998237511801673e-05,
      "loss": 2.7542,
      "step": 956
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.046280026435852,
      "learning_rate": 1.9998229773342497e-05,
      "loss": 3.1528,
      "step": 957
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9904586672782898,
      "learning_rate": 1.9998222017933603e-05,
      "loss": 3.1751,
      "step": 958
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9879342913627625,
      "learning_rate": 1.999821424557501e-05,
      "loss": 3.0847,
      "step": 959
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9452928304672241,
      "learning_rate": 1.999820645626673e-05,
      "loss": 3.0119,
      "step": 960
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0251743793487549,
      "learning_rate": 1.9998198650008773e-05,
      "loss": 3.3489,
      "step": 961
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9638100862503052,
      "learning_rate": 1.9998190826801152e-05,
      "loss": 2.9895,
      "step": 962
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9774160385131836,
      "learning_rate": 1.9998182986643884e-05,
      "loss": 3.0615,
      "step": 963
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.106619954109192,
      "learning_rate": 1.9998175129536982e-05,
      "loss": 3.0007,
      "step": 964
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0081579685211182,
      "learning_rate": 1.9998167255480455e-05,
      "loss": 2.8741,
      "step": 965
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0640190839767456,
      "learning_rate": 1.9998159364474315e-05,
      "loss": 3.0623,
      "step": 966
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0781960487365723,
      "learning_rate": 1.9998151456518585e-05,
      "loss": 3.0512,
      "step": 967
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9891348481178284,
      "learning_rate": 1.9998143531613275e-05,
      "loss": 2.8786,
      "step": 968
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0898860692977905,
      "learning_rate": 1.9998135589758392e-05,
      "loss": 2.9861,
      "step": 969
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9234252572059631,
      "learning_rate": 1.9998127630953953e-05,
      "loss": 2.9413,
      "step": 970
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0397675037384033,
      "learning_rate": 1.9998119655199978e-05,
      "loss": 3.1743,
      "step": 971
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9764729738235474,
      "learning_rate": 1.999811166249647e-05,
      "loss": 3.0237,
      "step": 972
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1814281940460205,
      "learning_rate": 1.9998103652843447e-05,
      "loss": 3.1876,
      "step": 973
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1550933122634888,
      "learning_rate": 1.999809562624092e-05,
      "loss": 2.9379,
      "step": 974
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0396941900253296,
      "learning_rate": 1.999808758268891e-05,
      "loss": 2.9703,
      "step": 975
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.09954035282135,
      "learning_rate": 1.9998079522187426e-05,
      "loss": 3.0828,
      "step": 976
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9824070334434509,
      "learning_rate": 1.9998071444736483e-05,
      "loss": 2.9559,
      "step": 977
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9607374668121338,
      "learning_rate": 1.9998063350336093e-05,
      "loss": 3.0212,
      "step": 978
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0776690244674683,
      "learning_rate": 1.9998055238986268e-05,
      "loss": 3.0905,
      "step": 979
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.050789475440979,
      "learning_rate": 1.9998047110687026e-05,
      "loss": 2.9722,
      "step": 980
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9717557430267334,
      "learning_rate": 1.9998038965438376e-05,
      "loss": 2.9707,
      "step": 981
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0492526292800903,
      "learning_rate": 1.9998030803240337e-05,
      "loss": 3.1067,
      "step": 982
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9739171862602234,
      "learning_rate": 1.9998022624092918e-05,
      "loss": 3.1528,
      "step": 983
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.027417540550232,
      "learning_rate": 1.999801442799614e-05,
      "loss": 3.0872,
      "step": 984
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9737182855606079,
      "learning_rate": 1.9998006214950006e-05,
      "loss": 2.9882,
      "step": 985
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0208163261413574,
      "learning_rate": 1.999799798495454e-05,
      "loss": 3.1325,
      "step": 986
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0675897598266602,
      "learning_rate": 1.9997989738009748e-05,
      "loss": 3.1466,
      "step": 987
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.099690556526184,
      "learning_rate": 1.9997981474115652e-05,
      "loss": 3.1724,
      "step": 988
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.039630651473999,
      "learning_rate": 1.999797319327226e-05,
      "loss": 3.2248,
      "step": 989
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0219818353652954,
      "learning_rate": 1.9997964895479592e-05,
      "loss": 3.1801,
      "step": 990
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9738742113113403,
      "learning_rate": 1.999795658073765e-05,
      "loss": 3.0354,
      "step": 991
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1436320543289185,
      "learning_rate": 1.9997948249046465e-05,
      "loss": 3.1806,
      "step": 992
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9352022409439087,
      "learning_rate": 1.999793990040604e-05,
      "loss": 3.0667,
      "step": 993
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9627606272697449,
      "learning_rate": 1.9997931534816385e-05,
      "loss": 3.1678,
      "step": 994
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0941979885101318,
      "learning_rate": 1.9997923152277527e-05,
      "loss": 3.0698,
      "step": 995
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9811708331108093,
      "learning_rate": 1.9997914752789475e-05,
      "loss": 3.0901,
      "step": 996
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1066480875015259,
      "learning_rate": 1.9997906336352237e-05,
      "loss": 3.1629,
      "step": 997
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9770456552505493,
      "learning_rate": 1.999789790296584e-05,
      "loss": 3.0623,
      "step": 998
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.940777063369751,
      "learning_rate": 1.999788945263028e-05,
      "loss": 3.1546,
      "step": 999
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0305061340332031,
      "learning_rate": 1.999788098534559e-05,
      "loss": 3.1134,
      "step": 1000
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0063836574554443,
      "learning_rate": 1.9997872501111773e-05,
      "loss": 3.0766,
      "step": 1001
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0578796863555908,
      "learning_rate": 1.9997863999928844e-05,
      "loss": 3.0812,
      "step": 1002
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0724766254425049,
      "learning_rate": 1.9997855481796827e-05,
      "loss": 2.9987,
      "step": 1003
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9653846621513367,
      "learning_rate": 1.9997846946715724e-05,
      "loss": 2.968,
      "step": 1004
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1871520280838013,
      "learning_rate": 1.999783839468556e-05,
      "loss": 2.8655,
      "step": 1005
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.011651635169983,
      "learning_rate": 1.999782982570634e-05,
      "loss": 3.087,
      "step": 1006
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1541516780853271,
      "learning_rate": 1.9997821239778084e-05,
      "loss": 2.9942,
      "step": 1007
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.991147518157959,
      "learning_rate": 1.999781263690081e-05,
      "loss": 3.1452,
      "step": 1008
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.015201449394226,
      "learning_rate": 1.9997804017074522e-05,
      "loss": 3.019,
      "step": 1009
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1943914890289307,
      "learning_rate": 1.9997795380299247e-05,
      "loss": 2.9533,
      "step": 1010
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4281123876571655,
      "learning_rate": 1.9997786726574988e-05,
      "loss": 2.854,
      "step": 1011
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.024108648300171,
      "learning_rate": 1.999777805590177e-05,
      "loss": 3.1305,
      "step": 1012
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0617262125015259,
      "learning_rate": 1.9997769368279597e-05,
      "loss": 3.0953,
      "step": 1013
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0675746202468872,
      "learning_rate": 1.9997760663708493e-05,
      "loss": 3.293,
      "step": 1014
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.045405387878418,
      "learning_rate": 1.999775194218847e-05,
      "loss": 3.2038,
      "step": 1015
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0927971601486206,
      "learning_rate": 1.999774320371954e-05,
      "loss": 3.0062,
      "step": 1016
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2305458784103394,
      "learning_rate": 1.999773444830172e-05,
      "loss": 3.261,
      "step": 1017
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.093327522277832,
      "learning_rate": 1.9997725675935028e-05,
      "loss": 3.0535,
      "step": 1018
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1485042572021484,
      "learning_rate": 1.9997716886619473e-05,
      "loss": 3.2071,
      "step": 1019
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.027761459350586,
      "learning_rate": 1.999770808035507e-05,
      "loss": 3.1349,
      "step": 1020
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3450769186019897,
      "learning_rate": 1.999769925714184e-05,
      "loss": 3.2548,
      "step": 1021
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9526140093803406,
      "learning_rate": 1.9997690416979794e-05,
      "loss": 3.0122,
      "step": 1022
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9796904921531677,
      "learning_rate": 1.9997681559868945e-05,
      "loss": 2.9995,
      "step": 1023
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9850354790687561,
      "learning_rate": 1.9997672685809316e-05,
      "loss": 3.1858,
      "step": 1024
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3556605577468872,
      "learning_rate": 1.9997663794800913e-05,
      "loss": 2.9288,
      "step": 1025
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1541306972503662,
      "learning_rate": 1.9997654886843754e-05,
      "loss": 3.1139,
      "step": 1026
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0426430702209473,
      "learning_rate": 1.9997645961937855e-05,
      "loss": 2.8302,
      "step": 1027
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.139782190322876,
      "learning_rate": 1.999763702008323e-05,
      "loss": 3.1629,
      "step": 1028
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9060243964195251,
      "learning_rate": 1.9997628061279896e-05,
      "loss": 2.8254,
      "step": 1029
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9912933707237244,
      "learning_rate": 1.999761908552787e-05,
      "loss": 3.2432,
      "step": 1030
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0835816860198975,
      "learning_rate": 1.999761009282716e-05,
      "loss": 3.0587,
      "step": 1031
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0742183923721313,
      "learning_rate": 1.9997601083177788e-05,
      "loss": 3.0726,
      "step": 1032
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.065148115158081,
      "learning_rate": 1.9997592056579766e-05,
      "loss": 3.2956,
      "step": 1033
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0072661638259888,
      "learning_rate": 1.999758301303311e-05,
      "loss": 3.2168,
      "step": 1034
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0469732284545898,
      "learning_rate": 1.9997573952537836e-05,
      "loss": 3.1985,
      "step": 1035
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0676658153533936,
      "learning_rate": 1.9997564875093954e-05,
      "loss": 3.108,
      "step": 1036
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.066182255744934,
      "learning_rate": 1.999755578070149e-05,
      "loss": 3.1924,
      "step": 1037
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0430457592010498,
      "learning_rate": 1.9997546669360453e-05,
      "loss": 3.173,
      "step": 1038
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9826711416244507,
      "learning_rate": 1.9997537541070858e-05,
      "loss": 2.8736,
      "step": 1039
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1086362600326538,
      "learning_rate": 1.9997528395832722e-05,
      "loss": 3.2114,
      "step": 1040
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9839752316474915,
      "learning_rate": 1.9997519233646058e-05,
      "loss": 3.1558,
      "step": 1041
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9992867708206177,
      "learning_rate": 1.9997510054510887e-05,
      "loss": 2.987,
      "step": 1042
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0505433082580566,
      "learning_rate": 1.999750085842722e-05,
      "loss": 2.9734,
      "step": 1043
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0971465110778809,
      "learning_rate": 1.9997491645395072e-05,
      "loss": 3.1155,
      "step": 1044
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9826428890228271,
      "learning_rate": 1.9997482415414465e-05,
      "loss": 3.101,
      "step": 1045
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.249294638633728,
      "learning_rate": 1.9997473168485406e-05,
      "loss": 3.0835,
      "step": 1046
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.054176688194275,
      "learning_rate": 1.9997463904607913e-05,
      "loss": 3.1482,
      "step": 1047
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0875811576843262,
      "learning_rate": 1.9997454623782005e-05,
      "loss": 3.0353,
      "step": 1048
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.013317346572876,
      "learning_rate": 1.99974453260077e-05,
      "loss": 2.9515,
      "step": 1049
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9654662013053894,
      "learning_rate": 1.9997436011285004e-05,
      "loss": 3.2187,
      "step": 1050
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9921106100082397,
      "learning_rate": 1.999742667961394e-05,
      "loss": 2.9863,
      "step": 1051
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.2445647716522217,
      "learning_rate": 1.999741733099453e-05,
      "loss": 3.1067,
      "step": 1052
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9727924466133118,
      "learning_rate": 1.9997407965426773e-05,
      "loss": 3.1413,
      "step": 1053
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0921217203140259,
      "learning_rate": 1.9997398582910697e-05,
      "loss": 3.1311,
      "step": 1054
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9207566380500793,
      "learning_rate": 1.999738918344632e-05,
      "loss": 3.0877,
      "step": 1055
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0717092752456665,
      "learning_rate": 1.9997379767033645e-05,
      "loss": 3.1402,
      "step": 1056
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0371975898742676,
      "learning_rate": 1.9997370333672703e-05,
      "loss": 3.1061,
      "step": 1057
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.113745927810669,
      "learning_rate": 1.9997360883363498e-05,
      "loss": 3.0933,
      "step": 1058
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.958127498626709,
      "learning_rate": 1.9997351416106055e-05,
      "loss": 2.889,
      "step": 1059
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.025744080543518,
      "learning_rate": 1.9997341931900385e-05,
      "loss": 2.8596,
      "step": 1060
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0766186714172363,
      "learning_rate": 1.9997332430746503e-05,
      "loss": 3.17,
      "step": 1061
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.923754870891571,
      "learning_rate": 1.999732291264443e-05,
      "loss": 2.9605,
      "step": 1062
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0037710666656494,
      "learning_rate": 1.9997313377594177e-05,
      "loss": 3.0259,
      "step": 1063
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9522504806518555,
      "learning_rate": 1.9997303825595767e-05,
      "loss": 2.8244,
      "step": 1064
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0177645683288574,
      "learning_rate": 1.9997294256649207e-05,
      "loss": 2.9022,
      "step": 1065
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9660139679908752,
      "learning_rate": 1.999728467075452e-05,
      "loss": 2.9324,
      "step": 1066
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9800227880477905,
      "learning_rate": 1.9997275067911718e-05,
      "loss": 2.9552,
      "step": 1067
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0672346353530884,
      "learning_rate": 1.9997265448120822e-05,
      "loss": 2.8599,
      "step": 1068
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0038878917694092,
      "learning_rate": 1.9997255811381845e-05,
      "loss": 3.1886,
      "step": 1069
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0451711416244507,
      "learning_rate": 1.9997246157694805e-05,
      "loss": 3.0938,
      "step": 1070
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1221117973327637,
      "learning_rate": 1.9997236487059717e-05,
      "loss": 3.3953,
      "step": 1071
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1410133838653564,
      "learning_rate": 1.9997226799476598e-05,
      "loss": 2.9802,
      "step": 1072
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0748059749603271,
      "learning_rate": 1.9997217094945463e-05,
      "loss": 2.8947,
      "step": 1073
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1683905124664307,
      "learning_rate": 1.999720737346633e-05,
      "loss": 2.9612,
      "step": 1074
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0934419631958008,
      "learning_rate": 1.999719763503922e-05,
      "loss": 2.8549,
      "step": 1075
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2503278255462646,
      "learning_rate": 1.999718787966414e-05,
      "loss": 2.8286,
      "step": 1076
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.065481424331665,
      "learning_rate": 1.999717810734111e-05,
      "loss": 2.8662,
      "step": 1077
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0161484479904175,
      "learning_rate": 1.9997168318070153e-05,
      "loss": 3.1061,
      "step": 1078
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0419963598251343,
      "learning_rate": 1.9997158511851277e-05,
      "loss": 2.9341,
      "step": 1079
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1366112232208252,
      "learning_rate": 1.99971486886845e-05,
      "loss": 3.1408,
      "step": 1080
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0028650760650635,
      "learning_rate": 1.9997138848569847e-05,
      "loss": 3.2064,
      "step": 1081
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.09965980052948,
      "learning_rate": 1.9997128991507324e-05,
      "loss": 3.1409,
      "step": 1082
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.009709358215332,
      "learning_rate": 1.9997119117496955e-05,
      "loss": 2.9673,
      "step": 1083
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.042051076889038,
      "learning_rate": 1.999710922653875e-05,
      "loss": 3.0274,
      "step": 1084
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9565120935440063,
      "learning_rate": 1.9997099318632737e-05,
      "loss": 3.1389,
      "step": 1085
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0874996185302734,
      "learning_rate": 1.9997089393778917e-05,
      "loss": 3.3148,
      "step": 1086
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0311527252197266,
      "learning_rate": 1.999707945197732e-05,
      "loss": 2.7566,
      "step": 1087
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0645763874053955,
      "learning_rate": 1.999706949322796e-05,
      "loss": 3.2312,
      "step": 1088
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0537675619125366,
      "learning_rate": 1.9997059517530846e-05,
      "loss": 2.8463,
      "step": 1089
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0171124935150146,
      "learning_rate": 1.9997049524886005e-05,
      "loss": 3.0902,
      "step": 1090
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9493039846420288,
      "learning_rate": 1.9997039515293447e-05,
      "loss": 3.1564,
      "step": 1091
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1669529676437378,
      "learning_rate": 1.9997029488753194e-05,
      "loss": 3.1063,
      "step": 1092
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.214972972869873,
      "learning_rate": 1.999701944526526e-05,
      "loss": 3.1946,
      "step": 1093
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0449987649917603,
      "learning_rate": 1.9997009384829663e-05,
      "loss": 3.0038,
      "step": 1094
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9882421493530273,
      "learning_rate": 1.9996999307446423e-05,
      "loss": 2.8069,
      "step": 1095
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9939326643943787,
      "learning_rate": 1.999698921311555e-05,
      "loss": 2.9152,
      "step": 1096
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9883946776390076,
      "learning_rate": 1.9996979101837065e-05,
      "loss": 2.8202,
      "step": 1097
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9772175550460815,
      "learning_rate": 1.999696897361099e-05,
      "loss": 2.8694,
      "step": 1098
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1232504844665527,
      "learning_rate": 1.9996958828437335e-05,
      "loss": 3.029,
      "step": 1099
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0320227146148682,
      "learning_rate": 1.999694866631612e-05,
      "loss": 3.0459,
      "step": 1100
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0262877941131592,
      "learning_rate": 1.999693848724736e-05,
      "loss": 2.9086,
      "step": 1101
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0768985748291016,
      "learning_rate": 1.9996928291231074e-05,
      "loss": 3.1058,
      "step": 1102
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.098297357559204,
      "learning_rate": 1.9996918078267282e-05,
      "loss": 2.9864,
      "step": 1103
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0751056671142578,
      "learning_rate": 1.9996907848355997e-05,
      "loss": 2.9513,
      "step": 1104
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.024755835533142,
      "learning_rate": 1.999689760149724e-05,
      "loss": 2.9831,
      "step": 1105
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.005277156829834,
      "learning_rate": 1.999688733769102e-05,
      "loss": 2.8443,
      "step": 1106
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.001954197883606,
      "learning_rate": 1.9996877056937367e-05,
      "loss": 3.1614,
      "step": 1107
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.860797107219696,
      "learning_rate": 1.999686675923629e-05,
      "loss": 3.0316,
      "step": 1108
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.051046371459961,
      "learning_rate": 1.9996856444587812e-05,
      "loss": 2.9674,
      "step": 1109
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9734130501747131,
      "learning_rate": 1.9996846112991943e-05,
      "loss": 3.0929,
      "step": 1110
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0069111585617065,
      "learning_rate": 1.999683576444871e-05,
      "loss": 3.0256,
      "step": 1111
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1582999229431152,
      "learning_rate": 1.999682539895812e-05,
      "loss": 3.0557,
      "step": 1112
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0440670251846313,
      "learning_rate": 1.9996815016520194e-05,
      "loss": 2.9895,
      "step": 1113
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.026105284690857,
      "learning_rate": 1.9996804617134957e-05,
      "loss": 3.0328,
      "step": 1114
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1102392673492432,
      "learning_rate": 1.999679420080242e-05,
      "loss": 3.2293,
      "step": 1115
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0251225233078003,
      "learning_rate": 1.99967837675226e-05,
      "loss": 3.111,
      "step": 1116
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9998620748519897,
      "learning_rate": 1.9996773317295517e-05,
      "loss": 3.0433,
      "step": 1117
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0706048011779785,
      "learning_rate": 1.999676285012119e-05,
      "loss": 3.0439,
      "step": 1118
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0649476051330566,
      "learning_rate": 1.9996752365999633e-05,
      "loss": 2.9449,
      "step": 1119
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0594552755355835,
      "learning_rate": 1.9996741864930865e-05,
      "loss": 3.0605,
      "step": 1120
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.199373483657837,
      "learning_rate": 1.9996731346914906e-05,
      "loss": 3.2853,
      "step": 1121
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.064073085784912,
      "learning_rate": 1.9996720811951773e-05,
      "loss": 3.1445,
      "step": 1122
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0311393737792969,
      "learning_rate": 1.999671026004148e-05,
      "loss": 3.0952,
      "step": 1123
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0988367795944214,
      "learning_rate": 1.999669969118405e-05,
      "loss": 3.1155,
      "step": 1124
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1144671440124512,
      "learning_rate": 1.99966891053795e-05,
      "loss": 2.834,
      "step": 1125
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9919388890266418,
      "learning_rate": 1.9996678502627844e-05,
      "loss": 3.2707,
      "step": 1126
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.039042592048645,
      "learning_rate": 1.9996667882929107e-05,
      "loss": 3.0296,
      "step": 1127
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0624892711639404,
      "learning_rate": 1.9996657246283297e-05,
      "loss": 3.3548,
      "step": 1128
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1598063707351685,
      "learning_rate": 1.9996646592690444e-05,
      "loss": 3.0346,
      "step": 1129
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0717613697052002,
      "learning_rate": 1.9996635922150556e-05,
      "loss": 2.9427,
      "step": 1130
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0429936647415161,
      "learning_rate": 1.9996625234663657e-05,
      "loss": 3.39,
      "step": 1131
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9804409742355347,
      "learning_rate": 1.9996614530229763e-05,
      "loss": 3.2238,
      "step": 1132
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0147219896316528,
      "learning_rate": 1.999660380884889e-05,
      "loss": 2.9105,
      "step": 1133
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9208223819732666,
      "learning_rate": 1.999659307052106e-05,
      "loss": 3.0247,
      "step": 1134
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9801596403121948,
      "learning_rate": 1.999658231524629e-05,
      "loss": 2.9315,
      "step": 1135
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1228547096252441,
      "learning_rate": 1.99965715430246e-05,
      "loss": 3.0993,
      "step": 1136
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0026198625564575,
      "learning_rate": 1.9996560753856004e-05,
      "loss": 3.0813,
      "step": 1137
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0369774103164673,
      "learning_rate": 1.9996549947740524e-05,
      "loss": 3.1039,
      "step": 1138
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.076582670211792,
      "learning_rate": 1.9996539124678175e-05,
      "loss": 3.0133,
      "step": 1139
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.037097454071045,
      "learning_rate": 1.999652828466898e-05,
      "loss": 2.8966,
      "step": 1140
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9640483260154724,
      "learning_rate": 1.999651742771295e-05,
      "loss": 2.999,
      "step": 1141
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9586038589477539,
      "learning_rate": 1.999650655381011e-05,
      "loss": 2.82,
      "step": 1142
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9364737868309021,
      "learning_rate": 1.9996495662960477e-05,
      "loss": 3.042,
      "step": 1143
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9798777103424072,
      "learning_rate": 1.999648475516407e-05,
      "loss": 3.2023,
      "step": 1144
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0888493061065674,
      "learning_rate": 1.9996473830420903e-05,
      "loss": 2.8561,
      "step": 1145
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9298384189605713,
      "learning_rate": 1.9996462888731e-05,
      "loss": 3.1548,
      "step": 1146
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1429853439331055,
      "learning_rate": 1.9996451930094377e-05,
      "loss": 3.051,
      "step": 1147
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0313395261764526,
      "learning_rate": 1.9996440954511054e-05,
      "loss": 3.0638,
      "step": 1148
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.059863805770874,
      "learning_rate": 1.9996429961981047e-05,
      "loss": 2.9568,
      "step": 1149
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.085802435874939,
      "learning_rate": 1.9996418952504378e-05,
      "loss": 3.1517,
      "step": 1150
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9905686974525452,
      "learning_rate": 1.9996407926081063e-05,
      "loss": 3.0314,
      "step": 1151
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0871938467025757,
      "learning_rate": 1.999639688271112e-05,
      "loss": 2.9765,
      "step": 1152
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0176968574523926,
      "learning_rate": 1.9996385822394574e-05,
      "loss": 2.9595,
      "step": 1153
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.987672746181488,
      "learning_rate": 1.9996374745131434e-05,
      "loss": 2.914,
      "step": 1154
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0912328958511353,
      "learning_rate": 1.9996363650921722e-05,
      "loss": 2.9447,
      "step": 1155
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1537405252456665,
      "learning_rate": 1.9996352539765463e-05,
      "loss": 3.1004,
      "step": 1156
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9178892970085144,
      "learning_rate": 1.9996341411662674e-05,
      "loss": 2.9925,
      "step": 1157
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0093871355056763,
      "learning_rate": 1.9996330266613367e-05,
      "loss": 3.1312,
      "step": 1158
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0506983995437622,
      "learning_rate": 1.9996319104617567e-05,
      "loss": 2.7533,
      "step": 1159
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0729358196258545,
      "learning_rate": 1.999630792567529e-05,
      "loss": 3.0657,
      "step": 1160
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0531500577926636,
      "learning_rate": 1.9996296729786555e-05,
      "loss": 3.0353,
      "step": 1161
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0145750045776367,
      "learning_rate": 1.9996285516951383e-05,
      "loss": 3.0519,
      "step": 1162
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0114954710006714,
      "learning_rate": 1.9996274287169793e-05,
      "loss": 3.1778,
      "step": 1163
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9772562980651855,
      "learning_rate": 1.9996263040441804e-05,
      "loss": 2.8313,
      "step": 1164
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0288457870483398,
      "learning_rate": 1.9996251776767434e-05,
      "loss": 3.1499,
      "step": 1165
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1633224487304688,
      "learning_rate": 1.99962404961467e-05,
      "loss": 3.1883,
      "step": 1166
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1254693269729614,
      "learning_rate": 1.9996229198579623e-05,
      "loss": 3.1202,
      "step": 1167
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.988240122795105,
      "learning_rate": 1.9996217884066227e-05,
      "loss": 2.7411,
      "step": 1168
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.138690710067749,
      "learning_rate": 1.9996206552606524e-05,
      "loss": 2.9813,
      "step": 1169
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9638063907623291,
      "learning_rate": 1.9996195204200534e-05,
      "loss": 3.1761,
      "step": 1170
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0869498252868652,
      "learning_rate": 1.9996183838848278e-05,
      "loss": 2.8401,
      "step": 1171
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0242441892623901,
      "learning_rate": 1.999617245654978e-05,
      "loss": 3.0059,
      "step": 1172
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0355979204177856,
      "learning_rate": 1.9996161057305055e-05,
      "loss": 2.8981,
      "step": 1173
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0406514406204224,
      "learning_rate": 1.999614964111412e-05,
      "loss": 2.997,
      "step": 1174
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2251815795898438,
      "learning_rate": 1.9996138207976994e-05,
      "loss": 3.2303,
      "step": 1175
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0983853340148926,
      "learning_rate": 1.9996126757893698e-05,
      "loss": 2.9401,
      "step": 1176
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0749647617340088,
      "learning_rate": 1.9996115290864257e-05,
      "loss": 2.943,
      "step": 1177
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9847370982170105,
      "learning_rate": 1.9996103806888683e-05,
      "loss": 3.2438,
      "step": 1178
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0856492519378662,
      "learning_rate": 1.9996092305967e-05,
      "loss": 3.0019,
      "step": 1179
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0590100288391113,
      "learning_rate": 1.9996080788099222e-05,
      "loss": 3.0157,
      "step": 1180
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.086647868156433,
      "learning_rate": 1.9996069253285374e-05,
      "loss": 3.1075,
      "step": 1181
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9987267851829529,
      "learning_rate": 1.9996057701525476e-05,
      "loss": 3.0622,
      "step": 1182
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.110617995262146,
      "learning_rate": 1.9996046132819546e-05,
      "loss": 3.0104,
      "step": 1183
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1056026220321655,
      "learning_rate": 1.99960345471676e-05,
      "loss": 2.8738,
      "step": 1184
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0061992406845093,
      "learning_rate": 1.999602294456966e-05,
      "loss": 2.8814,
      "step": 1185
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0990957021713257,
      "learning_rate": 1.9996011325025746e-05,
      "loss": 2.9147,
      "step": 1186
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9429801106452942,
      "learning_rate": 1.999599968853588e-05,
      "loss": 3.0268,
      "step": 1187
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2685264348983765,
      "learning_rate": 1.999598803510008e-05,
      "loss": 3.1799,
      "step": 1188
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0661041736602783,
      "learning_rate": 1.9995976364718364e-05,
      "loss": 3.0354,
      "step": 1189
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0338565111160278,
      "learning_rate": 1.9995964677390755e-05,
      "loss": 2.9306,
      "step": 1190
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.978380024433136,
      "learning_rate": 1.9995952973117266e-05,
      "loss": 3.0406,
      "step": 1191
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1420948505401611,
      "learning_rate": 1.9995941251897927e-05,
      "loss": 3.0984,
      "step": 1192
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9783583283424377,
      "learning_rate": 1.9995929513732752e-05,
      "loss": 3.0486,
      "step": 1193
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9606913328170776,
      "learning_rate": 1.999591775862176e-05,
      "loss": 2.9962,
      "step": 1194
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9087657928466797,
      "learning_rate": 1.9995905986564972e-05,
      "loss": 2.9836,
      "step": 1195
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0495491027832031,
      "learning_rate": 1.999589419756241e-05,
      "loss": 2.9494,
      "step": 1196
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9616666436195374,
      "learning_rate": 1.9995882391614093e-05,
      "loss": 2.9099,
      "step": 1197
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.034286618232727,
      "learning_rate": 1.9995870568720043e-05,
      "loss": 3.2268,
      "step": 1198
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1761870384216309,
      "learning_rate": 1.9995858728880272e-05,
      "loss": 3.0374,
      "step": 1199
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9393999576568604,
      "learning_rate": 1.9995846872094807e-05,
      "loss": 2.963,
      "step": 1200
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0075689554214478,
      "learning_rate": 1.999583499836367e-05,
      "loss": 3.2452,
      "step": 1201
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0750865936279297,
      "learning_rate": 1.9995823107686875e-05,
      "loss": 3.0308,
      "step": 1202
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.029221534729004,
      "learning_rate": 1.9995811200064445e-05,
      "loss": 3.0286,
      "step": 1203
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0293859243392944,
      "learning_rate": 1.99957992754964e-05,
      "loss": 3.1506,
      "step": 1204
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0052379369735718,
      "learning_rate": 1.999578733398276e-05,
      "loss": 2.9278,
      "step": 1205
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1187835931777954,
      "learning_rate": 1.9995775375523547e-05,
      "loss": 3.1159,
      "step": 1206
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.980632483959198,
      "learning_rate": 1.999576340011878e-05,
      "loss": 2.9511,
      "step": 1207
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.99382483959198,
      "learning_rate": 1.9995751407768476e-05,
      "loss": 3.0665,
      "step": 1208
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0432329177856445,
      "learning_rate": 1.9995739398472663e-05,
      "loss": 3.1588,
      "step": 1209
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1689428091049194,
      "learning_rate": 1.9995727372231356e-05,
      "loss": 2.7997,
      "step": 1210
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0553264617919922,
      "learning_rate": 1.9995715329044575e-05,
      "loss": 2.9979,
      "step": 1211
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9771426916122437,
      "learning_rate": 1.9995703268912345e-05,
      "loss": 3.0352,
      "step": 1212
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0148695707321167,
      "learning_rate": 1.9995691191834678e-05,
      "loss": 3.1625,
      "step": 1213
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9857099652290344,
      "learning_rate": 1.9995679097811602e-05,
      "loss": 2.8279,
      "step": 1214
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1060644388198853,
      "learning_rate": 1.9995666986843138e-05,
      "loss": 3.12,
      "step": 1215
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0963947772979736,
      "learning_rate": 1.9995654858929298e-05,
      "loss": 3.2022,
      "step": 1216
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2028863430023193,
      "learning_rate": 1.999564271407011e-05,
      "loss": 2.8431,
      "step": 1217
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9818021655082703,
      "learning_rate": 1.9995630552265598e-05,
      "loss": 3.0617,
      "step": 1218
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1953060626983643,
      "learning_rate": 1.999561837351577e-05,
      "loss": 3.0823,
      "step": 1219
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2119263410568237,
      "learning_rate": 1.9995606177820658e-05,
      "loss": 3.0148,
      "step": 1220
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.441992998123169,
      "learning_rate": 1.999559396518028e-05,
      "loss": 2.9901,
      "step": 1221
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0179003477096558,
      "learning_rate": 1.9995581735594654e-05,
      "loss": 2.9034,
      "step": 1222
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0712984800338745,
      "learning_rate": 1.99955694890638e-05,
      "loss": 3.0675,
      "step": 1223
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0106579065322876,
      "learning_rate": 1.999555722558774e-05,
      "loss": 3.0154,
      "step": 1224
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0284427404403687,
      "learning_rate": 1.9995544945166504e-05,
      "loss": 3.0603,
      "step": 1225
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1060727834701538,
      "learning_rate": 1.9995532647800098e-05,
      "loss": 2.9471,
      "step": 1226
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0730218887329102,
      "learning_rate": 1.999552033348855e-05,
      "loss": 3.0789,
      "step": 1227
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9671676754951477,
      "learning_rate": 1.999550800223188e-05,
      "loss": 3.0053,
      "step": 1228
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0461690425872803,
      "learning_rate": 1.9995495654030112e-05,
      "loss": 3.0311,
      "step": 1229
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.050673246383667,
      "learning_rate": 1.9995483288883262e-05,
      "loss": 3.1836,
      "step": 1230
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0768511295318604,
      "learning_rate": 1.9995470906791352e-05,
      "loss": 3.0771,
      "step": 1231
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0352590084075928,
      "learning_rate": 1.9995458507754405e-05,
      "loss": 2.8727,
      "step": 1232
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9670048356056213,
      "learning_rate": 1.9995446091772443e-05,
      "loss": 3.1093,
      "step": 1233
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0245482921600342,
      "learning_rate": 1.999543365884548e-05,
      "loss": 2.938,
      "step": 1234
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.083777666091919,
      "learning_rate": 1.9995421208973547e-05,
      "loss": 3.1366,
      "step": 1235
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.011377215385437,
      "learning_rate": 1.9995408742156662e-05,
      "loss": 2.9427,
      "step": 1236
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0435534715652466,
      "learning_rate": 1.999539625839484e-05,
      "loss": 3.1193,
      "step": 1237
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1567721366882324,
      "learning_rate": 1.999538375768811e-05,
      "loss": 3.0998,
      "step": 1238
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0236488580703735,
      "learning_rate": 1.999537124003649e-05,
      "loss": 3.1359,
      "step": 1239
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.074554681777954,
      "learning_rate": 1.9995358705439996e-05,
      "loss": 2.8677,
      "step": 1240
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0873255729675293,
      "learning_rate": 1.9995346153898657e-05,
      "loss": 3.0088,
      "step": 1241
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0086463689804077,
      "learning_rate": 1.999533358541249e-05,
      "loss": 3.0195,
      "step": 1242
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0321260690689087,
      "learning_rate": 1.9995320999981523e-05,
      "loss": 2.7964,
      "step": 1243
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9774879217147827,
      "learning_rate": 1.999530839760577e-05,
      "loss": 3.0894,
      "step": 1244
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0863009691238403,
      "learning_rate": 1.9995295778285254e-05,
      "loss": 3.0357,
      "step": 1245
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1088523864746094,
      "learning_rate": 1.9995283142019993e-05,
      "loss": 3.1484,
      "step": 1246
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9620901346206665,
      "learning_rate": 1.9995270488810018e-05,
      "loss": 3.3267,
      "step": 1247
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0396448373794556,
      "learning_rate": 1.9995257818655345e-05,
      "loss": 2.8264,
      "step": 1248
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.198454737663269,
      "learning_rate": 1.999524513155599e-05,
      "loss": 3.214,
      "step": 1249
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0519044399261475,
      "learning_rate": 1.9995232427511984e-05,
      "loss": 2.8834,
      "step": 1250
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9375627040863037,
      "learning_rate": 1.9995219706523344e-05,
      "loss": 2.9048,
      "step": 1251
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0572192668914795,
      "learning_rate": 1.999520696859009e-05,
      "loss": 3.0093,
      "step": 1252
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9918432831764221,
      "learning_rate": 1.999519421371225e-05,
      "loss": 2.931,
      "step": 1253
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0479813814163208,
      "learning_rate": 1.9995181441889837e-05,
      "loss": 3.0154,
      "step": 1254
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9841722846031189,
      "learning_rate": 1.9995168653122877e-05,
      "loss": 2.8609,
      "step": 1255
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9597564339637756,
      "learning_rate": 1.9995155847411393e-05,
      "loss": 2.9978,
      "step": 1256
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2328118085861206,
      "learning_rate": 1.9995143024755404e-05,
      "loss": 3.0499,
      "step": 1257
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0806058645248413,
      "learning_rate": 1.999513018515493e-05,
      "loss": 3.0462,
      "step": 1258
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0477705001831055,
      "learning_rate": 1.999511732861e-05,
      "loss": 3.013,
      "step": 1259
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0332789421081543,
      "learning_rate": 1.9995104455120634e-05,
      "loss": 3.1706,
      "step": 1260
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.085782527923584,
      "learning_rate": 1.9995091564686847e-05,
      "loss": 3.1641,
      "step": 1261
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1859182119369507,
      "learning_rate": 1.9995078657308667e-05,
      "loss": 2.9217,
      "step": 1262
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9274513125419617,
      "learning_rate": 1.9995065732986112e-05,
      "loss": 3.2232,
      "step": 1263
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.113479733467102,
      "learning_rate": 1.999505279171921e-05,
      "loss": 3.0954,
      "step": 1264
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1174064874649048,
      "learning_rate": 1.9995039833507973e-05,
      "loss": 2.9732,
      "step": 1265
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9862983822822571,
      "learning_rate": 1.999502685835243e-05,
      "loss": 2.9147,
      "step": 1266
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0776053667068481,
      "learning_rate": 1.9995013866252602e-05,
      "loss": 2.8372,
      "step": 1267
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.015044093132019,
      "learning_rate": 1.9995000857208518e-05,
      "loss": 3.0058,
      "step": 1268
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1111558675765991,
      "learning_rate": 1.9994987831220186e-05,
      "loss": 2.9497,
      "step": 1269
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.033050537109375,
      "learning_rate": 1.9994974788287638e-05,
      "loss": 2.9636,
      "step": 1270
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0627654790878296,
      "learning_rate": 1.999496172841089e-05,
      "loss": 2.9859,
      "step": 1271
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0247917175292969,
      "learning_rate": 1.999494865158997e-05,
      "loss": 2.9985,
      "step": 1272
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.024472713470459,
      "learning_rate": 1.9994935557824894e-05,
      "loss": 2.7728,
      "step": 1273
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9775472283363342,
      "learning_rate": 1.999492244711569e-05,
      "loss": 2.9256,
      "step": 1274
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0638716220855713,
      "learning_rate": 1.9994909319462378e-05,
      "loss": 2.8896,
      "step": 1275
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9574682116508484,
      "learning_rate": 1.999489617486498e-05,
      "loss": 2.9743,
      "step": 1276
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.062595009803772,
      "learning_rate": 1.999488301332352e-05,
      "loss": 3.0724,
      "step": 1277
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0898241996765137,
      "learning_rate": 1.9994869834838017e-05,
      "loss": 2.9786,
      "step": 1278
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9143360257148743,
      "learning_rate": 1.9994856639408496e-05,
      "loss": 2.7728,
      "step": 1279
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9548547267913818,
      "learning_rate": 1.9994843427034975e-05,
      "loss": 3.0604,
      "step": 1280
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0037126541137695,
      "learning_rate": 1.9994830197717486e-05,
      "loss": 3.016,
      "step": 1281
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0247039794921875,
      "learning_rate": 1.999481695145604e-05,
      "loss": 2.8835,
      "step": 1282
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.042007565498352,
      "learning_rate": 1.9994803688250667e-05,
      "loss": 2.9754,
      "step": 1283
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.161745548248291,
      "learning_rate": 1.9994790408101385e-05,
      "loss": 3.1917,
      "step": 1284
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0246617794036865,
      "learning_rate": 1.9994777111008222e-05,
      "loss": 3.0048,
      "step": 1285
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9666004180908203,
      "learning_rate": 1.9994763796971195e-05,
      "loss": 3.0086,
      "step": 1286
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9806466698646545,
      "learning_rate": 1.999475046599033e-05,
      "loss": 2.8075,
      "step": 1287
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0278586149215698,
      "learning_rate": 1.9994737118065647e-05,
      "loss": 2.9819,
      "step": 1288
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0156365633010864,
      "learning_rate": 1.999472375319717e-05,
      "loss": 2.9374,
      "step": 1289
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9916043877601624,
      "learning_rate": 1.9994710371384924e-05,
      "loss": 3.0241,
      "step": 1290
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9696193933486938,
      "learning_rate": 1.999469697262893e-05,
      "loss": 2.8656,
      "step": 1291
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.105634331703186,
      "learning_rate": 1.9994683556929205e-05,
      "loss": 3.036,
      "step": 1292
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.037382960319519,
      "learning_rate": 1.999467012428578e-05,
      "loss": 3.1204,
      "step": 1293
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0961779356002808,
      "learning_rate": 1.9994656674698676e-05,
      "loss": 3.1745,
      "step": 1294
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0160011053085327,
      "learning_rate": 1.9994643208167912e-05,
      "loss": 2.9991,
      "step": 1295
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.293184518814087,
      "learning_rate": 1.9994629724693515e-05,
      "loss": 3.0824,
      "step": 1296
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2747889757156372,
      "learning_rate": 1.9994616224275507e-05,
      "loss": 2.9396,
      "step": 1297
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9768050909042358,
      "learning_rate": 1.999460270691391e-05,
      "loss": 2.9,
      "step": 1298
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9905856251716614,
      "learning_rate": 1.9994589172608743e-05,
      "loss": 2.9834,
      "step": 1299
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0064470767974854,
      "learning_rate": 1.9994575621360034e-05,
      "loss": 2.9787,
      "step": 1300
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0236715078353882,
      "learning_rate": 1.9994562053167807e-05,
      "loss": 2.9904,
      "step": 1301
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4055670499801636,
      "learning_rate": 1.999454846803208e-05,
      "loss": 2.8824,
      "step": 1302
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9906288981437683,
      "learning_rate": 1.999453486595288e-05,
      "loss": 2.9753,
      "step": 1303
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1179910898208618,
      "learning_rate": 1.9994521246930234e-05,
      "loss": 3.3462,
      "step": 1304
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0998620986938477,
      "learning_rate": 1.9994507610964152e-05,
      "loss": 2.879,
      "step": 1305
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9658514857292175,
      "learning_rate": 1.999449395805467e-05,
      "loss": 3.0732,
      "step": 1306
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1071784496307373,
      "learning_rate": 1.9994480288201807e-05,
      "loss": 2.9499,
      "step": 1307
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0744093656539917,
      "learning_rate": 1.9994466601405583e-05,
      "loss": 2.9755,
      "step": 1308
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.132723093032837,
      "learning_rate": 1.999445289766602e-05,
      "loss": 3.0029,
      "step": 1309
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0014655590057373,
      "learning_rate": 1.9994439176983153e-05,
      "loss": 3.1232,
      "step": 1310
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9684532284736633,
      "learning_rate": 1.999442543935699e-05,
      "loss": 3.0699,
      "step": 1311
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.05902099609375,
      "learning_rate": 1.9994411684787567e-05,
      "loss": 2.9666,
      "step": 1312
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0261131525039673,
      "learning_rate": 1.99943979132749e-05,
      "loss": 2.976,
      "step": 1313
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9872940182685852,
      "learning_rate": 1.999438412481901e-05,
      "loss": 3.2148,
      "step": 1314
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9678892493247986,
      "learning_rate": 1.999437031941993e-05,
      "loss": 2.9433,
      "step": 1315
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.237873911857605,
      "learning_rate": 1.9994356497077676e-05,
      "loss": 3.0631,
      "step": 1316
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9943615198135376,
      "learning_rate": 1.9994342657792272e-05,
      "loss": 3.0917,
      "step": 1317
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0213568210601807,
      "learning_rate": 1.9994328801563744e-05,
      "loss": 2.828,
      "step": 1318
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.004630208015442,
      "learning_rate": 1.9994314928392116e-05,
      "loss": 3.0298,
      "step": 1319
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.044772982597351,
      "learning_rate": 1.9994301038277407e-05,
      "loss": 3.0448,
      "step": 1320
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9761690497398376,
      "learning_rate": 1.9994287131219644e-05,
      "loss": 2.9346,
      "step": 1321
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0665040016174316,
      "learning_rate": 1.999427320721885e-05,
      "loss": 2.9841,
      "step": 1322
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0129705667495728,
      "learning_rate": 1.999425926627505e-05,
      "loss": 2.9357,
      "step": 1323
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1329048871994019,
      "learning_rate": 1.9994245308388262e-05,
      "loss": 2.7396,
      "step": 1324
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.028194546699524,
      "learning_rate": 1.999423133355852e-05,
      "loss": 2.8952,
      "step": 1325
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9782152771949768,
      "learning_rate": 1.9994217341785837e-05,
      "loss": 3.0083,
      "step": 1326
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0820245742797852,
      "learning_rate": 1.9994203333070242e-05,
      "loss": 3.0903,
      "step": 1327
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0168408155441284,
      "learning_rate": 1.999418930741176e-05,
      "loss": 2.8469,
      "step": 1328
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0669106245040894,
      "learning_rate": 1.9994175264810408e-05,
      "loss": 3.0525,
      "step": 1329
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0119127035140991,
      "learning_rate": 1.999416120526622e-05,
      "loss": 2.9826,
      "step": 1330
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0749539136886597,
      "learning_rate": 1.999414712877921e-05,
      "loss": 3.0352,
      "step": 1331
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0232768058776855,
      "learning_rate": 1.9994133035349408e-05,
      "loss": 3.035,
      "step": 1332
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.183303952217102,
      "learning_rate": 1.9994118924976836e-05,
      "loss": 3.0175,
      "step": 1333
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.977407693862915,
      "learning_rate": 1.999410479766152e-05,
      "loss": 3.106,
      "step": 1334
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1085091829299927,
      "learning_rate": 1.999409065340348e-05,
      "loss": 2.9752,
      "step": 1335
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9356614947319031,
      "learning_rate": 1.9994076492202745e-05,
      "loss": 2.8982,
      "step": 1336
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1196796894073486,
      "learning_rate": 1.999406231405933e-05,
      "loss": 2.8082,
      "step": 1337
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0454672574996948,
      "learning_rate": 1.999404811897327e-05,
      "loss": 2.9769,
      "step": 1338
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0221788883209229,
      "learning_rate": 1.9994033906944583e-05,
      "loss": 3.0455,
      "step": 1339
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0954455137252808,
      "learning_rate": 1.999401967797329e-05,
      "loss": 3.0018,
      "step": 1340
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.117186427116394,
      "learning_rate": 1.9994005432059423e-05,
      "loss": 2.9248,
      "step": 1341
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9114678502082825,
      "learning_rate": 1.9993991169203004e-05,
      "loss": 3.0019,
      "step": 1342
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9148634672164917,
      "learning_rate": 1.9993976889404056e-05,
      "loss": 2.8836,
      "step": 1343
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1303681135177612,
      "learning_rate": 1.99939625926626e-05,
      "loss": 2.9102,
      "step": 1344
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9724372625350952,
      "learning_rate": 1.999394827897866e-05,
      "loss": 3.0623,
      "step": 1345
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.985872209072113,
      "learning_rate": 1.999393394835227e-05,
      "loss": 3.0083,
      "step": 1346
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9564743041992188,
      "learning_rate": 1.9993919600783445e-05,
      "loss": 2.8817,
      "step": 1347
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.04552161693573,
      "learning_rate": 1.9993905236272213e-05,
      "loss": 2.7759,
      "step": 1348
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.096590518951416,
      "learning_rate": 1.9993890854818594e-05,
      "loss": 3.2244,
      "step": 1349
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9817976355552673,
      "learning_rate": 1.999387645642262e-05,
      "loss": 3.1682,
      "step": 1350
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0506335496902466,
      "learning_rate": 1.9993862041084308e-05,
      "loss": 2.8255,
      "step": 1351
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0795084238052368,
      "learning_rate": 1.9993847608803688e-05,
      "loss": 3.2563,
      "step": 1352
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0832557678222656,
      "learning_rate": 1.999383315958078e-05,
      "loss": 2.9128,
      "step": 1353
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1371184587478638,
      "learning_rate": 1.999381869341561e-05,
      "loss": 2.9539,
      "step": 1354
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0316781997680664,
      "learning_rate": 1.9993804210308204e-05,
      "loss": 3.0451,
      "step": 1355
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.057557225227356,
      "learning_rate": 1.9993789710258585e-05,
      "loss": 3.2339,
      "step": 1356
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0212310552597046,
      "learning_rate": 1.999377519326678e-05,
      "loss": 3.1735,
      "step": 1357
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9885228276252747,
      "learning_rate": 1.999376065933281e-05,
      "loss": 3.1926,
      "step": 1358
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.018581509590149,
      "learning_rate": 1.99937461084567e-05,
      "loss": 2.8445,
      "step": 1359
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9293608665466309,
      "learning_rate": 1.999373154063848e-05,
      "loss": 2.8824,
      "step": 1360
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9558140635490417,
      "learning_rate": 1.999371695587817e-05,
      "loss": 3.0337,
      "step": 1361
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9720208644866943,
      "learning_rate": 1.9993702354175792e-05,
      "loss": 3.0166,
      "step": 1362
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9490322470664978,
      "learning_rate": 1.999368773553138e-05,
      "loss": 2.9027,
      "step": 1363
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0353124141693115,
      "learning_rate": 1.999367309994495e-05,
      "loss": 2.8207,
      "step": 1364
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.096195936203003,
      "learning_rate": 1.9993658447416528e-05,
      "loss": 3.0248,
      "step": 1365
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0234659910202026,
      "learning_rate": 1.9993643777946144e-05,
      "loss": 2.7689,
      "step": 1366
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.993640661239624,
      "learning_rate": 1.9993629091533816e-05,
      "loss": 2.8015,
      "step": 1367
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0587314367294312,
      "learning_rate": 1.9993614388179573e-05,
      "loss": 3.1059,
      "step": 1368
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9511246681213379,
      "learning_rate": 1.9993599667883444e-05,
      "loss": 2.8122,
      "step": 1369
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0778108835220337,
      "learning_rate": 1.9993584930645444e-05,
      "loss": 3.0053,
      "step": 1370
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.02610445022583,
      "learning_rate": 1.9993570176465605e-05,
      "loss": 2.9274,
      "step": 1371
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0940085649490356,
      "learning_rate": 1.9993555405343952e-05,
      "loss": 3.0473,
      "step": 1372
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9866806268692017,
      "learning_rate": 1.9993540617280505e-05,
      "loss": 2.798,
      "step": 1373
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0019769668579102,
      "learning_rate": 1.9993525812275295e-05,
      "loss": 3.0487,
      "step": 1374
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1045689582824707,
      "learning_rate": 1.9993510990328345e-05,
      "loss": 3.0861,
      "step": 1375
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1730018854141235,
      "learning_rate": 1.999349615143968e-05,
      "loss": 2.7883,
      "step": 1376
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0067346096038818,
      "learning_rate": 1.9993481295609324e-05,
      "loss": 2.8591,
      "step": 1377
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1758872270584106,
      "learning_rate": 1.99934664228373e-05,
      "loss": 2.9751,
      "step": 1378
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0188878774642944,
      "learning_rate": 1.999345153312364e-05,
      "loss": 3.0299,
      "step": 1379
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.016363501548767,
      "learning_rate": 1.9993436626468362e-05,
      "loss": 3.0399,
      "step": 1380
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0664799213409424,
      "learning_rate": 1.99934217028715e-05,
      "loss": 3.0638,
      "step": 1381
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9722745418548584,
      "learning_rate": 1.9993406762333068e-05,
      "loss": 2.9762,
      "step": 1382
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2100239992141724,
      "learning_rate": 1.9993391804853098e-05,
      "loss": 2.9455,
      "step": 1383
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0035710334777832,
      "learning_rate": 1.9993376830431618e-05,
      "loss": 3.0416,
      "step": 1384
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9412104487419128,
      "learning_rate": 1.999336183906865e-05,
      "loss": 3.196,
      "step": 1385
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9983119368553162,
      "learning_rate": 1.999334683076422e-05,
      "loss": 2.7832,
      "step": 1386
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9175722599029541,
      "learning_rate": 1.9993331805518353e-05,
      "loss": 2.8591,
      "step": 1387
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0740758180618286,
      "learning_rate": 1.9993316763331073e-05,
      "loss": 3.0064,
      "step": 1388
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9963623881340027,
      "learning_rate": 1.9993301704202406e-05,
      "loss": 2.9309,
      "step": 1389
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0003122091293335,
      "learning_rate": 1.999328662813238e-05,
      "loss": 3.0198,
      "step": 1390
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0729376077651978,
      "learning_rate": 1.999327153512102e-05,
      "loss": 3.0527,
      "step": 1391
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0847545862197876,
      "learning_rate": 1.9993256425168347e-05,
      "loss": 3.0345,
      "step": 1392
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1238030195236206,
      "learning_rate": 1.999324129827439e-05,
      "loss": 2.9007,
      "step": 1393
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9895889163017273,
      "learning_rate": 1.999322615443918e-05,
      "loss": 2.7966,
      "step": 1394
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9602797031402588,
      "learning_rate": 1.9993210993662734e-05,
      "loss": 2.837,
      "step": 1395
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1044893264770508,
      "learning_rate": 1.999319581594508e-05,
      "loss": 3.3593,
      "step": 1396
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9865185618400574,
      "learning_rate": 1.999318062128625e-05,
      "loss": 2.867,
      "step": 1397
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0719506740570068,
      "learning_rate": 1.9993165409686263e-05,
      "loss": 2.9219,
      "step": 1398
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0061120986938477,
      "learning_rate": 1.999315018114514e-05,
      "loss": 2.893,
      "step": 1399
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9091824889183044,
      "learning_rate": 1.9993134935662924e-05,
      "loss": 2.9974,
      "step": 1400
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1251267194747925,
      "learning_rate": 1.9993119673239624e-05,
      "loss": 2.8671,
      "step": 1401
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9769282341003418,
      "learning_rate": 1.9993104393875275e-05,
      "loss": 3.016,
      "step": 1402
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.1201813220977783,
      "learning_rate": 1.9993089097569895e-05,
      "loss": 2.9942,
      "step": 1403
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1047648191452026,
      "learning_rate": 1.9993073784323516e-05,
      "loss": 2.7754,
      "step": 1404
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9761635661125183,
      "learning_rate": 1.9993058454136168e-05,
      "loss": 3.2224,
      "step": 1405
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0600755214691162,
      "learning_rate": 1.9993043107007865e-05,
      "loss": 3.0303,
      "step": 1406
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9800803065299988,
      "learning_rate": 1.9993027742938643e-05,
      "loss": 2.9429,
      "step": 1407
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0225884914398193,
      "learning_rate": 1.9993012361928524e-05,
      "loss": 2.8519,
      "step": 1408
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0058915615081787,
      "learning_rate": 1.999299696397754e-05,
      "loss": 2.9338,
      "step": 1409
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9906496405601501,
      "learning_rate": 1.9992981549085708e-05,
      "loss": 3.0486,
      "step": 1410
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0314942598342896,
      "learning_rate": 1.999296611725306e-05,
      "loss": 2.9637,
      "step": 1411
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9777362942695618,
      "learning_rate": 1.9992950668479615e-05,
      "loss": 2.9638,
      "step": 1412
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.018520474433899,
      "learning_rate": 1.999293520276541e-05,
      "loss": 2.983,
      "step": 1413
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0554612874984741,
      "learning_rate": 1.9992919720110466e-05,
      "loss": 3.1209,
      "step": 1414
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9696428179740906,
      "learning_rate": 1.9992904220514803e-05,
      "loss": 2.9091,
      "step": 1415
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9305080771446228,
      "learning_rate": 1.9992888703978458e-05,
      "loss": 2.8681,
      "step": 1416
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2033528089523315,
      "learning_rate": 1.9992873170501453e-05,
      "loss": 3.0362,
      "step": 1417
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.049797773361206,
      "learning_rate": 1.9992857620083814e-05,
      "loss": 3.0496,
      "step": 1418
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.058423399925232,
      "learning_rate": 1.9992842052725566e-05,
      "loss": 3.0669,
      "step": 1419
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9376599788665771,
      "learning_rate": 1.9992826468426737e-05,
      "loss": 2.8584,
      "step": 1420
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0096251964569092,
      "learning_rate": 1.9992810867187354e-05,
      "loss": 2.9899,
      "step": 1421
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0393930673599243,
      "learning_rate": 1.999279524900744e-05,
      "loss": 2.9488,
      "step": 1422
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9192358255386353,
      "learning_rate": 1.999277961388703e-05,
      "loss": 2.997,
      "step": 1423
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0921233892440796,
      "learning_rate": 1.9992763961826137e-05,
      "loss": 3.0986,
      "step": 1424
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0629291534423828,
      "learning_rate": 1.99927482928248e-05,
      "loss": 3.2018,
      "step": 1425
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1088016033172607,
      "learning_rate": 1.9992732606883037e-05,
      "loss": 2.8613,
      "step": 1426
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1059658527374268,
      "learning_rate": 1.9992716904000883e-05,
      "loss": 2.8575,
      "step": 1427
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0037126541137695,
      "learning_rate": 1.9992701184178355e-05,
      "loss": 3.1478,
      "step": 1428
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9998108744621277,
      "learning_rate": 1.9992685447415486e-05,
      "loss": 3.0361,
      "step": 1429
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9495002031326294,
      "learning_rate": 1.9992669693712304e-05,
      "loss": 3.0795,
      "step": 1430
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0204042196273804,
      "learning_rate": 1.999265392306883e-05,
      "loss": 2.9386,
      "step": 1431
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0275720357894897,
      "learning_rate": 1.9992638135485095e-05,
      "loss": 3.2237,
      "step": 1432
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9832268357276917,
      "learning_rate": 1.9992622330961125e-05,
      "loss": 2.9522,
      "step": 1433
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9526532292366028,
      "learning_rate": 1.9992606509496944e-05,
      "loss": 3.2113,
      "step": 1434
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.094476580619812,
      "learning_rate": 1.999259067109258e-05,
      "loss": 3.1158,
      "step": 1435
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0410484075546265,
      "learning_rate": 1.9992574815748064e-05,
      "loss": 2.98,
      "step": 1436
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0317617654800415,
      "learning_rate": 1.999255894346342e-05,
      "loss": 2.9465,
      "step": 1437
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0536408424377441,
      "learning_rate": 1.9992543054238674e-05,
      "loss": 2.8874,
      "step": 1438
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.098266839981079,
      "learning_rate": 1.9992527148073854e-05,
      "loss": 2.9821,
      "step": 1439
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.948902428150177,
      "learning_rate": 1.999251122496898e-05,
      "loss": 2.9277,
      "step": 1440
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1161015033721924,
      "learning_rate": 1.9992495284924094e-05,
      "loss": 3.1916,
      "step": 1441
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0430190563201904,
      "learning_rate": 1.9992479327939215e-05,
      "loss": 2.9022,
      "step": 1442
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9388145804405212,
      "learning_rate": 1.9992463354014365e-05,
      "loss": 3.0192,
      "step": 1443
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0502713918685913,
      "learning_rate": 1.9992447363149578e-05,
      "loss": 3.0908,
      "step": 1444
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9893761277198792,
      "learning_rate": 1.9992431355344873e-05,
      "loss": 3.0547,
      "step": 1445
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1816887855529785,
      "learning_rate": 1.999241533060029e-05,
      "loss": 2.7832,
      "step": 1446
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0583735704421997,
      "learning_rate": 1.9992399288915846e-05,
      "loss": 2.9558,
      "step": 1447
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0591282844543457,
      "learning_rate": 1.999238323029157e-05,
      "loss": 3.1707,
      "step": 1448
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9422552585601807,
      "learning_rate": 1.9992367154727493e-05,
      "loss": 2.9788,
      "step": 1449
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0740572214126587,
      "learning_rate": 1.999235106222364e-05,
      "loss": 2.8909,
      "step": 1450
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.005721926689148,
      "learning_rate": 1.9992334952780038e-05,
      "loss": 3.0992,
      "step": 1451
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9664828777313232,
      "learning_rate": 1.9992318826396715e-05,
      "loss": 2.9532,
      "step": 1452
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0170665979385376,
      "learning_rate": 1.9992302683073695e-05,
      "loss": 2.9452,
      "step": 1453
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.070265769958496,
      "learning_rate": 1.9992286522811012e-05,
      "loss": 2.777,
      "step": 1454
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0061659812927246,
      "learning_rate": 1.9992270345608683e-05,
      "loss": 2.9273,
      "step": 1455
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1342217922210693,
      "learning_rate": 1.999225415146675e-05,
      "loss": 3.0913,
      "step": 1456
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0101253986358643,
      "learning_rate": 1.9992237940385227e-05,
      "loss": 2.8879,
      "step": 1457
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0009660720825195,
      "learning_rate": 1.9992221712364147e-05,
      "loss": 3.4715,
      "step": 1458
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0091568231582642,
      "learning_rate": 1.999220546740354e-05,
      "loss": 2.9784,
      "step": 1459
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.013550043106079,
      "learning_rate": 1.999218920550343e-05,
      "loss": 3.0614,
      "step": 1460
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0262805223464966,
      "learning_rate": 1.9992172926663844e-05,
      "loss": 2.8562,
      "step": 1461
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0480297803878784,
      "learning_rate": 1.999215663088481e-05,
      "loss": 3.02,
      "step": 1462
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9940795302391052,
      "learning_rate": 1.999214031816636e-05,
      "loss": 2.9273,
      "step": 1463
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.064092993736267,
      "learning_rate": 1.9992123988508518e-05,
      "loss": 3.1157,
      "step": 1464
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0393950939178467,
      "learning_rate": 1.9992107641911312e-05,
      "loss": 2.9242,
      "step": 1465
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0457630157470703,
      "learning_rate": 1.999209127837477e-05,
      "loss": 3.0561,
      "step": 1466
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9798226356506348,
      "learning_rate": 1.9992074897898916e-05,
      "loss": 2.7174,
      "step": 1467
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3067370653152466,
      "learning_rate": 1.9992058500483786e-05,
      "loss": 2.9816,
      "step": 1468
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9792221188545227,
      "learning_rate": 1.9992042086129396e-05,
      "loss": 2.9618,
      "step": 1469
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1826578378677368,
      "learning_rate": 1.9992025654835787e-05,
      "loss": 3.1448,
      "step": 1470
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0131475925445557,
      "learning_rate": 1.9992009206602976e-05,
      "loss": 3.0257,
      "step": 1471
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.004713773727417,
      "learning_rate": 1.9991992741431e-05,
      "loss": 2.9541,
      "step": 1472
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0360493659973145,
      "learning_rate": 1.9991976259319884e-05,
      "loss": 3.2328,
      "step": 1473
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1364843845367432,
      "learning_rate": 1.999195976026965e-05,
      "loss": 2.9205,
      "step": 1474
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1023064851760864,
      "learning_rate": 1.9991943244280333e-05,
      "loss": 2.9634,
      "step": 1475
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.16041100025177,
      "learning_rate": 1.9991926711351956e-05,
      "loss": 2.6711,
      "step": 1476
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0659160614013672,
      "learning_rate": 1.9991910161484553e-05,
      "loss": 2.6755,
      "step": 1477
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0567365884780884,
      "learning_rate": 1.9991893594678144e-05,
      "loss": 2.8945,
      "step": 1478
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1129796504974365,
      "learning_rate": 1.9991877010932764e-05,
      "loss": 2.85,
      "step": 1479
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2084180116653442,
      "learning_rate": 1.999186041024844e-05,
      "loss": 3.1136,
      "step": 1480
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9440271854400635,
      "learning_rate": 1.9991843792625197e-05,
      "loss": 2.9308,
      "step": 1481
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.03095543384552,
      "learning_rate": 1.9991827158063068e-05,
      "loss": 2.8537,
      "step": 1482
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0236892700195312,
      "learning_rate": 1.9991810506562072e-05,
      "loss": 3.1048,
      "step": 1483
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0066871643066406,
      "learning_rate": 1.9991793838122247e-05,
      "loss": 2.9681,
      "step": 1484
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9525554776191711,
      "learning_rate": 1.999177715274362e-05,
      "loss": 2.8099,
      "step": 1485
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0554834604263306,
      "learning_rate": 1.9991760450426213e-05,
      "loss": 2.9218,
      "step": 1486
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9370474219322205,
      "learning_rate": 1.9991743731170063e-05,
      "loss": 3.0393,
      "step": 1487
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.942441463470459,
      "learning_rate": 1.9991726994975188e-05,
      "loss": 2.931,
      "step": 1488
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9926319718360901,
      "learning_rate": 1.9991710241841623e-05,
      "loss": 2.9226,
      "step": 1489
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0021494626998901,
      "learning_rate": 1.99916934717694e-05,
      "loss": 2.78,
      "step": 1490
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0463217496871948,
      "learning_rate": 1.999167668475854e-05,
      "loss": 2.9023,
      "step": 1491
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0555551052093506,
      "learning_rate": 1.9991659880809075e-05,
      "loss": 3.0637,
      "step": 1492
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.8983290791511536,
      "learning_rate": 1.9991643059921033e-05,
      "loss": 2.9572,
      "step": 1493
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0472102165222168,
      "learning_rate": 1.9991626222094442e-05,
      "loss": 3.0898,
      "step": 1494
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9491761922836304,
      "learning_rate": 1.999160936732933e-05,
      "loss": 3.0106,
      "step": 1495
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9811049103736877,
      "learning_rate": 1.999159249562573e-05,
      "loss": 2.8957,
      "step": 1496
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0757958889007568,
      "learning_rate": 1.999157560698366e-05,
      "loss": 2.9329,
      "step": 1497
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0061004161834717,
      "learning_rate": 1.999155870140316e-05,
      "loss": 2.8688,
      "step": 1498
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0506523847579956,
      "learning_rate": 1.9991541778884253e-05,
      "loss": 2.9291,
      "step": 1499
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9071704149246216,
      "learning_rate": 1.9991524839426973e-05,
      "loss": 2.9334,
      "step": 1500
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9334436058998108,
      "learning_rate": 1.9991507883031342e-05,
      "loss": 2.8819,
      "step": 1501
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1012372970581055,
      "learning_rate": 1.999149090969739e-05,
      "loss": 2.7771,
      "step": 1502
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9892992377281189,
      "learning_rate": 1.999147391942515e-05,
      "loss": 2.7477,
      "step": 1503
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9920552968978882,
      "learning_rate": 1.9991456912214643e-05,
      "loss": 3.0492,
      "step": 1504
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0611422061920166,
      "learning_rate": 1.9991439888065906e-05,
      "loss": 2.9334,
      "step": 1505
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9198254346847534,
      "learning_rate": 1.9991422846978967e-05,
      "loss": 2.8551,
      "step": 1506
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1009910106658936,
      "learning_rate": 1.999140578895385e-05,
      "loss": 3.157,
      "step": 1507
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9481131434440613,
      "learning_rate": 1.9991388713990587e-05,
      "loss": 2.8912,
      "step": 1508
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0595036745071411,
      "learning_rate": 1.999137162208921e-05,
      "loss": 3.0303,
      "step": 1509
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0685845613479614,
      "learning_rate": 1.999135451324974e-05,
      "loss": 2.8826,
      "step": 1510
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.033738374710083,
      "learning_rate": 1.999133738747221e-05,
      "loss": 3.0236,
      "step": 1511
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9966238141059875,
      "learning_rate": 1.9991320244756653e-05,
      "loss": 3.0599,
      "step": 1512
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9868507385253906,
      "learning_rate": 1.9991303085103092e-05,
      "loss": 2.9495,
      "step": 1513
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0309725999832153,
      "learning_rate": 1.999128590851156e-05,
      "loss": 2.9056,
      "step": 1514
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0686126947402954,
      "learning_rate": 1.9991268714982085e-05,
      "loss": 3.0954,
      "step": 1515
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.125620722770691,
      "learning_rate": 1.9991251504514696e-05,
      "loss": 2.9498,
      "step": 1516
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.106940507888794,
      "learning_rate": 1.999123427710942e-05,
      "loss": 2.941,
      "step": 1517
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3828375339508057,
      "learning_rate": 1.999121703276629e-05,
      "loss": 2.8488,
      "step": 1518
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.976437509059906,
      "learning_rate": 1.9991199771485333e-05,
      "loss": 2.9659,
      "step": 1519
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0357673168182373,
      "learning_rate": 1.9991182493266577e-05,
      "loss": 2.8747,
      "step": 1520
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1347124576568604,
      "learning_rate": 1.999116519811006e-05,
      "loss": 3.1617,
      "step": 1521
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.043777346611023,
      "learning_rate": 1.9991147886015797e-05,
      "loss": 3.003,
      "step": 1522
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9670736193656921,
      "learning_rate": 1.999113055698383e-05,
      "loss": 2.9313,
      "step": 1523
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9947971701622009,
      "learning_rate": 1.9991113211014177e-05,
      "loss": 3.0121,
      "step": 1524
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0767478942871094,
      "learning_rate": 1.999109584810688e-05,
      "loss": 2.7874,
      "step": 1525
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0004702806472778,
      "learning_rate": 1.999107846826196e-05,
      "loss": 3.003,
      "step": 1526
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.940655529499054,
      "learning_rate": 1.9991061071479446e-05,
      "loss": 3.014,
      "step": 1527
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.975712239742279,
      "learning_rate": 1.9991043657759373e-05,
      "loss": 2.933,
      "step": 1528
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9706062078475952,
      "learning_rate": 1.9991026227101764e-05,
      "loss": 2.8258,
      "step": 1529
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0032753944396973,
      "learning_rate": 1.9991008779506655e-05,
      "loss": 2.8706,
      "step": 1530
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1453351974487305,
      "learning_rate": 1.999099131497407e-05,
      "loss": 3.0591,
      "step": 1531
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0271073579788208,
      "learning_rate": 1.9990973833504044e-05,
      "loss": 2.8175,
      "step": 1532
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.986745297908783,
      "learning_rate": 1.9990956335096603e-05,
      "loss": 3.0473,
      "step": 1533
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0422977209091187,
      "learning_rate": 1.9990938819751777e-05,
      "loss": 3.0696,
      "step": 1534
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.06111478805542,
      "learning_rate": 1.9990921287469595e-05,
      "loss": 3.0948,
      "step": 1535
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0582860708236694,
      "learning_rate": 1.999090373825009e-05,
      "loss": 2.8516,
      "step": 1536
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0007222890853882,
      "learning_rate": 1.999088617209329e-05,
      "loss": 2.8411,
      "step": 1537
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9763408303260803,
      "learning_rate": 1.999086858899922e-05,
      "loss": 3.0122,
      "step": 1538
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0292998552322388,
      "learning_rate": 1.999085098896792e-05,
      "loss": 2.9367,
      "step": 1539
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9423262476921082,
      "learning_rate": 1.9990833371999413e-05,
      "loss": 2.9998,
      "step": 1540
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0167979001998901,
      "learning_rate": 1.999081573809373e-05,
      "loss": 2.9758,
      "step": 1541
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.056530475616455,
      "learning_rate": 1.99907980872509e-05,
      "loss": 2.9932,
      "step": 1542
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9501709342002869,
      "learning_rate": 1.9990780419470953e-05,
      "loss": 3.0751,
      "step": 1543
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9686335325241089,
      "learning_rate": 1.999076273475392e-05,
      "loss": 2.8621,
      "step": 1544
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9835036993026733,
      "learning_rate": 1.999074503309983e-05,
      "loss": 2.8426,
      "step": 1545
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1333950757980347,
      "learning_rate": 1.9990727314508712e-05,
      "loss": 2.8777,
      "step": 1546
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9799221158027649,
      "learning_rate": 1.9990709578980602e-05,
      "loss": 2.8894,
      "step": 1547
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0120973587036133,
      "learning_rate": 1.9990691826515523e-05,
      "loss": 3.0802,
      "step": 1548
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0095422267913818,
      "learning_rate": 1.999067405711351e-05,
      "loss": 3.0068,
      "step": 1549
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9856138825416565,
      "learning_rate": 1.9990656270774588e-05,
      "loss": 2.813,
      "step": 1550
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.982758104801178,
      "learning_rate": 1.9990638467498794e-05,
      "loss": 3.0275,
      "step": 1551
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0781484842300415,
      "learning_rate": 1.9990620647286155e-05,
      "loss": 3.171,
      "step": 1552
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.027299404144287,
      "learning_rate": 1.9990602810136697e-05,
      "loss": 2.8065,
      "step": 1553
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0205978155136108,
      "learning_rate": 1.9990584956050452e-05,
      "loss": 3.0363,
      "step": 1554
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.156140923500061,
      "learning_rate": 1.999056708502746e-05,
      "loss": 3.0487,
      "step": 1555
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9676924347877502,
      "learning_rate": 1.9990549197067736e-05,
      "loss": 2.9861,
      "step": 1556
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0299737453460693,
      "learning_rate": 1.999053129217132e-05,
      "loss": 2.9311,
      "step": 1557
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9537287950515747,
      "learning_rate": 1.999051337033824e-05,
      "loss": 2.6749,
      "step": 1558
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.59342622756958,
      "learning_rate": 1.999049543156853e-05,
      "loss": 2.961,
      "step": 1559
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9674244523048401,
      "learning_rate": 1.999047747586221e-05,
      "loss": 2.828,
      "step": 1560
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0317716598510742,
      "learning_rate": 1.9990459503219322e-05,
      "loss": 2.9985,
      "step": 1561
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9662545323371887,
      "learning_rate": 1.999044151363989e-05,
      "loss": 3.1618,
      "step": 1562
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.015734076499939,
      "learning_rate": 1.999042350712395e-05,
      "loss": 2.96,
      "step": 1563
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0695455074310303,
      "learning_rate": 1.999040548367152e-05,
      "loss": 2.9971,
      "step": 1564
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0242620706558228,
      "learning_rate": 1.9990387443282647e-05,
      "loss": 2.7464,
      "step": 1565
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0657315254211426,
      "learning_rate": 1.999036938595735e-05,
      "loss": 3.0975,
      "step": 1566
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.005984902381897,
      "learning_rate": 1.999035131169567e-05,
      "loss": 2.8832,
      "step": 1567
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9426263570785522,
      "learning_rate": 1.9990333220497626e-05,
      "loss": 3.1223,
      "step": 1568
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0425854921340942,
      "learning_rate": 1.9990315112363256e-05,
      "loss": 3.0025,
      "step": 1569
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9194478988647461,
      "learning_rate": 1.9990296987292587e-05,
      "loss": 2.9538,
      "step": 1570
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.081857681274414,
      "learning_rate": 1.999027884528565e-05,
      "loss": 2.9967,
      "step": 1571
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9984095096588135,
      "learning_rate": 1.999026068634248e-05,
      "loss": 2.7197,
      "step": 1572
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0858975648880005,
      "learning_rate": 1.9990242510463103e-05,
      "loss": 2.993,
      "step": 1573
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0020148754119873,
      "learning_rate": 1.9990224317647553e-05,
      "loss": 2.9563,
      "step": 1574
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0420459508895874,
      "learning_rate": 1.9990206107895857e-05,
      "loss": 3.0543,
      "step": 1575
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0448966026306152,
      "learning_rate": 1.9990187881208048e-05,
      "loss": 2.8517,
      "step": 1576
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0440621376037598,
      "learning_rate": 1.9990169637584162e-05,
      "loss": 3.0858,
      "step": 1577
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9865626692771912,
      "learning_rate": 1.999015137702422e-05,
      "loss": 3.2743,
      "step": 1578
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0088828802108765,
      "learning_rate": 1.9990133099528262e-05,
      "loss": 3.1232,
      "step": 1579
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1110029220581055,
      "learning_rate": 1.999011480509631e-05,
      "loss": 3.013,
      "step": 1580
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.063350796699524,
      "learning_rate": 1.9990096493728406e-05,
      "loss": 2.9887,
      "step": 1581
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0158365964889526,
      "learning_rate": 1.9990078165424572e-05,
      "loss": 3.0003,
      "step": 1582
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1126139163970947,
      "learning_rate": 1.9990059820184844e-05,
      "loss": 2.6569,
      "step": 1583
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.969230055809021,
      "learning_rate": 1.999004145800925e-05,
      "loss": 3.0837,
      "step": 1584
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0353097915649414,
      "learning_rate": 1.9990023078897822e-05,
      "loss": 2.8731,
      "step": 1585
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0660518407821655,
      "learning_rate": 1.9990004682850594e-05,
      "loss": 3.0488,
      "step": 1586
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9830960035324097,
      "learning_rate": 1.9989986269867596e-05,
      "loss": 3.1521,
      "step": 1587
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0494264364242554,
      "learning_rate": 1.998996783994885e-05,
      "loss": 2.9398,
      "step": 1588
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1614842414855957,
      "learning_rate": 1.9989949393094405e-05,
      "loss": 2.8163,
      "step": 1589
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0828558206558228,
      "learning_rate": 1.9989930929304277e-05,
      "loss": 2.8682,
      "step": 1590
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1576274633407593,
      "learning_rate": 1.9989912448578504e-05,
      "loss": 2.9543,
      "step": 1591
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9745906591415405,
      "learning_rate": 1.9989893950917114e-05,
      "loss": 3.0654,
      "step": 1592
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9639468193054199,
      "learning_rate": 1.9989875436320144e-05,
      "loss": 3.0213,
      "step": 1593
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1625548601150513,
      "learning_rate": 1.998985690478762e-05,
      "loss": 2.916,
      "step": 1594
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1518462896347046,
      "learning_rate": 1.9989838356319575e-05,
      "loss": 2.6666,
      "step": 1595
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0641223192214966,
      "learning_rate": 1.998981979091604e-05,
      "loss": 2.7293,
      "step": 1596
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0059164762496948,
      "learning_rate": 1.998980120857705e-05,
      "loss": 2.9593,
      "step": 1597
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9999648928642273,
      "learning_rate": 1.9989782609302634e-05,
      "loss": 3.1386,
      "step": 1598
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0915521383285522,
      "learning_rate": 1.998976399309282e-05,
      "loss": 3.0081,
      "step": 1599
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0253878831863403,
      "learning_rate": 1.9989745359947646e-05,
      "loss": 3.1372,
      "step": 1600
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0045734643936157,
      "learning_rate": 1.998972670986714e-05,
      "loss": 2.9428,
      "step": 1601
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1041314601898193,
      "learning_rate": 1.9989708042851334e-05,
      "loss": 3.1103,
      "step": 1602
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9969794750213623,
      "learning_rate": 1.9989689358900256e-05,
      "loss": 3.0953,
      "step": 1603
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9918074011802673,
      "learning_rate": 1.9989670658013948e-05,
      "loss": 2.87,
      "step": 1604
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2798298597335815,
      "learning_rate": 1.998965194019243e-05,
      "loss": 3.3587,
      "step": 1605
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0166935920715332,
      "learning_rate": 1.998963320543574e-05,
      "loss": 2.9371,
      "step": 1606
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0010930299758911,
      "learning_rate": 1.998961445374391e-05,
      "loss": 2.7433,
      "step": 1607
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9698352217674255,
      "learning_rate": 1.9989595685116967e-05,
      "loss": 2.8749,
      "step": 1608
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0493309497833252,
      "learning_rate": 1.998957689955495e-05,
      "loss": 2.8023,
      "step": 1609
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1692650318145752,
      "learning_rate": 1.9989558097057884e-05,
      "loss": 2.9458,
      "step": 1610
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9446313977241516,
      "learning_rate": 1.9989539277625808e-05,
      "loss": 2.8637,
      "step": 1611
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0468286275863647,
      "learning_rate": 1.9989520441258748e-05,
      "loss": 2.877,
      "step": 1612
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9921855330467224,
      "learning_rate": 1.9989501587956734e-05,
      "loss": 2.8719,
      "step": 1613
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.008437991142273,
      "learning_rate": 1.9989482717719805e-05,
      "loss": 2.7504,
      "step": 1614
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9807834625244141,
      "learning_rate": 1.998946383054799e-05,
      "loss": 2.9642,
      "step": 1615
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9793401956558228,
      "learning_rate": 1.998944492644132e-05,
      "loss": 2.9169,
      "step": 1616
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9713717103004456,
      "learning_rate": 1.9989426005399828e-05,
      "loss": 2.6656,
      "step": 1617
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9058127403259277,
      "learning_rate": 1.9989407067423543e-05,
      "loss": 2.8525,
      "step": 1618
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.014115333557129,
      "learning_rate": 1.9989388112512504e-05,
      "loss": 3.1473,
      "step": 1619
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0799369812011719,
      "learning_rate": 1.9989369140666735e-05,
      "loss": 2.9817,
      "step": 1620
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9713369607925415,
      "learning_rate": 1.9989350151886277e-05,
      "loss": 2.6655,
      "step": 1621
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0897010564804077,
      "learning_rate": 1.9989331146171154e-05,
      "loss": 2.9551,
      "step": 1622
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9582477807998657,
      "learning_rate": 1.9989312123521403e-05,
      "loss": 2.8761,
      "step": 1623
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9819431900978088,
      "learning_rate": 1.9989293083937055e-05,
      "loss": 3.1309,
      "step": 1624
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3448177576065063,
      "learning_rate": 1.9989274027418146e-05,
      "loss": 2.8344,
      "step": 1625
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1188671588897705,
      "learning_rate": 1.9989254953964697e-05,
      "loss": 3.0342,
      "step": 1626
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1934820413589478,
      "learning_rate": 1.998923586357675e-05,
      "loss": 3.0462,
      "step": 1627
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0185143947601318,
      "learning_rate": 1.9989216756254338e-05,
      "loss": 3.1022,
      "step": 1628
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0429104566574097,
      "learning_rate": 1.998919763199749e-05,
      "loss": 2.836,
      "step": 1629
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.068255066871643,
      "learning_rate": 1.9989178490806236e-05,
      "loss": 2.9648,
      "step": 1630
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0924183130264282,
      "learning_rate": 1.9989159332680616e-05,
      "loss": 2.9226,
      "step": 1631
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0362354516983032,
      "learning_rate": 1.9989140157620652e-05,
      "loss": 2.8899,
      "step": 1632
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1116095781326294,
      "learning_rate": 1.998912096562639e-05,
      "loss": 3.0623,
      "step": 1633
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.968896746635437,
      "learning_rate": 1.9989101756697847e-05,
      "loss": 2.9242,
      "step": 1634
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.199633240699768,
      "learning_rate": 1.9989082530835066e-05,
      "loss": 2.8462,
      "step": 1635
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0276256799697876,
      "learning_rate": 1.998906328803808e-05,
      "loss": 3.0461,
      "step": 1636
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9665101766586304,
      "learning_rate": 1.9989044028306918e-05,
      "loss": 3.0297,
      "step": 1637
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0678492784500122,
      "learning_rate": 1.998902475164161e-05,
      "loss": 2.6809,
      "step": 1638
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0045987367630005,
      "learning_rate": 1.9989005458042195e-05,
      "loss": 3.0699,
      "step": 1639
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2441602945327759,
      "learning_rate": 1.9988986147508704e-05,
      "loss": 3.103,
      "step": 1640
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0793440341949463,
      "learning_rate": 1.9988966820041163e-05,
      "loss": 2.7293,
      "step": 1641
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0102823972702026,
      "learning_rate": 1.9988947475639615e-05,
      "loss": 2.8874,
      "step": 1642
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9709641337394714,
      "learning_rate": 1.998892811430409e-05,
      "loss": 2.9718,
      "step": 1643
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1070351600646973,
      "learning_rate": 1.9988908736034612e-05,
      "loss": 3.1974,
      "step": 1644
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9565927982330322,
      "learning_rate": 1.9988889340831226e-05,
      "loss": 3.0409,
      "step": 1645
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1902397871017456,
      "learning_rate": 1.998886992869396e-05,
      "loss": 2.8943,
      "step": 1646
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0729707479476929,
      "learning_rate": 1.9988850499622842e-05,
      "loss": 2.9603,
      "step": 1647
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0738341808319092,
      "learning_rate": 1.9988831053617913e-05,
      "loss": 2.946,
      "step": 1648
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0147743225097656,
      "learning_rate": 1.9988811590679203e-05,
      "loss": 2.9467,
      "step": 1649
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9524252414703369,
      "learning_rate": 1.998879211080674e-05,
      "loss": 2.8559,
      "step": 1650
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0377830266952515,
      "learning_rate": 1.9988772614000567e-05,
      "loss": 2.9566,
      "step": 1651
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0843021869659424,
      "learning_rate": 1.998875310026071e-05,
      "loss": 3.0009,
      "step": 1652
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0780322551727295,
      "learning_rate": 1.99887335695872e-05,
      "loss": 2.941,
      "step": 1653
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1150561571121216,
      "learning_rate": 1.9988714021980077e-05,
      "loss": 2.7518,
      "step": 1654
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9111547470092773,
      "learning_rate": 1.998869445743937e-05,
      "loss": 2.8976,
      "step": 1655
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.986237108707428,
      "learning_rate": 1.9988674875965115e-05,
      "loss": 3.015,
      "step": 1656
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9959140419960022,
      "learning_rate": 1.9988655277557343e-05,
      "loss": 2.6836,
      "step": 1657
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0415278673171997,
      "learning_rate": 1.9988635662216084e-05,
      "loss": 3.1035,
      "step": 1658
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9182656407356262,
      "learning_rate": 1.9988616029941375e-05,
      "loss": 3.1316,
      "step": 1659
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0557026863098145,
      "learning_rate": 1.9988596380733252e-05,
      "loss": 2.844,
      "step": 1660
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1776702404022217,
      "learning_rate": 1.9988576714591744e-05,
      "loss": 3.0947,
      "step": 1661
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0120285749435425,
      "learning_rate": 1.9988557031516886e-05,
      "loss": 2.9322,
      "step": 1662
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0799273252487183,
      "learning_rate": 1.9988537331508707e-05,
      "loss": 3.0346,
      "step": 1663
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.007988452911377,
      "learning_rate": 1.9988517614567252e-05,
      "loss": 2.893,
      "step": 1664
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.066148042678833,
      "learning_rate": 1.998849788069254e-05,
      "loss": 3.0479,
      "step": 1665
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0955020189285278,
      "learning_rate": 1.9988478129884615e-05,
      "loss": 3.0061,
      "step": 1666
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9257725477218628,
      "learning_rate": 1.9988458362143507e-05,
      "loss": 3.1111,
      "step": 1667
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1250783205032349,
      "learning_rate": 1.9988438577469246e-05,
      "loss": 2.7209,
      "step": 1668
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9483781456947327,
      "learning_rate": 1.998841877586187e-05,
      "loss": 3.07,
      "step": 1669
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9236438274383545,
      "learning_rate": 1.9988398957321412e-05,
      "loss": 2.905,
      "step": 1670
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9923233985900879,
      "learning_rate": 1.9988379121847904e-05,
      "loss": 3.0939,
      "step": 1671
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9214622378349304,
      "learning_rate": 1.998835926944138e-05,
      "loss": 2.87,
      "step": 1672
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2165523767471313,
      "learning_rate": 1.998833940010188e-05,
      "loss": 2.9357,
      "step": 1673
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9360278844833374,
      "learning_rate": 1.9988319513829425e-05,
      "loss": 2.9033,
      "step": 1674
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0511852502822876,
      "learning_rate": 1.998829961062406e-05,
      "loss": 2.7326,
      "step": 1675
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1558787822723389,
      "learning_rate": 1.998827969048581e-05,
      "loss": 2.8781,
      "step": 1676
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1070870161056519,
      "learning_rate": 1.998825975341472e-05,
      "loss": 3.1402,
      "step": 1677
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.012638807296753,
      "learning_rate": 1.998823979941081e-05,
      "loss": 3.1759,
      "step": 1678
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0076100826263428,
      "learning_rate": 1.9988219828474125e-05,
      "loss": 2.9965,
      "step": 1679
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9393603801727295,
      "learning_rate": 1.9988199840604692e-05,
      "loss": 2.9053,
      "step": 1680
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.1798267364501953,
      "learning_rate": 1.998817983580255e-05,
      "loss": 2.8873,
      "step": 1681
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0401684045791626,
      "learning_rate": 1.9988159814067727e-05,
      "loss": 3.0145,
      "step": 1682
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9474437832832336,
      "learning_rate": 1.998813977540026e-05,
      "loss": 3.1645,
      "step": 1683
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0835508108139038,
      "learning_rate": 1.9988119719800187e-05,
      "loss": 2.861,
      "step": 1684
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0421242713928223,
      "learning_rate": 1.9988099647267538e-05,
      "loss": 3.0461,
      "step": 1685
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0019209384918213,
      "learning_rate": 1.9988079557802348e-05,
      "loss": 2.9462,
      "step": 1686
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0239229202270508,
      "learning_rate": 1.9988059451404648e-05,
      "loss": 2.9821,
      "step": 1687
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0334668159484863,
      "learning_rate": 1.9988039328074474e-05,
      "loss": 2.7883,
      "step": 1688
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.029262661933899,
      "learning_rate": 1.998801918781186e-05,
      "loss": 2.7966,
      "step": 1689
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0036377906799316,
      "learning_rate": 1.9987999030616846e-05,
      "loss": 2.8,
      "step": 1690
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0392072200775146,
      "learning_rate": 1.9987978856489456e-05,
      "loss": 3.0183,
      "step": 1691
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1264874935150146,
      "learning_rate": 1.998795866542973e-05,
      "loss": 2.7438,
      "step": 1692
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9262657761573792,
      "learning_rate": 1.9987938457437703e-05,
      "loss": 3.0127,
      "step": 1693
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9824953675270081,
      "learning_rate": 1.998791823251341e-05,
      "loss": 2.7855,
      "step": 1694
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0289249420166016,
      "learning_rate": 1.9987897990656878e-05,
      "loss": 3.133,
      "step": 1695
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9707925915718079,
      "learning_rate": 1.9987877731868148e-05,
      "loss": 2.9542,
      "step": 1696
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.05844247341156,
      "learning_rate": 1.998785745614725e-05,
      "loss": 2.8488,
      "step": 1697
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9762373566627502,
      "learning_rate": 1.9987837163494225e-05,
      "loss": 2.968,
      "step": 1698
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9876459240913391,
      "learning_rate": 1.9987816853909102e-05,
      "loss": 3.0226,
      "step": 1699
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9407309293746948,
      "learning_rate": 1.9987796527391916e-05,
      "loss": 3.021,
      "step": 1700
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0231382846832275,
      "learning_rate": 1.9987776183942704e-05,
      "loss": 2.9478,
      "step": 1701
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0007489919662476,
      "learning_rate": 1.9987755823561497e-05,
      "loss": 2.7931,
      "step": 1702
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0159425735473633,
      "learning_rate": 1.998773544624833e-05,
      "loss": 2.9059,
      "step": 1703
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0222448110580444,
      "learning_rate": 1.998771505200324e-05,
      "loss": 2.9098,
      "step": 1704
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0670126676559448,
      "learning_rate": 1.9987694640826265e-05,
      "loss": 3.1713,
      "step": 1705
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0491701364517212,
      "learning_rate": 1.998767421271743e-05,
      "loss": 2.9672,
      "step": 1706
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9964539408683777,
      "learning_rate": 1.9987653767676775e-05,
      "loss": 2.8988,
      "step": 1707
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.069996953010559,
      "learning_rate": 1.9987633305704333e-05,
      "loss": 2.9576,
      "step": 1708
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0091933012008667,
      "learning_rate": 1.9987612826800143e-05,
      "loss": 3.049,
      "step": 1709
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0144838094711304,
      "learning_rate": 1.9987592330964236e-05,
      "loss": 2.805,
      "step": 1710
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9789202809333801,
      "learning_rate": 1.9987571818196648e-05,
      "loss": 2.8186,
      "step": 1711
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9389736652374268,
      "learning_rate": 1.998755128849741e-05,
      "loss": 2.9717,
      "step": 1712
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0888030529022217,
      "learning_rate": 1.9987530741866563e-05,
      "loss": 2.8559,
      "step": 1713
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.092718243598938,
      "learning_rate": 1.9987510178304137e-05,
      "loss": 2.8213,
      "step": 1714
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0322345495224,
      "learning_rate": 1.998748959781017e-05,
      "loss": 2.6361,
      "step": 1715
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.122106909751892,
      "learning_rate": 1.9987469000384695e-05,
      "loss": 3.0086,
      "step": 1716
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0115687847137451,
      "learning_rate": 1.9987448386027746e-05,
      "loss": 2.6975,
      "step": 1717
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0736162662506104,
      "learning_rate": 1.9987427754739365e-05,
      "loss": 2.9162,
      "step": 1718
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.039111852645874,
      "learning_rate": 1.9987407106519574e-05,
      "loss": 2.9094,
      "step": 1719
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9555898308753967,
      "learning_rate": 1.998738644136842e-05,
      "loss": 2.9453,
      "step": 1720
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0305131673812866,
      "learning_rate": 1.998736575928593e-05,
      "loss": 2.7243,
      "step": 1721
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9768767356872559,
      "learning_rate": 1.9987345060272148e-05,
      "loss": 2.8482,
      "step": 1722
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0008686780929565,
      "learning_rate": 1.99873243443271e-05,
      "loss": 3.1998,
      "step": 1723
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0405937433242798,
      "learning_rate": 1.9987303611450824e-05,
      "loss": 2.9838,
      "step": 1724
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9888593554496765,
      "learning_rate": 1.998728286164336e-05,
      "loss": 2.7521,
      "step": 1725
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0025454759597778,
      "learning_rate": 1.9987262094904733e-05,
      "loss": 3.0013,
      "step": 1726
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1212091445922852,
      "learning_rate": 1.9987241311234985e-05,
      "loss": 3.0143,
      "step": 1727
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1134443283081055,
      "learning_rate": 1.9987220510634153e-05,
      "loss": 3.0346,
      "step": 1728
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9618282318115234,
      "learning_rate": 1.998719969310227e-05,
      "loss": 3.088,
      "step": 1729
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0387585163116455,
      "learning_rate": 1.9987178858639373e-05,
      "loss": 2.8474,
      "step": 1730
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9524897933006287,
      "learning_rate": 1.9987158007245492e-05,
      "loss": 2.886,
      "step": 1731
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0331486463546753,
      "learning_rate": 1.9987137138920666e-05,
      "loss": 3.2463,
      "step": 1732
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0011866092681885,
      "learning_rate": 1.9987116253664932e-05,
      "loss": 2.8667,
      "step": 1733
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.986656904220581,
      "learning_rate": 1.9987095351478317e-05,
      "loss": 2.853,
      "step": 1734
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1050708293914795,
      "learning_rate": 1.9987074432360872e-05,
      "loss": 2.7675,
      "step": 1735
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0812335014343262,
      "learning_rate": 1.9987053496312615e-05,
      "loss": 2.7971,
      "step": 1736
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0235542058944702,
      "learning_rate": 1.9987032543333598e-05,
      "loss": 2.8138,
      "step": 1737
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.8657870292663574,
      "learning_rate": 1.9987011573423843e-05,
      "loss": 2.9364,
      "step": 1738
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1493042707443237,
      "learning_rate": 1.9986990586583394e-05,
      "loss": 2.9843,
      "step": 1739
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9880544543266296,
      "learning_rate": 1.998696958281228e-05,
      "loss": 2.9377,
      "step": 1740
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9689700603485107,
      "learning_rate": 1.9986948562110544e-05,
      "loss": 2.9288,
      "step": 1741
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.04323410987854,
      "learning_rate": 1.9986927524478217e-05,
      "loss": 2.955,
      "step": 1742
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1981357336044312,
      "learning_rate": 1.9986906469915332e-05,
      "loss": 3.0216,
      "step": 1743
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1146622896194458,
      "learning_rate": 1.9986885398421932e-05,
      "loss": 2.8222,
      "step": 1744
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0392284393310547,
      "learning_rate": 1.9986864309998048e-05,
      "loss": 2.8602,
      "step": 1745
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9968277812004089,
      "learning_rate": 1.9986843204643716e-05,
      "loss": 2.9734,
      "step": 1746
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0628643035888672,
      "learning_rate": 1.9986822082358972e-05,
      "loss": 2.9356,
      "step": 1747
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9962918162345886,
      "learning_rate": 1.998680094314385e-05,
      "loss": 2.9884,
      "step": 1748
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9811909198760986,
      "learning_rate": 1.9986779786998395e-05,
      "loss": 2.7856,
      "step": 1749
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9644089341163635,
      "learning_rate": 1.998675861392263e-05,
      "loss": 2.8716,
      "step": 1750
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9410746097564697,
      "learning_rate": 1.9986737423916596e-05,
      "loss": 2.8143,
      "step": 1751
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9744976758956909,
      "learning_rate": 1.9986716216980334e-05,
      "loss": 2.7479,
      "step": 1752
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0303928852081299,
      "learning_rate": 1.9986694993113872e-05,
      "loss": 2.8633,
      "step": 1753
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0044775009155273,
      "learning_rate": 1.998667375231725e-05,
      "loss": 2.9272,
      "step": 1754
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.082155704498291,
      "learning_rate": 1.998665249459051e-05,
      "loss": 2.811,
      "step": 1755
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0125051736831665,
      "learning_rate": 1.998663121993367e-05,
      "loss": 2.8648,
      "step": 1756
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0039172172546387,
      "learning_rate": 1.9986609928346786e-05,
      "loss": 3.1417,
      "step": 1757
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0639907121658325,
      "learning_rate": 1.9986588619829884e-05,
      "loss": 2.8387,
      "step": 1758
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1112768650054932,
      "learning_rate": 1.9986567294383003e-05,
      "loss": 2.8405,
      "step": 1759
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9810624718666077,
      "learning_rate": 1.9986545952006176e-05,
      "loss": 2.8946,
      "step": 1760
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.97825688123703,
      "learning_rate": 1.9986524592699444e-05,
      "loss": 2.7804,
      "step": 1761
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.16802179813385,
      "learning_rate": 1.998650321646284e-05,
      "loss": 2.964,
      "step": 1762
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0690670013427734,
      "learning_rate": 1.99864818232964e-05,
      "loss": 2.8736,
      "step": 1763
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9537972211837769,
      "learning_rate": 1.998646041320016e-05,
      "loss": 2.8889,
      "step": 1764
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9925946593284607,
      "learning_rate": 1.9986438986174158e-05,
      "loss": 2.9205,
      "step": 1765
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0365509986877441,
      "learning_rate": 1.998641754221843e-05,
      "loss": 2.7318,
      "step": 1766
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0650575160980225,
      "learning_rate": 1.998639608133301e-05,
      "loss": 3.0131,
      "step": 1767
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1062090396881104,
      "learning_rate": 1.9986374603517942e-05,
      "loss": 2.9036,
      "step": 1768
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.074105143547058,
      "learning_rate": 1.998635310877325e-05,
      "loss": 2.9187,
      "step": 1769
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0695956945419312,
      "learning_rate": 1.9986331597098983e-05,
      "loss": 2.9951,
      "step": 1770
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0214109420776367,
      "learning_rate": 1.9986310068495166e-05,
      "loss": 2.9206,
      "step": 1771
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0205564498901367,
      "learning_rate": 1.9986288522961844e-05,
      "loss": 3.0212,
      "step": 1772
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4460952281951904,
      "learning_rate": 1.998626696049905e-05,
      "loss": 2.9694,
      "step": 1773
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0561528205871582,
      "learning_rate": 1.9986245381106823e-05,
      "loss": 3.0869,
      "step": 1774
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1692811250686646,
      "learning_rate": 1.99862237847852e-05,
      "loss": 3.0351,
      "step": 1775
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0264540910720825,
      "learning_rate": 1.998620217153421e-05,
      "loss": 2.9359,
      "step": 1776
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9794336557388306,
      "learning_rate": 1.9986180541353895e-05,
      "loss": 2.8374,
      "step": 1777
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9835681915283203,
      "learning_rate": 1.9986158894244297e-05,
      "loss": 3.0859,
      "step": 1778
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0282864570617676,
      "learning_rate": 1.9986137230205445e-05,
      "loss": 3.0278,
      "step": 1779
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.028714895248413,
      "learning_rate": 1.9986115549237375e-05,
      "loss": 2.8893,
      "step": 1780
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.005068063735962,
      "learning_rate": 1.9986093851340133e-05,
      "loss": 2.7514,
      "step": 1781
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.121877908706665,
      "learning_rate": 1.9986072136513747e-05,
      "loss": 2.9619,
      "step": 1782
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0563737154006958,
      "learning_rate": 1.9986050404758256e-05,
      "loss": 2.7586,
      "step": 1783
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1080355644226074,
      "learning_rate": 1.9986028656073697e-05,
      "loss": 2.7977,
      "step": 1784
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0366721153259277,
      "learning_rate": 1.9986006890460107e-05,
      "loss": 2.9519,
      "step": 1785
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9984944462776184,
      "learning_rate": 1.9985985107917524e-05,
      "loss": 2.8673,
      "step": 1786
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0459283590316772,
      "learning_rate": 1.9985963308445987e-05,
      "loss": 3.0886,
      "step": 1787
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0687811374664307,
      "learning_rate": 1.9985941492045525e-05,
      "loss": 2.9231,
      "step": 1788
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9685364365577698,
      "learning_rate": 1.998591965871618e-05,
      "loss": 2.8531,
      "step": 1789
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1976678371429443,
      "learning_rate": 1.9985897808457992e-05,
      "loss": 2.9801,
      "step": 1790
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0718753337860107,
      "learning_rate": 1.9985875941270993e-05,
      "loss": 2.9504,
      "step": 1791
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4615976810455322,
      "learning_rate": 1.9985854057155224e-05,
      "loss": 2.8937,
      "step": 1792
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9372032284736633,
      "learning_rate": 1.9985832156110722e-05,
      "loss": 2.6983,
      "step": 1793
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9210360050201416,
      "learning_rate": 1.9985810238137518e-05,
      "loss": 2.7536,
      "step": 1794
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0825679302215576,
      "learning_rate": 1.9985788303235655e-05,
      "loss": 2.7683,
      "step": 1795
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0549519062042236,
      "learning_rate": 1.9985766351405172e-05,
      "loss": 2.9239,
      "step": 1796
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1067198514938354,
      "learning_rate": 1.9985744382646098e-05,
      "loss": 3.003,
      "step": 1797
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0294482707977295,
      "learning_rate": 1.9985722396958478e-05,
      "loss": 2.9338,
      "step": 1798
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0687947273254395,
      "learning_rate": 1.9985700394342346e-05,
      "loss": 3.0897,
      "step": 1799
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.070345401763916,
      "learning_rate": 1.9985678374797738e-05,
      "loss": 2.7875,
      "step": 1800
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0608974695205688,
      "learning_rate": 1.9985656338324695e-05,
      "loss": 3.0761,
      "step": 1801
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0143009424209595,
      "learning_rate": 1.9985634284923252e-05,
      "loss": 2.9542,
      "step": 1802
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.929686427116394,
      "learning_rate": 1.998561221459345e-05,
      "loss": 3.0081,
      "step": 1803
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0273525714874268,
      "learning_rate": 1.998559012733532e-05,
      "loss": 2.8308,
      "step": 1804
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9447320103645325,
      "learning_rate": 1.9985568023148902e-05,
      "loss": 2.7494,
      "step": 1805
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0971837043762207,
      "learning_rate": 1.9985545902034236e-05,
      "loss": 2.8287,
      "step": 1806
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9400066137313843,
      "learning_rate": 1.9985523763991355e-05,
      "loss": 2.868,
      "step": 1807
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0983777046203613,
      "learning_rate": 1.99855016090203e-05,
      "loss": 3.3149,
      "step": 1808
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0123802423477173,
      "learning_rate": 1.998547943712111e-05,
      "loss": 2.8457,
      "step": 1809
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.997053861618042,
      "learning_rate": 1.9985457248293817e-05,
      "loss": 3.0025,
      "step": 1810
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9998201131820679,
      "learning_rate": 1.9985435042538465e-05,
      "loss": 2.8124,
      "step": 1811
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9751814603805542,
      "learning_rate": 1.998541281985509e-05,
      "loss": 2.9614,
      "step": 1812
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0778366327285767,
      "learning_rate": 1.998539058024372e-05,
      "loss": 3.0759,
      "step": 1813
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9420864582061768,
      "learning_rate": 1.998536832370441e-05,
      "loss": 2.9271,
      "step": 1814
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.117408037185669,
      "learning_rate": 1.9985346050237188e-05,
      "loss": 2.9157,
      "step": 1815
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0166270732879639,
      "learning_rate": 1.998532375984209e-05,
      "loss": 2.8752,
      "step": 1816
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0585155487060547,
      "learning_rate": 1.9985301452519152e-05,
      "loss": 3.065,
      "step": 1817
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9767308235168457,
      "learning_rate": 1.998527912826842e-05,
      "loss": 3.0259,
      "step": 1818
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0259095430374146,
      "learning_rate": 1.998525678708993e-05,
      "loss": 2.9166,
      "step": 1819
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1486046314239502,
      "learning_rate": 1.9985234428983715e-05,
      "loss": 2.8798,
      "step": 1820
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1439656019210815,
      "learning_rate": 1.9985212053949818e-05,
      "loss": 2.7516,
      "step": 1821
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9176898002624512,
      "learning_rate": 1.9985189661988272e-05,
      "loss": 2.7636,
      "step": 1822
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9475244283676147,
      "learning_rate": 1.998516725309912e-05,
      "loss": 2.7886,
      "step": 1823
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9346799254417419,
      "learning_rate": 1.9985144827282395e-05,
      "loss": 2.8205,
      "step": 1824
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3425471782684326,
      "learning_rate": 1.998512238453814e-05,
      "loss": 2.7915,
      "step": 1825
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9303944706916809,
      "learning_rate": 1.9985099924866388e-05,
      "loss": 3.0511,
      "step": 1826
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1847325563430786,
      "learning_rate": 1.9985077448267183e-05,
      "loss": 2.952,
      "step": 1827
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0044872760772705,
      "learning_rate": 1.9985054954740556e-05,
      "loss": 2.825,
      "step": 1828
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0768736600875854,
      "learning_rate": 1.9985032444286552e-05,
      "loss": 3.0866,
      "step": 1829
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9713953137397766,
      "learning_rate": 1.9985009916905204e-05,
      "loss": 2.9933,
      "step": 1830
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0220239162445068,
      "learning_rate": 1.9984987372596553e-05,
      "loss": 3.1381,
      "step": 1831
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9594956040382385,
      "learning_rate": 1.998496481136064e-05,
      "loss": 2.867,
      "step": 1832
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.092461347579956,
      "learning_rate": 1.9984942233197495e-05,
      "loss": 3.0236,
      "step": 1833
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9834431409835815,
      "learning_rate": 1.9984919638107163e-05,
      "loss": 2.9523,
      "step": 1834
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9840819835662842,
      "learning_rate": 1.9984897026089683e-05,
      "loss": 2.943,
      "step": 1835
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0270882844924927,
      "learning_rate": 1.9984874397145083e-05,
      "loss": 2.8148,
      "step": 1836
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.942054033279419,
      "learning_rate": 1.9984851751273418e-05,
      "loss": 3.0547,
      "step": 1837
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0934308767318726,
      "learning_rate": 1.9984829088474714e-05,
      "loss": 2.9142,
      "step": 1838
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9774618148803711,
      "learning_rate": 1.9984806408749012e-05,
      "loss": 2.9461,
      "step": 1839
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0291218757629395,
      "learning_rate": 1.998478371209635e-05,
      "loss": 3.0363,
      "step": 1840
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0568186044692993,
      "learning_rate": 1.998476099851677e-05,
      "loss": 2.9461,
      "step": 1841
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.047060489654541,
      "learning_rate": 1.9984738268010307e-05,
      "loss": 3.1488,
      "step": 1842
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0671683549880981,
      "learning_rate": 1.9984715520577e-05,
      "loss": 3.101,
      "step": 1843
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1054257154464722,
      "learning_rate": 1.998469275621689e-05,
      "loss": 2.9074,
      "step": 1844
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0052385330200195,
      "learning_rate": 1.9984669974930013e-05,
      "loss": 2.7572,
      "step": 1845
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.133905291557312,
      "learning_rate": 1.998464717671641e-05,
      "loss": 2.9605,
      "step": 1846
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0218777656555176,
      "learning_rate": 1.9984624361576116e-05,
      "loss": 3.2092,
      "step": 1847
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9820497632026672,
      "learning_rate": 1.9984601529509177e-05,
      "loss": 2.8594,
      "step": 1848
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.000610113143921,
      "learning_rate": 1.998457868051562e-05,
      "loss": 3.144,
      "step": 1849
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1262279748916626,
      "learning_rate": 1.9984555814595494e-05,
      "loss": 3.0347,
      "step": 1850
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9625828266143799,
      "learning_rate": 1.9984532931748835e-05,
      "loss": 2.9445,
      "step": 1851
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0941890478134155,
      "learning_rate": 1.998451003197568e-05,
      "loss": 2.8175,
      "step": 1852
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9548028111457825,
      "learning_rate": 1.9984487115276064e-05,
      "loss": 2.8445,
      "step": 1853
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0796765089035034,
      "learning_rate": 1.9984464181650037e-05,
      "loss": 2.8806,
      "step": 1854
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0371925830841064,
      "learning_rate": 1.9984441231097632e-05,
      "loss": 2.7722,
      "step": 1855
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0860975980758667,
      "learning_rate": 1.998441826361888e-05,
      "loss": 2.7075,
      "step": 1856
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0395047664642334,
      "learning_rate": 1.9984395279213834e-05,
      "loss": 2.9728,
      "step": 1857
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1119697093963623,
      "learning_rate": 1.9984372277882526e-05,
      "loss": 2.9668,
      "step": 1858
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.125745177268982,
      "learning_rate": 1.9984349259624992e-05,
      "loss": 2.8313,
      "step": 1859
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9752556681632996,
      "learning_rate": 1.9984326224441277e-05,
      "loss": 2.8028,
      "step": 1860
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.023361086845398,
      "learning_rate": 1.9984303172331417e-05,
      "loss": 2.7662,
      "step": 1861
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0162461996078491,
      "learning_rate": 1.9984280103295447e-05,
      "loss": 3.0684,
      "step": 1862
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0011215209960938,
      "learning_rate": 1.9984257017333415e-05,
      "loss": 2.8558,
      "step": 1863
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2256906032562256,
      "learning_rate": 1.9984233914445353e-05,
      "loss": 3.0945,
      "step": 1864
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0398650169372559,
      "learning_rate": 1.9984210794631306e-05,
      "loss": 3.0164,
      "step": 1865
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9272610545158386,
      "learning_rate": 1.998418765789131e-05,
      "loss": 2.8169,
      "step": 1866
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0661580562591553,
      "learning_rate": 1.9984164504225404e-05,
      "loss": 2.8662,
      "step": 1867
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9693477153778076,
      "learning_rate": 1.9984141333633623e-05,
      "loss": 2.849,
      "step": 1868
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0136882066726685,
      "learning_rate": 1.9984118146116016e-05,
      "loss": 2.8373,
      "step": 1869
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9399505853652954,
      "learning_rate": 1.9984094941672617e-05,
      "loss": 2.7544,
      "step": 1870
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0679644346237183,
      "learning_rate": 1.998407172030346e-05,
      "loss": 2.8411,
      "step": 1871
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.94244784116745,
      "learning_rate": 1.9984048482008596e-05,
      "loss": 2.9059,
      "step": 1872
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0772969722747803,
      "learning_rate": 1.9984025226788056e-05,
      "loss": 3.0358,
      "step": 1873
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0415219068527222,
      "learning_rate": 1.998400195464188e-05,
      "loss": 2.9534,
      "step": 1874
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.146470546722412,
      "learning_rate": 1.998397866557011e-05,
      "loss": 2.8116,
      "step": 1875
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0238200426101685,
      "learning_rate": 1.9983955359572785e-05,
      "loss": 2.839,
      "step": 1876
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0096580982208252,
      "learning_rate": 1.9983932036649944e-05,
      "loss": 2.9996,
      "step": 1877
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0418331623077393,
      "learning_rate": 1.9983908696801627e-05,
      "loss": 2.787,
      "step": 1878
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0827943086624146,
      "learning_rate": 1.9983885340027874e-05,
      "loss": 3.1271,
      "step": 1879
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0118587017059326,
      "learning_rate": 1.9983861966328722e-05,
      "loss": 3.0647,
      "step": 1880
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0706403255462646,
      "learning_rate": 1.9983838575704212e-05,
      "loss": 2.898,
      "step": 1881
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9555357098579407,
      "learning_rate": 1.9983815168154384e-05,
      "loss": 2.832,
      "step": 1882
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9529595375061035,
      "learning_rate": 1.998379174367928e-05,
      "loss": 2.7558,
      "step": 1883
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9351635575294495,
      "learning_rate": 1.9983768302278936e-05,
      "loss": 2.9713,
      "step": 1884
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9403401613235474,
      "learning_rate": 1.9983744843953393e-05,
      "loss": 2.9422,
      "step": 1885
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9994440078735352,
      "learning_rate": 1.9983721368702692e-05,
      "loss": 2.8837,
      "step": 1886
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.107026219367981,
      "learning_rate": 1.998369787652687e-05,
      "loss": 2.7158,
      "step": 1887
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0840750932693481,
      "learning_rate": 1.998367436742597e-05,
      "loss": 2.6872,
      "step": 1888
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0529887676239014,
      "learning_rate": 1.998365084140003e-05,
      "loss": 3.1066,
      "step": 1889
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0877759456634521,
      "learning_rate": 1.9983627298449088e-05,
      "loss": 2.7831,
      "step": 1890
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0344139337539673,
      "learning_rate": 1.9983603738573188e-05,
      "loss": 2.9453,
      "step": 1891
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1109434366226196,
      "learning_rate": 1.998358016177237e-05,
      "loss": 2.7154,
      "step": 1892
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9069176316261292,
      "learning_rate": 1.998355656804667e-05,
      "loss": 2.8529,
      "step": 1893
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1401395797729492,
      "learning_rate": 1.9983532957396133e-05,
      "loss": 2.9468,
      "step": 1894
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.093629002571106,
      "learning_rate": 1.9983509329820793e-05,
      "loss": 3.1468,
      "step": 1895
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0206893682479858,
      "learning_rate": 1.9983485685320695e-05,
      "loss": 2.9857,
      "step": 1896
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.067988634109497,
      "learning_rate": 1.9983462023895876e-05,
      "loss": 2.7246,
      "step": 1897
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.067416787147522,
      "learning_rate": 1.998343834554638e-05,
      "loss": 2.9781,
      "step": 1898
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0810089111328125,
      "learning_rate": 1.998341465027224e-05,
      "loss": 2.9084,
      "step": 1899
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1570712327957153,
      "learning_rate": 1.9983390938073504e-05,
      "loss": 2.8656,
      "step": 1900
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.019092082977295,
      "learning_rate": 1.998336720895021e-05,
      "loss": 2.8774,
      "step": 1901
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0133270025253296,
      "learning_rate": 1.998334346290239e-05,
      "loss": 2.9461,
      "step": 1902
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0536844730377197,
      "learning_rate": 1.9983319699930098e-05,
      "loss": 3.0029,
      "step": 1903
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9937374591827393,
      "learning_rate": 1.998329592003337e-05,
      "loss": 2.8279,
      "step": 1904
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0736744403839111,
      "learning_rate": 1.9983272123212237e-05,
      "loss": 2.9406,
      "step": 1905
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9847787022590637,
      "learning_rate": 1.998324830946675e-05,
      "loss": 2.9041,
      "step": 1906
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0213335752487183,
      "learning_rate": 1.9983224478796945e-05,
      "loss": 2.8801,
      "step": 1907
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9721880555152893,
      "learning_rate": 1.9983200631202863e-05,
      "loss": 2.9415,
      "step": 1908
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0998976230621338,
      "learning_rate": 1.9983176766684543e-05,
      "loss": 2.9357,
      "step": 1909
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0434788465499878,
      "learning_rate": 1.998315288524203e-05,
      "loss": 2.8946,
      "step": 1910
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9187115430831909,
      "learning_rate": 1.998312898687536e-05,
      "loss": 2.8936,
      "step": 1911
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0606396198272705,
      "learning_rate": 1.9983105071584576e-05,
      "loss": 2.8151,
      "step": 1912
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.063614845275879,
      "learning_rate": 1.9983081139369717e-05,
      "loss": 2.813,
      "step": 1913
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9741641283035278,
      "learning_rate": 1.9983057190230824e-05,
      "loss": 3.0957,
      "step": 1914
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9779198169708252,
      "learning_rate": 1.998303322416794e-05,
      "loss": 2.836,
      "step": 1915
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.044477939605713,
      "learning_rate": 1.9983009241181098e-05,
      "loss": 2.8797,
      "step": 1916
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.025541067123413,
      "learning_rate": 1.998298524127035e-05,
      "loss": 2.8383,
      "step": 1917
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.987298846244812,
      "learning_rate": 1.9982961224435726e-05,
      "loss": 2.8661,
      "step": 1918
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.9883530735969543,
      "learning_rate": 1.9982937190677273e-05,
      "loss": 2.9223,
      "step": 1919
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0423129796981812,
      "learning_rate": 1.998291313999503e-05,
      "loss": 3.0846,
      "step": 1920
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9641650319099426,
      "learning_rate": 1.9982889072389036e-05,
      "loss": 2.9066,
      "step": 1921
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9844750761985779,
      "learning_rate": 1.9982864987859337e-05,
      "loss": 2.9573,
      "step": 1922
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9988359808921814,
      "learning_rate": 1.998284088640597e-05,
      "loss": 2.911,
      "step": 1923
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.060604453086853,
      "learning_rate": 1.9982816768028973e-05,
      "loss": 2.8039,
      "step": 1924
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.128258228302002,
      "learning_rate": 1.9982792632728393e-05,
      "loss": 2.8528,
      "step": 1925
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1413930654525757,
      "learning_rate": 1.998276848050427e-05,
      "loss": 3.0571,
      "step": 1926
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.16664457321167,
      "learning_rate": 1.998274431135664e-05,
      "loss": 2.6728,
      "step": 1927
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9761835336685181,
      "learning_rate": 1.998272012528555e-05,
      "loss": 2.9802,
      "step": 1928
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0990104675292969,
      "learning_rate": 1.998269592229104e-05,
      "loss": 3.0655,
      "step": 1929
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9234775304794312,
      "learning_rate": 1.9982671702373145e-05,
      "loss": 2.8992,
      "step": 1930
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0576839447021484,
      "learning_rate": 1.998264746553191e-05,
      "loss": 2.8797,
      "step": 1931
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1044082641601562,
      "learning_rate": 1.9982623211767375e-05,
      "loss": 2.8298,
      "step": 1932
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1091703176498413,
      "learning_rate": 1.9982598941079587e-05,
      "loss": 2.7162,
      "step": 1933
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1067947149276733,
      "learning_rate": 1.9982574653468578e-05,
      "loss": 2.9503,
      "step": 1934
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9934644103050232,
      "learning_rate": 1.9982550348934397e-05,
      "loss": 2.8983,
      "step": 1935
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4506807327270508,
      "learning_rate": 1.998252602747708e-05,
      "loss": 2.9548,
      "step": 1936
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.045348048210144,
      "learning_rate": 1.998250168909667e-05,
      "loss": 2.9118,
      "step": 1937
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0102347135543823,
      "learning_rate": 1.9982477333793212e-05,
      "loss": 3.2044,
      "step": 1938
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0546034574508667,
      "learning_rate": 1.998245296156674e-05,
      "loss": 3.1425,
      "step": 1939
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0548526048660278,
      "learning_rate": 1.99824285724173e-05,
      "loss": 2.9654,
      "step": 1940
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.149509072303772,
      "learning_rate": 1.9982404166344933e-05,
      "loss": 2.8903,
      "step": 1941
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.073099970817566,
      "learning_rate": 1.998237974334968e-05,
      "loss": 3.0636,
      "step": 1942
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9469038248062134,
      "learning_rate": 1.998235530343158e-05,
      "loss": 3.129,
      "step": 1943
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9427287578582764,
      "learning_rate": 1.998233084659068e-05,
      "loss": 2.9081,
      "step": 1944
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9665023684501648,
      "learning_rate": 1.9982306372827013e-05,
      "loss": 2.9903,
      "step": 1945
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9799749255180359,
      "learning_rate": 1.998228188214063e-05,
      "loss": 2.8198,
      "step": 1946
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0488189458847046,
      "learning_rate": 1.9982257374531565e-05,
      "loss": 2.6448,
      "step": 1947
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1068613529205322,
      "learning_rate": 1.9982232849999867e-05,
      "loss": 2.7377,
      "step": 1948
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0905272960662842,
      "learning_rate": 1.998220830854557e-05,
      "loss": 3.146,
      "step": 1949
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0343337059020996,
      "learning_rate": 1.998218375016872e-05,
      "loss": 2.6552,
      "step": 1950
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9257133603096008,
      "learning_rate": 1.9982159174869357e-05,
      "loss": 2.6379,
      "step": 1951
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0310965776443481,
      "learning_rate": 1.998213458264752e-05,
      "loss": 2.8492,
      "step": 1952
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0023088455200195,
      "learning_rate": 1.9982109973503258e-05,
      "loss": 2.8657,
      "step": 1953
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0539220571517944,
      "learning_rate": 1.9982085347436606e-05,
      "loss": 3.084,
      "step": 1954
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9833760857582092,
      "learning_rate": 1.998206070444761e-05,
      "loss": 2.7077,
      "step": 1955
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0477573871612549,
      "learning_rate": 1.998203604453631e-05,
      "loss": 2.8281,
      "step": 1956
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0778436660766602,
      "learning_rate": 1.9982011367702748e-05,
      "loss": 2.908,
      "step": 1957
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1292306184768677,
      "learning_rate": 1.9981986673946966e-05,
      "loss": 2.9973,
      "step": 1958
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9323295950889587,
      "learning_rate": 1.9981961963269005e-05,
      "loss": 2.8536,
      "step": 1959
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.8900657296180725,
      "learning_rate": 1.9981937235668903e-05,
      "loss": 2.7309,
      "step": 1960
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0129867792129517,
      "learning_rate": 1.998191249114671e-05,
      "loss": 2.823,
      "step": 1961
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.8991156816482544,
      "learning_rate": 1.9981887729702466e-05,
      "loss": 3.0536,
      "step": 1962
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.163831353187561,
      "learning_rate": 1.9981862951336207e-05,
      "loss": 2.9571,
      "step": 1963
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.8965288996696472,
      "learning_rate": 1.9981838156047985e-05,
      "loss": 2.8431,
      "step": 1964
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0167220830917358,
      "learning_rate": 1.9981813343837834e-05,
      "loss": 2.9488,
      "step": 1965
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9630857706069946,
      "learning_rate": 1.9981788514705794e-05,
      "loss": 3.0253,
      "step": 1966
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1257743835449219,
      "learning_rate": 1.9981763668651916e-05,
      "loss": 3.0232,
      "step": 1967
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0626713037490845,
      "learning_rate": 1.9981738805676238e-05,
      "loss": 2.9633,
      "step": 1968
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9920055270195007,
      "learning_rate": 1.99817139257788e-05,
      "loss": 2.71,
      "step": 1969
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0854438543319702,
      "learning_rate": 1.9981689028959645e-05,
      "loss": 2.8035,
      "step": 1970
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9585039019584656,
      "learning_rate": 1.9981664115218816e-05,
      "loss": 2.6928,
      "step": 1971
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9858331084251404,
      "learning_rate": 1.9981639184556355e-05,
      "loss": 2.8738,
      "step": 1972
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0115420818328857,
      "learning_rate": 1.9981614236972307e-05,
      "loss": 2.9739,
      "step": 1973
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9991152882575989,
      "learning_rate": 1.9981589272466713e-05,
      "loss": 2.644,
      "step": 1974
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9104332327842712,
      "learning_rate": 1.9981564291039613e-05,
      "loss": 2.9061,
      "step": 1975
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9125204682350159,
      "learning_rate": 1.9981539292691047e-05,
      "loss": 2.8357,
      "step": 1976
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.00594162940979,
      "learning_rate": 1.9981514277421064e-05,
      "loss": 2.753,
      "step": 1977
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9664433002471924,
      "learning_rate": 1.9981489245229704e-05,
      "loss": 2.867,
      "step": 1978
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0426387786865234,
      "learning_rate": 1.9981464196117008e-05,
      "loss": 2.8497,
      "step": 1979
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0652413368225098,
      "learning_rate": 1.998143913008302e-05,
      "loss": 2.751,
      "step": 1980
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0541883707046509,
      "learning_rate": 1.998141404712778e-05,
      "loss": 2.6895,
      "step": 1981
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0055936574935913,
      "learning_rate": 1.9981388947251333e-05,
      "loss": 2.9105,
      "step": 1982
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9934613704681396,
      "learning_rate": 1.998136383045372e-05,
      "loss": 2.9415,
      "step": 1983
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0559240579605103,
      "learning_rate": 1.998133869673499e-05,
      "loss": 2.9475,
      "step": 1984
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0972341299057007,
      "learning_rate": 1.9981313546095172e-05,
      "loss": 2.9057,
      "step": 1985
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9837170243263245,
      "learning_rate": 1.998128837853432e-05,
      "loss": 2.896,
      "step": 1986
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0281052589416504,
      "learning_rate": 1.9981263194052473e-05,
      "loss": 2.9443,
      "step": 1987
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9556578993797302,
      "learning_rate": 1.9981237992649677e-05,
      "loss": 2.9287,
      "step": 1988
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0994638204574585,
      "learning_rate": 1.9981212774325967e-05,
      "loss": 3.2253,
      "step": 1989
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0042767524719238,
      "learning_rate": 1.9981187539081396e-05,
      "loss": 2.9998,
      "step": 1990
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0051500797271729,
      "learning_rate": 1.9981162286915996e-05,
      "loss": 2.8865,
      "step": 1991
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9041386246681213,
      "learning_rate": 1.998113701782982e-05,
      "loss": 3.0546,
      "step": 1992
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0748761892318726,
      "learning_rate": 1.99811117318229e-05,
      "loss": 3.1786,
      "step": 1993
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0730150938034058,
      "learning_rate": 1.9981086428895288e-05,
      "loss": 2.9411,
      "step": 1994
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3910503387451172,
      "learning_rate": 1.9981061109047022e-05,
      "loss": 2.7434,
      "step": 1995
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0670732259750366,
      "learning_rate": 1.998103577227815e-05,
      "loss": 2.79,
      "step": 1996
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.057020902633667,
      "learning_rate": 1.998101041858871e-05,
      "loss": 2.7697,
      "step": 1997
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2160941362380981,
      "learning_rate": 1.9980985047978744e-05,
      "loss": 2.826,
      "step": 1998
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9675873517990112,
      "learning_rate": 1.99809596604483e-05,
      "loss": 2.9833,
      "step": 1999
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.095987319946289,
      "learning_rate": 1.9980934255997416e-05,
      "loss": 2.9625,
      "step": 2000
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0567353963851929,
      "learning_rate": 1.9980908834626143e-05,
      "loss": 2.9786,
      "step": 2001
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1017314195632935,
      "learning_rate": 1.9980883396334513e-05,
      "loss": 2.9836,
      "step": 2002
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9392622709274292,
      "learning_rate": 1.998085794112258e-05,
      "loss": 2.9523,
      "step": 2003
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9937704801559448,
      "learning_rate": 1.998083246899038e-05,
      "loss": 2.8859,
      "step": 2004
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0108773708343506,
      "learning_rate": 1.9980806979937956e-05,
      "loss": 2.8236,
      "step": 2005
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1869531869888306,
      "learning_rate": 1.9980781473965358e-05,
      "loss": 2.8671,
      "step": 2006
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9586384296417236,
      "learning_rate": 1.998075595107262e-05,
      "loss": 2.9068,
      "step": 2007
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.999547004699707,
      "learning_rate": 1.998073041125979e-05,
      "loss": 2.9163,
      "step": 2008
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9341563582420349,
      "learning_rate": 1.9980704854526913e-05,
      "loss": 3.144,
      "step": 2009
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0081117153167725,
      "learning_rate": 1.998067928087403e-05,
      "loss": 3.0375,
      "step": 2010
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.012598991394043,
      "learning_rate": 1.9980653690301187e-05,
      "loss": 3.0829,
      "step": 2011
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4891033172607422,
      "learning_rate": 1.9980628082808424e-05,
      "loss": 2.8786,
      "step": 2012
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9556730389595032,
      "learning_rate": 1.9980602458395785e-05,
      "loss": 2.8757,
      "step": 2013
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9581902027130127,
      "learning_rate": 1.9980576817063313e-05,
      "loss": 2.8112,
      "step": 2014
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1368602514266968,
      "learning_rate": 1.9980551158811057e-05,
      "loss": 2.7629,
      "step": 2015
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9764521718025208,
      "learning_rate": 1.998052548363905e-05,
      "loss": 3.0566,
      "step": 2016
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9483747482299805,
      "learning_rate": 1.9980499791547346e-05,
      "loss": 2.9187,
      "step": 2017
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9556835293769836,
      "learning_rate": 1.9980474082535983e-05,
      "loss": 2.9485,
      "step": 2018
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0114457607269287,
      "learning_rate": 1.998044835660501e-05,
      "loss": 2.8438,
      "step": 2019
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0528755187988281,
      "learning_rate": 1.998042261375446e-05,
      "loss": 2.7923,
      "step": 2020
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9835565090179443,
      "learning_rate": 1.9980396853984386e-05,
      "loss": 2.8677,
      "step": 2021
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9449933767318726,
      "learning_rate": 1.9980371077294826e-05,
      "loss": 2.8775,
      "step": 2022
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.078683614730835,
      "learning_rate": 1.998034528368583e-05,
      "loss": 3.0131,
      "step": 2023
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0334770679473877,
      "learning_rate": 1.9980319473157434e-05,
      "loss": 2.6606,
      "step": 2024
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9826579093933105,
      "learning_rate": 1.998029364570969e-05,
      "loss": 2.8065,
      "step": 2025
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0274217128753662,
      "learning_rate": 1.9980267801342634e-05,
      "loss": 2.9313,
      "step": 2026
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0052298307418823,
      "learning_rate": 1.998024194005632e-05,
      "loss": 2.967,
      "step": 2027
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0140581130981445,
      "learning_rate": 1.9980216061850775e-05,
      "loss": 2.6594,
      "step": 2028
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0189317464828491,
      "learning_rate": 1.998019016672606e-05,
      "loss": 2.84,
      "step": 2029
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0026615858078003,
      "learning_rate": 1.998016425468221e-05,
      "loss": 2.7394,
      "step": 2030
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9544400572776794,
      "learning_rate": 1.9980138325719275e-05,
      "loss": 2.8459,
      "step": 2031
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.98426353931427,
      "learning_rate": 1.998011237983729e-05,
      "loss": 2.8905,
      "step": 2032
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0904005765914917,
      "learning_rate": 1.998008641703631e-05,
      "loss": 3.0723,
      "step": 2033
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9953451156616211,
      "learning_rate": 1.9980060437316368e-05,
      "loss": 2.8406,
      "step": 2034
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9384192824363708,
      "learning_rate": 1.9980034440677512e-05,
      "loss": 2.7123,
      "step": 2035
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0561256408691406,
      "learning_rate": 1.998000842711979e-05,
      "loss": 2.789,
      "step": 2036
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.126723051071167,
      "learning_rate": 1.997998239664324e-05,
      "loss": 3.0235,
      "step": 2037
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0968424081802368,
      "learning_rate": 1.9979956349247915e-05,
      "loss": 2.9419,
      "step": 2038
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9408855438232422,
      "learning_rate": 1.9979930284933847e-05,
      "loss": 2.7094,
      "step": 2039
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9678168892860413,
      "learning_rate": 1.997990420370109e-05,
      "loss": 2.8518,
      "step": 2040
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9344573616981506,
      "learning_rate": 1.9979878105549685e-05,
      "loss": 2.779,
      "step": 2041
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.965872049331665,
      "learning_rate": 1.9979851990479675e-05,
      "loss": 2.8291,
      "step": 2042
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.077995777130127,
      "learning_rate": 1.9979825858491107e-05,
      "loss": 2.9821,
      "step": 2043
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0206732749938965,
      "learning_rate": 1.9979799709584022e-05,
      "loss": 2.9241,
      "step": 2044
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9140269756317139,
      "learning_rate": 1.9979773543758466e-05,
      "loss": 2.7351,
      "step": 2045
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0467338562011719,
      "learning_rate": 1.9979747361014485e-05,
      "loss": 2.8951,
      "step": 2046
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9822001457214355,
      "learning_rate": 1.997972116135212e-05,
      "loss": 2.8445,
      "step": 2047
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.98438560962677,
      "learning_rate": 1.997969494477142e-05,
      "loss": 3.1096,
      "step": 2048
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.106032371520996,
      "learning_rate": 1.9979668711272424e-05,
      "loss": 3.0005,
      "step": 2049
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9928110837936401,
      "learning_rate": 1.997964246085518e-05,
      "loss": 2.8879,
      "step": 2050
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1286684274673462,
      "learning_rate": 1.997961619351973e-05,
      "loss": 2.8678,
      "step": 2051
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0689384937286377,
      "learning_rate": 1.9979589909266122e-05,
      "loss": 2.8239,
      "step": 2052
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9518784880638123,
      "learning_rate": 1.9979563608094395e-05,
      "loss": 2.728,
      "step": 2053
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9958990812301636,
      "learning_rate": 1.9979537290004605e-05,
      "loss": 3.0969,
      "step": 2054
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0414983034133911,
      "learning_rate": 1.997951095499678e-05,
      "loss": 2.9258,
      "step": 2055
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9864273071289062,
      "learning_rate": 1.997948460307098e-05,
      "loss": 3.0842,
      "step": 2056
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0126900672912598,
      "learning_rate": 1.9979458234227242e-05,
      "loss": 3.0851,
      "step": 2057
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0152037143707275,
      "learning_rate": 1.997943184846561e-05,
      "loss": 2.9361,
      "step": 2058
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.057909369468689,
      "learning_rate": 1.9979405445786133e-05,
      "loss": 2.7784,
      "step": 2059
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9858989119529724,
      "learning_rate": 1.997937902618885e-05,
      "loss": 3.0621,
      "step": 2060
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0096722841262817,
      "learning_rate": 1.9979352589673813e-05,
      "loss": 3.2447,
      "step": 2061
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9920147657394409,
      "learning_rate": 1.997932613624106e-05,
      "loss": 2.9089,
      "step": 2062
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1805788278579712,
      "learning_rate": 1.9979299665890643e-05,
      "loss": 2.8286,
      "step": 2063
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0507726669311523,
      "learning_rate": 1.9979273178622596e-05,
      "loss": 2.8431,
      "step": 2064
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9449031949043274,
      "learning_rate": 1.9979246674436976e-05,
      "loss": 3.0067,
      "step": 2065
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9606695175170898,
      "learning_rate": 1.997922015333382e-05,
      "loss": 2.8304,
      "step": 2066
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9626725316047668,
      "learning_rate": 1.9979193615313177e-05,
      "loss": 2.9237,
      "step": 2067
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.960629940032959,
      "learning_rate": 1.9979167060375092e-05,
      "loss": 2.8738,
      "step": 2068
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9848026633262634,
      "learning_rate": 1.9979140488519607e-05,
      "loss": 3.0375,
      "step": 2069
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9935755133628845,
      "learning_rate": 1.9979113899746764e-05,
      "loss": 3.1129,
      "step": 2070
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.984740674495697,
      "learning_rate": 1.997908729405662e-05,
      "loss": 2.9108,
      "step": 2071
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9603685140609741,
      "learning_rate": 1.9979060671449208e-05,
      "loss": 2.9515,
      "step": 2072
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9824780821800232,
      "learning_rate": 1.9979034031924576e-05,
      "loss": 2.8097,
      "step": 2073
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.985145628452301,
      "learning_rate": 1.997900737548278e-05,
      "loss": 2.9031,
      "step": 2074
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0413662195205688,
      "learning_rate": 1.9978980702123847e-05,
      "loss": 3.002,
      "step": 2075
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0138344764709473,
      "learning_rate": 1.9978954011847837e-05,
      "loss": 2.9614,
      "step": 2076
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9402489066123962,
      "learning_rate": 1.9978927304654786e-05,
      "loss": 3.0733,
      "step": 2077
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.00937020778656,
      "learning_rate": 1.9978900580544747e-05,
      "loss": 2.8217,
      "step": 2078
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3008396625518799,
      "learning_rate": 1.9978873839517758e-05,
      "loss": 2.7025,
      "step": 2079
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1514607667922974,
      "learning_rate": 1.9978847081573868e-05,
      "loss": 2.8843,
      "step": 2080
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9035728573799133,
      "learning_rate": 1.9978820306713124e-05,
      "loss": 3.0267,
      "step": 2081
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9685350656509399,
      "learning_rate": 1.9978793514935567e-05,
      "loss": 2.9162,
      "step": 2082
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0617097616195679,
      "learning_rate": 1.9978766706241244e-05,
      "loss": 2.711,
      "step": 2083
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9957800507545471,
      "learning_rate": 1.9978739880630207e-05,
      "loss": 2.7361,
      "step": 2084
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0234965085983276,
      "learning_rate": 1.997871303810249e-05,
      "loss": 3.1561,
      "step": 2085
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.069975733757019,
      "learning_rate": 1.9978686178658145e-05,
      "loss": 2.9756,
      "step": 2086
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9041221141815186,
      "learning_rate": 1.997865930229722e-05,
      "loss": 2.9827,
      "step": 2087
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1100070476531982,
      "learning_rate": 1.9978632409019755e-05,
      "loss": 2.8364,
      "step": 2088
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9798078536987305,
      "learning_rate": 1.99786054988258e-05,
      "loss": 2.9174,
      "step": 2089
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0219035148620605,
      "learning_rate": 1.9978578571715393e-05,
      "loss": 2.7826,
      "step": 2090
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9783356785774231,
      "learning_rate": 1.997855162768859e-05,
      "loss": 2.7811,
      "step": 2091
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9904299974441528,
      "learning_rate": 1.9978524666745428e-05,
      "loss": 2.8317,
      "step": 2092
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.009589433670044,
      "learning_rate": 1.997849768888596e-05,
      "loss": 2.9702,
      "step": 2093
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9310964941978455,
      "learning_rate": 1.997847069411023e-05,
      "loss": 2.8484,
      "step": 2094
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9500358700752258,
      "learning_rate": 1.9978443682418277e-05,
      "loss": 2.906,
      "step": 2095
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.066253900527954,
      "learning_rate": 1.9978416653810155e-05,
      "loss": 2.805,
      "step": 2096
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.066823124885559,
      "learning_rate": 1.9978389608285905e-05,
      "loss": 2.7611,
      "step": 2097
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.03169846534729,
      "learning_rate": 1.9978362545845576e-05,
      "loss": 2.7822,
      "step": 2098
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9606301784515381,
      "learning_rate": 1.997833546648921e-05,
      "loss": 2.7626,
      "step": 2099
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9961988925933838,
      "learning_rate": 1.9978308370216858e-05,
      "loss": 2.7225,
      "step": 2100
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0733500719070435,
      "learning_rate": 1.9978281257028566e-05,
      "loss": 2.9537,
      "step": 2101
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0261540412902832,
      "learning_rate": 1.997825412692437e-05,
      "loss": 2.8825,
      "step": 2102
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0253416299819946,
      "learning_rate": 1.9978226979904328e-05,
      "loss": 3.0717,
      "step": 2103
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9145868420600891,
      "learning_rate": 1.997819981596848e-05,
      "loss": 2.872,
      "step": 2104
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9618481397628784,
      "learning_rate": 1.9978172635116872e-05,
      "loss": 2.859,
      "step": 2105
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9512057900428772,
      "learning_rate": 1.9978145437349556e-05,
      "loss": 2.6251,
      "step": 2106
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9510306119918823,
      "learning_rate": 1.9978118222666568e-05,
      "loss": 3.1109,
      "step": 2107
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0534042119979858,
      "learning_rate": 1.9978090991067963e-05,
      "loss": 2.7585,
      "step": 2108
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1860406398773193,
      "learning_rate": 1.997806374255378e-05,
      "loss": 2.9176,
      "step": 2109
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9412431120872498,
      "learning_rate": 1.9978036477124073e-05,
      "loss": 2.6844,
      "step": 2110
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0028966665267944,
      "learning_rate": 1.997800919477888e-05,
      "loss": 2.6592,
      "step": 2111
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0558217763900757,
      "learning_rate": 1.9977981895518254e-05,
      "loss": 2.9429,
      "step": 2112
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1061394214630127,
      "learning_rate": 1.997795457934224e-05,
      "loss": 2.8585,
      "step": 2113
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0772619247436523,
      "learning_rate": 1.9977927246250883e-05,
      "loss": 3.1049,
      "step": 2114
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0055004358291626,
      "learning_rate": 1.9977899896244228e-05,
      "loss": 3.0478,
      "step": 2115
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.033345341682434,
      "learning_rate": 1.9977872529322324e-05,
      "loss": 3.0691,
      "step": 2116
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9203508496284485,
      "learning_rate": 1.9977845145485214e-05,
      "loss": 2.8884,
      "step": 2117
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9391398429870605,
      "learning_rate": 1.9977817744732946e-05,
      "loss": 2.9691,
      "step": 2118
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0457292795181274,
      "learning_rate": 1.997779032706557e-05,
      "loss": 2.601,
      "step": 2119
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9895262122154236,
      "learning_rate": 1.9977762892483126e-05,
      "loss": 2.9176,
      "step": 2120
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9508692026138306,
      "learning_rate": 1.9977735440985667e-05,
      "loss": 2.7725,
      "step": 2121
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9804604053497314,
      "learning_rate": 1.9977707972573234e-05,
      "loss": 2.8489,
      "step": 2122
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1989201307296753,
      "learning_rate": 1.997768048724588e-05,
      "loss": 2.9598,
      "step": 2123
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9464597105979919,
      "learning_rate": 1.9977652985003642e-05,
      "loss": 2.6084,
      "step": 2124
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0379772186279297,
      "learning_rate": 1.9977625465846578e-05,
      "loss": 3.0442,
      "step": 2125
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1076016426086426,
      "learning_rate": 1.9977597929774726e-05,
      "loss": 3.0142,
      "step": 2126
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9338982105255127,
      "learning_rate": 1.9977570376788137e-05,
      "loss": 2.7846,
      "step": 2127
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0042779445648193,
      "learning_rate": 1.9977542806886853e-05,
      "loss": 2.8219,
      "step": 2128
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.993083119392395,
      "learning_rate": 1.9977515220070927e-05,
      "loss": 3.0036,
      "step": 2129
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0229908227920532,
      "learning_rate": 1.9977487616340404e-05,
      "loss": 2.9229,
      "step": 2130
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9786216020584106,
      "learning_rate": 1.9977459995695328e-05,
      "loss": 2.7954,
      "step": 2131
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9037566184997559,
      "learning_rate": 1.9977432358135746e-05,
      "loss": 3.0559,
      "step": 2132
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0152515172958374,
      "learning_rate": 1.9977404703661708e-05,
      "loss": 3.0045,
      "step": 2133
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0031400918960571,
      "learning_rate": 1.9977377032273256e-05,
      "loss": 3.0083,
      "step": 2134
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2341388463974,
      "learning_rate": 1.9977349343970447e-05,
      "loss": 2.7353,
      "step": 2135
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.8905926942825317,
      "learning_rate": 1.9977321638753316e-05,
      "loss": 2.7523,
      "step": 2136
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.143750548362732,
      "learning_rate": 1.9977293916621915e-05,
      "loss": 2.7777,
      "step": 2137
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9643047451972961,
      "learning_rate": 1.9977266177576293e-05,
      "loss": 2.7606,
      "step": 2138
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0285630226135254,
      "learning_rate": 1.9977238421616494e-05,
      "loss": 2.858,
      "step": 2139
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0049387216567993,
      "learning_rate": 1.9977210648742568e-05,
      "loss": 2.9253,
      "step": 2140
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0042204856872559,
      "learning_rate": 1.9977182858954556e-05,
      "loss": 2.8316,
      "step": 2141
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9963797926902771,
      "learning_rate": 1.9977155052252513e-05,
      "loss": 2.8474,
      "step": 2142
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9650789499282837,
      "learning_rate": 1.9977127228636482e-05,
      "loss": 2.7852,
      "step": 2143
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9656021595001221,
      "learning_rate": 1.9977099388106508e-05,
      "loss": 2.9321,
      "step": 2144
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1629594564437866,
      "learning_rate": 1.9977071530662644e-05,
      "loss": 2.9094,
      "step": 2145
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0093101263046265,
      "learning_rate": 1.9977043656304932e-05,
      "loss": 2.7703,
      "step": 2146
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0892727375030518,
      "learning_rate": 1.9977015765033422e-05,
      "loss": 2.8629,
      "step": 2147
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0051769018173218,
      "learning_rate": 1.9976987856848158e-05,
      "loss": 2.9294,
      "step": 2148
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0663343667984009,
      "learning_rate": 1.9976959931749195e-05,
      "loss": 2.9829,
      "step": 2149
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9484953880310059,
      "learning_rate": 1.9976931989736572e-05,
      "loss": 2.8042,
      "step": 2150
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.8825893402099609,
      "learning_rate": 1.9976904030810342e-05,
      "loss": 2.7306,
      "step": 2151
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0640017986297607,
      "learning_rate": 1.9976876054970544e-05,
      "loss": 2.9362,
      "step": 2152
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9418867230415344,
      "learning_rate": 1.997684806221724e-05,
      "loss": 2.8007,
      "step": 2153
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0246973037719727,
      "learning_rate": 1.9976820052550462e-05,
      "loss": 2.7709,
      "step": 2154
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0721118450164795,
      "learning_rate": 1.9976792025970267e-05,
      "loss": 2.8954,
      "step": 2155
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9798683524131775,
      "learning_rate": 1.9976763982476697e-05,
      "loss": 2.9467,
      "step": 2156
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9529316425323486,
      "learning_rate": 1.9976735922069804e-05,
      "loss": 2.947,
      "step": 2157
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9413264989852905,
      "learning_rate": 1.9976707844749635e-05,
      "loss": 2.9422,
      "step": 2158
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1649610996246338,
      "learning_rate": 1.997667975051624e-05,
      "loss": 2.7543,
      "step": 2159
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1212908029556274,
      "learning_rate": 1.9976651639369658e-05,
      "loss": 2.5756,
      "step": 2160
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0681501626968384,
      "learning_rate": 1.9976623511309942e-05,
      "loss": 2.9205,
      "step": 2161
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.033267617225647,
      "learning_rate": 1.9976595366337143e-05,
      "loss": 2.8093,
      "step": 2162
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.225054383277893,
      "learning_rate": 1.99765672044513e-05,
      "loss": 2.7139,
      "step": 2163
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.006050705909729,
      "learning_rate": 1.997653902565247e-05,
      "loss": 2.7732,
      "step": 2164
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0127570629119873,
      "learning_rate": 1.9976510829940698e-05,
      "loss": 3.0336,
      "step": 2165
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9891682267189026,
      "learning_rate": 1.9976482617316028e-05,
      "loss": 3.1147,
      "step": 2166
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2164454460144043,
      "learning_rate": 1.997645438777851e-05,
      "loss": 3.0352,
      "step": 2167
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0704164505004883,
      "learning_rate": 1.9976426141328194e-05,
      "loss": 2.9501,
      "step": 2168
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.106137990951538,
      "learning_rate": 1.9976397877965126e-05,
      "loss": 2.9019,
      "step": 2169
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9737116694450378,
      "learning_rate": 1.997636959768935e-05,
      "loss": 2.821,
      "step": 2170
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0798507928848267,
      "learning_rate": 1.9976341300500923e-05,
      "loss": 3.0654,
      "step": 2171
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9521040916442871,
      "learning_rate": 1.9976312986399886e-05,
      "loss": 3.0498,
      "step": 2172
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.139822244644165,
      "learning_rate": 1.9976284655386288e-05,
      "loss": 2.9137,
      "step": 2173
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0000426769256592,
      "learning_rate": 1.997625630746018e-05,
      "loss": 2.8593,
      "step": 2174
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9815129637718201,
      "learning_rate": 1.997622794262161e-05,
      "loss": 2.8795,
      "step": 2175
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9971376657485962,
      "learning_rate": 1.997619956087062e-05,
      "loss": 2.9674,
      "step": 2176
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9706850647926331,
      "learning_rate": 1.9976171162207264e-05,
      "loss": 3.0768,
      "step": 2177
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9563332796096802,
      "learning_rate": 1.9976142746631586e-05,
      "loss": 2.9721,
      "step": 2178
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0075448751449585,
      "learning_rate": 1.997611431414364e-05,
      "loss": 2.817,
      "step": 2179
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0257353782653809,
      "learning_rate": 1.997608586474347e-05,
      "loss": 2.7565,
      "step": 2180
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9880132675170898,
      "learning_rate": 1.9976057398431124e-05,
      "loss": 2.8069,
      "step": 2181
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0863232612609863,
      "learning_rate": 1.997602891520665e-05,
      "loss": 2.9326,
      "step": 2182
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9705010652542114,
      "learning_rate": 1.99760004150701e-05,
      "loss": 2.8567,
      "step": 2183
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9396037459373474,
      "learning_rate": 1.997597189802152e-05,
      "loss": 3.0082,
      "step": 2184
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1323503255844116,
      "learning_rate": 1.9975943364060958e-05,
      "loss": 2.8467,
      "step": 2185
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9860690832138062,
      "learning_rate": 1.9975914813188463e-05,
      "loss": 2.9498,
      "step": 2186
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.017754316329956,
      "learning_rate": 1.9975886245404078e-05,
      "loss": 2.8229,
      "step": 2187
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0066158771514893,
      "learning_rate": 1.997585766070786e-05,
      "loss": 2.9991,
      "step": 2188
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0968973636627197,
      "learning_rate": 1.9975829059099852e-05,
      "loss": 2.8442,
      "step": 2189
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.996181845664978,
      "learning_rate": 1.9975800440580108e-05,
      "loss": 2.884,
      "step": 2190
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0300527811050415,
      "learning_rate": 1.9975771805148672e-05,
      "loss": 2.9751,
      "step": 2191
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.050073266029358,
      "learning_rate": 1.9975743152805594e-05,
      "loss": 3.0003,
      "step": 2192
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9884541630744934,
      "learning_rate": 1.997571448355092e-05,
      "loss": 3.0321,
      "step": 2193
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9781611561775208,
      "learning_rate": 1.9975685797384702e-05,
      "loss": 2.7833,
      "step": 2194
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1020662784576416,
      "learning_rate": 1.9975657094306986e-05,
      "loss": 2.9632,
      "step": 2195
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9203799366950989,
      "learning_rate": 1.997562837431782e-05,
      "loss": 2.986,
      "step": 2196
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.898360550403595,
      "learning_rate": 1.997559963741726e-05,
      "loss": 2.9207,
      "step": 2197
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9398849010467529,
      "learning_rate": 1.9975570883605343e-05,
      "loss": 2.6488,
      "step": 2198
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0394777059555054,
      "learning_rate": 1.997554211288213e-05,
      "loss": 2.8242,
      "step": 2199
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1067070960998535,
      "learning_rate": 1.9975513325247656e-05,
      "loss": 3.0948,
      "step": 2200
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9482288956642151,
      "learning_rate": 1.9975484520701983e-05,
      "loss": 2.9097,
      "step": 2201
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.952736496925354,
      "learning_rate": 1.9975455699245155e-05,
      "loss": 2.6963,
      "step": 2202
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0315721035003662,
      "learning_rate": 1.997542686087722e-05,
      "loss": 2.9254,
      "step": 2203
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9866127967834473,
      "learning_rate": 1.9975398005598227e-05,
      "loss": 2.8685,
      "step": 2204
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9482116103172302,
      "learning_rate": 1.9975369133408225e-05,
      "loss": 2.9904,
      "step": 2205
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9417298436164856,
      "learning_rate": 1.997534024430726e-05,
      "loss": 3.0184,
      "step": 2206
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0016754865646362,
      "learning_rate": 1.997531133829539e-05,
      "loss": 2.7918,
      "step": 2207
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9506635069847107,
      "learning_rate": 1.9975282415372654e-05,
      "loss": 2.8925,
      "step": 2208
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.103413701057434,
      "learning_rate": 1.9975253475539106e-05,
      "loss": 2.9388,
      "step": 2209
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.981066107749939,
      "learning_rate": 1.9975224518794792e-05,
      "loss": 2.9098,
      "step": 2210
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0104631185531616,
      "learning_rate": 1.9975195545139767e-05,
      "loss": 2.9245,
      "step": 2211
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9937293529510498,
      "learning_rate": 1.9975166554574072e-05,
      "loss": 2.9469,
      "step": 2212
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0591777563095093,
      "learning_rate": 1.9975137547097763e-05,
      "loss": 2.9064,
      "step": 2213
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9818199276924133,
      "learning_rate": 1.9975108522710886e-05,
      "loss": 2.8541,
      "step": 2214
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0602480173110962,
      "learning_rate": 1.9975079481413494e-05,
      "loss": 2.9559,
      "step": 2215
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0023767948150635,
      "learning_rate": 1.9975050423205627e-05,
      "loss": 2.948,
      "step": 2216
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.990293562412262,
      "learning_rate": 1.9975021348087344e-05,
      "loss": 2.808,
      "step": 2217
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0222634077072144,
      "learning_rate": 1.9974992256058693e-05,
      "loss": 2.8038,
      "step": 2218
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1130104064941406,
      "learning_rate": 1.9974963147119716e-05,
      "loss": 3.0626,
      "step": 2219
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1193256378173828,
      "learning_rate": 1.9974934021270468e-05,
      "loss": 2.88,
      "step": 2220
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9558925628662109,
      "learning_rate": 1.9974904878511e-05,
      "loss": 2.8966,
      "step": 2221
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0268278121948242,
      "learning_rate": 1.997487571884136e-05,
      "loss": 3.0245,
      "step": 2222
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.012534737586975,
      "learning_rate": 1.9974846542261595e-05,
      "loss": 2.7316,
      "step": 2223
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0028629302978516,
      "learning_rate": 1.9974817348771756e-05,
      "loss": 2.7551,
      "step": 2224
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.033216953277588,
      "learning_rate": 1.9974788138371893e-05,
      "loss": 2.9381,
      "step": 2225
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.070777416229248,
      "learning_rate": 1.9974758911062054e-05,
      "loss": 2.9057,
      "step": 2226
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9487616419792175,
      "learning_rate": 1.997472966684229e-05,
      "loss": 2.806,
      "step": 2227
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9740139842033386,
      "learning_rate": 1.997470040571265e-05,
      "loss": 2.6446,
      "step": 2228
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0105828046798706,
      "learning_rate": 1.9974671127673182e-05,
      "loss": 2.7782,
      "step": 2229
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0484997034072876,
      "learning_rate": 1.997464183272394e-05,
      "loss": 3.0505,
      "step": 2230
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9725250601768494,
      "learning_rate": 1.997461252086497e-05,
      "loss": 2.7963,
      "step": 2231
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.055797815322876,
      "learning_rate": 1.9974583192096322e-05,
      "loss": 2.7681,
      "step": 2232
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.937350869178772,
      "learning_rate": 1.9974553846418047e-05,
      "loss": 3.0117,
      "step": 2233
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1010792255401611,
      "learning_rate": 1.9974524483830196e-05,
      "loss": 2.8707,
      "step": 2234
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0204136371612549,
      "learning_rate": 1.9974495104332815e-05,
      "loss": 2.8247,
      "step": 2235
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.025660514831543,
      "learning_rate": 1.9974465707925957e-05,
      "loss": 2.9882,
      "step": 2236
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9620779156684875,
      "learning_rate": 1.997443629460967e-05,
      "loss": 2.96,
      "step": 2237
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9500171542167664,
      "learning_rate": 1.9974406864384003e-05,
      "loss": 3.0635,
      "step": 2238
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0059770345687866,
      "learning_rate": 1.997437741724901e-05,
      "loss": 3.0116,
      "step": 2239
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9003970623016357,
      "learning_rate": 1.9974347953204734e-05,
      "loss": 2.7947,
      "step": 2240
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1060278415679932,
      "learning_rate": 1.997431847225123e-05,
      "loss": 2.7894,
      "step": 2241
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.942669153213501,
      "learning_rate": 1.9974288974388552e-05,
      "loss": 2.949,
      "step": 2242
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9627400636672974,
      "learning_rate": 1.9974259459616744e-05,
      "loss": 2.9155,
      "step": 2243
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9160329103469849,
      "learning_rate": 1.9974229927935854e-05,
      "loss": 2.8511,
      "step": 2244
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9028465151786804,
      "learning_rate": 1.9974200379345937e-05,
      "loss": 2.7994,
      "step": 2245
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9395064115524292,
      "learning_rate": 1.997417081384704e-05,
      "loss": 2.8356,
      "step": 2246
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.965964674949646,
      "learning_rate": 1.9974141231439218e-05,
      "loss": 2.7156,
      "step": 2247
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4742778539657593,
      "learning_rate": 1.9974111632122514e-05,
      "loss": 2.7153,
      "step": 2248
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9738017916679382,
      "learning_rate": 1.9974082015896985e-05,
      "loss": 3.0791,
      "step": 2249
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.948862612247467,
      "learning_rate": 1.9974052382762674e-05,
      "loss": 2.8837,
      "step": 2250
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9515354037284851,
      "learning_rate": 1.997402273271964e-05,
      "loss": 2.9919,
      "step": 2251
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0413622856140137,
      "learning_rate": 1.9973993065767927e-05,
      "loss": 3.0651,
      "step": 2252
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0057591199874878,
      "learning_rate": 1.9973963381907584e-05,
      "loss": 2.8292,
      "step": 2253
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9245554804801941,
      "learning_rate": 1.9973933681138667e-05,
      "loss": 2.9605,
      "step": 2254
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9863709211349487,
      "learning_rate": 1.9973903963461224e-05,
      "loss": 2.9383,
      "step": 2255
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.023745059967041,
      "learning_rate": 1.9973874228875303e-05,
      "loss": 2.8733,
      "step": 2256
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9799591898918152,
      "learning_rate": 1.9973844477380958e-05,
      "loss": 3.0449,
      "step": 2257
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9540426135063171,
      "learning_rate": 1.9973814708978235e-05,
      "loss": 2.7471,
      "step": 2258
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9823552966117859,
      "learning_rate": 1.997378492366719e-05,
      "loss": 2.9065,
      "step": 2259
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9886971712112427,
      "learning_rate": 1.997375512144787e-05,
      "loss": 2.8616,
      "step": 2260
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0517799854278564,
      "learning_rate": 1.9973725302320326e-05,
      "loss": 2.8383,
      "step": 2261
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9958295226097107,
      "learning_rate": 1.997369546628461e-05,
      "loss": 2.9263,
      "step": 2262
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0949604511260986,
      "learning_rate": 1.9973665613340768e-05,
      "loss": 2.8293,
      "step": 2263
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0268070697784424,
      "learning_rate": 1.997363574348886e-05,
      "loss": 2.8515,
      "step": 2264
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9510844945907593,
      "learning_rate": 1.9973605856728925e-05,
      "loss": 2.8757,
      "step": 2265
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9710565805435181,
      "learning_rate": 1.997357595306102e-05,
      "loss": 2.7862,
      "step": 2266
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9345425963401794,
      "learning_rate": 1.9973546032485194e-05,
      "loss": 2.8933,
      "step": 2267
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.144100546836853,
      "learning_rate": 1.9973516095001503e-05,
      "loss": 2.8291,
      "step": 2268
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9975717067718506,
      "learning_rate": 1.997348614060999e-05,
      "loss": 2.742,
      "step": 2269
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9318250417709351,
      "learning_rate": 1.997345616931071e-05,
      "loss": 2.8695,
      "step": 2270
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.972369909286499,
      "learning_rate": 1.9973426181103713e-05,
      "loss": 2.7084,
      "step": 2271
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.177368402481079,
      "learning_rate": 1.9973396175989052e-05,
      "loss": 2.8432,
      "step": 2272
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9854777455329895,
      "learning_rate": 1.9973366153966772e-05,
      "loss": 2.7298,
      "step": 2273
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9474849104881287,
      "learning_rate": 1.997333611503693e-05,
      "loss": 2.9509,
      "step": 2274
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1050524711608887,
      "learning_rate": 1.9973306059199577e-05,
      "loss": 2.8533,
      "step": 2275
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0178860425949097,
      "learning_rate": 1.997327598645476e-05,
      "loss": 2.9137,
      "step": 2276
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1663929224014282,
      "learning_rate": 1.9973245896802527e-05,
      "loss": 2.9958,
      "step": 2277
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0487362146377563,
      "learning_rate": 1.9973215790242937e-05,
      "loss": 2.5827,
      "step": 2278
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.121403455734253,
      "learning_rate": 1.9973185666776035e-05,
      "loss": 2.9739,
      "step": 2279
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9958083033561707,
      "learning_rate": 1.997315552640188e-05,
      "loss": 2.7636,
      "step": 2280
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9668231010437012,
      "learning_rate": 1.9973125369120514e-05,
      "loss": 2.8975,
      "step": 2281
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9847008585929871,
      "learning_rate": 1.997309519493199e-05,
      "loss": 2.7386,
      "step": 2282
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1350129842758179,
      "learning_rate": 1.9973065003836366e-05,
      "loss": 2.9712,
      "step": 2283
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.124479055404663,
      "learning_rate": 1.9973034795833685e-05,
      "loss": 3.1101,
      "step": 2284
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9315397143363953,
      "learning_rate": 1.9973004570924002e-05,
      "loss": 2.6488,
      "step": 2285
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1804742813110352,
      "learning_rate": 1.997297432910737e-05,
      "loss": 2.8368,
      "step": 2286
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0361677408218384,
      "learning_rate": 1.9972944070383834e-05,
      "loss": 2.9222,
      "step": 2287
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0488295555114746,
      "learning_rate": 1.997291379475345e-05,
      "loss": 2.7018,
      "step": 2288
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.094269037246704,
      "learning_rate": 1.997288350221627e-05,
      "loss": 2.7777,
      "step": 2289
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9632754921913147,
      "learning_rate": 1.9972853192772344e-05,
      "loss": 2.9623,
      "step": 2290
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1588406562805176,
      "learning_rate": 1.997282286642172e-05,
      "loss": 2.8402,
      "step": 2291
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0196304321289062,
      "learning_rate": 1.9972792523164456e-05,
      "loss": 2.9706,
      "step": 2292
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0000944137573242,
      "learning_rate": 1.99727621630006e-05,
      "loss": 2.7558,
      "step": 2293
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9990678429603577,
      "learning_rate": 1.99727317859302e-05,
      "loss": 2.7976,
      "step": 2294
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9966122508049011,
      "learning_rate": 1.9972701391953314e-05,
      "loss": 3.0224,
      "step": 2295
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9765092730522156,
      "learning_rate": 1.997267098106999e-05,
      "loss": 2.6837,
      "step": 2296
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0394665002822876,
      "learning_rate": 1.9972640553280283e-05,
      "loss": 3.0503,
      "step": 2297
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.018372893333435,
      "learning_rate": 1.997261010858424e-05,
      "loss": 2.9214,
      "step": 2298
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0616636276245117,
      "learning_rate": 1.997257964698191e-05,
      "loss": 2.703,
      "step": 2299
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.000728726387024,
      "learning_rate": 1.9972549168473355e-05,
      "loss": 2.6105,
      "step": 2300
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0599536895751953,
      "learning_rate": 1.997251867305862e-05,
      "loss": 2.9176,
      "step": 2301
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.036599040031433,
      "learning_rate": 1.9972488160737752e-05,
      "loss": 2.9582,
      "step": 2302
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.141084909439087,
      "learning_rate": 1.9972457631510812e-05,
      "loss": 2.9338,
      "step": 2303
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9909117817878723,
      "learning_rate": 1.9972427085377847e-05,
      "loss": 2.8183,
      "step": 2304
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9419842958450317,
      "learning_rate": 1.997239652233891e-05,
      "loss": 2.872,
      "step": 2305
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9638420343399048,
      "learning_rate": 1.9972365942394054e-05,
      "loss": 2.7751,
      "step": 2306
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1065798997879028,
      "learning_rate": 1.9972335345543328e-05,
      "loss": 3.081,
      "step": 2307
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1015325784683228,
      "learning_rate": 1.9972304731786786e-05,
      "loss": 2.6205,
      "step": 2308
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0110243558883667,
      "learning_rate": 1.9972274101124476e-05,
      "loss": 2.5876,
      "step": 2309
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.909031093120575,
      "learning_rate": 1.9972243453556456e-05,
      "loss": 3.024,
      "step": 2310
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9042223691940308,
      "learning_rate": 1.9972212789082773e-05,
      "loss": 2.922,
      "step": 2311
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0761545896530151,
      "learning_rate": 1.997218210770348e-05,
      "loss": 3.0712,
      "step": 2312
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9788743257522583,
      "learning_rate": 1.9972151409418634e-05,
      "loss": 2.7098,
      "step": 2313
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9877846837043762,
      "learning_rate": 1.9972120694228278e-05,
      "loss": 2.7463,
      "step": 2314
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0178202390670776,
      "learning_rate": 1.997208996213247e-05,
      "loss": 2.79,
      "step": 2315
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.130416750907898,
      "learning_rate": 1.9972059213131266e-05,
      "loss": 2.9434,
      "step": 2316
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0761381387710571,
      "learning_rate": 1.997202844722471e-05,
      "loss": 2.8178,
      "step": 2317
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0231893062591553,
      "learning_rate": 1.997199766441286e-05,
      "loss": 2.9025,
      "step": 2318
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9698013067245483,
      "learning_rate": 1.997196686469576e-05,
      "loss": 2.7736,
      "step": 2319
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0041872262954712,
      "learning_rate": 1.9971936048073473e-05,
      "loss": 2.8723,
      "step": 2320
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9832141399383545,
      "learning_rate": 1.9971905214546044e-05,
      "loss": 3.0457,
      "step": 2321
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9812408089637756,
      "learning_rate": 1.9971874364113524e-05,
      "loss": 2.8521,
      "step": 2322
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0267733335494995,
      "learning_rate": 1.9971843496775972e-05,
      "loss": 2.9862,
      "step": 2323
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0694186687469482,
      "learning_rate": 1.9971812612533438e-05,
      "loss": 2.7593,
      "step": 2324
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9634892344474792,
      "learning_rate": 1.9971781711385973e-05,
      "loss": 2.8412,
      "step": 2325
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0221120119094849,
      "learning_rate": 1.9971750793333627e-05,
      "loss": 2.8144,
      "step": 2326
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1352648735046387,
      "learning_rate": 1.9971719858376456e-05,
      "loss": 2.9334,
      "step": 2327
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0023294687271118,
      "learning_rate": 1.9971688906514516e-05,
      "loss": 2.6862,
      "step": 2328
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0524863004684448,
      "learning_rate": 1.997165793774785e-05,
      "loss": 2.8554,
      "step": 2329
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.043779730796814,
      "learning_rate": 1.9971626952076517e-05,
      "loss": 2.8218,
      "step": 2330
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1820522546768188,
      "learning_rate": 1.997159594950057e-05,
      "loss": 2.9008,
      "step": 2331
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0347579717636108,
      "learning_rate": 1.9971564930020057e-05,
      "loss": 2.987,
      "step": 2332
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9540678858757019,
      "learning_rate": 1.9971533893635033e-05,
      "loss": 2.913,
      "step": 2333
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0431604385375977,
      "learning_rate": 1.997150284034555e-05,
      "loss": 2.9528,
      "step": 2334
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0638360977172852,
      "learning_rate": 1.9971471770151664e-05,
      "loss": 2.9366,
      "step": 2335
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9665047526359558,
      "learning_rate": 1.9971440683053426e-05,
      "loss": 2.8601,
      "step": 2336
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0605089664459229,
      "learning_rate": 1.9971409579050887e-05,
      "loss": 2.6658,
      "step": 2337
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0463316440582275,
      "learning_rate": 1.99713784581441e-05,
      "loss": 2.8241,
      "step": 2338
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.919092059135437,
      "learning_rate": 1.9971347320333117e-05,
      "loss": 2.7491,
      "step": 2339
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0462855100631714,
      "learning_rate": 1.997131616561799e-05,
      "loss": 3.2267,
      "step": 2340
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9553394913673401,
      "learning_rate": 1.997128499399878e-05,
      "loss": 2.8642,
      "step": 2341
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0686001777648926,
      "learning_rate": 1.997125380547553e-05,
      "loss": 2.7785,
      "step": 2342
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9384109973907471,
      "learning_rate": 1.99712226000483e-05,
      "loss": 2.8478,
      "step": 2343
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0831003189086914,
      "learning_rate": 1.9971191377717133e-05,
      "loss": 2.8541,
      "step": 2344
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.133959174156189,
      "learning_rate": 1.9971160138482094e-05,
      "loss": 2.9195,
      "step": 2345
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9558602571487427,
      "learning_rate": 1.997112888234323e-05,
      "loss": 2.642,
      "step": 2346
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9926659464836121,
      "learning_rate": 1.9971097609300592e-05,
      "loss": 2.7724,
      "step": 2347
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9827715754508972,
      "learning_rate": 1.9971066319354236e-05,
      "loss": 3.0782,
      "step": 2348
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9662830233573914,
      "learning_rate": 1.9971035012504217e-05,
      "loss": 3.1698,
      "step": 2349
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0343800783157349,
      "learning_rate": 1.9971003688750584e-05,
      "loss": 3.0397,
      "step": 2350
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0027867555618286,
      "learning_rate": 1.9970972348093393e-05,
      "loss": 2.5646,
      "step": 2351
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0881128311157227,
      "learning_rate": 1.9970940990532693e-05,
      "loss": 2.7971,
      "step": 2352
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0187932252883911,
      "learning_rate": 1.997090961606854e-05,
      "loss": 2.9138,
      "step": 2353
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0124216079711914,
      "learning_rate": 1.997087822470099e-05,
      "loss": 3.0604,
      "step": 2354
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0954680442810059,
      "learning_rate": 1.9970846816430092e-05,
      "loss": 2.9375,
      "step": 2355
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0078151226043701,
      "learning_rate": 1.99708153912559e-05,
      "loss": 2.9647,
      "step": 2356
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3835891485214233,
      "learning_rate": 1.997078394917847e-05,
      "loss": 2.8581,
      "step": 2357
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9711002707481384,
      "learning_rate": 1.997075249019785e-05,
      "loss": 2.8496,
      "step": 2358
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9506243467330933,
      "learning_rate": 1.9970721014314097e-05,
      "loss": 2.8592,
      "step": 2359
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0632503032684326,
      "learning_rate": 1.9970689521527267e-05,
      "loss": 2.8866,
      "step": 2360
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.05014169216156,
      "learning_rate": 1.9970658011837404e-05,
      "loss": 2.7929,
      "step": 2361
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.106323480606079,
      "learning_rate": 1.9970626485244575e-05,
      "loss": 2.903,
      "step": 2362
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0040524005889893,
      "learning_rate": 1.9970594941748824e-05,
      "loss": 2.7135,
      "step": 2363
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.095657229423523,
      "learning_rate": 1.99705633813502e-05,
      "loss": 2.9602,
      "step": 2364
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9435173273086548,
      "learning_rate": 1.9970531804048772e-05,
      "loss": 2.8776,
      "step": 2365
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1551554203033447,
      "learning_rate": 1.997050020984458e-05,
      "loss": 2.85,
      "step": 2366
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0511677265167236,
      "learning_rate": 1.9970468598737684e-05,
      "loss": 2.8053,
      "step": 2367
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9117081761360168,
      "learning_rate": 1.9970436970728136e-05,
      "loss": 2.9789,
      "step": 2368
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0357080698013306,
      "learning_rate": 1.997040532581599e-05,
      "loss": 2.6707,
      "step": 2369
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9484148025512695,
      "learning_rate": 1.9970373664001298e-05,
      "loss": 2.9849,
      "step": 2370
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9913942217826843,
      "learning_rate": 1.9970341985284113e-05,
      "loss": 2.8303,
      "step": 2371
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0041383504867554,
      "learning_rate": 1.997031028966449e-05,
      "loss": 2.9157,
      "step": 2372
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9832750558853149,
      "learning_rate": 1.9970278577142484e-05,
      "loss": 2.5274,
      "step": 2373
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4434866905212402,
      "learning_rate": 1.9970246847718153e-05,
      "loss": 2.7408,
      "step": 2374
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9805296063423157,
      "learning_rate": 1.997021510139154e-05,
      "loss": 2.5779,
      "step": 2375
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1813075542449951,
      "learning_rate": 1.9970183338162706e-05,
      "loss": 2.68,
      "step": 2376
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9593744874000549,
      "learning_rate": 1.9970151558031702e-05,
      "loss": 3.0621,
      "step": 2377
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.008402943611145,
      "learning_rate": 1.9970119760998587e-05,
      "loss": 2.9307,
      "step": 2378
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0237219333648682,
      "learning_rate": 1.9970087947063406e-05,
      "loss": 3.1534,
      "step": 2379
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.976356029510498,
      "learning_rate": 1.9970056116226222e-05,
      "loss": 2.761,
      "step": 2380
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0554484128952026,
      "learning_rate": 1.9970024268487085e-05,
      "loss": 2.6592,
      "step": 2381
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.223718523979187,
      "learning_rate": 1.996999240384605e-05,
      "loss": 2.9409,
      "step": 2382
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.011011004447937,
      "learning_rate": 1.9969960522303166e-05,
      "loss": 3.0187,
      "step": 2383
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9967834949493408,
      "learning_rate": 1.9969928623858492e-05,
      "loss": 2.8444,
      "step": 2384
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9480045437812805,
      "learning_rate": 1.9969896708512082e-05,
      "loss": 2.8981,
      "step": 2385
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0395759344100952,
      "learning_rate": 1.9969864776263993e-05,
      "loss": 2.9239,
      "step": 2386
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.015382170677185,
      "learning_rate": 1.996983282711427e-05,
      "loss": 3.1633,
      "step": 2387
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0552736520767212,
      "learning_rate": 1.9969800861062977e-05,
      "loss": 2.7563,
      "step": 2388
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1250793933868408,
      "learning_rate": 1.996976887811016e-05,
      "loss": 2.9076,
      "step": 2389
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9447014927864075,
      "learning_rate": 1.9969736878255877e-05,
      "loss": 3.03,
      "step": 2390
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0435676574707031,
      "learning_rate": 1.9969704861500182e-05,
      "loss": 3.0375,
      "step": 2391
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0121114253997803,
      "learning_rate": 1.9969672827843134e-05,
      "loss": 2.9334,
      "step": 2392
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9984174370765686,
      "learning_rate": 1.996964077728478e-05,
      "loss": 2.7293,
      "step": 2393
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1169342994689941,
      "learning_rate": 1.9969608709825178e-05,
      "loss": 2.7733,
      "step": 2394
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.094586730003357,
      "learning_rate": 1.996957662546438e-05,
      "loss": 2.8943,
      "step": 2395
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.039617657661438,
      "learning_rate": 1.9969544524202443e-05,
      "loss": 2.9662,
      "step": 2396
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9946964979171753,
      "learning_rate": 1.996951240603942e-05,
      "loss": 2.9643,
      "step": 2397
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9943634271621704,
      "learning_rate": 1.9969480270975365e-05,
      "loss": 2.7226,
      "step": 2398
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9094713926315308,
      "learning_rate": 1.9969448119010335e-05,
      "loss": 2.6844,
      "step": 2399
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.022815227508545,
      "learning_rate": 1.9969415950144382e-05,
      "loss": 2.9102,
      "step": 2400
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0956534147262573,
      "learning_rate": 1.996938376437756e-05,
      "loss": 2.95,
      "step": 2401
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0354163646697998,
      "learning_rate": 1.9969351561709928e-05,
      "loss": 3.0513,
      "step": 2402
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.003676414489746,
      "learning_rate": 1.9969319342141535e-05,
      "loss": 2.7949,
      "step": 2403
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0245360136032104,
      "learning_rate": 1.996928710567244e-05,
      "loss": 2.8349,
      "step": 2404
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9090934991836548,
      "learning_rate": 1.9969254852302695e-05,
      "loss": 3.0031,
      "step": 2405
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0169678926467896,
      "learning_rate": 1.9969222582032353e-05,
      "loss": 2.8375,
      "step": 2406
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9701345562934875,
      "learning_rate": 1.996919029486147e-05,
      "loss": 2.6873,
      "step": 2407
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0438299179077148,
      "learning_rate": 1.9969157990790106e-05,
      "loss": 2.8516,
      "step": 2408
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9605687260627747,
      "learning_rate": 1.996912566981831e-05,
      "loss": 2.9849,
      "step": 2409
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9488478899002075,
      "learning_rate": 1.996909333194614e-05,
      "loss": 3.0218,
      "step": 2410
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0504417419433594,
      "learning_rate": 1.996906097717365e-05,
      "loss": 2.9712,
      "step": 2411
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9879950881004333,
      "learning_rate": 1.996902860550089e-05,
      "loss": 2.8741,
      "step": 2412
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9776952266693115,
      "learning_rate": 1.996899621692792e-05,
      "loss": 2.7836,
      "step": 2413
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9342239499092102,
      "learning_rate": 1.99689638114548e-05,
      "loss": 2.7172,
      "step": 2414
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0188575983047485,
      "learning_rate": 1.996893138908157e-05,
      "loss": 2.8956,
      "step": 2415
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0739421844482422,
      "learning_rate": 1.99688989498083e-05,
      "loss": 2.9081,
      "step": 2416
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0390301942825317,
      "learning_rate": 1.9968866493635035e-05,
      "loss": 2.5925,
      "step": 2417
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0641165971755981,
      "learning_rate": 1.9968834020561834e-05,
      "loss": 2.9069,
      "step": 2418
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0025341510772705,
      "learning_rate": 1.9968801530588754e-05,
      "loss": 2.9435,
      "step": 2419
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0861616134643555,
      "learning_rate": 1.9968769023715845e-05,
      "loss": 2.913,
      "step": 2420
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.016420602798462,
      "learning_rate": 1.9968736499943168e-05,
      "loss": 2.9523,
      "step": 2421
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.00449800491333,
      "learning_rate": 1.9968703959270775e-05,
      "loss": 2.6694,
      "step": 2422
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.133934736251831,
      "learning_rate": 1.996867140169872e-05,
      "loss": 2.76,
      "step": 2423
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.033845067024231,
      "learning_rate": 1.9968638827227058e-05,
      "loss": 2.8834,
      "step": 2424
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.967041552066803,
      "learning_rate": 1.996860623585585e-05,
      "loss": 3.1331,
      "step": 2425
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9532028436660767,
      "learning_rate": 1.9968573627585145e-05,
      "loss": 2.752,
      "step": 2426
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.940152108669281,
      "learning_rate": 1.9968541002414995e-05,
      "loss": 2.9151,
      "step": 2427
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9594738483428955,
      "learning_rate": 1.996850836034547e-05,
      "loss": 3.0175,
      "step": 2428
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0188367366790771,
      "learning_rate": 1.996847570137661e-05,
      "loss": 2.7147,
      "step": 2429
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0546895265579224,
      "learning_rate": 1.9968443025508476e-05,
      "loss": 2.7448,
      "step": 2430
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.074733018875122,
      "learning_rate": 1.9968410332741126e-05,
      "loss": 2.9189,
      "step": 2431
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0261716842651367,
      "learning_rate": 1.9968377623074614e-05,
      "loss": 2.7554,
      "step": 2432
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9703977108001709,
      "learning_rate": 1.9968344896508992e-05,
      "loss": 2.9807,
      "step": 2433
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1146595478057861,
      "learning_rate": 1.9968312153044318e-05,
      "loss": 3.0462,
      "step": 2434
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1835159063339233,
      "learning_rate": 1.9968279392680648e-05,
      "loss": 2.5335,
      "step": 2435
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9681292176246643,
      "learning_rate": 1.9968246615418038e-05,
      "loss": 3.0002,
      "step": 2436
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.996100127696991,
      "learning_rate": 1.9968213821256544e-05,
      "loss": 2.9979,
      "step": 2437
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9981727600097656,
      "learning_rate": 1.9968181010196218e-05,
      "loss": 2.7351,
      "step": 2438
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1364350318908691,
      "learning_rate": 1.9968148182237117e-05,
      "loss": 2.8212,
      "step": 2439
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9392597079277039,
      "learning_rate": 1.9968115337379302e-05,
      "loss": 2.7677,
      "step": 2440
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.105685830116272,
      "learning_rate": 1.996808247562282e-05,
      "loss": 3.0461,
      "step": 2441
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9957540035247803,
      "learning_rate": 1.996804959696773e-05,
      "loss": 2.8942,
      "step": 2442
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0366227626800537,
      "learning_rate": 1.9968016701414092e-05,
      "loss": 2.9745,
      "step": 2443
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9985908269882202,
      "learning_rate": 1.9967983788961957e-05,
      "loss": 2.8324,
      "step": 2444
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0572142601013184,
      "learning_rate": 1.996795085961138e-05,
      "loss": 2.8494,
      "step": 2445
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9460699558258057,
      "learning_rate": 1.9967917913362426e-05,
      "loss": 2.7892,
      "step": 2446
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0181403160095215,
      "learning_rate": 1.996788495021514e-05,
      "loss": 2.8408,
      "step": 2447
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9992625117301941,
      "learning_rate": 1.9967851970169578e-05,
      "loss": 3.0973,
      "step": 2448
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0865479707717896,
      "learning_rate": 1.9967818973225804e-05,
      "loss": 2.804,
      "step": 2449
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9820935130119324,
      "learning_rate": 1.9967785959383866e-05,
      "loss": 3.0228,
      "step": 2450
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.937112033367157,
      "learning_rate": 1.9967752928643827e-05,
      "loss": 2.7287,
      "step": 2451
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9126037359237671,
      "learning_rate": 1.996771988100574e-05,
      "loss": 2.8632,
      "step": 2452
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0114614963531494,
      "learning_rate": 1.9967686816469654e-05,
      "loss": 2.8434,
      "step": 2453
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9219619631767273,
      "learning_rate": 1.996765373503564e-05,
      "loss": 2.878,
      "step": 2454
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1057814359664917,
      "learning_rate": 1.9967620636703743e-05,
      "loss": 2.8219,
      "step": 2455
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0037376880645752,
      "learning_rate": 1.996758752147402e-05,
      "loss": 2.7836,
      "step": 2456
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1350775957107544,
      "learning_rate": 1.996755438934653e-05,
      "loss": 2.8133,
      "step": 2457
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0415315628051758,
      "learning_rate": 1.9967521240321332e-05,
      "loss": 2.9359,
      "step": 2458
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9354647994041443,
      "learning_rate": 1.996748807439847e-05,
      "loss": 2.9156,
      "step": 2459
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.505414605140686,
      "learning_rate": 1.9967454891578016e-05,
      "loss": 3.0848,
      "step": 2460
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9848788976669312,
      "learning_rate": 1.9967421691860017e-05,
      "loss": 3.0105,
      "step": 2461
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0123744010925293,
      "learning_rate": 1.9967388475244527e-05,
      "loss": 2.8453,
      "step": 2462
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9552212953567505,
      "learning_rate": 1.996735524173161e-05,
      "loss": 2.8278,
      "step": 2463
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.209794282913208,
      "learning_rate": 1.996732199132132e-05,
      "loss": 2.8967,
      "step": 2464
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.98699951171875,
      "learning_rate": 1.996728872401371e-05,
      "loss": 2.8208,
      "step": 2465
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.040814757347107,
      "learning_rate": 1.996725543980884e-05,
      "loss": 2.7802,
      "step": 2466
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0465974807739258,
      "learning_rate": 1.9967222138706762e-05,
      "loss": 2.6326,
      "step": 2467
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9357179999351501,
      "learning_rate": 1.996718882070754e-05,
      "loss": 3.0479,
      "step": 2468
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0498874187469482,
      "learning_rate": 1.996715548581122e-05,
      "loss": 3.0562,
      "step": 2469
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1631907224655151,
      "learning_rate": 1.9967122134017872e-05,
      "loss": 2.8664,
      "step": 2470
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9617360830307007,
      "learning_rate": 1.996708876532754e-05,
      "loss": 2.9415,
      "step": 2471
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.991166353225708,
      "learning_rate": 1.9967055379740287e-05,
      "loss": 2.6153,
      "step": 2472
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.077133297920227,
      "learning_rate": 1.9967021977256163e-05,
      "loss": 2.7535,
      "step": 2473
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9612443447113037,
      "learning_rate": 1.9966988557875236e-05,
      "loss": 3.0276,
      "step": 2474
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.059084177017212,
      "learning_rate": 1.9966955121597557e-05,
      "loss": 2.9167,
      "step": 2475
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0480170249938965,
      "learning_rate": 1.996692166842318e-05,
      "loss": 2.986,
      "step": 2476
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.023213267326355,
      "learning_rate": 1.9966888198352162e-05,
      "loss": 2.8014,
      "step": 2477
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0257437229156494,
      "learning_rate": 1.9966854711384562e-05,
      "loss": 2.8095,
      "step": 2478
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.034628987312317,
      "learning_rate": 1.996682120752044e-05,
      "loss": 2.7973,
      "step": 2479
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9670807123184204,
      "learning_rate": 1.9966787686759844e-05,
      "loss": 2.8694,
      "step": 2480
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.992194652557373,
      "learning_rate": 1.9966754149102838e-05,
      "loss": 2.9057,
      "step": 2481
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.993044376373291,
      "learning_rate": 1.996672059454948e-05,
      "loss": 2.883,
      "step": 2482
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9735006093978882,
      "learning_rate": 1.996668702309982e-05,
      "loss": 2.6983,
      "step": 2483
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9500437378883362,
      "learning_rate": 1.996665343475392e-05,
      "loss": 2.6899,
      "step": 2484
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0009737014770508,
      "learning_rate": 1.9966619829511832e-05,
      "loss": 2.6736,
      "step": 2485
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0081963539123535,
      "learning_rate": 1.9966586207373623e-05,
      "loss": 2.9354,
      "step": 2486
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.017519235610962,
      "learning_rate": 1.996655256833934e-05,
      "loss": 2.9724,
      "step": 2487
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0877653360366821,
      "learning_rate": 1.996651891240904e-05,
      "loss": 2.883,
      "step": 2488
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1757649183273315,
      "learning_rate": 1.9966485239582786e-05,
      "loss": 2.8949,
      "step": 2489
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1261810064315796,
      "learning_rate": 1.996645154986064e-05,
      "loss": 2.8855,
      "step": 2490
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9676485657691956,
      "learning_rate": 1.9966417843242643e-05,
      "loss": 2.7736,
      "step": 2491
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0383511781692505,
      "learning_rate": 1.996638411972886e-05,
      "loss": 3.0886,
      "step": 2492
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.03302001953125,
      "learning_rate": 1.9966350379319354e-05,
      "loss": 2.8073,
      "step": 2493
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0865422487258911,
      "learning_rate": 1.9966316622014175e-05,
      "loss": 2.7676,
      "step": 2494
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0818865299224854,
      "learning_rate": 1.9966282847813386e-05,
      "loss": 2.7511,
      "step": 2495
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0607191324234009,
      "learning_rate": 1.9966249056717034e-05,
      "loss": 3.1683,
      "step": 2496
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0084539651870728,
      "learning_rate": 1.9966215248725187e-05,
      "loss": 2.6727,
      "step": 2497
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.028123140335083,
      "learning_rate": 1.99661814238379e-05,
      "loss": 2.8395,
      "step": 2498
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0400933027267456,
      "learning_rate": 1.9966147582055228e-05,
      "loss": 2.9066,
      "step": 2499
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9532814025878906,
      "learning_rate": 1.9966113723377227e-05,
      "loss": 2.7615,
      "step": 2500
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1324889659881592,
      "learning_rate": 1.9966079847803955e-05,
      "loss": 3.1082,
      "step": 2501
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9725901484489441,
      "learning_rate": 1.9966045955335476e-05,
      "loss": 2.8958,
      "step": 2502
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.020591378211975,
      "learning_rate": 1.996601204597184e-05,
      "loss": 2.9914,
      "step": 2503
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9746025800704956,
      "learning_rate": 1.9965978119713106e-05,
      "loss": 2.8155,
      "step": 2504
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0009064674377441,
      "learning_rate": 1.9965944176559334e-05,
      "loss": 2.8663,
      "step": 2505
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9879781603813171,
      "learning_rate": 1.996591021651058e-05,
      "loss": 2.9098,
      "step": 2506
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.104729175567627,
      "learning_rate": 1.99658762395669e-05,
      "loss": 2.847,
      "step": 2507
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.122488021850586,
      "learning_rate": 1.9965842245728354e-05,
      "loss": 2.6353,
      "step": 2508
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0803438425064087,
      "learning_rate": 1.9965808234994996e-05,
      "loss": 2.8253,
      "step": 2509
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0033903121948242,
      "learning_rate": 1.996577420736689e-05,
      "loss": 2.8572,
      "step": 2510
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0207476615905762,
      "learning_rate": 1.996574016284409e-05,
      "loss": 2.9354,
      "step": 2511
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9916861057281494,
      "learning_rate": 1.9965706101426653e-05,
      "loss": 2.7755,
      "step": 2512
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9987860321998596,
      "learning_rate": 1.9965672023114636e-05,
      "loss": 2.9723,
      "step": 2513
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9721745252609253,
      "learning_rate": 1.99656379279081e-05,
      "loss": 2.804,
      "step": 2514
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0853197574615479,
      "learning_rate": 1.99656038158071e-05,
      "loss": 2.8431,
      "step": 2515
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.956230640411377,
      "learning_rate": 1.9965569686811694e-05,
      "loss": 3.0508,
      "step": 2516
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0331637859344482,
      "learning_rate": 1.996553554092194e-05,
      "loss": 2.8904,
      "step": 2517
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.032201886177063,
      "learning_rate": 1.9965501378137903e-05,
      "loss": 2.7861,
      "step": 2518
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2116703987121582,
      "learning_rate": 1.996546719845963e-05,
      "loss": 2.8384,
      "step": 2519
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0148242712020874,
      "learning_rate": 1.9965433001887183e-05,
      "loss": 2.7592,
      "step": 2520
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9935497045516968,
      "learning_rate": 1.996539878842062e-05,
      "loss": 2.9116,
      "step": 2521
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0145972967147827,
      "learning_rate": 1.9965364558060002e-05,
      "loss": 2.8922,
      "step": 2522
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.067053198814392,
      "learning_rate": 1.9965330310805383e-05,
      "loss": 2.9695,
      "step": 2523
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0512787103652954,
      "learning_rate": 1.9965296046656824e-05,
      "loss": 2.7672,
      "step": 2524
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1693682670593262,
      "learning_rate": 1.996526176561438e-05,
      "loss": 2.9515,
      "step": 2525
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9369248151779175,
      "learning_rate": 1.996522746767811e-05,
      "loss": 3.0169,
      "step": 2526
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0194867849349976,
      "learning_rate": 1.9965193152848075e-05,
      "loss": 2.7832,
      "step": 2527
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.083403468132019,
      "learning_rate": 1.9965158821124332e-05,
      "loss": 3.0022,
      "step": 2528
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.048954725265503,
      "learning_rate": 1.9965124472506933e-05,
      "loss": 2.7776,
      "step": 2529
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0699467658996582,
      "learning_rate": 1.9965090106995947e-05,
      "loss": 2.9268,
      "step": 2530
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4727790355682373,
      "learning_rate": 1.9965055724591424e-05,
      "loss": 2.9179,
      "step": 2531
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9069254398345947,
      "learning_rate": 1.9965021325293425e-05,
      "loss": 2.8871,
      "step": 2532
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.044901967048645,
      "learning_rate": 1.996498690910201e-05,
      "loss": 2.9555,
      "step": 2533
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0142732858657837,
      "learning_rate": 1.9964952476017237e-05,
      "loss": 2.579,
      "step": 2534
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9504325985908508,
      "learning_rate": 1.996491802603916e-05,
      "loss": 2.7206,
      "step": 2535
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0480719804763794,
      "learning_rate": 1.996488355916784e-05,
      "loss": 2.9166,
      "step": 2536
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0018842220306396,
      "learning_rate": 1.996484907540334e-05,
      "loss": 2.8324,
      "step": 2537
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.084322214126587,
      "learning_rate": 1.996481457474571e-05,
      "loss": 2.82,
      "step": 2538
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0760880708694458,
      "learning_rate": 1.9964780057195016e-05,
      "loss": 2.7145,
      "step": 2539
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1095002889633179,
      "learning_rate": 1.996474552275131e-05,
      "loss": 3.0224,
      "step": 2540
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9711510539054871,
      "learning_rate": 1.996471097141466e-05,
      "loss": 2.6553,
      "step": 2541
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.073851466178894,
      "learning_rate": 1.996467640318511e-05,
      "loss": 2.4633,
      "step": 2542
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0711735486984253,
      "learning_rate": 1.9964641818062734e-05,
      "loss": 2.8364,
      "step": 2543
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9303366541862488,
      "learning_rate": 1.9964607216047582e-05,
      "loss": 2.768,
      "step": 2544
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0606011152267456,
      "learning_rate": 1.9964572597139714e-05,
      "loss": 2.9378,
      "step": 2545
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.036817193031311,
      "learning_rate": 1.9964537961339193e-05,
      "loss": 2.8244,
      "step": 2546
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9687051773071289,
      "learning_rate": 1.996450330864607e-05,
      "loss": 2.9807,
      "step": 2547
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0714020729064941,
      "learning_rate": 1.9964468639060406e-05,
      "loss": 3.1551,
      "step": 2548
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1320358514785767,
      "learning_rate": 1.9964433952582263e-05,
      "loss": 2.7504,
      "step": 2549
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9730051159858704,
      "learning_rate": 1.9964399249211698e-05,
      "loss": 2.9697,
      "step": 2550
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9277093410491943,
      "learning_rate": 1.9964364528948768e-05,
      "loss": 2.9032,
      "step": 2551
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9802834391593933,
      "learning_rate": 1.996432979179354e-05,
      "loss": 2.6968,
      "step": 2552
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.032664179801941,
      "learning_rate": 1.996429503774606e-05,
      "loss": 2.713,
      "step": 2553
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0520660877227783,
      "learning_rate": 1.99642602668064e-05,
      "loss": 2.7347,
      "step": 2554
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9401945471763611,
      "learning_rate": 1.9964225478974607e-05,
      "loss": 2.7415,
      "step": 2555
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9537470936775208,
      "learning_rate": 1.9964190674250746e-05,
      "loss": 2.983,
      "step": 2556
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9750708341598511,
      "learning_rate": 1.9964155852634875e-05,
      "loss": 2.7486,
      "step": 2557
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0072885751724243,
      "learning_rate": 1.9964121014127058e-05,
      "loss": 2.9203,
      "step": 2558
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9510583877563477,
      "learning_rate": 1.9964086158727346e-05,
      "loss": 2.7398,
      "step": 2559
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0989770889282227,
      "learning_rate": 1.9964051286435802e-05,
      "loss": 2.8398,
      "step": 2560
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9357402324676514,
      "learning_rate": 1.9964016397252487e-05,
      "loss": 2.7684,
      "step": 2561
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.06485915184021,
      "learning_rate": 1.9963981491177457e-05,
      "loss": 2.9963,
      "step": 2562
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.012022852897644,
      "learning_rate": 1.996394656821077e-05,
      "loss": 2.7387,
      "step": 2563
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9971203804016113,
      "learning_rate": 1.9963911628352493e-05,
      "loss": 2.8418,
      "step": 2564
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.075096845626831,
      "learning_rate": 1.9963876671602674e-05,
      "loss": 2.8357,
      "step": 2565
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.935131847858429,
      "learning_rate": 1.996384169796138e-05,
      "loss": 3.0271,
      "step": 2566
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0957144498825073,
      "learning_rate": 1.996380670742867e-05,
      "loss": 2.7828,
      "step": 2567
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0796363353729248,
      "learning_rate": 1.99637717000046e-05,
      "loss": 2.9488,
      "step": 2568
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9639449119567871,
      "learning_rate": 1.9963736675689233e-05,
      "loss": 2.8721,
      "step": 2569
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9696369171142578,
      "learning_rate": 1.9963701634482622e-05,
      "loss": 2.685,
      "step": 2570
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1112195253372192,
      "learning_rate": 1.996366657638483e-05,
      "loss": 2.9517,
      "step": 2571
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0290377140045166,
      "learning_rate": 1.9963631501395923e-05,
      "loss": 2.8653,
      "step": 2572
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.901328444480896,
      "learning_rate": 1.996359640951595e-05,
      "loss": 2.8386,
      "step": 2573
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0239566564559937,
      "learning_rate": 1.9963561300744977e-05,
      "loss": 2.7689,
      "step": 2574
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.8922009468078613,
      "learning_rate": 1.996352617508306e-05,
      "loss": 2.8645,
      "step": 2575
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0683526992797852,
      "learning_rate": 1.996349103253026e-05,
      "loss": 3.2937,
      "step": 2576
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9202356338500977,
      "learning_rate": 1.9963455873086637e-05,
      "loss": 2.8813,
      "step": 2577
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9322821497917175,
      "learning_rate": 1.996342069675225e-05,
      "loss": 2.7137,
      "step": 2578
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9481191635131836,
      "learning_rate": 1.996338550352716e-05,
      "loss": 2.6423,
      "step": 2579
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0250638723373413,
      "learning_rate": 1.9963350293411423e-05,
      "loss": 2.8932,
      "step": 2580
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0518953800201416,
      "learning_rate": 1.9963315066405103e-05,
      "loss": 2.7925,
      "step": 2581
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0333553552627563,
      "learning_rate": 1.9963279822508262e-05,
      "loss": 2.8113,
      "step": 2582
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0570086240768433,
      "learning_rate": 1.996324456172095e-05,
      "loss": 2.6597,
      "step": 2583
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9675830006599426,
      "learning_rate": 1.9963209284043236e-05,
      "loss": 2.7525,
      "step": 2584
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9716488122940063,
      "learning_rate": 1.9963173989475173e-05,
      "loss": 2.9879,
      "step": 2585
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9243635535240173,
      "learning_rate": 1.9963138678016826e-05,
      "loss": 2.6315,
      "step": 2586
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9985517859458923,
      "learning_rate": 1.9963103349668252e-05,
      "loss": 2.8852,
      "step": 2587
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9985618591308594,
      "learning_rate": 1.9963068004429513e-05,
      "loss": 2.7358,
      "step": 2588
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0415709018707275,
      "learning_rate": 1.996303264230067e-05,
      "loss": 2.9853,
      "step": 2589
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9250391125679016,
      "learning_rate": 1.9962997263281777e-05,
      "loss": 2.8732,
      "step": 2590
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9009510278701782,
      "learning_rate": 1.99629618673729e-05,
      "loss": 2.9139,
      "step": 2591
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9893699884414673,
      "learning_rate": 1.9962926454574095e-05,
      "loss": 3.0305,
      "step": 2592
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9807971715927124,
      "learning_rate": 1.9962891024885422e-05,
      "loss": 2.9295,
      "step": 2593
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0030806064605713,
      "learning_rate": 1.9962855578306946e-05,
      "loss": 2.8006,
      "step": 2594
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0062260627746582,
      "learning_rate": 1.9962820114838724e-05,
      "loss": 2.9086,
      "step": 2595
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1005717515945435,
      "learning_rate": 1.9962784634480818e-05,
      "loss": 2.7567,
      "step": 2596
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9504809379577637,
      "learning_rate": 1.9962749137233278e-05,
      "loss": 2.8805,
      "step": 2597
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0183013677597046,
      "learning_rate": 1.9962713623096183e-05,
      "loss": 3.0793,
      "step": 2598
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9383373856544495,
      "learning_rate": 1.9962678092069575e-05,
      "loss": 3.0366,
      "step": 2599
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.007602334022522,
      "learning_rate": 1.9962642544153524e-05,
      "loss": 2.8638,
      "step": 2600
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9552040696144104,
      "learning_rate": 1.9962606979348087e-05,
      "loss": 2.8669,
      "step": 2601
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1442228555679321,
      "learning_rate": 1.9962571397653327e-05,
      "loss": 3.0008,
      "step": 2602
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0006431341171265,
      "learning_rate": 1.9962535799069303e-05,
      "loss": 2.9883,
      "step": 2603
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9859067797660828,
      "learning_rate": 1.9962500183596073e-05,
      "loss": 2.808,
      "step": 2604
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9568144679069519,
      "learning_rate": 1.99624645512337e-05,
      "loss": 2.7906,
      "step": 2605
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0671255588531494,
      "learning_rate": 1.996242890198225e-05,
      "loss": 2.7956,
      "step": 2606
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.083164095878601,
      "learning_rate": 1.9962393235841767e-05,
      "loss": 2.8612,
      "step": 2607
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9677048325538635,
      "learning_rate": 1.996235755281233e-05,
      "loss": 2.7913,
      "step": 2608
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9505887031555176,
      "learning_rate": 1.9962321852893986e-05,
      "loss": 2.7967,
      "step": 2609
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.106662631034851,
      "learning_rate": 1.9962286136086802e-05,
      "loss": 2.7887,
      "step": 2610
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.156136155128479,
      "learning_rate": 1.996225040239084e-05,
      "loss": 2.768,
      "step": 2611
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0121259689331055,
      "learning_rate": 1.9962214651806153e-05,
      "loss": 2.8999,
      "step": 2612
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9683994650840759,
      "learning_rate": 1.996217888433281e-05,
      "loss": 2.8205,
      "step": 2613
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9519884586334229,
      "learning_rate": 1.9962143099970865e-05,
      "loss": 3.0414,
      "step": 2614
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9140115976333618,
      "learning_rate": 1.9962107298720384e-05,
      "loss": 2.8963,
      "step": 2615
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0374345779418945,
      "learning_rate": 1.9962071480581426e-05,
      "loss": 2.7482,
      "step": 2616
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9255743026733398,
      "learning_rate": 1.9962035645554054e-05,
      "loss": 2.706,
      "step": 2617
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9801007509231567,
      "learning_rate": 1.9961999793638318e-05,
      "loss": 2.8909,
      "step": 2618
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0143581628799438,
      "learning_rate": 1.9961963924834294e-05,
      "loss": 2.844,
      "step": 2619
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9176469445228577,
      "learning_rate": 1.9961928039142032e-05,
      "loss": 3.051,
      "step": 2620
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9431779384613037,
      "learning_rate": 1.9961892136561597e-05,
      "loss": 2.7688,
      "step": 2621
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9909617304801941,
      "learning_rate": 1.9961856217093046e-05,
      "loss": 2.8524,
      "step": 2622
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9944292902946472,
      "learning_rate": 1.996182028073645e-05,
      "loss": 2.8503,
      "step": 2623
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1291273832321167,
      "learning_rate": 1.996178432749186e-05,
      "loss": 2.6472,
      "step": 2624
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0240050554275513,
      "learning_rate": 1.9961748357359338e-05,
      "loss": 3.1164,
      "step": 2625
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.002286672592163,
      "learning_rate": 1.9961712370338947e-05,
      "loss": 2.6043,
      "step": 2626
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9239546656608582,
      "learning_rate": 1.9961676366430753e-05,
      "loss": 2.5925,
      "step": 2627
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1991466283798218,
      "learning_rate": 1.9961640345634804e-05,
      "loss": 2.9572,
      "step": 2628
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9877219200134277,
      "learning_rate": 1.9961604307951174e-05,
      "loss": 2.7839,
      "step": 2629
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1482642889022827,
      "learning_rate": 1.996156825337992e-05,
      "loss": 2.7742,
      "step": 2630
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0137596130371094,
      "learning_rate": 1.99615321819211e-05,
      "loss": 2.7666,
      "step": 2631
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0733314752578735,
      "learning_rate": 1.996149609357478e-05,
      "loss": 2.8493,
      "step": 2632
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1092420816421509,
      "learning_rate": 1.9961459988341018e-05,
      "loss": 2.8167,
      "step": 2633
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0370466709136963,
      "learning_rate": 1.9961423866219874e-05,
      "loss": 2.8552,
      "step": 2634
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0066055059432983,
      "learning_rate": 1.996138772721141e-05,
      "loss": 2.6215,
      "step": 2635
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9758213758468628,
      "learning_rate": 1.996135157131569e-05,
      "loss": 2.764,
      "step": 2636
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0257192850112915,
      "learning_rate": 1.9961315398532775e-05,
      "loss": 2.891,
      "step": 2637
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.011796236038208,
      "learning_rate": 1.9961279208862723e-05,
      "loss": 2.7575,
      "step": 2638
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9508446455001831,
      "learning_rate": 1.9961243002305595e-05,
      "loss": 2.7113,
      "step": 2639
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9407790303230286,
      "learning_rate": 1.996120677886146e-05,
      "loss": 2.7527,
      "step": 2640
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0241705179214478,
      "learning_rate": 1.996117053853037e-05,
      "loss": 2.9231,
      "step": 2641
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9217931032180786,
      "learning_rate": 1.9961134281312392e-05,
      "loss": 2.8834,
      "step": 2642
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9776386618614197,
      "learning_rate": 1.996109800720759e-05,
      "loss": 2.784,
      "step": 2643
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9541914463043213,
      "learning_rate": 1.9961061716216018e-05,
      "loss": 2.7832,
      "step": 2644
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.082305908203125,
      "learning_rate": 1.996102540833774e-05,
      "loss": 2.8232,
      "step": 2645
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9659601449966431,
      "learning_rate": 1.9960989083572818e-05,
      "loss": 2.8514,
      "step": 2646
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1232130527496338,
      "learning_rate": 1.9960952741921315e-05,
      "loss": 2.7522,
      "step": 2647
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.880456268787384,
      "learning_rate": 1.9960916383383295e-05,
      "loss": 2.8733,
      "step": 2648
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1095752716064453,
      "learning_rate": 1.9960880007958812e-05,
      "loss": 2.8153,
      "step": 2649
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2319824695587158,
      "learning_rate": 1.9960843615647932e-05,
      "loss": 2.7075,
      "step": 2650
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9805256724357605,
      "learning_rate": 1.9960807206450718e-05,
      "loss": 2.6854,
      "step": 2651
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.022162914276123,
      "learning_rate": 1.996077078036723e-05,
      "loss": 2.9732,
      "step": 2652
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2746764421463013,
      "learning_rate": 1.9960734337397533e-05,
      "loss": 2.7856,
      "step": 2653
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9524661302566528,
      "learning_rate": 1.9960697877541683e-05,
      "loss": 2.8823,
      "step": 2654
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9970338344573975,
      "learning_rate": 1.9960661400799748e-05,
      "loss": 2.51,
      "step": 2655
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.056388020515442,
      "learning_rate": 1.9960624907171784e-05,
      "loss": 2.8215,
      "step": 2656
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9711652398109436,
      "learning_rate": 1.9960588396657856e-05,
      "loss": 2.6947,
      "step": 2657
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0693473815917969,
      "learning_rate": 1.9960551869258024e-05,
      "loss": 2.4965,
      "step": 2658
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.993999719619751,
      "learning_rate": 1.9960515324972353e-05,
      "loss": 3.0614,
      "step": 2659
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0557523965835571,
      "learning_rate": 1.9960478763800904e-05,
      "loss": 2.8576,
      "step": 2660
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.969814121723175,
      "learning_rate": 1.9960442185743735e-05,
      "loss": 2.7279,
      "step": 2661
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.959748387336731,
      "learning_rate": 1.9960405590800917e-05,
      "loss": 2.7657,
      "step": 2662
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0431550741195679,
      "learning_rate": 1.9960368978972503e-05,
      "loss": 2.7656,
      "step": 2663
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9737486243247986,
      "learning_rate": 1.9960332350258556e-05,
      "loss": 2.8593,
      "step": 2664
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0075280666351318,
      "learning_rate": 1.9960295704659142e-05,
      "loss": 2.6202,
      "step": 2665
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0217708349227905,
      "learning_rate": 1.996025904217432e-05,
      "loss": 2.6798,
      "step": 2666
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0439916849136353,
      "learning_rate": 1.9960222362804156e-05,
      "loss": 2.6306,
      "step": 2667
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9513757824897766,
      "learning_rate": 1.9960185666548713e-05,
      "loss": 2.8293,
      "step": 2668
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9484351277351379,
      "learning_rate": 1.9960148953408048e-05,
      "loss": 2.6892,
      "step": 2669
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0124765634536743,
      "learning_rate": 1.9960112223382224e-05,
      "loss": 3.0166,
      "step": 2670
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9837020635604858,
      "learning_rate": 1.9960075476471307e-05,
      "loss": 2.8299,
      "step": 2671
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.987011194229126,
      "learning_rate": 1.996003871267535e-05,
      "loss": 2.9055,
      "step": 2672
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1006765365600586,
      "learning_rate": 1.996000193199443e-05,
      "loss": 2.85,
      "step": 2673
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0479027032852173,
      "learning_rate": 1.9959965134428597e-05,
      "loss": 3.0768,
      "step": 2674
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0191007852554321,
      "learning_rate": 1.9959928319977922e-05,
      "loss": 2.7395,
      "step": 2675
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0132591724395752,
      "learning_rate": 1.9959891488642462e-05,
      "loss": 2.7759,
      "step": 2676
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1092747449874878,
      "learning_rate": 1.995985464042228e-05,
      "loss": 2.8278,
      "step": 2677
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0267614126205444,
      "learning_rate": 1.995981777531744e-05,
      "loss": 2.9502,
      "step": 2678
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.00760817527771,
      "learning_rate": 1.9959780893328e-05,
      "loss": 2.7507,
      "step": 2679
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.073773741722107,
      "learning_rate": 1.9959743994454033e-05,
      "loss": 2.8311,
      "step": 2680
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.965937614440918,
      "learning_rate": 1.9959707078695592e-05,
      "loss": 2.85,
      "step": 2681
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.948532223701477,
      "learning_rate": 1.9959670146052742e-05,
      "loss": 2.7905,
      "step": 2682
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0055958032608032,
      "learning_rate": 1.9959633196525547e-05,
      "loss": 2.7565,
      "step": 2683
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1103435754776,
      "learning_rate": 1.9959596230114068e-05,
      "loss": 2.7514,
      "step": 2684
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.0478256940841675,
      "learning_rate": 1.9959559246818366e-05,
      "loss": 2.8782,
      "step": 2685
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9933339953422546,
      "learning_rate": 1.995952224663851e-05,
      "loss": 2.8361,
      "step": 2686
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9914591312408447,
      "learning_rate": 1.9959485229574555e-05,
      "loss": 2.8058,
      "step": 2687
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9821968078613281,
      "learning_rate": 1.995944819562657e-05,
      "loss": 2.7685,
      "step": 2688
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9819918274879456,
      "learning_rate": 1.9959411144794618e-05,
      "loss": 2.6835,
      "step": 2689
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0801175832748413,
      "learning_rate": 1.9959374077078758e-05,
      "loss": 2.8141,
      "step": 2690
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9679265022277832,
      "learning_rate": 1.995933699247905e-05,
      "loss": 2.8331,
      "step": 2691
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9475385546684265,
      "learning_rate": 1.9959299890995564e-05,
      "loss": 2.8835,
      "step": 2692
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9964756369590759,
      "learning_rate": 1.9959262772628357e-05,
      "loss": 2.7473,
      "step": 2693
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0751564502716064,
      "learning_rate": 1.99592256373775e-05,
      "loss": 2.8465,
      "step": 2694
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0914099216461182,
      "learning_rate": 1.9959188485243045e-05,
      "loss": 3.0185,
      "step": 2695
    },
    {
      "epoch": 0.04,
      "grad_norm": 4.729445457458496,
      "learning_rate": 1.9959151316225066e-05,
      "loss": 2.8111,
      "step": 2696
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9316367506980896,
      "learning_rate": 1.9959114130323617e-05,
      "loss": 2.642,
      "step": 2697
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9496925473213196,
      "learning_rate": 1.9959076927538765e-05,
      "loss": 3.0641,
      "step": 2698
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0492206811904907,
      "learning_rate": 1.9959039707870574e-05,
      "loss": 2.8693,
      "step": 2699
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1203792095184326,
      "learning_rate": 1.9959002471319105e-05,
      "loss": 3.0039,
      "step": 2700
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.98248690366745,
      "learning_rate": 1.995896521788442e-05,
      "loss": 2.901,
      "step": 2701
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9807566404342651,
      "learning_rate": 1.9958927947566587e-05,
      "loss": 3.0694,
      "step": 2702
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0030847787857056,
      "learning_rate": 1.9958890660365665e-05,
      "loss": 2.9469,
      "step": 2703
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0341747999191284,
      "learning_rate": 1.995885335628172e-05,
      "loss": 2.9074,
      "step": 2704
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9682683348655701,
      "learning_rate": 1.9958816035314813e-05,
      "loss": 2.7473,
      "step": 2705
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0246211290359497,
      "learning_rate": 1.9958778697465007e-05,
      "loss": 2.6495,
      "step": 2706
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9856398701667786,
      "learning_rate": 1.9958741342732368e-05,
      "loss": 2.6793,
      "step": 2707
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9095323085784912,
      "learning_rate": 1.9958703971116956e-05,
      "loss": 2.9639,
      "step": 2708
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9280319213867188,
      "learning_rate": 1.995866658261884e-05,
      "loss": 2.7672,
      "step": 2709
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9819558262825012,
      "learning_rate": 1.995862917723807e-05,
      "loss": 2.7059,
      "step": 2710
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9431380033493042,
      "learning_rate": 1.9958591754974728e-05,
      "loss": 2.775,
      "step": 2711
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9737064838409424,
      "learning_rate": 1.9958554315828865e-05,
      "loss": 2.9052,
      "step": 2712
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.011988878250122,
      "learning_rate": 1.995851685980055e-05,
      "loss": 2.7136,
      "step": 2713
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.991229772567749,
      "learning_rate": 1.995847938688984e-05,
      "loss": 2.9574,
      "step": 2714
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9689707159996033,
      "learning_rate": 1.9958441897096807e-05,
      "loss": 2.8251,
      "step": 2715
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.982862114906311,
      "learning_rate": 1.9958404390421507e-05,
      "loss": 2.979,
      "step": 2716
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.061242938041687,
      "learning_rate": 1.9958366866864007e-05,
      "loss": 3.0183,
      "step": 2717
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.051780104637146,
      "learning_rate": 1.9958329326424374e-05,
      "loss": 2.7759,
      "step": 2718
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4702088832855225,
      "learning_rate": 1.9958291769102664e-05,
      "loss": 2.9764,
      "step": 2719
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8988497853279114,
      "learning_rate": 1.9958254194898946e-05,
      "loss": 2.7285,
      "step": 2720
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9709345698356628,
      "learning_rate": 1.9958216603813283e-05,
      "loss": 2.8716,
      "step": 2721
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9378542900085449,
      "learning_rate": 1.9958178995845738e-05,
      "loss": 2.9529,
      "step": 2722
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9513556361198425,
      "learning_rate": 1.995814137099638e-05,
      "loss": 2.8302,
      "step": 2723
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0023082494735718,
      "learning_rate": 1.995810372926526e-05,
      "loss": 2.7996,
      "step": 2724
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1128121614456177,
      "learning_rate": 1.9958066070652453e-05,
      "loss": 2.9237,
      "step": 2725
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0373473167419434,
      "learning_rate": 1.995802839515802e-05,
      "loss": 2.7699,
      "step": 2726
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0276316404342651,
      "learning_rate": 1.9957990702782024e-05,
      "loss": 2.9149,
      "step": 2727
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9641039371490479,
      "learning_rate": 1.9957952993524527e-05,
      "loss": 2.6197,
      "step": 2728
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1357362270355225,
      "learning_rate": 1.9957915267385596e-05,
      "loss": 2.7123,
      "step": 2729
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9995461106300354,
      "learning_rate": 1.99578775243653e-05,
      "loss": 2.7822,
      "step": 2730
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9895714521408081,
      "learning_rate": 1.9957839764463688e-05,
      "loss": 2.8971,
      "step": 2731
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9841997027397156,
      "learning_rate": 1.9957801987680837e-05,
      "loss": 2.7866,
      "step": 2732
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.075609564781189,
      "learning_rate": 1.995776419401681e-05,
      "loss": 2.8397,
      "step": 2733
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.044568657875061,
      "learning_rate": 1.9957726383471664e-05,
      "loss": 2.6674,
      "step": 2734
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0064929723739624,
      "learning_rate": 1.995768855604547e-05,
      "loss": 2.7254,
      "step": 2735
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0545074939727783,
      "learning_rate": 1.995765071173829e-05,
      "loss": 2.8782,
      "step": 2736
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1169476509094238,
      "learning_rate": 1.9957612850550183e-05,
      "loss": 2.88,
      "step": 2737
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9203037023544312,
      "learning_rate": 1.9957574972481223e-05,
      "loss": 2.7538,
      "step": 2738
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0250805616378784,
      "learning_rate": 1.9957537077531465e-05,
      "loss": 2.6625,
      "step": 2739
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.081084132194519,
      "learning_rate": 1.9957499165700977e-05,
      "loss": 2.6645,
      "step": 2740
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0477638244628906,
      "learning_rate": 1.9957461236989827e-05,
      "loss": 2.8722,
      "step": 2741
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0133657455444336,
      "learning_rate": 1.9957423291398073e-05,
      "loss": 2.6428,
      "step": 2742
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9209771156311035,
      "learning_rate": 1.9957385328925786e-05,
      "loss": 2.6695,
      "step": 2743
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9767107367515564,
      "learning_rate": 1.995734734957302e-05,
      "loss": 2.8616,
      "step": 2744
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9861503839492798,
      "learning_rate": 1.9957309353339852e-05,
      "loss": 2.6156,
      "step": 2745
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9612933397293091,
      "learning_rate": 1.9957271340226335e-05,
      "loss": 2.8792,
      "step": 2746
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9815043210983276,
      "learning_rate": 1.9957233310232542e-05,
      "loss": 2.9887,
      "step": 2747
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3362962007522583,
      "learning_rate": 1.9957195263358534e-05,
      "loss": 3.0513,
      "step": 2748
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.959178626537323,
      "learning_rate": 1.9957157199604376e-05,
      "loss": 2.9474,
      "step": 2749
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0157644748687744,
      "learning_rate": 1.9957119118970132e-05,
      "loss": 2.7752,
      "step": 2750
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9787552356719971,
      "learning_rate": 1.9957081021455867e-05,
      "loss": 3.0461,
      "step": 2751
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9861484169960022,
      "learning_rate": 1.9957042907061644e-05,
      "loss": 2.8086,
      "step": 2752
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9225472807884216,
      "learning_rate": 1.995700477578753e-05,
      "loss": 2.7908,
      "step": 2753
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9862416386604309,
      "learning_rate": 1.9956966627633588e-05,
      "loss": 2.7425,
      "step": 2754
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0215235948562622,
      "learning_rate": 1.9956928462599884e-05,
      "loss": 2.8972,
      "step": 2755
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9684881567955017,
      "learning_rate": 1.995689028068648e-05,
      "loss": 3.0117,
      "step": 2756
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9909473061561584,
      "learning_rate": 1.9956852081893447e-05,
      "loss": 2.7328,
      "step": 2757
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2871052026748657,
      "learning_rate": 1.995681386622084e-05,
      "loss": 2.7916,
      "step": 2758
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4309715032577515,
      "learning_rate": 1.9956775633668736e-05,
      "loss": 2.7816,
      "step": 2759
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.079317331314087,
      "learning_rate": 1.9956737384237187e-05,
      "loss": 2.7697,
      "step": 2760
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1396076679229736,
      "learning_rate": 1.9956699117926267e-05,
      "loss": 2.7618,
      "step": 2761
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.99152672290802,
      "learning_rate": 1.9956660834736037e-05,
      "loss": 3.0062,
      "step": 2762
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0008141994476318,
      "learning_rate": 1.995662253466656e-05,
      "loss": 2.8486,
      "step": 2763
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0282622575759888,
      "learning_rate": 1.9956584217717907e-05,
      "loss": 2.9497,
      "step": 2764
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2279973030090332,
      "learning_rate": 1.995654588389014e-05,
      "loss": 3.0149,
      "step": 2765
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.090274453163147,
      "learning_rate": 1.9956507533183324e-05,
      "loss": 2.9712,
      "step": 2766
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2252089977264404,
      "learning_rate": 1.995646916559752e-05,
      "loss": 2.8294,
      "step": 2767
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9556685090065002,
      "learning_rate": 1.9956430781132798e-05,
      "loss": 2.9465,
      "step": 2768
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0898154973983765,
      "learning_rate": 1.9956392379789223e-05,
      "loss": 3.0312,
      "step": 2769
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0883164405822754,
      "learning_rate": 1.9956353961566858e-05,
      "loss": 2.7006,
      "step": 2770
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9792590141296387,
      "learning_rate": 1.9956315526465768e-05,
      "loss": 2.7033,
      "step": 2771
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9529377222061157,
      "learning_rate": 1.995627707448602e-05,
      "loss": 2.7,
      "step": 2772
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2068374156951904,
      "learning_rate": 1.995623860562768e-05,
      "loss": 2.738,
      "step": 2773
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.994166374206543,
      "learning_rate": 1.9956200119890806e-05,
      "loss": 2.7992,
      "step": 2774
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9216245412826538,
      "learning_rate": 1.9956161617275474e-05,
      "loss": 2.8025,
      "step": 2775
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0573115348815918,
      "learning_rate": 1.9956123097781744e-05,
      "loss": 2.9603,
      "step": 2776
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9736863374710083,
      "learning_rate": 1.995608456140968e-05,
      "loss": 2.7558,
      "step": 2777
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0526703596115112,
      "learning_rate": 1.9956046008159346e-05,
      "loss": 2.8664,
      "step": 2778
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.052693486213684,
      "learning_rate": 1.9956007438030816e-05,
      "loss": 2.8556,
      "step": 2779
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9905352592468262,
      "learning_rate": 1.9955968851024145e-05,
      "loss": 2.8921,
      "step": 2780
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.054805040359497,
      "learning_rate": 1.9955930247139405e-05,
      "loss": 2.649,
      "step": 2781
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0513180494308472,
      "learning_rate": 1.9955891626376655e-05,
      "loss": 2.9628,
      "step": 2782
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0092942714691162,
      "learning_rate": 1.9955852988735967e-05,
      "loss": 2.962,
      "step": 2783
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8377185463905334,
      "learning_rate": 1.9955814334217406e-05,
      "loss": 2.838,
      "step": 2784
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0270065069198608,
      "learning_rate": 1.9955775662821036e-05,
      "loss": 2.7585,
      "step": 2785
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0807485580444336,
      "learning_rate": 1.995573697454692e-05,
      "loss": 2.7399,
      "step": 2786
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0486137866973877,
      "learning_rate": 1.9955698269395126e-05,
      "loss": 3.0857,
      "step": 2787
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9158341884613037,
      "learning_rate": 1.995565954736572e-05,
      "loss": 2.8985,
      "step": 2788
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0044392347335815,
      "learning_rate": 1.995562080845877e-05,
      "loss": 2.72,
      "step": 2789
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9826537370681763,
      "learning_rate": 1.9955582052674333e-05,
      "loss": 3.0256,
      "step": 2790
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9405049085617065,
      "learning_rate": 1.9955543280012484e-05,
      "loss": 2.8197,
      "step": 2791
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9684468507766724,
      "learning_rate": 1.9955504490473286e-05,
      "loss": 2.8615,
      "step": 2792
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0376871824264526,
      "learning_rate": 1.99554656840568e-05,
      "loss": 2.8493,
      "step": 2793
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0160510540008545,
      "learning_rate": 1.99554268607631e-05,
      "loss": 2.9037,
      "step": 2794
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0392446517944336,
      "learning_rate": 1.9955388020592248e-05,
      "loss": 2.8106,
      "step": 2795
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.079574465751648,
      "learning_rate": 1.9955349163544308e-05,
      "loss": 2.6893,
      "step": 2796
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0082355737686157,
      "learning_rate": 1.9955310289619348e-05,
      "loss": 2.7759,
      "step": 2797
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0090373754501343,
      "learning_rate": 1.995527139881743e-05,
      "loss": 2.951,
      "step": 2798
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9618929624557495,
      "learning_rate": 1.9955232491138626e-05,
      "loss": 2.955,
      "step": 2799
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9265614748001099,
      "learning_rate": 1.9955193566583e-05,
      "loss": 2.7575,
      "step": 2800
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0095864534378052,
      "learning_rate": 1.9955154625150616e-05,
      "loss": 2.9465,
      "step": 2801
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9554997682571411,
      "learning_rate": 1.995511566684154e-05,
      "loss": 2.7018,
      "step": 2802
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9262382388114929,
      "learning_rate": 1.9955076691655838e-05,
      "loss": 2.8949,
      "step": 2803
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0749515295028687,
      "learning_rate": 1.995503769959358e-05,
      "loss": 2.6471,
      "step": 2804
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0561904907226562,
      "learning_rate": 1.995499869065483e-05,
      "loss": 2.6822,
      "step": 2805
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.973179817199707,
      "learning_rate": 1.9954959664839652e-05,
      "loss": 2.9466,
      "step": 2806
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0063095092773438,
      "learning_rate": 1.9954920622148115e-05,
      "loss": 2.9901,
      "step": 2807
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.023733139038086,
      "learning_rate": 1.9954881562580282e-05,
      "loss": 2.8344,
      "step": 2808
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0962260961532593,
      "learning_rate": 1.9954842486136222e-05,
      "loss": 2.7857,
      "step": 2809
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0733978748321533,
      "learning_rate": 1.9954803392816e-05,
      "loss": 2.8785,
      "step": 2810
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1028960943222046,
      "learning_rate": 1.9954764282619684e-05,
      "loss": 2.9088,
      "step": 2811
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0626204013824463,
      "learning_rate": 1.9954725155547337e-05,
      "loss": 3.028,
      "step": 2812
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0323824882507324,
      "learning_rate": 1.9954686011599028e-05,
      "loss": 2.7348,
      "step": 2813
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0434879064559937,
      "learning_rate": 1.995464685077482e-05,
      "loss": 2.9521,
      "step": 2814
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1223599910736084,
      "learning_rate": 1.9954607673074785e-05,
      "loss": 2.6675,
      "step": 2815
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9791080951690674,
      "learning_rate": 1.9954568478498984e-05,
      "loss": 2.9257,
      "step": 2816
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9877060055732727,
      "learning_rate": 1.995452926704749e-05,
      "loss": 2.7732,
      "step": 2817
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1296288967132568,
      "learning_rate": 1.995449003872036e-05,
      "loss": 2.8009,
      "step": 2818
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9822888374328613,
      "learning_rate": 1.9954450793517667e-05,
      "loss": 2.623,
      "step": 2819
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9916373491287231,
      "learning_rate": 1.995441153143948e-05,
      "loss": 2.8501,
      "step": 2820
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0000863075256348,
      "learning_rate": 1.995437225248586e-05,
      "loss": 2.7428,
      "step": 2821
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9682146310806274,
      "learning_rate": 1.9954332956656873e-05,
      "loss": 2.6811,
      "step": 2822
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9401090741157532,
      "learning_rate": 1.995429364395259e-05,
      "loss": 2.5866,
      "step": 2823
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0054634809494019,
      "learning_rate": 1.9954254314373077e-05,
      "loss": 2.6132,
      "step": 2824
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0062495470046997,
      "learning_rate": 1.9954214967918397e-05,
      "loss": 2.8366,
      "step": 2825
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.972918689250946,
      "learning_rate": 1.9954175604588622e-05,
      "loss": 2.792,
      "step": 2826
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0883735418319702,
      "learning_rate": 1.9954136224383813e-05,
      "loss": 3.0472,
      "step": 2827
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9889377951622009,
      "learning_rate": 1.995409682730404e-05,
      "loss": 2.8967,
      "step": 2828
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0948947668075562,
      "learning_rate": 1.995405741334937e-05,
      "loss": 2.736,
      "step": 2829
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9671722650527954,
      "learning_rate": 1.995401798251987e-05,
      "loss": 2.8969,
      "step": 2830
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9590430855751038,
      "learning_rate": 1.9953978534815606e-05,
      "loss": 2.8342,
      "step": 2831
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9615622162818909,
      "learning_rate": 1.995393907023664e-05,
      "loss": 2.9334,
      "step": 2832
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8995311260223389,
      "learning_rate": 1.9953899588783053e-05,
      "loss": 2.729,
      "step": 2833
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.127112627029419,
      "learning_rate": 1.99538600904549e-05,
      "loss": 2.921,
      "step": 2834
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9237267374992371,
      "learning_rate": 1.9953820575252245e-05,
      "loss": 2.97,
      "step": 2835
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0659228563308716,
      "learning_rate": 1.9953781043175163e-05,
      "loss": 2.8309,
      "step": 2836
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9775119423866272,
      "learning_rate": 1.9953741494223722e-05,
      "loss": 3.0438,
      "step": 2837
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.036851406097412,
      "learning_rate": 1.9953701928397985e-05,
      "loss": 2.7063,
      "step": 2838
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9264500141143799,
      "learning_rate": 1.9953662345698016e-05,
      "loss": 2.7614,
      "step": 2839
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0373486280441284,
      "learning_rate": 1.995362274612389e-05,
      "loss": 2.8291,
      "step": 2840
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0393084287643433,
      "learning_rate": 1.9953583129675667e-05,
      "loss": 2.9315,
      "step": 2841
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.009263038635254,
      "learning_rate": 1.9953543496353422e-05,
      "loss": 2.8191,
      "step": 2842
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9926641583442688,
      "learning_rate": 1.9953503846157213e-05,
      "loss": 2.8761,
      "step": 2843
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.096130609512329,
      "learning_rate": 1.995346417908711e-05,
      "loss": 2.8312,
      "step": 2844
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0128957033157349,
      "learning_rate": 1.995342449514319e-05,
      "loss": 2.7786,
      "step": 2845
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.308260440826416,
      "learning_rate": 1.9953384794325506e-05,
      "loss": 2.9359,
      "step": 2846
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1383413076400757,
      "learning_rate": 1.995334507663413e-05,
      "loss": 2.9685,
      "step": 2847
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0226317644119263,
      "learning_rate": 1.9953305342069134e-05,
      "loss": 2.8445,
      "step": 2848
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.069663405418396,
      "learning_rate": 1.995326559063058e-05,
      "loss": 2.6262,
      "step": 2849
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.074204683303833,
      "learning_rate": 1.9953225822318537e-05,
      "loss": 2.8831,
      "step": 2850
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9659027457237244,
      "learning_rate": 1.9953186037133076e-05,
      "loss": 2.9072,
      "step": 2851
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9781490564346313,
      "learning_rate": 1.995314623507426e-05,
      "loss": 2.9152,
      "step": 2852
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.983414888381958,
      "learning_rate": 1.995310641614216e-05,
      "loss": 2.6843,
      "step": 2853
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8796279430389404,
      "learning_rate": 1.995306658033684e-05,
      "loss": 2.5659,
      "step": 2854
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9478854537010193,
      "learning_rate": 1.995302672765837e-05,
      "loss": 2.9246,
      "step": 2855
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1575807332992554,
      "learning_rate": 1.9952986858106814e-05,
      "loss": 2.92,
      "step": 2856
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1748360395431519,
      "learning_rate": 1.9952946971682244e-05,
      "loss": 2.7152,
      "step": 2857
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0578209161758423,
      "learning_rate": 1.9952907068384723e-05,
      "loss": 2.9994,
      "step": 2858
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1734758615493774,
      "learning_rate": 1.9952867148214323e-05,
      "loss": 3.0154,
      "step": 2859
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0383739471435547,
      "learning_rate": 1.995282721117111e-05,
      "loss": 2.7777,
      "step": 2860
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9711620807647705,
      "learning_rate": 1.9952787257255152e-05,
      "loss": 2.9816,
      "step": 2861
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0283762216567993,
      "learning_rate": 1.9952747286466517e-05,
      "loss": 2.7103,
      "step": 2862
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.004173755645752,
      "learning_rate": 1.995270729880527e-05,
      "loss": 2.9991,
      "step": 2863
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9994312524795532,
      "learning_rate": 1.9952667294271486e-05,
      "loss": 3.0157,
      "step": 2864
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9588585495948792,
      "learning_rate": 1.9952627272865223e-05,
      "loss": 2.6709,
      "step": 2865
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0169672966003418,
      "learning_rate": 1.9952587234586554e-05,
      "loss": 2.8487,
      "step": 2866
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0193547010421753,
      "learning_rate": 1.995254717943555e-05,
      "loss": 2.8065,
      "step": 2867
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9720368385314941,
      "learning_rate": 1.9952507107412273e-05,
      "loss": 2.87,
      "step": 2868
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.913877546787262,
      "learning_rate": 1.9952467018516793e-05,
      "loss": 2.8178,
      "step": 2869
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0241670608520508,
      "learning_rate": 1.995242691274918e-05,
      "loss": 2.8752,
      "step": 2870
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0194364786148071,
      "learning_rate": 1.9952386790109498e-05,
      "loss": 2.9462,
      "step": 2871
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9912444949150085,
      "learning_rate": 1.995234665059782e-05,
      "loss": 2.9306,
      "step": 2872
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.952807605266571,
      "learning_rate": 1.9952306494214206e-05,
      "loss": 3.0153,
      "step": 2873
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0158326625823975,
      "learning_rate": 1.9952266320958736e-05,
      "loss": 2.5835,
      "step": 2874
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0173189640045166,
      "learning_rate": 1.9952226130831465e-05,
      "loss": 2.7556,
      "step": 2875
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9517707824707031,
      "learning_rate": 1.9952185923832473e-05,
      "loss": 2.8267,
      "step": 2876
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9566114544868469,
      "learning_rate": 1.9952145699961822e-05,
      "loss": 2.911,
      "step": 2877
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0541040897369385,
      "learning_rate": 1.9952105459219582e-05,
      "loss": 2.8598,
      "step": 2878
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9882573485374451,
      "learning_rate": 1.9952065201605818e-05,
      "loss": 2.884,
      "step": 2879
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1587786674499512,
      "learning_rate": 1.9952024927120604e-05,
      "loss": 2.8034,
      "step": 2880
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.977624237537384,
      "learning_rate": 1.9951984635764e-05,
      "loss": 2.7674,
      "step": 2881
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9830440282821655,
      "learning_rate": 1.9951944327536082e-05,
      "loss": 2.686,
      "step": 2882
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9906233549118042,
      "learning_rate": 1.9951904002436916e-05,
      "loss": 2.7968,
      "step": 2883
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0927798748016357,
      "learning_rate": 1.9951863660466568e-05,
      "loss": 2.8252,
      "step": 2884
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0297127962112427,
      "learning_rate": 1.995182330162511e-05,
      "loss": 2.9685,
      "step": 2885
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8847635388374329,
      "learning_rate": 1.9951782925912608e-05,
      "loss": 2.869,
      "step": 2886
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0731009244918823,
      "learning_rate": 1.995174253332913e-05,
      "loss": 3.0118,
      "step": 2887
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9299958348274231,
      "learning_rate": 1.9951702123874746e-05,
      "loss": 2.8445,
      "step": 2888
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9902806878089905,
      "learning_rate": 1.9951661697549526e-05,
      "loss": 2.9254,
      "step": 2889
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0643826723098755,
      "learning_rate": 1.9951621254353536e-05,
      "loss": 2.9094,
      "step": 2890
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.011670470237732,
      "learning_rate": 1.9951580794286843e-05,
      "loss": 2.9981,
      "step": 2891
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0475378036499023,
      "learning_rate": 1.9951540317349516e-05,
      "loss": 2.7581,
      "step": 2892
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0198158025741577,
      "learning_rate": 1.9951499823541628e-05,
      "loss": 2.8854,
      "step": 2893
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9415518045425415,
      "learning_rate": 1.9951459312863245e-05,
      "loss": 2.7055,
      "step": 2894
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0036548376083374,
      "learning_rate": 1.9951418785314436e-05,
      "loss": 2.765,
      "step": 2895
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9290896654129028,
      "learning_rate": 1.9951378240895272e-05,
      "loss": 2.9974,
      "step": 2896
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9959180355072021,
      "learning_rate": 1.9951337679605814e-05,
      "loss": 2.847,
      "step": 2897
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9502058029174805,
      "learning_rate": 1.995129710144614e-05,
      "loss": 2.7273,
      "step": 2898
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8833985328674316,
      "learning_rate": 1.9951256506416313e-05,
      "loss": 2.8152,
      "step": 2899
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9881259202957153,
      "learning_rate": 1.9951215894516402e-05,
      "loss": 2.8564,
      "step": 2900
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.982336699962616,
      "learning_rate": 1.995117526574648e-05,
      "loss": 2.9793,
      "step": 2901
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0315685272216797,
      "learning_rate": 1.995113462010661e-05,
      "loss": 2.8553,
      "step": 2902
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0143914222717285,
      "learning_rate": 1.9951093957596866e-05,
      "loss": 2.777,
      "step": 2903
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9147194027900696,
      "learning_rate": 1.9951053278217317e-05,
      "loss": 2.7716,
      "step": 2904
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0003246068954468,
      "learning_rate": 1.995101258196803e-05,
      "loss": 2.8897,
      "step": 2905
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0329489707946777,
      "learning_rate": 1.995097186884907e-05,
      "loss": 2.8458,
      "step": 2906
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0024088621139526,
      "learning_rate": 1.9950931138860512e-05,
      "loss": 2.7263,
      "step": 2907
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9508952498435974,
      "learning_rate": 1.995089039200242e-05,
      "loss": 2.742,
      "step": 2908
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0601750612258911,
      "learning_rate": 1.995084962827487e-05,
      "loss": 3.0635,
      "step": 2909
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0417890548706055,
      "learning_rate": 1.9950808847677926e-05,
      "loss": 2.7611,
      "step": 2910
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0028855800628662,
      "learning_rate": 1.995076805021166e-05,
      "loss": 2.882,
      "step": 2911
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0566908121109009,
      "learning_rate": 1.995072723587614e-05,
      "loss": 2.9269,
      "step": 2912
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0009506940841675,
      "learning_rate": 1.9950686404671434e-05,
      "loss": 2.8077,
      "step": 2913
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9892264008522034,
      "learning_rate": 1.9950645556597613e-05,
      "loss": 2.693,
      "step": 2914
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0687963962554932,
      "learning_rate": 1.9950604691654744e-05,
      "loss": 3.2838,
      "step": 2915
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0428946018218994,
      "learning_rate": 1.9950563809842896e-05,
      "loss": 2.7131,
      "step": 2916
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9798606038093567,
      "learning_rate": 1.995052291116214e-05,
      "loss": 2.7955,
      "step": 2917
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.003616452217102,
      "learning_rate": 1.9950481995612544e-05,
      "loss": 2.729,
      "step": 2918
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8888339996337891,
      "learning_rate": 1.9950441063194184e-05,
      "loss": 2.8606,
      "step": 2919
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9344977140426636,
      "learning_rate": 1.9950400113907122e-05,
      "loss": 2.7907,
      "step": 2920
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0426844358444214,
      "learning_rate": 1.9950359147751426e-05,
      "loss": 2.844,
      "step": 2921
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0083259344100952,
      "learning_rate": 1.995031816472717e-05,
      "loss": 3.0919,
      "step": 2922
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0382411479949951,
      "learning_rate": 1.9950277164834422e-05,
      "loss": 2.7269,
      "step": 2923
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0752490758895874,
      "learning_rate": 1.9950236148073254e-05,
      "loss": 2.6732,
      "step": 2924
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0301790237426758,
      "learning_rate": 1.995019511444373e-05,
      "loss": 2.846,
      "step": 2925
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.167396068572998,
      "learning_rate": 1.9950154063945927e-05,
      "loss": 2.952,
      "step": 2926
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0181859731674194,
      "learning_rate": 1.9950112996579907e-05,
      "loss": 2.7801,
      "step": 2927
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9234054088592529,
      "learning_rate": 1.995007191234574e-05,
      "loss": 2.9586,
      "step": 2928
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0057095289230347,
      "learning_rate": 1.9950030811243507e-05,
      "loss": 2.8782,
      "step": 2929
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.014333724975586,
      "learning_rate": 1.9949989693273262e-05,
      "loss": 2.7497,
      "step": 2930
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9764691591262817,
      "learning_rate": 1.9949948558435088e-05,
      "loss": 2.8964,
      "step": 2931
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.007575511932373,
      "learning_rate": 1.9949907406729046e-05,
      "loss": 2.8056,
      "step": 2932
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0781790018081665,
      "learning_rate": 1.994986623815521e-05,
      "loss": 2.6565,
      "step": 2933
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.059538722038269,
      "learning_rate": 1.9949825052713647e-05,
      "loss": 2.7423,
      "step": 2934
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0022995471954346,
      "learning_rate": 1.9949783850404427e-05,
      "loss": 2.8124,
      "step": 2935
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.913005530834198,
      "learning_rate": 1.9949742631227626e-05,
      "loss": 2.9277,
      "step": 2936
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9761595726013184,
      "learning_rate": 1.9949701395183302e-05,
      "loss": 2.7384,
      "step": 2937
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9833224415779114,
      "learning_rate": 1.9949660142271537e-05,
      "loss": 2.8711,
      "step": 2938
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0186147689819336,
      "learning_rate": 1.994961887249239e-05,
      "loss": 3.1175,
      "step": 2939
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.082859992980957,
      "learning_rate": 1.9949577585845944e-05,
      "loss": 2.807,
      "step": 2940
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.978844165802002,
      "learning_rate": 1.9949536282332257e-05,
      "loss": 3.1082,
      "step": 2941
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9891997575759888,
      "learning_rate": 1.9949494961951404e-05,
      "loss": 2.9024,
      "step": 2942
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9557677507400513,
      "learning_rate": 1.9949453624703455e-05,
      "loss": 2.557,
      "step": 2943
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9560303688049316,
      "learning_rate": 1.9949412270588482e-05,
      "loss": 2.9086,
      "step": 2944
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9796600341796875,
      "learning_rate": 1.994937089960655e-05,
      "loss": 2.9844,
      "step": 2945
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.972447395324707,
      "learning_rate": 1.994932951175774e-05,
      "loss": 2.9114,
      "step": 2946
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9403300881385803,
      "learning_rate": 1.9949288107042104e-05,
      "loss": 2.4898,
      "step": 2947
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4200212955474854,
      "learning_rate": 1.9949246685459726e-05,
      "loss": 3.0033,
      "step": 2948
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0102379322052002,
      "learning_rate": 1.9949205247010673e-05,
      "loss": 2.7151,
      "step": 2949
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.885445773601532,
      "learning_rate": 1.9949163791695015e-05,
      "loss": 2.9666,
      "step": 2950
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.972163736820221,
      "learning_rate": 1.9949122319512824e-05,
      "loss": 2.7175,
      "step": 2951
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0547535419464111,
      "learning_rate": 1.9949080830464167e-05,
      "loss": 2.8642,
      "step": 2952
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0517654418945312,
      "learning_rate": 1.9949039324549115e-05,
      "loss": 2.811,
      "step": 2953
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6728801727294922,
      "learning_rate": 1.9948997801767744e-05,
      "loss": 2.9582,
      "step": 2954
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.072251558303833,
      "learning_rate": 1.9948956262120113e-05,
      "loss": 2.8934,
      "step": 2955
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0207462310791016,
      "learning_rate": 1.9948914705606302e-05,
      "loss": 2.8207,
      "step": 2956
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.070284366607666,
      "learning_rate": 1.9948873132226377e-05,
      "loss": 2.6855,
      "step": 2957
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9820803999900818,
      "learning_rate": 1.994883154198041e-05,
      "loss": 2.8922,
      "step": 2958
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0010021924972534,
      "learning_rate": 1.9948789934868476e-05,
      "loss": 2.7484,
      "step": 2959
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0027095079421997,
      "learning_rate": 1.9948748310890635e-05,
      "loss": 2.8381,
      "step": 2960
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1302732229232788,
      "learning_rate": 1.9948706670046965e-05,
      "loss": 2.778,
      "step": 2961
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0032577514648438,
      "learning_rate": 1.9948665012337538e-05,
      "loss": 2.7269,
      "step": 2962
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0327389240264893,
      "learning_rate": 1.994862333776242e-05,
      "loss": 2.967,
      "step": 2963
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1425445079803467,
      "learning_rate": 1.9948581646321682e-05,
      "loss": 2.7569,
      "step": 2964
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9625568389892578,
      "learning_rate": 1.99485399380154e-05,
      "loss": 2.8153,
      "step": 2965
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.073149561882019,
      "learning_rate": 1.9948498212843637e-05,
      "loss": 2.7535,
      "step": 2966
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.992688775062561,
      "learning_rate": 1.994845647080647e-05,
      "loss": 2.7372,
      "step": 2967
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.042724847793579,
      "learning_rate": 1.9948414711903967e-05,
      "loss": 2.9612,
      "step": 2968
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8973993062973022,
      "learning_rate": 1.99483729361362e-05,
      "loss": 2.7054,
      "step": 2969
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0022563934326172,
      "learning_rate": 1.9948331143503236e-05,
      "loss": 2.8969,
      "step": 2970
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9981710314750671,
      "learning_rate": 1.994828933400515e-05,
      "loss": 2.6459,
      "step": 2971
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0581600666046143,
      "learning_rate": 1.9948247507642012e-05,
      "loss": 2.8688,
      "step": 2972
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.981094241142273,
      "learning_rate": 1.9948205664413894e-05,
      "loss": 2.8608,
      "step": 2973
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.076817512512207,
      "learning_rate": 1.9948163804320862e-05,
      "loss": 2.9329,
      "step": 2974
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.074792504310608,
      "learning_rate": 1.994812192736299e-05,
      "loss": 2.7575,
      "step": 2975
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9588677287101746,
      "learning_rate": 1.9948080033540355e-05,
      "loss": 2.8551,
      "step": 2976
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9626783132553101,
      "learning_rate": 1.994803812285302e-05,
      "loss": 2.887,
      "step": 2977
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9740352630615234,
      "learning_rate": 1.9947996195301056e-05,
      "loss": 2.771,
      "step": 2978
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.108386754989624,
      "learning_rate": 1.994795425088454e-05,
      "loss": 2.8655,
      "step": 2979
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9237786531448364,
      "learning_rate": 1.9947912289603538e-05,
      "loss": 2.7991,
      "step": 2980
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1125091314315796,
      "learning_rate": 1.994787031145812e-05,
      "loss": 2.7049,
      "step": 2981
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0505338907241821,
      "learning_rate": 1.9947828316448364e-05,
      "loss": 2.6978,
      "step": 2982
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0589463710784912,
      "learning_rate": 1.9947786304574336e-05,
      "loss": 2.7737,
      "step": 2983
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9692708253860474,
      "learning_rate": 1.994774427583611e-05,
      "loss": 2.7641,
      "step": 2984
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.024465799331665,
      "learning_rate": 1.9947702230233754e-05,
      "loss": 2.6495,
      "step": 2985
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0414448976516724,
      "learning_rate": 1.9947660167767343e-05,
      "loss": 2.6921,
      "step": 2986
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1321932077407837,
      "learning_rate": 1.9947618088436943e-05,
      "loss": 3.0702,
      "step": 2987
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0298237800598145,
      "learning_rate": 1.994757599224263e-05,
      "loss": 2.8514,
      "step": 2988
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.041934847831726,
      "learning_rate": 1.9947533879184477e-05,
      "loss": 2.7198,
      "step": 2989
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0632387399673462,
      "learning_rate": 1.9947491749262547e-05,
      "loss": 2.7337,
      "step": 2990
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9596048593521118,
      "learning_rate": 1.994744960247692e-05,
      "loss": 2.7067,
      "step": 2991
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0649808645248413,
      "learning_rate": 1.9947407438827665e-05,
      "loss": 2.7288,
      "step": 2992
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.010618805885315,
      "learning_rate": 1.994736525831485e-05,
      "loss": 3.0079,
      "step": 2993
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0592869520187378,
      "learning_rate": 1.994732306093855e-05,
      "loss": 2.5554,
      "step": 2994
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0535324811935425,
      "learning_rate": 1.9947280846698837e-05,
      "loss": 2.7692,
      "step": 2995
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0503591299057007,
      "learning_rate": 1.9947238615595782e-05,
      "loss": 2.8442,
      "step": 2996
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0211632251739502,
      "learning_rate": 1.994719636762945e-05,
      "loss": 2.9219,
      "step": 2997
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.000312328338623,
      "learning_rate": 1.9947154102799925e-05,
      "loss": 2.9346,
      "step": 2998
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0691570043563843,
      "learning_rate": 1.9947111821107272e-05,
      "loss": 2.7786,
      "step": 2999
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5162956714630127,
      "learning_rate": 1.9947069522551562e-05,
      "loss": 3.0715,
      "step": 3000
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9416806697845459,
      "learning_rate": 1.9947027207132862e-05,
      "loss": 2.7406,
      "step": 3001
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0444906949996948,
      "learning_rate": 1.9946984874851258e-05,
      "loss": 2.9075,
      "step": 3002
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8984659314155579,
      "learning_rate": 1.9946942525706806e-05,
      "loss": 2.6551,
      "step": 3003
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0130560398101807,
      "learning_rate": 1.994690015969959e-05,
      "loss": 2.9024,
      "step": 3004
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9964467883110046,
      "learning_rate": 1.9946857776829674e-05,
      "loss": 2.5873,
      "step": 3005
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1505082845687866,
      "learning_rate": 1.994681537709713e-05,
      "loss": 2.7331,
      "step": 3006
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.030808448791504,
      "learning_rate": 1.994677296050204e-05,
      "loss": 2.6783,
      "step": 3007
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9840357899665833,
      "learning_rate": 1.994673052704446e-05,
      "loss": 2.6895,
      "step": 3008
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.180859088897705,
      "learning_rate": 1.9946688076724475e-05,
      "loss": 2.9188,
      "step": 3009
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9955975413322449,
      "learning_rate": 1.9946645609542154e-05,
      "loss": 2.7434,
      "step": 3010
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9879156947135925,
      "learning_rate": 1.9946603125497565e-05,
      "loss": 2.6919,
      "step": 3011
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1488404273986816,
      "learning_rate": 1.994656062459078e-05,
      "loss": 2.621,
      "step": 3012
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1460962295532227,
      "learning_rate": 1.9946518106821875e-05,
      "loss": 2.6949,
      "step": 3013
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.032681941986084,
      "learning_rate": 1.994647557219092e-05,
      "loss": 2.7959,
      "step": 3014
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.089673399925232,
      "learning_rate": 1.9946433020697988e-05,
      "loss": 2.5194,
      "step": 3015
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0578190088272095,
      "learning_rate": 1.994639045234315e-05,
      "loss": 2.9881,
      "step": 3016
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0614019632339478,
      "learning_rate": 1.994634786712648e-05,
      "loss": 2.8162,
      "step": 3017
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9656524062156677,
      "learning_rate": 1.994630526504805e-05,
      "loss": 2.6976,
      "step": 3018
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.183813452720642,
      "learning_rate": 1.9946262646107928e-05,
      "loss": 2.8324,
      "step": 3019
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0331083536148071,
      "learning_rate": 1.994622001030619e-05,
      "loss": 2.767,
      "step": 3020
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2823532819747925,
      "learning_rate": 1.9946177357642907e-05,
      "loss": 2.7225,
      "step": 3021
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1256130933761597,
      "learning_rate": 1.9946134688118157e-05,
      "loss": 2.878,
      "step": 3022
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.137009620666504,
      "learning_rate": 1.9946092001732006e-05,
      "loss": 2.7371,
      "step": 3023
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.022271752357483,
      "learning_rate": 1.9946049298484523e-05,
      "loss": 2.6782,
      "step": 3024
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0184460878372192,
      "learning_rate": 1.994600657837579e-05,
      "loss": 2.9219,
      "step": 3025
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0962121486663818,
      "learning_rate": 1.994596384140587e-05,
      "loss": 2.7987,
      "step": 3026
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4735552072525024,
      "learning_rate": 1.9945921087574844e-05,
      "loss": 2.7933,
      "step": 3027
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1892497539520264,
      "learning_rate": 1.994587831688278e-05,
      "loss": 2.7556,
      "step": 3028
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4833048582077026,
      "learning_rate": 1.9945835529329748e-05,
      "loss": 2.7841,
      "step": 3029
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1001558303833008,
      "learning_rate": 1.9945792724915827e-05,
      "loss": 2.8373,
      "step": 3030
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3044192790985107,
      "learning_rate": 1.9945749903641083e-05,
      "loss": 2.8438,
      "step": 3031
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0489295721054077,
      "learning_rate": 1.9945707065505595e-05,
      "loss": 2.8169,
      "step": 3032
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9696027040481567,
      "learning_rate": 1.994566421050943e-05,
      "loss": 2.6905,
      "step": 3033
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9845737218856812,
      "learning_rate": 1.9945621338652664e-05,
      "loss": 2.9709,
      "step": 3034
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0540828704833984,
      "learning_rate": 1.9945578449935368e-05,
      "loss": 2.764,
      "step": 3035
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9886232614517212,
      "learning_rate": 1.9945535544357616e-05,
      "loss": 2.7098,
      "step": 3036
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.086251974105835,
      "learning_rate": 1.994549262191948e-05,
      "loss": 2.4994,
      "step": 3037
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.984869122505188,
      "learning_rate": 1.994544968262103e-05,
      "loss": 2.8437,
      "step": 3038
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0322316884994507,
      "learning_rate": 1.9945406726462348e-05,
      "loss": 2.8714,
      "step": 3039
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8861057758331299,
      "learning_rate": 1.9945363753443494e-05,
      "loss": 2.8613,
      "step": 3040
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9704180359840393,
      "learning_rate": 1.994532076356455e-05,
      "loss": 2.6668,
      "step": 3041
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0896567106246948,
      "learning_rate": 1.9945277756825586e-05,
      "loss": 2.8424,
      "step": 3042
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.470677375793457,
      "learning_rate": 1.9945234733226674e-05,
      "loss": 2.9376,
      "step": 3043
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0713138580322266,
      "learning_rate": 1.994519169276789e-05,
      "loss": 2.8646,
      "step": 3044
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.01941978931427,
      "learning_rate": 1.9945148635449305e-05,
      "loss": 2.9544,
      "step": 3045
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.107478380203247,
      "learning_rate": 1.994510556127099e-05,
      "loss": 2.6853,
      "step": 3046
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9269631505012512,
      "learning_rate": 1.994506247023302e-05,
      "loss": 2.6577,
      "step": 3047
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9753506779670715,
      "learning_rate": 1.994501936233547e-05,
      "loss": 2.9986,
      "step": 3048
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0029240846633911,
      "learning_rate": 1.994497623757841e-05,
      "loss": 2.8316,
      "step": 3049
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9808635115623474,
      "learning_rate": 1.9944933095961917e-05,
      "loss": 2.7058,
      "step": 3050
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0938981771469116,
      "learning_rate": 1.9944889937486057e-05,
      "loss": 2.8608,
      "step": 3051
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0528590679168701,
      "learning_rate": 1.994484676215091e-05,
      "loss": 2.5795,
      "step": 3052
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9199084043502808,
      "learning_rate": 1.9944803569956548e-05,
      "loss": 3.0093,
      "step": 3053
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0042186975479126,
      "learning_rate": 1.994476036090304e-05,
      "loss": 2.9895,
      "step": 3054
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9930838346481323,
      "learning_rate": 1.9944717134990464e-05,
      "loss": 2.8668,
      "step": 3055
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9761956334114075,
      "learning_rate": 1.994467389221889e-05,
      "loss": 2.6781,
      "step": 3056
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9912916421890259,
      "learning_rate": 1.9944630632588394e-05,
      "loss": 2.7205,
      "step": 3057
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1187814474105835,
      "learning_rate": 1.994458735609905e-05,
      "loss": 2.6776,
      "step": 3058
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0785402059555054,
      "learning_rate": 1.9944544062750927e-05,
      "loss": 2.5219,
      "step": 3059
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0740247964859009,
      "learning_rate": 1.99445007525441e-05,
      "loss": 2.7083,
      "step": 3060
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0006327629089355,
      "learning_rate": 1.9944457425478643e-05,
      "loss": 2.7904,
      "step": 3061
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9665931463241577,
      "learning_rate": 1.9944414081554636e-05,
      "loss": 2.9432,
      "step": 3062
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0886716842651367,
      "learning_rate": 1.9944370720772142e-05,
      "loss": 2.8272,
      "step": 3063
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5781539678573608,
      "learning_rate": 1.9944327343131236e-05,
      "loss": 2.7844,
      "step": 3064
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.027213215827942,
      "learning_rate": 1.9944283948631996e-05,
      "loss": 2.7871,
      "step": 3065
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0359822511672974,
      "learning_rate": 1.9944240537274497e-05,
      "loss": 2.8879,
      "step": 3066
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4691275358200073,
      "learning_rate": 1.994419710905881e-05,
      "loss": 2.8099,
      "step": 3067
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0677833557128906,
      "learning_rate": 1.9944153663985003e-05,
      "loss": 2.9686,
      "step": 3068
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.022899866104126,
      "learning_rate": 1.994411020205316e-05,
      "loss": 2.7532,
      "step": 3069
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.059614658355713,
      "learning_rate": 1.9944066723263346e-05,
      "loss": 2.609,
      "step": 3070
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0146605968475342,
      "learning_rate": 1.9944023227615636e-05,
      "loss": 2.8256,
      "step": 3071
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.066832184791565,
      "learning_rate": 1.994397971511011e-05,
      "loss": 2.8443,
      "step": 3072
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9011062383651733,
      "learning_rate": 1.9943936185746837e-05,
      "loss": 2.5185,
      "step": 3073
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.081565260887146,
      "learning_rate": 1.994389263952589e-05,
      "loss": 2.9433,
      "step": 3074
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0229616165161133,
      "learning_rate": 1.9943849076447344e-05,
      "loss": 3.0031,
      "step": 3075
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.982293963432312,
      "learning_rate": 1.9943805496511273e-05,
      "loss": 2.6791,
      "step": 3076
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9780439734458923,
      "learning_rate": 1.9943761899717753e-05,
      "loss": 2.7245,
      "step": 3077
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0732649564743042,
      "learning_rate": 1.9943718286066854e-05,
      "loss": 2.6187,
      "step": 3078
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.273463487625122,
      "learning_rate": 1.9943674655558655e-05,
      "loss": 2.9609,
      "step": 3079
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0165492296218872,
      "learning_rate": 1.9943631008193222e-05,
      "loss": 2.8948,
      "step": 3080
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9044839143753052,
      "learning_rate": 1.9943587343970638e-05,
      "loss": 2.6497,
      "step": 3081
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9329251050949097,
      "learning_rate": 1.994354366289097e-05,
      "loss": 2.6413,
      "step": 3082
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9555695652961731,
      "learning_rate": 1.9943499964954296e-05,
      "loss": 2.6243,
      "step": 3083
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0992629528045654,
      "learning_rate": 1.9943456250160687e-05,
      "loss": 2.8615,
      "step": 3084
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0782827138900757,
      "learning_rate": 1.9943412518510225e-05,
      "loss": 2.601,
      "step": 3085
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9805164337158203,
      "learning_rate": 1.994336877000297e-05,
      "loss": 2.8341,
      "step": 3086
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1176114082336426,
      "learning_rate": 1.994332500463901e-05,
      "loss": 3.0187,
      "step": 3087
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0226284265518188,
      "learning_rate": 1.994328122241841e-05,
      "loss": 2.7972,
      "step": 3088
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9742432832717896,
      "learning_rate": 1.9943237423341253e-05,
      "loss": 2.737,
      "step": 3089
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0828450918197632,
      "learning_rate": 1.9943193607407604e-05,
      "loss": 2.7469,
      "step": 3090
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0684348344802856,
      "learning_rate": 1.9943149774617542e-05,
      "loss": 2.7861,
      "step": 3091
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1460583209991455,
      "learning_rate": 1.994310592497114e-05,
      "loss": 2.9548,
      "step": 3092
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9942936897277832,
      "learning_rate": 1.9943062058468477e-05,
      "loss": 2.7371,
      "step": 3093
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0145877599716187,
      "learning_rate": 1.994301817510962e-05,
      "loss": 2.8298,
      "step": 3094
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9491133689880371,
      "learning_rate": 1.9942974274894644e-05,
      "loss": 2.8862,
      "step": 3095
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9501825571060181,
      "learning_rate": 1.9942930357823628e-05,
      "loss": 2.5953,
      "step": 3096
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9496470093727112,
      "learning_rate": 1.994288642389665e-05,
      "loss": 2.9038,
      "step": 3097
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0942212343215942,
      "learning_rate": 1.9942842473113773e-05,
      "loss": 2.869,
      "step": 3098
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9901622533798218,
      "learning_rate": 1.994279850547508e-05,
      "loss": 2.7077,
      "step": 3099
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9795432090759277,
      "learning_rate": 1.994275452098064e-05,
      "loss": 2.7197,
      "step": 3100
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0414481163024902,
      "learning_rate": 1.9942710519630534e-05,
      "loss": 2.8302,
      "step": 3101
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.103721261024475,
      "learning_rate": 1.994266650142483e-05,
      "loss": 2.9353,
      "step": 3102
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9465301632881165,
      "learning_rate": 1.994262246636361e-05,
      "loss": 2.8959,
      "step": 3103
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9867264032363892,
      "learning_rate": 1.9942578414446944e-05,
      "loss": 2.8099,
      "step": 3104
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0299774408340454,
      "learning_rate": 1.9942534345674902e-05,
      "loss": 2.5736,
      "step": 3105
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9999372363090515,
      "learning_rate": 1.994249026004757e-05,
      "loss": 3.0409,
      "step": 3106
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9416584968566895,
      "learning_rate": 1.994244615756501e-05,
      "loss": 2.6289,
      "step": 3107
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9869092702865601,
      "learning_rate": 1.994240203822731e-05,
      "loss": 2.4804,
      "step": 3108
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9979215860366821,
      "learning_rate": 1.9942357902034537e-05,
      "loss": 2.7997,
      "step": 3109
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1963298320770264,
      "learning_rate": 1.9942313748986765e-05,
      "loss": 2.5992,
      "step": 3110
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.982502281665802,
      "learning_rate": 1.9942269579084072e-05,
      "loss": 2.9082,
      "step": 3111
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0027692317962646,
      "learning_rate": 1.9942225392326534e-05,
      "loss": 2.9643,
      "step": 3112
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.022925853729248,
      "learning_rate": 1.994218118871422e-05,
      "loss": 2.7736,
      "step": 3113
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9874891638755798,
      "learning_rate": 1.994213696824721e-05,
      "loss": 2.7775,
      "step": 3114
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.125635027885437,
      "learning_rate": 1.994209273092558e-05,
      "loss": 2.8827,
      "step": 3115
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0155655145645142,
      "learning_rate": 1.99420484767494e-05,
      "loss": 2.7354,
      "step": 3116
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9607530236244202,
      "learning_rate": 1.9942004205718747e-05,
      "loss": 2.9882,
      "step": 3117
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2075985670089722,
      "learning_rate": 1.9941959917833698e-05,
      "loss": 2.7943,
      "step": 3118
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0496641397476196,
      "learning_rate": 1.9941915613094323e-05,
      "loss": 2.7235,
      "step": 3119
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9865449070930481,
      "learning_rate": 1.9941871291500704e-05,
      "loss": 2.9071,
      "step": 3120
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9869151711463928,
      "learning_rate": 1.9941826953052914e-05,
      "loss": 2.7448,
      "step": 3121
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.002918004989624,
      "learning_rate": 1.9941782597751026e-05,
      "loss": 2.7141,
      "step": 3122
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0441956520080566,
      "learning_rate": 1.9941738225595116e-05,
      "loss": 2.8888,
      "step": 3123
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0334186553955078,
      "learning_rate": 1.994169383658526e-05,
      "loss": 3.0087,
      "step": 3124
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.014075517654419,
      "learning_rate": 1.9941649430721535e-05,
      "loss": 2.78,
      "step": 3125
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.920936644077301,
      "learning_rate": 1.9941605008004013e-05,
      "loss": 2.701,
      "step": 3126
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0485060214996338,
      "learning_rate": 1.994156056843277e-05,
      "loss": 2.852,
      "step": 3127
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.094204306602478,
      "learning_rate": 1.994151611200788e-05,
      "loss": 2.8522,
      "step": 3128
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.077451467514038,
      "learning_rate": 1.994147163872942e-05,
      "loss": 2.866,
      "step": 3129
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9492138624191284,
      "learning_rate": 1.994142714859747e-05,
      "loss": 2.522,
      "step": 3130
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0671494007110596,
      "learning_rate": 1.9941382641612095e-05,
      "loss": 3.0048,
      "step": 3131
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.068250298500061,
      "learning_rate": 1.9941338117773378e-05,
      "loss": 2.8983,
      "step": 3132
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9910423159599304,
      "learning_rate": 1.9941293577081395e-05,
      "loss": 2.5765,
      "step": 3133
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0464292764663696,
      "learning_rate": 1.994124901953622e-05,
      "loss": 2.8951,
      "step": 3134
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9639892578125,
      "learning_rate": 1.9941204445137924e-05,
      "loss": 2.6704,
      "step": 3135
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9963511824607849,
      "learning_rate": 1.9941159853886588e-05,
      "loss": 2.7638,
      "step": 3136
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0019117593765259,
      "learning_rate": 1.994111524578229e-05,
      "loss": 2.8401,
      "step": 3137
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9697481989860535,
      "learning_rate": 1.9941070620825097e-05,
      "loss": 2.8052,
      "step": 3138
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9915825724601746,
      "learning_rate": 1.9941025979015093e-05,
      "loss": 2.6241,
      "step": 3139
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1421838998794556,
      "learning_rate": 1.9940981320352346e-05,
      "loss": 2.9109,
      "step": 3140
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9167528748512268,
      "learning_rate": 1.9940936644836936e-05,
      "loss": 2.6538,
      "step": 3141
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.16122567653656,
      "learning_rate": 1.994089195246894e-05,
      "loss": 2.7372,
      "step": 3142
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.335173487663269,
      "learning_rate": 1.9940847243248434e-05,
      "loss": 2.8077,
      "step": 3143
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0156419277191162,
      "learning_rate": 1.994080251717549e-05,
      "loss": 2.9381,
      "step": 3144
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9738168120384216,
      "learning_rate": 1.9940757774250187e-05,
      "loss": 2.867,
      "step": 3145
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9779322147369385,
      "learning_rate": 1.9940713014472597e-05,
      "loss": 2.934,
      "step": 3146
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0374122858047485,
      "learning_rate": 1.9940668237842802e-05,
      "loss": 2.7966,
      "step": 3147
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0785975456237793,
      "learning_rate": 1.994062344436087e-05,
      "loss": 2.9074,
      "step": 3148
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9401926398277283,
      "learning_rate": 1.9940578634026885e-05,
      "loss": 2.7317,
      "step": 3149
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9192465543746948,
      "learning_rate": 1.994053380684092e-05,
      "loss": 2.743,
      "step": 3150
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.060334324836731,
      "learning_rate": 1.994048896280305e-05,
      "loss": 3.0675,
      "step": 3151
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.143651008605957,
      "learning_rate": 1.994044410191335e-05,
      "loss": 2.7263,
      "step": 3152
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9415442943572998,
      "learning_rate": 1.9940399224171898e-05,
      "loss": 3.0059,
      "step": 3153
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0508610010147095,
      "learning_rate": 1.994035432957877e-05,
      "loss": 2.7041,
      "step": 3154
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0774561166763306,
      "learning_rate": 1.9940309418134037e-05,
      "loss": 2.8234,
      "step": 3155
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0107786655426025,
      "learning_rate": 1.9940264489837786e-05,
      "loss": 2.8329,
      "step": 3156
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0526846647262573,
      "learning_rate": 1.9940219544690084e-05,
      "loss": 2.777,
      "step": 3157
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0049235820770264,
      "learning_rate": 1.994017458269101e-05,
      "loss": 2.8767,
      "step": 3158
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8877015113830566,
      "learning_rate": 1.9940129603840642e-05,
      "loss": 2.7106,
      "step": 3159
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.998635470867157,
      "learning_rate": 1.9940084608139053e-05,
      "loss": 2.7882,
      "step": 3160
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9979303479194641,
      "learning_rate": 1.9940039595586323e-05,
      "loss": 2.738,
      "step": 3161
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9374758005142212,
      "learning_rate": 1.9939994566182527e-05,
      "loss": 2.7091,
      "step": 3162
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9577284455299377,
      "learning_rate": 1.9939949519927738e-05,
      "loss": 2.9314,
      "step": 3163
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.971271812915802,
      "learning_rate": 1.9939904456822036e-05,
      "loss": 2.8023,
      "step": 3164
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9069137573242188,
      "learning_rate": 1.9939859376865497e-05,
      "loss": 2.9282,
      "step": 3165
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0106910467147827,
      "learning_rate": 1.9939814280058196e-05,
      "loss": 2.8806,
      "step": 3166
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0370928049087524,
      "learning_rate": 1.993976916640021e-05,
      "loss": 2.9316,
      "step": 3167
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9899057149887085,
      "learning_rate": 1.9939724035891612e-05,
      "loss": 2.7709,
      "step": 3168
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9932435154914856,
      "learning_rate": 1.9939678888532488e-05,
      "loss": 2.7971,
      "step": 3169
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0428091287612915,
      "learning_rate": 1.9939633724322903e-05,
      "loss": 2.7058,
      "step": 3170
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0931001901626587,
      "learning_rate": 1.9939588543262943e-05,
      "loss": 2.5794,
      "step": 3171
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9810200333595276,
      "learning_rate": 1.9939543345352682e-05,
      "loss": 2.8962,
      "step": 3172
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9440953135490417,
      "learning_rate": 1.9939498130592195e-05,
      "loss": 3.0164,
      "step": 3173
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1884201765060425,
      "learning_rate": 1.9939452898981556e-05,
      "loss": 2.7836,
      "step": 3174
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9461618065834045,
      "learning_rate": 1.9939407650520846e-05,
      "loss": 2.6816,
      "step": 3175
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0587458610534668,
      "learning_rate": 1.9939362385210144e-05,
      "loss": 2.7995,
      "step": 3176
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0235059261322021,
      "learning_rate": 1.993931710304952e-05,
      "loss": 2.8199,
      "step": 3177
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0696794986724854,
      "learning_rate": 1.9939271804039054e-05,
      "loss": 2.8746,
      "step": 3178
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9988772869110107,
      "learning_rate": 1.9939226488178825e-05,
      "loss": 2.8595,
      "step": 3179
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0649809837341309,
      "learning_rate": 1.9939181155468908e-05,
      "loss": 2.8779,
      "step": 3180
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.077204942703247,
      "learning_rate": 1.9939135805909375e-05,
      "loss": 3.076,
      "step": 3181
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0829721689224243,
      "learning_rate": 1.993909043950031e-05,
      "loss": 2.8139,
      "step": 3182
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0431571006774902,
      "learning_rate": 1.9939045056241788e-05,
      "loss": 2.9784,
      "step": 3183
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9079776406288147,
      "learning_rate": 1.9938999656133883e-05,
      "loss": 2.7681,
      "step": 3184
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0032392740249634,
      "learning_rate": 1.9938954239176677e-05,
      "loss": 2.7183,
      "step": 3185
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9805530309677124,
      "learning_rate": 1.9938908805370244e-05,
      "loss": 2.7902,
      "step": 3186
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9544584155082703,
      "learning_rate": 1.993886335471466e-05,
      "loss": 2.4042,
      "step": 3187
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0848076343536377,
      "learning_rate": 1.9938817887210003e-05,
      "loss": 2.9609,
      "step": 3188
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9934329390525818,
      "learning_rate": 1.993877240285635e-05,
      "loss": 2.8154,
      "step": 3189
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9046439528465271,
      "learning_rate": 1.993872690165378e-05,
      "loss": 2.9208,
      "step": 3190
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9776556491851807,
      "learning_rate": 1.9938681383602368e-05,
      "loss": 2.7838,
      "step": 3191
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8890731334686279,
      "learning_rate": 1.9938635848702193e-05,
      "loss": 2.5723,
      "step": 3192
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0750654935836792,
      "learning_rate": 1.9938590296953327e-05,
      "loss": 2.6385,
      "step": 3193
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0236520767211914,
      "learning_rate": 1.993854472835585e-05,
      "loss": 2.8538,
      "step": 3194
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.950415849685669,
      "learning_rate": 1.993849914290985e-05,
      "loss": 3.0243,
      "step": 3195
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9608497619628906,
      "learning_rate": 1.9938453540615387e-05,
      "loss": 2.6438,
      "step": 3196
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.974388599395752,
      "learning_rate": 1.993840792147255e-05,
      "loss": 2.8738,
      "step": 3197
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9711275696754456,
      "learning_rate": 1.9938362285481408e-05,
      "loss": 2.7978,
      "step": 3198
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.025489330291748,
      "learning_rate": 1.9938316632642043e-05,
      "loss": 2.7223,
      "step": 3199
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9437462687492371,
      "learning_rate": 1.9938270962954535e-05,
      "loss": 2.7314,
      "step": 3200
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0256762504577637,
      "learning_rate": 1.993822527641896e-05,
      "loss": 2.8944,
      "step": 3201
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9979054927825928,
      "learning_rate": 1.9938179573035388e-05,
      "loss": 2.6217,
      "step": 3202
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0008430480957031,
      "learning_rate": 1.9938133852803906e-05,
      "loss": 2.6101,
      "step": 3203
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9735663533210754,
      "learning_rate": 1.9938088115724587e-05,
      "loss": 2.7295,
      "step": 3204
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9080084562301636,
      "learning_rate": 1.9938042361797512e-05,
      "loss": 2.5355,
      "step": 3205
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0131758451461792,
      "learning_rate": 1.993799659102275e-05,
      "loss": 2.6868,
      "step": 3206
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9775466918945312,
      "learning_rate": 1.993795080340039e-05,
      "loss": 2.7631,
      "step": 3207
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.965067446231842,
      "learning_rate": 1.9937904998930507e-05,
      "loss": 2.8533,
      "step": 3208
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1476250886917114,
      "learning_rate": 1.9937859177613172e-05,
      "loss": 2.7546,
      "step": 3209
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9454486966133118,
      "learning_rate": 1.9937813339448463e-05,
      "loss": 2.8729,
      "step": 3210
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1892588138580322,
      "learning_rate": 1.9937767484436466e-05,
      "loss": 2.7806,
      "step": 3211
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9377501010894775,
      "learning_rate": 1.9937721612577254e-05,
      "loss": 2.8564,
      "step": 3212
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9695222973823547,
      "learning_rate": 1.9937675723870903e-05,
      "loss": 2.8425,
      "step": 3213
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9896083474159241,
      "learning_rate": 1.9937629818317497e-05,
      "loss": 2.8634,
      "step": 3214
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.014861822128296,
      "learning_rate": 1.9937583895917103e-05,
      "loss": 2.7878,
      "step": 3215
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.939696729183197,
      "learning_rate": 1.993753795666981e-05,
      "loss": 2.7436,
      "step": 3216
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9628407955169678,
      "learning_rate": 1.993749200057569e-05,
      "loss": 2.8673,
      "step": 3217
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9376967549324036,
      "learning_rate": 1.9937446027634818e-05,
      "loss": 2.8442,
      "step": 3218
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0834283828735352,
      "learning_rate": 1.9937400037847282e-05,
      "loss": 3.021,
      "step": 3219
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1216758489608765,
      "learning_rate": 1.993735403121315e-05,
      "loss": 2.7204,
      "step": 3220
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0330891609191895,
      "learning_rate": 1.9937308007732508e-05,
      "loss": 2.6566,
      "step": 3221
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9845075011253357,
      "learning_rate": 1.993726196740543e-05,
      "loss": 2.8066,
      "step": 3222
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0760489702224731,
      "learning_rate": 1.9937215910231987e-05,
      "loss": 2.8545,
      "step": 3223
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.007950782775879,
      "learning_rate": 1.993716983621227e-05,
      "loss": 2.8251,
      "step": 3224
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9340288043022156,
      "learning_rate": 1.993712374534635e-05,
      "loss": 2.7014,
      "step": 3225
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.007039189338684,
      "learning_rate": 1.9937077637634307e-05,
      "loss": 2.9784,
      "step": 3226
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1157742738723755,
      "learning_rate": 1.993703151307622e-05,
      "loss": 2.8085,
      "step": 3227
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9729766249656677,
      "learning_rate": 1.993698537167216e-05,
      "loss": 2.8511,
      "step": 3228
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.001651406288147,
      "learning_rate": 1.993693921342221e-05,
      "loss": 2.8386,
      "step": 3229
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9985750913619995,
      "learning_rate": 1.993689303832646e-05,
      "loss": 2.8552,
      "step": 3230
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.049896478652954,
      "learning_rate": 1.9936846846384968e-05,
      "loss": 2.9468,
      "step": 3231
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.013085961341858,
      "learning_rate": 1.9936800637597826e-05,
      "loss": 2.7064,
      "step": 3232
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0745139122009277,
      "learning_rate": 1.9936754411965105e-05,
      "loss": 2.8431,
      "step": 3233
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9728822708129883,
      "learning_rate": 1.9936708169486892e-05,
      "loss": 2.8949,
      "step": 3234
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0214731693267822,
      "learning_rate": 1.9936661910163256e-05,
      "loss": 2.7533,
      "step": 3235
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9140561819076538,
      "learning_rate": 1.9936615633994277e-05,
      "loss": 2.7003,
      "step": 3236
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1537684202194214,
      "learning_rate": 1.993656934098004e-05,
      "loss": 2.8017,
      "step": 3237
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0420030355453491,
      "learning_rate": 1.9936523031120618e-05,
      "loss": 2.7621,
      "step": 3238
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9838732481002808,
      "learning_rate": 1.993647670441609e-05,
      "loss": 2.7675,
      "step": 3239
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9965696334838867,
      "learning_rate": 1.9936430360866535e-05,
      "loss": 2.7919,
      "step": 3240
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9880895614624023,
      "learning_rate": 1.9936384000472032e-05,
      "loss": 2.6883,
      "step": 3241
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9089425206184387,
      "learning_rate": 1.993633762323266e-05,
      "loss": 2.6853,
      "step": 3242
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.038543939590454,
      "learning_rate": 1.99362912291485e-05,
      "loss": 2.9237,
      "step": 3243
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9442216157913208,
      "learning_rate": 1.993624481821962e-05,
      "loss": 2.75,
      "step": 3244
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1745637655258179,
      "learning_rate": 1.9936198390446114e-05,
      "loss": 2.7062,
      "step": 3245
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.163018822669983,
      "learning_rate": 1.9936151945828048e-05,
      "loss": 2.8831,
      "step": 3246
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9440976977348328,
      "learning_rate": 1.993610548436551e-05,
      "loss": 2.8872,
      "step": 3247
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9871476292610168,
      "learning_rate": 1.993605900605857e-05,
      "loss": 2.6883,
      "step": 3248
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9699636101722717,
      "learning_rate": 1.9936012510907316e-05,
      "loss": 2.5925,
      "step": 3249
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1025340557098389,
      "learning_rate": 1.993596599891182e-05,
      "loss": 2.8929,
      "step": 3250
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0361074209213257,
      "learning_rate": 1.993591947007216e-05,
      "loss": 2.5333,
      "step": 3251
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9429321885108948,
      "learning_rate": 1.9935872924388424e-05,
      "loss": 2.9293,
      "step": 3252
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9977102875709534,
      "learning_rate": 1.9935826361860685e-05,
      "loss": 2.6599,
      "step": 3253
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0821332931518555,
      "learning_rate": 1.993577978248902e-05,
      "loss": 2.9647,
      "step": 3254
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.005853533744812,
      "learning_rate": 1.9935733186273506e-05,
      "loss": 2.6428,
      "step": 3255
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.089836597442627,
      "learning_rate": 1.9935686573214227e-05,
      "loss": 2.7297,
      "step": 3256
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9401996731758118,
      "learning_rate": 1.9935639943311265e-05,
      "loss": 2.9614,
      "step": 3257
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0583970546722412,
      "learning_rate": 1.993559329656469e-05,
      "loss": 2.7493,
      "step": 3258
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9334164261817932,
      "learning_rate": 1.9935546632974587e-05,
      "loss": 2.7783,
      "step": 3259
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1157108545303345,
      "learning_rate": 1.9935499952541034e-05,
      "loss": 2.7819,
      "step": 3260
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9113713502883911,
      "learning_rate": 1.9935453255264113e-05,
      "loss": 2.862,
      "step": 3261
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9839030504226685,
      "learning_rate": 1.9935406541143897e-05,
      "loss": 2.6063,
      "step": 3262
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0393657684326172,
      "learning_rate": 1.993535981018047e-05,
      "loss": 2.9084,
      "step": 3263
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9843417406082153,
      "learning_rate": 1.993531306237391e-05,
      "loss": 2.8844,
      "step": 3264
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0210843086242676,
      "learning_rate": 1.9935266297724297e-05,
      "loss": 2.9803,
      "step": 3265
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0050488710403442,
      "learning_rate": 1.9935219516231704e-05,
      "loss": 2.8501,
      "step": 3266
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.01081120967865,
      "learning_rate": 1.993517271789622e-05,
      "loss": 2.803,
      "step": 3267
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0002615451812744,
      "learning_rate": 1.993512590271792e-05,
      "loss": 2.5379,
      "step": 3268
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8949485421180725,
      "learning_rate": 1.993507907069688e-05,
      "loss": 2.7101,
      "step": 3269
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.956219494342804,
      "learning_rate": 1.9935032221833184e-05,
      "loss": 2.7669,
      "step": 3270
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0982416868209839,
      "learning_rate": 1.9934985356126916e-05,
      "loss": 2.9556,
      "step": 3271
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0434311628341675,
      "learning_rate": 1.9934938473578142e-05,
      "loss": 2.8807,
      "step": 3272
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.96656334400177,
      "learning_rate": 1.993489157418695e-05,
      "loss": 2.9621,
      "step": 3273
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.005230188369751,
      "learning_rate": 1.9934844657953423e-05,
      "loss": 2.6194,
      "step": 3274
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9858521223068237,
      "learning_rate": 1.9934797724877633e-05,
      "loss": 2.6054,
      "step": 3275
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0899442434310913,
      "learning_rate": 1.9934750774959663e-05,
      "loss": 2.7857,
      "step": 3276
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9872550368309021,
      "learning_rate": 1.9934703808199592e-05,
      "loss": 2.5623,
      "step": 3277
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9264081120491028,
      "learning_rate": 1.9934656824597498e-05,
      "loss": 2.821,
      "step": 3278
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.044764757156372,
      "learning_rate": 1.9934609824153464e-05,
      "loss": 2.7896,
      "step": 3279
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0109888315200806,
      "learning_rate": 1.9934562806867566e-05,
      "loss": 2.7953,
      "step": 3280
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.982928991317749,
      "learning_rate": 1.9934515772739892e-05,
      "loss": 2.9203,
      "step": 3281
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0167943239212036,
      "learning_rate": 1.9934468721770513e-05,
      "loss": 2.8507,
      "step": 3282
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0731308460235596,
      "learning_rate": 1.993442165395951e-05,
      "loss": 2.9068,
      "step": 3283
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9944426417350769,
      "learning_rate": 1.9934374569306964e-05,
      "loss": 2.8461,
      "step": 3284
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0658915042877197,
      "learning_rate": 1.9934327467812956e-05,
      "loss": 2.8684,
      "step": 3285
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.370157241821289,
      "learning_rate": 1.9934280349477563e-05,
      "loss": 2.9272,
      "step": 3286
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0217593908309937,
      "learning_rate": 1.9934233214300867e-05,
      "loss": 2.7027,
      "step": 3287
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.043702483177185,
      "learning_rate": 1.9934186062282947e-05,
      "loss": 2.7517,
      "step": 3288
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9944296479225159,
      "learning_rate": 1.9934138893423888e-05,
      "loss": 3.0107,
      "step": 3289
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0910110473632812,
      "learning_rate": 1.993409170772376e-05,
      "loss": 2.7734,
      "step": 3290
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9691740274429321,
      "learning_rate": 1.993404450518265e-05,
      "loss": 2.9174,
      "step": 3291
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.01833176612854,
      "learning_rate": 1.9933997285800637e-05,
      "loss": 2.7241,
      "step": 3292
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0496057271957397,
      "learning_rate": 1.9933950049577803e-05,
      "loss": 2.6912,
      "step": 3293
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8537389039993286,
      "learning_rate": 1.9933902796514223e-05,
      "loss": 2.8859,
      "step": 3294
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0732249021530151,
      "learning_rate": 1.993385552660998e-05,
      "loss": 2.9375,
      "step": 3295
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0638757944107056,
      "learning_rate": 1.9933808239865152e-05,
      "loss": 2.9308,
      "step": 3296
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0418615341186523,
      "learning_rate": 1.9933760936279824e-05,
      "loss": 2.8497,
      "step": 3297
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9331380724906921,
      "learning_rate": 1.9933713615854072e-05,
      "loss": 2.7577,
      "step": 3298
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9189572930335999,
      "learning_rate": 1.993366627858798e-05,
      "loss": 2.9424,
      "step": 3299
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9381359219551086,
      "learning_rate": 1.9933618924481623e-05,
      "loss": 2.7845,
      "step": 3300
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0218464136123657,
      "learning_rate": 1.9933571553535085e-05,
      "loss": 2.8308,
      "step": 3301
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1127455234527588,
      "learning_rate": 1.9933524165748444e-05,
      "loss": 2.817,
      "step": 3302
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0947004556655884,
      "learning_rate": 1.9933476761121783e-05,
      "loss": 2.8774,
      "step": 3303
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1758891344070435,
      "learning_rate": 1.993342933965518e-05,
      "loss": 2.7085,
      "step": 3304
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1922895908355713,
      "learning_rate": 1.9933381901348717e-05,
      "loss": 2.5875,
      "step": 3305
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.007949948310852,
      "learning_rate": 1.9933334446202473e-05,
      "loss": 2.661,
      "step": 3306
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0946472883224487,
      "learning_rate": 1.993328697421653e-05,
      "loss": 2.7887,
      "step": 3307
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9853113889694214,
      "learning_rate": 1.993323948539097e-05,
      "loss": 2.6483,
      "step": 3308
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9833889603614807,
      "learning_rate": 1.993319197972587e-05,
      "loss": 2.745,
      "step": 3309
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8875301480293274,
      "learning_rate": 1.9933144457221308e-05,
      "loss": 2.7653,
      "step": 3310
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.030785322189331,
      "learning_rate": 1.9933096917877373e-05,
      "loss": 3.1297,
      "step": 3311
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0142136812210083,
      "learning_rate": 1.993304936169414e-05,
      "loss": 2.6437,
      "step": 3312
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0555399656295776,
      "learning_rate": 1.993300178867169e-05,
      "loss": 2.9295,
      "step": 3313
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0235503911972046,
      "learning_rate": 1.9932954198810103e-05,
      "loss": 2.6867,
      "step": 3314
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0671809911727905,
      "learning_rate": 1.993290659210946e-05,
      "loss": 2.9829,
      "step": 3315
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9127234220504761,
      "learning_rate": 1.9932858968569844e-05,
      "loss": 2.719,
      "step": 3316
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0462725162506104,
      "learning_rate": 1.9932811328191335e-05,
      "loss": 2.8853,
      "step": 3317
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9965373873710632,
      "learning_rate": 1.9932763670974014e-05,
      "loss": 3.0422,
      "step": 3318
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0059248208999634,
      "learning_rate": 1.993271599691796e-05,
      "loss": 2.7526,
      "step": 3319
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9260225892066956,
      "learning_rate": 1.993266830602325e-05,
      "loss": 2.9918,
      "step": 3320
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.961581289768219,
      "learning_rate": 1.9932620598289977e-05,
      "loss": 2.9346,
      "step": 3321
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9904019236564636,
      "learning_rate": 1.993257287371821e-05,
      "loss": 2.7231,
      "step": 3322
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9899696111679077,
      "learning_rate": 1.9932525132308035e-05,
      "loss": 2.7664,
      "step": 3323
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.048263430595398,
      "learning_rate": 1.9932477374059534e-05,
      "loss": 2.9251,
      "step": 3324
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.094934344291687,
      "learning_rate": 1.9932429598972784e-05,
      "loss": 2.8594,
      "step": 3325
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.982921302318573,
      "learning_rate": 1.993238180704787e-05,
      "loss": 2.6494,
      "step": 3326
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1582149267196655,
      "learning_rate": 1.993233399828487e-05,
      "loss": 2.8464,
      "step": 3327
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0449635982513428,
      "learning_rate": 1.993228617268387e-05,
      "loss": 2.8539,
      "step": 3328
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.021872639656067,
      "learning_rate": 1.9932238330244942e-05,
      "loss": 2.6097,
      "step": 3329
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5070724487304688,
      "learning_rate": 1.9932190470968175e-05,
      "loss": 2.803,
      "step": 3330
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.936911940574646,
      "learning_rate": 1.9932142594853644e-05,
      "loss": 2.6144,
      "step": 3331
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9807641506195068,
      "learning_rate": 1.993209470190144e-05,
      "loss": 2.6707,
      "step": 3332
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.229230284690857,
      "learning_rate": 1.9932046792111633e-05,
      "loss": 2.6504,
      "step": 3333
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0705934762954712,
      "learning_rate": 1.9931998865484313e-05,
      "loss": 3.0623,
      "step": 3334
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0033808946609497,
      "learning_rate": 1.9931950922019556e-05,
      "loss": 2.8341,
      "step": 3335
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.039017677307129,
      "learning_rate": 1.9931902961717444e-05,
      "loss": 2.5701,
      "step": 3336
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3274797201156616,
      "learning_rate": 1.993185498457806e-05,
      "loss": 2.7861,
      "step": 3337
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9943333864212036,
      "learning_rate": 1.9931806990601482e-05,
      "loss": 2.9192,
      "step": 3338
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9994727969169617,
      "learning_rate": 1.9931758979787794e-05,
      "loss": 2.7684,
      "step": 3339
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2779706716537476,
      "learning_rate": 1.993171095213708e-05,
      "loss": 2.6727,
      "step": 3340
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.032691240310669,
      "learning_rate": 1.9931662907649417e-05,
      "loss": 2.8965,
      "step": 3341
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1308283805847168,
      "learning_rate": 1.9931614846324888e-05,
      "loss": 2.9375,
      "step": 3342
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0963118076324463,
      "learning_rate": 1.9931566768163573e-05,
      "loss": 2.6937,
      "step": 3343
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0032674074172974,
      "learning_rate": 1.9931518673165557e-05,
      "loss": 2.6724,
      "step": 3344
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0916575193405151,
      "learning_rate": 1.9931470561330918e-05,
      "loss": 2.9832,
      "step": 3345
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0089166164398193,
      "learning_rate": 1.993142243265974e-05,
      "loss": 2.6525,
      "step": 3346
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9821559190750122,
      "learning_rate": 1.9931374287152107e-05,
      "loss": 2.7574,
      "step": 3347
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0615335702896118,
      "learning_rate": 1.993132612480809e-05,
      "loss": 2.7279,
      "step": 3348
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9042971134185791,
      "learning_rate": 1.9931277945627783e-05,
      "loss": 2.8849,
      "step": 3349
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5770405530929565,
      "learning_rate": 1.993122974961126e-05,
      "loss": 3.0342,
      "step": 3350
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9200795292854309,
      "learning_rate": 1.9931181536758607e-05,
      "loss": 2.7034,
      "step": 3351
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0578078031539917,
      "learning_rate": 1.9931133307069904e-05,
      "loss": 2.666,
      "step": 3352
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.063853144645691,
      "learning_rate": 1.9931085060545233e-05,
      "loss": 2.9654,
      "step": 3353
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0004676580429077,
      "learning_rate": 1.9931036797184677e-05,
      "loss": 2.738,
      "step": 3354
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9634072184562683,
      "learning_rate": 1.993098851698831e-05,
      "loss": 3.1316,
      "step": 3355
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0340447425842285,
      "learning_rate": 1.9930940219956227e-05,
      "loss": 2.7445,
      "step": 3356
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0847201347351074,
      "learning_rate": 1.9930891906088502e-05,
      "loss": 2.9772,
      "step": 3357
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0859198570251465,
      "learning_rate": 1.9930843575385215e-05,
      "loss": 2.9254,
      "step": 3358
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9576047658920288,
      "learning_rate": 1.9930795227846454e-05,
      "loss": 2.617,
      "step": 3359
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9243859648704529,
      "learning_rate": 1.9930746863472296e-05,
      "loss": 2.6838,
      "step": 3360
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9546908140182495,
      "learning_rate": 1.9930698482262823e-05,
      "loss": 2.8265,
      "step": 3361
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9532461166381836,
      "learning_rate": 1.9930650084218123e-05,
      "loss": 2.7961,
      "step": 3362
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.037443995475769,
      "learning_rate": 1.993060166933827e-05,
      "loss": 2.7812,
      "step": 3363
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9663851857185364,
      "learning_rate": 1.9930553237623353e-05,
      "loss": 2.8243,
      "step": 3364
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0355556011199951,
      "learning_rate": 1.9930504789073453e-05,
      "loss": 2.7739,
      "step": 3365
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9683393239974976,
      "learning_rate": 1.9930456323688645e-05,
      "loss": 2.8448,
      "step": 3366
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9367444515228271,
      "learning_rate": 1.993040784146902e-05,
      "loss": 2.6765,
      "step": 3367
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9985408186912537,
      "learning_rate": 1.9930359342414657e-05,
      "loss": 2.5561,
      "step": 3368
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8659766316413879,
      "learning_rate": 1.9930310826525638e-05,
      "loss": 2.6708,
      "step": 3369
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9509211182594299,
      "learning_rate": 1.9930262293802042e-05,
      "loss": 2.8273,
      "step": 3370
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1588019132614136,
      "learning_rate": 1.9930213744243957e-05,
      "loss": 2.9037,
      "step": 3371
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.952157199382782,
      "learning_rate": 1.9930165177851462e-05,
      "loss": 2.8453,
      "step": 3372
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9427292943000793,
      "learning_rate": 1.993011659462464e-05,
      "loss": 3.0455,
      "step": 3373
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0355011224746704,
      "learning_rate": 1.9930067994563573e-05,
      "loss": 2.844,
      "step": 3374
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1003162860870361,
      "learning_rate": 1.9930019377668345e-05,
      "loss": 2.5817,
      "step": 3375
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0409729480743408,
      "learning_rate": 1.9929970743939034e-05,
      "loss": 2.6268,
      "step": 3376
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.959174394607544,
      "learning_rate": 1.992992209337573e-05,
      "loss": 2.8376,
      "step": 3377
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9694193601608276,
      "learning_rate": 1.992987342597851e-05,
      "loss": 2.7948,
      "step": 3378
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.042104959487915,
      "learning_rate": 1.9929824741747456e-05,
      "loss": 2.6112,
      "step": 3379
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9642194509506226,
      "learning_rate": 1.9929776040682655e-05,
      "loss": 2.8414,
      "step": 3380
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2453687191009521,
      "learning_rate": 1.9929727322784185e-05,
      "loss": 2.8903,
      "step": 3381
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0181221961975098,
      "learning_rate": 1.992967858805213e-05,
      "loss": 2.8929,
      "step": 3382
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0159684419631958,
      "learning_rate": 1.9929629836486573e-05,
      "loss": 2.6777,
      "step": 3383
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9516910910606384,
      "learning_rate": 1.9929581068087596e-05,
      "loss": 2.7827,
      "step": 3384
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9883461594581604,
      "learning_rate": 1.9929532282855283e-05,
      "loss": 2.508,
      "step": 3385
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9732974171638489,
      "learning_rate": 1.9929483480789714e-05,
      "loss": 2.8493,
      "step": 3386
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9198720455169678,
      "learning_rate": 1.9929434661890975e-05,
      "loss": 2.8139,
      "step": 3387
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9933127164840698,
      "learning_rate": 1.992938582615915e-05,
      "loss": 2.6176,
      "step": 3388
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9680997729301453,
      "learning_rate": 1.9929336973594314e-05,
      "loss": 2.843,
      "step": 3389
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9325788617134094,
      "learning_rate": 1.992928810419656e-05,
      "loss": 2.8955,
      "step": 3390
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0608688592910767,
      "learning_rate": 1.9929239217965964e-05,
      "loss": 2.7421,
      "step": 3391
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0308539867401123,
      "learning_rate": 1.9929190314902613e-05,
      "loss": 2.8698,
      "step": 3392
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0497865676879883,
      "learning_rate": 1.9929141395006582e-05,
      "loss": 3.0445,
      "step": 3393
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0608996152877808,
      "learning_rate": 1.9929092458277964e-05,
      "loss": 2.729,
      "step": 3394
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.096817135810852,
      "learning_rate": 1.9929043504716836e-05,
      "loss": 2.9614,
      "step": 3395
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2167805433273315,
      "learning_rate": 1.9928994534323286e-05,
      "loss": 2.7534,
      "step": 3396
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8984221816062927,
      "learning_rate": 1.992894554709739e-05,
      "loss": 2.692,
      "step": 3397
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1577887535095215,
      "learning_rate": 1.9928896543039237e-05,
      "loss": 2.8614,
      "step": 3398
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9831466674804688,
      "learning_rate": 1.9928847522148903e-05,
      "loss": 2.8144,
      "step": 3399
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0047321319580078,
      "learning_rate": 1.992879848442648e-05,
      "loss": 2.9215,
      "step": 3400
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1904042959213257,
      "learning_rate": 1.9928749429872045e-05,
      "loss": 2.9692,
      "step": 3401
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9590195417404175,
      "learning_rate": 1.9928700358485686e-05,
      "loss": 2.8372,
      "step": 3402
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.111788034439087,
      "learning_rate": 1.9928651270267482e-05,
      "loss": 2.8477,
      "step": 3403
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.947016716003418,
      "learning_rate": 1.9928602165217517e-05,
      "loss": 2.887,
      "step": 3404
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0125529766082764,
      "learning_rate": 1.9928553043335875e-05,
      "loss": 2.7435,
      "step": 3405
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0271764993667603,
      "learning_rate": 1.9928503904622638e-05,
      "loss": 2.8958,
      "step": 3406
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0399959087371826,
      "learning_rate": 1.992845474907789e-05,
      "loss": 2.7515,
      "step": 3407
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9450799226760864,
      "learning_rate": 1.9928405576701716e-05,
      "loss": 2.6386,
      "step": 3408
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9557992219924927,
      "learning_rate": 1.9928356387494195e-05,
      "loss": 2.8379,
      "step": 3409
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.072620153427124,
      "learning_rate": 1.992830718145542e-05,
      "loss": 2.7165,
      "step": 3410
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0012152194976807,
      "learning_rate": 1.992825795858546e-05,
      "loss": 2.9363,
      "step": 3411
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0143952369689941,
      "learning_rate": 1.9928208718884413e-05,
      "loss": 2.9518,
      "step": 3412
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.100972056388855,
      "learning_rate": 1.9928159462352353e-05,
      "loss": 2.6851,
      "step": 3413
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0426548719406128,
      "learning_rate": 1.992811018898937e-05,
      "loss": 2.8414,
      "step": 3414
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0342302322387695,
      "learning_rate": 1.9928060898795534e-05,
      "loss": 2.8126,
      "step": 3415
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0276055335998535,
      "learning_rate": 1.992801159177095e-05,
      "loss": 2.9989,
      "step": 3416
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9614527821540833,
      "learning_rate": 1.992796226791568e-05,
      "loss": 2.9808,
      "step": 3417
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.247840404510498,
      "learning_rate": 1.9927912927229826e-05,
      "loss": 2.6237,
      "step": 3418
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9520460963249207,
      "learning_rate": 1.9927863569713457e-05,
      "loss": 2.6983,
      "step": 3419
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0215106010437012,
      "learning_rate": 1.9927814195366663e-05,
      "loss": 2.7587,
      "step": 3420
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0241214036941528,
      "learning_rate": 1.992776480418953e-05,
      "loss": 2.7901,
      "step": 3421
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0105741024017334,
      "learning_rate": 1.992771539618214e-05,
      "loss": 2.9152,
      "step": 3422
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9182330965995789,
      "learning_rate": 1.9927665971344577e-05,
      "loss": 2.8323,
      "step": 3423
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9990398287773132,
      "learning_rate": 1.9927616529676918e-05,
      "loss": 2.6662,
      "step": 3424
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0512033700942993,
      "learning_rate": 1.9927567071179257e-05,
      "loss": 2.6044,
      "step": 3425
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.010972499847412,
      "learning_rate": 1.9927517595851674e-05,
      "loss": 2.694,
      "step": 3426
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0103100538253784,
      "learning_rate": 1.9927468103694252e-05,
      "loss": 2.6788,
      "step": 3427
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1563481092453003,
      "learning_rate": 1.9927418594707075e-05,
      "loss": 2.8898,
      "step": 3428
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9924378395080566,
      "learning_rate": 1.9927369068890227e-05,
      "loss": 2.7158,
      "step": 3429
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.8931219577789307,
      "learning_rate": 1.9927319526243793e-05,
      "loss": 2.5798,
      "step": 3430
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9768775105476379,
      "learning_rate": 1.9927269966767854e-05,
      "loss": 2.7314,
      "step": 3431
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.160698652267456,
      "learning_rate": 1.9927220390462496e-05,
      "loss": 2.7359,
      "step": 3432
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9963934421539307,
      "learning_rate": 1.9927170797327806e-05,
      "loss": 2.7866,
      "step": 3433
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4413044452667236,
      "learning_rate": 1.9927121187363866e-05,
      "loss": 2.9389,
      "step": 3434
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9650879502296448,
      "learning_rate": 1.9927071560570757e-05,
      "loss": 2.791,
      "step": 3435
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9816060066223145,
      "learning_rate": 1.9927021916948566e-05,
      "loss": 2.8108,
      "step": 3436
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0450149774551392,
      "learning_rate": 1.9926972256497373e-05,
      "loss": 2.7637,
      "step": 3437
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9829277396202087,
      "learning_rate": 1.992692257921727e-05,
      "loss": 2.734,
      "step": 3438
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.102262020111084,
      "learning_rate": 1.992687288510834e-05,
      "loss": 2.8889,
      "step": 3439
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0448952913284302,
      "learning_rate": 1.992682317417066e-05,
      "loss": 2.7674,
      "step": 3440
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0592339038848877,
      "learning_rate": 1.992677344640432e-05,
      "loss": 2.8288,
      "step": 3441
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9625842571258545,
      "learning_rate": 1.9926723701809402e-05,
      "loss": 2.729,
      "step": 3442
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9609777927398682,
      "learning_rate": 1.9926673940385993e-05,
      "loss": 2.9146,
      "step": 3443
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9764866828918457,
      "learning_rate": 1.9926624162134174e-05,
      "loss": 2.7775,
      "step": 3444
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0655319690704346,
      "learning_rate": 1.9926574367054032e-05,
      "loss": 2.6365,
      "step": 3445
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4145876169204712,
      "learning_rate": 1.992652455514565e-05,
      "loss": 2.5614,
      "step": 3446
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9390122294425964,
      "learning_rate": 1.992647472640911e-05,
      "loss": 2.891,
      "step": 3447
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0523184537887573,
      "learning_rate": 1.9926424880844505e-05,
      "loss": 2.9541,
      "step": 3448
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0742056369781494,
      "learning_rate": 1.9926375018451913e-05,
      "loss": 2.6268,
      "step": 3449
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.109056830406189,
      "learning_rate": 1.9926325139231414e-05,
      "loss": 2.6769,
      "step": 3450
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0385411977767944,
      "learning_rate": 1.9926275243183102e-05,
      "loss": 2.7627,
      "step": 3451
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0329155921936035,
      "learning_rate": 1.9926225330307058e-05,
      "loss": 2.9202,
      "step": 3452
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9842711091041565,
      "learning_rate": 1.9926175400603366e-05,
      "loss": 2.9306,
      "step": 3453
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9240018129348755,
      "learning_rate": 1.992612545407211e-05,
      "loss": 2.806,
      "step": 3454
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.971112072467804,
      "learning_rate": 1.9926075490713373e-05,
      "loss": 2.785,
      "step": 3455
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.0915416479110718,
      "learning_rate": 1.9926025510527246e-05,
      "loss": 2.904,
      "step": 3456
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9969411492347717,
      "learning_rate": 1.9925975513513806e-05,
      "loss": 2.7522,
      "step": 3457
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9642713665962219,
      "learning_rate": 1.9925925499673145e-05,
      "loss": 2.7692,
      "step": 3458
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.2490882873535156,
      "learning_rate": 1.9925875469005345e-05,
      "loss": 2.8646,
      "step": 3459
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.080782175064087,
      "learning_rate": 1.9925825421510486e-05,
      "loss": 2.8549,
      "step": 3460
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9824575781822205,
      "learning_rate": 1.992577535718866e-05,
      "loss": 2.9134,
      "step": 3461
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1386173963546753,
      "learning_rate": 1.992572527603995e-05,
      "loss": 2.6024,
      "step": 3462
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8812263011932373,
      "learning_rate": 1.9925675178064438e-05,
      "loss": 3.0378,
      "step": 3463
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.2152243852615356,
      "learning_rate": 1.9925625063262214e-05,
      "loss": 2.8501,
      "step": 3464
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9674428701400757,
      "learning_rate": 1.9925574931633355e-05,
      "loss": 2.8879,
      "step": 3465
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0271241664886475,
      "learning_rate": 1.9925524783177956e-05,
      "loss": 2.7712,
      "step": 3466
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0105953216552734,
      "learning_rate": 1.992547461789609e-05,
      "loss": 2.8479,
      "step": 3467
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9951740503311157,
      "learning_rate": 1.9925424435787853e-05,
      "loss": 2.9034,
      "step": 3468
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0214186906814575,
      "learning_rate": 1.992537423685333e-05,
      "loss": 2.8176,
      "step": 3469
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1677451133728027,
      "learning_rate": 1.9925324021092594e-05,
      "loss": 2.8556,
      "step": 3470
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9994052648544312,
      "learning_rate": 1.9925273788505742e-05,
      "loss": 2.8021,
      "step": 3471
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0662752389907837,
      "learning_rate": 1.9925223539092852e-05,
      "loss": 2.85,
      "step": 3472
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.030385971069336,
      "learning_rate": 1.9925173272854018e-05,
      "loss": 2.7309,
      "step": 3473
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0394139289855957,
      "learning_rate": 1.9925122989789316e-05,
      "loss": 2.82,
      "step": 3474
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0326519012451172,
      "learning_rate": 1.9925072689898835e-05,
      "loss": 2.7526,
      "step": 3475
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.084366798400879,
      "learning_rate": 1.9925022373182662e-05,
      "loss": 2.8676,
      "step": 3476
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.087454080581665,
      "learning_rate": 1.9924972039640878e-05,
      "loss": 2.6634,
      "step": 3477
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9331421852111816,
      "learning_rate": 1.992492168927357e-05,
      "loss": 2.9166,
      "step": 3478
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0942927598953247,
      "learning_rate": 1.992487132208083e-05,
      "loss": 2.9102,
      "step": 3479
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.059770941734314,
      "learning_rate": 1.9924820938062732e-05,
      "loss": 2.7153,
      "step": 3480
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9617868661880493,
      "learning_rate": 1.992477053721937e-05,
      "loss": 2.8324,
      "step": 3481
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0430657863616943,
      "learning_rate": 1.9924720119550824e-05,
      "loss": 2.7702,
      "step": 3482
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9509655833244324,
      "learning_rate": 1.9924669685057182e-05,
      "loss": 2.9364,
      "step": 3483
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9469743371009827,
      "learning_rate": 1.9924619233738533e-05,
      "loss": 2.601,
      "step": 3484
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0080465078353882,
      "learning_rate": 1.9924568765594956e-05,
      "loss": 2.7636,
      "step": 3485
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0024405717849731,
      "learning_rate": 1.992451828062654e-05,
      "loss": 2.6379,
      "step": 3486
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0629929304122925,
      "learning_rate": 1.992446777883337e-05,
      "loss": 2.5777,
      "step": 3487
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0430657863616943,
      "learning_rate": 1.9924417260215533e-05,
      "loss": 2.8157,
      "step": 3488
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0696767568588257,
      "learning_rate": 1.992436672477311e-05,
      "loss": 2.7674,
      "step": 3489
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0509700775146484,
      "learning_rate": 1.992431617250619e-05,
      "loss": 2.5802,
      "step": 3490
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.016256332397461,
      "learning_rate": 1.9924265603414863e-05,
      "loss": 2.8356,
      "step": 3491
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0467867851257324,
      "learning_rate": 1.9924215017499205e-05,
      "loss": 2.7848,
      "step": 3492
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9457511305809021,
      "learning_rate": 1.992416441475931e-05,
      "loss": 2.6621,
      "step": 3493
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0015183687210083,
      "learning_rate": 1.9924113795195263e-05,
      "loss": 2.6379,
      "step": 3494
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0196421146392822,
      "learning_rate": 1.9924063158807144e-05,
      "loss": 2.6568,
      "step": 3495
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1192830801010132,
      "learning_rate": 1.9924012505595044e-05,
      "loss": 2.7009,
      "step": 3496
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0393229722976685,
      "learning_rate": 1.992396183555905e-05,
      "loss": 2.8585,
      "step": 3497
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9199212789535522,
      "learning_rate": 1.9923911148699244e-05,
      "loss": 2.8198,
      "step": 3498
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0127204656600952,
      "learning_rate": 1.9923860445015713e-05,
      "loss": 2.8973,
      "step": 3499
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5032473802566528,
      "learning_rate": 1.9923809724508542e-05,
      "loss": 2.85,
      "step": 3500
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9773111939430237,
      "learning_rate": 1.992375898717782e-05,
      "loss": 2.8519,
      "step": 3501
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9939420223236084,
      "learning_rate": 1.992370823302363e-05,
      "loss": 2.907,
      "step": 3502
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9922443628311157,
      "learning_rate": 1.992365746204606e-05,
      "loss": 2.841,
      "step": 3503
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0411796569824219,
      "learning_rate": 1.9923606674245195e-05,
      "loss": 3.0453,
      "step": 3504
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0114306211471558,
      "learning_rate": 1.992355586962112e-05,
      "loss": 2.7827,
      "step": 3505
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0745450258255005,
      "learning_rate": 1.9923505048173925e-05,
      "loss": 2.9291,
      "step": 3506
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9546449780464172,
      "learning_rate": 1.9923454209903693e-05,
      "loss": 2.7586,
      "step": 3507
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9681736826896667,
      "learning_rate": 1.9923403354810512e-05,
      "loss": 2.6985,
      "step": 3508
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.035780668258667,
      "learning_rate": 1.9923352482894464e-05,
      "loss": 2.6971,
      "step": 3509
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9983640909194946,
      "learning_rate": 1.992330159415564e-05,
      "loss": 2.7584,
      "step": 3510
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9886947870254517,
      "learning_rate": 1.9923250688594126e-05,
      "loss": 2.9992,
      "step": 3511
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9464759230613708,
      "learning_rate": 1.9923199766210005e-05,
      "loss": 2.9547,
      "step": 3512
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0545873641967773,
      "learning_rate": 1.9923148827003366e-05,
      "loss": 2.8428,
      "step": 3513
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9946345686912537,
      "learning_rate": 1.9923097870974297e-05,
      "loss": 2.7418,
      "step": 3514
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0412622690200806,
      "learning_rate": 1.992304689812288e-05,
      "loss": 2.753,
      "step": 3515
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0502253770828247,
      "learning_rate": 1.9922995908449204e-05,
      "loss": 2.8264,
      "step": 3516
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0136774778366089,
      "learning_rate": 1.9922944901953353e-05,
      "loss": 2.5127,
      "step": 3517
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0089787244796753,
      "learning_rate": 1.9922893878635417e-05,
      "loss": 2.8551,
      "step": 3518
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.305158019065857,
      "learning_rate": 1.992284283849548e-05,
      "loss": 2.9685,
      "step": 3519
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0381979942321777,
      "learning_rate": 1.992279178153363e-05,
      "loss": 2.9903,
      "step": 3520
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0164155960083008,
      "learning_rate": 1.9922740707749954e-05,
      "loss": 2.803,
      "step": 3521
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0546925067901611,
      "learning_rate": 1.9922689617144534e-05,
      "loss": 2.81,
      "step": 3522
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0241142511367798,
      "learning_rate": 1.9922638509717463e-05,
      "loss": 2.6155,
      "step": 3523
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9942298531532288,
      "learning_rate": 1.9922587385468827e-05,
      "loss": 2.6749,
      "step": 3524
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0631463527679443,
      "learning_rate": 1.9922536244398705e-05,
      "loss": 2.948,
      "step": 3525
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9300572872161865,
      "learning_rate": 1.9922485086507194e-05,
      "loss": 2.9407,
      "step": 3526
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9054935574531555,
      "learning_rate": 1.9922433911794374e-05,
      "loss": 2.7022,
      "step": 3527
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0576571226119995,
      "learning_rate": 1.9922382720260333e-05,
      "loss": 2.699,
      "step": 3528
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8985180258750916,
      "learning_rate": 1.9922331511905157e-05,
      "loss": 2.6481,
      "step": 3529
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0345501899719238,
      "learning_rate": 1.9922280286728936e-05,
      "loss": 2.7831,
      "step": 3530
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9965613484382629,
      "learning_rate": 1.9922229044731756e-05,
      "loss": 2.8326,
      "step": 3531
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9962769150733948,
      "learning_rate": 1.99221777859137e-05,
      "loss": 2.7623,
      "step": 3532
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1016777753829956,
      "learning_rate": 1.992212651027486e-05,
      "loss": 2.8888,
      "step": 3533
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9330457448959351,
      "learning_rate": 1.992207521781532e-05,
      "loss": 2.8488,
      "step": 3534
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0294420719146729,
      "learning_rate": 1.9922023908535172e-05,
      "loss": 2.7658,
      "step": 3535
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9629634618759155,
      "learning_rate": 1.992197258243449e-05,
      "loss": 2.6865,
      "step": 3536
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1246534585952759,
      "learning_rate": 1.992192123951338e-05,
      "loss": 2.9462,
      "step": 3537
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9573847055435181,
      "learning_rate": 1.992186987977191e-05,
      "loss": 2.7956,
      "step": 3538
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0247845649719238,
      "learning_rate": 1.992181850321018e-05,
      "loss": 2.6973,
      "step": 3539
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9565038084983826,
      "learning_rate": 1.9921767109828273e-05,
      "loss": 2.7481,
      "step": 3540
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0935086011886597,
      "learning_rate": 1.9921715699626274e-05,
      "loss": 2.8064,
      "step": 3541
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9271145462989807,
      "learning_rate": 1.9921664272604273e-05,
      "loss": 2.7105,
      "step": 3542
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.056637167930603,
      "learning_rate": 1.9921612828762357e-05,
      "loss": 2.9618,
      "step": 3543
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9238767623901367,
      "learning_rate": 1.9921561368100613e-05,
      "loss": 2.6836,
      "step": 3544
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.02367103099823,
      "learning_rate": 1.9921509890619125e-05,
      "loss": 2.6488,
      "step": 3545
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1890370845794678,
      "learning_rate": 1.992145839631799e-05,
      "loss": 2.9836,
      "step": 3546
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0099419355392456,
      "learning_rate": 1.9921406885197282e-05,
      "loss": 2.9194,
      "step": 3547
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9636458158493042,
      "learning_rate": 1.9921355357257097e-05,
      "loss": 2.8836,
      "step": 3548
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9856219291687012,
      "learning_rate": 1.9921303812497517e-05,
      "loss": 2.8694,
      "step": 3549
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0253920555114746,
      "learning_rate": 1.9921252250918634e-05,
      "loss": 2.6813,
      "step": 3550
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1354745626449585,
      "learning_rate": 1.9921200672520536e-05,
      "loss": 2.8682,
      "step": 3551
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9858112335205078,
      "learning_rate": 1.9921149077303305e-05,
      "loss": 2.6788,
      "step": 3552
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1551517248153687,
      "learning_rate": 1.9921097465267036e-05,
      "loss": 2.7606,
      "step": 3553
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0078331232070923,
      "learning_rate": 1.992104583641181e-05,
      "loss": 2.5986,
      "step": 3554
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0849891901016235,
      "learning_rate": 1.9920994190737713e-05,
      "loss": 3.0591,
      "step": 3555
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.946404755115509,
      "learning_rate": 1.9920942528244843e-05,
      "loss": 2.7597,
      "step": 3556
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.2842624187469482,
      "learning_rate": 1.9920890848933276e-05,
      "loss": 2.7486,
      "step": 3557
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9388296604156494,
      "learning_rate": 1.9920839152803108e-05,
      "loss": 2.8288,
      "step": 3558
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9707484245300293,
      "learning_rate": 1.992078743985442e-05,
      "loss": 2.7051,
      "step": 3559
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9441770315170288,
      "learning_rate": 1.9920735710087306e-05,
      "loss": 2.8585,
      "step": 3560
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9833685755729675,
      "learning_rate": 1.992068396350185e-05,
      "loss": 2.6571,
      "step": 3561
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0667259693145752,
      "learning_rate": 1.9920632200098142e-05,
      "loss": 2.718,
      "step": 3562
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0528500080108643,
      "learning_rate": 1.992058041987626e-05,
      "loss": 2.7215,
      "step": 3563
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9974415302276611,
      "learning_rate": 1.9920528622836306e-05,
      "loss": 2.7844,
      "step": 3564
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8722954988479614,
      "learning_rate": 1.992047680897836e-05,
      "loss": 2.6745,
      "step": 3565
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9686970114707947,
      "learning_rate": 1.9920424978302514e-05,
      "loss": 2.6927,
      "step": 3566
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9833462238311768,
      "learning_rate": 1.9920373130808853e-05,
      "loss": 2.6096,
      "step": 3567
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0094233751296997,
      "learning_rate": 1.9920321266497464e-05,
      "loss": 2.5958,
      "step": 3568
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0407854318618774,
      "learning_rate": 1.9920269385368432e-05,
      "loss": 2.4773,
      "step": 3569
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9962458610534668,
      "learning_rate": 1.9920217487421856e-05,
      "loss": 2.8332,
      "step": 3570
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9598596692085266,
      "learning_rate": 1.9920165572657814e-05,
      "loss": 2.8626,
      "step": 3571
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0415472984313965,
      "learning_rate": 1.9920113641076396e-05,
      "loss": 2.7879,
      "step": 3572
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.045593023300171,
      "learning_rate": 1.9920061692677692e-05,
      "loss": 2.9321,
      "step": 3573
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0179803371429443,
      "learning_rate": 1.992000972746179e-05,
      "loss": 2.6799,
      "step": 3574
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0204293727874756,
      "learning_rate": 1.991995774542878e-05,
      "loss": 2.9352,
      "step": 3575
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9312888979911804,
      "learning_rate": 1.9919905746578742e-05,
      "loss": 2.6338,
      "step": 3576
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.016754388809204,
      "learning_rate": 1.9919853730911768e-05,
      "loss": 2.7942,
      "step": 3577
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9327657222747803,
      "learning_rate": 1.9919801698427954e-05,
      "loss": 2.8817,
      "step": 3578
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0452221632003784,
      "learning_rate": 1.9919749649127378e-05,
      "loss": 2.5658,
      "step": 3579
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0914589166641235,
      "learning_rate": 1.9919697583010135e-05,
      "loss": 2.9256,
      "step": 3580
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9988155961036682,
      "learning_rate": 1.9919645500076307e-05,
      "loss": 2.9255,
      "step": 3581
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0167781114578247,
      "learning_rate": 1.9919593400325988e-05,
      "loss": 2.8708,
      "step": 3582
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9578032493591309,
      "learning_rate": 1.9919541283759263e-05,
      "loss": 2.4336,
      "step": 3583
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0195101499557495,
      "learning_rate": 1.9919489150376224e-05,
      "loss": 2.8005,
      "step": 3584
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0306838750839233,
      "learning_rate": 1.9919437000176954e-05,
      "loss": 2.8835,
      "step": 3585
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.2895721197128296,
      "learning_rate": 1.9919384833161547e-05,
      "loss": 2.6139,
      "step": 3586
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0790773630142212,
      "learning_rate": 1.9919332649330086e-05,
      "loss": 2.5937,
      "step": 3587
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9611873626708984,
      "learning_rate": 1.9919280448682662e-05,
      "loss": 2.7235,
      "step": 3588
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9496548771858215,
      "learning_rate": 1.991922823121936e-05,
      "loss": 2.8762,
      "step": 3589
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0624548196792603,
      "learning_rate": 1.991917599694028e-05,
      "loss": 2.7199,
      "step": 3590
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0436638593673706,
      "learning_rate": 1.99191237458455e-05,
      "loss": 2.8079,
      "step": 3591
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9417016506195068,
      "learning_rate": 1.991907147793511e-05,
      "loss": 2.4746,
      "step": 3592
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9911989569664001,
      "learning_rate": 1.99190191932092e-05,
      "loss": 2.8023,
      "step": 3593
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9198434948921204,
      "learning_rate": 1.991896689166786e-05,
      "loss": 2.831,
      "step": 3594
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.04993736743927,
      "learning_rate": 1.9918914573311176e-05,
      "loss": 2.997,
      "step": 3595
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1007404327392578,
      "learning_rate": 1.9918862238139237e-05,
      "loss": 2.6777,
      "step": 3596
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0626686811447144,
      "learning_rate": 1.9918809886152135e-05,
      "loss": 2.8384,
      "step": 3597
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1094762086868286,
      "learning_rate": 1.9918757517349952e-05,
      "loss": 2.8243,
      "step": 3598
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0067907571792603,
      "learning_rate": 1.9918705131732786e-05,
      "loss": 2.7784,
      "step": 3599
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9742881059646606,
      "learning_rate": 1.991865272930072e-05,
      "loss": 2.7433,
      "step": 3600
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9230492115020752,
      "learning_rate": 1.991860031005384e-05,
      "loss": 2.6187,
      "step": 3601
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.938757061958313,
      "learning_rate": 1.9918547873992242e-05,
      "loss": 2.6323,
      "step": 3602
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0096334218978882,
      "learning_rate": 1.991849542111601e-05,
      "loss": 2.5498,
      "step": 3603
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1308964490890503,
      "learning_rate": 1.9918442951425235e-05,
      "loss": 2.5903,
      "step": 3604
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9728731513023376,
      "learning_rate": 1.9918390464920008e-05,
      "loss": 2.992,
      "step": 3605
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.983209490776062,
      "learning_rate": 1.991833796160041e-05,
      "loss": 2.6056,
      "step": 3606
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9015092253684998,
      "learning_rate": 1.991828544146654e-05,
      "loss": 2.8427,
      "step": 3607
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.959867000579834,
      "learning_rate": 1.991823290451848e-05,
      "loss": 2.6491,
      "step": 3608
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9288502335548401,
      "learning_rate": 1.9918180350756324e-05,
      "loss": 2.6499,
      "step": 3609
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9489830136299133,
      "learning_rate": 1.9918127780180156e-05,
      "loss": 2.8309,
      "step": 3610
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9248335361480713,
      "learning_rate": 1.991807519279007e-05,
      "loss": 2.6711,
      "step": 3611
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0264732837677002,
      "learning_rate": 1.991802258858615e-05,
      "loss": 2.7643,
      "step": 3612
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9262929558753967,
      "learning_rate": 1.9917969967568487e-05,
      "loss": 2.7911,
      "step": 3613
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9996541142463684,
      "learning_rate": 1.9917917329737174e-05,
      "loss": 2.9151,
      "step": 3614
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0852463245391846,
      "learning_rate": 1.99178646750923e-05,
      "loss": 2.7682,
      "step": 3615
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9601466655731201,
      "learning_rate": 1.9917812003633945e-05,
      "loss": 2.7277,
      "step": 3616
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.068528175354004,
      "learning_rate": 1.991775931536221e-05,
      "loss": 2.8105,
      "step": 3617
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0234942436218262,
      "learning_rate": 1.991770661027718e-05,
      "loss": 2.679,
      "step": 3618
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0672039985656738,
      "learning_rate": 1.9917653888378942e-05,
      "loss": 2.9551,
      "step": 3619
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1814378499984741,
      "learning_rate": 1.991760114966758e-05,
      "loss": 2.8345,
      "step": 3620
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9943850636482239,
      "learning_rate": 1.9917548394143202e-05,
      "loss": 2.7514,
      "step": 3621
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.026268720626831,
      "learning_rate": 1.991749562180588e-05,
      "loss": 2.7101,
      "step": 3622
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0051313638687134,
      "learning_rate": 1.991744283265571e-05,
      "loss": 2.8186,
      "step": 3623
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0335742235183716,
      "learning_rate": 1.9917390026692782e-05,
      "loss": 2.7737,
      "step": 3624
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9652961492538452,
      "learning_rate": 1.9917337203917183e-05,
      "loss": 2.7863,
      "step": 3625
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9641575217247009,
      "learning_rate": 1.9917284364329006e-05,
      "loss": 2.7074,
      "step": 3626
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9381993412971497,
      "learning_rate": 1.9917231507928336e-05,
      "loss": 2.9523,
      "step": 3627
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.113820195198059,
      "learning_rate": 1.9917178634715265e-05,
      "loss": 2.8786,
      "step": 3628
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0732429027557373,
      "learning_rate": 1.9917125744689888e-05,
      "loss": 2.7733,
      "step": 3629
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9980700612068176,
      "learning_rate": 1.9917072837852282e-05,
      "loss": 2.9585,
      "step": 3630
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0990216732025146,
      "learning_rate": 1.991701991420255e-05,
      "loss": 2.7304,
      "step": 3631
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9641981720924377,
      "learning_rate": 1.991696697374077e-05,
      "loss": 2.8079,
      "step": 3632
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9546264410018921,
      "learning_rate": 1.9916914016467043e-05,
      "loss": 2.9042,
      "step": 3633
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9398106932640076,
      "learning_rate": 1.9916861042381452e-05,
      "loss": 2.7209,
      "step": 3634
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9623807072639465,
      "learning_rate": 1.9916808051484086e-05,
      "loss": 2.6878,
      "step": 3635
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9295570254325867,
      "learning_rate": 1.9916755043775038e-05,
      "loss": 2.8381,
      "step": 3636
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.013848900794983,
      "learning_rate": 1.9916702019254398e-05,
      "loss": 2.9343,
      "step": 3637
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9894880652427673,
      "learning_rate": 1.991664897792225e-05,
      "loss": 2.8204,
      "step": 3638
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9389442205429077,
      "learning_rate": 1.9916595919778696e-05,
      "loss": 2.73,
      "step": 3639
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.003788709640503,
      "learning_rate": 1.9916542844823814e-05,
      "loss": 2.519,
      "step": 3640
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9690541625022888,
      "learning_rate": 1.9916489753057703e-05,
      "loss": 2.7205,
      "step": 3641
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9568485617637634,
      "learning_rate": 1.9916436644480444e-05,
      "loss": 2.6679,
      "step": 3642
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0496577024459839,
      "learning_rate": 1.9916383519092132e-05,
      "loss": 2.8836,
      "step": 3643
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9758594632148743,
      "learning_rate": 1.991633037689286e-05,
      "loss": 2.616,
      "step": 3644
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9370554685592651,
      "learning_rate": 1.991627721788271e-05,
      "loss": 2.6472,
      "step": 3645
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1179360151290894,
      "learning_rate": 1.991622404206178e-05,
      "loss": 2.7816,
      "step": 3646
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0359530448913574,
      "learning_rate": 1.9916170849430158e-05,
      "loss": 2.7385,
      "step": 3647
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9758473038673401,
      "learning_rate": 1.991611763998793e-05,
      "loss": 2.7903,
      "step": 3648
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9384191036224365,
      "learning_rate": 1.9916064413735193e-05,
      "loss": 3.0132,
      "step": 3649
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0540934801101685,
      "learning_rate": 1.991601117067203e-05,
      "loss": 2.7559,
      "step": 3650
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.051537275314331,
      "learning_rate": 1.9915957910798536e-05,
      "loss": 2.8697,
      "step": 3651
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0709023475646973,
      "learning_rate": 1.9915904634114798e-05,
      "loss": 2.8757,
      "step": 3652
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1305204629898071,
      "learning_rate": 1.9915851340620916e-05,
      "loss": 2.8102,
      "step": 3653
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9693034291267395,
      "learning_rate": 1.9915798030316967e-05,
      "loss": 2.7131,
      "step": 3654
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9715014696121216,
      "learning_rate": 1.991574470320305e-05,
      "loss": 2.9391,
      "step": 3655
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.007169485092163,
      "learning_rate": 1.991569135927925e-05,
      "loss": 2.7889,
      "step": 3656
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.913788378238678,
      "learning_rate": 1.991563799854566e-05,
      "loss": 2.6877,
      "step": 3657
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.965644121170044,
      "learning_rate": 1.9915584621002374e-05,
      "loss": 2.4745,
      "step": 3658
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.061679720878601,
      "learning_rate": 1.9915531226649473e-05,
      "loss": 2.8724,
      "step": 3659
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9634270668029785,
      "learning_rate": 1.9915477815487058e-05,
      "loss": 2.8237,
      "step": 3660
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.050297737121582,
      "learning_rate": 1.9915424387515213e-05,
      "loss": 2.8522,
      "step": 3661
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9961175322532654,
      "learning_rate": 1.9915370942734033e-05,
      "loss": 2.8901,
      "step": 3662
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9508734941482544,
      "learning_rate": 1.9915317481143607e-05,
      "loss": 2.6779,
      "step": 3663
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0430703163146973,
      "learning_rate": 1.991526400274402e-05,
      "loss": 2.6663,
      "step": 3664
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9424455165863037,
      "learning_rate": 1.9915210507535372e-05,
      "loss": 2.8306,
      "step": 3665
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.975131630897522,
      "learning_rate": 1.991515699551775e-05,
      "loss": 2.5832,
      "step": 3666
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9620649218559265,
      "learning_rate": 1.991510346669124e-05,
      "loss": 3.025,
      "step": 3667
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0474151372909546,
      "learning_rate": 1.991504992105594e-05,
      "loss": 2.9982,
      "step": 3668
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.088380217552185,
      "learning_rate": 1.9914996358611938e-05,
      "loss": 2.8115,
      "step": 3669
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9925658702850342,
      "learning_rate": 1.991494277935932e-05,
      "loss": 2.8182,
      "step": 3670
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9573149681091309,
      "learning_rate": 1.9914889183298185e-05,
      "loss": 2.718,
      "step": 3671
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9618481397628784,
      "learning_rate": 1.991483557042862e-05,
      "loss": 2.7486,
      "step": 3672
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.052943468093872,
      "learning_rate": 1.991478194075071e-05,
      "loss": 2.7248,
      "step": 3673
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9468994140625,
      "learning_rate": 1.991472829426456e-05,
      "loss": 2.7779,
      "step": 3674
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8508172631263733,
      "learning_rate": 1.9914674630970248e-05,
      "loss": 2.7771,
      "step": 3675
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9762924313545227,
      "learning_rate": 1.991462095086787e-05,
      "loss": 2.626,
      "step": 3676
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.987812876701355,
      "learning_rate": 1.9914567253957517e-05,
      "loss": 2.8828,
      "step": 3677
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0053671598434448,
      "learning_rate": 1.9914513540239283e-05,
      "loss": 2.8196,
      "step": 3678
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0436255931854248,
      "learning_rate": 1.991445980971325e-05,
      "loss": 2.7782,
      "step": 3679
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9098607301712036,
      "learning_rate": 1.9914406062379518e-05,
      "loss": 2.6496,
      "step": 3680
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9701529145240784,
      "learning_rate": 1.9914352298238175e-05,
      "loss": 2.7474,
      "step": 3681
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9230700731277466,
      "learning_rate": 1.991429851728931e-05,
      "loss": 2.6624,
      "step": 3682
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.058063268661499,
      "learning_rate": 1.9914244719533022e-05,
      "loss": 2.7731,
      "step": 3683
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0537569522857666,
      "learning_rate": 1.9914190904969394e-05,
      "loss": 2.623,
      "step": 3684
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0129268169403076,
      "learning_rate": 1.9914137073598517e-05,
      "loss": 2.8715,
      "step": 3685
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0109931230545044,
      "learning_rate": 1.9914083225420485e-05,
      "loss": 2.8078,
      "step": 3686
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9893196821212769,
      "learning_rate": 1.991402936043539e-05,
      "loss": 2.6269,
      "step": 3687
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9618573784828186,
      "learning_rate": 1.9913975478643327e-05,
      "loss": 2.6069,
      "step": 3688
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9319003820419312,
      "learning_rate": 1.991392158004438e-05,
      "loss": 2.5855,
      "step": 3689
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1023534536361694,
      "learning_rate": 1.991386766463864e-05,
      "loss": 2.9845,
      "step": 3690
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9971767663955688,
      "learning_rate": 1.9913813732426206e-05,
      "loss": 2.6247,
      "step": 3691
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9598639607429504,
      "learning_rate": 1.9913759783407163e-05,
      "loss": 2.9933,
      "step": 3692
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9123126864433289,
      "learning_rate": 1.9913705817581607e-05,
      "loss": 2.7925,
      "step": 3693
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.920316755771637,
      "learning_rate": 1.9913651834949624e-05,
      "loss": 2.5311,
      "step": 3694
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0330041646957397,
      "learning_rate": 1.991359783551131e-05,
      "loss": 2.8286,
      "step": 3695
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9941976070404053,
      "learning_rate": 1.9913543819266756e-05,
      "loss": 2.9236,
      "step": 3696
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1827900409698486,
      "learning_rate": 1.991348978621605e-05,
      "loss": 2.7256,
      "step": 3697
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0763497352600098,
      "learning_rate": 1.991343573635929e-05,
      "loss": 2.7089,
      "step": 3698
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0377274751663208,
      "learning_rate": 1.9913381669696564e-05,
      "loss": 2.8807,
      "step": 3699
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3363109827041626,
      "learning_rate": 1.991332758622796e-05,
      "loss": 2.7305,
      "step": 3700
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0718497037887573,
      "learning_rate": 1.9913273485953577e-05,
      "loss": 2.5949,
      "step": 3701
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0167901515960693,
      "learning_rate": 1.9913219368873504e-05,
      "loss": 2.728,
      "step": 3702
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9637195467948914,
      "learning_rate": 1.9913165234987826e-05,
      "loss": 2.9896,
      "step": 3703
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1205031871795654,
      "learning_rate": 1.9913111084296645e-05,
      "loss": 2.783,
      "step": 3704
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9743786454200745,
      "learning_rate": 1.9913056916800046e-05,
      "loss": 2.9738,
      "step": 3705
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0166906118392944,
      "learning_rate": 1.9913002732498127e-05,
      "loss": 2.7071,
      "step": 3706
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0187904834747314,
      "learning_rate": 1.9912948531390973e-05,
      "loss": 2.6594,
      "step": 3707
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9878503680229187,
      "learning_rate": 1.9912894313478678e-05,
      "loss": 2.7891,
      "step": 3708
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9462363123893738,
      "learning_rate": 1.9912840078761334e-05,
      "loss": 2.6401,
      "step": 3709
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0221638679504395,
      "learning_rate": 1.991278582723904e-05,
      "loss": 2.5675,
      "step": 3710
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.017525315284729,
      "learning_rate": 1.9912731558911873e-05,
      "loss": 2.8742,
      "step": 3711
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0011382102966309,
      "learning_rate": 1.991267727377994e-05,
      "loss": 2.7749,
      "step": 3712
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0723804235458374,
      "learning_rate": 1.9912622971843326e-05,
      "loss": 2.8793,
      "step": 3713
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9681645035743713,
      "learning_rate": 1.991256865310212e-05,
      "loss": 2.7866,
      "step": 3714
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9997218251228333,
      "learning_rate": 1.991251431755642e-05,
      "loss": 2.7364,
      "step": 3715
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9584503173828125,
      "learning_rate": 1.991245996520632e-05,
      "loss": 2.587,
      "step": 3716
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9872071743011475,
      "learning_rate": 1.9912405596051904e-05,
      "loss": 2.9575,
      "step": 3717
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1834256649017334,
      "learning_rate": 1.9912351210093267e-05,
      "loss": 3.2365,
      "step": 3718
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0810346603393555,
      "learning_rate": 1.9912296807330505e-05,
      "loss": 2.4308,
      "step": 3719
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0167274475097656,
      "learning_rate": 1.991224238776371e-05,
      "loss": 2.7637,
      "step": 3720
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9719821810722351,
      "learning_rate": 1.991218795139297e-05,
      "loss": 2.7394,
      "step": 3721
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9103384613990784,
      "learning_rate": 1.991213349821838e-05,
      "loss": 2.6224,
      "step": 3722
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9189510345458984,
      "learning_rate": 1.991207902824003e-05,
      "loss": 2.5705,
      "step": 3723
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.505387544631958,
      "learning_rate": 1.9912024541458016e-05,
      "loss": 2.7135,
      "step": 3724
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.070081353187561,
      "learning_rate": 1.9911970037872425e-05,
      "loss": 2.6982,
      "step": 3725
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.01654851436615,
      "learning_rate": 1.9911915517483354e-05,
      "loss": 2.8178,
      "step": 3726
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9773858189582825,
      "learning_rate": 1.9911860980290897e-05,
      "loss": 2.7461,
      "step": 3727
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.134732961654663,
      "learning_rate": 1.9911806426295143e-05,
      "loss": 2.5456,
      "step": 3728
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0090062618255615,
      "learning_rate": 1.9911751855496183e-05,
      "loss": 2.6488,
      "step": 3729
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0047407150268555,
      "learning_rate": 1.991169726789411e-05,
      "loss": 2.8116,
      "step": 3730
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9448102116584778,
      "learning_rate": 1.991164266348902e-05,
      "loss": 2.7821,
      "step": 3731
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9507254958152771,
      "learning_rate": 1.9911588042281003e-05,
      "loss": 2.6348,
      "step": 3732
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.990296483039856,
      "learning_rate": 1.9911533404270155e-05,
      "loss": 2.6918,
      "step": 3733
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8852108120918274,
      "learning_rate": 1.991147874945656e-05,
      "loss": 2.7559,
      "step": 3734
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8999418616294861,
      "learning_rate": 1.9911424077840325e-05,
      "loss": 2.5478,
      "step": 3735
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9682365655899048,
      "learning_rate": 1.991136938942153e-05,
      "loss": 2.6246,
      "step": 3736
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9340171813964844,
      "learning_rate": 1.991131468420027e-05,
      "loss": 2.9633,
      "step": 3737
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9851509928703308,
      "learning_rate": 1.9911259962176642e-05,
      "loss": 2.9455,
      "step": 3738
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.03427255153656,
      "learning_rate": 1.9911205223350737e-05,
      "loss": 2.9014,
      "step": 3739
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9080183506011963,
      "learning_rate": 1.9911150467722645e-05,
      "loss": 2.5848,
      "step": 3740
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9713496565818787,
      "learning_rate": 1.9911095695292463e-05,
      "loss": 2.7055,
      "step": 3741
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0838606357574463,
      "learning_rate": 1.9911040906060284e-05,
      "loss": 2.8091,
      "step": 3742
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0210670232772827,
      "learning_rate": 1.9910986100026197e-05,
      "loss": 2.7091,
      "step": 3743
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9889625906944275,
      "learning_rate": 1.9910931277190296e-05,
      "loss": 2.5414,
      "step": 3744
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9806839823722839,
      "learning_rate": 1.9910876437552674e-05,
      "loss": 2.6859,
      "step": 3745
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9530094265937805,
      "learning_rate": 1.9910821581113428e-05,
      "loss": 2.9812,
      "step": 3746
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9379515051841736,
      "learning_rate": 1.9910766707872643e-05,
      "loss": 2.8766,
      "step": 3747
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.272180438041687,
      "learning_rate": 1.9910711817830418e-05,
      "loss": 2.9702,
      "step": 3748
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0614919662475586,
      "learning_rate": 1.991065691098685e-05,
      "loss": 2.6387,
      "step": 3749
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1076815128326416,
      "learning_rate": 1.9910601987342022e-05,
      "loss": 2.7959,
      "step": 3750
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0162529945373535,
      "learning_rate": 1.9910547046896034e-05,
      "loss": 2.8505,
      "step": 3751
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9087767004966736,
      "learning_rate": 1.9910492089648974e-05,
      "loss": 2.8215,
      "step": 3752
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0537774562835693,
      "learning_rate": 1.991043711560094e-05,
      "loss": 2.7253,
      "step": 3753
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9435515403747559,
      "learning_rate": 1.9910382124752025e-05,
      "loss": 2.6706,
      "step": 3754
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0525367259979248,
      "learning_rate": 1.991032711710232e-05,
      "loss": 2.7467,
      "step": 3755
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9308435320854187,
      "learning_rate": 1.9910272092651914e-05,
      "loss": 2.7745,
      "step": 3756
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.001478910446167,
      "learning_rate": 1.991021705140091e-05,
      "loss": 2.6754,
      "step": 3757
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0954035520553589,
      "learning_rate": 1.9910161993349396e-05,
      "loss": 2.9021,
      "step": 3758
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9750094413757324,
      "learning_rate": 1.9910106918497467e-05,
      "loss": 2.6993,
      "step": 3759
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1189912557601929,
      "learning_rate": 1.9910051826845215e-05,
      "loss": 2.5623,
      "step": 3760
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.064049482345581,
      "learning_rate": 1.9909996718392732e-05,
      "loss": 2.7789,
      "step": 3761
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0755332708358765,
      "learning_rate": 1.9909941593140113e-05,
      "loss": 2.7523,
      "step": 3762
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9624666571617126,
      "learning_rate": 1.9909886451087453e-05,
      "loss": 2.641,
      "step": 3763
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9768678545951843,
      "learning_rate": 1.9909831292234842e-05,
      "loss": 2.8604,
      "step": 3764
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9190870523452759,
      "learning_rate": 1.9909776116582377e-05,
      "loss": 2.7983,
      "step": 3765
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0701583623886108,
      "learning_rate": 1.990972092413015e-05,
      "loss": 2.7334,
      "step": 3766
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9693161249160767,
      "learning_rate": 1.9909665714878255e-05,
      "loss": 2.5204,
      "step": 3767
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1636972427368164,
      "learning_rate": 1.9909610488826784e-05,
      "loss": 2.6232,
      "step": 3768
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.015526294708252,
      "learning_rate": 1.990955524597583e-05,
      "loss": 2.6176,
      "step": 3769
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9862498044967651,
      "learning_rate": 1.990949998632549e-05,
      "loss": 2.7191,
      "step": 3770
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.002194881439209,
      "learning_rate": 1.9909444709875856e-05,
      "loss": 2.5641,
      "step": 3771
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0982530117034912,
      "learning_rate": 1.9909389416627026e-05,
      "loss": 2.9944,
      "step": 3772
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0704540014266968,
      "learning_rate": 1.9909334106579085e-05,
      "loss": 2.7146,
      "step": 3773
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.018515706062317,
      "learning_rate": 1.9909278779732133e-05,
      "loss": 2.7402,
      "step": 3774
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.937690019607544,
      "learning_rate": 1.990922343608626e-05,
      "loss": 2.9187,
      "step": 3775
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0346126556396484,
      "learning_rate": 1.9909168075641565e-05,
      "loss": 2.7819,
      "step": 3776
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.902470588684082,
      "learning_rate": 1.990911269839814e-05,
      "loss": 2.5361,
      "step": 3777
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9266136884689331,
      "learning_rate": 1.9909057304356073e-05,
      "loss": 2.7436,
      "step": 3778
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9514965415000916,
      "learning_rate": 1.9909001893515464e-05,
      "loss": 2.8228,
      "step": 3779
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9858081340789795,
      "learning_rate": 1.9908946465876406e-05,
      "loss": 2.8663,
      "step": 3780
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9365408420562744,
      "learning_rate": 1.9908891021438994e-05,
      "loss": 2.8001,
      "step": 3781
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8898537158966064,
      "learning_rate": 1.990883556020332e-05,
      "loss": 2.844,
      "step": 3782
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1045376062393188,
      "learning_rate": 1.9908780082169477e-05,
      "loss": 2.9514,
      "step": 3783
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0023632049560547,
      "learning_rate": 1.990872458733756e-05,
      "loss": 2.4569,
      "step": 3784
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1226937770843506,
      "learning_rate": 1.9908669075707666e-05,
      "loss": 2.999,
      "step": 3785
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.058822512626648,
      "learning_rate": 1.9908613547279886e-05,
      "loss": 2.8306,
      "step": 3786
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9698176383972168,
      "learning_rate": 1.9908558002054315e-05,
      "loss": 2.7345,
      "step": 3787
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9737997651100159,
      "learning_rate": 1.9908502440031044e-05,
      "loss": 2.6585,
      "step": 3788
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9728347659111023,
      "learning_rate": 1.9908446861210175e-05,
      "loss": 2.6181,
      "step": 3789
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9889333248138428,
      "learning_rate": 1.9908391265591795e-05,
      "loss": 2.8355,
      "step": 3790
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9869384169578552,
      "learning_rate": 1.9908335653176003e-05,
      "loss": 2.9287,
      "step": 3791
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0149199962615967,
      "learning_rate": 1.9908280023962886e-05,
      "loss": 2.8569,
      "step": 3792
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.020308017730713,
      "learning_rate": 1.990822437795255e-05,
      "loss": 2.8889,
      "step": 3793
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.89925217628479,
      "learning_rate": 1.990816871514508e-05,
      "loss": 2.7851,
      "step": 3794
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.024760365486145,
      "learning_rate": 1.990811303554057e-05,
      "loss": 2.7333,
      "step": 3795
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9291908144950867,
      "learning_rate": 1.990805733913912e-05,
      "loss": 2.8829,
      "step": 3796
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0923588275909424,
      "learning_rate": 1.990800162594082e-05,
      "loss": 2.6552,
      "step": 3797
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0618337392807007,
      "learning_rate": 1.9907945895945772e-05,
      "loss": 2.8473,
      "step": 3798
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0523501634597778,
      "learning_rate": 1.9907890149154058e-05,
      "loss": 2.6049,
      "step": 3799
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0491386651992798,
      "learning_rate": 1.990783438556578e-05,
      "loss": 2.8075,
      "step": 3800
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0074889659881592,
      "learning_rate": 1.9907778605181035e-05,
      "loss": 2.7319,
      "step": 3801
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9835529923439026,
      "learning_rate": 1.9907722807999913e-05,
      "loss": 2.6959,
      "step": 3802
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9451385140419006,
      "learning_rate": 1.9907666994022508e-05,
      "loss": 2.75,
      "step": 3803
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0118809938430786,
      "learning_rate": 1.990761116324892e-05,
      "loss": 2.8243,
      "step": 3804
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9260828495025635,
      "learning_rate": 1.9907555315679238e-05,
      "loss": 2.8693,
      "step": 3805
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9778842926025391,
      "learning_rate": 1.990749945131356e-05,
      "loss": 2.8773,
      "step": 3806
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.109735369682312,
      "learning_rate": 1.9907443570151978e-05,
      "loss": 2.7901,
      "step": 3807
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0112066268920898,
      "learning_rate": 1.9907387672194593e-05,
      "loss": 2.8539,
      "step": 3808
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9190028309822083,
      "learning_rate": 1.990733175744149e-05,
      "loss": 2.9224,
      "step": 3809
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9659903645515442,
      "learning_rate": 1.9907275825892767e-05,
      "loss": 2.5986,
      "step": 3810
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9810906648635864,
      "learning_rate": 1.9907219877548526e-05,
      "loss": 2.8808,
      "step": 3811
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0765764713287354,
      "learning_rate": 1.9907163912408854e-05,
      "loss": 2.7508,
      "step": 3812
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0562446117401123,
      "learning_rate": 1.9907107930473848e-05,
      "loss": 2.745,
      "step": 3813
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0194804668426514,
      "learning_rate": 1.9907051931743605e-05,
      "loss": 2.4895,
      "step": 3814
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9681399464607239,
      "learning_rate": 1.9906995916218216e-05,
      "loss": 2.8757,
      "step": 3815
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0340220928192139,
      "learning_rate": 1.990693988389778e-05,
      "loss": 2.6464,
      "step": 3816
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0140916109085083,
      "learning_rate": 1.9906883834782392e-05,
      "loss": 2.8284,
      "step": 3817
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9964566826820374,
      "learning_rate": 1.9906827768872143e-05,
      "loss": 2.7993,
      "step": 3818
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0062363147735596,
      "learning_rate": 1.990677168616713e-05,
      "loss": 2.6528,
      "step": 3819
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0368688106536865,
      "learning_rate": 1.990671558666745e-05,
      "loss": 2.8819,
      "step": 3820
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0449315309524536,
      "learning_rate": 1.9906659470373193e-05,
      "loss": 2.7635,
      "step": 3821
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0006860494613647,
      "learning_rate": 1.990660333728446e-05,
      "loss": 2.6909,
      "step": 3822
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.024728536605835,
      "learning_rate": 1.9906547187401345e-05,
      "loss": 2.8977,
      "step": 3823
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9799277186393738,
      "learning_rate": 1.990649102072394e-05,
      "loss": 2.7818,
      "step": 3824
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1653655767440796,
      "learning_rate": 1.9906434837252342e-05,
      "loss": 2.7295,
      "step": 3825
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1531630754470825,
      "learning_rate": 1.9906378636986648e-05,
      "loss": 2.7509,
      "step": 3826
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0634093284606934,
      "learning_rate": 1.990632241992695e-05,
      "loss": 2.806,
      "step": 3827
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0852940082550049,
      "learning_rate": 1.9906266186073345e-05,
      "loss": 2.8418,
      "step": 3828
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1261012554168701,
      "learning_rate": 1.990620993542593e-05,
      "loss": 2.7695,
      "step": 3829
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0159754753112793,
      "learning_rate": 1.99061536679848e-05,
      "loss": 2.7433,
      "step": 3830
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1329349279403687,
      "learning_rate": 1.9906097383750046e-05,
      "loss": 2.5844,
      "step": 3831
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.92918461561203,
      "learning_rate": 1.9906041082721764e-05,
      "loss": 2.9047,
      "step": 3832
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9570224285125732,
      "learning_rate": 1.9905984764900057e-05,
      "loss": 2.7011,
      "step": 3833
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0190469026565552,
      "learning_rate": 1.9905928430285014e-05,
      "loss": 2.6949,
      "step": 3834
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9699079394340515,
      "learning_rate": 1.990587207887673e-05,
      "loss": 2.8848,
      "step": 3835
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0239524841308594,
      "learning_rate": 1.9905815710675306e-05,
      "loss": 2.5018,
      "step": 3836
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0841683149337769,
      "learning_rate": 1.990575932568083e-05,
      "loss": 2.7632,
      "step": 3837
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1606625318527222,
      "learning_rate": 1.9905702923893405e-05,
      "loss": 2.7414,
      "step": 3838
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9679367542266846,
      "learning_rate": 1.990564650531312e-05,
      "loss": 2.5862,
      "step": 3839
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9877391457557678,
      "learning_rate": 1.9905590069940074e-05,
      "loss": 2.6941,
      "step": 3840
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0476279258728027,
      "learning_rate": 1.9905533617774364e-05,
      "loss": 2.6436,
      "step": 3841
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0356868505477905,
      "learning_rate": 1.9905477148816087e-05,
      "loss": 2.8037,
      "step": 3842
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0158770084381104,
      "learning_rate": 1.9905420663065335e-05,
      "loss": 2.8163,
      "step": 3843
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0331887006759644,
      "learning_rate": 1.99053641605222e-05,
      "loss": 2.7415,
      "step": 3844
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0777466297149658,
      "learning_rate": 1.9905307641186783e-05,
      "loss": 2.7563,
      "step": 3845
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9906136393547058,
      "learning_rate": 1.9905251105059184e-05,
      "loss": 2.7745,
      "step": 3846
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0159868001937866,
      "learning_rate": 1.990519455213949e-05,
      "loss": 2.8566,
      "step": 3847
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9339935183525085,
      "learning_rate": 1.9905137982427804e-05,
      "loss": 2.5828,
      "step": 3848
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9119172692298889,
      "learning_rate": 1.990508139592422e-05,
      "loss": 2.8178,
      "step": 3849
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0405174493789673,
      "learning_rate": 1.990502479262883e-05,
      "loss": 2.7557,
      "step": 3850
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0702290534973145,
      "learning_rate": 1.9904968172541732e-05,
      "loss": 2.6477,
      "step": 3851
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9850768446922302,
      "learning_rate": 1.9904911535663026e-05,
      "loss": 2.7918,
      "step": 3852
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0308018922805786,
      "learning_rate": 1.99048548819928e-05,
      "loss": 2.5491,
      "step": 3853
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0380152463912964,
      "learning_rate": 1.990479821153116e-05,
      "loss": 2.6743,
      "step": 3854
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9297832250595093,
      "learning_rate": 1.9904741524278193e-05,
      "loss": 3.0073,
      "step": 3855
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.058379054069519,
      "learning_rate": 1.9904684820234003e-05,
      "loss": 2.8808,
      "step": 3856
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9972335696220398,
      "learning_rate": 1.990462809939868e-05,
      "loss": 2.7499,
      "step": 3857
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.045897126197815,
      "learning_rate": 1.9904571361772322e-05,
      "loss": 2.6911,
      "step": 3858
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4239412546157837,
      "learning_rate": 1.9904514607355024e-05,
      "loss": 2.7776,
      "step": 3859
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9985060691833496,
      "learning_rate": 1.9904457836146888e-05,
      "loss": 2.6838,
      "step": 3860
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9774871468544006,
      "learning_rate": 1.9904401048148e-05,
      "loss": 2.7684,
      "step": 3861
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.979131817817688,
      "learning_rate": 1.9904344243358467e-05,
      "loss": 2.7925,
      "step": 3862
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1027860641479492,
      "learning_rate": 1.990428742177838e-05,
      "loss": 2.8361,
      "step": 3863
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.367606282234192,
      "learning_rate": 1.9904230583407834e-05,
      "loss": 2.8009,
      "step": 3864
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0293421745300293,
      "learning_rate": 1.9904173728246928e-05,
      "loss": 2.8696,
      "step": 3865
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9737765789031982,
      "learning_rate": 1.990411685629576e-05,
      "loss": 2.6521,
      "step": 3866
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9724211096763611,
      "learning_rate": 1.990405996755442e-05,
      "loss": 2.8492,
      "step": 3867
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0115001201629639,
      "learning_rate": 1.990400306202301e-05,
      "loss": 2.6856,
      "step": 3868
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0377036333084106,
      "learning_rate": 1.9903946139701625e-05,
      "loss": 2.836,
      "step": 3869
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.006404995918274,
      "learning_rate": 1.990388920059036e-05,
      "loss": 2.6698,
      "step": 3870
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0172117948532104,
      "learning_rate": 1.9903832244689318e-05,
      "loss": 2.6707,
      "step": 3871
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0517358779907227,
      "learning_rate": 1.9903775271998585e-05,
      "loss": 2.7538,
      "step": 3872
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9376752972602844,
      "learning_rate": 1.9903718282518264e-05,
      "loss": 2.8561,
      "step": 3873
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1162110567092896,
      "learning_rate": 1.9903661276248453e-05,
      "loss": 3.0198,
      "step": 3874
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.120145559310913,
      "learning_rate": 1.9903604253189245e-05,
      "loss": 2.8265,
      "step": 3875
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9600945115089417,
      "learning_rate": 1.990354721334074e-05,
      "loss": 2.6733,
      "step": 3876
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.96246737241745,
      "learning_rate": 1.990349015670303e-05,
      "loss": 3.0002,
      "step": 3877
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.071990966796875,
      "learning_rate": 1.9903433083276217e-05,
      "loss": 2.7648,
      "step": 3878
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9327068328857422,
      "learning_rate": 1.990337599306039e-05,
      "loss": 2.9799,
      "step": 3879
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9655539393424988,
      "learning_rate": 1.9903318886055656e-05,
      "loss": 2.6915,
      "step": 3880
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9902999401092529,
      "learning_rate": 1.9903261762262103e-05,
      "loss": 2.8708,
      "step": 3881
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9132457971572876,
      "learning_rate": 1.9903204621679835e-05,
      "loss": 2.7416,
      "step": 3882
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.045855164527893,
      "learning_rate": 1.9903147464308945e-05,
      "loss": 2.791,
      "step": 3883
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0383899211883545,
      "learning_rate": 1.990309029014953e-05,
      "loss": 2.6747,
      "step": 3884
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.018536925315857,
      "learning_rate": 1.9903033099201686e-05,
      "loss": 2.9014,
      "step": 3885
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.021215796470642,
      "learning_rate": 1.990297589146551e-05,
      "loss": 2.8024,
      "step": 3886
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.2890106439590454,
      "learning_rate": 1.9902918666941103e-05,
      "loss": 2.9583,
      "step": 3887
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0186007022857666,
      "learning_rate": 1.9902861425628563e-05,
      "loss": 2.7123,
      "step": 3888
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0182608366012573,
      "learning_rate": 1.9902804167527975e-05,
      "loss": 2.9977,
      "step": 3889
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0581663846969604,
      "learning_rate": 1.9902746892639452e-05,
      "loss": 2.7879,
      "step": 3890
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0083595514297485,
      "learning_rate": 1.9902689600963078e-05,
      "loss": 2.8899,
      "step": 3891
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0087647438049316,
      "learning_rate": 1.9902632292498958e-05,
      "loss": 2.801,
      "step": 3892
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0018315315246582,
      "learning_rate": 1.9902574967247187e-05,
      "loss": 2.7233,
      "step": 3893
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0900895595550537,
      "learning_rate": 1.990251762520786e-05,
      "loss": 2.592,
      "step": 3894
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.016730785369873,
      "learning_rate": 1.990246026638108e-05,
      "loss": 2.6255,
      "step": 3895
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1133780479431152,
      "learning_rate": 1.9902402890766935e-05,
      "loss": 2.9325,
      "step": 3896
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0170210599899292,
      "learning_rate": 1.9902345498365533e-05,
      "loss": 2.9362,
      "step": 3897
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9890247583389282,
      "learning_rate": 1.990228808917696e-05,
      "loss": 2.8179,
      "step": 3898
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0050342082977295,
      "learning_rate": 1.9902230663201325e-05,
      "loss": 2.7449,
      "step": 3899
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.191390037536621,
      "learning_rate": 1.9902173220438715e-05,
      "loss": 2.6086,
      "step": 3900
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0381569862365723,
      "learning_rate": 1.9902115760889237e-05,
      "loss": 2.8311,
      "step": 3901
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9652128219604492,
      "learning_rate": 1.990205828455298e-05,
      "loss": 2.9324,
      "step": 3902
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0358625650405884,
      "learning_rate": 1.9902000791430046e-05,
      "loss": 2.7263,
      "step": 3903
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.051684021949768,
      "learning_rate": 1.9901943281520532e-05,
      "loss": 2.8187,
      "step": 3904
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9865396618843079,
      "learning_rate": 1.9901885754824533e-05,
      "loss": 2.5745,
      "step": 3905
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0369889736175537,
      "learning_rate": 1.9901828211342147e-05,
      "loss": 2.6493,
      "step": 3906
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9842143654823303,
      "learning_rate": 1.9901770651073475e-05,
      "loss": 2.679,
      "step": 3907
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9487974047660828,
      "learning_rate": 1.9901713074018613e-05,
      "loss": 2.4384,
      "step": 3908
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9338073134422302,
      "learning_rate": 1.9901655480177658e-05,
      "loss": 2.6287,
      "step": 3909
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9841486811637878,
      "learning_rate": 1.9901597869550706e-05,
      "loss": 2.7063,
      "step": 3910
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0822443962097168,
      "learning_rate": 1.9901540242137858e-05,
      "loss": 2.613,
      "step": 3911
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0080657005310059,
      "learning_rate": 1.990148259793921e-05,
      "loss": 2.7095,
      "step": 3912
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9729963541030884,
      "learning_rate": 1.9901424936954857e-05,
      "loss": 3.0568,
      "step": 3913
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9441010355949402,
      "learning_rate": 1.9901367259184904e-05,
      "loss": 2.7746,
      "step": 3914
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1432956457138062,
      "learning_rate": 1.9901309564629442e-05,
      "loss": 2.8044,
      "step": 3915
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.017975926399231,
      "learning_rate": 1.990125185328857e-05,
      "loss": 2.7754,
      "step": 3916
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0791535377502441,
      "learning_rate": 1.9901194125162388e-05,
      "loss": 2.6422,
      "step": 3917
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.919629693031311,
      "learning_rate": 1.9901136380250993e-05,
      "loss": 2.902,
      "step": 3918
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9333623647689819,
      "learning_rate": 1.9901078618554485e-05,
      "loss": 2.9608,
      "step": 3919
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9175515174865723,
      "learning_rate": 1.9901020840072955e-05,
      "loss": 2.7187,
      "step": 3920
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9744604229927063,
      "learning_rate": 1.9900963044806508e-05,
      "loss": 2.7064,
      "step": 3921
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.978965699672699,
      "learning_rate": 1.990090523275524e-05,
      "loss": 2.639,
      "step": 3922
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.102966547012329,
      "learning_rate": 1.9900847403919248e-05,
      "loss": 2.7159,
      "step": 3923
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0064924955368042,
      "learning_rate": 1.990078955829863e-05,
      "loss": 2.9896,
      "step": 3924
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9456913471221924,
      "learning_rate": 1.9900731695893486e-05,
      "loss": 2.5485,
      "step": 3925
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0471817255020142,
      "learning_rate": 1.9900673816703913e-05,
      "loss": 2.8151,
      "step": 3926
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0201290845870972,
      "learning_rate": 1.990061592073001e-05,
      "loss": 2.7971,
      "step": 3927
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.021373987197876,
      "learning_rate": 1.990055800797187e-05,
      "loss": 2.5602,
      "step": 3928
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9934194087982178,
      "learning_rate": 1.99005000784296e-05,
      "loss": 2.5962,
      "step": 3929
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.963663637638092,
      "learning_rate": 1.9900442132103287e-05,
      "loss": 2.9247,
      "step": 3930
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.192429780960083,
      "learning_rate": 1.990038416899304e-05,
      "loss": 2.7807,
      "step": 3931
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9224985837936401,
      "learning_rate": 1.9900326189098955e-05,
      "loss": 2.58,
      "step": 3932
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0592695474624634,
      "learning_rate": 1.9900268192421124e-05,
      "loss": 2.8858,
      "step": 3933
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0663297176361084,
      "learning_rate": 1.990021017895965e-05,
      "loss": 2.7981,
      "step": 3934
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9508551955223083,
      "learning_rate": 1.9900152148714633e-05,
      "loss": 2.5141,
      "step": 3935
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0597963333129883,
      "learning_rate": 1.990009410168617e-05,
      "loss": 2.9548,
      "step": 3936
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.05991792678833,
      "learning_rate": 1.9900036037874354e-05,
      "loss": 2.5977,
      "step": 3937
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0286717414855957,
      "learning_rate": 1.9899977957279293e-05,
      "loss": 2.8612,
      "step": 3938
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.996403694152832,
      "learning_rate": 1.989991985990108e-05,
      "loss": 2.6623,
      "step": 3939
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9450878500938416,
      "learning_rate": 1.9899861745739813e-05,
      "loss": 2.5312,
      "step": 3940
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9707019925117493,
      "learning_rate": 1.989980361479559e-05,
      "loss": 2.7846,
      "step": 3941
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.2652366161346436,
      "learning_rate": 1.989974546706851e-05,
      "loss": 2.8963,
      "step": 3942
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8809316158294678,
      "learning_rate": 1.9899687302558678e-05,
      "loss": 2.7854,
      "step": 3943
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9410699605941772,
      "learning_rate": 1.9899629121266184e-05,
      "loss": 2.7034,
      "step": 3944
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0172947645187378,
      "learning_rate": 1.989957092319113e-05,
      "loss": 2.7478,
      "step": 3945
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9864473342895508,
      "learning_rate": 1.9899512708333614e-05,
      "loss": 2.7338,
      "step": 3946
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1047086715698242,
      "learning_rate": 1.989945447669374e-05,
      "loss": 2.8187,
      "step": 3947
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9827825427055359,
      "learning_rate": 1.9899396228271595e-05,
      "loss": 2.7738,
      "step": 3948
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9962844848632812,
      "learning_rate": 1.9899337963067288e-05,
      "loss": 2.7548,
      "step": 3949
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9789645671844482,
      "learning_rate": 1.9899279681080914e-05,
      "loss": 2.8372,
      "step": 3950
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9559396505355835,
      "learning_rate": 1.989922138231257e-05,
      "loss": 2.7898,
      "step": 3951
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0881078243255615,
      "learning_rate": 1.9899163066762364e-05,
      "loss": 2.826,
      "step": 3952
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.065342664718628,
      "learning_rate": 1.9899104734430384e-05,
      "loss": 2.6681,
      "step": 3953
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0594433546066284,
      "learning_rate": 1.989904638531673e-05,
      "loss": 2.8431,
      "step": 3954
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.040234088897705,
      "learning_rate": 1.989898801942151e-05,
      "loss": 2.7843,
      "step": 3955
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.987148642539978,
      "learning_rate": 1.9898929636744814e-05,
      "loss": 2.9626,
      "step": 3956
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.2618333101272583,
      "learning_rate": 1.9898871237286743e-05,
      "loss": 2.8721,
      "step": 3957
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9856946468353271,
      "learning_rate": 1.98988128210474e-05,
      "loss": 2.861,
      "step": 3958
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6163996458053589,
      "learning_rate": 1.9898754388026875e-05,
      "loss": 2.9113,
      "step": 3959
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0677564144134521,
      "learning_rate": 1.9898695938225273e-05,
      "loss": 2.8186,
      "step": 3960
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0345100164413452,
      "learning_rate": 1.9898637471642698e-05,
      "loss": 2.6022,
      "step": 3961
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9581899642944336,
      "learning_rate": 1.989857898827924e-05,
      "loss": 2.7082,
      "step": 3962
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0216376781463623,
      "learning_rate": 1.9898520488135005e-05,
      "loss": 2.6178,
      "step": 3963
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0225557088851929,
      "learning_rate": 1.989846197121009e-05,
      "loss": 2.6742,
      "step": 3964
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9712263941764832,
      "learning_rate": 1.989840343750459e-05,
      "loss": 2.9827,
      "step": 3965
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0070945024490356,
      "learning_rate": 1.989834488701861e-05,
      "loss": 2.9218,
      "step": 3966
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0312302112579346,
      "learning_rate": 1.9898286319752248e-05,
      "loss": 2.7578,
      "step": 3967
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0392612218856812,
      "learning_rate": 1.98982277357056e-05,
      "loss": 2.8127,
      "step": 3968
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9382216334342957,
      "learning_rate": 1.989816913487877e-05,
      "loss": 2.9777,
      "step": 3969
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.2524203062057495,
      "learning_rate": 1.9898110517271853e-05,
      "loss": 2.7911,
      "step": 3970
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9218539595603943,
      "learning_rate": 1.9898051882884953e-05,
      "loss": 2.7389,
      "step": 3971
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0592881441116333,
      "learning_rate": 1.989799323171816e-05,
      "loss": 2.5728,
      "step": 3972
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0405406951904297,
      "learning_rate": 1.989793456377159e-05,
      "loss": 2.9033,
      "step": 3973
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9265009164810181,
      "learning_rate": 1.9897875879045324e-05,
      "loss": 2.6563,
      "step": 3974
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9768993854522705,
      "learning_rate": 1.9897817177539476e-05,
      "loss": 2.9412,
      "step": 3975
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0247348546981812,
      "learning_rate": 1.989775845925414e-05,
      "loss": 2.8011,
      "step": 3976
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9625992178916931,
      "learning_rate": 1.989769972418941e-05,
      "loss": 2.7469,
      "step": 3977
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9538614153862,
      "learning_rate": 1.9897640972345394e-05,
      "loss": 2.7582,
      "step": 3978
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0388472080230713,
      "learning_rate": 1.989758220372219e-05,
      "loss": 3.0874,
      "step": 3979
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.018939733505249,
      "learning_rate": 1.9897523418319893e-05,
      "loss": 2.7625,
      "step": 3980
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9416196942329407,
      "learning_rate": 1.9897464616138605e-05,
      "loss": 2.8017,
      "step": 3981
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0223575830459595,
      "learning_rate": 1.989740579717843e-05,
      "loss": 2.7657,
      "step": 3982
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0194299221038818,
      "learning_rate": 1.989734696143946e-05,
      "loss": 2.6823,
      "step": 3983
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.028551697731018,
      "learning_rate": 1.98972881089218e-05,
      "loss": 2.8382,
      "step": 3984
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0603220462799072,
      "learning_rate": 1.9897229239625552e-05,
      "loss": 2.632,
      "step": 3985
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.954532265663147,
      "learning_rate": 1.989717035355081e-05,
      "loss": 2.6842,
      "step": 3986
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1777894496917725,
      "learning_rate": 1.9897111450697676e-05,
      "loss": 2.5902,
      "step": 3987
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0799564123153687,
      "learning_rate": 1.989705253106625e-05,
      "loss": 3.1104,
      "step": 3988
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8850938677787781,
      "learning_rate": 1.989699359465663e-05,
      "loss": 2.6317,
      "step": 3989
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9791681170463562,
      "learning_rate": 1.9896934641468922e-05,
      "loss": 2.7211,
      "step": 3990
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9170101881027222,
      "learning_rate": 1.989687567150322e-05,
      "loss": 2.8299,
      "step": 3991
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2762835025787354,
      "learning_rate": 1.9896816684759625e-05,
      "loss": 2.7173,
      "step": 3992
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.061563491821289,
      "learning_rate": 1.989675768123824e-05,
      "loss": 2.7998,
      "step": 3993
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.885760486125946,
      "learning_rate": 1.989669866093916e-05,
      "loss": 2.6697,
      "step": 3994
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.95408034324646,
      "learning_rate": 1.989663962386249e-05,
      "loss": 2.6714,
      "step": 3995
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9471772313117981,
      "learning_rate": 1.9896580570008325e-05,
      "loss": 2.7645,
      "step": 3996
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9997661113739014,
      "learning_rate": 1.989652149937677e-05,
      "loss": 2.8017,
      "step": 3997
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9711678624153137,
      "learning_rate": 1.9896462411967926e-05,
      "loss": 2.8013,
      "step": 3998
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.070753574371338,
      "learning_rate": 1.9896403307781886e-05,
      "loss": 2.6437,
      "step": 3999
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1879159212112427,
      "learning_rate": 1.9896344186818756e-05,
      "loss": 2.7366,
      "step": 4000
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1064448356628418,
      "learning_rate": 1.9896285049078638e-05,
      "loss": 2.7797,
      "step": 4001
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9781420230865479,
      "learning_rate": 1.9896225894561626e-05,
      "loss": 3.0346,
      "step": 4002
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0292847156524658,
      "learning_rate": 1.989616672326782e-05,
      "loss": 2.9675,
      "step": 4003
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9878498315811157,
      "learning_rate": 1.9896107535197327e-05,
      "loss": 2.8435,
      "step": 4004
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0049865245819092,
      "learning_rate": 1.9896048330350248e-05,
      "loss": 2.6917,
      "step": 4005
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.110519289970398,
      "learning_rate": 1.989598910872667e-05,
      "loss": 2.6676,
      "step": 4006
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0839744806289673,
      "learning_rate": 1.9895929870326712e-05,
      "loss": 3.0906,
      "step": 4007
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0378456115722656,
      "learning_rate": 1.989587061515046e-05,
      "loss": 2.8783,
      "step": 4008
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0157438516616821,
      "learning_rate": 1.989581134319802e-05,
      "loss": 2.7004,
      "step": 4009
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9861663579940796,
      "learning_rate": 1.9895752054469493e-05,
      "loss": 2.9468,
      "step": 4010
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9554362297058105,
      "learning_rate": 1.9895692748964982e-05,
      "loss": 2.6876,
      "step": 4011
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8992958068847656,
      "learning_rate": 1.9895633426684577e-05,
      "loss": 2.7609,
      "step": 4012
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9053210616111755,
      "learning_rate": 1.989557408762839e-05,
      "loss": 2.6439,
      "step": 4013
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1563090085983276,
      "learning_rate": 1.9895514731796518e-05,
      "loss": 2.5632,
      "step": 4014
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0303343534469604,
      "learning_rate": 1.989545535918906e-05,
      "loss": 2.7835,
      "step": 4015
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9142822623252869,
      "learning_rate": 1.9895395969806116e-05,
      "loss": 2.9187,
      "step": 4016
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1590088605880737,
      "learning_rate": 1.9895336563647793e-05,
      "loss": 2.9851,
      "step": 4017
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0928678512573242,
      "learning_rate": 1.989527714071418e-05,
      "loss": 2.6381,
      "step": 4018
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0353909730911255,
      "learning_rate": 1.989521770100539e-05,
      "loss": 2.6716,
      "step": 4019
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0002294778823853,
      "learning_rate": 1.9895158244521516e-05,
      "loss": 2.6271,
      "step": 4020
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.995900571346283,
      "learning_rate": 1.9895098771262665e-05,
      "loss": 2.777,
      "step": 4021
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9732197523117065,
      "learning_rate": 1.989503928122893e-05,
      "loss": 2.8149,
      "step": 4022
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3981539011001587,
      "learning_rate": 1.989497977442042e-05,
      "loss": 2.7627,
      "step": 4023
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0811128616333008,
      "learning_rate": 1.9894920250837226e-05,
      "loss": 2.9076,
      "step": 4024
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9589554071426392,
      "learning_rate": 1.9894860710479458e-05,
      "loss": 2.7593,
      "step": 4025
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0552181005477905,
      "learning_rate": 1.9894801153347215e-05,
      "loss": 2.5821,
      "step": 4026
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9465223550796509,
      "learning_rate": 1.9894741579440592e-05,
      "loss": 2.5029,
      "step": 4027
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.035011887550354,
      "learning_rate": 1.98946819887597e-05,
      "loss": 2.7951,
      "step": 4028
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0187383890151978,
      "learning_rate": 1.9894622381304632e-05,
      "loss": 2.6117,
      "step": 4029
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9850383996963501,
      "learning_rate": 1.989456275707549e-05,
      "loss": 2.7558,
      "step": 4030
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1224489212036133,
      "learning_rate": 1.989450311607238e-05,
      "loss": 2.7021,
      "step": 4031
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4059269428253174,
      "learning_rate": 1.98944434582954e-05,
      "loss": 2.7816,
      "step": 4032
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0964372158050537,
      "learning_rate": 1.9894383783744652e-05,
      "loss": 2.8645,
      "step": 4033
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0012202262878418,
      "learning_rate": 1.989432409242023e-05,
      "loss": 2.7091,
      "step": 4034
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0089240074157715,
      "learning_rate": 1.989426438432225e-05,
      "loss": 2.8306,
      "step": 4035
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9747406840324402,
      "learning_rate": 1.9894204659450797e-05,
      "loss": 2.863,
      "step": 4036
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.2100553512573242,
      "learning_rate": 1.9894144917805986e-05,
      "loss": 2.848,
      "step": 4037
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0530805587768555,
      "learning_rate": 1.9894085159387906e-05,
      "loss": 2.4945,
      "step": 4038
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0493272542953491,
      "learning_rate": 1.989402538419667e-05,
      "loss": 2.802,
      "step": 4039
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.118281364440918,
      "learning_rate": 1.989396559223237e-05,
      "loss": 2.7461,
      "step": 4040
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0382258892059326,
      "learning_rate": 1.989390578349511e-05,
      "loss": 2.5498,
      "step": 4041
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.013552188873291,
      "learning_rate": 1.9893845957985e-05,
      "loss": 2.7835,
      "step": 4042
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1974294185638428,
      "learning_rate": 1.9893786115702127e-05,
      "loss": 2.7922,
      "step": 4043
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0964332818984985,
      "learning_rate": 1.9893726256646602e-05,
      "loss": 2.8364,
      "step": 4044
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0156610012054443,
      "learning_rate": 1.9893666380818525e-05,
      "loss": 2.9351,
      "step": 4045
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0364434719085693,
      "learning_rate": 1.9893606488217995e-05,
      "loss": 2.683,
      "step": 4046
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8735827803611755,
      "learning_rate": 1.9893546578845112e-05,
      "loss": 2.5627,
      "step": 4047
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9731646180152893,
      "learning_rate": 1.9893486652699984e-05,
      "loss": 2.8538,
      "step": 4048
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0520422458648682,
      "learning_rate": 1.989342670978271e-05,
      "loss": 2.8191,
      "step": 4049
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0355451107025146,
      "learning_rate": 1.989336675009339e-05,
      "loss": 2.5903,
      "step": 4050
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9598727822303772,
      "learning_rate": 1.9893306773632123e-05,
      "loss": 2.743,
      "step": 4051
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0092215538024902,
      "learning_rate": 1.9893246780399017e-05,
      "loss": 2.5622,
      "step": 4052
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9739224314689636,
      "learning_rate": 1.989318677039417e-05,
      "loss": 2.7401,
      "step": 4053
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.970668613910675,
      "learning_rate": 1.9893126743617684e-05,
      "loss": 2.7324,
      "step": 4054
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9865447282791138,
      "learning_rate": 1.9893066700069663e-05,
      "loss": 2.4485,
      "step": 4055
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9647001624107361,
      "learning_rate": 1.9893006639750206e-05,
      "loss": 2.6313,
      "step": 4056
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.103346586227417,
      "learning_rate": 1.9892946562659416e-05,
      "loss": 2.6272,
      "step": 4057
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9219588041305542,
      "learning_rate": 1.9892886468797394e-05,
      "loss": 2.6148,
      "step": 4058
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9908900856971741,
      "learning_rate": 1.9892826358164244e-05,
      "loss": 2.6626,
      "step": 4059
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9844847917556763,
      "learning_rate": 1.9892766230760063e-05,
      "loss": 2.6909,
      "step": 4060
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0670825242996216,
      "learning_rate": 1.989270608658496e-05,
      "loss": 2.6751,
      "step": 4061
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.944842517375946,
      "learning_rate": 1.9892645925639034e-05,
      "loss": 2.5817,
      "step": 4062
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9940842390060425,
      "learning_rate": 1.9892585747922383e-05,
      "loss": 2.8125,
      "step": 4063
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0779539346694946,
      "learning_rate": 1.9892525553435114e-05,
      "loss": 2.7484,
      "step": 4064
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9912818074226379,
      "learning_rate": 1.9892465342177327e-05,
      "loss": 2.5698,
      "step": 4065
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.991231381893158,
      "learning_rate": 1.9892405114149124e-05,
      "loss": 2.6234,
      "step": 4066
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0897523164749146,
      "learning_rate": 1.989234486935061e-05,
      "loss": 2.9665,
      "step": 4067
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9461666941642761,
      "learning_rate": 1.9892284607781883e-05,
      "loss": 2.7515,
      "step": 4068
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9949373006820679,
      "learning_rate": 1.9892224329443048e-05,
      "loss": 2.7562,
      "step": 4069
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0425388813018799,
      "learning_rate": 1.9892164034334203e-05,
      "loss": 2.82,
      "step": 4070
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0148028135299683,
      "learning_rate": 1.9892103722455455e-05,
      "loss": 2.7675,
      "step": 4071
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9706527590751648,
      "learning_rate": 1.9892043393806908e-05,
      "loss": 2.8107,
      "step": 4072
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9644550681114197,
      "learning_rate": 1.9891983048388657e-05,
      "loss": 2.7933,
      "step": 4073
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0401431322097778,
      "learning_rate": 1.989192268620081e-05,
      "loss": 2.8176,
      "step": 4074
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0086044073104858,
      "learning_rate": 1.9891862307243466e-05,
      "loss": 2.906,
      "step": 4075
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9690864682197571,
      "learning_rate": 1.989180191151673e-05,
      "loss": 2.727,
      "step": 4076
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0740838050842285,
      "learning_rate": 1.98917414990207e-05,
      "loss": 2.8184,
      "step": 4077
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.966557502746582,
      "learning_rate": 1.9891681069755483e-05,
      "loss": 2.9412,
      "step": 4078
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0258461236953735,
      "learning_rate": 1.989162062372118e-05,
      "loss": 2.8477,
      "step": 4079
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9915544390678406,
      "learning_rate": 1.9891560160917898e-05,
      "loss": 2.6757,
      "step": 4080
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9611409306526184,
      "learning_rate": 1.989149968134573e-05,
      "loss": 2.7801,
      "step": 4081
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.017892837524414,
      "learning_rate": 1.9891439185004783e-05,
      "loss": 2.6769,
      "step": 4082
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9699594974517822,
      "learning_rate": 1.9891378671895164e-05,
      "loss": 2.6755,
      "step": 4083
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9694744944572449,
      "learning_rate": 1.989131814201697e-05,
      "loss": 2.802,
      "step": 4084
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0057847499847412,
      "learning_rate": 1.9891257595370307e-05,
      "loss": 2.8419,
      "step": 4085
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0174732208251953,
      "learning_rate": 1.9891197031955275e-05,
      "loss": 2.6585,
      "step": 4086
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9852196574211121,
      "learning_rate": 1.9891136451771973e-05,
      "loss": 3.0179,
      "step": 4087
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.060389518737793,
      "learning_rate": 1.9891075854820512e-05,
      "loss": 2.9981,
      "step": 4088
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9994573593139648,
      "learning_rate": 1.9891015241100994e-05,
      "loss": 2.5101,
      "step": 4089
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.086625576019287,
      "learning_rate": 1.9890954610613513e-05,
      "loss": 2.8462,
      "step": 4090
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9835753440856934,
      "learning_rate": 1.989089396335818e-05,
      "loss": 2.9054,
      "step": 4091
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0299559831619263,
      "learning_rate": 1.9890833299335097e-05,
      "loss": 2.9231,
      "step": 4092
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.011235237121582,
      "learning_rate": 1.9890772618544366e-05,
      "loss": 2.7571,
      "step": 4093
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0295321941375732,
      "learning_rate": 1.9890711920986085e-05,
      "loss": 2.9006,
      "step": 4094
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.02814519405365,
      "learning_rate": 1.9890651206660364e-05,
      "loss": 2.8441,
      "step": 4095
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9421201348304749,
      "learning_rate": 1.98905904755673e-05,
      "loss": 2.673,
      "step": 4096
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0947633981704712,
      "learning_rate": 1.9890529727707003e-05,
      "loss": 2.8765,
      "step": 4097
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9943509101867676,
      "learning_rate": 1.989046896307957e-05,
      "loss": 2.9378,
      "step": 4098
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9787501692771912,
      "learning_rate": 1.9890408181685105e-05,
      "loss": 2.88,
      "step": 4099
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0128896236419678,
      "learning_rate": 1.9890347383523715e-05,
      "loss": 2.6186,
      "step": 4100
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1529245376586914,
      "learning_rate": 1.9890286568595498e-05,
      "loss": 2.5518,
      "step": 4101
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0645915269851685,
      "learning_rate": 1.9890225736900558e-05,
      "loss": 2.7675,
      "step": 4102
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0782324075698853,
      "learning_rate": 1.9890164888438997e-05,
      "loss": 2.7932,
      "step": 4103
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0698702335357666,
      "learning_rate": 1.9890104023210924e-05,
      "loss": 2.9969,
      "step": 4104
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0361722707748413,
      "learning_rate": 1.989004314121644e-05,
      "loss": 2.8562,
      "step": 4105
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9604830741882324,
      "learning_rate": 1.9889982242455644e-05,
      "loss": 2.6355,
      "step": 4106
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0158554315567017,
      "learning_rate": 1.9889921326928644e-05,
      "loss": 2.6547,
      "step": 4107
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0541189908981323,
      "learning_rate": 1.988986039463554e-05,
      "loss": 2.9056,
      "step": 4108
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0546952486038208,
      "learning_rate": 1.9889799445576435e-05,
      "loss": 2.4971,
      "step": 4109
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1494922637939453,
      "learning_rate": 1.9889738479751436e-05,
      "loss": 2.5585,
      "step": 4110
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9971898794174194,
      "learning_rate": 1.9889677497160643e-05,
      "loss": 2.6582,
      "step": 4111
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.024502158164978,
      "learning_rate": 1.9889616497804164e-05,
      "loss": 2.6649,
      "step": 4112
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.005017638206482,
      "learning_rate": 1.9889555481682094e-05,
      "loss": 2.6225,
      "step": 4113
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0169267654418945,
      "learning_rate": 1.9889494448794546e-05,
      "loss": 2.8676,
      "step": 4114
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9857391715049744,
      "learning_rate": 1.9889433399141617e-05,
      "loss": 2.8512,
      "step": 4115
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1449248790740967,
      "learning_rate": 1.988937233272341e-05,
      "loss": 2.6185,
      "step": 4116
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.050583839416504,
      "learning_rate": 1.9889311249540036e-05,
      "loss": 2.7016,
      "step": 4117
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9766750931739807,
      "learning_rate": 1.988925014959159e-05,
      "loss": 2.7934,
      "step": 4118
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9375593066215515,
      "learning_rate": 1.988918903287818e-05,
      "loss": 2.8525,
      "step": 4119
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9917236566543579,
      "learning_rate": 1.988912789939991e-05,
      "loss": 2.8215,
      "step": 4120
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0383070707321167,
      "learning_rate": 1.988906674915688e-05,
      "loss": 2.6035,
      "step": 4121
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9586567878723145,
      "learning_rate": 1.98890055821492e-05,
      "loss": 2.5443,
      "step": 4122
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0683133602142334,
      "learning_rate": 1.9888944398376965e-05,
      "loss": 3.0585,
      "step": 4123
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9491111636161804,
      "learning_rate": 1.9888883197840285e-05,
      "loss": 2.8965,
      "step": 4124
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9682978391647339,
      "learning_rate": 1.988882198053926e-05,
      "loss": 2.7737,
      "step": 4125
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1175956726074219,
      "learning_rate": 1.9888760746474e-05,
      "loss": 2.7142,
      "step": 4126
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9505043029785156,
      "learning_rate": 1.9888699495644604e-05,
      "loss": 2.726,
      "step": 4127
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1047651767730713,
      "learning_rate": 1.9888638228051175e-05,
      "loss": 2.7902,
      "step": 4128
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0761253833770752,
      "learning_rate": 1.9888576943693822e-05,
      "loss": 2.7955,
      "step": 4129
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0752983093261719,
      "learning_rate": 1.988851564257264e-05,
      "loss": 2.671,
      "step": 4130
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0116921663284302,
      "learning_rate": 1.988845432468774e-05,
      "loss": 2.9295,
      "step": 4131
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9647688865661621,
      "learning_rate": 1.9888392990039226e-05,
      "loss": 2.6681,
      "step": 4132
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1032660007476807,
      "learning_rate": 1.98883316386272e-05,
      "loss": 2.5964,
      "step": 4133
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9078021049499512,
      "learning_rate": 1.9888270270451767e-05,
      "loss": 2.7214,
      "step": 4134
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.083648681640625,
      "learning_rate": 1.988820888551303e-05,
      "loss": 2.8885,
      "step": 4135
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9843699336051941,
      "learning_rate": 1.9888147483811093e-05,
      "loss": 2.6676,
      "step": 4136
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.99997478723526,
      "learning_rate": 1.988808606534606e-05,
      "loss": 2.9305,
      "step": 4137
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.05830979347229,
      "learning_rate": 1.9888024630118034e-05,
      "loss": 3.0224,
      "step": 4138
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9967166781425476,
      "learning_rate": 1.988796317812712e-05,
      "loss": 2.8327,
      "step": 4139
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0335302352905273,
      "learning_rate": 1.9887901709373426e-05,
      "loss": 2.9374,
      "step": 4140
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1000514030456543,
      "learning_rate": 1.9887840223857055e-05,
      "loss": 2.5737,
      "step": 4141
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1155645847320557,
      "learning_rate": 1.9887778721578104e-05,
      "loss": 2.5369,
      "step": 4142
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9418310523033142,
      "learning_rate": 1.9887717202536687e-05,
      "loss": 2.7072,
      "step": 4143
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0776863098144531,
      "learning_rate": 1.98876556667329e-05,
      "loss": 2.8818,
      "step": 4144
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.07485830783844,
      "learning_rate": 1.9887594114166854e-05,
      "loss": 2.7078,
      "step": 4145
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9397582411766052,
      "learning_rate": 1.988753254483865e-05,
      "loss": 2.5021,
      "step": 4146
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9963379502296448,
      "learning_rate": 1.988747095874839e-05,
      "loss": 2.9066,
      "step": 4147
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.070980429649353,
      "learning_rate": 1.9887409355896187e-05,
      "loss": 2.6692,
      "step": 4148
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9335522651672363,
      "learning_rate": 1.9887347736282136e-05,
      "loss": 2.7439,
      "step": 4149
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0229521989822388,
      "learning_rate": 1.9887286099906344e-05,
      "loss": 2.7804,
      "step": 4150
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0539395809173584,
      "learning_rate": 1.988722444676892e-05,
      "loss": 2.7313,
      "step": 4151
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0269007682800293,
      "learning_rate": 1.9887162776869963e-05,
      "loss": 2.6096,
      "step": 4152
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1426080465316772,
      "learning_rate": 1.988710109020958e-05,
      "loss": 2.7722,
      "step": 4153
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.040516972541809,
      "learning_rate": 1.9887039386787876e-05,
      "loss": 2.7736,
      "step": 4154
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0935763120651245,
      "learning_rate": 1.9886977666604953e-05,
      "loss": 2.5944,
      "step": 4155
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.066113829612732,
      "learning_rate": 1.9886915929660922e-05,
      "loss": 2.7849,
      "step": 4156
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9963335394859314,
      "learning_rate": 1.988685417595588e-05,
      "loss": 2.7761,
      "step": 4157
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1228270530700684,
      "learning_rate": 1.9886792405489938e-05,
      "loss": 2.7205,
      "step": 4158
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1034433841705322,
      "learning_rate": 1.988673061826319e-05,
      "loss": 2.6402,
      "step": 4159
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9870920181274414,
      "learning_rate": 1.9886668814275757e-05,
      "loss": 2.7063,
      "step": 4160
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9904559850692749,
      "learning_rate": 1.988660699352773e-05,
      "loss": 2.7669,
      "step": 4161
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9355509877204895,
      "learning_rate": 1.988654515601922e-05,
      "loss": 2.7487,
      "step": 4162
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9879643321037292,
      "learning_rate": 1.9886483301750334e-05,
      "loss": 2.7425,
      "step": 4163
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1409388780593872,
      "learning_rate": 1.988642143072117e-05,
      "loss": 3.023,
      "step": 4164
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0458394289016724,
      "learning_rate": 1.988635954293184e-05,
      "loss": 2.8283,
      "step": 4165
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1061203479766846,
      "learning_rate": 1.9886297638382442e-05,
      "loss": 2.6718,
      "step": 4166
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0503469705581665,
      "learning_rate": 1.9886235717073087e-05,
      "loss": 2.9084,
      "step": 4167
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9521815776824951,
      "learning_rate": 1.9886173779003878e-05,
      "loss": 2.7806,
      "step": 4168
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.046924352645874,
      "learning_rate": 1.9886111824174915e-05,
      "loss": 2.7393,
      "step": 4169
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0369080305099487,
      "learning_rate": 1.988604985258631e-05,
      "loss": 2.6354,
      "step": 4170
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.961005687713623,
      "learning_rate": 1.9885987864238165e-05,
      "loss": 2.6352,
      "step": 4171
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0937745571136475,
      "learning_rate": 1.9885925859130587e-05,
      "loss": 2.7954,
      "step": 4172
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9664871096611023,
      "learning_rate": 1.988586383726368e-05,
      "loss": 2.6751,
      "step": 4173
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0608274936676025,
      "learning_rate": 1.9885801798637545e-05,
      "loss": 2.7228,
      "step": 4174
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0502322912216187,
      "learning_rate": 1.9885739743252294e-05,
      "loss": 2.7017,
      "step": 4175
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.008993148803711,
      "learning_rate": 1.9885677671108028e-05,
      "loss": 2.792,
      "step": 4176
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0409166812896729,
      "learning_rate": 1.9885615582204856e-05,
      "loss": 2.6426,
      "step": 4177
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9491252303123474,
      "learning_rate": 1.9885553476542877e-05,
      "loss": 2.8664,
      "step": 4178
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.096417784690857,
      "learning_rate": 1.98854913541222e-05,
      "loss": 2.7691,
      "step": 4179
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0230755805969238,
      "learning_rate": 1.9885429214942935e-05,
      "loss": 2.7,
      "step": 4180
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9635415077209473,
      "learning_rate": 1.988536705900518e-05,
      "loss": 2.6984,
      "step": 4181
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9719972610473633,
      "learning_rate": 1.9885304886309044e-05,
      "loss": 2.7442,
      "step": 4182
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9277112483978271,
      "learning_rate": 1.988524269685463e-05,
      "loss": 2.7863,
      "step": 4183
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0255720615386963,
      "learning_rate": 1.9885180490642045e-05,
      "loss": 2.7864,
      "step": 4184
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9811806678771973,
      "learning_rate": 1.9885118267671393e-05,
      "loss": 2.8542,
      "step": 4185
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1127561330795288,
      "learning_rate": 1.988505602794278e-05,
      "loss": 2.8245,
      "step": 4186
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9732471108436584,
      "learning_rate": 1.988499377145632e-05,
      "loss": 2.7047,
      "step": 4187
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0206608772277832,
      "learning_rate": 1.9884931498212103e-05,
      "loss": 2.7067,
      "step": 4188
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0370466709136963,
      "learning_rate": 1.9884869208210245e-05,
      "loss": 2.6638,
      "step": 4189
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9209444522857666,
      "learning_rate": 1.988480690145085e-05,
      "loss": 2.949,
      "step": 4190
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9953259229660034,
      "learning_rate": 1.988474457793402e-05,
      "loss": 2.6529,
      "step": 4191
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9910915493965149,
      "learning_rate": 1.9884682237659864e-05,
      "loss": 2.8855,
      "step": 4192
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0020118951797485,
      "learning_rate": 1.9884619880628488e-05,
      "loss": 2.652,
      "step": 4193
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.993247389793396,
      "learning_rate": 1.9884557506839997e-05,
      "loss": 2.8372,
      "step": 4194
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9172185063362122,
      "learning_rate": 1.9884495116294492e-05,
      "loss": 2.4946,
      "step": 4195
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0477467775344849,
      "learning_rate": 1.9884432708992086e-05,
      "loss": 2.6788,
      "step": 4196
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9636563062667847,
      "learning_rate": 1.9884370284932885e-05,
      "loss": 2.8766,
      "step": 4197
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0079898834228516,
      "learning_rate": 1.988430784411699e-05,
      "loss": 2.8929,
      "step": 4198
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9147552847862244,
      "learning_rate": 1.9884245386544507e-05,
      "loss": 2.7287,
      "step": 4199
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9858420491218567,
      "learning_rate": 1.9884182912215547e-05,
      "loss": 2.9464,
      "step": 4200
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1153651475906372,
      "learning_rate": 1.988412042113021e-05,
      "loss": 2.8916,
      "step": 4201
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9825493693351746,
      "learning_rate": 1.9884057913288604e-05,
      "loss": 2.8496,
      "step": 4202
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.042718529701233,
      "learning_rate": 1.9883995388690837e-05,
      "loss": 2.7209,
      "step": 4203
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0470802783966064,
      "learning_rate": 1.988393284733701e-05,
      "loss": 2.8653,
      "step": 4204
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.004944086074829,
      "learning_rate": 1.9883870289227232e-05,
      "loss": 2.8617,
      "step": 4205
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0294609069824219,
      "learning_rate": 1.9883807714361614e-05,
      "loss": 2.7588,
      "step": 4206
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1694763898849487,
      "learning_rate": 1.988374512274026e-05,
      "loss": 2.939,
      "step": 4207
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0491797924041748,
      "learning_rate": 1.9883682514363266e-05,
      "loss": 2.7923,
      "step": 4208
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0172474384307861,
      "learning_rate": 1.988361988923075e-05,
      "loss": 2.5689,
      "step": 4209
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0437612533569336,
      "learning_rate": 1.9883557247342812e-05,
      "loss": 2.7505,
      "step": 4210
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0749266147613525,
      "learning_rate": 1.9883494588699564e-05,
      "loss": 2.8579,
      "step": 4211
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8714096546173096,
      "learning_rate": 1.9883431913301103e-05,
      "loss": 2.6328,
      "step": 4212
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0490862131118774,
      "learning_rate": 1.9883369221147545e-05,
      "loss": 2.8169,
      "step": 4213
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0233311653137207,
      "learning_rate": 1.988330651223899e-05,
      "loss": 2.5494,
      "step": 4214
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0095865726470947,
      "learning_rate": 1.9883243786575544e-05,
      "loss": 2.6445,
      "step": 4215
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9921244382858276,
      "learning_rate": 1.988318104415732e-05,
      "loss": 2.7054,
      "step": 4216
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0445276498794556,
      "learning_rate": 1.9883118284984417e-05,
      "loss": 2.6801,
      "step": 4217
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9538304805755615,
      "learning_rate": 1.9883055509056945e-05,
      "loss": 2.6374,
      "step": 4218
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9217702150344849,
      "learning_rate": 1.9882992716375006e-05,
      "loss": 2.7005,
      "step": 4219
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9666761755943298,
      "learning_rate": 1.9882929906938714e-05,
      "loss": 2.4952,
      "step": 4220
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0456377267837524,
      "learning_rate": 1.9882867080748175e-05,
      "loss": 2.8989,
      "step": 4221
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.0545670986175537,
      "learning_rate": 1.9882804237803487e-05,
      "loss": 2.6457,
      "step": 4222
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.9240713715553284,
      "learning_rate": 1.988274137810476e-05,
      "loss": 2.534,
      "step": 4223
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.1692684888839722,
      "learning_rate": 1.9882678501652107e-05,
      "loss": 2.8217,
      "step": 4224
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0123586654663086,
      "learning_rate": 1.988261560844563e-05,
      "loss": 2.7386,
      "step": 4225
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0742652416229248,
      "learning_rate": 1.9882552698485432e-05,
      "loss": 2.6252,
      "step": 4226
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9729002118110657,
      "learning_rate": 1.988248977177163e-05,
      "loss": 2.9499,
      "step": 4227
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9514116644859314,
      "learning_rate": 1.9882426828304316e-05,
      "loss": 2.8345,
      "step": 4228
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0539218187332153,
      "learning_rate": 1.9882363868083607e-05,
      "loss": 2.8805,
      "step": 4229
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8994390368461609,
      "learning_rate": 1.988230089110961e-05,
      "loss": 2.8588,
      "step": 4230
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.017274022102356,
      "learning_rate": 1.9882237897382424e-05,
      "loss": 2.6659,
      "step": 4231
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9382221698760986,
      "learning_rate": 1.988217488690216e-05,
      "loss": 2.5276,
      "step": 4232
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9096011519432068,
      "learning_rate": 1.988211185966893e-05,
      "loss": 2.721,
      "step": 4233
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9349681735038757,
      "learning_rate": 1.9882048815682837e-05,
      "loss": 2.9213,
      "step": 4234
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0133564472198486,
      "learning_rate": 1.9881985754943985e-05,
      "loss": 2.6187,
      "step": 4235
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1876667737960815,
      "learning_rate": 1.9881922677452483e-05,
      "loss": 2.5664,
      "step": 4236
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2305659055709839,
      "learning_rate": 1.9881859583208437e-05,
      "loss": 2.5706,
      "step": 4237
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9403977394104004,
      "learning_rate": 1.988179647221196e-05,
      "loss": 2.6669,
      "step": 4238
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9298800826072693,
      "learning_rate": 1.988173334446315e-05,
      "loss": 2.7889,
      "step": 4239
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.096944808959961,
      "learning_rate": 1.9881670199962118e-05,
      "loss": 2.6195,
      "step": 4240
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0382702350616455,
      "learning_rate": 1.9881607038708974e-05,
      "loss": 2.736,
      "step": 4241
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9596199989318848,
      "learning_rate": 1.9881543860703822e-05,
      "loss": 2.7054,
      "step": 4242
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9760822653770447,
      "learning_rate": 1.9881480665946767e-05,
      "loss": 2.8668,
      "step": 4243
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9371407628059387,
      "learning_rate": 1.988141745443792e-05,
      "loss": 2.7281,
      "step": 4244
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.949709415435791,
      "learning_rate": 1.9881354226177387e-05,
      "loss": 2.8627,
      "step": 4245
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.044274091720581,
      "learning_rate": 1.9881290981165275e-05,
      "loss": 2.8662,
      "step": 4246
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0188071727752686,
      "learning_rate": 1.988122771940169e-05,
      "loss": 2.8375,
      "step": 4247
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2443655729293823,
      "learning_rate": 1.988116444088674e-05,
      "loss": 2.7466,
      "step": 4248
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9375529289245605,
      "learning_rate": 1.9881101145620533e-05,
      "loss": 2.7128,
      "step": 4249
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9709506630897522,
      "learning_rate": 1.9881037833603175e-05,
      "loss": 2.7542,
      "step": 4250
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.03452467918396,
      "learning_rate": 1.9880974504834774e-05,
      "loss": 2.8007,
      "step": 4251
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0449124574661255,
      "learning_rate": 1.988091115931544e-05,
      "loss": 2.6591,
      "step": 4252
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.128287434577942,
      "learning_rate": 1.9880847797045277e-05,
      "loss": 2.7968,
      "step": 4253
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9559566974639893,
      "learning_rate": 1.9880784418024394e-05,
      "loss": 2.4936,
      "step": 4254
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9644007086753845,
      "learning_rate": 1.9880721022252896e-05,
      "loss": 2.7147,
      "step": 4255
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0343167781829834,
      "learning_rate": 1.9880657609730894e-05,
      "loss": 2.6025,
      "step": 4256
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0313640832901,
      "learning_rate": 1.9880594180458493e-05,
      "loss": 2.7212,
      "step": 4257
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2053929567337036,
      "learning_rate": 1.98805307344358e-05,
      "loss": 2.9048,
      "step": 4258
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9896961450576782,
      "learning_rate": 1.9880467271662928e-05,
      "loss": 2.7422,
      "step": 4259
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0110282897949219,
      "learning_rate": 1.9880403792139976e-05,
      "loss": 2.8666,
      "step": 4260
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9368579983711243,
      "learning_rate": 1.988034029586706e-05,
      "loss": 2.7226,
      "step": 4261
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.071534514427185,
      "learning_rate": 1.988027678284428e-05,
      "loss": 2.7844,
      "step": 4262
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0144003629684448,
      "learning_rate": 1.988021325307175e-05,
      "loss": 2.7547,
      "step": 4263
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1247714757919312,
      "learning_rate": 1.9880149706549572e-05,
      "loss": 2.5959,
      "step": 4264
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0129128694534302,
      "learning_rate": 1.988008614327786e-05,
      "loss": 2.9945,
      "step": 4265
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0007598400115967,
      "learning_rate": 1.9880022563256716e-05,
      "loss": 2.7536,
      "step": 4266
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1503671407699585,
      "learning_rate": 1.9879958966486254e-05,
      "loss": 2.857,
      "step": 4267
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0869406461715698,
      "learning_rate": 1.9879895352966577e-05,
      "loss": 2.6949,
      "step": 4268
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9873620271682739,
      "learning_rate": 1.9879831722697795e-05,
      "loss": 2.523,
      "step": 4269
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9224667549133301,
      "learning_rate": 1.987976807568001e-05,
      "loss": 2.7942,
      "step": 4270
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9917232990264893,
      "learning_rate": 1.9879704411913335e-05,
      "loss": 2.8002,
      "step": 4271
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9320641160011292,
      "learning_rate": 1.9879640731397884e-05,
      "loss": 2.6886,
      "step": 4272
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1440694332122803,
      "learning_rate": 1.9879577034133753e-05,
      "loss": 2.8288,
      "step": 4273
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9623903632164001,
      "learning_rate": 1.987951332012106e-05,
      "loss": 2.8924,
      "step": 4274
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.988523006439209,
      "learning_rate": 1.9879449589359906e-05,
      "loss": 2.8069,
      "step": 4275
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.068102478981018,
      "learning_rate": 1.9879385841850404e-05,
      "loss": 2.7289,
      "step": 4276
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9519920945167542,
      "learning_rate": 1.9879322077592658e-05,
      "loss": 2.6407,
      "step": 4277
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.979326605796814,
      "learning_rate": 1.9879258296586777e-05,
      "loss": 2.6614,
      "step": 4278
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0593211650848389,
      "learning_rate": 1.987919449883287e-05,
      "loss": 2.9751,
      "step": 4279
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9078630805015564,
      "learning_rate": 1.9879130684331047e-05,
      "loss": 2.5221,
      "step": 4280
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9996840953826904,
      "learning_rate": 1.987906685308141e-05,
      "loss": 2.7676,
      "step": 4281
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9381740093231201,
      "learning_rate": 1.987900300508408e-05,
      "loss": 2.5775,
      "step": 4282
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9962206482887268,
      "learning_rate": 1.987893914033915e-05,
      "loss": 2.8701,
      "step": 4283
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1974917650222778,
      "learning_rate": 1.987887525884674e-05,
      "loss": 2.5514,
      "step": 4284
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.079612374305725,
      "learning_rate": 1.9878811360606947e-05,
      "loss": 2.6633,
      "step": 4285
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1157585382461548,
      "learning_rate": 1.9878747445619892e-05,
      "loss": 2.8184,
      "step": 4286
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9570927023887634,
      "learning_rate": 1.9878683513885673e-05,
      "loss": 2.8819,
      "step": 4287
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9475725889205933,
      "learning_rate": 1.9878619565404404e-05,
      "loss": 2.5859,
      "step": 4288
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9913005828857422,
      "learning_rate": 1.987855560017619e-05,
      "loss": 2.5713,
      "step": 4289
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9103502631187439,
      "learning_rate": 1.9878491618201143e-05,
      "loss": 2.7304,
      "step": 4290
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0186076164245605,
      "learning_rate": 1.987842761947937e-05,
      "loss": 2.8058,
      "step": 4291
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.039965033531189,
      "learning_rate": 1.9878363604010977e-05,
      "loss": 2.8223,
      "step": 4292
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1545500755310059,
      "learning_rate": 1.987829957179608e-05,
      "loss": 2.7333,
      "step": 4293
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9742302298545837,
      "learning_rate": 1.9878235522834778e-05,
      "loss": 2.6899,
      "step": 4294
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9764646887779236,
      "learning_rate": 1.987817145712718e-05,
      "loss": 2.5113,
      "step": 4295
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0118218660354614,
      "learning_rate": 1.9878107374673407e-05,
      "loss": 2.5889,
      "step": 4296
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9921877980232239,
      "learning_rate": 1.9878043275473552e-05,
      "loss": 2.5612,
      "step": 4297
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9251550436019897,
      "learning_rate": 1.9877979159527737e-05,
      "loss": 2.6271,
      "step": 4298
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0917495489120483,
      "learning_rate": 1.987791502683606e-05,
      "loss": 2.6397,
      "step": 4299
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9851850867271423,
      "learning_rate": 1.9877850877398636e-05,
      "loss": 2.9155,
      "step": 4300
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0937541723251343,
      "learning_rate": 1.987778671121557e-05,
      "loss": 2.7303,
      "step": 4301
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0080220699310303,
      "learning_rate": 1.9877722528286976e-05,
      "loss": 2.6846,
      "step": 4302
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8545418381690979,
      "learning_rate": 1.987765832861296e-05,
      "loss": 2.7583,
      "step": 4303
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9787895083427429,
      "learning_rate": 1.9877594112193628e-05,
      "loss": 2.769,
      "step": 4304
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0277725458145142,
      "learning_rate": 1.987752987902909e-05,
      "loss": 2.9166,
      "step": 4305
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0327894687652588,
      "learning_rate": 1.9877465629119463e-05,
      "loss": 2.6918,
      "step": 4306
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9829155802726746,
      "learning_rate": 1.9877401362464842e-05,
      "loss": 2.76,
      "step": 4307
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0606330633163452,
      "learning_rate": 1.9877337079065343e-05,
      "loss": 2.8043,
      "step": 4308
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.987706184387207,
      "learning_rate": 1.9877272778921077e-05,
      "loss": 2.8547,
      "step": 4309
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9915091395378113,
      "learning_rate": 1.987720846203215e-05,
      "loss": 2.8569,
      "step": 4310
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8769580721855164,
      "learning_rate": 1.9877144128398675e-05,
      "loss": 2.8447,
      "step": 4311
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0545860528945923,
      "learning_rate": 1.9877079778020757e-05,
      "loss": 2.8058,
      "step": 4312
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1072416305541992,
      "learning_rate": 1.9877015410898502e-05,
      "loss": 2.4842,
      "step": 4313
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7167625427246094,
      "learning_rate": 1.987695102703203e-05,
      "loss": 2.8696,
      "step": 4314
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.004349708557129,
      "learning_rate": 1.987688662642144e-05,
      "loss": 2.8475,
      "step": 4315
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9634398221969604,
      "learning_rate": 1.9876822209066843e-05,
      "loss": 2.893,
      "step": 4316
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9737861752510071,
      "learning_rate": 1.9876757774968355e-05,
      "loss": 2.996,
      "step": 4317
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0581626892089844,
      "learning_rate": 1.9876693324126075e-05,
      "loss": 2.6692,
      "step": 4318
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0093669891357422,
      "learning_rate": 1.9876628856540118e-05,
      "loss": 2.7504,
      "step": 4319
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9491541981697083,
      "learning_rate": 1.9876564372210594e-05,
      "loss": 2.4742,
      "step": 4320
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.134181261062622,
      "learning_rate": 1.987649987113761e-05,
      "loss": 2.7121,
      "step": 4321
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0223979949951172,
      "learning_rate": 1.987643535332128e-05,
      "loss": 2.5074,
      "step": 4322
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0073291063308716,
      "learning_rate": 1.9876370818761704e-05,
      "loss": 2.5496,
      "step": 4323
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8938237428665161,
      "learning_rate": 1.9876306267459e-05,
      "loss": 2.6019,
      "step": 4324
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0507947206497192,
      "learning_rate": 1.9876241699413275e-05,
      "loss": 2.6253,
      "step": 4325
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.168843388557434,
      "learning_rate": 1.9876177114624637e-05,
      "loss": 2.7862,
      "step": 4326
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8991703391075134,
      "learning_rate": 1.9876112513093197e-05,
      "loss": 2.6588,
      "step": 4327
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.005210518836975,
      "learning_rate": 1.9876047894819062e-05,
      "loss": 2.7017,
      "step": 4328
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0005306005477905,
      "learning_rate": 1.9875983259802346e-05,
      "loss": 2.9495,
      "step": 4329
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3479652404785156,
      "learning_rate": 1.9875918608043157e-05,
      "loss": 2.5536,
      "step": 4330
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9553754329681396,
      "learning_rate": 1.9875853939541602e-05,
      "loss": 2.7466,
      "step": 4331
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9164727330207825,
      "learning_rate": 1.987578925429779e-05,
      "loss": 2.7828,
      "step": 4332
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0803256034851074,
      "learning_rate": 1.9875724552311837e-05,
      "loss": 2.5917,
      "step": 4333
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.053929090499878,
      "learning_rate": 1.9875659833583848e-05,
      "loss": 2.7789,
      "step": 4334
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9802798628807068,
      "learning_rate": 1.987559509811393e-05,
      "loss": 2.4425,
      "step": 4335
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9318438768386841,
      "learning_rate": 1.98755303459022e-05,
      "loss": 2.8422,
      "step": 4336
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.951994001865387,
      "learning_rate": 1.987546557694876e-05,
      "loss": 2.6901,
      "step": 4337
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.981351375579834,
      "learning_rate": 1.9875400791253727e-05,
      "loss": 2.6543,
      "step": 4338
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9576754570007324,
      "learning_rate": 1.9875335988817208e-05,
      "loss": 2.4677,
      "step": 4339
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9871758222579956,
      "learning_rate": 1.987527116963931e-05,
      "loss": 2.7578,
      "step": 4340
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9932180047035217,
      "learning_rate": 1.9875206333720145e-05,
      "loss": 2.8804,
      "step": 4341
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9446271061897278,
      "learning_rate": 1.9875141481059824e-05,
      "loss": 2.6359,
      "step": 4342
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0143448114395142,
      "learning_rate": 1.9875076611658456e-05,
      "loss": 2.6651,
      "step": 4343
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0654956102371216,
      "learning_rate": 1.9875011725516153e-05,
      "loss": 2.7398,
      "step": 4344
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0239347219467163,
      "learning_rate": 1.9874946822633022e-05,
      "loss": 2.8117,
      "step": 4345
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.05210542678833,
      "learning_rate": 1.9874881903009174e-05,
      "loss": 2.7221,
      "step": 4346
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.007886290550232,
      "learning_rate": 1.9874816966644718e-05,
      "loss": 2.7735,
      "step": 4347
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9984157681465149,
      "learning_rate": 1.9874752013539768e-05,
      "loss": 2.925,
      "step": 4348
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.088057041168213,
      "learning_rate": 1.987468704369443e-05,
      "loss": 2.7735,
      "step": 4349
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9217303395271301,
      "learning_rate": 1.9874622057108816e-05,
      "loss": 2.4731,
      "step": 4350
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9700737595558167,
      "learning_rate": 1.9874557053783035e-05,
      "loss": 2.6576,
      "step": 4351
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0414351224899292,
      "learning_rate": 1.98744920337172e-05,
      "loss": 2.7446,
      "step": 4352
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9617871046066284,
      "learning_rate": 1.9874426996911414e-05,
      "loss": 2.773,
      "step": 4353
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.99643874168396,
      "learning_rate": 1.9874361943365795e-05,
      "loss": 2.8811,
      "step": 4354
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9238592386245728,
      "learning_rate": 1.9874296873080453e-05,
      "loss": 2.6361,
      "step": 4355
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9788423776626587,
      "learning_rate": 1.9874231786055494e-05,
      "loss": 2.4911,
      "step": 4356
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.048678994178772,
      "learning_rate": 1.987416668229103e-05,
      "loss": 2.6568,
      "step": 4357
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.96372389793396,
      "learning_rate": 1.987410156178717e-05,
      "loss": 2.5802,
      "step": 4358
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9893758296966553,
      "learning_rate": 1.987403642454403e-05,
      "loss": 2.6424,
      "step": 4359
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0005614757537842,
      "learning_rate": 1.9873971270561717e-05,
      "loss": 2.7672,
      "step": 4360
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9973781704902649,
      "learning_rate": 1.9873906099840335e-05,
      "loss": 2.5502,
      "step": 4361
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9090601801872253,
      "learning_rate": 1.9873840912380003e-05,
      "loss": 2.7226,
      "step": 4362
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.013594388961792,
      "learning_rate": 1.9873775708180833e-05,
      "loss": 2.8482,
      "step": 4363
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0162039995193481,
      "learning_rate": 1.9873710487242926e-05,
      "loss": 2.8403,
      "step": 4364
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1915981769561768,
      "learning_rate": 1.98736452495664e-05,
      "loss": 2.8005,
      "step": 4365
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0032062530517578,
      "learning_rate": 1.9873579995151367e-05,
      "loss": 2.8466,
      "step": 4366
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9271165728569031,
      "learning_rate": 1.9873514723997932e-05,
      "loss": 2.6882,
      "step": 4367
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0297492742538452,
      "learning_rate": 1.9873449436106208e-05,
      "loss": 2.971,
      "step": 4368
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9047540426254272,
      "learning_rate": 1.9873384131476303e-05,
      "loss": 2.8994,
      "step": 4369
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.003727912902832,
      "learning_rate": 1.9873318810108332e-05,
      "loss": 2.8509,
      "step": 4370
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0053836107254028,
      "learning_rate": 1.9873253472002404e-05,
      "loss": 2.8319,
      "step": 4371
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.240936040878296,
      "learning_rate": 1.987318811715863e-05,
      "loss": 2.8995,
      "step": 4372
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0007145404815674,
      "learning_rate": 1.987312274557712e-05,
      "loss": 2.7155,
      "step": 4373
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.021417260169983,
      "learning_rate": 1.987305735725799e-05,
      "loss": 2.73,
      "step": 4374
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9932168126106262,
      "learning_rate": 1.987299195220134e-05,
      "loss": 2.805,
      "step": 4375
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8648537993431091,
      "learning_rate": 1.9872926530407293e-05,
      "loss": 2.7574,
      "step": 4376
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9170839190483093,
      "learning_rate": 1.987286109187595e-05,
      "loss": 2.6926,
      "step": 4377
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0147879123687744,
      "learning_rate": 1.9872795636607426e-05,
      "loss": 2.7663,
      "step": 4378
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0818977355957031,
      "learning_rate": 1.987273016460183e-05,
      "loss": 2.5872,
      "step": 4379
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0146604776382446,
      "learning_rate": 1.987266467585928e-05,
      "loss": 2.8665,
      "step": 4380
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.054622769355774,
      "learning_rate": 1.9872599170379882e-05,
      "loss": 2.6094,
      "step": 4381
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0625826120376587,
      "learning_rate": 1.9872533648163747e-05,
      "loss": 2.8769,
      "step": 4382
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9494690895080566,
      "learning_rate": 1.9872468109210983e-05,
      "loss": 2.8408,
      "step": 4383
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9873760342597961,
      "learning_rate": 1.9872402553521704e-05,
      "loss": 2.4594,
      "step": 4384
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0163224935531616,
      "learning_rate": 1.987233698109602e-05,
      "loss": 2.7181,
      "step": 4385
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0104756355285645,
      "learning_rate": 1.9872271391934047e-05,
      "loss": 2.8168,
      "step": 4386
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.958953857421875,
      "learning_rate": 1.9872205786035895e-05,
      "loss": 2.7824,
      "step": 4387
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1288446187973022,
      "learning_rate": 1.9872140163401666e-05,
      "loss": 2.8225,
      "step": 4388
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0173099040985107,
      "learning_rate": 1.9872074524031483e-05,
      "loss": 2.7373,
      "step": 4389
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9178646802902222,
      "learning_rate": 1.9872008867925453e-05,
      "loss": 2.7635,
      "step": 4390
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9773545861244202,
      "learning_rate": 1.9871943195083685e-05,
      "loss": 2.9041,
      "step": 4391
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8973696827888489,
      "learning_rate": 1.987187750550629e-05,
      "loss": 2.6366,
      "step": 4392
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9473910927772522,
      "learning_rate": 1.9871811799193385e-05,
      "loss": 2.549,
      "step": 4393
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9953824281692505,
      "learning_rate": 1.9871746076145078e-05,
      "loss": 2.8557,
      "step": 4394
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9507832527160645,
      "learning_rate": 1.987168033636148e-05,
      "loss": 2.4265,
      "step": 4395
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0336081981658936,
      "learning_rate": 1.9871614579842702e-05,
      "loss": 2.8148,
      "step": 4396
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9804611802101135,
      "learning_rate": 1.9871548806588854e-05,
      "loss": 2.6072,
      "step": 4397
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0131069421768188,
      "learning_rate": 1.987148301660005e-05,
      "loss": 2.669,
      "step": 4398
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9559462070465088,
      "learning_rate": 1.9871417209876405e-05,
      "loss": 2.748,
      "step": 4399
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0299205780029297,
      "learning_rate": 1.9871351386418026e-05,
      "loss": 2.6064,
      "step": 4400
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1673872470855713,
      "learning_rate": 1.987128554622502e-05,
      "loss": 2.9581,
      "step": 4401
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0235084295272827,
      "learning_rate": 1.987121968929751e-05,
      "loss": 2.6535,
      "step": 4402
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0080792903900146,
      "learning_rate": 1.9871153815635596e-05,
      "loss": 2.8854,
      "step": 4403
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.235355019569397,
      "learning_rate": 1.9871087925239402e-05,
      "loss": 2.7097,
      "step": 4404
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.027327299118042,
      "learning_rate": 1.987102201810903e-05,
      "loss": 2.7332,
      "step": 4405
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1296669244766235,
      "learning_rate": 1.9870956094244595e-05,
      "loss": 2.7745,
      "step": 4406
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0260543823242188,
      "learning_rate": 1.987089015364621e-05,
      "loss": 2.8435,
      "step": 4407
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0438826084136963,
      "learning_rate": 1.9870824196313982e-05,
      "loss": 2.757,
      "step": 4408
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1240335702896118,
      "learning_rate": 1.9870758222248026e-05,
      "loss": 2.7379,
      "step": 4409
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9866347908973694,
      "learning_rate": 1.9870692231448457e-05,
      "loss": 2.8676,
      "step": 4410
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0512568950653076,
      "learning_rate": 1.9870626223915382e-05,
      "loss": 2.7452,
      "step": 4411
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.033617615699768,
      "learning_rate": 1.9870560199648915e-05,
      "loss": 2.7251,
      "step": 4412
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.002057671546936,
      "learning_rate": 1.987049415864917e-05,
      "loss": 2.4823,
      "step": 4413
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1939843893051147,
      "learning_rate": 1.9870428100916253e-05,
      "loss": 2.9106,
      "step": 4414
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9564114212989807,
      "learning_rate": 1.987036202645028e-05,
      "loss": 2.8531,
      "step": 4415
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.939525842666626,
      "learning_rate": 1.9870295935251366e-05,
      "loss": 2.7696,
      "step": 4416
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9446777701377869,
      "learning_rate": 1.9870229827319615e-05,
      "loss": 2.8706,
      "step": 4417
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.121150016784668,
      "learning_rate": 1.9870163702655145e-05,
      "loss": 2.8475,
      "step": 4418
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9983561635017395,
      "learning_rate": 1.9870097561258068e-05,
      "loss": 2.4714,
      "step": 4419
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1761130094528198,
      "learning_rate": 1.9870031403128495e-05,
      "loss": 2.8351,
      "step": 4420
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9824849963188171,
      "learning_rate": 1.986996522826654e-05,
      "loss": 2.5971,
      "step": 4421
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9366732835769653,
      "learning_rate": 1.986989903667231e-05,
      "loss": 2.5915,
      "step": 4422
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9190374612808228,
      "learning_rate": 1.9869832828345918e-05,
      "loss": 2.8389,
      "step": 4423
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0436549186706543,
      "learning_rate": 1.9869766603287485e-05,
      "loss": 2.8336,
      "step": 4424
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0306434631347656,
      "learning_rate": 1.986970036149711e-05,
      "loss": 2.826,
      "step": 4425
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0957822799682617,
      "learning_rate": 1.986963410297492e-05,
      "loss": 2.7081,
      "step": 4426
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9670608043670654,
      "learning_rate": 1.9869567827721016e-05,
      "loss": 3.0284,
      "step": 4427
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1307157278060913,
      "learning_rate": 1.9869501535735515e-05,
      "loss": 2.7673,
      "step": 4428
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9630732536315918,
      "learning_rate": 1.9869435227018527e-05,
      "loss": 2.7208,
      "step": 4429
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0381803512573242,
      "learning_rate": 1.986936890157017e-05,
      "loss": 2.7331,
      "step": 4430
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1098759174346924,
      "learning_rate": 1.9869302559390545e-05,
      "loss": 2.9873,
      "step": 4431
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0781433582305908,
      "learning_rate": 1.9869236200479774e-05,
      "loss": 2.8175,
      "step": 4432
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0793145895004272,
      "learning_rate": 1.986916982483797e-05,
      "loss": 2.636,
      "step": 4433
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9729543924331665,
      "learning_rate": 1.9869103432465242e-05,
      "loss": 2.7598,
      "step": 4434
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.013166904449463,
      "learning_rate": 1.98690370233617e-05,
      "loss": 2.8217,
      "step": 4435
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9976076483726501,
      "learning_rate": 1.9868970597527463e-05,
      "loss": 2.8758,
      "step": 4436
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1106795072555542,
      "learning_rate": 1.986890415496264e-05,
      "loss": 2.7764,
      "step": 4437
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9610336422920227,
      "learning_rate": 1.9868837695667346e-05,
      "loss": 2.9108,
      "step": 4438
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9747051000595093,
      "learning_rate": 1.986877121964169e-05,
      "loss": 2.8294,
      "step": 4439
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.141859531402588,
      "learning_rate": 1.9868704726885785e-05,
      "loss": 2.843,
      "step": 4440
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2532289028167725,
      "learning_rate": 1.9868638217399746e-05,
      "loss": 2.7114,
      "step": 4441
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.097046136856079,
      "learning_rate": 1.9868571691183685e-05,
      "loss": 2.6362,
      "step": 4442
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9281219244003296,
      "learning_rate": 1.9868505148237716e-05,
      "loss": 2.6549,
      "step": 4443
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0047506093978882,
      "learning_rate": 1.986843858856195e-05,
      "loss": 2.6078,
      "step": 4444
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0261720418930054,
      "learning_rate": 1.98683720121565e-05,
      "loss": 2.8386,
      "step": 4445
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0119600296020508,
      "learning_rate": 1.9868305419021478e-05,
      "loss": 2.6492,
      "step": 4446
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9669832587242126,
      "learning_rate": 1.9868238809157e-05,
      "loss": 2.7412,
      "step": 4447
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0665357112884521,
      "learning_rate": 1.9868172182563177e-05,
      "loss": 2.7617,
      "step": 4448
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9629961252212524,
      "learning_rate": 1.9868105539240118e-05,
      "loss": 2.9166,
      "step": 4449
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0443696975708008,
      "learning_rate": 1.986803887918795e-05,
      "loss": 2.7614,
      "step": 4450
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9935970306396484,
      "learning_rate": 1.9867972202406766e-05,
      "loss": 2.7417,
      "step": 4451
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9922152757644653,
      "learning_rate": 1.9867905508896694e-05,
      "loss": 2.7267,
      "step": 4452
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0697370767593384,
      "learning_rate": 1.986783879865784e-05,
      "loss": 2.6321,
      "step": 4453
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0248255729675293,
      "learning_rate": 1.9867772071690317e-05,
      "loss": 2.7374,
      "step": 4454
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9543808698654175,
      "learning_rate": 1.9867705327994244e-05,
      "loss": 2.9645,
      "step": 4455
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0305174589157104,
      "learning_rate": 1.986763856756973e-05,
      "loss": 2.5344,
      "step": 4456
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.902191162109375,
      "learning_rate": 1.9867571790416886e-05,
      "loss": 2.8373,
      "step": 4457
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9761344194412231,
      "learning_rate": 1.9867504996535828e-05,
      "loss": 2.6701,
      "step": 4458
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9533754587173462,
      "learning_rate": 1.9867438185926672e-05,
      "loss": 2.6345,
      "step": 4459
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9517418742179871,
      "learning_rate": 1.9867371358589528e-05,
      "loss": 2.8041,
      "step": 4460
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9684209227561951,
      "learning_rate": 1.986730451452451e-05,
      "loss": 2.6497,
      "step": 4461
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1823079586029053,
      "learning_rate": 1.9867237653731727e-05,
      "loss": 2.7191,
      "step": 4462
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9519813656806946,
      "learning_rate": 1.98671707762113e-05,
      "loss": 2.6943,
      "step": 4463
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0016108751296997,
      "learning_rate": 1.9867103881963336e-05,
      "loss": 3.0162,
      "step": 4464
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1258745193481445,
      "learning_rate": 1.9867036970987952e-05,
      "loss": 2.6535,
      "step": 4465
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8843876719474792,
      "learning_rate": 1.986697004328526e-05,
      "loss": 2.5975,
      "step": 4466
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1182492971420288,
      "learning_rate": 1.9866903098855378e-05,
      "loss": 2.8186,
      "step": 4467
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2959649562835693,
      "learning_rate": 1.9866836137698412e-05,
      "loss": 2.5797,
      "step": 4468
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.130449891090393,
      "learning_rate": 1.986676915981448e-05,
      "loss": 2.57,
      "step": 4469
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0687010288238525,
      "learning_rate": 1.9866702165203693e-05,
      "loss": 2.7771,
      "step": 4470
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0818251371383667,
      "learning_rate": 1.9866635153866167e-05,
      "loss": 2.8444,
      "step": 4471
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9507020115852356,
      "learning_rate": 1.9866568125802014e-05,
      "loss": 2.8701,
      "step": 4472
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1145178079605103,
      "learning_rate": 1.986650108101135e-05,
      "loss": 2.6973,
      "step": 4473
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9343014359474182,
      "learning_rate": 1.9866434019494283e-05,
      "loss": 2.9867,
      "step": 4474
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9933557510375977,
      "learning_rate": 1.9866366941250933e-05,
      "loss": 2.7178,
      "step": 4475
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9560204744338989,
      "learning_rate": 1.9866299846281413e-05,
      "loss": 2.646,
      "step": 4476
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.018327236175537,
      "learning_rate": 1.9866232734585834e-05,
      "loss": 2.8219,
      "step": 4477
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.122578740119934,
      "learning_rate": 1.9866165606164313e-05,
      "loss": 2.8554,
      "step": 4478
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1033155918121338,
      "learning_rate": 1.9866098461016957e-05,
      "loss": 2.7579,
      "step": 4479
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0095250606536865,
      "learning_rate": 1.986603129914389e-05,
      "loss": 2.9043,
      "step": 4480
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1398425102233887,
      "learning_rate": 1.9865964120545217e-05,
      "loss": 3.093,
      "step": 4481
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6625863313674927,
      "learning_rate": 1.9865896925221056e-05,
      "loss": 2.6184,
      "step": 4482
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0313880443572998,
      "learning_rate": 1.9865829713171518e-05,
      "loss": 2.6076,
      "step": 4483
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0861886739730835,
      "learning_rate": 1.986576248439672e-05,
      "loss": 2.9155,
      "step": 4484
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1470962762832642,
      "learning_rate": 1.9865695238896778e-05,
      "loss": 2.5419,
      "step": 4485
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0293166637420654,
      "learning_rate": 1.9865627976671803e-05,
      "loss": 2.8462,
      "step": 4486
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.12471342086792,
      "learning_rate": 1.9865560697721905e-05,
      "loss": 2.6739,
      "step": 4487
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.126661777496338,
      "learning_rate": 1.9865493402047206e-05,
      "loss": 2.7985,
      "step": 4488
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.016241192817688,
      "learning_rate": 1.986542608964781e-05,
      "loss": 2.7936,
      "step": 4489
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9974715709686279,
      "learning_rate": 1.9865358760523846e-05,
      "loss": 2.771,
      "step": 4490
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0134813785552979,
      "learning_rate": 1.9865291414675415e-05,
      "loss": 2.6929,
      "step": 4491
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9570468664169312,
      "learning_rate": 1.9865224052102636e-05,
      "loss": 2.5505,
      "step": 4492
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.880639374256134,
      "learning_rate": 1.9865156672805623e-05,
      "loss": 2.5221,
      "step": 4493
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0982698202133179,
      "learning_rate": 1.986508927678449e-05,
      "loss": 2.6914,
      "step": 4494
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0124222040176392,
      "learning_rate": 1.986502186403935e-05,
      "loss": 2.5163,
      "step": 4495
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9145181775093079,
      "learning_rate": 1.9864954434570323e-05,
      "loss": 2.6749,
      "step": 4496
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4828587770462036,
      "learning_rate": 1.9864886988377513e-05,
      "loss": 2.8897,
      "step": 4497
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0674537420272827,
      "learning_rate": 1.9864819525461045e-05,
      "loss": 2.8774,
      "step": 4498
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0027196407318115,
      "learning_rate": 1.9864752045821025e-05,
      "loss": 2.6253,
      "step": 4499
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.892526388168335,
      "learning_rate": 1.9864684549457577e-05,
      "loss": 2.582,
      "step": 4500
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0540705919265747,
      "learning_rate": 1.9864617036370805e-05,
      "loss": 2.5943,
      "step": 4501
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8882647752761841,
      "learning_rate": 1.9864549506560827e-05,
      "loss": 2.8529,
      "step": 4502
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9563076496124268,
      "learning_rate": 1.986448196002776e-05,
      "loss": 2.5239,
      "step": 4503
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2047488689422607,
      "learning_rate": 1.9864414396771717e-05,
      "loss": 2.7929,
      "step": 4504
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9900112748146057,
      "learning_rate": 1.9864346816792812e-05,
      "loss": 2.7914,
      "step": 4505
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0753271579742432,
      "learning_rate": 1.9864279220091162e-05,
      "loss": 2.4892,
      "step": 4506
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9448592066764832,
      "learning_rate": 1.9864211606666876e-05,
      "loss": 2.8436,
      "step": 4507
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9962863326072693,
      "learning_rate": 1.9864143976520073e-05,
      "loss": 2.6706,
      "step": 4508
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0116523504257202,
      "learning_rate": 1.986407632965087e-05,
      "loss": 2.5708,
      "step": 4509
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0174369812011719,
      "learning_rate": 1.9864008666059374e-05,
      "loss": 2.8872,
      "step": 4510
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.06830632686615,
      "learning_rate": 1.986394098574571e-05,
      "loss": 2.6852,
      "step": 4511
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0120311975479126,
      "learning_rate": 1.986387328870998e-05,
      "loss": 2.681,
      "step": 4512
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1054097414016724,
      "learning_rate": 1.9863805574952312e-05,
      "loss": 2.736,
      "step": 4513
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9869415760040283,
      "learning_rate": 1.9863737844472816e-05,
      "loss": 2.753,
      "step": 4514
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9761326313018799,
      "learning_rate": 1.98636700972716e-05,
      "loss": 2.7506,
      "step": 4515
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.028612494468689,
      "learning_rate": 1.9863602333348786e-05,
      "loss": 2.7637,
      "step": 4516
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0232017040252686,
      "learning_rate": 1.9863534552704485e-05,
      "loss": 2.778,
      "step": 4517
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9060404300689697,
      "learning_rate": 1.9863466755338816e-05,
      "loss": 2.8919,
      "step": 4518
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.019822597503662,
      "learning_rate": 1.9863398941251893e-05,
      "loss": 2.8351,
      "step": 4519
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0655912160873413,
      "learning_rate": 1.9863331110443828e-05,
      "loss": 2.7062,
      "step": 4520
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0789180994033813,
      "learning_rate": 1.986326326291474e-05,
      "loss": 2.5747,
      "step": 4521
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0848761796951294,
      "learning_rate": 1.986319539866474e-05,
      "loss": 2.5912,
      "step": 4522
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0102301836013794,
      "learning_rate": 1.9863127517693947e-05,
      "loss": 2.6434,
      "step": 4523
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0901676416397095,
      "learning_rate": 1.9863059620002473e-05,
      "loss": 2.8902,
      "step": 4524
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1528109312057495,
      "learning_rate": 1.9862991705590434e-05,
      "loss": 2.7264,
      "step": 4525
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9362943172454834,
      "learning_rate": 1.9862923774457947e-05,
      "loss": 2.5709,
      "step": 4526
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9695144295692444,
      "learning_rate": 1.986285582660512e-05,
      "loss": 2.7719,
      "step": 4527
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.012749195098877,
      "learning_rate": 1.986278786203208e-05,
      "loss": 2.4826,
      "step": 4528
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0260341167449951,
      "learning_rate": 1.9862719880738933e-05,
      "loss": 2.7164,
      "step": 4529
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9711105823516846,
      "learning_rate": 1.98626518827258e-05,
      "loss": 2.79,
      "step": 4530
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9501733183860779,
      "learning_rate": 1.986258386799279e-05,
      "loss": 2.8476,
      "step": 4531
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9382809400558472,
      "learning_rate": 1.986251583654002e-05,
      "loss": 2.7082,
      "step": 4532
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0009613037109375,
      "learning_rate": 1.986244778836761e-05,
      "loss": 2.8749,
      "step": 4533
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0023291110992432,
      "learning_rate": 1.9862379723475673e-05,
      "loss": 2.6809,
      "step": 4534
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1032944917678833,
      "learning_rate": 1.9862311641864323e-05,
      "loss": 2.7859,
      "step": 4535
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.162373423576355,
      "learning_rate": 1.986224354353368e-05,
      "loss": 2.8712,
      "step": 4536
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0999075174331665,
      "learning_rate": 1.9862175428483847e-05,
      "loss": 2.7708,
      "step": 4537
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0486969947814941,
      "learning_rate": 1.9862107296714954e-05,
      "loss": 2.6115,
      "step": 4538
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0483989715576172,
      "learning_rate": 1.9862039148227107e-05,
      "loss": 2.6537,
      "step": 4539
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9447581768035889,
      "learning_rate": 1.986197098302043e-05,
      "loss": 2.6001,
      "step": 4540
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9373035430908203,
      "learning_rate": 1.986190280109503e-05,
      "loss": 2.854,
      "step": 4541
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.067413091659546,
      "learning_rate": 1.9861834602451028e-05,
      "loss": 2.812,
      "step": 4542
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0579473972320557,
      "learning_rate": 1.9861766387088538e-05,
      "loss": 2.7765,
      "step": 4543
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9648382663726807,
      "learning_rate": 1.9861698155007674e-05,
      "loss": 2.6493,
      "step": 4544
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9981719851493835,
      "learning_rate": 1.9861629906208555e-05,
      "loss": 2.7263,
      "step": 4545
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9080607891082764,
      "learning_rate": 1.9861561640691293e-05,
      "loss": 2.7559,
      "step": 4546
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7527389526367188,
      "learning_rate": 1.986149335845601e-05,
      "loss": 2.717,
      "step": 4547
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0125603675842285,
      "learning_rate": 1.9861425059502812e-05,
      "loss": 2.6499,
      "step": 4548
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9861685633659363,
      "learning_rate": 1.9861356743831824e-05,
      "loss": 2.6229,
      "step": 4549
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9717320203781128,
      "learning_rate": 1.9861288411443157e-05,
      "loss": 2.8316,
      "step": 4550
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0163850784301758,
      "learning_rate": 1.9861220062336926e-05,
      "loss": 2.7719,
      "step": 4551
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9523040056228638,
      "learning_rate": 1.986115169651325e-05,
      "loss": 2.6747,
      "step": 4552
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9575790166854858,
      "learning_rate": 1.9861083313972242e-05,
      "loss": 2.7809,
      "step": 4553
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.943949818611145,
      "learning_rate": 1.9861014914714024e-05,
      "loss": 2.9039,
      "step": 4554
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.118478536605835,
      "learning_rate": 1.9860946498738706e-05,
      "loss": 2.5573,
      "step": 4555
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9497480392456055,
      "learning_rate": 1.98608780660464e-05,
      "loss": 2.8595,
      "step": 4556
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0326765775680542,
      "learning_rate": 1.9860809616637234e-05,
      "loss": 2.68,
      "step": 4557
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0335273742675781,
      "learning_rate": 1.9860741150511314e-05,
      "loss": 2.8698,
      "step": 4558
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0058188438415527,
      "learning_rate": 1.986067266766876e-05,
      "loss": 2.6694,
      "step": 4559
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.021842360496521,
      "learning_rate": 1.9860604168109692e-05,
      "loss": 2.7753,
      "step": 4560
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9483190178871155,
      "learning_rate": 1.9860535651834218e-05,
      "loss": 2.7375,
      "step": 4561
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0292844772338867,
      "learning_rate": 1.9860467118842457e-05,
      "loss": 2.7953,
      "step": 4562
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0755494832992554,
      "learning_rate": 1.9860398569134527e-05,
      "loss": 2.5542,
      "step": 4563
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0177407264709473,
      "learning_rate": 1.9860330002710545e-05,
      "loss": 2.7832,
      "step": 4564
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0560835599899292,
      "learning_rate": 1.9860261419570625e-05,
      "loss": 2.8086,
      "step": 4565
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9261502027511597,
      "learning_rate": 1.9860192819714883e-05,
      "loss": 2.6745,
      "step": 4566
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9916227459907532,
      "learning_rate": 1.9860124203143436e-05,
      "loss": 2.5559,
      "step": 4567
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9307297468185425,
      "learning_rate": 1.98600555698564e-05,
      "loss": 2.6285,
      "step": 4568
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9554701447486877,
      "learning_rate": 1.9859986919853896e-05,
      "loss": 2.9605,
      "step": 4569
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9495060443878174,
      "learning_rate": 1.9859918253136032e-05,
      "loss": 2.5662,
      "step": 4570
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9852795600891113,
      "learning_rate": 1.985984956970293e-05,
      "loss": 2.8053,
      "step": 4571
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0235259532928467,
      "learning_rate": 1.9859780869554704e-05,
      "loss": 2.8534,
      "step": 4572
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9545522332191467,
      "learning_rate": 1.9859712152691473e-05,
      "loss": 2.749,
      "step": 4573
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0728600025177002,
      "learning_rate": 1.985964341911335e-05,
      "loss": 2.619,
      "step": 4574
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2372137308120728,
      "learning_rate": 1.9859574668820455e-05,
      "loss": 2.5709,
      "step": 4575
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0034637451171875,
      "learning_rate": 1.9859505901812905e-05,
      "loss": 2.7907,
      "step": 4576
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9099109768867493,
      "learning_rate": 1.9859437118090816e-05,
      "loss": 2.6112,
      "step": 4577
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9982414841651917,
      "learning_rate": 1.9859368317654297e-05,
      "loss": 2.6857,
      "step": 4578
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9593393802642822,
      "learning_rate": 1.9859299500503476e-05,
      "loss": 2.7456,
      "step": 4579
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.076225996017456,
      "learning_rate": 1.985923066663846e-05,
      "loss": 2.7331,
      "step": 4580
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1619952917099,
      "learning_rate": 1.9859161816059373e-05,
      "loss": 2.5938,
      "step": 4581
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0608822107315063,
      "learning_rate": 1.985909294876633e-05,
      "loss": 2.7764,
      "step": 4582
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1265326738357544,
      "learning_rate": 1.9859024064759443e-05,
      "loss": 2.7118,
      "step": 4583
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0377742052078247,
      "learning_rate": 1.9858955164038838e-05,
      "loss": 2.8083,
      "step": 4584
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0598235130310059,
      "learning_rate": 1.9858886246604622e-05,
      "loss": 2.7201,
      "step": 4585
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.582902193069458,
      "learning_rate": 1.985881731245692e-05,
      "loss": 2.6552,
      "step": 4586
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0851842164993286,
      "learning_rate": 1.9858748361595842e-05,
      "loss": 2.8044,
      "step": 4587
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9897516369819641,
      "learning_rate": 1.985867939402151e-05,
      "loss": 2.692,
      "step": 4588
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0115247964859009,
      "learning_rate": 1.985861040973404e-05,
      "loss": 2.6848,
      "step": 4589
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0061546564102173,
      "learning_rate": 1.985854140873354e-05,
      "loss": 2.8415,
      "step": 4590
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9580247402191162,
      "learning_rate": 1.9858472391020142e-05,
      "loss": 2.7677,
      "step": 4591
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0289359092712402,
      "learning_rate": 1.9858403356593953e-05,
      "loss": 2.8113,
      "step": 4592
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0772515535354614,
      "learning_rate": 1.9858334305455096e-05,
      "loss": 2.676,
      "step": 4593
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9181066155433655,
      "learning_rate": 1.9858265237603682e-05,
      "loss": 2.6091,
      "step": 4594
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.997591495513916,
      "learning_rate": 1.9858196153039834e-05,
      "loss": 2.6167,
      "step": 4595
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.498136281967163,
      "learning_rate": 1.985812705176366e-05,
      "loss": 2.7511,
      "step": 4596
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0658283233642578,
      "learning_rate": 1.985805793377529e-05,
      "loss": 2.7426,
      "step": 4597
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9268788695335388,
      "learning_rate": 1.985798879907483e-05,
      "loss": 2.668,
      "step": 4598
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2185277938842773,
      "learning_rate": 1.98579196476624e-05,
      "loss": 2.7411,
      "step": 4599
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0263502597808838,
      "learning_rate": 1.9857850479538123e-05,
      "loss": 2.7479,
      "step": 4600
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2850357294082642,
      "learning_rate": 1.985778129470211e-05,
      "loss": 2.5785,
      "step": 4601
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.129297137260437,
      "learning_rate": 1.9857712093154485e-05,
      "loss": 2.8895,
      "step": 4602
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0527918338775635,
      "learning_rate": 1.9857642874895357e-05,
      "loss": 2.7882,
      "step": 4603
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9112884998321533,
      "learning_rate": 1.9857573639924844e-05,
      "loss": 2.8642,
      "step": 4604
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0846000909805298,
      "learning_rate": 1.985750438824307e-05,
      "loss": 2.722,
      "step": 4605
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.085157871246338,
      "learning_rate": 1.9857435119850147e-05,
      "loss": 2.7541,
      "step": 4606
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2019579410552979,
      "learning_rate": 1.9857365834746197e-05,
      "loss": 2.5677,
      "step": 4607
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9838089942932129,
      "learning_rate": 1.985729653293133e-05,
      "loss": 2.811,
      "step": 4608
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0013346672058105,
      "learning_rate": 1.9857227214405673e-05,
      "loss": 2.5418,
      "step": 4609
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9810295104980469,
      "learning_rate": 1.9857157879169336e-05,
      "loss": 2.7727,
      "step": 4610
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9864104986190796,
      "learning_rate": 1.985708852722244e-05,
      "loss": 2.5879,
      "step": 4611
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9586655497550964,
      "learning_rate": 1.98570191585651e-05,
      "loss": 2.5497,
      "step": 4612
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.113021731376648,
      "learning_rate": 1.985694977319744e-05,
      "loss": 2.7814,
      "step": 4613
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.975827693939209,
      "learning_rate": 1.985688037111957e-05,
      "loss": 2.6038,
      "step": 4614
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0650005340576172,
      "learning_rate": 1.985681095233161e-05,
      "loss": 2.7851,
      "step": 4615
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9026219248771667,
      "learning_rate": 1.985674151683368e-05,
      "loss": 2.7899,
      "step": 4616
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.026302695274353,
      "learning_rate": 1.9856672064625896e-05,
      "loss": 2.6721,
      "step": 4617
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9997550845146179,
      "learning_rate": 1.9856602595708376e-05,
      "loss": 2.635,
      "step": 4618
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.085762858390808,
      "learning_rate": 1.9856533110081234e-05,
      "loss": 2.6593,
      "step": 4619
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9990670680999756,
      "learning_rate": 1.9856463607744596e-05,
      "loss": 2.701,
      "step": 4620
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.091058611869812,
      "learning_rate": 1.985639408869857e-05,
      "loss": 2.6873,
      "step": 4621
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0314420461654663,
      "learning_rate": 1.9856324552943285e-05,
      "loss": 2.902,
      "step": 4622
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0354714393615723,
      "learning_rate": 1.9856255000478853e-05,
      "loss": 2.653,
      "step": 4623
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9984365701675415,
      "learning_rate": 1.9856185431305386e-05,
      "loss": 2.508,
      "step": 4624
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9666648507118225,
      "learning_rate": 1.985611584542301e-05,
      "loss": 2.889,
      "step": 4625
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9804898500442505,
      "learning_rate": 1.9856046242831844e-05,
      "loss": 2.6586,
      "step": 4626
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2160084247589111,
      "learning_rate": 1.9855976623532e-05,
      "loss": 2.7716,
      "step": 4627
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0731290578842163,
      "learning_rate": 1.9855906987523598e-05,
      "loss": 2.5261,
      "step": 4628
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2702666521072388,
      "learning_rate": 1.985583733480676e-05,
      "loss": 2.5916,
      "step": 4629
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0124194622039795,
      "learning_rate": 1.98557676653816e-05,
      "loss": 2.7511,
      "step": 4630
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9989257454872131,
      "learning_rate": 1.9855697979248236e-05,
      "loss": 2.7789,
      "step": 4631
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0092686414718628,
      "learning_rate": 1.9855628276406786e-05,
      "loss": 2.7765,
      "step": 4632
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0372726917266846,
      "learning_rate": 1.985555855685737e-05,
      "loss": 2.6395,
      "step": 4633
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0840059518814087,
      "learning_rate": 1.9855488820600105e-05,
      "loss": 2.6182,
      "step": 4634
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0323623418807983,
      "learning_rate": 1.9855419067635115e-05,
      "loss": 2.7818,
      "step": 4635
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9888967871665955,
      "learning_rate": 1.9855349297962508e-05,
      "loss": 2.5558,
      "step": 4636
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0185108184814453,
      "learning_rate": 1.9855279511582406e-05,
      "loss": 2.8099,
      "step": 4637
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9826157689094543,
      "learning_rate": 1.9855209708494933e-05,
      "loss": 2.6454,
      "step": 4638
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1802384853363037,
      "learning_rate": 1.9855139888700198e-05,
      "loss": 2.7391,
      "step": 4639
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0418791770935059,
      "learning_rate": 1.9855070052198328e-05,
      "loss": 2.5942,
      "step": 4640
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9953499436378479,
      "learning_rate": 1.9855000198989438e-05,
      "loss": 2.7053,
      "step": 4641
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9950467944145203,
      "learning_rate": 1.9854930329073642e-05,
      "loss": 2.7839,
      "step": 4642
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9915024638175964,
      "learning_rate": 1.9854860442451064e-05,
      "loss": 2.8166,
      "step": 4643
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9842850565910339,
      "learning_rate": 1.985479053912182e-05,
      "loss": 2.7611,
      "step": 4644
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0365822315216064,
      "learning_rate": 1.9854720619086033e-05,
      "loss": 2.63,
      "step": 4645
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.944000244140625,
      "learning_rate": 1.9854650682343818e-05,
      "loss": 2.9117,
      "step": 4646
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0879327058792114,
      "learning_rate": 1.9854580728895293e-05,
      "loss": 2.7231,
      "step": 4647
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0105032920837402,
      "learning_rate": 1.9854510758740576e-05,
      "loss": 2.9618,
      "step": 4648
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0571922063827515,
      "learning_rate": 1.9854440771879787e-05,
      "loss": 2.7148,
      "step": 4649
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0029433965682983,
      "learning_rate": 1.9854370768313045e-05,
      "loss": 2.9029,
      "step": 4650
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9886061549186707,
      "learning_rate": 1.9854300748040468e-05,
      "loss": 2.5346,
      "step": 4651
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.954598605632782,
      "learning_rate": 1.9854230711062176e-05,
      "loss": 2.4746,
      "step": 4652
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0513222217559814,
      "learning_rate": 1.9854160657378287e-05,
      "loss": 2.7001,
      "step": 4653
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.099969506263733,
      "learning_rate": 1.9854090586988916e-05,
      "loss": 2.7931,
      "step": 4654
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9740140438079834,
      "learning_rate": 1.9854020499894185e-05,
      "loss": 2.7259,
      "step": 4655
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9765115976333618,
      "learning_rate": 1.9853950396094216e-05,
      "loss": 2.87,
      "step": 4656
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0282365083694458,
      "learning_rate": 1.985388027558912e-05,
      "loss": 2.778,
      "step": 4657
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1524689197540283,
      "learning_rate": 1.9853810138379027e-05,
      "loss": 2.8755,
      "step": 4658
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9806546568870544,
      "learning_rate": 1.9853739984464046e-05,
      "loss": 3.0335,
      "step": 4659
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9805863499641418,
      "learning_rate": 1.9853669813844297e-05,
      "loss": 2.5578,
      "step": 4660
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.992891788482666,
      "learning_rate": 1.9853599626519906e-05,
      "loss": 2.753,
      "step": 4661
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9435164928436279,
      "learning_rate": 1.9853529422490986e-05,
      "loss": 2.6792,
      "step": 4662
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9855511784553528,
      "learning_rate": 1.9853459201757658e-05,
      "loss": 2.7163,
      "step": 4663
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0144144296646118,
      "learning_rate": 1.985338896432004e-05,
      "loss": 2.6833,
      "step": 4664
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.038296103477478,
      "learning_rate": 1.9853318710178252e-05,
      "loss": 2.823,
      "step": 4665
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0372657775878906,
      "learning_rate": 1.985324843933241e-05,
      "loss": 2.8766,
      "step": 4666
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3176276683807373,
      "learning_rate": 1.985317815178264e-05,
      "loss": 2.808,
      "step": 4667
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9562565088272095,
      "learning_rate": 1.9853107847529055e-05,
      "loss": 2.6065,
      "step": 4668
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.057617425918579,
      "learning_rate": 1.9853037526571776e-05,
      "loss": 2.7446,
      "step": 4669
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0766170024871826,
      "learning_rate": 1.985296718891092e-05,
      "loss": 2.6362,
      "step": 4670
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2812225818634033,
      "learning_rate": 1.9852896834546614e-05,
      "loss": 2.481,
      "step": 4671
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.050750970840454,
      "learning_rate": 1.9852826463478968e-05,
      "loss": 2.8338,
      "step": 4672
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9539911150932312,
      "learning_rate": 1.9852756075708107e-05,
      "loss": 2.7933,
      "step": 4673
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.673788070678711,
      "learning_rate": 1.9852685671234145e-05,
      "loss": 2.7893,
      "step": 4674
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.843972384929657,
      "learning_rate": 1.985261525005721e-05,
      "loss": 2.6541,
      "step": 4675
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0766199827194214,
      "learning_rate": 1.9852544812177413e-05,
      "loss": 2.7657,
      "step": 4676
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1148673295974731,
      "learning_rate": 1.9852474357594876e-05,
      "loss": 2.7887,
      "step": 4677
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9809386730194092,
      "learning_rate": 1.985240388630972e-05,
      "loss": 2.6888,
      "step": 4678
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0484652519226074,
      "learning_rate": 1.9852333398322065e-05,
      "loss": 2.6176,
      "step": 4679
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9368278384208679,
      "learning_rate": 1.9852262893632027e-05,
      "loss": 2.7898,
      "step": 4680
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9405116438865662,
      "learning_rate": 1.985219237223973e-05,
      "loss": 2.6074,
      "step": 4681
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.025683045387268,
      "learning_rate": 1.9852121834145287e-05,
      "loss": 2.6598,
      "step": 4682
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0518923997879028,
      "learning_rate": 1.9852051279348825e-05,
      "loss": 2.4085,
      "step": 4683
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9966092705726624,
      "learning_rate": 1.985198070785046e-05,
      "loss": 2.813,
      "step": 4684
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9864409565925598,
      "learning_rate": 1.985191011965031e-05,
      "loss": 2.7742,
      "step": 4685
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9329318404197693,
      "learning_rate": 1.98518395147485e-05,
      "loss": 2.6746,
      "step": 4686
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1008660793304443,
      "learning_rate": 1.9851768893145144e-05,
      "loss": 2.6802,
      "step": 4687
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.007663607597351,
      "learning_rate": 1.9851698254840365e-05,
      "loss": 2.7338,
      "step": 4688
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.074948787689209,
      "learning_rate": 1.9851627599834278e-05,
      "loss": 2.6877,
      "step": 4689
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9371920824050903,
      "learning_rate": 1.985155692812701e-05,
      "loss": 2.6693,
      "step": 4690
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9393095374107361,
      "learning_rate": 1.985148623971868e-05,
      "loss": 2.6842,
      "step": 4691
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9985430836677551,
      "learning_rate": 1.98514155346094e-05,
      "loss": 2.7146,
      "step": 4692
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9553138017654419,
      "learning_rate": 1.98513448127993e-05,
      "loss": 2.592,
      "step": 4693
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1145509481430054,
      "learning_rate": 1.985127407428849e-05,
      "loss": 2.7891,
      "step": 4694
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9756449460983276,
      "learning_rate": 1.9851203319077097e-05,
      "loss": 2.7848,
      "step": 4695
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9744873046875,
      "learning_rate": 1.9851132547165237e-05,
      "loss": 2.7558,
      "step": 4696
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.99759840965271,
      "learning_rate": 1.9851061758553035e-05,
      "loss": 2.5232,
      "step": 4697
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3034741878509521,
      "learning_rate": 1.9850990953240606e-05,
      "loss": 2.5749,
      "step": 4698
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0605823993682861,
      "learning_rate": 1.985092013122807e-05,
      "loss": 2.5103,
      "step": 4699
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0149425268173218,
      "learning_rate": 1.985084929251555e-05,
      "loss": 2.6473,
      "step": 4700
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9329389929771423,
      "learning_rate": 1.985077843710317e-05,
      "loss": 2.7467,
      "step": 4701
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1731916666030884,
      "learning_rate": 1.9850707564991042e-05,
      "loss": 2.7685,
      "step": 4702
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9538072943687439,
      "learning_rate": 1.9850636676179288e-05,
      "loss": 2.7692,
      "step": 4703
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9028840661048889,
      "learning_rate": 1.9850565770668028e-05,
      "loss": 2.7563,
      "step": 4704
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0458152294158936,
      "learning_rate": 1.985049484845739e-05,
      "loss": 2.6922,
      "step": 4705
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0991841554641724,
      "learning_rate": 1.985042390954748e-05,
      "loss": 2.7705,
      "step": 4706
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0702296495437622,
      "learning_rate": 1.985035295393843e-05,
      "loss": 2.8361,
      "step": 4707
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9661253094673157,
      "learning_rate": 1.985028198163036e-05,
      "loss": 2.7619,
      "step": 4708
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0632721185684204,
      "learning_rate": 1.985021099262338e-05,
      "loss": 2.7755,
      "step": 4709
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9852749705314636,
      "learning_rate": 1.9850139986917618e-05,
      "loss": 2.7647,
      "step": 4710
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0438090562820435,
      "learning_rate": 1.9850068964513196e-05,
      "loss": 2.472,
      "step": 4711
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0318838357925415,
      "learning_rate": 1.984999792541023e-05,
      "loss": 2.7792,
      "step": 4712
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9984896183013916,
      "learning_rate": 1.9849926869608844e-05,
      "loss": 2.906,
      "step": 4713
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0507020950317383,
      "learning_rate": 1.984985579710916e-05,
      "loss": 2.8087,
      "step": 4714
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.069618582725525,
      "learning_rate": 1.984978470791129e-05,
      "loss": 2.6292,
      "step": 4715
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1465049982070923,
      "learning_rate": 1.984971360201536e-05,
      "loss": 2.7048,
      "step": 4716
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.031553864479065,
      "learning_rate": 1.984964247942149e-05,
      "loss": 3.0577,
      "step": 4717
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1025669574737549,
      "learning_rate": 1.9849571340129805e-05,
      "loss": 2.6557,
      "step": 4718
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9973154067993164,
      "learning_rate": 1.9849500184140416e-05,
      "loss": 2.6208,
      "step": 4719
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9545770883560181,
      "learning_rate": 1.9849429011453454e-05,
      "loss": 2.8697,
      "step": 4720
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9435945749282837,
      "learning_rate": 1.9849357822069032e-05,
      "loss": 2.8358,
      "step": 4721
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.000124216079712,
      "learning_rate": 1.9849286615987277e-05,
      "loss": 2.8712,
      "step": 4722
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0331372022628784,
      "learning_rate": 1.98492153932083e-05,
      "loss": 2.8434,
      "step": 4723
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2921143770217896,
      "learning_rate": 1.9849144153732234e-05,
      "loss": 2.9975,
      "step": 4724
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.981240451335907,
      "learning_rate": 1.984907289755919e-05,
      "loss": 2.8636,
      "step": 4725
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0012092590332031,
      "learning_rate": 1.984900162468929e-05,
      "loss": 2.7337,
      "step": 4726
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.087003469467163,
      "learning_rate": 1.9848930335122664e-05,
      "loss": 2.8816,
      "step": 4727
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0640403032302856,
      "learning_rate": 1.9848859028859426e-05,
      "loss": 2.5231,
      "step": 4728
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9767740964889526,
      "learning_rate": 1.984878770589969e-05,
      "loss": 2.7572,
      "step": 4729
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.971817672252655,
      "learning_rate": 1.9848716366243588e-05,
      "loss": 2.6394,
      "step": 4730
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9848594069480896,
      "learning_rate": 1.9848645009891235e-05,
      "loss": 2.768,
      "step": 4731
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.075103521347046,
      "learning_rate": 1.9848573636842756e-05,
      "loss": 2.6758,
      "step": 4732
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9073885083198547,
      "learning_rate": 1.984850224709827e-05,
      "loss": 2.8356,
      "step": 4733
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9701099991798401,
      "learning_rate": 1.9848430840657896e-05,
      "loss": 2.6802,
      "step": 4734
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9441078901290894,
      "learning_rate": 1.984835941752176e-05,
      "loss": 2.7089,
      "step": 4735
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9863768815994263,
      "learning_rate": 1.984828797768998e-05,
      "loss": 2.7464,
      "step": 4736
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6514263153076172,
      "learning_rate": 1.9848216521162677e-05,
      "loss": 2.8124,
      "step": 4737
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9890356063842773,
      "learning_rate": 1.9848145047939965e-05,
      "loss": 2.8795,
      "step": 4738
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8654012084007263,
      "learning_rate": 1.9848073558021982e-05,
      "loss": 2.6759,
      "step": 4739
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9078537225723267,
      "learning_rate": 1.9848002051408834e-05,
      "loss": 2.7064,
      "step": 4740
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9630364179611206,
      "learning_rate": 1.984793052810065e-05,
      "loss": 2.7125,
      "step": 4741
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9847505688667297,
      "learning_rate": 1.984785898809755e-05,
      "loss": 2.816,
      "step": 4742
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9199023246765137,
      "learning_rate": 1.984778743139965e-05,
      "loss": 2.9102,
      "step": 4743
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9336037039756775,
      "learning_rate": 1.984771585800708e-05,
      "loss": 2.6173,
      "step": 4744
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.047658920288086,
      "learning_rate": 1.9847644267919953e-05,
      "loss": 2.7961,
      "step": 4745
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.041836142539978,
      "learning_rate": 1.98475726611384e-05,
      "loss": 2.8762,
      "step": 4746
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9966074824333191,
      "learning_rate": 1.9847501037662533e-05,
      "loss": 2.6987,
      "step": 4747
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9976739287376404,
      "learning_rate": 1.9847429397492476e-05,
      "loss": 2.8162,
      "step": 4748
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.132573127746582,
      "learning_rate": 1.9847357740628355e-05,
      "loss": 2.6808,
      "step": 4749
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0135527849197388,
      "learning_rate": 1.984728606707029e-05,
      "loss": 2.798,
      "step": 4750
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9971466064453125,
      "learning_rate": 1.9847214376818392e-05,
      "loss": 2.8458,
      "step": 4751
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9415978789329529,
      "learning_rate": 1.9847142669872798e-05,
      "loss": 3.0022,
      "step": 4752
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9806017875671387,
      "learning_rate": 1.9847070946233624e-05,
      "loss": 2.622,
      "step": 4753
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8974464535713196,
      "learning_rate": 1.9846999205900986e-05,
      "loss": 2.6325,
      "step": 4754
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1339582204818726,
      "learning_rate": 1.9846927448875012e-05,
      "loss": 2.8881,
      "step": 4755
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.055812954902649,
      "learning_rate": 1.9846855675155822e-05,
      "loss": 2.6193,
      "step": 4756
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9410881400108337,
      "learning_rate": 1.9846783884743533e-05,
      "loss": 2.6811,
      "step": 4757
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9617791175842285,
      "learning_rate": 1.9846712077638278e-05,
      "loss": 2.7738,
      "step": 4758
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.032226324081421,
      "learning_rate": 1.984664025384017e-05,
      "loss": 2.8316,
      "step": 4759
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0101580619812012,
      "learning_rate": 1.9846568413349327e-05,
      "loss": 2.6446,
      "step": 4760
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0328069925308228,
      "learning_rate": 1.9846496556165878e-05,
      "loss": 2.7721,
      "step": 4761
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9596999287605286,
      "learning_rate": 1.9846424682289948e-05,
      "loss": 2.6257,
      "step": 4762
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0165013074874878,
      "learning_rate": 1.984635279172165e-05,
      "loss": 2.7471,
      "step": 4763
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9737491011619568,
      "learning_rate": 1.984628088446111e-05,
      "loss": 2.606,
      "step": 4764
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.062925100326538,
      "learning_rate": 1.9846208960508452e-05,
      "loss": 2.7411,
      "step": 4765
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8836890459060669,
      "learning_rate": 1.9846137019863794e-05,
      "loss": 2.8076,
      "step": 4766
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0132300853729248,
      "learning_rate": 1.9846065062527258e-05,
      "loss": 2.739,
      "step": 4767
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.051145076751709,
      "learning_rate": 1.9845993088498968e-05,
      "loss": 2.6571,
      "step": 4768
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9845038652420044,
      "learning_rate": 1.9845921097779047e-05,
      "loss": 2.6326,
      "step": 4769
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.037363886833191,
      "learning_rate": 1.9845849090367614e-05,
      "loss": 2.7422,
      "step": 4770
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.020555019378662,
      "learning_rate": 1.9845777066264796e-05,
      "loss": 2.8984,
      "step": 4771
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.047468662261963,
      "learning_rate": 1.984570502547071e-05,
      "loss": 2.6921,
      "step": 4772
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.051213026046753,
      "learning_rate": 1.9845632967985477e-05,
      "loss": 2.7039,
      "step": 4773
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9962530136108398,
      "learning_rate": 1.9845560893809226e-05,
      "loss": 2.5946,
      "step": 4774
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4683539867401123,
      "learning_rate": 1.9845488802942076e-05,
      "loss": 2.9499,
      "step": 4775
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9391481876373291,
      "learning_rate": 1.9845416695384145e-05,
      "loss": 2.8071,
      "step": 4776
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0351024866104126,
      "learning_rate": 1.984534457113556e-05,
      "loss": 2.4749,
      "step": 4777
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9716697931289673,
      "learning_rate": 1.984527243019644e-05,
      "loss": 2.6272,
      "step": 4778
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9806167483329773,
      "learning_rate": 1.9845200272566913e-05,
      "loss": 2.5916,
      "step": 4779
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0377635955810547,
      "learning_rate": 1.9845128098247098e-05,
      "loss": 2.9676,
      "step": 4780
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9285381436347961,
      "learning_rate": 1.9845055907237113e-05,
      "loss": 2.5794,
      "step": 4781
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9458692669868469,
      "learning_rate": 1.9844983699537085e-05,
      "loss": 2.7406,
      "step": 4782
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0202869176864624,
      "learning_rate": 1.9844911475147137e-05,
      "loss": 2.8598,
      "step": 4783
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9412074089050293,
      "learning_rate": 1.984483923406739e-05,
      "loss": 2.6355,
      "step": 4784
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1293699741363525,
      "learning_rate": 1.9844766976297968e-05,
      "loss": 2.5059,
      "step": 4785
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0134042501449585,
      "learning_rate": 1.9844694701838995e-05,
      "loss": 2.728,
      "step": 4786
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9788379669189453,
      "learning_rate": 1.9844622410690585e-05,
      "loss": 2.843,
      "step": 4787
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0373188257217407,
      "learning_rate": 1.9844550102852866e-05,
      "loss": 2.9269,
      "step": 4788
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0553702116012573,
      "learning_rate": 1.9844477778325965e-05,
      "loss": 2.8347,
      "step": 4789
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9783669114112854,
      "learning_rate": 1.9844405437109997e-05,
      "loss": 2.4959,
      "step": 4790
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.966783344745636,
      "learning_rate": 1.9844333079205088e-05,
      "loss": 2.6632,
      "step": 4791
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0975141525268555,
      "learning_rate": 1.9844260704611363e-05,
      "loss": 2.7907,
      "step": 4792
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.984013020992279,
      "learning_rate": 1.9844188313328942e-05,
      "loss": 2.751,
      "step": 4793
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9721426963806152,
      "learning_rate": 1.984411590535795e-05,
      "loss": 2.3986,
      "step": 4794
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2678823471069336,
      "learning_rate": 1.9844043480698505e-05,
      "loss": 2.7241,
      "step": 4795
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0289695262908936,
      "learning_rate": 1.9843971039350734e-05,
      "loss": 2.6195,
      "step": 4796
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.002336025238037,
      "learning_rate": 1.9843898581314757e-05,
      "loss": 2.7095,
      "step": 4797
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9584819674491882,
      "learning_rate": 1.9843826106590696e-05,
      "loss": 2.7748,
      "step": 4798
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0483782291412354,
      "learning_rate": 1.9843753615178684e-05,
      "loss": 2.726,
      "step": 4799
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9303570985794067,
      "learning_rate": 1.9843681107078828e-05,
      "loss": 2.5848,
      "step": 4800
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9611640572547913,
      "learning_rate": 1.9843608582291264e-05,
      "loss": 2.751,
      "step": 4801
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1280913352966309,
      "learning_rate": 1.9843536040816108e-05,
      "loss": 2.6771,
      "step": 4802
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.972459614276886,
      "learning_rate": 1.9843463482653488e-05,
      "loss": 2.6039,
      "step": 4803
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9260814189910889,
      "learning_rate": 1.9843390907803522e-05,
      "loss": 2.8492,
      "step": 4804
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.948721170425415,
      "learning_rate": 1.984331831626633e-05,
      "loss": 2.7704,
      "step": 4805
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.98382967710495,
      "learning_rate": 1.9843245708042047e-05,
      "loss": 2.6622,
      "step": 4806
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.989652156829834,
      "learning_rate": 1.9843173083130788e-05,
      "loss": 2.7164,
      "step": 4807
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9601844549179077,
      "learning_rate": 1.9843100441532674e-05,
      "loss": 2.8183,
      "step": 4808
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0577269792556763,
      "learning_rate": 1.9843027783247836e-05,
      "loss": 2.6504,
      "step": 4809
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9978649020195007,
      "learning_rate": 1.984295510827639e-05,
      "loss": 2.8925,
      "step": 4810
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1058571338653564,
      "learning_rate": 1.984288241661846e-05,
      "loss": 2.6148,
      "step": 4811
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0344616174697876,
      "learning_rate": 1.9842809708274172e-05,
      "loss": 2.6905,
      "step": 4812
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.936384379863739,
      "learning_rate": 1.984273698324365e-05,
      "loss": 2.7271,
      "step": 4813
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9807224869728088,
      "learning_rate": 1.9842664241527014e-05,
      "loss": 2.9058,
      "step": 4814
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9870253801345825,
      "learning_rate": 1.984259148312439e-05,
      "loss": 2.6494,
      "step": 4815
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.042589545249939,
      "learning_rate": 1.98425187080359e-05,
      "loss": 2.9102,
      "step": 4816
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0198121070861816,
      "learning_rate": 1.9842445916261666e-05,
      "loss": 2.6655,
      "step": 4817
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0803593397140503,
      "learning_rate": 1.9842373107801817e-05,
      "loss": 2.7855,
      "step": 4818
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9746502637863159,
      "learning_rate": 1.9842300282656465e-05,
      "loss": 2.9292,
      "step": 4819
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0452605485916138,
      "learning_rate": 1.9842227440825746e-05,
      "loss": 2.6666,
      "step": 4820
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1175987720489502,
      "learning_rate": 1.984215458230978e-05,
      "loss": 2.5035,
      "step": 4821
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1391117572784424,
      "learning_rate": 1.9842081707108684e-05,
      "loss": 2.86,
      "step": 4822
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0854172706604004,
      "learning_rate": 1.984200881522259e-05,
      "loss": 2.9878,
      "step": 4823
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.057140588760376,
      "learning_rate": 1.984193590665162e-05,
      "loss": 2.6797,
      "step": 4824
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9316425919532776,
      "learning_rate": 1.984186298139589e-05,
      "loss": 2.8377,
      "step": 4825
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9299606680870056,
      "learning_rate": 1.9841790039455533e-05,
      "loss": 2.8093,
      "step": 4826
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9514284729957581,
      "learning_rate": 1.984171708083067e-05,
      "loss": 2.5935,
      "step": 4827
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0427838563919067,
      "learning_rate": 1.984164410552142e-05,
      "loss": 2.7903,
      "step": 4828
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.127830982208252,
      "learning_rate": 1.984157111352791e-05,
      "loss": 2.7136,
      "step": 4829
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9373067021369934,
      "learning_rate": 1.984149810485027e-05,
      "loss": 2.5704,
      "step": 4830
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0362809896469116,
      "learning_rate": 1.9841425079488612e-05,
      "loss": 2.5475,
      "step": 4831
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9138314127922058,
      "learning_rate": 1.9841352037443066e-05,
      "loss": 2.565,
      "step": 4832
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9942381381988525,
      "learning_rate": 1.9841278978713756e-05,
      "loss": 2.5278,
      "step": 4833
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.021531581878662,
      "learning_rate": 1.9841205903300807e-05,
      "loss": 2.9408,
      "step": 4834
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.084375262260437,
      "learning_rate": 1.984113281120434e-05,
      "loss": 2.667,
      "step": 4835
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0297338962554932,
      "learning_rate": 1.9841059702424477e-05,
      "loss": 2.7986,
      "step": 4836
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9240823984146118,
      "learning_rate": 1.984098657696135e-05,
      "loss": 2.592,
      "step": 4837
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0155631303787231,
      "learning_rate": 1.9840913434815077e-05,
      "loss": 2.8911,
      "step": 4838
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0507737398147583,
      "learning_rate": 1.984084027598578e-05,
      "loss": 2.6238,
      "step": 4839
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9617756009101868,
      "learning_rate": 1.984076710047359e-05,
      "loss": 2.7733,
      "step": 4840
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0674641132354736,
      "learning_rate": 1.9840693908278626e-05,
      "loss": 2.462,
      "step": 4841
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0909851789474487,
      "learning_rate": 1.9840620699401013e-05,
      "loss": 2.5729,
      "step": 4842
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.042211890220642,
      "learning_rate": 1.9840547473840872e-05,
      "loss": 2.8174,
      "step": 4843
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0061585903167725,
      "learning_rate": 1.9840474231598333e-05,
      "loss": 2.7126,
      "step": 4844
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.079067587852478,
      "learning_rate": 1.9840400972673518e-05,
      "loss": 2.4743,
      "step": 4845
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0149143934249878,
      "learning_rate": 1.984032769706655e-05,
      "loss": 2.8416,
      "step": 4846
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0258054733276367,
      "learning_rate": 1.9840254404777555e-05,
      "loss": 2.5841,
      "step": 4847
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9530844688415527,
      "learning_rate": 1.9840181095806655e-05,
      "loss": 2.7273,
      "step": 4848
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9868733286857605,
      "learning_rate": 1.9840107770153976e-05,
      "loss": 2.5774,
      "step": 4849
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9900794625282288,
      "learning_rate": 1.984003442781964e-05,
      "loss": 2.8408,
      "step": 4850
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.033900260925293,
      "learning_rate": 1.9839961068803775e-05,
      "loss": 2.6353,
      "step": 4851
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0778846740722656,
      "learning_rate": 1.9839887693106506e-05,
      "loss": 2.6906,
      "step": 4852
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9809041619300842,
      "learning_rate": 1.983981430072795e-05,
      "loss": 2.8601,
      "step": 4853
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9733067154884338,
      "learning_rate": 1.9839740891668238e-05,
      "loss": 2.7265,
      "step": 4854
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0065187215805054,
      "learning_rate": 1.9839667465927493e-05,
      "loss": 2.6472,
      "step": 4855
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1053229570388794,
      "learning_rate": 1.983959402350584e-05,
      "loss": 2.7997,
      "step": 4856
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0951789617538452,
      "learning_rate": 1.9839520564403403e-05,
      "loss": 2.6644,
      "step": 4857
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0913223028182983,
      "learning_rate": 1.9839447088620304e-05,
      "loss": 2.8011,
      "step": 4858
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0880461931228638,
      "learning_rate": 1.9839373596156674e-05,
      "loss": 2.5874,
      "step": 4859
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.95207679271698,
      "learning_rate": 1.983930008701263e-05,
      "loss": 2.7332,
      "step": 4860
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0991787910461426,
      "learning_rate": 1.98392265611883e-05,
      "loss": 2.6883,
      "step": 4861
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0477619171142578,
      "learning_rate": 1.9839153018683808e-05,
      "loss": 2.6619,
      "step": 4862
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9534064531326294,
      "learning_rate": 1.983907945949928e-05,
      "loss": 2.877,
      "step": 4863
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9146372675895691,
      "learning_rate": 1.983900588363484e-05,
      "loss": 2.6037,
      "step": 4864
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.015397310256958,
      "learning_rate": 1.9838932291090616e-05,
      "loss": 2.7789,
      "step": 4865
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.264678955078125,
      "learning_rate": 1.983885868186673e-05,
      "loss": 2.6538,
      "step": 4866
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9553960561752319,
      "learning_rate": 1.98387850559633e-05,
      "loss": 2.5964,
      "step": 4867
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9463881254196167,
      "learning_rate": 1.983871141338046e-05,
      "loss": 2.7826,
      "step": 4868
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0220807790756226,
      "learning_rate": 1.9838637754118334e-05,
      "loss": 2.7391,
      "step": 4869
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.032636046409607,
      "learning_rate": 1.9838564078177043e-05,
      "loss": 2.6384,
      "step": 4870
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.063520073890686,
      "learning_rate": 1.9838490385556715e-05,
      "loss": 2.8747,
      "step": 4871
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0109553337097168,
      "learning_rate": 1.983841667625747e-05,
      "loss": 2.5859,
      "step": 4872
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9729530215263367,
      "learning_rate": 1.9838342950279444e-05,
      "loss": 2.5739,
      "step": 4873
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0029240846633911,
      "learning_rate": 1.9838269207622745e-05,
      "loss": 2.9061,
      "step": 4874
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9970863461494446,
      "learning_rate": 1.9838195448287517e-05,
      "loss": 2.4872,
      "step": 4875
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.008711814880371,
      "learning_rate": 1.9838121672273872e-05,
      "loss": 2.8142,
      "step": 4876
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0714521408081055,
      "learning_rate": 1.9838047879581937e-05,
      "loss": 2.7644,
      "step": 4877
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0118343830108643,
      "learning_rate": 1.983797407021184e-05,
      "loss": 2.843,
      "step": 4878
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9557441473007202,
      "learning_rate": 1.9837900244163703e-05,
      "loss": 2.6591,
      "step": 4879
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.038280963897705,
      "learning_rate": 1.9837826401437658e-05,
      "loss": 2.8008,
      "step": 4880
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0345572233200073,
      "learning_rate": 1.9837752542033822e-05,
      "loss": 2.5263,
      "step": 4881
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5045149326324463,
      "learning_rate": 1.9837678665952324e-05,
      "loss": 2.565,
      "step": 4882
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9957680106163025,
      "learning_rate": 1.983760477319329e-05,
      "loss": 2.6783,
      "step": 4883
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.017886757850647,
      "learning_rate": 1.9837530863756843e-05,
      "loss": 2.6247,
      "step": 4884
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0208122730255127,
      "learning_rate": 1.9837456937643108e-05,
      "loss": 2.8888,
      "step": 4885
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9244347214698792,
      "learning_rate": 1.9837382994852216e-05,
      "loss": 2.6438,
      "step": 4886
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9892544746398926,
      "learning_rate": 1.983730903538428e-05,
      "loss": 2.7963,
      "step": 4887
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9876185655593872,
      "learning_rate": 1.983723505923944e-05,
      "loss": 2.6133,
      "step": 4888
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0413132905960083,
      "learning_rate": 1.9837161066417816e-05,
      "loss": 2.7092,
      "step": 4889
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0241245031356812,
      "learning_rate": 1.9837087056919528e-05,
      "loss": 2.7458,
      "step": 4890
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.082395076751709,
      "learning_rate": 1.983701303074471e-05,
      "loss": 2.602,
      "step": 4891
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9464614391326904,
      "learning_rate": 1.9836938987893476e-05,
      "loss": 2.8056,
      "step": 4892
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0681946277618408,
      "learning_rate": 1.9836864928365967e-05,
      "loss": 2.9207,
      "step": 4893
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1177271604537964,
      "learning_rate": 1.9836790852162296e-05,
      "loss": 2.7481,
      "step": 4894
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9800024628639221,
      "learning_rate": 1.9836716759282594e-05,
      "loss": 2.7359,
      "step": 4895
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9668319821357727,
      "learning_rate": 1.9836642649726984e-05,
      "loss": 2.8165,
      "step": 4896
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.045298457145691,
      "learning_rate": 1.9836568523495592e-05,
      "loss": 2.9545,
      "step": 4897
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0055979490280151,
      "learning_rate": 1.9836494380588548e-05,
      "loss": 2.6372,
      "step": 4898
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9711634516716003,
      "learning_rate": 1.9836420221005973e-05,
      "loss": 2.5618,
      "step": 4899
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0081924200057983,
      "learning_rate": 1.9836346044747996e-05,
      "loss": 2.6553,
      "step": 4900
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9238929748535156,
      "learning_rate": 1.983627185181474e-05,
      "loss": 2.5338,
      "step": 4901
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9606505036354065,
      "learning_rate": 1.983619764220633e-05,
      "loss": 2.752,
      "step": 4902
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0148011445999146,
      "learning_rate": 1.9836123415922895e-05,
      "loss": 2.9466,
      "step": 4903
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9829723834991455,
      "learning_rate": 1.983604917296456e-05,
      "loss": 2.8034,
      "step": 4904
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0408743619918823,
      "learning_rate": 1.9835974913331446e-05,
      "loss": 2.6481,
      "step": 4905
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9736762046813965,
      "learning_rate": 1.983590063702369e-05,
      "loss": 2.6372,
      "step": 4906
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0685063600540161,
      "learning_rate": 1.9835826344041407e-05,
      "loss": 2.8388,
      "step": 4907
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9611672163009644,
      "learning_rate": 1.9835752034384726e-05,
      "loss": 2.6754,
      "step": 4908
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0887451171875,
      "learning_rate": 1.9835677708053774e-05,
      "loss": 2.7218,
      "step": 4909
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0492744445800781,
      "learning_rate": 1.9835603365048678e-05,
      "loss": 3.005,
      "step": 4910
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.134137749671936,
      "learning_rate": 1.983552900536956e-05,
      "loss": 2.7036,
      "step": 4911
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9420711398124695,
      "learning_rate": 1.9835454629016558e-05,
      "loss": 2.6517,
      "step": 4912
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1960159540176392,
      "learning_rate": 1.9835380235989785e-05,
      "loss": 2.7836,
      "step": 4913
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9992996454238892,
      "learning_rate": 1.9835305826289364e-05,
      "loss": 2.6798,
      "step": 4914
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0582785606384277,
      "learning_rate": 1.9835231399915436e-05,
      "loss": 2.6949,
      "step": 4915
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0285298824310303,
      "learning_rate": 1.983515695686812e-05,
      "loss": 2.7607,
      "step": 4916
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.028032898902893,
      "learning_rate": 1.9835082497147537e-05,
      "loss": 2.7136,
      "step": 4917
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.117543339729309,
      "learning_rate": 1.9835008020753823e-05,
      "loss": 2.8082,
      "step": 4918
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9807488322257996,
      "learning_rate": 1.9834933527687095e-05,
      "loss": 2.6854,
      "step": 4919
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.042794942855835,
      "learning_rate": 1.9834859017947483e-05,
      "loss": 2.7363,
      "step": 4920
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0563310384750366,
      "learning_rate": 1.9834784491535118e-05,
      "loss": 2.7208,
      "step": 4921
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.8984920382499695,
      "learning_rate": 1.983470994845012e-05,
      "loss": 2.6337,
      "step": 4922
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9588509202003479,
      "learning_rate": 1.983463538869262e-05,
      "loss": 2.5498,
      "step": 4923
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9959774613380432,
      "learning_rate": 1.9834560812262738e-05,
      "loss": 2.8835,
      "step": 4924
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9904100298881531,
      "learning_rate": 1.9834486219160603e-05,
      "loss": 2.6898,
      "step": 4925
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2141485214233398,
      "learning_rate": 1.983441160938635e-05,
      "loss": 2.8199,
      "step": 4926
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.040893793106079,
      "learning_rate": 1.983433698294009e-05,
      "loss": 2.6145,
      "step": 4927
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9882054328918457,
      "learning_rate": 1.9834262339821965e-05,
      "loss": 2.5918,
      "step": 4928
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9992101192474365,
      "learning_rate": 1.9834187680032092e-05,
      "loss": 2.7167,
      "step": 4929
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0898040533065796,
      "learning_rate": 1.98341130035706e-05,
      "loss": 2.7173,
      "step": 4930
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1177046298980713,
      "learning_rate": 1.9834038310437613e-05,
      "loss": 2.7745,
      "step": 4931
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0775249004364014,
      "learning_rate": 1.9833963600633264e-05,
      "loss": 2.7641,
      "step": 4932
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0567798614501953,
      "learning_rate": 1.9833888874157674e-05,
      "loss": 2.8511,
      "step": 4933
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2637715339660645,
      "learning_rate": 1.983381413101097e-05,
      "loss": 2.8126,
      "step": 4934
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9908999800682068,
      "learning_rate": 1.9833739371193283e-05,
      "loss": 2.7822,
      "step": 4935
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9442279934883118,
      "learning_rate": 1.9833664594704737e-05,
      "loss": 2.8806,
      "step": 4936
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0775138139724731,
      "learning_rate": 1.9833589801545458e-05,
      "loss": 3.03,
      "step": 4937
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.014561414718628,
      "learning_rate": 1.983351499171557e-05,
      "loss": 2.7834,
      "step": 4938
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.987833559513092,
      "learning_rate": 1.9833440165215204e-05,
      "loss": 2.579,
      "step": 4939
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0305818319320679,
      "learning_rate": 1.983336532204449e-05,
      "loss": 2.7798,
      "step": 4940
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.004631757736206,
      "learning_rate": 1.9833290462203553e-05,
      "loss": 2.7743,
      "step": 4941
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.021939992904663,
      "learning_rate": 1.983321558569251e-05,
      "loss": 2.6426,
      "step": 4942
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.067600965499878,
      "learning_rate": 1.9833140692511503e-05,
      "loss": 2.5696,
      "step": 4943
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9251516461372375,
      "learning_rate": 1.9833065782660648e-05,
      "loss": 2.9232,
      "step": 4944
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2213984727859497,
      "learning_rate": 1.9832990856140077e-05,
      "loss": 2.6265,
      "step": 4945
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.071508526802063,
      "learning_rate": 1.9832915912949917e-05,
      "loss": 2.5172,
      "step": 4946
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9753742814064026,
      "learning_rate": 1.9832840953090292e-05,
      "loss": 2.4928,
      "step": 4947
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0692239999771118,
      "learning_rate": 1.9832765976561332e-05,
      "loss": 2.742,
      "step": 4948
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9914087653160095,
      "learning_rate": 1.9832690983363164e-05,
      "loss": 2.6504,
      "step": 4949
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9738326072692871,
      "learning_rate": 1.9832615973495913e-05,
      "loss": 2.6009,
      "step": 4950
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0926505327224731,
      "learning_rate": 1.9832540946959707e-05,
      "loss": 2.6969,
      "step": 4951
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9469223618507385,
      "learning_rate": 1.9832465903754676e-05,
      "loss": 2.6565,
      "step": 4952
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9784608483314514,
      "learning_rate": 1.983239084388094e-05,
      "loss": 2.9166,
      "step": 4953
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.141643762588501,
      "learning_rate": 1.983231576733864e-05,
      "loss": 2.7462,
      "step": 4954
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0413856506347656,
      "learning_rate": 1.9832240674127887e-05,
      "loss": 2.5678,
      "step": 4955
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0047529935836792,
      "learning_rate": 1.9832165564248816e-05,
      "loss": 2.8432,
      "step": 4956
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.994509756565094,
      "learning_rate": 1.9832090437701557e-05,
      "loss": 2.4116,
      "step": 4957
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.96318519115448,
      "learning_rate": 1.983201529448623e-05,
      "loss": 2.6883,
      "step": 4958
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0225694179534912,
      "learning_rate": 1.9831940134602973e-05,
      "loss": 2.6861,
      "step": 4959
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.1577320098876953,
      "learning_rate": 1.9831864958051903e-05,
      "loss": 2.7897,
      "step": 4960
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9961601495742798,
      "learning_rate": 1.9831789764833152e-05,
      "loss": 2.567,
      "step": 4961
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0852497816085815,
      "learning_rate": 1.983171455494685e-05,
      "loss": 2.7146,
      "step": 4962
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0210752487182617,
      "learning_rate": 1.9831639328393117e-05,
      "loss": 2.587,
      "step": 4963
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9834575653076172,
      "learning_rate": 1.9831564085172087e-05,
      "loss": 2.8084,
      "step": 4964
    },
    {
      "epoch": 0.06,
      "grad_norm": 4.708198547363281,
      "learning_rate": 1.9831488825283884e-05,
      "loss": 2.8601,
      "step": 4965
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0883445739746094,
      "learning_rate": 1.9831413548728638e-05,
      "loss": 2.7557,
      "step": 4966
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9957861304283142,
      "learning_rate": 1.9831338255506477e-05,
      "loss": 2.6304,
      "step": 4967
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0442323684692383,
      "learning_rate": 1.983126294561753e-05,
      "loss": 2.7079,
      "step": 4968
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.034942626953125,
      "learning_rate": 1.983118761906192e-05,
      "loss": 2.6834,
      "step": 4969
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9423888921737671,
      "learning_rate": 1.9831112275839773e-05,
      "loss": 2.7802,
      "step": 4970
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9987073540687561,
      "learning_rate": 1.9831036915951226e-05,
      "loss": 2.7094,
      "step": 4971
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0656776428222656,
      "learning_rate": 1.98309615393964e-05,
      "loss": 2.6166,
      "step": 4972
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2028807401657104,
      "learning_rate": 1.9830886146175422e-05,
      "loss": 2.7634,
      "step": 4973
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9149759411811829,
      "learning_rate": 1.983081073628842e-05,
      "loss": 2.6269,
      "step": 4974
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3226370811462402,
      "learning_rate": 1.983073530973553e-05,
      "loss": 2.6557,
      "step": 4975
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9531660676002502,
      "learning_rate": 1.983065986651687e-05,
      "loss": 2.8304,
      "step": 4976
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0067788362503052,
      "learning_rate": 1.9830584406632575e-05,
      "loss": 2.5318,
      "step": 4977
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0317769050598145,
      "learning_rate": 1.9830508930082764e-05,
      "loss": 2.8096,
      "step": 4978
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.032069444656372,
      "learning_rate": 1.9830433436867572e-05,
      "loss": 2.52,
      "step": 4979
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9957781434059143,
      "learning_rate": 1.9830357926987126e-05,
      "loss": 2.5582,
      "step": 4980
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9422440528869629,
      "learning_rate": 1.9830282400441558e-05,
      "loss": 2.8902,
      "step": 4981
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.972098171710968,
      "learning_rate": 1.983020685723099e-05,
      "loss": 2.7773,
      "step": 4982
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2472286224365234,
      "learning_rate": 1.9830131297355548e-05,
      "loss": 2.5529,
      "step": 4983
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2006196975708008,
      "learning_rate": 1.9830055720815364e-05,
      "loss": 2.7397,
      "step": 4984
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0170444250106812,
      "learning_rate": 1.9829980127610565e-05,
      "loss": 2.8816,
      "step": 4985
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0433197021484375,
      "learning_rate": 1.982990451774128e-05,
      "loss": 2.8214,
      "step": 4986
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0793912410736084,
      "learning_rate": 1.9829828891207638e-05,
      "loss": 2.6404,
      "step": 4987
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9882888197898865,
      "learning_rate": 1.9829753248009766e-05,
      "loss": 2.7015,
      "step": 4988
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0689399242401123,
      "learning_rate": 1.9829677588147794e-05,
      "loss": 2.8751,
      "step": 4989
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0649747848510742,
      "learning_rate": 1.982960191162185e-05,
      "loss": 2.8559,
      "step": 4990
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0747694969177246,
      "learning_rate": 1.982952621843206e-05,
      "loss": 2.5894,
      "step": 4991
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0470069646835327,
      "learning_rate": 1.9829450508578548e-05,
      "loss": 2.6186,
      "step": 4992
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.950950562953949,
      "learning_rate": 1.9829374782061454e-05,
      "loss": 2.6784,
      "step": 4993
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0067181587219238,
      "learning_rate": 1.9829299038880897e-05,
      "loss": 2.7958,
      "step": 4994
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0764321088790894,
      "learning_rate": 1.982922327903701e-05,
      "loss": 2.5327,
      "step": 4995
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9070234894752502,
      "learning_rate": 1.982914750252992e-05,
      "loss": 2.6539,
      "step": 4996
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0472179651260376,
      "learning_rate": 1.9829071709359756e-05,
      "loss": 2.7923,
      "step": 4997
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0613062381744385,
      "learning_rate": 1.9828995899526648e-05,
      "loss": 2.7373,
      "step": 4998
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0157504081726074,
      "learning_rate": 1.9828920073030717e-05,
      "loss": 2.8356,
      "step": 4999
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.10511314868927,
      "learning_rate": 1.9828844229872103e-05,
      "loss": 2.7339,
      "step": 5000
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9632705450057983,
      "learning_rate": 1.9828768370050922e-05,
      "loss": 2.7939,
      "step": 5001
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0379146337509155,
      "learning_rate": 1.9828692493567315e-05,
      "loss": 2.7853,
      "step": 5002
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1682926416397095,
      "learning_rate": 1.98286166004214e-05,
      "loss": 2.6789,
      "step": 5003
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9634224772453308,
      "learning_rate": 1.9828540690613312e-05,
      "loss": 2.73,
      "step": 5004
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9810112118721008,
      "learning_rate": 1.9828464764143177e-05,
      "loss": 2.6188,
      "step": 5005
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0759556293487549,
      "learning_rate": 1.9828388821011128e-05,
      "loss": 2.6485,
      "step": 5006
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0942591428756714,
      "learning_rate": 1.982831286121729e-05,
      "loss": 2.7084,
      "step": 5007
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.023270845413208,
      "learning_rate": 1.982823688476179e-05,
      "loss": 2.6648,
      "step": 5008
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0905689001083374,
      "learning_rate": 1.982816089164476e-05,
      "loss": 2.7611,
      "step": 5009
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9478206038475037,
      "learning_rate": 1.982808488186633e-05,
      "loss": 2.7678,
      "step": 5010
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9860543012619019,
      "learning_rate": 1.9828008855426626e-05,
      "loss": 2.7292,
      "step": 5011
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9260875582695007,
      "learning_rate": 1.982793281232578e-05,
      "loss": 2.6654,
      "step": 5012
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9207059741020203,
      "learning_rate": 1.982785675256391e-05,
      "loss": 2.7273,
      "step": 5013
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1302903890609741,
      "learning_rate": 1.9827780676141166e-05,
      "loss": 2.9002,
      "step": 5014
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.010891318321228,
      "learning_rate": 1.9827704583057654e-05,
      "loss": 2.6838,
      "step": 5015
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0146464109420776,
      "learning_rate": 1.982762847331352e-05,
      "loss": 2.7947,
      "step": 5016
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0685096979141235,
      "learning_rate": 1.982755234690888e-05,
      "loss": 2.6849,
      "step": 5017
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9716241359710693,
      "learning_rate": 1.9827476203843877e-05,
      "loss": 2.6708,
      "step": 5018
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9480912089347839,
      "learning_rate": 1.982740004411863e-05,
      "loss": 2.6639,
      "step": 5019
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0539495944976807,
      "learning_rate": 1.982732386773327e-05,
      "loss": 2.578,
      "step": 5020
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.044910192489624,
      "learning_rate": 1.9827247674687927e-05,
      "loss": 2.7486,
      "step": 5021
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9286842346191406,
      "learning_rate": 1.982717146498273e-05,
      "loss": 2.5794,
      "step": 5022
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9771305322647095,
      "learning_rate": 1.982709523861781e-05,
      "loss": 2.685,
      "step": 5023
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.064756155014038,
      "learning_rate": 1.982701899559329e-05,
      "loss": 2.5759,
      "step": 5024
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0338423252105713,
      "learning_rate": 1.982694273590931e-05,
      "loss": 2.6886,
      "step": 5025
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0466489791870117,
      "learning_rate": 1.982686645956599e-05,
      "loss": 2.727,
      "step": 5026
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0487821102142334,
      "learning_rate": 1.9826790166563463e-05,
      "loss": 2.7366,
      "step": 5027
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0507338047027588,
      "learning_rate": 1.9826713856901857e-05,
      "loss": 2.5552,
      "step": 5028
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1956396102905273,
      "learning_rate": 1.98266375305813e-05,
      "loss": 2.8417,
      "step": 5029
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9616678357124329,
      "learning_rate": 1.982656118760193e-05,
      "loss": 2.629,
      "step": 5030
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9604141712188721,
      "learning_rate": 1.9826484827963864e-05,
      "loss": 2.7866,
      "step": 5031
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9816555976867676,
      "learning_rate": 1.982640845166724e-05,
      "loss": 2.7155,
      "step": 5032
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9615738391876221,
      "learning_rate": 1.9826332058712185e-05,
      "loss": 2.6792,
      "step": 5033
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0896332263946533,
      "learning_rate": 1.9826255649098827e-05,
      "loss": 2.9784,
      "step": 5034
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0652892589569092,
      "learning_rate": 1.9826179222827296e-05,
      "loss": 2.8235,
      "step": 5035
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1260849237442017,
      "learning_rate": 1.9826102779897728e-05,
      "loss": 2.5295,
      "step": 5036
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0319393873214722,
      "learning_rate": 1.982602632031024e-05,
      "loss": 2.8067,
      "step": 5037
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0250990390777588,
      "learning_rate": 1.9825949844064974e-05,
      "loss": 2.8502,
      "step": 5038
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9267123341560364,
      "learning_rate": 1.9825873351162054e-05,
      "loss": 2.6219,
      "step": 5039
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0139294862747192,
      "learning_rate": 1.9825796841601604e-05,
      "loss": 2.6421,
      "step": 5040
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9868850111961365,
      "learning_rate": 1.9825720315383767e-05,
      "loss": 2.7468,
      "step": 5041
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.182667851448059,
      "learning_rate": 1.982564377250866e-05,
      "loss": 2.7566,
      "step": 5042
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0448253154754639,
      "learning_rate": 1.982556721297642e-05,
      "loss": 3.0021,
      "step": 5043
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9339985847473145,
      "learning_rate": 1.9825490636787177e-05,
      "loss": 2.7548,
      "step": 5044
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0695736408233643,
      "learning_rate": 1.9825414043941057e-05,
      "loss": 2.4723,
      "step": 5045
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0672107934951782,
      "learning_rate": 1.9825337434438194e-05,
      "loss": 2.9574,
      "step": 5046
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0543419122695923,
      "learning_rate": 1.9825260808278712e-05,
      "loss": 2.7673,
      "step": 5047
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0643259286880493,
      "learning_rate": 1.9825184165462744e-05,
      "loss": 2.6967,
      "step": 5048
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0052753686904907,
      "learning_rate": 1.9825107505990422e-05,
      "loss": 2.6973,
      "step": 5049
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.065270185470581,
      "learning_rate": 1.9825030829861874e-05,
      "loss": 2.8017,
      "step": 5050
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9867503046989441,
      "learning_rate": 1.9824954137077234e-05,
      "loss": 2.6588,
      "step": 5051
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0124270915985107,
      "learning_rate": 1.9824877427636622e-05,
      "loss": 2.7749,
      "step": 5052
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0491597652435303,
      "learning_rate": 1.982480070154018e-05,
      "loss": 2.6765,
      "step": 5053
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1928653717041016,
      "learning_rate": 1.9824723958788028e-05,
      "loss": 2.461,
      "step": 5054
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.193572998046875,
      "learning_rate": 1.9824647199380302e-05,
      "loss": 2.6452,
      "step": 5055
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0228570699691772,
      "learning_rate": 1.982457042331713e-05,
      "loss": 2.6781,
      "step": 5056
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1413987874984741,
      "learning_rate": 1.9824493630598645e-05,
      "loss": 2.6193,
      "step": 5057
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0563002824783325,
      "learning_rate": 1.9824416821224975e-05,
      "loss": 2.3886,
      "step": 5058
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9791268706321716,
      "learning_rate": 1.9824339995196248e-05,
      "loss": 2.6474,
      "step": 5059
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2463772296905518,
      "learning_rate": 1.9824263152512597e-05,
      "loss": 2.7797,
      "step": 5060
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0837275981903076,
      "learning_rate": 1.9824186293174153e-05,
      "loss": 2.6922,
      "step": 5061
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0246343612670898,
      "learning_rate": 1.9824109417181047e-05,
      "loss": 2.6742,
      "step": 5062
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1015398502349854,
      "learning_rate": 1.9824032524533402e-05,
      "loss": 2.5347,
      "step": 5063
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0299794673919678,
      "learning_rate": 1.9823955615231355e-05,
      "loss": 2.7282,
      "step": 5064
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9226358532905579,
      "learning_rate": 1.982387868927504e-05,
      "loss": 2.6128,
      "step": 5065
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0327788591384888,
      "learning_rate": 1.982380174666458e-05,
      "loss": 2.6188,
      "step": 5066
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9130027890205383,
      "learning_rate": 1.9823724787400103e-05,
      "loss": 2.6213,
      "step": 5067
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4634817838668823,
      "learning_rate": 1.982364781148175e-05,
      "loss": 2.6721,
      "step": 5068
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.279914379119873,
      "learning_rate": 1.9823570818909643e-05,
      "loss": 2.6737,
      "step": 5069
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2057090997695923,
      "learning_rate": 1.982349380968392e-05,
      "loss": 2.8424,
      "step": 5070
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0892846584320068,
      "learning_rate": 1.98234167838047e-05,
      "loss": 2.9546,
      "step": 5071
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0098482370376587,
      "learning_rate": 1.9823339741272126e-05,
      "loss": 2.8049,
      "step": 5072
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0510797500610352,
      "learning_rate": 1.9823262682086318e-05,
      "loss": 2.8224,
      "step": 5073
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9427592754364014,
      "learning_rate": 1.9823185606247416e-05,
      "loss": 2.6845,
      "step": 5074
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9480307698249817,
      "learning_rate": 1.9823108513755547e-05,
      "loss": 2.8128,
      "step": 5075
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0990155935287476,
      "learning_rate": 1.982303140461084e-05,
      "loss": 2.605,
      "step": 5076
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0080093145370483,
      "learning_rate": 1.9822954278813428e-05,
      "loss": 2.6836,
      "step": 5077
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0386581420898438,
      "learning_rate": 1.9822877136363438e-05,
      "loss": 2.8411,
      "step": 5078
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0222920179367065,
      "learning_rate": 1.9822799977261003e-05,
      "loss": 2.6175,
      "step": 5079
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9389200806617737,
      "learning_rate": 1.9822722801506257e-05,
      "loss": 2.6412,
      "step": 5080
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.15056574344635,
      "learning_rate": 1.9822645609099328e-05,
      "loss": 2.6648,
      "step": 5081
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.107521891593933,
      "learning_rate": 1.9822568400040344e-05,
      "loss": 2.4886,
      "step": 5082
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9908357262611389,
      "learning_rate": 1.982249117432944e-05,
      "loss": 2.781,
      "step": 5083
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1404635906219482,
      "learning_rate": 1.9822413931966747e-05,
      "loss": 2.8591,
      "step": 5084
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.910585880279541,
      "learning_rate": 1.9822336672952393e-05,
      "loss": 2.9493,
      "step": 5085
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1472266912460327,
      "learning_rate": 1.9822259397286512e-05,
      "loss": 2.6253,
      "step": 5086
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9636828899383545,
      "learning_rate": 1.982218210496923e-05,
      "loss": 2.8027,
      "step": 5087
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0193405151367188,
      "learning_rate": 1.9822104796000688e-05,
      "loss": 2.563,
      "step": 5088
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9795226454734802,
      "learning_rate": 1.9822027470381006e-05,
      "loss": 2.6459,
      "step": 5089
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0029397010803223,
      "learning_rate": 1.9821950128110322e-05,
      "loss": 2.6351,
      "step": 5090
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9595596790313721,
      "learning_rate": 1.9821872769188764e-05,
      "loss": 2.5935,
      "step": 5091
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9458625316619873,
      "learning_rate": 1.9821795393616465e-05,
      "loss": 2.6056,
      "step": 5092
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9283429384231567,
      "learning_rate": 1.9821718001393555e-05,
      "loss": 2.7348,
      "step": 5093
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.943361759185791,
      "learning_rate": 1.9821640592520163e-05,
      "loss": 2.527,
      "step": 5094
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9923487305641174,
      "learning_rate": 1.9821563166996424e-05,
      "loss": 2.8735,
      "step": 5095
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9954476356506348,
      "learning_rate": 1.982148572482247e-05,
      "loss": 2.633,
      "step": 5096
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9317843914031982,
      "learning_rate": 1.9821408265998427e-05,
      "loss": 2.6921,
      "step": 5097
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.005803108215332,
      "learning_rate": 1.9821330790524433e-05,
      "loss": 2.8184,
      "step": 5098
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0721665620803833,
      "learning_rate": 1.9821253298400614e-05,
      "loss": 2.5622,
      "step": 5099
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0620990991592407,
      "learning_rate": 1.9821175789627106e-05,
      "loss": 2.5243,
      "step": 5100
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0459402799606323,
      "learning_rate": 1.9821098264204034e-05,
      "loss": 2.7063,
      "step": 5101
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9872159361839294,
      "learning_rate": 1.9821020722131532e-05,
      "loss": 2.6856,
      "step": 5102
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9628962874412537,
      "learning_rate": 1.9820943163409735e-05,
      "loss": 2.9176,
      "step": 5103
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0845587253570557,
      "learning_rate": 1.9820865588038772e-05,
      "loss": 2.7776,
      "step": 5104
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9658766388893127,
      "learning_rate": 1.9820787996018775e-05,
      "loss": 2.6644,
      "step": 5105
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.049943208694458,
      "learning_rate": 1.9820710387349873e-05,
      "loss": 2.6767,
      "step": 5106
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0269927978515625,
      "learning_rate": 1.98206327620322e-05,
      "loss": 2.7469,
      "step": 5107
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0356413125991821,
      "learning_rate": 1.982055512006589e-05,
      "loss": 2.7001,
      "step": 5108
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0524309873580933,
      "learning_rate": 1.982047746145107e-05,
      "loss": 2.6756,
      "step": 5109
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9187594056129456,
      "learning_rate": 1.9820399786187875e-05,
      "loss": 2.8909,
      "step": 5110
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9266214370727539,
      "learning_rate": 1.982032209427643e-05,
      "loss": 2.6535,
      "step": 5111
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9451812505722046,
      "learning_rate": 1.9820244385716877e-05,
      "loss": 2.7115,
      "step": 5112
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0832349061965942,
      "learning_rate": 1.982016666050934e-05,
      "loss": 2.5557,
      "step": 5113
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.976887583732605,
      "learning_rate": 1.9820088918653954e-05,
      "loss": 2.6271,
      "step": 5114
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9973483085632324,
      "learning_rate": 1.982001116015085e-05,
      "loss": 2.5932,
      "step": 5115
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9503350257873535,
      "learning_rate": 1.9819933385000164e-05,
      "loss": 2.5862,
      "step": 5116
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9658330678939819,
      "learning_rate": 1.9819855593202018e-05,
      "loss": 2.7227,
      "step": 5117
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9616863131523132,
      "learning_rate": 1.9819777784756554e-05,
      "loss": 2.8238,
      "step": 5118
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9631602764129639,
      "learning_rate": 1.9819699959663897e-05,
      "loss": 2.485,
      "step": 5119
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0409384965896606,
      "learning_rate": 1.9819622117924184e-05,
      "loss": 2.7875,
      "step": 5120
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9873805642127991,
      "learning_rate": 1.9819544259537542e-05,
      "loss": 2.5709,
      "step": 5121
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.159254550933838,
      "learning_rate": 1.9819466384504107e-05,
      "loss": 2.7669,
      "step": 5122
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9942288994789124,
      "learning_rate": 1.981938849282401e-05,
      "loss": 2.676,
      "step": 5123
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.997706949710846,
      "learning_rate": 1.981931058449738e-05,
      "loss": 2.5878,
      "step": 5124
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0205034017562866,
      "learning_rate": 1.9819232659524355e-05,
      "loss": 2.8084,
      "step": 5125
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9798784852027893,
      "learning_rate": 1.9819154717905064e-05,
      "loss": 2.818,
      "step": 5126
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9771567583084106,
      "learning_rate": 1.9819076759639634e-05,
      "loss": 2.6607,
      "step": 5127
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.004746437072754,
      "learning_rate": 1.9818998784728205e-05,
      "loss": 2.5453,
      "step": 5128
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9352785348892212,
      "learning_rate": 1.9818920793170907e-05,
      "loss": 2.6714,
      "step": 5129
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9381695985794067,
      "learning_rate": 1.981884278496787e-05,
      "loss": 2.638,
      "step": 5130
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.8851982951164246,
      "learning_rate": 1.9818764760119227e-05,
      "loss": 2.793,
      "step": 5131
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9015299677848816,
      "learning_rate": 1.9818686718625116e-05,
      "loss": 2.7247,
      "step": 5132
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0364091396331787,
      "learning_rate": 1.9818608660485662e-05,
      "loss": 2.8156,
      "step": 5133
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.021053671836853,
      "learning_rate": 1.9818530585700998e-05,
      "loss": 2.807,
      "step": 5134
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9547853469848633,
      "learning_rate": 1.9818452494271255e-05,
      "loss": 2.7807,
      "step": 5135
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0205824375152588,
      "learning_rate": 1.9818374386196573e-05,
      "loss": 2.568,
      "step": 5136
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0280015468597412,
      "learning_rate": 1.981829626147708e-05,
      "loss": 2.6756,
      "step": 5137
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0425928831100464,
      "learning_rate": 1.9818218120112903e-05,
      "loss": 2.5003,
      "step": 5138
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2382696866989136,
      "learning_rate": 1.9818139962104185e-05,
      "loss": 2.6169,
      "step": 5139
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9048683047294617,
      "learning_rate": 1.9818061787451053e-05,
      "loss": 2.7387,
      "step": 5140
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9851697087287903,
      "learning_rate": 1.981798359615364e-05,
      "loss": 2.7126,
      "step": 5141
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0119549036026,
      "learning_rate": 1.9817905388212075e-05,
      "loss": 2.6539,
      "step": 5142
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.030501127243042,
      "learning_rate": 1.9817827163626497e-05,
      "loss": 2.7155,
      "step": 5143
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2620271444320679,
      "learning_rate": 1.9817748922397033e-05,
      "loss": 2.7901,
      "step": 5144
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0032576322555542,
      "learning_rate": 1.9817670664523818e-05,
      "loss": 2.7367,
      "step": 5145
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0259262323379517,
      "learning_rate": 1.981759239000699e-05,
      "loss": 2.7687,
      "step": 5146
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.020020604133606,
      "learning_rate": 1.9817514098846668e-05,
      "loss": 2.7073,
      "step": 5147
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0281188488006592,
      "learning_rate": 1.9817435791043e-05,
      "loss": 2.7339,
      "step": 5148
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9670756459236145,
      "learning_rate": 1.981735746659611e-05,
      "loss": 2.8218,
      "step": 5149
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9787846207618713,
      "learning_rate": 1.981727912550613e-05,
      "loss": 2.7646,
      "step": 5150
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.012873888015747,
      "learning_rate": 1.98172007677732e-05,
      "loss": 2.6654,
      "step": 5151
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9790017008781433,
      "learning_rate": 1.9817122393397446e-05,
      "loss": 2.6024,
      "step": 5152
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1046428680419922,
      "learning_rate": 1.9817044002379e-05,
      "loss": 2.6307,
      "step": 5153
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9707549810409546,
      "learning_rate": 1.9816965594718005e-05,
      "loss": 2.5563,
      "step": 5154
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9779355525970459,
      "learning_rate": 1.9816887170414583e-05,
      "loss": 2.5714,
      "step": 5155
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9800496697425842,
      "learning_rate": 1.9816808729468872e-05,
      "loss": 2.7578,
      "step": 5156
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9775120615959167,
      "learning_rate": 1.9816730271881003e-05,
      "loss": 2.7292,
      "step": 5157
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0041627883911133,
      "learning_rate": 1.9816651797651108e-05,
      "loss": 2.9813,
      "step": 5158
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9646170735359192,
      "learning_rate": 1.9816573306779327e-05,
      "loss": 2.843,
      "step": 5159
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0290582180023193,
      "learning_rate": 1.9816494799265787e-05,
      "loss": 2.5854,
      "step": 5160
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.061718225479126,
      "learning_rate": 1.9816416275110622e-05,
      "loss": 2.8164,
      "step": 5161
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9567169547080994,
      "learning_rate": 1.9816337734313963e-05,
      "loss": 2.746,
      "step": 5162
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9527118802070618,
      "learning_rate": 1.9816259176875946e-05,
      "loss": 2.6242,
      "step": 5163
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9675334692001343,
      "learning_rate": 1.9816180602796703e-05,
      "loss": 2.7784,
      "step": 5164
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9589433670043945,
      "learning_rate": 1.981610201207637e-05,
      "loss": 2.9474,
      "step": 5165
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.002160906791687,
      "learning_rate": 1.9816023404715077e-05,
      "loss": 2.762,
      "step": 5166
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9638652801513672,
      "learning_rate": 1.981594478071296e-05,
      "loss": 2.852,
      "step": 5167
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0995750427246094,
      "learning_rate": 1.9815866140070147e-05,
      "loss": 2.6159,
      "step": 5168
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1489840745925903,
      "learning_rate": 1.981578748278678e-05,
      "loss": 2.8041,
      "step": 5169
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0465214252471924,
      "learning_rate": 1.9815708808862982e-05,
      "loss": 2.873,
      "step": 5170
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0450433492660522,
      "learning_rate": 1.9815630118298897e-05,
      "loss": 2.5412,
      "step": 5171
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9807567000389099,
      "learning_rate": 1.981555141109465e-05,
      "loss": 2.547,
      "step": 5172
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9424847364425659,
      "learning_rate": 1.9815472687250377e-05,
      "loss": 2.7897,
      "step": 5173
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0709407329559326,
      "learning_rate": 1.9815393946766212e-05,
      "loss": 2.6715,
      "step": 5174
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9720489978790283,
      "learning_rate": 1.981531518964229e-05,
      "loss": 2.5772,
      "step": 5175
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9752822518348694,
      "learning_rate": 1.9815236415878742e-05,
      "loss": 2.6672,
      "step": 5176
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.312187671661377,
      "learning_rate": 1.9815157625475703e-05,
      "loss": 2.6622,
      "step": 5177
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.995566189289093,
      "learning_rate": 1.9815078818433306e-05,
      "loss": 2.7952,
      "step": 5178
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9308032989501953,
      "learning_rate": 1.9814999994751685e-05,
      "loss": 2.751,
      "step": 5179
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1453187465667725,
      "learning_rate": 1.9814921154430972e-05,
      "loss": 2.6116,
      "step": 5180
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0103598833084106,
      "learning_rate": 1.98148422974713e-05,
      "loss": 2.8024,
      "step": 5181
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0158872604370117,
      "learning_rate": 1.9814763423872808e-05,
      "loss": 2.7641,
      "step": 5182
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.976241946220398,
      "learning_rate": 1.9814684533635627e-05,
      "loss": 2.6113,
      "step": 5183
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9997746348381042,
      "learning_rate": 1.981460562675989e-05,
      "loss": 2.9663,
      "step": 5184
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1890478134155273,
      "learning_rate": 1.9814526703245726e-05,
      "loss": 2.6557,
      "step": 5185
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9671263694763184,
      "learning_rate": 1.981444776309328e-05,
      "loss": 2.7356,
      "step": 5186
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1557289361953735,
      "learning_rate": 1.9814368806302674e-05,
      "loss": 2.592,
      "step": 5187
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9859910011291504,
      "learning_rate": 1.981428983287405e-05,
      "loss": 2.5384,
      "step": 5188
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9579195380210876,
      "learning_rate": 1.981421084280754e-05,
      "loss": 2.7108,
      "step": 5189
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1909981966018677,
      "learning_rate": 1.9814131836103277e-05,
      "loss": 2.6947,
      "step": 5190
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0561187267303467,
      "learning_rate": 1.9814052812761394e-05,
      "loss": 2.9477,
      "step": 5191
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1630762815475464,
      "learning_rate": 1.9813973772782025e-05,
      "loss": 2.6689,
      "step": 5192
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9784899950027466,
      "learning_rate": 1.9813894716165307e-05,
      "loss": 2.7574,
      "step": 5193
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9749864935874939,
      "learning_rate": 1.981381564291137e-05,
      "loss": 2.8773,
      "step": 5194
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9544374346733093,
      "learning_rate": 1.9813736553020352e-05,
      "loss": 2.7424,
      "step": 5195
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9670525193214417,
      "learning_rate": 1.9813657446492383e-05,
      "loss": 2.7866,
      "step": 5196
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0038470029830933,
      "learning_rate": 1.9813578323327603e-05,
      "loss": 2.7114,
      "step": 5197
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0505328178405762,
      "learning_rate": 1.981349918352614e-05,
      "loss": 2.5434,
      "step": 5198
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.172406792640686,
      "learning_rate": 1.981342002708813e-05,
      "loss": 3.0099,
      "step": 5199
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9170237183570862,
      "learning_rate": 1.981334085401371e-05,
      "loss": 2.77,
      "step": 5200
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.04335355758667,
      "learning_rate": 1.981326166430301e-05,
      "loss": 2.7111,
      "step": 5201
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9676716923713684,
      "learning_rate": 1.981318245795617e-05,
      "loss": 2.7554,
      "step": 5202
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.018576741218567,
      "learning_rate": 1.9813103234973316e-05,
      "loss": 2.7737,
      "step": 5203
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9838110208511353,
      "learning_rate": 1.9813023995354584e-05,
      "loss": 2.6674,
      "step": 5204
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0067451000213623,
      "learning_rate": 1.9812944739100118e-05,
      "loss": 2.7793,
      "step": 5205
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9958716034889221,
      "learning_rate": 1.9812865466210043e-05,
      "loss": 2.3684,
      "step": 5206
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0084360837936401,
      "learning_rate": 1.9812786176684496e-05,
      "loss": 2.7557,
      "step": 5207
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9768613576889038,
      "learning_rate": 1.981270687052361e-05,
      "loss": 2.8473,
      "step": 5208
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0022615194320679,
      "learning_rate": 1.981262754772752e-05,
      "loss": 2.7719,
      "step": 5209
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0639550685882568,
      "learning_rate": 1.9812548208296368e-05,
      "loss": 2.6838,
      "step": 5210
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0199689865112305,
      "learning_rate": 1.9812468852230277e-05,
      "loss": 2.9522,
      "step": 5211
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0265469551086426,
      "learning_rate": 1.9812389479529384e-05,
      "loss": 2.5697,
      "step": 5212
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1112526655197144,
      "learning_rate": 1.9812310090193827e-05,
      "loss": 2.9531,
      "step": 5213
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9818000793457031,
      "learning_rate": 1.981223068422374e-05,
      "loss": 2.6765,
      "step": 5214
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9266929626464844,
      "learning_rate": 1.9812151261619257e-05,
      "loss": 2.7565,
      "step": 5215
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9367949962615967,
      "learning_rate": 1.9812071822380514e-05,
      "loss": 2.7143,
      "step": 5216
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9681537747383118,
      "learning_rate": 1.981199236650764e-05,
      "loss": 2.7869,
      "step": 5217
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0110442638397217,
      "learning_rate": 1.981191289400078e-05,
      "loss": 2.5014,
      "step": 5218
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0174965858459473,
      "learning_rate": 1.981183340486006e-05,
      "loss": 2.6683,
      "step": 5219
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9450767040252686,
      "learning_rate": 1.981175389908562e-05,
      "loss": 2.8859,
      "step": 5220
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9844502806663513,
      "learning_rate": 1.981167437667759e-05,
      "loss": 2.838,
      "step": 5221
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0577484369277954,
      "learning_rate": 1.9811594837636105e-05,
      "loss": 2.7902,
      "step": 5222
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.285052537918091,
      "learning_rate": 1.9811515281961306e-05,
      "loss": 2.7941,
      "step": 5223
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0395236015319824,
      "learning_rate": 1.9811435709653318e-05,
      "loss": 2.5368,
      "step": 5224
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9546118974685669,
      "learning_rate": 1.9811356120712286e-05,
      "loss": 2.4864,
      "step": 5225
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0244492292404175,
      "learning_rate": 1.981127651513834e-05,
      "loss": 2.8738,
      "step": 5226
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0399253368377686,
      "learning_rate": 1.9811196892931616e-05,
      "loss": 2.7364,
      "step": 5227
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0212430953979492,
      "learning_rate": 1.981111725409225e-05,
      "loss": 2.5418,
      "step": 5228
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0061357021331787,
      "learning_rate": 1.9811037598620374e-05,
      "loss": 2.7041,
      "step": 5229
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1027828454971313,
      "learning_rate": 1.9810957926516123e-05,
      "loss": 2.6849,
      "step": 5230
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0051060914993286,
      "learning_rate": 1.9810878237779635e-05,
      "loss": 2.6611,
      "step": 5231
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9540783166885376,
      "learning_rate": 1.9810798532411044e-05,
      "loss": 2.8125,
      "step": 5232
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9550498723983765,
      "learning_rate": 1.9810718810410488e-05,
      "loss": 2.6787,
      "step": 5233
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9269140958786011,
      "learning_rate": 1.9810639071778093e-05,
      "loss": 2.6863,
      "step": 5234
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9192075729370117,
      "learning_rate": 1.9810559316514007e-05,
      "loss": 2.5251,
      "step": 5235
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9942402243614197,
      "learning_rate": 1.981047954461835e-05,
      "loss": 2.8613,
      "step": 5236
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9632971286773682,
      "learning_rate": 1.9810399756091275e-05,
      "loss": 2.6982,
      "step": 5237
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9749805927276611,
      "learning_rate": 1.9810319950932903e-05,
      "loss": 2.9125,
      "step": 5238
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.072669506072998,
      "learning_rate": 1.9810240129143375e-05,
      "loss": 2.6406,
      "step": 5239
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0585753917694092,
      "learning_rate": 1.9810160290722825e-05,
      "loss": 2.6736,
      "step": 5240
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.007948637008667,
      "learning_rate": 1.9810080435671387e-05,
      "loss": 2.9158,
      "step": 5241
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9440937042236328,
      "learning_rate": 1.98100005639892e-05,
      "loss": 2.78,
      "step": 5242
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9562011957168579,
      "learning_rate": 1.9809920675676396e-05,
      "loss": 2.6049,
      "step": 5243
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0616252422332764,
      "learning_rate": 1.9809840770733118e-05,
      "loss": 2.7326,
      "step": 5244
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9998840093612671,
      "learning_rate": 1.9809760849159493e-05,
      "loss": 2.8109,
      "step": 5245
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3224437236785889,
      "learning_rate": 1.9809680910955654e-05,
      "loss": 2.5234,
      "step": 5246
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1845622062683105,
      "learning_rate": 1.9809600956121745e-05,
      "loss": 2.809,
      "step": 5247
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9957106709480286,
      "learning_rate": 1.98095209846579e-05,
      "loss": 2.6665,
      "step": 5248
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3613773584365845,
      "learning_rate": 1.980944099656425e-05,
      "loss": 2.6338,
      "step": 5249
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.961185872554779,
      "learning_rate": 1.9809360991840935e-05,
      "loss": 2.7489,
      "step": 5250
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9865682125091553,
      "learning_rate": 1.980928097048809e-05,
      "loss": 2.642,
      "step": 5251
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0175772905349731,
      "learning_rate": 1.9809200932505846e-05,
      "loss": 2.7484,
      "step": 5252
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0601861476898193,
      "learning_rate": 1.9809120877894347e-05,
      "loss": 2.4535,
      "step": 5253
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.047886610031128,
      "learning_rate": 1.980904080665372e-05,
      "loss": 2.7298,
      "step": 5254
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9902949333190918,
      "learning_rate": 1.9808960718784106e-05,
      "loss": 2.6692,
      "step": 5255
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9815402030944824,
      "learning_rate": 1.980888061428564e-05,
      "loss": 2.7655,
      "step": 5256
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.935854971408844,
      "learning_rate": 1.9808800493158454e-05,
      "loss": 2.5403,
      "step": 5257
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0675609111785889,
      "learning_rate": 1.9808720355402692e-05,
      "loss": 2.615,
      "step": 5258
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0311474800109863,
      "learning_rate": 1.9808640201018484e-05,
      "loss": 2.837,
      "step": 5259
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3062199354171753,
      "learning_rate": 1.9808560030005965e-05,
      "loss": 2.6713,
      "step": 5260
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0575294494628906,
      "learning_rate": 1.9808479842365275e-05,
      "loss": 2.7043,
      "step": 5261
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0029584169387817,
      "learning_rate": 1.9808399638096545e-05,
      "loss": 2.7969,
      "step": 5262
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.064845323562622,
      "learning_rate": 1.9808319417199915e-05,
      "loss": 2.8458,
      "step": 5263
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9499145746231079,
      "learning_rate": 1.980823917967552e-05,
      "loss": 2.7174,
      "step": 5264
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9605628848075867,
      "learning_rate": 1.9808158925523495e-05,
      "loss": 2.7882,
      "step": 5265
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1425597667694092,
      "learning_rate": 1.9808078654743977e-05,
      "loss": 2.5976,
      "step": 5266
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0841041803359985,
      "learning_rate": 1.9807998367337103e-05,
      "loss": 2.6728,
      "step": 5267
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0623044967651367,
      "learning_rate": 1.9807918063303004e-05,
      "loss": 2.7764,
      "step": 5268
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9821333885192871,
      "learning_rate": 1.9807837742641823e-05,
      "loss": 2.7376,
      "step": 5269
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1084238290786743,
      "learning_rate": 1.9807757405353695e-05,
      "loss": 2.6432,
      "step": 5270
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9678552746772766,
      "learning_rate": 1.9807677051438753e-05,
      "loss": 2.4935,
      "step": 5271
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9654020071029663,
      "learning_rate": 1.9807596680897134e-05,
      "loss": 2.7136,
      "step": 5272
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9507654905319214,
      "learning_rate": 1.9807516293728976e-05,
      "loss": 2.6398,
      "step": 5273
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9239946007728577,
      "learning_rate": 1.9807435889934412e-05,
      "loss": 2.4807,
      "step": 5274
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.025664210319519,
      "learning_rate": 1.9807355469513584e-05,
      "loss": 2.6764,
      "step": 5275
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0445693731307983,
      "learning_rate": 1.980727503246662e-05,
      "loss": 2.4798,
      "step": 5276
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0658745765686035,
      "learning_rate": 1.980719457879367e-05,
      "loss": 2.8467,
      "step": 5277
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1152713298797607,
      "learning_rate": 1.980711410849485e-05,
      "loss": 2.791,
      "step": 5278
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9787047505378723,
      "learning_rate": 1.9807033621570316e-05,
      "loss": 2.5707,
      "step": 5279
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9702147245407104,
      "learning_rate": 1.9806953118020194e-05,
      "loss": 2.5307,
      "step": 5280
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9552379846572876,
      "learning_rate": 1.980687259784462e-05,
      "loss": 2.8324,
      "step": 5281
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9230726361274719,
      "learning_rate": 1.980679206104374e-05,
      "loss": 2.7637,
      "step": 5282
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.920627236366272,
      "learning_rate": 1.980671150761768e-05,
      "loss": 2.6315,
      "step": 5283
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.130980372428894,
      "learning_rate": 1.980663093756658e-05,
      "loss": 2.7752,
      "step": 5284
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1154701709747314,
      "learning_rate": 1.980655035089058e-05,
      "loss": 2.8945,
      "step": 5285
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.953859806060791,
      "learning_rate": 1.9806469747589808e-05,
      "loss": 2.5655,
      "step": 5286
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.045647144317627,
      "learning_rate": 1.980638912766441e-05,
      "loss": 2.7683,
      "step": 5287
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9897764325141907,
      "learning_rate": 1.980630849111452e-05,
      "loss": 2.7568,
      "step": 5288
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0947816371917725,
      "learning_rate": 1.9806227837940276e-05,
      "loss": 2.8999,
      "step": 5289
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9867962002754211,
      "learning_rate": 1.980614716814181e-05,
      "loss": 2.5309,
      "step": 5290
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1382588148117065,
      "learning_rate": 1.980606648171926e-05,
      "loss": 2.5896,
      "step": 5291
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0501428842544556,
      "learning_rate": 1.9805985778672766e-05,
      "loss": 2.7387,
      "step": 5292
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1853002309799194,
      "learning_rate": 1.980590505900246e-05,
      "loss": 2.5666,
      "step": 5293
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0478161573410034,
      "learning_rate": 1.9805824322708482e-05,
      "loss": 2.7742,
      "step": 5294
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9132081270217896,
      "learning_rate": 1.980574356979097e-05,
      "loss": 2.8513,
      "step": 5295
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0039949417114258,
      "learning_rate": 1.980566280025006e-05,
      "loss": 2.6942,
      "step": 5296
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9890972971916199,
      "learning_rate": 1.9805582014085887e-05,
      "loss": 2.776,
      "step": 5297
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9032099843025208,
      "learning_rate": 1.980550121129859e-05,
      "loss": 2.7338,
      "step": 5298
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0160435438156128,
      "learning_rate": 1.9805420391888307e-05,
      "loss": 2.6829,
      "step": 5299
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0307551622390747,
      "learning_rate": 1.9805339555855168e-05,
      "loss": 2.7784,
      "step": 5300
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9385023713111877,
      "learning_rate": 1.980525870319932e-05,
      "loss": 2.6184,
      "step": 5301
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2812774181365967,
      "learning_rate": 1.9805177833920896e-05,
      "loss": 2.8053,
      "step": 5302
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.994752824306488,
      "learning_rate": 1.980509694802003e-05,
      "loss": 2.6878,
      "step": 5303
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0390517711639404,
      "learning_rate": 1.980501604549686e-05,
      "loss": 2.9372,
      "step": 5304
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9513524770736694,
      "learning_rate": 1.980493512635153e-05,
      "loss": 2.6694,
      "step": 5305
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0944899320602417,
      "learning_rate": 1.980485419058417e-05,
      "loss": 2.5526,
      "step": 5306
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0282113552093506,
      "learning_rate": 1.9804773238194918e-05,
      "loss": 2.6795,
      "step": 5307
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.359352469444275,
      "learning_rate": 1.980469226918391e-05,
      "loss": 2.8266,
      "step": 5308
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9644757509231567,
      "learning_rate": 1.9804611283551293e-05,
      "loss": 2.9018,
      "step": 5309
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.8930381536483765,
      "learning_rate": 1.980453028129719e-05,
      "loss": 2.6205,
      "step": 5310
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0523130893707275,
      "learning_rate": 1.980444926242175e-05,
      "loss": 2.5824,
      "step": 5311
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9297958016395569,
      "learning_rate": 1.9804368226925102e-05,
      "loss": 2.7577,
      "step": 5312
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9916370511054993,
      "learning_rate": 1.9804287174807387e-05,
      "loss": 2.6405,
      "step": 5313
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.034712314605713,
      "learning_rate": 1.9804206106068748e-05,
      "loss": 2.6582,
      "step": 5314
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0177392959594727,
      "learning_rate": 1.980412502070931e-05,
      "loss": 2.7144,
      "step": 5315
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9951415657997131,
      "learning_rate": 1.980404391872922e-05,
      "loss": 2.4121,
      "step": 5316
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0355441570281982,
      "learning_rate": 1.9803962800128615e-05,
      "loss": 2.5029,
      "step": 5317
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0296425819396973,
      "learning_rate": 1.9803881664907626e-05,
      "loss": 2.622,
      "step": 5318
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1405991315841675,
      "learning_rate": 1.9803800513066398e-05,
      "loss": 2.803,
      "step": 5319
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1013498306274414,
      "learning_rate": 1.9803719344605065e-05,
      "loss": 2.7475,
      "step": 5320
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9886903762817383,
      "learning_rate": 1.9803638159523767e-05,
      "loss": 2.5394,
      "step": 5321
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9961537718772888,
      "learning_rate": 1.9803556957822635e-05,
      "loss": 2.6769,
      "step": 5322
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9523710608482361,
      "learning_rate": 1.9803475739501812e-05,
      "loss": 2.7205,
      "step": 5323
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9594447612762451,
      "learning_rate": 1.980339450456144e-05,
      "loss": 2.7465,
      "step": 5324
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0129224061965942,
      "learning_rate": 1.980331325300165e-05,
      "loss": 2.7301,
      "step": 5325
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0785740613937378,
      "learning_rate": 1.9803231984822577e-05,
      "loss": 2.7174,
      "step": 5326
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0164721012115479,
      "learning_rate": 1.9803150700024366e-05,
      "loss": 2.6972,
      "step": 5327
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0771548748016357,
      "learning_rate": 1.9803069398607154e-05,
      "loss": 2.905,
      "step": 5328
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9797764420509338,
      "learning_rate": 1.9802988080571075e-05,
      "loss": 2.5578,
      "step": 5329
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9560040235519409,
      "learning_rate": 1.9802906745916268e-05,
      "loss": 2.7186,
      "step": 5330
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9801478385925293,
      "learning_rate": 1.9802825394642873e-05,
      "loss": 2.5872,
      "step": 5331
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1835384368896484,
      "learning_rate": 1.9802744026751025e-05,
      "loss": 2.7181,
      "step": 5332
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0317022800445557,
      "learning_rate": 1.9802662642240867e-05,
      "loss": 2.9552,
      "step": 5333
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0394151210784912,
      "learning_rate": 1.980258124111253e-05,
      "loss": 2.9285,
      "step": 5334
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.962081253528595,
      "learning_rate": 1.9802499823366155e-05,
      "loss": 2.7268,
      "step": 5335
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.012506127357483,
      "learning_rate": 1.980241838900188e-05,
      "loss": 2.8153,
      "step": 5336
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0497817993164062,
      "learning_rate": 1.9802336938019848e-05,
      "loss": 2.9964,
      "step": 5337
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9744722247123718,
      "learning_rate": 1.980225547042019e-05,
      "loss": 2.7319,
      "step": 5338
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9763401746749878,
      "learning_rate": 1.9802173986203047e-05,
      "loss": 2.9993,
      "step": 5339
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9034376740455627,
      "learning_rate": 1.9802092485368557e-05,
      "loss": 2.7953,
      "step": 5340
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9970560669898987,
      "learning_rate": 1.980201096791686e-05,
      "loss": 2.6589,
      "step": 5341
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.013502597808838,
      "learning_rate": 1.9801929433848087e-05,
      "loss": 2.469,
      "step": 5342
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9673387408256531,
      "learning_rate": 1.9801847883162385e-05,
      "loss": 2.5858,
      "step": 5343
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9522415399551392,
      "learning_rate": 1.9801766315859888e-05,
      "loss": 2.6564,
      "step": 5344
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9494583606719971,
      "learning_rate": 1.9801684731940736e-05,
      "loss": 2.5473,
      "step": 5345
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9315041303634644,
      "learning_rate": 1.9801603131405065e-05,
      "loss": 2.7956,
      "step": 5346
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9683980345726013,
      "learning_rate": 1.9801521514253015e-05,
      "loss": 2.9253,
      "step": 5347
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9736183285713196,
      "learning_rate": 1.9801439880484724e-05,
      "loss": 2.6644,
      "step": 5348
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0289474725723267,
      "learning_rate": 1.980135823010033e-05,
      "loss": 2.6823,
      "step": 5349
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0182369947433472,
      "learning_rate": 1.9801276563099972e-05,
      "loss": 2.8026,
      "step": 5350
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9501047730445862,
      "learning_rate": 1.980119487948379e-05,
      "loss": 2.9484,
      "step": 5351
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1113337278366089,
      "learning_rate": 1.9801113179251916e-05,
      "loss": 2.638,
      "step": 5352
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9101577997207642,
      "learning_rate": 1.98010314624045e-05,
      "loss": 2.7573,
      "step": 5353
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9189348220825195,
      "learning_rate": 1.9800949728941666e-05,
      "loss": 2.6415,
      "step": 5354
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.000227451324463,
      "learning_rate": 1.9800867978863568e-05,
      "loss": 2.5952,
      "step": 5355
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.040855050086975,
      "learning_rate": 1.9800786212170332e-05,
      "loss": 2.8884,
      "step": 5356
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1022212505340576,
      "learning_rate": 1.98007044288621e-05,
      "loss": 2.8676,
      "step": 5357
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0133001804351807,
      "learning_rate": 1.9800622628939017e-05,
      "loss": 2.5194,
      "step": 5358
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.117375373840332,
      "learning_rate": 1.9800540812401213e-05,
      "loss": 2.5857,
      "step": 5359
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4777032136917114,
      "learning_rate": 1.9800458979248837e-05,
      "loss": 2.6808,
      "step": 5360
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0929063558578491,
      "learning_rate": 1.9800377129482015e-05,
      "loss": 2.4266,
      "step": 5361
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9856737852096558,
      "learning_rate": 1.9800295263100892e-05,
      "loss": 2.5224,
      "step": 5362
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0045948028564453,
      "learning_rate": 1.9800213380105608e-05,
      "loss": 2.7995,
      "step": 5363
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0729762315750122,
      "learning_rate": 1.98001314804963e-05,
      "loss": 2.6803,
      "step": 5364
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0307503938674927,
      "learning_rate": 1.980004956427311e-05,
      "loss": 2.8674,
      "step": 5365
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0164141654968262,
      "learning_rate": 1.9799967631436174e-05,
      "loss": 2.694,
      "step": 5366
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0724976062774658,
      "learning_rate": 1.979988568198563e-05,
      "loss": 2.8604,
      "step": 5367
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0185182094573975,
      "learning_rate": 1.9799803715921616e-05,
      "loss": 2.8895,
      "step": 5368
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0555733442306519,
      "learning_rate": 1.9799721733244276e-05,
      "loss": 2.9074,
      "step": 5369
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9418209195137024,
      "learning_rate": 1.9799639733953743e-05,
      "loss": 2.7526,
      "step": 5370
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9104586243629456,
      "learning_rate": 1.9799557718050163e-05,
      "loss": 2.6132,
      "step": 5371
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9909668564796448,
      "learning_rate": 1.979947568553367e-05,
      "loss": 2.6908,
      "step": 5372
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.946942150592804,
      "learning_rate": 1.9799393636404405e-05,
      "loss": 2.5525,
      "step": 5373
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0963026285171509,
      "learning_rate": 1.9799311570662504e-05,
      "loss": 2.8128,
      "step": 5374
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0455459356307983,
      "learning_rate": 1.9799229488308112e-05,
      "loss": 2.6435,
      "step": 5375
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9703106880187988,
      "learning_rate": 1.979914738934136e-05,
      "loss": 2.6827,
      "step": 5376
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0626587867736816,
      "learning_rate": 1.9799065273762396e-05,
      "loss": 2.5729,
      "step": 5377
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9871115684509277,
      "learning_rate": 1.979898314157135e-05,
      "loss": 2.7676,
      "step": 5378
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0193003416061401,
      "learning_rate": 1.979890099276837e-05,
      "loss": 2.9223,
      "step": 5379
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9639570116996765,
      "learning_rate": 1.979881882735359e-05,
      "loss": 2.7833,
      "step": 5380
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9959732890129089,
      "learning_rate": 1.979873664532715e-05,
      "loss": 2.6799,
      "step": 5381
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0322719812393188,
      "learning_rate": 1.9798654446689193e-05,
      "loss": 2.6155,
      "step": 5382
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0282567739486694,
      "learning_rate": 1.9798572231439854e-05,
      "loss": 2.8596,
      "step": 5383
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9684428572654724,
      "learning_rate": 1.9798489999579275e-05,
      "loss": 2.6752,
      "step": 5384
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0198063850402832,
      "learning_rate": 1.979840775110759e-05,
      "loss": 2.6893,
      "step": 5385
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0509366989135742,
      "learning_rate": 1.979832548602495e-05,
      "loss": 2.706,
      "step": 5386
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0019571781158447,
      "learning_rate": 1.979824320433148e-05,
      "loss": 2.8203,
      "step": 5387
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0413634777069092,
      "learning_rate": 1.979816090602733e-05,
      "loss": 2.6657,
      "step": 5388
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0554914474487305,
      "learning_rate": 1.9798078591112634e-05,
      "loss": 2.5042,
      "step": 5389
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3039164543151855,
      "learning_rate": 1.9797996259587536e-05,
      "loss": 2.6048,
      "step": 5390
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3843905925750732,
      "learning_rate": 1.9797913911452172e-05,
      "loss": 2.7432,
      "step": 5391
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.088402271270752,
      "learning_rate": 1.9797831546706682e-05,
      "loss": 2.5668,
      "step": 5392
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9440280795097351,
      "learning_rate": 1.979774916535121e-05,
      "loss": 2.6808,
      "step": 5393
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9724055528640747,
      "learning_rate": 1.979766676738589e-05,
      "loss": 2.6708,
      "step": 5394
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1471112966537476,
      "learning_rate": 1.979758435281086e-05,
      "loss": 2.5981,
      "step": 5395
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9459915161132812,
      "learning_rate": 1.979750192162627e-05,
      "loss": 2.8793,
      "step": 5396
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9903802275657654,
      "learning_rate": 1.979741947383225e-05,
      "loss": 2.8433,
      "step": 5397
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9773480296134949,
      "learning_rate": 1.979733700942894e-05,
      "loss": 2.6202,
      "step": 5398
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.897642195224762,
      "learning_rate": 1.9797254528416488e-05,
      "loss": 2.5973,
      "step": 5399
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1729506254196167,
      "learning_rate": 1.9797172030795025e-05,
      "loss": 2.936,
      "step": 5400
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9469380378723145,
      "learning_rate": 1.9797089516564697e-05,
      "loss": 2.7265,
      "step": 5401
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9418715834617615,
      "learning_rate": 1.979700698572564e-05,
      "loss": 2.5551,
      "step": 5402
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9474992156028748,
      "learning_rate": 1.9796924438278e-05,
      "loss": 2.7444,
      "step": 5403
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0551459789276123,
      "learning_rate": 1.9796841874221907e-05,
      "loss": 2.732,
      "step": 5404
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0145386457443237,
      "learning_rate": 1.9796759293557507e-05,
      "loss": 2.567,
      "step": 5405
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9105933308601379,
      "learning_rate": 1.9796676696284943e-05,
      "loss": 2.7565,
      "step": 5406
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.090216875076294,
      "learning_rate": 1.9796594082404345e-05,
      "loss": 2.4923,
      "step": 5407
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.8783342242240906,
      "learning_rate": 1.979651145191586e-05,
      "loss": 2.8795,
      "step": 5408
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0359283685684204,
      "learning_rate": 1.9796428804819633e-05,
      "loss": 2.7495,
      "step": 5409
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0021767616271973,
      "learning_rate": 1.9796346141115795e-05,
      "loss": 2.84,
      "step": 5410
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9897167086601257,
      "learning_rate": 1.979626346080449e-05,
      "loss": 2.3909,
      "step": 5411
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.182107925415039,
      "learning_rate": 1.9796180763885856e-05,
      "loss": 2.5005,
      "step": 5412
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0764838457107544,
      "learning_rate": 1.979609805036004e-05,
      "loss": 2.6616,
      "step": 5413
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0801067352294922,
      "learning_rate": 1.9796015320227175e-05,
      "loss": 2.7933,
      "step": 5414
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9633579254150391,
      "learning_rate": 1.9795932573487403e-05,
      "loss": 2.6533,
      "step": 5415
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9705020785331726,
      "learning_rate": 1.9795849810140863e-05,
      "loss": 2.7675,
      "step": 5416
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0864291191101074,
      "learning_rate": 1.97957670301877e-05,
      "loss": 2.4976,
      "step": 5417
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9756656885147095,
      "learning_rate": 1.9795684233628045e-05,
      "loss": 2.5471,
      "step": 5418
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9756749272346497,
      "learning_rate": 1.9795601420462053e-05,
      "loss": 2.7909,
      "step": 5419
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9839895367622375,
      "learning_rate": 1.979551859068985e-05,
      "loss": 2.7337,
      "step": 5420
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9847978949546814,
      "learning_rate": 1.9795435744311587e-05,
      "loss": 2.7247,
      "step": 5421
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1861175298690796,
      "learning_rate": 1.97953528813274e-05,
      "loss": 2.7138,
      "step": 5422
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9945078492164612,
      "learning_rate": 1.9795270001737425e-05,
      "loss": 2.9232,
      "step": 5423
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9796334505081177,
      "learning_rate": 1.979518710554181e-05,
      "loss": 2.6007,
      "step": 5424
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9835940003395081,
      "learning_rate": 1.9795104192740694e-05,
      "loss": 2.6884,
      "step": 5425
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0131478309631348,
      "learning_rate": 1.9795021263334212e-05,
      "loss": 2.556,
      "step": 5426
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0108286142349243,
      "learning_rate": 1.979493831732251e-05,
      "loss": 2.6965,
      "step": 5427
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1613390445709229,
      "learning_rate": 1.979485535470573e-05,
      "loss": 2.7965,
      "step": 5428
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.986657977104187,
      "learning_rate": 1.979477237548401e-05,
      "loss": 2.6358,
      "step": 5429
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3169465065002441,
      "learning_rate": 1.9794689379657486e-05,
      "loss": 2.8368,
      "step": 5430
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0004583597183228,
      "learning_rate": 1.9794606367226304e-05,
      "loss": 2.6342,
      "step": 5431
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.042921543121338,
      "learning_rate": 1.9794523338190608e-05,
      "loss": 2.658,
      "step": 5432
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.02322256565094,
      "learning_rate": 1.979444029255053e-05,
      "loss": 2.6749,
      "step": 5433
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1391246318817139,
      "learning_rate": 1.9794357230306223e-05,
      "loss": 2.5792,
      "step": 5434
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9029355645179749,
      "learning_rate": 1.9794274151457812e-05,
      "loss": 2.5994,
      "step": 5435
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0246952772140503,
      "learning_rate": 1.9794191056005452e-05,
      "loss": 2.4322,
      "step": 5436
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9320225119590759,
      "learning_rate": 1.9794107943949278e-05,
      "loss": 2.6389,
      "step": 5437
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9562903642654419,
      "learning_rate": 1.9794024815289428e-05,
      "loss": 2.864,
      "step": 5438
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0600885152816772,
      "learning_rate": 1.9793941670026046e-05,
      "loss": 2.7415,
      "step": 5439
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9961028695106506,
      "learning_rate": 1.9793858508159275e-05,
      "loss": 2.6387,
      "step": 5440
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1226310729980469,
      "learning_rate": 1.979377532968925e-05,
      "loss": 2.8206,
      "step": 5441
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9134122133255005,
      "learning_rate": 1.9793692134616122e-05,
      "loss": 2.7318,
      "step": 5442
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.001649022102356,
      "learning_rate": 1.9793608922940023e-05,
      "loss": 2.7982,
      "step": 5443
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.154731273651123,
      "learning_rate": 1.9793525694661096e-05,
      "loss": 2.8053,
      "step": 5444
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0033172369003296,
      "learning_rate": 1.979344244977948e-05,
      "loss": 2.6929,
      "step": 5445
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0810142755508423,
      "learning_rate": 1.9793359188295323e-05,
      "loss": 2.4771,
      "step": 5446
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0110403299331665,
      "learning_rate": 1.9793275910208765e-05,
      "loss": 2.589,
      "step": 5447
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0351355075836182,
      "learning_rate": 1.979319261551994e-05,
      "loss": 2.7518,
      "step": 5448
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.8654003143310547,
      "learning_rate": 1.9793109304228996e-05,
      "loss": 2.5668,
      "step": 5449
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1445595026016235,
      "learning_rate": 1.979302597633607e-05,
      "loss": 3.0217,
      "step": 5450
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9822845458984375,
      "learning_rate": 1.9792942631841306e-05,
      "loss": 2.6547,
      "step": 5451
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.066026210784912,
      "learning_rate": 1.9792859270744845e-05,
      "loss": 2.9081,
      "step": 5452
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2474617958068848,
      "learning_rate": 1.979277589304683e-05,
      "loss": 2.6966,
      "step": 5453
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9037899374961853,
      "learning_rate": 1.9792692498747398e-05,
      "loss": 2.6912,
      "step": 5454
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0653576850891113,
      "learning_rate": 1.979260908784669e-05,
      "loss": 2.6095,
      "step": 5455
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9704521298408508,
      "learning_rate": 1.9792525660344855e-05,
      "loss": 2.5998,
      "step": 5456
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0070734024047852,
      "learning_rate": 1.9792442216242028e-05,
      "loss": 2.4806,
      "step": 5457
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0092390775680542,
      "learning_rate": 1.979235875553835e-05,
      "loss": 2.6628,
      "step": 5458
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9874700903892517,
      "learning_rate": 1.9792275278233964e-05,
      "loss": 2.5478,
      "step": 5459
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0101733207702637,
      "learning_rate": 1.979219178432901e-05,
      "loss": 2.8335,
      "step": 5460
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9835353493690491,
      "learning_rate": 1.979210827382364e-05,
      "loss": 2.7249,
      "step": 5461
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9657276272773743,
      "learning_rate": 1.979202474671798e-05,
      "loss": 2.5483,
      "step": 5462
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9655596613883972,
      "learning_rate": 1.9791941203012176e-05,
      "loss": 2.5878,
      "step": 5463
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0259369611740112,
      "learning_rate": 1.979185764270638e-05,
      "loss": 2.5643,
      "step": 5464
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0213074684143066,
      "learning_rate": 1.979177406580072e-05,
      "loss": 2.3214,
      "step": 5465
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.065630316734314,
      "learning_rate": 1.9791690472295348e-05,
      "loss": 2.8178,
      "step": 5466
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.03598952293396,
      "learning_rate": 1.9791606862190397e-05,
      "loss": 2.6247,
      "step": 5467
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0289909839630127,
      "learning_rate": 1.9791523235486013e-05,
      "loss": 2.5629,
      "step": 5468
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9728602766990662,
      "learning_rate": 1.979143959218234e-05,
      "loss": 2.5863,
      "step": 5469
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2588807344436646,
      "learning_rate": 1.9791355932279517e-05,
      "loss": 2.8172,
      "step": 5470
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0577642917633057,
      "learning_rate": 1.9791272255777687e-05,
      "loss": 2.5774,
      "step": 5471
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.973696768283844,
      "learning_rate": 1.9791188562676992e-05,
      "loss": 2.7315,
      "step": 5472
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0033013820648193,
      "learning_rate": 1.9791104852977572e-05,
      "loss": 2.613,
      "step": 5473
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.994144082069397,
      "learning_rate": 1.979102112667957e-05,
      "loss": 2.8243,
      "step": 5474
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9576425552368164,
      "learning_rate": 1.9790937383783125e-05,
      "loss": 2.7667,
      "step": 5475
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1204272508621216,
      "learning_rate": 1.9790853624288387e-05,
      "loss": 2.5087,
      "step": 5476
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9383848309516907,
      "learning_rate": 1.9790769848195492e-05,
      "loss": 2.4997,
      "step": 5477
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1060893535614014,
      "learning_rate": 1.9790686055504583e-05,
      "loss": 2.6398,
      "step": 5478
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0416154861450195,
      "learning_rate": 1.9790602246215797e-05,
      "loss": 2.6065,
      "step": 5479
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0974911451339722,
      "learning_rate": 1.9790518420329284e-05,
      "loss": 2.5732,
      "step": 5480
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9453366994857788,
      "learning_rate": 1.9790434577845188e-05,
      "loss": 2.7423,
      "step": 5481
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0852315425872803,
      "learning_rate": 1.9790350718763644e-05,
      "loss": 2.8322,
      "step": 5482
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0581920146942139,
      "learning_rate": 1.979026684308479e-05,
      "loss": 2.7991,
      "step": 5483
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9767112731933594,
      "learning_rate": 1.979018295080878e-05,
      "loss": 2.5563,
      "step": 5484
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0279581546783447,
      "learning_rate": 1.9790099041935754e-05,
      "loss": 2.5936,
      "step": 5485
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9960450530052185,
      "learning_rate": 1.9790015116465847e-05,
      "loss": 2.5802,
      "step": 5486
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9707555770874023,
      "learning_rate": 1.9789931174399205e-05,
      "loss": 2.779,
      "step": 5487
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.045632004737854,
      "learning_rate": 1.978984721573597e-05,
      "loss": 2.8645,
      "step": 5488
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.047987699508667,
      "learning_rate": 1.978976324047629e-05,
      "loss": 2.9985,
      "step": 5489
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5075782537460327,
      "learning_rate": 1.9789679248620298e-05,
      "loss": 2.4418,
      "step": 5490
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1302680969238281,
      "learning_rate": 1.9789595240168143e-05,
      "loss": 2.8535,
      "step": 5491
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9571114778518677,
      "learning_rate": 1.9789511215119966e-05,
      "loss": 2.6529,
      "step": 5492
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0849628448486328,
      "learning_rate": 1.978942717347591e-05,
      "loss": 2.8112,
      "step": 5493
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0139342546463013,
      "learning_rate": 1.978934311523611e-05,
      "loss": 2.7246,
      "step": 5494
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9218276739120483,
      "learning_rate": 1.9789259040400715e-05,
      "loss": 2.6229,
      "step": 5495
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1032652854919434,
      "learning_rate": 1.9789174948969875e-05,
      "loss": 2.7322,
      "step": 5496
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9763206243515015,
      "learning_rate": 1.9789090840943718e-05,
      "loss": 2.7373,
      "step": 5497
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0317151546478271,
      "learning_rate": 1.9789006716322397e-05,
      "loss": 2.9157,
      "step": 5498
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0664528608322144,
      "learning_rate": 1.9788922575106047e-05,
      "loss": 2.6026,
      "step": 5499
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9338526129722595,
      "learning_rate": 1.9788838417294817e-05,
      "loss": 2.6403,
      "step": 5500
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9912049770355225,
      "learning_rate": 1.978875424288885e-05,
      "loss": 2.9138,
      "step": 5501
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0065195560455322,
      "learning_rate": 1.978867005188828e-05,
      "loss": 2.6856,
      "step": 5502
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0509331226348877,
      "learning_rate": 1.978858584429326e-05,
      "loss": 2.9058,
      "step": 5503
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0389869213104248,
      "learning_rate": 1.9788501620103928e-05,
      "loss": 2.6778,
      "step": 5504
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9793320894241333,
      "learning_rate": 1.9788417379320427e-05,
      "loss": 2.7229,
      "step": 5505
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0387316942214966,
      "learning_rate": 1.97883331219429e-05,
      "loss": 2.6087,
      "step": 5506
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9151723384857178,
      "learning_rate": 1.978824884797149e-05,
      "loss": 2.635,
      "step": 5507
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0199090242385864,
      "learning_rate": 1.978816455740634e-05,
      "loss": 2.8964,
      "step": 5508
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9163589477539062,
      "learning_rate": 1.978808025024759e-05,
      "loss": 2.7213,
      "step": 5509
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0862728357315063,
      "learning_rate": 1.9787995926495385e-05,
      "loss": 2.9612,
      "step": 5510
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1858446598052979,
      "learning_rate": 1.9787911586149873e-05,
      "loss": 2.6756,
      "step": 5511
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9841516613960266,
      "learning_rate": 1.978782722921119e-05,
      "loss": 2.5716,
      "step": 5512
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9235232472419739,
      "learning_rate": 1.9787742855679482e-05,
      "loss": 2.7607,
      "step": 5513
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9773905277252197,
      "learning_rate": 1.9787658465554894e-05,
      "loss": 2.6412,
      "step": 5514
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0010138750076294,
      "learning_rate": 1.9787574058837564e-05,
      "loss": 2.9078,
      "step": 5515
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.064809799194336,
      "learning_rate": 1.9787489635527638e-05,
      "loss": 2.5588,
      "step": 5516
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.994879961013794,
      "learning_rate": 1.9787405195625258e-05,
      "loss": 2.7135,
      "step": 5517
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.040041208267212,
      "learning_rate": 1.9787320739130566e-05,
      "loss": 2.7929,
      "step": 5518
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9410704970359802,
      "learning_rate": 1.9787236266043712e-05,
      "loss": 2.6568,
      "step": 5519
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9682562351226807,
      "learning_rate": 1.978715177636483e-05,
      "loss": 2.6936,
      "step": 5520
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0472029447555542,
      "learning_rate": 1.9787067270094072e-05,
      "loss": 2.6951,
      "step": 5521
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0427783727645874,
      "learning_rate": 1.9786982747231574e-05,
      "loss": 2.95,
      "step": 5522
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0462193489074707,
      "learning_rate": 1.9786898207777483e-05,
      "loss": 2.6591,
      "step": 5523
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0636191368103027,
      "learning_rate": 1.978681365173194e-05,
      "loss": 2.635,
      "step": 5524
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9719313979148865,
      "learning_rate": 1.9786729079095094e-05,
      "loss": 2.7217,
      "step": 5525
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9616719484329224,
      "learning_rate": 1.978664448986708e-05,
      "loss": 2.671,
      "step": 5526
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0033365488052368,
      "learning_rate": 1.9786559884048046e-05,
      "loss": 2.7174,
      "step": 5527
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.984346330165863,
      "learning_rate": 1.9786475261638136e-05,
      "loss": 2.7466,
      "step": 5528
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1820369958877563,
      "learning_rate": 1.9786390622637494e-05,
      "loss": 2.6635,
      "step": 5529
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0972272157669067,
      "learning_rate": 1.9786305967046258e-05,
      "loss": 2.4043,
      "step": 5530
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.067150592803955,
      "learning_rate": 1.9786221294864577e-05,
      "loss": 2.7069,
      "step": 5531
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0239959955215454,
      "learning_rate": 1.9786136606092594e-05,
      "loss": 2.688,
      "step": 5532
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.099727988243103,
      "learning_rate": 1.978605190073045e-05,
      "loss": 2.6671,
      "step": 5533
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1265498399734497,
      "learning_rate": 1.9785967178778294e-05,
      "loss": 2.7054,
      "step": 5534
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0863734483718872,
      "learning_rate": 1.978588244023626e-05,
      "loss": 2.5533,
      "step": 5535
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1436920166015625,
      "learning_rate": 1.9785797685104505e-05,
      "loss": 2.6876,
      "step": 5536
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9287353157997131,
      "learning_rate": 1.9785712913383158e-05,
      "loss": 2.7202,
      "step": 5537
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9246678948402405,
      "learning_rate": 1.9785628125072372e-05,
      "loss": 2.5759,
      "step": 5538
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0369385480880737,
      "learning_rate": 1.9785543320172287e-05,
      "loss": 2.6248,
      "step": 5539
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0076472759246826,
      "learning_rate": 1.9785458498683052e-05,
      "loss": 2.7241,
      "step": 5540
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0601645708084106,
      "learning_rate": 1.978537366060481e-05,
      "loss": 3.1187,
      "step": 5541
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9587970972061157,
      "learning_rate": 1.9785288805937696e-05,
      "loss": 2.7688,
      "step": 5542
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.036517858505249,
      "learning_rate": 1.978520393468186e-05,
      "loss": 2.871,
      "step": 5543
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.066036343574524,
      "learning_rate": 1.9785119046837446e-05,
      "loss": 2.8485,
      "step": 5544
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0945725440979004,
      "learning_rate": 1.9785034142404603e-05,
      "loss": 2.7351,
      "step": 5545
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9324705004692078,
      "learning_rate": 1.9784949221383463e-05,
      "loss": 2.7378,
      "step": 5546
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9641293287277222,
      "learning_rate": 1.978486428377418e-05,
      "loss": 2.7706,
      "step": 5547
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.014868140220642,
      "learning_rate": 1.9784779329576894e-05,
      "loss": 2.787,
      "step": 5548
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9273396134376526,
      "learning_rate": 1.9784694358791746e-05,
      "loss": 2.8066,
      "step": 5549
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9490538239479065,
      "learning_rate": 1.9784609371418887e-05,
      "loss": 2.7827,
      "step": 5550
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1524499654769897,
      "learning_rate": 1.9784524367458457e-05,
      "loss": 2.8211,
      "step": 5551
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.062607765197754,
      "learning_rate": 1.9784439346910604e-05,
      "loss": 2.6,
      "step": 5552
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0369521379470825,
      "learning_rate": 1.9784354309775464e-05,
      "loss": 2.7631,
      "step": 5553
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9817991852760315,
      "learning_rate": 1.9784269256053186e-05,
      "loss": 2.6864,
      "step": 5554
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1714705228805542,
      "learning_rate": 1.9784184185743916e-05,
      "loss": 2.8937,
      "step": 5555
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9668377637863159,
      "learning_rate": 1.9784099098847796e-05,
      "loss": 2.5497,
      "step": 5556
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9747630953788757,
      "learning_rate": 1.9784013995364972e-05,
      "loss": 2.4584,
      "step": 5557
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9673059582710266,
      "learning_rate": 1.978392887529559e-05,
      "loss": 2.8776,
      "step": 5558
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9862872958183289,
      "learning_rate": 1.9783843738639784e-05,
      "loss": 2.7912,
      "step": 5559
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1866035461425781,
      "learning_rate": 1.978375858539771e-05,
      "loss": 2.7515,
      "step": 5560
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9165611267089844,
      "learning_rate": 1.9783673415569507e-05,
      "loss": 2.5229,
      "step": 5561
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1000207662582397,
      "learning_rate": 1.978358822915532e-05,
      "loss": 2.5695,
      "step": 5562
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9062334895133972,
      "learning_rate": 1.9783503026155296e-05,
      "loss": 2.8404,
      "step": 5563
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.002399206161499,
      "learning_rate": 1.9783417806569574e-05,
      "loss": 2.758,
      "step": 5564
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9555639028549194,
      "learning_rate": 1.97833325703983e-05,
      "loss": 2.6467,
      "step": 5565
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9340791702270508,
      "learning_rate": 1.9783247317641623e-05,
      "loss": 2.5181,
      "step": 5566
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0491012334823608,
      "learning_rate": 1.9783162048299685e-05,
      "loss": 2.7039,
      "step": 5567
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.044819712638855,
      "learning_rate": 1.9783076762372632e-05,
      "loss": 2.7155,
      "step": 5568
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1144514083862305,
      "learning_rate": 1.9782991459860606e-05,
      "loss": 2.6699,
      "step": 5569
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0943154096603394,
      "learning_rate": 1.978290614076375e-05,
      "loss": 2.6214,
      "step": 5570
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0529574155807495,
      "learning_rate": 1.9782820805082215e-05,
      "loss": 2.7281,
      "step": 5571
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.954636812210083,
      "learning_rate": 1.9782735452816138e-05,
      "loss": 2.744,
      "step": 5572
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.976595938205719,
      "learning_rate": 1.978265008396567e-05,
      "loss": 2.5698,
      "step": 5573
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9332400560379028,
      "learning_rate": 1.9782564698530952e-05,
      "loss": 3.0505,
      "step": 5574
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0817036628723145,
      "learning_rate": 1.9782479296512128e-05,
      "loss": 2.7754,
      "step": 5575
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9058240652084351,
      "learning_rate": 1.9782393877909348e-05,
      "loss": 2.6481,
      "step": 5576
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.955253541469574,
      "learning_rate": 1.9782308442722753e-05,
      "loss": 2.8997,
      "step": 5577
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9083335399627686,
      "learning_rate": 1.9782222990952487e-05,
      "loss": 2.5532,
      "step": 5578
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9714512825012207,
      "learning_rate": 1.9782137522598694e-05,
      "loss": 2.7148,
      "step": 5579
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.063659429550171,
      "learning_rate": 1.9782052037661525e-05,
      "loss": 2.8809,
      "step": 5580
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9684025645256042,
      "learning_rate": 1.978196653614112e-05,
      "loss": 2.6786,
      "step": 5581
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0689847469329834,
      "learning_rate": 1.9781881018037627e-05,
      "loss": 2.7488,
      "step": 5582
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9783627390861511,
      "learning_rate": 1.9781795483351187e-05,
      "loss": 2.624,
      "step": 5583
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0160033702850342,
      "learning_rate": 1.9781709932081945e-05,
      "loss": 2.7406,
      "step": 5584
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0530189275741577,
      "learning_rate": 1.978162436423005e-05,
      "loss": 2.7632,
      "step": 5585
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.066898226737976,
      "learning_rate": 1.9781538779795642e-05,
      "loss": 2.878,
      "step": 5586
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0873761177062988,
      "learning_rate": 1.9781453178778872e-05,
      "loss": 2.8141,
      "step": 5587
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9649814367294312,
      "learning_rate": 1.9781367561179886e-05,
      "loss": 2.7819,
      "step": 5588
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9451185464859009,
      "learning_rate": 1.978128192699882e-05,
      "loss": 3.0916,
      "step": 5589
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9025532007217407,
      "learning_rate": 1.9781196276235827e-05,
      "loss": 2.6917,
      "step": 5590
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0211505889892578,
      "learning_rate": 1.9781110608891047e-05,
      "loss": 2.5083,
      "step": 5591
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9772717356681824,
      "learning_rate": 1.9781024924964632e-05,
      "loss": 2.7837,
      "step": 5592
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9187465906143188,
      "learning_rate": 1.978093922445672e-05,
      "loss": 2.6205,
      "step": 5593
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9192201495170593,
      "learning_rate": 1.978085350736746e-05,
      "loss": 2.7653,
      "step": 5594
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.968738853931427,
      "learning_rate": 1.9780767773697e-05,
      "loss": 2.9398,
      "step": 5595
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9738425016403198,
      "learning_rate": 1.9780682023445478e-05,
      "loss": 2.7511,
      "step": 5596
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9511807560920715,
      "learning_rate": 1.9780596256613043e-05,
      "loss": 2.6056,
      "step": 5597
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.064267873764038,
      "learning_rate": 1.9780510473199844e-05,
      "loss": 2.993,
      "step": 5598
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0035622119903564,
      "learning_rate": 1.9780424673206022e-05,
      "loss": 2.7473,
      "step": 5599
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.011691689491272,
      "learning_rate": 1.9780338856631726e-05,
      "loss": 2.6612,
      "step": 5600
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9547592401504517,
      "learning_rate": 1.97802530234771e-05,
      "loss": 2.9724,
      "step": 5601
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.072973608970642,
      "learning_rate": 1.9780167173742284e-05,
      "loss": 2.8232,
      "step": 5602
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.168246865272522,
      "learning_rate": 1.9780081307427432e-05,
      "loss": 2.7227,
      "step": 5603
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0726163387298584,
      "learning_rate": 1.9779995424532686e-05,
      "loss": 2.718,
      "step": 5604
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2390719652175903,
      "learning_rate": 1.977990952505819e-05,
      "loss": 2.5857,
      "step": 5605
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9375082850456238,
      "learning_rate": 1.977982360900409e-05,
      "loss": 2.9633,
      "step": 5606
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0056285858154297,
      "learning_rate": 1.9779737676370538e-05,
      "loss": 2.5433,
      "step": 5607
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.122470736503601,
      "learning_rate": 1.977965172715767e-05,
      "loss": 2.7055,
      "step": 5608
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9670881628990173,
      "learning_rate": 1.9779565761365638e-05,
      "loss": 2.7046,
      "step": 5609
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2631584405899048,
      "learning_rate": 1.9779479778994584e-05,
      "loss": 2.7101,
      "step": 5610
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.280685544013977,
      "learning_rate": 1.977939378004466e-05,
      "loss": 2.4705,
      "step": 5611
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0528267621994019,
      "learning_rate": 1.9779307764516002e-05,
      "loss": 2.8312,
      "step": 5612
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9823894500732422,
      "learning_rate": 1.9779221732408765e-05,
      "loss": 2.7611,
      "step": 5613
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9689667224884033,
      "learning_rate": 1.977913568372309e-05,
      "loss": 2.5675,
      "step": 5614
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0319616794586182,
      "learning_rate": 1.977904961845912e-05,
      "loss": 2.7955,
      "step": 5615
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9958038926124573,
      "learning_rate": 1.9778963536617012e-05,
      "loss": 2.5988,
      "step": 5616
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9929701089859009,
      "learning_rate": 1.9778877438196898e-05,
      "loss": 2.7956,
      "step": 5617
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.03397536277771,
      "learning_rate": 1.9778791323198936e-05,
      "loss": 2.7314,
      "step": 5618
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.153771162033081,
      "learning_rate": 1.9778705191623265e-05,
      "loss": 2.612,
      "step": 5619
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.007064700126648,
      "learning_rate": 1.9778619043470035e-05,
      "loss": 2.5615,
      "step": 5620
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9946718215942383,
      "learning_rate": 1.9778532878739387e-05,
      "loss": 2.6303,
      "step": 5621
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0300960540771484,
      "learning_rate": 1.977844669743147e-05,
      "loss": 2.7611,
      "step": 5622
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0027458667755127,
      "learning_rate": 1.9778360499546433e-05,
      "loss": 2.827,
      "step": 5623
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9368016719818115,
      "learning_rate": 1.9778274285084418e-05,
      "loss": 2.7342,
      "step": 5624
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9143832921981812,
      "learning_rate": 1.977818805404557e-05,
      "loss": 2.6625,
      "step": 5625
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9054428935050964,
      "learning_rate": 1.9778101806430042e-05,
      "loss": 2.2954,
      "step": 5626
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0052855014801025,
      "learning_rate": 1.977801554223797e-05,
      "loss": 2.8955,
      "step": 5627
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9186867475509644,
      "learning_rate": 1.9777929261469508e-05,
      "loss": 2.5758,
      "step": 5628
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.975673496723175,
      "learning_rate": 1.9777842964124804e-05,
      "loss": 2.7121,
      "step": 5629
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9997748732566833,
      "learning_rate": 1.9777756650203997e-05,
      "loss": 2.6354,
      "step": 5630
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.054930567741394,
      "learning_rate": 1.9777670319707236e-05,
      "loss": 2.5526,
      "step": 5631
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9915661215782166,
      "learning_rate": 1.977758397263467e-05,
      "loss": 2.622,
      "step": 5632
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.176673173904419,
      "learning_rate": 1.9777497608986443e-05,
      "loss": 2.8317,
      "step": 5633
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1138877868652344,
      "learning_rate": 1.9777411228762703e-05,
      "loss": 2.5307,
      "step": 5634
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.051568627357483,
      "learning_rate": 1.9777324831963594e-05,
      "loss": 2.6384,
      "step": 5635
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9102824330329895,
      "learning_rate": 1.9777238418589268e-05,
      "loss": 2.6547,
      "step": 5636
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0746580362319946,
      "learning_rate": 1.977715198863986e-05,
      "loss": 2.5994,
      "step": 5637
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9604971408843994,
      "learning_rate": 1.977706554211553e-05,
      "loss": 2.6686,
      "step": 5638
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9566723108291626,
      "learning_rate": 1.9776979079016417e-05,
      "loss": 2.4392,
      "step": 5639
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1779099702835083,
      "learning_rate": 1.977689259934267e-05,
      "loss": 2.7928,
      "step": 5640
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.061436414718628,
      "learning_rate": 1.9776806103094433e-05,
      "loss": 2.4711,
      "step": 5641
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9261394143104553,
      "learning_rate": 1.9776719590271855e-05,
      "loss": 2.7443,
      "step": 5642
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9893395304679871,
      "learning_rate": 1.9776633060875082e-05,
      "loss": 2.777,
      "step": 5643
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9931512475013733,
      "learning_rate": 1.977654651490426e-05,
      "loss": 2.6451,
      "step": 5644
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9959415197372437,
      "learning_rate": 1.9776459952359537e-05,
      "loss": 2.6693,
      "step": 5645
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0380064249038696,
      "learning_rate": 1.977637337324106e-05,
      "loss": 2.6765,
      "step": 5646
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.8866490125656128,
      "learning_rate": 1.9776286777548975e-05,
      "loss": 2.7038,
      "step": 5647
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.000062346458435,
      "learning_rate": 1.9776200165283425e-05,
      "loss": 2.5159,
      "step": 5648
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9973160624504089,
      "learning_rate": 1.9776113536444564e-05,
      "loss": 2.6338,
      "step": 5649
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9743379354476929,
      "learning_rate": 1.9776026891032534e-05,
      "loss": 2.9685,
      "step": 5650
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9924849271774292,
      "learning_rate": 1.9775940229047485e-05,
      "loss": 2.5691,
      "step": 5651
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1127749681472778,
      "learning_rate": 1.977585355048956e-05,
      "loss": 2.5889,
      "step": 5652
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0378791093826294,
      "learning_rate": 1.977576685535891e-05,
      "loss": 2.7491,
      "step": 5653
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1149567365646362,
      "learning_rate": 1.9775680143655676e-05,
      "loss": 2.6918,
      "step": 5654
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9567840695381165,
      "learning_rate": 1.9775593415380016e-05,
      "loss": 2.6254,
      "step": 5655
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9736446738243103,
      "learning_rate": 1.9775506670532064e-05,
      "loss": 2.7954,
      "step": 5656
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.013771653175354,
      "learning_rate": 1.9775419909111977e-05,
      "loss": 2.4777,
      "step": 5657
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9565312266349792,
      "learning_rate": 1.9775333131119894e-05,
      "loss": 2.4532,
      "step": 5658
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0497514009475708,
      "learning_rate": 1.9775246336555974e-05,
      "loss": 2.7283,
      "step": 5659
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9732317924499512,
      "learning_rate": 1.9775159525420352e-05,
      "loss": 2.7059,
      "step": 5660
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.8863161206245422,
      "learning_rate": 1.9775072697713175e-05,
      "loss": 2.255,
      "step": 5661
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.172008991241455,
      "learning_rate": 1.9774985853434604e-05,
      "loss": 2.6824,
      "step": 5662
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1450371742248535,
      "learning_rate": 1.977489899258477e-05,
      "loss": 2.6812,
      "step": 5663
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0660769939422607,
      "learning_rate": 1.977481211516383e-05,
      "loss": 2.9717,
      "step": 5664
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9813052415847778,
      "learning_rate": 1.9774725221171928e-05,
      "loss": 2.426,
      "step": 5665
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.019729733467102,
      "learning_rate": 1.9774638310609214e-05,
      "loss": 2.3486,
      "step": 5666
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0314929485321045,
      "learning_rate": 1.977455138347583e-05,
      "loss": 2.8287,
      "step": 5667
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1092448234558105,
      "learning_rate": 1.977446443977193e-05,
      "loss": 2.8611,
      "step": 5668
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2511305809020996,
      "learning_rate": 1.9774377479497656e-05,
      "loss": 3.0689,
      "step": 5669
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9170416593551636,
      "learning_rate": 1.977429050265316e-05,
      "loss": 2.5464,
      "step": 5670
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0142943859100342,
      "learning_rate": 1.9774203509238586e-05,
      "loss": 2.6901,
      "step": 5671
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3756135702133179,
      "learning_rate": 1.977411649925408e-05,
      "loss": 2.6491,
      "step": 5672
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0278812646865845,
      "learning_rate": 1.9774029472699794e-05,
      "loss": 2.6338,
      "step": 5673
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.996433675289154,
      "learning_rate": 1.977394242957587e-05,
      "loss": 2.6026,
      "step": 5674
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.123669147491455,
      "learning_rate": 1.9773855369882463e-05,
      "loss": 2.4131,
      "step": 5675
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9399564862251282,
      "learning_rate": 1.977376829361972e-05,
      "loss": 2.5962,
      "step": 5676
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.973767876625061,
      "learning_rate": 1.977368120078778e-05,
      "loss": 2.7896,
      "step": 5677
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0456498861312866,
      "learning_rate": 1.9773594091386795e-05,
      "loss": 2.8586,
      "step": 5678
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0229437351226807,
      "learning_rate": 1.9773506965416914e-05,
      "loss": 2.6937,
      "step": 5679
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0106488466262817,
      "learning_rate": 1.9773419822878286e-05,
      "loss": 2.8084,
      "step": 5680
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0567857027053833,
      "learning_rate": 1.9773332663771057e-05,
      "loss": 2.6722,
      "step": 5681
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0495091676712036,
      "learning_rate": 1.9773245488095376e-05,
      "loss": 2.4632,
      "step": 5682
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9799712896347046,
      "learning_rate": 1.9773158295851384e-05,
      "loss": 2.7615,
      "step": 5683
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0984411239624023,
      "learning_rate": 1.9773071087039238e-05,
      "loss": 2.6824,
      "step": 5684
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9628296494483948,
      "learning_rate": 1.9772983861659084e-05,
      "loss": 2.6634,
      "step": 5685
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2180306911468506,
      "learning_rate": 1.9772896619711064e-05,
      "loss": 2.8072,
      "step": 5686
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0668445825576782,
      "learning_rate": 1.9772809361195333e-05,
      "loss": 2.6778,
      "step": 5687
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9445236921310425,
      "learning_rate": 1.9772722086112037e-05,
      "loss": 2.7681,
      "step": 5688
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.260661005973816,
      "learning_rate": 1.977263479446132e-05,
      "loss": 2.5693,
      "step": 5689
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9933786392211914,
      "learning_rate": 1.9772547486243332e-05,
      "loss": 2.5906,
      "step": 5690
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2715682983398438,
      "learning_rate": 1.9772460161458223e-05,
      "loss": 2.6006,
      "step": 5691
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0142972469329834,
      "learning_rate": 1.9772372820106143e-05,
      "loss": 2.5607,
      "step": 5692
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9715073108673096,
      "learning_rate": 1.9772285462187234e-05,
      "loss": 2.6996,
      "step": 5693
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9253430366516113,
      "learning_rate": 1.9772198087701647e-05,
      "loss": 2.5208,
      "step": 5694
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.051281213760376,
      "learning_rate": 1.9772110696649533e-05,
      "loss": 2.8476,
      "step": 5695
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.972392737865448,
      "learning_rate": 1.9772023289031033e-05,
      "loss": 2.4982,
      "step": 5696
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9319800138473511,
      "learning_rate": 1.97719358648463e-05,
      "loss": 2.5145,
      "step": 5697
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9912182092666626,
      "learning_rate": 1.9771848424095482e-05,
      "loss": 2.4145,
      "step": 5698
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9260989427566528,
      "learning_rate": 1.977176096677873e-05,
      "loss": 2.7073,
      "step": 5699
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.912560760974884,
      "learning_rate": 1.9771673492896185e-05,
      "loss": 2.5562,
      "step": 5700
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0194975137710571,
      "learning_rate": 1.9771586002448004e-05,
      "loss": 2.7031,
      "step": 5701
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1006942987442017,
      "learning_rate": 1.9771498495434328e-05,
      "loss": 2.3292,
      "step": 5702
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9459534287452698,
      "learning_rate": 1.9771410971855306e-05,
      "loss": 2.5103,
      "step": 5703
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.174918532371521,
      "learning_rate": 1.977132343171109e-05,
      "loss": 2.9648,
      "step": 5704
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9763052463531494,
      "learning_rate": 1.977123587500183e-05,
      "loss": 2.6264,
      "step": 5705
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0648908615112305,
      "learning_rate": 1.977114830172767e-05,
      "loss": 2.7719,
      "step": 5706
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1531121730804443,
      "learning_rate": 1.9771060711888758e-05,
      "loss": 2.7201,
      "step": 5707
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9189437031745911,
      "learning_rate": 1.9770973105485245e-05,
      "loss": 2.5206,
      "step": 5708
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.010974645614624,
      "learning_rate": 1.977088548251728e-05,
      "loss": 2.6544,
      "step": 5709
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.131651759147644,
      "learning_rate": 1.977079784298501e-05,
      "loss": 2.6736,
      "step": 5710
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.8794564604759216,
      "learning_rate": 1.977071018688858e-05,
      "loss": 2.7029,
      "step": 5711
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0486968755722046,
      "learning_rate": 1.977062251422815e-05,
      "loss": 2.6317,
      "step": 5712
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9952144622802734,
      "learning_rate": 1.9770534825003856e-05,
      "loss": 2.4915,
      "step": 5713
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9808740615844727,
      "learning_rate": 1.9770447119215854e-05,
      "loss": 2.6785,
      "step": 5714
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1404833793640137,
      "learning_rate": 1.9770359396864288e-05,
      "loss": 2.7458,
      "step": 5715
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0020091533660889,
      "learning_rate": 1.9770271657949315e-05,
      "loss": 2.6715,
      "step": 5716
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.107796549797058,
      "learning_rate": 1.977018390247107e-05,
      "loss": 2.7213,
      "step": 5717
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9785126447677612,
      "learning_rate": 1.9770096130429716e-05,
      "loss": 2.5755,
      "step": 5718
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9394952058792114,
      "learning_rate": 1.977000834182539e-05,
      "loss": 2.6338,
      "step": 5719
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0627007484436035,
      "learning_rate": 1.9769920536658252e-05,
      "loss": 2.6328,
      "step": 5720
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0631685256958008,
      "learning_rate": 1.976983271492844e-05,
      "loss": 2.6455,
      "step": 5721
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1890345811843872,
      "learning_rate": 1.9769744876636114e-05,
      "loss": 3.0366,
      "step": 5722
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.100528359413147,
      "learning_rate": 1.9769657021781412e-05,
      "loss": 2.6343,
      "step": 5723
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9198383688926697,
      "learning_rate": 1.976956915036449e-05,
      "loss": 2.5097,
      "step": 5724
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0241732597351074,
      "learning_rate": 1.9769481262385493e-05,
      "loss": 2.6226,
      "step": 5725
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0737570524215698,
      "learning_rate": 1.9769393357844574e-05,
      "loss": 2.5209,
      "step": 5726
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0274279117584229,
      "learning_rate": 1.9769305436741878e-05,
      "loss": 2.4702,
      "step": 5727
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0322284698486328,
      "learning_rate": 1.976921749907756e-05,
      "loss": 2.8222,
      "step": 5728
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9956653118133545,
      "learning_rate": 1.9769129544851764e-05,
      "loss": 2.5745,
      "step": 5729
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9906054139137268,
      "learning_rate": 1.976904157406464e-05,
      "loss": 2.5539,
      "step": 5730
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9941443800926208,
      "learning_rate": 1.9768953586716332e-05,
      "loss": 2.5618,
      "step": 5731
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9298161268234253,
      "learning_rate": 1.9768865582807e-05,
      "loss": 2.5128,
      "step": 5732
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9757656455039978,
      "learning_rate": 1.9768777562336785e-05,
      "loss": 2.709,
      "step": 5733
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9408313035964966,
      "learning_rate": 1.976868952530584e-05,
      "loss": 2.4977,
      "step": 5734
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0124073028564453,
      "learning_rate": 1.9768601471714314e-05,
      "loss": 2.6532,
      "step": 5735
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9685308337211609,
      "learning_rate": 1.9768513401562353e-05,
      "loss": 2.4895,
      "step": 5736
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.020861029624939,
      "learning_rate": 1.976842531485011e-05,
      "loss": 2.7739,
      "step": 5737
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9865685105323792,
      "learning_rate": 1.9768337211577734e-05,
      "loss": 2.5302,
      "step": 5738
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2859611511230469,
      "learning_rate": 1.9768249091745373e-05,
      "loss": 2.5729,
      "step": 5739
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.06537663936615,
      "learning_rate": 1.9768160955353176e-05,
      "loss": 2.7162,
      "step": 5740
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9182521104812622,
      "learning_rate": 1.9768072802401295e-05,
      "loss": 2.5798,
      "step": 5741
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0390034914016724,
      "learning_rate": 1.9767984632889873e-05,
      "loss": 2.6306,
      "step": 5742
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0586193799972534,
      "learning_rate": 1.9767896446819068e-05,
      "loss": 2.6761,
      "step": 5743
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.057623028755188,
      "learning_rate": 1.9767808244189024e-05,
      "loss": 2.6821,
      "step": 5744
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1127513647079468,
      "learning_rate": 1.9767720024999895e-05,
      "loss": 2.557,
      "step": 5745
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.1369577646255493,
      "learning_rate": 1.9767631789251824e-05,
      "loss": 2.7133,
      "step": 5746
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.8605990409851074,
      "learning_rate": 1.9767543536944967e-05,
      "loss": 2.599,
      "step": 5747
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9433869123458862,
      "learning_rate": 1.976745526807947e-05,
      "loss": 2.6125,
      "step": 5748
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0007175207138062,
      "learning_rate": 1.9767366982655482e-05,
      "loss": 2.7858,
      "step": 5749
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9585133790969849,
      "learning_rate": 1.9767278680673155e-05,
      "loss": 2.7513,
      "step": 5750
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0426803827285767,
      "learning_rate": 1.976719036213264e-05,
      "loss": 2.7432,
      "step": 5751
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0768743753433228,
      "learning_rate": 1.976710202703408e-05,
      "loss": 2.9279,
      "step": 5752
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9008143544197083,
      "learning_rate": 1.9767013675377632e-05,
      "loss": 2.5197,
      "step": 5753
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9663153886795044,
      "learning_rate": 1.9766925307163448e-05,
      "loss": 2.7169,
      "step": 5754
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9628530144691467,
      "learning_rate": 1.9766836922391666e-05,
      "loss": 2.8374,
      "step": 5755
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0015645027160645,
      "learning_rate": 1.9766748521062444e-05,
      "loss": 2.475,
      "step": 5756
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0033504962921143,
      "learning_rate": 1.9766660103175934e-05,
      "loss": 2.7669,
      "step": 5757
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0239763259887695,
      "learning_rate": 1.976657166873228e-05,
      "loss": 2.705,
      "step": 5758
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.999067485332489,
      "learning_rate": 1.9766483217731633e-05,
      "loss": 2.7859,
      "step": 5759
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.9470047354698181,
      "learning_rate": 1.9766394750174148e-05,
      "loss": 2.607,
      "step": 5760
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1900988817214966,
      "learning_rate": 1.9766306266059968e-05,
      "loss": 2.8346,
      "step": 5761
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0573105812072754,
      "learning_rate": 1.976621776538925e-05,
      "loss": 2.9348,
      "step": 5762
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.971731424331665,
      "learning_rate": 1.9766129248162137e-05,
      "loss": 2.507,
      "step": 5763
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9953892827033997,
      "learning_rate": 1.9766040714378785e-05,
      "loss": 2.7173,
      "step": 5764
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9740049242973328,
      "learning_rate": 1.976595216403934e-05,
      "loss": 2.4585,
      "step": 5765
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0478776693344116,
      "learning_rate": 1.9765863597143956e-05,
      "loss": 2.8063,
      "step": 5766
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9102372527122498,
      "learning_rate": 1.9765775013692777e-05,
      "loss": 2.7305,
      "step": 5767
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1270136833190918,
      "learning_rate": 1.976568641368596e-05,
      "loss": 2.7651,
      "step": 5768
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9547717571258545,
      "learning_rate": 1.976559779712365e-05,
      "loss": 2.6789,
      "step": 5769
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9775527119636536,
      "learning_rate": 1.9765509164006e-05,
      "loss": 2.8591,
      "step": 5770
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9761297702789307,
      "learning_rate": 1.976542051433316e-05,
      "loss": 2.6574,
      "step": 5771
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0724635124206543,
      "learning_rate": 1.976533184810528e-05,
      "loss": 2.8164,
      "step": 5772
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0881356000900269,
      "learning_rate": 1.976524316532251e-05,
      "loss": 2.5485,
      "step": 5773
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9403478503227234,
      "learning_rate": 1.9765154465985e-05,
      "loss": 2.7705,
      "step": 5774
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9379450082778931,
      "learning_rate": 1.9765065750092905e-05,
      "loss": 2.558,
      "step": 5775
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9931517243385315,
      "learning_rate": 1.9764977017646368e-05,
      "loss": 2.5755,
      "step": 5776
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.978780210018158,
      "learning_rate": 1.9764888268645544e-05,
      "loss": 2.5859,
      "step": 5777
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0882364511489868,
      "learning_rate": 1.9764799503090582e-05,
      "loss": 2.7165,
      "step": 5778
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0775707960128784,
      "learning_rate": 1.9764710720981632e-05,
      "loss": 2.7583,
      "step": 5779
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1087619066238403,
      "learning_rate": 1.9764621922318846e-05,
      "loss": 2.6805,
      "step": 5780
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0198601484298706,
      "learning_rate": 1.9764533107102373e-05,
      "loss": 2.7496,
      "step": 5781
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9276878237724304,
      "learning_rate": 1.9764444275332363e-05,
      "loss": 2.8584,
      "step": 5782
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.8928490877151489,
      "learning_rate": 1.9764355427008974e-05,
      "loss": 2.6806,
      "step": 5783
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0146080255508423,
      "learning_rate": 1.9764266562132345e-05,
      "loss": 2.6875,
      "step": 5784
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9915590286254883,
      "learning_rate": 1.9764177680702632e-05,
      "loss": 2.5569,
      "step": 5785
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9231811761856079,
      "learning_rate": 1.976408878271999e-05,
      "loss": 2.8074,
      "step": 5786
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0904381275177002,
      "learning_rate": 1.9763999868184562e-05,
      "loss": 2.676,
      "step": 5787
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0286253690719604,
      "learning_rate": 1.9763910937096503e-05,
      "loss": 2.8813,
      "step": 5788
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.011381983757019,
      "learning_rate": 1.9763821989455963e-05,
      "loss": 2.6519,
      "step": 5789
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0267064571380615,
      "learning_rate": 1.9763733025263095e-05,
      "loss": 2.5845,
      "step": 5790
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9687209725379944,
      "learning_rate": 1.9763644044518045e-05,
      "loss": 2.7261,
      "step": 5791
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9785929322242737,
      "learning_rate": 1.9763555047220967e-05,
      "loss": 3.0496,
      "step": 5792
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0569641590118408,
      "learning_rate": 1.9763466033372016e-05,
      "loss": 2.7215,
      "step": 5793
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9238953590393066,
      "learning_rate": 1.9763377002971332e-05,
      "loss": 2.46,
      "step": 5794
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0156680345535278,
      "learning_rate": 1.9763287956019076e-05,
      "loss": 2.6171,
      "step": 5795
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9648661017417908,
      "learning_rate": 1.9763198892515393e-05,
      "loss": 2.7726,
      "step": 5796
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.97151780128479,
      "learning_rate": 1.976310981246044e-05,
      "loss": 2.6219,
      "step": 5797
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9417852759361267,
      "learning_rate": 1.976302071585436e-05,
      "loss": 2.6537,
      "step": 5798
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0115470886230469,
      "learning_rate": 1.976293160269731e-05,
      "loss": 2.6355,
      "step": 5799
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0235756635665894,
      "learning_rate": 1.976284247298944e-05,
      "loss": 2.7217,
      "step": 5800
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0339593887329102,
      "learning_rate": 1.97627533267309e-05,
      "loss": 2.6665,
      "step": 5801
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9528300762176514,
      "learning_rate": 1.9762664163921842e-05,
      "loss": 2.8433,
      "step": 5802
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9670766592025757,
      "learning_rate": 1.9762574984562416e-05,
      "loss": 2.6115,
      "step": 5803
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.8655233979225159,
      "learning_rate": 1.976248578865277e-05,
      "loss": 2.7614,
      "step": 5804
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0552691221237183,
      "learning_rate": 1.9762396576193066e-05,
      "loss": 2.7131,
      "step": 5805
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9151234030723572,
      "learning_rate": 1.9762307347183445e-05,
      "loss": 2.7105,
      "step": 5806
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9545134902000427,
      "learning_rate": 1.9762218101624063e-05,
      "loss": 2.3812,
      "step": 5807
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9889622330665588,
      "learning_rate": 1.9762128839515067e-05,
      "loss": 2.729,
      "step": 5808
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0749356746673584,
      "learning_rate": 1.9762039560856613e-05,
      "loss": 2.742,
      "step": 5809
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9414226412773132,
      "learning_rate": 1.9761950265648853e-05,
      "loss": 2.621,
      "step": 5810
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9773634076118469,
      "learning_rate": 1.9761860953891932e-05,
      "loss": 3.0093,
      "step": 5811
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0875022411346436,
      "learning_rate": 1.9761771625586007e-05,
      "loss": 2.801,
      "step": 5812
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0071594715118408,
      "learning_rate": 1.976168228073123e-05,
      "loss": 2.7299,
      "step": 5813
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9957689046859741,
      "learning_rate": 1.976159291932775e-05,
      "loss": 2.779,
      "step": 5814
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1516953706741333,
      "learning_rate": 1.9761503541375716e-05,
      "loss": 2.9564,
      "step": 5815
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1786106824874878,
      "learning_rate": 1.9761414146875285e-05,
      "loss": 2.6722,
      "step": 5816
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0979281663894653,
      "learning_rate": 1.9761324735826604e-05,
      "loss": 2.5742,
      "step": 5817
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.979704737663269,
      "learning_rate": 1.9761235308229823e-05,
      "loss": 2.5915,
      "step": 5818
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1530475616455078,
      "learning_rate": 1.9761145864085102e-05,
      "loss": 2.7939,
      "step": 5819
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0325915813446045,
      "learning_rate": 1.976105640339259e-05,
      "loss": 2.4787,
      "step": 5820
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1892520189285278,
      "learning_rate": 1.976096692615243e-05,
      "loss": 2.9246,
      "step": 5821
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.074380874633789,
      "learning_rate": 1.9760877432364786e-05,
      "loss": 2.6812,
      "step": 5822
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9604084491729736,
      "learning_rate": 1.97607879220298e-05,
      "loss": 2.5796,
      "step": 5823
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9568596482276917,
      "learning_rate": 1.9760698395147626e-05,
      "loss": 2.8431,
      "step": 5824
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.033908724784851,
      "learning_rate": 1.9760608851718422e-05,
      "loss": 2.6796,
      "step": 5825
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2688629627227783,
      "learning_rate": 1.9760519291742332e-05,
      "loss": 2.7781,
      "step": 5826
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9746091961860657,
      "learning_rate": 1.976042971521951e-05,
      "loss": 2.7328,
      "step": 5827
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.984868049621582,
      "learning_rate": 1.976034012215011e-05,
      "loss": 2.3661,
      "step": 5828
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0372459888458252,
      "learning_rate": 1.976025051253428e-05,
      "loss": 2.4452,
      "step": 5829
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9336955547332764,
      "learning_rate": 1.976016088637218e-05,
      "loss": 2.6508,
      "step": 5830
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9922179579734802,
      "learning_rate": 1.9760071243663953e-05,
      "loss": 2.7522,
      "step": 5831
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9983890056610107,
      "learning_rate": 1.9759981584409757e-05,
      "loss": 2.6492,
      "step": 5832
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0086385011672974,
      "learning_rate": 1.9759891908609738e-05,
      "loss": 2.7438,
      "step": 5833
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1398817300796509,
      "learning_rate": 1.975980221626405e-05,
      "loss": 2.7374,
      "step": 5834
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0863960981369019,
      "learning_rate": 1.9759712507372853e-05,
      "loss": 2.7689,
      "step": 5835
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0619621276855469,
      "learning_rate": 1.975962278193629e-05,
      "loss": 2.8468,
      "step": 5836
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.947321355342865,
      "learning_rate": 1.9759533039954513e-05,
      "loss": 2.8089,
      "step": 5837
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0301138162612915,
      "learning_rate": 1.975944328142768e-05,
      "loss": 2.7997,
      "step": 5838
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.016395926475525,
      "learning_rate": 1.9759353506355937e-05,
      "loss": 2.6119,
      "step": 5839
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0134475231170654,
      "learning_rate": 1.9759263714739443e-05,
      "loss": 2.7352,
      "step": 5840
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9690192937850952,
      "learning_rate": 1.9759173906578345e-05,
      "loss": 2.6437,
      "step": 5841
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9556609988212585,
      "learning_rate": 1.9759084081872793e-05,
      "loss": 2.7517,
      "step": 5842
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.112661361694336,
      "learning_rate": 1.975899424062295e-05,
      "loss": 2.6182,
      "step": 5843
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4254316091537476,
      "learning_rate": 1.9758904382828957e-05,
      "loss": 2.6315,
      "step": 5844
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9938355088233948,
      "learning_rate": 1.9758814508490968e-05,
      "loss": 2.6245,
      "step": 5845
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.964594304561615,
      "learning_rate": 1.975872461760914e-05,
      "loss": 2.6275,
      "step": 5846
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0120872259140015,
      "learning_rate": 1.9758634710183625e-05,
      "loss": 2.7607,
      "step": 5847
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0178916454315186,
      "learning_rate": 1.9758544786214573e-05,
      "loss": 2.6589,
      "step": 5848
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1420749425888062,
      "learning_rate": 1.9758454845702138e-05,
      "loss": 2.7351,
      "step": 5849
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0754801034927368,
      "learning_rate": 1.9758364888646472e-05,
      "loss": 2.7082,
      "step": 5850
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9579201340675354,
      "learning_rate": 1.9758274915047723e-05,
      "loss": 2.6934,
      "step": 5851
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9834913015365601,
      "learning_rate": 1.9758184924906052e-05,
      "loss": 2.6842,
      "step": 5852
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0383450984954834,
      "learning_rate": 1.9758094918221604e-05,
      "loss": 2.8247,
      "step": 5853
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9743318557739258,
      "learning_rate": 1.9758004894994542e-05,
      "loss": 2.7733,
      "step": 5854
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0188316106796265,
      "learning_rate": 1.9757914855225004e-05,
      "loss": 2.7407,
      "step": 5855
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9487013220787048,
      "learning_rate": 1.975782479891315e-05,
      "loss": 2.867,
      "step": 5856
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0147948265075684,
      "learning_rate": 1.9757734726059134e-05,
      "loss": 2.6839,
      "step": 5857
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0405170917510986,
      "learning_rate": 1.975764463666311e-05,
      "loss": 2.4998,
      "step": 5858
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0203616619110107,
      "learning_rate": 1.9757554530725227e-05,
      "loss": 2.7068,
      "step": 5859
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0709534883499146,
      "learning_rate": 1.975746440824564e-05,
      "loss": 2.7414,
      "step": 5860
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9133050441741943,
      "learning_rate": 1.97573742692245e-05,
      "loss": 2.709,
      "step": 5861
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0080015659332275,
      "learning_rate": 1.975728411366196e-05,
      "loss": 2.675,
      "step": 5862
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.203336477279663,
      "learning_rate": 1.9757193941558174e-05,
      "loss": 2.8557,
      "step": 5863
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0892263650894165,
      "learning_rate": 1.975710375291329e-05,
      "loss": 2.6208,
      "step": 5864
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9840039014816284,
      "learning_rate": 1.9757013547727472e-05,
      "loss": 2.7882,
      "step": 5865
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1628530025482178,
      "learning_rate": 1.975692332600086e-05,
      "loss": 2.6888,
      "step": 5866
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.963452160358429,
      "learning_rate": 1.9756833087733617e-05,
      "loss": 2.4899,
      "step": 5867
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0591740608215332,
      "learning_rate": 1.9756742832925893e-05,
      "loss": 2.4522,
      "step": 5868
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.114126205444336,
      "learning_rate": 1.9756652561577835e-05,
      "loss": 2.7549,
      "step": 5869
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9720937609672546,
      "learning_rate": 1.9756562273689606e-05,
      "loss": 2.6844,
      "step": 5870
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1019238233566284,
      "learning_rate": 1.975647196926135e-05,
      "loss": 2.9331,
      "step": 5871
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.8910553455352783,
      "learning_rate": 1.9756381648293228e-05,
      "loss": 2.3219,
      "step": 5872
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0888972282409668,
      "learning_rate": 1.9756291310785385e-05,
      "loss": 2.6659,
      "step": 5873
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0349664688110352,
      "learning_rate": 1.975620095673798e-05,
      "loss": 2.617,
      "step": 5874
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0088982582092285,
      "learning_rate": 1.9756110586151166e-05,
      "loss": 2.6903,
      "step": 5875
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9743291735649109,
      "learning_rate": 1.9756020199025095e-05,
      "loss": 2.6973,
      "step": 5876
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9754633903503418,
      "learning_rate": 1.975592979535992e-05,
      "loss": 2.6322,
      "step": 5877
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9997790455818176,
      "learning_rate": 1.9755839375155793e-05,
      "loss": 2.7979,
      "step": 5878
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.912743091583252,
      "learning_rate": 1.975574893841287e-05,
      "loss": 2.807,
      "step": 5879
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0460100173950195,
      "learning_rate": 1.9755658485131303e-05,
      "loss": 2.6556,
      "step": 5880
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.307652473449707,
      "learning_rate": 1.9755568015311246e-05,
      "loss": 2.7739,
      "step": 5881
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9665824174880981,
      "learning_rate": 1.975547752895285e-05,
      "loss": 2.8386,
      "step": 5882
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0012377500534058,
      "learning_rate": 1.975538702605627e-05,
      "loss": 2.6179,
      "step": 5883
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0120799541473389,
      "learning_rate": 1.975529650662166e-05,
      "loss": 3.015,
      "step": 5884
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9446542263031006,
      "learning_rate": 1.9755205970649173e-05,
      "loss": 2.6924,
      "step": 5885
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9875559210777283,
      "learning_rate": 1.975511541813896e-05,
      "loss": 2.7236,
      "step": 5886
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0438168048858643,
      "learning_rate": 1.975502484909118e-05,
      "loss": 2.6686,
      "step": 5887
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0193517208099365,
      "learning_rate": 1.9754934263505983e-05,
      "loss": 2.5019,
      "step": 5888
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.018296241760254,
      "learning_rate": 1.975484366138352e-05,
      "loss": 2.4939,
      "step": 5889
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0055245161056519,
      "learning_rate": 1.9754753042723953e-05,
      "loss": 2.6308,
      "step": 5890
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9997893571853638,
      "learning_rate": 1.9754662407527428e-05,
      "loss": 2.5615,
      "step": 5891
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0486900806427002,
      "learning_rate": 1.97545717557941e-05,
      "loss": 2.5549,
      "step": 5892
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9104008674621582,
      "learning_rate": 1.975448108752412e-05,
      "loss": 2.5354,
      "step": 5893
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9581496715545654,
      "learning_rate": 1.9754390402717652e-05,
      "loss": 2.7735,
      "step": 5894
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.028882384300232,
      "learning_rate": 1.975429970137484e-05,
      "loss": 2.7979,
      "step": 5895
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0631102323532104,
      "learning_rate": 1.975420898349584e-05,
      "loss": 2.5937,
      "step": 5896
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.036905288696289,
      "learning_rate": 1.9754118249080806e-05,
      "loss": 2.6687,
      "step": 5897
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.93961101770401,
      "learning_rate": 1.9754027498129896e-05,
      "loss": 2.7059,
      "step": 5898
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0719218254089355,
      "learning_rate": 1.9753936730643257e-05,
      "loss": 2.4711,
      "step": 5899
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0627011060714722,
      "learning_rate": 1.9753845946621047e-05,
      "loss": 2.8088,
      "step": 5900
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0209869146347046,
      "learning_rate": 1.975375514606342e-05,
      "loss": 2.3332,
      "step": 5901
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0626208782196045,
      "learning_rate": 1.975366432897053e-05,
      "loss": 2.5539,
      "step": 5902
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0637879371643066,
      "learning_rate": 1.9753573495342526e-05,
      "loss": 2.9157,
      "step": 5903
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0511115789413452,
      "learning_rate": 1.9753482645179565e-05,
      "loss": 2.6769,
      "step": 5904
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9843158721923828,
      "learning_rate": 1.9753391778481807e-05,
      "loss": 2.7266,
      "step": 5905
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9626844525337219,
      "learning_rate": 1.97533008952494e-05,
      "loss": 2.607,
      "step": 5906
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9766964316368103,
      "learning_rate": 1.9753209995482495e-05,
      "loss": 2.536,
      "step": 5907
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0049443244934082,
      "learning_rate": 1.9753119079181254e-05,
      "loss": 2.8128,
      "step": 5908
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9593580365180969,
      "learning_rate": 1.9753028146345828e-05,
      "loss": 2.6217,
      "step": 5909
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0210697650909424,
      "learning_rate": 1.9752937196976366e-05,
      "loss": 3.0014,
      "step": 5910
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.156058430671692,
      "learning_rate": 1.9752846231073032e-05,
      "loss": 2.5332,
      "step": 5911
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.011637806892395,
      "learning_rate": 1.975275524863597e-05,
      "loss": 2.3628,
      "step": 5912
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9666491746902466,
      "learning_rate": 1.975266424966534e-05,
      "loss": 2.5486,
      "step": 5913
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9948322772979736,
      "learning_rate": 1.9752573234161297e-05,
      "loss": 2.6625,
      "step": 5914
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9498504996299744,
      "learning_rate": 1.975248220212399e-05,
      "loss": 2.7668,
      "step": 5915
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9010302424430847,
      "learning_rate": 1.975239115355358e-05,
      "loss": 2.7897,
      "step": 5916
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0219727754592896,
      "learning_rate": 1.975230008845022e-05,
      "loss": 2.773,
      "step": 5917
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9252057075500488,
      "learning_rate": 1.9752209006814063e-05,
      "loss": 2.5446,
      "step": 5918
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0052984952926636,
      "learning_rate": 1.975211790864526e-05,
      "loss": 2.6067,
      "step": 5919
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.968909740447998,
      "learning_rate": 1.975202679394397e-05,
      "loss": 2.522,
      "step": 5920
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9784983992576599,
      "learning_rate": 1.9751935662710344e-05,
      "loss": 2.7951,
      "step": 5921
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0380032062530518,
      "learning_rate": 1.975184451494454e-05,
      "loss": 2.7574,
      "step": 5922
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9614746570587158,
      "learning_rate": 1.975175335064671e-05,
      "loss": 2.638,
      "step": 5923
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9104514122009277,
      "learning_rate": 1.975166216981701e-05,
      "loss": 3.0581,
      "step": 5924
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0949697494506836,
      "learning_rate": 1.9751570972455596e-05,
      "loss": 2.7001,
      "step": 5925
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0756114721298218,
      "learning_rate": 1.9751479758562617e-05,
      "loss": 2.7359,
      "step": 5926
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9910336136817932,
      "learning_rate": 1.9751388528138235e-05,
      "loss": 2.5152,
      "step": 5927
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0230226516723633,
      "learning_rate": 1.97512972811826e-05,
      "loss": 2.6916,
      "step": 5928
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0197447538375854,
      "learning_rate": 1.9751206017695865e-05,
      "loss": 2.7765,
      "step": 5929
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9792823195457458,
      "learning_rate": 1.975111473767819e-05,
      "loss": 2.6804,
      "step": 5930
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0810401439666748,
      "learning_rate": 1.9751023441129725e-05,
      "loss": 2.6248,
      "step": 5931
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0387294292449951,
      "learning_rate": 1.975093212805063e-05,
      "loss": 2.8991,
      "step": 5932
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.942337155342102,
      "learning_rate": 1.9750840798441056e-05,
      "loss": 2.8178,
      "step": 5933
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1666145324707031,
      "learning_rate": 1.9750749452301158e-05,
      "loss": 2.6695,
      "step": 5934
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9710201621055603,
      "learning_rate": 1.975065808963109e-05,
      "loss": 2.6846,
      "step": 5935
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3170852661132812,
      "learning_rate": 1.9750566710431013e-05,
      "loss": 2.6578,
      "step": 5936
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9450460076332092,
      "learning_rate": 1.9750475314701073e-05,
      "loss": 2.8757,
      "step": 5937
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.97325199842453,
      "learning_rate": 1.9750383902441428e-05,
      "loss": 2.8442,
      "step": 5938
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9653013348579407,
      "learning_rate": 1.9750292473652236e-05,
      "loss": 2.6016,
      "step": 5939
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0054761171340942,
      "learning_rate": 1.9750201028333647e-05,
      "loss": 2.5526,
      "step": 5940
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2570167779922485,
      "learning_rate": 1.9750109566485824e-05,
      "loss": 2.6603,
      "step": 5941
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9625895619392395,
      "learning_rate": 1.9750018088108918e-05,
      "loss": 2.88,
      "step": 5942
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0656089782714844,
      "learning_rate": 1.9749926593203082e-05,
      "loss": 2.6947,
      "step": 5943
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9525026679039001,
      "learning_rate": 1.974983508176847e-05,
      "loss": 2.5253,
      "step": 5944
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9862606525421143,
      "learning_rate": 1.9749743553805238e-05,
      "loss": 2.7115,
      "step": 5945
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9907974600791931,
      "learning_rate": 1.9749652009313548e-05,
      "loss": 2.5668,
      "step": 5946
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9810314774513245,
      "learning_rate": 1.9749560448293548e-05,
      "loss": 2.5437,
      "step": 5947
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4982223510742188,
      "learning_rate": 1.9749468870745394e-05,
      "loss": 2.6272,
      "step": 5948
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3339000940322876,
      "learning_rate": 1.9749377276669243e-05,
      "loss": 2.7531,
      "step": 5949
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0383591651916504,
      "learning_rate": 1.974928566606525e-05,
      "loss": 2.6629,
      "step": 5950
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0940775871276855,
      "learning_rate": 1.9749194038933565e-05,
      "loss": 2.6037,
      "step": 5951
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0721440315246582,
      "learning_rate": 1.9749102395274355e-05,
      "loss": 2.4764,
      "step": 5952
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9484302997589111,
      "learning_rate": 1.9749010735087766e-05,
      "loss": 2.8554,
      "step": 5953
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1509709358215332,
      "learning_rate": 1.9748919058373955e-05,
      "loss": 2.7117,
      "step": 5954
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.155088186264038,
      "learning_rate": 1.9748827365133078e-05,
      "loss": 2.78,
      "step": 5955
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3203036785125732,
      "learning_rate": 1.9748735655365294e-05,
      "loss": 2.507,
      "step": 5956
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.254475712776184,
      "learning_rate": 1.9748643929070755e-05,
      "loss": 2.8056,
      "step": 5957
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.095990777015686,
      "learning_rate": 1.9748552186249614e-05,
      "loss": 2.6928,
      "step": 5958
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9491044878959656,
      "learning_rate": 1.9748460426902033e-05,
      "loss": 2.7506,
      "step": 5959
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9525278210639954,
      "learning_rate": 1.974836865102816e-05,
      "loss": 2.8134,
      "step": 5960
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0858746767044067,
      "learning_rate": 1.9748276858628157e-05,
      "loss": 2.6252,
      "step": 5961
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1280899047851562,
      "learning_rate": 1.9748185049702176e-05,
      "loss": 2.4133,
      "step": 5962
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0761761665344238,
      "learning_rate": 1.9748093224250376e-05,
      "loss": 2.8355,
      "step": 5963
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0101408958435059,
      "learning_rate": 1.974800138227291e-05,
      "loss": 2.7801,
      "step": 5964
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9293361306190491,
      "learning_rate": 1.974790952376993e-05,
      "loss": 2.5122,
      "step": 5965
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.8890659213066101,
      "learning_rate": 1.97478176487416e-05,
      "loss": 2.6923,
      "step": 5966
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9580435156822205,
      "learning_rate": 1.9747725757188067e-05,
      "loss": 2.7312,
      "step": 5967
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0895746946334839,
      "learning_rate": 1.9747633849109497e-05,
      "loss": 2.6856,
      "step": 5968
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0535815954208374,
      "learning_rate": 1.974754192450604e-05,
      "loss": 2.7162,
      "step": 5969
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9627452492713928,
      "learning_rate": 1.974744998337785e-05,
      "loss": 2.8169,
      "step": 5970
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0597480535507202,
      "learning_rate": 1.9747358025725084e-05,
      "loss": 2.7549,
      "step": 5971
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.056934118270874,
      "learning_rate": 1.97472660515479e-05,
      "loss": 2.5966,
      "step": 5972
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9677479267120361,
      "learning_rate": 1.974717406084645e-05,
      "loss": 2.6379,
      "step": 5973
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9001351594924927,
      "learning_rate": 1.9747082053620896e-05,
      "loss": 2.464,
      "step": 5974
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9910852909088135,
      "learning_rate": 1.9746990029871388e-05,
      "loss": 2.4064,
      "step": 5975
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1095937490463257,
      "learning_rate": 1.974689798959809e-05,
      "loss": 2.8585,
      "step": 5976
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.993202805519104,
      "learning_rate": 1.9746805932801148e-05,
      "loss": 2.7785,
      "step": 5977
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0492087602615356,
      "learning_rate": 1.974671385948072e-05,
      "loss": 2.5599,
      "step": 5978
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9386765956878662,
      "learning_rate": 1.974662176963697e-05,
      "loss": 2.6362,
      "step": 5979
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0044068098068237,
      "learning_rate": 1.9746529663270048e-05,
      "loss": 2.7982,
      "step": 5980
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1478370428085327,
      "learning_rate": 1.974643754038011e-05,
      "loss": 2.6126,
      "step": 5981
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9971446394920349,
      "learning_rate": 1.9746345400967316e-05,
      "loss": 2.7595,
      "step": 5982
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9733408689498901,
      "learning_rate": 1.9746253245031818e-05,
      "loss": 2.7756,
      "step": 5983
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0274009704589844,
      "learning_rate": 1.9746161072573774e-05,
      "loss": 2.8686,
      "step": 5984
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9675590395927429,
      "learning_rate": 1.974606888359334e-05,
      "loss": 2.616,
      "step": 5985
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1880016326904297,
      "learning_rate": 1.974597667809067e-05,
      "loss": 2.5002,
      "step": 5986
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9869675636291504,
      "learning_rate": 1.9745884456065923e-05,
      "loss": 2.6111,
      "step": 5987
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0259379148483276,
      "learning_rate": 1.9745792217519255e-05,
      "loss": 2.6492,
      "step": 5988
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0556061267852783,
      "learning_rate": 1.9745699962450822e-05,
      "loss": 2.5965,
      "step": 5989
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0674339532852173,
      "learning_rate": 1.9745607690860785e-05,
      "loss": 2.8371,
      "step": 5990
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0928486585617065,
      "learning_rate": 1.9745515402749293e-05,
      "loss": 2.5723,
      "step": 5991
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9693211913108826,
      "learning_rate": 1.9745423098116504e-05,
      "loss": 2.6633,
      "step": 5992
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0861150026321411,
      "learning_rate": 1.9745330776962578e-05,
      "loss": 2.5587,
      "step": 5993
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9417067766189575,
      "learning_rate": 1.9745238439287667e-05,
      "loss": 2.8031,
      "step": 5994
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0017095804214478,
      "learning_rate": 1.974514608509193e-05,
      "loss": 2.7468,
      "step": 5995
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9050960540771484,
      "learning_rate": 1.974505371437553e-05,
      "loss": 2.5745,
      "step": 5996
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.8916175365447998,
      "learning_rate": 1.9744961327138613e-05,
      "loss": 2.6546,
      "step": 5997
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9550510048866272,
      "learning_rate": 1.974486892338134e-05,
      "loss": 2.7139,
      "step": 5998
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.091829538345337,
      "learning_rate": 1.9744776503103868e-05,
      "loss": 2.6215,
      "step": 5999
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0772284269332886,
      "learning_rate": 1.974468406630635e-05,
      "loss": 2.5599,
      "step": 6000
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9247375726699829,
      "learning_rate": 1.9744591612988948e-05,
      "loss": 2.6155,
      "step": 6001
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9524394869804382,
      "learning_rate": 1.974449914315182e-05,
      "loss": 2.7208,
      "step": 6002
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9612957239151001,
      "learning_rate": 1.9744406656795114e-05,
      "loss": 2.5682,
      "step": 6003
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9272464513778687,
      "learning_rate": 1.9744314153918993e-05,
      "loss": 2.6169,
      "step": 6004
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2097084522247314,
      "learning_rate": 1.9744221634523616e-05,
      "loss": 2.5184,
      "step": 6005
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0650863647460938,
      "learning_rate": 1.9744129098609135e-05,
      "loss": 2.5117,
      "step": 6006
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9552513957023621,
      "learning_rate": 1.974403654617571e-05,
      "loss": 2.5757,
      "step": 6007
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0432873964309692,
      "learning_rate": 1.9743943977223497e-05,
      "loss": 2.7529,
      "step": 6008
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0400439500808716,
      "learning_rate": 1.974385139175265e-05,
      "loss": 2.5311,
      "step": 6009
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9820378422737122,
      "learning_rate": 1.974375878976333e-05,
      "loss": 2.375,
      "step": 6010
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9679824113845825,
      "learning_rate": 1.9743666171255692e-05,
      "loss": 2.5438,
      "step": 6011
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9654666185379028,
      "learning_rate": 1.9743573536229892e-05,
      "loss": 2.7601,
      "step": 6012
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0312960147857666,
      "learning_rate": 1.974348088468609e-05,
      "loss": 2.808,
      "step": 6013
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0558116436004639,
      "learning_rate": 1.9743388216624443e-05,
      "loss": 2.779,
      "step": 6014
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9788362979888916,
      "learning_rate": 1.9743295532045105e-05,
      "loss": 2.7418,
      "step": 6015
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0440813302993774,
      "learning_rate": 1.9743202830948235e-05,
      "loss": 2.8295,
      "step": 6016
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0399638414382935,
      "learning_rate": 1.974311011333399e-05,
      "loss": 2.5192,
      "step": 6017
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.025259256362915,
      "learning_rate": 1.9743017379202526e-05,
      "loss": 2.7005,
      "step": 6018
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9482632875442505,
      "learning_rate": 1.9742924628554005e-05,
      "loss": 2.8236,
      "step": 6019
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9712290167808533,
      "learning_rate": 1.974283186138858e-05,
      "loss": 2.7426,
      "step": 6020
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.01658034324646,
      "learning_rate": 1.9742739077706406e-05,
      "loss": 2.58,
      "step": 6021
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9245757460594177,
      "learning_rate": 1.9742646277507643e-05,
      "loss": 2.8883,
      "step": 6022
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1613097190856934,
      "learning_rate": 1.9742553460792453e-05,
      "loss": 2.7832,
      "step": 6023
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0273793935775757,
      "learning_rate": 1.9742460627560985e-05,
      "loss": 2.893,
      "step": 6024
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.076622724533081,
      "learning_rate": 1.97423677778134e-05,
      "loss": 2.519,
      "step": 6025
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0422600507736206,
      "learning_rate": 1.9742274911549856e-05,
      "loss": 2.7588,
      "step": 6026
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9822518825531006,
      "learning_rate": 1.9742182028770514e-05,
      "loss": 2.7727,
      "step": 6027
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0202882289886475,
      "learning_rate": 1.9742089129475524e-05,
      "loss": 2.616,
      "step": 6028
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0072485208511353,
      "learning_rate": 1.9741996213665048e-05,
      "loss": 2.709,
      "step": 6029
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0931193828582764,
      "learning_rate": 1.974190328133924e-05,
      "loss": 2.656,
      "step": 6030
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0319308042526245,
      "learning_rate": 1.9741810332498262e-05,
      "loss": 2.8493,
      "step": 6031
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0062344074249268,
      "learning_rate": 1.9741717367142268e-05,
      "loss": 2.6661,
      "step": 6032
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.328855514526367,
      "learning_rate": 1.974162438527142e-05,
      "loss": 2.8781,
      "step": 6033
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.035614252090454,
      "learning_rate": 1.9741531386885872e-05,
      "loss": 2.7853,
      "step": 6034
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.958311915397644,
      "learning_rate": 1.9741438371985782e-05,
      "loss": 2.8157,
      "step": 6035
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9726164937019348,
      "learning_rate": 1.974134534057131e-05,
      "loss": 2.5264,
      "step": 6036
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0316953659057617,
      "learning_rate": 1.974125229264261e-05,
      "loss": 3.1577,
      "step": 6037
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9028236865997314,
      "learning_rate": 1.974115922819984e-05,
      "loss": 2.5031,
      "step": 6038
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1178196668624878,
      "learning_rate": 1.9741066147243163e-05,
      "loss": 2.661,
      "step": 6039
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.069467544555664,
      "learning_rate": 1.974097304977273e-05,
      "loss": 2.7419,
      "step": 6040
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9578133821487427,
      "learning_rate": 1.9740879935788704e-05,
      "loss": 2.6405,
      "step": 6041
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0218051671981812,
      "learning_rate": 1.9740786805291242e-05,
      "loss": 2.5882,
      "step": 6042
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0714524984359741,
      "learning_rate": 1.9740693658280498e-05,
      "loss": 2.6756,
      "step": 6043
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9964566230773926,
      "learning_rate": 1.9740600494756636e-05,
      "loss": 2.8288,
      "step": 6044
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0554906129837036,
      "learning_rate": 1.974050731471981e-05,
      "loss": 2.9936,
      "step": 6045
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0990524291992188,
      "learning_rate": 1.9740414118170176e-05,
      "loss": 2.6346,
      "step": 6046
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0004512071609497,
      "learning_rate": 1.9740320905107894e-05,
      "loss": 2.6906,
      "step": 6047
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1998282670974731,
      "learning_rate": 1.9740227675533124e-05,
      "loss": 2.7347,
      "step": 6048
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1878693103790283,
      "learning_rate": 1.9740134429446027e-05,
      "loss": 2.5578,
      "step": 6049
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0454657077789307,
      "learning_rate": 1.9740041166846754e-05,
      "loss": 2.6612,
      "step": 6050
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9480933547019958,
      "learning_rate": 1.973994788773546e-05,
      "loss": 2.9061,
      "step": 6051
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0306535959243774,
      "learning_rate": 1.973985459211232e-05,
      "loss": 2.7597,
      "step": 6052
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1294236183166504,
      "learning_rate": 1.973976127997747e-05,
      "loss": 2.5443,
      "step": 6053
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9910421371459961,
      "learning_rate": 1.973966795133109e-05,
      "loss": 2.4602,
      "step": 6054
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1099456548690796,
      "learning_rate": 1.973957460617332e-05,
      "loss": 2.5153,
      "step": 6055
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9987056851387024,
      "learning_rate": 1.9739481244504325e-05,
      "loss": 2.8549,
      "step": 6056
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1002017259597778,
      "learning_rate": 1.973938786632427e-05,
      "loss": 2.5919,
      "step": 6057
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1659258604049683,
      "learning_rate": 1.97392944716333e-05,
      "loss": 2.8874,
      "step": 6058
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.084161639213562,
      "learning_rate": 1.9739201060431588e-05,
      "loss": 2.9132,
      "step": 6059
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.030389428138733,
      "learning_rate": 1.973910763271928e-05,
      "loss": 2.7013,
      "step": 6060
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9665654301643372,
      "learning_rate": 1.9739014188496544e-05,
      "loss": 2.6187,
      "step": 6061
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0427650213241577,
      "learning_rate": 1.9738920727763532e-05,
      "loss": 2.8297,
      "step": 6062
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9423591494560242,
      "learning_rate": 1.97388272505204e-05,
      "loss": 2.5882,
      "step": 6063
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1839935779571533,
      "learning_rate": 1.9738733756767317e-05,
      "loss": 2.6621,
      "step": 6064
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9690552353858948,
      "learning_rate": 1.9738640246504433e-05,
      "loss": 2.58,
      "step": 6065
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9216395020484924,
      "learning_rate": 1.9738546719731908e-05,
      "loss": 2.7566,
      "step": 6066
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.001126766204834,
      "learning_rate": 1.9738453176449904e-05,
      "loss": 2.7628,
      "step": 6067
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.036826729774475,
      "learning_rate": 1.9738359616658575e-05,
      "loss": 2.5741,
      "step": 6068
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0593692064285278,
      "learning_rate": 1.973826604035808e-05,
      "loss": 2.5979,
      "step": 6069
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0346609354019165,
      "learning_rate": 1.9738172447548584e-05,
      "loss": 2.728,
      "step": 6070
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0082108974456787,
      "learning_rate": 1.9738078838230235e-05,
      "loss": 2.7736,
      "step": 6071
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.007304072380066,
      "learning_rate": 1.97379852124032e-05,
      "loss": 2.6402,
      "step": 6072
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9931779503822327,
      "learning_rate": 1.9737891570067636e-05,
      "loss": 2.8779,
      "step": 6073
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9531176090240479,
      "learning_rate": 1.97377979112237e-05,
      "loss": 2.6541,
      "step": 6074
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.053238034248352,
      "learning_rate": 1.9737704235871554e-05,
      "loss": 2.6811,
      "step": 6075
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9393471479415894,
      "learning_rate": 1.973761054401135e-05,
      "loss": 2.7274,
      "step": 6076
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0275760889053345,
      "learning_rate": 1.9737516835643253e-05,
      "loss": 2.7044,
      "step": 6077
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9965896606445312,
      "learning_rate": 1.9737423110767425e-05,
      "loss": 2.542,
      "step": 6078
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0124571323394775,
      "learning_rate": 1.9737329369384015e-05,
      "loss": 2.7857,
      "step": 6079
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.016558051109314,
      "learning_rate": 1.9737235611493187e-05,
      "loss": 2.6641,
      "step": 6080
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.990839958190918,
      "learning_rate": 1.9737141837095103e-05,
      "loss": 2.7558,
      "step": 6081
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.016549825668335,
      "learning_rate": 1.973704804618992e-05,
      "loss": 2.8233,
      "step": 6082
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9689702987670898,
      "learning_rate": 1.9736954238777793e-05,
      "loss": 2.6818,
      "step": 6083
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0071256160736084,
      "learning_rate": 1.9736860414858883e-05,
      "loss": 2.5336,
      "step": 6084
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0881242752075195,
      "learning_rate": 1.973676657443335e-05,
      "loss": 2.7016,
      "step": 6085
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9709838628768921,
      "learning_rate": 1.9736672717501356e-05,
      "loss": 2.8308,
      "step": 6086
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9558907747268677,
      "learning_rate": 1.973657884406306e-05,
      "loss": 2.7884,
      "step": 6087
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0039902925491333,
      "learning_rate": 1.9736484954118613e-05,
      "loss": 2.8704,
      "step": 6088
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.171500563621521,
      "learning_rate": 1.9736391047668183e-05,
      "loss": 2.6469,
      "step": 6089
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2798569202423096,
      "learning_rate": 1.973629712471192e-05,
      "loss": 2.7017,
      "step": 6090
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1357789039611816,
      "learning_rate": 1.973620318525e-05,
      "loss": 2.8857,
      "step": 6091
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0707536935806274,
      "learning_rate": 1.973610922928256e-05,
      "loss": 2.7751,
      "step": 6092
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9731671810150146,
      "learning_rate": 1.9736015256809774e-05,
      "loss": 2.6843,
      "step": 6093
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9889099597930908,
      "learning_rate": 1.97359212678318e-05,
      "loss": 2.6246,
      "step": 6094
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9869720339775085,
      "learning_rate": 1.9735827262348793e-05,
      "loss": 2.777,
      "step": 6095
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9138409495353699,
      "learning_rate": 1.9735733240360918e-05,
      "loss": 2.6237,
      "step": 6096
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2537933588027954,
      "learning_rate": 1.9735639201868332e-05,
      "loss": 2.4546,
      "step": 6097
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1159776449203491,
      "learning_rate": 1.973554514687119e-05,
      "loss": 2.7104,
      "step": 6098
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.086789608001709,
      "learning_rate": 1.9735451075369653e-05,
      "loss": 2.5759,
      "step": 6099
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0642168521881104,
      "learning_rate": 1.9735356987363884e-05,
      "loss": 2.8704,
      "step": 6100
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.112326741218567,
      "learning_rate": 1.9735262882854042e-05,
      "loss": 2.8592,
      "step": 6101
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9851614236831665,
      "learning_rate": 1.973516876184028e-05,
      "loss": 2.5342,
      "step": 6102
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0459363460540771,
      "learning_rate": 1.9735074624322773e-05,
      "loss": 2.5633,
      "step": 6103
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9780690670013428,
      "learning_rate": 1.9734980470301663e-05,
      "loss": 2.4603,
      "step": 6104
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.014681339263916,
      "learning_rate": 1.9734886299777118e-05,
      "loss": 2.681,
      "step": 6105
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9684288501739502,
      "learning_rate": 1.97347921127493e-05,
      "loss": 2.6858,
      "step": 6106
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9784584045410156,
      "learning_rate": 1.9734697909218363e-05,
      "loss": 2.6767,
      "step": 6107
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1226232051849365,
      "learning_rate": 1.973460368918447e-05,
      "loss": 2.7627,
      "step": 6108
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0277506113052368,
      "learning_rate": 1.973450945264778e-05,
      "loss": 2.4728,
      "step": 6109
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0443332195281982,
      "learning_rate": 1.973441519960845e-05,
      "loss": 2.719,
      "step": 6110
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0932490825653076,
      "learning_rate": 1.9734320930066645e-05,
      "loss": 2.6133,
      "step": 6111
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0511823892593384,
      "learning_rate": 1.973422664402252e-05,
      "loss": 2.5543,
      "step": 6112
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9744455218315125,
      "learning_rate": 1.973413234147624e-05,
      "loss": 2.5236,
      "step": 6113
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.034035325050354,
      "learning_rate": 1.973403802242796e-05,
      "loss": 2.5986,
      "step": 6114
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0621907711029053,
      "learning_rate": 1.9733943686877843e-05,
      "loss": 2.6815,
      "step": 6115
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1896036863327026,
      "learning_rate": 1.9733849334826044e-05,
      "loss": 2.7406,
      "step": 6116
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1034388542175293,
      "learning_rate": 1.973375496627273e-05,
      "loss": 2.8161,
      "step": 6117
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9984724521636963,
      "learning_rate": 1.9733660581218058e-05,
      "loss": 2.58,
      "step": 6118
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.313646674156189,
      "learning_rate": 1.9733566179662187e-05,
      "loss": 2.7225,
      "step": 6119
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.048233151435852,
      "learning_rate": 1.9733471761605277e-05,
      "loss": 2.8338,
      "step": 6120
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9653517603874207,
      "learning_rate": 1.9733377327047492e-05,
      "loss": 2.5503,
      "step": 6121
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9781544208526611,
      "learning_rate": 1.9733282875988986e-05,
      "loss": 2.5816,
      "step": 6122
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9747933745384216,
      "learning_rate": 1.9733188408429922e-05,
      "loss": 2.5967,
      "step": 6123
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1576004028320312,
      "learning_rate": 1.973309392437046e-05,
      "loss": 2.6171,
      "step": 6124
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9274995923042297,
      "learning_rate": 1.9732999423810763e-05,
      "loss": 2.5726,
      "step": 6125
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.033246397972107,
      "learning_rate": 1.9732904906750986e-05,
      "loss": 2.9638,
      "step": 6126
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9787362813949585,
      "learning_rate": 1.9732810373191294e-05,
      "loss": 2.7957,
      "step": 6127
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9894342422485352,
      "learning_rate": 1.9732715823131842e-05,
      "loss": 2.5394,
      "step": 6128
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.02513587474823,
      "learning_rate": 1.9732621256572794e-05,
      "loss": 2.7918,
      "step": 6129
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0129741430282593,
      "learning_rate": 1.9732526673514313e-05,
      "loss": 2.5624,
      "step": 6130
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.028875470161438,
      "learning_rate": 1.9732432073956553e-05,
      "loss": 2.8214,
      "step": 6131
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9883272647857666,
      "learning_rate": 1.9732337457899674e-05,
      "loss": 2.4812,
      "step": 6132
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.029732584953308,
      "learning_rate": 1.9732242825343847e-05,
      "loss": 2.5255,
      "step": 6133
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0339707136154175,
      "learning_rate": 1.973214817628922e-05,
      "loss": 2.4025,
      "step": 6134
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9791274666786194,
      "learning_rate": 1.973205351073596e-05,
      "loss": 2.8354,
      "step": 6135
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.101564884185791,
      "learning_rate": 1.9731958828684227e-05,
      "loss": 2.6547,
      "step": 6136
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0913981199264526,
      "learning_rate": 1.9731864130134176e-05,
      "loss": 2.5727,
      "step": 6137
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.998008131980896,
      "learning_rate": 1.973176941508598e-05,
      "loss": 2.4755,
      "step": 6138
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9094182252883911,
      "learning_rate": 1.9731674683539785e-05,
      "loss": 2.8135,
      "step": 6139
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9642931818962097,
      "learning_rate": 1.9731579935495762e-05,
      "loss": 2.6291,
      "step": 6140
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9449033141136169,
      "learning_rate": 1.973148517095407e-05,
      "loss": 2.712,
      "step": 6141
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9745804667472839,
      "learning_rate": 1.973139038991486e-05,
      "loss": 2.7646,
      "step": 6142
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9801130890846252,
      "learning_rate": 1.9731295592378304e-05,
      "loss": 2.7977,
      "step": 6143
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0167877674102783,
      "learning_rate": 1.973120077834456e-05,
      "loss": 2.3536,
      "step": 6144
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9568225145339966,
      "learning_rate": 1.973110594781379e-05,
      "loss": 2.7836,
      "step": 6145
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1281107664108276,
      "learning_rate": 1.973101110078615e-05,
      "loss": 2.5709,
      "step": 6146
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.112114429473877,
      "learning_rate": 1.9730916237261804e-05,
      "loss": 2.7181,
      "step": 6147
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9528690576553345,
      "learning_rate": 1.973082135724091e-05,
      "loss": 2.6005,
      "step": 6148
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2345125675201416,
      "learning_rate": 1.9730726460723634e-05,
      "loss": 2.7295,
      "step": 6149
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0051331520080566,
      "learning_rate": 1.973063154771013e-05,
      "loss": 2.74,
      "step": 6150
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.007853627204895,
      "learning_rate": 1.9730536618200568e-05,
      "loss": 2.6099,
      "step": 6151
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9274623990058899,
      "learning_rate": 1.9730441672195097e-05,
      "loss": 2.6403,
      "step": 6152
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9159325957298279,
      "learning_rate": 1.973034670969389e-05,
      "loss": 2.5902,
      "step": 6153
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.092771291732788,
      "learning_rate": 1.97302517306971e-05,
      "loss": 2.7486,
      "step": 6154
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0943200588226318,
      "learning_rate": 1.9730156735204892e-05,
      "loss": 2.6219,
      "step": 6155
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0220634937286377,
      "learning_rate": 1.9730061723217428e-05,
      "loss": 2.6598,
      "step": 6156
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1478610038757324,
      "learning_rate": 1.972996669473486e-05,
      "loss": 2.7447,
      "step": 6157
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.928322970867157,
      "learning_rate": 1.972987164975736e-05,
      "loss": 2.6739,
      "step": 6158
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9983372092247009,
      "learning_rate": 1.9729776588285086e-05,
      "loss": 2.6955,
      "step": 6159
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.036084771156311,
      "learning_rate": 1.9729681510318194e-05,
      "loss": 2.6696,
      "step": 6160
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.167443037033081,
      "learning_rate": 1.9729586415856853e-05,
      "loss": 2.9433,
      "step": 6161
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1036704778671265,
      "learning_rate": 1.972949130490122e-05,
      "loss": 2.8027,
      "step": 6162
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9355164766311646,
      "learning_rate": 1.9729396177451458e-05,
      "loss": 2.6022,
      "step": 6163
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1815226078033447,
      "learning_rate": 1.9729301033507723e-05,
      "loss": 2.6895,
      "step": 6164
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9703181385993958,
      "learning_rate": 1.9729205873070182e-05,
      "loss": 2.8126,
      "step": 6165
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9920377135276794,
      "learning_rate": 1.9729110696138996e-05,
      "loss": 2.6978,
      "step": 6166
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0967072248458862,
      "learning_rate": 1.9729015502714323e-05,
      "loss": 2.4304,
      "step": 6167
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9160654544830322,
      "learning_rate": 1.972892029279633e-05,
      "loss": 2.7674,
      "step": 6168
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9622529149055481,
      "learning_rate": 1.9728825066385172e-05,
      "loss": 2.7033,
      "step": 6169
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9868846535682678,
      "learning_rate": 1.9728729823481016e-05,
      "loss": 2.6047,
      "step": 6170
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.119484782218933,
      "learning_rate": 1.9728634564084016e-05,
      "loss": 2.7831,
      "step": 6171
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0450085401535034,
      "learning_rate": 1.972853928819434e-05,
      "loss": 2.8018,
      "step": 6172
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.18882155418396,
      "learning_rate": 1.9728443995812147e-05,
      "loss": 2.622,
      "step": 6173
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9997886419296265,
      "learning_rate": 1.9728348686937598e-05,
      "loss": 2.6965,
      "step": 6174
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9625169634819031,
      "learning_rate": 1.972825336157086e-05,
      "loss": 2.6201,
      "step": 6175
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9456976652145386,
      "learning_rate": 1.972815801971209e-05,
      "loss": 2.6414,
      "step": 6176
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1054694652557373,
      "learning_rate": 1.9728062661361446e-05,
      "loss": 2.6632,
      "step": 6177
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0205878019332886,
      "learning_rate": 1.9727967286519097e-05,
      "loss": 2.6259,
      "step": 6178
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9625486731529236,
      "learning_rate": 1.97278718951852e-05,
      "loss": 2.6236,
      "step": 6179
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0586028099060059,
      "learning_rate": 1.972777648735992e-05,
      "loss": 2.6794,
      "step": 6180
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.076122522354126,
      "learning_rate": 1.9727681063043414e-05,
      "loss": 2.6395,
      "step": 6181
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0045275688171387,
      "learning_rate": 1.972758562223585e-05,
      "loss": 2.8303,
      "step": 6182
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.8935879468917847,
      "learning_rate": 1.9727490164937382e-05,
      "loss": 2.7368,
      "step": 6183
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9911378026008606,
      "learning_rate": 1.9727394691148182e-05,
      "loss": 2.6606,
      "step": 6184
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0187370777130127,
      "learning_rate": 1.97272992008684e-05,
      "loss": 2.7421,
      "step": 6185
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9439544677734375,
      "learning_rate": 1.9727203694098206e-05,
      "loss": 2.7481,
      "step": 6186
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0225054025650024,
      "learning_rate": 1.972710817083776e-05,
      "loss": 2.6873,
      "step": 6187
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9976605772972107,
      "learning_rate": 1.9727012631087226e-05,
      "loss": 2.6038,
      "step": 6188
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9197971820831299,
      "learning_rate": 1.972691707484676e-05,
      "loss": 2.7653,
      "step": 6189
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0562350749969482,
      "learning_rate": 1.972682150211653e-05,
      "loss": 2.7233,
      "step": 6190
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0692059993743896,
      "learning_rate": 1.9726725912896695e-05,
      "loss": 2.5198,
      "step": 6191
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9920464754104614,
      "learning_rate": 1.972663030718742e-05,
      "loss": 2.634,
      "step": 6192
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1571687459945679,
      "learning_rate": 1.9726534684988864e-05,
      "loss": 2.7156,
      "step": 6193
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9584951400756836,
      "learning_rate": 1.972643904630119e-05,
      "loss": 2.6956,
      "step": 6194
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0162241458892822,
      "learning_rate": 1.9726343391124557e-05,
      "loss": 2.7086,
      "step": 6195
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9653934240341187,
      "learning_rate": 1.9726247719459133e-05,
      "loss": 2.587,
      "step": 6196
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0558058023452759,
      "learning_rate": 1.972615203130508e-05,
      "loss": 2.7893,
      "step": 6197
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9998050332069397,
      "learning_rate": 1.9726056326662553e-05,
      "loss": 2.72,
      "step": 6198
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9822560548782349,
      "learning_rate": 1.9725960605531722e-05,
      "loss": 2.7402,
      "step": 6199
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.976939857006073,
      "learning_rate": 1.9725864867912745e-05,
      "loss": 2.4692,
      "step": 6200
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0196902751922607,
      "learning_rate": 1.9725769113805787e-05,
      "loss": 2.6239,
      "step": 6201
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0579839944839478,
      "learning_rate": 1.9725673343211004e-05,
      "loss": 2.8854,
      "step": 6202
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0977911949157715,
      "learning_rate": 1.972557755612857e-05,
      "loss": 2.6725,
      "step": 6203
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9465088844299316,
      "learning_rate": 1.9725481752558634e-05,
      "loss": 2.5626,
      "step": 6204
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0385448932647705,
      "learning_rate": 1.9725385932501372e-05,
      "loss": 2.6786,
      "step": 6205
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.001000165939331,
      "learning_rate": 1.9725290095956935e-05,
      "loss": 2.7176,
      "step": 6206
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1086034774780273,
      "learning_rate": 1.972519424292549e-05,
      "loss": 2.7472,
      "step": 6207
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1031534671783447,
      "learning_rate": 1.97250983734072e-05,
      "loss": 2.6145,
      "step": 6208
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9717328548431396,
      "learning_rate": 1.972500248740223e-05,
      "loss": 2.6491,
      "step": 6209
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0878630876541138,
      "learning_rate": 1.9724906584910736e-05,
      "loss": 2.8261,
      "step": 6210
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0398341417312622,
      "learning_rate": 1.9724810665932884e-05,
      "loss": 2.8307,
      "step": 6211
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0664814710617065,
      "learning_rate": 1.9724714730468836e-05,
      "loss": 2.7724,
      "step": 6212
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9696382284164429,
      "learning_rate": 1.972461877851876e-05,
      "loss": 2.4498,
      "step": 6213
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0126856565475464,
      "learning_rate": 1.972452281008281e-05,
      "loss": 2.7946,
      "step": 6214
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9431182146072388,
      "learning_rate": 1.9724426825161152e-05,
      "loss": 2.6323,
      "step": 6215
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0067609548568726,
      "learning_rate": 1.9724330823753953e-05,
      "loss": 2.7286,
      "step": 6216
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9313531517982483,
      "learning_rate": 1.972423480586137e-05,
      "loss": 2.7683,
      "step": 6217
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0568420886993408,
      "learning_rate": 1.972413877148357e-05,
      "loss": 2.8718,
      "step": 6218
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0530009269714355,
      "learning_rate": 1.9724042720620708e-05,
      "loss": 2.6942,
      "step": 6219
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9743406176567078,
      "learning_rate": 1.972394665327296e-05,
      "loss": 2.5763,
      "step": 6220
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9310750365257263,
      "learning_rate": 1.9723850569440473e-05,
      "loss": 2.6284,
      "step": 6221
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9503182768821716,
      "learning_rate": 1.9723754469123428e-05,
      "loss": 2.6637,
      "step": 6222
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9991093277931213,
      "learning_rate": 1.9723658352321973e-05,
      "loss": 2.7377,
      "step": 6223
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9559774994850159,
      "learning_rate": 1.9723562219036277e-05,
      "loss": 2.5273,
      "step": 6224
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9939979910850525,
      "learning_rate": 1.97234660692665e-05,
      "loss": 2.7571,
      "step": 6225
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9619856476783752,
      "learning_rate": 1.972336990301281e-05,
      "loss": 2.7178,
      "step": 6226
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9623581767082214,
      "learning_rate": 1.9723273720275365e-05,
      "loss": 2.8995,
      "step": 6227
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9472374320030212,
      "learning_rate": 1.972317752105433e-05,
      "loss": 2.5246,
      "step": 6228
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.941028892993927,
      "learning_rate": 1.972308130534987e-05,
      "loss": 2.6254,
      "step": 6229
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9942072033882141,
      "learning_rate": 1.972298507316215e-05,
      "loss": 2.6426,
      "step": 6230
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0320075750350952,
      "learning_rate": 1.9722888824491325e-05,
      "loss": 2.87,
      "step": 6231
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9694275259971619,
      "learning_rate": 1.972279255933756e-05,
      "loss": 2.7434,
      "step": 6232
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9725233316421509,
      "learning_rate": 1.9722696277701025e-05,
      "loss": 2.9177,
      "step": 6233
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0958377122879028,
      "learning_rate": 1.972259997958188e-05,
      "loss": 2.7594,
      "step": 6234
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.045743703842163,
      "learning_rate": 1.9722503664980286e-05,
      "loss": 2.5358,
      "step": 6235
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1624884605407715,
      "learning_rate": 1.972240733389641e-05,
      "loss": 2.8761,
      "step": 6236
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1471449136734009,
      "learning_rate": 1.972231098633041e-05,
      "loss": 2.5688,
      "step": 6237
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0095301866531372,
      "learning_rate": 1.972221462228245e-05,
      "loss": 2.5268,
      "step": 6238
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0161206722259521,
      "learning_rate": 1.9722118241752698e-05,
      "loss": 2.8155,
      "step": 6239
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.252349615097046,
      "learning_rate": 1.9722021844741316e-05,
      "loss": 2.5583,
      "step": 6240
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0983325242996216,
      "learning_rate": 1.9721925431248468e-05,
      "loss": 2.7092,
      "step": 6241
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1998647451400757,
      "learning_rate": 1.9721829001274314e-05,
      "loss": 2.6459,
      "step": 6242
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9558905363082886,
      "learning_rate": 1.972173255481902e-05,
      "loss": 2.8623,
      "step": 6243
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1789271831512451,
      "learning_rate": 1.972163609188275e-05,
      "loss": 2.7484,
      "step": 6244
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0683648586273193,
      "learning_rate": 1.9721539612465662e-05,
      "loss": 2.642,
      "step": 6245
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0752249956130981,
      "learning_rate": 1.9721443116567927e-05,
      "loss": 2.6537,
      "step": 6246
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0679820775985718,
      "learning_rate": 1.9721346604189705e-05,
      "loss": 2.6844,
      "step": 6247
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9855133891105652,
      "learning_rate": 1.9721250075331163e-05,
      "loss": 2.9359,
      "step": 6248
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0617172718048096,
      "learning_rate": 1.972115352999246e-05,
      "loss": 2.5908,
      "step": 6249
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.067406177520752,
      "learning_rate": 1.972105696817376e-05,
      "loss": 2.7673,
      "step": 6250
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0125784873962402,
      "learning_rate": 1.972096038987523e-05,
      "loss": 2.6114,
      "step": 6251
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0160071849822998,
      "learning_rate": 1.9720863795097032e-05,
      "loss": 2.7031,
      "step": 6252
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9848095178604126,
      "learning_rate": 1.972076718383933e-05,
      "loss": 2.5817,
      "step": 6253
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9907047748565674,
      "learning_rate": 1.9720670556102287e-05,
      "loss": 2.5384,
      "step": 6254
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9485626816749573,
      "learning_rate": 1.9720573911886064e-05,
      "loss": 2.7019,
      "step": 6255
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9551015496253967,
      "learning_rate": 1.972047725119083e-05,
      "loss": 2.8065,
      "step": 6256
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9354262351989746,
      "learning_rate": 1.972038057401675e-05,
      "loss": 2.7604,
      "step": 6257
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9768428802490234,
      "learning_rate": 1.9720283880363983e-05,
      "loss": 2.6173,
      "step": 6258
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9569182991981506,
      "learning_rate": 1.9720187170232696e-05,
      "loss": 2.8169,
      "step": 6259
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2333637475967407,
      "learning_rate": 1.972009044362305e-05,
      "loss": 2.8446,
      "step": 6260
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.016554832458496,
      "learning_rate": 1.9719993700535213e-05,
      "loss": 2.7932,
      "step": 6261
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1690430641174316,
      "learning_rate": 1.9719896940969345e-05,
      "loss": 2.5983,
      "step": 6262
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9117345213890076,
      "learning_rate": 1.971980016492561e-05,
      "loss": 2.9221,
      "step": 6263
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0162596702575684,
      "learning_rate": 1.9719703372404176e-05,
      "loss": 2.6774,
      "step": 6264
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9591560959815979,
      "learning_rate": 1.9719606563405205e-05,
      "loss": 2.5216,
      "step": 6265
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1600066423416138,
      "learning_rate": 1.971950973792886e-05,
      "loss": 2.6196,
      "step": 6266
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9458897113800049,
      "learning_rate": 1.971941289597531e-05,
      "loss": 2.72,
      "step": 6267
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9306175708770752,
      "learning_rate": 1.971931603754471e-05,
      "loss": 2.6082,
      "step": 6268
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9540387392044067,
      "learning_rate": 1.9719219162637232e-05,
      "loss": 2.7307,
      "step": 6269
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7358708381652832,
      "learning_rate": 1.971912227125304e-05,
      "loss": 2.6631,
      "step": 6270
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0690878629684448,
      "learning_rate": 1.9719025363392293e-05,
      "loss": 2.554,
      "step": 6271
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9927144050598145,
      "learning_rate": 1.971892843905516e-05,
      "loss": 2.7152,
      "step": 6272
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.967110276222229,
      "learning_rate": 1.9718831498241805e-05,
      "loss": 2.8138,
      "step": 6273
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0463801622390747,
      "learning_rate": 1.9718734540952385e-05,
      "loss": 2.7431,
      "step": 6274
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1291887760162354,
      "learning_rate": 1.9718637567187077e-05,
      "loss": 2.6976,
      "step": 6275
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.980229914188385,
      "learning_rate": 1.9718540576946033e-05,
      "loss": 2.6456,
      "step": 6276
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9767568111419678,
      "learning_rate": 1.971844357022943e-05,
      "loss": 2.712,
      "step": 6277
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9510316848754883,
      "learning_rate": 1.971834654703742e-05,
      "loss": 2.8801,
      "step": 6278
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9584668278694153,
      "learning_rate": 1.9718249507370175e-05,
      "loss": 2.6443,
      "step": 6279
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9918277859687805,
      "learning_rate": 1.971815245122786e-05,
      "loss": 2.7051,
      "step": 6280
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0055100917816162,
      "learning_rate": 1.9718055378610633e-05,
      "loss": 2.6655,
      "step": 6281
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0149472951889038,
      "learning_rate": 1.9717958289518665e-05,
      "loss": 2.759,
      "step": 6282
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.949998140335083,
      "learning_rate": 1.9717861183952117e-05,
      "loss": 2.5839,
      "step": 6283
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0402073860168457,
      "learning_rate": 1.9717764061911157e-05,
      "loss": 2.4711,
      "step": 6284
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.993674099445343,
      "learning_rate": 1.971766692339595e-05,
      "loss": 2.8396,
      "step": 6285
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9887760281562805,
      "learning_rate": 1.9717569768406654e-05,
      "loss": 2.8641,
      "step": 6286
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.8886672258377075,
      "learning_rate": 1.9717472596943436e-05,
      "loss": 2.7186,
      "step": 6287
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9208338856697083,
      "learning_rate": 1.9717375409006467e-05,
      "loss": 2.8218,
      "step": 6288
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9557200074195862,
      "learning_rate": 1.9717278204595907e-05,
      "loss": 2.5506,
      "step": 6289
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.033387303352356,
      "learning_rate": 1.971718098371192e-05,
      "loss": 2.6394,
      "step": 6290
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9640192985534668,
      "learning_rate": 1.9717083746354674e-05,
      "loss": 2.7218,
      "step": 6291
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9095640182495117,
      "learning_rate": 1.971698649252433e-05,
      "loss": 2.7813,
      "step": 6292
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.93828284740448,
      "learning_rate": 1.9716889222221053e-05,
      "loss": 2.6308,
      "step": 6293
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0013614892959595,
      "learning_rate": 1.9716791935445012e-05,
      "loss": 2.784,
      "step": 6294
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.977817952632904,
      "learning_rate": 1.971669463219637e-05,
      "loss": 2.5857,
      "step": 6295
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1282798051834106,
      "learning_rate": 1.971659731247529e-05,
      "loss": 2.7281,
      "step": 6296
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0998395681381226,
      "learning_rate": 1.971649997628194e-05,
      "loss": 2.7639,
      "step": 6297
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1856942176818848,
      "learning_rate": 1.9716402623616483e-05,
      "loss": 2.6055,
      "step": 6298
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0018854141235352,
      "learning_rate": 1.9716305254479085e-05,
      "loss": 2.5331,
      "step": 6299
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.801164150238037,
      "learning_rate": 1.971620786886991e-05,
      "loss": 2.5762,
      "step": 6300
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9698005318641663,
      "learning_rate": 1.9716110466789123e-05,
      "loss": 2.6207,
      "step": 6301
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9486927390098572,
      "learning_rate": 1.9716013048236894e-05,
      "loss": 2.7472,
      "step": 6302
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9214293360710144,
      "learning_rate": 1.971591561321338e-05,
      "loss": 2.6214,
      "step": 6303
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9848204851150513,
      "learning_rate": 1.971581816171875e-05,
      "loss": 2.6511,
      "step": 6304
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0052142143249512,
      "learning_rate": 1.9715720693753174e-05,
      "loss": 2.652,
      "step": 6305
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9138612151145935,
      "learning_rate": 1.971562320931681e-05,
      "loss": 2.4895,
      "step": 6306
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9452409148216248,
      "learning_rate": 1.9715525708409825e-05,
      "loss": 2.4304,
      "step": 6307
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0452260971069336,
      "learning_rate": 1.9715428191032385e-05,
      "loss": 2.7071,
      "step": 6308
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.002242922782898,
      "learning_rate": 1.9715330657184654e-05,
      "loss": 2.495,
      "step": 6309
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0542901754379272,
      "learning_rate": 1.9715233106866804e-05,
      "loss": 2.5863,
      "step": 6310
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9721757173538208,
      "learning_rate": 1.971513554007899e-05,
      "loss": 2.7513,
      "step": 6311
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0414481163024902,
      "learning_rate": 1.971503795682139e-05,
      "loss": 2.5611,
      "step": 6312
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0802751779556274,
      "learning_rate": 1.9714940357094155e-05,
      "loss": 2.4021,
      "step": 6313
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.042000651359558,
      "learning_rate": 1.971484274089746e-05,
      "loss": 2.6074,
      "step": 6314
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0509111881256104,
      "learning_rate": 1.9714745108231467e-05,
      "loss": 2.5812,
      "step": 6315
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0539456605911255,
      "learning_rate": 1.9714647459096344e-05,
      "loss": 2.6654,
      "step": 6316
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.186244010925293,
      "learning_rate": 1.9714549793492252e-05,
      "loss": 2.6848,
      "step": 6317
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0783495903015137,
      "learning_rate": 1.9714452111419362e-05,
      "loss": 2.4266,
      "step": 6318
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0346318483352661,
      "learning_rate": 1.9714354412877838e-05,
      "loss": 2.782,
      "step": 6319
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1820266246795654,
      "learning_rate": 1.9714256697867845e-05,
      "loss": 2.4886,
      "step": 6320
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1391258239746094,
      "learning_rate": 1.9714158966389546e-05,
      "loss": 2.6553,
      "step": 6321
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9950556755065918,
      "learning_rate": 1.9714061218443114e-05,
      "loss": 2.7856,
      "step": 6322
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0338209867477417,
      "learning_rate": 1.9713963454028705e-05,
      "loss": 2.6533,
      "step": 6323
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.128637671470642,
      "learning_rate": 1.971386567314649e-05,
      "loss": 2.6766,
      "step": 6324
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2188456058502197,
      "learning_rate": 1.9713767875796635e-05,
      "loss": 2.6253,
      "step": 6325
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0620086193084717,
      "learning_rate": 1.971367006197931e-05,
      "loss": 2.474,
      "step": 6326
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.035041332244873,
      "learning_rate": 1.9713572231694672e-05,
      "loss": 2.4808,
      "step": 6327
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9941980838775635,
      "learning_rate": 1.971347438494289e-05,
      "loss": 2.4952,
      "step": 6328
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.069854736328125,
      "learning_rate": 1.971337652172413e-05,
      "loss": 2.4258,
      "step": 6329
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0208731889724731,
      "learning_rate": 1.9713278642038563e-05,
      "loss": 2.5634,
      "step": 6330
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1114916801452637,
      "learning_rate": 1.9713180745886346e-05,
      "loss": 2.8085,
      "step": 6331
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0847797393798828,
      "learning_rate": 1.9713082833267652e-05,
      "loss": 2.5407,
      "step": 6332
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9894896745681763,
      "learning_rate": 1.9712984904182645e-05,
      "loss": 2.6875,
      "step": 6333
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.933600127696991,
      "learning_rate": 1.971288695863149e-05,
      "loss": 2.8109,
      "step": 6334
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0518040657043457,
      "learning_rate": 1.9712788996614357e-05,
      "loss": 2.531,
      "step": 6335
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9913419485092163,
      "learning_rate": 1.97126910181314e-05,
      "loss": 2.9119,
      "step": 6336
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.058887004852295,
      "learning_rate": 1.9712593023182803e-05,
      "loss": 2.7666,
      "step": 6337
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9547853469848633,
      "learning_rate": 1.971249501176872e-05,
      "loss": 2.5313,
      "step": 6338
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0294620990753174,
      "learning_rate": 1.9712396983889318e-05,
      "loss": 2.6674,
      "step": 6339
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0104535818099976,
      "learning_rate": 1.9712298939544766e-05,
      "loss": 2.5107,
      "step": 6340
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0141834020614624,
      "learning_rate": 1.971220087873523e-05,
      "loss": 2.7619,
      "step": 6341
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9784649610519409,
      "learning_rate": 1.9712102801460877e-05,
      "loss": 2.7301,
      "step": 6342
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0934581756591797,
      "learning_rate": 1.9712004707721867e-05,
      "loss": 2.7643,
      "step": 6343
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.92220139503479,
      "learning_rate": 1.971190659751838e-05,
      "loss": 2.5593,
      "step": 6344
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9218207597732544,
      "learning_rate": 1.9711808470850564e-05,
      "loss": 2.6859,
      "step": 6345
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0030176639556885,
      "learning_rate": 1.97117103277186e-05,
      "loss": 2.4454,
      "step": 6346
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0236015319824219,
      "learning_rate": 1.9711612168122646e-05,
      "loss": 2.6655,
      "step": 6347
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1249194145202637,
      "learning_rate": 1.9711513992062875e-05,
      "loss": 2.5423,
      "step": 6348
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.129459023475647,
      "learning_rate": 1.9711415799539454e-05,
      "loss": 2.5965,
      "step": 6349
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9804083704948425,
      "learning_rate": 1.971131759055254e-05,
      "loss": 2.9862,
      "step": 6350
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.12571120262146,
      "learning_rate": 1.9711219365102304e-05,
      "loss": 2.6147,
      "step": 6351
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9531368017196655,
      "learning_rate": 1.9711121123188917e-05,
      "loss": 2.5421,
      "step": 6352
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9800019860267639,
      "learning_rate": 1.971102286481254e-05,
      "loss": 2.4906,
      "step": 6353
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9842134714126587,
      "learning_rate": 1.9710924589973342e-05,
      "loss": 2.8316,
      "step": 6354
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0464129447937012,
      "learning_rate": 1.9710826298671494e-05,
      "loss": 2.521,
      "step": 6355
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0499976873397827,
      "learning_rate": 1.9710727990907153e-05,
      "loss": 2.7015,
      "step": 6356
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0167592763900757,
      "learning_rate": 1.9710629666680493e-05,
      "loss": 2.6823,
      "step": 6357
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9268284440040588,
      "learning_rate": 1.9710531325991677e-05,
      "loss": 2.8367,
      "step": 6358
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0602598190307617,
      "learning_rate": 1.971043296884087e-05,
      "loss": 2.7514,
      "step": 6359
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0280481576919556,
      "learning_rate": 1.9710334595228246e-05,
      "loss": 2.7144,
      "step": 6360
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9926019310951233,
      "learning_rate": 1.9710236205153967e-05,
      "loss": 2.7972,
      "step": 6361
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.991115391254425,
      "learning_rate": 1.97101377986182e-05,
      "loss": 2.6195,
      "step": 6362
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9890532493591309,
      "learning_rate": 1.9710039375621114e-05,
      "loss": 2.6884,
      "step": 6363
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9376391172409058,
      "learning_rate": 1.970994093616287e-05,
      "loss": 2.656,
      "step": 6364
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9709022641181946,
      "learning_rate": 1.9709842480243643e-05,
      "loss": 2.6372,
      "step": 6365
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0184752941131592,
      "learning_rate": 1.9709744007863595e-05,
      "loss": 2.6309,
      "step": 6366
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9882090091705322,
      "learning_rate": 1.9709645519022893e-05,
      "loss": 2.7302,
      "step": 6367
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9847800731658936,
      "learning_rate": 1.9709547013721704e-05,
      "loss": 2.7243,
      "step": 6368
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.053076148033142,
      "learning_rate": 1.9709448491960198e-05,
      "loss": 2.4341,
      "step": 6369
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9354162812232971,
      "learning_rate": 1.9709349953738535e-05,
      "loss": 2.7738,
      "step": 6370
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0811502933502197,
      "learning_rate": 1.9709251399056893e-05,
      "loss": 2.592,
      "step": 6371
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2084513902664185,
      "learning_rate": 1.9709152827915427e-05,
      "loss": 2.5209,
      "step": 6372
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9890623092651367,
      "learning_rate": 1.9709054240314314e-05,
      "loss": 2.6272,
      "step": 6373
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9989960193634033,
      "learning_rate": 1.9708955636253715e-05,
      "loss": 2.4804,
      "step": 6374
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9480695128440857,
      "learning_rate": 1.97088570157338e-05,
      "loss": 2.8304,
      "step": 6375
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0830363035202026,
      "learning_rate": 1.9708758378754735e-05,
      "loss": 2.6987,
      "step": 6376
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0458652973175049,
      "learning_rate": 1.970865972531669e-05,
      "loss": 2.9487,
      "step": 6377
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9792874455451965,
      "learning_rate": 1.9708561055419827e-05,
      "loss": 2.5739,
      "step": 6378
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0249918699264526,
      "learning_rate": 1.970846236906432e-05,
      "loss": 2.559,
      "step": 6379
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9335075616836548,
      "learning_rate": 1.9708363666250327e-05,
      "loss": 2.6115,
      "step": 6380
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.031043291091919,
      "learning_rate": 1.9708264946978028e-05,
      "loss": 2.7687,
      "step": 6381
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0356467962265015,
      "learning_rate": 1.9708166211247577e-05,
      "loss": 2.6896,
      "step": 6382
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9690642356872559,
      "learning_rate": 1.9708067459059147e-05,
      "loss": 2.6515,
      "step": 6383
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9810231924057007,
      "learning_rate": 1.970796869041291e-05,
      "loss": 2.8682,
      "step": 6384
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.8888358473777771,
      "learning_rate": 1.9707869905309027e-05,
      "loss": 2.519,
      "step": 6385
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0178111791610718,
      "learning_rate": 1.9707771103747667e-05,
      "loss": 2.7578,
      "step": 6386
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0140446424484253,
      "learning_rate": 1.9707672285729002e-05,
      "loss": 2.8514,
      "step": 6387
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0577107667922974,
      "learning_rate": 1.9707573451253195e-05,
      "loss": 2.862,
      "step": 6388
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9286563992500305,
      "learning_rate": 1.9707474600320415e-05,
      "loss": 2.604,
      "step": 6389
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9187138080596924,
      "learning_rate": 1.9707375732930828e-05,
      "loss": 2.5766,
      "step": 6390
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9534868001937866,
      "learning_rate": 1.97072768490846e-05,
      "loss": 2.5952,
      "step": 6391
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.019038438796997,
      "learning_rate": 1.9707177948781903e-05,
      "loss": 2.6497,
      "step": 6392
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0595542192459106,
      "learning_rate": 1.9707079032022903e-05,
      "loss": 2.4718,
      "step": 6393
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0882500410079956,
      "learning_rate": 1.9706980098807767e-05,
      "loss": 2.6094,
      "step": 6394
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0088423490524292,
      "learning_rate": 1.9706881149136668e-05,
      "loss": 2.7667,
      "step": 6395
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9588896036148071,
      "learning_rate": 1.9706782183009766e-05,
      "loss": 2.6674,
      "step": 6396
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.088671088218689,
      "learning_rate": 1.970668320042723e-05,
      "loss": 2.716,
      "step": 6397
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.095858097076416,
      "learning_rate": 1.970658420138923e-05,
      "loss": 2.7983,
      "step": 6398
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0413812398910522,
      "learning_rate": 1.9706485185895936e-05,
      "loss": 2.5835,
      "step": 6399
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0104420185089111,
      "learning_rate": 1.9706386153947513e-05,
      "loss": 2.5031,
      "step": 6400
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9699785113334656,
      "learning_rate": 1.970628710554413e-05,
      "loss": 2.679,
      "step": 6401
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9918128848075867,
      "learning_rate": 1.970618804068595e-05,
      "loss": 2.4719,
      "step": 6402
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0345628261566162,
      "learning_rate": 1.9706088959373146e-05,
      "loss": 2.5142,
      "step": 6403
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9989767670631409,
      "learning_rate": 1.970598986160589e-05,
      "loss": 2.5014,
      "step": 6404
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9310474991798401,
      "learning_rate": 1.970589074738434e-05,
      "loss": 2.5281,
      "step": 6405
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.01113760471344,
      "learning_rate": 1.970579161670867e-05,
      "loss": 2.6322,
      "step": 6406
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9095299243927002,
      "learning_rate": 1.970569246957905e-05,
      "loss": 2.8056,
      "step": 6407
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0000461339950562,
      "learning_rate": 1.970559330599564e-05,
      "loss": 2.5139,
      "step": 6408
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0366116762161255,
      "learning_rate": 1.9705494125958617e-05,
      "loss": 2.7672,
      "step": 6409
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.034000039100647,
      "learning_rate": 1.9705394929468148e-05,
      "loss": 2.7569,
      "step": 6410
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0007966756820679,
      "learning_rate": 1.9705295716524393e-05,
      "loss": 2.714,
      "step": 6411
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.169359803199768,
      "learning_rate": 1.9705196487127528e-05,
      "loss": 2.8177,
      "step": 6412
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.087045431137085,
      "learning_rate": 1.970509724127772e-05,
      "loss": 2.7808,
      "step": 6413
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9952861070632935,
      "learning_rate": 1.9704997978975134e-05,
      "loss": 2.512,
      "step": 6414
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0416008234024048,
      "learning_rate": 1.9704898700219945e-05,
      "loss": 2.6426,
      "step": 6415
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0629518032073975,
      "learning_rate": 1.9704799405012313e-05,
      "loss": 2.587,
      "step": 6416
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9657558798789978,
      "learning_rate": 1.9704700093352414e-05,
      "loss": 2.6554,
      "step": 6417
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0564162731170654,
      "learning_rate": 1.9704600765240408e-05,
      "loss": 2.5114,
      "step": 6418
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0092837810516357,
      "learning_rate": 1.970450142067647e-05,
      "loss": 2.7858,
      "step": 6419
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0734221935272217,
      "learning_rate": 1.970440205966077e-05,
      "loss": 2.6529,
      "step": 6420
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9410339593887329,
      "learning_rate": 1.9704302682193467e-05,
      "loss": 2.6251,
      "step": 6421
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9759786128997803,
      "learning_rate": 1.9704203288274736e-05,
      "loss": 2.7063,
      "step": 6422
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1206793785095215,
      "learning_rate": 1.970410387790475e-05,
      "loss": 2.6667,
      "step": 6423
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.186620831489563,
      "learning_rate": 1.9704004451083666e-05,
      "loss": 2.6443,
      "step": 6424
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0079516172409058,
      "learning_rate": 1.970390500781166e-05,
      "loss": 2.7165,
      "step": 6425
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9911460280418396,
      "learning_rate": 1.97038055480889e-05,
      "loss": 2.7752,
      "step": 6426
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0036277770996094,
      "learning_rate": 1.970370607191556e-05,
      "loss": 2.6693,
      "step": 6427
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0325303077697754,
      "learning_rate": 1.9703606579291798e-05,
      "loss": 2.6381,
      "step": 6428
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0324602127075195,
      "learning_rate": 1.9703507070217786e-05,
      "loss": 2.6172,
      "step": 6429
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0641533136367798,
      "learning_rate": 1.9703407544693696e-05,
      "loss": 2.8013,
      "step": 6430
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.950467586517334,
      "learning_rate": 1.970330800271969e-05,
      "loss": 2.6253,
      "step": 6431
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0055774450302124,
      "learning_rate": 1.970320844429595e-05,
      "loss": 2.5408,
      "step": 6432
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0571807622909546,
      "learning_rate": 1.970310886942263e-05,
      "loss": 2.732,
      "step": 6433
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0019221305847168,
      "learning_rate": 1.9703009278099906e-05,
      "loss": 2.5367,
      "step": 6434
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.001349925994873,
      "learning_rate": 1.9702909670327948e-05,
      "loss": 2.8572,
      "step": 6435
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0732718706130981,
      "learning_rate": 1.970281004610692e-05,
      "loss": 2.8374,
      "step": 6436
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.004590630531311,
      "learning_rate": 1.9702710405436997e-05,
      "loss": 2.5895,
      "step": 6437
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9863455295562744,
      "learning_rate": 1.9702610748318346e-05,
      "loss": 2.7409,
      "step": 6438
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0699316263198853,
      "learning_rate": 1.9702511074751132e-05,
      "loss": 2.7401,
      "step": 6439
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.086547613143921,
      "learning_rate": 1.9702411384735524e-05,
      "loss": 2.5618,
      "step": 6440
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1352887153625488,
      "learning_rate": 1.9702311678271698e-05,
      "loss": 2.57,
      "step": 6441
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9587093591690063,
      "learning_rate": 1.9702211955359817e-05,
      "loss": 2.7479,
      "step": 6442
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9527540802955627,
      "learning_rate": 1.970211221600005e-05,
      "loss": 2.7387,
      "step": 6443
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9673316478729248,
      "learning_rate": 1.970201246019257e-05,
      "loss": 2.6877,
      "step": 6444
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0106054544448853,
      "learning_rate": 1.9701912687937542e-05,
      "loss": 2.6223,
      "step": 6445
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1031851768493652,
      "learning_rate": 1.970181289923514e-05,
      "loss": 2.6032,
      "step": 6446
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0697271823883057,
      "learning_rate": 1.9701713094085523e-05,
      "loss": 2.6215,
      "step": 6447
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0655295848846436,
      "learning_rate": 1.9701613272488873e-05,
      "loss": 2.7614,
      "step": 6448
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0377576351165771,
      "learning_rate": 1.9701513434445353e-05,
      "loss": 2.8353,
      "step": 6449
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.965929388999939,
      "learning_rate": 1.9701413579955134e-05,
      "loss": 2.661,
      "step": 6450
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0274537801742554,
      "learning_rate": 1.9701313709018384e-05,
      "loss": 2.5144,
      "step": 6451
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9581682682037354,
      "learning_rate": 1.970121382163527e-05,
      "loss": 2.7455,
      "step": 6452
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9539101123809814,
      "learning_rate": 1.9701113917805964e-05,
      "loss": 2.6139,
      "step": 6453
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.8929002285003662,
      "learning_rate": 1.970101399753064e-05,
      "loss": 2.7608,
      "step": 6454
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0400301218032837,
      "learning_rate": 1.9700914060809455e-05,
      "loss": 2.7272,
      "step": 6455
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.112449049949646,
      "learning_rate": 1.970081410764259e-05,
      "loss": 2.5927,
      "step": 6456
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0883920192718506,
      "learning_rate": 1.9700714138030213e-05,
      "loss": 2.5697,
      "step": 6457
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1171776056289673,
      "learning_rate": 1.9700614151972487e-05,
      "loss": 2.634,
      "step": 6458
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9438745379447937,
      "learning_rate": 1.9700514149469586e-05,
      "loss": 2.8106,
      "step": 6459
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0145381689071655,
      "learning_rate": 1.970041413052168e-05,
      "loss": 2.6471,
      "step": 6460
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.232008934020996,
      "learning_rate": 1.9700314095128935e-05,
      "loss": 2.6792,
      "step": 6461
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9961974024772644,
      "learning_rate": 1.9700214043291525e-05,
      "loss": 2.5657,
      "step": 6462
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.035644292831421,
      "learning_rate": 1.9700113975009617e-05,
      "loss": 2.5648,
      "step": 6463
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9775177836418152,
      "learning_rate": 1.970001389028338e-05,
      "loss": 2.6865,
      "step": 6464
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9716300368309021,
      "learning_rate": 1.9699913789112987e-05,
      "loss": 2.6818,
      "step": 6465
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.011565089225769,
      "learning_rate": 1.9699813671498606e-05,
      "loss": 2.5885,
      "step": 6466
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.198661208152771,
      "learning_rate": 1.9699713537440405e-05,
      "loss": 2.6118,
      "step": 6467
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9142361283302307,
      "learning_rate": 1.9699613386938558e-05,
      "loss": 2.6824,
      "step": 6468
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0498476028442383,
      "learning_rate": 1.969951321999323e-05,
      "loss": 2.961,
      "step": 6469
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9710757732391357,
      "learning_rate": 1.9699413036604592e-05,
      "loss": 2.5404,
      "step": 6470
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2040144205093384,
      "learning_rate": 1.9699312836772813e-05,
      "loss": 2.4485,
      "step": 6471
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0332602262496948,
      "learning_rate": 1.9699212620498067e-05,
      "loss": 2.7539,
      "step": 6472
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.083320140838623,
      "learning_rate": 1.969911238778052e-05,
      "loss": 2.5279,
      "step": 6473
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9745153188705444,
      "learning_rate": 1.9699012138620345e-05,
      "loss": 2.5309,
      "step": 6474
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0449804067611694,
      "learning_rate": 1.969891187301771e-05,
      "loss": 2.6528,
      "step": 6475
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0088931322097778,
      "learning_rate": 1.9698811590972785e-05,
      "loss": 2.5371,
      "step": 6476
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1053283214569092,
      "learning_rate": 1.9698711292485737e-05,
      "loss": 2.6271,
      "step": 6477
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0352481603622437,
      "learning_rate": 1.9698610977556742e-05,
      "loss": 2.6523,
      "step": 6478
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9772154688835144,
      "learning_rate": 1.9698510646185967e-05,
      "loss": 2.4635,
      "step": 6479
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0011042356491089,
      "learning_rate": 1.969841029837358e-05,
      "loss": 2.5803,
      "step": 6480
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.078098177909851,
      "learning_rate": 1.969830993411976e-05,
      "loss": 2.666,
      "step": 6481
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0085200071334839,
      "learning_rate": 1.9698209553424665e-05,
      "loss": 2.7273,
      "step": 6482
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.108579158782959,
      "learning_rate": 1.9698109156288473e-05,
      "loss": 2.5666,
      "step": 6483
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.044961929321289,
      "learning_rate": 1.969800874271135e-05,
      "loss": 2.6299,
      "step": 6484
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9803265929222107,
      "learning_rate": 1.9697908312693473e-05,
      "loss": 2.7118,
      "step": 6485
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0426115989685059,
      "learning_rate": 1.9697807866235003e-05,
      "loss": 2.749,
      "step": 6486
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1450812816619873,
      "learning_rate": 1.9697707403336116e-05,
      "loss": 2.8013,
      "step": 6487
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0635044574737549,
      "learning_rate": 1.9697606923996982e-05,
      "loss": 2.7517,
      "step": 6488
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9717480540275574,
      "learning_rate": 1.9697506428217766e-05,
      "loss": 2.6615,
      "step": 6489
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9759281277656555,
      "learning_rate": 1.969740591599865e-05,
      "loss": 2.7429,
      "step": 6490
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.169569969177246,
      "learning_rate": 1.969730538733979e-05,
      "loss": 2.8191,
      "step": 6491
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9751202464103699,
      "learning_rate": 1.969720484224137e-05,
      "loss": 2.7108,
      "step": 6492
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9425063729286194,
      "learning_rate": 1.969710428070355e-05,
      "loss": 2.7924,
      "step": 6493
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0377564430236816,
      "learning_rate": 1.969700370272651e-05,
      "loss": 2.7485,
      "step": 6494
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.05824613571167,
      "learning_rate": 1.9696903108310407e-05,
      "loss": 2.7241,
      "step": 6495
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.8687257170677185,
      "learning_rate": 1.9696802497455426e-05,
      "loss": 2.6886,
      "step": 6496
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0007688999176025,
      "learning_rate": 1.9696701870161727e-05,
      "loss": 2.5247,
      "step": 6497
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0449917316436768,
      "learning_rate": 1.9696601226429484e-05,
      "loss": 2.7579,
      "step": 6498
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9972569942474365,
      "learning_rate": 1.9696500566258873e-05,
      "loss": 2.7299,
      "step": 6499
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0345155000686646,
      "learning_rate": 1.9696399889650055e-05,
      "loss": 2.6923,
      "step": 6500
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9726771116256714,
      "learning_rate": 1.9696299196603212e-05,
      "loss": 2.7544,
      "step": 6501
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0119057893753052,
      "learning_rate": 1.9696198487118505e-05,
      "loss": 2.84,
      "step": 6502
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9714793562889099,
      "learning_rate": 1.9696097761196107e-05,
      "loss": 2.579,
      "step": 6503
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0855674743652344,
      "learning_rate": 1.969599701883619e-05,
      "loss": 2.8275,
      "step": 6504
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0019237995147705,
      "learning_rate": 1.9695896260038926e-05,
      "loss": 2.3616,
      "step": 6505
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0236291885375977,
      "learning_rate": 1.9695795484804483e-05,
      "loss": 2.3928,
      "step": 6506
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9687061309814453,
      "learning_rate": 1.9695694693133033e-05,
      "loss": 2.5902,
      "step": 6507
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9664583206176758,
      "learning_rate": 1.969559388502475e-05,
      "loss": 2.7356,
      "step": 6508
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9400779008865356,
      "learning_rate": 1.9695493060479798e-05,
      "loss": 2.62,
      "step": 6509
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0111061334609985,
      "learning_rate": 1.969539221949836e-05,
      "loss": 2.7632,
      "step": 6510
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9393638968467712,
      "learning_rate": 1.9695291362080587e-05,
      "loss": 2.6052,
      "step": 6511
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1001794338226318,
      "learning_rate": 1.969519048822667e-05,
      "loss": 2.6123,
      "step": 6512
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0230499505996704,
      "learning_rate": 1.969508959793677e-05,
      "loss": 2.61,
      "step": 6513
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9931899905204773,
      "learning_rate": 1.969498869121106e-05,
      "loss": 2.3857,
      "step": 6514
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0865799188613892,
      "learning_rate": 1.9694887768049715e-05,
      "loss": 2.6598,
      "step": 6515
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9640737771987915,
      "learning_rate": 1.9694786828452895e-05,
      "loss": 2.6207,
      "step": 6516
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9496186971664429,
      "learning_rate": 1.9694685872420783e-05,
      "loss": 2.4259,
      "step": 6517
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9748578667640686,
      "learning_rate": 1.9694584899953542e-05,
      "loss": 2.6426,
      "step": 6518
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0615030527114868,
      "learning_rate": 1.969448391105135e-05,
      "loss": 2.579,
      "step": 6519
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9679868221282959,
      "learning_rate": 1.969438290571437e-05,
      "loss": 2.7023,
      "step": 6520
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1537266969680786,
      "learning_rate": 1.9694281883942784e-05,
      "loss": 2.6086,
      "step": 6521
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.925557553768158,
      "learning_rate": 1.9694180845736754e-05,
      "loss": 2.6054,
      "step": 6522
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9352023601531982,
      "learning_rate": 1.9694079791096454e-05,
      "loss": 2.6666,
      "step": 6523
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0032780170440674,
      "learning_rate": 1.969397872002206e-05,
      "loss": 2.7043,
      "step": 6524
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.8930495977401733,
      "learning_rate": 1.9693877632513737e-05,
      "loss": 2.597,
      "step": 6525
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9755659103393555,
      "learning_rate": 1.9693776528571655e-05,
      "loss": 2.7188,
      "step": 6526
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9338887929916382,
      "learning_rate": 1.9693675408195995e-05,
      "loss": 2.6791,
      "step": 6527
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.9667413234710693,
      "learning_rate": 1.9693574271386916e-05,
      "loss": 2.689,
      "step": 6528
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9789403676986694,
      "learning_rate": 1.9693473118144603e-05,
      "loss": 2.7527,
      "step": 6529
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0127546787261963,
      "learning_rate": 1.9693371948469213e-05,
      "loss": 2.4469,
      "step": 6530
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.15132737159729,
      "learning_rate": 1.969327076236093e-05,
      "loss": 2.613,
      "step": 6531
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0529803037643433,
      "learning_rate": 1.969316955981992e-05,
      "loss": 2.4963,
      "step": 6532
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9719037413597107,
      "learning_rate": 1.969306834084635e-05,
      "loss": 2.8252,
      "step": 6533
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0147072076797485,
      "learning_rate": 1.9692967105440402e-05,
      "loss": 2.6248,
      "step": 6534
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9855034947395325,
      "learning_rate": 1.969286585360224e-05,
      "loss": 2.5735,
      "step": 6535
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0565893650054932,
      "learning_rate": 1.969276458533204e-05,
      "loss": 2.6489,
      "step": 6536
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9789797067642212,
      "learning_rate": 1.9692663300629968e-05,
      "loss": 2.5001,
      "step": 6537
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6179614067077637,
      "learning_rate": 1.9692561999496203e-05,
      "loss": 2.7421,
      "step": 6538
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1181387901306152,
      "learning_rate": 1.969246068193091e-05,
      "loss": 2.7665,
      "step": 6539
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9610182046890259,
      "learning_rate": 1.9692359347934266e-05,
      "loss": 2.7192,
      "step": 6540
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8851690292358398,
      "learning_rate": 1.969225799750644e-05,
      "loss": 2.7647,
      "step": 6541
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9521141648292542,
      "learning_rate": 1.9692156630647604e-05,
      "loss": 2.6517,
      "step": 6542
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9608968496322632,
      "learning_rate": 1.9692055247357928e-05,
      "loss": 2.6581,
      "step": 6543
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9645318984985352,
      "learning_rate": 1.969195384763759e-05,
      "loss": 2.5138,
      "step": 6544
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9937524199485779,
      "learning_rate": 1.9691852431486754e-05,
      "loss": 2.7296,
      "step": 6545
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9725612998008728,
      "learning_rate": 1.9691750998905596e-05,
      "loss": 2.6617,
      "step": 6546
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9489050507545471,
      "learning_rate": 1.969164954989429e-05,
      "loss": 2.6107,
      "step": 6547
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9987325668334961,
      "learning_rate": 1.9691548084453004e-05,
      "loss": 2.74,
      "step": 6548
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0772924423217773,
      "learning_rate": 1.9691446602581914e-05,
      "loss": 2.6222,
      "step": 6549
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0480873584747314,
      "learning_rate": 1.969134510428119e-05,
      "loss": 2.7755,
      "step": 6550
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0373735427856445,
      "learning_rate": 1.9691243589551e-05,
      "loss": 2.5672,
      "step": 6551
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0687743425369263,
      "learning_rate": 1.9691142058391524e-05,
      "loss": 2.626,
      "step": 6552
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0876725912094116,
      "learning_rate": 1.969104051080293e-05,
      "loss": 2.8862,
      "step": 6553
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9472460746765137,
      "learning_rate": 1.969093894678539e-05,
      "loss": 2.6804,
      "step": 6554
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6626849174499512,
      "learning_rate": 1.9690837366339072e-05,
      "loss": 2.731,
      "step": 6555
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9727734923362732,
      "learning_rate": 1.9690735769464157e-05,
      "loss": 2.6781,
      "step": 6556
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.14547860622406,
      "learning_rate": 1.9690634156160814e-05,
      "loss": 2.4866,
      "step": 6557
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0818393230438232,
      "learning_rate": 1.9690532526429213e-05,
      "loss": 2.7726,
      "step": 6558
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1114100217819214,
      "learning_rate": 1.9690430880269526e-05,
      "loss": 2.6763,
      "step": 6559
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9462980628013611,
      "learning_rate": 1.9690329217681927e-05,
      "loss": 2.4877,
      "step": 6560
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.748724341392517,
      "learning_rate": 1.969022753866659e-05,
      "loss": 2.7735,
      "step": 6561
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2521122694015503,
      "learning_rate": 1.9690125843223682e-05,
      "loss": 2.5065,
      "step": 6562
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.014901041984558,
      "learning_rate": 1.9690024131353382e-05,
      "loss": 2.8371,
      "step": 6563
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9952060580253601,
      "learning_rate": 1.968992240305586e-05,
      "loss": 2.7114,
      "step": 6564
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9745831489562988,
      "learning_rate": 1.9689820658331283e-05,
      "loss": 2.6229,
      "step": 6565
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.045127511024475,
      "learning_rate": 1.9689718897179834e-05,
      "loss": 2.5265,
      "step": 6566
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1809380054473877,
      "learning_rate": 1.9689617119601677e-05,
      "loss": 2.7308,
      "step": 6567
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0047813653945923,
      "learning_rate": 1.9689515325596988e-05,
      "loss": 2.6646,
      "step": 6568
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.13541841506958,
      "learning_rate": 1.9689413515165937e-05,
      "loss": 2.6451,
      "step": 6569
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0857784748077393,
      "learning_rate": 1.96893116883087e-05,
      "loss": 2.5726,
      "step": 6570
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1303478479385376,
      "learning_rate": 1.9689209845025448e-05,
      "loss": 2.6564,
      "step": 6571
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9796340465545654,
      "learning_rate": 1.9689107985316355e-05,
      "loss": 2.67,
      "step": 6572
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1266682147979736,
      "learning_rate": 1.968900610918159e-05,
      "loss": 2.8146,
      "step": 6573
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9670664072036743,
      "learning_rate": 1.968890421662133e-05,
      "loss": 2.6665,
      "step": 6574
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.001393437385559,
      "learning_rate": 1.9688802307635745e-05,
      "loss": 2.6687,
      "step": 6575
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0619064569473267,
      "learning_rate": 1.9688700382225007e-05,
      "loss": 2.7703,
      "step": 6576
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9860274791717529,
      "learning_rate": 1.9688598440389293e-05,
      "loss": 2.8106,
      "step": 6577
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.939640998840332,
      "learning_rate": 1.9688496482128774e-05,
      "loss": 2.4951,
      "step": 6578
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0778425931930542,
      "learning_rate": 1.968839450744362e-05,
      "loss": 2.7072,
      "step": 6579
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1443846225738525,
      "learning_rate": 1.9688292516334006e-05,
      "loss": 2.59,
      "step": 6580
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.000913381576538,
      "learning_rate": 1.9688190508800106e-05,
      "loss": 2.6592,
      "step": 6581
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9315844774246216,
      "learning_rate": 1.9688088484842092e-05,
      "loss": 2.6663,
      "step": 6582
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0418757200241089,
      "learning_rate": 1.9687986444460136e-05,
      "loss": 2.7943,
      "step": 6583
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9333417415618896,
      "learning_rate": 1.968788438765441e-05,
      "loss": 2.6036,
      "step": 6584
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0004749298095703,
      "learning_rate": 1.9687782314425093e-05,
      "loss": 2.5201,
      "step": 6585
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0576245784759521,
      "learning_rate": 1.9687680224772352e-05,
      "loss": 2.8009,
      "step": 6586
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0391433238983154,
      "learning_rate": 1.968757811869636e-05,
      "loss": 2.8326,
      "step": 6587
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.072402000427246,
      "learning_rate": 1.968747599619729e-05,
      "loss": 2.5075,
      "step": 6588
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0164754390716553,
      "learning_rate": 1.9687373857275324e-05,
      "loss": 2.5074,
      "step": 6589
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0219993591308594,
      "learning_rate": 1.9687271701930624e-05,
      "loss": 2.606,
      "step": 6590
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0128954648971558,
      "learning_rate": 1.968716953016337e-05,
      "loss": 2.8694,
      "step": 6591
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0055017471313477,
      "learning_rate": 1.968706734197373e-05,
      "loss": 2.6125,
      "step": 6592
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.002204179763794,
      "learning_rate": 1.968696513736188e-05,
      "loss": 2.4256,
      "step": 6593
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9187242984771729,
      "learning_rate": 1.9686862916327993e-05,
      "loss": 2.6199,
      "step": 6594
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9675179719924927,
      "learning_rate": 1.968676067887224e-05,
      "loss": 2.739,
      "step": 6595
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9983639717102051,
      "learning_rate": 1.96866584249948e-05,
      "loss": 2.7674,
      "step": 6596
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9446819424629211,
      "learning_rate": 1.9686556154695845e-05,
      "loss": 2.7607,
      "step": 6597
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9381859302520752,
      "learning_rate": 1.9686453867975546e-05,
      "loss": 2.7073,
      "step": 6598
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9281131625175476,
      "learning_rate": 1.9686351564834076e-05,
      "loss": 2.4698,
      "step": 6599
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0380905866622925,
      "learning_rate": 1.968624924527161e-05,
      "loss": 2.7428,
      "step": 6600
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1406304836273193,
      "learning_rate": 1.9686146909288317e-05,
      "loss": 2.7194,
      "step": 6601
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9879899621009827,
      "learning_rate": 1.9686044556884377e-05,
      "loss": 2.674,
      "step": 6602
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0053327083587646,
      "learning_rate": 1.968594218805996e-05,
      "loss": 2.5252,
      "step": 6603
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.943615198135376,
      "learning_rate": 1.9685839802815243e-05,
      "loss": 2.7177,
      "step": 6604
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0226942300796509,
      "learning_rate": 1.9685737401150394e-05,
      "loss": 2.6588,
      "step": 6605
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0439330339431763,
      "learning_rate": 1.9685634983065588e-05,
      "loss": 2.5072,
      "step": 6606
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0367426872253418,
      "learning_rate": 1.9685532548561004e-05,
      "loss": 2.5083,
      "step": 6607
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0574901103973389,
      "learning_rate": 1.9685430097636812e-05,
      "loss": 2.7649,
      "step": 6608
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0978654623031616,
      "learning_rate": 1.9685327630293184e-05,
      "loss": 2.5868,
      "step": 6609
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0077050924301147,
      "learning_rate": 1.9685225146530294e-05,
      "loss": 2.4441,
      "step": 6610
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0387897491455078,
      "learning_rate": 1.9685122646348317e-05,
      "loss": 2.5327,
      "step": 6611
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1154723167419434,
      "learning_rate": 1.9685020129747426e-05,
      "loss": 2.6004,
      "step": 6612
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9341617226600647,
      "learning_rate": 1.9684917596727798e-05,
      "loss": 2.57,
      "step": 6613
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0358760356903076,
      "learning_rate": 1.96848150472896e-05,
      "loss": 2.8491,
      "step": 6614
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9603898525238037,
      "learning_rate": 1.9684712481433016e-05,
      "loss": 2.5165,
      "step": 6615
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0922566652297974,
      "learning_rate": 1.9684609899158212e-05,
      "loss": 2.7479,
      "step": 6616
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8749831914901733,
      "learning_rate": 1.968450730046536e-05,
      "loss": 2.7255,
      "step": 6617
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.948517918586731,
      "learning_rate": 1.9684404685354644e-05,
      "loss": 2.7364,
      "step": 6618
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0427664518356323,
      "learning_rate": 1.968430205382623e-05,
      "loss": 2.6523,
      "step": 6619
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0185706615447998,
      "learning_rate": 1.9684199405880293e-05,
      "loss": 2.6455,
      "step": 6620
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0176714658737183,
      "learning_rate": 1.968409674151701e-05,
      "loss": 2.5659,
      "step": 6621
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0789260864257812,
      "learning_rate": 1.9683994060736545e-05,
      "loss": 2.6818,
      "step": 6622
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2023489475250244,
      "learning_rate": 1.9683891363539085e-05,
      "loss": 2.2989,
      "step": 6623
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8857354521751404,
      "learning_rate": 1.9683788649924803e-05,
      "loss": 2.5921,
      "step": 6624
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2353116273880005,
      "learning_rate": 1.9683685919893866e-05,
      "loss": 2.7046,
      "step": 6625
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.940261960029602,
      "learning_rate": 1.9683583173446448e-05,
      "loss": 2.7736,
      "step": 6626
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0008835792541504,
      "learning_rate": 1.9683480410582732e-05,
      "loss": 2.738,
      "step": 6627
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9667580723762512,
      "learning_rate": 1.9683377631302884e-05,
      "loss": 2.5358,
      "step": 6628
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9774143099784851,
      "learning_rate": 1.968327483560708e-05,
      "loss": 2.7192,
      "step": 6629
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.936389148235321,
      "learning_rate": 1.9683172023495496e-05,
      "loss": 2.6915,
      "step": 6630
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0115582942962646,
      "learning_rate": 1.9683069194968306e-05,
      "loss": 2.613,
      "step": 6631
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8786092400550842,
      "learning_rate": 1.9682966350025684e-05,
      "loss": 2.5684,
      "step": 6632
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.128036618232727,
      "learning_rate": 1.96828634886678e-05,
      "loss": 2.6546,
      "step": 6633
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9983900189399719,
      "learning_rate": 1.9682760610894837e-05,
      "loss": 2.7851,
      "step": 6634
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9918808341026306,
      "learning_rate": 1.9682657716706967e-05,
      "loss": 2.6063,
      "step": 6635
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1050512790679932,
      "learning_rate": 1.9682554806104357e-05,
      "loss": 2.8128,
      "step": 6636
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0580366849899292,
      "learning_rate": 1.968245187908719e-05,
      "loss": 2.4666,
      "step": 6637
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0959904193878174,
      "learning_rate": 1.9682348935655637e-05,
      "loss": 2.9451,
      "step": 6638
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9944653511047363,
      "learning_rate": 1.9682245975809873e-05,
      "loss": 2.7162,
      "step": 6639
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9506570100784302,
      "learning_rate": 1.9682142999550073e-05,
      "loss": 2.7044,
      "step": 6640
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9120942950248718,
      "learning_rate": 1.968204000687641e-05,
      "loss": 2.4872,
      "step": 6641
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0126770734786987,
      "learning_rate": 1.968193699778906e-05,
      "loss": 2.6359,
      "step": 6642
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8688876032829285,
      "learning_rate": 1.9681833972288197e-05,
      "loss": 2.5543,
      "step": 6643
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2536640167236328,
      "learning_rate": 1.9681730930373993e-05,
      "loss": 2.6426,
      "step": 6644
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9244018793106079,
      "learning_rate": 1.968162787204663e-05,
      "loss": 2.6723,
      "step": 6645
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0059483051300049,
      "learning_rate": 1.9681524797306276e-05,
      "loss": 2.3249,
      "step": 6646
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1755449771881104,
      "learning_rate": 1.9681421706153108e-05,
      "loss": 2.6386,
      "step": 6647
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9268753528594971,
      "learning_rate": 1.96813185985873e-05,
      "loss": 2.6922,
      "step": 6648
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1191128492355347,
      "learning_rate": 1.968121547460903e-05,
      "loss": 2.7902,
      "step": 6649
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9854731559753418,
      "learning_rate": 1.968111233421847e-05,
      "loss": 2.552,
      "step": 6650
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0539299249649048,
      "learning_rate": 1.9681009177415796e-05,
      "loss": 2.7195,
      "step": 6651
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9883242249488831,
      "learning_rate": 1.9680906004201178e-05,
      "loss": 2.5903,
      "step": 6652
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9882740378379822,
      "learning_rate": 1.96808028145748e-05,
      "loss": 2.7216,
      "step": 6653
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.043848991394043,
      "learning_rate": 1.9680699608536828e-05,
      "loss": 2.4938,
      "step": 6654
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1449735164642334,
      "learning_rate": 1.968059638608744e-05,
      "loss": 2.6601,
      "step": 6655
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9787124395370483,
      "learning_rate": 1.9680493147226815e-05,
      "loss": 2.5814,
      "step": 6656
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.196425199508667,
      "learning_rate": 1.9680389891955128e-05,
      "loss": 2.6881,
      "step": 6657
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0461732149124146,
      "learning_rate": 1.9680286620272548e-05,
      "loss": 2.8587,
      "step": 6658
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9771456718444824,
      "learning_rate": 1.968018333217925e-05,
      "loss": 2.8147,
      "step": 6659
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9877874851226807,
      "learning_rate": 1.9680080027675416e-05,
      "loss": 2.385,
      "step": 6660
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9432540535926819,
      "learning_rate": 1.9679976706761214e-05,
      "loss": 2.7614,
      "step": 6661
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.139464259147644,
      "learning_rate": 1.9679873369436822e-05,
      "loss": 2.5461,
      "step": 6662
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1004818677902222,
      "learning_rate": 1.967977001570242e-05,
      "loss": 2.7047,
      "step": 6663
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9789004325866699,
      "learning_rate": 1.9679666645558176e-05,
      "loss": 2.6932,
      "step": 6664
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0840450525283813,
      "learning_rate": 1.967956325900427e-05,
      "loss": 2.5858,
      "step": 6665
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.05278480052948,
      "learning_rate": 1.9679459856040873e-05,
      "loss": 2.7426,
      "step": 6666
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8961800336837769,
      "learning_rate": 1.9679356436668164e-05,
      "loss": 2.3569,
      "step": 6667
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0640372037887573,
      "learning_rate": 1.9679253000886316e-05,
      "loss": 2.8755,
      "step": 6668
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9784168601036072,
      "learning_rate": 1.9679149548695505e-05,
      "loss": 2.6858,
      "step": 6669
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.026346206665039,
      "learning_rate": 1.967904608009591e-05,
      "loss": 2.7054,
      "step": 6670
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.066583275794983,
      "learning_rate": 1.96789425950877e-05,
      "loss": 2.5086,
      "step": 6671
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0619574785232544,
      "learning_rate": 1.9678839093671055e-05,
      "loss": 2.7061,
      "step": 6672
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1604713201522827,
      "learning_rate": 1.9678735575846148e-05,
      "loss": 2.7525,
      "step": 6673
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9706156253814697,
      "learning_rate": 1.9678632041613156e-05,
      "loss": 2.5194,
      "step": 6674
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0226738452911377,
      "learning_rate": 1.9678528490972254e-05,
      "loss": 2.6963,
      "step": 6675
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9621396660804749,
      "learning_rate": 1.9678424923923617e-05,
      "loss": 2.5453,
      "step": 6676
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0200401544570923,
      "learning_rate": 1.9678321340467422e-05,
      "loss": 2.6637,
      "step": 6677
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.050444483757019,
      "learning_rate": 1.967821774060384e-05,
      "loss": 2.4128,
      "step": 6678
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9114784002304077,
      "learning_rate": 1.9678114124333058e-05,
      "loss": 2.4909,
      "step": 6679
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9471170902252197,
      "learning_rate": 1.967801049165524e-05,
      "loss": 2.7916,
      "step": 6680
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9525700807571411,
      "learning_rate": 1.9677906842570564e-05,
      "loss": 2.5849,
      "step": 6681
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1603589057922363,
      "learning_rate": 1.967780317707921e-05,
      "loss": 2.5388,
      "step": 6682
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9957748651504517,
      "learning_rate": 1.967769949518135e-05,
      "loss": 2.6588,
      "step": 6683
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0535272359848022,
      "learning_rate": 1.9677595796877158e-05,
      "loss": 2.7291,
      "step": 6684
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9818023443222046,
      "learning_rate": 1.9677492082166818e-05,
      "loss": 2.7797,
      "step": 6685
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9068862795829773,
      "learning_rate": 1.9677388351050497e-05,
      "loss": 2.7084,
      "step": 6686
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8966214656829834,
      "learning_rate": 1.9677284603528375e-05,
      "loss": 2.5591,
      "step": 6687
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0093432664871216,
      "learning_rate": 1.9677180839600627e-05,
      "loss": 2.6236,
      "step": 6688
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0297294855117798,
      "learning_rate": 1.967707705926743e-05,
      "loss": 2.8039,
      "step": 6689
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2433040142059326,
      "learning_rate": 1.967697326252896e-05,
      "loss": 2.6213,
      "step": 6690
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9372017979621887,
      "learning_rate": 1.967686944938539e-05,
      "loss": 2.798,
      "step": 6691
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9145030379295349,
      "learning_rate": 1.96767656198369e-05,
      "loss": 2.5623,
      "step": 6692
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0267622470855713,
      "learning_rate": 1.9676661773883664e-05,
      "loss": 2.6491,
      "step": 6693
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0431216955184937,
      "learning_rate": 1.9676557911525856e-05,
      "loss": 2.508,
      "step": 6694
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0709877014160156,
      "learning_rate": 1.967645403276366e-05,
      "loss": 2.5689,
      "step": 6695
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0156373977661133,
      "learning_rate": 1.967635013759724e-05,
      "loss": 2.7794,
      "step": 6696
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.04246187210083,
      "learning_rate": 1.967624622602678e-05,
      "loss": 2.5988,
      "step": 6697
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9440767765045166,
      "learning_rate": 1.9676142298052454e-05,
      "loss": 2.7194,
      "step": 6698
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9927741289138794,
      "learning_rate": 1.9676038353674443e-05,
      "loss": 2.7638,
      "step": 6699
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9544238448143005,
      "learning_rate": 1.9675934392892914e-05,
      "loss": 2.774,
      "step": 6700
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0922009944915771,
      "learning_rate": 1.967583041570805e-05,
      "loss": 2.7055,
      "step": 6701
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0129122734069824,
      "learning_rate": 1.9675726422120024e-05,
      "loss": 2.7017,
      "step": 6702
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0473358631134033,
      "learning_rate": 1.967562241212902e-05,
      "loss": 2.5641,
      "step": 6703
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.142825722694397,
      "learning_rate": 1.96755183857352e-05,
      "loss": 2.5517,
      "step": 6704
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0096737146377563,
      "learning_rate": 1.9675414342938754e-05,
      "loss": 2.4573,
      "step": 6705
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0536314249038696,
      "learning_rate": 1.9675310283739854e-05,
      "loss": 2.3604,
      "step": 6706
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.967923104763031,
      "learning_rate": 1.967520620813867e-05,
      "loss": 2.4418,
      "step": 6707
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9220336675643921,
      "learning_rate": 1.9675102116135386e-05,
      "loss": 2.4541,
      "step": 6708
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.041562557220459,
      "learning_rate": 1.9674998007730173e-05,
      "loss": 2.5689,
      "step": 6709
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0901094675064087,
      "learning_rate": 1.9674893882923215e-05,
      "loss": 2.8384,
      "step": 6710
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0680263042449951,
      "learning_rate": 1.9674789741714683e-05,
      "loss": 2.5906,
      "step": 6711
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9827126264572144,
      "learning_rate": 1.9674685584104754e-05,
      "loss": 2.6751,
      "step": 6712
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.038462519645691,
      "learning_rate": 1.9674581410093608e-05,
      "loss": 2.4709,
      "step": 6713
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.939147412776947,
      "learning_rate": 1.9674477219681417e-05,
      "loss": 2.5154,
      "step": 6714
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9961629509925842,
      "learning_rate": 1.967437301286836e-05,
      "loss": 2.7876,
      "step": 6715
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.05203115940094,
      "learning_rate": 1.9674268789654615e-05,
      "loss": 2.5872,
      "step": 6716
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9837511777877808,
      "learning_rate": 1.9674164550040357e-05,
      "loss": 2.714,
      "step": 6717
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9756331443786621,
      "learning_rate": 1.9674060294025755e-05,
      "loss": 2.6358,
      "step": 6718
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.977575957775116,
      "learning_rate": 1.9673956021611e-05,
      "loss": 2.637,
      "step": 6719
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0850168466567993,
      "learning_rate": 1.967385173279626e-05,
      "loss": 2.7632,
      "step": 6720
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9699726104736328,
      "learning_rate": 1.9673747427581716e-05,
      "loss": 2.479,
      "step": 6721
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0830214023590088,
      "learning_rate": 1.9673643105967544e-05,
      "loss": 2.8024,
      "step": 6722
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0851550102233887,
      "learning_rate": 1.9673538767953915e-05,
      "loss": 2.6466,
      "step": 6723
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9080936312675476,
      "learning_rate": 1.9673434413541014e-05,
      "loss": 2.561,
      "step": 6724
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.017120361328125,
      "learning_rate": 1.9673330042729013e-05,
      "loss": 2.6517,
      "step": 6725
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9935858249664307,
      "learning_rate": 1.967322565551809e-05,
      "loss": 2.601,
      "step": 6726
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9878154397010803,
      "learning_rate": 1.9673121251908426e-05,
      "loss": 2.6752,
      "step": 6727
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0526245832443237,
      "learning_rate": 1.967301683190019e-05,
      "loss": 2.8307,
      "step": 6728
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9725987911224365,
      "learning_rate": 1.9672912395493563e-05,
      "loss": 2.7959,
      "step": 6729
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.059175729751587,
      "learning_rate": 1.9672807942688722e-05,
      "loss": 2.6698,
      "step": 6730
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0276800394058228,
      "learning_rate": 1.9672703473485848e-05,
      "loss": 2.5666,
      "step": 6731
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9715832471847534,
      "learning_rate": 1.967259898788511e-05,
      "loss": 2.6601,
      "step": 6732
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0319856405258179,
      "learning_rate": 1.9672494485886696e-05,
      "loss": 2.513,
      "step": 6733
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0207878351211548,
      "learning_rate": 1.967238996749077e-05,
      "loss": 2.6077,
      "step": 6734
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0218963623046875,
      "learning_rate": 1.967228543269752e-05,
      "loss": 3.011,
      "step": 6735
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.061646580696106,
      "learning_rate": 1.967218088150712e-05,
      "loss": 2.6602,
      "step": 6736
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0006030797958374,
      "learning_rate": 1.9672076313919747e-05,
      "loss": 2.7205,
      "step": 6737
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0465571880340576,
      "learning_rate": 1.9671971729935573e-05,
      "loss": 2.5631,
      "step": 6738
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0598657131195068,
      "learning_rate": 1.9671867129554784e-05,
      "loss": 2.754,
      "step": 6739
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9930582046508789,
      "learning_rate": 1.9671762512777554e-05,
      "loss": 2.8465,
      "step": 6740
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9155705571174622,
      "learning_rate": 1.9671657879604057e-05,
      "loss": 2.6282,
      "step": 6741
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0411293506622314,
      "learning_rate": 1.967155323003447e-05,
      "loss": 2.7858,
      "step": 6742
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0443542003631592,
      "learning_rate": 1.967144856406898e-05,
      "loss": 2.5503,
      "step": 6743
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0788295269012451,
      "learning_rate": 1.9671343881707756e-05,
      "loss": 2.816,
      "step": 6744
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8858140110969543,
      "learning_rate": 1.9671239182950977e-05,
      "loss": 2.32,
      "step": 6745
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9347570538520813,
      "learning_rate": 1.967113446779882e-05,
      "loss": 2.7309,
      "step": 6746
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.027930498123169,
      "learning_rate": 1.9671029736251464e-05,
      "loss": 2.6851,
      "step": 6747
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9309865236282349,
      "learning_rate": 1.967092498830909e-05,
      "loss": 2.6161,
      "step": 6748
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9326075911521912,
      "learning_rate": 1.9670820223971865e-05,
      "loss": 2.6809,
      "step": 6749
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9865699410438538,
      "learning_rate": 1.9670715443239977e-05,
      "loss": 2.7983,
      "step": 6750
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9718540906906128,
      "learning_rate": 1.9670610646113596e-05,
      "loss": 2.7917,
      "step": 6751
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9309812188148499,
      "learning_rate": 1.9670505832592907e-05,
      "loss": 2.5913,
      "step": 6752
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0920473337173462,
      "learning_rate": 1.9670401002678084e-05,
      "loss": 2.7749,
      "step": 6753
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1571060419082642,
      "learning_rate": 1.9670296156369305e-05,
      "loss": 2.5777,
      "step": 6754
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.037937879562378,
      "learning_rate": 1.9670191293666745e-05,
      "loss": 2.6231,
      "step": 6755
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.140974521636963,
      "learning_rate": 1.9670086414570588e-05,
      "loss": 2.783,
      "step": 6756
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9843239784240723,
      "learning_rate": 1.966998151908101e-05,
      "loss": 2.7916,
      "step": 6757
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9499247074127197,
      "learning_rate": 1.966987660719818e-05,
      "loss": 2.6432,
      "step": 6758
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1065479516983032,
      "learning_rate": 1.9669771678922288e-05,
      "loss": 2.6247,
      "step": 6759
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9790713787078857,
      "learning_rate": 1.96696667342535e-05,
      "loss": 2.6683,
      "step": 6760
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9967543482780457,
      "learning_rate": 1.966956177319201e-05,
      "loss": 2.7362,
      "step": 6761
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0690512657165527,
      "learning_rate": 1.966945679573798e-05,
      "loss": 2.4587,
      "step": 6762
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0410411357879639,
      "learning_rate": 1.9669351801891598e-05,
      "loss": 2.6021,
      "step": 6763
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.917977511882782,
      "learning_rate": 1.9669246791653036e-05,
      "loss": 2.7446,
      "step": 6764
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9369667172431946,
      "learning_rate": 1.966914176502248e-05,
      "loss": 2.5616,
      "step": 6765
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0209180116653442,
      "learning_rate": 1.9669036722000096e-05,
      "loss": 2.7411,
      "step": 6766
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1046143770217896,
      "learning_rate": 1.9668931662586067e-05,
      "loss": 2.7017,
      "step": 6767
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0216809511184692,
      "learning_rate": 1.9668826586780578e-05,
      "loss": 2.6955,
      "step": 6768
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9322608709335327,
      "learning_rate": 1.96687214945838e-05,
      "loss": 2.47,
      "step": 6769
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.116113543510437,
      "learning_rate": 1.9668616385995916e-05,
      "loss": 2.6831,
      "step": 6770
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9550116062164307,
      "learning_rate": 1.96685112610171e-05,
      "loss": 2.5221,
      "step": 6771
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9926509857177734,
      "learning_rate": 1.966840611964753e-05,
      "loss": 2.7613,
      "step": 6772
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9484850168228149,
      "learning_rate": 1.9668300961887385e-05,
      "loss": 2.7976,
      "step": 6773
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0237634181976318,
      "learning_rate": 1.9668195787736845e-05,
      "loss": 2.6755,
      "step": 6774
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0704431533813477,
      "learning_rate": 1.966809059719609e-05,
      "loss": 2.5677,
      "step": 6775
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0447685718536377,
      "learning_rate": 1.9667985390265292e-05,
      "loss": 2.5793,
      "step": 6776
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1153534650802612,
      "learning_rate": 1.9667880166944634e-05,
      "loss": 2.7633,
      "step": 6777
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3576602935791016,
      "learning_rate": 1.9667774927234293e-05,
      "loss": 2.8282,
      "step": 6778
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0161501169204712,
      "learning_rate": 1.966766967113445e-05,
      "loss": 2.614,
      "step": 6779
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0270198583602905,
      "learning_rate": 1.966756439864528e-05,
      "loss": 2.5262,
      "step": 6780
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9509328603744507,
      "learning_rate": 1.966745910976696e-05,
      "loss": 2.5268,
      "step": 6781
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0039000511169434,
      "learning_rate": 1.9667353804499675e-05,
      "loss": 2.6517,
      "step": 6782
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0409002304077148,
      "learning_rate": 1.9667248482843595e-05,
      "loss": 2.8394,
      "step": 6783
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.98499995470047,
      "learning_rate": 1.9667143144798908e-05,
      "loss": 2.6601,
      "step": 6784
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9873984456062317,
      "learning_rate": 1.9667037790365787e-05,
      "loss": 2.7226,
      "step": 6785
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0567946434020996,
      "learning_rate": 1.9666932419544408e-05,
      "loss": 2.6417,
      "step": 6786
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9532361030578613,
      "learning_rate": 1.9666827032334958e-05,
      "loss": 2.7147,
      "step": 6787
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0367783308029175,
      "learning_rate": 1.966672162873761e-05,
      "loss": 2.6945,
      "step": 6788
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0404350757598877,
      "learning_rate": 1.9666616208752536e-05,
      "loss": 2.4446,
      "step": 6789
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0106937885284424,
      "learning_rate": 1.966651077237993e-05,
      "loss": 2.5202,
      "step": 6790
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.020710825920105,
      "learning_rate": 1.966640531961996e-05,
      "loss": 2.5676,
      "step": 6791
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0414583683013916,
      "learning_rate": 1.966629985047281e-05,
      "loss": 2.6052,
      "step": 6792
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0263011455535889,
      "learning_rate": 1.9666194364938655e-05,
      "loss": 2.6614,
      "step": 6793
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9584606289863586,
      "learning_rate": 1.9666088863017674e-05,
      "loss": 2.5596,
      "step": 6794
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0302886962890625,
      "learning_rate": 1.9665983344710047e-05,
      "loss": 2.6426,
      "step": 6795
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0773180723190308,
      "learning_rate": 1.9665877810015955e-05,
      "loss": 2.6094,
      "step": 6796
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.075087547302246,
      "learning_rate": 1.9665772258935574e-05,
      "loss": 2.753,
      "step": 6797
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9898967146873474,
      "learning_rate": 1.9665666691469083e-05,
      "loss": 2.874,
      "step": 6798
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.409207820892334,
      "learning_rate": 1.966556110761666e-05,
      "loss": 2.508,
      "step": 6799
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0025752782821655,
      "learning_rate": 1.966545550737849e-05,
      "loss": 2.7987,
      "step": 6800
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9030567407608032,
      "learning_rate": 1.966534989075475e-05,
      "loss": 2.6346,
      "step": 6801
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9953054189682007,
      "learning_rate": 1.9665244257745607e-05,
      "loss": 2.7289,
      "step": 6802
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0141748189926147,
      "learning_rate": 1.966513860835126e-05,
      "loss": 2.5089,
      "step": 6803
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9752747416496277,
      "learning_rate": 1.966503294257187e-05,
      "loss": 2.7463,
      "step": 6804
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0362032651901245,
      "learning_rate": 1.9664927260407628e-05,
      "loss": 2.6149,
      "step": 6805
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0071858167648315,
      "learning_rate": 1.9664821561858712e-05,
      "loss": 2.6464,
      "step": 6806
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9238053560256958,
      "learning_rate": 1.9664715846925295e-05,
      "loss": 2.5381,
      "step": 6807
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9718489646911621,
      "learning_rate": 1.966461011560756e-05,
      "loss": 2.4772,
      "step": 6808
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2275793552398682,
      "learning_rate": 1.9664504367905686e-05,
      "loss": 2.6589,
      "step": 6809
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0222595930099487,
      "learning_rate": 1.9664398603819856e-05,
      "loss": 2.5867,
      "step": 6810
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9632840156555176,
      "learning_rate": 1.966429282335024e-05,
      "loss": 2.5203,
      "step": 6811
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0382615327835083,
      "learning_rate": 1.9664187026497025e-05,
      "loss": 2.5731,
      "step": 6812
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8941071033477783,
      "learning_rate": 1.966408121326039e-05,
      "loss": 2.6121,
      "step": 6813
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2650256156921387,
      "learning_rate": 1.9663975383640512e-05,
      "loss": 2.8826,
      "step": 6814
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9503604173660278,
      "learning_rate": 1.966386953763757e-05,
      "loss": 2.7022,
      "step": 6815
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0318806171417236,
      "learning_rate": 1.9663763675251743e-05,
      "loss": 2.736,
      "step": 6816
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1236188411712646,
      "learning_rate": 1.9663657796483215e-05,
      "loss": 2.7421,
      "step": 6817
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0536094903945923,
      "learning_rate": 1.9663551901332162e-05,
      "loss": 2.74,
      "step": 6818
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0872561931610107,
      "learning_rate": 1.9663445989798763e-05,
      "loss": 2.863,
      "step": 6819
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9953265190124512,
      "learning_rate": 1.96633400618832e-05,
      "loss": 2.5942,
      "step": 6820
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0624030828475952,
      "learning_rate": 1.9663234117585647e-05,
      "loss": 2.5527,
      "step": 6821
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9886159896850586,
      "learning_rate": 1.966312815690629e-05,
      "loss": 2.6118,
      "step": 6822
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.176649570465088,
      "learning_rate": 1.9663022179845306e-05,
      "loss": 2.6585,
      "step": 6823
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.965588390827179,
      "learning_rate": 1.9662916186402876e-05,
      "loss": 2.5545,
      "step": 6824
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9973468780517578,
      "learning_rate": 1.9662810176579178e-05,
      "loss": 2.6324,
      "step": 6825
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.957396924495697,
      "learning_rate": 1.9662704150374394e-05,
      "loss": 2.747,
      "step": 6826
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9800969362258911,
      "learning_rate": 1.9662598107788698e-05,
      "loss": 2.9956,
      "step": 6827
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1500012874603271,
      "learning_rate": 1.9662492048822276e-05,
      "loss": 2.7724,
      "step": 6828
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9942860007286072,
      "learning_rate": 1.9662385973475307e-05,
      "loss": 2.6626,
      "step": 6829
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9242096543312073,
      "learning_rate": 1.966227988174797e-05,
      "loss": 2.7272,
      "step": 6830
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9861516952514648,
      "learning_rate": 1.9662173773640445e-05,
      "loss": 2.5126,
      "step": 6831
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.121106743812561,
      "learning_rate": 1.9662067649152907e-05,
      "loss": 2.7021,
      "step": 6832
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9343417882919312,
      "learning_rate": 1.9661961508285545e-05,
      "loss": 2.7331,
      "step": 6833
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9888206124305725,
      "learning_rate": 1.966185535103853e-05,
      "loss": 2.5003,
      "step": 6834
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9925469756126404,
      "learning_rate": 1.9661749177412046e-05,
      "loss": 2.7348,
      "step": 6835
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.020347237586975,
      "learning_rate": 1.9661642987406276e-05,
      "loss": 2.8883,
      "step": 6836
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2784394025802612,
      "learning_rate": 1.9661536781021396e-05,
      "loss": 2.6288,
      "step": 6837
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0965462923049927,
      "learning_rate": 1.9661430558257586e-05,
      "loss": 2.5801,
      "step": 6838
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0175209045410156,
      "learning_rate": 1.9661324319115028e-05,
      "loss": 2.572,
      "step": 6839
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.038392186164856,
      "learning_rate": 1.9661218063593902e-05,
      "loss": 2.522,
      "step": 6840
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.013870120048523,
      "learning_rate": 1.9661111791694388e-05,
      "loss": 2.6951,
      "step": 6841
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1143884658813477,
      "learning_rate": 1.9661005503416662e-05,
      "loss": 2.6301,
      "step": 6842
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0582470893859863,
      "learning_rate": 1.9660899198760913e-05,
      "loss": 2.8904,
      "step": 6843
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1103861331939697,
      "learning_rate": 1.9660792877727312e-05,
      "loss": 2.6646,
      "step": 6844
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2477530241012573,
      "learning_rate": 1.9660686540316044e-05,
      "loss": 2.6382,
      "step": 6845
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9983086585998535,
      "learning_rate": 1.966058018652729e-05,
      "loss": 2.5504,
      "step": 6846
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9268068671226501,
      "learning_rate": 1.9660473816361226e-05,
      "loss": 2.6731,
      "step": 6847
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0097293853759766,
      "learning_rate": 1.966036742981804e-05,
      "loss": 2.3909,
      "step": 6848
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2586328983306885,
      "learning_rate": 1.96602610268979e-05,
      "loss": 2.7258,
      "step": 6849
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.944679856300354,
      "learning_rate": 1.9660154607600998e-05,
      "loss": 2.632,
      "step": 6850
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0404165983200073,
      "learning_rate": 1.966004817192751e-05,
      "loss": 2.6032,
      "step": 6851
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.061619758605957,
      "learning_rate": 1.9659941719877618e-05,
      "loss": 2.5301,
      "step": 6852
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.965303897857666,
      "learning_rate": 1.9659835251451502e-05,
      "loss": 2.7253,
      "step": 6853
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9695076942443848,
      "learning_rate": 1.965972876664934e-05,
      "loss": 2.663,
      "step": 6854
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9802736043930054,
      "learning_rate": 1.965962226547131e-05,
      "loss": 2.8644,
      "step": 6855
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1554548740386963,
      "learning_rate": 1.9659515747917602e-05,
      "loss": 2.5896,
      "step": 6856
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9965160489082336,
      "learning_rate": 1.965940921398839e-05,
      "loss": 2.6726,
      "step": 6857
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0068877935409546,
      "learning_rate": 1.965930266368386e-05,
      "loss": 2.6665,
      "step": 6858
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9866238832473755,
      "learning_rate": 1.9659196097004184e-05,
      "loss": 2.474,
      "step": 6859
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0855532884597778,
      "learning_rate": 1.9659089513949546e-05,
      "loss": 2.5933,
      "step": 6860
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0213583707809448,
      "learning_rate": 1.965898291452013e-05,
      "loss": 2.6201,
      "step": 6861
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.075579285621643,
      "learning_rate": 1.9658876298716113e-05,
      "loss": 2.4922,
      "step": 6862
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1716206073760986,
      "learning_rate": 1.965876966653768e-05,
      "loss": 2.5078,
      "step": 6863
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0349478721618652,
      "learning_rate": 1.965866301798501e-05,
      "loss": 2.5654,
      "step": 6864
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0081020593643188,
      "learning_rate": 1.965855635305828e-05,
      "loss": 2.6115,
      "step": 6865
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9704532027244568,
      "learning_rate": 1.9658449671757672e-05,
      "loss": 2.5069,
      "step": 6866
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0026617050170898,
      "learning_rate": 1.9658342974083372e-05,
      "loss": 2.5997,
      "step": 6867
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1574965715408325,
      "learning_rate": 1.9658236260035557e-05,
      "loss": 2.5116,
      "step": 6868
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.989354133605957,
      "learning_rate": 1.9658129529614408e-05,
      "loss": 2.5276,
      "step": 6869
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0029548406600952,
      "learning_rate": 1.965802278282011e-05,
      "loss": 2.6961,
      "step": 6870
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9433063864707947,
      "learning_rate": 1.9657916019652835e-05,
      "loss": 2.886,
      "step": 6871
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8947268724441528,
      "learning_rate": 1.965780924011277e-05,
      "loss": 2.6537,
      "step": 6872
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0968486070632935,
      "learning_rate": 1.96577024442001e-05,
      "loss": 2.843,
      "step": 6873
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9128909707069397,
      "learning_rate": 1.9657595631914998e-05,
      "loss": 2.623,
      "step": 6874
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2381514310836792,
      "learning_rate": 1.9657488803257644e-05,
      "loss": 2.5925,
      "step": 6875
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9508642554283142,
      "learning_rate": 1.965738195822823e-05,
      "loss": 2.7641,
      "step": 6876
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0061167478561401,
      "learning_rate": 1.9657275096826925e-05,
      "loss": 2.5009,
      "step": 6877
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0333493947982788,
      "learning_rate": 1.9657168219053923e-05,
      "loss": 2.7023,
      "step": 6878
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9676011204719543,
      "learning_rate": 1.9657061324909392e-05,
      "loss": 2.6192,
      "step": 6879
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1448302268981934,
      "learning_rate": 1.965695441439352e-05,
      "loss": 2.5137,
      "step": 6880
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0607130527496338,
      "learning_rate": 1.9656847487506492e-05,
      "loss": 2.6376,
      "step": 6881
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4426180124282837,
      "learning_rate": 1.965674054424848e-05,
      "loss": 2.5923,
      "step": 6882
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9596514105796814,
      "learning_rate": 1.965663358461967e-05,
      "loss": 2.6658,
      "step": 6883
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9662124514579773,
      "learning_rate": 1.965652660862025e-05,
      "loss": 2.5363,
      "step": 6884
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.019294023513794,
      "learning_rate": 1.9656419616250387e-05,
      "loss": 2.5866,
      "step": 6885
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0495229959487915,
      "learning_rate": 1.965631260751027e-05,
      "loss": 2.7203,
      "step": 6886
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.933094322681427,
      "learning_rate": 1.9656205582400085e-05,
      "loss": 2.5391,
      "step": 6887
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9960260987281799,
      "learning_rate": 1.965609854092001e-05,
      "loss": 2.6026,
      "step": 6888
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0918564796447754,
      "learning_rate": 1.965599148307022e-05,
      "loss": 2.7381,
      "step": 6889
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.12962806224823,
      "learning_rate": 1.9655884408850907e-05,
      "loss": 2.6129,
      "step": 6890
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0477310419082642,
      "learning_rate": 1.9655777318262245e-05,
      "loss": 2.7257,
      "step": 6891
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9271345734596252,
      "learning_rate": 1.9655670211304417e-05,
      "loss": 2.9586,
      "step": 6892
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9056363105773926,
      "learning_rate": 1.9655563087977606e-05,
      "loss": 2.4763,
      "step": 6893
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9372739791870117,
      "learning_rate": 1.9655455948281992e-05,
      "loss": 2.7897,
      "step": 6894
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9679545760154724,
      "learning_rate": 1.965534879221776e-05,
      "loss": 2.6849,
      "step": 6895
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.042628288269043,
      "learning_rate": 1.9655241619785093e-05,
      "loss": 2.6628,
      "step": 6896
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0299875736236572,
      "learning_rate": 1.9655134430984164e-05,
      "loss": 2.6042,
      "step": 6897
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9734917879104614,
      "learning_rate": 1.965502722581516e-05,
      "loss": 2.6338,
      "step": 6898
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9845370054244995,
      "learning_rate": 1.9654920004278264e-05,
      "loss": 2.6105,
      "step": 6899
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9434557557106018,
      "learning_rate": 1.9654812766373655e-05,
      "loss": 2.6972,
      "step": 6900
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9294106364250183,
      "learning_rate": 1.9654705512101517e-05,
      "loss": 2.7545,
      "step": 6901
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9859327673912048,
      "learning_rate": 1.965459824146203e-05,
      "loss": 2.6138,
      "step": 6902
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0517019033432007,
      "learning_rate": 1.965449095445538e-05,
      "loss": 2.6946,
      "step": 6903
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9551599621772766,
      "learning_rate": 1.9654383651081744e-05,
      "loss": 2.4909,
      "step": 6904
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9393953084945679,
      "learning_rate": 1.9654276331341305e-05,
      "loss": 2.5426,
      "step": 6905
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0021238327026367,
      "learning_rate": 1.9654168995234248e-05,
      "loss": 2.8488,
      "step": 6906
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.00188410282135,
      "learning_rate": 1.9654061642760746e-05,
      "loss": 2.4899,
      "step": 6907
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9712054133415222,
      "learning_rate": 1.965395427392099e-05,
      "loss": 2.7121,
      "step": 6908
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0341622829437256,
      "learning_rate": 1.9653846888715165e-05,
      "loss": 2.7454,
      "step": 6909
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.110274314880371,
      "learning_rate": 1.9653739487143442e-05,
      "loss": 2.7444,
      "step": 6910
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0202772617340088,
      "learning_rate": 1.9653632069206012e-05,
      "loss": 2.8206,
      "step": 6911
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9397922158241272,
      "learning_rate": 1.9653524634903052e-05,
      "loss": 2.3534,
      "step": 6912
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9746245741844177,
      "learning_rate": 1.9653417184234745e-05,
      "loss": 2.6338,
      "step": 6913
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.046812653541565,
      "learning_rate": 1.9653309717201277e-05,
      "loss": 2.6721,
      "step": 6914
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0199953317642212,
      "learning_rate": 1.9653202233802827e-05,
      "loss": 2.6888,
      "step": 6915
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.952154278755188,
      "learning_rate": 1.9653094734039572e-05,
      "loss": 2.6799,
      "step": 6916
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0786676406860352,
      "learning_rate": 1.96529872179117e-05,
      "loss": 2.5967,
      "step": 6917
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9367772340774536,
      "learning_rate": 1.9652879685419402e-05,
      "loss": 2.6305,
      "step": 6918
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0847110748291016,
      "learning_rate": 1.9652772136562846e-05,
      "loss": 2.6282,
      "step": 6919
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9827528595924377,
      "learning_rate": 1.965266457134222e-05,
      "loss": 2.7279,
      "step": 6920
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0625038146972656,
      "learning_rate": 1.9652556989757703e-05,
      "loss": 2.8527,
      "step": 6921
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.06807541847229,
      "learning_rate": 1.965244939180948e-05,
      "loss": 2.7853,
      "step": 6922
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9872176051139832,
      "learning_rate": 1.965234177749774e-05,
      "loss": 2.712,
      "step": 6923
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.969109833240509,
      "learning_rate": 1.9652234146822656e-05,
      "loss": 2.6382,
      "step": 6924
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1029495000839233,
      "learning_rate": 1.9652126499784413e-05,
      "loss": 2.4062,
      "step": 6925
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0091501474380493,
      "learning_rate": 1.9652018836383192e-05,
      "loss": 2.6143,
      "step": 6926
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9086034297943115,
      "learning_rate": 1.9651911156619178e-05,
      "loss": 2.5643,
      "step": 6927
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9598749876022339,
      "learning_rate": 1.9651803460492557e-05,
      "loss": 2.6177,
      "step": 6928
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.030579924583435,
      "learning_rate": 1.9651695748003506e-05,
      "loss": 2.6736,
      "step": 6929
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2870761156082153,
      "learning_rate": 1.9651588019152208e-05,
      "loss": 2.5941,
      "step": 6930
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0737102031707764,
      "learning_rate": 1.965148027393885e-05,
      "loss": 2.558,
      "step": 6931
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1170687675476074,
      "learning_rate": 1.9651372512363606e-05,
      "loss": 2.5761,
      "step": 6932
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.129224181175232,
      "learning_rate": 1.965126473442667e-05,
      "loss": 2.6841,
      "step": 6933
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.142613410949707,
      "learning_rate": 1.9651156940128216e-05,
      "loss": 2.5999,
      "step": 6934
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1299030780792236,
      "learning_rate": 1.965104912946843e-05,
      "loss": 2.5304,
      "step": 6935
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1603691577911377,
      "learning_rate": 1.9650941302447497e-05,
      "loss": 2.5429,
      "step": 6936
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9510185122489929,
      "learning_rate": 1.9650833459065595e-05,
      "loss": 2.5134,
      "step": 6937
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0159286260604858,
      "learning_rate": 1.9650725599322912e-05,
      "loss": 2.8206,
      "step": 6938
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9728896021842957,
      "learning_rate": 1.9650617723219625e-05,
      "loss": 2.6697,
      "step": 6939
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3669673204421997,
      "learning_rate": 1.965050983075592e-05,
      "loss": 2.4613,
      "step": 6940
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0410661697387695,
      "learning_rate": 1.965040192193198e-05,
      "loss": 2.5295,
      "step": 6941
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.012377142906189,
      "learning_rate": 1.9650293996747987e-05,
      "loss": 2.7056,
      "step": 6942
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9996613264083862,
      "learning_rate": 1.9650186055204128e-05,
      "loss": 2.7387,
      "step": 6943
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.065482258796692,
      "learning_rate": 1.965007809730058e-05,
      "loss": 2.4322,
      "step": 6944
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9605908393859863,
      "learning_rate": 1.9649970123037527e-05,
      "loss": 2.5206,
      "step": 6945
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.088831901550293,
      "learning_rate": 1.964986213241516e-05,
      "loss": 2.696,
      "step": 6946
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9279038906097412,
      "learning_rate": 1.964975412543365e-05,
      "loss": 2.6345,
      "step": 6947
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0302321910858154,
      "learning_rate": 1.9649646102093184e-05,
      "loss": 2.3978,
      "step": 6948
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0363658666610718,
      "learning_rate": 1.9649538062393954e-05,
      "loss": 2.5694,
      "step": 6949
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0219088792800903,
      "learning_rate": 1.9649430006336132e-05,
      "loss": 2.5942,
      "step": 6950
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0092294216156006,
      "learning_rate": 1.9649321933919906e-05,
      "loss": 2.5708,
      "step": 6951
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9321755170822144,
      "learning_rate": 1.964921384514546e-05,
      "loss": 2.6735,
      "step": 6952
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.115222454071045,
      "learning_rate": 1.9649105740012975e-05,
      "loss": 2.551,
      "step": 6953
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0093214511871338,
      "learning_rate": 1.9648997618522634e-05,
      "loss": 2.6149,
      "step": 6954
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9541849493980408,
      "learning_rate": 1.9648889480674624e-05,
      "loss": 2.6605,
      "step": 6955
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.069210410118103,
      "learning_rate": 1.9648781326469122e-05,
      "loss": 2.7302,
      "step": 6956
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0621747970581055,
      "learning_rate": 1.9648673155906317e-05,
      "loss": 2.6054,
      "step": 6957
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9184136986732483,
      "learning_rate": 1.964856496898639e-05,
      "loss": 2.5717,
      "step": 6958
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9620539546012878,
      "learning_rate": 1.9648456765709527e-05,
      "loss": 2.5792,
      "step": 6959
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1298424005508423,
      "learning_rate": 1.9648348546075904e-05,
      "loss": 2.7646,
      "step": 6960
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0096675157546997,
      "learning_rate": 1.9648240310085716e-05,
      "loss": 2.6037,
      "step": 6961
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9564855098724365,
      "learning_rate": 1.964813205773914e-05,
      "loss": 2.716,
      "step": 6962
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2948613166809082,
      "learning_rate": 1.9648023789036356e-05,
      "loss": 2.5999,
      "step": 6963
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0001099109649658,
      "learning_rate": 1.964791550397755e-05,
      "loss": 2.4953,
      "step": 6964
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9699574112892151,
      "learning_rate": 1.9647807202562913e-05,
      "loss": 2.8401,
      "step": 6965
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0614326000213623,
      "learning_rate": 1.964769888479262e-05,
      "loss": 2.762,
      "step": 6966
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9380520582199097,
      "learning_rate": 1.9647590550666857e-05,
      "loss": 2.6771,
      "step": 6967
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0458637475967407,
      "learning_rate": 1.9647482200185804e-05,
      "loss": 2.638,
      "step": 6968
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9472005367279053,
      "learning_rate": 1.9647373833349652e-05,
      "loss": 2.6185,
      "step": 6969
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0352189540863037,
      "learning_rate": 1.9647265450158582e-05,
      "loss": 2.5343,
      "step": 6970
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1060112714767456,
      "learning_rate": 1.9647157050612773e-05,
      "loss": 2.7948,
      "step": 6971
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.151240587234497,
      "learning_rate": 1.9647048634712414e-05,
      "loss": 2.7697,
      "step": 6972
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9593169093132019,
      "learning_rate": 1.964694020245769e-05,
      "loss": 2.5857,
      "step": 6973
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9626807570457458,
      "learning_rate": 1.964683175384878e-05,
      "loss": 2.8982,
      "step": 6974
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9903219938278198,
      "learning_rate": 1.964672328888587e-05,
      "loss": 2.6606,
      "step": 6975
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.991330623626709,
      "learning_rate": 1.9646614807569148e-05,
      "loss": 2.5755,
      "step": 6976
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9828394651412964,
      "learning_rate": 1.964650630989879e-05,
      "loss": 2.9783,
      "step": 6977
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0282548666000366,
      "learning_rate": 1.9646397795874984e-05,
      "loss": 2.6267,
      "step": 6978
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1843935251235962,
      "learning_rate": 1.9646289265497915e-05,
      "loss": 2.6618,
      "step": 6979
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1220406293869019,
      "learning_rate": 1.9646180718767765e-05,
      "loss": 2.6544,
      "step": 6980
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0551214218139648,
      "learning_rate": 1.964607215568472e-05,
      "loss": 2.6166,
      "step": 6981
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8772099614143372,
      "learning_rate": 1.9645963576248962e-05,
      "loss": 2.6936,
      "step": 6982
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0158957242965698,
      "learning_rate": 1.9645854980460676e-05,
      "loss": 2.7537,
      "step": 6983
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0121523141860962,
      "learning_rate": 1.9645746368320046e-05,
      "loss": 2.6672,
      "step": 6984
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9572421312332153,
      "learning_rate": 1.964563773982726e-05,
      "loss": 2.7583,
      "step": 6985
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9849485754966736,
      "learning_rate": 1.9645529094982493e-05,
      "loss": 2.528,
      "step": 6986
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0148048400878906,
      "learning_rate": 1.9645420433785936e-05,
      "loss": 2.4399,
      "step": 6987
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.997208297252655,
      "learning_rate": 1.964531175623777e-05,
      "loss": 2.6796,
      "step": 6988
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9404876232147217,
      "learning_rate": 1.9645203062338184e-05,
      "loss": 2.4852,
      "step": 6989
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9189801216125488,
      "learning_rate": 1.9645094352087354e-05,
      "loss": 2.5552,
      "step": 6990
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9334126114845276,
      "learning_rate": 1.9644985625485478e-05,
      "loss": 2.3105,
      "step": 6991
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0883097648620605,
      "learning_rate": 1.9644876882532725e-05,
      "loss": 2.7155,
      "step": 6992
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0176912546157837,
      "learning_rate": 1.964476812322929e-05,
      "loss": 2.6712,
      "step": 6993
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9044598340988159,
      "learning_rate": 1.964465934757535e-05,
      "loss": 2.6307,
      "step": 6994
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0190503597259521,
      "learning_rate": 1.9644550555571097e-05,
      "loss": 2.8249,
      "step": 6995
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0075098276138306,
      "learning_rate": 1.9644441747216708e-05,
      "loss": 2.4195,
      "step": 6996
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.91617751121521,
      "learning_rate": 1.9644332922512374e-05,
      "loss": 2.7449,
      "step": 6997
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1027302742004395,
      "learning_rate": 1.964422408145827e-05,
      "loss": 2.6635,
      "step": 6998
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.101270318031311,
      "learning_rate": 1.9644115224054592e-05,
      "loss": 2.6512,
      "step": 6999
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.185899257659912,
      "learning_rate": 1.964400635030152e-05,
      "loss": 2.7263,
      "step": 7000
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9387572407722473,
      "learning_rate": 1.9643897460199236e-05,
      "loss": 2.4803,
      "step": 7001
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.912569522857666,
      "learning_rate": 1.964378855374793e-05,
      "loss": 2.7791,
      "step": 7002
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9455347657203674,
      "learning_rate": 1.9643679630947777e-05,
      "loss": 2.8091,
      "step": 7003
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0087610483169556,
      "learning_rate": 1.964357069179897e-05,
      "loss": 2.5709,
      "step": 7004
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0148643255233765,
      "learning_rate": 1.9643461736301695e-05,
      "loss": 2.7657,
      "step": 7005
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9785507321357727,
      "learning_rate": 1.9643352764456133e-05,
      "loss": 2.5395,
      "step": 7006
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0581563711166382,
      "learning_rate": 1.9643243776262463e-05,
      "loss": 2.7153,
      "step": 7007
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9969948530197144,
      "learning_rate": 1.964313477172088e-05,
      "loss": 2.5734,
      "step": 7008
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9283049702644348,
      "learning_rate": 1.9643025750831566e-05,
      "loss": 2.5744,
      "step": 7009
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9286220073699951,
      "learning_rate": 1.9642916713594702e-05,
      "loss": 2.7009,
      "step": 7010
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.004396915435791,
      "learning_rate": 1.9642807660010478e-05,
      "loss": 2.5887,
      "step": 7011
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9791271090507507,
      "learning_rate": 1.964269859007907e-05,
      "loss": 2.3335,
      "step": 7012
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9735604524612427,
      "learning_rate": 1.9642589503800673e-05,
      "loss": 2.5772,
      "step": 7013
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9543394446372986,
      "learning_rate": 1.9642480401175467e-05,
      "loss": 2.5312,
      "step": 7014
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.042497992515564,
      "learning_rate": 1.964237128220364e-05,
      "loss": 2.7442,
      "step": 7015
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9276105165481567,
      "learning_rate": 1.9642262146885375e-05,
      "loss": 2.7374,
      "step": 7016
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0244901180267334,
      "learning_rate": 1.9642152995220855e-05,
      "loss": 2.6819,
      "step": 7017
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9420542120933533,
      "learning_rate": 1.9642043827210266e-05,
      "loss": 2.6709,
      "step": 7018
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9728504419326782,
      "learning_rate": 1.9641934642853794e-05,
      "loss": 2.7234,
      "step": 7019
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9601316452026367,
      "learning_rate": 1.9641825442151626e-05,
      "loss": 2.4879,
      "step": 7020
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.220660924911499,
      "learning_rate": 1.9641716225103945e-05,
      "loss": 2.6421,
      "step": 7021
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0626475811004639,
      "learning_rate": 1.9641606991710937e-05,
      "loss": 2.7322,
      "step": 7022
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9852219820022583,
      "learning_rate": 1.9641497741972784e-05,
      "loss": 2.5506,
      "step": 7023
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9902220964431763,
      "learning_rate": 1.9641388475889675e-05,
      "loss": 2.5748,
      "step": 7024
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0621356964111328,
      "learning_rate": 1.9641279193461794e-05,
      "loss": 2.2938,
      "step": 7025
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.020508050918579,
      "learning_rate": 1.9641169894689324e-05,
      "loss": 2.749,
      "step": 7026
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0359472036361694,
      "learning_rate": 1.9641060579572456e-05,
      "loss": 2.7512,
      "step": 7027
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0033949613571167,
      "learning_rate": 1.964095124811137e-05,
      "loss": 2.5068,
      "step": 7028
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9972927570343018,
      "learning_rate": 1.9640841900306253e-05,
      "loss": 2.6073,
      "step": 7029
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0033962726593018,
      "learning_rate": 1.9640732536157294e-05,
      "loss": 2.6612,
      "step": 7030
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0786691904067993,
      "learning_rate": 1.9640623155664672e-05,
      "loss": 2.6285,
      "step": 7031
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9872465133666992,
      "learning_rate": 1.9640513758828576e-05,
      "loss": 2.7652,
      "step": 7032
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.024330973625183,
      "learning_rate": 1.964040434564919e-05,
      "loss": 2.7818,
      "step": 7033
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.000707983970642,
      "learning_rate": 1.9640294916126698e-05,
      "loss": 2.5959,
      "step": 7034
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.159824252128601,
      "learning_rate": 1.9640185470261292e-05,
      "loss": 2.5263,
      "step": 7035
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9385574460029602,
      "learning_rate": 1.9640076008053152e-05,
      "loss": 2.4909,
      "step": 7036
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0960133075714111,
      "learning_rate": 1.9639966529502466e-05,
      "loss": 2.6295,
      "step": 7037
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0840270519256592,
      "learning_rate": 1.9639857034609418e-05,
      "loss": 2.5374,
      "step": 7038
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9898453950881958,
      "learning_rate": 1.9639747523374194e-05,
      "loss": 2.5082,
      "step": 7039
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9932399392127991,
      "learning_rate": 1.963963799579698e-05,
      "loss": 2.6116,
      "step": 7040
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.018317461013794,
      "learning_rate": 1.963952845187796e-05,
      "loss": 2.548,
      "step": 7041
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9814376831054688,
      "learning_rate": 1.9639418891617322e-05,
      "loss": 2.8288,
      "step": 7042
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9453418850898743,
      "learning_rate": 1.9639309315015248e-05,
      "loss": 2.9343,
      "step": 7043
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9668567180633545,
      "learning_rate": 1.9639199722071932e-05,
      "loss": 2.4482,
      "step": 7044
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0405027866363525,
      "learning_rate": 1.9639090112787552e-05,
      "loss": 2.5129,
      "step": 7045
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1356500387191772,
      "learning_rate": 1.9638980487162297e-05,
      "loss": 2.4576,
      "step": 7046
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.187757968902588,
      "learning_rate": 1.963887084519635e-05,
      "loss": 2.6939,
      "step": 7047
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.051877498626709,
      "learning_rate": 1.9638761186889904e-05,
      "loss": 2.6279,
      "step": 7048
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0157032012939453,
      "learning_rate": 1.9638651512243136e-05,
      "loss": 2.7397,
      "step": 7049
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8836470246315002,
      "learning_rate": 1.9638541821256238e-05,
      "loss": 2.7935,
      "step": 7050
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9409613609313965,
      "learning_rate": 1.963843211392939e-05,
      "loss": 2.5789,
      "step": 7051
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3678407669067383,
      "learning_rate": 1.9638322390262785e-05,
      "loss": 2.5155,
      "step": 7052
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0134294033050537,
      "learning_rate": 1.9638212650256606e-05,
      "loss": 2.6394,
      "step": 7053
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9541304111480713,
      "learning_rate": 1.9638102893911037e-05,
      "loss": 2.6514,
      "step": 7054
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0342085361480713,
      "learning_rate": 1.963799312122627e-05,
      "loss": 2.6833,
      "step": 7055
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0042909383773804,
      "learning_rate": 1.963788333220248e-05,
      "loss": 2.6556,
      "step": 7056
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9691692590713501,
      "learning_rate": 1.9637773526839866e-05,
      "loss": 2.5963,
      "step": 7057
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0818402767181396,
      "learning_rate": 1.9637663705138607e-05,
      "loss": 2.4587,
      "step": 7058
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9439024925231934,
      "learning_rate": 1.963755386709889e-05,
      "loss": 2.4791,
      "step": 7059
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.022200107574463,
      "learning_rate": 1.96374440127209e-05,
      "loss": 2.6012,
      "step": 7060
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9580317139625549,
      "learning_rate": 1.963733414200483e-05,
      "loss": 2.4034,
      "step": 7061
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9968355298042297,
      "learning_rate": 1.9637224254950854e-05,
      "loss": 2.6656,
      "step": 7062
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0879216194152832,
      "learning_rate": 1.963711435155917e-05,
      "loss": 2.6692,
      "step": 7063
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9096124768257141,
      "learning_rate": 1.963700443182996e-05,
      "loss": 2.7163,
      "step": 7064
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0091912746429443,
      "learning_rate": 1.963689449576341e-05,
      "loss": 2.6157,
      "step": 7065
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9618443846702576,
      "learning_rate": 1.9636784543359704e-05,
      "loss": 2.541,
      "step": 7066
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0606825351715088,
      "learning_rate": 1.9636674574619034e-05,
      "loss": 2.6436,
      "step": 7067
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9904553294181824,
      "learning_rate": 1.9636564589541578e-05,
      "loss": 2.8744,
      "step": 7068
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0309882164001465,
      "learning_rate": 1.9636454588127533e-05,
      "loss": 2.7857,
      "step": 7069
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.004752516746521,
      "learning_rate": 1.9636344570377076e-05,
      "loss": 2.5299,
      "step": 7070
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.071951985359192,
      "learning_rate": 1.9636234536290403e-05,
      "loss": 2.5781,
      "step": 7071
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9386432766914368,
      "learning_rate": 1.9636124485867693e-05,
      "loss": 2.5403,
      "step": 7072
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.038942575454712,
      "learning_rate": 1.9636014419109136e-05,
      "loss": 2.8304,
      "step": 7073
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9237015247344971,
      "learning_rate": 1.963590433601491e-05,
      "loss": 2.8912,
      "step": 7074
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.016755223274231,
      "learning_rate": 1.963579423658522e-05,
      "loss": 2.6986,
      "step": 7075
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0426020622253418,
      "learning_rate": 1.9635684120820236e-05,
      "loss": 2.7287,
      "step": 7076
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9313874840736389,
      "learning_rate": 1.963557398872015e-05,
      "loss": 2.4906,
      "step": 7077
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0253547430038452,
      "learning_rate": 1.963546384028515e-05,
      "loss": 2.5616,
      "step": 7078
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.077224850654602,
      "learning_rate": 1.9635353675515423e-05,
      "loss": 2.6801,
      "step": 7079
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0686994791030884,
      "learning_rate": 1.9635243494411155e-05,
      "loss": 2.8016,
      "step": 7080
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0618879795074463,
      "learning_rate": 1.963513329697253e-05,
      "loss": 2.614,
      "step": 7081
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.964528501033783,
      "learning_rate": 1.9635023083199737e-05,
      "loss": 2.9422,
      "step": 7082
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.956747829914093,
      "learning_rate": 1.9634912853092967e-05,
      "loss": 2.5629,
      "step": 7083
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0257588624954224,
      "learning_rate": 1.96348026066524e-05,
      "loss": 2.5763,
      "step": 7084
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0756828784942627,
      "learning_rate": 1.9634692343878224e-05,
      "loss": 2.671,
      "step": 7085
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3546507358551025,
      "learning_rate": 1.963458206477063e-05,
      "loss": 2.6163,
      "step": 7086
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0379085540771484,
      "learning_rate": 1.9634471769329804e-05,
      "loss": 2.5579,
      "step": 7087
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9320725202560425,
      "learning_rate": 1.963436145755593e-05,
      "loss": 2.7888,
      "step": 7088
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9279264807701111,
      "learning_rate": 1.9634251129449198e-05,
      "loss": 2.5137,
      "step": 7089
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0028325319290161,
      "learning_rate": 1.963414078500979e-05,
      "loss": 2.8043,
      "step": 7090
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0337706804275513,
      "learning_rate": 1.96340304242379e-05,
      "loss": 2.6203,
      "step": 7091
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1070291996002197,
      "learning_rate": 1.963392004713371e-05,
      "loss": 2.6786,
      "step": 7092
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.038434624671936,
      "learning_rate": 1.9633809653697412e-05,
      "loss": 2.6379,
      "step": 7093
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9306145906448364,
      "learning_rate": 1.9633699243929187e-05,
      "loss": 2.5761,
      "step": 7094
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9885148406028748,
      "learning_rate": 1.9633588817829228e-05,
      "loss": 2.6989,
      "step": 7095
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.988243579864502,
      "learning_rate": 1.9633478375397715e-05,
      "loss": 2.7786,
      "step": 7096
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0089292526245117,
      "learning_rate": 1.9633367916634844e-05,
      "loss": 2.7249,
      "step": 7097
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0228917598724365,
      "learning_rate": 1.9633257441540796e-05,
      "loss": 2.5675,
      "step": 7098
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9736841320991516,
      "learning_rate": 1.9633146950115762e-05,
      "loss": 2.544,
      "step": 7099
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.98048996925354,
      "learning_rate": 1.9633036442359926e-05,
      "loss": 2.6943,
      "step": 7100
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0286622047424316,
      "learning_rate": 1.9632925918273478e-05,
      "loss": 2.6448,
      "step": 7101
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.01613450050354,
      "learning_rate": 1.9632815377856604e-05,
      "loss": 2.6669,
      "step": 7102
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.025024175643921,
      "learning_rate": 1.963270482110949e-05,
      "loss": 2.7088,
      "step": 7103
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0019603967666626,
      "learning_rate": 1.963259424803233e-05,
      "loss": 2.556,
      "step": 7104
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.009050726890564,
      "learning_rate": 1.9632483658625304e-05,
      "loss": 2.594,
      "step": 7105
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9415296912193298,
      "learning_rate": 1.96323730528886e-05,
      "loss": 2.5776,
      "step": 7106
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9735630750656128,
      "learning_rate": 1.963226243082241e-05,
      "loss": 2.5861,
      "step": 7107
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9394318461418152,
      "learning_rate": 1.9632151792426916e-05,
      "loss": 2.8063,
      "step": 7108
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9410163760185242,
      "learning_rate": 1.9632041137702312e-05,
      "loss": 2.6417,
      "step": 7109
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9628435969352722,
      "learning_rate": 1.963193046664878e-05,
      "loss": 2.5508,
      "step": 7110
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0039293766021729,
      "learning_rate": 1.963181977926651e-05,
      "loss": 2.6793,
      "step": 7111
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2899620532989502,
      "learning_rate": 1.9631709075555693e-05,
      "loss": 2.7204,
      "step": 7112
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.031860113143921,
      "learning_rate": 1.9631598355516507e-05,
      "loss": 2.6362,
      "step": 7113
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9699891805648804,
      "learning_rate": 1.9631487619149152e-05,
      "loss": 2.8239,
      "step": 7114
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9213936924934387,
      "learning_rate": 1.9631376866453808e-05,
      "loss": 2.5815,
      "step": 7115
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.8587548732757568,
      "learning_rate": 1.9631266097430663e-05,
      "loss": 2.4725,
      "step": 7116
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9509318470954895,
      "learning_rate": 1.9631155312079904e-05,
      "loss": 2.6349,
      "step": 7117
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0792622566223145,
      "learning_rate": 1.9631044510401724e-05,
      "loss": 2.7205,
      "step": 7118
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.171037197113037,
      "learning_rate": 1.9630933692396307e-05,
      "loss": 2.5236,
      "step": 7119
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9814565181732178,
      "learning_rate": 1.9630822858063842e-05,
      "loss": 2.6697,
      "step": 7120
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0279309749603271,
      "learning_rate": 1.963071200740452e-05,
      "loss": 2.6086,
      "step": 7121
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9574964046478271,
      "learning_rate": 1.9630601140418517e-05,
      "loss": 2.4936,
      "step": 7122
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1413347721099854,
      "learning_rate": 1.9630490257106037e-05,
      "loss": 2.7897,
      "step": 7123
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0120846033096313,
      "learning_rate": 1.9630379357467254e-05,
      "loss": 2.4473,
      "step": 7124
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2626800537109375,
      "learning_rate": 1.9630268441502368e-05,
      "loss": 2.6258,
      "step": 7125
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.031319260597229,
      "learning_rate": 1.963015750921156e-05,
      "loss": 2.7124,
      "step": 7126
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1696889400482178,
      "learning_rate": 1.963004656059502e-05,
      "loss": 2.569,
      "step": 7127
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.013521432876587,
      "learning_rate": 1.9629935595652934e-05,
      "loss": 2.609,
      "step": 7128
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.084220290184021,
      "learning_rate": 1.9629824614385494e-05,
      "loss": 2.9335,
      "step": 7129
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9810965061187744,
      "learning_rate": 1.9629713616792884e-05,
      "loss": 2.7551,
      "step": 7130
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9971849322319031,
      "learning_rate": 1.962960260287529e-05,
      "loss": 2.2575,
      "step": 7131
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0275828838348389,
      "learning_rate": 1.962949157263291e-05,
      "loss": 2.5147,
      "step": 7132
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9316754341125488,
      "learning_rate": 1.9629380526065927e-05,
      "loss": 2.6113,
      "step": 7133
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0102543830871582,
      "learning_rate": 1.9629269463174526e-05,
      "loss": 2.6038,
      "step": 7134
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1442036628723145,
      "learning_rate": 1.96291583839589e-05,
      "loss": 2.9218,
      "step": 7135
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9260269403457642,
      "learning_rate": 1.9629047288419235e-05,
      "loss": 2.7092,
      "step": 7136
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0713860988616943,
      "learning_rate": 1.9628936176555716e-05,
      "loss": 2.6693,
      "step": 7137
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9987269639968872,
      "learning_rate": 1.962882504836854e-05,
      "loss": 2.5186,
      "step": 7138
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.975031316280365,
      "learning_rate": 1.9628713903857887e-05,
      "loss": 2.5876,
      "step": 7139
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9919087886810303,
      "learning_rate": 1.9628602743023948e-05,
      "loss": 2.521,
      "step": 7140
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9510054588317871,
      "learning_rate": 1.9628491565866915e-05,
      "loss": 2.7109,
      "step": 7141
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0409646034240723,
      "learning_rate": 1.962838037238697e-05,
      "loss": 2.6055,
      "step": 7142
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0384280681610107,
      "learning_rate": 1.962826916258431e-05,
      "loss": 2.7531,
      "step": 7143
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0638600587844849,
      "learning_rate": 1.9628157936459117e-05,
      "loss": 2.6597,
      "step": 7144
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0143146514892578,
      "learning_rate": 1.9628046694011578e-05,
      "loss": 2.8538,
      "step": 7145
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9892411231994629,
      "learning_rate": 1.962793543524189e-05,
      "loss": 2.577,
      "step": 7146
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9242268800735474,
      "learning_rate": 1.9627824160150238e-05,
      "loss": 2.787,
      "step": 7147
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0240261554718018,
      "learning_rate": 1.9627712868736802e-05,
      "loss": 2.79,
      "step": 7148
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0233429670333862,
      "learning_rate": 1.9627601561001784e-05,
      "loss": 2.5147,
      "step": 7149
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.009346604347229,
      "learning_rate": 1.9627490236945362e-05,
      "loss": 2.7012,
      "step": 7150
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.942711591720581,
      "learning_rate": 1.9627378896567733e-05,
      "loss": 2.9614,
      "step": 7151
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0683730840682983,
      "learning_rate": 1.962726753986908e-05,
      "loss": 2.5011,
      "step": 7152
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0550674200057983,
      "learning_rate": 1.9627156166849594e-05,
      "loss": 2.4346,
      "step": 7153
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9693611264228821,
      "learning_rate": 1.9627044777509466e-05,
      "loss": 2.4467,
      "step": 7154
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0280282497406006,
      "learning_rate": 1.962693337184888e-05,
      "loss": 2.7726,
      "step": 7155
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9567698240280151,
      "learning_rate": 1.9626821949868023e-05,
      "loss": 2.5388,
      "step": 7156
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.981499969959259,
      "learning_rate": 1.9626710511567094e-05,
      "loss": 2.6167,
      "step": 7157
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9188598394393921,
      "learning_rate": 1.9626599056946276e-05,
      "loss": 2.5786,
      "step": 7158
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9819559454917908,
      "learning_rate": 1.9626487586005755e-05,
      "loss": 2.7226,
      "step": 7159
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0838487148284912,
      "learning_rate": 1.962637609874573e-05,
      "loss": 2.8098,
      "step": 7160
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0748707056045532,
      "learning_rate": 1.9626264595166376e-05,
      "loss": 2.6079,
      "step": 7161
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.205682635307312,
      "learning_rate": 1.962615307526789e-05,
      "loss": 2.6984,
      "step": 7162
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1090950965881348,
      "learning_rate": 1.962604153905046e-05,
      "loss": 2.5803,
      "step": 7163
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.045884132385254,
      "learning_rate": 1.9625929986514277e-05,
      "loss": 2.8298,
      "step": 7164
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9561421871185303,
      "learning_rate": 1.962581841765953e-05,
      "loss": 2.8029,
      "step": 7165
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0842397212982178,
      "learning_rate": 1.96257068324864e-05,
      "loss": 2.7823,
      "step": 7166
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0482126474380493,
      "learning_rate": 1.962559523099509e-05,
      "loss": 2.6128,
      "step": 7167
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9960202574729919,
      "learning_rate": 1.9625483613185776e-05,
      "loss": 2.7645,
      "step": 7168
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9695040583610535,
      "learning_rate": 1.9625371979058652e-05,
      "loss": 2.563,
      "step": 7169
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0316762924194336,
      "learning_rate": 1.9625260328613917e-05,
      "loss": 2.5962,
      "step": 7170
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9751233458518982,
      "learning_rate": 1.9625148661851746e-05,
      "loss": 2.5567,
      "step": 7171
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9909396171569824,
      "learning_rate": 1.9625036978772335e-05,
      "loss": 2.8273,
      "step": 7172
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1238552331924438,
      "learning_rate": 1.9624925279375868e-05,
      "loss": 2.7874,
      "step": 7173
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9172836542129517,
      "learning_rate": 1.9624813563662545e-05,
      "loss": 2.5976,
      "step": 7174
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9080698490142822,
      "learning_rate": 1.962470183163254e-05,
      "loss": 2.68,
      "step": 7175
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0097477436065674,
      "learning_rate": 1.962459008328606e-05,
      "loss": 2.5019,
      "step": 7176
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0844439268112183,
      "learning_rate": 1.9624478318623283e-05,
      "loss": 2.4176,
      "step": 7177
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9676467180252075,
      "learning_rate": 1.96243665376444e-05,
      "loss": 2.6812,
      "step": 7178
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0382699966430664,
      "learning_rate": 1.9624254740349604e-05,
      "loss": 2.8455,
      "step": 7179
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.052003264427185,
      "learning_rate": 1.9624142926739082e-05,
      "loss": 2.7556,
      "step": 7180
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.923872172832489,
      "learning_rate": 1.9624031096813022e-05,
      "loss": 2.7138,
      "step": 7181
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0685702562332153,
      "learning_rate": 1.962391925057162e-05,
      "loss": 2.7167,
      "step": 7182
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0289779901504517,
      "learning_rate": 1.9623807388015055e-05,
      "loss": 2.3768,
      "step": 7183
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0928250551223755,
      "learning_rate": 1.9623695509143525e-05,
      "loss": 2.7243,
      "step": 7184
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0019832849502563,
      "learning_rate": 1.962358361395722e-05,
      "loss": 2.8267,
      "step": 7185
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.915457546710968,
      "learning_rate": 1.9623471702456322e-05,
      "loss": 2.7682,
      "step": 7186
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0927104949951172,
      "learning_rate": 1.962335977464103e-05,
      "loss": 2.7862,
      "step": 7187
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0491243600845337,
      "learning_rate": 1.9623247830511526e-05,
      "loss": 2.4913,
      "step": 7188
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9845076203346252,
      "learning_rate": 1.9623135870068006e-05,
      "loss": 2.4493,
      "step": 7189
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0213547945022583,
      "learning_rate": 1.9623023893310658e-05,
      "loss": 2.7612,
      "step": 7190
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0008039474487305,
      "learning_rate": 1.962291190023967e-05,
      "loss": 2.7689,
      "step": 7191
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9494182467460632,
      "learning_rate": 1.9622799890855232e-05,
      "loss": 2.5778,
      "step": 7192
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0528708696365356,
      "learning_rate": 1.9622687865157533e-05,
      "loss": 2.6378,
      "step": 7193
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1368520259857178,
      "learning_rate": 1.9622575823146766e-05,
      "loss": 2.7561,
      "step": 7194
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0753647089004517,
      "learning_rate": 1.962246376482312e-05,
      "loss": 2.5966,
      "step": 7195
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.025229811668396,
      "learning_rate": 1.9622351690186785e-05,
      "loss": 2.8199,
      "step": 7196
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9529913067817688,
      "learning_rate": 1.962223959923795e-05,
      "loss": 2.6926,
      "step": 7197
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9436497092247009,
      "learning_rate": 1.9622127491976807e-05,
      "loss": 2.82,
      "step": 7198
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0033525228500366,
      "learning_rate": 1.9622015368403544e-05,
      "loss": 2.4453,
      "step": 7199
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9564584493637085,
      "learning_rate": 1.962190322851835e-05,
      "loss": 2.5602,
      "step": 7200
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0683010816574097,
      "learning_rate": 1.9621791072321416e-05,
      "loss": 2.6198,
      "step": 7201
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.111754298210144,
      "learning_rate": 1.9621678899812934e-05,
      "loss": 2.8095,
      "step": 7202
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9362067580223083,
      "learning_rate": 1.9621566710993095e-05,
      "loss": 2.6353,
      "step": 7203
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0030674934387207,
      "learning_rate": 1.9621454505862085e-05,
      "loss": 2.7029,
      "step": 7204
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0266910791397095,
      "learning_rate": 1.9621342284420097e-05,
      "loss": 2.4443,
      "step": 7205
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.025289535522461,
      "learning_rate": 1.962123004666732e-05,
      "loss": 2.583,
      "step": 7206
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9493572115898132,
      "learning_rate": 1.9621117792603946e-05,
      "loss": 2.7127,
      "step": 7207
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9873312711715698,
      "learning_rate": 1.9621005522230162e-05,
      "loss": 2.5047,
      "step": 7208
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0025575160980225,
      "learning_rate": 1.9620893235546162e-05,
      "loss": 2.6886,
      "step": 7209
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0425922870635986,
      "learning_rate": 1.9620780932552137e-05,
      "loss": 2.7639,
      "step": 7210
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9116657972335815,
      "learning_rate": 1.9620668613248272e-05,
      "loss": 2.443,
      "step": 7211
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9008905291557312,
      "learning_rate": 1.9620556277634763e-05,
      "loss": 2.484,
      "step": 7212
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9528672695159912,
      "learning_rate": 1.96204439257118e-05,
      "loss": 2.5343,
      "step": 7213
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0702321529388428,
      "learning_rate": 1.962033155747957e-05,
      "loss": 2.7621,
      "step": 7214
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0416151285171509,
      "learning_rate": 1.9620219172938264e-05,
      "loss": 2.6698,
      "step": 7215
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4948350191116333,
      "learning_rate": 1.9620106772088074e-05,
      "loss": 2.764,
      "step": 7216
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0641077756881714,
      "learning_rate": 1.961999435492919e-05,
      "loss": 2.5407,
      "step": 7217
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1311438083648682,
      "learning_rate": 1.9619881921461803e-05,
      "loss": 2.5291,
      "step": 7218
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0018932819366455,
      "learning_rate": 1.9619769471686106e-05,
      "loss": 2.7591,
      "step": 7219
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9656382203102112,
      "learning_rate": 1.9619657005602283e-05,
      "loss": 2.7184,
      "step": 7220
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0464441776275635,
      "learning_rate": 1.961954452321053e-05,
      "loss": 2.6835,
      "step": 7221
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.002381443977356,
      "learning_rate": 1.9619432024511038e-05,
      "loss": 2.6685,
      "step": 7222
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9627336859703064,
      "learning_rate": 1.9619319509503995e-05,
      "loss": 2.7765,
      "step": 7223
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9404663443565369,
      "learning_rate": 1.9619206978189592e-05,
      "loss": 2.6973,
      "step": 7224
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0182586908340454,
      "learning_rate": 1.9619094430568023e-05,
      "loss": 2.5556,
      "step": 7225
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9702470302581787,
      "learning_rate": 1.9618981866639477e-05,
      "loss": 2.5592,
      "step": 7226
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2354158163070679,
      "learning_rate": 1.961886928640414e-05,
      "loss": 2.6555,
      "step": 7227
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.099446177482605,
      "learning_rate": 1.9618756689862212e-05,
      "loss": 2.6393,
      "step": 7228
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0818790197372437,
      "learning_rate": 1.9618644077013875e-05,
      "loss": 2.4809,
      "step": 7229
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9217556118965149,
      "learning_rate": 1.9618531447859323e-05,
      "loss": 2.6255,
      "step": 7230
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9949645400047302,
      "learning_rate": 1.961841880239875e-05,
      "loss": 2.7267,
      "step": 7231
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.999039351940155,
      "learning_rate": 1.9618306140632346e-05,
      "loss": 2.6731,
      "step": 7232
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0523349046707153,
      "learning_rate": 1.96181934625603e-05,
      "loss": 2.6483,
      "step": 7233
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9672902226448059,
      "learning_rate": 1.9618080768182802e-05,
      "loss": 2.5198,
      "step": 7234
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2895485162734985,
      "learning_rate": 1.961796805750005e-05,
      "loss": 2.5219,
      "step": 7235
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9991417527198792,
      "learning_rate": 1.9617855330512226e-05,
      "loss": 2.7301,
      "step": 7236
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0278981924057007,
      "learning_rate": 1.9617742587219524e-05,
      "loss": 2.612,
      "step": 7237
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9458065629005432,
      "learning_rate": 1.9617629827622136e-05,
      "loss": 2.6777,
      "step": 7238
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.000880241394043,
      "learning_rate": 1.9617517051720258e-05,
      "loss": 2.5395,
      "step": 7239
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0015109777450562,
      "learning_rate": 1.9617404259514073e-05,
      "loss": 2.536,
      "step": 7240
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.05477774143219,
      "learning_rate": 1.961729145100378e-05,
      "loss": 2.7532,
      "step": 7241
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1348434686660767,
      "learning_rate": 1.9617178626189556e-05,
      "loss": 2.6242,
      "step": 7242
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9823206067085266,
      "learning_rate": 1.961706578507161e-05,
      "loss": 2.6168,
      "step": 7243
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.049128770828247,
      "learning_rate": 1.9616952927650122e-05,
      "loss": 2.6287,
      "step": 7244
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9732100367546082,
      "learning_rate": 1.961684005392529e-05,
      "loss": 2.6308,
      "step": 7245
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9919828772544861,
      "learning_rate": 1.96167271638973e-05,
      "loss": 2.4886,
      "step": 7246
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0253956317901611,
      "learning_rate": 1.9616614257566347e-05,
      "loss": 2.536,
      "step": 7247
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9484361410140991,
      "learning_rate": 1.961650133493262e-05,
      "loss": 2.8464,
      "step": 7248
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0061296224594116,
      "learning_rate": 1.9616388395996317e-05,
      "loss": 2.6342,
      "step": 7249
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9775159358978271,
      "learning_rate": 1.9616275440757615e-05,
      "loss": 2.4948,
      "step": 7250
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0442826747894287,
      "learning_rate": 1.961616246921672e-05,
      "loss": 2.6019,
      "step": 7251
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0151875019073486,
      "learning_rate": 1.9616049481373815e-05,
      "loss": 2.6907,
      "step": 7252
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0927869081497192,
      "learning_rate": 1.9615936477229094e-05,
      "loss": 2.9189,
      "step": 7253
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0173819065093994,
      "learning_rate": 1.9615823456782753e-05,
      "loss": 2.7078,
      "step": 7254
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9259377121925354,
      "learning_rate": 1.9615710420034975e-05,
      "loss": 2.5987,
      "step": 7255
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2125409841537476,
      "learning_rate": 1.961559736698596e-05,
      "loss": 2.4237,
      "step": 7256
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9529224634170532,
      "learning_rate": 1.9615484297635894e-05,
      "loss": 2.7149,
      "step": 7257
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0632438659667969,
      "learning_rate": 1.961537121198497e-05,
      "loss": 2.5621,
      "step": 7258
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9966144561767578,
      "learning_rate": 1.9615258110033385e-05,
      "loss": 2.7,
      "step": 7259
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9951833486557007,
      "learning_rate": 1.9615144991781322e-05,
      "loss": 2.5812,
      "step": 7260
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1266748905181885,
      "learning_rate": 1.9615031857228973e-05,
      "loss": 2.7254,
      "step": 7261
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0113255977630615,
      "learning_rate": 1.9614918706376537e-05,
      "loss": 2.8787,
      "step": 7262
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9982066750526428,
      "learning_rate": 1.9614805539224204e-05,
      "loss": 2.5319,
      "step": 7263
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0176784992218018,
      "learning_rate": 1.9614692355772166e-05,
      "loss": 2.5861,
      "step": 7264
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0892809629440308,
      "learning_rate": 1.961457915602061e-05,
      "loss": 2.8893,
      "step": 7265
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9811731576919556,
      "learning_rate": 1.9614465939969732e-05,
      "loss": 2.8557,
      "step": 7266
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0313820838928223,
      "learning_rate": 1.9614352707619722e-05,
      "loss": 2.7525,
      "step": 7267
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0103811025619507,
      "learning_rate": 1.9614239458970772e-05,
      "loss": 2.7077,
      "step": 7268
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9900963306427002,
      "learning_rate": 1.9614126194023078e-05,
      "loss": 2.8273,
      "step": 7269
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.019046664237976,
      "learning_rate": 1.9614012912776824e-05,
      "loss": 2.5888,
      "step": 7270
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0073838233947754,
      "learning_rate": 1.9613899615232213e-05,
      "loss": 2.8018,
      "step": 7271
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9453564286231995,
      "learning_rate": 1.9613786301389427e-05,
      "loss": 2.8189,
      "step": 7272
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0799075365066528,
      "learning_rate": 1.9613672971248664e-05,
      "loss": 2.6321,
      "step": 7273
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9649047255516052,
      "learning_rate": 1.9613559624810116e-05,
      "loss": 2.4958,
      "step": 7274
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0959703922271729,
      "learning_rate": 1.961344626207397e-05,
      "loss": 2.8591,
      "step": 7275
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1893510818481445,
      "learning_rate": 1.9613332883040425e-05,
      "loss": 2.7553,
      "step": 7276
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0116231441497803,
      "learning_rate": 1.9613219487709664e-05,
      "loss": 2.651,
      "step": 7277
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9936540126800537,
      "learning_rate": 1.961310607608189e-05,
      "loss": 2.7291,
      "step": 7278
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.060522198677063,
      "learning_rate": 1.961299264815729e-05,
      "loss": 2.6598,
      "step": 7279
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2680819034576416,
      "learning_rate": 1.9612879203936056e-05,
      "loss": 2.6312,
      "step": 7280
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0520704984664917,
      "learning_rate": 1.961276574341838e-05,
      "loss": 2.7326,
      "step": 7281
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0463567972183228,
      "learning_rate": 1.9612652266604455e-05,
      "loss": 2.5216,
      "step": 7282
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.034930944442749,
      "learning_rate": 1.9612538773494475e-05,
      "loss": 2.5365,
      "step": 7283
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9877616763114929,
      "learning_rate": 1.961242526408863e-05,
      "loss": 2.6278,
      "step": 7284
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0322636365890503,
      "learning_rate": 1.9612311738387113e-05,
      "loss": 2.7354,
      "step": 7285
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.101280927658081,
      "learning_rate": 1.961219819639012e-05,
      "loss": 2.5504,
      "step": 7286
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0610753297805786,
      "learning_rate": 1.961208463809784e-05,
      "loss": 2.6116,
      "step": 7287
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9809363484382629,
      "learning_rate": 1.961197106351046e-05,
      "loss": 2.6435,
      "step": 7288
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1064646244049072,
      "learning_rate": 1.9611857472628183e-05,
      "loss": 2.707,
      "step": 7289
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.9679580330848694,
      "learning_rate": 1.96117438654512e-05,
      "loss": 2.63,
      "step": 7290
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0581825971603394,
      "learning_rate": 1.9611630241979697e-05,
      "loss": 2.6465,
      "step": 7291
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.2332943677902222,
      "learning_rate": 1.961151660221387e-05,
      "loss": 2.5254,
      "step": 7292
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0949307680130005,
      "learning_rate": 1.9611402946153914e-05,
      "loss": 2.4962,
      "step": 7293
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.944648265838623,
      "learning_rate": 1.9611289273800015e-05,
      "loss": 2.3585,
      "step": 7294
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.0205930471420288,
      "learning_rate": 1.9611175585152374e-05,
      "loss": 2.54,
      "step": 7295
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.1275440454483032,
      "learning_rate": 1.961106188021118e-05,
      "loss": 2.4215,
      "step": 7296
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0273303985595703,
      "learning_rate": 1.9610948158976626e-05,
      "loss": 2.7837,
      "step": 7297
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8170655965805054,
      "learning_rate": 1.9610834421448902e-05,
      "loss": 2.5166,
      "step": 7298
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1647499799728394,
      "learning_rate": 1.9610720667628208e-05,
      "loss": 2.5101,
      "step": 7299
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0479758977890015,
      "learning_rate": 1.9610606897514727e-05,
      "loss": 2.6507,
      "step": 7300
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2056443691253662,
      "learning_rate": 1.961049311110866e-05,
      "loss": 2.6191,
      "step": 7301
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0031139850616455,
      "learning_rate": 1.9610379308410195e-05,
      "loss": 2.7069,
      "step": 7302
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0257573127746582,
      "learning_rate": 1.9610265489419532e-05,
      "loss": 2.6501,
      "step": 7303
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0597150325775146,
      "learning_rate": 1.9610151654136853e-05,
      "loss": 2.8758,
      "step": 7304
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.306128740310669,
      "learning_rate": 1.961003780256236e-05,
      "loss": 2.8013,
      "step": 7305
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1537843942642212,
      "learning_rate": 1.9609923934696242e-05,
      "loss": 2.5644,
      "step": 7306
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2133125066757202,
      "learning_rate": 1.9609810050538692e-05,
      "loss": 2.651,
      "step": 7307
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.002729058265686,
      "learning_rate": 1.9609696150089907e-05,
      "loss": 2.5953,
      "step": 7308
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9778302311897278,
      "learning_rate": 1.9609582233350076e-05,
      "loss": 2.6782,
      "step": 7309
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9172469973564148,
      "learning_rate": 1.9609468300319392e-05,
      "loss": 2.5461,
      "step": 7310
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9930736422538757,
      "learning_rate": 1.960935435099805e-05,
      "loss": 2.5884,
      "step": 7311
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0732442140579224,
      "learning_rate": 1.960924038538624e-05,
      "loss": 2.6714,
      "step": 7312
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0222413539886475,
      "learning_rate": 1.960912640348416e-05,
      "loss": 2.7056,
      "step": 7313
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0331467390060425,
      "learning_rate": 1.9609012405292e-05,
      "loss": 2.4083,
      "step": 7314
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0139492750167847,
      "learning_rate": 1.9608898390809956e-05,
      "loss": 2.6104,
      "step": 7315
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9147590398788452,
      "learning_rate": 1.960878436003822e-05,
      "loss": 2.4464,
      "step": 7316
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0366867780685425,
      "learning_rate": 1.9608670312976982e-05,
      "loss": 2.615,
      "step": 7317
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1298432350158691,
      "learning_rate": 1.960855624962644e-05,
      "loss": 2.7417,
      "step": 7318
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0355639457702637,
      "learning_rate": 1.9608442169986785e-05,
      "loss": 2.8078,
      "step": 7319
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9944142699241638,
      "learning_rate": 1.9608328074058208e-05,
      "loss": 2.697,
      "step": 7320
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9767298102378845,
      "learning_rate": 1.9608213961840913e-05,
      "loss": 2.6914,
      "step": 7321
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.079654335975647,
      "learning_rate": 1.960809983333508e-05,
      "loss": 2.7653,
      "step": 7322
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0396369695663452,
      "learning_rate": 1.9607985688540908e-05,
      "loss": 2.6752,
      "step": 7323
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0458416938781738,
      "learning_rate": 1.960787152745859e-05,
      "loss": 2.5666,
      "step": 7324
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.137665867805481,
      "learning_rate": 1.9607757350088324e-05,
      "loss": 2.701,
      "step": 7325
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0408351421356201,
      "learning_rate": 1.9607643156430298e-05,
      "loss": 2.6416,
      "step": 7326
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1182224750518799,
      "learning_rate": 1.960752894648471e-05,
      "loss": 2.5211,
      "step": 7327
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9867010712623596,
      "learning_rate": 1.960741472025175e-05,
      "loss": 2.5787,
      "step": 7328
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0206611156463623,
      "learning_rate": 1.9607300477731608e-05,
      "loss": 2.7951,
      "step": 7329
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9043802618980408,
      "learning_rate": 1.9607186218924484e-05,
      "loss": 2.5333,
      "step": 7330
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0935386419296265,
      "learning_rate": 1.9607071943830573e-05,
      "loss": 2.8243,
      "step": 7331
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0237780809402466,
      "learning_rate": 1.9606957652450065e-05,
      "loss": 2.7569,
      "step": 7332
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.067986011505127,
      "learning_rate": 1.9606843344783156e-05,
      "loss": 2.703,
      "step": 7333
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2438162565231323,
      "learning_rate": 1.9606729020830033e-05,
      "loss": 2.489,
      "step": 7334
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9595436453819275,
      "learning_rate": 1.96066146805909e-05,
      "loss": 2.6048,
      "step": 7335
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9843566417694092,
      "learning_rate": 1.9606500324065945e-05,
      "loss": 2.6289,
      "step": 7336
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0428754091262817,
      "learning_rate": 1.9606385951255362e-05,
      "loss": 2.738,
      "step": 7337
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9280330538749695,
      "learning_rate": 1.9606271562159348e-05,
      "loss": 2.3659,
      "step": 7338
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0767186880111694,
      "learning_rate": 1.960615715677809e-05,
      "loss": 2.6249,
      "step": 7339
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9981619119644165,
      "learning_rate": 1.9606042735111794e-05,
      "loss": 2.6363,
      "step": 7340
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1367615461349487,
      "learning_rate": 1.960592829716064e-05,
      "loss": 2.8407,
      "step": 7341
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.934910774230957,
      "learning_rate": 1.960581384292483e-05,
      "loss": 2.9512,
      "step": 7342
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.880733847618103,
      "learning_rate": 1.9605699372404556e-05,
      "loss": 2.815,
      "step": 7343
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9897288084030151,
      "learning_rate": 1.9605584885600015e-05,
      "loss": 2.5408,
      "step": 7344
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0050795078277588,
      "learning_rate": 1.9605470382511397e-05,
      "loss": 2.742,
      "step": 7345
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0566978454589844,
      "learning_rate": 1.96053558631389e-05,
      "loss": 2.6113,
      "step": 7346
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9667965769767761,
      "learning_rate": 1.9605241327482716e-05,
      "loss": 2.6274,
      "step": 7347
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9560127258300781,
      "learning_rate": 1.9605126775543035e-05,
      "loss": 2.5684,
      "step": 7348
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9982897043228149,
      "learning_rate": 1.960501220732006e-05,
      "loss": 2.6661,
      "step": 7349
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0271613597869873,
      "learning_rate": 1.960489762281398e-05,
      "loss": 2.7062,
      "step": 7350
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9348569512367249,
      "learning_rate": 1.960478302202499e-05,
      "loss": 2.6184,
      "step": 7351
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9096304774284363,
      "learning_rate": 1.960466840495328e-05,
      "loss": 2.83,
      "step": 7352
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.006658911705017,
      "learning_rate": 1.9604553771599053e-05,
      "loss": 2.5648,
      "step": 7353
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9634643793106079,
      "learning_rate": 1.9604439121962497e-05,
      "loss": 2.4082,
      "step": 7354
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.007173776626587,
      "learning_rate": 1.960432445604381e-05,
      "loss": 2.4587,
      "step": 7355
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9168108701705933,
      "learning_rate": 1.960420977384318e-05,
      "loss": 2.725,
      "step": 7356
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9641813039779663,
      "learning_rate": 1.9604095075360813e-05,
      "loss": 2.6011,
      "step": 7357
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0292905569076538,
      "learning_rate": 1.960398036059689e-05,
      "loss": 2.4342,
      "step": 7358
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0095568895339966,
      "learning_rate": 1.9603865629551615e-05,
      "loss": 2.6754,
      "step": 7359
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0312457084655762,
      "learning_rate": 1.9603750882225178e-05,
      "loss": 2.5455,
      "step": 7360
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.069914698600769,
      "learning_rate": 1.9603636118617774e-05,
      "loss": 2.5122,
      "step": 7361
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9405418038368225,
      "learning_rate": 1.96035213387296e-05,
      "loss": 2.4575,
      "step": 7362
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9937271475791931,
      "learning_rate": 1.960340654256085e-05,
      "loss": 2.7798,
      "step": 7363
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.8953043222427368,
      "learning_rate": 1.960329173011172e-05,
      "loss": 2.6011,
      "step": 7364
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.94668048620224,
      "learning_rate": 1.9603176901382396e-05,
      "loss": 2.5483,
      "step": 7365
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0474015474319458,
      "learning_rate": 1.9603062056373083e-05,
      "loss": 2.5765,
      "step": 7366
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0011874437332153,
      "learning_rate": 1.960294719508397e-05,
      "loss": 2.5059,
      "step": 7367
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0560626983642578,
      "learning_rate": 1.9602832317515253e-05,
      "loss": 2.6247,
      "step": 7368
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0438933372497559,
      "learning_rate": 1.9602717423667128e-05,
      "loss": 2.6683,
      "step": 7369
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9410036206245422,
      "learning_rate": 1.960260251353979e-05,
      "loss": 2.758,
      "step": 7370
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1455509662628174,
      "learning_rate": 1.9602487587133432e-05,
      "loss": 2.6607,
      "step": 7371
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.011718988418579,
      "learning_rate": 1.960237264444825e-05,
      "loss": 2.4187,
      "step": 7372
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.063887596130371,
      "learning_rate": 1.9602257685484434e-05,
      "loss": 2.6486,
      "step": 7373
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.039042592048645,
      "learning_rate": 1.960214271024219e-05,
      "loss": 2.6362,
      "step": 7374
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9586317539215088,
      "learning_rate": 1.9602027718721703e-05,
      "loss": 2.6647,
      "step": 7375
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9942554831504822,
      "learning_rate": 1.960191271092317e-05,
      "loss": 2.5064,
      "step": 7376
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.081870198249817,
      "learning_rate": 1.9601797686846787e-05,
      "loss": 2.7628,
      "step": 7377
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0507280826568604,
      "learning_rate": 1.9601682646492754e-05,
      "loss": 2.9089,
      "step": 7378
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0294263362884521,
      "learning_rate": 1.9601567589861258e-05,
      "loss": 2.7162,
      "step": 7379
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0746150016784668,
      "learning_rate": 1.9601452516952496e-05,
      "loss": 2.6626,
      "step": 7380
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9712386131286621,
      "learning_rate": 1.9601337427766664e-05,
      "loss": 2.5203,
      "step": 7381
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9291953444480896,
      "learning_rate": 1.960122232230396e-05,
      "loss": 2.6843,
      "step": 7382
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0551414489746094,
      "learning_rate": 1.9601107200564575e-05,
      "loss": 2.7013,
      "step": 7383
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9456499218940735,
      "learning_rate": 1.9600992062548707e-05,
      "loss": 2.5766,
      "step": 7384
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.980679988861084,
      "learning_rate": 1.9600876908256548e-05,
      "loss": 2.4742,
      "step": 7385
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0774407386779785,
      "learning_rate": 1.9600761737688296e-05,
      "loss": 2.5372,
      "step": 7386
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.8938091993331909,
      "learning_rate": 1.9600646550844146e-05,
      "loss": 2.5374,
      "step": 7387
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0165801048278809,
      "learning_rate": 1.960053134772429e-05,
      "loss": 2.6018,
      "step": 7388
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9840244054794312,
      "learning_rate": 1.9600416128328927e-05,
      "loss": 2.7165,
      "step": 7389
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.066066026687622,
      "learning_rate": 1.9600300892658254e-05,
      "loss": 2.6664,
      "step": 7390
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.8960509896278381,
      "learning_rate": 1.960018564071246e-05,
      "loss": 2.4992,
      "step": 7391
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2092350721359253,
      "learning_rate": 1.9600070372491744e-05,
      "loss": 2.7283,
      "step": 7392
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0145506858825684,
      "learning_rate": 1.9599955087996304e-05,
      "loss": 2.7052,
      "step": 7393
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1286908388137817,
      "learning_rate": 1.959983978722633e-05,
      "loss": 2.7013,
      "step": 7394
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9664316177368164,
      "learning_rate": 1.9599724470182023e-05,
      "loss": 2.6583,
      "step": 7395
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0472058057785034,
      "learning_rate": 1.9599609136863573e-05,
      "loss": 2.5863,
      "step": 7396
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1747535467147827,
      "learning_rate": 1.959949378727118e-05,
      "loss": 2.6957,
      "step": 7397
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0535399913787842,
      "learning_rate": 1.9599378421405038e-05,
      "loss": 2.7442,
      "step": 7398
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0314373970031738,
      "learning_rate": 1.9599263039265343e-05,
      "loss": 2.5528,
      "step": 7399
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9652913808822632,
      "learning_rate": 1.959914764085229e-05,
      "loss": 2.8549,
      "step": 7400
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1738862991333008,
      "learning_rate": 1.959903222616607e-05,
      "loss": 2.6801,
      "step": 7401
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0398467779159546,
      "learning_rate": 1.9598916795206888e-05,
      "loss": 2.7284,
      "step": 7402
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0302237272262573,
      "learning_rate": 1.9598801347974933e-05,
      "loss": 2.6464,
      "step": 7403
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0515726804733276,
      "learning_rate": 1.9598685884470406e-05,
      "loss": 2.7029,
      "step": 7404
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.174472451210022,
      "learning_rate": 1.9598570404693498e-05,
      "loss": 2.6011,
      "step": 7405
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9968844652175903,
      "learning_rate": 1.9598454908644406e-05,
      "loss": 2.7354,
      "step": 7406
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0701817274093628,
      "learning_rate": 1.9598339396323323e-05,
      "loss": 2.6671,
      "step": 7407
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0617635250091553,
      "learning_rate": 1.959822386773045e-05,
      "loss": 2.693,
      "step": 7408
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0221160650253296,
      "learning_rate": 1.959810832286598e-05,
      "loss": 2.5429,
      "step": 7409
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9747415781021118,
      "learning_rate": 1.959799276173011e-05,
      "loss": 2.4951,
      "step": 7410
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9877970218658447,
      "learning_rate": 1.959787718432304e-05,
      "loss": 2.5468,
      "step": 7411
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9169228076934814,
      "learning_rate": 1.959776159064496e-05,
      "loss": 2.6495,
      "step": 7412
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9787675142288208,
      "learning_rate": 1.959764598069606e-05,
      "loss": 2.7282,
      "step": 7413
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9279423952102661,
      "learning_rate": 1.9597530354476548e-05,
      "loss": 2.7855,
      "step": 7414
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0951447486877441,
      "learning_rate": 1.9597414711986617e-05,
      "loss": 2.7026,
      "step": 7415
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0596084594726562,
      "learning_rate": 1.959729905322646e-05,
      "loss": 2.5628,
      "step": 7416
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0826667547225952,
      "learning_rate": 1.9597183378196273e-05,
      "loss": 2.4257,
      "step": 7417
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0499763488769531,
      "learning_rate": 1.9597067686896255e-05,
      "loss": 2.7224,
      "step": 7418
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9937378168106079,
      "learning_rate": 1.9596951979326603e-05,
      "loss": 2.7635,
      "step": 7419
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.974426805973053,
      "learning_rate": 1.959683625548751e-05,
      "loss": 2.5449,
      "step": 7420
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9560901522636414,
      "learning_rate": 1.959672051537917e-05,
      "loss": 2.7149,
      "step": 7421
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9449514150619507,
      "learning_rate": 1.9596604759001784e-05,
      "loss": 2.7853,
      "step": 7422
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.8993469476699829,
      "learning_rate": 1.9596488986355547e-05,
      "loss": 2.6867,
      "step": 7423
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0130234956741333,
      "learning_rate": 1.9596373197440653e-05,
      "loss": 2.7248,
      "step": 7424
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9044731855392456,
      "learning_rate": 1.9596257392257302e-05,
      "loss": 2.8415,
      "step": 7425
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2508726119995117,
      "learning_rate": 1.959614157080569e-05,
      "loss": 2.6535,
      "step": 7426
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.956606388092041,
      "learning_rate": 1.959602573308601e-05,
      "loss": 2.7518,
      "step": 7427
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0405957698822021,
      "learning_rate": 1.959590987909846e-05,
      "loss": 2.7111,
      "step": 7428
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.986656904220581,
      "learning_rate": 1.9595794008843237e-05,
      "loss": 2.6338,
      "step": 7429
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1429469585418701,
      "learning_rate": 1.9595678122320535e-05,
      "loss": 2.6555,
      "step": 7430
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9858183860778809,
      "learning_rate": 1.9595562219530552e-05,
      "loss": 2.6478,
      "step": 7431
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0094584226608276,
      "learning_rate": 1.9595446300473487e-05,
      "loss": 2.7209,
      "step": 7432
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.990755558013916,
      "learning_rate": 1.9595330365149538e-05,
      "loss": 2.5977,
      "step": 7433
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.051494836807251,
      "learning_rate": 1.9595214413558893e-05,
      "loss": 2.6375,
      "step": 7434
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.025307059288025,
      "learning_rate": 1.9595098445701753e-05,
      "loss": 2.5002,
      "step": 7435
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0507560968399048,
      "learning_rate": 1.9594982461578318e-05,
      "loss": 2.6371,
      "step": 7436
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.982047975063324,
      "learning_rate": 1.959486646118878e-05,
      "loss": 2.5955,
      "step": 7437
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1907416582107544,
      "learning_rate": 1.959475044453334e-05,
      "loss": 2.5397,
      "step": 7438
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0714861154556274,
      "learning_rate": 1.9594634411612188e-05,
      "loss": 2.6547,
      "step": 7439
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.009549856185913,
      "learning_rate": 1.9594518362425526e-05,
      "loss": 2.7165,
      "step": 7440
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9729028344154358,
      "learning_rate": 1.9594402296973555e-05,
      "loss": 2.7298,
      "step": 7441
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0210292339324951,
      "learning_rate": 1.959428621525646e-05,
      "loss": 2.5341,
      "step": 7442
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0807006359100342,
      "learning_rate": 1.9594170117274447e-05,
      "loss": 2.6332,
      "step": 7443
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.997836172580719,
      "learning_rate": 1.9594054003027706e-05,
      "loss": 2.7075,
      "step": 7444
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9773699641227722,
      "learning_rate": 1.9593937872516442e-05,
      "loss": 2.7609,
      "step": 7445
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9593871235847473,
      "learning_rate": 1.9593821725740847e-05,
      "loss": 2.7511,
      "step": 7446
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0824682712554932,
      "learning_rate": 1.9593705562701116e-05,
      "loss": 2.6508,
      "step": 7447
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0268253087997437,
      "learning_rate": 1.959358938339745e-05,
      "loss": 2.59,
      "step": 7448
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.012665867805481,
      "learning_rate": 1.9593473187830045e-05,
      "loss": 2.5202,
      "step": 7449
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9859292507171631,
      "learning_rate": 1.9593356975999098e-05,
      "loss": 2.5664,
      "step": 7450
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0039055347442627,
      "learning_rate": 1.9593240747904804e-05,
      "loss": 2.6975,
      "step": 7451
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1734421253204346,
      "learning_rate": 1.959312450354736e-05,
      "loss": 2.5704,
      "step": 7452
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0057573318481445,
      "learning_rate": 1.9593008242926968e-05,
      "loss": 2.7809,
      "step": 7453
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2511852979660034,
      "learning_rate": 1.959289196604382e-05,
      "loss": 2.4776,
      "step": 7454
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1583133935928345,
      "learning_rate": 1.9592775672898113e-05,
      "loss": 2.6069,
      "step": 7455
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9870456457138062,
      "learning_rate": 1.9592659363490048e-05,
      "loss": 2.7372,
      "step": 7456
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9417082667350769,
      "learning_rate": 1.959254303781982e-05,
      "loss": 2.5366,
      "step": 7457
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.060824990272522,
      "learning_rate": 1.9592426695887626e-05,
      "loss": 2.7254,
      "step": 7458
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.042579174041748,
      "learning_rate": 1.9592310337693665e-05,
      "loss": 2.5192,
      "step": 7459
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0215495824813843,
      "learning_rate": 1.9592193963238132e-05,
      "loss": 2.7654,
      "step": 7460
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.07611882686615,
      "learning_rate": 1.9592077572521223e-05,
      "loss": 2.5964,
      "step": 7461
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1233323812484741,
      "learning_rate": 1.9591961165543138e-05,
      "loss": 2.613,
      "step": 7462
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1865521669387817,
      "learning_rate": 1.9591844742304077e-05,
      "loss": 2.6051,
      "step": 7463
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9991053938865662,
      "learning_rate": 1.959172830280423e-05,
      "loss": 2.625,
      "step": 7464
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0610437393188477,
      "learning_rate": 1.95916118470438e-05,
      "loss": 2.5443,
      "step": 7465
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.101328730583191,
      "learning_rate": 1.959149537502298e-05,
      "loss": 2.6345,
      "step": 7466
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9712780714035034,
      "learning_rate": 1.9591378886741974e-05,
      "loss": 2.7215,
      "step": 7467
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9313721060752869,
      "learning_rate": 1.9591262382200974e-05,
      "loss": 2.4442,
      "step": 7468
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0940580368041992,
      "learning_rate": 1.959114586140018e-05,
      "loss": 2.6601,
      "step": 7469
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9534735083580017,
      "learning_rate": 1.959102932433979e-05,
      "loss": 2.5842,
      "step": 7470
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0466862916946411,
      "learning_rate": 1.959091277102e-05,
      "loss": 2.4486,
      "step": 7471
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0440701246261597,
      "learning_rate": 1.9590796201441007e-05,
      "loss": 2.6159,
      "step": 7472
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9466574788093567,
      "learning_rate": 1.959067961560301e-05,
      "loss": 2.5156,
      "step": 7473
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0047026872634888,
      "learning_rate": 1.9590563013506205e-05,
      "loss": 2.6887,
      "step": 7474
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.96855628490448,
      "learning_rate": 1.9590446395150794e-05,
      "loss": 2.68,
      "step": 7475
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9901684522628784,
      "learning_rate": 1.959032976053697e-05,
      "loss": 2.6064,
      "step": 7476
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.000659704208374,
      "learning_rate": 1.9590213109664932e-05,
      "loss": 2.6391,
      "step": 7477
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0645629167556763,
      "learning_rate": 1.9590096442534877e-05,
      "loss": 2.7625,
      "step": 7478
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9590895175933838,
      "learning_rate": 1.9589979759147004e-05,
      "loss": 2.6921,
      "step": 7479
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.027215600013733,
      "learning_rate": 1.958986305950151e-05,
      "loss": 2.7061,
      "step": 7480
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9683923721313477,
      "learning_rate": 1.9589746343598596e-05,
      "loss": 2.6359,
      "step": 7481
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0278044939041138,
      "learning_rate": 1.9589629611438457e-05,
      "loss": 2.6808,
      "step": 7482
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9382929801940918,
      "learning_rate": 1.9589512863021293e-05,
      "loss": 2.5711,
      "step": 7483
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9963847398757935,
      "learning_rate": 1.9589396098347296e-05,
      "loss": 2.8458,
      "step": 7484
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0746350288391113,
      "learning_rate": 1.958927931741667e-05,
      "loss": 2.689,
      "step": 7485
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9268573522567749,
      "learning_rate": 1.9589162520229613e-05,
      "loss": 2.6995,
      "step": 7486
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.019195795059204,
      "learning_rate": 1.958904570678632e-05,
      "loss": 2.5778,
      "step": 7487
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.192025899887085,
      "learning_rate": 1.958892887708699e-05,
      "loss": 2.5837,
      "step": 7488
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9448727369308472,
      "learning_rate": 1.958881203113182e-05,
      "loss": 2.5855,
      "step": 7489
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1008484363555908,
      "learning_rate": 1.958869516892101e-05,
      "loss": 2.719,
      "step": 7490
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0424168109893799,
      "learning_rate": 1.9588578290454757e-05,
      "loss": 2.7873,
      "step": 7491
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9352865815162659,
      "learning_rate": 1.958846139573326e-05,
      "loss": 2.6069,
      "step": 7492
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2661628723144531,
      "learning_rate": 1.958834448475672e-05,
      "loss": 2.6359,
      "step": 7493
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0530738830566406,
      "learning_rate": 1.9588227557525326e-05,
      "loss": 2.4253,
      "step": 7494
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0846199989318848,
      "learning_rate": 1.9588110614039287e-05,
      "loss": 2.8241,
      "step": 7495
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0915061235427856,
      "learning_rate": 1.9587993654298793e-05,
      "loss": 2.7019,
      "step": 7496
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0327922105789185,
      "learning_rate": 1.958787667830405e-05,
      "loss": 2.7209,
      "step": 7497
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0719448328018188,
      "learning_rate": 1.958775968605525e-05,
      "loss": 2.5952,
      "step": 7498
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9770402312278748,
      "learning_rate": 1.9587642677552588e-05,
      "loss": 2.576,
      "step": 7499
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.019590139389038,
      "learning_rate": 1.9587525652796275e-05,
      "loss": 2.6075,
      "step": 7500
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.008379578590393,
      "learning_rate": 1.95874086117865e-05,
      "loss": 2.5491,
      "step": 7501
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.953632652759552,
      "learning_rate": 1.958729155452346e-05,
      "loss": 2.6741,
      "step": 7502
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.922916829586029,
      "learning_rate": 1.958717448100736e-05,
      "loss": 2.5828,
      "step": 7503
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0172537565231323,
      "learning_rate": 1.95870573912384e-05,
      "loss": 2.6688,
      "step": 7504
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9692373871803284,
      "learning_rate": 1.9586940285216766e-05,
      "loss": 2.5781,
      "step": 7505
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9445236921310425,
      "learning_rate": 1.958682316294267e-05,
      "loss": 2.6771,
      "step": 7506
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.961879551410675,
      "learning_rate": 1.95867060244163e-05,
      "loss": 2.7637,
      "step": 7507
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0112862586975098,
      "learning_rate": 1.9586588869637865e-05,
      "loss": 2.6694,
      "step": 7508
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9622468948364258,
      "learning_rate": 1.9586471698607554e-05,
      "loss": 2.5519,
      "step": 7509
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0918694734573364,
      "learning_rate": 1.958635451132557e-05,
      "loss": 2.5313,
      "step": 7510
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9812532663345337,
      "learning_rate": 1.9586237307792112e-05,
      "loss": 2.6682,
      "step": 7511
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0049835443496704,
      "learning_rate": 1.958612008800738e-05,
      "loss": 2.6378,
      "step": 7512
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.8951539993286133,
      "learning_rate": 1.958600285197157e-05,
      "loss": 2.386,
      "step": 7513
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0017026662826538,
      "learning_rate": 1.958588559968488e-05,
      "loss": 2.6821,
      "step": 7514
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9283847212791443,
      "learning_rate": 1.9585768331147514e-05,
      "loss": 2.8879,
      "step": 7515
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.017262578010559,
      "learning_rate": 1.9585651046359665e-05,
      "loss": 2.5053,
      "step": 7516
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0721757411956787,
      "learning_rate": 1.9585533745321534e-05,
      "loss": 2.5721,
      "step": 7517
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.146860122680664,
      "learning_rate": 1.958541642803332e-05,
      "loss": 2.855,
      "step": 7518
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1174873113632202,
      "learning_rate": 1.9585299094495224e-05,
      "loss": 2.705,
      "step": 7519
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.149404525756836,
      "learning_rate": 1.958518174470744e-05,
      "loss": 2.8559,
      "step": 7520
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0402706861495972,
      "learning_rate": 1.958506437867017e-05,
      "loss": 2.5743,
      "step": 7521
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0209027528762817,
      "learning_rate": 1.958494699638361e-05,
      "loss": 2.7478,
      "step": 7522
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9230315089225769,
      "learning_rate": 1.958482959784797e-05,
      "loss": 2.5288,
      "step": 7523
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9459167718887329,
      "learning_rate": 1.9584712183063432e-05,
      "loss": 2.4691,
      "step": 7524
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9579082727432251,
      "learning_rate": 1.9584594752030205e-05,
      "loss": 2.5414,
      "step": 7525
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.059590458869934,
      "learning_rate": 1.9584477304748488e-05,
      "loss": 2.7741,
      "step": 7526
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0857748985290527,
      "learning_rate": 1.958435984121848e-05,
      "loss": 2.6727,
      "step": 7527
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.093868613243103,
      "learning_rate": 1.9584242361440376e-05,
      "loss": 2.7537,
      "step": 7528
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9613582491874695,
      "learning_rate": 1.9584124865414386e-05,
      "loss": 2.5816,
      "step": 7529
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9480178356170654,
      "learning_rate": 1.9584007353140694e-05,
      "loss": 2.5876,
      "step": 7530
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9933818578720093,
      "learning_rate": 1.9583889824619507e-05,
      "loss": 2.5593,
      "step": 7531
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0822778940200806,
      "learning_rate": 1.9583772279851026e-05,
      "loss": 2.5972,
      "step": 7532
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0984084606170654,
      "learning_rate": 1.9583654718835447e-05,
      "loss": 2.6989,
      "step": 7533
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9549679160118103,
      "learning_rate": 1.958353714157297e-05,
      "loss": 2.55,
      "step": 7534
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9773848056793213,
      "learning_rate": 1.9583419548063797e-05,
      "loss": 2.5432,
      "step": 7535
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.903795599937439,
      "learning_rate": 1.9583301938308122e-05,
      "loss": 2.587,
      "step": 7536
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2890121936798096,
      "learning_rate": 1.9583184312306145e-05,
      "loss": 2.6202,
      "step": 7537
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0740740299224854,
      "learning_rate": 1.9583066670058074e-05,
      "loss": 2.4808,
      "step": 7538
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9903543591499329,
      "learning_rate": 1.95829490115641e-05,
      "loss": 2.4872,
      "step": 7539
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9593101143836975,
      "learning_rate": 1.9582831336824423e-05,
      "loss": 2.657,
      "step": 7540
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0932488441467285,
      "learning_rate": 1.9582713645839248e-05,
      "loss": 2.7836,
      "step": 7541
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9697953462600708,
      "learning_rate": 1.9582595938608767e-05,
      "loss": 2.6086,
      "step": 7542
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9838033318519592,
      "learning_rate": 1.9582478215133186e-05,
      "loss": 2.5653,
      "step": 7543
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.931176483631134,
      "learning_rate": 1.95823604754127e-05,
      "loss": 2.2769,
      "step": 7544
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9913979172706604,
      "learning_rate": 1.9582242719447514e-05,
      "loss": 2.579,
      "step": 7545
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0305830240249634,
      "learning_rate": 1.9582124947237818e-05,
      "loss": 2.5116,
      "step": 7546
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0242592096328735,
      "learning_rate": 1.9582007158783823e-05,
      "loss": 2.8212,
      "step": 7547
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0664879083633423,
      "learning_rate": 1.958188935408572e-05,
      "loss": 2.6422,
      "step": 7548
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0960638523101807,
      "learning_rate": 1.9581771533143713e-05,
      "loss": 2.6791,
      "step": 7549
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9460025429725647,
      "learning_rate": 1.9581653695958e-05,
      "loss": 2.561,
      "step": 7550
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0576481819152832,
      "learning_rate": 1.9581535842528786e-05,
      "loss": 2.5875,
      "step": 7551
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0451905727386475,
      "learning_rate": 1.9581417972856265e-05,
      "loss": 2.5258,
      "step": 7552
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0075840950012207,
      "learning_rate": 1.9581300086940636e-05,
      "loss": 2.6917,
      "step": 7553
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.288010835647583,
      "learning_rate": 1.9581182184782103e-05,
      "loss": 2.5422,
      "step": 7554
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9136397242546082,
      "learning_rate": 1.958106426638086e-05,
      "loss": 2.5744,
      "step": 7555
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.081714153289795,
      "learning_rate": 1.9580946331737113e-05,
      "loss": 2.4029,
      "step": 7556
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0047746896743774,
      "learning_rate": 1.9580828380851062e-05,
      "loss": 2.6451,
      "step": 7557
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9913704991340637,
      "learning_rate": 1.9580710413722904e-05,
      "loss": 2.4993,
      "step": 7558
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9875354766845703,
      "learning_rate": 1.9580592430352838e-05,
      "loss": 2.6969,
      "step": 7559
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0078561305999756,
      "learning_rate": 1.9580474430741067e-05,
      "loss": 2.3527,
      "step": 7560
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9125824570655823,
      "learning_rate": 1.958035641488779e-05,
      "loss": 2.4003,
      "step": 7561
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9468763470649719,
      "learning_rate": 1.9580238382793206e-05,
      "loss": 2.777,
      "step": 7562
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0177607536315918,
      "learning_rate": 1.958012033445752e-05,
      "loss": 2.6456,
      "step": 7563
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0205023288726807,
      "learning_rate": 1.9580002269880924e-05,
      "loss": 2.5966,
      "step": 7564
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9458466172218323,
      "learning_rate": 1.957988418906362e-05,
      "loss": 2.4688,
      "step": 7565
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.967030942440033,
      "learning_rate": 1.9579766092005815e-05,
      "loss": 2.9459,
      "step": 7566
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1665682792663574,
      "learning_rate": 1.9579647978707702e-05,
      "loss": 2.7439,
      "step": 7567
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0373125076293945,
      "learning_rate": 1.9579529849169487e-05,
      "loss": 2.8088,
      "step": 7568
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9953592419624329,
      "learning_rate": 1.9579411703391363e-05,
      "loss": 2.5277,
      "step": 7569
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1058282852172852,
      "learning_rate": 1.9579293541373537e-05,
      "loss": 2.7078,
      "step": 7570
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0123825073242188,
      "learning_rate": 1.9579175363116206e-05,
      "loss": 2.5625,
      "step": 7571
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9480931758880615,
      "learning_rate": 1.957905716861957e-05,
      "loss": 2.5652,
      "step": 7572
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9758809208869934,
      "learning_rate": 1.9578938957883834e-05,
      "loss": 2.6507,
      "step": 7573
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0765630006790161,
      "learning_rate": 1.957882073090919e-05,
      "loss": 2.6999,
      "step": 7574
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.091406226158142,
      "learning_rate": 1.957870248769585e-05,
      "loss": 2.6564,
      "step": 7575
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9843090772628784,
      "learning_rate": 1.9578584228244e-05,
      "loss": 2.6135,
      "step": 7576
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9546492695808411,
      "learning_rate": 1.9578465952553852e-05,
      "loss": 2.6468,
      "step": 7577
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9932042360305786,
      "learning_rate": 1.9578347660625604e-05,
      "loss": 2.6337,
      "step": 7578
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.951845645904541,
      "learning_rate": 1.9578229352459453e-05,
      "loss": 2.488,
      "step": 7579
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9713265895843506,
      "learning_rate": 1.9578111028055603e-05,
      "loss": 2.7077,
      "step": 7580
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2337448596954346,
      "learning_rate": 1.9577992687414253e-05,
      "loss": 2.6585,
      "step": 7581
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0770257711410522,
      "learning_rate": 1.9577874330535604e-05,
      "loss": 2.5503,
      "step": 7582
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0410916805267334,
      "learning_rate": 1.9577755957419857e-05,
      "loss": 2.7383,
      "step": 7583
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.051034688949585,
      "learning_rate": 1.9577637568067215e-05,
      "loss": 2.4895,
      "step": 7584
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.006350040435791,
      "learning_rate": 1.9577519162477872e-05,
      "loss": 2.6923,
      "step": 7585
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.079237937927246,
      "learning_rate": 1.9577400740652033e-05,
      "loss": 2.5685,
      "step": 7586
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1614800691604614,
      "learning_rate": 1.95772823025899e-05,
      "loss": 2.6095,
      "step": 7587
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9449979066848755,
      "learning_rate": 1.957716384829167e-05,
      "loss": 2.4829,
      "step": 7588
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1349246501922607,
      "learning_rate": 1.9577045377757547e-05,
      "loss": 2.6211,
      "step": 7589
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0220465660095215,
      "learning_rate": 1.9576926890987737e-05,
      "loss": 2.4886,
      "step": 7590
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0180641412734985,
      "learning_rate": 1.957680838798243e-05,
      "loss": 2.6389,
      "step": 7591
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0340994596481323,
      "learning_rate": 1.957668986874183e-05,
      "loss": 2.5605,
      "step": 7592
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9229394793510437,
      "learning_rate": 1.9576571333266142e-05,
      "loss": 2.6163,
      "step": 7593
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.042638897895813,
      "learning_rate": 1.9576452781555562e-05,
      "loss": 2.6164,
      "step": 7594
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9924156665802002,
      "learning_rate": 1.95763342136103e-05,
      "loss": 2.4285,
      "step": 7595
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.176871418952942,
      "learning_rate": 1.957621562943054e-05,
      "loss": 2.5424,
      "step": 7596
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0057692527770996,
      "learning_rate": 1.95760970290165e-05,
      "loss": 2.8217,
      "step": 7597
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.014728307723999,
      "learning_rate": 1.957597841236838e-05,
      "loss": 2.8149,
      "step": 7598
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9705285429954529,
      "learning_rate": 1.9575859779486367e-05,
      "loss": 2.3036,
      "step": 7599
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.927779495716095,
      "learning_rate": 1.9575741130370675e-05,
      "loss": 2.7404,
      "step": 7600
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0163811445236206,
      "learning_rate": 1.95756224650215e-05,
      "loss": 2.4539,
      "step": 7601
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0559693574905396,
      "learning_rate": 1.9575503783439046e-05,
      "loss": 2.5394,
      "step": 7602
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0409201383590698,
      "learning_rate": 1.957538508562351e-05,
      "loss": 2.6623,
      "step": 7603
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0112117528915405,
      "learning_rate": 1.9575266371575098e-05,
      "loss": 2.5742,
      "step": 7604
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0421392917633057,
      "learning_rate": 1.9575147641294005e-05,
      "loss": 2.7609,
      "step": 7605
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1393309831619263,
      "learning_rate": 1.957502889478044e-05,
      "loss": 2.4506,
      "step": 7606
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9741740226745605,
      "learning_rate": 1.95749101320346e-05,
      "loss": 2.7833,
      "step": 7607
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0110374689102173,
      "learning_rate": 1.9574791353056685e-05,
      "loss": 2.6514,
      "step": 7608
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0318249464035034,
      "learning_rate": 1.9574672557846898e-05,
      "loss": 2.5761,
      "step": 7609
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9969890117645264,
      "learning_rate": 1.9574553746405445e-05,
      "loss": 2.5366,
      "step": 7610
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1114099025726318,
      "learning_rate": 1.9574434918732517e-05,
      "loss": 2.5262,
      "step": 7611
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9997175931930542,
      "learning_rate": 1.9574316074828324e-05,
      "loss": 3.0135,
      "step": 7612
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1102054119110107,
      "learning_rate": 1.957419721469307e-05,
      "loss": 2.6502,
      "step": 7613
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9940444231033325,
      "learning_rate": 1.9574078338326944e-05,
      "loss": 2.5919,
      "step": 7614
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9415265917778015,
      "learning_rate": 1.9573959445730157e-05,
      "loss": 2.4823,
      "step": 7615
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0657719373703003,
      "learning_rate": 1.9573840536902908e-05,
      "loss": 2.4682,
      "step": 7616
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1573618650436401,
      "learning_rate": 1.95737216118454e-05,
      "loss": 2.4098,
      "step": 7617
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.036080002784729,
      "learning_rate": 1.9573602670557835e-05,
      "loss": 2.5601,
      "step": 7618
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.030830979347229,
      "learning_rate": 1.957348371304041e-05,
      "loss": 2.539,
      "step": 7619
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.142723798751831,
      "learning_rate": 1.9573364739293334e-05,
      "loss": 2.6188,
      "step": 7620
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0046982765197754,
      "learning_rate": 1.95732457493168e-05,
      "loss": 2.6316,
      "step": 7621
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0860772132873535,
      "learning_rate": 1.957312674311102e-05,
      "loss": 2.5853,
      "step": 7622
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0206265449523926,
      "learning_rate": 1.9573007720676187e-05,
      "loss": 2.5729,
      "step": 7623
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0293554067611694,
      "learning_rate": 1.9572888682012505e-05,
      "loss": 2.643,
      "step": 7624
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0510286092758179,
      "learning_rate": 1.9572769627120177e-05,
      "loss": 2.795,
      "step": 7625
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0333420038223267,
      "learning_rate": 1.957265055599941e-05,
      "loss": 2.6237,
      "step": 7626
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9168252348899841,
      "learning_rate": 1.9572531468650395e-05,
      "loss": 2.8328,
      "step": 7627
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0536549091339111,
      "learning_rate": 1.9572412365073337e-05,
      "loss": 2.5909,
      "step": 7628
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9990038275718689,
      "learning_rate": 1.9572293245268446e-05,
      "loss": 2.5755,
      "step": 7629
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9907785058021545,
      "learning_rate": 1.9572174109235914e-05,
      "loss": 2.587,
      "step": 7630
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9593562483787537,
      "learning_rate": 1.9572054956975946e-05,
      "loss": 2.5623,
      "step": 7631
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.003739833831787,
      "learning_rate": 1.957193578848875e-05,
      "loss": 2.6023,
      "step": 7632
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1083163022994995,
      "learning_rate": 1.957181660377452e-05,
      "loss": 2.5527,
      "step": 7633
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.958832323551178,
      "learning_rate": 1.9571697402833463e-05,
      "loss": 2.7719,
      "step": 7634
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0419700145721436,
      "learning_rate": 1.9571578185665777e-05,
      "loss": 2.4937,
      "step": 7635
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0205988883972168,
      "learning_rate": 1.957145895227167e-05,
      "loss": 2.4959,
      "step": 7636
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.060036063194275,
      "learning_rate": 1.9571339702651337e-05,
      "loss": 2.6118,
      "step": 7637
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0618360042572021,
      "learning_rate": 1.9571220436804985e-05,
      "loss": 2.628,
      "step": 7638
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9864645600318909,
      "learning_rate": 1.9571101154732813e-05,
      "loss": 2.6564,
      "step": 7639
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9368476271629333,
      "learning_rate": 1.9570981856435027e-05,
      "loss": 2.6478,
      "step": 7640
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0901726484298706,
      "learning_rate": 1.9570862541911827e-05,
      "loss": 2.7825,
      "step": 7641
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9275378584861755,
      "learning_rate": 1.9570743211163416e-05,
      "loss": 2.5604,
      "step": 7642
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0208954811096191,
      "learning_rate": 1.9570623864189998e-05,
      "loss": 2.7894,
      "step": 7643
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9784252047538757,
      "learning_rate": 1.9570504500991768e-05,
      "loss": 2.5075,
      "step": 7644
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9375983476638794,
      "learning_rate": 1.9570385121568937e-05,
      "loss": 2.6979,
      "step": 7645
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9563536643981934,
      "learning_rate": 1.9570265725921702e-05,
      "loss": 2.5892,
      "step": 7646
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0641356706619263,
      "learning_rate": 1.9570146314050272e-05,
      "loss": 2.9301,
      "step": 7647
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0752860307693481,
      "learning_rate": 1.957002688595484e-05,
      "loss": 2.5333,
      "step": 7648
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4649428129196167,
      "learning_rate": 1.9569907441635612e-05,
      "loss": 2.546,
      "step": 7649
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1040078401565552,
      "learning_rate": 1.9569787981092796e-05,
      "loss": 2.6119,
      "step": 7650
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9454475045204163,
      "learning_rate": 1.9569668504326587e-05,
      "loss": 2.617,
      "step": 7651
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0317320823669434,
      "learning_rate": 1.9569549011337194e-05,
      "loss": 2.6462,
      "step": 7652
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9920442700386047,
      "learning_rate": 1.9569429502124816e-05,
      "loss": 2.5335,
      "step": 7653
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0277024507522583,
      "learning_rate": 1.9569309976689653e-05,
      "loss": 2.5249,
      "step": 7654
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0912210941314697,
      "learning_rate": 1.9569190435031912e-05,
      "loss": 2.5349,
      "step": 7655
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.050655484199524,
      "learning_rate": 1.9569070877151796e-05,
      "loss": 2.7714,
      "step": 7656
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.922270655632019,
      "learning_rate": 1.9568951303049504e-05,
      "loss": 2.6614,
      "step": 7657
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.051450490951538,
      "learning_rate": 1.956883171272524e-05,
      "loss": 2.8037,
      "step": 7658
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.077273964881897,
      "learning_rate": 1.956871210617921e-05,
      "loss": 2.7186,
      "step": 7659
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1850457191467285,
      "learning_rate": 1.9568592483411612e-05,
      "loss": 2.6624,
      "step": 7660
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9707321524620056,
      "learning_rate": 1.9568472844422653e-05,
      "loss": 2.6978,
      "step": 7661
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.229302167892456,
      "learning_rate": 1.956835318921253e-05,
      "loss": 2.6305,
      "step": 7662
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9980975985527039,
      "learning_rate": 1.9568233517781453e-05,
      "loss": 2.7665,
      "step": 7663
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.974941611289978,
      "learning_rate": 1.956811383012962e-05,
      "loss": 2.5057,
      "step": 7664
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.25391685962677,
      "learning_rate": 1.9567994126257237e-05,
      "loss": 2.6545,
      "step": 7665
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9312484860420227,
      "learning_rate": 1.9567874406164504e-05,
      "loss": 2.57,
      "step": 7666
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0144857168197632,
      "learning_rate": 1.9567754669851625e-05,
      "loss": 2.7393,
      "step": 7667
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1773262023925781,
      "learning_rate": 1.9567634917318805e-05,
      "loss": 2.7473,
      "step": 7668
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2078502178192139,
      "learning_rate": 1.956751514856624e-05,
      "loss": 2.7317,
      "step": 7669
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0804983377456665,
      "learning_rate": 1.9567395363594146e-05,
      "loss": 2.7046,
      "step": 7670
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.951701819896698,
      "learning_rate": 1.9567275562402714e-05,
      "loss": 2.375,
      "step": 7671
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0148429870605469,
      "learning_rate": 1.9567155744992153e-05,
      "loss": 2.5328,
      "step": 7672
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0000123977661133,
      "learning_rate": 1.9567035911362663e-05,
      "loss": 2.6195,
      "step": 7673
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0245755910873413,
      "learning_rate": 1.956691606151445e-05,
      "loss": 2.7211,
      "step": 7674
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9457497596740723,
      "learning_rate": 1.9566796195447716e-05,
      "loss": 2.5991,
      "step": 7675
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.942130982875824,
      "learning_rate": 1.9566676313162664e-05,
      "loss": 2.7495,
      "step": 7676
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1055349111557007,
      "learning_rate": 1.9566556414659497e-05,
      "loss": 2.7687,
      "step": 7677
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0283582210540771,
      "learning_rate": 1.9566436499938417e-05,
      "loss": 2.713,
      "step": 7678
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0957448482513428,
      "learning_rate": 1.9566316568999634e-05,
      "loss": 2.6112,
      "step": 7679
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.059241771697998,
      "learning_rate": 1.956619662184334e-05,
      "loss": 2.6702,
      "step": 7680
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9511370658874512,
      "learning_rate": 1.956607665846975e-05,
      "loss": 2.574,
      "step": 7681
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9161285758018494,
      "learning_rate": 1.9565956678879057e-05,
      "loss": 2.6784,
      "step": 7682
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.015371322631836,
      "learning_rate": 1.9565836683071472e-05,
      "loss": 2.4552,
      "step": 7683
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4124892950057983,
      "learning_rate": 1.9565716671047194e-05,
      "loss": 2.7219,
      "step": 7684
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0105009078979492,
      "learning_rate": 1.956559664280643e-05,
      "loss": 2.5837,
      "step": 7685
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0989209413528442,
      "learning_rate": 1.9565476598349385e-05,
      "loss": 2.5551,
      "step": 7686
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.04952871799469,
      "learning_rate": 1.956535653767625e-05,
      "loss": 2.7136,
      "step": 7687
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0292019844055176,
      "learning_rate": 1.9565236460787247e-05,
      "loss": 2.6695,
      "step": 7688
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9945905804634094,
      "learning_rate": 1.956511636768257e-05,
      "loss": 2.7066,
      "step": 7689
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.162585735321045,
      "learning_rate": 1.9564996258362415e-05,
      "loss": 2.6917,
      "step": 7690
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1284762620925903,
      "learning_rate": 1.9564876132827e-05,
      "loss": 2.8588,
      "step": 7691
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0152884721755981,
      "learning_rate": 1.956475599107652e-05,
      "loss": 2.6802,
      "step": 7692
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0729326009750366,
      "learning_rate": 1.956463583311118e-05,
      "loss": 2.3982,
      "step": 7693
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9895924925804138,
      "learning_rate": 1.956451565893119e-05,
      "loss": 2.4891,
      "step": 7694
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0166202783584595,
      "learning_rate": 1.956439546853674e-05,
      "loss": 2.6429,
      "step": 7695
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4755146503448486,
      "learning_rate": 1.9564275261928046e-05,
      "loss": 2.5098,
      "step": 7696
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0654164552688599,
      "learning_rate": 1.956415503910531e-05,
      "loss": 2.859,
      "step": 7697
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0163688659667969,
      "learning_rate": 1.956403480006873e-05,
      "loss": 2.6285,
      "step": 7698
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0054813623428345,
      "learning_rate": 1.9563914544818513e-05,
      "loss": 2.5407,
      "step": 7699
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.076675534248352,
      "learning_rate": 1.9563794273354867e-05,
      "loss": 2.8632,
      "step": 7700
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9965616464614868,
      "learning_rate": 1.956367398567799e-05,
      "loss": 2.4758,
      "step": 7701
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9477538466453552,
      "learning_rate": 1.956355368178809e-05,
      "loss": 2.4639,
      "step": 7702
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0007251501083374,
      "learning_rate": 1.9563433361685366e-05,
      "loss": 2.6944,
      "step": 7703
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0431455373764038,
      "learning_rate": 1.9563313025370028e-05,
      "loss": 2.7049,
      "step": 7704
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.08955979347229,
      "learning_rate": 1.9563192672842276e-05,
      "loss": 2.6286,
      "step": 7705
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.060799479484558,
      "learning_rate": 1.9563072304102317e-05,
      "loss": 2.6332,
      "step": 7706
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0979348421096802,
      "learning_rate": 1.956295191915035e-05,
      "loss": 2.4208,
      "step": 7707
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.225368618965149,
      "learning_rate": 1.9562831517986583e-05,
      "loss": 2.4848,
      "step": 7708
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0013349056243896,
      "learning_rate": 1.956271110061122e-05,
      "loss": 2.6184,
      "step": 7709
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9384333491325378,
      "learning_rate": 1.956259066702446e-05,
      "loss": 2.6614,
      "step": 7710
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.068626880645752,
      "learning_rate": 1.9562470217226518e-05,
      "loss": 2.7185,
      "step": 7711
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0315759181976318,
      "learning_rate": 1.9562349751217588e-05,
      "loss": 2.6016,
      "step": 7712
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.041197419166565,
      "learning_rate": 1.956222926899788e-05,
      "loss": 2.6141,
      "step": 7713
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9721373915672302,
      "learning_rate": 1.95621087705676e-05,
      "loss": 2.4702,
      "step": 7714
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9051092267036438,
      "learning_rate": 1.9561988255926943e-05,
      "loss": 2.516,
      "step": 7715
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.904009222984314,
      "learning_rate": 1.956186772507612e-05,
      "loss": 2.6764,
      "step": 7716
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.968887209892273,
      "learning_rate": 1.9561747178015332e-05,
      "loss": 2.5156,
      "step": 7717
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0841455459594727,
      "learning_rate": 1.956162661474479e-05,
      "loss": 2.6589,
      "step": 7718
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0764211416244507,
      "learning_rate": 1.9561506035264694e-05,
      "loss": 2.4386,
      "step": 7719
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9819577932357788,
      "learning_rate": 1.9561385439575247e-05,
      "loss": 2.5704,
      "step": 7720
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0101280212402344,
      "learning_rate": 1.9561264827676652e-05,
      "loss": 2.759,
      "step": 7721
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0407627820968628,
      "learning_rate": 1.956114419956912e-05,
      "loss": 2.6753,
      "step": 7722
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0443257093429565,
      "learning_rate": 1.9561023555252852e-05,
      "loss": 2.4509,
      "step": 7723
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9513802528381348,
      "learning_rate": 1.956090289472805e-05,
      "loss": 2.7575,
      "step": 7724
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0029244422912598,
      "learning_rate": 1.956078221799492e-05,
      "loss": 2.4458,
      "step": 7725
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0084244012832642,
      "learning_rate": 1.956066152505367e-05,
      "loss": 2.6339,
      "step": 7726
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1330689191818237,
      "learning_rate": 1.9560540815904503e-05,
      "loss": 2.5717,
      "step": 7727
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9634780883789062,
      "learning_rate": 1.956042009054762e-05,
      "loss": 2.5186,
      "step": 7728
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.950482189655304,
      "learning_rate": 1.9560299348983233e-05,
      "loss": 2.6556,
      "step": 7729
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9922811388969421,
      "learning_rate": 1.9560178591211537e-05,
      "loss": 2.6294,
      "step": 7730
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.909267246723175,
      "learning_rate": 1.9560057817232744e-05,
      "loss": 2.6316,
      "step": 7731
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0669959783554077,
      "learning_rate": 1.9559937027047056e-05,
      "loss": 2.5479,
      "step": 7732
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.112472653388977,
      "learning_rate": 1.9559816220654677e-05,
      "loss": 2.7848,
      "step": 7733
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9753508567810059,
      "learning_rate": 1.9559695398055813e-05,
      "loss": 2.7546,
      "step": 7734
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9457769989967346,
      "learning_rate": 1.955957455925067e-05,
      "loss": 2.4187,
      "step": 7735
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9610393047332764,
      "learning_rate": 1.955945370423945e-05,
      "loss": 2.4101,
      "step": 7736
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0365104675292969,
      "learning_rate": 1.9559332833022363e-05,
      "loss": 2.4316,
      "step": 7737
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0983765125274658,
      "learning_rate": 1.955921194559961e-05,
      "loss": 2.4608,
      "step": 7738
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9920737743377686,
      "learning_rate": 1.9559091041971397e-05,
      "loss": 2.4847,
      "step": 7739
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0356190204620361,
      "learning_rate": 1.9558970122137928e-05,
      "loss": 2.4486,
      "step": 7740
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0897542238235474,
      "learning_rate": 1.955884918609941e-05,
      "loss": 2.8743,
      "step": 7741
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1280122995376587,
      "learning_rate": 1.9558728233856042e-05,
      "loss": 2.5007,
      "step": 7742
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9638646245002747,
      "learning_rate": 1.9558607265408035e-05,
      "loss": 2.507,
      "step": 7743
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9899725317955017,
      "learning_rate": 1.9558486280755593e-05,
      "loss": 2.5319,
      "step": 7744
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0124856233596802,
      "learning_rate": 1.955836527989892e-05,
      "loss": 2.7269,
      "step": 7745
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9379564523696899,
      "learning_rate": 1.9558244262838226e-05,
      "loss": 2.5674,
      "step": 7746
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9695178270339966,
      "learning_rate": 1.9558123229573706e-05,
      "loss": 2.5326,
      "step": 7747
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1094872951507568,
      "learning_rate": 1.9558002180105575e-05,
      "loss": 2.637,
      "step": 7748
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9919269680976868,
      "learning_rate": 1.9557881114434032e-05,
      "loss": 2.7781,
      "step": 7749
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0232067108154297,
      "learning_rate": 1.9557760032559287e-05,
      "loss": 2.5731,
      "step": 7750
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9102962613105774,
      "learning_rate": 1.955763893448154e-05,
      "loss": 2.4145,
      "step": 7751
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1077951192855835,
      "learning_rate": 1.9557517820201e-05,
      "loss": 2.4967,
      "step": 7752
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1140458583831787,
      "learning_rate": 1.9557396689717874e-05,
      "loss": 2.5119,
      "step": 7753
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.980148434638977,
      "learning_rate": 1.9557275543032367e-05,
      "loss": 2.593,
      "step": 7754
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9699826836585999,
      "learning_rate": 1.9557154380144677e-05,
      "loss": 2.5854,
      "step": 7755
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0224305391311646,
      "learning_rate": 1.9557033201055016e-05,
      "loss": 2.687,
      "step": 7756
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.083844780921936,
      "learning_rate": 1.955691200576359e-05,
      "loss": 2.6827,
      "step": 7757
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.109026551246643,
      "learning_rate": 1.95567907942706e-05,
      "loss": 2.7129,
      "step": 7758
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0469101667404175,
      "learning_rate": 1.9556669566576253e-05,
      "loss": 2.6525,
      "step": 7759
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.012192726135254,
      "learning_rate": 1.9556548322680758e-05,
      "loss": 2.5829,
      "step": 7760
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9621270298957825,
      "learning_rate": 1.955642706258432e-05,
      "loss": 2.2778,
      "step": 7761
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0010875463485718,
      "learning_rate": 1.9556305786287137e-05,
      "loss": 2.6823,
      "step": 7762
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0300796031951904,
      "learning_rate": 1.9556184493789424e-05,
      "loss": 2.651,
      "step": 7763
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0871411561965942,
      "learning_rate": 1.9556063185091383e-05,
      "loss": 2.4776,
      "step": 7764
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.938935399055481,
      "learning_rate": 1.955594186019322e-05,
      "loss": 2.5844,
      "step": 7765
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9687193036079407,
      "learning_rate": 1.9555820519095137e-05,
      "loss": 2.8347,
      "step": 7766
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0904513597488403,
      "learning_rate": 1.9555699161797347e-05,
      "loss": 2.6773,
      "step": 7767
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1074934005737305,
      "learning_rate": 1.955557778830005e-05,
      "loss": 2.5351,
      "step": 7768
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.034420371055603,
      "learning_rate": 1.955545639860345e-05,
      "loss": 2.7399,
      "step": 7769
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0084699392318726,
      "learning_rate": 1.955533499270776e-05,
      "loss": 2.7662,
      "step": 7770
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0908750295639038,
      "learning_rate": 1.9555213570613182e-05,
      "loss": 2.6572,
      "step": 7771
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0190941095352173,
      "learning_rate": 1.955509213231992e-05,
      "loss": 2.8911,
      "step": 7772
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.01816987991333,
      "learning_rate": 1.955497067782818e-05,
      "loss": 2.6849,
      "step": 7773
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9796877503395081,
      "learning_rate": 1.9554849207138175e-05,
      "loss": 2.7718,
      "step": 7774
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.031126618385315,
      "learning_rate": 1.95547277202501e-05,
      "loss": 2.4878,
      "step": 7775
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2015761137008667,
      "learning_rate": 1.955460621716417e-05,
      "loss": 2.7587,
      "step": 7776
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9964413642883301,
      "learning_rate": 1.955448469788059e-05,
      "loss": 2.663,
      "step": 7777
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0223495960235596,
      "learning_rate": 1.955436316239956e-05,
      "loss": 2.3505,
      "step": 7778
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9991697669029236,
      "learning_rate": 1.955424161072129e-05,
      "loss": 2.6229,
      "step": 7779
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1307655572891235,
      "learning_rate": 1.9554120042845984e-05,
      "loss": 2.7497,
      "step": 7780
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9672645926475525,
      "learning_rate": 1.955399845877385e-05,
      "loss": 2.6322,
      "step": 7781
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0463199615478516,
      "learning_rate": 1.9553876858505094e-05,
      "loss": 2.643,
      "step": 7782
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.128171443939209,
      "learning_rate": 1.9553755242039925e-05,
      "loss": 2.7355,
      "step": 7783
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0526257753372192,
      "learning_rate": 1.955363360937854e-05,
      "loss": 2.6679,
      "step": 7784
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9367126822471619,
      "learning_rate": 1.9553511960521158e-05,
      "loss": 2.5469,
      "step": 7785
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9689605832099915,
      "learning_rate": 1.9553390295467976e-05,
      "loss": 2.7518,
      "step": 7786
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0038081407546997,
      "learning_rate": 1.95532686142192e-05,
      "loss": 2.5717,
      "step": 7787
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0340044498443604,
      "learning_rate": 1.9553146916775045e-05,
      "loss": 2.5652,
      "step": 7788
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0491365194320679,
      "learning_rate": 1.9553025203135708e-05,
      "loss": 2.6246,
      "step": 7789
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0860345363616943,
      "learning_rate": 1.9552903473301396e-05,
      "loss": 2.7781,
      "step": 7790
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9830337166786194,
      "learning_rate": 1.9552781727272322e-05,
      "loss": 2.5806,
      "step": 7791
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9949455857276917,
      "learning_rate": 1.9552659965048687e-05,
      "loss": 2.6835,
      "step": 7792
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0337599515914917,
      "learning_rate": 1.95525381866307e-05,
      "loss": 2.3541,
      "step": 7793
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9856112003326416,
      "learning_rate": 1.9552416392018564e-05,
      "loss": 2.5463,
      "step": 7794
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9968098998069763,
      "learning_rate": 1.9552294581212486e-05,
      "loss": 2.5015,
      "step": 7795
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0140407085418701,
      "learning_rate": 1.955217275421268e-05,
      "loss": 2.6059,
      "step": 7796
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0740587711334229,
      "learning_rate": 1.9552050911019346e-05,
      "loss": 2.82,
      "step": 7797
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0542042255401611,
      "learning_rate": 1.9551929051632687e-05,
      "loss": 2.762,
      "step": 7798
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9583717584609985,
      "learning_rate": 1.955180717605292e-05,
      "loss": 2.6592,
      "step": 7799
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9990049600601196,
      "learning_rate": 1.955168528428024e-05,
      "loss": 2.666,
      "step": 7800
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0730432271957397,
      "learning_rate": 1.955156337631486e-05,
      "loss": 2.6251,
      "step": 7801
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0001122951507568,
      "learning_rate": 1.9551441452156988e-05,
      "loss": 2.6099,
      "step": 7802
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.049819827079773,
      "learning_rate": 1.9551319511806826e-05,
      "loss": 2.538,
      "step": 7803
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1624740362167358,
      "learning_rate": 1.9551197555264587e-05,
      "loss": 2.6493,
      "step": 7804
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1141935586929321,
      "learning_rate": 1.9551075582530472e-05,
      "loss": 2.5472,
      "step": 7805
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0639499425888062,
      "learning_rate": 1.9550953593604692e-05,
      "loss": 2.3197,
      "step": 7806
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9401991367340088,
      "learning_rate": 1.955083158848745e-05,
      "loss": 2.7317,
      "step": 7807
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.130068302154541,
      "learning_rate": 1.9550709567178954e-05,
      "loss": 2.7937,
      "step": 7808
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0242528915405273,
      "learning_rate": 1.955058752967941e-05,
      "loss": 2.627,
      "step": 7809
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0518555641174316,
      "learning_rate": 1.955046547598903e-05,
      "loss": 2.6291,
      "step": 7810
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.034445881843567,
      "learning_rate": 1.9550343406108014e-05,
      "loss": 2.7059,
      "step": 7811
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0189611911773682,
      "learning_rate": 1.9550221320036575e-05,
      "loss": 2.6924,
      "step": 7812
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.188470482826233,
      "learning_rate": 1.9550099217774913e-05,
      "loss": 2.9282,
      "step": 7813
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.037818431854248,
      "learning_rate": 1.9549977099323244e-05,
      "loss": 2.5688,
      "step": 7814
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9455304741859436,
      "learning_rate": 1.9549854964681766e-05,
      "loss": 2.5246,
      "step": 7815
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9823470115661621,
      "learning_rate": 1.954973281385069e-05,
      "loss": 2.5175,
      "step": 7816
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0486654043197632,
      "learning_rate": 1.9549610646830226e-05,
      "loss": 2.5609,
      "step": 7817
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.012266755104065,
      "learning_rate": 1.9549488463620577e-05,
      "loss": 2.6023,
      "step": 7818
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9903583526611328,
      "learning_rate": 1.9549366264221953e-05,
      "loss": 2.3911,
      "step": 7819
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9779478311538696,
      "learning_rate": 1.9549244048634554e-05,
      "loss": 2.601,
      "step": 7820
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9607028365135193,
      "learning_rate": 1.9549121816858598e-05,
      "loss": 2.5044,
      "step": 7821
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0167827606201172,
      "learning_rate": 1.9548999568894286e-05,
      "loss": 2.5185,
      "step": 7822
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0408424139022827,
      "learning_rate": 1.9548877304741826e-05,
      "loss": 2.7219,
      "step": 7823
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.042022943496704,
      "learning_rate": 1.9548755024401425e-05,
      "loss": 2.505,
      "step": 7824
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0476592779159546,
      "learning_rate": 1.9548632727873293e-05,
      "loss": 2.3642,
      "step": 7825
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.062005877494812,
      "learning_rate": 1.9548510415157632e-05,
      "loss": 2.6948,
      "step": 7826
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0434755086898804,
      "learning_rate": 1.9548388086254654e-05,
      "loss": 2.6616,
      "step": 7827
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0553609132766724,
      "learning_rate": 1.9548265741164564e-05,
      "loss": 2.5757,
      "step": 7828
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9499902725219727,
      "learning_rate": 1.9548143379887573e-05,
      "loss": 2.6163,
      "step": 7829
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9915257692337036,
      "learning_rate": 1.9548021002423884e-05,
      "loss": 2.7578,
      "step": 7830
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9689756631851196,
      "learning_rate": 1.9547898608773707e-05,
      "loss": 2.6106,
      "step": 7831
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9248933792114258,
      "learning_rate": 1.954777619893725e-05,
      "loss": 2.4322,
      "step": 7832
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9637295603752136,
      "learning_rate": 1.9547653772914716e-05,
      "loss": 2.51,
      "step": 7833
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9558057188987732,
      "learning_rate": 1.9547531330706318e-05,
      "loss": 2.7078,
      "step": 7834
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0913424491882324,
      "learning_rate": 1.9547408872312256e-05,
      "loss": 2.4963,
      "step": 7835
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9699386954307556,
      "learning_rate": 1.954728639773275e-05,
      "loss": 2.7065,
      "step": 7836
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9957926869392395,
      "learning_rate": 1.9547163906968e-05,
      "loss": 2.8042,
      "step": 7837
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9897993206977844,
      "learning_rate": 1.954704140001821e-05,
      "loss": 2.4483,
      "step": 7838
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1768746376037598,
      "learning_rate": 1.9546918876883595e-05,
      "loss": 2.4497,
      "step": 7839
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9032050371170044,
      "learning_rate": 1.9546796337564354e-05,
      "loss": 2.6792,
      "step": 7840
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0366305112838745,
      "learning_rate": 1.954667378206071e-05,
      "loss": 2.6168,
      "step": 7841
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0070315599441528,
      "learning_rate": 1.9546551210372853e-05,
      "loss": 2.5104,
      "step": 7842
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0462212562561035,
      "learning_rate": 1.9546428622501003e-05,
      "loss": 2.7845,
      "step": 7843
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0381362438201904,
      "learning_rate": 1.954630601844536e-05,
      "loss": 2.5473,
      "step": 7844
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9734607338905334,
      "learning_rate": 1.954618339820614e-05,
      "loss": 2.848,
      "step": 7845
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0981532335281372,
      "learning_rate": 1.9546060761783543e-05,
      "loss": 2.8118,
      "step": 7846
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9479086399078369,
      "learning_rate": 1.9545938109177782e-05,
      "loss": 2.5989,
      "step": 7847
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.024605393409729,
      "learning_rate": 1.954581544038906e-05,
      "loss": 2.8146,
      "step": 7848
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9667680859565735,
      "learning_rate": 1.9545692755417596e-05,
      "loss": 2.734,
      "step": 7849
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9336836338043213,
      "learning_rate": 1.9545570054263586e-05,
      "loss": 2.6514,
      "step": 7850
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9816272258758545,
      "learning_rate": 1.954544733692724e-05,
      "loss": 2.5981,
      "step": 7851
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9734528064727783,
      "learning_rate": 1.954532460340877e-05,
      "loss": 2.5526,
      "step": 7852
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0156170129776,
      "learning_rate": 1.954520185370838e-05,
      "loss": 2.4679,
      "step": 7853
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.986132025718689,
      "learning_rate": 1.9545079087826284e-05,
      "loss": 2.6921,
      "step": 7854
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0140968561172485,
      "learning_rate": 1.9544956305762685e-05,
      "loss": 2.5367,
      "step": 7855
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9328780770301819,
      "learning_rate": 1.954483350751779e-05,
      "loss": 2.49,
      "step": 7856
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0176554918289185,
      "learning_rate": 1.9544710693091814e-05,
      "loss": 2.872,
      "step": 7857
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0749337673187256,
      "learning_rate": 1.954458786248496e-05,
      "loss": 2.69,
      "step": 7858
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9196096062660217,
      "learning_rate": 1.954446501569744e-05,
      "loss": 2.5617,
      "step": 7859
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.285292863845825,
      "learning_rate": 1.9544342152729453e-05,
      "loss": 2.4885,
      "step": 7860
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9875469207763672,
      "learning_rate": 1.954421927358122e-05,
      "loss": 2.6062,
      "step": 7861
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0076419115066528,
      "learning_rate": 1.954409637825294e-05,
      "loss": 2.5554,
      "step": 7862
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0336997509002686,
      "learning_rate": 1.9543973466744825e-05,
      "loss": 2.4865,
      "step": 7863
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1016273498535156,
      "learning_rate": 1.9543850539057085e-05,
      "loss": 2.6079,
      "step": 7864
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0493371486663818,
      "learning_rate": 1.9543727595189924e-05,
      "loss": 2.6243,
      "step": 7865
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9498925805091858,
      "learning_rate": 1.954360463514355e-05,
      "loss": 2.5525,
      "step": 7866
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9657260775566101,
      "learning_rate": 1.9543481658918176e-05,
      "loss": 2.514,
      "step": 7867
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9821090698242188,
      "learning_rate": 1.9543358666514012e-05,
      "loss": 2.7045,
      "step": 7868
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1229435205459595,
      "learning_rate": 1.954323565793126e-05,
      "loss": 2.5464,
      "step": 7869
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0159639120101929,
      "learning_rate": 1.9543112633170134e-05,
      "loss": 2.7622,
      "step": 7870
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9798898100852966,
      "learning_rate": 1.954298959223084e-05,
      "loss": 2.6621,
      "step": 7871
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0268504619598389,
      "learning_rate": 1.9542866535113582e-05,
      "loss": 2.7059,
      "step": 7872
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1686227321624756,
      "learning_rate": 1.9542743461818578e-05,
      "loss": 2.61,
      "step": 7873
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3117119073867798,
      "learning_rate": 1.954262037234603e-05,
      "loss": 2.4458,
      "step": 7874
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9856606125831604,
      "learning_rate": 1.9542497266696153e-05,
      "loss": 2.6787,
      "step": 7875
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0299646854400635,
      "learning_rate": 1.9542374144869146e-05,
      "loss": 2.6014,
      "step": 7876
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0158917903900146,
      "learning_rate": 1.9542251006865226e-05,
      "loss": 2.4973,
      "step": 7877
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.037002444267273,
      "learning_rate": 1.95421278526846e-05,
      "loss": 2.5823,
      "step": 7878
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9781131148338318,
      "learning_rate": 1.954200468232747e-05,
      "loss": 2.6104,
      "step": 7879
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.020501732826233,
      "learning_rate": 1.9541881495794057e-05,
      "loss": 2.795,
      "step": 7880
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0279427766799927,
      "learning_rate": 1.9541758293084562e-05,
      "loss": 2.7751,
      "step": 7881
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.163881540298462,
      "learning_rate": 1.9541635074199194e-05,
      "loss": 2.5715,
      "step": 7882
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0975284576416016,
      "learning_rate": 1.9541511839138162e-05,
      "loss": 2.7043,
      "step": 7883
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9811083674430847,
      "learning_rate": 1.9541388587901675e-05,
      "loss": 2.6122,
      "step": 7884
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9679612517356873,
      "learning_rate": 1.9541265320489945e-05,
      "loss": 2.5621,
      "step": 7885
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9735601544380188,
      "learning_rate": 1.954114203690318e-05,
      "loss": 2.6223,
      "step": 7886
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9469065070152283,
      "learning_rate": 1.9541018737141587e-05,
      "loss": 2.642,
      "step": 7887
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9384530186653137,
      "learning_rate": 1.9540895421205376e-05,
      "loss": 2.6729,
      "step": 7888
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0068726539611816,
      "learning_rate": 1.9540772089094755e-05,
      "loss": 2.6522,
      "step": 7889
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0080763101577759,
      "learning_rate": 1.9540648740809932e-05,
      "loss": 2.6177,
      "step": 7890
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0620617866516113,
      "learning_rate": 1.9540525376351123e-05,
      "loss": 2.694,
      "step": 7891
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9960637092590332,
      "learning_rate": 1.954040199571853e-05,
      "loss": 2.8422,
      "step": 7892
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9581617116928101,
      "learning_rate": 1.9540278598912365e-05,
      "loss": 2.5717,
      "step": 7893
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9404550790786743,
      "learning_rate": 1.9540155185932836e-05,
      "loss": 2.567,
      "step": 7894
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0671766996383667,
      "learning_rate": 1.9540031756780154e-05,
      "loss": 2.7031,
      "step": 7895
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9444154500961304,
      "learning_rate": 1.9539908311454524e-05,
      "loss": 2.4776,
      "step": 7896
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9865649938583374,
      "learning_rate": 1.953978484995616e-05,
      "loss": 2.6128,
      "step": 7897
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1663897037506104,
      "learning_rate": 1.9539661372285273e-05,
      "loss": 2.6837,
      "step": 7898
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.086053490638733,
      "learning_rate": 1.9539537878442067e-05,
      "loss": 2.6456,
      "step": 7899
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0257880687713623,
      "learning_rate": 1.953941436842675e-05,
      "loss": 2.7948,
      "step": 7900
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9474539756774902,
      "learning_rate": 1.9539290842239537e-05,
      "loss": 2.5622,
      "step": 7901
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9589610695838928,
      "learning_rate": 1.9539167299880638e-05,
      "loss": 2.4582,
      "step": 7902
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0738847255706787,
      "learning_rate": 1.9539043741350257e-05,
      "loss": 2.5728,
      "step": 7903
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.105455994606018,
      "learning_rate": 1.9538920166648608e-05,
      "loss": 2.4871,
      "step": 7904
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0968378782272339,
      "learning_rate": 1.9538796575775897e-05,
      "loss": 2.8996,
      "step": 7905
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2343281507492065,
      "learning_rate": 1.9538672968732334e-05,
      "loss": 2.5374,
      "step": 7906
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2157001495361328,
      "learning_rate": 1.9538549345518133e-05,
      "loss": 2.8996,
      "step": 7907
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9770490527153015,
      "learning_rate": 1.9538425706133496e-05,
      "loss": 2.6202,
      "step": 7908
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0893819332122803,
      "learning_rate": 1.953830205057864e-05,
      "loss": 2.5331,
      "step": 7909
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9880850315093994,
      "learning_rate": 1.953817837885377e-05,
      "loss": 2.6462,
      "step": 7910
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9814851880073547,
      "learning_rate": 1.95380546909591e-05,
      "loss": 2.6586,
      "step": 7911
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2566653490066528,
      "learning_rate": 1.953793098689483e-05,
      "loss": 2.4738,
      "step": 7912
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.194233775138855,
      "learning_rate": 1.953780726666118e-05,
      "loss": 2.6401,
      "step": 7913
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9722334742546082,
      "learning_rate": 1.953768353025836e-05,
      "loss": 2.6172,
      "step": 7914
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0079761743545532,
      "learning_rate": 1.9537559777686575e-05,
      "loss": 2.5315,
      "step": 7915
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9825881123542786,
      "learning_rate": 1.9537436008946032e-05,
      "loss": 2.6262,
      "step": 7916
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.099913477897644,
      "learning_rate": 1.9537312224036945e-05,
      "loss": 2.5611,
      "step": 7917
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1398264169692993,
      "learning_rate": 1.9537188422959526e-05,
      "loss": 2.6484,
      "step": 7918
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1742382049560547,
      "learning_rate": 1.953706460571398e-05,
      "loss": 2.6975,
      "step": 7919
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0237419605255127,
      "learning_rate": 1.9536940772300523e-05,
      "loss": 2.8029,
      "step": 7920
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.008409857749939,
      "learning_rate": 1.953681692271936e-05,
      "loss": 2.7176,
      "step": 7921
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.905089795589447,
      "learning_rate": 1.95366930569707e-05,
      "loss": 2.685,
      "step": 7922
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9600228667259216,
      "learning_rate": 1.9536569175054755e-05,
      "loss": 2.6456,
      "step": 7923
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0277018547058105,
      "learning_rate": 1.9536445276971735e-05,
      "loss": 2.5107,
      "step": 7924
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9365925192832947,
      "learning_rate": 1.953632136272185e-05,
      "loss": 2.4779,
      "step": 7925
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9064115285873413,
      "learning_rate": 1.9536197432305314e-05,
      "loss": 2.7334,
      "step": 7926
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0836164951324463,
      "learning_rate": 1.9536073485722333e-05,
      "loss": 2.7423,
      "step": 7927
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.979566216468811,
      "learning_rate": 1.9535949522973115e-05,
      "loss": 2.7112,
      "step": 7928
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.055107831954956,
      "learning_rate": 1.953582554405787e-05,
      "loss": 2.8235,
      "step": 7929
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9529341459274292,
      "learning_rate": 1.9535701548976815e-05,
      "loss": 2.5763,
      "step": 7930
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.243726134300232,
      "learning_rate": 1.9535577537730156e-05,
      "loss": 2.5175,
      "step": 7931
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0827840566635132,
      "learning_rate": 1.9535453510318104e-05,
      "loss": 2.7214,
      "step": 7932
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0605818033218384,
      "learning_rate": 1.9535329466740865e-05,
      "loss": 2.7194,
      "step": 7933
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9749034643173218,
      "learning_rate": 1.9535205406998656e-05,
      "loss": 2.6295,
      "step": 7934
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0063400268554688,
      "learning_rate": 1.953508133109168e-05,
      "loss": 2.5259,
      "step": 7935
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9462732076644897,
      "learning_rate": 1.9534957239020156e-05,
      "loss": 2.6655,
      "step": 7936
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.8752523064613342,
      "learning_rate": 1.9534833130784288e-05,
      "loss": 2.5397,
      "step": 7937
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9815176129341125,
      "learning_rate": 1.9534709006384286e-05,
      "loss": 2.6943,
      "step": 7938
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9555512070655823,
      "learning_rate": 1.9534584865820366e-05,
      "loss": 2.6646,
      "step": 7939
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0356547832489014,
      "learning_rate": 1.9534460709092735e-05,
      "loss": 2.5701,
      "step": 7940
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0452919006347656,
      "learning_rate": 1.9534336536201602e-05,
      "loss": 2.2451,
      "step": 7941
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9835161566734314,
      "learning_rate": 1.953421234714718e-05,
      "loss": 2.5679,
      "step": 7942
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0916630029678345,
      "learning_rate": 1.9534088141929678e-05,
      "loss": 2.6015,
      "step": 7943
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1092544794082642,
      "learning_rate": 1.9533963920549307e-05,
      "loss": 2.4313,
      "step": 7944
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0061296224594116,
      "learning_rate": 1.953383968300628e-05,
      "loss": 2.7403,
      "step": 7945
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.12566339969635,
      "learning_rate": 1.95337154293008e-05,
      "loss": 2.6042,
      "step": 7946
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.945716381072998,
      "learning_rate": 1.9533591159433086e-05,
      "loss": 2.5217,
      "step": 7947
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0083568096160889,
      "learning_rate": 1.953346687340335e-05,
      "loss": 2.7188,
      "step": 7948
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.056247353553772,
      "learning_rate": 1.9533342571211792e-05,
      "loss": 2.6754,
      "step": 7949
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0366519689559937,
      "learning_rate": 1.953321825285863e-05,
      "loss": 2.7653,
      "step": 7950
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0348806381225586,
      "learning_rate": 1.9533093918344077e-05,
      "loss": 2.584,
      "step": 7951
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9485321640968323,
      "learning_rate": 1.953296956766834e-05,
      "loss": 2.8018,
      "step": 7952
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1242167949676514,
      "learning_rate": 1.953284520083163e-05,
      "loss": 2.4645,
      "step": 7953
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.990759015083313,
      "learning_rate": 1.9532720817834158e-05,
      "loss": 2.6114,
      "step": 7954
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0870739221572876,
      "learning_rate": 1.9532596418676137e-05,
      "loss": 2.663,
      "step": 7955
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1636794805526733,
      "learning_rate": 1.953247200335777e-05,
      "loss": 2.6496,
      "step": 7956
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9529651999473572,
      "learning_rate": 1.9532347571879283e-05,
      "loss": 2.5252,
      "step": 7957
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9274693727493286,
      "learning_rate": 1.953222312424087e-05,
      "loss": 2.4594,
      "step": 7958
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9386348724365234,
      "learning_rate": 1.9532098660442753e-05,
      "loss": 2.6763,
      "step": 7959
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0888184309005737,
      "learning_rate": 1.953197418048514e-05,
      "loss": 2.6942,
      "step": 7960
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0664573907852173,
      "learning_rate": 1.953184968436824e-05,
      "loss": 2.6258,
      "step": 7961
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0522838830947876,
      "learning_rate": 1.953172517209227e-05,
      "loss": 2.7953,
      "step": 7962
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0186681747436523,
      "learning_rate": 1.9531600643657434e-05,
      "loss": 2.5735,
      "step": 7963
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9628528356552124,
      "learning_rate": 1.9531476099063948e-05,
      "loss": 2.7579,
      "step": 7964
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9334421753883362,
      "learning_rate": 1.953135153831202e-05,
      "loss": 2.419,
      "step": 7965
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4679433107376099,
      "learning_rate": 1.9531226961401865e-05,
      "loss": 2.7369,
      "step": 7966
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.088683009147644,
      "learning_rate": 1.953110236833369e-05,
      "loss": 2.3957,
      "step": 7967
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9884760975837708,
      "learning_rate": 1.9530977759107705e-05,
      "loss": 2.6267,
      "step": 7968
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9609180688858032,
      "learning_rate": 1.9530853133724125e-05,
      "loss": 2.6649,
      "step": 7969
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9449159502983093,
      "learning_rate": 1.9530728492183164e-05,
      "loss": 2.4808,
      "step": 7970
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0013347864151,
      "learning_rate": 1.9530603834485025e-05,
      "loss": 2.6267,
      "step": 7971
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0111668109893799,
      "learning_rate": 1.953047916062993e-05,
      "loss": 2.4235,
      "step": 7972
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.911331832408905,
      "learning_rate": 1.953035447061808e-05,
      "loss": 2.5353,
      "step": 7973
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.039492130279541,
      "learning_rate": 1.9530229764449692e-05,
      "loss": 2.6543,
      "step": 7974
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9829804301261902,
      "learning_rate": 1.9530105042124973e-05,
      "loss": 2.563,
      "step": 7975
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9161291122436523,
      "learning_rate": 1.9529980303644144e-05,
      "loss": 2.6474,
      "step": 7976
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0661548376083374,
      "learning_rate": 1.9529855549007404e-05,
      "loss": 2.5966,
      "step": 7977
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9366517066955566,
      "learning_rate": 1.9529730778214975e-05,
      "loss": 2.5206,
      "step": 7978
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9315374493598938,
      "learning_rate": 1.952960599126706e-05,
      "loss": 2.564,
      "step": 7979
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.096820592880249,
      "learning_rate": 1.9529481188163878e-05,
      "loss": 2.7316,
      "step": 7980
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0616230964660645,
      "learning_rate": 1.9529356368905636e-05,
      "loss": 2.457,
      "step": 7981
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1197285652160645,
      "learning_rate": 1.9529231533492546e-05,
      "loss": 2.7378,
      "step": 7982
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0141923427581787,
      "learning_rate": 1.9529106681924822e-05,
      "loss": 2.6349,
      "step": 7983
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1102184057235718,
      "learning_rate": 1.9528981814202672e-05,
      "loss": 2.5245,
      "step": 7984
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0235713720321655,
      "learning_rate": 1.9528856930326313e-05,
      "loss": 2.6824,
      "step": 7985
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.062072992324829,
      "learning_rate": 1.952873203029595e-05,
      "loss": 2.9982,
      "step": 7986
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9900082349777222,
      "learning_rate": 1.95286071141118e-05,
      "loss": 2.6385,
      "step": 7987
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0122957229614258,
      "learning_rate": 1.9528482181774072e-05,
      "loss": 2.445,
      "step": 7988
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0427892208099365,
      "learning_rate": 1.952835723328298e-05,
      "loss": 2.6657,
      "step": 7989
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.012406587600708,
      "learning_rate": 1.9528232268638737e-05,
      "loss": 2.7251,
      "step": 7990
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1289548873901367,
      "learning_rate": 1.9528107287841548e-05,
      "loss": 2.5764,
      "step": 7991
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9572408199310303,
      "learning_rate": 1.952798229089163e-05,
      "loss": 2.5871,
      "step": 7992
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9989957213401794,
      "learning_rate": 1.9527857277789197e-05,
      "loss": 2.4685,
      "step": 7993
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1280241012573242,
      "learning_rate": 1.9527732248534456e-05,
      "loss": 2.3953,
      "step": 7994
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9568303227424622,
      "learning_rate": 1.952760720312762e-05,
      "loss": 2.6098,
      "step": 7995
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1156667470932007,
      "learning_rate": 1.952748214156891e-05,
      "loss": 2.5654,
      "step": 7996
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.061221957206726,
      "learning_rate": 1.952735706385852e-05,
      "loss": 2.6956,
      "step": 7997
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9699426889419556,
      "learning_rate": 1.9527231969996676e-05,
      "loss": 2.3609,
      "step": 7998
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9461878538131714,
      "learning_rate": 1.9527106859983588e-05,
      "loss": 2.6808,
      "step": 7999
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1325403451919556,
      "learning_rate": 1.9526981733819465e-05,
      "loss": 2.6459,
      "step": 8000
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0587303638458252,
      "learning_rate": 1.952685659150452e-05,
      "loss": 2.7285,
      "step": 8001
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3274221420288086,
      "learning_rate": 1.9526731433038962e-05,
      "loss": 2.5813,
      "step": 8002
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3544024229049683,
      "learning_rate": 1.9526606258423013e-05,
      "loss": 2.7432,
      "step": 8003
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.032599687576294,
      "learning_rate": 1.9526481067656877e-05,
      "loss": 2.7094,
      "step": 8004
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.997327446937561,
      "learning_rate": 1.952635586074077e-05,
      "loss": 2.8486,
      "step": 8005
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9114588499069214,
      "learning_rate": 1.95262306376749e-05,
      "loss": 2.5525,
      "step": 8006
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9942049980163574,
      "learning_rate": 1.952610539845948e-05,
      "loss": 2.7254,
      "step": 8007
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0310800075531006,
      "learning_rate": 1.952598014309473e-05,
      "loss": 2.7809,
      "step": 8008
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.950064480304718,
      "learning_rate": 1.952585487158085e-05,
      "loss": 2.6701,
      "step": 8009
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9137211441993713,
      "learning_rate": 1.952572958391806e-05,
      "loss": 2.7053,
      "step": 8010
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0579479932785034,
      "learning_rate": 1.9525604280106575e-05,
      "loss": 2.5836,
      "step": 8011
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9575719237327576,
      "learning_rate": 1.95254789601466e-05,
      "loss": 2.7548,
      "step": 8012
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9152315258979797,
      "learning_rate": 1.9525353624038353e-05,
      "loss": 2.7756,
      "step": 8013
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0375131368637085,
      "learning_rate": 1.9525228271782045e-05,
      "loss": 2.7249,
      "step": 8014
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0167025327682495,
      "learning_rate": 1.9525102903377886e-05,
      "loss": 2.556,
      "step": 8015
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9011420607566833,
      "learning_rate": 1.9524977518826092e-05,
      "loss": 2.5943,
      "step": 8016
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0275955200195312,
      "learning_rate": 1.9524852118126875e-05,
      "loss": 2.588,
      "step": 8017
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9089562892913818,
      "learning_rate": 1.9524726701280445e-05,
      "loss": 2.3701,
      "step": 8018
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0512211322784424,
      "learning_rate": 1.9524601268287016e-05,
      "loss": 2.4983,
      "step": 8019
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0392001867294312,
      "learning_rate": 1.95244758191468e-05,
      "loss": 2.599,
      "step": 8020
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1830209493637085,
      "learning_rate": 1.9524350353860013e-05,
      "loss": 2.4269,
      "step": 8021
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0204609632492065,
      "learning_rate": 1.9524224872426867e-05,
      "loss": 2.634,
      "step": 8022
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.173748254776001,
      "learning_rate": 1.952409937484757e-05,
      "loss": 2.6527,
      "step": 8023
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1696877479553223,
      "learning_rate": 1.952397386112234e-05,
      "loss": 2.7181,
      "step": 8024
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0913087129592896,
      "learning_rate": 1.9523848331251386e-05,
      "loss": 2.5456,
      "step": 8025
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1655430793762207,
      "learning_rate": 1.9523722785234922e-05,
      "loss": 2.7663,
      "step": 8026
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0069648027420044,
      "learning_rate": 1.9523597223073163e-05,
      "loss": 2.6008,
      "step": 8027
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.140032172203064,
      "learning_rate": 1.952347164476632e-05,
      "loss": 2.4619,
      "step": 8028
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9899635314941406,
      "learning_rate": 1.9523346050314603e-05,
      "loss": 2.4535,
      "step": 8029
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0386929512023926,
      "learning_rate": 1.952322043971823e-05,
      "loss": 2.5942,
      "step": 8030
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1116037368774414,
      "learning_rate": 1.9523094812977415e-05,
      "loss": 2.3225,
      "step": 8031
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1235288381576538,
      "learning_rate": 1.9522969170092362e-05,
      "loss": 2.6216,
      "step": 8032
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0181329250335693,
      "learning_rate": 1.9522843511063295e-05,
      "loss": 2.6185,
      "step": 8033
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.974564790725708,
      "learning_rate": 1.952271783589042e-05,
      "loss": 2.4264,
      "step": 8034
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0852789878845215,
      "learning_rate": 1.952259214457395e-05,
      "loss": 2.5336,
      "step": 8035
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0687086582183838,
      "learning_rate": 1.9522466437114102e-05,
      "loss": 2.484,
      "step": 8036
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.095492959022522,
      "learning_rate": 1.952234071351109e-05,
      "loss": 2.6245,
      "step": 8037
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1689234972000122,
      "learning_rate": 1.9522214973765118e-05,
      "loss": 2.9156,
      "step": 8038
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1162421703338623,
      "learning_rate": 1.952208921787641e-05,
      "loss": 2.4358,
      "step": 8039
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0135239362716675,
      "learning_rate": 1.9521963445845173e-05,
      "loss": 2.6077,
      "step": 8040
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.001458764076233,
      "learning_rate": 1.952183765767162e-05,
      "loss": 2.5928,
      "step": 8041
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9797289371490479,
      "learning_rate": 1.952171185335597e-05,
      "loss": 2.8076,
      "step": 8042
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6768749952316284,
      "learning_rate": 1.952158603289843e-05,
      "loss": 2.7392,
      "step": 8043
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0073820352554321,
      "learning_rate": 1.9521460196299218e-05,
      "loss": 2.5687,
      "step": 8044
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9564648270606995,
      "learning_rate": 1.952133434355854e-05,
      "loss": 2.6254,
      "step": 8045
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9890149235725403,
      "learning_rate": 1.9521208474676617e-05,
      "loss": 2.5147,
      "step": 8046
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9366326332092285,
      "learning_rate": 1.952108258965366e-05,
      "loss": 2.8418,
      "step": 8047
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.113145112991333,
      "learning_rate": 1.9520956688489882e-05,
      "loss": 2.6342,
      "step": 8048
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0160412788391113,
      "learning_rate": 1.95208307711855e-05,
      "loss": 2.5406,
      "step": 8049
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9316552877426147,
      "learning_rate": 1.952070483774072e-05,
      "loss": 2.3382,
      "step": 8050
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9415668845176697,
      "learning_rate": 1.952057888815576e-05,
      "loss": 2.7337,
      "step": 8051
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9422160387039185,
      "learning_rate": 1.9520452922430832e-05,
      "loss": 2.5294,
      "step": 8052
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1106009483337402,
      "learning_rate": 1.9520326940566155e-05,
      "loss": 2.6746,
      "step": 8053
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.016191840171814,
      "learning_rate": 1.9520200942561933e-05,
      "loss": 2.5954,
      "step": 8054
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.077164888381958,
      "learning_rate": 1.9520074928418385e-05,
      "loss": 2.6746,
      "step": 8055
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9620271921157837,
      "learning_rate": 1.9519948898135726e-05,
      "loss": 2.7282,
      "step": 8056
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9912720918655396,
      "learning_rate": 1.951982285171417e-05,
      "loss": 2.7491,
      "step": 8057
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4490852355957031,
      "learning_rate": 1.9519696789153927e-05,
      "loss": 2.5577,
      "step": 8058
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0541831254959106,
      "learning_rate": 1.9519570710455213e-05,
      "loss": 2.5768,
      "step": 8059
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1550949811935425,
      "learning_rate": 1.9519444615618242e-05,
      "loss": 2.555,
      "step": 8060
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.9478890895843506,
      "learning_rate": 1.9519318504643223e-05,
      "loss": 2.5563,
      "step": 8061
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0367274284362793,
      "learning_rate": 1.9519192377530376e-05,
      "loss": 2.7873,
      "step": 8062
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.1198477745056152,
      "learning_rate": 1.9519066234279912e-05,
      "loss": 2.6337,
      "step": 8063
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0960272550582886,
      "learning_rate": 1.9518940074892048e-05,
      "loss": 2.8492,
      "step": 8064
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.01969575881958,
      "learning_rate": 1.9518813899366992e-05,
      "loss": 2.5715,
      "step": 8065
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0140244960784912,
      "learning_rate": 1.9518687707704963e-05,
      "loss": 2.4982,
      "step": 8066
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0066618919372559,
      "learning_rate": 1.9518561499906174e-05,
      "loss": 2.6999,
      "step": 8067
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.08784818649292,
      "learning_rate": 1.9518435275970834e-05,
      "loss": 2.6061,
      "step": 8068
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.944441020488739,
      "learning_rate": 1.9518309035899164e-05,
      "loss": 2.653,
      "step": 8069
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1876106262207031,
      "learning_rate": 1.9518182779691375e-05,
      "loss": 2.6185,
      "step": 8070
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.076227068901062,
      "learning_rate": 1.9518056507347682e-05,
      "loss": 2.6136,
      "step": 8071
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0559509992599487,
      "learning_rate": 1.9517930218868296e-05,
      "loss": 2.9792,
      "step": 8072
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9899964332580566,
      "learning_rate": 1.9517803914253434e-05,
      "loss": 2.7044,
      "step": 8073
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9825529456138611,
      "learning_rate": 1.951767759350331e-05,
      "loss": 2.5264,
      "step": 8074
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9804531931877136,
      "learning_rate": 1.951755125661814e-05,
      "loss": 2.7822,
      "step": 8075
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0900671482086182,
      "learning_rate": 1.951742490359813e-05,
      "loss": 2.8745,
      "step": 8076
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0352121591567993,
      "learning_rate": 1.9517298534443503e-05,
      "loss": 2.5129,
      "step": 8077
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0012197494506836,
      "learning_rate": 1.951717214915447e-05,
      "loss": 2.4173,
      "step": 8078
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9386287927627563,
      "learning_rate": 1.9517045747731246e-05,
      "loss": 2.6339,
      "step": 8079
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0148248672485352,
      "learning_rate": 1.951691933017404e-05,
      "loss": 2.625,
      "step": 8080
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.00048828125,
      "learning_rate": 1.9516792896483077e-05,
      "loss": 2.5668,
      "step": 8081
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9210032820701599,
      "learning_rate": 1.951666644665856e-05,
      "loss": 2.4848,
      "step": 8082
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.004826307296753,
      "learning_rate": 1.9516539980700715e-05,
      "loss": 2.7724,
      "step": 8083
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.990107536315918,
      "learning_rate": 1.9516413498609746e-05,
      "loss": 2.2882,
      "step": 8084
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1088941097259521,
      "learning_rate": 1.9516287000385874e-05,
      "loss": 2.6949,
      "step": 8085
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9947117567062378,
      "learning_rate": 1.951616048602931e-05,
      "loss": 2.7918,
      "step": 8086
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.06171715259552,
      "learning_rate": 1.9516033955540268e-05,
      "loss": 2.662,
      "step": 8087
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0211360454559326,
      "learning_rate": 1.9515907408918965e-05,
      "loss": 2.632,
      "step": 8088
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.985693097114563,
      "learning_rate": 1.9515780846165615e-05,
      "loss": 2.5853,
      "step": 8089
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9983393549919128,
      "learning_rate": 1.951565426728043e-05,
      "loss": 2.389,
      "step": 8090
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9178071618080139,
      "learning_rate": 1.9515527672263626e-05,
      "loss": 2.6694,
      "step": 8091
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.942613959312439,
      "learning_rate": 1.951540106111542e-05,
      "loss": 2.5071,
      "step": 8092
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0065830945968628,
      "learning_rate": 1.9515274433836027e-05,
      "loss": 2.6586,
      "step": 8093
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0116004943847656,
      "learning_rate": 1.9515147790425658e-05,
      "loss": 2.659,
      "step": 8094
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0274643898010254,
      "learning_rate": 1.9515021130884527e-05,
      "loss": 2.4608,
      "step": 8095
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9123745560646057,
      "learning_rate": 1.9514894455212855e-05,
      "loss": 2.6326,
      "step": 8096
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0110961198806763,
      "learning_rate": 1.9514767763410847e-05,
      "loss": 2.5102,
      "step": 8097
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0745320320129395,
      "learning_rate": 1.951464105547873e-05,
      "loss": 2.5791,
      "step": 8098
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.192546010017395,
      "learning_rate": 1.9514514331416707e-05,
      "loss": 2.6928,
      "step": 8099
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9072577953338623,
      "learning_rate": 1.9514387591225e-05,
      "loss": 2.6065,
      "step": 8100
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9214282631874084,
      "learning_rate": 1.9514260834903823e-05,
      "loss": 2.6194,
      "step": 8101
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0122126340866089,
      "learning_rate": 1.951413406245339e-05,
      "loss": 2.6929,
      "step": 8102
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0091588497161865,
      "learning_rate": 1.9514007273873913e-05,
      "loss": 2.6874,
      "step": 8103
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0200659036636353,
      "learning_rate": 1.9513880469165612e-05,
      "loss": 2.7067,
      "step": 8104
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9506975412368774,
      "learning_rate": 1.95137536483287e-05,
      "loss": 2.7308,
      "step": 8105
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9749922752380371,
      "learning_rate": 1.9513626811363387e-05,
      "loss": 2.6988,
      "step": 8106
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0488791465759277,
      "learning_rate": 1.9513499958269897e-05,
      "loss": 2.7435,
      "step": 8107
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.084463357925415,
      "learning_rate": 1.9513373089048443e-05,
      "loss": 2.535,
      "step": 8108
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9860399961471558,
      "learning_rate": 1.951324620369923e-05,
      "loss": 2.5369,
      "step": 8109
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0253795385360718,
      "learning_rate": 1.9513119302222487e-05,
      "loss": 2.7201,
      "step": 8110
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0443382263183594,
      "learning_rate": 1.9512992384618423e-05,
      "loss": 2.6365,
      "step": 8111
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0045156478881836,
      "learning_rate": 1.951286545088725e-05,
      "loss": 2.4706,
      "step": 8112
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3077940940856934,
      "learning_rate": 1.9512738501029188e-05,
      "loss": 2.5508,
      "step": 8113
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0538115501403809,
      "learning_rate": 1.9512611535044455e-05,
      "loss": 2.5082,
      "step": 8114
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9819315075874329,
      "learning_rate": 1.9512484552933255e-05,
      "loss": 2.8,
      "step": 8115
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9904484152793884,
      "learning_rate": 1.9512357554695814e-05,
      "loss": 2.7039,
      "step": 8116
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9756447076797485,
      "learning_rate": 1.951223054033234e-05,
      "loss": 2.6724,
      "step": 8117
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9529669880867004,
      "learning_rate": 1.951210350984306e-05,
      "loss": 2.6473,
      "step": 8118
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9840654730796814,
      "learning_rate": 1.9511976463228173e-05,
      "loss": 2.6582,
      "step": 8119
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2553738355636597,
      "learning_rate": 1.9511849400487907e-05,
      "loss": 2.5684,
      "step": 8120
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0958281755447388,
      "learning_rate": 1.9511722321622467e-05,
      "loss": 2.7574,
      "step": 8121
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1142619848251343,
      "learning_rate": 1.9511595226632082e-05,
      "loss": 2.8402,
      "step": 8122
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0740771293640137,
      "learning_rate": 1.9511468115516954e-05,
      "loss": 2.4217,
      "step": 8123
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9418536424636841,
      "learning_rate": 1.9511340988277304e-05,
      "loss": 2.8249,
      "step": 8124
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.02266263961792,
      "learning_rate": 1.9511213844913352e-05,
      "loss": 2.7192,
      "step": 8125
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1380980014801025,
      "learning_rate": 1.9511086685425307e-05,
      "loss": 2.5858,
      "step": 8126
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.208288311958313,
      "learning_rate": 1.9510959509813388e-05,
      "loss": 2.4139,
      "step": 8127
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9936116933822632,
      "learning_rate": 1.951083231807781e-05,
      "loss": 2.5036,
      "step": 8128
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9963857531547546,
      "learning_rate": 1.9510705110218783e-05,
      "loss": 2.4525,
      "step": 8129
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0097432136535645,
      "learning_rate": 1.9510577886236532e-05,
      "loss": 2.7738,
      "step": 8130
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0199321508407593,
      "learning_rate": 1.9510450646131266e-05,
      "loss": 2.7918,
      "step": 8131
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1943203210830688,
      "learning_rate": 1.9510323389903206e-05,
      "loss": 2.6393,
      "step": 8132
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9976104497909546,
      "learning_rate": 1.9510196117552565e-05,
      "loss": 2.5696,
      "step": 8133
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0122461318969727,
      "learning_rate": 1.951006882907956e-05,
      "loss": 2.7988,
      "step": 8134
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0432713031768799,
      "learning_rate": 1.95099415244844e-05,
      "loss": 2.4587,
      "step": 8135
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0792633295059204,
      "learning_rate": 1.950981420376731e-05,
      "loss": 2.7568,
      "step": 8136
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9883958101272583,
      "learning_rate": 1.95096868669285e-05,
      "loss": 2.6583,
      "step": 8137
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9532163143157959,
      "learning_rate": 1.950955951396819e-05,
      "loss": 2.6569,
      "step": 8138
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0126328468322754,
      "learning_rate": 1.9509432144886592e-05,
      "loss": 2.6603,
      "step": 8139
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9539602994918823,
      "learning_rate": 1.9509304759683925e-05,
      "loss": 2.6657,
      "step": 8140
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9853342175483704,
      "learning_rate": 1.9509177358360405e-05,
      "loss": 2.7527,
      "step": 8141
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0316303968429565,
      "learning_rate": 1.9509049940916246e-05,
      "loss": 2.5169,
      "step": 8142
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9175575375556946,
      "learning_rate": 1.950892250735166e-05,
      "loss": 2.6546,
      "step": 8143
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9449113607406616,
      "learning_rate": 1.9508795057666875e-05,
      "loss": 2.5387,
      "step": 8144
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0873053073883057,
      "learning_rate": 1.9508667591862095e-05,
      "loss": 2.5417,
      "step": 8145
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2216945886611938,
      "learning_rate": 1.9508540109937545e-05,
      "loss": 2.8297,
      "step": 8146
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0227590799331665,
      "learning_rate": 1.9508412611893434e-05,
      "loss": 2.7981,
      "step": 8147
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0247383117675781,
      "learning_rate": 1.9508285097729984e-05,
      "loss": 2.4589,
      "step": 8148
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0068528652191162,
      "learning_rate": 1.9508157567447402e-05,
      "loss": 2.5886,
      "step": 8149
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.8960112929344177,
      "learning_rate": 1.9508030021045915e-05,
      "loss": 2.5658,
      "step": 8150
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2875832319259644,
      "learning_rate": 1.9507902458525737e-05,
      "loss": 2.579,
      "step": 8151
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2588669061660767,
      "learning_rate": 1.9507774879887077e-05,
      "loss": 2.4149,
      "step": 8152
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.000056266784668,
      "learning_rate": 1.950764728513016e-05,
      "loss": 2.509,
      "step": 8153
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1076394319534302,
      "learning_rate": 1.95075196742552e-05,
      "loss": 2.749,
      "step": 8154
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.009721040725708,
      "learning_rate": 1.9507392047262406e-05,
      "loss": 2.5178,
      "step": 8155
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9699643850326538,
      "learning_rate": 1.9507264404152003e-05,
      "loss": 2.4354,
      "step": 8156
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0430505275726318,
      "learning_rate": 1.9507136744924206e-05,
      "loss": 2.5501,
      "step": 8157
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0325534343719482,
      "learning_rate": 1.950700906957923e-05,
      "loss": 2.5865,
      "step": 8158
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9608052968978882,
      "learning_rate": 1.9506881378117292e-05,
      "loss": 2.5705,
      "step": 8159
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0418301820755005,
      "learning_rate": 1.950675367053861e-05,
      "loss": 2.6743,
      "step": 8160
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9884578585624695,
      "learning_rate": 1.9506625946843392e-05,
      "loss": 2.6743,
      "step": 8161
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9654340147972107,
      "learning_rate": 1.9506498207031866e-05,
      "loss": 2.6918,
      "step": 8162
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0493295192718506,
      "learning_rate": 1.9506370451104243e-05,
      "loss": 2.4735,
      "step": 8163
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0201010704040527,
      "learning_rate": 1.9506242679060738e-05,
      "loss": 2.5726,
      "step": 8164
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0389463901519775,
      "learning_rate": 1.950611489090157e-05,
      "loss": 2.5901,
      "step": 8165
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.338090658187866,
      "learning_rate": 1.9505987086626958e-05,
      "loss": 2.7702,
      "step": 8166
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0399467945098877,
      "learning_rate": 1.9505859266237116e-05,
      "loss": 2.8188,
      "step": 8167
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2045197486877441,
      "learning_rate": 1.950573142973226e-05,
      "loss": 2.5776,
      "step": 8168
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.953092634677887,
      "learning_rate": 1.9505603577112607e-05,
      "loss": 2.5843,
      "step": 8169
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0888068675994873,
      "learning_rate": 1.9505475708378373e-05,
      "loss": 2.8187,
      "step": 8170
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.981059193611145,
      "learning_rate": 1.9505347823529778e-05,
      "loss": 2.7524,
      "step": 8171
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9877469539642334,
      "learning_rate": 1.9505219922567032e-05,
      "loss": 2.5054,
      "step": 8172
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9935497045516968,
      "learning_rate": 1.9505092005490365e-05,
      "loss": 2.6818,
      "step": 8173
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0942833423614502,
      "learning_rate": 1.950496407229998e-05,
      "loss": 2.6021,
      "step": 8174
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9798370003700256,
      "learning_rate": 1.95048361229961e-05,
      "loss": 2.5394,
      "step": 8175
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0243362188339233,
      "learning_rate": 1.9504708157578945e-05,
      "loss": 2.5368,
      "step": 8176
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1896871328353882,
      "learning_rate": 1.9504580176048722e-05,
      "loss": 2.5637,
      "step": 8177
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.021836519241333,
      "learning_rate": 1.950445217840566e-05,
      "loss": 2.6619,
      "step": 8178
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9611968398094177,
      "learning_rate": 1.9504324164649965e-05,
      "loss": 2.5715,
      "step": 8179
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0587700605392456,
      "learning_rate": 1.9504196134781862e-05,
      "loss": 2.4073,
      "step": 8180
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0317645072937012,
      "learning_rate": 1.9504068088801566e-05,
      "loss": 2.446,
      "step": 8181
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2647663354873657,
      "learning_rate": 1.950394002670929e-05,
      "loss": 2.4587,
      "step": 8182
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1616203784942627,
      "learning_rate": 1.950381194850526e-05,
      "loss": 2.5368,
      "step": 8183
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9997344017028809,
      "learning_rate": 1.9503683854189682e-05,
      "loss": 2.4085,
      "step": 8184
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0326170921325684,
      "learning_rate": 1.9503555743762783e-05,
      "loss": 2.7682,
      "step": 8185
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0196207761764526,
      "learning_rate": 1.950342761722477e-05,
      "loss": 2.4004,
      "step": 8186
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1313503980636597,
      "learning_rate": 1.950329947457587e-05,
      "loss": 2.6091,
      "step": 8187
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0092294216156006,
      "learning_rate": 1.95031713158163e-05,
      "loss": 2.6817,
      "step": 8188
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0076135396957397,
      "learning_rate": 1.950304314094627e-05,
      "loss": 2.4277,
      "step": 8189
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9526160955429077,
      "learning_rate": 1.9502914949966e-05,
      "loss": 2.5357,
      "step": 8190
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2540868520736694,
      "learning_rate": 1.950278674287571e-05,
      "loss": 2.7006,
      "step": 8191
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1667945384979248,
      "learning_rate": 1.9502658519675615e-05,
      "loss": 2.4325,
      "step": 8192
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9270306825637817,
      "learning_rate": 1.950253028036593e-05,
      "loss": 2.6133,
      "step": 8193
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9513031840324402,
      "learning_rate": 1.950240202494688e-05,
      "loss": 2.7417,
      "step": 8194
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.063636302947998,
      "learning_rate": 1.9502273753418674e-05,
      "loss": 2.6349,
      "step": 8195
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0230927467346191,
      "learning_rate": 1.9502145465781535e-05,
      "loss": 2.7115,
      "step": 8196
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0626190900802612,
      "learning_rate": 1.950201716203568e-05,
      "loss": 2.4514,
      "step": 8197
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0867115259170532,
      "learning_rate": 1.9501888842181323e-05,
      "loss": 2.6018,
      "step": 8198
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9999662041664124,
      "learning_rate": 1.9501760506218684e-05,
      "loss": 2.4993,
      "step": 8199
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0257002115249634,
      "learning_rate": 1.950163215414798e-05,
      "loss": 2.6906,
      "step": 8200
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0988608598709106,
      "learning_rate": 1.9501503785969432e-05,
      "loss": 2.5586,
      "step": 8201
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0698450803756714,
      "learning_rate": 1.9501375401683252e-05,
      "loss": 2.7032,
      "step": 8202
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.969653308391571,
      "learning_rate": 1.950124700128966e-05,
      "loss": 2.5686,
      "step": 8203
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9806157946586609,
      "learning_rate": 1.9501118584788875e-05,
      "loss": 2.6449,
      "step": 8204
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1125903129577637,
      "learning_rate": 1.9500990152181112e-05,
      "loss": 2.5988,
      "step": 8205
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9561054706573486,
      "learning_rate": 1.950086170346659e-05,
      "loss": 2.7127,
      "step": 8206
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9472354054450989,
      "learning_rate": 1.9500733238645527e-05,
      "loss": 2.5469,
      "step": 8207
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2012500762939453,
      "learning_rate": 1.9500604757718143e-05,
      "loss": 2.7399,
      "step": 8208
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0867952108383179,
      "learning_rate": 1.950047626068465e-05,
      "loss": 2.6775,
      "step": 8209
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1021438837051392,
      "learning_rate": 1.9500347747545274e-05,
      "loss": 2.5431,
      "step": 8210
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.135448932647705,
      "learning_rate": 1.9500219218300223e-05,
      "loss": 2.5967,
      "step": 8211
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3620381355285645,
      "learning_rate": 1.9500090672949725e-05,
      "loss": 2.5598,
      "step": 8212
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9918534159660339,
      "learning_rate": 1.949996211149399e-05,
      "loss": 2.7406,
      "step": 8213
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.054164171218872,
      "learning_rate": 1.9499833533933242e-05,
      "loss": 2.5237,
      "step": 8214
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9397648572921753,
      "learning_rate": 1.9499704940267693e-05,
      "loss": 2.5509,
      "step": 8215
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9629286527633667,
      "learning_rate": 1.9499576330497568e-05,
      "loss": 2.6742,
      "step": 8216
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9670246243476868,
      "learning_rate": 1.9499447704623077e-05,
      "loss": 2.5196,
      "step": 8217
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1441198587417603,
      "learning_rate": 1.9499319062644443e-05,
      "loss": 2.5795,
      "step": 8218
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0105377435684204,
      "learning_rate": 1.9499190404561886e-05,
      "loss": 2.5902,
      "step": 8219
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0828763246536255,
      "learning_rate": 1.949906173037562e-05,
      "loss": 2.4582,
      "step": 8220
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9326319098472595,
      "learning_rate": 1.9498933040085862e-05,
      "loss": 2.6627,
      "step": 8221
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9849511384963989,
      "learning_rate": 1.9498804333692836e-05,
      "loss": 2.5829,
      "step": 8222
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.067553162574768,
      "learning_rate": 1.9498675611196754e-05,
      "loss": 2.7391,
      "step": 8223
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0010781288146973,
      "learning_rate": 1.9498546872597837e-05,
      "loss": 2.906,
      "step": 8224
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.051303505897522,
      "learning_rate": 1.9498418117896306e-05,
      "loss": 2.5154,
      "step": 8225
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9882354736328125,
      "learning_rate": 1.9498289347092373e-05,
      "loss": 2.6193,
      "step": 8226
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0620932579040527,
      "learning_rate": 1.9498160560186263e-05,
      "loss": 2.4934,
      "step": 8227
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.881467342376709,
      "learning_rate": 1.9498031757178188e-05,
      "loss": 2.7606,
      "step": 8228
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9322633147239685,
      "learning_rate": 1.9497902938068373e-05,
      "loss": 2.6314,
      "step": 8229
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.134137511253357,
      "learning_rate": 1.949777410285703e-05,
      "loss": 2.7701,
      "step": 8230
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9702858328819275,
      "learning_rate": 1.9497645251544385e-05,
      "loss": 2.7121,
      "step": 8231
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1200861930847168,
      "learning_rate": 1.949751638413065e-05,
      "loss": 2.7634,
      "step": 8232
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1841845512390137,
      "learning_rate": 1.949738750061604e-05,
      "loss": 2.8132,
      "step": 8233
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9722402691841125,
      "learning_rate": 1.9497258601000785e-05,
      "loss": 2.6548,
      "step": 8234
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0172163248062134,
      "learning_rate": 1.9497129685285094e-05,
      "loss": 2.7714,
      "step": 8235
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1993374824523926,
      "learning_rate": 1.949700075346919e-05,
      "loss": 2.4918,
      "step": 8236
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0402408838272095,
      "learning_rate": 1.949687180555329e-05,
      "loss": 2.6267,
      "step": 8237
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.335322618484497,
      "learning_rate": 1.9496742841537612e-05,
      "loss": 2.5792,
      "step": 8238
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9879607558250427,
      "learning_rate": 1.9496613861422378e-05,
      "loss": 2.7855,
      "step": 8239
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3703845739364624,
      "learning_rate": 1.94964848652078e-05,
      "loss": 3.0379,
      "step": 8240
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.978852391242981,
      "learning_rate": 1.9496355852894106e-05,
      "loss": 2.8602,
      "step": 8241
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9017895460128784,
      "learning_rate": 1.9496226824481507e-05,
      "loss": 2.397,
      "step": 8242
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9535087943077087,
      "learning_rate": 1.9496097779970225e-05,
      "loss": 2.6123,
      "step": 8243
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.05846107006073,
      "learning_rate": 1.9495968719360478e-05,
      "loss": 2.5355,
      "step": 8244
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.129961609840393,
      "learning_rate": 1.9495839642652483e-05,
      "loss": 2.5414,
      "step": 8245
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0000205039978027,
      "learning_rate": 1.9495710549846465e-05,
      "loss": 2.6329,
      "step": 8246
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9755347371101379,
      "learning_rate": 1.9495581440942633e-05,
      "loss": 2.605,
      "step": 8247
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9697727560997009,
      "learning_rate": 1.9495452315941218e-05,
      "loss": 2.5634,
      "step": 8248
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9893032908439636,
      "learning_rate": 1.949532317484243e-05,
      "loss": 2.5893,
      "step": 8249
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0532689094543457,
      "learning_rate": 1.9495194017646488e-05,
      "loss": 2.4782,
      "step": 8250
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0005518198013306,
      "learning_rate": 1.9495064844353617e-05,
      "loss": 2.6059,
      "step": 8251
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0031495094299316,
      "learning_rate": 1.9494935654964026e-05,
      "loss": 2.6385,
      "step": 8252
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.960665762424469,
      "learning_rate": 1.9494806449477947e-05,
      "loss": 2.8174,
      "step": 8253
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9952478408813477,
      "learning_rate": 1.949467722789559e-05,
      "loss": 2.5853,
      "step": 8254
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0153238773345947,
      "learning_rate": 1.9494547990217175e-05,
      "loss": 2.5415,
      "step": 8255
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1123807430267334,
      "learning_rate": 1.9494418736442923e-05,
      "loss": 2.7743,
      "step": 8256
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.8984034061431885,
      "learning_rate": 1.9494289466573053e-05,
      "loss": 2.5253,
      "step": 8257
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.126989483833313,
      "learning_rate": 1.9494160180607785e-05,
      "loss": 2.449,
      "step": 8258
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0587077140808105,
      "learning_rate": 1.9494030878547335e-05,
      "loss": 2.5523,
      "step": 8259
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0256195068359375,
      "learning_rate": 1.9493901560391925e-05,
      "loss": 2.7686,
      "step": 8260
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.201338291168213,
      "learning_rate": 1.9493772226141774e-05,
      "loss": 2.6955,
      "step": 8261
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9505637884140015,
      "learning_rate": 1.94936428757971e-05,
      "loss": 2.4964,
      "step": 8262
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9562661647796631,
      "learning_rate": 1.949351350935812e-05,
      "loss": 2.7483,
      "step": 8263
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1243689060211182,
      "learning_rate": 1.9493384126825062e-05,
      "loss": 2.6667,
      "step": 8264
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9807735085487366,
      "learning_rate": 1.9493254728198137e-05,
      "loss": 2.5177,
      "step": 8265
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9731390476226807,
      "learning_rate": 1.9493125313477566e-05,
      "loss": 2.6587,
      "step": 8266
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9514597058296204,
      "learning_rate": 1.949299588266357e-05,
      "loss": 2.5153,
      "step": 8267
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0699349641799927,
      "learning_rate": 1.9492866435756365e-05,
      "loss": 2.4702,
      "step": 8268
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9953426122665405,
      "learning_rate": 1.9492736972756176e-05,
      "loss": 2.9058,
      "step": 8269
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0072675943374634,
      "learning_rate": 1.949260749366322e-05,
      "loss": 2.673,
      "step": 8270
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.032430648803711,
      "learning_rate": 1.9492477998477716e-05,
      "loss": 2.4009,
      "step": 8271
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.069621205329895,
      "learning_rate": 1.949234848719988e-05,
      "loss": 2.8063,
      "step": 8272
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9100042581558228,
      "learning_rate": 1.949221895982994e-05,
      "loss": 2.5167,
      "step": 8273
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9778541922569275,
      "learning_rate": 1.949208941636811e-05,
      "loss": 2.4545,
      "step": 8274
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0489197969436646,
      "learning_rate": 1.949195985681461e-05,
      "loss": 2.6382,
      "step": 8275
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9889488220214844,
      "learning_rate": 1.949183028116966e-05,
      "loss": 2.406,
      "step": 8276
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0983463525772095,
      "learning_rate": 1.9491700689433477e-05,
      "loss": 2.6362,
      "step": 8277
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0685656070709229,
      "learning_rate": 1.9491571081606287e-05,
      "loss": 2.5026,
      "step": 8278
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9985101222991943,
      "learning_rate": 1.9491441457688303e-05,
      "loss": 2.8296,
      "step": 8279
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0464121103286743,
      "learning_rate": 1.949131181767975e-05,
      "loss": 2.75,
      "step": 8280
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1281386613845825,
      "learning_rate": 1.9491182161580847e-05,
      "loss": 2.5682,
      "step": 8281
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1712398529052734,
      "learning_rate": 1.9491052489391807e-05,
      "loss": 2.754,
      "step": 8282
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9233307838439941,
      "learning_rate": 1.9490922801112862e-05,
      "loss": 2.584,
      "step": 8283
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0402019023895264,
      "learning_rate": 1.949079309674422e-05,
      "loss": 2.6339,
      "step": 8284
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9546154737472534,
      "learning_rate": 1.949066337628611e-05,
      "loss": 2.4937,
      "step": 8285
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9520547389984131,
      "learning_rate": 1.9490533639738746e-05,
      "loss": 2.6391,
      "step": 8286
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9165828824043274,
      "learning_rate": 1.949040388710235e-05,
      "loss": 3.0342,
      "step": 8287
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0958313941955566,
      "learning_rate": 1.9490274118377145e-05,
      "loss": 2.5132,
      "step": 8288
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9992521405220032,
      "learning_rate": 1.9490144333563342e-05,
      "loss": 2.5599,
      "step": 8289
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.93929123878479,
      "learning_rate": 1.949001453266117e-05,
      "loss": 2.7388,
      "step": 8290
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0207165479660034,
      "learning_rate": 1.9489884715670847e-05,
      "loss": 2.9418,
      "step": 8291
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9570927619934082,
      "learning_rate": 1.9489754882592588e-05,
      "loss": 2.5469,
      "step": 8292
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.053831934928894,
      "learning_rate": 1.9489625033426623e-05,
      "loss": 2.6541,
      "step": 8293
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9660000801086426,
      "learning_rate": 1.948949516817316e-05,
      "loss": 2.5818,
      "step": 8294
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.014298439025879,
      "learning_rate": 1.9489365286832428e-05,
      "loss": 2.994,
      "step": 8295
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9765449166297913,
      "learning_rate": 1.9489235389404646e-05,
      "loss": 2.5509,
      "step": 8296
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.93929123878479,
      "learning_rate": 1.948910547589003e-05,
      "loss": 2.7441,
      "step": 8297
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.00636625289917,
      "learning_rate": 1.9488975546288808e-05,
      "loss": 2.7698,
      "step": 8298
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1730455160140991,
      "learning_rate": 1.9488845600601188e-05,
      "loss": 2.6046,
      "step": 8299
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9750308394432068,
      "learning_rate": 1.9488715638827403e-05,
      "loss": 2.7261,
      "step": 8300
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9293733239173889,
      "learning_rate": 1.9488585660967667e-05,
      "loss": 2.4978,
      "step": 8301
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0676767826080322,
      "learning_rate": 1.9488455667022198e-05,
      "loss": 2.82,
      "step": 8302
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9867925643920898,
      "learning_rate": 1.9488325656991225e-05,
      "loss": 2.6315,
      "step": 8303
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9955365657806396,
      "learning_rate": 1.948819563087496e-05,
      "loss": 2.5536,
      "step": 8304
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9498671889305115,
      "learning_rate": 1.9488065588673626e-05,
      "loss": 2.4747,
      "step": 8305
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0292952060699463,
      "learning_rate": 1.9487935530387447e-05,
      "loss": 2.7127,
      "step": 8306
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9430447816848755,
      "learning_rate": 1.9487805456016637e-05,
      "loss": 2.5368,
      "step": 8307
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0453696250915527,
      "learning_rate": 1.948767536556142e-05,
      "loss": 2.5038,
      "step": 8308
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.074807047843933,
      "learning_rate": 1.9487545259022017e-05,
      "loss": 2.6231,
      "step": 8309
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9639581441879272,
      "learning_rate": 1.9487415136398647e-05,
      "loss": 2.4978,
      "step": 8310
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9732029438018799,
      "learning_rate": 1.9487284997691535e-05,
      "loss": 2.5955,
      "step": 8311
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9651848673820496,
      "learning_rate": 1.9487154842900896e-05,
      "loss": 2.5925,
      "step": 8312
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2333003282546997,
      "learning_rate": 1.9487024672026952e-05,
      "loss": 2.5973,
      "step": 8313
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1995269060134888,
      "learning_rate": 1.9486894485069926e-05,
      "loss": 2.584,
      "step": 8314
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1695237159729004,
      "learning_rate": 1.9486764282030038e-05,
      "loss": 2.5436,
      "step": 8315
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0414334535598755,
      "learning_rate": 1.9486634062907504e-05,
      "loss": 2.8759,
      "step": 8316
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.036078929901123,
      "learning_rate": 1.9486503827702553e-05,
      "loss": 2.6762,
      "step": 8317
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0747724771499634,
      "learning_rate": 1.94863735764154e-05,
      "loss": 2.5072,
      "step": 8318
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.113708734512329,
      "learning_rate": 1.9486243309046267e-05,
      "loss": 2.4739,
      "step": 8319
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0159014463424683,
      "learning_rate": 1.9486113025595376e-05,
      "loss": 2.6311,
      "step": 8320
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.110825538635254,
      "learning_rate": 1.9485982726062948e-05,
      "loss": 2.5216,
      "step": 8321
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.14713716506958,
      "learning_rate": 1.9485852410449203e-05,
      "loss": 2.6601,
      "step": 8322
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0237644910812378,
      "learning_rate": 1.9485722078754358e-05,
      "loss": 2.5709,
      "step": 8323
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9572851061820984,
      "learning_rate": 1.948559173097864e-05,
      "loss": 2.6399,
      "step": 8324
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2499969005584717,
      "learning_rate": 1.948546136712227e-05,
      "loss": 2.7127,
      "step": 8325
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0868743658065796,
      "learning_rate": 1.948533098718546e-05,
      "loss": 2.5476,
      "step": 8326
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0009801387786865,
      "learning_rate": 1.9485200591168442e-05,
      "loss": 2.6035,
      "step": 8327
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1337823867797852,
      "learning_rate": 1.9485070179071437e-05,
      "loss": 2.8715,
      "step": 8328
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9414591789245605,
      "learning_rate": 1.9484939750894657e-05,
      "loss": 2.5325,
      "step": 8329
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.119499683380127,
      "learning_rate": 1.948480930663833e-05,
      "loss": 2.61,
      "step": 8330
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1805731058120728,
      "learning_rate": 1.9484678846302673e-05,
      "loss": 2.577,
      "step": 8331
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2129286527633667,
      "learning_rate": 1.948454836988791e-05,
      "loss": 2.6999,
      "step": 8332
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9466447234153748,
      "learning_rate": 1.9484417877394263e-05,
      "loss": 2.4764,
      "step": 8333
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.982107400894165,
      "learning_rate": 1.9484287368821954e-05,
      "loss": 2.5809,
      "step": 8334
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9442692995071411,
      "learning_rate": 1.9484156844171198e-05,
      "loss": 2.7702,
      "step": 8335
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9779925346374512,
      "learning_rate": 1.948402630344222e-05,
      "loss": 2.5143,
      "step": 8336
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9609251022338867,
      "learning_rate": 1.948389574663524e-05,
      "loss": 2.5154,
      "step": 8337
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.06238853931427,
      "learning_rate": 1.948376517375049e-05,
      "loss": 2.7754,
      "step": 8338
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9979811906814575,
      "learning_rate": 1.9483634584788173e-05,
      "loss": 2.6929,
      "step": 8339
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2179840803146362,
      "learning_rate": 1.9483503979748525e-05,
      "loss": 2.4247,
      "step": 8340
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0456894636154175,
      "learning_rate": 1.9483373358631758e-05,
      "loss": 2.4957,
      "step": 8341
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9940635561943054,
      "learning_rate": 1.94832427214381e-05,
      "loss": 2.5286,
      "step": 8342
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9699724912643433,
      "learning_rate": 1.9483112068167768e-05,
      "loss": 2.6114,
      "step": 8343
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0918455123901367,
      "learning_rate": 1.948298139882099e-05,
      "loss": 2.6855,
      "step": 8344
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1710816621780396,
      "learning_rate": 1.9482850713397975e-05,
      "loss": 2.6144,
      "step": 8345
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0365506410598755,
      "learning_rate": 1.948272001189896e-05,
      "loss": 2.3938,
      "step": 8346
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.035489797592163,
      "learning_rate": 1.9482589294324158e-05,
      "loss": 2.4926,
      "step": 8347
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9303246140480042,
      "learning_rate": 1.9482458560673788e-05,
      "loss": 2.6452,
      "step": 8348
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9736146926879883,
      "learning_rate": 1.9482327810948075e-05,
      "loss": 2.568,
      "step": 8349
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9686789512634277,
      "learning_rate": 1.9482197045147245e-05,
      "loss": 2.4779,
      "step": 8350
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9845588803291321,
      "learning_rate": 1.9482066263271516e-05,
      "loss": 2.608,
      "step": 8351
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0009210109710693,
      "learning_rate": 1.9481935465321107e-05,
      "loss": 2.5738,
      "step": 8352
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0508843660354614,
      "learning_rate": 1.9481804651296242e-05,
      "loss": 2.5832,
      "step": 8353
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0203627347946167,
      "learning_rate": 1.9481673821197144e-05,
      "loss": 2.5842,
      "step": 8354
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9772263765335083,
      "learning_rate": 1.9481542975024035e-05,
      "loss": 2.8674,
      "step": 8355
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.045323133468628,
      "learning_rate": 1.948141211277713e-05,
      "loss": 2.5926,
      "step": 8356
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9359182715415955,
      "learning_rate": 1.9481281234456664e-05,
      "loss": 2.8448,
      "step": 8357
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0369343757629395,
      "learning_rate": 1.9481150340062846e-05,
      "loss": 2.5474,
      "step": 8358
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.965394139289856,
      "learning_rate": 1.9481019429595904e-05,
      "loss": 2.4071,
      "step": 8359
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1245890855789185,
      "learning_rate": 1.9480888503056058e-05,
      "loss": 2.4794,
      "step": 8360
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0101205110549927,
      "learning_rate": 1.9480757560443535e-05,
      "loss": 2.4364,
      "step": 8361
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0282037258148193,
      "learning_rate": 1.948062660175855e-05,
      "loss": 2.7831,
      "step": 8362
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.89999920129776,
      "learning_rate": 1.948049562700133e-05,
      "loss": 2.6824,
      "step": 8363
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9725316762924194,
      "learning_rate": 1.9480364636172093e-05,
      "loss": 2.4413,
      "step": 8364
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.3741390705108643,
      "learning_rate": 1.9480233629271063e-05,
      "loss": 2.6399,
      "step": 8365
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9736831784248352,
      "learning_rate": 1.9480102606298464e-05,
      "loss": 2.5866,
      "step": 8366
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.002967119216919,
      "learning_rate": 1.9479971567254513e-05,
      "loss": 2.634,
      "step": 8367
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0088920593261719,
      "learning_rate": 1.947984051213944e-05,
      "loss": 2.6756,
      "step": 8368
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.027570128440857,
      "learning_rate": 1.9479709440953457e-05,
      "loss": 2.8396,
      "step": 8369
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0820493698120117,
      "learning_rate": 1.9479578353696797e-05,
      "loss": 2.5854,
      "step": 8370
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9593807458877563,
      "learning_rate": 1.9479447250369675e-05,
      "loss": 2.5828,
      "step": 8371
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9885124564170837,
      "learning_rate": 1.9479316130972316e-05,
      "loss": 2.505,
      "step": 8372
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1839948892593384,
      "learning_rate": 1.9479184995504942e-05,
      "loss": 2.5213,
      "step": 8373
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9678777456283569,
      "learning_rate": 1.947905384396777e-05,
      "loss": 2.3632,
      "step": 8374
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9986882209777832,
      "learning_rate": 1.9478922676361037e-05,
      "loss": 2.5461,
      "step": 8375
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.000978708267212,
      "learning_rate": 1.9478791492684946e-05,
      "loss": 2.8313,
      "step": 8376
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9659605026245117,
      "learning_rate": 1.9478660292939736e-05,
      "loss": 2.5444,
      "step": 8377
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.96458899974823,
      "learning_rate": 1.947852907712562e-05,
      "loss": 2.529,
      "step": 8378
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0168569087982178,
      "learning_rate": 1.947839784524282e-05,
      "loss": 2.7153,
      "step": 8379
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9854737520217896,
      "learning_rate": 1.9478266597291564e-05,
      "loss": 2.519,
      "step": 8380
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.085124135017395,
      "learning_rate": 1.9478135333272074e-05,
      "loss": 2.7266,
      "step": 8381
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0717780590057373,
      "learning_rate": 1.947800405318457e-05,
      "loss": 2.5557,
      "step": 8382
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.077040672302246,
      "learning_rate": 1.9477872757029268e-05,
      "loss": 2.7901,
      "step": 8383
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9430821537971497,
      "learning_rate": 1.9477741444806403e-05,
      "loss": 2.5879,
      "step": 8384
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0234501361846924,
      "learning_rate": 1.947761011651619e-05,
      "loss": 2.5016,
      "step": 8385
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9391295909881592,
      "learning_rate": 1.9477478772158854e-05,
      "loss": 2.137,
      "step": 8386
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9817422032356262,
      "learning_rate": 1.947734741173462e-05,
      "loss": 2.6653,
      "step": 8387
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0150450468063354,
      "learning_rate": 1.947721603524371e-05,
      "loss": 2.5593,
      "step": 8388
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0037347078323364,
      "learning_rate": 1.947708464268634e-05,
      "loss": 2.7052,
      "step": 8389
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0948033332824707,
      "learning_rate": 1.947695323406274e-05,
      "loss": 2.4733,
      "step": 8390
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9126356840133667,
      "learning_rate": 1.9476821809373128e-05,
      "loss": 2.5231,
      "step": 8391
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.075897216796875,
      "learning_rate": 1.9476690368617733e-05,
      "loss": 2.4923,
      "step": 8392
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0216115713119507,
      "learning_rate": 1.947655891179677e-05,
      "loss": 2.796,
      "step": 8393
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9247224926948547,
      "learning_rate": 1.9476427438910467e-05,
      "loss": 2.5547,
      "step": 8394
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.08182954788208,
      "learning_rate": 1.9476295949959046e-05,
      "loss": 2.4788,
      "step": 8395
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0624827146530151,
      "learning_rate": 1.947616444494273e-05,
      "loss": 2.4706,
      "step": 8396
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.081273078918457,
      "learning_rate": 1.947603292386174e-05,
      "loss": 2.6643,
      "step": 8397
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9962900280952454,
      "learning_rate": 1.9475901386716308e-05,
      "loss": 2.6806,
      "step": 8398
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0941187143325806,
      "learning_rate": 1.947576983350664e-05,
      "loss": 2.4787,
      "step": 8399
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0194238424301147,
      "learning_rate": 1.9475638264232972e-05,
      "loss": 2.6259,
      "step": 8400
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9522183537483215,
      "learning_rate": 1.9475506678895525e-05,
      "loss": 2.6304,
      "step": 8401
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0172697305679321,
      "learning_rate": 1.947537507749452e-05,
      "loss": 2.6802,
      "step": 8402
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0417089462280273,
      "learning_rate": 1.947524346003018e-05,
      "loss": 2.6862,
      "step": 8403
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0035406351089478,
      "learning_rate": 1.9475111826502728e-05,
      "loss": 2.5399,
      "step": 8404
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0816205739974976,
      "learning_rate": 1.947498017691239e-05,
      "loss": 2.4435,
      "step": 8405
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9862448573112488,
      "learning_rate": 1.9474848511259387e-05,
      "loss": 2.8311,
      "step": 8406
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0779861211776733,
      "learning_rate": 1.9474716829543942e-05,
      "loss": 2.4581,
      "step": 8407
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9214414358139038,
      "learning_rate": 1.947458513176628e-05,
      "loss": 2.6581,
      "step": 8408
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1213631629943848,
      "learning_rate": 1.947445341792662e-05,
      "loss": 2.6239,
      "step": 8409
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9882391691207886,
      "learning_rate": 1.947432168802519e-05,
      "loss": 2.5526,
      "step": 8410
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1573772430419922,
      "learning_rate": 1.9474189942062208e-05,
      "loss": 2.8935,
      "step": 8411
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.091810703277588,
      "learning_rate": 1.9474058180037906e-05,
      "loss": 2.3801,
      "step": 8412
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1224632263183594,
      "learning_rate": 1.94739264019525e-05,
      "loss": 2.6314,
      "step": 8413
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.012157678604126,
      "learning_rate": 1.9473794607806215e-05,
      "loss": 2.5451,
      "step": 8414
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1129357814788818,
      "learning_rate": 1.9473662797599277e-05,
      "loss": 2.8044,
      "step": 8415
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0354058742523193,
      "learning_rate": 1.9473530971331906e-05,
      "loss": 2.7503,
      "step": 8416
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.022690773010254,
      "learning_rate": 1.947339912900433e-05,
      "loss": 2.557,
      "step": 8417
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1218087673187256,
      "learning_rate": 1.9473267270616766e-05,
      "loss": 2.769,
      "step": 8418
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9869509935379028,
      "learning_rate": 1.9473135396169443e-05,
      "loss": 2.6762,
      "step": 8419
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0231460332870483,
      "learning_rate": 1.9473003505662582e-05,
      "loss": 2.4541,
      "step": 8420
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.114108920097351,
      "learning_rate": 1.9472871599096407e-05,
      "loss": 2.454,
      "step": 8421
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1201839447021484,
      "learning_rate": 1.947273967647114e-05,
      "loss": 2.3894,
      "step": 8422
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9666179418563843,
      "learning_rate": 1.947260773778701e-05,
      "loss": 2.8436,
      "step": 8423
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0957025289535522,
      "learning_rate": 1.9472475783044235e-05,
      "loss": 2.4892,
      "step": 8424
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1540753841400146,
      "learning_rate": 1.9472343812243042e-05,
      "loss": 2.6875,
      "step": 8425
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.432464361190796,
      "learning_rate": 1.9472211825383652e-05,
      "loss": 2.4258,
      "step": 8426
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.035618782043457,
      "learning_rate": 1.9472079822466293e-05,
      "loss": 2.4203,
      "step": 8427
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9592362642288208,
      "learning_rate": 1.9471947803491186e-05,
      "loss": 2.613,
      "step": 8428
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.14426851272583,
      "learning_rate": 1.9471815768458552e-05,
      "loss": 2.7074,
      "step": 8429
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9831425547599792,
      "learning_rate": 1.947168371736862e-05,
      "loss": 2.5895,
      "step": 8430
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.014884352684021,
      "learning_rate": 1.947155165022161e-05,
      "loss": 2.5808,
      "step": 8431
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.075774073600769,
      "learning_rate": 1.947141956701775e-05,
      "loss": 2.7108,
      "step": 8432
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9790862202644348,
      "learning_rate": 1.947128746775726e-05,
      "loss": 2.4884,
      "step": 8433
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.282293438911438,
      "learning_rate": 1.9471155352440365e-05,
      "loss": 2.6099,
      "step": 8434
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0437917709350586,
      "learning_rate": 1.9471023221067288e-05,
      "loss": 2.6976,
      "step": 8435
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9391231536865234,
      "learning_rate": 1.9470891073638255e-05,
      "loss": 2.579,
      "step": 8436
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.001823902130127,
      "learning_rate": 1.9470758910153492e-05,
      "loss": 2.4935,
      "step": 8437
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0306591987609863,
      "learning_rate": 1.947062673061322e-05,
      "loss": 2.4334,
      "step": 8438
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1017001867294312,
      "learning_rate": 1.947049453501766e-05,
      "loss": 2.6509,
      "step": 8439
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9686678647994995,
      "learning_rate": 1.9470362323367044e-05,
      "loss": 2.5821,
      "step": 8440
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.8917235732078552,
      "learning_rate": 1.9470230095661588e-05,
      "loss": 2.5969,
      "step": 8441
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2277485132217407,
      "learning_rate": 1.947009785190152e-05,
      "loss": 2.4617,
      "step": 8442
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9301502108573914,
      "learning_rate": 1.9469965592087064e-05,
      "loss": 2.67,
      "step": 8443
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9576630592346191,
      "learning_rate": 1.9469833316218448e-05,
      "loss": 2.4506,
      "step": 8444
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0183159112930298,
      "learning_rate": 1.9469701024295894e-05,
      "loss": 2.5538,
      "step": 8445
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0229403972625732,
      "learning_rate": 1.946956871631962e-05,
      "loss": 2.889,
      "step": 8446
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.010274052619934,
      "learning_rate": 1.9469436392289854e-05,
      "loss": 2.7172,
      "step": 8447
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9389779567718506,
      "learning_rate": 1.9469304052206826e-05,
      "loss": 2.857,
      "step": 8448
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0434623956680298,
      "learning_rate": 1.946917169607075e-05,
      "loss": 2.3493,
      "step": 8449
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1620341539382935,
      "learning_rate": 1.946903932388186e-05,
      "loss": 2.5439,
      "step": 8450
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0003167390823364,
      "learning_rate": 1.946890693564038e-05,
      "loss": 2.7225,
      "step": 8451
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.039924144744873,
      "learning_rate": 1.9468774531346527e-05,
      "loss": 2.6037,
      "step": 8452
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.053455114364624,
      "learning_rate": 1.9468642111000527e-05,
      "loss": 2.5479,
      "step": 8453
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0269724130630493,
      "learning_rate": 1.9468509674602612e-05,
      "loss": 2.8968,
      "step": 8454
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9634315967559814,
      "learning_rate": 1.9468377222153e-05,
      "loss": 2.6898,
      "step": 8455
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0412967205047607,
      "learning_rate": 1.9468244753651917e-05,
      "loss": 2.6294,
      "step": 8456
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9921860098838806,
      "learning_rate": 1.9468112269099586e-05,
      "loss": 2.7727,
      "step": 8457
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9290599226951599,
      "learning_rate": 1.9467979768496235e-05,
      "loss": 2.6856,
      "step": 8458
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.979376494884491,
      "learning_rate": 1.9467847251842084e-05,
      "loss": 2.4583,
      "step": 8459
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0561002492904663,
      "learning_rate": 1.9467714719137364e-05,
      "loss": 2.6943,
      "step": 8460
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.954181969165802,
      "learning_rate": 1.9467582170382294e-05,
      "loss": 2.5398,
      "step": 8461
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0308609008789062,
      "learning_rate": 1.9467449605577102e-05,
      "loss": 2.476,
      "step": 8462
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.230716586112976,
      "learning_rate": 1.946731702472201e-05,
      "loss": 2.5063,
      "step": 8463
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0450156927108765,
      "learning_rate": 1.9467184427817248e-05,
      "loss": 2.7064,
      "step": 8464
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9909964799880981,
      "learning_rate": 1.946705181486303e-05,
      "loss": 2.733,
      "step": 8465
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9933955669403076,
      "learning_rate": 1.9466919185859595e-05,
      "loss": 2.5781,
      "step": 8466
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0553898811340332,
      "learning_rate": 1.946678654080716e-05,
      "loss": 2.6007,
      "step": 8467
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1317484378814697,
      "learning_rate": 1.9466653879705948e-05,
      "loss": 2.4702,
      "step": 8468
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9561567306518555,
      "learning_rate": 1.9466521202556188e-05,
      "loss": 2.581,
      "step": 8469
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9832658171653748,
      "learning_rate": 1.9466388509358104e-05,
      "loss": 2.6151,
      "step": 8470
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1806762218475342,
      "learning_rate": 1.9466255800111917e-05,
      "loss": 2.6504,
      "step": 8471
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1317676305770874,
      "learning_rate": 1.946612307481786e-05,
      "loss": 2.7519,
      "step": 8472
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1129366159439087,
      "learning_rate": 1.946599033347615e-05,
      "loss": 2.612,
      "step": 8473
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9749076962471008,
      "learning_rate": 1.9465857576087018e-05,
      "loss": 2.7316,
      "step": 8474
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.050779938697815,
      "learning_rate": 1.9465724802650685e-05,
      "loss": 2.6634,
      "step": 8475
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.016687035560608,
      "learning_rate": 1.9465592013167378e-05,
      "loss": 2.6809,
      "step": 8476
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9349832534790039,
      "learning_rate": 1.946545920763732e-05,
      "loss": 2.7298,
      "step": 8477
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0069923400878906,
      "learning_rate": 1.946532638606074e-05,
      "loss": 2.6648,
      "step": 8478
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3376617431640625,
      "learning_rate": 1.946519354843786e-05,
      "loss": 2.5225,
      "step": 8479
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.054827332496643,
      "learning_rate": 1.946506069476891e-05,
      "loss": 2.56,
      "step": 8480
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0042431354522705,
      "learning_rate": 1.9464927825054107e-05,
      "loss": 2.5137,
      "step": 8481
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0096492767333984,
      "learning_rate": 1.9464794939293684e-05,
      "loss": 2.8654,
      "step": 8482
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0199302434921265,
      "learning_rate": 1.9464662037487862e-05,
      "loss": 2.7317,
      "step": 8483
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9679437279701233,
      "learning_rate": 1.9464529119636864e-05,
      "loss": 2.6289,
      "step": 8484
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9688140153884888,
      "learning_rate": 1.946439618574092e-05,
      "loss": 2.4815,
      "step": 8485
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9631842970848083,
      "learning_rate": 1.9464263235800252e-05,
      "loss": 2.7014,
      "step": 8486
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.91806960105896,
      "learning_rate": 1.9464130269815092e-05,
      "loss": 2.335,
      "step": 8487
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9583179354667664,
      "learning_rate": 1.946399728778566e-05,
      "loss": 2.604,
      "step": 8488
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9774882197380066,
      "learning_rate": 1.9463864289712183e-05,
      "loss": 2.5364,
      "step": 8489
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.153190016746521,
      "learning_rate": 1.946373127559488e-05,
      "loss": 2.6198,
      "step": 8490
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9838843941688538,
      "learning_rate": 1.946359824543399e-05,
      "loss": 2.3625,
      "step": 8491
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.928368330001831,
      "learning_rate": 1.9463465199229724e-05,
      "loss": 2.717,
      "step": 8492
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0038765668869019,
      "learning_rate": 1.946333213698232e-05,
      "loss": 2.5384,
      "step": 8493
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9704920649528503,
      "learning_rate": 1.946319905869199e-05,
      "loss": 2.8674,
      "step": 8494
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9598731398582458,
      "learning_rate": 1.9463065964358975e-05,
      "loss": 2.5139,
      "step": 8495
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9762070775032043,
      "learning_rate": 1.946293285398349e-05,
      "loss": 2.6974,
      "step": 8496
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0253287553787231,
      "learning_rate": 1.9462799727565764e-05,
      "loss": 2.7398,
      "step": 8497
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9512594938278198,
      "learning_rate": 1.9462666585106023e-05,
      "loss": 2.7091,
      "step": 8498
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0617740154266357,
      "learning_rate": 1.946253342660449e-05,
      "loss": 2.7559,
      "step": 8499
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0563055276870728,
      "learning_rate": 1.9462400252061394e-05,
      "loss": 2.5238,
      "step": 8500
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9441729187965393,
      "learning_rate": 1.946226706147696e-05,
      "loss": 2.4547,
      "step": 8501
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.040686845779419,
      "learning_rate": 1.9462133854851415e-05,
      "loss": 2.4777,
      "step": 8502
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.959762454032898,
      "learning_rate": 1.946200063218498e-05,
      "loss": 2.8358,
      "step": 8503
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0008805990219116,
      "learning_rate": 1.9461867393477884e-05,
      "loss": 2.3597,
      "step": 8504
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0299171209335327,
      "learning_rate": 1.9461734138730356e-05,
      "loss": 2.4805,
      "step": 8505
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9982196688652039,
      "learning_rate": 1.946160086794262e-05,
      "loss": 2.5546,
      "step": 8506
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9920169711112976,
      "learning_rate": 1.9461467581114898e-05,
      "loss": 2.5865,
      "step": 8507
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0128545761108398,
      "learning_rate": 1.9461334278247415e-05,
      "loss": 2.4746,
      "step": 8508
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.972671389579773,
      "learning_rate": 1.9461200959340408e-05,
      "loss": 2.5171,
      "step": 8509
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0317285060882568,
      "learning_rate": 1.946106762439409e-05,
      "loss": 2.3315,
      "step": 8510
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0154056549072266,
      "learning_rate": 1.9460934273408698e-05,
      "loss": 2.5299,
      "step": 8511
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9446868896484375,
      "learning_rate": 1.946080090638445e-05,
      "loss": 2.367,
      "step": 8512
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9593592286109924,
      "learning_rate": 1.946066752332157e-05,
      "loss": 2.4008,
      "step": 8513
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0194915533065796,
      "learning_rate": 1.94605341242203e-05,
      "loss": 2.6512,
      "step": 8514
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0236469507217407,
      "learning_rate": 1.946040070908085e-05,
      "loss": 2.7884,
      "step": 8515
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9574823379516602,
      "learning_rate": 1.946026727790345e-05,
      "loss": 2.5431,
      "step": 8516
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.227987289428711,
      "learning_rate": 1.9460133830688326e-05,
      "loss": 2.7514,
      "step": 8517
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1674004793167114,
      "learning_rate": 1.9460000367435706e-05,
      "loss": 2.7376,
      "step": 8518
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9219892024993896,
      "learning_rate": 1.945986688814582e-05,
      "loss": 2.5584,
      "step": 8519
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.087964415550232,
      "learning_rate": 1.9459733392818888e-05,
      "loss": 2.6316,
      "step": 8520
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1478499174118042,
      "learning_rate": 1.9459599881455143e-05,
      "loss": 2.6804,
      "step": 8521
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1331983804702759,
      "learning_rate": 1.94594663540548e-05,
      "loss": 2.8164,
      "step": 8522
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.214922547340393,
      "learning_rate": 1.9459332810618096e-05,
      "loss": 2.6691,
      "step": 8523
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1084873676300049,
      "learning_rate": 1.9459199251145254e-05,
      "loss": 2.582,
      "step": 8524
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.036842703819275,
      "learning_rate": 1.94590656756365e-05,
      "loss": 2.5594,
      "step": 8525
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0574613809585571,
      "learning_rate": 1.945893208409206e-05,
      "loss": 2.5294,
      "step": 8526
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.017459750175476,
      "learning_rate": 1.9458798476512163e-05,
      "loss": 2.5546,
      "step": 8527
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9907302856445312,
      "learning_rate": 1.945866485289703e-05,
      "loss": 2.7139,
      "step": 8528
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9790703654289246,
      "learning_rate": 1.9458531213246892e-05,
      "loss": 2.6706,
      "step": 8529
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.007014513015747,
      "learning_rate": 1.9458397557561977e-05,
      "loss": 2.4587,
      "step": 8530
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9833890795707703,
      "learning_rate": 1.9458263885842508e-05,
      "loss": 2.6345,
      "step": 8531
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0160821676254272,
      "learning_rate": 1.9458130198088715e-05,
      "loss": 2.5989,
      "step": 8532
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0404181480407715,
      "learning_rate": 1.945799649430082e-05,
      "loss": 2.7037,
      "step": 8533
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0434461832046509,
      "learning_rate": 1.9457862774479054e-05,
      "loss": 2.7784,
      "step": 8534
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9707303047180176,
      "learning_rate": 1.9457729038623636e-05,
      "loss": 2.7813,
      "step": 8535
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9715340733528137,
      "learning_rate": 1.9457595286734806e-05,
      "loss": 2.8103,
      "step": 8536
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.008211612701416,
      "learning_rate": 1.945746151881278e-05,
      "loss": 2.7305,
      "step": 8537
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0171246528625488,
      "learning_rate": 1.9457327734857787e-05,
      "loss": 2.5323,
      "step": 8538
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0276236534118652,
      "learning_rate": 1.945719393487006e-05,
      "loss": 2.708,
      "step": 8539
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9582564234733582,
      "learning_rate": 1.9457060118849816e-05,
      "loss": 2.4884,
      "step": 8540
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.03121817111969,
      "learning_rate": 1.9456926286797287e-05,
      "loss": 2.7071,
      "step": 8541
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1449284553527832,
      "learning_rate": 1.94567924387127e-05,
      "loss": 2.7544,
      "step": 8542
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0273939371109009,
      "learning_rate": 1.945665857459628e-05,
      "loss": 2.4484,
      "step": 8543
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9401620030403137,
      "learning_rate": 1.9456524694448257e-05,
      "loss": 2.7091,
      "step": 8544
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9016398787498474,
      "learning_rate": 1.9456390798268856e-05,
      "loss": 2.6097,
      "step": 8545
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9917524456977844,
      "learning_rate": 1.9456256886058305e-05,
      "loss": 2.5324,
      "step": 8546
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9870092272758484,
      "learning_rate": 1.945612295781683e-05,
      "loss": 2.6004,
      "step": 8547
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9974706172943115,
      "learning_rate": 1.9455989013544658e-05,
      "loss": 2.4975,
      "step": 8548
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0406385660171509,
      "learning_rate": 1.9455855053242016e-05,
      "loss": 2.599,
      "step": 8549
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0755378007888794,
      "learning_rate": 1.9455721076909136e-05,
      "loss": 2.3564,
      "step": 8550
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1516568660736084,
      "learning_rate": 1.9455587084546236e-05,
      "loss": 2.6381,
      "step": 8551
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.140921950340271,
      "learning_rate": 1.945545307615355e-05,
      "loss": 2.556,
      "step": 8552
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.074836015701294,
      "learning_rate": 1.94553190517313e-05,
      "loss": 2.7503,
      "step": 8553
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9696342349052429,
      "learning_rate": 1.9455185011279716e-05,
      "loss": 2.5859,
      "step": 8554
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9940776824951172,
      "learning_rate": 1.9455050954799027e-05,
      "loss": 2.592,
      "step": 8555
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9710983633995056,
      "learning_rate": 1.945491688228946e-05,
      "loss": 2.6457,
      "step": 8556
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0306349992752075,
      "learning_rate": 1.945478279375124e-05,
      "loss": 2.6891,
      "step": 8557
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0449542999267578,
      "learning_rate": 1.9454648689184593e-05,
      "loss": 2.5014,
      "step": 8558
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.148544192314148,
      "learning_rate": 1.945451456858975e-05,
      "loss": 2.758,
      "step": 8559
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0766475200653076,
      "learning_rate": 1.945438043196694e-05,
      "loss": 2.798,
      "step": 8560
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.971964955329895,
      "learning_rate": 1.9454246279316385e-05,
      "loss": 2.4423,
      "step": 8561
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9537592530250549,
      "learning_rate": 1.9454112110638315e-05,
      "loss": 2.5793,
      "step": 8562
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0591356754302979,
      "learning_rate": 1.9453977925932956e-05,
      "loss": 2.4374,
      "step": 8563
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9963650703430176,
      "learning_rate": 1.945384372520054e-05,
      "loss": 2.5548,
      "step": 8564
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0316801071166992,
      "learning_rate": 1.945370950844129e-05,
      "loss": 2.6096,
      "step": 8565
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.338755488395691,
      "learning_rate": 1.945357527565543e-05,
      "loss": 2.4344,
      "step": 8566
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9951769709587097,
      "learning_rate": 1.9453441026843197e-05,
      "loss": 2.5514,
      "step": 8567
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0831115245819092,
      "learning_rate": 1.9453306762004814e-05,
      "loss": 2.6349,
      "step": 8568
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9652318358421326,
      "learning_rate": 1.9453172481140507e-05,
      "loss": 2.4528,
      "step": 8569
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0079256296157837,
      "learning_rate": 1.9453038184250505e-05,
      "loss": 2.9153,
      "step": 8570
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.208345651626587,
      "learning_rate": 1.9452903871335037e-05,
      "loss": 2.3785,
      "step": 8571
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1805685758590698,
      "learning_rate": 1.945276954239433e-05,
      "loss": 2.4056,
      "step": 8572
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0451505184173584,
      "learning_rate": 1.945263519742861e-05,
      "loss": 2.9415,
      "step": 8573
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0522730350494385,
      "learning_rate": 1.9452500836438107e-05,
      "loss": 2.6981,
      "step": 8574
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9064502120018005,
      "learning_rate": 1.9452366459423048e-05,
      "loss": 2.5623,
      "step": 8575
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9030101299285889,
      "learning_rate": 1.945223206638366e-05,
      "loss": 2.701,
      "step": 8576
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.991581380367279,
      "learning_rate": 1.9452097657320174e-05,
      "loss": 2.4975,
      "step": 8577
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9195762872695923,
      "learning_rate": 1.945196323223281e-05,
      "loss": 2.6061,
      "step": 8578
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0264604091644287,
      "learning_rate": 1.9451828791121806e-05,
      "loss": 2.5426,
      "step": 8579
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0962700843811035,
      "learning_rate": 1.9451694333987384e-05,
      "loss": 2.6019,
      "step": 8580
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9792073965072632,
      "learning_rate": 1.945155986082977e-05,
      "loss": 2.6041,
      "step": 8581
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9747529625892639,
      "learning_rate": 1.9451425371649197e-05,
      "loss": 2.5384,
      "step": 8582
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.004526138305664,
      "learning_rate": 1.9451290866445893e-05,
      "loss": 2.5405,
      "step": 8583
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0644142627716064,
      "learning_rate": 1.9451156345220083e-05,
      "loss": 2.6021,
      "step": 8584
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9930938482284546,
      "learning_rate": 1.9451021807971994e-05,
      "loss": 2.5541,
      "step": 8585
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0607305765151978,
      "learning_rate": 1.9450887254701857e-05,
      "loss": 2.6368,
      "step": 8586
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0368900299072266,
      "learning_rate": 1.94507526854099e-05,
      "loss": 2.5843,
      "step": 8587
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9590020179748535,
      "learning_rate": 1.945061810009635e-05,
      "loss": 2.4859,
      "step": 8588
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0973187685012817,
      "learning_rate": 1.9450483498761436e-05,
      "loss": 2.835,
      "step": 8589
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9397537708282471,
      "learning_rate": 1.945034888140539e-05,
      "loss": 2.5726,
      "step": 8590
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9501964449882507,
      "learning_rate": 1.945021424802843e-05,
      "loss": 2.5211,
      "step": 8591
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0072813034057617,
      "learning_rate": 1.945007959863079e-05,
      "loss": 2.6253,
      "step": 8592
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.8707998991012573,
      "learning_rate": 1.94499449332127e-05,
      "loss": 2.3653,
      "step": 8593
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0022255182266235,
      "learning_rate": 1.944981025177439e-05,
      "loss": 2.5284,
      "step": 8594
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.959205687046051,
      "learning_rate": 1.944967555431608e-05,
      "loss": 2.4574,
      "step": 8595
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0571796894073486,
      "learning_rate": 1.9449540840838006e-05,
      "loss": 2.4011,
      "step": 8596
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1437735557556152,
      "learning_rate": 1.9449406111340394e-05,
      "loss": 2.5899,
      "step": 8597
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1437573432922363,
      "learning_rate": 1.944927136582347e-05,
      "loss": 2.5612,
      "step": 8598
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9558366537094116,
      "learning_rate": 1.9449136604287465e-05,
      "loss": 2.3293,
      "step": 8599
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0277988910675049,
      "learning_rate": 1.944900182673261e-05,
      "loss": 2.7221,
      "step": 8600
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.003588080406189,
      "learning_rate": 1.944886703315913e-05,
      "loss": 2.6505,
      "step": 8601
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0803790092468262,
      "learning_rate": 1.944873222356725e-05,
      "loss": 2.5126,
      "step": 8602
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0804389715194702,
      "learning_rate": 1.9448597397957208e-05,
      "loss": 2.8478,
      "step": 8603
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0042113065719604,
      "learning_rate": 1.9448462556329225e-05,
      "loss": 2.3224,
      "step": 8604
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1486618518829346,
      "learning_rate": 1.944832769868353e-05,
      "loss": 2.6571,
      "step": 8605
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0233200788497925,
      "learning_rate": 1.9448192825020356e-05,
      "loss": 2.6091,
      "step": 8606
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0687675476074219,
      "learning_rate": 1.944805793533993e-05,
      "loss": 2.7135,
      "step": 8607
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.084499716758728,
      "learning_rate": 1.9447923029642477e-05,
      "loss": 2.526,
      "step": 8608
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1092145442962646,
      "learning_rate": 1.944778810792823e-05,
      "loss": 2.4443,
      "step": 8609
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0578888654708862,
      "learning_rate": 1.9447653170197415e-05,
      "loss": 2.5841,
      "step": 8610
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.964078962802887,
      "learning_rate": 1.9447518216450263e-05,
      "loss": 2.8226,
      "step": 8611
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0649157762527466,
      "learning_rate": 1.9447383246687002e-05,
      "loss": 2.4915,
      "step": 8612
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2152669429779053,
      "learning_rate": 1.944724826090786e-05,
      "loss": 2.6006,
      "step": 8613
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0087761878967285,
      "learning_rate": 1.9447113259113065e-05,
      "loss": 2.4323,
      "step": 8614
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.999679684638977,
      "learning_rate": 1.9446978241302846e-05,
      "loss": 2.4361,
      "step": 8615
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.970931351184845,
      "learning_rate": 1.9446843207477436e-05,
      "loss": 2.5456,
      "step": 8616
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9909980297088623,
      "learning_rate": 1.9446708157637063e-05,
      "loss": 2.5401,
      "step": 8617
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9913114309310913,
      "learning_rate": 1.944657309178195e-05,
      "loss": 2.6691,
      "step": 8618
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.960124671459198,
      "learning_rate": 1.9446438009912333e-05,
      "loss": 2.81,
      "step": 8619
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0459648370742798,
      "learning_rate": 1.9446302912028434e-05,
      "loss": 2.4289,
      "step": 8620
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1516392230987549,
      "learning_rate": 1.944616779813049e-05,
      "loss": 2.4183,
      "step": 8621
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0020831823349,
      "learning_rate": 1.9446032668218723e-05,
      "loss": 2.6375,
      "step": 8622
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9817429780960083,
      "learning_rate": 1.9445897522293368e-05,
      "loss": 2.4683,
      "step": 8623
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0503519773483276,
      "learning_rate": 1.944576236035465e-05,
      "loss": 2.5985,
      "step": 8624
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.252130389213562,
      "learning_rate": 1.94456271824028e-05,
      "loss": 2.4563,
      "step": 8625
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1467677354812622,
      "learning_rate": 1.9445491988438045e-05,
      "loss": 2.5335,
      "step": 8626
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1887216567993164,
      "learning_rate": 1.9445356778460617e-05,
      "loss": 2.5201,
      "step": 8627
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1133449077606201,
      "learning_rate": 1.944522155247074e-05,
      "loss": 2.6121,
      "step": 8628
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9593027234077454,
      "learning_rate": 1.944508631046865e-05,
      "loss": 2.544,
      "step": 8629
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9914329648017883,
      "learning_rate": 1.9444951052454576e-05,
      "loss": 2.5319,
      "step": 8630
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1044573783874512,
      "learning_rate": 1.9444815778428742e-05,
      "loss": 2.3546,
      "step": 8631
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.155767560005188,
      "learning_rate": 1.944468048839138e-05,
      "loss": 2.5249,
      "step": 8632
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1437864303588867,
      "learning_rate": 1.944454518234272e-05,
      "loss": 2.7443,
      "step": 8633
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9914165139198303,
      "learning_rate": 1.9444409860282993e-05,
      "loss": 2.6246,
      "step": 8634
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0345052480697632,
      "learning_rate": 1.9444274522212426e-05,
      "loss": 2.7593,
      "step": 8635
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0186558961868286,
      "learning_rate": 1.9444139168131245e-05,
      "loss": 2.6978,
      "step": 8636
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9342625737190247,
      "learning_rate": 1.9444003798039684e-05,
      "loss": 2.6184,
      "step": 8637
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9620103240013123,
      "learning_rate": 1.944386841193797e-05,
      "loss": 2.626,
      "step": 8638
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.971960723400116,
      "learning_rate": 1.944373300982634e-05,
      "loss": 2.5481,
      "step": 8639
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9706690907478333,
      "learning_rate": 1.9443597591705016e-05,
      "loss": 2.616,
      "step": 8640
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0711901187896729,
      "learning_rate": 1.9443462157574227e-05,
      "loss": 2.5303,
      "step": 8641
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0267159938812256,
      "learning_rate": 1.9443326707434205e-05,
      "loss": 2.8035,
      "step": 8642
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1740009784698486,
      "learning_rate": 1.944319124128518e-05,
      "loss": 2.6255,
      "step": 8643
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1800391674041748,
      "learning_rate": 1.944305575912738e-05,
      "loss": 2.6899,
      "step": 8644
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.114340901374817,
      "learning_rate": 1.9442920260961037e-05,
      "loss": 2.6025,
      "step": 8645
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0083318948745728,
      "learning_rate": 1.944278474678638e-05,
      "loss": 2.5802,
      "step": 8646
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0618577003479004,
      "learning_rate": 1.9442649216603638e-05,
      "loss": 2.5106,
      "step": 8647
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0456558465957642,
      "learning_rate": 1.944251367041304e-05,
      "loss": 2.6676,
      "step": 8648
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.161378264427185,
      "learning_rate": 1.9442378108214817e-05,
      "loss": 2.4595,
      "step": 8649
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1738133430480957,
      "learning_rate": 1.94422425300092e-05,
      "loss": 2.6226,
      "step": 8650
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.000186800956726,
      "learning_rate": 1.9442106935796414e-05,
      "loss": 2.9168,
      "step": 8651
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9731682538986206,
      "learning_rate": 1.9441971325576695e-05,
      "loss": 2.536,
      "step": 8652
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.978748619556427,
      "learning_rate": 1.9441835699350273e-05,
      "loss": 2.8095,
      "step": 8653
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9485928416252136,
      "learning_rate": 1.944170005711737e-05,
      "loss": 2.5761,
      "step": 8654
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9923012852668762,
      "learning_rate": 1.9441564398878222e-05,
      "loss": 2.458,
      "step": 8655
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0375834703445435,
      "learning_rate": 1.944142872463306e-05,
      "loss": 2.4435,
      "step": 8656
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0652508735656738,
      "learning_rate": 1.9441293034382108e-05,
      "loss": 2.4751,
      "step": 8657
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9967058897018433,
      "learning_rate": 1.9441157328125607e-05,
      "loss": 2.7359,
      "step": 8658
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9872559309005737,
      "learning_rate": 1.9441021605863775e-05,
      "loss": 2.5684,
      "step": 8659
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9604254961013794,
      "learning_rate": 1.944088586759685e-05,
      "loss": 2.7067,
      "step": 8660
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0568444728851318,
      "learning_rate": 1.9440750113325055e-05,
      "loss": 2.9142,
      "step": 8661
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0210574865341187,
      "learning_rate": 1.9440614343048628e-05,
      "loss": 2.6536,
      "step": 8662
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1477108001708984,
      "learning_rate": 1.9440478556767798e-05,
      "loss": 2.392,
      "step": 8663
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9867296814918518,
      "learning_rate": 1.944034275448279e-05,
      "loss": 2.6717,
      "step": 8664
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.064052700996399,
      "learning_rate": 1.9440206936193835e-05,
      "loss": 2.5868,
      "step": 8665
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0050749778747559,
      "learning_rate": 1.9440071101901168e-05,
      "loss": 2.7127,
      "step": 8666
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.979924738407135,
      "learning_rate": 1.9439935251605017e-05,
      "loss": 2.5829,
      "step": 8667
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0521976947784424,
      "learning_rate": 1.9439799385305608e-05,
      "loss": 2.5651,
      "step": 8668
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0395370721817017,
      "learning_rate": 1.943966350300318e-05,
      "loss": 2.6429,
      "step": 8669
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0172799825668335,
      "learning_rate": 1.9439527604697956e-05,
      "loss": 2.6922,
      "step": 8670
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0289720296859741,
      "learning_rate": 1.943939169039017e-05,
      "loss": 2.466,
      "step": 8671
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9692153930664062,
      "learning_rate": 1.943925576008005e-05,
      "loss": 2.6117,
      "step": 8672
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9995855093002319,
      "learning_rate": 1.943911981376783e-05,
      "loss": 2.7497,
      "step": 8673
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.028357982635498,
      "learning_rate": 1.9438983851453736e-05,
      "loss": 2.4965,
      "step": 8674
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0449166297912598,
      "learning_rate": 1.9438847873138004e-05,
      "loss": 2.6254,
      "step": 8675
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9902369379997253,
      "learning_rate": 1.9438711878820858e-05,
      "loss": 2.5799,
      "step": 8676
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.121647596359253,
      "learning_rate": 1.943857586850253e-05,
      "loss": 2.6667,
      "step": 8677
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0015146732330322,
      "learning_rate": 1.943843984218326e-05,
      "loss": 2.6205,
      "step": 8678
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0591837167739868,
      "learning_rate": 1.9438303799863263e-05,
      "loss": 2.6157,
      "step": 8679
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0371026992797852,
      "learning_rate": 1.9438167741542782e-05,
      "loss": 2.5362,
      "step": 8680
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.118295669555664,
      "learning_rate": 1.9438031667222043e-05,
      "loss": 2.6409,
      "step": 8681
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1201205253601074,
      "learning_rate": 1.9437895576901274e-05,
      "loss": 2.7842,
      "step": 8682
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0821090936660767,
      "learning_rate": 1.9437759470580714e-05,
      "loss": 2.6089,
      "step": 8683
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.014500617980957,
      "learning_rate": 1.9437623348260583e-05,
      "loss": 2.7491,
      "step": 8684
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.154586911201477,
      "learning_rate": 1.943748720994112e-05,
      "loss": 2.7977,
      "step": 8685
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0930254459381104,
      "learning_rate": 1.9437351055622554e-05,
      "loss": 2.6044,
      "step": 8686
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9868221879005432,
      "learning_rate": 1.9437214885305114e-05,
      "loss": 2.6741,
      "step": 8687
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9323601126670837,
      "learning_rate": 1.943707869898903e-05,
      "loss": 2.8222,
      "step": 8688
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.162672758102417,
      "learning_rate": 1.9436942496674535e-05,
      "loss": 2.6003,
      "step": 8689
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.959580659866333,
      "learning_rate": 1.943680627836186e-05,
      "loss": 2.6151,
      "step": 8690
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0462241172790527,
      "learning_rate": 1.9436670044051237e-05,
      "loss": 2.7164,
      "step": 8691
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9641088843345642,
      "learning_rate": 1.9436533793742892e-05,
      "loss": 2.4241,
      "step": 8692
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0268328189849854,
      "learning_rate": 1.9436397527437063e-05,
      "loss": 2.7249,
      "step": 8693
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.089263916015625,
      "learning_rate": 1.9436261245133977e-05,
      "loss": 2.5431,
      "step": 8694
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.945685625076294,
      "learning_rate": 1.9436124946833863e-05,
      "loss": 2.468,
      "step": 8695
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0048874616622925,
      "learning_rate": 1.9435988632536956e-05,
      "loss": 2.606,
      "step": 8696
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.02077317237854,
      "learning_rate": 1.9435852302243484e-05,
      "loss": 2.6833,
      "step": 8697
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0476232767105103,
      "learning_rate": 1.9435715955953678e-05,
      "loss": 2.6863,
      "step": 8698
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0540788173675537,
      "learning_rate": 1.9435579593667774e-05,
      "loss": 2.828,
      "step": 8699
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.094562292098999,
      "learning_rate": 1.9435443215386e-05,
      "loss": 2.7543,
      "step": 8700
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6708778142929077,
      "learning_rate": 1.9435306821108586e-05,
      "loss": 2.5643,
      "step": 8701
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.917292594909668,
      "learning_rate": 1.943517041083576e-05,
      "loss": 2.6667,
      "step": 8702
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9813180565834045,
      "learning_rate": 1.9435033984567764e-05,
      "loss": 2.5601,
      "step": 8703
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0603970289230347,
      "learning_rate": 1.9434897542304822e-05,
      "loss": 2.6956,
      "step": 8704
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9601993560791016,
      "learning_rate": 1.9434761084047165e-05,
      "loss": 2.591,
      "step": 8705
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9856762290000916,
      "learning_rate": 1.9434624609795025e-05,
      "loss": 2.6956,
      "step": 8706
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.947913408279419,
      "learning_rate": 1.9434488119548637e-05,
      "loss": 2.7696,
      "step": 8707
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.016758918762207,
      "learning_rate": 1.9434351613308223e-05,
      "loss": 2.6373,
      "step": 8708
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0360016822814941,
      "learning_rate": 1.9434215091074025e-05,
      "loss": 2.6718,
      "step": 8709
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0877420902252197,
      "learning_rate": 1.943407855284627e-05,
      "loss": 2.8262,
      "step": 8710
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.8865928649902344,
      "learning_rate": 1.9433941998625186e-05,
      "loss": 2.6185,
      "step": 8711
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.027501106262207,
      "learning_rate": 1.9433805428411013e-05,
      "loss": 2.8571,
      "step": 8712
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.8841229677200317,
      "learning_rate": 1.9433668842203978e-05,
      "loss": 2.6506,
      "step": 8713
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0207514762878418,
      "learning_rate": 1.943353224000431e-05,
      "loss": 2.5704,
      "step": 8714
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9277458786964417,
      "learning_rate": 1.9433395621812242e-05,
      "loss": 2.4828,
      "step": 8715
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9325006604194641,
      "learning_rate": 1.9433258987628007e-05,
      "loss": 2.6162,
      "step": 8716
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0302906036376953,
      "learning_rate": 1.9433122337451837e-05,
      "loss": 2.4258,
      "step": 8717
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.050153374671936,
      "learning_rate": 1.943298567128396e-05,
      "loss": 2.8819,
      "step": 8718
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0569425821304321,
      "learning_rate": 1.9432848989124614e-05,
      "loss": 2.4327,
      "step": 8719
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9970081448554993,
      "learning_rate": 1.9432712290974022e-05,
      "loss": 2.896,
      "step": 8720
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0470354557037354,
      "learning_rate": 1.9432575576832425e-05,
      "loss": 2.8133,
      "step": 8721
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0687675476074219,
      "learning_rate": 1.9432438846700047e-05,
      "loss": 2.502,
      "step": 8722
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9637138843536377,
      "learning_rate": 1.9432302100577128e-05,
      "loss": 2.7808,
      "step": 8723
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0423647165298462,
      "learning_rate": 1.943216533846389e-05,
      "loss": 2.4706,
      "step": 8724
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.117052674293518,
      "learning_rate": 1.9432028560360575e-05,
      "loss": 2.3711,
      "step": 8725
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9408499002456665,
      "learning_rate": 1.943189176626741e-05,
      "loss": 2.664,
      "step": 8726
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0670217275619507,
      "learning_rate": 1.943175495618462e-05,
      "loss": 2.7551,
      "step": 8727
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0544992685317993,
      "learning_rate": 1.943161813011245e-05,
      "loss": 2.5727,
      "step": 8728
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9546182155609131,
      "learning_rate": 1.9431481288051124e-05,
      "loss": 2.4974,
      "step": 8729
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0896164178848267,
      "learning_rate": 1.9431344430000875e-05,
      "loss": 2.5745,
      "step": 8730
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9181850552558899,
      "learning_rate": 1.9431207555961937e-05,
      "loss": 2.8239,
      "step": 8731
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.017480731010437,
      "learning_rate": 1.943107066593454e-05,
      "loss": 2.4318,
      "step": 8732
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1301815509796143,
      "learning_rate": 1.9430933759918916e-05,
      "loss": 2.6198,
      "step": 8733
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1369282007217407,
      "learning_rate": 1.94307968379153e-05,
      "loss": 2.7542,
      "step": 8734
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9270943403244019,
      "learning_rate": 1.9430659899923922e-05,
      "loss": 2.4798,
      "step": 8735
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0300078392028809,
      "learning_rate": 1.9430522945945013e-05,
      "loss": 2.613,
      "step": 8736
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.018898606300354,
      "learning_rate": 1.9430385975978804e-05,
      "loss": 2.7237,
      "step": 8737
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0183912515640259,
      "learning_rate": 1.943024899002553e-05,
      "loss": 2.4602,
      "step": 8738
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2642310857772827,
      "learning_rate": 1.9430111988085426e-05,
      "loss": 2.6173,
      "step": 8739
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9432632327079773,
      "learning_rate": 1.9429974970158718e-05,
      "loss": 2.5398,
      "step": 8740
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0365525484085083,
      "learning_rate": 1.9429837936245645e-05,
      "loss": 2.6404,
      "step": 8741
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0677192211151123,
      "learning_rate": 1.942970088634643e-05,
      "loss": 2.8178,
      "step": 8742
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1562434434890747,
      "learning_rate": 1.9429563820461314e-05,
      "loss": 2.6218,
      "step": 8743
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1056917905807495,
      "learning_rate": 1.9429426738590526e-05,
      "loss": 2.7635,
      "step": 8744
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9363124966621399,
      "learning_rate": 1.94292896407343e-05,
      "loss": 2.418,
      "step": 8745
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0335345268249512,
      "learning_rate": 1.9429152526892866e-05,
      "loss": 2.4273,
      "step": 8746
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1214771270751953,
      "learning_rate": 1.9429015397066458e-05,
      "loss": 2.5012,
      "step": 8747
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.986677885055542,
      "learning_rate": 1.942887825125531e-05,
      "loss": 2.6045,
      "step": 8748
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0962233543395996,
      "learning_rate": 1.9428741089459647e-05,
      "loss": 2.8008,
      "step": 8749
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2576277256011963,
      "learning_rate": 1.9428603911679712e-05,
      "loss": 2.5434,
      "step": 8750
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0430575609207153,
      "learning_rate": 1.942846671791573e-05,
      "loss": 2.6075,
      "step": 8751
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9410818815231323,
      "learning_rate": 1.9428329508167938e-05,
      "loss": 2.6309,
      "step": 8752
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9984903335571289,
      "learning_rate": 1.9428192282436566e-05,
      "loss": 2.6785,
      "step": 8753
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2165533304214478,
      "learning_rate": 1.9428055040721846e-05,
      "loss": 2.4703,
      "step": 8754
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0603042840957642,
      "learning_rate": 1.9427917783024017e-05,
      "loss": 2.4776,
      "step": 8755
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.082404613494873,
      "learning_rate": 1.94277805093433e-05,
      "loss": 2.5854,
      "step": 8756
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.076224446296692,
      "learning_rate": 1.9427643219679937e-05,
      "loss": 2.482,
      "step": 8757
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9443110823631287,
      "learning_rate": 1.9427505914034158e-05,
      "loss": 2.5743,
      "step": 8758
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9401105642318726,
      "learning_rate": 1.9427368592406196e-05,
      "loss": 2.5966,
      "step": 8759
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.013452172279358,
      "learning_rate": 1.9427231254796287e-05,
      "loss": 2.4724,
      "step": 8760
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0012166500091553,
      "learning_rate": 1.9427093901204658e-05,
      "loss": 2.537,
      "step": 8761
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0002927780151367,
      "learning_rate": 1.9426956531631544e-05,
      "loss": 2.4889,
      "step": 8762
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9659650325775146,
      "learning_rate": 1.942681914607718e-05,
      "loss": 2.5137,
      "step": 8763
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.00635826587677,
      "learning_rate": 1.9426681744541794e-05,
      "loss": 2.7589,
      "step": 8764
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0211652517318726,
      "learning_rate": 1.9426544327025626e-05,
      "loss": 2.6929,
      "step": 8765
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.039931297302246,
      "learning_rate": 1.9426406893528904e-05,
      "loss": 2.3858,
      "step": 8766
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9608744978904724,
      "learning_rate": 1.942626944405186e-05,
      "loss": 2.5329,
      "step": 8767
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9517054557800293,
      "learning_rate": 1.942613197859473e-05,
      "loss": 2.552,
      "step": 8768
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.046166181564331,
      "learning_rate": 1.942599449715775e-05,
      "loss": 2.732,
      "step": 8769
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9364257454872131,
      "learning_rate": 1.9425856999741146e-05,
      "loss": 2.7079,
      "step": 8770
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0000674724578857,
      "learning_rate": 1.9425719486345157e-05,
      "loss": 2.3826,
      "step": 8771
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.160254716873169,
      "learning_rate": 1.942558195697001e-05,
      "loss": 2.6387,
      "step": 8772
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9370719790458679,
      "learning_rate": 1.942544441161594e-05,
      "loss": 2.6018,
      "step": 8773
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0615135431289673,
      "learning_rate": 1.9425306850283187e-05,
      "loss": 2.8065,
      "step": 8774
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.061083436012268,
      "learning_rate": 1.9425169272971977e-05,
      "loss": 2.5519,
      "step": 8775
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.039902925491333,
      "learning_rate": 1.9425031679682543e-05,
      "loss": 2.543,
      "step": 8776
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9783795475959778,
      "learning_rate": 1.9424894070415124e-05,
      "loss": 2.6186,
      "step": 8777
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0226207971572876,
      "learning_rate": 1.9424756445169946e-05,
      "loss": 2.6018,
      "step": 8778
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9783362150192261,
      "learning_rate": 1.942461880394725e-05,
      "loss": 2.5989,
      "step": 8779
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0683212280273438,
      "learning_rate": 1.9424481146747264e-05,
      "loss": 2.8045,
      "step": 8780
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1079946756362915,
      "learning_rate": 1.9424343473570223e-05,
      "loss": 2.5557,
      "step": 8781
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9274148941040039,
      "learning_rate": 1.942420578441636e-05,
      "loss": 2.5683,
      "step": 8782
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0016491413116455,
      "learning_rate": 1.9424068079285908e-05,
      "loss": 2.8077,
      "step": 8783
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0073422193527222,
      "learning_rate": 1.94239303581791e-05,
      "loss": 2.8334,
      "step": 8784
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0438493490219116,
      "learning_rate": 1.9423792621096172e-05,
      "loss": 2.4959,
      "step": 8785
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.931873619556427,
      "learning_rate": 1.9423654868037354e-05,
      "loss": 2.7238,
      "step": 8786
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9286758899688721,
      "learning_rate": 1.9423517099002886e-05,
      "loss": 2.711,
      "step": 8787
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9841541051864624,
      "learning_rate": 1.9423379313992993e-05,
      "loss": 2.6581,
      "step": 8788
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0922105312347412,
      "learning_rate": 1.9423241513007916e-05,
      "loss": 2.6061,
      "step": 8789
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9933697581291199,
      "learning_rate": 1.9423103696047883e-05,
      "loss": 2.5241,
      "step": 8790
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9233672022819519,
      "learning_rate": 1.9422965863113134e-05,
      "loss": 2.4011,
      "step": 8791
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.076601505279541,
      "learning_rate": 1.9422828014203895e-05,
      "loss": 2.6957,
      "step": 8792
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9738708734512329,
      "learning_rate": 1.94226901493204e-05,
      "loss": 2.7513,
      "step": 8793
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9384174942970276,
      "learning_rate": 1.942255226846289e-05,
      "loss": 2.4033,
      "step": 8794
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.012298583984375,
      "learning_rate": 1.9422414371631596e-05,
      "loss": 2.4917,
      "step": 8795
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.066285252571106,
      "learning_rate": 1.9422276458826746e-05,
      "loss": 2.6419,
      "step": 8796
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0637052059173584,
      "learning_rate": 1.9422138530048582e-05,
      "loss": 2.6817,
      "step": 8797
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9141189455986023,
      "learning_rate": 1.9422000585297335e-05,
      "loss": 2.5403,
      "step": 8798
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9703052639961243,
      "learning_rate": 1.9421862624573236e-05,
      "loss": 2.4944,
      "step": 8799
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.001498818397522,
      "learning_rate": 1.9421724647876524e-05,
      "loss": 2.4953,
      "step": 8800
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9685505628585815,
      "learning_rate": 1.9421586655207424e-05,
      "loss": 2.3941,
      "step": 8801
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0748130083084106,
      "learning_rate": 1.942144864656618e-05,
      "loss": 2.551,
      "step": 8802
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0423799753189087,
      "learning_rate": 1.942131062195302e-05,
      "loss": 2.6401,
      "step": 8803
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0306719541549683,
      "learning_rate": 1.9421172581368178e-05,
      "loss": 2.5849,
      "step": 8804
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0133119821548462,
      "learning_rate": 1.9421034524811896e-05,
      "loss": 2.5574,
      "step": 8805
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0806032419204712,
      "learning_rate": 1.9420896452284397e-05,
      "loss": 2.4527,
      "step": 8806
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0255008935928345,
      "learning_rate": 1.942075836378592e-05,
      "loss": 2.3024,
      "step": 8807
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9836024641990662,
      "learning_rate": 1.94206202593167e-05,
      "loss": 2.5507,
      "step": 8808
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9007837176322937,
      "learning_rate": 1.942048213887697e-05,
      "loss": 2.4518,
      "step": 8809
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.996499240398407,
      "learning_rate": 1.9420344002466965e-05,
      "loss": 2.5414,
      "step": 8810
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9772640466690063,
      "learning_rate": 1.9420205850086916e-05,
      "loss": 2.6054,
      "step": 8811
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9776389598846436,
      "learning_rate": 1.942006768173706e-05,
      "loss": 2.6862,
      "step": 8812
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0407284498214722,
      "learning_rate": 1.941992949741763e-05,
      "loss": 2.5259,
      "step": 8813
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0442088842391968,
      "learning_rate": 1.9419791297128863e-05,
      "loss": 2.6464,
      "step": 8814
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.069601058959961,
      "learning_rate": 1.9419653080870994e-05,
      "loss": 2.6116,
      "step": 8815
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0773978233337402,
      "learning_rate": 1.9419514848644252e-05,
      "loss": 2.8069,
      "step": 8816
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.8674440383911133,
      "learning_rate": 1.9419376600448873e-05,
      "loss": 2.8108,
      "step": 8817
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7289725542068481,
      "learning_rate": 1.9419238336285093e-05,
      "loss": 2.5045,
      "step": 8818
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0596117973327637,
      "learning_rate": 1.9419100056153146e-05,
      "loss": 2.4975,
      "step": 8819
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9780259132385254,
      "learning_rate": 1.9418961760053267e-05,
      "loss": 2.5129,
      "step": 8820
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9936668276786804,
      "learning_rate": 1.9418823447985688e-05,
      "loss": 2.4731,
      "step": 8821
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9811586737632751,
      "learning_rate": 1.9418685119950646e-05,
      "loss": 2.778,
      "step": 8822
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9941642880439758,
      "learning_rate": 1.9418546775948373e-05,
      "loss": 2.8191,
      "step": 8823
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0302846431732178,
      "learning_rate": 1.941840841597911e-05,
      "loss": 2.6328,
      "step": 8824
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.078960657119751,
      "learning_rate": 1.941827004004308e-05,
      "loss": 2.6271,
      "step": 8825
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9054809212684631,
      "learning_rate": 1.941813164814053e-05,
      "loss": 2.58,
      "step": 8826
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9736644625663757,
      "learning_rate": 1.9417993240271687e-05,
      "loss": 2.4727,
      "step": 8827
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0206267833709717,
      "learning_rate": 1.941785481643679e-05,
      "loss": 2.6571,
      "step": 8828
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.0404623746871948,
      "learning_rate": 1.9417716376636066e-05,
      "loss": 2.6696,
      "step": 8829
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1788471937179565,
      "learning_rate": 1.9417577920869757e-05,
      "loss": 2.7093,
      "step": 8830
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.8915901780128479,
      "learning_rate": 1.9417439449138097e-05,
      "loss": 2.487,
      "step": 8831
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.9427236914634705,
      "learning_rate": 1.9417300961441316e-05,
      "loss": 2.5106,
      "step": 8832
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.987763524055481,
      "learning_rate": 1.9417162457779658e-05,
      "loss": 2.5466,
      "step": 8833
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9949789643287659,
      "learning_rate": 1.9417023938153347e-05,
      "loss": 2.5105,
      "step": 8834
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0480916500091553,
      "learning_rate": 1.9416885402562626e-05,
      "loss": 2.6068,
      "step": 8835
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0286144018173218,
      "learning_rate": 1.9416746851007723e-05,
      "loss": 2.4667,
      "step": 8836
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0814845561981201,
      "learning_rate": 1.941660828348888e-05,
      "loss": 2.4679,
      "step": 8837
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9497513175010681,
      "learning_rate": 1.941646970000633e-05,
      "loss": 2.5439,
      "step": 8838
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9928342700004578,
      "learning_rate": 1.94163311005603e-05,
      "loss": 2.5435,
      "step": 8839
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9947734475135803,
      "learning_rate": 1.9416192485151036e-05,
      "loss": 2.3649,
      "step": 8840
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0608797073364258,
      "learning_rate": 1.9416053853778765e-05,
      "loss": 2.7402,
      "step": 8841
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9914329648017883,
      "learning_rate": 1.941591520644373e-05,
      "loss": 2.8833,
      "step": 8842
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.075941801071167,
      "learning_rate": 1.9415776543146158e-05,
      "loss": 2.4412,
      "step": 8843
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9954333305358887,
      "learning_rate": 1.941563786388629e-05,
      "loss": 2.5635,
      "step": 8844
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1303364038467407,
      "learning_rate": 1.9415499168664356e-05,
      "loss": 2.6892,
      "step": 8845
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0797882080078125,
      "learning_rate": 1.9415360457480595e-05,
      "loss": 2.6732,
      "step": 8846
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9941215515136719,
      "learning_rate": 1.941522173033524e-05,
      "loss": 2.6565,
      "step": 8847
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0200221538543701,
      "learning_rate": 1.9415082987228525e-05,
      "loss": 2.4879,
      "step": 8848
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0389620065689087,
      "learning_rate": 1.941494422816069e-05,
      "loss": 2.739,
      "step": 8849
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.082541823387146,
      "learning_rate": 1.9414805453131967e-05,
      "loss": 2.6614,
      "step": 8850
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0285526514053345,
      "learning_rate": 1.9414666662142595e-05,
      "loss": 2.483,
      "step": 8851
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9287915229797363,
      "learning_rate": 1.94145278551928e-05,
      "loss": 2.7435,
      "step": 8852
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0796936750411987,
      "learning_rate": 1.9414389032282826e-05,
      "loss": 2.5087,
      "step": 8853
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9013933539390564,
      "learning_rate": 1.9414250193412907e-05,
      "loss": 2.5506,
      "step": 8854
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1143097877502441,
      "learning_rate": 1.941411133858328e-05,
      "loss": 2.8723,
      "step": 8855
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0936318635940552,
      "learning_rate": 1.941397246779417e-05,
      "loss": 2.4808,
      "step": 8856
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1004475355148315,
      "learning_rate": 1.9413833581045825e-05,
      "loss": 2.4526,
      "step": 8857
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9222117066383362,
      "learning_rate": 1.9413694678338472e-05,
      "loss": 2.6537,
      "step": 8858
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0402072668075562,
      "learning_rate": 1.941355575967235e-05,
      "loss": 2.4926,
      "step": 8859
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.976037859916687,
      "learning_rate": 1.94134168250477e-05,
      "loss": 2.6852,
      "step": 8860
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0244570970535278,
      "learning_rate": 1.9413277874464746e-05,
      "loss": 2.6231,
      "step": 8861
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1819732189178467,
      "learning_rate": 1.941313890792373e-05,
      "loss": 2.6422,
      "step": 8862
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0676946640014648,
      "learning_rate": 1.941299992542489e-05,
      "loss": 2.4517,
      "step": 8863
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9331788420677185,
      "learning_rate": 1.9412860926968455e-05,
      "loss": 2.4728,
      "step": 8864
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0384116172790527,
      "learning_rate": 1.9412721912554665e-05,
      "loss": 2.5695,
      "step": 8865
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9428443312644958,
      "learning_rate": 1.9412582882183756e-05,
      "loss": 2.7842,
      "step": 8866
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2966755628585815,
      "learning_rate": 1.9412443835855963e-05,
      "loss": 2.7346,
      "step": 8867
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.003973364830017,
      "learning_rate": 1.9412304773571523e-05,
      "loss": 2.5805,
      "step": 8868
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9223456382751465,
      "learning_rate": 1.9412165695330666e-05,
      "loss": 2.7141,
      "step": 8869
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9754530191421509,
      "learning_rate": 1.9412026601133632e-05,
      "loss": 2.6406,
      "step": 8870
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0568158626556396,
      "learning_rate": 1.941188749098066e-05,
      "loss": 2.6274,
      "step": 8871
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9876042008399963,
      "learning_rate": 1.9411748364871982e-05,
      "loss": 2.5389,
      "step": 8872
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0319770574569702,
      "learning_rate": 1.941160922280783e-05,
      "loss": 2.6751,
      "step": 8873
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.8926123976707458,
      "learning_rate": 1.9411470064788446e-05,
      "loss": 2.6676,
      "step": 8874
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9692428708076477,
      "learning_rate": 1.9411330890814065e-05,
      "loss": 2.4678,
      "step": 8875
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0636463165283203,
      "learning_rate": 1.9411191700884924e-05,
      "loss": 2.674,
      "step": 8876
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9977859258651733,
      "learning_rate": 1.9411052495001253e-05,
      "loss": 2.6549,
      "step": 8877
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9905819892883301,
      "learning_rate": 1.9410913273163297e-05,
      "loss": 2.5813,
      "step": 8878
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.074035406112671,
      "learning_rate": 1.9410774035371286e-05,
      "loss": 2.6524,
      "step": 8879
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0403105020523071,
      "learning_rate": 1.9410634781625456e-05,
      "loss": 2.6803,
      "step": 8880
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0704891681671143,
      "learning_rate": 1.9410495511926042e-05,
      "loss": 2.4373,
      "step": 8881
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.04640531539917,
      "learning_rate": 1.9410356226273284e-05,
      "loss": 2.6289,
      "step": 8882
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0230056047439575,
      "learning_rate": 1.9410216924667416e-05,
      "loss": 2.5616,
      "step": 8883
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1204906702041626,
      "learning_rate": 1.9410077607108672e-05,
      "loss": 2.509,
      "step": 8884
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1307483911514282,
      "learning_rate": 1.9409938273597296e-05,
      "loss": 2.5467,
      "step": 8885
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0777487754821777,
      "learning_rate": 1.9409798924133514e-05,
      "loss": 2.5136,
      "step": 8886
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0696526765823364,
      "learning_rate": 1.940965955871757e-05,
      "loss": 2.6415,
      "step": 8887
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0068734884262085,
      "learning_rate": 1.9409520177349698e-05,
      "loss": 2.505,
      "step": 8888
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0096718072891235,
      "learning_rate": 1.9409380780030133e-05,
      "loss": 2.7199,
      "step": 8889
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1166695356369019,
      "learning_rate": 1.940924136675911e-05,
      "loss": 2.4891,
      "step": 8890
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9936895966529846,
      "learning_rate": 1.9409101937536872e-05,
      "loss": 2.5615,
      "step": 8891
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.043606162071228,
      "learning_rate": 1.9408962492363646e-05,
      "loss": 2.4865,
      "step": 8892
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9748713970184326,
      "learning_rate": 1.9408823031239675e-05,
      "loss": 2.5885,
      "step": 8893
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0690386295318604,
      "learning_rate": 1.9408683554165196e-05,
      "loss": 2.5123,
      "step": 8894
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9755563735961914,
      "learning_rate": 1.940854406114044e-05,
      "loss": 2.6369,
      "step": 8895
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9592801332473755,
      "learning_rate": 1.9408404552165647e-05,
      "loss": 2.5292,
      "step": 8896
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.342633843421936,
      "learning_rate": 1.940826502724105e-05,
      "loss": 2.4066,
      "step": 8897
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0404261350631714,
      "learning_rate": 1.9408125486366894e-05,
      "loss": 2.656,
      "step": 8898
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1192270517349243,
      "learning_rate": 1.9407985929543406e-05,
      "loss": 2.5253,
      "step": 8899
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1046403646469116,
      "learning_rate": 1.940784635677083e-05,
      "loss": 2.4556,
      "step": 8900
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9552616477012634,
      "learning_rate": 1.9407706768049395e-05,
      "loss": 2.6617,
      "step": 8901
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0364232063293457,
      "learning_rate": 1.9407567163379347e-05,
      "loss": 2.5309,
      "step": 8902
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.543368935585022,
      "learning_rate": 1.9407427542760915e-05,
      "loss": 2.6621,
      "step": 8903
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1808116436004639,
      "learning_rate": 1.940728790619434e-05,
      "loss": 2.6187,
      "step": 8904
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0295827388763428,
      "learning_rate": 1.9407148253679856e-05,
      "loss": 2.49,
      "step": 8905
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.077823519706726,
      "learning_rate": 1.94070085852177e-05,
      "loss": 2.5746,
      "step": 8906
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9594320058822632,
      "learning_rate": 1.940686890080811e-05,
      "loss": 2.7969,
      "step": 8907
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0854955911636353,
      "learning_rate": 1.940672920045132e-05,
      "loss": 2.6411,
      "step": 8908
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1249151229858398,
      "learning_rate": 1.940658948414757e-05,
      "loss": 2.6428,
      "step": 8909
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0582904815673828,
      "learning_rate": 1.94064497518971e-05,
      "loss": 2.7129,
      "step": 8910
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9507836103439331,
      "learning_rate": 1.9406310003700136e-05,
      "loss": 2.4992,
      "step": 8911
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0301941633224487,
      "learning_rate": 1.940617023955693e-05,
      "loss": 2.508,
      "step": 8912
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0789687633514404,
      "learning_rate": 1.9406030459467705e-05,
      "loss": 2.6985,
      "step": 8913
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.060355305671692,
      "learning_rate": 1.9405890663432707e-05,
      "loss": 2.522,
      "step": 8914
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9583142399787903,
      "learning_rate": 1.940575085145217e-05,
      "loss": 2.7313,
      "step": 8915
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9564443230628967,
      "learning_rate": 1.9405611023526326e-05,
      "loss": 2.5967,
      "step": 8916
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9433383941650391,
      "learning_rate": 1.940547117965542e-05,
      "loss": 2.5051,
      "step": 8917
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0145032405853271,
      "learning_rate": 1.9405331319839685e-05,
      "loss": 2.5028,
      "step": 8918
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0205782651901245,
      "learning_rate": 1.940519144407936e-05,
      "loss": 2.5631,
      "step": 8919
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0898218154907227,
      "learning_rate": 1.9405051552374682e-05,
      "loss": 2.9223,
      "step": 8920
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0093410015106201,
      "learning_rate": 1.9404911644725885e-05,
      "loss": 2.7583,
      "step": 8921
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0321173667907715,
      "learning_rate": 1.9404771721133207e-05,
      "loss": 2.7291,
      "step": 8922
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4907654523849487,
      "learning_rate": 1.9404631781596893e-05,
      "loss": 2.5764,
      "step": 8923
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0114911794662476,
      "learning_rate": 1.9404491826117165e-05,
      "loss": 2.7254,
      "step": 8924
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0722895860671997,
      "learning_rate": 1.940435185469428e-05,
      "loss": 2.6744,
      "step": 8925
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0095620155334473,
      "learning_rate": 1.9404211867328456e-05,
      "loss": 2.5788,
      "step": 8926
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0755077600479126,
      "learning_rate": 1.940407186401994e-05,
      "loss": 2.8755,
      "step": 8927
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1259268522262573,
      "learning_rate": 1.9403931844768968e-05,
      "loss": 2.6071,
      "step": 8928
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0129363536834717,
      "learning_rate": 1.940379180957578e-05,
      "loss": 2.7738,
      "step": 8929
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9398566484451294,
      "learning_rate": 1.940365175844061e-05,
      "loss": 2.4159,
      "step": 8930
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0502599477767944,
      "learning_rate": 1.9403511691363696e-05,
      "loss": 2.4037,
      "step": 8931
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1318717002868652,
      "learning_rate": 1.9403371608345275e-05,
      "loss": 2.8972,
      "step": 8932
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9467617869377136,
      "learning_rate": 1.9403231509385586e-05,
      "loss": 2.6416,
      "step": 8933
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9851657748222351,
      "learning_rate": 1.9403091394484863e-05,
      "loss": 2.6326,
      "step": 8934
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9523695707321167,
      "learning_rate": 1.940295126364335e-05,
      "loss": 2.5324,
      "step": 8935
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.074420690536499,
      "learning_rate": 1.940281111686128e-05,
      "loss": 2.8607,
      "step": 8936
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9381167888641357,
      "learning_rate": 1.9402670954138892e-05,
      "loss": 2.4998,
      "step": 8937
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0423213243484497,
      "learning_rate": 1.9402530775476422e-05,
      "loss": 2.6398,
      "step": 8938
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9942998886108398,
      "learning_rate": 1.9402390580874106e-05,
      "loss": 2.5613,
      "step": 8939
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9449472427368164,
      "learning_rate": 1.940225037033219e-05,
      "loss": 2.2815,
      "step": 8940
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9868815541267395,
      "learning_rate": 1.94021101438509e-05,
      "loss": 2.4312,
      "step": 8941
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0244766473770142,
      "learning_rate": 1.9401969901430484e-05,
      "loss": 2.4956,
      "step": 8942
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6248657703399658,
      "learning_rate": 1.940182964307117e-05,
      "loss": 2.6641,
      "step": 8943
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1309741735458374,
      "learning_rate": 1.940168936877321e-05,
      "loss": 2.5795,
      "step": 8944
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9391258358955383,
      "learning_rate": 1.940154907853683e-05,
      "loss": 2.6084,
      "step": 8945
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0024832487106323,
      "learning_rate": 1.9401408772362265e-05,
      "loss": 2.43,
      "step": 8946
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.905791699886322,
      "learning_rate": 1.9401268450249764e-05,
      "loss": 2.5224,
      "step": 8947
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9659463763237,
      "learning_rate": 1.940112811219956e-05,
      "loss": 2.6774,
      "step": 8948
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0408775806427002,
      "learning_rate": 1.9400987758211887e-05,
      "loss": 2.3341,
      "step": 8949
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1110442876815796,
      "learning_rate": 1.940084738828699e-05,
      "loss": 2.4286,
      "step": 8950
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9957612156867981,
      "learning_rate": 1.94007070024251e-05,
      "loss": 2.5631,
      "step": 8951
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.044219970703125,
      "learning_rate": 1.9400566600626464e-05,
      "loss": 2.7938,
      "step": 8952
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1442532539367676,
      "learning_rate": 1.940042618289131e-05,
      "loss": 2.623,
      "step": 8953
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0325664281845093,
      "learning_rate": 1.9400285749219876e-05,
      "loss": 2.6392,
      "step": 8954
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9818012118339539,
      "learning_rate": 1.9400145299612413e-05,
      "loss": 2.4215,
      "step": 8955
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1197998523712158,
      "learning_rate": 1.9400004834069147e-05,
      "loss": 2.6317,
      "step": 8956
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.130480408668518,
      "learning_rate": 1.9399864352590324e-05,
      "loss": 2.6383,
      "step": 8957
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9341012239456177,
      "learning_rate": 1.9399723855176172e-05,
      "loss": 2.3987,
      "step": 8958
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0441371202468872,
      "learning_rate": 1.939958334182694e-05,
      "loss": 2.7034,
      "step": 8959
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9643867015838623,
      "learning_rate": 1.9399442812542857e-05,
      "loss": 2.6977,
      "step": 8960
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.085242509841919,
      "learning_rate": 1.939930226732417e-05,
      "loss": 2.6152,
      "step": 8961
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9543569087982178,
      "learning_rate": 1.9399161706171106e-05,
      "loss": 2.4506,
      "step": 8962
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9660334587097168,
      "learning_rate": 1.9399021129083917e-05,
      "loss": 2.345,
      "step": 8963
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1921896934509277,
      "learning_rate": 1.939888053606283e-05,
      "loss": 2.6145,
      "step": 8964
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0170106887817383,
      "learning_rate": 1.939873992710809e-05,
      "loss": 2.4456,
      "step": 8965
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0490304231643677,
      "learning_rate": 1.939859930221993e-05,
      "loss": 2.5463,
      "step": 8966
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.960719645023346,
      "learning_rate": 1.9398458661398595e-05,
      "loss": 2.8042,
      "step": 8967
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0759130716323853,
      "learning_rate": 1.9398318004644322e-05,
      "loss": 2.5561,
      "step": 8968
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.092078447341919,
      "learning_rate": 1.9398177331957346e-05,
      "loss": 2.557,
      "step": 8969
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1142593622207642,
      "learning_rate": 1.9398036643337903e-05,
      "loss": 2.5733,
      "step": 8970
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9658342003822327,
      "learning_rate": 1.939789593878624e-05,
      "loss": 2.4173,
      "step": 8971
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.005327820777893,
      "learning_rate": 1.939775521830259e-05,
      "loss": 2.5022,
      "step": 8972
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0545639991760254,
      "learning_rate": 1.939761448188719e-05,
      "loss": 2.6424,
      "step": 8973
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1018091440200806,
      "learning_rate": 1.9397473729540283e-05,
      "loss": 2.6994,
      "step": 8974
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0062220096588135,
      "learning_rate": 1.9397332961262105e-05,
      "loss": 2.5414,
      "step": 8975
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9478686451911926,
      "learning_rate": 1.9397192177052897e-05,
      "loss": 2.6065,
      "step": 8976
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9394656419754028,
      "learning_rate": 1.939705137691289e-05,
      "loss": 2.7863,
      "step": 8977
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0582014322280884,
      "learning_rate": 1.9396910560842335e-05,
      "loss": 2.6942,
      "step": 8978
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1278371810913086,
      "learning_rate": 1.9396769728841465e-05,
      "loss": 2.7558,
      "step": 8979
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0378764867782593,
      "learning_rate": 1.9396628880910518e-05,
      "loss": 2.6425,
      "step": 8980
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.069183349609375,
      "learning_rate": 1.939648801704973e-05,
      "loss": 2.6424,
      "step": 8981
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.097719669342041,
      "learning_rate": 1.9396347137259342e-05,
      "loss": 2.6891,
      "step": 8982
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9855637550354004,
      "learning_rate": 1.9396206241539596e-05,
      "loss": 2.2866,
      "step": 8983
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0974032878875732,
      "learning_rate": 1.9396065329890727e-05,
      "loss": 2.5106,
      "step": 8984
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0299822092056274,
      "learning_rate": 1.9395924402312976e-05,
      "loss": 2.4105,
      "step": 8985
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.065944790840149,
      "learning_rate": 1.9395783458806582e-05,
      "loss": 2.4422,
      "step": 8986
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9517208337783813,
      "learning_rate": 1.9395642499371783e-05,
      "loss": 2.6604,
      "step": 8987
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0502448081970215,
      "learning_rate": 1.939550152400882e-05,
      "loss": 2.4966,
      "step": 8988
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0684772729873657,
      "learning_rate": 1.9395360532717928e-05,
      "loss": 2.6431,
      "step": 8989
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9915849566459656,
      "learning_rate": 1.9395219525499348e-05,
      "loss": 2.5603,
      "step": 8990
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9624834656715393,
      "learning_rate": 1.9395078502353316e-05,
      "loss": 2.6971,
      "step": 8991
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9758308529853821,
      "learning_rate": 1.9394937463280077e-05,
      "loss": 2.6973,
      "step": 8992
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0000431537628174,
      "learning_rate": 1.9394796408279867e-05,
      "loss": 2.6886,
      "step": 8993
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1216398477554321,
      "learning_rate": 1.9394655337352927e-05,
      "loss": 2.3876,
      "step": 8994
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.009711742401123,
      "learning_rate": 1.9394514250499498e-05,
      "loss": 2.6898,
      "step": 8995
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9937444925308228,
      "learning_rate": 1.939437314771981e-05,
      "loss": 2.421,
      "step": 8996
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.044610857963562,
      "learning_rate": 1.939423202901411e-05,
      "loss": 2.4505,
      "step": 8997
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0146160125732422,
      "learning_rate": 1.9394090894382635e-05,
      "loss": 2.4812,
      "step": 8998
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1612544059753418,
      "learning_rate": 1.9393949743825623e-05,
      "loss": 2.5936,
      "step": 8999
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9404955506324768,
      "learning_rate": 1.9393808577343318e-05,
      "loss": 2.6467,
      "step": 9000
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9988435506820679,
      "learning_rate": 1.9393667394935956e-05,
      "loss": 2.8423,
      "step": 9001
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9569676518440247,
      "learning_rate": 1.9393526196603775e-05,
      "loss": 2.5007,
      "step": 9002
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0858677625656128,
      "learning_rate": 1.9393384982347015e-05,
      "loss": 2.7618,
      "step": 9003
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0150938034057617,
      "learning_rate": 1.9393243752165918e-05,
      "loss": 2.8471,
      "step": 9004
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1111282110214233,
      "learning_rate": 1.939310250606072e-05,
      "loss": 2.4329,
      "step": 9005
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9878048896789551,
      "learning_rate": 1.939296124403166e-05,
      "loss": 2.2805,
      "step": 9006
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9602434635162354,
      "learning_rate": 1.9392819966078983e-05,
      "loss": 2.6145,
      "step": 9007
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1294589042663574,
      "learning_rate": 1.9392678672202926e-05,
      "loss": 2.6466,
      "step": 9008
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0017883777618408,
      "learning_rate": 1.9392537362403726e-05,
      "loss": 2.6286,
      "step": 9009
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9976288676261902,
      "learning_rate": 1.9392396036681626e-05,
      "loss": 2.64,
      "step": 9010
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0214704275131226,
      "learning_rate": 1.9392254695036864e-05,
      "loss": 2.6302,
      "step": 9011
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0999412536621094,
      "learning_rate": 1.9392113337469675e-05,
      "loss": 2.4253,
      "step": 9012
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9940852522850037,
      "learning_rate": 1.9391971963980303e-05,
      "loss": 2.6551,
      "step": 9013
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0024044513702393,
      "learning_rate": 1.9391830574568992e-05,
      "loss": 2.5532,
      "step": 9014
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1028658151626587,
      "learning_rate": 1.9391689169235976e-05,
      "loss": 2.3799,
      "step": 9015
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0901058912277222,
      "learning_rate": 1.9391547747981495e-05,
      "loss": 2.8717,
      "step": 9016
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0119154453277588,
      "learning_rate": 1.9391406310805792e-05,
      "loss": 2.7361,
      "step": 9017
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1437723636627197,
      "learning_rate": 1.9391264857709103e-05,
      "loss": 2.6811,
      "step": 9018
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1734424829483032,
      "learning_rate": 1.939112338869167e-05,
      "loss": 2.7101,
      "step": 9019
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9453942179679871,
      "learning_rate": 1.9390981903753735e-05,
      "loss": 2.5129,
      "step": 9020
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0736721754074097,
      "learning_rate": 1.939084040289553e-05,
      "loss": 2.6055,
      "step": 9021
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9706356525421143,
      "learning_rate": 1.9390698886117302e-05,
      "loss": 2.6608,
      "step": 9022
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0413230657577515,
      "learning_rate": 1.939055735341929e-05,
      "loss": 2.7145,
      "step": 9023
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1433649063110352,
      "learning_rate": 1.939041580480173e-05,
      "loss": 2.571,
      "step": 9024
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0175234079360962,
      "learning_rate": 1.9390274240264868e-05,
      "loss": 2.6926,
      "step": 9025
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2670645713806152,
      "learning_rate": 1.939013265980894e-05,
      "loss": 2.5251,
      "step": 9026
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2612097263336182,
      "learning_rate": 1.9389991063434187e-05,
      "loss": 2.6365,
      "step": 9027
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9741947054862976,
      "learning_rate": 1.9389849451140847e-05,
      "loss": 2.6249,
      "step": 9028
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2464072704315186,
      "learning_rate": 1.9389707822929162e-05,
      "loss": 2.5438,
      "step": 9029
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.061765193939209,
      "learning_rate": 1.9389566178799374e-05,
      "loss": 2.5947,
      "step": 9030
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9215856194496155,
      "learning_rate": 1.9389424518751722e-05,
      "loss": 2.4799,
      "step": 9031
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5052108764648438,
      "learning_rate": 1.9389282842786444e-05,
      "loss": 2.4867,
      "step": 9032
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9392685890197754,
      "learning_rate": 1.9389141150903777e-05,
      "loss": 2.7095,
      "step": 9033
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2616400718688965,
      "learning_rate": 1.9388999443103972e-05,
      "loss": 2.7026,
      "step": 9034
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.074051022529602,
      "learning_rate": 1.9388857719387263e-05,
      "loss": 2.6962,
      "step": 9035
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0019688606262207,
      "learning_rate": 1.9388715979753886e-05,
      "loss": 2.7631,
      "step": 9036
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9709328413009644,
      "learning_rate": 1.9388574224204087e-05,
      "loss": 2.599,
      "step": 9037
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1482179164886475,
      "learning_rate": 1.9388432452738107e-05,
      "loss": 2.4865,
      "step": 9038
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.098488688468933,
      "learning_rate": 1.9388290665356182e-05,
      "loss": 2.5523,
      "step": 9039
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9674026370048523,
      "learning_rate": 1.9388148862058552e-05,
      "loss": 2.7033,
      "step": 9040
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1750327348709106,
      "learning_rate": 1.9388007042845465e-05,
      "loss": 2.5557,
      "step": 9041
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0476020574569702,
      "learning_rate": 1.9387865207717157e-05,
      "loss": 2.4725,
      "step": 9042
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0148811340332031,
      "learning_rate": 1.938772335667386e-05,
      "loss": 2.599,
      "step": 9043
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1263089179992676,
      "learning_rate": 1.9387581489715827e-05,
      "loss": 2.2642,
      "step": 9044
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9846248030662537,
      "learning_rate": 1.9387439606843297e-05,
      "loss": 2.5787,
      "step": 9045
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9868783950805664,
      "learning_rate": 1.9387297708056503e-05,
      "loss": 2.7414,
      "step": 9046
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0898847579956055,
      "learning_rate": 1.9387155793355693e-05,
      "loss": 2.6261,
      "step": 9047
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2737709283828735,
      "learning_rate": 1.93870138627411e-05,
      "loss": 2.6259,
      "step": 9048
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.036285638809204,
      "learning_rate": 1.938687191621297e-05,
      "loss": 2.6766,
      "step": 9049
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1131237745285034,
      "learning_rate": 1.9386729953771544e-05,
      "loss": 2.4799,
      "step": 9050
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0549652576446533,
      "learning_rate": 1.9386587975417063e-05,
      "loss": 2.5289,
      "step": 9051
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.031050682067871,
      "learning_rate": 1.9386445981149763e-05,
      "loss": 2.499,
      "step": 9052
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0293166637420654,
      "learning_rate": 1.938630397096989e-05,
      "loss": 2.4793,
      "step": 9053
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.98245769739151,
      "learning_rate": 1.938616194487768e-05,
      "loss": 2.6383,
      "step": 9054
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.035535216331482,
      "learning_rate": 1.938601990287338e-05,
      "loss": 2.6923,
      "step": 9055
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9730027914047241,
      "learning_rate": 1.9385877844957222e-05,
      "loss": 2.6612,
      "step": 9056
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0482183694839478,
      "learning_rate": 1.9385735771129456e-05,
      "loss": 2.5425,
      "step": 9057
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1692336797714233,
      "learning_rate": 1.938559368139032e-05,
      "loss": 2.6324,
      "step": 9058
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0556640625,
      "learning_rate": 1.938545157574005e-05,
      "loss": 2.8192,
      "step": 9059
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9444046020507812,
      "learning_rate": 1.938530945417889e-05,
      "loss": 2.6026,
      "step": 9060
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0006093978881836,
      "learning_rate": 1.9385167316707084e-05,
      "loss": 2.4652,
      "step": 9061
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2933473587036133,
      "learning_rate": 1.938502516332487e-05,
      "loss": 2.5904,
      "step": 9062
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0196963548660278,
      "learning_rate": 1.938488299403249e-05,
      "loss": 2.4299,
      "step": 9063
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9389800429344177,
      "learning_rate": 1.9384740808830183e-05,
      "loss": 2.4423,
      "step": 9064
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1092060804367065,
      "learning_rate": 1.9384598607718193e-05,
      "loss": 2.7616,
      "step": 9065
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0139514207839966,
      "learning_rate": 1.938445639069676e-05,
      "loss": 2.4719,
      "step": 9066
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.127746820449829,
      "learning_rate": 1.938431415776612e-05,
      "loss": 2.5699,
      "step": 9067
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1730152368545532,
      "learning_rate": 1.9384171908926523e-05,
      "loss": 2.6927,
      "step": 9068
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0422301292419434,
      "learning_rate": 1.9384029644178205e-05,
      "loss": 2.7822,
      "step": 9069
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.249113082885742,
      "learning_rate": 1.9383887363521405e-05,
      "loss": 2.7336,
      "step": 9070
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9593774080276489,
      "learning_rate": 1.938374506695637e-05,
      "loss": 2.5799,
      "step": 9071
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0775476694107056,
      "learning_rate": 1.938360275448334e-05,
      "loss": 2.5852,
      "step": 9072
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0197508335113525,
      "learning_rate": 1.9383460426102552e-05,
      "loss": 2.6155,
      "step": 9073
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0085926055908203,
      "learning_rate": 1.9383318081814252e-05,
      "loss": 2.7487,
      "step": 9074
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9424421787261963,
      "learning_rate": 1.938317572161868e-05,
      "loss": 2.7716,
      "step": 9075
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0283836126327515,
      "learning_rate": 1.9383033345516073e-05,
      "loss": 2.6738,
      "step": 9076
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.964215874671936,
      "learning_rate": 1.9382890953506682e-05,
      "loss": 2.4544,
      "step": 9077
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5579979419708252,
      "learning_rate": 1.9382748545590738e-05,
      "loss": 2.4596,
      "step": 9078
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0621992349624634,
      "learning_rate": 1.9382606121768487e-05,
      "loss": 2.5083,
      "step": 9079
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0161972045898438,
      "learning_rate": 1.938246368204017e-05,
      "loss": 2.708,
      "step": 9080
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0425918102264404,
      "learning_rate": 1.938232122640603e-05,
      "loss": 2.8114,
      "step": 9081
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9472754001617432,
      "learning_rate": 1.938217875486631e-05,
      "loss": 2.2344,
      "step": 9082
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2407249212265015,
      "learning_rate": 1.9382036267421244e-05,
      "loss": 2.4248,
      "step": 9083
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0882298946380615,
      "learning_rate": 1.9381893764071084e-05,
      "loss": 2.3061,
      "step": 9084
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1544902324676514,
      "learning_rate": 1.938175124481606e-05,
      "loss": 2.7107,
      "step": 9085
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0241893529891968,
      "learning_rate": 1.9381608709656424e-05,
      "loss": 2.4911,
      "step": 9086
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9510278701782227,
      "learning_rate": 1.938146615859241e-05,
      "loss": 2.6712,
      "step": 9087
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9860413074493408,
      "learning_rate": 1.9381323591624267e-05,
      "loss": 2.639,
      "step": 9088
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9948047399520874,
      "learning_rate": 1.9381181008752227e-05,
      "loss": 2.6727,
      "step": 9089
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0583820343017578,
      "learning_rate": 1.938103840997654e-05,
      "loss": 2.6628,
      "step": 9090
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9470189213752747,
      "learning_rate": 1.9380895795297444e-05,
      "loss": 2.7016,
      "step": 9091
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0201709270477295,
      "learning_rate": 1.9380753164715186e-05,
      "loss": 2.4736,
      "step": 9092
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0531213283538818,
      "learning_rate": 1.938061051823e-05,
      "loss": 2.8858,
      "step": 9093
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0863679647445679,
      "learning_rate": 1.9380467855842132e-05,
      "loss": 2.6466,
      "step": 9094
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0284000635147095,
      "learning_rate": 1.938032517755182e-05,
      "loss": 2.4406,
      "step": 9095
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9823147058486938,
      "learning_rate": 1.9380182483359316e-05,
      "loss": 2.3666,
      "step": 9096
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9979194402694702,
      "learning_rate": 1.9380039773264852e-05,
      "loss": 2.5111,
      "step": 9097
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.086612343788147,
      "learning_rate": 1.9379897047268673e-05,
      "loss": 2.4265,
      "step": 9098
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9520382881164551,
      "learning_rate": 1.937975430537102e-05,
      "loss": 2.6607,
      "step": 9099
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0348719358444214,
      "learning_rate": 1.9379611547572133e-05,
      "loss": 2.5421,
      "step": 9100
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9125314950942993,
      "learning_rate": 1.9379468773872264e-05,
      "loss": 2.74,
      "step": 9101
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1728131771087646,
      "learning_rate": 1.9379325984271646e-05,
      "loss": 2.3963,
      "step": 9102
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0049809217453003,
      "learning_rate": 1.937918317877052e-05,
      "loss": 2.6386,
      "step": 9103
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.171062707901001,
      "learning_rate": 1.937904035736913e-05,
      "loss": 2.6246,
      "step": 9104
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0729351043701172,
      "learning_rate": 1.9378897520067723e-05,
      "loss": 2.858,
      "step": 9105
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0447583198547363,
      "learning_rate": 1.9378754666866538e-05,
      "loss": 2.6923,
      "step": 9106
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0909394025802612,
      "learning_rate": 1.9378611797765816e-05,
      "loss": 2.5671,
      "step": 9107
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9703709483146667,
      "learning_rate": 1.9378468912765797e-05,
      "loss": 2.6164,
      "step": 9108
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0565961599349976,
      "learning_rate": 1.937832601186673e-05,
      "loss": 2.5903,
      "step": 9109
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0471842288970947,
      "learning_rate": 1.937818309506885e-05,
      "loss": 2.6438,
      "step": 9110
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.047278881072998,
      "learning_rate": 1.9378040162372404e-05,
      "loss": 2.5925,
      "step": 9111
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9581565856933594,
      "learning_rate": 1.9377897213777633e-05,
      "loss": 2.3751,
      "step": 9112
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0654939413070679,
      "learning_rate": 1.9377754249284778e-05,
      "loss": 2.5309,
      "step": 9113
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9924549460411072,
      "learning_rate": 1.9377611268894084e-05,
      "loss": 2.5718,
      "step": 9114
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0583279132843018,
      "learning_rate": 1.937746827260579e-05,
      "loss": 2.495,
      "step": 9115
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.033744215965271,
      "learning_rate": 1.937732526042014e-05,
      "loss": 2.846,
      "step": 9116
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1083742380142212,
      "learning_rate": 1.9377182232337382e-05,
      "loss": 2.6911,
      "step": 9117
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.273191213607788,
      "learning_rate": 1.937703918835775e-05,
      "loss": 2.5849,
      "step": 9118
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.06061589717865,
      "learning_rate": 1.937689612848149e-05,
      "loss": 2.5317,
      "step": 9119
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0980538129806519,
      "learning_rate": 1.9376753052708843e-05,
      "loss": 2.6494,
      "step": 9120
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9680135846138,
      "learning_rate": 1.9376609961040053e-05,
      "loss": 2.7925,
      "step": 9121
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.02011775970459,
      "learning_rate": 1.9376466853475365e-05,
      "loss": 2.2901,
      "step": 9122
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.045081377029419,
      "learning_rate": 1.9376323730015016e-05,
      "loss": 2.6493,
      "step": 9123
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1044222116470337,
      "learning_rate": 1.9376180590659254e-05,
      "loss": 2.5172,
      "step": 9124
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9799515008926392,
      "learning_rate": 1.9376037435408318e-05,
      "loss": 2.6438,
      "step": 9125
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2400939464569092,
      "learning_rate": 1.9375894264262454e-05,
      "loss": 2.5886,
      "step": 9126
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0741512775421143,
      "learning_rate": 1.93757510772219e-05,
      "loss": 2.7439,
      "step": 9127
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0469822883605957,
      "learning_rate": 1.9375607874286904e-05,
      "loss": 2.6148,
      "step": 9128
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0048786401748657,
      "learning_rate": 1.9375464655457706e-05,
      "loss": 2.7509,
      "step": 9129
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1212389469146729,
      "learning_rate": 1.9375321420734548e-05,
      "loss": 2.5665,
      "step": 9130
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9898075461387634,
      "learning_rate": 1.9375178170117674e-05,
      "loss": 2.592,
      "step": 9131
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9969699382781982,
      "learning_rate": 1.9375034903607324e-05,
      "loss": 2.5807,
      "step": 9132
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9858003258705139,
      "learning_rate": 1.937489162120375e-05,
      "loss": 2.5159,
      "step": 9133
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.937614917755127,
      "learning_rate": 1.937474832290718e-05,
      "loss": 2.5946,
      "step": 9134
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.048655390739441,
      "learning_rate": 1.9374605008717876e-05,
      "loss": 2.628,
      "step": 9135
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0143885612487793,
      "learning_rate": 1.9374461678636064e-05,
      "loss": 2.5376,
      "step": 9136
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9529869556427002,
      "learning_rate": 1.9374318332661993e-05,
      "loss": 2.4013,
      "step": 9137
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9945772886276245,
      "learning_rate": 1.9374174970795906e-05,
      "loss": 2.5439,
      "step": 9138
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.959906280040741,
      "learning_rate": 1.937403159303805e-05,
      "loss": 2.6577,
      "step": 9139
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9969363808631897,
      "learning_rate": 1.937388819938866e-05,
      "loss": 2.373,
      "step": 9140
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0346981287002563,
      "learning_rate": 1.937374478984799e-05,
      "loss": 2.5138,
      "step": 9141
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9502671360969543,
      "learning_rate": 1.937360136441627e-05,
      "loss": 2.4867,
      "step": 9142
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0134881734848022,
      "learning_rate": 1.9373457923093754e-05,
      "loss": 2.2728,
      "step": 9143
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0552549362182617,
      "learning_rate": 1.937331446588068e-05,
      "loss": 2.5446,
      "step": 9144
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9702291488647461,
      "learning_rate": 1.937317099277729e-05,
      "loss": 2.6622,
      "step": 9145
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0949832201004028,
      "learning_rate": 1.9373027503783836e-05,
      "loss": 2.52,
      "step": 9146
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9577712416648865,
      "learning_rate": 1.9372883998900546e-05,
      "loss": 2.7903,
      "step": 9147
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.114404559135437,
      "learning_rate": 1.9372740478127677e-05,
      "loss": 2.6604,
      "step": 9148
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.008780837059021,
      "learning_rate": 1.9372596941465466e-05,
      "loss": 2.532,
      "step": 9149
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9575505256652832,
      "learning_rate": 1.9372453388914157e-05,
      "loss": 2.7091,
      "step": 9150
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.046887993812561,
      "learning_rate": 1.9372309820473994e-05,
      "loss": 2.7172,
      "step": 9151
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0364636182785034,
      "learning_rate": 1.937216623614522e-05,
      "loss": 2.3069,
      "step": 9152
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.016742467880249,
      "learning_rate": 1.937202263592808e-05,
      "loss": 2.4563,
      "step": 9153
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.016003966331482,
      "learning_rate": 1.937187901982282e-05,
      "loss": 2.592,
      "step": 9154
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.946726381778717,
      "learning_rate": 1.9371735387829677e-05,
      "loss": 2.7316,
      "step": 9155
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0560472011566162,
      "learning_rate": 1.9371591739948895e-05,
      "loss": 2.7701,
      "step": 9156
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9923971891403198,
      "learning_rate": 1.937144807618072e-05,
      "loss": 2.546,
      "step": 9157
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.925650954246521,
      "learning_rate": 1.9371304396525396e-05,
      "loss": 2.3192,
      "step": 9158
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9451932311058044,
      "learning_rate": 1.937116070098317e-05,
      "loss": 2.6301,
      "step": 9159
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0686497688293457,
      "learning_rate": 1.9371016989554276e-05,
      "loss": 2.2909,
      "step": 9160
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2312275171279907,
      "learning_rate": 1.9370873262238965e-05,
      "loss": 2.8145,
      "step": 9161
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1221164464950562,
      "learning_rate": 1.9370729519037477e-05,
      "loss": 2.5945,
      "step": 9162
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1066621541976929,
      "learning_rate": 1.937058575995006e-05,
      "loss": 2.5861,
      "step": 9163
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5929834842681885,
      "learning_rate": 1.937044198497695e-05,
      "loss": 2.5908,
      "step": 9164
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.014889121055603,
      "learning_rate": 1.93702981941184e-05,
      "loss": 2.8675,
      "step": 9165
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9671489596366882,
      "learning_rate": 1.9370154387374652e-05,
      "loss": 2.5625,
      "step": 9166
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.994716227054596,
      "learning_rate": 1.9370010564745946e-05,
      "loss": 2.6793,
      "step": 9167
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0384026765823364,
      "learning_rate": 1.9369866726232526e-05,
      "loss": 2.5412,
      "step": 9168
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9675102829933167,
      "learning_rate": 1.9369722871834633e-05,
      "loss": 2.6885,
      "step": 9169
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.066290020942688,
      "learning_rate": 1.936957900155252e-05,
      "loss": 2.5885,
      "step": 9170
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9064695835113525,
      "learning_rate": 1.9369435115386427e-05,
      "loss": 2.4265,
      "step": 9171
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9576005339622498,
      "learning_rate": 1.9369291213336594e-05,
      "loss": 2.7758,
      "step": 9172
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0354368686676025,
      "learning_rate": 1.9369147295403268e-05,
      "loss": 2.5492,
      "step": 9173
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1886049509048462,
      "learning_rate": 1.936900336158669e-05,
      "loss": 2.8546,
      "step": 9174
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9204943180084229,
      "learning_rate": 1.9368859411887112e-05,
      "loss": 2.702,
      "step": 9175
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9858782887458801,
      "learning_rate": 1.9368715446304768e-05,
      "loss": 2.6014,
      "step": 9176
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9845703840255737,
      "learning_rate": 1.936857146483991e-05,
      "loss": 2.5928,
      "step": 9177
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.046395182609558,
      "learning_rate": 1.936842746749278e-05,
      "loss": 2.6258,
      "step": 9178
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0127629041671753,
      "learning_rate": 1.936828345426362e-05,
      "loss": 2.4918,
      "step": 9179
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.060794711112976,
      "learning_rate": 1.9368139425152672e-05,
      "loss": 2.7399,
      "step": 9180
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0663281679153442,
      "learning_rate": 1.936799538016019e-05,
      "loss": 2.5595,
      "step": 9181
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0159759521484375,
      "learning_rate": 1.9367851319286407e-05,
      "loss": 2.5849,
      "step": 9182
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9783603549003601,
      "learning_rate": 1.9367707242531567e-05,
      "loss": 2.4708,
      "step": 9183
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0906751155853271,
      "learning_rate": 1.9367563149895927e-05,
      "loss": 2.599,
      "step": 9184
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2665197849273682,
      "learning_rate": 1.9367419041379717e-05,
      "loss": 2.3747,
      "step": 9185
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9703016877174377,
      "learning_rate": 1.9367274916983194e-05,
      "loss": 2.2918,
      "step": 9186
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9683656096458435,
      "learning_rate": 1.9367130776706592e-05,
      "loss": 2.5746,
      "step": 9187
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.997747004032135,
      "learning_rate": 1.9366986620550162e-05,
      "loss": 2.6729,
      "step": 9188
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0737260580062866,
      "learning_rate": 1.936684244851414e-05,
      "loss": 2.6752,
      "step": 9189
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0241711139678955,
      "learning_rate": 1.9366698260598783e-05,
      "loss": 2.6799,
      "step": 9190
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9378177523612976,
      "learning_rate": 1.9366554056804323e-05,
      "loss": 2.6138,
      "step": 9191
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2341359853744507,
      "learning_rate": 1.936640983713101e-05,
      "loss": 2.4637,
      "step": 9192
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0631978511810303,
      "learning_rate": 1.936626560157909e-05,
      "loss": 2.7181,
      "step": 9193
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9922404289245605,
      "learning_rate": 1.936612135014881e-05,
      "loss": 2.6057,
      "step": 9194
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0080466270446777,
      "learning_rate": 1.9365977082840406e-05,
      "loss": 2.3443,
      "step": 9195
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9757750630378723,
      "learning_rate": 1.9365832799654127e-05,
      "loss": 2.5903,
      "step": 9196
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.231201410293579,
      "learning_rate": 1.9365688500590218e-05,
      "loss": 2.5093,
      "step": 9197
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0254603624343872,
      "learning_rate": 1.9365544185648927e-05,
      "loss": 2.4109,
      "step": 9198
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0883923768997192,
      "learning_rate": 1.936539985483049e-05,
      "loss": 2.6761,
      "step": 9199
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9775907397270203,
      "learning_rate": 1.936525550813516e-05,
      "loss": 2.3456,
      "step": 9200
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0162925720214844,
      "learning_rate": 1.9365111145563177e-05,
      "loss": 2.5729,
      "step": 9201
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0818063020706177,
      "learning_rate": 1.9364966767114784e-05,
      "loss": 2.7495,
      "step": 9202
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.05350661277771,
      "learning_rate": 1.9364822372790237e-05,
      "loss": 2.4576,
      "step": 9203
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1574417352676392,
      "learning_rate": 1.9364677962589764e-05,
      "loss": 2.7208,
      "step": 9204
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0396226644515991,
      "learning_rate": 1.9364533536513624e-05,
      "loss": 2.652,
      "step": 9205
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.012952208518982,
      "learning_rate": 1.9364389094562054e-05,
      "loss": 2.5149,
      "step": 9206
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0867635011672974,
      "learning_rate": 1.93642446367353e-05,
      "loss": 2.5522,
      "step": 9207
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9423759579658508,
      "learning_rate": 1.936410016303361e-05,
      "loss": 2.7584,
      "step": 9208
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.030789852142334,
      "learning_rate": 1.9363955673457228e-05,
      "loss": 2.5446,
      "step": 9209
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0268794298171997,
      "learning_rate": 1.9363811168006396e-05,
      "loss": 2.6478,
      "step": 9210
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0680948495864868,
      "learning_rate": 1.9363666646681357e-05,
      "loss": 2.6352,
      "step": 9211
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0888450145721436,
      "learning_rate": 1.9363522109482368e-05,
      "loss": 2.4957,
      "step": 9212
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0588878393173218,
      "learning_rate": 1.936337755640966e-05,
      "loss": 2.7075,
      "step": 9213
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.091495394706726,
      "learning_rate": 1.9363232987463484e-05,
      "loss": 2.5211,
      "step": 9214
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1100356578826904,
      "learning_rate": 1.936308840264409e-05,
      "loss": 2.7761,
      "step": 9215
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0136972665786743,
      "learning_rate": 1.9362943801951714e-05,
      "loss": 2.7564,
      "step": 9216
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.39830482006073,
      "learning_rate": 1.9362799185386606e-05,
      "loss": 2.6239,
      "step": 9217
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.135153889656067,
      "learning_rate": 1.936265455294901e-05,
      "loss": 2.5677,
      "step": 9218
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9935099482536316,
      "learning_rate": 1.9362509904639174e-05,
      "loss": 2.635,
      "step": 9219
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0147649049758911,
      "learning_rate": 1.936236524045734e-05,
      "loss": 2.72,
      "step": 9220
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9987856149673462,
      "learning_rate": 1.9362220560403754e-05,
      "loss": 2.5834,
      "step": 9221
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.825430989265442,
      "learning_rate": 1.936207586447866e-05,
      "loss": 2.5717,
      "step": 9222
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9560549855232239,
      "learning_rate": 1.9361931152682307e-05,
      "loss": 2.624,
      "step": 9223
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0495102405548096,
      "learning_rate": 1.9361786425014936e-05,
      "loss": 2.5086,
      "step": 9224
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1377840042114258,
      "learning_rate": 1.9361641681476797e-05,
      "loss": 2.5744,
      "step": 9225
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9745174050331116,
      "learning_rate": 1.936149692206813e-05,
      "loss": 2.6966,
      "step": 9226
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0851058959960938,
      "learning_rate": 1.9361352146789186e-05,
      "loss": 2.79,
      "step": 9227
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9792649745941162,
      "learning_rate": 1.9361207355640203e-05,
      "loss": 2.5373,
      "step": 9228
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.053498387336731,
      "learning_rate": 1.9361062548621437e-05,
      "loss": 2.7973,
      "step": 9229
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4177491664886475,
      "learning_rate": 1.9360917725733122e-05,
      "loss": 2.5451,
      "step": 9230
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9956437945365906,
      "learning_rate": 1.936077288697551e-05,
      "loss": 2.6465,
      "step": 9231
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1386792659759521,
      "learning_rate": 1.9360628032348848e-05,
      "loss": 2.7536,
      "step": 9232
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0703431367874146,
      "learning_rate": 1.9360483161853382e-05,
      "loss": 2.4502,
      "step": 9233
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3431651592254639,
      "learning_rate": 1.9360338275489347e-05,
      "loss": 2.6782,
      "step": 9234
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2126978635787964,
      "learning_rate": 1.9360193373257e-05,
      "loss": 2.5728,
      "step": 9235
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0878944396972656,
      "learning_rate": 1.9360048455156585e-05,
      "loss": 2.6644,
      "step": 9236
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1386648416519165,
      "learning_rate": 1.9359903521188347e-05,
      "loss": 2.5722,
      "step": 9237
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.992607593536377,
      "learning_rate": 1.9359758571352524e-05,
      "loss": 2.6086,
      "step": 9238
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1794238090515137,
      "learning_rate": 1.9359613605649373e-05,
      "loss": 2.5417,
      "step": 9239
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.123263955116272,
      "learning_rate": 1.9359468624079133e-05,
      "loss": 2.6044,
      "step": 9240
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9417062997817993,
      "learning_rate": 1.935932362664205e-05,
      "loss": 2.4339,
      "step": 9241
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0453699827194214,
      "learning_rate": 1.9359178613338376e-05,
      "loss": 2.5403,
      "step": 9242
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0418918132781982,
      "learning_rate": 1.9359033584168353e-05,
      "loss": 2.546,
      "step": 9243
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9358177185058594,
      "learning_rate": 1.9358888539132218e-05,
      "loss": 2.5611,
      "step": 9244
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.089613914489746,
      "learning_rate": 1.9358743478230234e-05,
      "loss": 2.5124,
      "step": 9245
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0503981113433838,
      "learning_rate": 1.9358598401462635e-05,
      "loss": 2.3716,
      "step": 9246
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0544618368148804,
      "learning_rate": 1.935845330882967e-05,
      "loss": 2.7033,
      "step": 9247
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0301048755645752,
      "learning_rate": 1.9358308200331582e-05,
      "loss": 2.45,
      "step": 9248
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9966684579849243,
      "learning_rate": 1.9358163075968622e-05,
      "loss": 2.7099,
      "step": 9249
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9664980173110962,
      "learning_rate": 1.9358017935741037e-05,
      "loss": 2.4412,
      "step": 9250
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9978446960449219,
      "learning_rate": 1.9357872779649064e-05,
      "loss": 2.6946,
      "step": 9251
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9312480092048645,
      "learning_rate": 1.9357727607692956e-05,
      "loss": 2.7136,
      "step": 9252
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9558085799217224,
      "learning_rate": 1.9357582419872965e-05,
      "loss": 2.4571,
      "step": 9253
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0857486724853516,
      "learning_rate": 1.9357437216189324e-05,
      "loss": 2.6342,
      "step": 9254
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9443602561950684,
      "learning_rate": 1.9357291996642287e-05,
      "loss": 2.5269,
      "step": 9255
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0289251804351807,
      "learning_rate": 1.9357146761232098e-05,
      "loss": 2.4354,
      "step": 9256
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9983264803886414,
      "learning_rate": 1.9357001509959005e-05,
      "loss": 2.7149,
      "step": 9257
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9294775724411011,
      "learning_rate": 1.9356856242823255e-05,
      "loss": 2.603,
      "step": 9258
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9181879162788391,
      "learning_rate": 1.935671095982509e-05,
      "loss": 2.5756,
      "step": 9259
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0084127187728882,
      "learning_rate": 1.935656566096476e-05,
      "loss": 2.6609,
      "step": 9260
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0852534770965576,
      "learning_rate": 1.9356420346242506e-05,
      "loss": 2.4987,
      "step": 9261
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9696050882339478,
      "learning_rate": 1.935627501565858e-05,
      "loss": 2.7223,
      "step": 9262
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.006354808807373,
      "learning_rate": 1.9356129669213232e-05,
      "loss": 2.7935,
      "step": 9263
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0730880498886108,
      "learning_rate": 1.9355984306906697e-05,
      "loss": 2.7435,
      "step": 9264
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0519362688064575,
      "learning_rate": 1.935583892873923e-05,
      "loss": 2.7412,
      "step": 9265
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0405155420303345,
      "learning_rate": 1.9355693534711072e-05,
      "loss": 2.6763,
      "step": 9266
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0404436588287354,
      "learning_rate": 1.9355548124822478e-05,
      "loss": 2.627,
      "step": 9267
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2523725032806396,
      "learning_rate": 1.9355402699073686e-05,
      "loss": 2.6817,
      "step": 9268
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9531799554824829,
      "learning_rate": 1.9355257257464944e-05,
      "loss": 2.6845,
      "step": 9269
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9866961240768433,
      "learning_rate": 1.9355111799996503e-05,
      "loss": 2.5096,
      "step": 9270
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.141857624053955,
      "learning_rate": 1.9354966326668604e-05,
      "loss": 2.7068,
      "step": 9271
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0132009983062744,
      "learning_rate": 1.93548208374815e-05,
      "loss": 2.609,
      "step": 9272
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1473865509033203,
      "learning_rate": 1.935467533243543e-05,
      "loss": 2.6073,
      "step": 9273
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0407789945602417,
      "learning_rate": 1.9354529811530647e-05,
      "loss": 2.5365,
      "step": 9274
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9201197028160095,
      "learning_rate": 1.9354384274767393e-05,
      "loss": 2.5028,
      "step": 9275
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0032747983932495,
      "learning_rate": 1.935423872214592e-05,
      "loss": 2.4621,
      "step": 9276
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9143344163894653,
      "learning_rate": 1.935409315366647e-05,
      "loss": 2.7357,
      "step": 9277
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9654374718666077,
      "learning_rate": 1.9353947569329292e-05,
      "loss": 2.6091,
      "step": 9278
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1513071060180664,
      "learning_rate": 1.9353801969134634e-05,
      "loss": 2.8435,
      "step": 9279
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0430352687835693,
      "learning_rate": 1.9353656353082737e-05,
      "loss": 2.8628,
      "step": 9280
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0848370790481567,
      "learning_rate": 1.9353510721173857e-05,
      "loss": 2.6086,
      "step": 9281
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1031215190887451,
      "learning_rate": 1.9353365073408233e-05,
      "loss": 2.4456,
      "step": 9282
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0371655225753784,
      "learning_rate": 1.9353219409786115e-05,
      "loss": 2.683,
      "step": 9283
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0024055242538452,
      "learning_rate": 1.935307373030775e-05,
      "loss": 2.5677,
      "step": 9284
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.021894931793213,
      "learning_rate": 1.9352928034973386e-05,
      "loss": 2.4669,
      "step": 9285
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0983620882034302,
      "learning_rate": 1.9352782323783267e-05,
      "loss": 2.4114,
      "step": 9286
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0407871007919312,
      "learning_rate": 1.9352636596737642e-05,
      "loss": 2.38,
      "step": 9287
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3306154012680054,
      "learning_rate": 1.9352490853836757e-05,
      "loss": 2.5927,
      "step": 9288
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.093213677406311,
      "learning_rate": 1.9352345095080862e-05,
      "loss": 2.7786,
      "step": 9289
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.967576265335083,
      "learning_rate": 1.93521993204702e-05,
      "loss": 2.4582,
      "step": 9290
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9230151772499084,
      "learning_rate": 1.9352053530005024e-05,
      "loss": 2.3481,
      "step": 9291
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9675121903419495,
      "learning_rate": 1.9351907723685573e-05,
      "loss": 2.4411,
      "step": 9292
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9717705845832825,
      "learning_rate": 1.9351761901512104e-05,
      "loss": 2.4417,
      "step": 9293
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9338557124137878,
      "learning_rate": 1.935161606348485e-05,
      "loss": 2.7425,
      "step": 9294
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9781344532966614,
      "learning_rate": 1.9351470209604077e-05,
      "loss": 2.6178,
      "step": 9295
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0512858629226685,
      "learning_rate": 1.9351324339870013e-05,
      "loss": 2.3496,
      "step": 9296
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0458664894104004,
      "learning_rate": 1.935117845428292e-05,
      "loss": 2.7416,
      "step": 9297
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0332218408584595,
      "learning_rate": 1.935103255284304e-05,
      "loss": 2.5315,
      "step": 9298
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0516976118087769,
      "learning_rate": 1.9350886635550616e-05,
      "loss": 2.3976,
      "step": 9299
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1190282106399536,
      "learning_rate": 1.93507407024059e-05,
      "loss": 2.6494,
      "step": 9300
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0010720491409302,
      "learning_rate": 1.9350594753409143e-05,
      "loss": 2.659,
      "step": 9301
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.04529869556427,
      "learning_rate": 1.9350448788560584e-05,
      "loss": 2.7027,
      "step": 9302
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0480283498764038,
      "learning_rate": 1.9350302807860478e-05,
      "loss": 2.6599,
      "step": 9303
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9455392956733704,
      "learning_rate": 1.9350156811309068e-05,
      "loss": 2.5529,
      "step": 9304
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9979350566864014,
      "learning_rate": 1.9350010798906602e-05,
      "loss": 2.6051,
      "step": 9305
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1101549863815308,
      "learning_rate": 1.9349864770653327e-05,
      "loss": 2.6755,
      "step": 9306
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.06157386302948,
      "learning_rate": 1.9349718726549495e-05,
      "loss": 2.458,
      "step": 9307
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0491480827331543,
      "learning_rate": 1.9349572666595348e-05,
      "loss": 2.5668,
      "step": 9308
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9730473756790161,
      "learning_rate": 1.9349426590791138e-05,
      "loss": 2.8148,
      "step": 9309
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9677860736846924,
      "learning_rate": 1.9349280499137112e-05,
      "loss": 2.4234,
      "step": 9310
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.188924789428711,
      "learning_rate": 1.9349134391633514e-05,
      "loss": 2.5377,
      "step": 9311
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0079047679901123,
      "learning_rate": 1.9348988268280594e-05,
      "loss": 2.6545,
      "step": 9312
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9456060528755188,
      "learning_rate": 1.9348842129078597e-05,
      "loss": 2.59,
      "step": 9313
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9632832407951355,
      "learning_rate": 1.9348695974027777e-05,
      "loss": 2.7051,
      "step": 9314
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5138381719589233,
      "learning_rate": 1.9348549803128377e-05,
      "loss": 2.6792,
      "step": 9315
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7142664194107056,
      "learning_rate": 1.934840361638065e-05,
      "loss": 2.6849,
      "step": 9316
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1127601861953735,
      "learning_rate": 1.9348257413784836e-05,
      "loss": 2.6975,
      "step": 9317
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0227830410003662,
      "learning_rate": 1.9348111195341185e-05,
      "loss": 2.5543,
      "step": 9318
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.021261215209961,
      "learning_rate": 1.9347964961049948e-05,
      "loss": 2.6937,
      "step": 9319
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0122475624084473,
      "learning_rate": 1.9347818710911375e-05,
      "loss": 2.6418,
      "step": 9320
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0558843612670898,
      "learning_rate": 1.9347672444925707e-05,
      "loss": 2.8168,
      "step": 9321
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1580150127410889,
      "learning_rate": 1.9347526163093197e-05,
      "loss": 2.5039,
      "step": 9322
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1944423913955688,
      "learning_rate": 1.934737986541409e-05,
      "loss": 2.393,
      "step": 9323
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1363706588745117,
      "learning_rate": 1.9347233551888637e-05,
      "loss": 2.6281,
      "step": 9324
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9953398108482361,
      "learning_rate": 1.9347087222517084e-05,
      "loss": 2.4686,
      "step": 9325
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0569186210632324,
      "learning_rate": 1.934694087729968e-05,
      "loss": 2.6002,
      "step": 9326
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0407711267471313,
      "learning_rate": 1.934679451623667e-05,
      "loss": 2.4632,
      "step": 9327
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9650582075119019,
      "learning_rate": 1.9346648139328307e-05,
      "loss": 2.4674,
      "step": 9328
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.109905481338501,
      "learning_rate": 1.9346501746574838e-05,
      "loss": 2.7632,
      "step": 9329
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9999797940254211,
      "learning_rate": 1.9346355337976505e-05,
      "loss": 2.5816,
      "step": 9330
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.060849666595459,
      "learning_rate": 1.9346208913533565e-05,
      "loss": 2.7082,
      "step": 9331
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9816908240318298,
      "learning_rate": 1.9346062473246263e-05,
      "loss": 2.3594,
      "step": 9332
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.017759919166565,
      "learning_rate": 1.9345916017114844e-05,
      "loss": 2.5782,
      "step": 9333
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0282447338104248,
      "learning_rate": 1.934576954513956e-05,
      "loss": 2.6059,
      "step": 9334
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9663057923316956,
      "learning_rate": 1.934562305732066e-05,
      "loss": 2.5992,
      "step": 9335
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9833137392997742,
      "learning_rate": 1.9345476553658394e-05,
      "loss": 2.5585,
      "step": 9336
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9729341268539429,
      "learning_rate": 1.9345330034153003e-05,
      "loss": 2.4731,
      "step": 9337
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.117640733718872,
      "learning_rate": 1.934518349880474e-05,
      "loss": 2.6265,
      "step": 9338
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1244227886199951,
      "learning_rate": 1.9345036947613848e-05,
      "loss": 2.6144,
      "step": 9339
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0167542695999146,
      "learning_rate": 1.9344890380580588e-05,
      "loss": 2.549,
      "step": 9340
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0076185464859009,
      "learning_rate": 1.9344743797705193e-05,
      "loss": 2.5218,
      "step": 9341
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1249750852584839,
      "learning_rate": 1.9344597198987927e-05,
      "loss": 2.4039,
      "step": 9342
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9511436223983765,
      "learning_rate": 1.9344450584429028e-05,
      "loss": 2.6804,
      "step": 9343
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9462519884109497,
      "learning_rate": 1.9344303954028745e-05,
      "loss": 2.6726,
      "step": 9344
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0885663032531738,
      "learning_rate": 1.9344157307787334e-05,
      "loss": 2.5828,
      "step": 9345
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.146404504776001,
      "learning_rate": 1.934401064570503e-05,
      "loss": 2.501,
      "step": 9346
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.979812741279602,
      "learning_rate": 1.9343863967782097e-05,
      "loss": 2.5705,
      "step": 9347
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9034685492515564,
      "learning_rate": 1.9343717274018776e-05,
      "loss": 2.5029,
      "step": 9348
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2482807636260986,
      "learning_rate": 1.9343570564415316e-05,
      "loss": 2.5874,
      "step": 9349
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0765740871429443,
      "learning_rate": 1.9343423838971963e-05,
      "loss": 2.5213,
      "step": 9350
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9069128036499023,
      "learning_rate": 1.9343277097688973e-05,
      "loss": 2.5115,
      "step": 9351
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9718558192253113,
      "learning_rate": 1.9343130340566588e-05,
      "loss": 2.615,
      "step": 9352
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0312763452529907,
      "learning_rate": 1.9342983567605063e-05,
      "loss": 2.5771,
      "step": 9353
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1033108234405518,
      "learning_rate": 1.9342836778804642e-05,
      "loss": 2.505,
      "step": 9354
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0069215297698975,
      "learning_rate": 1.9342689974165573e-05,
      "loss": 2.89,
      "step": 9355
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9776425361633301,
      "learning_rate": 1.934254315368811e-05,
      "loss": 2.4805,
      "step": 9356
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0479629039764404,
      "learning_rate": 1.9342396317372497e-05,
      "loss": 2.606,
      "step": 9357
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9605578780174255,
      "learning_rate": 1.934224946521899e-05,
      "loss": 2.546,
      "step": 9358
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0574955940246582,
      "learning_rate": 1.9342102597227825e-05,
      "loss": 2.8294,
      "step": 9359
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9325138926506042,
      "learning_rate": 1.9341955713399262e-05,
      "loss": 2.8093,
      "step": 9360
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9801419377326965,
      "learning_rate": 1.9341808813733548e-05,
      "loss": 2.4197,
      "step": 9361
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0911502838134766,
      "learning_rate": 1.9341661898230927e-05,
      "loss": 2.4258,
      "step": 9362
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.009725570678711,
      "learning_rate": 1.9341514966891656e-05,
      "loss": 2.4247,
      "step": 9363
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.043355941772461,
      "learning_rate": 1.934136801971598e-05,
      "loss": 2.6323,
      "step": 9364
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0781855583190918,
      "learning_rate": 1.9341221056704148e-05,
      "loss": 2.4127,
      "step": 9365
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0794731378555298,
      "learning_rate": 1.9341074077856405e-05,
      "loss": 2.5654,
      "step": 9366
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9601035714149475,
      "learning_rate": 1.9340927083173008e-05,
      "loss": 2.8632,
      "step": 9367
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0500903129577637,
      "learning_rate": 1.9340780072654205e-05,
      "loss": 2.5463,
      "step": 9368
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9448459148406982,
      "learning_rate": 1.9340633046300237e-05,
      "loss": 2.5526,
      "step": 9369
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.22480309009552,
      "learning_rate": 1.9340486004111363e-05,
      "loss": 2.665,
      "step": 9370
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1548441648483276,
      "learning_rate": 1.9340338946087825e-05,
      "loss": 2.764,
      "step": 9371
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.082741141319275,
      "learning_rate": 1.934019187222988e-05,
      "loss": 2.5744,
      "step": 9372
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9631518125534058,
      "learning_rate": 1.9340044782537773e-05,
      "loss": 2.4763,
      "step": 9373
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9905425310134888,
      "learning_rate": 1.9339897677011753e-05,
      "loss": 3.0015,
      "step": 9374
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0434170961380005,
      "learning_rate": 1.9339750555652066e-05,
      "loss": 2.5803,
      "step": 9375
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9161528944969177,
      "learning_rate": 1.9339603418458967e-05,
      "loss": 2.3445,
      "step": 9376
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.931678295135498,
      "learning_rate": 1.9339456265432702e-05,
      "loss": 2.6089,
      "step": 9377
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9378237724304199,
      "learning_rate": 1.9339309096573527e-05,
      "loss": 2.7198,
      "step": 9378
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0207126140594482,
      "learning_rate": 1.9339161911881684e-05,
      "loss": 2.5643,
      "step": 9379
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0357518196105957,
      "learning_rate": 1.9339014711357423e-05,
      "loss": 2.7583,
      "step": 9380
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9674628376960754,
      "learning_rate": 1.9338867495000998e-05,
      "loss": 2.7156,
      "step": 9381
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9723953604698181,
      "learning_rate": 1.933872026281266e-05,
      "loss": 2.6774,
      "step": 9382
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0113803148269653,
      "learning_rate": 1.933857301479265e-05,
      "loss": 2.4218,
      "step": 9383
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1211574077606201,
      "learning_rate": 1.933842575094122e-05,
      "loss": 2.5171,
      "step": 9384
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9770339727401733,
      "learning_rate": 1.9338278471258626e-05,
      "loss": 2.765,
      "step": 9385
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0209513902664185,
      "learning_rate": 1.933813117574511e-05,
      "loss": 2.8918,
      "step": 9386
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.958659291267395,
      "learning_rate": 1.9337983864400928e-05,
      "loss": 2.6135,
      "step": 9387
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0232152938842773,
      "learning_rate": 1.9337836537226332e-05,
      "loss": 2.6999,
      "step": 9388
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.006340742111206,
      "learning_rate": 1.933768919422156e-05,
      "loss": 2.5475,
      "step": 9389
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9930636882781982,
      "learning_rate": 1.933754183538687e-05,
      "loss": 2.3932,
      "step": 9390
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9725843071937561,
      "learning_rate": 1.9337394460722514e-05,
      "loss": 2.4908,
      "step": 9391
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3360546827316284,
      "learning_rate": 1.9337247070228735e-05,
      "loss": 2.5538,
      "step": 9392
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9369672536849976,
      "learning_rate": 1.9337099663905788e-05,
      "loss": 2.5834,
      "step": 9393
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9307594299316406,
      "learning_rate": 1.933695224175392e-05,
      "loss": 2.6622,
      "step": 9394
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9883571267127991,
      "learning_rate": 1.933680480377338e-05,
      "loss": 2.4777,
      "step": 9395
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9974105358123779,
      "learning_rate": 1.9336657349964425e-05,
      "loss": 2.6409,
      "step": 9396
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9483426809310913,
      "learning_rate": 1.9336509880327297e-05,
      "loss": 2.529,
      "step": 9397
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0377190113067627,
      "learning_rate": 1.9336362394862246e-05,
      "loss": 2.4861,
      "step": 9398
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0224796533584595,
      "learning_rate": 1.933621489356953e-05,
      "loss": 2.569,
      "step": 9399
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0559351444244385,
      "learning_rate": 1.9336067376449393e-05,
      "loss": 2.6186,
      "step": 9400
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9962926506996155,
      "learning_rate": 1.9335919843502088e-05,
      "loss": 2.6532,
      "step": 9401
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9271295666694641,
      "learning_rate": 1.933577229472786e-05,
      "loss": 2.5784,
      "step": 9402
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9072424173355103,
      "learning_rate": 1.933562473012696e-05,
      "loss": 2.573,
      "step": 9403
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.009377360343933,
      "learning_rate": 1.9335477149699645e-05,
      "loss": 2.3,
      "step": 9404
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9689080715179443,
      "learning_rate": 1.933532955344616e-05,
      "loss": 2.5045,
      "step": 9405
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.040286898612976,
      "learning_rate": 1.9335181941366755e-05,
      "loss": 2.6402,
      "step": 9406
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.061711072921753,
      "learning_rate": 1.9335034313461683e-05,
      "loss": 2.2362,
      "step": 9407
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9792922139167786,
      "learning_rate": 1.9334886669731193e-05,
      "loss": 2.5269,
      "step": 9408
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0368458032608032,
      "learning_rate": 1.9334739010175532e-05,
      "loss": 2.5717,
      "step": 9409
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9628251194953918,
      "learning_rate": 1.9334591334794954e-05,
      "loss": 2.4867,
      "step": 9410
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.8982874751091003,
      "learning_rate": 1.9334443643589708e-05,
      "loss": 2.4073,
      "step": 9411
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0218557119369507,
      "learning_rate": 1.9334295936560046e-05,
      "loss": 2.5291,
      "step": 9412
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1541098356246948,
      "learning_rate": 1.9334148213706216e-05,
      "loss": 2.5412,
      "step": 9413
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9870054721832275,
      "learning_rate": 1.9334000475028472e-05,
      "loss": 2.3984,
      "step": 9414
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9450245499610901,
      "learning_rate": 1.933385272052706e-05,
      "loss": 2.6257,
      "step": 9415
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0791704654693604,
      "learning_rate": 1.9333704950202233e-05,
      "loss": 2.5496,
      "step": 9416
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1245832443237305,
      "learning_rate": 1.9333557164054242e-05,
      "loss": 2.3705,
      "step": 9417
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0440242290496826,
      "learning_rate": 1.9333409362083334e-05,
      "loss": 2.5522,
      "step": 9418
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9126997590065002,
      "learning_rate": 1.9333261544289765e-05,
      "loss": 2.5198,
      "step": 9419
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.011430025100708,
      "learning_rate": 1.933311371067378e-05,
      "loss": 2.6027,
      "step": 9420
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0605077743530273,
      "learning_rate": 1.9332965861235637e-05,
      "loss": 2.5235,
      "step": 9421
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1258541345596313,
      "learning_rate": 1.9332817995975575e-05,
      "loss": 2.6252,
      "step": 9422
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1754037141799927,
      "learning_rate": 1.933267011489386e-05,
      "loss": 2.745,
      "step": 9423
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0216273069381714,
      "learning_rate": 1.933252221799073e-05,
      "loss": 2.7947,
      "step": 9424
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0055915117263794,
      "learning_rate": 1.933237430526644e-05,
      "loss": 2.7756,
      "step": 9425
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.015540599822998,
      "learning_rate": 1.9332226376721243e-05,
      "loss": 2.569,
      "step": 9426
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9806458353996277,
      "learning_rate": 1.9332078432355385e-05,
      "loss": 2.5969,
      "step": 9427
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.066235899925232,
      "learning_rate": 1.933193047216912e-05,
      "loss": 2.4987,
      "step": 9428
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0157485008239746,
      "learning_rate": 1.93317824961627e-05,
      "loss": 2.8311,
      "step": 9429
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0140548944473267,
      "learning_rate": 1.9331634504336372e-05,
      "loss": 2.5881,
      "step": 9430
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9730705618858337,
      "learning_rate": 1.9331486496690394e-05,
      "loss": 2.5788,
      "step": 9431
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0412139892578125,
      "learning_rate": 1.9331338473225005e-05,
      "loss": 2.639,
      "step": 9432
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0199413299560547,
      "learning_rate": 1.933119043394047e-05,
      "loss": 2.6163,
      "step": 9433
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9930198192596436,
      "learning_rate": 1.9331042378837027e-05,
      "loss": 2.6844,
      "step": 9434
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2412470579147339,
      "learning_rate": 1.9330894307914933e-05,
      "loss": 2.7067,
      "step": 9435
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3045167922973633,
      "learning_rate": 1.9330746221174445e-05,
      "loss": 2.745,
      "step": 9436
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0244569778442383,
      "learning_rate": 1.93305981186158e-05,
      "loss": 2.2794,
      "step": 9437
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.009602427482605,
      "learning_rate": 1.9330450000239264e-05,
      "loss": 2.4846,
      "step": 9438
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0726394653320312,
      "learning_rate": 1.9330301866045076e-05,
      "loss": 2.4127,
      "step": 9439
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9426088333129883,
      "learning_rate": 1.9330153716033493e-05,
      "loss": 2.4787,
      "step": 9440
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9650259017944336,
      "learning_rate": 1.9330005550204766e-05,
      "loss": 2.8053,
      "step": 9441
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0256803035736084,
      "learning_rate": 1.9329857368559148e-05,
      "loss": 2.6148,
      "step": 9442
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1285544633865356,
      "learning_rate": 1.9329709171096888e-05,
      "loss": 2.5068,
      "step": 9443
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1802622079849243,
      "learning_rate": 1.9329560957818236e-05,
      "loss": 2.3623,
      "step": 9444
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9744501709938049,
      "learning_rate": 1.932941272872344e-05,
      "loss": 2.6061,
      "step": 9445
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9106760621070862,
      "learning_rate": 1.932926448381276e-05,
      "loss": 2.6554,
      "step": 9446
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0711861848831177,
      "learning_rate": 1.932911622308644e-05,
      "loss": 2.4682,
      "step": 9447
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9882045388221741,
      "learning_rate": 1.932896794654474e-05,
      "loss": 2.7221,
      "step": 9448
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0289204120635986,
      "learning_rate": 1.93288196541879e-05,
      "loss": 2.5337,
      "step": 9449
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0309513807296753,
      "learning_rate": 1.932867134601618e-05,
      "loss": 2.4019,
      "step": 9450
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.043514370918274,
      "learning_rate": 1.932852302202983e-05,
      "loss": 2.5264,
      "step": 9451
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0065274238586426,
      "learning_rate": 1.9328374682229093e-05,
      "loss": 2.5594,
      "step": 9452
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9870265126228333,
      "learning_rate": 1.932822632661423e-05,
      "loss": 2.5912,
      "step": 9453
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0041112899780273,
      "learning_rate": 1.9328077955185495e-05,
      "loss": 2.6161,
      "step": 9454
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9404479265213013,
      "learning_rate": 1.9327929567943127e-05,
      "loss": 2.5139,
      "step": 9455
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0056202411651611,
      "learning_rate": 1.932778116488739e-05,
      "loss": 2.5643,
      "step": 9456
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.16427481174469,
      "learning_rate": 1.932763274601853e-05,
      "loss": 2.4931,
      "step": 9457
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9051153659820557,
      "learning_rate": 1.93274843113368e-05,
      "loss": 2.4707,
      "step": 9458
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0507278442382812,
      "learning_rate": 1.932733586084245e-05,
      "loss": 2.5319,
      "step": 9459
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1028112173080444,
      "learning_rate": 1.932718739453573e-05,
      "loss": 2.5217,
      "step": 9460
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9645698666572571,
      "learning_rate": 1.9327038912416895e-05,
      "loss": 2.4472,
      "step": 9461
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9834755063056946,
      "learning_rate": 1.9326890414486197e-05,
      "loss": 2.668,
      "step": 9462
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0071682929992676,
      "learning_rate": 1.9326741900743887e-05,
      "loss": 2.4217,
      "step": 9463
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.004335880279541,
      "learning_rate": 1.9326593371190213e-05,
      "loss": 2.6242,
      "step": 9464
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0230404138565063,
      "learning_rate": 1.9326444825825437e-05,
      "loss": 2.6331,
      "step": 9465
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0306535959243774,
      "learning_rate": 1.93262962646498e-05,
      "loss": 2.6846,
      "step": 9466
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0943145751953125,
      "learning_rate": 1.9326147687663556e-05,
      "loss": 2.4371,
      "step": 9467
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9890260100364685,
      "learning_rate": 1.932599909486696e-05,
      "loss": 2.7448,
      "step": 9468
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.173119068145752,
      "learning_rate": 1.9325850486260264e-05,
      "loss": 2.6288,
      "step": 9469
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.109938383102417,
      "learning_rate": 1.9325701861843718e-05,
      "loss": 2.5145,
      "step": 9470
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.017311930656433,
      "learning_rate": 1.9325553221617573e-05,
      "loss": 2.5892,
      "step": 9471
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0099279880523682,
      "learning_rate": 1.9325404565582084e-05,
      "loss": 2.5571,
      "step": 9472
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0236815214157104,
      "learning_rate": 1.93252558937375e-05,
      "loss": 2.5011,
      "step": 9473
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9614229202270508,
      "learning_rate": 1.9325107206084076e-05,
      "loss": 2.8199,
      "step": 9474
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1083685159683228,
      "learning_rate": 1.9324958502622064e-05,
      "loss": 2.5331,
      "step": 9475
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0064949989318848,
      "learning_rate": 1.9324809783351712e-05,
      "loss": 2.3486,
      "step": 9476
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9916566014289856,
      "learning_rate": 1.9324661048273277e-05,
      "loss": 2.6946,
      "step": 9477
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.005402684211731,
      "learning_rate": 1.9324512297387007e-05,
      "loss": 2.7303,
      "step": 9478
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9834168553352356,
      "learning_rate": 1.9324363530693155e-05,
      "loss": 2.6382,
      "step": 9479
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1264476776123047,
      "learning_rate": 1.9324214748191977e-05,
      "loss": 2.6814,
      "step": 9480
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.027571439743042,
      "learning_rate": 1.9324065949883724e-05,
      "loss": 2.6219,
      "step": 9481
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1833209991455078,
      "learning_rate": 1.9323917135768644e-05,
      "loss": 2.5357,
      "step": 9482
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.008847951889038,
      "learning_rate": 1.932376830584699e-05,
      "loss": 2.6549,
      "step": 9483
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0639030933380127,
      "learning_rate": 1.932361946011902e-05,
      "loss": 2.6799,
      "step": 9484
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.021596908569336,
      "learning_rate": 1.9323470598584984e-05,
      "loss": 2.6065,
      "step": 9485
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9809983968734741,
      "learning_rate": 1.932332172124513e-05,
      "loss": 2.7955,
      "step": 9486
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9234580993652344,
      "learning_rate": 1.9323172828099713e-05,
      "loss": 2.6621,
      "step": 9487
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9445371031761169,
      "learning_rate": 1.9323023919148988e-05,
      "loss": 2.5969,
      "step": 9488
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0124390125274658,
      "learning_rate": 1.9322874994393207e-05,
      "loss": 2.6224,
      "step": 9489
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.058314561843872,
      "learning_rate": 1.9322726053832618e-05,
      "loss": 2.676,
      "step": 9490
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2077603340148926,
      "learning_rate": 1.9322577097467475e-05,
      "loss": 2.5243,
      "step": 9491
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0742731094360352,
      "learning_rate": 1.9322428125298036e-05,
      "loss": 2.3918,
      "step": 9492
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9929718375205994,
      "learning_rate": 1.9322279137324548e-05,
      "loss": 2.6214,
      "step": 9493
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9532145261764526,
      "learning_rate": 1.9322130133547265e-05,
      "loss": 2.4306,
      "step": 9494
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.029389500617981,
      "learning_rate": 1.9321981113966437e-05,
      "loss": 2.4042,
      "step": 9495
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1122018098831177,
      "learning_rate": 1.9321832078582322e-05,
      "loss": 2.7704,
      "step": 9496
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9728174209594727,
      "learning_rate": 1.932168302739517e-05,
      "loss": 2.5232,
      "step": 9497
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9756366014480591,
      "learning_rate": 1.932153396040523e-05,
      "loss": 2.5321,
      "step": 9498
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0593221187591553,
      "learning_rate": 1.9321384877612762e-05,
      "loss": 2.4911,
      "step": 9499
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0044276714324951,
      "learning_rate": 1.9321235779018014e-05,
      "loss": 2.6329,
      "step": 9500
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9951820373535156,
      "learning_rate": 1.932108666462124e-05,
      "loss": 2.5283,
      "step": 9501
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9678506851196289,
      "learning_rate": 1.932093753442269e-05,
      "loss": 2.5067,
      "step": 9502
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9701871275901794,
      "learning_rate": 1.932078838842262e-05,
      "loss": 2.7288,
      "step": 9503
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9277130365371704,
      "learning_rate": 1.9320639226621286e-05,
      "loss": 2.6874,
      "step": 9504
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1916468143463135,
      "learning_rate": 1.9320490049018933e-05,
      "loss": 2.7651,
      "step": 9505
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.033663034439087,
      "learning_rate": 1.932034085561582e-05,
      "loss": 2.7596,
      "step": 9506
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9837368726730347,
      "learning_rate": 1.9320191646412195e-05,
      "loss": 2.6608,
      "step": 9507
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.161014437675476,
      "learning_rate": 1.9320042421408317e-05,
      "loss": 2.6486,
      "step": 9508
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0587953329086304,
      "learning_rate": 1.9319893180604438e-05,
      "loss": 2.7726,
      "step": 9509
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0754081010818481,
      "learning_rate": 1.9319743924000803e-05,
      "loss": 2.4664,
      "step": 9510
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0826753377914429,
      "learning_rate": 1.9319594651597673e-05,
      "loss": 2.8399,
      "step": 9511
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.154530644416809,
      "learning_rate": 1.9319445363395298e-05,
      "loss": 2.5169,
      "step": 9512
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9976193904876709,
      "learning_rate": 1.9319296059393936e-05,
      "loss": 2.5571,
      "step": 9513
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.8885703086853027,
      "learning_rate": 1.9319146739593833e-05,
      "loss": 2.4004,
      "step": 9514
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9392044544219971,
      "learning_rate": 1.9318997403995245e-05,
      "loss": 2.627,
      "step": 9515
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9449353218078613,
      "learning_rate": 1.9318848052598425e-05,
      "loss": 2.5284,
      "step": 9516
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9825038909912109,
      "learning_rate": 1.931869868540363e-05,
      "loss": 2.4249,
      "step": 9517
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9805447459220886,
      "learning_rate": 1.931854930241111e-05,
      "loss": 2.4889,
      "step": 9518
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9809526205062866,
      "learning_rate": 1.9318399903621113e-05,
      "loss": 2.6322,
      "step": 9519
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1016697883605957,
      "learning_rate": 1.93182504890339e-05,
      "loss": 2.5793,
      "step": 9520
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.057421088218689,
      "learning_rate": 1.931810105864972e-05,
      "loss": 2.751,
      "step": 9521
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0843336582183838,
      "learning_rate": 1.9317951612468832e-05,
      "loss": 2.3581,
      "step": 9522
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0646264553070068,
      "learning_rate": 1.9317802150491482e-05,
      "loss": 2.7225,
      "step": 9523
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.8891377449035645,
      "learning_rate": 1.931765267271793e-05,
      "loss": 2.485,
      "step": 9524
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9880425333976746,
      "learning_rate": 1.931750317914842e-05,
      "loss": 2.493,
      "step": 9525
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.919468104839325,
      "learning_rate": 1.9317353669783216e-05,
      "loss": 2.7096,
      "step": 9526
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9442280530929565,
      "learning_rate": 1.9317204144622564e-05,
      "loss": 2.655,
      "step": 9527
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0863628387451172,
      "learning_rate": 1.9317054603666727e-05,
      "loss": 2.5848,
      "step": 9528
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1132640838623047,
      "learning_rate": 1.9316905046915947e-05,
      "loss": 2.9556,
      "step": 9529
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0477263927459717,
      "learning_rate": 1.931675547437048e-05,
      "loss": 2.7215,
      "step": 9530
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1134353876113892,
      "learning_rate": 1.931660588603059e-05,
      "loss": 2.6732,
      "step": 9531
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2348912954330444,
      "learning_rate": 1.9316456281896516e-05,
      "loss": 2.5772,
      "step": 9532
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.051820993423462,
      "learning_rate": 1.9316306661968516e-05,
      "loss": 2.433,
      "step": 9533
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9502761960029602,
      "learning_rate": 1.931615702624685e-05,
      "loss": 2.5846,
      "step": 9534
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0022640228271484,
      "learning_rate": 1.931600737473177e-05,
      "loss": 2.4763,
      "step": 9535
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0166959762573242,
      "learning_rate": 1.9315857707423525e-05,
      "loss": 2.4947,
      "step": 9536
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9792133569717407,
      "learning_rate": 1.9315708024322372e-05,
      "loss": 2.6505,
      "step": 9537
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.905977725982666,
      "learning_rate": 1.931555832542856e-05,
      "loss": 2.5793,
      "step": 9538
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1097736358642578,
      "learning_rate": 1.9315408610742353e-05,
      "loss": 2.5185,
      "step": 9539
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0685979127883911,
      "learning_rate": 1.931525888026399e-05,
      "loss": 2.5326,
      "step": 9540
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.006475806236267,
      "learning_rate": 1.931510913399374e-05,
      "loss": 2.5658,
      "step": 9541
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9203999042510986,
      "learning_rate": 1.9314959371931847e-05,
      "loss": 2.5361,
      "step": 9542
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.030232548713684,
      "learning_rate": 1.931480959407857e-05,
      "loss": 2.661,
      "step": 9543
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9934191703796387,
      "learning_rate": 1.931465980043416e-05,
      "loss": 2.5508,
      "step": 9544
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0367677211761475,
      "learning_rate": 1.9314509990998872e-05,
      "loss": 2.6241,
      "step": 9545
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0077335834503174,
      "learning_rate": 1.9314360165772957e-05,
      "loss": 2.7652,
      "step": 9546
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1010222434997559,
      "learning_rate": 1.9314210324756673e-05,
      "loss": 2.7559,
      "step": 9547
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0078884363174438,
      "learning_rate": 1.9314060467950276e-05,
      "loss": 2.6126,
      "step": 9548
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0317591428756714,
      "learning_rate": 1.9313910595354016e-05,
      "loss": 2.6473,
      "step": 9549
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9779329299926758,
      "learning_rate": 1.9313760706968147e-05,
      "loss": 2.6385,
      "step": 9550
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9605663418769836,
      "learning_rate": 1.9313610802792923e-05,
      "loss": 2.5344,
      "step": 9551
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.020695447921753,
      "learning_rate": 1.9313460882828598e-05,
      "loss": 2.6959,
      "step": 9552
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0987058877944946,
      "learning_rate": 1.931331094707543e-05,
      "loss": 2.4844,
      "step": 9553
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9236327409744263,
      "learning_rate": 1.931316099553367e-05,
      "loss": 2.5309,
      "step": 9554
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9560680985450745,
      "learning_rate": 1.931301102820357e-05,
      "loss": 2.478,
      "step": 9555
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0802499055862427,
      "learning_rate": 1.931286104508539e-05,
      "loss": 2.8465,
      "step": 9556
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0247606039047241,
      "learning_rate": 1.931271104617938e-05,
      "loss": 2.583,
      "step": 9557
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9858243465423584,
      "learning_rate": 1.93125610314858e-05,
      "loss": 2.6845,
      "step": 9558
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9639620184898376,
      "learning_rate": 1.9312411001004895e-05,
      "loss": 2.7771,
      "step": 9559
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.021962285041809,
      "learning_rate": 1.9312260954736922e-05,
      "loss": 2.6039,
      "step": 9560
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.021032691001892,
      "learning_rate": 1.931211089268214e-05,
      "loss": 2.5802,
      "step": 9561
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.159769058227539,
      "learning_rate": 1.93119608148408e-05,
      "loss": 2.5381,
      "step": 9562
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1665557622909546,
      "learning_rate": 1.9311810721213163e-05,
      "loss": 2.5614,
      "step": 9563
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9825817942619324,
      "learning_rate": 1.931166061179947e-05,
      "loss": 2.4676,
      "step": 9564
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9961554408073425,
      "learning_rate": 1.9311510486599988e-05,
      "loss": 2.4235,
      "step": 9565
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9678093791007996,
      "learning_rate": 1.9311360345614965e-05,
      "loss": 2.4628,
      "step": 9566
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0323457717895508,
      "learning_rate": 1.9311210188844657e-05,
      "loss": 2.5678,
      "step": 9567
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0073916912078857,
      "learning_rate": 1.931106001628932e-05,
      "loss": 2.583,
      "step": 9568
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0700854063034058,
      "learning_rate": 1.9310909827949207e-05,
      "loss": 2.4889,
      "step": 9569
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9793012738227844,
      "learning_rate": 1.9310759623824573e-05,
      "loss": 2.6648,
      "step": 9570
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9726622700691223,
      "learning_rate": 1.9310609403915673e-05,
      "loss": 2.5488,
      "step": 9571
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9676326513290405,
      "learning_rate": 1.931045916822276e-05,
      "loss": 2.5733,
      "step": 9572
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9647325873374939,
      "learning_rate": 1.9310308916746092e-05,
      "loss": 2.4045,
      "step": 9573
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0950931310653687,
      "learning_rate": 1.931015864948592e-05,
      "loss": 2.6611,
      "step": 9574
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9653634428977966,
      "learning_rate": 1.93100083664425e-05,
      "loss": 2.5478,
      "step": 9575
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9940340518951416,
      "learning_rate": 1.930985806761609e-05,
      "loss": 2.6678,
      "step": 9576
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1257047653198242,
      "learning_rate": 1.930970775300694e-05,
      "loss": 2.6736,
      "step": 9577
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0007383823394775,
      "learning_rate": 1.9309557422615306e-05,
      "loss": 2.5414,
      "step": 9578
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9651784300804138,
      "learning_rate": 1.9309407076441448e-05,
      "loss": 2.376,
      "step": 9579
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0475283861160278,
      "learning_rate": 1.930925671448561e-05,
      "loss": 2.5918,
      "step": 9580
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1879305839538574,
      "learning_rate": 1.930910633674806e-05,
      "loss": 2.6384,
      "step": 9581
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0579686164855957,
      "learning_rate": 1.9308955943229044e-05,
      "loss": 2.6038,
      "step": 9582
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9469434022903442,
      "learning_rate": 1.930880553392882e-05,
      "loss": 2.6376,
      "step": 9583
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0858207941055298,
      "learning_rate": 1.9308655108847637e-05,
      "loss": 2.7338,
      "step": 9584
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0780463218688965,
      "learning_rate": 1.9308504667985765e-05,
      "loss": 2.4657,
      "step": 9585
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0414124727249146,
      "learning_rate": 1.9308354211343442e-05,
      "loss": 2.6439,
      "step": 9586
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1111356019973755,
      "learning_rate": 1.9308203738920932e-05,
      "loss": 2.7575,
      "step": 9587
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2893798351287842,
      "learning_rate": 1.930805325071849e-05,
      "loss": 2.5803,
      "step": 9588
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0397602319717407,
      "learning_rate": 1.930790274673637e-05,
      "loss": 2.4905,
      "step": 9589
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9869293570518494,
      "learning_rate": 1.9307752226974825e-05,
      "loss": 2.5102,
      "step": 9590
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0050358772277832,
      "learning_rate": 1.9307601691434114e-05,
      "loss": 2.691,
      "step": 9591
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9515950679779053,
      "learning_rate": 1.930745114011449e-05,
      "loss": 2.7253,
      "step": 9592
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1064242124557495,
      "learning_rate": 1.9307300573016207e-05,
      "loss": 2.5378,
      "step": 9593
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.026589274406433,
      "learning_rate": 1.930714999013952e-05,
      "loss": 2.8271,
      "step": 9594
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1360164880752563,
      "learning_rate": 1.930699939148469e-05,
      "loss": 2.6185,
      "step": 9595
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0502245426177979,
      "learning_rate": 1.9306848777051967e-05,
      "loss": 2.7864,
      "step": 9596
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.0147089958190918,
      "learning_rate": 1.9306698146841605e-05,
      "loss": 2.6352,
      "step": 9597
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9754644632339478,
      "learning_rate": 1.9306547500853866e-05,
      "loss": 2.5461,
      "step": 9598
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9520792961120605,
      "learning_rate": 1.9306396839088996e-05,
      "loss": 2.5006,
      "step": 9599
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9903799295425415,
      "learning_rate": 1.930624616154726e-05,
      "loss": 2.5029,
      "step": 9600
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0854912996292114,
      "learning_rate": 1.9306095468228908e-05,
      "loss": 2.6464,
      "step": 9601
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0821260213851929,
      "learning_rate": 1.93059447591342e-05,
      "loss": 2.7312,
      "step": 9602
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0057415962219238,
      "learning_rate": 1.9305794034263385e-05,
      "loss": 2.6006,
      "step": 9603
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.058571219444275,
      "learning_rate": 1.9305643293616722e-05,
      "loss": 2.5644,
      "step": 9604
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9477992653846741,
      "learning_rate": 1.9305492537194466e-05,
      "loss": 2.5868,
      "step": 9605
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3801443576812744,
      "learning_rate": 1.9305341764996873e-05,
      "loss": 2.5337,
      "step": 9606
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9834855794906616,
      "learning_rate": 1.93051909770242e-05,
      "loss": 2.4632,
      "step": 9607
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3039623498916626,
      "learning_rate": 1.93050401732767e-05,
      "loss": 2.469,
      "step": 9608
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0628409385681152,
      "learning_rate": 1.9304889353754625e-05,
      "loss": 2.5514,
      "step": 9609
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.041691780090332,
      "learning_rate": 1.930473851845824e-05,
      "loss": 2.4869,
      "step": 9610
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0476188659667969,
      "learning_rate": 1.9304587667387795e-05,
      "loss": 2.5389,
      "step": 9611
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1787651777267456,
      "learning_rate": 1.930443680054355e-05,
      "loss": 2.6069,
      "step": 9612
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0381460189819336,
      "learning_rate": 1.9304285917925756e-05,
      "loss": 2.6255,
      "step": 9613
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9916947484016418,
      "learning_rate": 1.9304135019534666e-05,
      "loss": 2.5103,
      "step": 9614
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0945909023284912,
      "learning_rate": 1.9303984105370546e-05,
      "loss": 2.7536,
      "step": 9615
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.032882809638977,
      "learning_rate": 1.9303833175433642e-05,
      "loss": 2.4112,
      "step": 9616
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9741640090942383,
      "learning_rate": 1.9303682229724216e-05,
      "loss": 2.4716,
      "step": 9617
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0285226106643677,
      "learning_rate": 1.930353126824252e-05,
      "loss": 2.5947,
      "step": 9618
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0070555210113525,
      "learning_rate": 1.9303380290988815e-05,
      "loss": 2.7866,
      "step": 9619
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0757300853729248,
      "learning_rate": 1.930322929796335e-05,
      "loss": 2.5891,
      "step": 9620
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.952600359916687,
      "learning_rate": 1.9303078289166387e-05,
      "loss": 2.5651,
      "step": 9621
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.114803433418274,
      "learning_rate": 1.930292726459818e-05,
      "loss": 2.6295,
      "step": 9622
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0346639156341553,
      "learning_rate": 1.9302776224258985e-05,
      "loss": 2.4416,
      "step": 9623
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9575098156929016,
      "learning_rate": 1.9302625168149053e-05,
      "loss": 2.6049,
      "step": 9624
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9798628091812134,
      "learning_rate": 1.9302474096268648e-05,
      "loss": 2.5959,
      "step": 9625
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.037271499633789,
      "learning_rate": 1.9302323008618024e-05,
      "loss": 2.6525,
      "step": 9626
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1113370656967163,
      "learning_rate": 1.9302171905197436e-05,
      "loss": 2.6966,
      "step": 9627
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0761173963546753,
      "learning_rate": 1.9302020786007138e-05,
      "loss": 2.504,
      "step": 9628
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0042563676834106,
      "learning_rate": 1.930186965104739e-05,
      "loss": 2.8703,
      "step": 9629
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9463468790054321,
      "learning_rate": 1.9301718500318446e-05,
      "loss": 2.6817,
      "step": 9630
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0069304704666138,
      "learning_rate": 1.930156733382056e-05,
      "loss": 2.488,
      "step": 9631
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0944545269012451,
      "learning_rate": 1.9301416151553996e-05,
      "loss": 2.3248,
      "step": 9632
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1178914308547974,
      "learning_rate": 1.9301264953519e-05,
      "loss": 2.6672,
      "step": 9633
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1025755405426025,
      "learning_rate": 1.930111373971584e-05,
      "loss": 2.6252,
      "step": 9634
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.010012149810791,
      "learning_rate": 1.9300962510144762e-05,
      "loss": 2.404,
      "step": 9635
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9835636615753174,
      "learning_rate": 1.930081126480603e-05,
      "loss": 2.6488,
      "step": 9636
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0483753681182861,
      "learning_rate": 1.930066000369989e-05,
      "loss": 2.8282,
      "step": 9637
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9030483365058899,
      "learning_rate": 1.9300508726826613e-05,
      "loss": 2.6965,
      "step": 9638
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1953550577163696,
      "learning_rate": 1.9300357434186444e-05,
      "loss": 2.7614,
      "step": 9639
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9903373122215271,
      "learning_rate": 1.9300206125779644e-05,
      "loss": 2.6409,
      "step": 9640
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.03379487991333,
      "learning_rate": 1.9300054801606468e-05,
      "loss": 2.4988,
      "step": 9641
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.024586796760559,
      "learning_rate": 1.9299903461667173e-05,
      "loss": 2.4063,
      "step": 9642
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3025124073028564,
      "learning_rate": 1.929975210596202e-05,
      "loss": 2.7721,
      "step": 9643
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0419853925704956,
      "learning_rate": 1.9299600734491255e-05,
      "loss": 2.5583,
      "step": 9644
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8976804614067078,
      "learning_rate": 1.9299449347255144e-05,
      "loss": 2.4347,
      "step": 9645
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0324939489364624,
      "learning_rate": 1.929929794425394e-05,
      "loss": 2.5182,
      "step": 9646
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.063900351524353,
      "learning_rate": 1.92991465254879e-05,
      "loss": 2.5544,
      "step": 9647
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0419377088546753,
      "learning_rate": 1.9298995090957284e-05,
      "loss": 2.4676,
      "step": 9648
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9340572357177734,
      "learning_rate": 1.929884364066234e-05,
      "loss": 2.3199,
      "step": 9649
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.918176531791687,
      "learning_rate": 1.9298692174603337e-05,
      "loss": 2.4875,
      "step": 9650
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0423046350479126,
      "learning_rate": 1.929854069278052e-05,
      "loss": 2.6013,
      "step": 9651
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9785858392715454,
      "learning_rate": 1.9298389195194157e-05,
      "loss": 2.6009,
      "step": 9652
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0347447395324707,
      "learning_rate": 1.9298237681844493e-05,
      "loss": 2.8057,
      "step": 9653
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9596692323684692,
      "learning_rate": 1.9298086152731793e-05,
      "loss": 2.4572,
      "step": 9654
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1376783847808838,
      "learning_rate": 1.929793460785631e-05,
      "loss": 2.5577,
      "step": 9655
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0643006563186646,
      "learning_rate": 1.9297783047218306e-05,
      "loss": 2.5068,
      "step": 9656
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0179489850997925,
      "learning_rate": 1.9297631470818034e-05,
      "loss": 2.6943,
      "step": 9657
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0086215734481812,
      "learning_rate": 1.929747987865575e-05,
      "loss": 2.5277,
      "step": 9658
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9751079678535461,
      "learning_rate": 1.929732827073171e-05,
      "loss": 2.5964,
      "step": 9659
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.971017599105835,
      "learning_rate": 1.9297176647046178e-05,
      "loss": 2.4827,
      "step": 9660
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0149410963058472,
      "learning_rate": 1.9297025007599404e-05,
      "loss": 2.6182,
      "step": 9661
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1550090312957764,
      "learning_rate": 1.929687335239165e-05,
      "loss": 2.511,
      "step": 9662
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.931896984577179,
      "learning_rate": 1.9296721681423168e-05,
      "loss": 2.5279,
      "step": 9663
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1107144355773926,
      "learning_rate": 1.9296569994694215e-05,
      "loss": 2.7066,
      "step": 9664
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9364385604858398,
      "learning_rate": 1.929641829220506e-05,
      "loss": 2.5151,
      "step": 9665
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.228280782699585,
      "learning_rate": 1.9296266573955943e-05,
      "loss": 2.3995,
      "step": 9666
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9897029995918274,
      "learning_rate": 1.9296114839947132e-05,
      "loss": 2.7903,
      "step": 9667
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0918062925338745,
      "learning_rate": 1.9295963090178883e-05,
      "loss": 2.7794,
      "step": 9668
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0721856355667114,
      "learning_rate": 1.929581132465145e-05,
      "loss": 2.4008,
      "step": 9669
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9810754060745239,
      "learning_rate": 1.9295659543365093e-05,
      "loss": 2.219,
      "step": 9670
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0976303815841675,
      "learning_rate": 1.9295507746320067e-05,
      "loss": 2.6611,
      "step": 9671
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.009892463684082,
      "learning_rate": 1.929535593351663e-05,
      "loss": 2.5456,
      "step": 9672
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9757416844367981,
      "learning_rate": 1.929520410495504e-05,
      "loss": 2.6723,
      "step": 9673
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.012480616569519,
      "learning_rate": 1.9295052260635558e-05,
      "loss": 2.6701,
      "step": 9674
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.039392352104187,
      "learning_rate": 1.9294900400558436e-05,
      "loss": 2.5117,
      "step": 9675
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9720758199691772,
      "learning_rate": 1.9294748524723936e-05,
      "loss": 2.4991,
      "step": 9676
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9968618154525757,
      "learning_rate": 1.929459663313231e-05,
      "loss": 2.3686,
      "step": 9677
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9320715665817261,
      "learning_rate": 1.929444472578382e-05,
      "loss": 2.4244,
      "step": 9678
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0893609523773193,
      "learning_rate": 1.929429280267872e-05,
      "loss": 2.4212,
      "step": 9679
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0867124795913696,
      "learning_rate": 1.929414086381727e-05,
      "loss": 2.7115,
      "step": 9680
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0761168003082275,
      "learning_rate": 1.9293988909199727e-05,
      "loss": 2.6398,
      "step": 9681
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1002678871154785,
      "learning_rate": 1.9293836938826348e-05,
      "loss": 2.6114,
      "step": 9682
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.941926121711731,
      "learning_rate": 1.929368495269739e-05,
      "loss": 2.6571,
      "step": 9683
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8894887566566467,
      "learning_rate": 1.9293532950813113e-05,
      "loss": 2.5226,
      "step": 9684
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0877147912979126,
      "learning_rate": 1.9293380933173775e-05,
      "loss": 2.5086,
      "step": 9685
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9471933841705322,
      "learning_rate": 1.9293228899779634e-05,
      "loss": 2.6595,
      "step": 9686
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0418860912322998,
      "learning_rate": 1.929307685063094e-05,
      "loss": 2.4696,
      "step": 9687
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.09771728515625,
      "learning_rate": 1.9292924785727964e-05,
      "loss": 2.6003,
      "step": 9688
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9778916835784912,
      "learning_rate": 1.929277270507095e-05,
      "loss": 2.4333,
      "step": 9689
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5650556087493896,
      "learning_rate": 1.9292620608660163e-05,
      "loss": 2.7427,
      "step": 9690
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9584439992904663,
      "learning_rate": 1.9292468496495865e-05,
      "loss": 2.6808,
      "step": 9691
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0335218906402588,
      "learning_rate": 1.929231636857831e-05,
      "loss": 2.4988,
      "step": 9692
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0007675886154175,
      "learning_rate": 1.929216422490775e-05,
      "loss": 2.6528,
      "step": 9693
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9430609345436096,
      "learning_rate": 1.9292012065484446e-05,
      "loss": 2.6794,
      "step": 9694
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1245553493499756,
      "learning_rate": 1.9291859890308662e-05,
      "loss": 2.5138,
      "step": 9695
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9640036225318909,
      "learning_rate": 1.929170769938065e-05,
      "loss": 2.6993,
      "step": 9696
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0084905624389648,
      "learning_rate": 1.929155549270067e-05,
      "loss": 2.7572,
      "step": 9697
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9506112337112427,
      "learning_rate": 1.9291403270268984e-05,
      "loss": 2.5121,
      "step": 9698
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0776618719100952,
      "learning_rate": 1.9291251032085844e-05,
      "loss": 2.5695,
      "step": 9699
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.020936131477356,
      "learning_rate": 1.929109877815151e-05,
      "loss": 2.661,
      "step": 9700
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0366935729980469,
      "learning_rate": 1.9290946508466236e-05,
      "loss": 2.6533,
      "step": 9701
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0347034931182861,
      "learning_rate": 1.929079422303029e-05,
      "loss": 2.6554,
      "step": 9702
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0594289302825928,
      "learning_rate": 1.929064192184392e-05,
      "loss": 2.692,
      "step": 9703
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1888667345046997,
      "learning_rate": 1.929048960490739e-05,
      "loss": 2.6209,
      "step": 9704
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0200453996658325,
      "learning_rate": 1.9290337272220955e-05,
      "loss": 2.6592,
      "step": 9705
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0660635232925415,
      "learning_rate": 1.929018492378488e-05,
      "loss": 2.7791,
      "step": 9706
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9794163107872009,
      "learning_rate": 1.9290032559599416e-05,
      "loss": 2.626,
      "step": 9707
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.965373694896698,
      "learning_rate": 1.928988017966482e-05,
      "loss": 2.6414,
      "step": 9708
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1875052452087402,
      "learning_rate": 1.928972778398136e-05,
      "loss": 2.7357,
      "step": 9709
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9937952160835266,
      "learning_rate": 1.9289575372549286e-05,
      "loss": 2.4926,
      "step": 9710
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.060858964920044,
      "learning_rate": 1.928942294536886e-05,
      "loss": 2.6104,
      "step": 9711
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9856171011924744,
      "learning_rate": 1.9289270502440337e-05,
      "loss": 2.4965,
      "step": 9712
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9797461032867432,
      "learning_rate": 1.9289118043763977e-05,
      "loss": 2.6724,
      "step": 9713
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.061531901359558,
      "learning_rate": 1.9288965569340038e-05,
      "loss": 2.4713,
      "step": 9714
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0895490646362305,
      "learning_rate": 1.9288813079168784e-05,
      "loss": 2.5535,
      "step": 9715
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.99836665391922,
      "learning_rate": 1.9288660573250466e-05,
      "loss": 2.423,
      "step": 9716
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0281745195388794,
      "learning_rate": 1.9288508051585345e-05,
      "loss": 2.5316,
      "step": 9717
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9351097345352173,
      "learning_rate": 1.928835551417368e-05,
      "loss": 2.7981,
      "step": 9718
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.016176462173462,
      "learning_rate": 1.9288202961015733e-05,
      "loss": 2.4916,
      "step": 9719
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9832941889762878,
      "learning_rate": 1.928805039211176e-05,
      "loss": 2.428,
      "step": 9720
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0716931819915771,
      "learning_rate": 1.9287897807462015e-05,
      "loss": 2.4974,
      "step": 9721
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0179741382598877,
      "learning_rate": 1.9287745207066763e-05,
      "loss": 2.7222,
      "step": 9722
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9985048770904541,
      "learning_rate": 1.9287592590926257e-05,
      "loss": 2.7357,
      "step": 9723
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2582730054855347,
      "learning_rate": 1.9287439959040763e-05,
      "loss": 2.5364,
      "step": 9724
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4085875749588013,
      "learning_rate": 1.9287287311410537e-05,
      "loss": 2.4425,
      "step": 9725
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.029921531677246,
      "learning_rate": 1.928713464803583e-05,
      "loss": 2.5704,
      "step": 9726
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9642919301986694,
      "learning_rate": 1.9286981968916914e-05,
      "loss": 2.6755,
      "step": 9727
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9553503394126892,
      "learning_rate": 1.9286829274054036e-05,
      "loss": 2.6099,
      "step": 9728
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9965344667434692,
      "learning_rate": 1.9286676563447466e-05,
      "loss": 2.5191,
      "step": 9729
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8997259140014648,
      "learning_rate": 1.9286523837097453e-05,
      "loss": 2.6189,
      "step": 9730
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0128802061080933,
      "learning_rate": 1.928637109500426e-05,
      "loss": 2.3907,
      "step": 9731
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0049854516983032,
      "learning_rate": 1.9286218337168147e-05,
      "loss": 2.5717,
      "step": 9732
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0937442779541016,
      "learning_rate": 1.9286065563589373e-05,
      "loss": 2.6328,
      "step": 9733
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.964484453201294,
      "learning_rate": 1.9285912774268193e-05,
      "loss": 2.4771,
      "step": 9734
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9660891890525818,
      "learning_rate": 1.928575996920487e-05,
      "loss": 2.6648,
      "step": 9735
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.066081166267395,
      "learning_rate": 1.928560714839966e-05,
      "loss": 2.4768,
      "step": 9736
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.103675365447998,
      "learning_rate": 1.9285454311852826e-05,
      "loss": 2.6923,
      "step": 9737
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.090798258781433,
      "learning_rate": 1.9285301459564626e-05,
      "loss": 2.4736,
      "step": 9738
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1876773834228516,
      "learning_rate": 1.9285148591535315e-05,
      "loss": 2.5555,
      "step": 9739
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0986942052841187,
      "learning_rate": 1.928499570776516e-05,
      "loss": 2.4613,
      "step": 9740
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.992330014705658,
      "learning_rate": 1.928484280825441e-05,
      "loss": 2.6171,
      "step": 9741
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0861750841140747,
      "learning_rate": 1.9284689893003335e-05,
      "loss": 2.5126,
      "step": 9742
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.004833698272705,
      "learning_rate": 1.9284536962012184e-05,
      "loss": 2.4808,
      "step": 9743
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.030255675315857,
      "learning_rate": 1.9284384015281224e-05,
      "loss": 2.5508,
      "step": 9744
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0320810079574585,
      "learning_rate": 1.928423105281071e-05,
      "loss": 2.5682,
      "step": 9745
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9980182647705078,
      "learning_rate": 1.9284078074600906e-05,
      "loss": 2.7007,
      "step": 9746
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9776213765144348,
      "learning_rate": 1.9283925080652067e-05,
      "loss": 2.778,
      "step": 9747
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.134868860244751,
      "learning_rate": 1.9283772070964454e-05,
      "loss": 2.6061,
      "step": 9748
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.986786961555481,
      "learning_rate": 1.9283619045538322e-05,
      "loss": 2.3878,
      "step": 9749
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1570876836776733,
      "learning_rate": 1.928346600437394e-05,
      "loss": 2.4162,
      "step": 9750
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1297253370285034,
      "learning_rate": 1.9283312947471558e-05,
      "loss": 2.7425,
      "step": 9751
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2347168922424316,
      "learning_rate": 1.928315987483144e-05,
      "loss": 2.5024,
      "step": 9752
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9843164682388306,
      "learning_rate": 1.9283006786453843e-05,
      "loss": 2.176,
      "step": 9753
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.974250078201294,
      "learning_rate": 1.9282853682339028e-05,
      "loss": 2.6633,
      "step": 9754
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.045868158340454,
      "learning_rate": 1.928270056248726e-05,
      "loss": 2.617,
      "step": 9755
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9775452017784119,
      "learning_rate": 1.928254742689879e-05,
      "loss": 2.392,
      "step": 9756
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3044472932815552,
      "learning_rate": 1.928239427557388e-05,
      "loss": 2.5693,
      "step": 9757
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0273886919021606,
      "learning_rate": 1.928224110851279e-05,
      "loss": 2.6844,
      "step": 9758
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0081437826156616,
      "learning_rate": 1.9282087925715783e-05,
      "loss": 2.3449,
      "step": 9759
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0534882545471191,
      "learning_rate": 1.9281934727183117e-05,
      "loss": 2.4095,
      "step": 9760
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9913865923881531,
      "learning_rate": 1.928178151291505e-05,
      "loss": 2.3354,
      "step": 9761
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.103601098060608,
      "learning_rate": 1.928162828291184e-05,
      "loss": 2.636,
      "step": 9762
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2045592069625854,
      "learning_rate": 1.928147503717375e-05,
      "loss": 2.5417,
      "step": 9763
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1063367128372192,
      "learning_rate": 1.928132177570104e-05,
      "loss": 2.3355,
      "step": 9764
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1078544855117798,
      "learning_rate": 1.9281168498493967e-05,
      "loss": 2.4768,
      "step": 9765
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9984210133552551,
      "learning_rate": 1.9281015205552794e-05,
      "loss": 2.6392,
      "step": 9766
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0207058191299438,
      "learning_rate": 1.928086189687778e-05,
      "loss": 2.4881,
      "step": 9767
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.032988429069519,
      "learning_rate": 1.9280708572469184e-05,
      "loss": 2.6501,
      "step": 9768
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1132620573043823,
      "learning_rate": 1.9280555232327266e-05,
      "loss": 2.5707,
      "step": 9769
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0660490989685059,
      "learning_rate": 1.9280401876452287e-05,
      "loss": 2.6872,
      "step": 9770
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1925344467163086,
      "learning_rate": 1.9280248504844505e-05,
      "loss": 2.4929,
      "step": 9771
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2210313081741333,
      "learning_rate": 1.928009511750418e-05,
      "loss": 2.7559,
      "step": 9772
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0334782600402832,
      "learning_rate": 1.9279941714431576e-05,
      "loss": 2.6736,
      "step": 9773
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0118451118469238,
      "learning_rate": 1.9279788295626952e-05,
      "loss": 2.6307,
      "step": 9774
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9992167949676514,
      "learning_rate": 1.9279634861090564e-05,
      "loss": 2.6523,
      "step": 9775
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0957063436508179,
      "learning_rate": 1.9279481410822677e-05,
      "loss": 2.4783,
      "step": 9776
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1264485120773315,
      "learning_rate": 1.9279327944823546e-05,
      "loss": 2.6908,
      "step": 9777
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0638761520385742,
      "learning_rate": 1.9279174463093433e-05,
      "loss": 2.3673,
      "step": 9778
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9591763019561768,
      "learning_rate": 1.92790209656326e-05,
      "loss": 2.5425,
      "step": 9779
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1193660497665405,
      "learning_rate": 1.9278867452441306e-05,
      "loss": 2.6648,
      "step": 9780
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0434528589248657,
      "learning_rate": 1.9278713923519813e-05,
      "loss": 2.67,
      "step": 9781
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0785529613494873,
      "learning_rate": 1.927856037886838e-05,
      "loss": 2.4983,
      "step": 9782
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9593841433525085,
      "learning_rate": 1.9278406818487267e-05,
      "loss": 2.7539,
      "step": 9783
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9575480818748474,
      "learning_rate": 1.9278253242376732e-05,
      "loss": 2.5158,
      "step": 9784
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9705800414085388,
      "learning_rate": 1.927809965053704e-05,
      "loss": 2.5447,
      "step": 9785
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0633611679077148,
      "learning_rate": 1.9277946042968453e-05,
      "loss": 2.5599,
      "step": 9786
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9905049800872803,
      "learning_rate": 1.927779241967122e-05,
      "loss": 2.8272,
      "step": 9787
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0992273092269897,
      "learning_rate": 1.9277638780645612e-05,
      "loss": 2.5344,
      "step": 9788
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0010071992874146,
      "learning_rate": 1.927748512589189e-05,
      "loss": 2.6246,
      "step": 9789
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.038998007774353,
      "learning_rate": 1.9277331455410307e-05,
      "loss": 2.7441,
      "step": 9790
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2278952598571777,
      "learning_rate": 1.927717776920113e-05,
      "loss": 2.8318,
      "step": 9791
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.886677622795105,
      "learning_rate": 1.9277024067264615e-05,
      "loss": 2.5389,
      "step": 9792
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.085739016532898,
      "learning_rate": 1.9276870349601024e-05,
      "loss": 2.4497,
      "step": 9793
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9483538269996643,
      "learning_rate": 1.9276716616210616e-05,
      "loss": 2.6864,
      "step": 9794
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.059131145477295,
      "learning_rate": 1.927656286709366e-05,
      "loss": 2.4154,
      "step": 9795
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9275649785995483,
      "learning_rate": 1.9276409102250407e-05,
      "loss": 2.5594,
      "step": 9796
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1311609745025635,
      "learning_rate": 1.927625532168112e-05,
      "loss": 2.5438,
      "step": 9797
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9368364810943604,
      "learning_rate": 1.9276101525386066e-05,
      "loss": 2.4633,
      "step": 9798
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.040435791015625,
      "learning_rate": 1.9275947713365498e-05,
      "loss": 2.6639,
      "step": 9799
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9596678018569946,
      "learning_rate": 1.927579388561968e-05,
      "loss": 2.5522,
      "step": 9800
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0333518981933594,
      "learning_rate": 1.927564004214887e-05,
      "loss": 2.5382,
      "step": 9801
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9489578604698181,
      "learning_rate": 1.927548618295333e-05,
      "loss": 2.7112,
      "step": 9802
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0996718406677246,
      "learning_rate": 1.9275332308033322e-05,
      "loss": 2.6214,
      "step": 9803
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0092661380767822,
      "learning_rate": 1.927517841738911e-05,
      "loss": 2.4031,
      "step": 9804
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.033860206604004,
      "learning_rate": 1.927502451102095e-05,
      "loss": 2.8631,
      "step": 9805
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9122596383094788,
      "learning_rate": 1.92748705889291e-05,
      "loss": 2.407,
      "step": 9806
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0636024475097656,
      "learning_rate": 1.927471665111383e-05,
      "loss": 2.6032,
      "step": 9807
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.002915859222412,
      "learning_rate": 1.9274562697575396e-05,
      "loss": 2.6423,
      "step": 9808
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9787834882736206,
      "learning_rate": 1.9274408728314058e-05,
      "loss": 2.4683,
      "step": 9809
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2099496126174927,
      "learning_rate": 1.927425474333008e-05,
      "loss": 2.5183,
      "step": 9810
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9711193442344666,
      "learning_rate": 1.927410074262372e-05,
      "loss": 2.7396,
      "step": 9811
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0641274452209473,
      "learning_rate": 1.927394672619524e-05,
      "loss": 2.593,
      "step": 9812
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0130202770233154,
      "learning_rate": 1.9273792694044905e-05,
      "loss": 2.7174,
      "step": 9813
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9802504181861877,
      "learning_rate": 1.9273638646172967e-05,
      "loss": 2.6405,
      "step": 9814
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9134302139282227,
      "learning_rate": 1.9273484582579697e-05,
      "loss": 2.5408,
      "step": 9815
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.935829222202301,
      "learning_rate": 1.927333050326535e-05,
      "loss": 2.317,
      "step": 9816
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8897245526313782,
      "learning_rate": 1.9273176408230193e-05,
      "loss": 2.5807,
      "step": 9817
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8801754117012024,
      "learning_rate": 1.9273022297474478e-05,
      "loss": 2.529,
      "step": 9818
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2907373905181885,
      "learning_rate": 1.9272868170998475e-05,
      "loss": 2.6056,
      "step": 9819
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.075517177581787,
      "learning_rate": 1.9272714028802438e-05,
      "loss": 2.7244,
      "step": 9820
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.02711820602417,
      "learning_rate": 1.9272559870886635e-05,
      "loss": 2.6683,
      "step": 9821
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2057782411575317,
      "learning_rate": 1.9272405697251328e-05,
      "loss": 2.3704,
      "step": 9822
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6226170063018799,
      "learning_rate": 1.9272251507896772e-05,
      "loss": 2.4354,
      "step": 9823
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2808629274368286,
      "learning_rate": 1.9272097302823233e-05,
      "loss": 2.7295,
      "step": 9824
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.040480136871338,
      "learning_rate": 1.9271943082030965e-05,
      "loss": 2.6877,
      "step": 9825
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8901360630989075,
      "learning_rate": 1.927178884552024e-05,
      "loss": 2.4377,
      "step": 9826
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0860841274261475,
      "learning_rate": 1.9271634593291316e-05,
      "loss": 2.6663,
      "step": 9827
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9428474307060242,
      "learning_rate": 1.927148032534445e-05,
      "loss": 2.6876,
      "step": 9828
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0408676862716675,
      "learning_rate": 1.9271326041679907e-05,
      "loss": 2.798,
      "step": 9829
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9949763417243958,
      "learning_rate": 1.927117174229795e-05,
      "loss": 2.4449,
      "step": 9830
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0954022407531738,
      "learning_rate": 1.927101742719884e-05,
      "loss": 2.5337,
      "step": 9831
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0125070810317993,
      "learning_rate": 1.9270863096382833e-05,
      "loss": 2.361,
      "step": 9832
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.955772876739502,
      "learning_rate": 1.9270708749850198e-05,
      "loss": 2.4393,
      "step": 9833
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4410806894302368,
      "learning_rate": 1.9270554387601195e-05,
      "loss": 2.6751,
      "step": 9834
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0355346202850342,
      "learning_rate": 1.927040000963608e-05,
      "loss": 2.6611,
      "step": 9835
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0397157669067383,
      "learning_rate": 1.9270245615955126e-05,
      "loss": 2.7597,
      "step": 9836
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.035841464996338,
      "learning_rate": 1.9270091206558582e-05,
      "loss": 2.5857,
      "step": 9837
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0107343196868896,
      "learning_rate": 1.9269936781446717e-05,
      "loss": 2.4864,
      "step": 9838
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9650059938430786,
      "learning_rate": 1.9269782340619792e-05,
      "loss": 2.7009,
      "step": 9839
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1177407503128052,
      "learning_rate": 1.9269627884078072e-05,
      "loss": 2.5179,
      "step": 9840
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3148162364959717,
      "learning_rate": 1.926947341182181e-05,
      "loss": 2.7436,
      "step": 9841
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9930965304374695,
      "learning_rate": 1.9269318923851274e-05,
      "loss": 2.6063,
      "step": 9842
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0298558473587036,
      "learning_rate": 1.9269164420166726e-05,
      "loss": 2.4564,
      "step": 9843
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.03469717502594,
      "learning_rate": 1.926900990076843e-05,
      "loss": 2.7062,
      "step": 9844
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1358989477157593,
      "learning_rate": 1.926885536565664e-05,
      "loss": 2.6984,
      "step": 9845
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0451335906982422,
      "learning_rate": 1.926870081483162e-05,
      "loss": 2.4998,
      "step": 9846
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0920861959457397,
      "learning_rate": 1.926854624829364e-05,
      "loss": 2.578,
      "step": 9847
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1390941143035889,
      "learning_rate": 1.9268391666042957e-05,
      "loss": 2.5486,
      "step": 9848
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9927672147750854,
      "learning_rate": 1.926823706807983e-05,
      "loss": 2.5964,
      "step": 9849
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0219041109085083,
      "learning_rate": 1.926808245440453e-05,
      "loss": 2.587,
      "step": 9850
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1256611347198486,
      "learning_rate": 1.9267927825017307e-05,
      "loss": 2.539,
      "step": 9851
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0991392135620117,
      "learning_rate": 1.926777317991843e-05,
      "loss": 2.7077,
      "step": 9852
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9493745565414429,
      "learning_rate": 1.926761851910816e-05,
      "loss": 2.4532,
      "step": 9853
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.076033592224121,
      "learning_rate": 1.9267463842586763e-05,
      "loss": 2.4731,
      "step": 9854
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1087697744369507,
      "learning_rate": 1.9267309150354493e-05,
      "loss": 2.6333,
      "step": 9855
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1496647596359253,
      "learning_rate": 1.926715444241162e-05,
      "loss": 2.5571,
      "step": 9856
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9675040245056152,
      "learning_rate": 1.92669997187584e-05,
      "loss": 2.4947,
      "step": 9857
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0251494646072388,
      "learning_rate": 1.9266844979395107e-05,
      "loss": 2.4584,
      "step": 9858
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9610682129859924,
      "learning_rate": 1.9266690224321988e-05,
      "loss": 2.5967,
      "step": 9859
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9914964437484741,
      "learning_rate": 1.926653545353931e-05,
      "loss": 2.7179,
      "step": 9860
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0710723400115967,
      "learning_rate": 1.9266380667047344e-05,
      "loss": 2.6389,
      "step": 9861
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.104320764541626,
      "learning_rate": 1.9266225864846342e-05,
      "loss": 2.4453,
      "step": 9862
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1448354721069336,
      "learning_rate": 1.926607104693657e-05,
      "loss": 2.506,
      "step": 9863
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.175774335861206,
      "learning_rate": 1.9265916213318294e-05,
      "loss": 2.6682,
      "step": 9864
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9507459998130798,
      "learning_rate": 1.9265761363991773e-05,
      "loss": 2.7539,
      "step": 9865
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0944766998291016,
      "learning_rate": 1.926560649895727e-05,
      "loss": 2.6555,
      "step": 9866
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9777064919471741,
      "learning_rate": 1.9265451618215046e-05,
      "loss": 2.6752,
      "step": 9867
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0253245830535889,
      "learning_rate": 1.9265296721765366e-05,
      "loss": 2.4936,
      "step": 9868
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0048205852508545,
      "learning_rate": 1.926514180960849e-05,
      "loss": 2.4177,
      "step": 9869
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9698752164840698,
      "learning_rate": 1.9264986881744684e-05,
      "loss": 2.4991,
      "step": 9870
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.01821768283844,
      "learning_rate": 1.9264831938174208e-05,
      "loss": 2.5552,
      "step": 9871
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1181182861328125,
      "learning_rate": 1.9264676978897325e-05,
      "loss": 2.5896,
      "step": 9872
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9327905178070068,
      "learning_rate": 1.92645220039143e-05,
      "loss": 2.5714,
      "step": 9873
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0308563709259033,
      "learning_rate": 1.9264367013225394e-05,
      "loss": 2.4438,
      "step": 9874
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.163716197013855,
      "learning_rate": 1.9264212006830867e-05,
      "loss": 2.6689,
      "step": 9875
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0458489656448364,
      "learning_rate": 1.9264056984730987e-05,
      "loss": 2.5843,
      "step": 9876
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0478360652923584,
      "learning_rate": 1.9263901946926012e-05,
      "loss": 2.5266,
      "step": 9877
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.164574384689331,
      "learning_rate": 1.926374689341621e-05,
      "loss": 2.5172,
      "step": 9878
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9827147722244263,
      "learning_rate": 1.926359182420184e-05,
      "loss": 2.6088,
      "step": 9879
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9654532074928284,
      "learning_rate": 1.9263436739283164e-05,
      "loss": 2.4604,
      "step": 9880
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9282635450363159,
      "learning_rate": 1.926328163866045e-05,
      "loss": 2.649,
      "step": 9881
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.068439245223999,
      "learning_rate": 1.9263126522333956e-05,
      "loss": 2.5976,
      "step": 9882
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2777043581008911,
      "learning_rate": 1.9262971390303948e-05,
      "loss": 2.401,
      "step": 9883
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2758862972259521,
      "learning_rate": 1.926281624257069e-05,
      "loss": 2.7059,
      "step": 9884
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0416260957717896,
      "learning_rate": 1.9262661079134437e-05,
      "loss": 2.7203,
      "step": 9885
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9383830428123474,
      "learning_rate": 1.9262505899995462e-05,
      "loss": 2.4381,
      "step": 9886
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.048507809638977,
      "learning_rate": 1.9262350705154024e-05,
      "loss": 2.7128,
      "step": 9887
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8661969900131226,
      "learning_rate": 1.926219549461038e-05,
      "loss": 2.6805,
      "step": 9888
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0085294246673584,
      "learning_rate": 1.9262040268364804e-05,
      "loss": 2.6256,
      "step": 9889
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0533111095428467,
      "learning_rate": 1.9261885026417553e-05,
      "loss": 2.7233,
      "step": 9890
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0310192108154297,
      "learning_rate": 1.9261729768768893e-05,
      "loss": 2.5357,
      "step": 9891
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9969598054885864,
      "learning_rate": 1.9261574495419086e-05,
      "loss": 2.4686,
      "step": 9892
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0951472520828247,
      "learning_rate": 1.9261419206368394e-05,
      "loss": 2.7,
      "step": 9893
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.920841634273529,
      "learning_rate": 1.926126390161708e-05,
      "loss": 2.4398,
      "step": 9894
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.107183575630188,
      "learning_rate": 1.9261108581165405e-05,
      "loss": 2.6857,
      "step": 9895
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1211730241775513,
      "learning_rate": 1.926095324501364e-05,
      "loss": 2.5988,
      "step": 9896
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0944395065307617,
      "learning_rate": 1.9260797893162042e-05,
      "loss": 2.5327,
      "step": 9897
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0790849924087524,
      "learning_rate": 1.926064252561088e-05,
      "loss": 2.5763,
      "step": 9898
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1680063009262085,
      "learning_rate": 1.9260487142360408e-05,
      "loss": 2.5296,
      "step": 9899
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0650887489318848,
      "learning_rate": 1.92603317434109e-05,
      "loss": 2.7784,
      "step": 9900
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.027493953704834,
      "learning_rate": 1.9260176328762615e-05,
      "loss": 2.6061,
      "step": 9901
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0992109775543213,
      "learning_rate": 1.9260020898415816e-05,
      "loss": 2.8844,
      "step": 9902
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.126750111579895,
      "learning_rate": 1.9259865452370762e-05,
      "loss": 2.4739,
      "step": 9903
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0130717754364014,
      "learning_rate": 1.9259709990627725e-05,
      "loss": 2.4109,
      "step": 9904
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1252975463867188,
      "learning_rate": 1.9259554513186965e-05,
      "loss": 2.5976,
      "step": 9905
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0038039684295654,
      "learning_rate": 1.9259399020048743e-05,
      "loss": 2.6877,
      "step": 9906
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1404386758804321,
      "learning_rate": 1.9259243511213323e-05,
      "loss": 2.6949,
      "step": 9907
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3691537380218506,
      "learning_rate": 1.9259087986680977e-05,
      "loss": 2.5241,
      "step": 9908
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0155909061431885,
      "learning_rate": 1.9258932446451958e-05,
      "loss": 2.6133,
      "step": 9909
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0186874866485596,
      "learning_rate": 1.925877689052653e-05,
      "loss": 2.6398,
      "step": 9910
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1245132684707642,
      "learning_rate": 1.925862131890497e-05,
      "loss": 2.402,
      "step": 9911
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.050668478012085,
      "learning_rate": 1.9258465731587526e-05,
      "loss": 2.5329,
      "step": 9912
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.065528392791748,
      "learning_rate": 1.9258310128574467e-05,
      "loss": 2.5416,
      "step": 9913
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0372031927108765,
      "learning_rate": 1.925815450986606e-05,
      "loss": 2.6326,
      "step": 9914
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9576128721237183,
      "learning_rate": 1.9257998875462567e-05,
      "loss": 2.6778,
      "step": 9915
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0309401750564575,
      "learning_rate": 1.925784322536425e-05,
      "loss": 2.3758,
      "step": 9916
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9973157644271851,
      "learning_rate": 1.9257687559571376e-05,
      "loss": 2.568,
      "step": 9917
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0605124235153198,
      "learning_rate": 1.9257531878084207e-05,
      "loss": 2.3937,
      "step": 9918
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0655781030654907,
      "learning_rate": 1.9257376180903006e-05,
      "loss": 2.8811,
      "step": 9919
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0144662857055664,
      "learning_rate": 1.925722046802804e-05,
      "loss": 2.8722,
      "step": 9920
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0238503217697144,
      "learning_rate": 1.9257064739459567e-05,
      "loss": 2.5088,
      "step": 9921
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1078236103057861,
      "learning_rate": 1.925690899519786e-05,
      "loss": 2.5509,
      "step": 9922
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9788479804992676,
      "learning_rate": 1.9256753235243174e-05,
      "loss": 2.508,
      "step": 9923
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.945914089679718,
      "learning_rate": 1.9256597459595777e-05,
      "loss": 2.5487,
      "step": 9924
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0337133407592773,
      "learning_rate": 1.9256441668255937e-05,
      "loss": 2.4327,
      "step": 9925
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0303512811660767,
      "learning_rate": 1.9256285861223913e-05,
      "loss": 2.7291,
      "step": 9926
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0823490619659424,
      "learning_rate": 1.925613003849997e-05,
      "loss": 2.6032,
      "step": 9927
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2206604480743408,
      "learning_rate": 1.9255974200084373e-05,
      "loss": 2.4538,
      "step": 9928
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1170203685760498,
      "learning_rate": 1.9255818345977386e-05,
      "loss": 2.5635,
      "step": 9929
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9899214506149292,
      "learning_rate": 1.9255662476179272e-05,
      "loss": 2.5813,
      "step": 9930
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.026611328125,
      "learning_rate": 1.92555065906903e-05,
      "loss": 2.475,
      "step": 9931
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.014715552330017,
      "learning_rate": 1.9255350689510727e-05,
      "loss": 2.7202,
      "step": 9932
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0780539512634277,
      "learning_rate": 1.925519477264082e-05,
      "loss": 2.5886,
      "step": 9933
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9145845174789429,
      "learning_rate": 1.9255038840080845e-05,
      "loss": 2.4136,
      "step": 9934
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9576600790023804,
      "learning_rate": 1.9254882891831065e-05,
      "loss": 2.6979,
      "step": 9935
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9892686009407043,
      "learning_rate": 1.925472692789175e-05,
      "loss": 2.4209,
      "step": 9936
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0339865684509277,
      "learning_rate": 1.9254570948263154e-05,
      "loss": 2.4243,
      "step": 9937
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9726896286010742,
      "learning_rate": 1.925441495294555e-05,
      "loss": 2.3853,
      "step": 9938
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0066161155700684,
      "learning_rate": 1.9254258941939196e-05,
      "loss": 2.4742,
      "step": 9939
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9660074710845947,
      "learning_rate": 1.9254102915244363e-05,
      "loss": 2.6312,
      "step": 9940
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1929830312728882,
      "learning_rate": 1.9253946872861307e-05,
      "loss": 2.6128,
      "step": 9941
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2132090330123901,
      "learning_rate": 1.9253790814790304e-05,
      "loss": 2.4375,
      "step": 9942
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1371679306030273,
      "learning_rate": 1.925363474103161e-05,
      "loss": 2.5949,
      "step": 9943
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0739322900772095,
      "learning_rate": 1.925347865158549e-05,
      "loss": 2.4628,
      "step": 9944
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9854757785797119,
      "learning_rate": 1.925332254645221e-05,
      "loss": 2.5852,
      "step": 9945
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8894386887550354,
      "learning_rate": 1.9253166425632038e-05,
      "loss": 2.4325,
      "step": 9946
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.12772798538208,
      "learning_rate": 1.9253010289125234e-05,
      "loss": 2.6268,
      "step": 9947
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0194966793060303,
      "learning_rate": 1.9252854136932062e-05,
      "loss": 2.7012,
      "step": 9948
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.040444254875183,
      "learning_rate": 1.9252697969052795e-05,
      "loss": 2.7135,
      "step": 9949
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9836409687995911,
      "learning_rate": 1.9252541785487687e-05,
      "loss": 2.4502,
      "step": 9950
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.172391414642334,
      "learning_rate": 1.925238558623701e-05,
      "loss": 2.6653,
      "step": 9951
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0769234895706177,
      "learning_rate": 1.9252229371301027e-05,
      "loss": 2.6305,
      "step": 9952
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.03133225440979,
      "learning_rate": 1.925207314068e-05,
      "loss": 2.5887,
      "step": 9953
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0398781299591064,
      "learning_rate": 1.92519168943742e-05,
      "loss": 2.7648,
      "step": 9954
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9945273995399475,
      "learning_rate": 1.9251760632383883e-05,
      "loss": 2.6708,
      "step": 9955
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0715839862823486,
      "learning_rate": 1.9251604354709322e-05,
      "loss": 2.5379,
      "step": 9956
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9571523666381836,
      "learning_rate": 1.925144806135078e-05,
      "loss": 2.9391,
      "step": 9957
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9984302520751953,
      "learning_rate": 1.925129175230852e-05,
      "loss": 2.8179,
      "step": 9958
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.918528139591217,
      "learning_rate": 1.9251135427582806e-05,
      "loss": 2.5392,
      "step": 9959
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9739402532577515,
      "learning_rate": 1.925097908717391e-05,
      "loss": 2.5508,
      "step": 9960
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9286555647850037,
      "learning_rate": 1.9250822731082088e-05,
      "loss": 2.7926,
      "step": 9961
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1659224033355713,
      "learning_rate": 1.9250666359307607e-05,
      "loss": 2.6464,
      "step": 9962
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9789915680885315,
      "learning_rate": 1.9250509971850736e-05,
      "loss": 2.5972,
      "step": 9963
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.012502908706665,
      "learning_rate": 1.925035356871174e-05,
      "loss": 2.5861,
      "step": 9964
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.963513195514679,
      "learning_rate": 1.9250197149890882e-05,
      "loss": 2.6282,
      "step": 9965
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9829073548316956,
      "learning_rate": 1.9250040715388425e-05,
      "loss": 2.4496,
      "step": 9966
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.020254373550415,
      "learning_rate": 1.924988426520464e-05,
      "loss": 2.6646,
      "step": 9967
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.966213583946228,
      "learning_rate": 1.9249727799339785e-05,
      "loss": 2.4903,
      "step": 9968
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9952057600021362,
      "learning_rate": 1.9249571317794134e-05,
      "loss": 2.5486,
      "step": 9969
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0064539909362793,
      "learning_rate": 1.9249414820567942e-05,
      "loss": 2.5731,
      "step": 9970
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0188918113708496,
      "learning_rate": 1.9249258307661486e-05,
      "loss": 2.5942,
      "step": 9971
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0164810419082642,
      "learning_rate": 1.9249101779075017e-05,
      "loss": 2.496,
      "step": 9972
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9571813941001892,
      "learning_rate": 1.9248945234808817e-05,
      "loss": 2.604,
      "step": 9973
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.973445475101471,
      "learning_rate": 1.9248788674863138e-05,
      "loss": 2.5828,
      "step": 9974
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1295764446258545,
      "learning_rate": 1.924863209923825e-05,
      "loss": 2.6141,
      "step": 9975
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9847106337547302,
      "learning_rate": 1.924847550793442e-05,
      "loss": 2.5723,
      "step": 9976
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9287640452384949,
      "learning_rate": 1.9248318900951913e-05,
      "loss": 2.7242,
      "step": 9977
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0654727220535278,
      "learning_rate": 1.924816227829099e-05,
      "loss": 2.395,
      "step": 9978
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0141141414642334,
      "learning_rate": 1.9248005639951924e-05,
      "loss": 2.7091,
      "step": 9979
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.200971245765686,
      "learning_rate": 1.9247848985934977e-05,
      "loss": 2.5899,
      "step": 9980
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3635340929031372,
      "learning_rate": 1.9247692316240412e-05,
      "loss": 2.7874,
      "step": 9981
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9636940956115723,
      "learning_rate": 1.9247535630868497e-05,
      "loss": 2.7294,
      "step": 9982
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.090533971786499,
      "learning_rate": 1.92473789298195e-05,
      "loss": 2.5977,
      "step": 9983
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9996655583381653,
      "learning_rate": 1.924722221309368e-05,
      "loss": 2.5292,
      "step": 9984
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.050930380821228,
      "learning_rate": 1.924706548069131e-05,
      "loss": 2.7212,
      "step": 9985
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0570212602615356,
      "learning_rate": 1.9246908732612652e-05,
      "loss": 2.562,
      "step": 9986
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0696685314178467,
      "learning_rate": 1.9246751968857972e-05,
      "loss": 2.5642,
      "step": 9987
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0081636905670166,
      "learning_rate": 1.9246595189427537e-05,
      "loss": 2.671,
      "step": 9988
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0469028949737549,
      "learning_rate": 1.924643839432161e-05,
      "loss": 2.6444,
      "step": 9989
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9674373269081116,
      "learning_rate": 1.924628158354046e-05,
      "loss": 2.6398,
      "step": 9990
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9592625498771667,
      "learning_rate": 1.924612475708435e-05,
      "loss": 2.6477,
      "step": 9991
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9284117221832275,
      "learning_rate": 1.924596791495355e-05,
      "loss": 2.5424,
      "step": 9992
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1840344667434692,
      "learning_rate": 1.924581105714832e-05,
      "loss": 2.4819,
      "step": 9993
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.056718111038208,
      "learning_rate": 1.9245654183668932e-05,
      "loss": 2.4027,
      "step": 9994
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9476428627967834,
      "learning_rate": 1.924549729451565e-05,
      "loss": 2.5177,
      "step": 9995
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0003504753112793,
      "learning_rate": 1.9245340389688735e-05,
      "loss": 2.3995,
      "step": 9996
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0028492212295532,
      "learning_rate": 1.9245183469188456e-05,
      "loss": 2.5775,
      "step": 9997
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9490361213684082,
      "learning_rate": 1.9245026533015086e-05,
      "loss": 2.5529,
      "step": 9998
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.102934718132019,
      "learning_rate": 1.924486958116888e-05,
      "loss": 2.5095,
      "step": 9999
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9535113573074341,
      "learning_rate": 1.924471261365011e-05,
      "loss": 2.5358,
      "step": 10000
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0683362483978271,
      "learning_rate": 1.9244555630459042e-05,
      "loss": 2.6408,
      "step": 10001
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.979112446308136,
      "learning_rate": 1.9244398631595944e-05,
      "loss": 2.747,
      "step": 10002
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0386652946472168,
      "learning_rate": 1.924424161706108e-05,
      "loss": 2.5415,
      "step": 10003
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9623255729675293,
      "learning_rate": 1.9244084586854713e-05,
      "loss": 2.3563,
      "step": 10004
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9908339977264404,
      "learning_rate": 1.924392754097711e-05,
      "loss": 2.621,
      "step": 10005
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.995957612991333,
      "learning_rate": 1.924377047942854e-05,
      "loss": 2.472,
      "step": 10006
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0605738162994385,
      "learning_rate": 1.924361340220927e-05,
      "loss": 2.6242,
      "step": 10007
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0463333129882812,
      "learning_rate": 1.9243456309319565e-05,
      "loss": 2.6011,
      "step": 10008
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0404834747314453,
      "learning_rate": 1.924329920075969e-05,
      "loss": 2.559,
      "step": 10009
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0195748805999756,
      "learning_rate": 1.9243142076529915e-05,
      "loss": 2.55,
      "step": 10010
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0542848110198975,
      "learning_rate": 1.92429849366305e-05,
      "loss": 2.6248,
      "step": 10011
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.113856554031372,
      "learning_rate": 1.924282778106172e-05,
      "loss": 2.5606,
      "step": 10012
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9458386898040771,
      "learning_rate": 1.9242670609823833e-05,
      "loss": 2.694,
      "step": 10013
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0547702312469482,
      "learning_rate": 1.924251342291711e-05,
      "loss": 2.5348,
      "step": 10014
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.108847737312317,
      "learning_rate": 1.9242356220341815e-05,
      "loss": 2.3296,
      "step": 10015
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0657391548156738,
      "learning_rate": 1.924219900209822e-05,
      "loss": 2.5167,
      "step": 10016
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0238500833511353,
      "learning_rate": 1.9242041768186582e-05,
      "loss": 2.7023,
      "step": 10017
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0194182395935059,
      "learning_rate": 1.9241884518607173e-05,
      "loss": 2.5978,
      "step": 10018
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1561671495437622,
      "learning_rate": 1.9241727253360266e-05,
      "loss": 2.6147,
      "step": 10019
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0378925800323486,
      "learning_rate": 1.9241569972446117e-05,
      "loss": 2.7186,
      "step": 10020
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.928300678730011,
      "learning_rate": 1.9241412675864998e-05,
      "loss": 2.6649,
      "step": 10021
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9916064143180847,
      "learning_rate": 1.9241255363617175e-05,
      "loss": 2.5954,
      "step": 10022
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1296197175979614,
      "learning_rate": 1.9241098035702915e-05,
      "loss": 2.38,
      "step": 10023
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9528864026069641,
      "learning_rate": 1.924094069212248e-05,
      "loss": 2.5014,
      "step": 10024
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9971714019775391,
      "learning_rate": 1.9240783332876143e-05,
      "loss": 2.7135,
      "step": 10025
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9580926299095154,
      "learning_rate": 1.924062595796417e-05,
      "loss": 2.4387,
      "step": 10026
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0479246377944946,
      "learning_rate": 1.9240468567386825e-05,
      "loss": 2.6469,
      "step": 10027
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0512620210647583,
      "learning_rate": 1.9240311161144375e-05,
      "loss": 2.6971,
      "step": 10028
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9626870155334473,
      "learning_rate": 1.924015373923709e-05,
      "loss": 2.6054,
      "step": 10029
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0408540964126587,
      "learning_rate": 1.9239996301665232e-05,
      "loss": 2.4849,
      "step": 10030
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9169955253601074,
      "learning_rate": 1.9239838848429073e-05,
      "loss": 2.6433,
      "step": 10031
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1780763864517212,
      "learning_rate": 1.923968137952888e-05,
      "loss": 2.744,
      "step": 10032
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9556426405906677,
      "learning_rate": 1.9239523894964913e-05,
      "loss": 2.3886,
      "step": 10033
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0401257276535034,
      "learning_rate": 1.9239366394737446e-05,
      "loss": 2.7307,
      "step": 10034
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.039851427078247,
      "learning_rate": 1.923920887884674e-05,
      "loss": 2.6473,
      "step": 10035
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0316063165664673,
      "learning_rate": 1.9239051347293073e-05,
      "loss": 2.6289,
      "step": 10036
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0053584575653076,
      "learning_rate": 1.9238893800076697e-05,
      "loss": 2.3964,
      "step": 10037
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9778124094009399,
      "learning_rate": 1.9238736237197893e-05,
      "loss": 2.6388,
      "step": 10038
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2507387399673462,
      "learning_rate": 1.923857865865692e-05,
      "loss": 2.4892,
      "step": 10039
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9685312509536743,
      "learning_rate": 1.9238421064454043e-05,
      "loss": 2.4395,
      "step": 10040
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9747823476791382,
      "learning_rate": 1.9238263454589536e-05,
      "loss": 2.3404,
      "step": 10041
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9961565732955933,
      "learning_rate": 1.9238105829063663e-05,
      "loss": 2.5797,
      "step": 10042
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0322922468185425,
      "learning_rate": 1.9237948187876694e-05,
      "loss": 2.6021,
      "step": 10043
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.236482858657837,
      "learning_rate": 1.9237790531028892e-05,
      "loss": 2.4089,
      "step": 10044
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9873877763748169,
      "learning_rate": 1.9237632858520524e-05,
      "loss": 2.5752,
      "step": 10045
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1621202230453491,
      "learning_rate": 1.9237475170351866e-05,
      "loss": 2.5378,
      "step": 10046
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1030247211456299,
      "learning_rate": 1.923731746652317e-05,
      "loss": 2.5829,
      "step": 10047
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9476982951164246,
      "learning_rate": 1.9237159747034718e-05,
      "loss": 2.5116,
      "step": 10048
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.314571738243103,
      "learning_rate": 1.923700201188677e-05,
      "loss": 2.4943,
      "step": 10049
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1919116973876953,
      "learning_rate": 1.9236844261079595e-05,
      "loss": 2.369,
      "step": 10050
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9915218353271484,
      "learning_rate": 1.9236686494613458e-05,
      "loss": 2.6303,
      "step": 10051
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0345317125320435,
      "learning_rate": 1.9236528712488632e-05,
      "loss": 2.558,
      "step": 10052
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0625900030136108,
      "learning_rate": 1.9236370914705376e-05,
      "loss": 2.6375,
      "step": 10053
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9229937791824341,
      "learning_rate": 1.923621310126397e-05,
      "loss": 2.7134,
      "step": 10054
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0550261735916138,
      "learning_rate": 1.923605527216467e-05,
      "loss": 2.6357,
      "step": 10055
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9336393475532532,
      "learning_rate": 1.9235897427407748e-05,
      "loss": 2.7145,
      "step": 10056
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1043492555618286,
      "learning_rate": 1.9235739566993473e-05,
      "loss": 2.2404,
      "step": 10057
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.935930073261261,
      "learning_rate": 1.923558169092211e-05,
      "loss": 2.5369,
      "step": 10058
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0426782369613647,
      "learning_rate": 1.9235423799193927e-05,
      "loss": 2.6214,
      "step": 10059
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9796247482299805,
      "learning_rate": 1.9235265891809195e-05,
      "loss": 2.7259,
      "step": 10060
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9794864058494568,
      "learning_rate": 1.9235107968768178e-05,
      "loss": 2.4275,
      "step": 10061
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0155218839645386,
      "learning_rate": 1.923495003007114e-05,
      "loss": 2.3687,
      "step": 10062
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9797708988189697,
      "learning_rate": 1.9234792075718358e-05,
      "loss": 2.6298,
      "step": 10063
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1044808626174927,
      "learning_rate": 1.9234634105710098e-05,
      "loss": 2.628,
      "step": 10064
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9773285984992981,
      "learning_rate": 1.923447612004662e-05,
      "loss": 2.6547,
      "step": 10065
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2581326961517334,
      "learning_rate": 1.9234318118728197e-05,
      "loss": 2.5731,
      "step": 10066
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.033434271812439,
      "learning_rate": 1.9234160101755098e-05,
      "loss": 2.5025,
      "step": 10067
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0854524374008179,
      "learning_rate": 1.9234002069127593e-05,
      "loss": 2.6017,
      "step": 10068
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.975357174873352,
      "learning_rate": 1.923384402084594e-05,
      "loss": 2.7595,
      "step": 10069
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0310020446777344,
      "learning_rate": 1.923368595691042e-05,
      "loss": 2.67,
      "step": 10070
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9968681931495667,
      "learning_rate": 1.9233527877321292e-05,
      "loss": 2.6457,
      "step": 10071
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9951645731925964,
      "learning_rate": 1.923336978207883e-05,
      "loss": 2.4578,
      "step": 10072
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.985652506351471,
      "learning_rate": 1.923321167118329e-05,
      "loss": 2.5193,
      "step": 10073
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0421324968338013,
      "learning_rate": 1.9233053544634953e-05,
      "loss": 2.653,
      "step": 10074
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0072709321975708,
      "learning_rate": 1.9232895402434084e-05,
      "loss": 2.4581,
      "step": 10075
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9330150485038757,
      "learning_rate": 1.923273724458095e-05,
      "loss": 2.3929,
      "step": 10076
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0565073490142822,
      "learning_rate": 1.9232579071075817e-05,
      "loss": 2.6185,
      "step": 10077
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.097557783126831,
      "learning_rate": 1.9232420881918955e-05,
      "loss": 2.5015,
      "step": 10078
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9978446960449219,
      "learning_rate": 1.923226267711063e-05,
      "loss": 2.6298,
      "step": 10079
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.091143250465393,
      "learning_rate": 1.9232104456651118e-05,
      "loss": 2.6951,
      "step": 10080
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0634275674819946,
      "learning_rate": 1.9231946220540675e-05,
      "loss": 2.4468,
      "step": 10081
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0147124528884888,
      "learning_rate": 1.923178796877958e-05,
      "loss": 2.5773,
      "step": 10082
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9641793370246887,
      "learning_rate": 1.92316297013681e-05,
      "loss": 2.3828,
      "step": 10083
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9730843901634216,
      "learning_rate": 1.9231471418306494e-05,
      "loss": 2.6849,
      "step": 10084
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0218234062194824,
      "learning_rate": 1.9231313119595037e-05,
      "loss": 2.4021,
      "step": 10085
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9686039686203003,
      "learning_rate": 1.9231154805234e-05,
      "loss": 2.6421,
      "step": 10086
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9865516424179077,
      "learning_rate": 1.9230996475223646e-05,
      "loss": 2.6151,
      "step": 10087
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9668505787849426,
      "learning_rate": 1.923083812956425e-05,
      "loss": 2.527,
      "step": 10088
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9252719879150391,
      "learning_rate": 1.9230679768256072e-05,
      "loss": 2.5175,
      "step": 10089
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0299512147903442,
      "learning_rate": 1.9230521391299385e-05,
      "loss": 2.5158,
      "step": 10090
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9835090637207031,
      "learning_rate": 1.923036299869446e-05,
      "loss": 2.519,
      "step": 10091
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9964907169342041,
      "learning_rate": 1.9230204590441563e-05,
      "loss": 2.6717,
      "step": 10092
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9144505858421326,
      "learning_rate": 1.923004616654096e-05,
      "loss": 2.74,
      "step": 10093
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9539108872413635,
      "learning_rate": 1.9229887726992923e-05,
      "loss": 2.4929,
      "step": 10094
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0702006816864014,
      "learning_rate": 1.922972927179772e-05,
      "loss": 2.5633,
      "step": 10095
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0048210620880127,
      "learning_rate": 1.9229570800955616e-05,
      "loss": 2.2774,
      "step": 10096
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9832025170326233,
      "learning_rate": 1.922941231446689e-05,
      "loss": 2.6513,
      "step": 10097
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.074602484703064,
      "learning_rate": 1.9229253812331797e-05,
      "loss": 2.5841,
      "step": 10098
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9063227772712708,
      "learning_rate": 1.9229095294550613e-05,
      "loss": 2.6347,
      "step": 10099
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9633699655532837,
      "learning_rate": 1.9228936761123606e-05,
      "loss": 2.4253,
      "step": 10100
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9878571629524231,
      "learning_rate": 1.9228778212051047e-05,
      "loss": 2.4788,
      "step": 10101
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9868065118789673,
      "learning_rate": 1.9228619647333203e-05,
      "loss": 2.7253,
      "step": 10102
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0829945802688599,
      "learning_rate": 1.922846106697034e-05,
      "loss": 2.6581,
      "step": 10103
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0218087434768677,
      "learning_rate": 1.922830247096273e-05,
      "loss": 2.6114,
      "step": 10104
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9493498206138611,
      "learning_rate": 1.9228143859310636e-05,
      "loss": 2.4132,
      "step": 10105
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0458142757415771,
      "learning_rate": 1.9227985232014337e-05,
      "loss": 2.4804,
      "step": 10106
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9332077503204346,
      "learning_rate": 1.9227826589074095e-05,
      "loss": 2.4466,
      "step": 10107
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9922155737876892,
      "learning_rate": 1.922766793049018e-05,
      "loss": 2.5604,
      "step": 10108
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1383423805236816,
      "learning_rate": 1.9227509256262865e-05,
      "loss": 2.536,
      "step": 10109
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9506325125694275,
      "learning_rate": 1.9227350566392414e-05,
      "loss": 2.5033,
      "step": 10110
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9751074910163879,
      "learning_rate": 1.9227191860879097e-05,
      "loss": 2.5634,
      "step": 10111
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0066033601760864,
      "learning_rate": 1.9227033139723185e-05,
      "loss": 2.7146,
      "step": 10112
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.032974362373352,
      "learning_rate": 1.9226874402924943e-05,
      "loss": 2.4551,
      "step": 10113
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9764920473098755,
      "learning_rate": 1.9226715650484645e-05,
      "loss": 2.6144,
      "step": 10114
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9376275539398193,
      "learning_rate": 1.922655688240256e-05,
      "loss": 2.5445,
      "step": 10115
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2912166118621826,
      "learning_rate": 1.9226398098678953e-05,
      "loss": 2.7695,
      "step": 10116
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1658838987350464,
      "learning_rate": 1.9226239299314094e-05,
      "loss": 2.6252,
      "step": 10117
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0394153594970703,
      "learning_rate": 1.9226080484308256e-05,
      "loss": 2.5686,
      "step": 10118
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0507560968399048,
      "learning_rate": 1.9225921653661704e-05,
      "loss": 2.6075,
      "step": 10119
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9940577149391174,
      "learning_rate": 1.922576280737471e-05,
      "loss": 2.7549,
      "step": 10120
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1223887205123901,
      "learning_rate": 1.9225603945447543e-05,
      "loss": 2.5697,
      "step": 10121
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.051923394203186,
      "learning_rate": 1.9225445067880472e-05,
      "loss": 2.7039,
      "step": 10122
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0239497423171997,
      "learning_rate": 1.9225286174673764e-05,
      "loss": 2.4323,
      "step": 10123
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0106158256530762,
      "learning_rate": 1.922512726582769e-05,
      "loss": 2.6292,
      "step": 10124
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0141890048980713,
      "learning_rate": 1.922496834134252e-05,
      "loss": 2.6854,
      "step": 10125
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0354849100112915,
      "learning_rate": 1.9224809401218525e-05,
      "loss": 2.5275,
      "step": 10126
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9426413774490356,
      "learning_rate": 1.922465044545597e-05,
      "loss": 2.3416,
      "step": 10127
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9509061574935913,
      "learning_rate": 1.9224491474055132e-05,
      "loss": 2.516,
      "step": 10128
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0541386604309082,
      "learning_rate": 1.922433248701627e-05,
      "loss": 2.5702,
      "step": 10129
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9794581532478333,
      "learning_rate": 1.9224173484339664e-05,
      "loss": 2.5279,
      "step": 10130
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.999904215335846,
      "learning_rate": 1.9224014466025577e-05,
      "loss": 2.5107,
      "step": 10131
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1437952518463135,
      "learning_rate": 1.922385543207428e-05,
      "loss": 2.6522,
      "step": 10132
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.055275797843933,
      "learning_rate": 1.922369638248604e-05,
      "loss": 2.4879,
      "step": 10133
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2098839282989502,
      "learning_rate": 1.9223537317261135e-05,
      "loss": 2.4639,
      "step": 10134
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9118384718894958,
      "learning_rate": 1.9223378236399824e-05,
      "loss": 2.6546,
      "step": 10135
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0553207397460938,
      "learning_rate": 1.9223219139902383e-05,
      "loss": 2.5303,
      "step": 10136
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.238713264465332,
      "learning_rate": 1.9223060027769084e-05,
      "loss": 2.4563,
      "step": 10137
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9743001461029053,
      "learning_rate": 1.922290090000019e-05,
      "loss": 2.3411,
      "step": 10138
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0917567014694214,
      "learning_rate": 1.9222741756595975e-05,
      "loss": 2.7417,
      "step": 10139
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8701682090759277,
      "learning_rate": 1.9222582597556708e-05,
      "loss": 2.5795,
      "step": 10140
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1144918203353882,
      "learning_rate": 1.9222423422882656e-05,
      "loss": 2.6176,
      "step": 10141
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0682157278060913,
      "learning_rate": 1.9222264232574094e-05,
      "loss": 2.6058,
      "step": 10142
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.073371410369873,
      "learning_rate": 1.9222105026631292e-05,
      "loss": 2.446,
      "step": 10143
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0341228246688843,
      "learning_rate": 1.9221945805054512e-05,
      "loss": 2.6305,
      "step": 10144
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0628859996795654,
      "learning_rate": 1.9221786567844035e-05,
      "loss": 2.7879,
      "step": 10145
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.993590235710144,
      "learning_rate": 1.922162731500012e-05,
      "loss": 2.4616,
      "step": 10146
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1215869188308716,
      "learning_rate": 1.9221468046523045e-05,
      "loss": 2.6783,
      "step": 10147
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1347088813781738,
      "learning_rate": 1.922130876241308e-05,
      "loss": 2.3915,
      "step": 10148
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9471012949943542,
      "learning_rate": 1.9221149462670486e-05,
      "loss": 2.4618,
      "step": 10149
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9995514154434204,
      "learning_rate": 1.922099014729554e-05,
      "loss": 2.4937,
      "step": 10150
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0225541591644287,
      "learning_rate": 1.9220830816288517e-05,
      "loss": 2.5506,
      "step": 10151
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9869746565818787,
      "learning_rate": 1.9220671469649677e-05,
      "loss": 2.528,
      "step": 10152
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9369338750839233,
      "learning_rate": 1.922051210737929e-05,
      "loss": 2.3068,
      "step": 10153
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0055556297302246,
      "learning_rate": 1.9220352729477637e-05,
      "loss": 2.6372,
      "step": 10154
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9569356441497803,
      "learning_rate": 1.9220193335944983e-05,
      "loss": 2.5165,
      "step": 10155
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0684360265731812,
      "learning_rate": 1.9220033926781597e-05,
      "loss": 2.6288,
      "step": 10156
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0015082359313965,
      "learning_rate": 1.9219874501987745e-05,
      "loss": 2.8018,
      "step": 10157
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0225971937179565,
      "learning_rate": 1.9219715061563702e-05,
      "loss": 2.6854,
      "step": 10158
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.097793698310852,
      "learning_rate": 1.9219555605509744e-05,
      "loss": 2.7277,
      "step": 10159
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9799289107322693,
      "learning_rate": 1.921939613382613e-05,
      "loss": 2.7275,
      "step": 10160
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1245678663253784,
      "learning_rate": 1.9219236646513136e-05,
      "loss": 2.4414,
      "step": 10161
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9506771564483643,
      "learning_rate": 1.921907714357103e-05,
      "loss": 2.6597,
      "step": 10162
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1373053789138794,
      "learning_rate": 1.9218917625000092e-05,
      "loss": 2.4216,
      "step": 10163
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9742860794067383,
      "learning_rate": 1.9218758090800578e-05,
      "loss": 2.4947,
      "step": 10164
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9427374005317688,
      "learning_rate": 1.9218598540972768e-05,
      "loss": 2.5025,
      "step": 10165
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9225413799285889,
      "learning_rate": 1.9218438975516925e-05,
      "loss": 2.3582,
      "step": 10166
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9875552654266357,
      "learning_rate": 1.921827939443333e-05,
      "loss": 2.6728,
      "step": 10167
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9774103164672852,
      "learning_rate": 1.9218119797722247e-05,
      "loss": 2.6954,
      "step": 10168
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9995000958442688,
      "learning_rate": 1.9217960185383947e-05,
      "loss": 2.7027,
      "step": 10169
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9329360723495483,
      "learning_rate": 1.92178005574187e-05,
      "loss": 2.5693,
      "step": 10170
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1470059156417847,
      "learning_rate": 1.9217640913826777e-05,
      "loss": 2.6436,
      "step": 10171
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9781288504600525,
      "learning_rate": 1.921748125460845e-05,
      "loss": 2.5245,
      "step": 10172
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.035117506980896,
      "learning_rate": 1.9217321579763987e-05,
      "loss": 2.6111,
      "step": 10173
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1118136644363403,
      "learning_rate": 1.921716188929366e-05,
      "loss": 2.3634,
      "step": 10174
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1462595462799072,
      "learning_rate": 1.9217002183197744e-05,
      "loss": 2.621,
      "step": 10175
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0652086734771729,
      "learning_rate": 1.9216842461476503e-05,
      "loss": 2.4328,
      "step": 10176
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0284494161605835,
      "learning_rate": 1.9216682724130214e-05,
      "loss": 2.6615,
      "step": 10177
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9750679731369019,
      "learning_rate": 1.9216522971159142e-05,
      "loss": 2.4651,
      "step": 10178
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.883528470993042,
      "learning_rate": 1.9216363202563562e-05,
      "loss": 2.4052,
      "step": 10179
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4234347343444824,
      "learning_rate": 1.921620341834374e-05,
      "loss": 2.6957,
      "step": 10180
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.073294997215271,
      "learning_rate": 1.9216043618499956e-05,
      "loss": 2.6678,
      "step": 10181
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9961779713630676,
      "learning_rate": 1.9215883803032473e-05,
      "loss": 2.3321,
      "step": 10182
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1911998987197876,
      "learning_rate": 1.921572397194156e-05,
      "loss": 2.5456,
      "step": 10183
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0064826011657715,
      "learning_rate": 1.9215564125227493e-05,
      "loss": 2.6343,
      "step": 10184
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0778038501739502,
      "learning_rate": 1.9215404262890546e-05,
      "loss": 2.4435,
      "step": 10185
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0628178119659424,
      "learning_rate": 1.921524438493098e-05,
      "loss": 2.5151,
      "step": 10186
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9687592387199402,
      "learning_rate": 1.921508449134908e-05,
      "loss": 2.4716,
      "step": 10187
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9675735235214233,
      "learning_rate": 1.92149245821451e-05,
      "loss": 2.5308,
      "step": 10188
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0233176946640015,
      "learning_rate": 1.9214764657319325e-05,
      "loss": 2.55,
      "step": 10189
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1615099906921387,
      "learning_rate": 1.921460471687202e-05,
      "loss": 2.6215,
      "step": 10190
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0851882696151733,
      "learning_rate": 1.9214444760803456e-05,
      "loss": 2.6846,
      "step": 10191
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9199881553649902,
      "learning_rate": 1.9214284789113908e-05,
      "loss": 2.6741,
      "step": 10192
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1360548734664917,
      "learning_rate": 1.9214124801803646e-05,
      "loss": 2.5367,
      "step": 10193
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9946321249008179,
      "learning_rate": 1.921396479887294e-05,
      "loss": 3.0031,
      "step": 10194
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0121296644210815,
      "learning_rate": 1.9213804780322055e-05,
      "loss": 2.5949,
      "step": 10195
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9668537974357605,
      "learning_rate": 1.9213644746151275e-05,
      "loss": 2.3557,
      "step": 10196
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1004409790039062,
      "learning_rate": 1.921348469636086e-05,
      "loss": 2.5724,
      "step": 10197
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9495826959609985,
      "learning_rate": 1.9213324630951092e-05,
      "loss": 2.6472,
      "step": 10198
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0247983932495117,
      "learning_rate": 1.9213164549922232e-05,
      "loss": 2.5175,
      "step": 10199
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0736314058303833,
      "learning_rate": 1.9213004453274555e-05,
      "loss": 2.6637,
      "step": 10200
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0951277017593384,
      "learning_rate": 1.9212844341008336e-05,
      "loss": 2.6612,
      "step": 10201
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3994436264038086,
      "learning_rate": 1.9212684213123843e-05,
      "loss": 2.643,
      "step": 10202
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.106157660484314,
      "learning_rate": 1.9212524069621346e-05,
      "loss": 2.8187,
      "step": 10203
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0638185739517212,
      "learning_rate": 1.921236391050112e-05,
      "loss": 2.6577,
      "step": 10204
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2261810302734375,
      "learning_rate": 1.9212203735763437e-05,
      "loss": 2.6699,
      "step": 10205
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1318649053573608,
      "learning_rate": 1.9212043545408566e-05,
      "loss": 2.6056,
      "step": 10206
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9169297218322754,
      "learning_rate": 1.9211883339436776e-05,
      "loss": 2.6769,
      "step": 10207
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0472643375396729,
      "learning_rate": 1.9211723117848345e-05,
      "loss": 2.7064,
      "step": 10208
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0538914203643799,
      "learning_rate": 1.921156288064354e-05,
      "loss": 2.7904,
      "step": 10209
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.072039008140564,
      "learning_rate": 1.9211402627822636e-05,
      "loss": 2.5364,
      "step": 10210
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0346249341964722,
      "learning_rate": 1.9211242359385903e-05,
      "loss": 2.6714,
      "step": 10211
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.031402349472046,
      "learning_rate": 1.921108207533361e-05,
      "loss": 2.4965,
      "step": 10212
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0067874193191528,
      "learning_rate": 1.921092177566603e-05,
      "loss": 2.7193,
      "step": 10213
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1631951332092285,
      "learning_rate": 1.921076146038344e-05,
      "loss": 2.7097,
      "step": 10214
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1285756826400757,
      "learning_rate": 1.9210601129486108e-05,
      "loss": 2.5663,
      "step": 10215
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0547927618026733,
      "learning_rate": 1.9210440782974303e-05,
      "loss": 2.5973,
      "step": 10216
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1186078786849976,
      "learning_rate": 1.9210280420848296e-05,
      "loss": 2.5618,
      "step": 10217
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9599130749702454,
      "learning_rate": 1.921012004310837e-05,
      "loss": 2.5947,
      "step": 10218
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9358558058738708,
      "learning_rate": 1.9209959649754785e-05,
      "loss": 2.7017,
      "step": 10219
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0312761068344116,
      "learning_rate": 1.9209799240787815e-05,
      "loss": 2.6963,
      "step": 10220
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1812045574188232,
      "learning_rate": 1.9209638816207735e-05,
      "loss": 2.6244,
      "step": 10221
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2315118312835693,
      "learning_rate": 1.9209478376014815e-05,
      "loss": 2.623,
      "step": 10222
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1763147115707397,
      "learning_rate": 1.9209317920209332e-05,
      "loss": 2.6488,
      "step": 10223
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9722493886947632,
      "learning_rate": 1.9209157448791552e-05,
      "loss": 2.5543,
      "step": 10224
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.990315318107605,
      "learning_rate": 1.920899696176175e-05,
      "loss": 2.4335,
      "step": 10225
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0367494821548462,
      "learning_rate": 1.9208836459120196e-05,
      "loss": 2.6597,
      "step": 10226
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.081955909729004,
      "learning_rate": 1.9208675940867162e-05,
      "loss": 2.6401,
      "step": 10227
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0434463024139404,
      "learning_rate": 1.920851540700292e-05,
      "loss": 2.9047,
      "step": 10228
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2080967426300049,
      "learning_rate": 1.9208354857527748e-05,
      "loss": 2.6328,
      "step": 10229
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1021578311920166,
      "learning_rate": 1.920819429244191e-05,
      "loss": 2.7926,
      "step": 10230
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0072345733642578,
      "learning_rate": 1.9208033711745682e-05,
      "loss": 2.4656,
      "step": 10231
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2454935312271118,
      "learning_rate": 1.9207873115439338e-05,
      "loss": 2.4638,
      "step": 10232
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9795683026313782,
      "learning_rate": 1.9207712503523145e-05,
      "loss": 2.6725,
      "step": 10233
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.055589199066162,
      "learning_rate": 1.9207551875997383e-05,
      "loss": 2.7483,
      "step": 10234
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9661718010902405,
      "learning_rate": 1.9207391232862317e-05,
      "loss": 2.5509,
      "step": 10235
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1182825565338135,
      "learning_rate": 1.920723057411822e-05,
      "loss": 2.6917,
      "step": 10236
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1811306476593018,
      "learning_rate": 1.920706989976537e-05,
      "loss": 2.3033,
      "step": 10237
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0396281480789185,
      "learning_rate": 1.9206909209804036e-05,
      "loss": 2.6997,
      "step": 10238
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0007458925247192,
      "learning_rate": 1.9206748504234487e-05,
      "loss": 2.4419,
      "step": 10239
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9882862567901611,
      "learning_rate": 1.9206587783057004e-05,
      "loss": 2.2721,
      "step": 10240
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.037980318069458,
      "learning_rate": 1.9206427046271848e-05,
      "loss": 2.4907,
      "step": 10241
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1637908220291138,
      "learning_rate": 1.9206266293879304e-05,
      "loss": 2.6444,
      "step": 10242
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0126595497131348,
      "learning_rate": 1.9206105525879636e-05,
      "loss": 2.668,
      "step": 10243
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0947908163070679,
      "learning_rate": 1.9205944742273114e-05,
      "loss": 2.4875,
      "step": 10244
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0607143640518188,
      "learning_rate": 1.920578394306002e-05,
      "loss": 2.3906,
      "step": 10245
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.099789023399353,
      "learning_rate": 1.9205623128240623e-05,
      "loss": 2.6206,
      "step": 10246
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0905685424804688,
      "learning_rate": 1.9205462297815192e-05,
      "loss": 2.9105,
      "step": 10247
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0276622772216797,
      "learning_rate": 1.9205301451784003e-05,
      "loss": 2.6069,
      "step": 10248
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9765817523002625,
      "learning_rate": 1.9205140590147326e-05,
      "loss": 2.8196,
      "step": 10249
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9788932204246521,
      "learning_rate": 1.920497971290544e-05,
      "loss": 2.5107,
      "step": 10250
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0674422979354858,
      "learning_rate": 1.920481882005861e-05,
      "loss": 2.8476,
      "step": 10251
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.179013729095459,
      "learning_rate": 1.9204657911607116e-05,
      "loss": 2.5959,
      "step": 10252
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.124879240989685,
      "learning_rate": 1.9204496987551222e-05,
      "loss": 2.8549,
      "step": 10253
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9940782189369202,
      "learning_rate": 1.920433604789121e-05,
      "loss": 2.5395,
      "step": 10254
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.182966947555542,
      "learning_rate": 1.9204175092627345e-05,
      "loss": 2.4617,
      "step": 10255
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.07710862159729,
      "learning_rate": 1.9204014121759905e-05,
      "loss": 2.6899,
      "step": 10256
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0357964038848877,
      "learning_rate": 1.920385313528916e-05,
      "loss": 2.517,
      "step": 10257
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0301986932754517,
      "learning_rate": 1.9203692133215384e-05,
      "loss": 2.4525,
      "step": 10258
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1696386337280273,
      "learning_rate": 1.9203531115538853e-05,
      "loss": 2.421,
      "step": 10259
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0741798877716064,
      "learning_rate": 1.9203370082259833e-05,
      "loss": 2.5021,
      "step": 10260
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2089911699295044,
      "learning_rate": 1.9203209033378605e-05,
      "loss": 2.5898,
      "step": 10261
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9278324246406555,
      "learning_rate": 1.9203047968895436e-05,
      "loss": 2.7006,
      "step": 10262
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9877657294273376,
      "learning_rate": 1.92028868888106e-05,
      "loss": 2.6291,
      "step": 10263
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1345946788787842,
      "learning_rate": 1.9202725793124376e-05,
      "loss": 2.4494,
      "step": 10264
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.136228322982788,
      "learning_rate": 1.920256468183703e-05,
      "loss": 2.7027,
      "step": 10265
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0482043027877808,
      "learning_rate": 1.9202403554948837e-05,
      "loss": 2.6147,
      "step": 10266
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.099366307258606,
      "learning_rate": 1.9202242412460067e-05,
      "loss": 2.5652,
      "step": 10267
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9467266201972961,
      "learning_rate": 1.9202081254371002e-05,
      "loss": 2.5899,
      "step": 10268
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1290240287780762,
      "learning_rate": 1.920192008068191e-05,
      "loss": 2.5738,
      "step": 10269
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0188332796096802,
      "learning_rate": 1.920175889139306e-05,
      "loss": 2.7079,
      "step": 10270
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9986333250999451,
      "learning_rate": 1.920159768650473e-05,
      "loss": 2.6909,
      "step": 10271
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.082288146018982,
      "learning_rate": 1.92014364660172e-05,
      "loss": 2.6799,
      "step": 10272
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.125101089477539,
      "learning_rate": 1.920127522993073e-05,
      "loss": 2.5627,
      "step": 10273
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7004634141921997,
      "learning_rate": 1.9201113978245602e-05,
      "loss": 2.2724,
      "step": 10274
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0385876893997192,
      "learning_rate": 1.920095271096208e-05,
      "loss": 2.375,
      "step": 10275
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9968235492706299,
      "learning_rate": 1.9200791428080453e-05,
      "loss": 2.6771,
      "step": 10276
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0295284986495972,
      "learning_rate": 1.920063012960098e-05,
      "loss": 2.4747,
      "step": 10277
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0057718753814697,
      "learning_rate": 1.9200468815523944e-05,
      "loss": 2.831,
      "step": 10278
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.017061471939087,
      "learning_rate": 1.9200307485849614e-05,
      "loss": 2.3945,
      "step": 10279
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9572162628173828,
      "learning_rate": 1.9200146140578262e-05,
      "loss": 2.4272,
      "step": 10280
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.910519540309906,
      "learning_rate": 1.9199984779710164e-05,
      "loss": 2.3947,
      "step": 10281
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0538618564605713,
      "learning_rate": 1.9199823403245594e-05,
      "loss": 2.2567,
      "step": 10282
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0642023086547852,
      "learning_rate": 1.9199662011184824e-05,
      "loss": 2.7367,
      "step": 10283
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.107409119606018,
      "learning_rate": 1.9199500603528127e-05,
      "loss": 2.4644,
      "step": 10284
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9511569738388062,
      "learning_rate": 1.919933918027578e-05,
      "loss": 2.6684,
      "step": 10285
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3543412685394287,
      "learning_rate": 1.9199177741428055e-05,
      "loss": 2.6279,
      "step": 10286
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3420013189315796,
      "learning_rate": 1.9199016286985222e-05,
      "loss": 2.6599,
      "step": 10287
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2591246366500854,
      "learning_rate": 1.919885481694756e-05,
      "loss": 2.5686,
      "step": 10288
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0409332513809204,
      "learning_rate": 1.9198693331315342e-05,
      "loss": 2.5152,
      "step": 10289
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3114781379699707,
      "learning_rate": 1.919853183008884e-05,
      "loss": 2.6069,
      "step": 10290
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0147337913513184,
      "learning_rate": 1.9198370313268326e-05,
      "loss": 2.3518,
      "step": 10291
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.008387804031372,
      "learning_rate": 1.919820878085408e-05,
      "loss": 2.7396,
      "step": 10292
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9647898077964783,
      "learning_rate": 1.919804723284637e-05,
      "loss": 2.527,
      "step": 10293
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0560336112976074,
      "learning_rate": 1.9197885669245472e-05,
      "loss": 2.5405,
      "step": 10294
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.911172091960907,
      "learning_rate": 1.9197724090051657e-05,
      "loss": 2.8808,
      "step": 10295
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0641241073608398,
      "learning_rate": 1.9197562495265206e-05,
      "loss": 2.4087,
      "step": 10296
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0523850917816162,
      "learning_rate": 1.9197400884886382e-05,
      "loss": 2.6781,
      "step": 10297
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9730545878410339,
      "learning_rate": 1.919723925891547e-05,
      "loss": 2.3766,
      "step": 10298
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9143658876419067,
      "learning_rate": 1.9197077617352743e-05,
      "loss": 2.5788,
      "step": 10299
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9646704792976379,
      "learning_rate": 1.9196915960198467e-05,
      "loss": 2.5598,
      "step": 10300
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0419529676437378,
      "learning_rate": 1.9196754287452923e-05,
      "loss": 2.5559,
      "step": 10301
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8458824157714844,
      "learning_rate": 1.9196592599116378e-05,
      "loss": 2.5664,
      "step": 10302
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9256242513656616,
      "learning_rate": 1.919643089518911e-05,
      "loss": 2.6793,
      "step": 10303
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9639269113540649,
      "learning_rate": 1.9196269175671404e-05,
      "loss": 2.6327,
      "step": 10304
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0231741666793823,
      "learning_rate": 1.9196107440563516e-05,
      "loss": 2.3951,
      "step": 10305
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0990999937057495,
      "learning_rate": 1.919594568986573e-05,
      "loss": 2.3339,
      "step": 10306
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1887315511703491,
      "learning_rate": 1.919578392357832e-05,
      "loss": 2.4017,
      "step": 10307
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9529128670692444,
      "learning_rate": 1.9195622141701554e-05,
      "loss": 2.5795,
      "step": 10308
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.993586003780365,
      "learning_rate": 1.9195460344235718e-05,
      "loss": 2.5503,
      "step": 10309
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9613595604896545,
      "learning_rate": 1.9195298531181073e-05,
      "loss": 2.3647,
      "step": 10310
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0125718116760254,
      "learning_rate": 1.9195136702537902e-05,
      "loss": 2.764,
      "step": 10311
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9565983414649963,
      "learning_rate": 1.919497485830648e-05,
      "loss": 2.5073,
      "step": 10312
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2094745635986328,
      "learning_rate": 1.9194812998487074e-05,
      "loss": 2.6669,
      "step": 10313
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0324664115905762,
      "learning_rate": 1.9194651123079963e-05,
      "loss": 2.3596,
      "step": 10314
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1019476652145386,
      "learning_rate": 1.9194489232085422e-05,
      "loss": 2.5702,
      "step": 10315
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0101341009140015,
      "learning_rate": 1.9194327325503724e-05,
      "loss": 2.5383,
      "step": 10316
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0514326095581055,
      "learning_rate": 1.9194165403335145e-05,
      "loss": 2.5779,
      "step": 10317
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.078021764755249,
      "learning_rate": 1.919400346557996e-05,
      "loss": 2.4881,
      "step": 10318
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9600306749343872,
      "learning_rate": 1.919384151223844e-05,
      "loss": 2.695,
      "step": 10319
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0450671911239624,
      "learning_rate": 1.919367954331086e-05,
      "loss": 2.6942,
      "step": 10320
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0521501302719116,
      "learning_rate": 1.91935175587975e-05,
      "loss": 2.4746,
      "step": 10321
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.246606469154358,
      "learning_rate": 1.919335555869863e-05,
      "loss": 2.5131,
      "step": 10322
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9804206490516663,
      "learning_rate": 1.9193193543014525e-05,
      "loss": 2.5505,
      "step": 10323
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8993239402770996,
      "learning_rate": 1.919303151174546e-05,
      "loss": 2.4905,
      "step": 10324
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0701844692230225,
      "learning_rate": 1.9192869464891708e-05,
      "loss": 2.5915,
      "step": 10325
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9738999605178833,
      "learning_rate": 1.9192707402453545e-05,
      "loss": 2.5188,
      "step": 10326
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1990783214569092,
      "learning_rate": 1.919254532443125e-05,
      "loss": 2.5446,
      "step": 10327
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8892751336097717,
      "learning_rate": 1.9192383230825094e-05,
      "loss": 2.6537,
      "step": 10328
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0486960411071777,
      "learning_rate": 1.9192221121635347e-05,
      "loss": 2.8039,
      "step": 10329
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0117586851119995,
      "learning_rate": 1.9192058996862292e-05,
      "loss": 2.4326,
      "step": 10330
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.943372905254364,
      "learning_rate": 1.9191896856506202e-05,
      "loss": 2.6567,
      "step": 10331
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0717865228652954,
      "learning_rate": 1.9191734700567348e-05,
      "loss": 2.5405,
      "step": 10332
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0307888984680176,
      "learning_rate": 1.919157252904601e-05,
      "loss": 2.7658,
      "step": 10333
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0690715312957764,
      "learning_rate": 1.9191410341942455e-05,
      "loss": 2.7313,
      "step": 10334
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9422328472137451,
      "learning_rate": 1.9191248139256965e-05,
      "loss": 2.5641,
      "step": 10335
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0032894611358643,
      "learning_rate": 1.9191085920989815e-05,
      "loss": 2.367,
      "step": 10336
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1717175245285034,
      "learning_rate": 1.9190923687141278e-05,
      "loss": 2.7789,
      "step": 10337
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0629515647888184,
      "learning_rate": 1.9190761437711627e-05,
      "loss": 2.6111,
      "step": 10338
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.048816204071045,
      "learning_rate": 1.919059917270114e-05,
      "loss": 2.4937,
      "step": 10339
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9596903324127197,
      "learning_rate": 1.9190436892110093e-05,
      "loss": 2.4848,
      "step": 10340
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.8859308362007141,
      "learning_rate": 1.9190274595938758e-05,
      "loss": 2.5679,
      "step": 10341
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.06343412399292,
      "learning_rate": 1.919011228418741e-05,
      "loss": 2.5976,
      "step": 10342
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.105288028717041,
      "learning_rate": 1.918994995685633e-05,
      "loss": 2.6173,
      "step": 10343
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.116883397102356,
      "learning_rate": 1.9189787613945782e-05,
      "loss": 2.6205,
      "step": 10344
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0207940340042114,
      "learning_rate": 1.9189625255456055e-05,
      "loss": 2.3041,
      "step": 10345
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0347353219985962,
      "learning_rate": 1.9189462881387414e-05,
      "loss": 2.7042,
      "step": 10346
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0541127920150757,
      "learning_rate": 1.918930049174014e-05,
      "loss": 2.447,
      "step": 10347
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1739051342010498,
      "learning_rate": 1.91891380865145e-05,
      "loss": 2.4781,
      "step": 10348
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9710239171981812,
      "learning_rate": 1.918897566571078e-05,
      "loss": 2.415,
      "step": 10349
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9854431748390198,
      "learning_rate": 1.918881322932925e-05,
      "loss": 2.6067,
      "step": 10350
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0011001825332642,
      "learning_rate": 1.9188650777370184e-05,
      "loss": 2.4467,
      "step": 10351
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9918697476387024,
      "learning_rate": 1.9188488309833862e-05,
      "loss": 2.5893,
      "step": 10352
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9816539287567139,
      "learning_rate": 1.9188325826720557e-05,
      "loss": 2.6086,
      "step": 10353
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.061082124710083,
      "learning_rate": 1.9188163328030543e-05,
      "loss": 2.3889,
      "step": 10354
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0433101654052734,
      "learning_rate": 1.91880008137641e-05,
      "loss": 2.6072,
      "step": 10355
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0469911098480225,
      "learning_rate": 1.9187838283921493e-05,
      "loss": 2.6171,
      "step": 10356
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0561909675598145,
      "learning_rate": 1.918767573850301e-05,
      "loss": 2.5233,
      "step": 10357
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0088587999343872,
      "learning_rate": 1.9187513177508918e-05,
      "loss": 2.47,
      "step": 10358
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0133814811706543,
      "learning_rate": 1.91873506009395e-05,
      "loss": 2.6249,
      "step": 10359
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9563086032867432,
      "learning_rate": 1.9187188008795026e-05,
      "loss": 2.6523,
      "step": 10360
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.9915754199028015,
      "learning_rate": 1.9187025401075772e-05,
      "loss": 2.417,
      "step": 10361
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0667533874511719,
      "learning_rate": 1.9186862777782014e-05,
      "loss": 2.6356,
      "step": 10362
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0057814121246338,
      "learning_rate": 1.918670013891403e-05,
      "loss": 2.4758,
      "step": 10363
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0171993970870972,
      "learning_rate": 1.9186537484472093e-05,
      "loss": 2.3329,
      "step": 10364
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0752537250518799,
      "learning_rate": 1.9186374814456482e-05,
      "loss": 2.717,
      "step": 10365
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0243628025054932,
      "learning_rate": 1.918621212886747e-05,
      "loss": 2.6542,
      "step": 10366
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3777190446853638,
      "learning_rate": 1.9186049427705332e-05,
      "loss": 2.5161,
      "step": 10367
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.240177035331726,
      "learning_rate": 1.918588671097035e-05,
      "loss": 2.5293,
      "step": 10368
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0181840658187866,
      "learning_rate": 1.9185723978662787e-05,
      "loss": 2.7085,
      "step": 10369
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9866178631782532,
      "learning_rate": 1.9185561230782934e-05,
      "loss": 2.3681,
      "step": 10370
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9728995561599731,
      "learning_rate": 1.9185398467331054e-05,
      "loss": 2.8061,
      "step": 10371
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.919176459312439,
      "learning_rate": 1.9185235688307433e-05,
      "loss": 2.4705,
      "step": 10372
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0160294771194458,
      "learning_rate": 1.918507289371234e-05,
      "loss": 2.6214,
      "step": 10373
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1472817659378052,
      "learning_rate": 1.918491008354606e-05,
      "loss": 2.3626,
      "step": 10374
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0485239028930664,
      "learning_rate": 1.9184747257808858e-05,
      "loss": 2.6008,
      "step": 10375
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9699440002441406,
      "learning_rate": 1.9184584416501017e-05,
      "loss": 2.6254,
      "step": 10376
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0801074504852295,
      "learning_rate": 1.918442155962281e-05,
      "loss": 2.4827,
      "step": 10377
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0231965780258179,
      "learning_rate": 1.918425868717451e-05,
      "loss": 2.6347,
      "step": 10378
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0827727317810059,
      "learning_rate": 1.91840957991564e-05,
      "loss": 2.5544,
      "step": 10379
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2723093032836914,
      "learning_rate": 1.9183932895568757e-05,
      "loss": 2.4339,
      "step": 10380
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1177870035171509,
      "learning_rate": 1.918376997641185e-05,
      "loss": 2.6295,
      "step": 10381
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0356240272521973,
      "learning_rate": 1.918360704168596e-05,
      "loss": 2.5389,
      "step": 10382
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.172776222229004,
      "learning_rate": 1.918344409139136e-05,
      "loss": 2.6709,
      "step": 10383
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1771166324615479,
      "learning_rate": 1.918328112552833e-05,
      "loss": 2.8398,
      "step": 10384
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9446939826011658,
      "learning_rate": 1.9183118144097142e-05,
      "loss": 2.7365,
      "step": 10385
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0417308807373047,
      "learning_rate": 1.9182955147098075e-05,
      "loss": 2.3661,
      "step": 10386
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9945443868637085,
      "learning_rate": 1.9182792134531407e-05,
      "loss": 2.4645,
      "step": 10387
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9070518016815186,
      "learning_rate": 1.918262910639741e-05,
      "loss": 2.6327,
      "step": 10388
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.116194725036621,
      "learning_rate": 1.9182466062696364e-05,
      "loss": 2.4491,
      "step": 10389
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0741089582443237,
      "learning_rate": 1.918230300342854e-05,
      "loss": 2.9024,
      "step": 10390
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1953562498092651,
      "learning_rate": 1.9182139928594225e-05,
      "loss": 2.5424,
      "step": 10391
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.183512806892395,
      "learning_rate": 1.918197683819369e-05,
      "loss": 2.5263,
      "step": 10392
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9425988793373108,
      "learning_rate": 1.9181813732227205e-05,
      "loss": 2.585,
      "step": 10393
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.068112850189209,
      "learning_rate": 1.9181650610695054e-05,
      "loss": 2.4608,
      "step": 10394
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0659799575805664,
      "learning_rate": 1.918148747359751e-05,
      "loss": 2.5975,
      "step": 10395
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9867761135101318,
      "learning_rate": 1.9181324320934852e-05,
      "loss": 2.5114,
      "step": 10396
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0268661975860596,
      "learning_rate": 1.9181161152707357e-05,
      "loss": 2.6774,
      "step": 10397
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1505261659622192,
      "learning_rate": 1.91809979689153e-05,
      "loss": 2.6862,
      "step": 10398
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1279873847961426,
      "learning_rate": 1.9180834769558958e-05,
      "loss": 2.5469,
      "step": 10399
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0357797145843506,
      "learning_rate": 1.9180671554638605e-05,
      "loss": 2.6532,
      "step": 10400
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.076790690422058,
      "learning_rate": 1.9180508324154524e-05,
      "loss": 2.8364,
      "step": 10401
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1699544191360474,
      "learning_rate": 1.9180345078106985e-05,
      "loss": 2.6322,
      "step": 10402
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1283698081970215,
      "learning_rate": 1.918018181649627e-05,
      "loss": 2.5388,
      "step": 10403
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9295153617858887,
      "learning_rate": 1.918001853932265e-05,
      "loss": 2.831,
      "step": 10404
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9526814818382263,
      "learning_rate": 1.9179855246586408e-05,
      "loss": 2.5966,
      "step": 10405
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0317509174346924,
      "learning_rate": 1.917969193828782e-05,
      "loss": 2.528,
      "step": 10406
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2068464756011963,
      "learning_rate": 1.9179528614427158e-05,
      "loss": 2.2854,
      "step": 10407
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.136793613433838,
      "learning_rate": 1.9179365275004702e-05,
      "loss": 2.7904,
      "step": 10408
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1701326370239258,
      "learning_rate": 1.917920192002073e-05,
      "loss": 2.8376,
      "step": 10409
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0130245685577393,
      "learning_rate": 1.917903854947552e-05,
      "loss": 2.5709,
      "step": 10410
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0253721475601196,
      "learning_rate": 1.917887516336934e-05,
      "loss": 2.5469,
      "step": 10411
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1421037912368774,
      "learning_rate": 1.917871176170248e-05,
      "loss": 2.4703,
      "step": 10412
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9991397261619568,
      "learning_rate": 1.917854834447521e-05,
      "loss": 2.5565,
      "step": 10413
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0833876132965088,
      "learning_rate": 1.9178384911687804e-05,
      "loss": 2.79,
      "step": 10414
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9557576775550842,
      "learning_rate": 1.9178221463340544e-05,
      "loss": 2.624,
      "step": 10415
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9476280212402344,
      "learning_rate": 1.9178057999433708e-05,
      "loss": 2.6565,
      "step": 10416
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0267590284347534,
      "learning_rate": 1.9177894519967568e-05,
      "loss": 2.71,
      "step": 10417
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9837409853935242,
      "learning_rate": 1.9177731024942406e-05,
      "loss": 2.6093,
      "step": 10418
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0297987461090088,
      "learning_rate": 1.9177567514358498e-05,
      "loss": 2.6118,
      "step": 10419
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.914928138256073,
      "learning_rate": 1.9177403988216116e-05,
      "loss": 2.5518,
      "step": 10420
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.999242901802063,
      "learning_rate": 1.917724044651555e-05,
      "loss": 2.6035,
      "step": 10421
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0191986560821533,
      "learning_rate": 1.917707688925706e-05,
      "loss": 2.3778,
      "step": 10422
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1087284088134766,
      "learning_rate": 1.9176913316440936e-05,
      "loss": 2.5677,
      "step": 10423
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9413615465164185,
      "learning_rate": 1.9176749728067454e-05,
      "loss": 2.7644,
      "step": 10424
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1108866930007935,
      "learning_rate": 1.9176586124136887e-05,
      "loss": 2.5023,
      "step": 10425
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1841834783554077,
      "learning_rate": 1.9176422504649513e-05,
      "loss": 2.4835,
      "step": 10426
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0926513671875,
      "learning_rate": 1.917625886960561e-05,
      "loss": 2.651,
      "step": 10427
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9413369297981262,
      "learning_rate": 1.9176095219005458e-05,
      "loss": 2.4638,
      "step": 10428
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.943778395652771,
      "learning_rate": 1.9175931552849333e-05,
      "loss": 2.5961,
      "step": 10429
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.927425742149353,
      "learning_rate": 1.917576787113751e-05,
      "loss": 2.5421,
      "step": 10430
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9634019732475281,
      "learning_rate": 1.9175604173870268e-05,
      "loss": 2.7989,
      "step": 10431
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9367138147354126,
      "learning_rate": 1.9175440461047886e-05,
      "loss": 2.5845,
      "step": 10432
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9169178605079651,
      "learning_rate": 1.917527673267064e-05,
      "loss": 2.3985,
      "step": 10433
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0197585821151733,
      "learning_rate": 1.917511298873881e-05,
      "loss": 2.5443,
      "step": 10434
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.040601134300232,
      "learning_rate": 1.9174949229252668e-05,
      "loss": 2.5352,
      "step": 10435
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0760165452957153,
      "learning_rate": 1.9174785454212497e-05,
      "loss": 2.5883,
      "step": 10436
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0532246828079224,
      "learning_rate": 1.9174621663618573e-05,
      "loss": 2.5222,
      "step": 10437
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9637717008590698,
      "learning_rate": 1.917445785747117e-05,
      "loss": 2.6398,
      "step": 10438
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.085994839668274,
      "learning_rate": 1.9174294035770575e-05,
      "loss": 2.6355,
      "step": 10439
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1639295816421509,
      "learning_rate": 1.9174130198517057e-05,
      "loss": 2.5794,
      "step": 10440
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9581688046455383,
      "learning_rate": 1.9173966345710897e-05,
      "loss": 2.4237,
      "step": 10441
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.007796049118042,
      "learning_rate": 1.917380247735237e-05,
      "loss": 2.3654,
      "step": 10442
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9761484265327454,
      "learning_rate": 1.9173638593441762e-05,
      "loss": 2.745,
      "step": 10443
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9844896793365479,
      "learning_rate": 1.9173474693979343e-05,
      "loss": 2.4318,
      "step": 10444
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1518739461898804,
      "learning_rate": 1.9173310778965392e-05,
      "loss": 2.551,
      "step": 10445
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9853463768959045,
      "learning_rate": 1.9173146848400184e-05,
      "loss": 2.5202,
      "step": 10446
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9898627400398254,
      "learning_rate": 1.9172982902284007e-05,
      "loss": 2.7284,
      "step": 10447
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.057628870010376,
      "learning_rate": 1.9172818940617128e-05,
      "loss": 2.4648,
      "step": 10448
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1121114492416382,
      "learning_rate": 1.917265496339983e-05,
      "loss": 2.549,
      "step": 10449
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1339670419692993,
      "learning_rate": 1.9172490970632394e-05,
      "loss": 2.7171,
      "step": 10450
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9235043525695801,
      "learning_rate": 1.917232696231509e-05,
      "loss": 2.3331,
      "step": 10451
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0956395864486694,
      "learning_rate": 1.9172162938448204e-05,
      "loss": 2.6019,
      "step": 10452
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.596468210220337,
      "learning_rate": 1.9171998899032012e-05,
      "loss": 2.6946,
      "step": 10453
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.020149827003479,
      "learning_rate": 1.9171834844066785e-05,
      "loss": 2.4813,
      "step": 10454
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9889047145843506,
      "learning_rate": 1.917167077355281e-05,
      "loss": 2.4604,
      "step": 10455
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.046352744102478,
      "learning_rate": 1.9171506687490366e-05,
      "loss": 2.6482,
      "step": 10456
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9826037287712097,
      "learning_rate": 1.9171342585879725e-05,
      "loss": 2.4183,
      "step": 10457
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0205278396606445,
      "learning_rate": 1.9171178468721162e-05,
      "loss": 2.4723,
      "step": 10458
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0305653810501099,
      "learning_rate": 1.9171014336014966e-05,
      "loss": 2.4831,
      "step": 10459
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9916089773178101,
      "learning_rate": 1.917085018776141e-05,
      "loss": 2.6178,
      "step": 10460
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0926570892333984,
      "learning_rate": 1.917068602396077e-05,
      "loss": 2.6547,
      "step": 10461
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0497723817825317,
      "learning_rate": 1.9170521844613326e-05,
      "loss": 2.4875,
      "step": 10462
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0257529020309448,
      "learning_rate": 1.917035764971936e-05,
      "loss": 2.6464,
      "step": 10463
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0671272277832031,
      "learning_rate": 1.9170193439279146e-05,
      "loss": 2.7492,
      "step": 10464
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9123449921607971,
      "learning_rate": 1.9170029213292963e-05,
      "loss": 2.6883,
      "step": 10465
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9899309873580933,
      "learning_rate": 1.916986497176109e-05,
      "loss": 2.6819,
      "step": 10466
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0246515274047852,
      "learning_rate": 1.9169700714683802e-05,
      "loss": 2.4845,
      "step": 10467
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0524269342422485,
      "learning_rate": 1.9169536442061384e-05,
      "loss": 2.1813,
      "step": 10468
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0606330633163452,
      "learning_rate": 1.9169372153894113e-05,
      "loss": 2.6765,
      "step": 10469
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0075984001159668,
      "learning_rate": 1.916920785018226e-05,
      "loss": 2.4449,
      "step": 10470
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0424387454986572,
      "learning_rate": 1.9169043530926116e-05,
      "loss": 2.7405,
      "step": 10471
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9136935472488403,
      "learning_rate": 1.916887919612595e-05,
      "loss": 2.5635,
      "step": 10472
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.087633728981018,
      "learning_rate": 1.9168714845782042e-05,
      "loss": 2.5076,
      "step": 10473
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0907529592514038,
      "learning_rate": 1.9168550479894676e-05,
      "loss": 2.5152,
      "step": 10474
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9617814421653748,
      "learning_rate": 1.9168386098464124e-05,
      "loss": 2.2916,
      "step": 10475
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0860483646392822,
      "learning_rate": 1.916822170149067e-05,
      "loss": 2.663,
      "step": 10476
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0584056377410889,
      "learning_rate": 1.916805728897459e-05,
      "loss": 2.4415,
      "step": 10477
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0692044496536255,
      "learning_rate": 1.916789286091616e-05,
      "loss": 2.8127,
      "step": 10478
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0437874794006348,
      "learning_rate": 1.9167728417315663e-05,
      "loss": 2.5916,
      "step": 10479
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1602474451065063,
      "learning_rate": 1.9167563958173375e-05,
      "loss": 2.6904,
      "step": 10480
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.5489351749420166,
      "learning_rate": 1.916739948348958e-05,
      "loss": 2.4924,
      "step": 10481
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1467700004577637,
      "learning_rate": 1.916723499326455e-05,
      "loss": 2.7411,
      "step": 10482
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.020919919013977,
      "learning_rate": 1.9167070487498567e-05,
      "loss": 2.6016,
      "step": 10483
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9892421364784241,
      "learning_rate": 1.916690596619191e-05,
      "loss": 2.4851,
      "step": 10484
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0093873739242554,
      "learning_rate": 1.9166741429344856e-05,
      "loss": 2.5623,
      "step": 10485
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9585354328155518,
      "learning_rate": 1.916657687695769e-05,
      "loss": 2.6946,
      "step": 10486
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0568768978118896,
      "learning_rate": 1.9166412309030683e-05,
      "loss": 2.4691,
      "step": 10487
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1196342706680298,
      "learning_rate": 1.916624772556412e-05,
      "loss": 2.4177,
      "step": 10488
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9774077534675598,
      "learning_rate": 1.9166083126558275e-05,
      "loss": 2.4285,
      "step": 10489
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9949453473091125,
      "learning_rate": 1.9165918512013434e-05,
      "loss": 2.6217,
      "step": 10490
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0556113719940186,
      "learning_rate": 1.9165753881929868e-05,
      "loss": 2.6892,
      "step": 10491
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0220404863357544,
      "learning_rate": 1.916558923630786e-05,
      "loss": 2.6422,
      "step": 10492
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.052907109260559,
      "learning_rate": 1.9165424575147687e-05,
      "loss": 2.5452,
      "step": 10493
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.8740325570106506,
      "learning_rate": 1.9165259898449635e-05,
      "loss": 2.5257,
      "step": 10494
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0135053396224976,
      "learning_rate": 1.9165095206213976e-05,
      "loss": 2.489,
      "step": 10495
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.987623929977417,
      "learning_rate": 1.916493049844099e-05,
      "loss": 2.4792,
      "step": 10496
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0280983448028564,
      "learning_rate": 1.916476577513096e-05,
      "loss": 2.748,
      "step": 10497
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9900841116905212,
      "learning_rate": 1.916460103628416e-05,
      "loss": 2.524,
      "step": 10498
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9941661953926086,
      "learning_rate": 1.9164436281900873e-05,
      "loss": 2.6738,
      "step": 10499
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.037089228630066,
      "learning_rate": 1.9164271511981377e-05,
      "loss": 2.598,
      "step": 10500
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0582075119018555,
      "learning_rate": 1.9164106726525954e-05,
      "loss": 2.8445,
      "step": 10501
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0012120008468628,
      "learning_rate": 1.916394192553488e-05,
      "loss": 2.5794,
      "step": 10502
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1339879035949707,
      "learning_rate": 1.916377710900844e-05,
      "loss": 2.4121,
      "step": 10503
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0164955854415894,
      "learning_rate": 1.9163612276946902e-05,
      "loss": 2.6306,
      "step": 10504
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.8799725770950317,
      "learning_rate": 1.9163447429350554e-05,
      "loss": 2.523,
      "step": 10505
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.081013560295105,
      "learning_rate": 1.9163282566219676e-05,
      "loss": 2.5459,
      "step": 10506
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.011636734008789,
      "learning_rate": 1.9163117687554546e-05,
      "loss": 2.4666,
      "step": 10507
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0751338005065918,
      "learning_rate": 1.916295279335544e-05,
      "loss": 2.6669,
      "step": 10508
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9864461421966553,
      "learning_rate": 1.9162787883622643e-05,
      "loss": 2.7328,
      "step": 10509
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1585086584091187,
      "learning_rate": 1.916262295835643e-05,
      "loss": 2.6734,
      "step": 10510
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1103174686431885,
      "learning_rate": 1.9162458017557082e-05,
      "loss": 2.6157,
      "step": 10511
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1124061346054077,
      "learning_rate": 1.916229306122488e-05,
      "loss": 2.5961,
      "step": 10512
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.104546070098877,
      "learning_rate": 1.9162128089360103e-05,
      "loss": 2.4379,
      "step": 10513
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0310337543487549,
      "learning_rate": 1.916196310196303e-05,
      "loss": 2.568,
      "step": 10514
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.95514976978302,
      "learning_rate": 1.9161798099033945e-05,
      "loss": 2.5833,
      "step": 10515
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9674519300460815,
      "learning_rate": 1.9161633080573118e-05,
      "loss": 2.4813,
      "step": 10516
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9548211097717285,
      "learning_rate": 1.9161468046580837e-05,
      "loss": 2.5784,
      "step": 10517
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1194031238555908,
      "learning_rate": 1.916130299705738e-05,
      "loss": 2.4047,
      "step": 10518
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0130906105041504,
      "learning_rate": 1.9161137932003027e-05,
      "loss": 2.5103,
      "step": 10519
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0151121616363525,
      "learning_rate": 1.9160972851418056e-05,
      "loss": 2.4705,
      "step": 10520
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9961034655570984,
      "learning_rate": 1.916080775530275e-05,
      "loss": 2.3739,
      "step": 10521
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9233102798461914,
      "learning_rate": 1.916064264365738e-05,
      "loss": 2.538,
      "step": 10522
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1026716232299805,
      "learning_rate": 1.916047751648224e-05,
      "loss": 2.4901,
      "step": 10523
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0471887588500977,
      "learning_rate": 1.91603123737776e-05,
      "loss": 2.6039,
      "step": 10524
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9512073397636414,
      "learning_rate": 1.9160147215543744e-05,
      "loss": 2.4847,
      "step": 10525
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0543746948242188,
      "learning_rate": 1.9159982041780947e-05,
      "loss": 2.8251,
      "step": 10526
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9959934949874878,
      "learning_rate": 1.9159816852489496e-05,
      "loss": 2.432,
      "step": 10527
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0567764043807983,
      "learning_rate": 1.9159651647669667e-05,
      "loss": 2.5879,
      "step": 10528
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9671300053596497,
      "learning_rate": 1.9159486427321742e-05,
      "loss": 2.4179,
      "step": 10529
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0326695442199707,
      "learning_rate": 1.9159321191445996e-05,
      "loss": 2.4527,
      "step": 10530
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.079963207244873,
      "learning_rate": 1.9159155940042716e-05,
      "loss": 2.493,
      "step": 10531
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.044742465019226,
      "learning_rate": 1.9158990673112177e-05,
      "loss": 2.3691,
      "step": 10532
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0447657108306885,
      "learning_rate": 1.9158825390654664e-05,
      "loss": 2.498,
      "step": 10533
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2003979682922363,
      "learning_rate": 1.915866009267045e-05,
      "loss": 2.6306,
      "step": 10534
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1622668504714966,
      "learning_rate": 1.9158494779159824e-05,
      "loss": 2.6156,
      "step": 10535
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.078626275062561,
      "learning_rate": 1.915832945012306e-05,
      "loss": 2.566,
      "step": 10536
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0127615928649902,
      "learning_rate": 1.915816410556044e-05,
      "loss": 2.5844,
      "step": 10537
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1870875358581543,
      "learning_rate": 1.915799874547225e-05,
      "loss": 2.4684,
      "step": 10538
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9314553141593933,
      "learning_rate": 1.9157833369858758e-05,
      "loss": 2.5447,
      "step": 10539
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9917140603065491,
      "learning_rate": 1.9157667978720253e-05,
      "loss": 2.6697,
      "step": 10540
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.213065505027771,
      "learning_rate": 1.9157502572057012e-05,
      "loss": 2.592,
      "step": 10541
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9648480415344238,
      "learning_rate": 1.915733714986932e-05,
      "loss": 2.6227,
      "step": 10542
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0061155557632446,
      "learning_rate": 1.9157171712157452e-05,
      "loss": 2.5294,
      "step": 10543
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9416047930717468,
      "learning_rate": 1.9157006258921695e-05,
      "loss": 2.5152,
      "step": 10544
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9951382875442505,
      "learning_rate": 1.915684079016232e-05,
      "loss": 2.523,
      "step": 10545
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0004774332046509,
      "learning_rate": 1.9156675305879617e-05,
      "loss": 2.5943,
      "step": 10546
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1983392238616943,
      "learning_rate": 1.9156509806073863e-05,
      "loss": 2.4992,
      "step": 10547
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.8780987858772278,
      "learning_rate": 1.9156344290745335e-05,
      "loss": 2.5508,
      "step": 10548
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9312491416931152,
      "learning_rate": 1.915617875989432e-05,
      "loss": 2.4328,
      "step": 10549
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.033380150794983,
      "learning_rate": 1.915601321352109e-05,
      "loss": 2.5668,
      "step": 10550
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9929112792015076,
      "learning_rate": 1.9155847651625935e-05,
      "loss": 2.5872,
      "step": 10551
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0125575065612793,
      "learning_rate": 1.915568207420913e-05,
      "loss": 2.5391,
      "step": 10552
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9647542238235474,
      "learning_rate": 1.9155516481270958e-05,
      "loss": 2.2799,
      "step": 10553
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9688026309013367,
      "learning_rate": 1.9155350872811697e-05,
      "loss": 2.7066,
      "step": 10554
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.098237156867981,
      "learning_rate": 1.915518524883163e-05,
      "loss": 2.7738,
      "step": 10555
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.011674165725708,
      "learning_rate": 1.9155019609331037e-05,
      "loss": 2.566,
      "step": 10556
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0556995868682861,
      "learning_rate": 1.9154853954310205e-05,
      "loss": 2.6318,
      "step": 10557
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.002830982208252,
      "learning_rate": 1.9154688283769402e-05,
      "loss": 2.4282,
      "step": 10558
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9770310521125793,
      "learning_rate": 1.9154522597708922e-05,
      "loss": 2.6033,
      "step": 10559
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0308741331100464,
      "learning_rate": 1.9154356896129037e-05,
      "loss": 2.655,
      "step": 10560
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9471821188926697,
      "learning_rate": 1.915419117903003e-05,
      "loss": 2.6377,
      "step": 10561
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0156172513961792,
      "learning_rate": 1.9154025446412182e-05,
      "loss": 2.5961,
      "step": 10562
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9746588468551636,
      "learning_rate": 1.9153859698275775e-05,
      "loss": 2.5383,
      "step": 10563
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9886695146560669,
      "learning_rate": 1.915369393462109e-05,
      "loss": 2.1352,
      "step": 10564
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1922674179077148,
      "learning_rate": 1.915352815544841e-05,
      "loss": 2.2428,
      "step": 10565
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0361803770065308,
      "learning_rate": 1.915336236075801e-05,
      "loss": 2.3463,
      "step": 10566
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.084966778755188,
      "learning_rate": 1.915319655055018e-05,
      "loss": 2.7045,
      "step": 10567
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0198498964309692,
      "learning_rate": 1.915303072482519e-05,
      "loss": 2.696,
      "step": 10568
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9992647171020508,
      "learning_rate": 1.915286488358333e-05,
      "loss": 2.6271,
      "step": 10569
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.17771315574646,
      "learning_rate": 1.915269902682488e-05,
      "loss": 2.5914,
      "step": 10570
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1542385816574097,
      "learning_rate": 1.9152533154550116e-05,
      "loss": 2.582,
      "step": 10571
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9546933770179749,
      "learning_rate": 1.9152367266759325e-05,
      "loss": 2.6655,
      "step": 10572
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1077834367752075,
      "learning_rate": 1.9152201363452783e-05,
      "loss": 2.5064,
      "step": 10573
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9272311329841614,
      "learning_rate": 1.9152035444630777e-05,
      "loss": 2.3855,
      "step": 10574
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9801915287971497,
      "learning_rate": 1.9151869510293583e-05,
      "loss": 2.3161,
      "step": 10575
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0046370029449463,
      "learning_rate": 1.9151703560441484e-05,
      "loss": 2.6451,
      "step": 10576
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0561902523040771,
      "learning_rate": 1.9151537595074763e-05,
      "loss": 2.5874,
      "step": 10577
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0151420831680298,
      "learning_rate": 1.9151371614193702e-05,
      "loss": 2.6957,
      "step": 10578
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1058552265167236,
      "learning_rate": 1.915120561779858e-05,
      "loss": 2.5646,
      "step": 10579
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0875136852264404,
      "learning_rate": 1.9151039605889677e-05,
      "loss": 2.5487,
      "step": 10580
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9946866631507874,
      "learning_rate": 1.9150873578467276e-05,
      "loss": 2.5462,
      "step": 10581
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9384557008743286,
      "learning_rate": 1.9150707535531663e-05,
      "loss": 2.5239,
      "step": 10582
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9114360213279724,
      "learning_rate": 1.915054147708311e-05,
      "loss": 2.3993,
      "step": 10583
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.056329607963562,
      "learning_rate": 1.915037540312191e-05,
      "loss": 2.6067,
      "step": 10584
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.026757001876831,
      "learning_rate": 1.9150209313648335e-05,
      "loss": 2.5346,
      "step": 10585
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1664601564407349,
      "learning_rate": 1.915004320866267e-05,
      "loss": 2.3305,
      "step": 10586
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0166828632354736,
      "learning_rate": 1.9149877088165198e-05,
      "loss": 2.6948,
      "step": 10587
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1746445894241333,
      "learning_rate": 1.9149710952156195e-05,
      "loss": 2.5554,
      "step": 10588
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.008984088897705,
      "learning_rate": 1.9149544800635952e-05,
      "loss": 2.3676,
      "step": 10589
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.256487250328064,
      "learning_rate": 1.9149378633604744e-05,
      "loss": 2.6035,
      "step": 10590
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9738113284111023,
      "learning_rate": 1.914921245106285e-05,
      "loss": 2.3406,
      "step": 10591
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0246293544769287,
      "learning_rate": 1.9149046253010562e-05,
      "loss": 2.7034,
      "step": 10592
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.185907244682312,
      "learning_rate": 1.9148880039448158e-05,
      "loss": 2.6432,
      "step": 10593
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2822020053863525,
      "learning_rate": 1.9148713810375913e-05,
      "loss": 2.6719,
      "step": 10594
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9605487585067749,
      "learning_rate": 1.9148547565794112e-05,
      "loss": 2.4753,
      "step": 10595
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0366483926773071,
      "learning_rate": 1.914838130570304e-05,
      "loss": 2.4957,
      "step": 10596
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9872549176216125,
      "learning_rate": 1.9148215030102973e-05,
      "loss": 2.3097,
      "step": 10597
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0364940166473389,
      "learning_rate": 1.91480487389942e-05,
      "loss": 2.5915,
      "step": 10598
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0860520601272583,
      "learning_rate": 1.9147882432377005e-05,
      "loss": 2.5085,
      "step": 10599
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9816800355911255,
      "learning_rate": 1.914771611025166e-05,
      "loss": 2.5162,
      "step": 10600
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9843267202377319,
      "learning_rate": 1.9147549772618453e-05,
      "loss": 2.5091,
      "step": 10601
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9871957302093506,
      "learning_rate": 1.914738341947766e-05,
      "loss": 2.6584,
      "step": 10602
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9427940845489502,
      "learning_rate": 1.9147217050829575e-05,
      "loss": 2.604,
      "step": 10603
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9829334616661072,
      "learning_rate": 1.9147050666674467e-05,
      "loss": 2.7695,
      "step": 10604
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1772723197937012,
      "learning_rate": 1.9146884267012626e-05,
      "loss": 2.4596,
      "step": 10605
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0355502367019653,
      "learning_rate": 1.9146717851844336e-05,
      "loss": 2.7569,
      "step": 10606
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0153427124023438,
      "learning_rate": 1.9146551421169867e-05,
      "loss": 2.5944,
      "step": 10607
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9569417834281921,
      "learning_rate": 1.9146384974989515e-05,
      "loss": 2.4369,
      "step": 10608
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9844401478767395,
      "learning_rate": 1.914621851330355e-05,
      "loss": 2.5302,
      "step": 10609
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9907415509223938,
      "learning_rate": 1.914605203611227e-05,
      "loss": 2.5119,
      "step": 10610
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.004029393196106,
      "learning_rate": 1.914588554341594e-05,
      "loss": 2.6026,
      "step": 10611
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0228610038757324,
      "learning_rate": 1.9145719035214853e-05,
      "loss": 2.4638,
      "step": 10612
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9518954753875732,
      "learning_rate": 1.914555251150929e-05,
      "loss": 2.6557,
      "step": 10613
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9373789429664612,
      "learning_rate": 1.914538597229953e-05,
      "loss": 2.5154,
      "step": 10614
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1009737253189087,
      "learning_rate": 1.9145219417585857e-05,
      "loss": 2.6143,
      "step": 10615
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9779537916183472,
      "learning_rate": 1.914505284736855e-05,
      "loss": 2.6038,
      "step": 10616
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0228692293167114,
      "learning_rate": 1.91448862616479e-05,
      "loss": 2.561,
      "step": 10617
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9217814207077026,
      "learning_rate": 1.914471966042418e-05,
      "loss": 2.4984,
      "step": 10618
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0775091648101807,
      "learning_rate": 1.914455304369768e-05,
      "loss": 2.6317,
      "step": 10619
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9638999700546265,
      "learning_rate": 1.9144386411468674e-05,
      "loss": 2.4036,
      "step": 10620
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0625423192977905,
      "learning_rate": 1.9144219763737456e-05,
      "loss": 2.6976,
      "step": 10621
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0009263753890991,
      "learning_rate": 1.9144053100504295e-05,
      "loss": 2.5394,
      "step": 10622
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1306047439575195,
      "learning_rate": 1.9143886421769485e-05,
      "loss": 2.4958,
      "step": 10623
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4006121158599854,
      "learning_rate": 1.91437197275333e-05,
      "loss": 2.3622,
      "step": 10624
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.06614351272583,
      "learning_rate": 1.9143553017796032e-05,
      "loss": 2.6476,
      "step": 10625
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9898613691329956,
      "learning_rate": 1.9143386292557955e-05,
      "loss": 2.7639,
      "step": 10626
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0373228788375854,
      "learning_rate": 1.9143219551819358e-05,
      "loss": 2.419,
      "step": 10627
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.018265724182129,
      "learning_rate": 1.9143052795580515e-05,
      "loss": 2.5152,
      "step": 10628
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.018338680267334,
      "learning_rate": 1.914288602384172e-05,
      "loss": 2.4496,
      "step": 10629
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1975297927856445,
      "learning_rate": 1.9142719236603245e-05,
      "loss": 2.523,
      "step": 10630
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0421987771987915,
      "learning_rate": 1.9142552433865384e-05,
      "loss": 2.4647,
      "step": 10631
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1670221090316772,
      "learning_rate": 1.914238561562841e-05,
      "loss": 2.4968,
      "step": 10632
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1406420469284058,
      "learning_rate": 1.914221878189261e-05,
      "loss": 2.3948,
      "step": 10633
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0401660203933716,
      "learning_rate": 1.9142051932658265e-05,
      "loss": 2.4974,
      "step": 10634
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.067089319229126,
      "learning_rate": 1.9141885067925662e-05,
      "loss": 2.6573,
      "step": 10635
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0269334316253662,
      "learning_rate": 1.9141718187695074e-05,
      "loss": 2.471,
      "step": 10636
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5550363063812256,
      "learning_rate": 1.9141551291966798e-05,
      "loss": 2.6129,
      "step": 10637
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.333248257637024,
      "learning_rate": 1.914138438074111e-05,
      "loss": 2.4178,
      "step": 10638
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.047081470489502,
      "learning_rate": 1.9141217454018285e-05,
      "loss": 2.4448,
      "step": 10639
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9125462770462036,
      "learning_rate": 1.914105051179862e-05,
      "loss": 2.5847,
      "step": 10640
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0633212327957153,
      "learning_rate": 1.914088355408239e-05,
      "loss": 2.6484,
      "step": 10641
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1460164785385132,
      "learning_rate": 1.914071658086988e-05,
      "loss": 2.6473,
      "step": 10642
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.122137427330017,
      "learning_rate": 1.9140549592161372e-05,
      "loss": 2.5074,
      "step": 10643
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1579803228378296,
      "learning_rate": 1.914038258795715e-05,
      "loss": 2.6947,
      "step": 10644
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0479774475097656,
      "learning_rate": 1.91402155682575e-05,
      "loss": 2.567,
      "step": 10645
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9612206816673279,
      "learning_rate": 1.9140048533062697e-05,
      "loss": 2.4181,
      "step": 10646
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1898614168167114,
      "learning_rate": 1.9139881482373033e-05,
      "loss": 2.4752,
      "step": 10647
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9983654022216797,
      "learning_rate": 1.913971441618879e-05,
      "loss": 2.5486,
      "step": 10648
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9711160659790039,
      "learning_rate": 1.9139547334510245e-05,
      "loss": 2.4067,
      "step": 10649
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0398099422454834,
      "learning_rate": 1.9139380237337684e-05,
      "loss": 2.3725,
      "step": 10650
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0384252071380615,
      "learning_rate": 1.913921312467139e-05,
      "loss": 2.5078,
      "step": 10651
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9699798822402954,
      "learning_rate": 1.9139045996511654e-05,
      "loss": 2.429,
      "step": 10652
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9929474592208862,
      "learning_rate": 1.913887885285875e-05,
      "loss": 2.5637,
      "step": 10653
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.075336217880249,
      "learning_rate": 1.913871169371296e-05,
      "loss": 2.644,
      "step": 10654
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.99417644739151,
      "learning_rate": 1.9138544519074577e-05,
      "loss": 2.5682,
      "step": 10655
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.048930287361145,
      "learning_rate": 1.9138377328943876e-05,
      "loss": 2.5177,
      "step": 10656
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9179383516311646,
      "learning_rate": 1.9138210123321146e-05,
      "loss": 2.4872,
      "step": 10657
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.104140281677246,
      "learning_rate": 1.9138042902206665e-05,
      "loss": 2.7335,
      "step": 10658
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0382534265518188,
      "learning_rate": 1.9137875665600723e-05,
      "loss": 2.5816,
      "step": 10659
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.050353765487671,
      "learning_rate": 1.91377084135036e-05,
      "loss": 2.591,
      "step": 10660
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9837141633033752,
      "learning_rate": 1.9137541145915574e-05,
      "loss": 2.5186,
      "step": 10661
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0642297267913818,
      "learning_rate": 1.913737386283694e-05,
      "loss": 2.7052,
      "step": 10662
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0218291282653809,
      "learning_rate": 1.9137206564267972e-05,
      "loss": 2.4942,
      "step": 10663
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0552006959915161,
      "learning_rate": 1.913703925020896e-05,
      "loss": 2.4949,
      "step": 10664
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1802526712417603,
      "learning_rate": 1.9136871920660182e-05,
      "loss": 2.6194,
      "step": 10665
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.172237753868103,
      "learning_rate": 1.9136704575621924e-05,
      "loss": 2.5455,
      "step": 10666
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.928560733795166,
      "learning_rate": 1.9136537215094473e-05,
      "loss": 2.5023,
      "step": 10667
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9105455875396729,
      "learning_rate": 1.913636983907811e-05,
      "loss": 2.7206,
      "step": 10668
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.079824686050415,
      "learning_rate": 1.9136202447573114e-05,
      "loss": 2.4399,
      "step": 10669
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.006514310836792,
      "learning_rate": 1.913603504057978e-05,
      "loss": 2.5803,
      "step": 10670
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0383590459823608,
      "learning_rate": 1.913586761809838e-05,
      "loss": 2.3518,
      "step": 10671
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9373037815093994,
      "learning_rate": 1.9135700180129207e-05,
      "loss": 2.6701,
      "step": 10672
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9782907962799072,
      "learning_rate": 1.913553272667254e-05,
      "loss": 2.7557,
      "step": 10673
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0239346027374268,
      "learning_rate": 1.9135365257728664e-05,
      "loss": 2.3682,
      "step": 10674
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1415445804595947,
      "learning_rate": 1.913519777329786e-05,
      "loss": 2.6345,
      "step": 10675
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.969096302986145,
      "learning_rate": 1.9135030273380417e-05,
      "loss": 2.5145,
      "step": 10676
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9495989084243774,
      "learning_rate": 1.9134862757976616e-05,
      "loss": 2.7957,
      "step": 10677
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.8992278575897217,
      "learning_rate": 1.9134695227086743e-05,
      "loss": 2.3924,
      "step": 10678
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.116676926612854,
      "learning_rate": 1.913452768071108e-05,
      "loss": 2.6552,
      "step": 10679
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9638752341270447,
      "learning_rate": 1.913436011884991e-05,
      "loss": 2.6347,
      "step": 10680
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0145148038864136,
      "learning_rate": 1.9134192541503523e-05,
      "loss": 2.5489,
      "step": 10681
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9463219046592712,
      "learning_rate": 1.9134024948672194e-05,
      "loss": 2.6476,
      "step": 10682
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9646298289299011,
      "learning_rate": 1.9133857340356218e-05,
      "loss": 2.4733,
      "step": 10683
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9795072078704834,
      "learning_rate": 1.9133689716555866e-05,
      "loss": 2.7565,
      "step": 10684
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9948826432228088,
      "learning_rate": 1.9133522077271434e-05,
      "loss": 2.4233,
      "step": 10685
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.968986988067627,
      "learning_rate": 1.9133354422503198e-05,
      "loss": 2.7968,
      "step": 10686
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9883979558944702,
      "learning_rate": 1.9133186752251448e-05,
      "loss": 2.5784,
      "step": 10687
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.964352011680603,
      "learning_rate": 1.9133019066516464e-05,
      "loss": 2.5517,
      "step": 10688
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1137830018997192,
      "learning_rate": 1.9132851365298534e-05,
      "loss": 2.753,
      "step": 10689
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1501737833023071,
      "learning_rate": 1.913268364859794e-05,
      "loss": 2.5015,
      "step": 10690
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.031939148902893,
      "learning_rate": 1.9132515916414966e-05,
      "loss": 2.7874,
      "step": 10691
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0217152833938599,
      "learning_rate": 1.9132348168749903e-05,
      "loss": 2.6523,
      "step": 10692
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1641731262207031,
      "learning_rate": 1.913218040560302e-05,
      "loss": 2.6775,
      "step": 10693
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9279316663742065,
      "learning_rate": 1.9132012626974616e-05,
      "loss": 2.4329,
      "step": 10694
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0991029739379883,
      "learning_rate": 1.913184483286497e-05,
      "loss": 2.5757,
      "step": 10695
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2298915386199951,
      "learning_rate": 1.9131677023274367e-05,
      "loss": 2.4745,
      "step": 10696
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0514646768569946,
      "learning_rate": 1.913150919820309e-05,
      "loss": 2.5412,
      "step": 10697
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0140832662582397,
      "learning_rate": 1.913134135765143e-05,
      "loss": 2.6902,
      "step": 10698
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1161012649536133,
      "learning_rate": 1.913117350161966e-05,
      "loss": 2.3525,
      "step": 10699
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0929945707321167,
      "learning_rate": 1.913100563010807e-05,
      "loss": 2.4665,
      "step": 10700
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0521811246871948,
      "learning_rate": 1.913083774311695e-05,
      "loss": 2.7141,
      "step": 10701
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.025935411453247,
      "learning_rate": 1.913066984064658e-05,
      "loss": 2.5472,
      "step": 10702
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1497457027435303,
      "learning_rate": 1.9130501922697244e-05,
      "loss": 2.4888,
      "step": 10703
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0469614267349243,
      "learning_rate": 1.9130333989269226e-05,
      "loss": 2.5916,
      "step": 10704
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.167715311050415,
      "learning_rate": 1.9130166040362813e-05,
      "loss": 2.6643,
      "step": 10705
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.013961672782898,
      "learning_rate": 1.912999807597829e-05,
      "loss": 2.5939,
      "step": 10706
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.996596097946167,
      "learning_rate": 1.912983009611594e-05,
      "loss": 2.5272,
      "step": 10707
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0007303953170776,
      "learning_rate": 1.9129662100776047e-05,
      "loss": 2.5414,
      "step": 10708
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0309169292449951,
      "learning_rate": 1.9129494089958898e-05,
      "loss": 2.5179,
      "step": 10709
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0910851955413818,
      "learning_rate": 1.9129326063664776e-05,
      "loss": 2.5333,
      "step": 10710
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9548762440681458,
      "learning_rate": 1.9129158021893967e-05,
      "loss": 2.6184,
      "step": 10711
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.015977382659912,
      "learning_rate": 1.9128989964646758e-05,
      "loss": 2.4555,
      "step": 10712
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9917058348655701,
      "learning_rate": 1.912882189192343e-05,
      "loss": 2.5731,
      "step": 10713
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9665109515190125,
      "learning_rate": 1.912865380372427e-05,
      "loss": 2.7406,
      "step": 10714
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9685423970222473,
      "learning_rate": 1.912848570004956e-05,
      "loss": 2.7101,
      "step": 10715
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0118592977523804,
      "learning_rate": 1.912831758089959e-05,
      "loss": 2.4787,
      "step": 10716
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0583198070526123,
      "learning_rate": 1.9128149446274644e-05,
      "loss": 2.6552,
      "step": 10717
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0536025762557983,
      "learning_rate": 1.9127981296175006e-05,
      "loss": 2.5801,
      "step": 10718
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2013676166534424,
      "learning_rate": 1.9127813130600956e-05,
      "loss": 2.8451,
      "step": 10719
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0432289838790894,
      "learning_rate": 1.9127644949552784e-05,
      "loss": 2.6782,
      "step": 10720
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9660575985908508,
      "learning_rate": 1.912747675303078e-05,
      "loss": 2.5412,
      "step": 10721
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9979541301727295,
      "learning_rate": 1.9127308541035218e-05,
      "loss": 2.2639,
      "step": 10722
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0508506298065186,
      "learning_rate": 1.912714031356639e-05,
      "loss": 2.8269,
      "step": 10723
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0117993354797363,
      "learning_rate": 1.9126972070624586e-05,
      "loss": 2.6201,
      "step": 10724
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0470819473266602,
      "learning_rate": 1.912680381221008e-05,
      "loss": 2.706,
      "step": 10725
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0417875051498413,
      "learning_rate": 1.9126635538323163e-05,
      "loss": 2.5164,
      "step": 10726
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0861411094665527,
      "learning_rate": 1.9126467248964122e-05,
      "loss": 2.449,
      "step": 10727
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0352106094360352,
      "learning_rate": 1.9126298944133238e-05,
      "loss": 2.5838,
      "step": 10728
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0182464122772217,
      "learning_rate": 1.9126130623830802e-05,
      "loss": 2.4743,
      "step": 10729
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0036767721176147,
      "learning_rate": 1.9125962288057093e-05,
      "loss": 2.6288,
      "step": 10730
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9906678199768066,
      "learning_rate": 1.9125793936812402e-05,
      "loss": 2.5585,
      "step": 10731
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0018705129623413,
      "learning_rate": 1.912562557009701e-05,
      "loss": 2.4499,
      "step": 10732
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9995020627975464,
      "learning_rate": 1.9125457187911204e-05,
      "loss": 2.4259,
      "step": 10733
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.001729130744934,
      "learning_rate": 1.9125288790255267e-05,
      "loss": 2.3775,
      "step": 10734
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1111425161361694,
      "learning_rate": 1.9125120377129492e-05,
      "loss": 2.5319,
      "step": 10735
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9793581962585449,
      "learning_rate": 1.9124951948534156e-05,
      "loss": 2.6598,
      "step": 10736
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9878761172294617,
      "learning_rate": 1.9124783504469554e-05,
      "loss": 2.6522,
      "step": 10737
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9624568819999695,
      "learning_rate": 1.912461504493596e-05,
      "loss": 2.6112,
      "step": 10738
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.008315920829773,
      "learning_rate": 1.9124446569933666e-05,
      "loss": 2.7757,
      "step": 10739
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9749917984008789,
      "learning_rate": 1.912427807946296e-05,
      "loss": 2.3216,
      "step": 10740
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.020049810409546,
      "learning_rate": 1.912410957352412e-05,
      "loss": 2.4122,
      "step": 10741
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9154790639877319,
      "learning_rate": 1.912394105211744e-05,
      "loss": 2.7191,
      "step": 10742
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0949184894561768,
      "learning_rate": 1.91237725152432e-05,
      "loss": 2.4469,
      "step": 10743
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9568865895271301,
      "learning_rate": 1.9123603962901687e-05,
      "loss": 2.3911,
      "step": 10744
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9859980344772339,
      "learning_rate": 1.9123435395093185e-05,
      "loss": 2.5904,
      "step": 10745
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1490986347198486,
      "learning_rate": 1.912326681181799e-05,
      "loss": 2.6567,
      "step": 10746
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.981035590171814,
      "learning_rate": 1.9123098213076374e-05,
      "loss": 2.4459,
      "step": 10747
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9950293898582458,
      "learning_rate": 1.912292959886863e-05,
      "loss": 2.4732,
      "step": 10748
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0227854251861572,
      "learning_rate": 1.912276096919504e-05,
      "loss": 2.5913,
      "step": 10749
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9850001335144043,
      "learning_rate": 1.9122592324055897e-05,
      "loss": 2.5574,
      "step": 10750
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0521314144134521,
      "learning_rate": 1.9122423663451483e-05,
      "loss": 2.4705,
      "step": 10751
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2555267810821533,
      "learning_rate": 1.9122254987382077e-05,
      "loss": 2.6211,
      "step": 10752
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9705926775932312,
      "learning_rate": 1.9122086295847974e-05,
      "loss": 2.2653,
      "step": 10753
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9346579313278198,
      "learning_rate": 1.9121917588849462e-05,
      "loss": 2.4009,
      "step": 10754
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2416012287139893,
      "learning_rate": 1.9121748866386815e-05,
      "loss": 2.604,
      "step": 10755
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1022839546203613,
      "learning_rate": 1.912158012846033e-05,
      "loss": 2.4731,
      "step": 10756
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1256444454193115,
      "learning_rate": 1.912141137507029e-05,
      "loss": 2.7054,
      "step": 10757
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0374661684036255,
      "learning_rate": 1.9121242606216978e-05,
      "loss": 2.5549,
      "step": 10758
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.023972988128662,
      "learning_rate": 1.9121073821900685e-05,
      "loss": 2.5978,
      "step": 10759
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0235832929611206,
      "learning_rate": 1.9120905022121694e-05,
      "loss": 2.2292,
      "step": 10760
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.131260633468628,
      "learning_rate": 1.912073620688029e-05,
      "loss": 2.5988,
      "step": 10761
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0530864000320435,
      "learning_rate": 1.9120567376176762e-05,
      "loss": 2.4444,
      "step": 10762
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9462326765060425,
      "learning_rate": 1.9120398530011394e-05,
      "loss": 2.8676,
      "step": 10763
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0324729681015015,
      "learning_rate": 1.9120229668384476e-05,
      "loss": 2.6473,
      "step": 10764
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9678148031234741,
      "learning_rate": 1.912006079129629e-05,
      "loss": 2.6622,
      "step": 10765
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0292912721633911,
      "learning_rate": 1.9119891898747126e-05,
      "loss": 2.5689,
      "step": 10766
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.077771544456482,
      "learning_rate": 1.9119722990737263e-05,
      "loss": 2.8021,
      "step": 10767
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9875453114509583,
      "learning_rate": 1.9119554067267e-05,
      "loss": 2.6132,
      "step": 10768
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0635521411895752,
      "learning_rate": 1.9119385128336612e-05,
      "loss": 2.6846,
      "step": 10769
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9318717122077942,
      "learning_rate": 1.9119216173946386e-05,
      "loss": 2.3842,
      "step": 10770
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9670408964157104,
      "learning_rate": 1.9119047204096617e-05,
      "loss": 2.5631,
      "step": 10771
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0057573318481445,
      "learning_rate": 1.9118878218787588e-05,
      "loss": 2.4453,
      "step": 10772
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.043087363243103,
      "learning_rate": 1.911870921801958e-05,
      "loss": 2.6951,
      "step": 10773
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0465662479400635,
      "learning_rate": 1.911854020179288e-05,
      "loss": 2.6685,
      "step": 10774
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9572163224220276,
      "learning_rate": 1.9118371170107784e-05,
      "loss": 2.5882,
      "step": 10775
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.010007381439209,
      "learning_rate": 1.911820212296457e-05,
      "loss": 2.4609,
      "step": 10776
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0670561790466309,
      "learning_rate": 1.9118033060363528e-05,
      "loss": 2.5406,
      "step": 10777
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.012986421585083,
      "learning_rate": 1.911786398230494e-05,
      "loss": 2.6778,
      "step": 10778
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0089298486709595,
      "learning_rate": 1.91176948887891e-05,
      "loss": 2.8502,
      "step": 10779
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9374539256095886,
      "learning_rate": 1.9117525779816288e-05,
      "loss": 2.7683,
      "step": 10780
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9246054887771606,
      "learning_rate": 1.9117356655386797e-05,
      "loss": 2.583,
      "step": 10781
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9919224977493286,
      "learning_rate": 1.911718751550091e-05,
      "loss": 2.6316,
      "step": 10782
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9508082866668701,
      "learning_rate": 1.911701836015891e-05,
      "loss": 2.5988,
      "step": 10783
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1008120775222778,
      "learning_rate": 1.911684918936109e-05,
      "loss": 2.479,
      "step": 10784
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9318655133247375,
      "learning_rate": 1.9116680003107735e-05,
      "loss": 2.7647,
      "step": 10785
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9630119800567627,
      "learning_rate": 1.911651080139913e-05,
      "loss": 2.5575,
      "step": 10786
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0577986240386963,
      "learning_rate": 1.9116341584235565e-05,
      "loss": 2.6057,
      "step": 10787
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0135087966918945,
      "learning_rate": 1.911617235161732e-05,
      "loss": 2.4666,
      "step": 10788
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1185914278030396,
      "learning_rate": 1.9116003103544695e-05,
      "loss": 2.5245,
      "step": 10789
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.99558424949646,
      "learning_rate": 1.9115833840017964e-05,
      "loss": 2.5327,
      "step": 10790
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9639453887939453,
      "learning_rate": 1.9115664561037418e-05,
      "loss": 2.6482,
      "step": 10791
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0799987316131592,
      "learning_rate": 1.911549526660335e-05,
      "loss": 2.3911,
      "step": 10792
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0697534084320068,
      "learning_rate": 1.9115325956716035e-05,
      "loss": 2.5069,
      "step": 10793
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9672025442123413,
      "learning_rate": 1.911515663137577e-05,
      "loss": 2.6176,
      "step": 10794
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9595881700515747,
      "learning_rate": 1.9114987290582842e-05,
      "loss": 2.6539,
      "step": 10795
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.232120156288147,
      "learning_rate": 1.911481793433753e-05,
      "loss": 2.3823,
      "step": 10796
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0224339962005615,
      "learning_rate": 1.9114648562640127e-05,
      "loss": 2.5079,
      "step": 10797
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0574132204055786,
      "learning_rate": 1.9114479175490923e-05,
      "loss": 2.3291,
      "step": 10798
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0100390911102295,
      "learning_rate": 1.9114309772890197e-05,
      "loss": 2.3694,
      "step": 10799
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0753220319747925,
      "learning_rate": 1.9114140354838243e-05,
      "loss": 2.5759,
      "step": 10800
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0505585670471191,
      "learning_rate": 1.9113970921335343e-05,
      "loss": 2.7077,
      "step": 10801
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9509434103965759,
      "learning_rate": 1.911380147238179e-05,
      "loss": 2.5638,
      "step": 10802
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9988659024238586,
      "learning_rate": 1.9113632007977868e-05,
      "loss": 2.6162,
      "step": 10803
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0242191553115845,
      "learning_rate": 1.9113462528123862e-05,
      "loss": 2.7219,
      "step": 10804
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9267550706863403,
      "learning_rate": 1.9113293032820063e-05,
      "loss": 2.3836,
      "step": 10805
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0343804359436035,
      "learning_rate": 1.9113123522066756e-05,
      "loss": 2.5661,
      "step": 10806
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.080706000328064,
      "learning_rate": 1.9112953995864232e-05,
      "loss": 2.4154,
      "step": 10807
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0846712589263916,
      "learning_rate": 1.9112784454212775e-05,
      "loss": 2.3506,
      "step": 10808
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0596673488616943,
      "learning_rate": 1.9112614897112675e-05,
      "loss": 2.5029,
      "step": 10809
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.029226541519165,
      "learning_rate": 1.9112445324564214e-05,
      "loss": 2.581,
      "step": 10810
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.224558711051941,
      "learning_rate": 1.9112275736567686e-05,
      "loss": 2.638,
      "step": 10811
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0712391138076782,
      "learning_rate": 1.9112106133123377e-05,
      "loss": 2.4994,
      "step": 10812
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0325143337249756,
      "learning_rate": 1.911193651423157e-05,
      "loss": 2.4624,
      "step": 10813
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0652140378952026,
      "learning_rate": 1.9111766879892556e-05,
      "loss": 2.5064,
      "step": 10814
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1408199071884155,
      "learning_rate": 1.911159723010662e-05,
      "loss": 2.6009,
      "step": 10815
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0861032009124756,
      "learning_rate": 1.911142756487406e-05,
      "loss": 2.5604,
      "step": 10816
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.041584849357605,
      "learning_rate": 1.911125788419515e-05,
      "loss": 2.6282,
      "step": 10817
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0803776979446411,
      "learning_rate": 1.9111088188070182e-05,
      "loss": 2.8164,
      "step": 10818
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0016376972198486,
      "learning_rate": 1.9110918476499448e-05,
      "loss": 2.5756,
      "step": 10819
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1273822784423828,
      "learning_rate": 1.9110748749483234e-05,
      "loss": 2.5195,
      "step": 10820
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.033370852470398,
      "learning_rate": 1.9110579007021824e-05,
      "loss": 2.518,
      "step": 10821
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0137391090393066,
      "learning_rate": 1.911040924911551e-05,
      "loss": 2.5804,
      "step": 10822
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.8719502687454224,
      "learning_rate": 1.9110239475764575e-05,
      "loss": 2.6732,
      "step": 10823
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0369497537612915,
      "learning_rate": 1.911006968696931e-05,
      "loss": 2.4656,
      "step": 10824
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9975677132606506,
      "learning_rate": 1.9109899882730008e-05,
      "loss": 2.498,
      "step": 10825
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9637517929077148,
      "learning_rate": 1.910973006304695e-05,
      "loss": 2.65,
      "step": 10826
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0488582849502563,
      "learning_rate": 1.910956022792042e-05,
      "loss": 2.5414,
      "step": 10827
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0278875827789307,
      "learning_rate": 1.9109390377350715e-05,
      "loss": 2.5142,
      "step": 10828
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.11067533493042,
      "learning_rate": 1.9109220511338118e-05,
      "loss": 2.5273,
      "step": 10829
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0971158742904663,
      "learning_rate": 1.910905062988292e-05,
      "loss": 2.6195,
      "step": 10830
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9969490766525269,
      "learning_rate": 1.9108880732985407e-05,
      "loss": 2.3349,
      "step": 10831
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9466527104377747,
      "learning_rate": 1.9108710820645865e-05,
      "loss": 2.5839,
      "step": 10832
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9893963932991028,
      "learning_rate": 1.9108540892864583e-05,
      "loss": 2.6158,
      "step": 10833
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9517309069633484,
      "learning_rate": 1.9108370949641854e-05,
      "loss": 2.6362,
      "step": 10834
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1145113706588745,
      "learning_rate": 1.910820099097796e-05,
      "loss": 2.3617,
      "step": 10835
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1978994607925415,
      "learning_rate": 1.9108031016873197e-05,
      "loss": 2.43,
      "step": 10836
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9814668893814087,
      "learning_rate": 1.910786102732784e-05,
      "loss": 2.61,
      "step": 10837
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9659357666969299,
      "learning_rate": 1.910769102234219e-05,
      "loss": 2.5655,
      "step": 10838
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9533899426460266,
      "learning_rate": 1.9107521001916528e-05,
      "loss": 2.5621,
      "step": 10839
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0200214385986328,
      "learning_rate": 1.9107350966051145e-05,
      "loss": 2.7337,
      "step": 10840
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.045836091041565,
      "learning_rate": 1.910718091474633e-05,
      "loss": 2.4423,
      "step": 10841
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5284597873687744,
      "learning_rate": 1.910701084800237e-05,
      "loss": 2.4843,
      "step": 10842
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.035408854484558,
      "learning_rate": 1.910684076581955e-05,
      "loss": 2.7574,
      "step": 10843
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0246015787124634,
      "learning_rate": 1.9106670668198162e-05,
      "loss": 2.6417,
      "step": 10844
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0161430835723877,
      "learning_rate": 1.9106500555138496e-05,
      "loss": 2.6585,
      "step": 10845
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9700358510017395,
      "learning_rate": 1.910633042664084e-05,
      "loss": 2.4037,
      "step": 10846
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.029520034790039,
      "learning_rate": 1.9106160282705473e-05,
      "loss": 2.3874,
      "step": 10847
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0139065980911255,
      "learning_rate": 1.9105990123332695e-05,
      "loss": 2.5157,
      "step": 10848
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9858187437057495,
      "learning_rate": 1.9105819948522793e-05,
      "loss": 2.5254,
      "step": 10849
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0804818868637085,
      "learning_rate": 1.9105649758276054e-05,
      "loss": 2.3451,
      "step": 10850
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9529776573181152,
      "learning_rate": 1.910547955259276e-05,
      "loss": 2.5739,
      "step": 10851
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0548549890518188,
      "learning_rate": 1.910530933147321e-05,
      "loss": 2.522,
      "step": 10852
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0318989753723145,
      "learning_rate": 1.9105139094917686e-05,
      "loss": 2.7012,
      "step": 10853
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9800726771354675,
      "learning_rate": 1.910496884292648e-05,
      "loss": 2.5701,
      "step": 10854
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0362578630447388,
      "learning_rate": 1.9104798575499878e-05,
      "loss": 2.578,
      "step": 10855
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.173280119895935,
      "learning_rate": 1.9104628292638167e-05,
      "loss": 2.5934,
      "step": 10856
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9965683817863464,
      "learning_rate": 1.9104457994341642e-05,
      "loss": 2.5818,
      "step": 10857
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0149426460266113,
      "learning_rate": 1.9104287680610584e-05,
      "loss": 2.67,
      "step": 10858
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.01300048828125,
      "learning_rate": 1.910411735144529e-05,
      "loss": 2.6037,
      "step": 10859
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9816242456436157,
      "learning_rate": 1.910394700684604e-05,
      "loss": 2.5141,
      "step": 10860
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.039621353149414,
      "learning_rate": 1.910377664681313e-05,
      "loss": 2.4955,
      "step": 10861
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1088413000106812,
      "learning_rate": 1.9103606271346846e-05,
      "loss": 2.5888,
      "step": 10862
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.95815509557724,
      "learning_rate": 1.9103435880447475e-05,
      "loss": 2.5741,
      "step": 10863
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0071836709976196,
      "learning_rate": 1.910326547411531e-05,
      "loss": 2.4367,
      "step": 10864
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0149049758911133,
      "learning_rate": 1.9103095052350637e-05,
      "loss": 2.4594,
      "step": 10865
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0439224243164062,
      "learning_rate": 1.9102924615153746e-05,
      "loss": 2.413,
      "step": 10866
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1914840936660767,
      "learning_rate": 1.9102754162524925e-05,
      "loss": 2.7746,
      "step": 10867
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.034293532371521,
      "learning_rate": 1.9102583694464464e-05,
      "loss": 2.2836,
      "step": 10868
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0142267942428589,
      "learning_rate": 1.910241321097265e-05,
      "loss": 2.6911,
      "step": 10869
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.053755521774292,
      "learning_rate": 1.9102242712049773e-05,
      "loss": 2.6683,
      "step": 10870
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0329703092575073,
      "learning_rate": 1.9102072197696122e-05,
      "loss": 2.6777,
      "step": 10871
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4782397747039795,
      "learning_rate": 1.9101901667911988e-05,
      "loss": 2.7058,
      "step": 10872
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9777666330337524,
      "learning_rate": 1.910173112269766e-05,
      "loss": 2.6748,
      "step": 10873
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9298331141471863,
      "learning_rate": 1.9101560562053426e-05,
      "loss": 2.5124,
      "step": 10874
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0188164710998535,
      "learning_rate": 1.9101389985979574e-05,
      "loss": 2.627,
      "step": 10875
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9611198306083679,
      "learning_rate": 1.910121939447639e-05,
      "loss": 2.4873,
      "step": 10876
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9888296723365784,
      "learning_rate": 1.910104878754417e-05,
      "loss": 2.3647,
      "step": 10877
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0845459699630737,
      "learning_rate": 1.9100878165183204e-05,
      "loss": 2.6216,
      "step": 10878
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9556045532226562,
      "learning_rate": 1.9100707527393773e-05,
      "loss": 2.4737,
      "step": 10879
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9289824366569519,
      "learning_rate": 1.9100536874176172e-05,
      "loss": 2.5665,
      "step": 10880
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2219932079315186,
      "learning_rate": 1.9100366205530693e-05,
      "loss": 2.4773,
      "step": 10881
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.06557035446167,
      "learning_rate": 1.910019552145762e-05,
      "loss": 2.5217,
      "step": 10882
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0038642883300781,
      "learning_rate": 1.910002482195724e-05,
      "loss": 2.616,
      "step": 10883
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9774024486541748,
      "learning_rate": 1.909985410702985e-05,
      "loss": 2.519,
      "step": 10884
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9256428480148315,
      "learning_rate": 1.9099683376675734e-05,
      "loss": 2.5713,
      "step": 10885
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0300685167312622,
      "learning_rate": 1.9099512630895186e-05,
      "loss": 2.4975,
      "step": 10886
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9991256594657898,
      "learning_rate": 1.9099341869688487e-05,
      "loss": 2.562,
      "step": 10887
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.92429518699646,
      "learning_rate": 1.909917109305594e-05,
      "loss": 2.5788,
      "step": 10888
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.069915771484375,
      "learning_rate": 1.9099000300997825e-05,
      "loss": 2.55,
      "step": 10889
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0515010356903076,
      "learning_rate": 1.909882949351443e-05,
      "loss": 2.5009,
      "step": 10890
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0469459295272827,
      "learning_rate": 1.9098658670606046e-05,
      "loss": 2.6495,
      "step": 10891
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0614476203918457,
      "learning_rate": 1.9098487832272966e-05,
      "loss": 2.4772,
      "step": 10892
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9926992058753967,
      "learning_rate": 1.9098316978515483e-05,
      "loss": 2.7224,
      "step": 10893
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0162146091461182,
      "learning_rate": 1.9098146109333876e-05,
      "loss": 2.5935,
      "step": 10894
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.085209846496582,
      "learning_rate": 1.9097975224728443e-05,
      "loss": 2.5467,
      "step": 10895
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0879303216934204,
      "learning_rate": 1.9097804324699472e-05,
      "loss": 2.768,
      "step": 10896
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0624014139175415,
      "learning_rate": 1.909763340924725e-05,
      "loss": 2.5229,
      "step": 10897
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.067107081413269,
      "learning_rate": 1.9097462478372066e-05,
      "loss": 2.5678,
      "step": 10898
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.031600832939148,
      "learning_rate": 1.9097291532074215e-05,
      "loss": 2.624,
      "step": 10899
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.006911277770996,
      "learning_rate": 1.9097120570353984e-05,
      "loss": 2.6752,
      "step": 10900
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9711843132972717,
      "learning_rate": 1.909694959321166e-05,
      "loss": 2.5611,
      "step": 10901
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0022401809692383,
      "learning_rate": 1.9096778600647542e-05,
      "loss": 2.6077,
      "step": 10902
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9441115260124207,
      "learning_rate": 1.909660759266191e-05,
      "loss": 2.6885,
      "step": 10903
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0032305717468262,
      "learning_rate": 1.9096436569255056e-05,
      "loss": 2.6877,
      "step": 10904
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0706980228424072,
      "learning_rate": 1.9096265530427274e-05,
      "loss": 2.4327,
      "step": 10905
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0220661163330078,
      "learning_rate": 1.9096094476178852e-05,
      "loss": 2.7398,
      "step": 10906
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9958853125572205,
      "learning_rate": 1.9095923406510076e-05,
      "loss": 2.3685,
      "step": 10907
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9674391150474548,
      "learning_rate": 1.909575232142124e-05,
      "loss": 2.5325,
      "step": 10908
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0075387954711914,
      "learning_rate": 1.9095581220912635e-05,
      "loss": 2.4784,
      "step": 10909
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0455689430236816,
      "learning_rate": 1.909541010498455e-05,
      "loss": 2.3121,
      "step": 10910
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9639121294021606,
      "learning_rate": 1.9095238973637275e-05,
      "loss": 2.3916,
      "step": 10911
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9171897768974304,
      "learning_rate": 1.90950678268711e-05,
      "loss": 2.4389,
      "step": 10912
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1877120733261108,
      "learning_rate": 1.909489666468631e-05,
      "loss": 2.5554,
      "step": 10913
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9360769391059875,
      "learning_rate": 1.9094725487083207e-05,
      "loss": 2.7053,
      "step": 10914
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9518247842788696,
      "learning_rate": 1.909455429406207e-05,
      "loss": 2.7112,
      "step": 10915
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.994560956954956,
      "learning_rate": 1.909438308562319e-05,
      "loss": 2.4421,
      "step": 10916
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9696785807609558,
      "learning_rate": 1.9094211861766866e-05,
      "loss": 2.7264,
      "step": 10917
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.045392632484436,
      "learning_rate": 1.9094040622493383e-05,
      "loss": 2.4959,
      "step": 10918
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9732906818389893,
      "learning_rate": 1.909386936780303e-05,
      "loss": 2.7851,
      "step": 10919
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9931839108467102,
      "learning_rate": 1.9093698097696097e-05,
      "loss": 2.5687,
      "step": 10920
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9664913415908813,
      "learning_rate": 1.9093526812172877e-05,
      "loss": 2.5383,
      "step": 10921
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.008366584777832,
      "learning_rate": 1.9093355511233663e-05,
      "loss": 2.4874,
      "step": 10922
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.966012179851532,
      "learning_rate": 1.909318419487874e-05,
      "loss": 2.4604,
      "step": 10923
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9536334872245789,
      "learning_rate": 1.9093012863108397e-05,
      "loss": 2.5812,
      "step": 10924
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0388140678405762,
      "learning_rate": 1.9092841515922927e-05,
      "loss": 2.5963,
      "step": 10925
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1184258460998535,
      "learning_rate": 1.9092670153322622e-05,
      "loss": 2.4793,
      "step": 10926
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0081135034561157,
      "learning_rate": 1.9092498775307775e-05,
      "loss": 2.6149,
      "step": 10927
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1186156272888184,
      "learning_rate": 1.909232738187867e-05,
      "loss": 2.5241,
      "step": 10928
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9701220989227295,
      "learning_rate": 1.9092155973035604e-05,
      "loss": 2.544,
      "step": 10929
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1200309991836548,
      "learning_rate": 1.909198454877886e-05,
      "loss": 2.4362,
      "step": 10930
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0229932069778442,
      "learning_rate": 1.9091813109108734e-05,
      "loss": 2.5798,
      "step": 10931
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0028822422027588,
      "learning_rate": 1.9091641654025513e-05,
      "loss": 2.7481,
      "step": 10932
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9959241151809692,
      "learning_rate": 1.9091470183529495e-05,
      "loss": 2.3723,
      "step": 10933
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0164868831634521,
      "learning_rate": 1.9091298697620963e-05,
      "loss": 2.4753,
      "step": 10934
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0332481861114502,
      "learning_rate": 1.9091127196300212e-05,
      "loss": 2.5838,
      "step": 10935
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0677894353866577,
      "learning_rate": 1.909095567956753e-05,
      "loss": 2.5688,
      "step": 10936
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.006710410118103,
      "learning_rate": 1.909078414742321e-05,
      "loss": 2.5518,
      "step": 10937
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0155929327011108,
      "learning_rate": 1.9090612599867543e-05,
      "loss": 2.5847,
      "step": 10938
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0162266492843628,
      "learning_rate": 1.909044103690082e-05,
      "loss": 2.3794,
      "step": 10939
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9416903853416443,
      "learning_rate": 1.9090269458523327e-05,
      "loss": 2.4707,
      "step": 10940
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9618193507194519,
      "learning_rate": 1.9090097864735354e-05,
      "loss": 2.5304,
      "step": 10941
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9773923754692078,
      "learning_rate": 1.9089926255537205e-05,
      "loss": 2.342,
      "step": 10942
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9126024842262268,
      "learning_rate": 1.9089754630929156e-05,
      "loss": 2.5645,
      "step": 10943
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0111573934555054,
      "learning_rate": 1.9089582990911507e-05,
      "loss": 2.4221,
      "step": 10944
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0310732126235962,
      "learning_rate": 1.9089411335484547e-05,
      "loss": 2.706,
      "step": 10945
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0274505615234375,
      "learning_rate": 1.9089239664648563e-05,
      "loss": 2.3909,
      "step": 10946
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0400594472885132,
      "learning_rate": 1.9089067978403854e-05,
      "loss": 2.5529,
      "step": 10947
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0469046831130981,
      "learning_rate": 1.9088896276750704e-05,
      "loss": 2.3796,
      "step": 10948
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0174498558044434,
      "learning_rate": 1.9088724559689405e-05,
      "loss": 2.4255,
      "step": 10949
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9858600497245789,
      "learning_rate": 1.908855282722025e-05,
      "loss": 2.5512,
      "step": 10950
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3140846490859985,
      "learning_rate": 1.908838107934353e-05,
      "loss": 2.7737,
      "step": 10951
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9948456287384033,
      "learning_rate": 1.9088209316059535e-05,
      "loss": 2.5008,
      "step": 10952
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0007734298706055,
      "learning_rate": 1.9088037537368556e-05,
      "loss": 2.5372,
      "step": 10953
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0493857860565186,
      "learning_rate": 1.9087865743270888e-05,
      "loss": 2.4524,
      "step": 10954
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1918905973434448,
      "learning_rate": 1.9087693933766818e-05,
      "loss": 2.4976,
      "step": 10955
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9726653099060059,
      "learning_rate": 1.908752210885664e-05,
      "loss": 2.549,
      "step": 10956
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9606709480285645,
      "learning_rate": 1.908735026854064e-05,
      "loss": 2.5541,
      "step": 10957
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0298813581466675,
      "learning_rate": 1.9087178412819117e-05,
      "loss": 2.7787,
      "step": 10958
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9135967493057251,
      "learning_rate": 1.9087006541692354e-05,
      "loss": 2.6281,
      "step": 10959
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9015485048294067,
      "learning_rate": 1.908683465516065e-05,
      "loss": 2.511,
      "step": 10960
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1158543825149536,
      "learning_rate": 1.9086662753224293e-05,
      "loss": 2.4297,
      "step": 10961
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0261062383651733,
      "learning_rate": 1.9086490835883578e-05,
      "loss": 2.6945,
      "step": 10962
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0206598043441772,
      "learning_rate": 1.908631890313879e-05,
      "loss": 2.8465,
      "step": 10963
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9276135563850403,
      "learning_rate": 1.908614695499022e-05,
      "loss": 2.5532,
      "step": 10964
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1936616897583008,
      "learning_rate": 1.908597499143817e-05,
      "loss": 2.6086,
      "step": 10965
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1049387454986572,
      "learning_rate": 1.908580301248292e-05,
      "loss": 2.3918,
      "step": 10966
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9981362223625183,
      "learning_rate": 1.9085631018124767e-05,
      "loss": 2.475,
      "step": 10967
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.07673180103302,
      "learning_rate": 1.9085459008364003e-05,
      "loss": 2.6835,
      "step": 10968
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9981421828269958,
      "learning_rate": 1.908528698320092e-05,
      "loss": 2.2028,
      "step": 10969
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0057867765426636,
      "learning_rate": 1.9085114942635804e-05,
      "loss": 2.6634,
      "step": 10970
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9143624901771545,
      "learning_rate": 1.9084942886668952e-05,
      "loss": 2.4779,
      "step": 10971
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0359636545181274,
      "learning_rate": 1.9084770815300656e-05,
      "loss": 2.7547,
      "step": 10972
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0682929754257202,
      "learning_rate": 1.9084598728531206e-05,
      "loss": 2.7848,
      "step": 10973
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1031349897384644,
      "learning_rate": 1.9084426626360892e-05,
      "loss": 2.6532,
      "step": 10974
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0900782346725464,
      "learning_rate": 1.9084254508790008e-05,
      "loss": 2.4667,
      "step": 10975
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.023146629333496,
      "learning_rate": 1.9084082375818845e-05,
      "loss": 2.6882,
      "step": 10976
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0443143844604492,
      "learning_rate": 1.90839102274477e-05,
      "loss": 2.5142,
      "step": 10977
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9128695726394653,
      "learning_rate": 1.9083738063676853e-05,
      "loss": 2.3587,
      "step": 10978
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3027161359786987,
      "learning_rate": 1.908356588450661e-05,
      "loss": 2.5398,
      "step": 10979
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0128875970840454,
      "learning_rate": 1.9083393689937248e-05,
      "loss": 2.7366,
      "step": 10980
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9316678047180176,
      "learning_rate": 1.9083221479969073e-05,
      "loss": 2.6218,
      "step": 10981
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0074262619018555,
      "learning_rate": 1.9083049254602367e-05,
      "loss": 2.6316,
      "step": 10982
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0054880380630493,
      "learning_rate": 1.9082877013837425e-05,
      "loss": 2.5851,
      "step": 10983
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9892603158950806,
      "learning_rate": 1.9082704757674543e-05,
      "loss": 2.2867,
      "step": 10984
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0199050903320312,
      "learning_rate": 1.9082532486114007e-05,
      "loss": 2.2286,
      "step": 10985
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9808872938156128,
      "learning_rate": 1.908236019915611e-05,
      "loss": 2.5286,
      "step": 10986
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9808646440505981,
      "learning_rate": 1.908218789680115e-05,
      "loss": 2.6349,
      "step": 10987
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9118948578834534,
      "learning_rate": 1.9082015579049415e-05,
      "loss": 2.5992,
      "step": 10988
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2732617855072021,
      "learning_rate": 1.908184324590119e-05,
      "loss": 2.6048,
      "step": 10989
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0160014629364014,
      "learning_rate": 1.9081670897356783e-05,
      "loss": 2.3966,
      "step": 10990
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.137829303741455,
      "learning_rate": 1.9081498533416472e-05,
      "loss": 2.4969,
      "step": 10991
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9927608370780945,
      "learning_rate": 1.9081326154080556e-05,
      "loss": 2.404,
      "step": 10992
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.03528892993927,
      "learning_rate": 1.9081153759349327e-05,
      "loss": 2.5918,
      "step": 10993
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.949451208114624,
      "learning_rate": 1.9080981349223072e-05,
      "loss": 2.7372,
      "step": 10994
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9810163974761963,
      "learning_rate": 1.908080892370209e-05,
      "loss": 2.4852,
      "step": 10995
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0759848356246948,
      "learning_rate": 1.9080636482786672e-05,
      "loss": 2.5178,
      "step": 10996
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.196221113204956,
      "learning_rate": 1.9080464026477104e-05,
      "loss": 2.6272,
      "step": 10997
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9347333312034607,
      "learning_rate": 1.9080291554773688e-05,
      "loss": 2.5254,
      "step": 10998
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0069031715393066,
      "learning_rate": 1.908011906767671e-05,
      "loss": 2.4361,
      "step": 10999
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9889963865280151,
      "learning_rate": 1.9079946565186467e-05,
      "loss": 2.5866,
      "step": 11000
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9832209944725037,
      "learning_rate": 1.9079774047303245e-05,
      "loss": 2.8883,
      "step": 11001
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0670373439788818,
      "learning_rate": 1.907960151402734e-05,
      "loss": 2.6129,
      "step": 11002
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0380370616912842,
      "learning_rate": 1.9079428965359044e-05,
      "loss": 2.6632,
      "step": 11003
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.144275188446045,
      "learning_rate": 1.9079256401298652e-05,
      "loss": 2.5521,
      "step": 11004
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9911410212516785,
      "learning_rate": 1.9079083821846455e-05,
      "loss": 2.6779,
      "step": 11005
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0539714097976685,
      "learning_rate": 1.9078911227002744e-05,
      "loss": 2.4865,
      "step": 11006
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.025174856185913,
      "learning_rate": 1.907873861676781e-05,
      "loss": 2.5061,
      "step": 11007
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0938172340393066,
      "learning_rate": 1.907856599114195e-05,
      "loss": 2.5346,
      "step": 11008
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0801137685775757,
      "learning_rate": 1.9078393350125454e-05,
      "loss": 2.6558,
      "step": 11009
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1071345806121826,
      "learning_rate": 1.907822069371862e-05,
      "loss": 2.5772,
      "step": 11010
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9674044847488403,
      "learning_rate": 1.9078048021921735e-05,
      "loss": 2.5305,
      "step": 11011
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.010877013206482,
      "learning_rate": 1.907787533473509e-05,
      "loss": 2.7888,
      "step": 11012
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.070449709892273,
      "learning_rate": 1.9077702632158982e-05,
      "loss": 2.4176,
      "step": 11013
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0773613452911377,
      "learning_rate": 1.9077529914193703e-05,
      "loss": 2.6176,
      "step": 11014
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9858399033546448,
      "learning_rate": 1.9077357180839546e-05,
      "loss": 2.6885,
      "step": 11015
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0424939393997192,
      "learning_rate": 1.90771844320968e-05,
      "loss": 2.4501,
      "step": 11016
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.011526346206665,
      "learning_rate": 1.9077011667965765e-05,
      "loss": 2.4751,
      "step": 11017
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1461764574050903,
      "learning_rate": 1.9076838888446727e-05,
      "loss": 2.4739,
      "step": 11018
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.055618166923523,
      "learning_rate": 1.9076666093539983e-05,
      "loss": 2.7625,
      "step": 11019
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9905296564102173,
      "learning_rate": 1.9076493283245826e-05,
      "loss": 2.5002,
      "step": 11020
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1598756313323975,
      "learning_rate": 1.907632045756455e-05,
      "loss": 2.6059,
      "step": 11021
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0474129915237427,
      "learning_rate": 1.9076147616496438e-05,
      "loss": 2.5081,
      "step": 11022
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9714550375938416,
      "learning_rate": 1.9075974760041797e-05,
      "loss": 2.3607,
      "step": 11023
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1097848415374756,
      "learning_rate": 1.907580188820091e-05,
      "loss": 2.478,
      "step": 11024
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0531103610992432,
      "learning_rate": 1.9075629000974078e-05,
      "loss": 2.498,
      "step": 11025
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.174362301826477,
      "learning_rate": 1.9075456098361586e-05,
      "loss": 2.6595,
      "step": 11026
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2572965621948242,
      "learning_rate": 1.9075283180363733e-05,
      "loss": 2.5655,
      "step": 11027
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0810391902923584,
      "learning_rate": 1.907511024698081e-05,
      "loss": 2.6027,
      "step": 11028
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0315264463424683,
      "learning_rate": 1.907493729821311e-05,
      "loss": 2.4172,
      "step": 11029
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.022005558013916,
      "learning_rate": 1.9074764334060928e-05,
      "loss": 2.4535,
      "step": 11030
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9001328945159912,
      "learning_rate": 1.9074591354524553e-05,
      "loss": 2.3989,
      "step": 11031
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0578298568725586,
      "learning_rate": 1.9074418359604284e-05,
      "loss": 2.5475,
      "step": 11032
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.009292721748352,
      "learning_rate": 1.907424534930041e-05,
      "loss": 2.7821,
      "step": 11033
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9629298448562622,
      "learning_rate": 1.9074072323613228e-05,
      "loss": 2.5311,
      "step": 11034
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9621503949165344,
      "learning_rate": 1.9073899282543026e-05,
      "loss": 2.6906,
      "step": 11035
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0408235788345337,
      "learning_rate": 1.90737262260901e-05,
      "loss": 2.6317,
      "step": 11036
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0964584350585938,
      "learning_rate": 1.9073553154254743e-05,
      "loss": 2.6729,
      "step": 11037
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.101345419883728,
      "learning_rate": 1.907338006703725e-05,
      "loss": 2.6317,
      "step": 11038
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0845947265625,
      "learning_rate": 1.9073206964437916e-05,
      "loss": 2.5487,
      "step": 11039
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.08072829246521,
      "learning_rate": 1.907303384645703e-05,
      "loss": 2.6586,
      "step": 11040
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.044448971748352,
      "learning_rate": 1.907286071309489e-05,
      "loss": 2.5342,
      "step": 11041
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9478996396064758,
      "learning_rate": 1.907268756435178e-05,
      "loss": 2.5964,
      "step": 11042
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0582218170166016,
      "learning_rate": 1.9072514400228007e-05,
      "loss": 2.6178,
      "step": 11043
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.163084626197815,
      "learning_rate": 1.9072341220723856e-05,
      "loss": 2.6588,
      "step": 11044
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0464707612991333,
      "learning_rate": 1.907216802583962e-05,
      "loss": 2.6832,
      "step": 11045
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0001492500305176,
      "learning_rate": 1.90719948155756e-05,
      "loss": 2.4886,
      "step": 11046
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0931416749954224,
      "learning_rate": 1.907182158993208e-05,
      "loss": 2.5693,
      "step": 11047
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.129547357559204,
      "learning_rate": 1.9071648348909365e-05,
      "loss": 2.4814,
      "step": 11048
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9788745641708374,
      "learning_rate": 1.9071475092507738e-05,
      "loss": 2.4692,
      "step": 11049
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0913541316986084,
      "learning_rate": 1.9071301820727496e-05,
      "loss": 2.5835,
      "step": 11050
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0235430002212524,
      "learning_rate": 1.9071128533568935e-05,
      "loss": 2.6796,
      "step": 11051
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0815889835357666,
      "learning_rate": 1.9070955231032347e-05,
      "loss": 2.5332,
      "step": 11052
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9325611591339111,
      "learning_rate": 1.907078191311803e-05,
      "loss": 2.8527,
      "step": 11053
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1423600912094116,
      "learning_rate": 1.907060857982627e-05,
      "loss": 2.5071,
      "step": 11054
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0694141387939453,
      "learning_rate": 1.9070435231157363e-05,
      "loss": 2.5116,
      "step": 11055
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0242023468017578,
      "learning_rate": 1.9070261867111612e-05,
      "loss": 2.5333,
      "step": 11056
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0625485181808472,
      "learning_rate": 1.9070088487689295e-05,
      "loss": 2.4857,
      "step": 11057
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9187350273132324,
      "learning_rate": 1.906991509289072e-05,
      "loss": 2.4886,
      "step": 11058
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0523523092269897,
      "learning_rate": 1.9069741682716176e-05,
      "loss": 2.6208,
      "step": 11059
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0892916917800903,
      "learning_rate": 1.9069568257165952e-05,
      "loss": 2.8523,
      "step": 11060
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0987858772277832,
      "learning_rate": 1.906939481624035e-05,
      "loss": 2.4792,
      "step": 11061
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9961385130882263,
      "learning_rate": 1.906922135993966e-05,
      "loss": 2.352,
      "step": 11062
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.087957739830017,
      "learning_rate": 1.9069047888264175e-05,
      "loss": 2.4939,
      "step": 11063
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0416507720947266,
      "learning_rate": 1.9068874401214193e-05,
      "loss": 2.8939,
      "step": 11064
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.004551887512207,
      "learning_rate": 1.9068700898790004e-05,
      "loss": 2.5649,
      "step": 11065
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.09782075881958,
      "learning_rate": 1.9068527380991905e-05,
      "loss": 2.7265,
      "step": 11066
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1557326316833496,
      "learning_rate": 1.906835384782019e-05,
      "loss": 2.6537,
      "step": 11067
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0555987358093262,
      "learning_rate": 1.906818029927515e-05,
      "loss": 2.537,
      "step": 11068
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9388332366943359,
      "learning_rate": 1.906800673535709e-05,
      "loss": 2.5052,
      "step": 11069
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1365927457809448,
      "learning_rate": 1.9067833156066286e-05,
      "loss": 2.4682,
      "step": 11070
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9588942527770996,
      "learning_rate": 1.9067659561403044e-05,
      "loss": 2.7008,
      "step": 11071
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9595475792884827,
      "learning_rate": 1.9067485951367656e-05,
      "loss": 2.4657,
      "step": 11072
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0818673372268677,
      "learning_rate": 1.906731232596042e-05,
      "loss": 2.4479,
      "step": 11073
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9813566207885742,
      "learning_rate": 1.9067138685181624e-05,
      "loss": 2.5742,
      "step": 11074
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1444604396820068,
      "learning_rate": 1.9066965029031563e-05,
      "loss": 2.6379,
      "step": 11075
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1161319017410278,
      "learning_rate": 1.9066791357510537e-05,
      "loss": 2.6348,
      "step": 11076
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0668197870254517,
      "learning_rate": 1.9066617670618835e-05,
      "loss": 2.4566,
      "step": 11077
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0330973863601685,
      "learning_rate": 1.9066443968356758e-05,
      "loss": 2.5378,
      "step": 11078
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.132327675819397,
      "learning_rate": 1.9066270250724594e-05,
      "loss": 2.6295,
      "step": 11079
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.004953145980835,
      "learning_rate": 1.9066096517722637e-05,
      "loss": 2.4704,
      "step": 11080
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.063157081604004,
      "learning_rate": 1.906592276935119e-05,
      "loss": 2.5142,
      "step": 11081
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9335644841194153,
      "learning_rate": 1.9065749005610533e-05,
      "loss": 2.6424,
      "step": 11082
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9872397184371948,
      "learning_rate": 1.9065575226500975e-05,
      "loss": 2.8845,
      "step": 11083
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0602482557296753,
      "learning_rate": 1.9065401432022807e-05,
      "loss": 2.1894,
      "step": 11084
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9982299208641052,
      "learning_rate": 1.9065227622176317e-05,
      "loss": 2.3893,
      "step": 11085
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9365822672843933,
      "learning_rate": 1.9065053796961807e-05,
      "loss": 2.588,
      "step": 11086
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0423293113708496,
      "learning_rate": 1.9064879956379565e-05,
      "loss": 2.5768,
      "step": 11087
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9001514315605164,
      "learning_rate": 1.906470610042989e-05,
      "loss": 2.3824,
      "step": 11088
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.04764986038208,
      "learning_rate": 1.906453222911308e-05,
      "loss": 2.3056,
      "step": 11089
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0215797424316406,
      "learning_rate": 1.9064358342429422e-05,
      "loss": 2.4618,
      "step": 11090
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.137887716293335,
      "learning_rate": 1.9064184440379215e-05,
      "loss": 2.4235,
      "step": 11091
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0589812994003296,
      "learning_rate": 1.9064010522962756e-05,
      "loss": 2.505,
      "step": 11092
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0602725744247437,
      "learning_rate": 1.9063836590180337e-05,
      "loss": 2.6117,
      "step": 11093
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9597444534301758,
      "learning_rate": 1.9063662642032256e-05,
      "loss": 2.7233,
      "step": 11094
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.136084794998169,
      "learning_rate": 1.90634886785188e-05,
      "loss": 2.578,
      "step": 11095
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1223976612091064,
      "learning_rate": 1.9063314699640272e-05,
      "loss": 2.7507,
      "step": 11096
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.019613265991211,
      "learning_rate": 1.906314070539696e-05,
      "loss": 2.5953,
      "step": 11097
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0443916320800781,
      "learning_rate": 1.9062966695789167e-05,
      "loss": 2.5099,
      "step": 11098
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0526245832443237,
      "learning_rate": 1.9062792670817183e-05,
      "loss": 2.6082,
      "step": 11099
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2865492105484009,
      "learning_rate": 1.9062618630481306e-05,
      "loss": 2.6134,
      "step": 11100
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2068686485290527,
      "learning_rate": 1.9062444574781826e-05,
      "loss": 2.6403,
      "step": 11101
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0290193557739258,
      "learning_rate": 1.9062270503719042e-05,
      "loss": 2.4409,
      "step": 11102
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.8913986086845398,
      "learning_rate": 1.906209641729325e-05,
      "loss": 2.5986,
      "step": 11103
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9512045979499817,
      "learning_rate": 1.906192231550474e-05,
      "loss": 2.4728,
      "step": 11104
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9591494202613831,
      "learning_rate": 1.906174819835381e-05,
      "loss": 2.5023,
      "step": 11105
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.956733763217926,
      "learning_rate": 1.906157406584076e-05,
      "loss": 2.6577,
      "step": 11106
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.173692226409912,
      "learning_rate": 1.906139991796588e-05,
      "loss": 2.481,
      "step": 11107
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0833160877227783,
      "learning_rate": 1.9061225754729465e-05,
      "loss": 2.5703,
      "step": 11108
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9390556216239929,
      "learning_rate": 1.906105157613181e-05,
      "loss": 2.686,
      "step": 11109
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0653518438339233,
      "learning_rate": 1.906087738217321e-05,
      "loss": 2.6191,
      "step": 11110
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0711791515350342,
      "learning_rate": 1.906070317285397e-05,
      "loss": 2.401,
      "step": 11111
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0888328552246094,
      "learning_rate": 1.906052894817437e-05,
      "loss": 2.4691,
      "step": 11112
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.080645203590393,
      "learning_rate": 1.9060354708134715e-05,
      "loss": 2.3996,
      "step": 11113
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0952092409133911,
      "learning_rate": 1.9060180452735298e-05,
      "loss": 2.3319,
      "step": 11114
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.045251488685608,
      "learning_rate": 1.9060006181976413e-05,
      "loss": 2.3952,
      "step": 11115
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1314818859100342,
      "learning_rate": 1.9059831895858357e-05,
      "loss": 2.5694,
      "step": 11116
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9380687475204468,
      "learning_rate": 1.905965759438143e-05,
      "loss": 2.768,
      "step": 11117
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0541642904281616,
      "learning_rate": 1.9059483277545917e-05,
      "loss": 2.5124,
      "step": 11118
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9090774059295654,
      "learning_rate": 1.905930894535212e-05,
      "loss": 2.7026,
      "step": 11119
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0784255266189575,
      "learning_rate": 1.9059134597800338e-05,
      "loss": 2.4667,
      "step": 11120
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9908506274223328,
      "learning_rate": 1.9058960234890858e-05,
      "loss": 2.4847,
      "step": 11121
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0084471702575684,
      "learning_rate": 1.9058785856623983e-05,
      "loss": 2.6382,
      "step": 11122
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0475525856018066,
      "learning_rate": 1.9058611463000004e-05,
      "loss": 2.525,
      "step": 11123
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.09324312210083,
      "learning_rate": 1.905843705401922e-05,
      "loss": 2.7482,
      "step": 11124
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.987455427646637,
      "learning_rate": 1.905826262968192e-05,
      "loss": 2.7655,
      "step": 11125
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9632111191749573,
      "learning_rate": 1.905808818998841e-05,
      "loss": 2.6567,
      "step": 11126
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9792705774307251,
      "learning_rate": 1.905791373493898e-05,
      "loss": 2.4792,
      "step": 11127
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2479153871536255,
      "learning_rate": 1.9057739264533924e-05,
      "loss": 2.4537,
      "step": 11128
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0230953693389893,
      "learning_rate": 1.905756477877354e-05,
      "loss": 2.5399,
      "step": 11129
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.029957890510559,
      "learning_rate": 1.9057390277658126e-05,
      "loss": 2.6254,
      "step": 11130
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0468405485153198,
      "learning_rate": 1.9057215761187973e-05,
      "loss": 2.3803,
      "step": 11131
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0021412372589111,
      "learning_rate": 1.905704122936338e-05,
      "loss": 2.3895,
      "step": 11132
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0086179971694946,
      "learning_rate": 1.9056866682184643e-05,
      "loss": 2.4742,
      "step": 11133
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9714980125427246,
      "learning_rate": 1.9056692119652055e-05,
      "loss": 2.485,
      "step": 11134
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0503995418548584,
      "learning_rate": 1.9056517541765918e-05,
      "loss": 2.5938,
      "step": 11135
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1670483350753784,
      "learning_rate": 1.905634294852652e-05,
      "loss": 2.6678,
      "step": 11136
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9969838261604309,
      "learning_rate": 1.9056168339934164e-05,
      "loss": 2.5906,
      "step": 11137
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0232762098312378,
      "learning_rate": 1.905599371598914e-05,
      "loss": 2.5961,
      "step": 11138
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9562255144119263,
      "learning_rate": 1.9055819076691752e-05,
      "loss": 2.6899,
      "step": 11139
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0327857732772827,
      "learning_rate": 1.9055644422042285e-05,
      "loss": 2.6699,
      "step": 11140
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1463404893875122,
      "learning_rate": 1.9055469752041044e-05,
      "loss": 2.3471,
      "step": 11141
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9660621285438538,
      "learning_rate": 1.9055295066688324e-05,
      "loss": 2.5745,
      "step": 11142
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.041062831878662,
      "learning_rate": 1.9055120365984418e-05,
      "loss": 2.7332,
      "step": 11143
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0778502225875854,
      "learning_rate": 1.9054945649929623e-05,
      "loss": 2.5432,
      "step": 11144
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9930750131607056,
      "learning_rate": 1.905477091852424e-05,
      "loss": 2.6201,
      "step": 11145
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.091038465499878,
      "learning_rate": 1.9054596171768556e-05,
      "loss": 2.6721,
      "step": 11146
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9872841835021973,
      "learning_rate": 1.9054421409662873e-05,
      "loss": 2.6917,
      "step": 11147
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0720679759979248,
      "learning_rate": 1.9054246632207486e-05,
      "loss": 2.6781,
      "step": 11148
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.161948561668396,
      "learning_rate": 1.9054071839402697e-05,
      "loss": 2.4753,
      "step": 11149
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1987884044647217,
      "learning_rate": 1.9053897031248793e-05,
      "loss": 2.4947,
      "step": 11150
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1877542734146118,
      "learning_rate": 1.9053722207746076e-05,
      "loss": 2.4637,
      "step": 11151
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.030489206314087,
      "learning_rate": 1.9053547368894837e-05,
      "loss": 2.7741,
      "step": 11152
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9905027747154236,
      "learning_rate": 1.9053372514695382e-05,
      "loss": 2.5805,
      "step": 11153
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.032781958580017,
      "learning_rate": 1.9053197645147995e-05,
      "loss": 2.6563,
      "step": 11154
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0772117376327515,
      "learning_rate": 1.9053022760252987e-05,
      "loss": 2.7121,
      "step": 11155
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0571569204330444,
      "learning_rate": 1.905284786001064e-05,
      "loss": 2.6054,
      "step": 11156
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1523315906524658,
      "learning_rate": 1.9052672944421263e-05,
      "loss": 2.4802,
      "step": 11157
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1085401773452759,
      "learning_rate": 1.9052498013485143e-05,
      "loss": 2.5311,
      "step": 11158
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0277884006500244,
      "learning_rate": 1.905232306720258e-05,
      "loss": 2.5155,
      "step": 11159
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0025867223739624,
      "learning_rate": 1.905214810557387e-05,
      "loss": 2.6123,
      "step": 11160
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0320930480957031,
      "learning_rate": 1.9051973128599313e-05,
      "loss": 2.4017,
      "step": 11161
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2080048322677612,
      "learning_rate": 1.9051798136279203e-05,
      "loss": 2.6018,
      "step": 11162
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0874086618423462,
      "learning_rate": 1.9051623128613837e-05,
      "loss": 2.6134,
      "step": 11163
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0055954456329346,
      "learning_rate": 1.905144810560351e-05,
      "loss": 2.4049,
      "step": 11164
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9724318981170654,
      "learning_rate": 1.9051273067248522e-05,
      "loss": 2.4269,
      "step": 11165
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.050951361656189,
      "learning_rate": 1.9051098013549165e-05,
      "loss": 2.4893,
      "step": 11166
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9789493680000305,
      "learning_rate": 1.9050922944505742e-05,
      "loss": 2.5409,
      "step": 11167
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1086522340774536,
      "learning_rate": 1.9050747860118545e-05,
      "loss": 2.6987,
      "step": 11168
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9286010265350342,
      "learning_rate": 1.905057276038787e-05,
      "loss": 2.7104,
      "step": 11169
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0783803462982178,
      "learning_rate": 1.9050397645314018e-05,
      "loss": 2.6935,
      "step": 11170
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0881993770599365,
      "learning_rate": 1.905022251489728e-05,
      "loss": 2.5903,
      "step": 11171
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9143735766410828,
      "learning_rate": 1.9050047369137962e-05,
      "loss": 2.6448,
      "step": 11172
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.115460753440857,
      "learning_rate": 1.9049872208036356e-05,
      "loss": 2.4722,
      "step": 11173
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5216426849365234,
      "learning_rate": 1.9049697031592757e-05,
      "loss": 2.4681,
      "step": 11174
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0248316526412964,
      "learning_rate": 1.904952183980746e-05,
      "loss": 2.4268,
      "step": 11175
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9145914912223816,
      "learning_rate": 1.9049346632680774e-05,
      "loss": 2.5378,
      "step": 11176
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0097851753234863,
      "learning_rate": 1.9049171410212982e-05,
      "loss": 2.8382,
      "step": 11177
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9565028548240662,
      "learning_rate": 1.9048996172404387e-05,
      "loss": 2.5602,
      "step": 11178
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9848664402961731,
      "learning_rate": 1.9048820919255287e-05,
      "loss": 2.7928,
      "step": 11179
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0184379816055298,
      "learning_rate": 1.9048645650765976e-05,
      "loss": 2.5375,
      "step": 11180
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9965556263923645,
      "learning_rate": 1.9048470366936757e-05,
      "loss": 2.5271,
      "step": 11181
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0341070890426636,
      "learning_rate": 1.9048295067767916e-05,
      "loss": 2.5592,
      "step": 11182
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0351545810699463,
      "learning_rate": 1.9048119753259763e-05,
      "loss": 2.4807,
      "step": 11183
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.065830111503601,
      "learning_rate": 1.904794442341259e-05,
      "loss": 2.3763,
      "step": 11184
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.071365237236023,
      "learning_rate": 1.9047769078226694e-05,
      "loss": 2.609,
      "step": 11185
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9197301864624023,
      "learning_rate": 1.9047593717702368e-05,
      "loss": 2.4859,
      "step": 11186
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0351629257202148,
      "learning_rate": 1.9047418341839915e-05,
      "loss": 2.6878,
      "step": 11187
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9890198111534119,
      "learning_rate": 1.9047242950639637e-05,
      "loss": 2.5854,
      "step": 11188
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0263971090316772,
      "learning_rate": 1.9047067544101814e-05,
      "loss": 2.5053,
      "step": 11189
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.060661792755127,
      "learning_rate": 1.904689212222676e-05,
      "loss": 2.3409,
      "step": 11190
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.064541220664978,
      "learning_rate": 1.9046716685014768e-05,
      "loss": 2.3961,
      "step": 11191
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.069151520729065,
      "learning_rate": 1.9046541232466133e-05,
      "loss": 2.5257,
      "step": 11192
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.128779411315918,
      "learning_rate": 1.9046365764581153e-05,
      "loss": 2.561,
      "step": 11193
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9320483803749084,
      "learning_rate": 1.9046190281360127e-05,
      "loss": 2.5713,
      "step": 11194
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1425905227661133,
      "learning_rate": 1.904601478280335e-05,
      "loss": 2.4621,
      "step": 11195
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2314844131469727,
      "learning_rate": 1.904583926891112e-05,
      "loss": 2.8347,
      "step": 11196
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.112684726715088,
      "learning_rate": 1.904566373968374e-05,
      "loss": 2.3742,
      "step": 11197
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9959829449653625,
      "learning_rate": 1.9045488195121503e-05,
      "loss": 2.5482,
      "step": 11198
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0140362977981567,
      "learning_rate": 1.9045312635224703e-05,
      "loss": 2.682,
      "step": 11199
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0089634656906128,
      "learning_rate": 1.9045137059993642e-05,
      "loss": 2.6447,
      "step": 11200
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.047175407409668,
      "learning_rate": 1.904496146942862e-05,
      "loss": 2.6663,
      "step": 11201
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0690243244171143,
      "learning_rate": 1.904478586352993e-05,
      "loss": 2.5076,
      "step": 11202
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9777388572692871,
      "learning_rate": 1.9044610242297872e-05,
      "loss": 2.5411,
      "step": 11203
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9719953536987305,
      "learning_rate": 1.904443460573274e-05,
      "loss": 2.6584,
      "step": 11204
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9274183511734009,
      "learning_rate": 1.904425895383484e-05,
      "loss": 2.4543,
      "step": 11205
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9376554489135742,
      "learning_rate": 1.9044083286604463e-05,
      "loss": 2.5182,
      "step": 11206
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9876237511634827,
      "learning_rate": 1.9043907604041907e-05,
      "loss": 2.6371,
      "step": 11207
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.215645432472229,
      "learning_rate": 1.9043731906147468e-05,
      "loss": 2.6468,
      "step": 11208
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9894717931747437,
      "learning_rate": 1.9043556192921452e-05,
      "loss": 2.5075,
      "step": 11209
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9534835815429688,
      "learning_rate": 1.9043380464364153e-05,
      "loss": 2.255,
      "step": 11210
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1500376462936401,
      "learning_rate": 1.9043204720475866e-05,
      "loss": 2.5988,
      "step": 11211
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0985503196716309,
      "learning_rate": 1.9043028961256892e-05,
      "loss": 2.5189,
      "step": 11212
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0622656345367432,
      "learning_rate": 1.9042853186707524e-05,
      "loss": 2.6237,
      "step": 11213
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0825397968292236,
      "learning_rate": 1.9042677396828068e-05,
      "loss": 2.6621,
      "step": 11214
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1806973218917847,
      "learning_rate": 1.9042501591618817e-05,
      "loss": 2.7079,
      "step": 11215
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9527177214622498,
      "learning_rate": 1.904232577108007e-05,
      "loss": 2.5421,
      "step": 11216
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9965793490409851,
      "learning_rate": 1.9042149935212122e-05,
      "loss": 2.6481,
      "step": 11217
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9483284950256348,
      "learning_rate": 1.9041974084015277e-05,
      "loss": 2.3791,
      "step": 11218
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9538002014160156,
      "learning_rate": 1.904179821748983e-05,
      "loss": 2.6453,
      "step": 11219
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9329636096954346,
      "learning_rate": 1.904162233563608e-05,
      "loss": 2.5648,
      "step": 11220
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9985055327415466,
      "learning_rate": 1.9041446438454323e-05,
      "loss": 2.6328,
      "step": 11221
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9387288689613342,
      "learning_rate": 1.904127052594486e-05,
      "loss": 2.6225,
      "step": 11222
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0263134241104126,
      "learning_rate": 1.9041094598107985e-05,
      "loss": 2.6582,
      "step": 11223
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0840836763381958,
      "learning_rate": 1.9040918654944e-05,
      "loss": 2.6055,
      "step": 11224
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0983259677886963,
      "learning_rate": 1.9040742696453204e-05,
      "loss": 2.5086,
      "step": 11225
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.012588620185852,
      "learning_rate": 1.9040566722635895e-05,
      "loss": 2.4316,
      "step": 11226
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1789190769195557,
      "learning_rate": 1.9040390733492366e-05,
      "loss": 2.5511,
      "step": 11227
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0711928606033325,
      "learning_rate": 1.9040214729022924e-05,
      "loss": 2.3057,
      "step": 11228
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0736193656921387,
      "learning_rate": 1.904003870922786e-05,
      "loss": 2.6868,
      "step": 11229
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.060243010520935,
      "learning_rate": 1.9039862674107474e-05,
      "loss": 2.4296,
      "step": 11230
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.998201847076416,
      "learning_rate": 1.9039686623662067e-05,
      "loss": 2.7987,
      "step": 11231
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9477269053459167,
      "learning_rate": 1.9039510557891935e-05,
      "loss": 2.559,
      "step": 11232
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0616425275802612,
      "learning_rate": 1.903933447679738e-05,
      "loss": 2.5955,
      "step": 11233
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9053151607513428,
      "learning_rate": 1.9039158380378696e-05,
      "loss": 2.5285,
      "step": 11234
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0428454875946045,
      "learning_rate": 1.9038982268636187e-05,
      "loss": 2.3918,
      "step": 11235
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0439902544021606,
      "learning_rate": 1.9038806141570144e-05,
      "loss": 2.4245,
      "step": 11236
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.026904821395874,
      "learning_rate": 1.9038629999180868e-05,
      "loss": 2.4625,
      "step": 11237
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9243875741958618,
      "learning_rate": 1.9038453841468665e-05,
      "loss": 2.6055,
      "step": 11238
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9506273865699768,
      "learning_rate": 1.9038277668433825e-05,
      "loss": 2.6191,
      "step": 11239
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0255118608474731,
      "learning_rate": 1.903810148007665e-05,
      "loss": 2.7004,
      "step": 11240
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9383275508880615,
      "learning_rate": 1.9037925276397436e-05,
      "loss": 2.6304,
      "step": 11241
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0020949840545654,
      "learning_rate": 1.9037749057396487e-05,
      "loss": 2.5723,
      "step": 11242
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0339857339859009,
      "learning_rate": 1.9037572823074097e-05,
      "loss": 2.7296,
      "step": 11243
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0109364986419678,
      "learning_rate": 1.903739657343057e-05,
      "loss": 2.4729,
      "step": 11244
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0034478902816772,
      "learning_rate": 1.9037220308466197e-05,
      "loss": 2.4089,
      "step": 11245
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9757254719734192,
      "learning_rate": 1.9037044028181282e-05,
      "loss": 2.341,
      "step": 11246
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9636803865432739,
      "learning_rate": 1.9036867732576123e-05,
      "loss": 2.5795,
      "step": 11247
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9947671294212341,
      "learning_rate": 1.9036691421651018e-05,
      "loss": 2.5775,
      "step": 11248
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0152270793914795,
      "learning_rate": 1.903651509540627e-05,
      "loss": 2.5912,
      "step": 11249
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.078768014907837,
      "learning_rate": 1.9036338753842172e-05,
      "loss": 2.6115,
      "step": 11250
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0000609159469604,
      "learning_rate": 1.9036162396959028e-05,
      "loss": 2.6465,
      "step": 11251
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.116518259048462,
      "learning_rate": 1.903598602475713e-05,
      "loss": 2.4993,
      "step": 11252
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.062827229499817,
      "learning_rate": 1.9035809637236787e-05,
      "loss": 2.3923,
      "step": 11253
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0075407028198242,
      "learning_rate": 1.903563323439829e-05,
      "loss": 2.3085,
      "step": 11254
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0252195596694946,
      "learning_rate": 1.9035456816241938e-05,
      "loss": 2.4331,
      "step": 11255
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.021252989768982,
      "learning_rate": 1.903528038276804e-05,
      "loss": 2.4829,
      "step": 11256
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0274895429611206,
      "learning_rate": 1.903510393397688e-05,
      "loss": 2.4751,
      "step": 11257
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0192084312438965,
      "learning_rate": 1.9034927469868766e-05,
      "loss": 2.6131,
      "step": 11258
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.989249050617218,
      "learning_rate": 1.9034750990443997e-05,
      "loss": 2.4851,
      "step": 11259
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9698565602302551,
      "learning_rate": 1.9034574495702872e-05,
      "loss": 2.4837,
      "step": 11260
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0395559072494507,
      "learning_rate": 1.903439798564569e-05,
      "loss": 2.7479,
      "step": 11261
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.034152865409851,
      "learning_rate": 1.9034221460272752e-05,
      "loss": 2.4924,
      "step": 11262
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0408514738082886,
      "learning_rate": 1.9034044919584353e-05,
      "loss": 2.7237,
      "step": 11263
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0836437940597534,
      "learning_rate": 1.903386836358079e-05,
      "loss": 2.4941,
      "step": 11264
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9864499568939209,
      "learning_rate": 1.9033691792262373e-05,
      "loss": 2.6073,
      "step": 11265
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9294871687889099,
      "learning_rate": 1.9033515205629392e-05,
      "loss": 2.5782,
      "step": 11266
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1137800216674805,
      "learning_rate": 1.903333860368215e-05,
      "loss": 2.413,
      "step": 11267
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0606319904327393,
      "learning_rate": 1.9033161986420946e-05,
      "loss": 2.5678,
      "step": 11268
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9711009860038757,
      "learning_rate": 1.9032985353846077e-05,
      "loss": 2.4528,
      "step": 11269
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.941917896270752,
      "learning_rate": 1.9032808705957845e-05,
      "loss": 2.4361,
      "step": 11270
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0556886196136475,
      "learning_rate": 1.9032632042756552e-05,
      "loss": 2.7349,
      "step": 11271
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.002184510231018,
      "learning_rate": 1.9032455364242492e-05,
      "loss": 2.4943,
      "step": 11272
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0892499685287476,
      "learning_rate": 1.9032278670415963e-05,
      "loss": 2.5177,
      "step": 11273
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9905616641044617,
      "learning_rate": 1.9032101961277275e-05,
      "loss": 2.536,
      "step": 11274
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0269876718521118,
      "learning_rate": 1.9031925236826717e-05,
      "loss": 2.3876,
      "step": 11275
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0151100158691406,
      "learning_rate": 1.9031748497064596e-05,
      "loss": 2.567,
      "step": 11276
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9790034294128418,
      "learning_rate": 1.9031571741991205e-05,
      "loss": 2.3972,
      "step": 11277
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9950592517852783,
      "learning_rate": 1.9031394971606848e-05,
      "loss": 2.4106,
      "step": 11278
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9601832032203674,
      "learning_rate": 1.9031218185911827e-05,
      "loss": 2.65,
      "step": 11279
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.089911937713623,
      "learning_rate": 1.9031041384906435e-05,
      "loss": 2.3582,
      "step": 11280
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.947752058506012,
      "learning_rate": 1.9030864568590974e-05,
      "loss": 2.8046,
      "step": 11281
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.040553092956543,
      "learning_rate": 1.9030687736965747e-05,
      "loss": 2.4376,
      "step": 11282
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3057918548583984,
      "learning_rate": 1.903051089003105e-05,
      "loss": 2.6475,
      "step": 11283
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.984368085861206,
      "learning_rate": 1.9030334027787186e-05,
      "loss": 2.9404,
      "step": 11284
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.055014967918396,
      "learning_rate": 1.9030157150234455e-05,
      "loss": 2.6748,
      "step": 11285
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0960524082183838,
      "learning_rate": 1.902998025737315e-05,
      "loss": 2.7091,
      "step": 11286
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0456147193908691,
      "learning_rate": 1.902980334920358e-05,
      "loss": 2.7432,
      "step": 11287
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0047792196273804,
      "learning_rate": 1.9029626425726036e-05,
      "loss": 2.4249,
      "step": 11288
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0775651931762695,
      "learning_rate": 1.902944948694083e-05,
      "loss": 2.4152,
      "step": 11289
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1578867435455322,
      "learning_rate": 1.902927253284825e-05,
      "loss": 2.609,
      "step": 11290
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0183005332946777,
      "learning_rate": 1.9029095563448604e-05,
      "loss": 2.623,
      "step": 11291
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9428096413612366,
      "learning_rate": 1.9028918578742182e-05,
      "loss": 2.4015,
      "step": 11292
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0377070903778076,
      "learning_rate": 1.9028741578729297e-05,
      "loss": 2.6422,
      "step": 11293
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.982575535774231,
      "learning_rate": 1.902856456341024e-05,
      "loss": 2.7557,
      "step": 11294
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9658081531524658,
      "learning_rate": 1.9028387532785316e-05,
      "loss": 2.7412,
      "step": 11295
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1495131254196167,
      "learning_rate": 1.9028210486854822e-05,
      "loss": 2.6739,
      "step": 11296
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9506412148475647,
      "learning_rate": 1.9028033425619062e-05,
      "loss": 2.4662,
      "step": 11297
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9043307304382324,
      "learning_rate": 1.9027856349078328e-05,
      "loss": 2.4012,
      "step": 11298
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0672894716262817,
      "learning_rate": 1.902767925723293e-05,
      "loss": 2.5241,
      "step": 11299
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0090125799179077,
      "learning_rate": 1.902750215008316e-05,
      "loss": 2.6621,
      "step": 11300
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.010788083076477,
      "learning_rate": 1.9027325027629325e-05,
      "loss": 2.5153,
      "step": 11301
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0150728225708008,
      "learning_rate": 1.902714788987172e-05,
      "loss": 2.5567,
      "step": 11302
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0579668283462524,
      "learning_rate": 1.902697073681065e-05,
      "loss": 2.6623,
      "step": 11303
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1114031076431274,
      "learning_rate": 1.9026793568446413e-05,
      "loss": 2.499,
      "step": 11304
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9284084439277649,
      "learning_rate": 1.9026616384779308e-05,
      "loss": 2.5441,
      "step": 11305
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9585529565811157,
      "learning_rate": 1.9026439185809638e-05,
      "loss": 2.652,
      "step": 11306
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9258702397346497,
      "learning_rate": 1.90262619715377e-05,
      "loss": 2.4377,
      "step": 11307
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9996799826622009,
      "learning_rate": 1.9026084741963796e-05,
      "loss": 2.6247,
      "step": 11308
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2296303510665894,
      "learning_rate": 1.9025907497088227e-05,
      "loss": 2.4579,
      "step": 11309
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9320798516273499,
      "learning_rate": 1.9025730236911295e-05,
      "loss": 2.5163,
      "step": 11310
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9853265881538391,
      "learning_rate": 1.90255529614333e-05,
      "loss": 2.3988,
      "step": 11311
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9500340819358826,
      "learning_rate": 1.902537567065454e-05,
      "loss": 2.4297,
      "step": 11312
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.008703589439392,
      "learning_rate": 1.9025198364575317e-05,
      "loss": 2.5044,
      "step": 11313
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1103726625442505,
      "learning_rate": 1.902502104319593e-05,
      "loss": 2.8641,
      "step": 11314
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0084365606307983,
      "learning_rate": 1.9024843706516682e-05,
      "loss": 2.4254,
      "step": 11315
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0583178997039795,
      "learning_rate": 1.9024666354537876e-05,
      "loss": 2.4475,
      "step": 11316
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.149012565612793,
      "learning_rate": 1.9024488987259805e-05,
      "loss": 2.6999,
      "step": 11317
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0685259103775024,
      "learning_rate": 1.9024311604682774e-05,
      "loss": 2.6022,
      "step": 11318
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0696061849594116,
      "learning_rate": 1.9024134206807088e-05,
      "loss": 2.3724,
      "step": 11319
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9282938241958618,
      "learning_rate": 1.902395679363304e-05,
      "loss": 2.3145,
      "step": 11320
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0218254327774048,
      "learning_rate": 1.9023779365160937e-05,
      "loss": 2.4546,
      "step": 11321
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2251906394958496,
      "learning_rate": 1.9023601921391078e-05,
      "loss": 2.5,
      "step": 11322
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.424622654914856,
      "learning_rate": 1.902342446232376e-05,
      "loss": 2.3605,
      "step": 11323
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.10542631149292,
      "learning_rate": 1.9023246987959284e-05,
      "loss": 2.5381,
      "step": 11324
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9701945185661316,
      "learning_rate": 1.902306949829796e-05,
      "loss": 2.4835,
      "step": 11325
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9996382594108582,
      "learning_rate": 1.9022891993340077e-05,
      "loss": 2.4277,
      "step": 11326
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0160799026489258,
      "learning_rate": 1.9022714473085944e-05,
      "loss": 2.3256,
      "step": 11327
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1333982944488525,
      "learning_rate": 1.902253693753586e-05,
      "loss": 2.5859,
      "step": 11328
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0362690687179565,
      "learning_rate": 1.9022359386690128e-05,
      "loss": 2.7191,
      "step": 11329
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1692935228347778,
      "learning_rate": 1.9022181820549043e-05,
      "loss": 2.5377,
      "step": 11330
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.99507737159729,
      "learning_rate": 1.902200423911291e-05,
      "loss": 2.555,
      "step": 11331
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.189136266708374,
      "learning_rate": 1.902182664238203e-05,
      "loss": 2.5757,
      "step": 11332
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.974652111530304,
      "learning_rate": 1.9021649030356704e-05,
      "loss": 2.5018,
      "step": 11333
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0335922241210938,
      "learning_rate": 1.9021471403037234e-05,
      "loss": 2.4667,
      "step": 11334
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9775955677032471,
      "learning_rate": 1.9021293760423914e-05,
      "loss": 2.492,
      "step": 11335
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0432413816452026,
      "learning_rate": 1.9021116102517055e-05,
      "loss": 2.7643,
      "step": 11336
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9485159516334534,
      "learning_rate": 1.9020938429316955e-05,
      "loss": 2.4754,
      "step": 11337
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.019782543182373,
      "learning_rate": 1.9020760740823913e-05,
      "loss": 2.4628,
      "step": 11338
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9513415694236755,
      "learning_rate": 1.902058303703823e-05,
      "loss": 2.5345,
      "step": 11339
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0742007493972778,
      "learning_rate": 1.9020405317960212e-05,
      "loss": 2.7058,
      "step": 11340
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0735050439834595,
      "learning_rate": 1.9020227583590154e-05,
      "loss": 2.7321,
      "step": 11341
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0557061433792114,
      "learning_rate": 1.902004983392836e-05,
      "loss": 2.6317,
      "step": 11342
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9958510994911194,
      "learning_rate": 1.9019872068975137e-05,
      "loss": 2.6379,
      "step": 11343
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0263373851776123,
      "learning_rate": 1.9019694288730774e-05,
      "loss": 2.6521,
      "step": 11344
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9168546199798584,
      "learning_rate": 1.9019516493195586e-05,
      "loss": 2.7438,
      "step": 11345
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9855021238327026,
      "learning_rate": 1.9019338682369864e-05,
      "loss": 2.3474,
      "step": 11346
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0316743850708008,
      "learning_rate": 1.9019160856253914e-05,
      "loss": 2.4595,
      "step": 11347
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.015116572380066,
      "learning_rate": 1.9018983014848036e-05,
      "loss": 2.5408,
      "step": 11348
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0125806331634521,
      "learning_rate": 1.9018805158152532e-05,
      "loss": 2.5197,
      "step": 11349
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0349887609481812,
      "learning_rate": 1.9018627286167704e-05,
      "loss": 2.5882,
      "step": 11350
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0516958236694336,
      "learning_rate": 1.9018449398893857e-05,
      "loss": 2.4683,
      "step": 11351
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.944969892501831,
      "learning_rate": 1.9018271496331286e-05,
      "loss": 2.6335,
      "step": 11352
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.8974602222442627,
      "learning_rate": 1.9018093578480295e-05,
      "loss": 2.5763,
      "step": 11353
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.066933274269104,
      "learning_rate": 1.9017915645341186e-05,
      "loss": 2.7958,
      "step": 11354
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.01987624168396,
      "learning_rate": 1.9017737696914264e-05,
      "loss": 2.6143,
      "step": 11355
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0410596132278442,
      "learning_rate": 1.901755973319982e-05,
      "loss": 2.7491,
      "step": 11356
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9416669011116028,
      "learning_rate": 1.901738175419817e-05,
      "loss": 2.588,
      "step": 11357
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.8763383626937866,
      "learning_rate": 1.9017203759909608e-05,
      "loss": 2.6929,
      "step": 11358
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1308724880218506,
      "learning_rate": 1.9017025750334436e-05,
      "loss": 2.4111,
      "step": 11359
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.166473627090454,
      "learning_rate": 1.9016847725472954e-05,
      "loss": 2.6204,
      "step": 11360
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9900059700012207,
      "learning_rate": 1.901666968532547e-05,
      "loss": 2.345,
      "step": 11361
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9534971714019775,
      "learning_rate": 1.901649162989228e-05,
      "loss": 2.4053,
      "step": 11362
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0405553579330444,
      "learning_rate": 1.9016313559173688e-05,
      "loss": 2.3996,
      "step": 11363
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0156866312026978,
      "learning_rate": 1.9016135473169994e-05,
      "loss": 2.452,
      "step": 11364
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0656065940856934,
      "learning_rate": 1.9015957371881502e-05,
      "loss": 2.5884,
      "step": 11365
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.121291160583496,
      "learning_rate": 1.9015779255308518e-05,
      "loss": 2.534,
      "step": 11366
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.080381989479065,
      "learning_rate": 1.9015601123451336e-05,
      "loss": 2.6943,
      "step": 11367
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1559584140777588,
      "learning_rate": 1.901542297631026e-05,
      "loss": 2.5218,
      "step": 11368
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0202572345733643,
      "learning_rate": 1.9015244813885594e-05,
      "loss": 2.6238,
      "step": 11369
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9609518051147461,
      "learning_rate": 1.9015066636177645e-05,
      "loss": 2.5339,
      "step": 11370
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0726398229599,
      "learning_rate": 1.9014888443186703e-05,
      "loss": 2.2848,
      "step": 11371
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1503986120224,
      "learning_rate": 1.901471023491308e-05,
      "loss": 2.6129,
      "step": 11372
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9729711413383484,
      "learning_rate": 1.9014532011357072e-05,
      "loss": 2.5666,
      "step": 11373
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0984070301055908,
      "learning_rate": 1.9014353772518985e-05,
      "loss": 2.5883,
      "step": 11374
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1378445625305176,
      "learning_rate": 1.9014175518399122e-05,
      "loss": 2.697,
      "step": 11375
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0146970748901367,
      "learning_rate": 1.9013997248997783e-05,
      "loss": 2.3607,
      "step": 11376
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9300671219825745,
      "learning_rate": 1.9013818964315267e-05,
      "loss": 2.3976,
      "step": 11377
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0944905281066895,
      "learning_rate": 1.901364066435188e-05,
      "loss": 2.5418,
      "step": 11378
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0010441541671753,
      "learning_rate": 1.901346234910793e-05,
      "loss": 2.5494,
      "step": 11379
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0804754495620728,
      "learning_rate": 1.9013284018583712e-05,
      "loss": 2.568,
      "step": 11380
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1423442363739014,
      "learning_rate": 1.9013105672779523e-05,
      "loss": 2.4527,
      "step": 11381
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.027160882949829,
      "learning_rate": 1.901292731169568e-05,
      "loss": 2.6448,
      "step": 11382
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9784604907035828,
      "learning_rate": 1.9012748935332473e-05,
      "loss": 2.6428,
      "step": 11383
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0824612379074097,
      "learning_rate": 1.9012570543690208e-05,
      "loss": 2.5404,
      "step": 11384
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9846654534339905,
      "learning_rate": 1.901239213676919e-05,
      "loss": 2.4591,
      "step": 11385
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0202014446258545,
      "learning_rate": 1.901221371456972e-05,
      "loss": 2.6017,
      "step": 11386
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.089404821395874,
      "learning_rate": 1.9012035277092096e-05,
      "loss": 2.6377,
      "step": 11387
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9826940298080444,
      "learning_rate": 1.901185682433663e-05,
      "loss": 2.6872,
      "step": 11388
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0473003387451172,
      "learning_rate": 1.9011678356303616e-05,
      "loss": 2.4502,
      "step": 11389
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0050922632217407,
      "learning_rate": 1.9011499872993358e-05,
      "loss": 2.5941,
      "step": 11390
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0550155639648438,
      "learning_rate": 1.9011321374406164e-05,
      "loss": 2.6566,
      "step": 11391
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0038440227508545,
      "learning_rate": 1.9011142860542333e-05,
      "loss": 2.4878,
      "step": 11392
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9513740539550781,
      "learning_rate": 1.9010964331402165e-05,
      "loss": 2.5103,
      "step": 11393
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0049818754196167,
      "learning_rate": 1.9010785786985965e-05,
      "loss": 2.515,
      "step": 11394
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.019511103630066,
      "learning_rate": 1.9010607227294037e-05,
      "loss": 2.5946,
      "step": 11395
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9988080859184265,
      "learning_rate": 1.9010428652326683e-05,
      "loss": 2.5085,
      "step": 11396
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0185267925262451,
      "learning_rate": 1.9010250062084205e-05,
      "loss": 2.6434,
      "step": 11397
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0312385559082031,
      "learning_rate": 1.9010071456566907e-05,
      "loss": 2.5724,
      "step": 11398
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0565894842147827,
      "learning_rate": 1.9009892835775087e-05,
      "loss": 2.4877,
      "step": 11399
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.038583517074585,
      "learning_rate": 1.9009714199709054e-05,
      "loss": 2.5326,
      "step": 11400
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0458333492279053,
      "learning_rate": 1.900953554836911e-05,
      "loss": 2.5496,
      "step": 11401
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9524747133255005,
      "learning_rate": 1.9009356881755553e-05,
      "loss": 2.507,
      "step": 11402
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.160775065422058,
      "learning_rate": 1.900917819986869e-05,
      "loss": 2.4534,
      "step": 11403
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0256922245025635,
      "learning_rate": 1.9008999502708824e-05,
      "loss": 2.5396,
      "step": 11404
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0249067544937134,
      "learning_rate": 1.9008820790276256e-05,
      "loss": 2.7076,
      "step": 11405
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0536977052688599,
      "learning_rate": 1.900864206257129e-05,
      "loss": 2.4669,
      "step": 11406
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9854414463043213,
      "learning_rate": 1.9008463319594233e-05,
      "loss": 2.1339,
      "step": 11407
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1676069498062134,
      "learning_rate": 1.9008284561345382e-05,
      "loss": 2.3899,
      "step": 11408
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9704493880271912,
      "learning_rate": 1.9008105787825038e-05,
      "loss": 2.5422,
      "step": 11409
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1189005374908447,
      "learning_rate": 1.9007926999033514e-05,
      "loss": 2.5342,
      "step": 11410
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2111021280288696,
      "learning_rate": 1.9007748194971103e-05,
      "loss": 2.6152,
      "step": 11411
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.084958553314209,
      "learning_rate": 1.9007569375638115e-05,
      "loss": 2.5308,
      "step": 11412
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9277654886245728,
      "learning_rate": 1.9007390541034848e-05,
      "loss": 2.5586,
      "step": 11413
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0857616662979126,
      "learning_rate": 1.900721169116161e-05,
      "loss": 2.8157,
      "step": 11414
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9824880361557007,
      "learning_rate": 1.90070328260187e-05,
      "loss": 2.5921,
      "step": 11415
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0120692253112793,
      "learning_rate": 1.9006853945606424e-05,
      "loss": 2.8066,
      "step": 11416
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9868975877761841,
      "learning_rate": 1.9006675049925085e-05,
      "loss": 2.3697,
      "step": 11417
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0998358726501465,
      "learning_rate": 1.9006496138974983e-05,
      "loss": 2.6122,
      "step": 11418
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0537081956863403,
      "learning_rate": 1.9006317212756426e-05,
      "loss": 2.6376,
      "step": 11419
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9507600665092468,
      "learning_rate": 1.9006138271269713e-05,
      "loss": 2.5652,
      "step": 11420
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.00618314743042,
      "learning_rate": 1.9005959314515153e-05,
      "loss": 2.3915,
      "step": 11421
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.063987374305725,
      "learning_rate": 1.9005780342493045e-05,
      "loss": 2.4461,
      "step": 11422
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0581904649734497,
      "learning_rate": 1.9005601355203693e-05,
      "loss": 2.7886,
      "step": 11423
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0197558403015137,
      "learning_rate": 1.90054223526474e-05,
      "loss": 2.4988,
      "step": 11424
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0987051725387573,
      "learning_rate": 1.900524333482447e-05,
      "loss": 2.7556,
      "step": 11425
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.016493558883667,
      "learning_rate": 1.9005064301735206e-05,
      "loss": 2.5833,
      "step": 11426
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0771880149841309,
      "learning_rate": 1.9004885253379915e-05,
      "loss": 2.5972,
      "step": 11427
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0271142721176147,
      "learning_rate": 1.9004706189758898e-05,
      "loss": 2.7288,
      "step": 11428
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0230131149291992,
      "learning_rate": 1.9004527110872457e-05,
      "loss": 2.6421,
      "step": 11429
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9956290125846863,
      "learning_rate": 1.9004348016720896e-05,
      "loss": 2.5372,
      "step": 11430
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0659170150756836,
      "learning_rate": 1.900416890730452e-05,
      "loss": 2.537,
      "step": 11431
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0440266132354736,
      "learning_rate": 1.9003989782623633e-05,
      "loss": 2.5736,
      "step": 11432
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0416523218154907,
      "learning_rate": 1.9003810642678537e-05,
      "loss": 2.533,
      "step": 11433
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0307537317276,
      "learning_rate": 1.900363148746954e-05,
      "loss": 2.4614,
      "step": 11434
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0123569965362549,
      "learning_rate": 1.900345231699694e-05,
      "loss": 2.4811,
      "step": 11435
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9719909429550171,
      "learning_rate": 1.900327313126104e-05,
      "loss": 2.5061,
      "step": 11436
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0139739513397217,
      "learning_rate": 1.900309393026215e-05,
      "loss": 2.6442,
      "step": 11437
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1278468370437622,
      "learning_rate": 1.900291471400057e-05,
      "loss": 2.4898,
      "step": 11438
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.098023772239685,
      "learning_rate": 1.9002735482476603e-05,
      "loss": 2.6255,
      "step": 11439
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0274643898010254,
      "learning_rate": 1.9002556235690556e-05,
      "loss": 2.5131,
      "step": 11440
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9339815378189087,
      "learning_rate": 1.900237697364273e-05,
      "loss": 2.4422,
      "step": 11441
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9473863244056702,
      "learning_rate": 1.9002197696333434e-05,
      "loss": 2.3886,
      "step": 11442
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9469642639160156,
      "learning_rate": 1.9002018403762962e-05,
      "loss": 2.3216,
      "step": 11443
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1614543199539185,
      "learning_rate": 1.9001839095931626e-05,
      "loss": 2.6311,
      "step": 11444
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9403109550476074,
      "learning_rate": 1.9001659772839728e-05,
      "loss": 2.4257,
      "step": 11445
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9580495357513428,
      "learning_rate": 1.9001480434487573e-05,
      "loss": 2.4922,
      "step": 11446
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.109364628791809,
      "learning_rate": 1.9001301080875465e-05,
      "loss": 2.4637,
      "step": 11447
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.084971308708191,
      "learning_rate": 1.9001121712003706e-05,
      "loss": 2.488,
      "step": 11448
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0679771900177002,
      "learning_rate": 1.9000942327872598e-05,
      "loss": 2.5895,
      "step": 11449
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1608983278274536,
      "learning_rate": 1.9000762928482452e-05,
      "loss": 2.4722,
      "step": 11450
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0002415180206299,
      "learning_rate": 1.9000583513833563e-05,
      "loss": 2.2264,
      "step": 11451
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9970374703407288,
      "learning_rate": 1.9000404083926247e-05,
      "loss": 2.3793,
      "step": 11452
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.244408369064331,
      "learning_rate": 1.9000224638760797e-05,
      "loss": 2.5582,
      "step": 11453
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9717860221862793,
      "learning_rate": 1.9000045178337523e-05,
      "loss": 2.47,
      "step": 11454
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0036088228225708,
      "learning_rate": 1.899986570265673e-05,
      "loss": 2.6189,
      "step": 11455
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0225355625152588,
      "learning_rate": 1.8999686211718716e-05,
      "loss": 2.5557,
      "step": 11456
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1923426389694214,
      "learning_rate": 1.8999506705523794e-05,
      "loss": 2.7557,
      "step": 11457
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.143149495124817,
      "learning_rate": 1.899932718407226e-05,
      "loss": 2.4239,
      "step": 11458
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.003784418106079,
      "learning_rate": 1.8999147647364426e-05,
      "loss": 2.7329,
      "step": 11459
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.10017991065979,
      "learning_rate": 1.899896809540059e-05,
      "loss": 2.5645,
      "step": 11460
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9151410460472107,
      "learning_rate": 1.899878852818106e-05,
      "loss": 2.424,
      "step": 11461
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2729912996292114,
      "learning_rate": 1.899860894570614e-05,
      "loss": 2.6611,
      "step": 11462
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.055842638015747,
      "learning_rate": 1.8998429347976134e-05,
      "loss": 2.5713,
      "step": 11463
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.021353840827942,
      "learning_rate": 1.8998249734991347e-05,
      "loss": 2.5878,
      "step": 11464
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0719707012176514,
      "learning_rate": 1.899807010675208e-05,
      "loss": 2.5181,
      "step": 11465
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0285283327102661,
      "learning_rate": 1.8997890463258642e-05,
      "loss": 2.4768,
      "step": 11466
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0699388980865479,
      "learning_rate": 1.8997710804511334e-05,
      "loss": 2.5681,
      "step": 11467
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1493843793869019,
      "learning_rate": 1.8997531130510464e-05,
      "loss": 2.5871,
      "step": 11468
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.957094132900238,
      "learning_rate": 1.8997351441256337e-05,
      "loss": 2.6201,
      "step": 11469
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9915432929992676,
      "learning_rate": 1.899717173674925e-05,
      "loss": 2.4539,
      "step": 11470
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.006435751914978,
      "learning_rate": 1.899699201698952e-05,
      "loss": 2.5184,
      "step": 11471
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0062675476074219,
      "learning_rate": 1.899681228197744e-05,
      "loss": 2.7081,
      "step": 11472
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0518404245376587,
      "learning_rate": 1.8996632531713323e-05,
      "loss": 2.7082,
      "step": 11473
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9503419399261475,
      "learning_rate": 1.8996452766197468e-05,
      "loss": 2.5209,
      "step": 11474
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9846940636634827,
      "learning_rate": 1.8996272985430185e-05,
      "loss": 2.6579,
      "step": 11475
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9517724514007568,
      "learning_rate": 1.8996093189411773e-05,
      "loss": 2.5737,
      "step": 11476
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0180240869522095,
      "learning_rate": 1.899591337814254e-05,
      "loss": 2.6064,
      "step": 11477
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0388470888137817,
      "learning_rate": 1.8995733551622792e-05,
      "loss": 2.5538,
      "step": 11478
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0822763442993164,
      "learning_rate": 1.8995553709852834e-05,
      "loss": 2.7965,
      "step": 11479
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2568352222442627,
      "learning_rate": 1.8995373852832965e-05,
      "loss": 2.6643,
      "step": 11480
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0455576181411743,
      "learning_rate": 1.89951939805635e-05,
      "loss": 2.5415,
      "step": 11481
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.080845832824707,
      "learning_rate": 1.899501409304473e-05,
      "loss": 2.5227,
      "step": 11482
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1184529066085815,
      "learning_rate": 1.8994834190276973e-05,
      "loss": 2.7986,
      "step": 11483
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1279277801513672,
      "learning_rate": 1.899465427226053e-05,
      "loss": 2.6048,
      "step": 11484
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0909148454666138,
      "learning_rate": 1.8994474338995706e-05,
      "loss": 2.7201,
      "step": 11485
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9415764212608337,
      "learning_rate": 1.89942943904828e-05,
      "loss": 2.669,
      "step": 11486
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.106383204460144,
      "learning_rate": 1.8994114426722124e-05,
      "loss": 2.704,
      "step": 11487
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0990647077560425,
      "learning_rate": 1.8993934447713983e-05,
      "loss": 2.6842,
      "step": 11488
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.098903775215149,
      "learning_rate": 1.899375445345868e-05,
      "loss": 2.3875,
      "step": 11489
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0089235305786133,
      "learning_rate": 1.8993574443956523e-05,
      "loss": 2.5695,
      "step": 11490
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0156641006469727,
      "learning_rate": 1.899339441920781e-05,
      "loss": 2.5307,
      "step": 11491
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0255810022354126,
      "learning_rate": 1.8993214379212854e-05,
      "loss": 2.5869,
      "step": 11492
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.051686406135559,
      "learning_rate": 1.8993034323971954e-05,
      "loss": 2.5731,
      "step": 11493
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1073535680770874,
      "learning_rate": 1.899285425348542e-05,
      "loss": 2.6106,
      "step": 11494
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0090068578720093,
      "learning_rate": 1.8992674167753558e-05,
      "loss": 2.3492,
      "step": 11495
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0404369831085205,
      "learning_rate": 1.899249406677667e-05,
      "loss": 2.5697,
      "step": 11496
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0529804229736328,
      "learning_rate": 1.899231395055506e-05,
      "loss": 2.7198,
      "step": 11497
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0442912578582764,
      "learning_rate": 1.8992133819089036e-05,
      "loss": 2.6426,
      "step": 11498
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.8961951732635498,
      "learning_rate": 1.8991953672378906e-05,
      "loss": 2.4397,
      "step": 11499
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0404136180877686,
      "learning_rate": 1.8991773510424967e-05,
      "loss": 2.6288,
      "step": 11500
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1410943269729614,
      "learning_rate": 1.8991593333227532e-05,
      "loss": 2.6055,
      "step": 11501
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0189162492752075,
      "learning_rate": 1.8991413140786906e-05,
      "loss": 2.6321,
      "step": 11502
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0743706226348877,
      "learning_rate": 1.8991232933103393e-05,
      "loss": 2.6924,
      "step": 11503
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0190733671188354,
      "learning_rate": 1.89910527101773e-05,
      "loss": 2.5765,
      "step": 11504
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1912189722061157,
      "learning_rate": 1.8990872472008925e-05,
      "loss": 2.5288,
      "step": 11505
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.092574954032898,
      "learning_rate": 1.8990692218598583e-05,
      "loss": 2.5412,
      "step": 11506
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0045744180679321,
      "learning_rate": 1.8990511949946573e-05,
      "loss": 2.5948,
      "step": 11507
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0807912349700928,
      "learning_rate": 1.8990331666053206e-05,
      "loss": 2.6677,
      "step": 11508
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1081668138504028,
      "learning_rate": 1.899015136691878e-05,
      "loss": 2.8091,
      "step": 11509
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.084177017211914,
      "learning_rate": 1.8989971052543615e-05,
      "loss": 2.3985,
      "step": 11510
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0579489469528198,
      "learning_rate": 1.8989790722928002e-05,
      "loss": 2.507,
      "step": 11511
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9818737506866455,
      "learning_rate": 1.898961037807225e-05,
      "loss": 2.7068,
      "step": 11512
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1965773105621338,
      "learning_rate": 1.8989430017976672e-05,
      "loss": 2.736,
      "step": 11513
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0535507202148438,
      "learning_rate": 1.8989249642641564e-05,
      "loss": 2.6291,
      "step": 11514
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9116702675819397,
      "learning_rate": 1.8989069252067236e-05,
      "loss": 2.4944,
      "step": 11515
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.114654302597046,
      "learning_rate": 1.8988888846254e-05,
      "loss": 2.4749,
      "step": 11516
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9917629361152649,
      "learning_rate": 1.8988708425202152e-05,
      "loss": 2.6382,
      "step": 11517
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9254369735717773,
      "learning_rate": 1.8988527988912003e-05,
      "loss": 2.4133,
      "step": 11518
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.096872329711914,
      "learning_rate": 1.8988347537383856e-05,
      "loss": 2.5908,
      "step": 11519
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0424165725708008,
      "learning_rate": 1.8988167070618023e-05,
      "loss": 2.4872,
      "step": 11520
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.186586618423462,
      "learning_rate": 1.89879865886148e-05,
      "loss": 2.5653,
      "step": 11521
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1646636724472046,
      "learning_rate": 1.8987806091374504e-05,
      "loss": 2.5015,
      "step": 11522
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0187562704086304,
      "learning_rate": 1.898762557889743e-05,
      "loss": 2.3855,
      "step": 11523
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9365755319595337,
      "learning_rate": 1.8987445051183894e-05,
      "loss": 2.5129,
      "step": 11524
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.001007318496704,
      "learning_rate": 1.8987264508234197e-05,
      "loss": 2.3709,
      "step": 11525
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.077709674835205,
      "learning_rate": 1.8987083950048647e-05,
      "loss": 2.7197,
      "step": 11526
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0000076293945312,
      "learning_rate": 1.8986903376627546e-05,
      "loss": 2.402,
      "step": 11527
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9623270630836487,
      "learning_rate": 1.8986722787971202e-05,
      "loss": 2.4767,
      "step": 11528
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.004400372505188,
      "learning_rate": 1.8986542184079924e-05,
      "loss": 2.491,
      "step": 11529
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1708180904388428,
      "learning_rate": 1.8986361564954017e-05,
      "loss": 2.5149,
      "step": 11530
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1141282320022583,
      "learning_rate": 1.8986180930593784e-05,
      "loss": 2.718,
      "step": 11531
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9998393654823303,
      "learning_rate": 1.8986000280999535e-05,
      "loss": 2.533,
      "step": 11532
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0746463537216187,
      "learning_rate": 1.8985819616171575e-05,
      "loss": 2.5689,
      "step": 11533
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.164400339126587,
      "learning_rate": 1.8985638936110212e-05,
      "loss": 2.5044,
      "step": 11534
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0602794885635376,
      "learning_rate": 1.898545824081575e-05,
      "loss": 2.7031,
      "step": 11535
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0422377586364746,
      "learning_rate": 1.8985277530288497e-05,
      "loss": 2.5492,
      "step": 11536
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9192249178886414,
      "learning_rate": 1.8985096804528754e-05,
      "loss": 2.257,
      "step": 11537
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0193480253219604,
      "learning_rate": 1.8984916063536835e-05,
      "loss": 2.527,
      "step": 11538
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9370668530464172,
      "learning_rate": 1.8984735307313044e-05,
      "loss": 2.6368,
      "step": 11539
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0760654211044312,
      "learning_rate": 1.8984554535857683e-05,
      "loss": 2.6719,
      "step": 11540
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0640714168548584,
      "learning_rate": 1.8984373749171064e-05,
      "loss": 2.6867,
      "step": 11541
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0285078287124634,
      "learning_rate": 1.8984192947253488e-05,
      "loss": 2.648,
      "step": 11542
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0526320934295654,
      "learning_rate": 1.898401213010527e-05,
      "loss": 2.6148,
      "step": 11543
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0375066995620728,
      "learning_rate": 1.8983831297726708e-05,
      "loss": 2.5196,
      "step": 11544
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9146267771720886,
      "learning_rate": 1.8983650450118115e-05,
      "loss": 2.4043,
      "step": 11545
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0549167394638062,
      "learning_rate": 1.898346958727979e-05,
      "loss": 2.5365,
      "step": 11546
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.991156280040741,
      "learning_rate": 1.8983288709212046e-05,
      "loss": 2.4283,
      "step": 11547
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1639037132263184,
      "learning_rate": 1.898310781591519e-05,
      "loss": 2.6408,
      "step": 11548
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1033443212509155,
      "learning_rate": 1.8982926907389525e-05,
      "loss": 2.4777,
      "step": 11549
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9972312450408936,
      "learning_rate": 1.8982745983635357e-05,
      "loss": 2.3694,
      "step": 11550
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2775771617889404,
      "learning_rate": 1.8982565044653e-05,
      "loss": 2.8539,
      "step": 11551
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9419966340065002,
      "learning_rate": 1.8982384090442752e-05,
      "loss": 2.4921,
      "step": 11552
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0084383487701416,
      "learning_rate": 1.8982203121004922e-05,
      "loss": 2.5568,
      "step": 11553
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9472382664680481,
      "learning_rate": 1.898202213633982e-05,
      "loss": 2.4934,
      "step": 11554
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0150424242019653,
      "learning_rate": 1.8981841136447752e-05,
      "loss": 2.5991,
      "step": 11555
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0380617380142212,
      "learning_rate": 1.8981660121329022e-05,
      "loss": 2.4302,
      "step": 11556
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9739757776260376,
      "learning_rate": 1.898147909098394e-05,
      "loss": 2.399,
      "step": 11557
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0628044605255127,
      "learning_rate": 1.8981298045412813e-05,
      "loss": 2.8354,
      "step": 11558
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.219746470451355,
      "learning_rate": 1.8981116984615944e-05,
      "loss": 2.3917,
      "step": 11559
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1066216230392456,
      "learning_rate": 1.8980935908593644e-05,
      "loss": 2.7912,
      "step": 11560
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0017321109771729,
      "learning_rate": 1.8980754817346215e-05,
      "loss": 2.555,
      "step": 11561
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0080459117889404,
      "learning_rate": 1.8980573710873972e-05,
      "loss": 2.5004,
      "step": 11562
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0037564039230347,
      "learning_rate": 1.8980392589177216e-05,
      "loss": 2.5284,
      "step": 11563
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5704339742660522,
      "learning_rate": 1.8980211452256252e-05,
      "loss": 2.6896,
      "step": 11564
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9639081358909607,
      "learning_rate": 1.8980030300111396e-05,
      "loss": 2.6062,
      "step": 11565
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9778286814689636,
      "learning_rate": 1.8979849132742946e-05,
      "loss": 2.6424,
      "step": 11566
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9884037971496582,
      "learning_rate": 1.8979667950151216e-05,
      "loss": 2.7787,
      "step": 11567
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9670284390449524,
      "learning_rate": 1.897948675233651e-05,
      "loss": 2.565,
      "step": 11568
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0455832481384277,
      "learning_rate": 1.8979305539299132e-05,
      "loss": 2.4163,
      "step": 11569
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.001409888267517,
      "learning_rate": 1.8979124311039394e-05,
      "loss": 2.5905,
      "step": 11570
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1593941450119019,
      "learning_rate": 1.89789430675576e-05,
      "loss": 2.2802,
      "step": 11571
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9714367389678955,
      "learning_rate": 1.897876180885406e-05,
      "loss": 2.4682,
      "step": 11572
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9911375641822815,
      "learning_rate": 1.8978580534929082e-05,
      "loss": 2.4997,
      "step": 11573
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9596070647239685,
      "learning_rate": 1.897839924578297e-05,
      "loss": 2.4718,
      "step": 11574
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1446447372436523,
      "learning_rate": 1.8978217941416033e-05,
      "loss": 2.6828,
      "step": 11575
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0276437997817993,
      "learning_rate": 1.8978036621828576e-05,
      "loss": 2.4794,
      "step": 11576
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1969103813171387,
      "learning_rate": 1.8977855287020913e-05,
      "loss": 2.5896,
      "step": 11577
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1474905014038086,
      "learning_rate": 1.897767393699334e-05,
      "loss": 2.5889,
      "step": 11578
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0124061107635498,
      "learning_rate": 1.897749257174618e-05,
      "loss": 2.5297,
      "step": 11579
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0783593654632568,
      "learning_rate": 1.8977311191279727e-05,
      "loss": 2.657,
      "step": 11580
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0729488134384155,
      "learning_rate": 1.897712979559429e-05,
      "loss": 2.6229,
      "step": 11581
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0806430578231812,
      "learning_rate": 1.8976948384690187e-05,
      "loss": 2.4857,
      "step": 11582
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9744282960891724,
      "learning_rate": 1.8976766958567713e-05,
      "loss": 2.6463,
      "step": 11583
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1784483194351196,
      "learning_rate": 1.8976585517227184e-05,
      "loss": 2.6964,
      "step": 11584
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9961391091346741,
      "learning_rate": 1.8976404060668902e-05,
      "loss": 2.555,
      "step": 11585
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9003770351409912,
      "learning_rate": 1.897622258889318e-05,
      "loss": 2.7432,
      "step": 11586
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1460880041122437,
      "learning_rate": 1.897604110190032e-05,
      "loss": 2.5656,
      "step": 11587
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0327730178833008,
      "learning_rate": 1.8975859599690633e-05,
      "loss": 2.5231,
      "step": 11588
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0161237716674805,
      "learning_rate": 1.8975678082264428e-05,
      "loss": 2.2921,
      "step": 11589
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1469030380249023,
      "learning_rate": 1.897549654962201e-05,
      "loss": 2.5138,
      "step": 11590
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.96299809217453,
      "learning_rate": 1.8975315001763687e-05,
      "loss": 2.3998,
      "step": 11591
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0553615093231201,
      "learning_rate": 1.897513343868977e-05,
      "loss": 2.4818,
      "step": 11592
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9118088483810425,
      "learning_rate": 1.897495186040056e-05,
      "loss": 2.4162,
      "step": 11593
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9412862062454224,
      "learning_rate": 1.8974770266896373e-05,
      "loss": 2.5399,
      "step": 11594
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0190924406051636,
      "learning_rate": 1.8974588658177512e-05,
      "loss": 2.6345,
      "step": 11595
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9525578618049622,
      "learning_rate": 1.8974407034244283e-05,
      "loss": 2.577,
      "step": 11596
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9833930134773254,
      "learning_rate": 1.8974225395096997e-05,
      "loss": 2.5839,
      "step": 11597
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9750582575798035,
      "learning_rate": 1.8974043740735966e-05,
      "loss": 2.5438,
      "step": 11598
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9627408385276794,
      "learning_rate": 1.8973862071161492e-05,
      "loss": 2.5689,
      "step": 11599
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9798375368118286,
      "learning_rate": 1.8973680386373882e-05,
      "loss": 2.4016,
      "step": 11600
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0500009059906006,
      "learning_rate": 1.8973498686373445e-05,
      "loss": 2.3733,
      "step": 11601
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9381074905395508,
      "learning_rate": 1.8973316971160497e-05,
      "loss": 2.4117,
      "step": 11602
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0172494649887085,
      "learning_rate": 1.8973135240735334e-05,
      "loss": 2.48,
      "step": 11603
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9020715355873108,
      "learning_rate": 1.8972953495098273e-05,
      "loss": 2.4378,
      "step": 11604
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0032479763031006,
      "learning_rate": 1.8972771734249616e-05,
      "loss": 2.6528,
      "step": 11605
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.132933497428894,
      "learning_rate": 1.8972589958189676e-05,
      "loss": 2.4588,
      "step": 11606
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9479206204414368,
      "learning_rate": 1.897240816691876e-05,
      "loss": 2.596,
      "step": 11607
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0866076946258545,
      "learning_rate": 1.897222636043717e-05,
      "loss": 2.5409,
      "step": 11608
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0715558528900146,
      "learning_rate": 1.8972044538745224e-05,
      "loss": 2.5187,
      "step": 11609
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0747638940811157,
      "learning_rate": 1.8971862701843226e-05,
      "loss": 2.4394,
      "step": 11610
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9891786575317383,
      "learning_rate": 1.8971680849731484e-05,
      "loss": 2.6958,
      "step": 11611
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0052002668380737,
      "learning_rate": 1.8971498982410304e-05,
      "loss": 2.349,
      "step": 11612
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9457177519798279,
      "learning_rate": 1.8971317099879996e-05,
      "loss": 2.6572,
      "step": 11613
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0171998739242554,
      "learning_rate": 1.897113520214087e-05,
      "loss": 2.5466,
      "step": 11614
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0809096097946167,
      "learning_rate": 1.8970953289193237e-05,
      "loss": 2.4215,
      "step": 11615
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1906721591949463,
      "learning_rate": 1.8970771361037398e-05,
      "loss": 2.7752,
      "step": 11616
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9851244688034058,
      "learning_rate": 1.8970589417673665e-05,
      "loss": 2.5931,
      "step": 11617
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.063611388206482,
      "learning_rate": 1.897040745910235e-05,
      "loss": 2.6446,
      "step": 11618
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.044035792350769,
      "learning_rate": 1.8970225485323755e-05,
      "loss": 2.524,
      "step": 11619
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0520007610321045,
      "learning_rate": 1.8970043496338194e-05,
      "loss": 2.4734,
      "step": 11620
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0571086406707764,
      "learning_rate": 1.896986149214597e-05,
      "loss": 2.6068,
      "step": 11621
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0193544626235962,
      "learning_rate": 1.8969679472747392e-05,
      "loss": 2.4057,
      "step": 11622
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0270529985427856,
      "learning_rate": 1.8969497438142778e-05,
      "loss": 2.5887,
      "step": 11623
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.01288640499115,
      "learning_rate": 1.8969315388332425e-05,
      "loss": 2.7889,
      "step": 11624
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0164560079574585,
      "learning_rate": 1.8969133323316645e-05,
      "loss": 2.5202,
      "step": 11625
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.020232915878296,
      "learning_rate": 1.8968951243095756e-05,
      "loss": 2.364,
      "step": 11626
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0327081680297852,
      "learning_rate": 1.8968769147670053e-05,
      "loss": 2.4951,
      "step": 11627
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9841262102127075,
      "learning_rate": 1.896858703703985e-05,
      "loss": 2.4344,
      "step": 11628
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.025921106338501,
      "learning_rate": 1.8968404911205458e-05,
      "loss": 2.7082,
      "step": 11629
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9587830305099487,
      "learning_rate": 1.8968222770167183e-05,
      "loss": 2.6497,
      "step": 11630
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0258005857467651,
      "learning_rate": 1.8968040613925335e-05,
      "loss": 2.5037,
      "step": 11631
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.937326967716217,
      "learning_rate": 1.896785844248022e-05,
      "loss": 2.6145,
      "step": 11632
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0777733325958252,
      "learning_rate": 1.8967676255832152e-05,
      "loss": 2.6264,
      "step": 11633
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1509671211242676,
      "learning_rate": 1.8967494053981438e-05,
      "loss": 2.4791,
      "step": 11634
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3808648586273193,
      "learning_rate": 1.8967311836928383e-05,
      "loss": 2.5392,
      "step": 11635
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1218767166137695,
      "learning_rate": 1.8967129604673303e-05,
      "loss": 2.3882,
      "step": 11636
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0540064573287964,
      "learning_rate": 1.89669473572165e-05,
      "loss": 2.5797,
      "step": 11637
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0880693197250366,
      "learning_rate": 1.8966765094558286e-05,
      "loss": 2.5107,
      "step": 11638
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0908454656600952,
      "learning_rate": 1.896658281669897e-05,
      "loss": 2.5019,
      "step": 11639
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1482194662094116,
      "learning_rate": 1.896640052363886e-05,
      "loss": 2.3625,
      "step": 11640
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1351890563964844,
      "learning_rate": 1.896621821537827e-05,
      "loss": 2.6595,
      "step": 11641
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0203840732574463,
      "learning_rate": 1.89660358919175e-05,
      "loss": 2.2802,
      "step": 11642
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.022279977798462,
      "learning_rate": 1.8965853553256865e-05,
      "loss": 2.5686,
      "step": 11643
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0046015977859497,
      "learning_rate": 1.8965671199396673e-05,
      "loss": 2.6936,
      "step": 11644
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9799137711524963,
      "learning_rate": 1.8965488830337234e-05,
      "loss": 2.3378,
      "step": 11645
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.957703173160553,
      "learning_rate": 1.8965306446078856e-05,
      "loss": 2.6162,
      "step": 11646
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.024874210357666,
      "learning_rate": 1.8965124046621854e-05,
      "loss": 2.5713,
      "step": 11647
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0101113319396973,
      "learning_rate": 1.8964941631966526e-05,
      "loss": 2.6762,
      "step": 11648
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9805973172187805,
      "learning_rate": 1.8964759202113187e-05,
      "loss": 2.4946,
      "step": 11649
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0401514768600464,
      "learning_rate": 1.896457675706215e-05,
      "loss": 2.4385,
      "step": 11650
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0986398458480835,
      "learning_rate": 1.8964394296813715e-05,
      "loss": 2.4958,
      "step": 11651
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0363489389419556,
      "learning_rate": 1.8964211821368202e-05,
      "loss": 2.6445,
      "step": 11652
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0231906175613403,
      "learning_rate": 1.8964029330725914e-05,
      "loss": 2.3269,
      "step": 11653
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9755011796951294,
      "learning_rate": 1.896384682488716e-05,
      "loss": 2.3808,
      "step": 11654
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0169352293014526,
      "learning_rate": 1.8963664303852254e-05,
      "loss": 2.677,
      "step": 11655
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9593929052352905,
      "learning_rate": 1.89634817676215e-05,
      "loss": 2.6783,
      "step": 11656
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2355488538742065,
      "learning_rate": 1.896329921619521e-05,
      "loss": 2.6235,
      "step": 11657
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.073803186416626,
      "learning_rate": 1.8963116649573696e-05,
      "loss": 2.3576,
      "step": 11658
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9202339053153992,
      "learning_rate": 1.8962934067757263e-05,
      "loss": 2.6348,
      "step": 11659
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0333621501922607,
      "learning_rate": 1.896275147074622e-05,
      "loss": 2.5997,
      "step": 11660
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0249651670455933,
      "learning_rate": 1.8962568858540882e-05,
      "loss": 2.6417,
      "step": 11661
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.053972601890564,
      "learning_rate": 1.8962386231141557e-05,
      "loss": 2.5208,
      "step": 11662
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0435527563095093,
      "learning_rate": 1.8962203588548552e-05,
      "loss": 2.4685,
      "step": 11663
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9764003753662109,
      "learning_rate": 1.8962020930762178e-05,
      "loss": 2.7671,
      "step": 11664
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.117335319519043,
      "learning_rate": 1.896183825778274e-05,
      "loss": 2.5176,
      "step": 11665
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9301450252532959,
      "learning_rate": 1.8961655569610557e-05,
      "loss": 2.4755,
      "step": 11666
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0824077129364014,
      "learning_rate": 1.8961472866245933e-05,
      "loss": 2.762,
      "step": 11667
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0298171043395996,
      "learning_rate": 1.896129014768918e-05,
      "loss": 2.6295,
      "step": 11668
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.8955643773078918,
      "learning_rate": 1.8961107413940603e-05,
      "loss": 2.5096,
      "step": 11669
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.009305715560913,
      "learning_rate": 1.8960924665000518e-05,
      "loss": 2.4036,
      "step": 11670
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0415666103363037,
      "learning_rate": 1.8960741900869228e-05,
      "loss": 2.4141,
      "step": 11671
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.998619794845581,
      "learning_rate": 1.8960559121547053e-05,
      "loss": 2.5554,
      "step": 11672
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0870375633239746,
      "learning_rate": 1.896037632703429e-05,
      "loss": 2.495,
      "step": 11673
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0592222213745117,
      "learning_rate": 1.896019351733126e-05,
      "loss": 2.5918,
      "step": 11674
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9036237001419067,
      "learning_rate": 1.896001069243827e-05,
      "loss": 2.5491,
      "step": 11675
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0229555368423462,
      "learning_rate": 1.8959827852355623e-05,
      "loss": 2.5221,
      "step": 11676
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0602295398712158,
      "learning_rate": 1.8959644997083635e-05,
      "loss": 2.5667,
      "step": 11677
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.8971642255783081,
      "learning_rate": 1.895946212662262e-05,
      "loss": 2.6236,
      "step": 11678
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1099340915679932,
      "learning_rate": 1.895927924097288e-05,
      "loss": 2.614,
      "step": 11679
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2331438064575195,
      "learning_rate": 1.8959096340134725e-05,
      "loss": 2.7454,
      "step": 11680
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0688952207565308,
      "learning_rate": 1.8958913424108476e-05,
      "loss": 2.4591,
      "step": 11681
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.003853678703308,
      "learning_rate": 1.895873049289443e-05,
      "loss": 2.2979,
      "step": 11682
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.032224416732788,
      "learning_rate": 1.8958547546492908e-05,
      "loss": 2.8153,
      "step": 11683
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.054930567741394,
      "learning_rate": 1.895836458490421e-05,
      "loss": 2.2839,
      "step": 11684
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0365298986434937,
      "learning_rate": 1.8958181608128652e-05,
      "loss": 2.5107,
      "step": 11685
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0879474878311157,
      "learning_rate": 1.8957998616166543e-05,
      "loss": 2.4242,
      "step": 11686
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.97666996717453,
      "learning_rate": 1.8957815609018194e-05,
      "loss": 2.4646,
      "step": 11687
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9560642838478088,
      "learning_rate": 1.8957632586683914e-05,
      "loss": 2.6363,
      "step": 11688
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9953775405883789,
      "learning_rate": 1.8957449549164013e-05,
      "loss": 2.7761,
      "step": 11689
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9964912533760071,
      "learning_rate": 1.8957266496458803e-05,
      "loss": 2.519,
      "step": 11690
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.053107500076294,
      "learning_rate": 1.8957083428568593e-05,
      "loss": 2.5069,
      "step": 11691
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.947364330291748,
      "learning_rate": 1.89569003454937e-05,
      "loss": 2.4301,
      "step": 11692
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9361783862113953,
      "learning_rate": 1.895671724723442e-05,
      "loss": 2.5266,
      "step": 11693
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0940494537353516,
      "learning_rate": 1.8956534133791073e-05,
      "loss": 2.7382,
      "step": 11694
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.988946259021759,
      "learning_rate": 1.895635100516397e-05,
      "loss": 2.491,
      "step": 11695
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0104246139526367,
      "learning_rate": 1.895616786135342e-05,
      "loss": 2.5085,
      "step": 11696
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0354077816009521,
      "learning_rate": 1.8955984702359736e-05,
      "loss": 2.3751,
      "step": 11697
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4132628440856934,
      "learning_rate": 1.895580152818322e-05,
      "loss": 2.396,
      "step": 11698
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.126160740852356,
      "learning_rate": 1.895561833882419e-05,
      "loss": 2.5232,
      "step": 11699
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9780533909797668,
      "learning_rate": 1.8955435134282956e-05,
      "loss": 2.8064,
      "step": 11700
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.056476354598999,
      "learning_rate": 1.8955251914559824e-05,
      "loss": 2.4912,
      "step": 11701
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2176499366760254,
      "learning_rate": 1.895506867965511e-05,
      "loss": 2.6809,
      "step": 11702
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2083648443222046,
      "learning_rate": 1.8954885429569123e-05,
      "loss": 2.6659,
      "step": 11703
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0714221000671387,
      "learning_rate": 1.895470216430217e-05,
      "loss": 2.5878,
      "step": 11704
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9216089248657227,
      "learning_rate": 1.895451888385457e-05,
      "loss": 2.414,
      "step": 11705
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0753257274627686,
      "learning_rate": 1.8954335588226623e-05,
      "loss": 2.4935,
      "step": 11706
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1224493980407715,
      "learning_rate": 1.895415227741865e-05,
      "loss": 2.5976,
      "step": 11707
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.112280249595642,
      "learning_rate": 1.895396895143095e-05,
      "loss": 2.5423,
      "step": 11708
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1364315748214722,
      "learning_rate": 1.8953785610263847e-05,
      "loss": 2.5478,
      "step": 11709
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9627412557601929,
      "learning_rate": 1.8953602253917645e-05,
      "loss": 2.4469,
      "step": 11710
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0642131567001343,
      "learning_rate": 1.8953418882392656e-05,
      "loss": 2.7111,
      "step": 11711
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0436172485351562,
      "learning_rate": 1.895323549568919e-05,
      "loss": 2.4418,
      "step": 11712
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9887644648551941,
      "learning_rate": 1.8953052093807552e-05,
      "loss": 2.5771,
      "step": 11713
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.966735303401947,
      "learning_rate": 1.8952868676748064e-05,
      "loss": 2.4227,
      "step": 11714
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9600692987442017,
      "learning_rate": 1.8952685244511034e-05,
      "loss": 2.3212,
      "step": 11715
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.03130042552948,
      "learning_rate": 1.8952501797096768e-05,
      "loss": 2.7421,
      "step": 11716
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2006744146347046,
      "learning_rate": 1.895231833450558e-05,
      "loss": 2.4858,
      "step": 11717
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9208966493606567,
      "learning_rate": 1.8952134856737784e-05,
      "loss": 2.6969,
      "step": 11718
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.043418288230896,
      "learning_rate": 1.8951951363793685e-05,
      "loss": 2.4443,
      "step": 11719
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.938919186592102,
      "learning_rate": 1.8951767855673598e-05,
      "loss": 2.5317,
      "step": 11720
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0578309297561646,
      "learning_rate": 1.8951584332377832e-05,
      "loss": 2.4792,
      "step": 11721
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.941949725151062,
      "learning_rate": 1.89514007939067e-05,
      "loss": 2.4069,
      "step": 11722
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.044666051864624,
      "learning_rate": 1.8951217240260515e-05,
      "loss": 2.3323,
      "step": 11723
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.065887451171875,
      "learning_rate": 1.8951033671439584e-05,
      "loss": 2.3131,
      "step": 11724
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0128352642059326,
      "learning_rate": 1.895085008744422e-05,
      "loss": 2.3585,
      "step": 11725
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2433724403381348,
      "learning_rate": 1.8950666488274733e-05,
      "loss": 2.4995,
      "step": 11726
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2178659439086914,
      "learning_rate": 1.8950482873931436e-05,
      "loss": 2.6655,
      "step": 11727
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3703464269638062,
      "learning_rate": 1.895029924441464e-05,
      "loss": 2.3857,
      "step": 11728
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0260732173919678,
      "learning_rate": 1.8950115599724654e-05,
      "loss": 2.7763,
      "step": 11729
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0445438623428345,
      "learning_rate": 1.8949931939861793e-05,
      "loss": 2.8456,
      "step": 11730
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9250503182411194,
      "learning_rate": 1.8949748264826366e-05,
      "loss": 2.5785,
      "step": 11731
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.022723913192749,
      "learning_rate": 1.8949564574618685e-05,
      "loss": 2.5526,
      "step": 11732
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0619348287582397,
      "learning_rate": 1.894938086923906e-05,
      "loss": 2.428,
      "step": 11733
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.004237174987793,
      "learning_rate": 1.8949197148687808e-05,
      "loss": 2.5803,
      "step": 11734
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0706257820129395,
      "learning_rate": 1.8949013412965235e-05,
      "loss": 2.363,
      "step": 11735
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9986110925674438,
      "learning_rate": 1.894882966207165e-05,
      "loss": 2.8388,
      "step": 11736
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9976620078086853,
      "learning_rate": 1.8948645896007372e-05,
      "loss": 2.1971,
      "step": 11737
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9740182161331177,
      "learning_rate": 1.8948462114772706e-05,
      "loss": 2.6006,
      "step": 11738
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.004107117652893,
      "learning_rate": 1.894827831836797e-05,
      "loss": 2.4999,
      "step": 11739
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.01409113407135,
      "learning_rate": 1.894809450679347e-05,
      "loss": 2.5904,
      "step": 11740
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0172258615493774,
      "learning_rate": 1.894791068004952e-05,
      "loss": 2.3356,
      "step": 11741
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0161566734313965,
      "learning_rate": 1.8947726838136427e-05,
      "loss": 2.6067,
      "step": 11742
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0410585403442383,
      "learning_rate": 1.8947542981054514e-05,
      "loss": 2.3679,
      "step": 11743
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.119168758392334,
      "learning_rate": 1.894735910880408e-05,
      "loss": 2.583,
      "step": 11744
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9788041114807129,
      "learning_rate": 1.8947175221385446e-05,
      "loss": 2.5578,
      "step": 11745
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0042222738265991,
      "learning_rate": 1.8946991318798917e-05,
      "loss": 2.2995,
      "step": 11746
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.8941933512687683,
      "learning_rate": 1.894680740104481e-05,
      "loss": 2.5359,
      "step": 11747
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.053733468055725,
      "learning_rate": 1.8946623468123433e-05,
      "loss": 2.4944,
      "step": 11748
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0337109565734863,
      "learning_rate": 1.8946439520035097e-05,
      "loss": 2.3889,
      "step": 11749
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9834901094436646,
      "learning_rate": 1.8946255556780122e-05,
      "loss": 2.4967,
      "step": 11750
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0417819023132324,
      "learning_rate": 1.894607157835881e-05,
      "loss": 2.5792,
      "step": 11751
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1861861944198608,
      "learning_rate": 1.8945887584771478e-05,
      "loss": 2.393,
      "step": 11752
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9989914894104004,
      "learning_rate": 1.8945703576018436e-05,
      "loss": 2.5322,
      "step": 11753
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0462478399276733,
      "learning_rate": 1.89455195521e-05,
      "loss": 2.5421,
      "step": 11754
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1715939044952393,
      "learning_rate": 1.8945335513016477e-05,
      "loss": 2.6087,
      "step": 11755
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1432989835739136,
      "learning_rate": 1.8945151458768178e-05,
      "loss": 2.3666,
      "step": 11756
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9990283250808716,
      "learning_rate": 1.894496738935542e-05,
      "loss": 2.5402,
      "step": 11757
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1204644441604614,
      "learning_rate": 1.8944783304778513e-05,
      "loss": 2.5286,
      "step": 11758
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.079842209815979,
      "learning_rate": 1.8944599205037768e-05,
      "loss": 2.6672,
      "step": 11759
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3329259157180786,
      "learning_rate": 1.89444150901335e-05,
      "loss": 2.4995,
      "step": 11760
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9430765509605408,
      "learning_rate": 1.894423096006602e-05,
      "loss": 2.6334,
      "step": 11761
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9230284094810486,
      "learning_rate": 1.894404681483564e-05,
      "loss": 2.6555,
      "step": 11762
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0214366912841797,
      "learning_rate": 1.8943862654442666e-05,
      "loss": 2.6139,
      "step": 11763
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.8955851793289185,
      "learning_rate": 1.8943678478887417e-05,
      "loss": 2.4475,
      "step": 11764
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9491888880729675,
      "learning_rate": 1.8943494288170207e-05,
      "loss": 2.6067,
      "step": 11765
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.953576922416687,
      "learning_rate": 1.894331008229134e-05,
      "loss": 2.624,
      "step": 11766
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0266917943954468,
      "learning_rate": 1.894312586125114e-05,
      "loss": 2.5709,
      "step": 11767
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0210425853729248,
      "learning_rate": 1.8942941625049908e-05,
      "loss": 2.3104,
      "step": 11768
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.958138644695282,
      "learning_rate": 1.8942757373687965e-05,
      "loss": 2.5193,
      "step": 11769
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9337050318717957,
      "learning_rate": 1.8942573107165616e-05,
      "loss": 2.434,
      "step": 11770
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9802862405776978,
      "learning_rate": 1.894238882548318e-05,
      "loss": 2.6555,
      "step": 11771
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9916601777076721,
      "learning_rate": 1.8942204528640964e-05,
      "loss": 2.3382,
      "step": 11772
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9863710999488831,
      "learning_rate": 1.8942020216639282e-05,
      "loss": 2.6083,
      "step": 11773
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0746111869812012,
      "learning_rate": 1.8941835889478447e-05,
      "loss": 2.5534,
      "step": 11774
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0344232320785522,
      "learning_rate": 1.8941651547158772e-05,
      "loss": 2.4765,
      "step": 11775
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0015590190887451,
      "learning_rate": 1.8941467189680568e-05,
      "loss": 2.4717,
      "step": 11776
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9906246662139893,
      "learning_rate": 1.8941282817044154e-05,
      "loss": 2.5585,
      "step": 11777
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0309154987335205,
      "learning_rate": 1.894109842924983e-05,
      "loss": 2.4864,
      "step": 11778
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0203089714050293,
      "learning_rate": 1.8940914026297922e-05,
      "loss": 2.3927,
      "step": 11779
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0636510848999023,
      "learning_rate": 1.8940729608188734e-05,
      "loss": 2.5908,
      "step": 11780
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9744771122932434,
      "learning_rate": 1.8940545174922582e-05,
      "loss": 2.5065,
      "step": 11781
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5797595977783203,
      "learning_rate": 1.8940360726499776e-05,
      "loss": 2.4983,
      "step": 11782
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.00552499294281,
      "learning_rate": 1.8940176262920633e-05,
      "loss": 2.5555,
      "step": 11783
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9780257940292358,
      "learning_rate": 1.8939991784185458e-05,
      "loss": 2.3835,
      "step": 11784
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9793531894683838,
      "learning_rate": 1.8939807290294573e-05,
      "loss": 2.5585,
      "step": 11785
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0722050666809082,
      "learning_rate": 1.8939622781248283e-05,
      "loss": 2.4532,
      "step": 11786
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.03180992603302,
      "learning_rate": 1.8939438257046907e-05,
      "loss": 2.6436,
      "step": 11787
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0561096668243408,
      "learning_rate": 1.8939253717690757e-05,
      "loss": 2.595,
      "step": 11788
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9913501739501953,
      "learning_rate": 1.8939069163180143e-05,
      "loss": 2.4596,
      "step": 11789
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.038448691368103,
      "learning_rate": 1.893888459351538e-05,
      "loss": 2.5759,
      "step": 11790
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0807465314865112,
      "learning_rate": 1.8938700008696774e-05,
      "loss": 2.8496,
      "step": 11791
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1593347787857056,
      "learning_rate": 1.893851540872465e-05,
      "loss": 2.5963,
      "step": 11792
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1264212131500244,
      "learning_rate": 1.893833079359931e-05,
      "loss": 2.7024,
      "step": 11793
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9807801246643066,
      "learning_rate": 1.8938146163321074e-05,
      "loss": 2.4911,
      "step": 11794
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0131807327270508,
      "learning_rate": 1.8937961517890256e-05,
      "loss": 2.5414,
      "step": 11795
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.075368881225586,
      "learning_rate": 1.8937776857307163e-05,
      "loss": 2.5858,
      "step": 11796
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0332156419754028,
      "learning_rate": 1.8937592181572108e-05,
      "loss": 2.7597,
      "step": 11797
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1308501958847046,
      "learning_rate": 1.8937407490685412e-05,
      "loss": 2.5898,
      "step": 11798
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3459010124206543,
      "learning_rate": 1.893722278464738e-05,
      "loss": 2.7386,
      "step": 11799
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0662659406661987,
      "learning_rate": 1.893703806345833e-05,
      "loss": 2.4504,
      "step": 11800
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1986442804336548,
      "learning_rate": 1.893685332711857e-05,
      "loss": 2.8067,
      "step": 11801
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0479958057403564,
      "learning_rate": 1.8936668575628416e-05,
      "loss": 2.8795,
      "step": 11802
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9380537271499634,
      "learning_rate": 1.8936483808988182e-05,
      "loss": 2.517,
      "step": 11803
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.99265456199646,
      "learning_rate": 1.8936299027198185e-05,
      "loss": 2.4145,
      "step": 11804
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2029532194137573,
      "learning_rate": 1.8936114230258732e-05,
      "loss": 2.5912,
      "step": 11805
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5542160272598267,
      "learning_rate": 1.8935929418170136e-05,
      "loss": 2.4421,
      "step": 11806
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0993174314498901,
      "learning_rate": 1.8935744590932715e-05,
      "loss": 2.5918,
      "step": 11807
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.018449306488037,
      "learning_rate": 1.893555974854678e-05,
      "loss": 2.5016,
      "step": 11808
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0108991861343384,
      "learning_rate": 1.8935374891012643e-05,
      "loss": 2.5475,
      "step": 11809
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.949391782283783,
      "learning_rate": 1.8935190018330617e-05,
      "loss": 2.5927,
      "step": 11810
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.034066081047058,
      "learning_rate": 1.8935005130501022e-05,
      "loss": 2.5259,
      "step": 11811
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2191804647445679,
      "learning_rate": 1.8934820227524164e-05,
      "loss": 2.4678,
      "step": 11812
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0461034774780273,
      "learning_rate": 1.893463530940036e-05,
      "loss": 2.595,
      "step": 11813
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.057965636253357,
      "learning_rate": 1.8934450376129923e-05,
      "loss": 2.65,
      "step": 11814
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.072105050086975,
      "learning_rate": 1.8934265427713163e-05,
      "loss": 2.6376,
      "step": 11815
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.038919448852539,
      "learning_rate": 1.89340804641504e-05,
      "loss": 2.3029,
      "step": 11816
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9786205291748047,
      "learning_rate": 1.8933895485441943e-05,
      "loss": 2.5124,
      "step": 11817
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.080119252204895,
      "learning_rate": 1.8933710491588106e-05,
      "loss": 2.4554,
      "step": 11818
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1210967302322388,
      "learning_rate": 1.8933525482589205e-05,
      "loss": 2.6135,
      "step": 11819
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1515167951583862,
      "learning_rate": 1.8933340458445547e-05,
      "loss": 2.6101,
      "step": 11820
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.989983320236206,
      "learning_rate": 1.8933155419157456e-05,
      "loss": 2.6709,
      "step": 11821
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0084928274154663,
      "learning_rate": 1.893297036472524e-05,
      "loss": 2.3365,
      "step": 11822
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.075520634651184,
      "learning_rate": 1.8932785295149208e-05,
      "loss": 2.6506,
      "step": 11823
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.928544282913208,
      "learning_rate": 1.8932600210429685e-05,
      "loss": 2.4059,
      "step": 11824
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0432218313217163,
      "learning_rate": 1.8932415110566974e-05,
      "loss": 2.6572,
      "step": 11825
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.995377779006958,
      "learning_rate": 1.89322299955614e-05,
      "loss": 2.5011,
      "step": 11826
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0386096239089966,
      "learning_rate": 1.8932044865413265e-05,
      "loss": 2.6976,
      "step": 11827
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0166633129119873,
      "learning_rate": 1.8931859720122887e-05,
      "loss": 2.3886,
      "step": 11828
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2095584869384766,
      "learning_rate": 1.8931674559690582e-05,
      "loss": 2.3753,
      "step": 11829
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0024155378341675,
      "learning_rate": 1.8931489384116665e-05,
      "loss": 2.5244,
      "step": 11830
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.04164457321167,
      "learning_rate": 1.8931304193401444e-05,
      "loss": 2.5644,
      "step": 11831
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0711735486984253,
      "learning_rate": 1.8931118987545242e-05,
      "loss": 2.6357,
      "step": 11832
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1504020690917969,
      "learning_rate": 1.8930933766548363e-05,
      "loss": 2.7765,
      "step": 11833
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9562485218048096,
      "learning_rate": 1.8930748530411126e-05,
      "loss": 2.4914,
      "step": 11834
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1101497411727905,
      "learning_rate": 1.8930563279133846e-05,
      "loss": 2.5456,
      "step": 11835
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.003617763519287,
      "learning_rate": 1.893037801271684e-05,
      "loss": 2.5796,
      "step": 11836
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1624834537506104,
      "learning_rate": 1.893019273116041e-05,
      "loss": 2.5399,
      "step": 11837
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9742496609687805,
      "learning_rate": 1.8930007434464882e-05,
      "loss": 2.5682,
      "step": 11838
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1644924879074097,
      "learning_rate": 1.8929822122630567e-05,
      "loss": 2.6327,
      "step": 11839
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.042115330696106,
      "learning_rate": 1.8929636795657774e-05,
      "loss": 2.4439,
      "step": 11840
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0094616413116455,
      "learning_rate": 1.8929451453546822e-05,
      "loss": 2.7317,
      "step": 11841
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1326602697372437,
      "learning_rate": 1.892926609629803e-05,
      "loss": 2.5329,
      "step": 11842
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2303134202957153,
      "learning_rate": 1.8929080723911704e-05,
      "loss": 2.7937,
      "step": 11843
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9951208829879761,
      "learning_rate": 1.8928895336388158e-05,
      "loss": 2.6158,
      "step": 11844
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0270459651947021,
      "learning_rate": 1.8928709933727713e-05,
      "loss": 2.4964,
      "step": 11845
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0425958633422852,
      "learning_rate": 1.892852451593068e-05,
      "loss": 2.6271,
      "step": 11846
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2250102758407593,
      "learning_rate": 1.892833908299737e-05,
      "loss": 2.5518,
      "step": 11847
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9746605157852173,
      "learning_rate": 1.8928153634928102e-05,
      "loss": 2.4333,
      "step": 11848
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.029504418373108,
      "learning_rate": 1.892796817172319e-05,
      "loss": 2.4099,
      "step": 11849
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0218682289123535,
      "learning_rate": 1.8927782693382942e-05,
      "loss": 2.4405,
      "step": 11850
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1284972429275513,
      "learning_rate": 1.8927597199907683e-05,
      "loss": 2.6749,
      "step": 11851
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9998680353164673,
      "learning_rate": 1.892741169129772e-05,
      "loss": 2.6618,
      "step": 11852
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9972798228263855,
      "learning_rate": 1.892722616755337e-05,
      "loss": 2.5781,
      "step": 11853
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0417091846466064,
      "learning_rate": 1.8927040628674948e-05,
      "loss": 2.6203,
      "step": 11854
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9740177989006042,
      "learning_rate": 1.8926855074662763e-05,
      "loss": 2.3128,
      "step": 11855
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9561841487884521,
      "learning_rate": 1.8926669505517142e-05,
      "loss": 2.4534,
      "step": 11856
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.989011824131012,
      "learning_rate": 1.8926483921238386e-05,
      "loss": 2.3344,
      "step": 11857
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9874972701072693,
      "learning_rate": 1.8926298321826817e-05,
      "loss": 2.4938,
      "step": 11858
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1384353637695312,
      "learning_rate": 1.8926112707282748e-05,
      "loss": 2.4384,
      "step": 11859
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.995219886302948,
      "learning_rate": 1.8925927077606495e-05,
      "loss": 2.2808,
      "step": 11860
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0890253782272339,
      "learning_rate": 1.892574143279837e-05,
      "loss": 2.3608,
      "step": 11861
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0404529571533203,
      "learning_rate": 1.8925555772858686e-05,
      "loss": 2.7586,
      "step": 11862
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0266261100769043,
      "learning_rate": 1.8925370097787766e-05,
      "loss": 2.4362,
      "step": 11863
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1227219104766846,
      "learning_rate": 1.8925184407585914e-05,
      "loss": 2.7037,
      "step": 11864
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0712279081344604,
      "learning_rate": 1.8924998702253455e-05,
      "loss": 2.3933,
      "step": 11865
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.054600477218628,
      "learning_rate": 1.8924812981790697e-05,
      "loss": 2.7055,
      "step": 11866
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3522863388061523,
      "learning_rate": 1.892462724619796e-05,
      "loss": 2.5334,
      "step": 11867
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0058057308197021,
      "learning_rate": 1.892444149547555e-05,
      "loss": 2.3586,
      "step": 11868
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1103097200393677,
      "learning_rate": 1.8924255729623794e-05,
      "loss": 2.6782,
      "step": 11869
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1488885879516602,
      "learning_rate": 1.8924069948643e-05,
      "loss": 2.5217,
      "step": 11870
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0678791999816895,
      "learning_rate": 1.892388415253348e-05,
      "loss": 2.5966,
      "step": 11871
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9998377561569214,
      "learning_rate": 1.8923698341295552e-05,
      "loss": 2.434,
      "step": 11872
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9626724720001221,
      "learning_rate": 1.892351251492954e-05,
      "loss": 2.3813,
      "step": 11873
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9806178212165833,
      "learning_rate": 1.8923326673435744e-05,
      "loss": 2.5164,
      "step": 11874
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2082958221435547,
      "learning_rate": 1.8923140816814486e-05,
      "loss": 2.7061,
      "step": 11875
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0033838748931885,
      "learning_rate": 1.892295494506608e-05,
      "loss": 2.646,
      "step": 11876
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0821155309677124,
      "learning_rate": 1.8922769058190845e-05,
      "loss": 2.6513,
      "step": 11877
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9569186568260193,
      "learning_rate": 1.892258315618909e-05,
      "loss": 2.4318,
      "step": 11878
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0350825786590576,
      "learning_rate": 1.8922397239061133e-05,
      "loss": 2.4138,
      "step": 11879
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0654124021530151,
      "learning_rate": 1.8922211306807294e-05,
      "loss": 2.7171,
      "step": 11880
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2308863401412964,
      "learning_rate": 1.8922025359427882e-05,
      "loss": 2.492,
      "step": 11881
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2047314643859863,
      "learning_rate": 1.8921839396923213e-05,
      "loss": 2.3079,
      "step": 11882
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.045621633529663,
      "learning_rate": 1.89216534192936e-05,
      "loss": 2.6457,
      "step": 11883
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3219798803329468,
      "learning_rate": 1.8921467426539366e-05,
      "loss": 2.5187,
      "step": 11884
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.032261848449707,
      "learning_rate": 1.8921281418660816e-05,
      "loss": 2.4125,
      "step": 11885
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0778496265411377,
      "learning_rate": 1.892109539565828e-05,
      "loss": 2.4644,
      "step": 11886
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1762739419937134,
      "learning_rate": 1.8920909357532056e-05,
      "loss": 2.6298,
      "step": 11887
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9679607152938843,
      "learning_rate": 1.892072330428247e-05,
      "loss": 2.6586,
      "step": 11888
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9714720249176025,
      "learning_rate": 1.8920537235909837e-05,
      "loss": 2.3886,
      "step": 11889
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0566328763961792,
      "learning_rate": 1.892035115241447e-05,
      "loss": 2.5137,
      "step": 11890
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1696219444274902,
      "learning_rate": 1.8920165053796685e-05,
      "loss": 2.4527,
      "step": 11891
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0326288938522339,
      "learning_rate": 1.8919978940056797e-05,
      "loss": 2.4847,
      "step": 11892
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.010823369026184,
      "learning_rate": 1.8919792811195125e-05,
      "loss": 2.5653,
      "step": 11893
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0927320718765259,
      "learning_rate": 1.8919606667211977e-05,
      "loss": 2.441,
      "step": 11894
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9109954237937927,
      "learning_rate": 1.8919420508107674e-05,
      "loss": 2.597,
      "step": 11895
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0478267669677734,
      "learning_rate": 1.8919234333882532e-05,
      "loss": 2.6407,
      "step": 11896
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9581774473190308,
      "learning_rate": 1.8919048144536866e-05,
      "loss": 2.4984,
      "step": 11897
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0531737804412842,
      "learning_rate": 1.8918861940070988e-05,
      "loss": 2.3388,
      "step": 11898
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.994853675365448,
      "learning_rate": 1.891867572048522e-05,
      "loss": 2.6003,
      "step": 11899
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.110084891319275,
      "learning_rate": 1.8918489485779875e-05,
      "loss": 2.5723,
      "step": 11900
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9690325260162354,
      "learning_rate": 1.8918303235955267e-05,
      "loss": 2.5311,
      "step": 11901
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1063694953918457,
      "learning_rate": 1.891811697101171e-05,
      "loss": 2.7522,
      "step": 11902
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.0508971214294434,
      "learning_rate": 1.8917930690949525e-05,
      "loss": 2.7543,
      "step": 11903
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.9803519248962402,
      "learning_rate": 1.8917744395769023e-05,
      "loss": 2.5477,
      "step": 11904
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9619875550270081,
      "learning_rate": 1.8917558085470525e-05,
      "loss": 2.4786,
      "step": 11905
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9868036508560181,
      "learning_rate": 1.8917371760054346e-05,
      "loss": 2.5737,
      "step": 11906
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2342607975006104,
      "learning_rate": 1.8917185419520797e-05,
      "loss": 2.5237,
      "step": 11907
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1169720888137817,
      "learning_rate": 1.8916999063870197e-05,
      "loss": 2.653,
      "step": 11908
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9971490502357483,
      "learning_rate": 1.891681269310286e-05,
      "loss": 2.4528,
      "step": 11909
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4646815061569214,
      "learning_rate": 1.891662630721911e-05,
      "loss": 2.6369,
      "step": 11910
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9604549407958984,
      "learning_rate": 1.891643990621925e-05,
      "loss": 2.3056,
      "step": 11911
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9563671946525574,
      "learning_rate": 1.8916253490103606e-05,
      "loss": 2.3382,
      "step": 11912
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6845448017120361,
      "learning_rate": 1.891606705887249e-05,
      "loss": 2.6897,
      "step": 11913
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0503664016723633,
      "learning_rate": 1.891588061252622e-05,
      "loss": 2.6167,
      "step": 11914
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.131216049194336,
      "learning_rate": 1.8915694151065107e-05,
      "loss": 2.6156,
      "step": 11915
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0456300973892212,
      "learning_rate": 1.8915507674489474e-05,
      "loss": 2.4083,
      "step": 11916
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0268954038619995,
      "learning_rate": 1.8915321182799636e-05,
      "loss": 2.6124,
      "step": 11917
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2425960302352905,
      "learning_rate": 1.89151346759959e-05,
      "loss": 2.5618,
      "step": 11918
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.035226821899414,
      "learning_rate": 1.8914948154078595e-05,
      "loss": 2.6816,
      "step": 11919
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0584348440170288,
      "learning_rate": 1.8914761617048032e-05,
      "loss": 2.8038,
      "step": 11920
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5362318754196167,
      "learning_rate": 1.8914575064904525e-05,
      "loss": 2.3366,
      "step": 11921
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9798128604888916,
      "learning_rate": 1.8914388497648394e-05,
      "loss": 2.4555,
      "step": 11922
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0346986055374146,
      "learning_rate": 1.891420191527995e-05,
      "loss": 2.6049,
      "step": 11923
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9638054966926575,
      "learning_rate": 1.8914015317799518e-05,
      "loss": 2.3655,
      "step": 11924
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0004160404205322,
      "learning_rate": 1.8913828705207402e-05,
      "loss": 2.6354,
      "step": 11925
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9518007636070251,
      "learning_rate": 1.891364207750393e-05,
      "loss": 2.589,
      "step": 11926
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9955395460128784,
      "learning_rate": 1.8913455434689416e-05,
      "loss": 2.5751,
      "step": 11927
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9407193064689636,
      "learning_rate": 1.8913268776764168e-05,
      "loss": 2.6444,
      "step": 11928
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0940464735031128,
      "learning_rate": 1.8913082103728513e-05,
      "loss": 2.753,
      "step": 11929
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0436725616455078,
      "learning_rate": 1.8912895415582763e-05,
      "loss": 2.4371,
      "step": 11930
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.030188798904419,
      "learning_rate": 1.891270871232723e-05,
      "loss": 2.4507,
      "step": 11931
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9751465916633606,
      "learning_rate": 1.891252199396224e-05,
      "loss": 2.5996,
      "step": 11932
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.970854640007019,
      "learning_rate": 1.8912335260488104e-05,
      "loss": 2.4134,
      "step": 11933
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.953960657119751,
      "learning_rate": 1.8912148511905137e-05,
      "loss": 2.5813,
      "step": 11934
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0896259546279907,
      "learning_rate": 1.891196174821366e-05,
      "loss": 2.6027,
      "step": 11935
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0489094257354736,
      "learning_rate": 1.8911774969413988e-05,
      "loss": 2.6603,
      "step": 11936
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0467579364776611,
      "learning_rate": 1.8911588175506434e-05,
      "loss": 2.3933,
      "step": 11937
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0488590002059937,
      "learning_rate": 1.8911401366491318e-05,
      "loss": 2.59,
      "step": 11938
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0068453550338745,
      "learning_rate": 1.891121454236896e-05,
      "loss": 2.4611,
      "step": 11939
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.089868426322937,
      "learning_rate": 1.8911027703139667e-05,
      "loss": 2.6883,
      "step": 11940
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0608646869659424,
      "learning_rate": 1.8910840848803766e-05,
      "loss": 2.5858,
      "step": 11941
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9688246250152588,
      "learning_rate": 1.8910653979361567e-05,
      "loss": 2.5955,
      "step": 11942
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0168874263763428,
      "learning_rate": 1.8910467094813393e-05,
      "loss": 2.8252,
      "step": 11943
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.047426700592041,
      "learning_rate": 1.8910280195159555e-05,
      "loss": 2.5056,
      "step": 11944
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0062508583068848,
      "learning_rate": 1.8910093280400375e-05,
      "loss": 2.5724,
      "step": 11945
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9931614995002747,
      "learning_rate": 1.8909906350536165e-05,
      "loss": 2.5357,
      "step": 11946
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0909719467163086,
      "learning_rate": 1.890971940556724e-05,
      "loss": 2.7025,
      "step": 11947
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1095811128616333,
      "learning_rate": 1.8909532445493926e-05,
      "loss": 2.3727,
      "step": 11948
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.037490963935852,
      "learning_rate": 1.8909345470316533e-05,
      "loss": 2.5184,
      "step": 11949
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0241540670394897,
      "learning_rate": 1.8909158480035377e-05,
      "loss": 2.6016,
      "step": 11950
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0337605476379395,
      "learning_rate": 1.8908971474650784e-05,
      "loss": 2.5167,
      "step": 11951
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0582983493804932,
      "learning_rate": 1.890878445416306e-05,
      "loss": 2.677,
      "step": 11952
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9956519603729248,
      "learning_rate": 1.8908597418572528e-05,
      "loss": 2.4685,
      "step": 11953
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0049141645431519,
      "learning_rate": 1.8908410367879505e-05,
      "loss": 2.5669,
      "step": 11954
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0386158227920532,
      "learning_rate": 1.8908223302084305e-05,
      "loss": 2.7678,
      "step": 11955
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0393011569976807,
      "learning_rate": 1.8908036221187246e-05,
      "loss": 2.5656,
      "step": 11956
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3406704664230347,
      "learning_rate": 1.890784912518865e-05,
      "loss": 2.7224,
      "step": 11957
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.07194983959198,
      "learning_rate": 1.890766201408883e-05,
      "loss": 2.7205,
      "step": 11958
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1098828315734863,
      "learning_rate": 1.8907474887888098e-05,
      "loss": 2.241,
      "step": 11959
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.917130708694458,
      "learning_rate": 1.8907287746586778e-05,
      "loss": 2.5457,
      "step": 11960
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0648077726364136,
      "learning_rate": 1.890710059018519e-05,
      "loss": 2.6046,
      "step": 11961
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.073004961013794,
      "learning_rate": 1.8906913418683648e-05,
      "loss": 2.4324,
      "step": 11962
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0189708471298218,
      "learning_rate": 1.8906726232082465e-05,
      "loss": 2.4533,
      "step": 11963
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.8911586999893188,
      "learning_rate": 1.8906539030381967e-05,
      "loss": 2.7009,
      "step": 11964
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0709202289581299,
      "learning_rate": 1.890635181358246e-05,
      "loss": 2.2978,
      "step": 11965
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1095296144485474,
      "learning_rate": 1.890616458168427e-05,
      "loss": 2.7417,
      "step": 11966
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.8907879590988159,
      "learning_rate": 1.8905977334687714e-05,
      "loss": 2.546,
      "step": 11967
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2914317846298218,
      "learning_rate": 1.890579007259311e-05,
      "loss": 2.4636,
      "step": 11968
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9407650828361511,
      "learning_rate": 1.890560279540077e-05,
      "loss": 2.4702,
      "step": 11969
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.112332820892334,
      "learning_rate": 1.8905415503111012e-05,
      "loss": 2.6698,
      "step": 11970
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9903937578201294,
      "learning_rate": 1.8905228195724157e-05,
      "loss": 2.3295,
      "step": 11971
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.035749912261963,
      "learning_rate": 1.8905040873240525e-05,
      "loss": 2.4777,
      "step": 11972
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9875733852386475,
      "learning_rate": 1.8904853535660428e-05,
      "loss": 2.2672,
      "step": 11973
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.966827929019928,
      "learning_rate": 1.8904666182984184e-05,
      "loss": 2.5374,
      "step": 11974
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9469479322433472,
      "learning_rate": 1.8904478815212113e-05,
      "loss": 2.4925,
      "step": 11975
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0937119722366333,
      "learning_rate": 1.8904291432344536e-05,
      "loss": 2.4267,
      "step": 11976
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9781932830810547,
      "learning_rate": 1.890410403438176e-05,
      "loss": 2.616,
      "step": 11977
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1222110986709595,
      "learning_rate": 1.8903916621324116e-05,
      "loss": 2.6047,
      "step": 11978
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9714208245277405,
      "learning_rate": 1.890372919317191e-05,
      "loss": 2.6236,
      "step": 11979
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9785026907920837,
      "learning_rate": 1.8903541749925467e-05,
      "loss": 2.4112,
      "step": 11980
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1526693105697632,
      "learning_rate": 1.8903354291585102e-05,
      "loss": 2.3834,
      "step": 11981
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0567522048950195,
      "learning_rate": 1.8903166818151135e-05,
      "loss": 2.376,
      "step": 11982
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1176420450210571,
      "learning_rate": 1.8902979329623882e-05,
      "loss": 2.6615,
      "step": 11983
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1793385744094849,
      "learning_rate": 1.8902791826003657e-05,
      "loss": 2.7148,
      "step": 11984
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1140830516815186,
      "learning_rate": 1.8902604307290785e-05,
      "loss": 2.4487,
      "step": 11985
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.131371021270752,
      "learning_rate": 1.8902416773485582e-05,
      "loss": 2.4168,
      "step": 11986
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9794110059738159,
      "learning_rate": 1.8902229224588363e-05,
      "loss": 2.483,
      "step": 11987
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0327513217926025,
      "learning_rate": 1.890204166059945e-05,
      "loss": 2.217,
      "step": 11988
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0438990592956543,
      "learning_rate": 1.8901854081519156e-05,
      "loss": 2.5234,
      "step": 11989
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.077433466911316,
      "learning_rate": 1.8901666487347802e-05,
      "loss": 2.5465,
      "step": 11990
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1366221904754639,
      "learning_rate": 1.8901478878085705e-05,
      "loss": 2.3802,
      "step": 11991
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9961220622062683,
      "learning_rate": 1.8901291253733185e-05,
      "loss": 2.4587,
      "step": 11992
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.023476004600525,
      "learning_rate": 1.8901103614290556e-05,
      "loss": 2.5116,
      "step": 11993
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0891284942626953,
      "learning_rate": 1.890091595975814e-05,
      "loss": 2.4418,
      "step": 11994
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0539982318878174,
      "learning_rate": 1.890072829013626e-05,
      "loss": 2.4373,
      "step": 11995
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0319371223449707,
      "learning_rate": 1.890054060542522e-05,
      "loss": 2.5202,
      "step": 11996
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0076802968978882,
      "learning_rate": 1.8900352905625352e-05,
      "loss": 2.5785,
      "step": 11997
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.985069751739502,
      "learning_rate": 1.8900165190736966e-05,
      "loss": 2.6412,
      "step": 11998
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0509288311004639,
      "learning_rate": 1.8899977460760383e-05,
      "loss": 2.6135,
      "step": 11999
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0596694946289062,
      "learning_rate": 1.8899789715695923e-05,
      "loss": 2.7182,
      "step": 12000
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.038225769996643,
      "learning_rate": 1.8899601955543896e-05,
      "loss": 2.4911,
      "step": 12001
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2098439931869507,
      "learning_rate": 1.8899414180304633e-05,
      "loss": 2.494,
      "step": 12002
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9014737010002136,
      "learning_rate": 1.8899226389978446e-05,
      "loss": 2.6645,
      "step": 12003
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9212118983268738,
      "learning_rate": 1.8899038584565647e-05,
      "loss": 2.4269,
      "step": 12004
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9882451295852661,
      "learning_rate": 1.8898850764066564e-05,
      "loss": 2.6529,
      "step": 12005
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0687932968139648,
      "learning_rate": 1.8898662928481516e-05,
      "loss": 2.6787,
      "step": 12006
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0959081649780273,
      "learning_rate": 1.8898475077810815e-05,
      "loss": 2.6286,
      "step": 12007
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2304470539093018,
      "learning_rate": 1.889828721205478e-05,
      "loss": 2.6967,
      "step": 12008
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.8967252969741821,
      "learning_rate": 1.8898099331213733e-05,
      "loss": 2.4158,
      "step": 12009
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0545697212219238,
      "learning_rate": 1.889791143528799e-05,
      "loss": 2.5758,
      "step": 12010
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9854145646095276,
      "learning_rate": 1.889772352427787e-05,
      "loss": 2.5574,
      "step": 12011
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9272921681404114,
      "learning_rate": 1.8897535598183692e-05,
      "loss": 2.6148,
      "step": 12012
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9553123712539673,
      "learning_rate": 1.8897347657005776e-05,
      "loss": 2.719,
      "step": 12013
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2012064456939697,
      "learning_rate": 1.889715970074444e-05,
      "loss": 2.6057,
      "step": 12014
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9258356690406799,
      "learning_rate": 1.88969717294e-05,
      "loss": 2.5007,
      "step": 12015
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0265734195709229,
      "learning_rate": 1.8896783742972776e-05,
      "loss": 2.5499,
      "step": 12016
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0043920278549194,
      "learning_rate": 1.889659574146309e-05,
      "loss": 2.3054,
      "step": 12017
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9227846264839172,
      "learning_rate": 1.8896407724871254e-05,
      "loss": 2.5766,
      "step": 12018
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0145987272262573,
      "learning_rate": 1.8896219693197594e-05,
      "loss": 2.5983,
      "step": 12019
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1007500886917114,
      "learning_rate": 1.8896031646442425e-05,
      "loss": 2.5556,
      "step": 12020
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.138554334640503,
      "learning_rate": 1.8895843584606067e-05,
      "loss": 2.4427,
      "step": 12021
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.030981421470642,
      "learning_rate": 1.8895655507688838e-05,
      "loss": 2.5603,
      "step": 12022
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0303610563278198,
      "learning_rate": 1.8895467415691057e-05,
      "loss": 2.4451,
      "step": 12023
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.105268955230713,
      "learning_rate": 1.889527930861304e-05,
      "loss": 2.6487,
      "step": 12024
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.164758324623108,
      "learning_rate": 1.8895091186455106e-05,
      "loss": 2.6329,
      "step": 12025
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9533175230026245,
      "learning_rate": 1.8894903049217584e-05,
      "loss": 2.441,
      "step": 12026
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9866524934768677,
      "learning_rate": 1.8894714896900784e-05,
      "loss": 2.5869,
      "step": 12027
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0160013437271118,
      "learning_rate": 1.8894526729505023e-05,
      "loss": 2.3217,
      "step": 12028
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.148031234741211,
      "learning_rate": 1.8894338547030625e-05,
      "loss": 2.4021,
      "step": 12029
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.018951654434204,
      "learning_rate": 1.8894150349477906e-05,
      "loss": 2.8451,
      "step": 12030
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0020290613174438,
      "learning_rate": 1.889396213684719e-05,
      "loss": 2.5198,
      "step": 12031
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9601383805274963,
      "learning_rate": 1.889377390913879e-05,
      "loss": 2.458,
      "step": 12032
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0147103071212769,
      "learning_rate": 1.889358566635303e-05,
      "loss": 2.6704,
      "step": 12033
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9544684886932373,
      "learning_rate": 1.8893397408490224e-05,
      "loss": 2.2023,
      "step": 12034
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0152255296707153,
      "learning_rate": 1.8893209135550695e-05,
      "loss": 2.5774,
      "step": 12035
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2348304986953735,
      "learning_rate": 1.889302084753476e-05,
      "loss": 2.299,
      "step": 12036
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1229270696640015,
      "learning_rate": 1.8892832544442743e-05,
      "loss": 2.5232,
      "step": 12037
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0908817052841187,
      "learning_rate": 1.8892644226274958e-05,
      "loss": 2.6541,
      "step": 12038
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0728422403335571,
      "learning_rate": 1.8892455893031725e-05,
      "loss": 2.7715,
      "step": 12039
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9635242223739624,
      "learning_rate": 1.8892267544713365e-05,
      "loss": 2.4974,
      "step": 12040
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1351946592330933,
      "learning_rate": 1.8892079181320193e-05,
      "loss": 2.604,
      "step": 12041
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.030147671699524,
      "learning_rate": 1.8891890802852536e-05,
      "loss": 2.4761,
      "step": 12042
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.002170205116272,
      "learning_rate": 1.8891702409310707e-05,
      "loss": 2.4023,
      "step": 12043
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0072317123413086,
      "learning_rate": 1.889151400069503e-05,
      "loss": 2.4117,
      "step": 12044
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1304117441177368,
      "learning_rate": 1.8891325577005818e-05,
      "loss": 2.6941,
      "step": 12045
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9595931172370911,
      "learning_rate": 1.8891137138243398e-05,
      "loss": 2.4435,
      "step": 12046
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9512031078338623,
      "learning_rate": 1.8890948684408083e-05,
      "loss": 2.2431,
      "step": 12047
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9370851516723633,
      "learning_rate": 1.8890760215500194e-05,
      "loss": 2.3671,
      "step": 12048
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1063933372497559,
      "learning_rate": 1.8890571731520055e-05,
      "loss": 2.4715,
      "step": 12049
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0522420406341553,
      "learning_rate": 1.889038323246798e-05,
      "loss": 2.3825,
      "step": 12050
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1727248430252075,
      "learning_rate": 1.8890194718344293e-05,
      "loss": 2.4189,
      "step": 12051
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.149834394454956,
      "learning_rate": 1.889000618914931e-05,
      "loss": 2.5262,
      "step": 12052
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9708847403526306,
      "learning_rate": 1.8889817644883354e-05,
      "loss": 2.5846,
      "step": 12053
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.979325532913208,
      "learning_rate": 1.8889629085546742e-05,
      "loss": 2.2519,
      "step": 12054
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0738892555236816,
      "learning_rate": 1.888944051113979e-05,
      "loss": 2.559,
      "step": 12055
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9826838374137878,
      "learning_rate": 1.8889251921662826e-05,
      "loss": 2.4912,
      "step": 12056
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0764445066452026,
      "learning_rate": 1.8889063317116166e-05,
      "loss": 2.7701,
      "step": 12057
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9757999181747437,
      "learning_rate": 1.888887469750013e-05,
      "loss": 2.5057,
      "step": 12058
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.132304310798645,
      "learning_rate": 1.8888686062815034e-05,
      "loss": 2.6305,
      "step": 12059
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9282230138778687,
      "learning_rate": 1.8888497413061203e-05,
      "loss": 2.7922,
      "step": 12060
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.061389684677124,
      "learning_rate": 1.8888308748238957e-05,
      "loss": 2.5538,
      "step": 12061
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0492970943450928,
      "learning_rate": 1.888812006834861e-05,
      "loss": 2.3845,
      "step": 12062
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0176372528076172,
      "learning_rate": 1.8887931373390484e-05,
      "loss": 2.6103,
      "step": 12063
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0027869939804077,
      "learning_rate": 1.8887742663364905e-05,
      "loss": 2.541,
      "step": 12064
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0194512605667114,
      "learning_rate": 1.888755393827219e-05,
      "loss": 2.6017,
      "step": 12065
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9843850135803223,
      "learning_rate": 1.888736519811265e-05,
      "loss": 2.5327,
      "step": 12066
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0440016984939575,
      "learning_rate": 1.8887176442886615e-05,
      "loss": 2.5373,
      "step": 12067
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9706728458404541,
      "learning_rate": 1.8886987672594404e-05,
      "loss": 2.5333,
      "step": 12068
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9767773151397705,
      "learning_rate": 1.8886798887236334e-05,
      "loss": 2.6381,
      "step": 12069
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0027780532836914,
      "learning_rate": 1.8886610086812726e-05,
      "loss": 2.3848,
      "step": 12070
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.022905945777893,
      "learning_rate": 1.88864212713239e-05,
      "loss": 2.4146,
      "step": 12071
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.096769094467163,
      "learning_rate": 1.888623244077018e-05,
      "loss": 2.5832,
      "step": 12072
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0755794048309326,
      "learning_rate": 1.888604359515188e-05,
      "loss": 2.5589,
      "step": 12073
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9746997952461243,
      "learning_rate": 1.8885854734469318e-05,
      "loss": 2.4717,
      "step": 12074
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0392704010009766,
      "learning_rate": 1.8885665858722827e-05,
      "loss": 2.5342,
      "step": 12075
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9775305986404419,
      "learning_rate": 1.8885476967912716e-05,
      "loss": 2.225,
      "step": 12076
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0236830711364746,
      "learning_rate": 1.8885288062039303e-05,
      "loss": 2.5522,
      "step": 12077
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1659786701202393,
      "learning_rate": 1.888509914110292e-05,
      "loss": 2.7177,
      "step": 12078
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0336154699325562,
      "learning_rate": 1.8884910205103878e-05,
      "loss": 2.5835,
      "step": 12079
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1321308612823486,
      "learning_rate": 1.8884721254042504e-05,
      "loss": 2.5918,
      "step": 12080
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.026900291442871,
      "learning_rate": 1.8884532287919112e-05,
      "loss": 2.4342,
      "step": 12081
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0499731302261353,
      "learning_rate": 1.8884343306734024e-05,
      "loss": 2.4844,
      "step": 12082
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0340567827224731,
      "learning_rate": 1.8884154310487562e-05,
      "loss": 2.7674,
      "step": 12083
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0429471731185913,
      "learning_rate": 1.8883965299180045e-05,
      "loss": 2.4929,
      "step": 12084
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.010634183883667,
      "learning_rate": 1.8883776272811794e-05,
      "loss": 2.6459,
      "step": 12085
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.303304672241211,
      "learning_rate": 1.8883587231383127e-05,
      "loss": 2.4919,
      "step": 12086
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.011816382408142,
      "learning_rate": 1.888339817489437e-05,
      "loss": 2.563,
      "step": 12087
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0494831800460815,
      "learning_rate": 1.888320910334584e-05,
      "loss": 2.589,
      "step": 12088
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9798397421836853,
      "learning_rate": 1.8883020016737856e-05,
      "loss": 2.4749,
      "step": 12089
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.018730878829956,
      "learning_rate": 1.8882830915070744e-05,
      "loss": 2.5308,
      "step": 12090
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9468953013420105,
      "learning_rate": 1.888264179834482e-05,
      "loss": 2.4538,
      "step": 12091
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9337314963340759,
      "learning_rate": 1.8882452666560404e-05,
      "loss": 2.5259,
      "step": 12092
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9759584665298462,
      "learning_rate": 1.888226351971782e-05,
      "loss": 2.3911,
      "step": 12093
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0117994546890259,
      "learning_rate": 1.8882074357817385e-05,
      "loss": 2.7841,
      "step": 12094
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0488356351852417,
      "learning_rate": 1.8881885180859422e-05,
      "loss": 2.4623,
      "step": 12095
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.080614447593689,
      "learning_rate": 1.8881695988844252e-05,
      "loss": 2.3367,
      "step": 12096
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.049452781677246,
      "learning_rate": 1.8881506781772195e-05,
      "loss": 2.5554,
      "step": 12097
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.986730694770813,
      "learning_rate": 1.888131755964357e-05,
      "loss": 2.5859,
      "step": 12098
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9742986559867859,
      "learning_rate": 1.8881128322458702e-05,
      "loss": 2.5121,
      "step": 12099
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9978978633880615,
      "learning_rate": 1.888093907021791e-05,
      "loss": 2.3091,
      "step": 12100
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.987243115901947,
      "learning_rate": 1.8880749802921512e-05,
      "loss": 2.4328,
      "step": 12101
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0195186138153076,
      "learning_rate": 1.888056052056983e-05,
      "loss": 2.3265,
      "step": 12102
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0331411361694336,
      "learning_rate": 1.8880371223163194e-05,
      "loss": 2.4728,
      "step": 12103
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0841569900512695,
      "learning_rate": 1.8880181910701907e-05,
      "loss": 2.46,
      "step": 12104
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0924804210662842,
      "learning_rate": 1.8879992583186304e-05,
      "loss": 2.626,
      "step": 12105
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0155460834503174,
      "learning_rate": 1.8879803240616703e-05,
      "loss": 2.4668,
      "step": 12106
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9923164248466492,
      "learning_rate": 1.8879613882993423e-05,
      "loss": 2.6501,
      "step": 12107
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9071486592292786,
      "learning_rate": 1.8879424510316785e-05,
      "loss": 2.4676,
      "step": 12108
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0747405290603638,
      "learning_rate": 1.887923512258711e-05,
      "loss": 2.4881,
      "step": 12109
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2472316026687622,
      "learning_rate": 1.8879045719804722e-05,
      "loss": 2.3286,
      "step": 12110
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0457202196121216,
      "learning_rate": 1.887885630196994e-05,
      "loss": 2.4252,
      "step": 12111
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9302694797515869,
      "learning_rate": 1.887866686908308e-05,
      "loss": 2.476,
      "step": 12112
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9557456970214844,
      "learning_rate": 1.8878477421144476e-05,
      "loss": 2.7202,
      "step": 12113
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9769630432128906,
      "learning_rate": 1.8878287958154437e-05,
      "loss": 2.3274,
      "step": 12114
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9585596323013306,
      "learning_rate": 1.887809848011329e-05,
      "loss": 2.4766,
      "step": 12115
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9951781630516052,
      "learning_rate": 1.8877908987021354e-05,
      "loss": 2.4865,
      "step": 12116
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0560423135757446,
      "learning_rate": 1.887771947887895e-05,
      "loss": 2.2817,
      "step": 12117
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1636651754379272,
      "learning_rate": 1.8877529955686403e-05,
      "loss": 2.4747,
      "step": 12118
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0131909847259521,
      "learning_rate": 1.887734041744403e-05,
      "loss": 2.393,
      "step": 12119
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1127220392227173,
      "learning_rate": 1.8877150864152153e-05,
      "loss": 2.3879,
      "step": 12120
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9975820183753967,
      "learning_rate": 1.8876961295811097e-05,
      "loss": 2.4225,
      "step": 12121
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9633520245552063,
      "learning_rate": 1.887677171242118e-05,
      "loss": 2.3317,
      "step": 12122
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.092237949371338,
      "learning_rate": 1.887658211398272e-05,
      "loss": 2.4447,
      "step": 12123
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0591799020767212,
      "learning_rate": 1.8876392500496047e-05,
      "loss": 2.5988,
      "step": 12124
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0084474086761475,
      "learning_rate": 1.8876202871961475e-05,
      "loss": 2.6175,
      "step": 12125
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0084327459335327,
      "learning_rate": 1.887601322837933e-05,
      "loss": 2.678,
      "step": 12126
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1378393173217773,
      "learning_rate": 1.8875823569749935e-05,
      "loss": 2.6075,
      "step": 12127
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1533372402191162,
      "learning_rate": 1.8875633896073603e-05,
      "loss": 2.4485,
      "step": 12128
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1089192628860474,
      "learning_rate": 1.8875444207350663e-05,
      "loss": 2.3412,
      "step": 12129
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.020945429801941,
      "learning_rate": 1.8875254503581436e-05,
      "loss": 2.5146,
      "step": 12130
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2405378818511963,
      "learning_rate": 1.887506478476624e-05,
      "loss": 2.7477,
      "step": 12131
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.027590751647949,
      "learning_rate": 1.88748750509054e-05,
      "loss": 2.4046,
      "step": 12132
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0278300046920776,
      "learning_rate": 1.8874685301999233e-05,
      "loss": 2.379,
      "step": 12133
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9958520531654358,
      "learning_rate": 1.8874495538048067e-05,
      "loss": 2.2681,
      "step": 12134
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9927261471748352,
      "learning_rate": 1.887430575905222e-05,
      "loss": 2.6259,
      "step": 12135
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0852162837982178,
      "learning_rate": 1.887411596501202e-05,
      "loss": 2.7188,
      "step": 12136
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.009232759475708,
      "learning_rate": 1.8873926155927775e-05,
      "loss": 2.7098,
      "step": 12137
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0810760259628296,
      "learning_rate": 1.887373633179982e-05,
      "loss": 2.7798,
      "step": 12138
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.027336597442627,
      "learning_rate": 1.887354649262847e-05,
      "loss": 2.5921,
      "step": 12139
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9745767116546631,
      "learning_rate": 1.887335663841405e-05,
      "loss": 2.5603,
      "step": 12140
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9661058187484741,
      "learning_rate": 1.8873166769156878e-05,
      "loss": 2.6052,
      "step": 12141
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0181771516799927,
      "learning_rate": 1.8872976884857283e-05,
      "loss": 2.4923,
      "step": 12142
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2039804458618164,
      "learning_rate": 1.887278698551558e-05,
      "loss": 2.4768,
      "step": 12143
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9349273443222046,
      "learning_rate": 1.887259707113209e-05,
      "loss": 2.5621,
      "step": 12144
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0384092330932617,
      "learning_rate": 1.887240714170714e-05,
      "loss": 2.4265,
      "step": 12145
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9540975093841553,
      "learning_rate": 1.8872217197241053e-05,
      "loss": 2.4219,
      "step": 12146
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0445581674575806,
      "learning_rate": 1.8872027237734144e-05,
      "loss": 2.5868,
      "step": 12147
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0110622644424438,
      "learning_rate": 1.8871837263186742e-05,
      "loss": 2.564,
      "step": 12148
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.066314697265625,
      "learning_rate": 1.8871647273599166e-05,
      "loss": 2.4843,
      "step": 12149
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0295361280441284,
      "learning_rate": 1.887145726897174e-05,
      "loss": 2.3444,
      "step": 12150
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1178197860717773,
      "learning_rate": 1.8871267249304785e-05,
      "loss": 2.4694,
      "step": 12151
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9498396515846252,
      "learning_rate": 1.8871077214598616e-05,
      "loss": 2.6262,
      "step": 12152
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0964491367340088,
      "learning_rate": 1.8870887164853567e-05,
      "loss": 2.6488,
      "step": 12153
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9742169380187988,
      "learning_rate": 1.8870697100069954e-05,
      "loss": 2.6224,
      "step": 12154
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9941868782043457,
      "learning_rate": 1.88705070202481e-05,
      "loss": 2.6234,
      "step": 12155
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1382272243499756,
      "learning_rate": 1.8870316925388325e-05,
      "loss": 2.2973,
      "step": 12156
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0275158882141113,
      "learning_rate": 1.887012681549096e-05,
      "loss": 2.7842,
      "step": 12157
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9537166357040405,
      "learning_rate": 1.8869936690556315e-05,
      "loss": 2.4796,
      "step": 12158
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1535125970840454,
      "learning_rate": 1.886974655058472e-05,
      "loss": 2.5042,
      "step": 12159
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.950677752494812,
      "learning_rate": 1.8869556395576495e-05,
      "loss": 2.476,
      "step": 12160
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0829836130142212,
      "learning_rate": 1.8869366225531968e-05,
      "loss": 2.6213,
      "step": 12161
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.05344557762146,
      "learning_rate": 1.886917604045145e-05,
      "loss": 2.7491,
      "step": 12162
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0551166534423828,
      "learning_rate": 1.8868985840335272e-05,
      "loss": 2.4527,
      "step": 12163
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1358656883239746,
      "learning_rate": 1.8868795625183757e-05,
      "loss": 2.4902,
      "step": 12164
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0396623611450195,
      "learning_rate": 1.8868605394997218e-05,
      "loss": 2.4144,
      "step": 12165
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.983612596988678,
      "learning_rate": 1.886841514977599e-05,
      "loss": 2.3808,
      "step": 12166
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.907811164855957,
      "learning_rate": 1.8868224889520387e-05,
      "loss": 2.5771,
      "step": 12167
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0318129062652588,
      "learning_rate": 1.886803461423074e-05,
      "loss": 2.5351,
      "step": 12168
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1268341541290283,
      "learning_rate": 1.8867844323907357e-05,
      "loss": 2.6209,
      "step": 12169
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9191110134124756,
      "learning_rate": 1.8867654018550574e-05,
      "loss": 2.7284,
      "step": 12170
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9852651953697205,
      "learning_rate": 1.8867463698160707e-05,
      "loss": 2.5321,
      "step": 12171
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9937828779220581,
      "learning_rate": 1.886727336273808e-05,
      "loss": 2.5346,
      "step": 12172
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.998405933380127,
      "learning_rate": 1.886708301228302e-05,
      "loss": 2.5122,
      "step": 12173
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1536420583724976,
      "learning_rate": 1.8866892646795845e-05,
      "loss": 2.4195,
      "step": 12174
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0211540460586548,
      "learning_rate": 1.8866702266276878e-05,
      "loss": 2.6093,
      "step": 12175
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9533341526985168,
      "learning_rate": 1.8866511870726444e-05,
      "loss": 2.5261,
      "step": 12176
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0608607530593872,
      "learning_rate": 1.8866321460144862e-05,
      "loss": 2.43,
      "step": 12177
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9551354646682739,
      "learning_rate": 1.8866131034532456e-05,
      "loss": 2.7607,
      "step": 12178
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.137775182723999,
      "learning_rate": 1.8865940593889553e-05,
      "loss": 2.7763,
      "step": 12179
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0803418159484863,
      "learning_rate": 1.8865750138216473e-05,
      "loss": 2.631,
      "step": 12180
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0127698183059692,
      "learning_rate": 1.8865559667513536e-05,
      "loss": 2.4723,
      "step": 12181
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.053279995918274,
      "learning_rate": 1.886536918178107e-05,
      "loss": 2.3587,
      "step": 12182
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.982333779335022,
      "learning_rate": 1.8865178681019392e-05,
      "loss": 2.552,
      "step": 12183
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0441488027572632,
      "learning_rate": 1.886498816522883e-05,
      "loss": 2.5567,
      "step": 12184
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9148234724998474,
      "learning_rate": 1.8864797634409705e-05,
      "loss": 2.626,
      "step": 12185
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0553127527236938,
      "learning_rate": 1.8864607088562344e-05,
      "loss": 2.7193,
      "step": 12186
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0037795305252075,
      "learning_rate": 1.886441652768706e-05,
      "loss": 2.6191,
      "step": 12187
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0184712409973145,
      "learning_rate": 1.8864225951784187e-05,
      "loss": 2.4287,
      "step": 12188
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0397357940673828,
      "learning_rate": 1.8864035360854044e-05,
      "loss": 2.5454,
      "step": 12189
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0517364740371704,
      "learning_rate": 1.8863844754896953e-05,
      "loss": 2.6237,
      "step": 12190
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0483691692352295,
      "learning_rate": 1.8863654133913237e-05,
      "loss": 2.5595,
      "step": 12191
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9756684303283691,
      "learning_rate": 1.8863463497903217e-05,
      "loss": 2.5831,
      "step": 12192
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.054213047027588,
      "learning_rate": 1.8863272846867223e-05,
      "loss": 2.5338,
      "step": 12193
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0500456094741821,
      "learning_rate": 1.8863082180805576e-05,
      "loss": 2.5333,
      "step": 12194
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0805197954177856,
      "learning_rate": 1.886289149971859e-05,
      "loss": 2.654,
      "step": 12195
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.982456386089325,
      "learning_rate": 1.8862700803606603e-05,
      "loss": 2.7801,
      "step": 12196
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0816320180892944,
      "learning_rate": 1.8862510092469928e-05,
      "loss": 2.567,
      "step": 12197
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.073237657546997,
      "learning_rate": 1.886231936630889e-05,
      "loss": 2.5705,
      "step": 12198
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9572314620018005,
      "learning_rate": 1.8862128625123817e-05,
      "loss": 2.5324,
      "step": 12199
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9693709015846252,
      "learning_rate": 1.8861937868915028e-05,
      "loss": 2.7548,
      "step": 12200
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0806059837341309,
      "learning_rate": 1.8861747097682846e-05,
      "loss": 2.5899,
      "step": 12201
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0030803680419922,
      "learning_rate": 1.88615563114276e-05,
      "loss": 2.5043,
      "step": 12202
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9671444892883301,
      "learning_rate": 1.8861365510149604e-05,
      "loss": 2.4404,
      "step": 12203
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9307047724723816,
      "learning_rate": 1.886117469384919e-05,
      "loss": 2.479,
      "step": 12204
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.006014108657837,
      "learning_rate": 1.8860983862526675e-05,
      "loss": 2.3943,
      "step": 12205
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0920900106430054,
      "learning_rate": 1.886079301618239e-05,
      "loss": 2.4731,
      "step": 12206
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.042932152748108,
      "learning_rate": 1.8860602154816653e-05,
      "loss": 2.4438,
      "step": 12207
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1799205541610718,
      "learning_rate": 1.8860411278429786e-05,
      "loss": 2.4498,
      "step": 12208
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9935790300369263,
      "learning_rate": 1.8860220387022118e-05,
      "loss": 2.6344,
      "step": 12209
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0000355243682861,
      "learning_rate": 1.886002948059397e-05,
      "loss": 2.2902,
      "step": 12210
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9776847958564758,
      "learning_rate": 1.8859838559145665e-05,
      "loss": 2.6854,
      "step": 12211
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0100144147872925,
      "learning_rate": 1.8859647622677525e-05,
      "loss": 2.5526,
      "step": 12212
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0538321733474731,
      "learning_rate": 1.885945667118988e-05,
      "loss": 2.3709,
      "step": 12213
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1543705463409424,
      "learning_rate": 1.885926570468305e-05,
      "loss": 2.5397,
      "step": 12214
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0562701225280762,
      "learning_rate": 1.8859074723157356e-05,
      "loss": 2.4388,
      "step": 12215
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9211398959159851,
      "learning_rate": 1.8858883726613126e-05,
      "loss": 2.2567,
      "step": 12216
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1181062459945679,
      "learning_rate": 1.885869271505068e-05,
      "loss": 2.612,
      "step": 12217
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1322654485702515,
      "learning_rate": 1.8858501688470347e-05,
      "loss": 2.6132,
      "step": 12218
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.025259017944336,
      "learning_rate": 1.8858310646872448e-05,
      "loss": 2.3987,
      "step": 12219
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.129633903503418,
      "learning_rate": 1.88581195902573e-05,
      "loss": 2.6462,
      "step": 12220
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.042345404624939,
      "learning_rate": 1.8857928518625237e-05,
      "loss": 2.3716,
      "step": 12221
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0005911588668823,
      "learning_rate": 1.8857737431976583e-05,
      "loss": 2.472,
      "step": 12222
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.016776442527771,
      "learning_rate": 1.8857546330311654e-05,
      "loss": 2.4926,
      "step": 12223
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0750682353973389,
      "learning_rate": 1.8857355213630783e-05,
      "loss": 2.6054,
      "step": 12224
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9950440526008606,
      "learning_rate": 1.8857164081934284e-05,
      "loss": 2.6254,
      "step": 12225
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0104955434799194,
      "learning_rate": 1.885697293522249e-05,
      "loss": 2.5663,
      "step": 12226
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9555155634880066,
      "learning_rate": 1.8856781773495715e-05,
      "loss": 2.5711,
      "step": 12227
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.968654215335846,
      "learning_rate": 1.8856590596754296e-05,
      "loss": 2.4859,
      "step": 12228
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0012694597244263,
      "learning_rate": 1.885639940499855e-05,
      "loss": 2.5054,
      "step": 12229
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9738569259643555,
      "learning_rate": 1.88562081982288e-05,
      "loss": 2.5757,
      "step": 12230
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1727919578552246,
      "learning_rate": 1.885601697644537e-05,
      "loss": 2.3602,
      "step": 12231
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0708949565887451,
      "learning_rate": 1.885582573964859e-05,
      "loss": 2.5101,
      "step": 12232
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9602116942405701,
      "learning_rate": 1.8855634487838776e-05,
      "loss": 2.4932,
      "step": 12233
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0449328422546387,
      "learning_rate": 1.885544322101626e-05,
      "loss": 2.4675,
      "step": 12234
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0651178359985352,
      "learning_rate": 1.885525193918136e-05,
      "loss": 2.6028,
      "step": 12235
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9135273098945618,
      "learning_rate": 1.8855060642334404e-05,
      "loss": 2.4177,
      "step": 12236
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0719959735870361,
      "learning_rate": 1.8854869330475715e-05,
      "loss": 2.343,
      "step": 12237
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1549346446990967,
      "learning_rate": 1.8854678003605617e-05,
      "loss": 2.4634,
      "step": 12238
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0017095804214478,
      "learning_rate": 1.8854486661724438e-05,
      "loss": 2.51,
      "step": 12239
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1647799015045166,
      "learning_rate": 1.8854295304832497e-05,
      "loss": 2.6168,
      "step": 12240
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1435093879699707,
      "learning_rate": 1.8854103932930123e-05,
      "loss": 2.4869,
      "step": 12241
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.033812165260315,
      "learning_rate": 1.8853912546017633e-05,
      "loss": 2.5755,
      "step": 12242
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1560759544372559,
      "learning_rate": 1.885372114409536e-05,
      "loss": 2.5341,
      "step": 12243
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0145560503005981,
      "learning_rate": 1.8853529727163624e-05,
      "loss": 2.5408,
      "step": 12244
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.164628505706787,
      "learning_rate": 1.885333829522275e-05,
      "loss": 2.505,
      "step": 12245
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0241858959197998,
      "learning_rate": 1.8853146848273064e-05,
      "loss": 2.5419,
      "step": 12246
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0181645154953003,
      "learning_rate": 1.885295538631489e-05,
      "loss": 2.383,
      "step": 12247
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0228039026260376,
      "learning_rate": 1.8852763909348554e-05,
      "loss": 2.6743,
      "step": 12248
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0461137294769287,
      "learning_rate": 1.8852572417374377e-05,
      "loss": 2.5453,
      "step": 12249
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2209022045135498,
      "learning_rate": 1.8852380910392683e-05,
      "loss": 2.555,
      "step": 12250
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0138044357299805,
      "learning_rate": 1.8852189388403802e-05,
      "loss": 2.445,
      "step": 12251
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1830211877822876,
      "learning_rate": 1.8851997851408056e-05,
      "loss": 2.4533,
      "step": 12252
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1132932901382446,
      "learning_rate": 1.8851806299405765e-05,
      "loss": 2.3749,
      "step": 12253
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0025086402893066,
      "learning_rate": 1.8851614732397262e-05,
      "loss": 2.5427,
      "step": 12254
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.085098385810852,
      "learning_rate": 1.885142315038287e-05,
      "loss": 2.3873,
      "step": 12255
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0100575685501099,
      "learning_rate": 1.885123155336291e-05,
      "loss": 2.686,
      "step": 12256
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9940868616104126,
      "learning_rate": 1.8851039941337706e-05,
      "loss": 2.5445,
      "step": 12257
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0379091501235962,
      "learning_rate": 1.8850848314307584e-05,
      "loss": 2.6288,
      "step": 12258
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9405454397201538,
      "learning_rate": 1.8850656672272874e-05,
      "loss": 2.6754,
      "step": 12259
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0746960639953613,
      "learning_rate": 1.8850465015233897e-05,
      "loss": 2.6835,
      "step": 12260
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0161635875701904,
      "learning_rate": 1.8850273343190978e-05,
      "loss": 2.6163,
      "step": 12261
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9884660243988037,
      "learning_rate": 1.8850081656144443e-05,
      "loss": 2.5109,
      "step": 12262
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.054322361946106,
      "learning_rate": 1.8849889954094612e-05,
      "loss": 2.5573,
      "step": 12263
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0345656871795654,
      "learning_rate": 1.884969823704182e-05,
      "loss": 2.6493,
      "step": 12264
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1991140842437744,
      "learning_rate": 1.884950650498638e-05,
      "loss": 2.5651,
      "step": 12265
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9445055723190308,
      "learning_rate": 1.8849314757928626e-05,
      "loss": 2.4209,
      "step": 12266
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1059406995773315,
      "learning_rate": 1.884912299586888e-05,
      "loss": 2.4357,
      "step": 12267
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.043298602104187,
      "learning_rate": 1.8848931218807465e-05,
      "loss": 2.3858,
      "step": 12268
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1283477544784546,
      "learning_rate": 1.8848739426744707e-05,
      "loss": 2.5116,
      "step": 12269
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0079690217971802,
      "learning_rate": 1.884854761968094e-05,
      "loss": 2.5081,
      "step": 12270
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.27585506439209,
      "learning_rate": 1.8848355797616477e-05,
      "loss": 2.5013,
      "step": 12271
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2109899520874023,
      "learning_rate": 1.8848163960551648e-05,
      "loss": 2.4408,
      "step": 12272
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9643738865852356,
      "learning_rate": 1.8847972108486774e-05,
      "loss": 2.44,
      "step": 12273
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0903273820877075,
      "learning_rate": 1.884778024142219e-05,
      "loss": 2.451,
      "step": 12274
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9218128323554993,
      "learning_rate": 1.884758835935821e-05,
      "loss": 2.3797,
      "step": 12275
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9740798473358154,
      "learning_rate": 1.8847396462295168e-05,
      "loss": 2.7188,
      "step": 12276
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0349048376083374,
      "learning_rate": 1.8847204550233388e-05,
      "loss": 2.5327,
      "step": 12277
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9772080779075623,
      "learning_rate": 1.884701262317319e-05,
      "loss": 2.2152,
      "step": 12278
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9692940711975098,
      "learning_rate": 1.8846820681114906e-05,
      "loss": 2.5456,
      "step": 12279
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.022264838218689,
      "learning_rate": 1.8846628724058854e-05,
      "loss": 2.4964,
      "step": 12280
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9666091799736023,
      "learning_rate": 1.8846436752005366e-05,
      "loss": 2.4575,
      "step": 12281
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.031853199005127,
      "learning_rate": 1.8846244764954766e-05,
      "loss": 2.3065,
      "step": 12282
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0473212003707886,
      "learning_rate": 1.8846052762907378e-05,
      "loss": 2.6303,
      "step": 12283
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0045931339263916,
      "learning_rate": 1.8845860745863524e-05,
      "loss": 2.4299,
      "step": 12284
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9975338578224182,
      "learning_rate": 1.884566871382354e-05,
      "loss": 2.6172,
      "step": 12285
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0679610967636108,
      "learning_rate": 1.884547666678774e-05,
      "loss": 2.4998,
      "step": 12286
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2066047191619873,
      "learning_rate": 1.884528460475646e-05,
      "loss": 2.6274,
      "step": 12287
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.996775209903717,
      "learning_rate": 1.8845092527730012e-05,
      "loss": 2.5187,
      "step": 12288
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0273144245147705,
      "learning_rate": 1.8844900435708737e-05,
      "loss": 2.635,
      "step": 12289
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1767274141311646,
      "learning_rate": 1.8844708328692952e-05,
      "loss": 2.4162,
      "step": 12290
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.149301528930664,
      "learning_rate": 1.8844516206682984e-05,
      "loss": 2.6236,
      "step": 12291
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9559305310249329,
      "learning_rate": 1.8844324069679157e-05,
      "loss": 2.4693,
      "step": 12292
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0342433452606201,
      "learning_rate": 1.88441319176818e-05,
      "loss": 2.5893,
      "step": 12293
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0556011199951172,
      "learning_rate": 1.8843939750691238e-05,
      "loss": 2.4683,
      "step": 12294
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.109311819076538,
      "learning_rate": 1.8843747568707796e-05,
      "loss": 2.5797,
      "step": 12295
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1083511114120483,
      "learning_rate": 1.88435553717318e-05,
      "loss": 2.438,
      "step": 12296
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.966585099697113,
      "learning_rate": 1.8843363159763575e-05,
      "loss": 2.6041,
      "step": 12297
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0058552026748657,
      "learning_rate": 1.8843170932803447e-05,
      "loss": 2.4837,
      "step": 12298
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0410436391830444,
      "learning_rate": 1.8842978690851743e-05,
      "loss": 2.5006,
      "step": 12299
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1281695365905762,
      "learning_rate": 1.8842786433908792e-05,
      "loss": 2.4728,
      "step": 12300
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1489522457122803,
      "learning_rate": 1.884259416197491e-05,
      "loss": 2.362,
      "step": 12301
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9782857298851013,
      "learning_rate": 1.8842401875050433e-05,
      "loss": 2.5843,
      "step": 12302
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9411050081253052,
      "learning_rate": 1.884220957313568e-05,
      "loss": 2.4066,
      "step": 12303
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.046605110168457,
      "learning_rate": 1.8842017256230987e-05,
      "loss": 2.5026,
      "step": 12304
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0986521244049072,
      "learning_rate": 1.884182492433667e-05,
      "loss": 2.5333,
      "step": 12305
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9548298716545105,
      "learning_rate": 1.8841632577453057e-05,
      "loss": 2.6454,
      "step": 12306
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9450012445449829,
      "learning_rate": 1.8841440215580475e-05,
      "loss": 2.3413,
      "step": 12307
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0855381488800049,
      "learning_rate": 1.884124783871925e-05,
      "loss": 2.4909,
      "step": 12308
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1129357814788818,
      "learning_rate": 1.8841055446869713e-05,
      "loss": 2.6601,
      "step": 12309
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2178540229797363,
      "learning_rate": 1.8840863040032183e-05,
      "loss": 2.5045,
      "step": 12310
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9538248777389526,
      "learning_rate": 1.8840670618206985e-05,
      "loss": 2.5577,
      "step": 12311
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0143119096755981,
      "learning_rate": 1.8840478181394454e-05,
      "loss": 2.3664,
      "step": 12312
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.99702388048172,
      "learning_rate": 1.8840285729594915e-05,
      "loss": 2.577,
      "step": 12313
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0775431394577026,
      "learning_rate": 1.8840093262808682e-05,
      "loss": 2.656,
      "step": 12314
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9509381055831909,
      "learning_rate": 1.8839900781036096e-05,
      "loss": 2.4891,
      "step": 12315
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4123224020004272,
      "learning_rate": 1.8839708284277475e-05,
      "loss": 2.5522,
      "step": 12316
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0422005653381348,
      "learning_rate": 1.883951577253315e-05,
      "loss": 2.6785,
      "step": 12317
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0469270944595337,
      "learning_rate": 1.8839323245803443e-05,
      "loss": 2.3832,
      "step": 12318
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.082319736480713,
      "learning_rate": 1.8839130704088683e-05,
      "loss": 2.6368,
      "step": 12319
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0812424421310425,
      "learning_rate": 1.8838938147389195e-05,
      "loss": 2.4062,
      "step": 12320
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2044963836669922,
      "learning_rate": 1.8838745575705305e-05,
      "loss": 2.6991,
      "step": 12321
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.967883288860321,
      "learning_rate": 1.8838552989037345e-05,
      "loss": 2.5421,
      "step": 12322
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0108025074005127,
      "learning_rate": 1.8838360387385632e-05,
      "loss": 2.6263,
      "step": 12323
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0773060321807861,
      "learning_rate": 1.88381677707505e-05,
      "loss": 2.5292,
      "step": 12324
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9693469405174255,
      "learning_rate": 1.8837975139132273e-05,
      "loss": 2.5513,
      "step": 12325
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0009266138076782,
      "learning_rate": 1.8837782492531277e-05,
      "loss": 2.539,
      "step": 12326
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9817094802856445,
      "learning_rate": 1.8837589830947843e-05,
      "loss": 2.5924,
      "step": 12327
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0806108713150024,
      "learning_rate": 1.883739715438229e-05,
      "loss": 2.639,
      "step": 12328
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.047565221786499,
      "learning_rate": 1.883720446283495e-05,
      "loss": 2.6785,
      "step": 12329
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0603694915771484,
      "learning_rate": 1.883701175630615e-05,
      "loss": 2.3569,
      "step": 12330
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0844013690948486,
      "learning_rate": 1.8836819034796213e-05,
      "loss": 2.4911,
      "step": 12331
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.065895915031433,
      "learning_rate": 1.8836626298305468e-05,
      "loss": 2.4806,
      "step": 12332
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9445483088493347,
      "learning_rate": 1.883643354683424e-05,
      "loss": 2.5263,
      "step": 12333
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1094484329223633,
      "learning_rate": 1.883624078038286e-05,
      "loss": 2.5486,
      "step": 12334
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0289318561553955,
      "learning_rate": 1.883604799895165e-05,
      "loss": 2.7598,
      "step": 12335
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9840492606163025,
      "learning_rate": 1.8835855202540937e-05,
      "loss": 2.471,
      "step": 12336
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.092522144317627,
      "learning_rate": 1.8835662391151053e-05,
      "loss": 2.5292,
      "step": 12337
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.266696572303772,
      "learning_rate": 1.8835469564782323e-05,
      "loss": 2.71,
      "step": 12338
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9967401027679443,
      "learning_rate": 1.883527672343507e-05,
      "loss": 2.4196,
      "step": 12339
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9626911282539368,
      "learning_rate": 1.8835083867109622e-05,
      "loss": 2.7093,
      "step": 12340
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.953154444694519,
      "learning_rate": 1.883489099580631e-05,
      "loss": 2.5415,
      "step": 12341
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.988574743270874,
      "learning_rate": 1.8834698109525455e-05,
      "loss": 2.4701,
      "step": 12342
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.147122859954834,
      "learning_rate": 1.883450520826739e-05,
      "loss": 2.5156,
      "step": 12343
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.132981777191162,
      "learning_rate": 1.883431229203244e-05,
      "loss": 2.5483,
      "step": 12344
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9688064455986023,
      "learning_rate": 1.883411936082093e-05,
      "loss": 2.5226,
      "step": 12345
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0839381217956543,
      "learning_rate": 1.8833926414633186e-05,
      "loss": 2.4925,
      "step": 12346
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1456170082092285,
      "learning_rate": 1.883373345346954e-05,
      "loss": 2.4899,
      "step": 12347
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0666002035140991,
      "learning_rate": 1.8833540477330318e-05,
      "loss": 2.3539,
      "step": 12348
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.364012598991394,
      "learning_rate": 1.883334748621584e-05,
      "loss": 2.56,
      "step": 12349
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0055370330810547,
      "learning_rate": 1.8833154480126442e-05,
      "loss": 2.6811,
      "step": 12350
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.99617999792099,
      "learning_rate": 1.8832961459062453e-05,
      "loss": 2.5267,
      "step": 12351
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0923988819122314,
      "learning_rate": 1.883276842302419e-05,
      "loss": 2.6258,
      "step": 12352
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0375229120254517,
      "learning_rate": 1.8832575372011986e-05,
      "loss": 2.5954,
      "step": 12353
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0440322160720825,
      "learning_rate": 1.8832382306026167e-05,
      "loss": 2.5108,
      "step": 12354
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0633703470230103,
      "learning_rate": 1.8832189225067065e-05,
      "loss": 2.648,
      "step": 12355
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2644292116165161,
      "learning_rate": 1.8831996129135e-05,
      "loss": 2.635,
      "step": 12356
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1406127214431763,
      "learning_rate": 1.8831803018230304e-05,
      "loss": 2.3196,
      "step": 12357
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0493381023406982,
      "learning_rate": 1.8831609892353305e-05,
      "loss": 2.5719,
      "step": 12358
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0602329969406128,
      "learning_rate": 1.8831416751504322e-05,
      "loss": 2.5429,
      "step": 12359
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.118931770324707,
      "learning_rate": 1.8831223595683696e-05,
      "loss": 2.4101,
      "step": 12360
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0831283330917358,
      "learning_rate": 1.8831030424891745e-05,
      "loss": 2.5477,
      "step": 12361
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.016621470451355,
      "learning_rate": 1.88308372391288e-05,
      "loss": 2.4579,
      "step": 12362
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5115355253219604,
      "learning_rate": 1.8830644038395186e-05,
      "loss": 2.6354,
      "step": 12363
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2225873470306396,
      "learning_rate": 1.883045082269123e-05,
      "loss": 2.3893,
      "step": 12364
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.04741370677948,
      "learning_rate": 1.883025759201727e-05,
      "loss": 2.6827,
      "step": 12365
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0273494720458984,
      "learning_rate": 1.8830064346373618e-05,
      "loss": 2.6642,
      "step": 12366
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1252421140670776,
      "learning_rate": 1.8829871085760607e-05,
      "loss": 2.6218,
      "step": 12367
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.290379285812378,
      "learning_rate": 1.882967781017857e-05,
      "loss": 2.6098,
      "step": 12368
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0347201824188232,
      "learning_rate": 1.882948451962783e-05,
      "loss": 2.7197,
      "step": 12369
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9161953926086426,
      "learning_rate": 1.8829291214108717e-05,
      "loss": 2.4431,
      "step": 12370
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0281355381011963,
      "learning_rate": 1.8829097893621556e-05,
      "loss": 2.4836,
      "step": 12371
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1555728912353516,
      "learning_rate": 1.882890455816668e-05,
      "loss": 2.4044,
      "step": 12372
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.073980689048767,
      "learning_rate": 1.8828711207744407e-05,
      "loss": 2.4475,
      "step": 12373
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.213051438331604,
      "learning_rate": 1.8828517842355073e-05,
      "loss": 2.5726,
      "step": 12374
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0252381563186646,
      "learning_rate": 1.8828324461999003e-05,
      "loss": 2.6433,
      "step": 12375
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1044546365737915,
      "learning_rate": 1.8828131066676528e-05,
      "loss": 2.456,
      "step": 12376
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9777973294258118,
      "learning_rate": 1.882793765638797e-05,
      "loss": 2.477,
      "step": 12377
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0178664922714233,
      "learning_rate": 1.882774423113366e-05,
      "loss": 2.6713,
      "step": 12378
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0506335496902466,
      "learning_rate": 1.8827550790913927e-05,
      "loss": 2.3603,
      "step": 12379
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0212880373001099,
      "learning_rate": 1.88273573357291e-05,
      "loss": 2.609,
      "step": 12380
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9942664504051208,
      "learning_rate": 1.88271638655795e-05,
      "loss": 2.4365,
      "step": 12381
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.01790452003479,
      "learning_rate": 1.8826970380465464e-05,
      "loss": 2.4698,
      "step": 12382
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.018457055091858,
      "learning_rate": 1.8826776880387314e-05,
      "loss": 2.5136,
      "step": 12383
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9825610518455505,
      "learning_rate": 1.8826583365345382e-05,
      "loss": 2.4612,
      "step": 12384
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0423662662506104,
      "learning_rate": 1.8826389835339993e-05,
      "loss": 2.4832,
      "step": 12385
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.952238142490387,
      "learning_rate": 1.8826196290371475e-05,
      "loss": 2.3374,
      "step": 12386
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3225767612457275,
      "learning_rate": 1.8826002730440157e-05,
      "loss": 2.4647,
      "step": 12387
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0806424617767334,
      "learning_rate": 1.8825809155546366e-05,
      "loss": 2.4178,
      "step": 12388
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9864113926887512,
      "learning_rate": 1.8825615565690435e-05,
      "loss": 2.5444,
      "step": 12389
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9982084035873413,
      "learning_rate": 1.8825421960872683e-05,
      "loss": 2.4211,
      "step": 12390
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9295141696929932,
      "learning_rate": 1.882522834109345e-05,
      "loss": 2.4868,
      "step": 12391
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0677275657653809,
      "learning_rate": 1.8825034706353053e-05,
      "loss": 2.6068,
      "step": 12392
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0141494274139404,
      "learning_rate": 1.882484105665183e-05,
      "loss": 2.514,
      "step": 12393
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9996234774589539,
      "learning_rate": 1.88246473919901e-05,
      "loss": 2.4767,
      "step": 12394
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0614278316497803,
      "learning_rate": 1.88244537123682e-05,
      "loss": 2.594,
      "step": 12395
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9666709899902344,
      "learning_rate": 1.8824260017786454e-05,
      "loss": 2.6176,
      "step": 12396
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0144587755203247,
      "learning_rate": 1.882406630824519e-05,
      "loss": 2.4503,
      "step": 12397
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.025684118270874,
      "learning_rate": 1.8823872583744737e-05,
      "loss": 2.5499,
      "step": 12398
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.176388144493103,
      "learning_rate": 1.882367884428542e-05,
      "loss": 2.6473,
      "step": 12399
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0151278972625732,
      "learning_rate": 1.8823485089867573e-05,
      "loss": 2.5933,
      "step": 12400
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9889392256736755,
      "learning_rate": 1.8823291320491523e-05,
      "loss": 2.4793,
      "step": 12401
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.005602240562439,
      "learning_rate": 1.88230975361576e-05,
      "loss": 2.6113,
      "step": 12402
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0711510181427002,
      "learning_rate": 1.8822903736866128e-05,
      "loss": 2.4517,
      "step": 12403
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0313493013381958,
      "learning_rate": 1.8822709922617437e-05,
      "loss": 2.6942,
      "step": 12404
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9469907879829407,
      "learning_rate": 1.882251609341186e-05,
      "loss": 2.339,
      "step": 12405
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.086289882659912,
      "learning_rate": 1.882232224924972e-05,
      "loss": 2.6499,
      "step": 12406
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1114797592163086,
      "learning_rate": 1.8822128390131345e-05,
      "loss": 2.4689,
      "step": 12407
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0743632316589355,
      "learning_rate": 1.882193451605707e-05,
      "loss": 2.6757,
      "step": 12408
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0209739208221436,
      "learning_rate": 1.8821740627027218e-05,
      "loss": 2.4767,
      "step": 12409
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0379663705825806,
      "learning_rate": 1.8821546723042118e-05,
      "loss": 2.5937,
      "step": 12410
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0234553813934326,
      "learning_rate": 1.8821352804102103e-05,
      "loss": 2.5782,
      "step": 12411
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9825406074523926,
      "learning_rate": 1.88211588702075e-05,
      "loss": 2.6379,
      "step": 12412
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0771846771240234,
      "learning_rate": 1.8820964921358635e-05,
      "loss": 2.5652,
      "step": 12413
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0844677686691284,
      "learning_rate": 1.8820770957555838e-05,
      "loss": 2.7401,
      "step": 12414
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0238120555877686,
      "learning_rate": 1.8820576978799444e-05,
      "loss": 2.4564,
      "step": 12415
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9442502856254578,
      "learning_rate": 1.8820382985089768e-05,
      "loss": 2.2969,
      "step": 12416
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.083587646484375,
      "learning_rate": 1.8820188976427154e-05,
      "loss": 2.4193,
      "step": 12417
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1250224113464355,
      "learning_rate": 1.8819994952811922e-05,
      "loss": 2.4822,
      "step": 12418
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0293362140655518,
      "learning_rate": 1.88198009142444e-05,
      "loss": 2.5362,
      "step": 12419
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0276730060577393,
      "learning_rate": 1.8819606860724925e-05,
      "loss": 2.7077,
      "step": 12420
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9626933932304382,
      "learning_rate": 1.881941279225382e-05,
      "loss": 2.5048,
      "step": 12421
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9441952109336853,
      "learning_rate": 1.8819218708831414e-05,
      "loss": 2.6512,
      "step": 12422
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0808885097503662,
      "learning_rate": 1.8819024610458037e-05,
      "loss": 2.5813,
      "step": 12423
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9353381991386414,
      "learning_rate": 1.8818830497134017e-05,
      "loss": 2.5021,
      "step": 12424
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9912883639335632,
      "learning_rate": 1.8818636368859685e-05,
      "loss": 2.4889,
      "step": 12425
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0807743072509766,
      "learning_rate": 1.8818442225635368e-05,
      "loss": 2.5654,
      "step": 12426
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9883819222450256,
      "learning_rate": 1.88182480674614e-05,
      "loss": 2.3066,
      "step": 12427
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.03535795211792,
      "learning_rate": 1.88180538943381e-05,
      "loss": 2.608,
      "step": 12428
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0384018421173096,
      "learning_rate": 1.881785970626581e-05,
      "loss": 2.4778,
      "step": 12429
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0107057094573975,
      "learning_rate": 1.881766550324485e-05,
      "loss": 2.6474,
      "step": 12430
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1231905221939087,
      "learning_rate": 1.8817471285275558e-05,
      "loss": 2.3546,
      "step": 12431
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0474885702133179,
      "learning_rate": 1.8817277052358248e-05,
      "loss": 2.5589,
      "step": 12432
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1108239889144897,
      "learning_rate": 1.8817082804493263e-05,
      "loss": 2.6231,
      "step": 12433
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2508220672607422,
      "learning_rate": 1.8816888541680927e-05,
      "loss": 2.497,
      "step": 12434
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0037401914596558,
      "learning_rate": 1.8816694263921574e-05,
      "loss": 2.7363,
      "step": 12435
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0687613487243652,
      "learning_rate": 1.8816499971215525e-05,
      "loss": 2.5462,
      "step": 12436
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0271601676940918,
      "learning_rate": 1.8816305663563116e-05,
      "loss": 2.6368,
      "step": 12437
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9794133305549622,
      "learning_rate": 1.8816111340964678e-05,
      "loss": 2.4857,
      "step": 12438
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0587527751922607,
      "learning_rate": 1.8815917003420533e-05,
      "loss": 2.4707,
      "step": 12439
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9691872000694275,
      "learning_rate": 1.8815722650931014e-05,
      "loss": 2.5009,
      "step": 12440
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0319936275482178,
      "learning_rate": 1.8815528283496452e-05,
      "loss": 2.6427,
      "step": 12441
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.966293215751648,
      "learning_rate": 1.8815333901117173e-05,
      "loss": 2.2297,
      "step": 12442
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9812106490135193,
      "learning_rate": 1.881513950379351e-05,
      "loss": 2.3863,
      "step": 12443
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.975106418132782,
      "learning_rate": 1.8814945091525794e-05,
      "loss": 2.5289,
      "step": 12444
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0066033601760864,
      "learning_rate": 1.881475066431435e-05,
      "loss": 2.484,
      "step": 12445
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.081437349319458,
      "learning_rate": 1.8814556222159507e-05,
      "loss": 2.4658,
      "step": 12446
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0510940551757812,
      "learning_rate": 1.88143617650616e-05,
      "loss": 2.4308,
      "step": 12447
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0456725358963013,
      "learning_rate": 1.8814167293020955e-05,
      "loss": 2.6461,
      "step": 12448
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0009186267852783,
      "learning_rate": 1.8813972806037904e-05,
      "loss": 2.4235,
      "step": 12449
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0024491548538208,
      "learning_rate": 1.8813778304112773e-05,
      "loss": 2.509,
      "step": 12450
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9539660215377808,
      "learning_rate": 1.8813583787245895e-05,
      "loss": 2.4446,
      "step": 12451
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9943865537643433,
      "learning_rate": 1.88133892554376e-05,
      "loss": 2.4478,
      "step": 12452
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9390099048614502,
      "learning_rate": 1.8813194708688213e-05,
      "loss": 2.719,
      "step": 12453
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1040949821472168,
      "learning_rate": 1.8813000146998073e-05,
      "loss": 2.5311,
      "step": 12454
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9754233956336975,
      "learning_rate": 1.88128055703675e-05,
      "loss": 2.4481,
      "step": 12455
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9435118436813354,
      "learning_rate": 1.8812610978796828e-05,
      "loss": 2.7471,
      "step": 12456
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0816837549209595,
      "learning_rate": 1.881241637228639e-05,
      "loss": 2.4532,
      "step": 12457
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.97072833776474,
      "learning_rate": 1.881222175083651e-05,
      "loss": 2.6468,
      "step": 12458
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9810889363288879,
      "learning_rate": 1.8812027114447523e-05,
      "loss": 2.3932,
      "step": 12459
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0879971981048584,
      "learning_rate": 1.8811832463119756e-05,
      "loss": 2.6704,
      "step": 12460
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0972789525985718,
      "learning_rate": 1.8811637796853536e-05,
      "loss": 2.5242,
      "step": 12461
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9731652736663818,
      "learning_rate": 1.8811443115649198e-05,
      "loss": 2.518,
      "step": 12462
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0073641538619995,
      "learning_rate": 1.8811248419507072e-05,
      "loss": 2.562,
      "step": 12463
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0413984060287476,
      "learning_rate": 1.881105370842749e-05,
      "loss": 2.4354,
      "step": 12464
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9095136523246765,
      "learning_rate": 1.8810858982410776e-05,
      "loss": 2.7508,
      "step": 12465
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9936240911483765,
      "learning_rate": 1.8810664241457263e-05,
      "loss": 2.549,
      "step": 12466
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.109049677848816,
      "learning_rate": 1.881046948556728e-05,
      "loss": 2.8064,
      "step": 12467
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0907378196716309,
      "learning_rate": 1.881027471474116e-05,
      "loss": 2.6349,
      "step": 12468
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.089373230934143,
      "learning_rate": 1.881007992897923e-05,
      "loss": 2.5338,
      "step": 12469
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9464375376701355,
      "learning_rate": 1.8809885128281825e-05,
      "loss": 2.2419,
      "step": 12470
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9596166610717773,
      "learning_rate": 1.8809690312649273e-05,
      "loss": 2.5315,
      "step": 12471
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9543318152427673,
      "learning_rate": 1.8809495482081897e-05,
      "loss": 2.5332,
      "step": 12472
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3089592456817627,
      "learning_rate": 1.8809300636580035e-05,
      "loss": 2.6586,
      "step": 12473
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9732905626296997,
      "learning_rate": 1.8809105776144017e-05,
      "loss": 2.6183,
      "step": 12474
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0056021213531494,
      "learning_rate": 1.8808910900774175e-05,
      "loss": 2.5897,
      "step": 12475
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0868284702301025,
      "learning_rate": 1.8808716010470834e-05,
      "loss": 2.4873,
      "step": 12476
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0308964252471924,
      "learning_rate": 1.880852110523433e-05,
      "loss": 2.5358,
      "step": 12477
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.071343183517456,
      "learning_rate": 1.8808326185064985e-05,
      "loss": 2.5725,
      "step": 12478
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1078130006790161,
      "learning_rate": 1.8808131249963138e-05,
      "loss": 2.426,
      "step": 12479
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.998151957988739,
      "learning_rate": 1.8807936299929113e-05,
      "loss": 2.5764,
      "step": 12480
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.50225031375885,
      "learning_rate": 1.880774133496325e-05,
      "loss": 2.5828,
      "step": 12481
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0289733409881592,
      "learning_rate": 1.880754635506587e-05,
      "loss": 2.609,
      "step": 12482
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0471855401992798,
      "learning_rate": 1.8807351360237307e-05,
      "loss": 2.5781,
      "step": 12483
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9911723136901855,
      "learning_rate": 1.880715635047789e-05,
      "loss": 2.5793,
      "step": 12484
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3393642902374268,
      "learning_rate": 1.880696132578795e-05,
      "loss": 2.5344,
      "step": 12485
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.199826717376709,
      "learning_rate": 1.8806766286167823e-05,
      "loss": 2.3903,
      "step": 12486
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2660605907440186,
      "learning_rate": 1.880657123161783e-05,
      "loss": 2.5445,
      "step": 12487
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.239046335220337,
      "learning_rate": 1.8806376162138313e-05,
      "loss": 2.5651,
      "step": 12488
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9568693041801453,
      "learning_rate": 1.8806181077729592e-05,
      "loss": 2.4306,
      "step": 12489
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0523097515106201,
      "learning_rate": 1.8805985978392004e-05,
      "loss": 2.6887,
      "step": 12490
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0287609100341797,
      "learning_rate": 1.880579086412588e-05,
      "loss": 2.651,
      "step": 12491
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0909000635147095,
      "learning_rate": 1.8805595734931543e-05,
      "loss": 2.5002,
      "step": 12492
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9639648199081421,
      "learning_rate": 1.8805400590809334e-05,
      "loss": 2.3385,
      "step": 12493
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2394084930419922,
      "learning_rate": 1.880520543175958e-05,
      "loss": 2.5007,
      "step": 12494
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1232763528823853,
      "learning_rate": 1.880501025778261e-05,
      "loss": 2.4669,
      "step": 12495
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1170755624771118,
      "learning_rate": 1.8804815068878755e-05,
      "loss": 2.6232,
      "step": 12496
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1092338562011719,
      "learning_rate": 1.880461986504835e-05,
      "loss": 2.3628,
      "step": 12497
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0913138389587402,
      "learning_rate": 1.880442464629172e-05,
      "loss": 2.4198,
      "step": 12498
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9974513053894043,
      "learning_rate": 1.88042294126092e-05,
      "loss": 2.5238,
      "step": 12499
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1231231689453125,
      "learning_rate": 1.880403416400112e-05,
      "loss": 2.3444,
      "step": 12500
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.400163412094116,
      "learning_rate": 1.8803838900467816e-05,
      "loss": 2.5935,
      "step": 12501
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9235785603523254,
      "learning_rate": 1.8803643622009607e-05,
      "loss": 2.3381,
      "step": 12502
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0103936195373535,
      "learning_rate": 1.8803448328626833e-05,
      "loss": 2.4876,
      "step": 12503
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0376532077789307,
      "learning_rate": 1.8803253020319824e-05,
      "loss": 2.4812,
      "step": 12504
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0932942628860474,
      "learning_rate": 1.880305769708891e-05,
      "loss": 2.6758,
      "step": 12505
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2002348899841309,
      "learning_rate": 1.880286235893442e-05,
      "loss": 2.3654,
      "step": 12506
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0488824844360352,
      "learning_rate": 1.880266700585669e-05,
      "loss": 2.3892,
      "step": 12507
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0018384456634521,
      "learning_rate": 1.880247163785605e-05,
      "loss": 2.4053,
      "step": 12508
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2074837684631348,
      "learning_rate": 1.8802276254932826e-05,
      "loss": 2.5064,
      "step": 12509
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.987643837928772,
      "learning_rate": 1.8802080857087353e-05,
      "loss": 2.4248,
      "step": 12510
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9634953737258911,
      "learning_rate": 1.8801885444319968e-05,
      "loss": 2.654,
      "step": 12511
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.045328140258789,
      "learning_rate": 1.880169001663099e-05,
      "loss": 2.4886,
      "step": 12512
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0360363721847534,
      "learning_rate": 1.8801494574020764e-05,
      "loss": 2.5141,
      "step": 12513
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0572174787521362,
      "learning_rate": 1.880129911648961e-05,
      "loss": 2.6914,
      "step": 12514
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.959722101688385,
      "learning_rate": 1.8801103644037863e-05,
      "loss": 2.3989,
      "step": 12515
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9831874966621399,
      "learning_rate": 1.8800908156665855e-05,
      "loss": 2.6949,
      "step": 12516
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9867786169052124,
      "learning_rate": 1.8800712654373918e-05,
      "loss": 2.4999,
      "step": 12517
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9972978830337524,
      "learning_rate": 1.8800517137162383e-05,
      "loss": 2.4678,
      "step": 12518
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9996419548988342,
      "learning_rate": 1.8800321605031583e-05,
      "loss": 2.6201,
      "step": 12519
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.05256187915802,
      "learning_rate": 1.8800126057981846e-05,
      "loss": 2.4379,
      "step": 12520
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0248178243637085,
      "learning_rate": 1.8799930496013507e-05,
      "loss": 2.3697,
      "step": 12521
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1418453454971313,
      "learning_rate": 1.879973491912689e-05,
      "loss": 2.7141,
      "step": 12522
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0893900394439697,
      "learning_rate": 1.879953932732234e-05,
      "loss": 2.4824,
      "step": 12523
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.140176773071289,
      "learning_rate": 1.8799343720600174e-05,
      "loss": 2.4468,
      "step": 12524
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0437427759170532,
      "learning_rate": 1.8799148098960732e-05,
      "loss": 2.4146,
      "step": 12525
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.025681972503662,
      "learning_rate": 1.879895246240435e-05,
      "loss": 2.4923,
      "step": 12526
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1085306406021118,
      "learning_rate": 1.8798756810931346e-05,
      "loss": 2.6127,
      "step": 12527
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9646363258361816,
      "learning_rate": 1.8798561144542066e-05,
      "loss": 2.4,
      "step": 12528
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1270381212234497,
      "learning_rate": 1.879836546323683e-05,
      "loss": 2.6112,
      "step": 12529
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0230789184570312,
      "learning_rate": 1.879816976701598e-05,
      "loss": 2.5416,
      "step": 12530
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0298398733139038,
      "learning_rate": 1.879797405587984e-05,
      "loss": 2.5708,
      "step": 12531
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.8960316777229309,
      "learning_rate": 1.8797778329828742e-05,
      "loss": 2.4967,
      "step": 12532
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0608079433441162,
      "learning_rate": 1.8797582588863026e-05,
      "loss": 2.4821,
      "step": 12533
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.049835205078125,
      "learning_rate": 1.8797386832983014e-05,
      "loss": 2.4164,
      "step": 12534
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0573151111602783,
      "learning_rate": 1.8797191062189043e-05,
      "loss": 2.3994,
      "step": 12535
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9900501370429993,
      "learning_rate": 1.8796995276481443e-05,
      "loss": 2.4754,
      "step": 12536
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0608569383621216,
      "learning_rate": 1.8796799475860548e-05,
      "loss": 2.4038,
      "step": 12537
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0221418142318726,
      "learning_rate": 1.8796603660326686e-05,
      "loss": 2.6018,
      "step": 12538
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0920541286468506,
      "learning_rate": 1.8796407829880196e-05,
      "loss": 2.4317,
      "step": 12539
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0216546058654785,
      "learning_rate": 1.8796211984521406e-05,
      "loss": 2.4687,
      "step": 12540
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.050865888595581,
      "learning_rate": 1.879601612425064e-05,
      "loss": 2.5407,
      "step": 12541
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0787932872772217,
      "learning_rate": 1.8795820249068248e-05,
      "loss": 2.5468,
      "step": 12542
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9624752998352051,
      "learning_rate": 1.8795624358974544e-05,
      "loss": 2.3678,
      "step": 12543
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2019309997558594,
      "learning_rate": 1.879542845396987e-05,
      "loss": 2.4328,
      "step": 12544
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0271313190460205,
      "learning_rate": 1.8795232534054558e-05,
      "loss": 2.5088,
      "step": 12545
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6191073656082153,
      "learning_rate": 1.8795036599228936e-05,
      "loss": 2.7002,
      "step": 12546
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4119479656219482,
      "learning_rate": 1.879484064949334e-05,
      "loss": 2.6141,
      "step": 12547
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.152513027191162,
      "learning_rate": 1.8794644684848098e-05,
      "loss": 2.3138,
      "step": 12548
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0525397062301636,
      "learning_rate": 1.879444870529355e-05,
      "loss": 2.4958,
      "step": 12549
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0702195167541504,
      "learning_rate": 1.8794252710830015e-05,
      "loss": 2.5568,
      "step": 12550
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2673358917236328,
      "learning_rate": 1.8794056701457836e-05,
      "loss": 2.426,
      "step": 12551
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.19927179813385,
      "learning_rate": 1.8793860677177346e-05,
      "loss": 2.5688,
      "step": 12552
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.470827579498291,
      "learning_rate": 1.8793664637988873e-05,
      "loss": 2.4223,
      "step": 12553
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0128365755081177,
      "learning_rate": 1.8793468583892748e-05,
      "loss": 2.4092,
      "step": 12554
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0251247882843018,
      "learning_rate": 1.87932725148893e-05,
      "loss": 2.4996,
      "step": 12555
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0015766620635986,
      "learning_rate": 1.8793076430978877e-05,
      "loss": 2.5129,
      "step": 12556
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.124908685684204,
      "learning_rate": 1.8792880332161795e-05,
      "loss": 2.608,
      "step": 12557
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9740301370620728,
      "learning_rate": 1.8792684218438396e-05,
      "loss": 2.5668,
      "step": 12558
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.136649489402771,
      "learning_rate": 1.879248808980901e-05,
      "loss": 2.623,
      "step": 12559
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.202780842781067,
      "learning_rate": 1.8792291946273965e-05,
      "loss": 2.5477,
      "step": 12560
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.006296992301941,
      "learning_rate": 1.87920957878336e-05,
      "loss": 2.7007,
      "step": 12561
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9935833215713501,
      "learning_rate": 1.8791899614488243e-05,
      "loss": 2.5049,
      "step": 12562
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0168126821517944,
      "learning_rate": 1.879170342623823e-05,
      "loss": 2.5456,
      "step": 12563
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9622071981430054,
      "learning_rate": 1.8791507223083888e-05,
      "loss": 2.416,
      "step": 12564
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.905242383480072,
      "learning_rate": 1.879131100502556e-05,
      "loss": 2.3439,
      "step": 12565
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0162363052368164,
      "learning_rate": 1.8791114772063567e-05,
      "loss": 2.687,
      "step": 12566
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1830726861953735,
      "learning_rate": 1.8790918524198248e-05,
      "loss": 2.6399,
      "step": 12567
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0539872646331787,
      "learning_rate": 1.8790722261429936e-05,
      "loss": 2.6375,
      "step": 12568
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.102959394454956,
      "learning_rate": 1.879052598375896e-05,
      "loss": 2.498,
      "step": 12569
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0485889911651611,
      "learning_rate": 1.8790329691185656e-05,
      "loss": 2.4405,
      "step": 12570
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0062646865844727,
      "learning_rate": 1.8790133383710355e-05,
      "loss": 2.4783,
      "step": 12571
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0315388441085815,
      "learning_rate": 1.878993706133339e-05,
      "loss": 2.6151,
      "step": 12572
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0134682655334473,
      "learning_rate": 1.8789740724055097e-05,
      "loss": 2.5188,
      "step": 12573
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0096771717071533,
      "learning_rate": 1.8789544371875806e-05,
      "loss": 2.4164,
      "step": 12574
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1102423667907715,
      "learning_rate": 1.8789348004795847e-05,
      "loss": 2.3909,
      "step": 12575
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0018712282180786,
      "learning_rate": 1.878915162281556e-05,
      "loss": 2.7135,
      "step": 12576
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.057364583015442,
      "learning_rate": 1.8788955225935274e-05,
      "loss": 2.4806,
      "step": 12577
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0793015956878662,
      "learning_rate": 1.8788758814155317e-05,
      "loss": 2.401,
      "step": 12578
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9303359389305115,
      "learning_rate": 1.878856238747603e-05,
      "loss": 2.6254,
      "step": 12579
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.012840747833252,
      "learning_rate": 1.878836594589774e-05,
      "loss": 2.5425,
      "step": 12580
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9998690485954285,
      "learning_rate": 1.878816948942079e-05,
      "loss": 2.6202,
      "step": 12581
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0081305503845215,
      "learning_rate": 1.87879730180455e-05,
      "loss": 2.6076,
      "step": 12582
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9599623680114746,
      "learning_rate": 1.8787776531772208e-05,
      "loss": 2.6563,
      "step": 12583
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.066285490989685,
      "learning_rate": 1.8787580030601252e-05,
      "loss": 2.4644,
      "step": 12584
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1909736394882202,
      "learning_rate": 1.8787383514532958e-05,
      "loss": 2.6272,
      "step": 12585
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9957934617996216,
      "learning_rate": 1.8787186983567666e-05,
      "loss": 2.6429,
      "step": 12586
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1047430038452148,
      "learning_rate": 1.8786990437705703e-05,
      "loss": 2.5342,
      "step": 12587
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0501430034637451,
      "learning_rate": 1.8786793876947402e-05,
      "loss": 2.4159,
      "step": 12588
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9883686304092407,
      "learning_rate": 1.8786597301293105e-05,
      "loss": 2.4961,
      "step": 12589
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.013800859451294,
      "learning_rate": 1.8786400710743134e-05,
      "loss": 2.2605,
      "step": 12590
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0521444082260132,
      "learning_rate": 1.878620410529783e-05,
      "loss": 2.5782,
      "step": 12591
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9864242076873779,
      "learning_rate": 1.878600748495752e-05,
      "loss": 2.5809,
      "step": 12592
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0420490503311157,
      "learning_rate": 1.8785810849722544e-05,
      "loss": 2.5818,
      "step": 12593
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9928065538406372,
      "learning_rate": 1.8785614199593234e-05,
      "loss": 2.2449,
      "step": 12594
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.065265417098999,
      "learning_rate": 1.878541753456992e-05,
      "loss": 2.7074,
      "step": 12595
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0777077674865723,
      "learning_rate": 1.878522085465294e-05,
      "loss": 2.4567,
      "step": 12596
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9641479849815369,
      "learning_rate": 1.878502415984262e-05,
      "loss": 2.4233,
      "step": 12597
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9383671283721924,
      "learning_rate": 1.87848274501393e-05,
      "loss": 2.4662,
      "step": 12598
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0247300863265991,
      "learning_rate": 1.878463072554331e-05,
      "loss": 2.5784,
      "step": 12599
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0755757093429565,
      "learning_rate": 1.8784433986054987e-05,
      "loss": 2.6203,
      "step": 12600
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.007218837738037,
      "learning_rate": 1.8784237231674665e-05,
      "loss": 2.4895,
      "step": 12601
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0667616128921509,
      "learning_rate": 1.878404046240267e-05,
      "loss": 2.4923,
      "step": 12602
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3130922317504883,
      "learning_rate": 1.8783843678239348e-05,
      "loss": 2.2742,
      "step": 12603
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0408164262771606,
      "learning_rate": 1.8783646879185017e-05,
      "loss": 2.5133,
      "step": 12604
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9426219463348389,
      "learning_rate": 1.8783450065240023e-05,
      "loss": 2.6378,
      "step": 12605
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0162279605865479,
      "learning_rate": 1.8783253236404695e-05,
      "loss": 2.5616,
      "step": 12606
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.023089051246643,
      "learning_rate": 1.8783056392679368e-05,
      "loss": 2.6448,
      "step": 12607
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1119393110275269,
      "learning_rate": 1.8782859534064374e-05,
      "loss": 2.4224,
      "step": 12608
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0903443098068237,
      "learning_rate": 1.8782662660560048e-05,
      "loss": 2.6545,
      "step": 12609
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1903445720672607,
      "learning_rate": 1.8782465772166723e-05,
      "loss": 2.481,
      "step": 12610
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1576063632965088,
      "learning_rate": 1.878226886888473e-05,
      "loss": 2.3947,
      "step": 12611
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9362457990646362,
      "learning_rate": 1.8782071950714413e-05,
      "loss": 2.4707,
      "step": 12612
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.923774778842926,
      "learning_rate": 1.8781875017656096e-05,
      "loss": 2.4549,
      "step": 12613
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1291598081588745,
      "learning_rate": 1.8781678069710114e-05,
      "loss": 2.4676,
      "step": 12614
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9862481951713562,
      "learning_rate": 1.8781481106876806e-05,
      "loss": 2.367,
      "step": 12615
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.99833744764328,
      "learning_rate": 1.87812841291565e-05,
      "loss": 2.7228,
      "step": 12616
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.065482497215271,
      "learning_rate": 1.878108713654953e-05,
      "loss": 2.4111,
      "step": 12617
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0126597881317139,
      "learning_rate": 1.8780890129056238e-05,
      "loss": 2.4822,
      "step": 12618
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9658522605895996,
      "learning_rate": 1.8780693106676948e-05,
      "loss": 2.4326,
      "step": 12619
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9302221536636353,
      "learning_rate": 1.8780496069412e-05,
      "loss": 2.5915,
      "step": 12620
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9630941152572632,
      "learning_rate": 1.8780299017261726e-05,
      "loss": 2.5757,
      "step": 12621
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.117108702659607,
      "learning_rate": 1.8780101950226462e-05,
      "loss": 2.6913,
      "step": 12622
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.041637659072876,
      "learning_rate": 1.877990486830654e-05,
      "loss": 2.5781,
      "step": 12623
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.012170672416687,
      "learning_rate": 1.8779707771502293e-05,
      "loss": 2.3383,
      "step": 12624
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1815553903579712,
      "learning_rate": 1.8779510659814057e-05,
      "loss": 2.5333,
      "step": 12625
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0735887289047241,
      "learning_rate": 1.877931353324217e-05,
      "loss": 2.4024,
      "step": 12626
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9524408578872681,
      "learning_rate": 1.8779116391786956e-05,
      "loss": 2.416,
      "step": 12627
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9756922125816345,
      "learning_rate": 1.877891923544876e-05,
      "loss": 2.5169,
      "step": 12628
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.034852147102356,
      "learning_rate": 1.877872206422791e-05,
      "loss": 2.4571,
      "step": 12629
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1305373907089233,
      "learning_rate": 1.877852487812474e-05,
      "loss": 2.5038,
      "step": 12630
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.08196222782135,
      "learning_rate": 1.8778327677139588e-05,
      "loss": 2.766,
      "step": 12631
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.954863965511322,
      "learning_rate": 1.877813046127279e-05,
      "loss": 2.4064,
      "step": 12632
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0238958597183228,
      "learning_rate": 1.877793323052467e-05,
      "loss": 2.5644,
      "step": 12633
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0364902019500732,
      "learning_rate": 1.877773598489557e-05,
      "loss": 2.6439,
      "step": 12634
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0254874229431152,
      "learning_rate": 1.8777538724385827e-05,
      "loss": 2.4515,
      "step": 12635
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1408699750900269,
      "learning_rate": 1.8777341448995773e-05,
      "loss": 2.4931,
      "step": 12636
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0255091190338135,
      "learning_rate": 1.8777144158725735e-05,
      "loss": 2.5109,
      "step": 12637
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0809637308120728,
      "learning_rate": 1.877694685357606e-05,
      "loss": 2.7872,
      "step": 12638
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.084614634513855,
      "learning_rate": 1.8776749533547073e-05,
      "loss": 2.4474,
      "step": 12639
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0166358947753906,
      "learning_rate": 1.8776552198639114e-05,
      "loss": 2.3531,
      "step": 12640
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.920659601688385,
      "learning_rate": 1.8776354848852512e-05,
      "loss": 2.5192,
      "step": 12641
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.044883370399475,
      "learning_rate": 1.8776157484187608e-05,
      "loss": 2.6537,
      "step": 12642
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9631039500236511,
      "learning_rate": 1.8775960104644732e-05,
      "loss": 2.4972,
      "step": 12643
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0262314081192017,
      "learning_rate": 1.877576271022422e-05,
      "loss": 2.5411,
      "step": 12644
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3272887468338013,
      "learning_rate": 1.877556530092641e-05,
      "loss": 2.5065,
      "step": 12645
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0028940439224243,
      "learning_rate": 1.877536787675163e-05,
      "loss": 2.3326,
      "step": 12646
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2158396244049072,
      "learning_rate": 1.8775170437700217e-05,
      "loss": 2.5538,
      "step": 12647
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9222566485404968,
      "learning_rate": 1.877497298377251e-05,
      "loss": 2.4753,
      "step": 12648
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1592894792556763,
      "learning_rate": 1.877477551496884e-05,
      "loss": 2.44,
      "step": 12649
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0095202922821045,
      "learning_rate": 1.877457803128954e-05,
      "loss": 2.5872,
      "step": 12650
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0988513231277466,
      "learning_rate": 1.8774380532734948e-05,
      "loss": 2.5447,
      "step": 12651
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9941927790641785,
      "learning_rate": 1.87741830193054e-05,
      "loss": 2.4994,
      "step": 12652
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0365285873413086,
      "learning_rate": 1.8773985491001228e-05,
      "loss": 2.4999,
      "step": 12653
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9867210984230042,
      "learning_rate": 1.8773787947822767e-05,
      "loss": 2.5737,
      "step": 12654
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0145968198776245,
      "learning_rate": 1.877359038977035e-05,
      "loss": 2.1289,
      "step": 12655
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9770193099975586,
      "learning_rate": 1.8773392816844317e-05,
      "loss": 2.6295,
      "step": 12656
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.04643976688385,
      "learning_rate": 1.8773195229045e-05,
      "loss": 2.3329,
      "step": 12657
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0724653005599976,
      "learning_rate": 1.8772997626372736e-05,
      "loss": 2.3067,
      "step": 12658
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0035789012908936,
      "learning_rate": 1.8772800008827856e-05,
      "loss": 2.4747,
      "step": 12659
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9662978053092957,
      "learning_rate": 1.8772602376410697e-05,
      "loss": 2.555,
      "step": 12660
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1287610530853271,
      "learning_rate": 1.8772404729121598e-05,
      "loss": 2.4336,
      "step": 12661
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1300362348556519,
      "learning_rate": 1.8772207066960887e-05,
      "loss": 2.4626,
      "step": 12662
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9754628539085388,
      "learning_rate": 1.8772009389928904e-05,
      "loss": 2.5211,
      "step": 12663
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.107542872428894,
      "learning_rate": 1.877181169802598e-05,
      "loss": 2.4131,
      "step": 12664
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.997621476650238,
      "learning_rate": 1.8771613991252457e-05,
      "loss": 2.4717,
      "step": 12665
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0291929244995117,
      "learning_rate": 1.877141626960866e-05,
      "loss": 2.5598,
      "step": 12666
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.209212064743042,
      "learning_rate": 1.8771218533094937e-05,
      "loss": 2.3154,
      "step": 12667
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.0187923908233643,
      "learning_rate": 1.8771020781711615e-05,
      "loss": 2.6402,
      "step": 12668
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.1992552280426025,
      "learning_rate": 1.877082301545903e-05,
      "loss": 2.5557,
      "step": 12669
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.091101050376892,
      "learning_rate": 1.877062523433751e-05,
      "loss": 2.8188,
      "step": 12670
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9511841535568237,
      "learning_rate": 1.877042743834741e-05,
      "loss": 2.6584,
      "step": 12671
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.9930310845375061,
      "learning_rate": 1.8770229627489046e-05,
      "loss": 2.2426,
      "step": 12672
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.11454176902771,
      "learning_rate": 1.8770031801762766e-05,
      "loss": 2.6448,
      "step": 12673
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0142253637313843,
      "learning_rate": 1.8769833961168897e-05,
      "loss": 2.5482,
      "step": 12674
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3902709484100342,
      "learning_rate": 1.8769636105707776e-05,
      "loss": 2.343,
      "step": 12675
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1049273014068604,
      "learning_rate": 1.8769438235379744e-05,
      "loss": 2.5271,
      "step": 12676
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9168351888656616,
      "learning_rate": 1.876924035018513e-05,
      "loss": 2.2843,
      "step": 12677
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.047049641609192,
      "learning_rate": 1.876904245012427e-05,
      "loss": 2.6492,
      "step": 12678
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9888367652893066,
      "learning_rate": 1.8768844535197503e-05,
      "loss": 2.463,
      "step": 12679
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.111845850944519,
      "learning_rate": 1.8768646605405165e-05,
      "loss": 2.2988,
      "step": 12680
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0650944709777832,
      "learning_rate": 1.876844866074759e-05,
      "loss": 2.6109,
      "step": 12681
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.967506468296051,
      "learning_rate": 1.8768250701225107e-05,
      "loss": 2.4995,
      "step": 12682
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1061683893203735,
      "learning_rate": 1.8768052726838063e-05,
      "loss": 2.4681,
      "step": 12683
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1568312644958496,
      "learning_rate": 1.8767854737586788e-05,
      "loss": 2.6243,
      "step": 12684
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.076005220413208,
      "learning_rate": 1.876765673347162e-05,
      "loss": 2.5175,
      "step": 12685
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1098732948303223,
      "learning_rate": 1.8767458714492885e-05,
      "loss": 2.348,
      "step": 12686
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0683375597000122,
      "learning_rate": 1.8767260680650932e-05,
      "loss": 2.5808,
      "step": 12687
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2190237045288086,
      "learning_rate": 1.876706263194609e-05,
      "loss": 2.4905,
      "step": 12688
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0881739854812622,
      "learning_rate": 1.8766864568378694e-05,
      "loss": 2.5732,
      "step": 12689
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0692548751831055,
      "learning_rate": 1.8766666489949083e-05,
      "loss": 2.468,
      "step": 12690
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.012420654296875,
      "learning_rate": 1.8766468396657594e-05,
      "loss": 2.6023,
      "step": 12691
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1912448406219482,
      "learning_rate": 1.8766270288504556e-05,
      "loss": 2.4879,
      "step": 12692
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.050787091255188,
      "learning_rate": 1.8766072165490312e-05,
      "loss": 2.4703,
      "step": 12693
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0697364807128906,
      "learning_rate": 1.8765874027615193e-05,
      "loss": 2.4064,
      "step": 12694
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0050976276397705,
      "learning_rate": 1.8765675874879535e-05,
      "loss": 2.7099,
      "step": 12695
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0292601585388184,
      "learning_rate": 1.876547770728368e-05,
      "loss": 2.6108,
      "step": 12696
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.075472116470337,
      "learning_rate": 1.8765279524827957e-05,
      "loss": 2.2812,
      "step": 12697
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9835924506187439,
      "learning_rate": 1.8765081327512705e-05,
      "loss": 2.6346,
      "step": 12698
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9607961773872375,
      "learning_rate": 1.8764883115338258e-05,
      "loss": 2.6013,
      "step": 12699
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.058219075202942,
      "learning_rate": 1.8764684888304953e-05,
      "loss": 2.5726,
      "step": 12700
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.000542402267456,
      "learning_rate": 1.876448664641313e-05,
      "loss": 2.5768,
      "step": 12701
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.140619158744812,
      "learning_rate": 1.876428838966312e-05,
      "loss": 2.557,
      "step": 12702
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9455078840255737,
      "learning_rate": 1.8764090118055263e-05,
      "loss": 2.495,
      "step": 12703
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9793699383735657,
      "learning_rate": 1.876389183158989e-05,
      "loss": 2.2373,
      "step": 12704
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0873470306396484,
      "learning_rate": 1.876369353026734e-05,
      "loss": 2.5075,
      "step": 12705
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0167497396469116,
      "learning_rate": 1.876349521408795e-05,
      "loss": 2.3918,
      "step": 12706
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0682555437088013,
      "learning_rate": 1.876329688305206e-05,
      "loss": 2.6133,
      "step": 12707
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.92601478099823,
      "learning_rate": 1.876309853716e-05,
      "loss": 2.3302,
      "step": 12708
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0568801164627075,
      "learning_rate": 1.8762900176412104e-05,
      "loss": 2.5277,
      "step": 12709
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0205142498016357,
      "learning_rate": 1.8762701800808715e-05,
      "loss": 2.3023,
      "step": 12710
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0550117492675781,
      "learning_rate": 1.8762503410350163e-05,
      "loss": 2.7166,
      "step": 12711
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0443027019500732,
      "learning_rate": 1.8762305005036792e-05,
      "loss": 2.3391,
      "step": 12712
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1151961088180542,
      "learning_rate": 1.8762106584868933e-05,
      "loss": 2.3101,
      "step": 12713
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0132418870925903,
      "learning_rate": 1.876190814984692e-05,
      "loss": 2.5053,
      "step": 12714
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9870031476020813,
      "learning_rate": 1.87617096999711e-05,
      "loss": 2.5846,
      "step": 12715
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9547933340072632,
      "learning_rate": 1.8761511235241795e-05,
      "loss": 2.6404,
      "step": 12716
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0646814107894897,
      "learning_rate": 1.8761312755659355e-05,
      "loss": 2.5197,
      "step": 12717
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3118045330047607,
      "learning_rate": 1.8761114261224108e-05,
      "loss": 2.4183,
      "step": 12718
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1054762601852417,
      "learning_rate": 1.8760915751936392e-05,
      "loss": 2.7736,
      "step": 12719
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0565855503082275,
      "learning_rate": 1.8760717227796546e-05,
      "loss": 2.8938,
      "step": 12720
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9380812644958496,
      "learning_rate": 1.8760518688804903e-05,
      "loss": 2.3231,
      "step": 12721
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0819209814071655,
      "learning_rate": 1.8760320134961804e-05,
      "loss": 2.4831,
      "step": 12722
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1692752838134766,
      "learning_rate": 1.876012156626758e-05,
      "loss": 2.4581,
      "step": 12723
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9974027872085571,
      "learning_rate": 1.875992298272257e-05,
      "loss": 2.6688,
      "step": 12724
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0322282314300537,
      "learning_rate": 1.8759724384327117e-05,
      "loss": 2.5884,
      "step": 12725
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9819047451019287,
      "learning_rate": 1.8759525771081547e-05,
      "loss": 2.6772,
      "step": 12726
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.032753825187683,
      "learning_rate": 1.8759327142986204e-05,
      "loss": 2.515,
      "step": 12727
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.019633412361145,
      "learning_rate": 1.8759128500041423e-05,
      "loss": 2.2926,
      "step": 12728
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0631157159805298,
      "learning_rate": 1.8758929842247537e-05,
      "loss": 2.5962,
      "step": 12729
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9844141006469727,
      "learning_rate": 1.875873116960489e-05,
      "loss": 2.4666,
      "step": 12730
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.005049705505371,
      "learning_rate": 1.875853248211381e-05,
      "loss": 2.3946,
      "step": 12731
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9420908093452454,
      "learning_rate": 1.875833377977464e-05,
      "loss": 2.5486,
      "step": 12732
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9933266639709473,
      "learning_rate": 1.875813506258772e-05,
      "loss": 2.6014,
      "step": 12733
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0397626161575317,
      "learning_rate": 1.875793633055338e-05,
      "loss": 2.626,
      "step": 12734
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.8696245551109314,
      "learning_rate": 1.875773758367196e-05,
      "loss": 2.5093,
      "step": 12735
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1397618055343628,
      "learning_rate": 1.8757538821943796e-05,
      "loss": 2.6564,
      "step": 12736
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9613590836524963,
      "learning_rate": 1.8757340045369223e-05,
      "loss": 2.3937,
      "step": 12737
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0574431419372559,
      "learning_rate": 1.875714125394858e-05,
      "loss": 2.6449,
      "step": 12738
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9612212777137756,
      "learning_rate": 1.8756942447682207e-05,
      "loss": 2.4975,
      "step": 12739
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2135735750198364,
      "learning_rate": 1.8756743626570438e-05,
      "loss": 2.679,
      "step": 12740
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0592310428619385,
      "learning_rate": 1.875654479061361e-05,
      "loss": 2.5256,
      "step": 12741
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.973899245262146,
      "learning_rate": 1.8756345939812056e-05,
      "loss": 2.5526,
      "step": 12742
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0299720764160156,
      "learning_rate": 1.8756147074166124e-05,
      "loss": 2.3582,
      "step": 12743
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1013305187225342,
      "learning_rate": 1.875594819367614e-05,
      "loss": 2.6072,
      "step": 12744
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0685980319976807,
      "learning_rate": 1.8755749298342448e-05,
      "loss": 2.5435,
      "step": 12745
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.8930128812789917,
      "learning_rate": 1.875555038816538e-05,
      "loss": 2.5552,
      "step": 12746
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9060968160629272,
      "learning_rate": 1.875535146314528e-05,
      "loss": 2.5247,
      "step": 12747
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9759409427642822,
      "learning_rate": 1.875515252328248e-05,
      "loss": 2.5959,
      "step": 12748
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9613211750984192,
      "learning_rate": 1.8754953568577317e-05,
      "loss": 2.4638,
      "step": 12749
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1228387355804443,
      "learning_rate": 1.875475459903013e-05,
      "loss": 2.4998,
      "step": 12750
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.8987240791320801,
      "learning_rate": 1.8754555614641257e-05,
      "loss": 2.5454,
      "step": 12751
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.176128625869751,
      "learning_rate": 1.8754356615411037e-05,
      "loss": 2.4037,
      "step": 12752
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0678966045379639,
      "learning_rate": 1.8754157601339802e-05,
      "loss": 2.5791,
      "step": 12753
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0213347673416138,
      "learning_rate": 1.875395857242789e-05,
      "loss": 2.8341,
      "step": 12754
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9983294606208801,
      "learning_rate": 1.8753759528675646e-05,
      "loss": 2.5448,
      "step": 12755
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0086466073989868,
      "learning_rate": 1.8753560470083397e-05,
      "loss": 2.6028,
      "step": 12756
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9759849905967712,
      "learning_rate": 1.875336139665149e-05,
      "loss": 2.6576,
      "step": 12757
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0809764862060547,
      "learning_rate": 1.875316230838026e-05,
      "loss": 2.4394,
      "step": 12758
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0121580362319946,
      "learning_rate": 1.8752963205270033e-05,
      "loss": 2.5824,
      "step": 12759
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9697273373603821,
      "learning_rate": 1.8752764087321165e-05,
      "loss": 2.5393,
      "step": 12760
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0144025087356567,
      "learning_rate": 1.875256495453398e-05,
      "loss": 2.6134,
      "step": 12761
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0993186235427856,
      "learning_rate": 1.8752365806908825e-05,
      "loss": 2.6397,
      "step": 12762
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9772622585296631,
      "learning_rate": 1.8752166644446028e-05,
      "loss": 2.5952,
      "step": 12763
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0912128686904907,
      "learning_rate": 1.8751967467145934e-05,
      "loss": 2.4104,
      "step": 12764
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.082547664642334,
      "learning_rate": 1.875176827500888e-05,
      "loss": 2.3907,
      "step": 12765
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0861034393310547,
      "learning_rate": 1.8751569068035194e-05,
      "loss": 2.516,
      "step": 12766
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1592620611190796,
      "learning_rate": 1.8751369846225227e-05,
      "loss": 2.3718,
      "step": 12767
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.112833857536316,
      "learning_rate": 1.875117060957931e-05,
      "loss": 2.4731,
      "step": 12768
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.229781150817871,
      "learning_rate": 1.8750971358097786e-05,
      "loss": 2.7068,
      "step": 12769
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8179610967636108,
      "learning_rate": 1.8750772091780987e-05,
      "loss": 2.3567,
      "step": 12770
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9727054238319397,
      "learning_rate": 1.875057281062925e-05,
      "loss": 2.4698,
      "step": 12771
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.8962961435317993,
      "learning_rate": 1.8750373514642913e-05,
      "loss": 2.462,
      "step": 12772
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9738026261329651,
      "learning_rate": 1.8750174203822325e-05,
      "loss": 2.7035,
      "step": 12773
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1115461587905884,
      "learning_rate": 1.874997487816781e-05,
      "loss": 2.5673,
      "step": 12774
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0518327951431274,
      "learning_rate": 1.874977553767971e-05,
      "loss": 2.4978,
      "step": 12775
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0368402004241943,
      "learning_rate": 1.8749576182358365e-05,
      "loss": 2.6106,
      "step": 12776
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0201022624969482,
      "learning_rate": 1.8749376812204113e-05,
      "loss": 2.5963,
      "step": 12777
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9421773552894592,
      "learning_rate": 1.874917742721729e-05,
      "loss": 2.58,
      "step": 12778
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9806784987449646,
      "learning_rate": 1.8748978027398236e-05,
      "loss": 2.4413,
      "step": 12779
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9972591400146484,
      "learning_rate": 1.874877861274729e-05,
      "loss": 2.4566,
      "step": 12780
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0876209735870361,
      "learning_rate": 1.8748579183264785e-05,
      "loss": 2.4843,
      "step": 12781
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9820600152015686,
      "learning_rate": 1.8748379738951063e-05,
      "loss": 2.3927,
      "step": 12782
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.10947585105896,
      "learning_rate": 1.874818027980646e-05,
      "loss": 2.3803,
      "step": 12783
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1605252027511597,
      "learning_rate": 1.8747980805831316e-05,
      "loss": 2.6197,
      "step": 12784
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0015430450439453,
      "learning_rate": 1.8747781317025972e-05,
      "loss": 2.3983,
      "step": 12785
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0547611713409424,
      "learning_rate": 1.8747581813390758e-05,
      "loss": 2.6749,
      "step": 12786
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9751386642456055,
      "learning_rate": 1.8747382294926024e-05,
      "loss": 2.4368,
      "step": 12787
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9639023542404175,
      "learning_rate": 1.8747182761632094e-05,
      "loss": 2.5199,
      "step": 12788
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9861310124397278,
      "learning_rate": 1.8746983213509314e-05,
      "loss": 2.7044,
      "step": 12789
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9889892935752869,
      "learning_rate": 1.8746783650558026e-05,
      "loss": 2.5213,
      "step": 12790
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9982582926750183,
      "learning_rate": 1.8746584072778562e-05,
      "loss": 2.7988,
      "step": 12791
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0660929679870605,
      "learning_rate": 1.8746384480171262e-05,
      "loss": 2.4736,
      "step": 12792
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0103293657302856,
      "learning_rate": 1.8746184872736463e-05,
      "loss": 2.6339,
      "step": 12793
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0907326936721802,
      "learning_rate": 1.874598525047451e-05,
      "loss": 2.4481,
      "step": 12794
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9748975038528442,
      "learning_rate": 1.874578561338573e-05,
      "loss": 2.5084,
      "step": 12795
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9635620713233948,
      "learning_rate": 1.8745585961470474e-05,
      "loss": 2.4253,
      "step": 12796
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0131900310516357,
      "learning_rate": 1.8745386294729075e-05,
      "loss": 2.537,
      "step": 12797
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9518424868583679,
      "learning_rate": 1.8745186613161865e-05,
      "loss": 2.4958,
      "step": 12798
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2619402408599854,
      "learning_rate": 1.8744986916769193e-05,
      "loss": 2.2519,
      "step": 12799
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0192993879318237,
      "learning_rate": 1.8744787205551394e-05,
      "loss": 2.6323,
      "step": 12800
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1044398546218872,
      "learning_rate": 1.8744587479508802e-05,
      "loss": 2.5705,
      "step": 12801
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.06062650680542,
      "learning_rate": 1.874438773864176e-05,
      "loss": 2.522,
      "step": 12802
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0677851438522339,
      "learning_rate": 1.874418798295061e-05,
      "loss": 2.5501,
      "step": 12803
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.030624508857727,
      "learning_rate": 1.8743988212435683e-05,
      "loss": 2.3974,
      "step": 12804
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9845271706581116,
      "learning_rate": 1.874378842709732e-05,
      "loss": 2.5386,
      "step": 12805
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0429880619049072,
      "learning_rate": 1.8743588626935866e-05,
      "loss": 2.5011,
      "step": 12806
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1761345863342285,
      "learning_rate": 1.874338881195165e-05,
      "loss": 2.5185,
      "step": 12807
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0696121454238892,
      "learning_rate": 1.8743188982145017e-05,
      "loss": 2.5786,
      "step": 12808
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0863014459609985,
      "learning_rate": 1.87429891375163e-05,
      "loss": 2.6123,
      "step": 12809
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9286277294158936,
      "learning_rate": 1.8742789278065846e-05,
      "loss": 2.7523,
      "step": 12810
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0342819690704346,
      "learning_rate": 1.8742589403793988e-05,
      "loss": 2.6619,
      "step": 12811
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.204830288887024,
      "learning_rate": 1.874238951470107e-05,
      "loss": 2.5001,
      "step": 12812
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0993640422821045,
      "learning_rate": 1.8742189610787423e-05,
      "loss": 2.625,
      "step": 12813
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0149686336517334,
      "learning_rate": 1.874198969205339e-05,
      "loss": 2.2819,
      "step": 12814
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.108262300491333,
      "learning_rate": 1.8741789758499313e-05,
      "loss": 2.4792,
      "step": 12815
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.351375699043274,
      "learning_rate": 1.8741589810125526e-05,
      "loss": 2.5093,
      "step": 12816
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1855289936065674,
      "learning_rate": 1.874138984693237e-05,
      "loss": 2.6224,
      "step": 12817
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9598658084869385,
      "learning_rate": 1.874118986892019e-05,
      "loss": 2.3717,
      "step": 12818
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9736545085906982,
      "learning_rate": 1.8740989876089314e-05,
      "loss": 2.5505,
      "step": 12819
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1630884408950806,
      "learning_rate": 1.8740789868440087e-05,
      "loss": 2.5226,
      "step": 12820
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1130009889602661,
      "learning_rate": 1.8740589845972846e-05,
      "loss": 2.627,
      "step": 12821
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9957000017166138,
      "learning_rate": 1.8740389808687933e-05,
      "loss": 2.4786,
      "step": 12822
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9400097727775574,
      "learning_rate": 1.8740189756585682e-05,
      "loss": 2.3891,
      "step": 12823
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.043094277381897,
      "learning_rate": 1.873998968966644e-05,
      "loss": 2.351,
      "step": 12824
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9130223989486694,
      "learning_rate": 1.873978960793054e-05,
      "loss": 2.3094,
      "step": 12825
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9650757908821106,
      "learning_rate": 1.8739589511378323e-05,
      "loss": 2.3211,
      "step": 12826
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0214627981185913,
      "learning_rate": 1.8739389400010126e-05,
      "loss": 2.6022,
      "step": 12827
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.150534987449646,
      "learning_rate": 1.8739189273826292e-05,
      "loss": 2.4369,
      "step": 12828
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0424429178237915,
      "learning_rate": 1.873898913282716e-05,
      "loss": 2.4873,
      "step": 12829
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0372246503829956,
      "learning_rate": 1.8738788977013068e-05,
      "loss": 2.681,
      "step": 12830
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9675244688987732,
      "learning_rate": 1.8738588806384355e-05,
      "loss": 2.4776,
      "step": 12831
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0916872024536133,
      "learning_rate": 1.873838862094136e-05,
      "loss": 2.4994,
      "step": 12832
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0636175870895386,
      "learning_rate": 1.8738188420684422e-05,
      "loss": 2.4179,
      "step": 12833
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0739480257034302,
      "learning_rate": 1.8737988205613883e-05,
      "loss": 2.3089,
      "step": 12834
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0420082807540894,
      "learning_rate": 1.8737787975730076e-05,
      "loss": 2.2864,
      "step": 12835
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9242218136787415,
      "learning_rate": 1.873758773103335e-05,
      "loss": 2.4663,
      "step": 12836
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8231221437454224,
      "learning_rate": 1.873738747152404e-05,
      "loss": 2.5356,
      "step": 12837
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1168357133865356,
      "learning_rate": 1.8737187197202486e-05,
      "loss": 2.7281,
      "step": 12838
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0047515630722046,
      "learning_rate": 1.8736986908069022e-05,
      "loss": 2.5238,
      "step": 12839
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.016115427017212,
      "learning_rate": 1.8736786604123995e-05,
      "loss": 2.3922,
      "step": 12840
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0066248178482056,
      "learning_rate": 1.8736586285367742e-05,
      "loss": 2.6697,
      "step": 12841
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0390616655349731,
      "learning_rate": 1.87363859518006e-05,
      "loss": 2.5919,
      "step": 12842
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.027611494064331,
      "learning_rate": 1.8736185603422915e-05,
      "loss": 2.6747,
      "step": 12843
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.384641647338867,
      "learning_rate": 1.873598524023502e-05,
      "loss": 2.5891,
      "step": 12844
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9956831336021423,
      "learning_rate": 1.873578486223726e-05,
      "loss": 2.5104,
      "step": 12845
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9608829617500305,
      "learning_rate": 1.873558446942997e-05,
      "loss": 2.5225,
      "step": 12846
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0578542947769165,
      "learning_rate": 1.873538406181349e-05,
      "loss": 2.3017,
      "step": 12847
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0255237817764282,
      "learning_rate": 1.8735183639388167e-05,
      "loss": 2.6385,
      "step": 12848
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9587178826332092,
      "learning_rate": 1.873498320215433e-05,
      "loss": 2.5509,
      "step": 12849
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0022125244140625,
      "learning_rate": 1.8734782750112327e-05,
      "loss": 2.373,
      "step": 12850
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0758291482925415,
      "learning_rate": 1.8734582283262494e-05,
      "loss": 2.717,
      "step": 12851
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0276936292648315,
      "learning_rate": 1.873438180160517e-05,
      "loss": 2.5134,
      "step": 12852
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0518887042999268,
      "learning_rate": 1.87341813051407e-05,
      "loss": 2.6305,
      "step": 12853
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0180197954177856,
      "learning_rate": 1.8733980793869418e-05,
      "loss": 2.598,
      "step": 12854
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0731127262115479,
      "learning_rate": 1.8733780267791667e-05,
      "loss": 2.5515,
      "step": 12855
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0414602756500244,
      "learning_rate": 1.8733579726907787e-05,
      "loss": 2.4572,
      "step": 12856
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9965465664863586,
      "learning_rate": 1.8733379171218117e-05,
      "loss": 2.643,
      "step": 12857
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.179441213607788,
      "learning_rate": 1.8733178600723e-05,
      "loss": 2.5289,
      "step": 12858
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9966893196105957,
      "learning_rate": 1.873297801542277e-05,
      "loss": 2.5271,
      "step": 12859
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9753398895263672,
      "learning_rate": 1.873277741531777e-05,
      "loss": 2.4599,
      "step": 12860
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9665988087654114,
      "learning_rate": 1.8732576800408342e-05,
      "loss": 2.5582,
      "step": 12861
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.092620611190796,
      "learning_rate": 1.8732376170694823e-05,
      "loss": 2.3336,
      "step": 12862
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0124000310897827,
      "learning_rate": 1.8732175526177556e-05,
      "loss": 2.4022,
      "step": 12863
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1437535285949707,
      "learning_rate": 1.873197486685688e-05,
      "loss": 2.6768,
      "step": 12864
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1602786779403687,
      "learning_rate": 1.8731774192733136e-05,
      "loss": 2.6437,
      "step": 12865
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9513011574745178,
      "learning_rate": 1.873157350380666e-05,
      "loss": 2.3635,
      "step": 12866
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0366990566253662,
      "learning_rate": 1.87313728000778e-05,
      "loss": 2.3303,
      "step": 12867
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0485671758651733,
      "learning_rate": 1.873117208154689e-05,
      "loss": 2.6217,
      "step": 12868
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.100573182106018,
      "learning_rate": 1.873097134821427e-05,
      "loss": 2.709,
      "step": 12869
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0542609691619873,
      "learning_rate": 1.8730770600080283e-05,
      "loss": 2.6426,
      "step": 12870
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9982360601425171,
      "learning_rate": 1.873056983714527e-05,
      "loss": 2.5045,
      "step": 12871
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0584467649459839,
      "learning_rate": 1.8730369059409567e-05,
      "loss": 2.2666,
      "step": 12872
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.031919002532959,
      "learning_rate": 1.8730168266873517e-05,
      "loss": 2.6795,
      "step": 12873
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.025450587272644,
      "learning_rate": 1.8729967459537466e-05,
      "loss": 2.5909,
      "step": 12874
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0509780645370483,
      "learning_rate": 1.8729766637401744e-05,
      "loss": 2.5233,
      "step": 12875
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0488361120224,
      "learning_rate": 1.87295658004667e-05,
      "loss": 2.1919,
      "step": 12876
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.972149133682251,
      "learning_rate": 1.872936494873267e-05,
      "loss": 2.4351,
      "step": 12877
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.080524206161499,
      "learning_rate": 1.8729164082199996e-05,
      "loss": 2.4487,
      "step": 12878
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9664080739021301,
      "learning_rate": 1.8728963200869014e-05,
      "loss": 2.2834,
      "step": 12879
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0530372858047485,
      "learning_rate": 1.8728762304740074e-05,
      "loss": 2.5656,
      "step": 12880
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0239033699035645,
      "learning_rate": 1.872856139381351e-05,
      "loss": 2.6703,
      "step": 12881
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4163274765014648,
      "learning_rate": 1.872836046808966e-05,
      "loss": 2.5294,
      "step": 12882
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9731613993644714,
      "learning_rate": 1.8728159527568873e-05,
      "loss": 2.4378,
      "step": 12883
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1519964933395386,
      "learning_rate": 1.8727958572251484e-05,
      "loss": 2.5152,
      "step": 12884
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9839588403701782,
      "learning_rate": 1.8727757602137836e-05,
      "loss": 2.5784,
      "step": 12885
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0738285779953003,
      "learning_rate": 1.8727556617228267e-05,
      "loss": 2.5542,
      "step": 12886
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9610087871551514,
      "learning_rate": 1.8727355617523116e-05,
      "loss": 2.5662,
      "step": 12887
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1843303442001343,
      "learning_rate": 1.872715460302273e-05,
      "loss": 2.4672,
      "step": 12888
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1302320957183838,
      "learning_rate": 1.8726953573727447e-05,
      "loss": 2.4205,
      "step": 12889
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0518232583999634,
      "learning_rate": 1.8726752529637607e-05,
      "loss": 2.5375,
      "step": 12890
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.072547197341919,
      "learning_rate": 1.872655147075355e-05,
      "loss": 2.4342,
      "step": 12891
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.005178689956665,
      "learning_rate": 1.8726350397075623e-05,
      "loss": 2.5682,
      "step": 12892
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.037421703338623,
      "learning_rate": 1.8726149308604156e-05,
      "loss": 2.4202,
      "step": 12893
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0715055465698242,
      "learning_rate": 1.8725948205339497e-05,
      "loss": 2.4738,
      "step": 12894
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9825333952903748,
      "learning_rate": 1.8725747087281986e-05,
      "loss": 2.4647,
      "step": 12895
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0446959733963013,
      "learning_rate": 1.8725545954431967e-05,
      "loss": 2.5575,
      "step": 12896
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.012578010559082,
      "learning_rate": 1.8725344806789777e-05,
      "loss": 2.5191,
      "step": 12897
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.157333493232727,
      "learning_rate": 1.8725143644355753e-05,
      "loss": 2.3739,
      "step": 12898
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.068371295928955,
      "learning_rate": 1.8724942467130243e-05,
      "loss": 2.4363,
      "step": 12899
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.028207778930664,
      "learning_rate": 1.872474127511359e-05,
      "loss": 2.3302,
      "step": 12900
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1776005029678345,
      "learning_rate": 1.872454006830613e-05,
      "loss": 2.6741,
      "step": 12901
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1186892986297607,
      "learning_rate": 1.8724338846708202e-05,
      "loss": 2.5887,
      "step": 12902
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1027758121490479,
      "learning_rate": 1.872413761032015e-05,
      "loss": 2.5877,
      "step": 12903
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0153898000717163,
      "learning_rate": 1.8723936359142316e-05,
      "loss": 2.6016,
      "step": 12904
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1723536252975464,
      "learning_rate": 1.872373509317504e-05,
      "loss": 2.6032,
      "step": 12905
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.092333436012268,
      "learning_rate": 1.8723533812418666e-05,
      "loss": 2.286,
      "step": 12906
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1014667749404907,
      "learning_rate": 1.8723332516873533e-05,
      "loss": 2.5286,
      "step": 12907
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.976515531539917,
      "learning_rate": 1.872313120653998e-05,
      "loss": 2.522,
      "step": 12908
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2282159328460693,
      "learning_rate": 1.872292988141835e-05,
      "loss": 2.5585,
      "step": 12909
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0053242444992065,
      "learning_rate": 1.872272854150899e-05,
      "loss": 2.6185,
      "step": 12910
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2144330739974976,
      "learning_rate": 1.872252718681223e-05,
      "loss": 2.3945,
      "step": 12911
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0219658613204956,
      "learning_rate": 1.872232581732842e-05,
      "loss": 2.5394,
      "step": 12912
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9714217782020569,
      "learning_rate": 1.8722124433057897e-05,
      "loss": 2.6949,
      "step": 12913
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0665456056594849,
      "learning_rate": 1.8721923034001004e-05,
      "loss": 2.2809,
      "step": 12914
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0018032789230347,
      "learning_rate": 1.8721721620158084e-05,
      "loss": 2.7027,
      "step": 12915
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0230563879013062,
      "learning_rate": 1.872152019152948e-05,
      "loss": 2.6717,
      "step": 12916
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9804325699806213,
      "learning_rate": 1.8721318748115527e-05,
      "loss": 2.4074,
      "step": 12917
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0387823581695557,
      "learning_rate": 1.8721117289916568e-05,
      "loss": 2.581,
      "step": 12918
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9790088534355164,
      "learning_rate": 1.872091581693295e-05,
      "loss": 2.559,
      "step": 12919
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9084585905075073,
      "learning_rate": 1.872071432916501e-05,
      "loss": 2.4092,
      "step": 12920
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.099081039428711,
      "learning_rate": 1.872051282661309e-05,
      "loss": 2.4815,
      "step": 12921
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9534106254577637,
      "learning_rate": 1.8720311309277534e-05,
      "loss": 2.527,
      "step": 12922
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0844275951385498,
      "learning_rate": 1.8720109777158684e-05,
      "loss": 2.351,
      "step": 12923
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0852528810501099,
      "learning_rate": 1.8719908230256876e-05,
      "loss": 2.7379,
      "step": 12924
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9485822916030884,
      "learning_rate": 1.8719706668572456e-05,
      "loss": 2.6512,
      "step": 12925
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1549484729766846,
      "learning_rate": 1.8719505092105766e-05,
      "loss": 2.8445,
      "step": 12926
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9380367994308472,
      "learning_rate": 1.8719303500857145e-05,
      "loss": 2.6944,
      "step": 12927
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0048414468765259,
      "learning_rate": 1.8719101894826938e-05,
      "loss": 2.5837,
      "step": 12928
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9693883061408997,
      "learning_rate": 1.8718900274015485e-05,
      "loss": 2.3606,
      "step": 12929
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1195148229599,
      "learning_rate": 1.8718698638423127e-05,
      "loss": 2.3874,
      "step": 12930
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9955881237983704,
      "learning_rate": 1.8718496988050207e-05,
      "loss": 2.4499,
      "step": 12931
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0607787370681763,
      "learning_rate": 1.8718295322897067e-05,
      "loss": 2.2753,
      "step": 12932
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1446559429168701,
      "learning_rate": 1.8718093642964045e-05,
      "loss": 2.3387,
      "step": 12933
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.181564211845398,
      "learning_rate": 1.871789194825149e-05,
      "loss": 2.6507,
      "step": 12934
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0200926065444946,
      "learning_rate": 1.871769023875974e-05,
      "loss": 2.5598,
      "step": 12935
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0592520236968994,
      "learning_rate": 1.871748851448914e-05,
      "loss": 2.477,
      "step": 12936
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0044584274291992,
      "learning_rate": 1.8717286775440028e-05,
      "loss": 2.679,
      "step": 12937
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1122695207595825,
      "learning_rate": 1.8717085021612747e-05,
      "loss": 2.3475,
      "step": 12938
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0545847415924072,
      "learning_rate": 1.8716883253007637e-05,
      "loss": 2.3594,
      "step": 12939
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.127394437789917,
      "learning_rate": 1.8716681469625045e-05,
      "loss": 2.5399,
      "step": 12940
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1189106702804565,
      "learning_rate": 1.871647967146531e-05,
      "loss": 2.6292,
      "step": 12941
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9798449277877808,
      "learning_rate": 1.8716277858528773e-05,
      "loss": 2.708,
      "step": 12942
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.001731514930725,
      "learning_rate": 1.8716076030815783e-05,
      "loss": 2.6856,
      "step": 12943
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2306504249572754,
      "learning_rate": 1.871587418832667e-05,
      "loss": 2.5827,
      "step": 12944
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0720511674880981,
      "learning_rate": 1.871567233106179e-05,
      "loss": 2.6256,
      "step": 12945
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.937014639377594,
      "learning_rate": 1.871547045902147e-05,
      "loss": 2.2788,
      "step": 12946
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9458617568016052,
      "learning_rate": 1.8715268572206063e-05,
      "loss": 2.4369,
      "step": 12947
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.068212628364563,
      "learning_rate": 1.871506667061591e-05,
      "loss": 2.6004,
      "step": 12948
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9850972890853882,
      "learning_rate": 1.8714864754251354e-05,
      "loss": 2.4987,
      "step": 12949
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0256657600402832,
      "learning_rate": 1.8714662823112735e-05,
      "loss": 2.5359,
      "step": 12950
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1381604671478271,
      "learning_rate": 1.8714460877200394e-05,
      "loss": 2.6532,
      "step": 12951
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9921725392341614,
      "learning_rate": 1.8714258916514673e-05,
      "loss": 2.8002,
      "step": 12952
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1283683776855469,
      "learning_rate": 1.871405694105592e-05,
      "loss": 2.4454,
      "step": 12953
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9536690711975098,
      "learning_rate": 1.8713854950824473e-05,
      "loss": 2.7188,
      "step": 12954
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.062578797340393,
      "learning_rate": 1.8713652945820674e-05,
      "loss": 2.5469,
      "step": 12955
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0967214107513428,
      "learning_rate": 1.8713450926044868e-05,
      "loss": 2.5734,
      "step": 12956
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0382230281829834,
      "learning_rate": 1.8713248891497397e-05,
      "loss": 2.5638,
      "step": 12957
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.939666211605072,
      "learning_rate": 1.8713046842178603e-05,
      "loss": 2.5272,
      "step": 12958
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0770434141159058,
      "learning_rate": 1.8712844778088824e-05,
      "loss": 2.4143,
      "step": 12959
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9877264499664307,
      "learning_rate": 1.8712642699228408e-05,
      "loss": 2.5984,
      "step": 12960
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0415198802947998,
      "learning_rate": 1.8712440605597698e-05,
      "loss": 2.4058,
      "step": 12961
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9650167226791382,
      "learning_rate": 1.871223849719703e-05,
      "loss": 2.3179,
      "step": 12962
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.058896780014038,
      "learning_rate": 1.871203637402676e-05,
      "loss": 2.3135,
      "step": 12963
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.023354411125183,
      "learning_rate": 1.8711834236087218e-05,
      "loss": 2.4113,
      "step": 12964
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0546587705612183,
      "learning_rate": 1.8711632083378752e-05,
      "loss": 2.6198,
      "step": 12965
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.992462158203125,
      "learning_rate": 1.87114299159017e-05,
      "loss": 2.6637,
      "step": 12966
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9990796446800232,
      "learning_rate": 1.8711227733656413e-05,
      "loss": 2.5397,
      "step": 12967
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1226543188095093,
      "learning_rate": 1.8711025536643225e-05,
      "loss": 2.5136,
      "step": 12968
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0599075555801392,
      "learning_rate": 1.8710823324862485e-05,
      "loss": 2.4093,
      "step": 12969
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.98185133934021,
      "learning_rate": 1.8710621098314536e-05,
      "loss": 2.783,
      "step": 12970
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.011634349822998,
      "learning_rate": 1.8710418856999712e-05,
      "loss": 2.5285,
      "step": 12971
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.92668616771698,
      "learning_rate": 1.8710216600918366e-05,
      "loss": 2.3118,
      "step": 12972
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9885720610618591,
      "learning_rate": 1.8710014330070837e-05,
      "loss": 2.4601,
      "step": 12973
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9641817212104797,
      "learning_rate": 1.870981204445747e-05,
      "loss": 2.3286,
      "step": 12974
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.8708170056343079,
      "learning_rate": 1.8709609744078606e-05,
      "loss": 2.4975,
      "step": 12975
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9761907458305359,
      "learning_rate": 1.8709407428934584e-05,
      "loss": 2.6018,
      "step": 12976
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9109424948692322,
      "learning_rate": 1.8709205099025752e-05,
      "loss": 2.5021,
      "step": 12977
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.081880807876587,
      "learning_rate": 1.8709002754352458e-05,
      "loss": 3.029,
      "step": 12978
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0626288652420044,
      "learning_rate": 1.8708800394915033e-05,
      "loss": 2.6618,
      "step": 12979
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9728791117668152,
      "learning_rate": 1.8708598020713825e-05,
      "loss": 2.4226,
      "step": 12980
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9816855192184448,
      "learning_rate": 1.870839563174918e-05,
      "loss": 2.4321,
      "step": 12981
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9873435497283936,
      "learning_rate": 1.870819322802144e-05,
      "loss": 2.6375,
      "step": 12982
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0755780935287476,
      "learning_rate": 1.870799080953095e-05,
      "loss": 2.5494,
      "step": 12983
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.079915165901184,
      "learning_rate": 1.8707788376278047e-05,
      "loss": 2.7094,
      "step": 12984
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0202875137329102,
      "learning_rate": 1.8707585928263077e-05,
      "loss": 2.612,
      "step": 12985
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.027953863143921,
      "learning_rate": 1.8707383465486387e-05,
      "loss": 2.4873,
      "step": 12986
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1212499141693115,
      "learning_rate": 1.8707180987948314e-05,
      "loss": 2.4754,
      "step": 12987
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1047297716140747,
      "learning_rate": 1.8706978495649205e-05,
      "loss": 2.708,
      "step": 12988
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0709763765335083,
      "learning_rate": 1.8706775988589405e-05,
      "loss": 2.5139,
      "step": 12989
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0599054098129272,
      "learning_rate": 1.870657346676925e-05,
      "loss": 2.7452,
      "step": 12990
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0260601043701172,
      "learning_rate": 1.8706370930189095e-05,
      "loss": 2.5108,
      "step": 12991
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1103663444519043,
      "learning_rate": 1.870616837884927e-05,
      "loss": 2.512,
      "step": 12992
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.274780511856079,
      "learning_rate": 1.870596581275013e-05,
      "loss": 2.5661,
      "step": 12993
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1407955884933472,
      "learning_rate": 1.8705763231892012e-05,
      "loss": 2.5621,
      "step": 12994
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0611974000930786,
      "learning_rate": 1.8705560636275263e-05,
      "loss": 2.2717,
      "step": 12995
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.994719922542572,
      "learning_rate": 1.870535802590022e-05,
      "loss": 2.4688,
      "step": 12996
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9774287343025208,
      "learning_rate": 1.870515540076723e-05,
      "loss": 2.5749,
      "step": 12997
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1114747524261475,
      "learning_rate": 1.8704952760876645e-05,
      "loss": 2.5518,
      "step": 12998
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9752132892608643,
      "learning_rate": 1.8704750106228795e-05,
      "loss": 2.4284,
      "step": 12999
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2526921033859253,
      "learning_rate": 1.870454743682403e-05,
      "loss": 2.6428,
      "step": 13000
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.009587049484253,
      "learning_rate": 1.8704344752662693e-05,
      "loss": 2.1646,
      "step": 13001
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1945236921310425,
      "learning_rate": 1.870414205374513e-05,
      "loss": 2.4814,
      "step": 13002
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0833563804626465,
      "learning_rate": 1.8703939340071676e-05,
      "loss": 2.5492,
      "step": 13003
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.072837471961975,
      "learning_rate": 1.8703736611642688e-05,
      "loss": 2.4776,
      "step": 13004
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.03102707862854,
      "learning_rate": 1.8703533868458498e-05,
      "loss": 2.384,
      "step": 13005
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9866832494735718,
      "learning_rate": 1.8703331110519455e-05,
      "loss": 2.4716,
      "step": 13006
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1033891439437866,
      "learning_rate": 1.8703128337825905e-05,
      "loss": 2.759,
      "step": 13007
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.15362548828125,
      "learning_rate": 1.8702925550378186e-05,
      "loss": 2.4435,
      "step": 13008
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9236379265785217,
      "learning_rate": 1.8702722748176643e-05,
      "loss": 2.5053,
      "step": 13009
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9780324101448059,
      "learning_rate": 1.8702519931221627e-05,
      "loss": 2.7306,
      "step": 13010
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2459452152252197,
      "learning_rate": 1.8702317099513468e-05,
      "loss": 2.4211,
      "step": 13011
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.111080288887024,
      "learning_rate": 1.8702114253052523e-05,
      "loss": 2.5392,
      "step": 13012
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.036601185798645,
      "learning_rate": 1.870191139183913e-05,
      "loss": 2.3985,
      "step": 13013
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9266020059585571,
      "learning_rate": 1.8701708515873635e-05,
      "loss": 2.634,
      "step": 13014
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9594742059707642,
      "learning_rate": 1.870150562515638e-05,
      "loss": 2.3798,
      "step": 13015
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1082913875579834,
      "learning_rate": 1.870130271968771e-05,
      "loss": 2.3915,
      "step": 13016
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9740522503852844,
      "learning_rate": 1.8701099799467968e-05,
      "loss": 2.3379,
      "step": 13017
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.019938588142395,
      "learning_rate": 1.8700896864497498e-05,
      "loss": 2.3887,
      "step": 13018
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9983043074607849,
      "learning_rate": 1.8700693914776646e-05,
      "loss": 2.5902,
      "step": 13019
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0750937461853027,
      "learning_rate": 1.8700490950305753e-05,
      "loss": 2.5756,
      "step": 13020
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5092002153396606,
      "learning_rate": 1.8700287971085167e-05,
      "loss": 2.3918,
      "step": 13021
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.074928879737854,
      "learning_rate": 1.870008497711523e-05,
      "loss": 2.4881,
      "step": 13022
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1476322412490845,
      "learning_rate": 1.8699881968396284e-05,
      "loss": 2.4853,
      "step": 13023
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1251825094223022,
      "learning_rate": 1.8699678944928675e-05,
      "loss": 2.4969,
      "step": 13024
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.019956111907959,
      "learning_rate": 1.869947590671275e-05,
      "loss": 2.7758,
      "step": 13025
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0123120546340942,
      "learning_rate": 1.869927285374885e-05,
      "loss": 2.5229,
      "step": 13026
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0835932493209839,
      "learning_rate": 1.869906978603732e-05,
      "loss": 2.6864,
      "step": 13027
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3858270645141602,
      "learning_rate": 1.86988667035785e-05,
      "loss": 2.2186,
      "step": 13028
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0737777948379517,
      "learning_rate": 1.8698663606372743e-05,
      "loss": 2.5845,
      "step": 13029
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0895662307739258,
      "learning_rate": 1.8698460494420387e-05,
      "loss": 2.5593,
      "step": 13030
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9957725405693054,
      "learning_rate": 1.8698257367721776e-05,
      "loss": 2.4217,
      "step": 13031
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0688024759292603,
      "learning_rate": 1.869805422627726e-05,
      "loss": 2.5478,
      "step": 13032
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0030500888824463,
      "learning_rate": 1.8697851070087177e-05,
      "loss": 2.4749,
      "step": 13033
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0199429988861084,
      "learning_rate": 1.8697647899151878e-05,
      "loss": 2.4813,
      "step": 13034
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9821689128875732,
      "learning_rate": 1.86974447134717e-05,
      "loss": 2.8126,
      "step": 13035
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1124316453933716,
      "learning_rate": 1.8697241513046992e-05,
      "loss": 2.5683,
      "step": 13036
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.925619900226593,
      "learning_rate": 1.8697038297878097e-05,
      "loss": 2.3935,
      "step": 13037
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0305290222167969,
      "learning_rate": 1.869683506796536e-05,
      "loss": 2.5773,
      "step": 13038
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9888830780982971,
      "learning_rate": 1.8696631823309124e-05,
      "loss": 2.5151,
      "step": 13039
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0228773355484009,
      "learning_rate": 1.8696428563909738e-05,
      "loss": 2.4723,
      "step": 13040
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1130582094192505,
      "learning_rate": 1.8696225289767543e-05,
      "loss": 2.3654,
      "step": 13041
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0018881559371948,
      "learning_rate": 1.869602200088288e-05,
      "loss": 2.5463,
      "step": 13042
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9764313697814941,
      "learning_rate": 1.8695818697256107e-05,
      "loss": 2.4427,
      "step": 13043
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0009020566940308,
      "learning_rate": 1.8695615378887557e-05,
      "loss": 2.4443,
      "step": 13044
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2619320154190063,
      "learning_rate": 1.869541204577757e-05,
      "loss": 2.4119,
      "step": 13045
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0839447975158691,
      "learning_rate": 1.8695208697926504e-05,
      "loss": 2.5085,
      "step": 13046
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0650134086608887,
      "learning_rate": 1.8695005335334696e-05,
      "loss": 2.6765,
      "step": 13047
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9606180191040039,
      "learning_rate": 1.8694801958002492e-05,
      "loss": 2.408,
      "step": 13048
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9830369353294373,
      "learning_rate": 1.8694598565930237e-05,
      "loss": 2.5317,
      "step": 13049
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0363560914993286,
      "learning_rate": 1.8694395159118278e-05,
      "loss": 2.4215,
      "step": 13050
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0447858572006226,
      "learning_rate": 1.8694191737566956e-05,
      "loss": 2.7447,
      "step": 13051
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.05474853515625,
      "learning_rate": 1.8693988301276617e-05,
      "loss": 2.5651,
      "step": 13052
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0445798635482788,
      "learning_rate": 1.869378485024761e-05,
      "loss": 2.3364,
      "step": 13053
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1921709775924683,
      "learning_rate": 1.8693581384480274e-05,
      "loss": 2.5419,
      "step": 13054
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.873219907283783,
      "learning_rate": 1.8693377903974957e-05,
      "loss": 2.4476,
      "step": 13055
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9568476676940918,
      "learning_rate": 1.8693174408732003e-05,
      "loss": 2.5194,
      "step": 13056
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1382830142974854,
      "learning_rate": 1.869297089875176e-05,
      "loss": 2.8835,
      "step": 13057
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2207961082458496,
      "learning_rate": 1.8692767374034565e-05,
      "loss": 2.6788,
      "step": 13058
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9663769006729126,
      "learning_rate": 1.8692563834580774e-05,
      "loss": 2.2472,
      "step": 13059
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.054101824760437,
      "learning_rate": 1.8692360280390723e-05,
      "loss": 2.626,
      "step": 13060
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9566144347190857,
      "learning_rate": 1.8692156711464758e-05,
      "loss": 2.7482,
      "step": 13061
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0128709077835083,
      "learning_rate": 1.869195312780323e-05,
      "loss": 2.4944,
      "step": 13062
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9344873428344727,
      "learning_rate": 1.8691749529406485e-05,
      "loss": 2.366,
      "step": 13063
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9661645889282227,
      "learning_rate": 1.8691545916274857e-05,
      "loss": 2.4947,
      "step": 13064
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.129978895187378,
      "learning_rate": 1.86913422884087e-05,
      "loss": 2.5076,
      "step": 13065
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0785161256790161,
      "learning_rate": 1.869113864580836e-05,
      "loss": 2.5039,
      "step": 13066
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9601184725761414,
      "learning_rate": 1.8690934988474175e-05,
      "loss": 2.4837,
      "step": 13067
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9333688020706177,
      "learning_rate": 1.86907313164065e-05,
      "loss": 2.5737,
      "step": 13068
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.390280842781067,
      "learning_rate": 1.869052762960567e-05,
      "loss": 2.4967,
      "step": 13069
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1120684146881104,
      "learning_rate": 1.8690323928072038e-05,
      "loss": 2.4198,
      "step": 13070
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0431979894638062,
      "learning_rate": 1.8690120211805946e-05,
      "loss": 2.4831,
      "step": 13071
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1058528423309326,
      "learning_rate": 1.8689916480807738e-05,
      "loss": 2.8495,
      "step": 13072
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9007443189620972,
      "learning_rate": 1.8689712735077765e-05,
      "loss": 2.4041,
      "step": 13073
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9736125469207764,
      "learning_rate": 1.868950897461637e-05,
      "loss": 2.5619,
      "step": 13074
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0105113983154297,
      "learning_rate": 1.8689305199423893e-05,
      "loss": 2.6211,
      "step": 13075
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0350662469863892,
      "learning_rate": 1.8689101409500687e-05,
      "loss": 2.2923,
      "step": 13076
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9452018737792969,
      "learning_rate": 1.8688897604847093e-05,
      "loss": 2.5249,
      "step": 13077
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0612119436264038,
      "learning_rate": 1.8688693785463458e-05,
      "loss": 2.5605,
      "step": 13078
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9138258695602417,
      "learning_rate": 1.8688489951350122e-05,
      "loss": 2.4774,
      "step": 13079
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0285258293151855,
      "learning_rate": 1.8688286102507443e-05,
      "loss": 2.5231,
      "step": 13080
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.206108808517456,
      "learning_rate": 1.8688082238935755e-05,
      "loss": 2.5741,
      "step": 13081
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9596579074859619,
      "learning_rate": 1.8687878360635408e-05,
      "loss": 2.3439,
      "step": 13082
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1741020679473877,
      "learning_rate": 1.868767446760675e-05,
      "loss": 2.5489,
      "step": 13083
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2845189571380615,
      "learning_rate": 1.8687470559850124e-05,
      "loss": 2.5883,
      "step": 13084
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.980366587638855,
      "learning_rate": 1.8687266637365876e-05,
      "loss": 2.4759,
      "step": 13085
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9997421503067017,
      "learning_rate": 1.868706270015435e-05,
      "loss": 2.397,
      "step": 13086
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1348013877868652,
      "learning_rate": 1.8686858748215896e-05,
      "loss": 2.4828,
      "step": 13087
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9915561079978943,
      "learning_rate": 1.8686654781550856e-05,
      "loss": 2.4732,
      "step": 13088
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0200986862182617,
      "learning_rate": 1.8686450800159575e-05,
      "loss": 2.5265,
      "step": 13089
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9430323243141174,
      "learning_rate": 1.8686246804042403e-05,
      "loss": 2.3508,
      "step": 13090
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9362999200820923,
      "learning_rate": 1.8686042793199682e-05,
      "loss": 2.6429,
      "step": 13091
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.937616229057312,
      "learning_rate": 1.8685838767631757e-05,
      "loss": 2.5612,
      "step": 13092
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0039345026016235,
      "learning_rate": 1.868563472733898e-05,
      "loss": 2.4693,
      "step": 13093
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0489020347595215,
      "learning_rate": 1.8685430672321693e-05,
      "loss": 2.521,
      "step": 13094
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.023895263671875,
      "learning_rate": 1.868522660258024e-05,
      "loss": 2.3891,
      "step": 13095
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0559550523757935,
      "learning_rate": 1.868502251811497e-05,
      "loss": 2.4043,
      "step": 13096
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0032157897949219,
      "learning_rate": 1.868481841892623e-05,
      "loss": 2.4269,
      "step": 13097
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0598161220550537,
      "learning_rate": 1.8684614305014364e-05,
      "loss": 2.5102,
      "step": 13098
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1502665281295776,
      "learning_rate": 1.8684410176379715e-05,
      "loss": 2.4035,
      "step": 13099
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0352883338928223,
      "learning_rate": 1.8684206033022633e-05,
      "loss": 2.2727,
      "step": 13100
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9765489101409912,
      "learning_rate": 1.8684001874943464e-05,
      "loss": 2.5352,
      "step": 13101
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2791389226913452,
      "learning_rate": 1.8683797702142555e-05,
      "loss": 2.8141,
      "step": 13102
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0448814630508423,
      "learning_rate": 1.868359351462025e-05,
      "loss": 2.771,
      "step": 13103
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9817587733268738,
      "learning_rate": 1.8683389312376892e-05,
      "loss": 2.4912,
      "step": 13104
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0590883493423462,
      "learning_rate": 1.8683185095412834e-05,
      "loss": 2.5987,
      "step": 13105
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9387562870979309,
      "learning_rate": 1.8682980863728416e-05,
      "loss": 2.4979,
      "step": 13106
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9117715954780579,
      "learning_rate": 1.8682776617323986e-05,
      "loss": 2.4664,
      "step": 13107
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8546305894851685,
      "learning_rate": 1.86825723561999e-05,
      "loss": 2.5131,
      "step": 13108
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1689798831939697,
      "learning_rate": 1.868236808035649e-05,
      "loss": 2.7349,
      "step": 13109
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0251526832580566,
      "learning_rate": 1.8682163789794104e-05,
      "loss": 2.52,
      "step": 13110
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.069071888923645,
      "learning_rate": 1.8681959484513098e-05,
      "loss": 2.3768,
      "step": 13111
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.98646080493927,
      "learning_rate": 1.8681755164513807e-05,
      "loss": 2.6717,
      "step": 13112
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.996363639831543,
      "learning_rate": 1.868155082979659e-05,
      "loss": 2.4619,
      "step": 13113
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2276312112808228,
      "learning_rate": 1.8681346480361782e-05,
      "loss": 2.494,
      "step": 13114
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.006004810333252,
      "learning_rate": 1.8681142116209737e-05,
      "loss": 2.6329,
      "step": 13115
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9909120798110962,
      "learning_rate": 1.8680937737340795e-05,
      "loss": 2.1954,
      "step": 13116
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1310428380966187,
      "learning_rate": 1.8680733343755307e-05,
      "loss": 2.5002,
      "step": 13117
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1167744398117065,
      "learning_rate": 1.868052893545362e-05,
      "loss": 2.3849,
      "step": 13118
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0484347343444824,
      "learning_rate": 1.868032451243608e-05,
      "loss": 2.5034,
      "step": 13119
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9098386168479919,
      "learning_rate": 1.868012007470303e-05,
      "loss": 2.3806,
      "step": 13120
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.078111171722412,
      "learning_rate": 1.867991562225482e-05,
      "loss": 2.6472,
      "step": 13121
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.049371600151062,
      "learning_rate": 1.8679711155091794e-05,
      "loss": 2.616,
      "step": 13122
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9557371735572815,
      "learning_rate": 1.8679506673214298e-05,
      "loss": 2.5544,
      "step": 13123
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9570479989051819,
      "learning_rate": 1.8679302176622685e-05,
      "loss": 2.4616,
      "step": 13124
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.061509609222412,
      "learning_rate": 1.8679097665317296e-05,
      "loss": 2.4447,
      "step": 13125
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5494039058685303,
      "learning_rate": 1.867889313929848e-05,
      "loss": 2.2104,
      "step": 13126
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9544612169265747,
      "learning_rate": 1.867868859856658e-05,
      "loss": 2.4846,
      "step": 13127
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.8910427689552307,
      "learning_rate": 1.867848404312195e-05,
      "loss": 2.6692,
      "step": 13128
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0133955478668213,
      "learning_rate": 1.8678279472964932e-05,
      "loss": 2.7176,
      "step": 13129
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0151891708374023,
      "learning_rate": 1.8678074888095873e-05,
      "loss": 2.4066,
      "step": 13130
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0029503107070923,
      "learning_rate": 1.867787028851512e-05,
      "loss": 2.5898,
      "step": 13131
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1314682960510254,
      "learning_rate": 1.8677665674223017e-05,
      "loss": 2.5315,
      "step": 13132
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1157519817352295,
      "learning_rate": 1.8677461045219916e-05,
      "loss": 2.4891,
      "step": 13133
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0803791284561157,
      "learning_rate": 1.8677256401506164e-05,
      "loss": 2.3635,
      "step": 13134
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9675509929656982,
      "learning_rate": 1.8677051743082103e-05,
      "loss": 2.4952,
      "step": 13135
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1392199993133545,
      "learning_rate": 1.867684706994808e-05,
      "loss": 2.4815,
      "step": 13136
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0274901390075684,
      "learning_rate": 1.867664238210445e-05,
      "loss": 2.552,
      "step": 13137
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0866961479187012,
      "learning_rate": 1.8676437679551553e-05,
      "loss": 2.6811,
      "step": 13138
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9876757860183716,
      "learning_rate": 1.8676232962289737e-05,
      "loss": 2.5258,
      "step": 13139
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2138328552246094,
      "learning_rate": 1.867602823031935e-05,
      "loss": 2.6176,
      "step": 13140
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9781427383422852,
      "learning_rate": 1.8675823483640737e-05,
      "loss": 2.5448,
      "step": 13141
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.021354079246521,
      "learning_rate": 1.8675618722254247e-05,
      "loss": 2.6314,
      "step": 13142
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.011649489402771,
      "learning_rate": 1.867541394616023e-05,
      "loss": 2.4218,
      "step": 13143
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1332532167434692,
      "learning_rate": 1.8675209155359027e-05,
      "loss": 2.6057,
      "step": 13144
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.937565803527832,
      "learning_rate": 1.867500434985099e-05,
      "loss": 2.6657,
      "step": 13145
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.076667070388794,
      "learning_rate": 1.8674799529636468e-05,
      "loss": 2.6932,
      "step": 13146
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9780614376068115,
      "learning_rate": 1.86745946947158e-05,
      "loss": 2.4434,
      "step": 13147
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0542709827423096,
      "learning_rate": 1.8674389845089337e-05,
      "loss": 2.382,
      "step": 13148
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0426627397537231,
      "learning_rate": 1.867418498075743e-05,
      "loss": 2.6818,
      "step": 13149
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0404808521270752,
      "learning_rate": 1.8673980101720424e-05,
      "loss": 2.4014,
      "step": 13150
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.036157488822937,
      "learning_rate": 1.8673775207978663e-05,
      "loss": 2.3201,
      "step": 13151
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1108952760696411,
      "learning_rate": 1.86735702995325e-05,
      "loss": 2.7203,
      "step": 13152
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1103233098983765,
      "learning_rate": 1.867336537638228e-05,
      "loss": 2.5586,
      "step": 13153
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1483230590820312,
      "learning_rate": 1.8673160438528347e-05,
      "loss": 2.518,
      "step": 13154
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.174185037612915,
      "learning_rate": 1.8672955485971055e-05,
      "loss": 2.4829,
      "step": 13155
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9835134148597717,
      "learning_rate": 1.8672750518710744e-05,
      "loss": 2.5652,
      "step": 13156
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.8902485966682434,
      "learning_rate": 1.8672545536747768e-05,
      "loss": 2.2685,
      "step": 13157
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0501199960708618,
      "learning_rate": 1.867234054008247e-05,
      "loss": 2.396,
      "step": 13158
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0239733457565308,
      "learning_rate": 1.8672135528715204e-05,
      "loss": 2.4026,
      "step": 13159
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.8897417783737183,
      "learning_rate": 1.867193050264631e-05,
      "loss": 2.2511,
      "step": 13160
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1649665832519531,
      "learning_rate": 1.8671725461876137e-05,
      "loss": 2.6135,
      "step": 13161
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0282166004180908,
      "learning_rate": 1.8671520406405036e-05,
      "loss": 2.4668,
      "step": 13162
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0903221368789673,
      "learning_rate": 1.867131533623335e-05,
      "loss": 2.6173,
      "step": 13163
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9644133448600769,
      "learning_rate": 1.8671110251361434e-05,
      "loss": 2.4637,
      "step": 13164
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9915850758552551,
      "learning_rate": 1.867090515178963e-05,
      "loss": 2.4298,
      "step": 13165
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9578985571861267,
      "learning_rate": 1.8670700037518285e-05,
      "loss": 2.6176,
      "step": 13166
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0612832307815552,
      "learning_rate": 1.867049490854775e-05,
      "loss": 2.4839,
      "step": 13167
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9514541625976562,
      "learning_rate": 1.8670289764878368e-05,
      "loss": 2.5372,
      "step": 13168
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0131076574325562,
      "learning_rate": 1.8670084606510494e-05,
      "loss": 2.4431,
      "step": 13169
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9709819555282593,
      "learning_rate": 1.866987943344447e-05,
      "loss": 2.3769,
      "step": 13170
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0200836658477783,
      "learning_rate": 1.8669674245680643e-05,
      "loss": 2.295,
      "step": 13171
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0405967235565186,
      "learning_rate": 1.8669469043219368e-05,
      "loss": 2.4594,
      "step": 13172
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0633856058120728,
      "learning_rate": 1.8669263826060987e-05,
      "loss": 2.3938,
      "step": 13173
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0888057947158813,
      "learning_rate": 1.866905859420585e-05,
      "loss": 2.5144,
      "step": 13174
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.220707893371582,
      "learning_rate": 1.86688533476543e-05,
      "loss": 2.3425,
      "step": 13175
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.93403559923172,
      "learning_rate": 1.8668648086406692e-05,
      "loss": 2.5007,
      "step": 13176
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9202076196670532,
      "learning_rate": 1.8668442810463374e-05,
      "loss": 2.3417,
      "step": 13177
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9770689010620117,
      "learning_rate": 1.866823751982469e-05,
      "loss": 2.6614,
      "step": 13178
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1711636781692505,
      "learning_rate": 1.8668032214490988e-05,
      "loss": 2.2928,
      "step": 13179
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1851073503494263,
      "learning_rate": 1.8667826894462614e-05,
      "loss": 2.366,
      "step": 13180
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0411802530288696,
      "learning_rate": 1.8667621559739922e-05,
      "loss": 2.6449,
      "step": 13181
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9678133726119995,
      "learning_rate": 1.866741621032326e-05,
      "loss": 2.6428,
      "step": 13182
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9561359882354736,
      "learning_rate": 1.866721084621297e-05,
      "loss": 2.4701,
      "step": 13183
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0148708820343018,
      "learning_rate": 1.8667005467409406e-05,
      "loss": 2.657,
      "step": 13184
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.110266923904419,
      "learning_rate": 1.866680007391291e-05,
      "loss": 2.6449,
      "step": 13185
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1738777160644531,
      "learning_rate": 1.866659466572384e-05,
      "loss": 2.5224,
      "step": 13186
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3129255771636963,
      "learning_rate": 1.866638924284253e-05,
      "loss": 2.768,
      "step": 13187
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0649465322494507,
      "learning_rate": 1.8666183805269342e-05,
      "loss": 2.3555,
      "step": 13188
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0767112970352173,
      "learning_rate": 1.866597835300462e-05,
      "loss": 2.715,
      "step": 13189
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9580972194671631,
      "learning_rate": 1.8665772886048708e-05,
      "loss": 2.5252,
      "step": 13190
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0154380798339844,
      "learning_rate": 1.8665567404401955e-05,
      "loss": 2.5775,
      "step": 13191
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1813217401504517,
      "learning_rate": 1.866536190806472e-05,
      "loss": 2.5635,
      "step": 13192
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0193535089492798,
      "learning_rate": 1.8665156397037338e-05,
      "loss": 2.3065,
      "step": 13193
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1250511407852173,
      "learning_rate": 1.866495087132016e-05,
      "loss": 2.4826,
      "step": 13194
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9166616797447205,
      "learning_rate": 1.866474533091354e-05,
      "loss": 2.5327,
      "step": 13195
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0488916635513306,
      "learning_rate": 1.8664539775817823e-05,
      "loss": 2.5334,
      "step": 13196
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0144847631454468,
      "learning_rate": 1.8664334206033358e-05,
      "loss": 2.3783,
      "step": 13197
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9375640749931335,
      "learning_rate": 1.8664128621560495e-05,
      "loss": 2.7309,
      "step": 13198
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9633671045303345,
      "learning_rate": 1.8663923022399575e-05,
      "loss": 2.5233,
      "step": 13199
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0541677474975586,
      "learning_rate": 1.8663717408550955e-05,
      "loss": 2.7439,
      "step": 13200
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.064619779586792,
      "learning_rate": 1.8663511780014984e-05,
      "loss": 2.6154,
      "step": 13201
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0614553689956665,
      "learning_rate": 1.8663306136792007e-05,
      "loss": 2.5019,
      "step": 13202
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.045434594154358,
      "learning_rate": 1.866310047888237e-05,
      "loss": 2.6228,
      "step": 13203
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1189261674880981,
      "learning_rate": 1.8662894806286428e-05,
      "loss": 2.6371,
      "step": 13204
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4044469594955444,
      "learning_rate": 1.8662689119004524e-05,
      "loss": 2.5404,
      "step": 13205
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0417677164077759,
      "learning_rate": 1.866248341703701e-05,
      "loss": 2.5126,
      "step": 13206
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9969832897186279,
      "learning_rate": 1.8662277700384235e-05,
      "loss": 2.6039,
      "step": 13207
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9846047759056091,
      "learning_rate": 1.8662071969046545e-05,
      "loss": 2.811,
      "step": 13208
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1612637042999268,
      "learning_rate": 1.8661866223024292e-05,
      "loss": 2.4723,
      "step": 13209
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0134854316711426,
      "learning_rate": 1.8661660462317825e-05,
      "loss": 2.5543,
      "step": 13210
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1182605028152466,
      "learning_rate": 1.8661454686927486e-05,
      "loss": 2.283,
      "step": 13211
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0297211408615112,
      "learning_rate": 1.8661248896853634e-05,
      "loss": 2.597,
      "step": 13212
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9807564616203308,
      "learning_rate": 1.866104309209661e-05,
      "loss": 2.5078,
      "step": 13213
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0951502323150635,
      "learning_rate": 1.8660837272656765e-05,
      "loss": 2.4336,
      "step": 13214
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9705515503883362,
      "learning_rate": 1.866063143853445e-05,
      "loss": 2.7168,
      "step": 13215
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0255542993545532,
      "learning_rate": 1.866042558973001e-05,
      "loss": 2.3649,
      "step": 13216
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0723371505737305,
      "learning_rate": 1.86602197262438e-05,
      "loss": 2.3814,
      "step": 13217
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0998715162277222,
      "learning_rate": 1.8660013848076163e-05,
      "loss": 2.6248,
      "step": 13218
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0906621217727661,
      "learning_rate": 1.865980795522745e-05,
      "loss": 2.361,
      "step": 13219
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.020029902458191,
      "learning_rate": 1.8659602047698017e-05,
      "loss": 2.3225,
      "step": 13220
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9677386283874512,
      "learning_rate": 1.8659396125488197e-05,
      "loss": 2.5148,
      "step": 13221
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0935670137405396,
      "learning_rate": 1.8659190188598355e-05,
      "loss": 2.4364,
      "step": 13222
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.056641936302185,
      "learning_rate": 1.8658984237028833e-05,
      "loss": 2.3672,
      "step": 13223
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0239523649215698,
      "learning_rate": 1.8658778270779982e-05,
      "loss": 2.4438,
      "step": 13224
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0550845861434937,
      "learning_rate": 1.8658572289852145e-05,
      "loss": 2.6971,
      "step": 13225
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0999001264572144,
      "learning_rate": 1.865836629424568e-05,
      "loss": 2.5561,
      "step": 13226
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0677416324615479,
      "learning_rate": 1.8658160283960932e-05,
      "loss": 2.7042,
      "step": 13227
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0334385633468628,
      "learning_rate": 1.865795425899825e-05,
      "loss": 2.5811,
      "step": 13228
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9594365954399109,
      "learning_rate": 1.8657748219357986e-05,
      "loss": 2.477,
      "step": 13229
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0258111953735352,
      "learning_rate": 1.8657542165040487e-05,
      "loss": 2.6518,
      "step": 13230
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2950798273086548,
      "learning_rate": 1.8657336096046103e-05,
      "loss": 2.6337,
      "step": 13231
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.158096194267273,
      "learning_rate": 1.8657130012375182e-05,
      "loss": 2.6947,
      "step": 13232
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9994716644287109,
      "learning_rate": 1.8656923914028073e-05,
      "loss": 2.4361,
      "step": 13233
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0715962648391724,
      "learning_rate": 1.865671780100513e-05,
      "loss": 2.4645,
      "step": 13234
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4234349727630615,
      "learning_rate": 1.8656511673306697e-05,
      "loss": 2.6087,
      "step": 13235
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9304200410842896,
      "learning_rate": 1.8656305530933126e-05,
      "loss": 2.5998,
      "step": 13236
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0683269500732422,
      "learning_rate": 1.8656099373884768e-05,
      "loss": 2.3959,
      "step": 13237
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9583419561386108,
      "learning_rate": 1.8655893202161966e-05,
      "loss": 2.3403,
      "step": 13238
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0383880138397217,
      "learning_rate": 1.8655687015765078e-05,
      "loss": 2.4416,
      "step": 13239
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0772219896316528,
      "learning_rate": 1.8655480814694445e-05,
      "loss": 2.309,
      "step": 13240
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1096771955490112,
      "learning_rate": 1.8655274598950427e-05,
      "loss": 2.7103,
      "step": 13241
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1259297132492065,
      "learning_rate": 1.8655068368533366e-05,
      "loss": 2.6457,
      "step": 13242
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.255425214767456,
      "learning_rate": 1.8654862123443613e-05,
      "loss": 2.728,
      "step": 13243
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0478798151016235,
      "learning_rate": 1.865465586368152e-05,
      "loss": 2.5368,
      "step": 13244
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1519713401794434,
      "learning_rate": 1.865444958924743e-05,
      "loss": 2.5825,
      "step": 13245
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0999823808670044,
      "learning_rate": 1.8654243300141703e-05,
      "loss": 2.5565,
      "step": 13246
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0272475481033325,
      "learning_rate": 1.8654036996364676e-05,
      "loss": 2.5211,
      "step": 13247
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.012798547744751,
      "learning_rate": 1.8653830677916714e-05,
      "loss": 2.5395,
      "step": 13248
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1304616928100586,
      "learning_rate": 1.8653624344798157e-05,
      "loss": 2.4616,
      "step": 13249
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9819445013999939,
      "learning_rate": 1.865341799700935e-05,
      "loss": 2.5419,
      "step": 13250
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.8904184103012085,
      "learning_rate": 1.8653211634550653e-05,
      "loss": 2.6388,
      "step": 13251
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0493093729019165,
      "learning_rate": 1.8653005257422413e-05,
      "loss": 2.5002,
      "step": 13252
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1429448127746582,
      "learning_rate": 1.8652798865624978e-05,
      "loss": 2.4446,
      "step": 13253
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3344404697418213,
      "learning_rate": 1.86525924591587e-05,
      "loss": 2.3621,
      "step": 13254
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.950131893157959,
      "learning_rate": 1.865238603802393e-05,
      "loss": 2.5571,
      "step": 13255
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9373712539672852,
      "learning_rate": 1.865217960222101e-05,
      "loss": 2.6294,
      "step": 13256
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0061593055725098,
      "learning_rate": 1.86519731517503e-05,
      "loss": 2.6381,
      "step": 13257
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1931747198104858,
      "learning_rate": 1.8651766686612144e-05,
      "loss": 2.4949,
      "step": 13258
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0120770931243896,
      "learning_rate": 1.8651560206806896e-05,
      "loss": 2.7855,
      "step": 13259
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0696929693222046,
      "learning_rate": 1.8651353712334902e-05,
      "loss": 2.4161,
      "step": 13260
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0974031686782837,
      "learning_rate": 1.8651147203196513e-05,
      "loss": 2.7487,
      "step": 13261
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2213423252105713,
      "learning_rate": 1.865094067939208e-05,
      "loss": 2.6058,
      "step": 13262
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.083848237991333,
      "learning_rate": 1.8650734140921953e-05,
      "loss": 2.4524,
      "step": 13263
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9984604716300964,
      "learning_rate": 1.8650527587786483e-05,
      "loss": 2.4949,
      "step": 13264
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.950630247592926,
      "learning_rate": 1.865032101998602e-05,
      "loss": 2.5428,
      "step": 13265
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0876575708389282,
      "learning_rate": 1.865011443752091e-05,
      "loss": 2.4934,
      "step": 13266
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9961220026016235,
      "learning_rate": 1.864990784039151e-05,
      "loss": 2.647,
      "step": 13267
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.019044280052185,
      "learning_rate": 1.8649701228598168e-05,
      "loss": 2.4006,
      "step": 13268
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0468096733093262,
      "learning_rate": 1.864949460214123e-05,
      "loss": 2.309,
      "step": 13269
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0234489440917969,
      "learning_rate": 1.864928796102105e-05,
      "loss": 2.6328,
      "step": 13270
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9424838423728943,
      "learning_rate": 1.864908130523798e-05,
      "loss": 2.4451,
      "step": 13271
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9663594365119934,
      "learning_rate": 1.8648874634792365e-05,
      "loss": 2.644,
      "step": 13272
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9857026934623718,
      "learning_rate": 1.8648667949684558e-05,
      "loss": 2.4296,
      "step": 13273
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.132871389389038,
      "learning_rate": 1.8648461249914912e-05,
      "loss": 2.5912,
      "step": 13274
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0563383102416992,
      "learning_rate": 1.8648254535483777e-05,
      "loss": 2.5684,
      "step": 13275
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0563123226165771,
      "learning_rate": 1.8648047806391497e-05,
      "loss": 2.6147,
      "step": 13276
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0177501440048218,
      "learning_rate": 1.8647841062638432e-05,
      "loss": 2.4861,
      "step": 13277
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.8791157603263855,
      "learning_rate": 1.8647634304224928e-05,
      "loss": 2.4688,
      "step": 13278
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0154865980148315,
      "learning_rate": 1.8647427531151333e-05,
      "loss": 2.3418,
      "step": 13279
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9751463532447815,
      "learning_rate": 1.8647220743418e-05,
      "loss": 2.4067,
      "step": 13280
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.070622444152832,
      "learning_rate": 1.8647013941025278e-05,
      "loss": 2.4645,
      "step": 13281
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0241137742996216,
      "learning_rate": 1.864680712397352e-05,
      "loss": 2.6439,
      "step": 13282
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0487216711044312,
      "learning_rate": 1.8646600292263076e-05,
      "loss": 2.4897,
      "step": 13283
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9682208299636841,
      "learning_rate": 1.8646393445894294e-05,
      "loss": 2.5625,
      "step": 13284
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9681140184402466,
      "learning_rate": 1.864618658486753e-05,
      "loss": 2.4317,
      "step": 13285
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9688208103179932,
      "learning_rate": 1.864597970918313e-05,
      "loss": 2.5457,
      "step": 13286
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9886261224746704,
      "learning_rate": 1.8645772818841445e-05,
      "loss": 2.4251,
      "step": 13287
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9690265655517578,
      "learning_rate": 1.8645565913842824e-05,
      "loss": 2.5811,
      "step": 13288
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9758620858192444,
      "learning_rate": 1.8645358994187627e-05,
      "loss": 2.6097,
      "step": 13289
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9377043843269348,
      "learning_rate": 1.8645152059876194e-05,
      "loss": 2.4032,
      "step": 13290
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9499172568321228,
      "learning_rate": 1.8644945110908883e-05,
      "loss": 2.5956,
      "step": 13291
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0483802556991577,
      "learning_rate": 1.864473814728604e-05,
      "loss": 2.3488,
      "step": 13292
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3728845119476318,
      "learning_rate": 1.8644531169008022e-05,
      "loss": 2.3941,
      "step": 13293
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0174064636230469,
      "learning_rate": 1.8644324176075174e-05,
      "loss": 2.4743,
      "step": 13294
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.062193512916565,
      "learning_rate": 1.8644117168487846e-05,
      "loss": 2.3267,
      "step": 13295
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9835702180862427,
      "learning_rate": 1.8643910146246395e-05,
      "loss": 2.326,
      "step": 13296
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9720146059989929,
      "learning_rate": 1.8643703109351165e-05,
      "loss": 2.748,
      "step": 13297
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0689196586608887,
      "learning_rate": 1.8643496057802512e-05,
      "loss": 2.48,
      "step": 13298
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9643892645835876,
      "learning_rate": 1.864328899160079e-05,
      "loss": 2.6736,
      "step": 13299
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9948844313621521,
      "learning_rate": 1.8643081910746338e-05,
      "loss": 2.5545,
      "step": 13300
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.168593406677246,
      "learning_rate": 1.864287481523952e-05,
      "loss": 2.7535,
      "step": 13301
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0593568086624146,
      "learning_rate": 1.864266770508068e-05,
      "loss": 2.5645,
      "step": 13302
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0524379014968872,
      "learning_rate": 1.8642460580270173e-05,
      "loss": 2.56,
      "step": 13303
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.014054536819458,
      "learning_rate": 1.8642253440808347e-05,
      "loss": 2.5561,
      "step": 13304
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9341958165168762,
      "learning_rate": 1.864204628669555e-05,
      "loss": 2.5959,
      "step": 13305
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.034592628479004,
      "learning_rate": 1.8641839117932145e-05,
      "loss": 2.3187,
      "step": 13306
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0242743492126465,
      "learning_rate": 1.864163193451847e-05,
      "loss": 2.6509,
      "step": 13307
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9538372159004211,
      "learning_rate": 1.8641424736454886e-05,
      "loss": 2.4545,
      "step": 13308
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.916339099407196,
      "learning_rate": 1.8641217523741735e-05,
      "loss": 2.39,
      "step": 13309
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9911349415779114,
      "learning_rate": 1.8641010296379377e-05,
      "loss": 2.7352,
      "step": 13310
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0690021514892578,
      "learning_rate": 1.864080305436816e-05,
      "loss": 2.2702,
      "step": 13311
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9277550578117371,
      "learning_rate": 1.8640595797708435e-05,
      "loss": 2.638,
      "step": 13312
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1991956233978271,
      "learning_rate": 1.864038852640055e-05,
      "loss": 2.5512,
      "step": 13313
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9878063201904297,
      "learning_rate": 1.864018124044486e-05,
      "loss": 2.7021,
      "step": 13314
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0265635251998901,
      "learning_rate": 1.863997393984172e-05,
      "loss": 2.3124,
      "step": 13315
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0675935745239258,
      "learning_rate": 1.8639766624591478e-05,
      "loss": 2.5391,
      "step": 13316
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9972053170204163,
      "learning_rate": 1.863955929469448e-05,
      "loss": 2.5132,
      "step": 13317
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9431913495063782,
      "learning_rate": 1.8639351950151086e-05,
      "loss": 2.5336,
      "step": 13318
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0710527896881104,
      "learning_rate": 1.8639144590961643e-05,
      "loss": 2.2699,
      "step": 13319
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1463087797164917,
      "learning_rate": 1.8638937217126505e-05,
      "loss": 2.6868,
      "step": 13320
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1252156496047974,
      "learning_rate": 1.863872982864602e-05,
      "loss": 2.4048,
      "step": 13321
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9014503955841064,
      "learning_rate": 1.863852242552054e-05,
      "loss": 2.4385,
      "step": 13322
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9092085957527161,
      "learning_rate": 1.863831500775042e-05,
      "loss": 2.2442,
      "step": 13323
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3442226648330688,
      "learning_rate": 1.8638107575336016e-05,
      "loss": 2.6153,
      "step": 13324
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0187650918960571,
      "learning_rate": 1.8637900128277668e-05,
      "loss": 2.534,
      "step": 13325
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9696604013442993,
      "learning_rate": 1.8637692666575733e-05,
      "loss": 2.445,
      "step": 13326
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0769189596176147,
      "learning_rate": 1.8637485190230564e-05,
      "loss": 2.6088,
      "step": 13327
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0507458448410034,
      "learning_rate": 1.8637277699242512e-05,
      "loss": 2.4793,
      "step": 13328
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9954549074172974,
      "learning_rate": 1.8637070193611924e-05,
      "loss": 2.5859,
      "step": 13329
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0599803924560547,
      "learning_rate": 1.8636862673339165e-05,
      "loss": 2.3699,
      "step": 13330
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1401865482330322,
      "learning_rate": 1.863665513842457e-05,
      "loss": 2.5949,
      "step": 13331
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1088701486587524,
      "learning_rate": 1.86364475888685e-05,
      "loss": 2.5485,
      "step": 13332
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0094873905181885,
      "learning_rate": 1.8636240024671314e-05,
      "loss": 2.4769,
      "step": 13333
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0679057836532593,
      "learning_rate": 1.8636032445833346e-05,
      "loss": 2.4183,
      "step": 13334
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9481292366981506,
      "learning_rate": 1.863582485235496e-05,
      "loss": 2.679,
      "step": 13335
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.968116044998169,
      "learning_rate": 1.8635617244236507e-05,
      "loss": 2.5469,
      "step": 13336
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9405292868614197,
      "learning_rate": 1.8635409621478335e-05,
      "loss": 2.5107,
      "step": 13337
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2091126441955566,
      "learning_rate": 1.86352019840808e-05,
      "loss": 2.5729,
      "step": 13338
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.8974095582962036,
      "learning_rate": 1.8634994332044255e-05,
      "loss": 2.3803,
      "step": 13339
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.925673246383667,
      "learning_rate": 1.8634786665369047e-05,
      "loss": 2.5509,
      "step": 13340
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9919841885566711,
      "learning_rate": 1.863457898405553e-05,
      "loss": 2.6968,
      "step": 13341
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0771427154541016,
      "learning_rate": 1.8634371288104054e-05,
      "loss": 2.7744,
      "step": 13342
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0928878784179688,
      "learning_rate": 1.863416357751498e-05,
      "loss": 2.6212,
      "step": 13343
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9973418116569519,
      "learning_rate": 1.8633955852288646e-05,
      "loss": 2.5638,
      "step": 13344
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0886647701263428,
      "learning_rate": 1.8633748112425415e-05,
      "loss": 2.6964,
      "step": 13345
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0337783098220825,
      "learning_rate": 1.863354035792564e-05,
      "loss": 2.6378,
      "step": 13346
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.8867877721786499,
      "learning_rate": 1.8633332588789662e-05,
      "loss": 2.571,
      "step": 13347
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0879650115966797,
      "learning_rate": 1.8633124805017848e-05,
      "loss": 2.5398,
      "step": 13348
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9606442451477051,
      "learning_rate": 1.863291700661054e-05,
      "loss": 2.5303,
      "step": 13349
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0025643110275269,
      "learning_rate": 1.863270919356809e-05,
      "loss": 2.6288,
      "step": 13350
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0377824306488037,
      "learning_rate": 1.8632501365890855e-05,
      "loss": 2.5901,
      "step": 13351
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0382850170135498,
      "learning_rate": 1.8632293523579185e-05,
      "loss": 2.5526,
      "step": 13352
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9603212475776672,
      "learning_rate": 1.863208566663344e-05,
      "loss": 2.6706,
      "step": 13353
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9659607410430908,
      "learning_rate": 1.8631877795053955e-05,
      "loss": 2.4911,
      "step": 13354
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.062824010848999,
      "learning_rate": 1.8631669908841096e-05,
      "loss": 2.5419,
      "step": 13355
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9781836271286011,
      "learning_rate": 1.8631462007995217e-05,
      "loss": 2.4284,
      "step": 13356
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0501374006271362,
      "learning_rate": 1.8631254092516662e-05,
      "loss": 2.5832,
      "step": 13357
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0140001773834229,
      "learning_rate": 1.8631046162405785e-05,
      "loss": 2.5691,
      "step": 13358
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.078182339668274,
      "learning_rate": 1.8630838217662943e-05,
      "loss": 2.7528,
      "step": 13359
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9709105491638184,
      "learning_rate": 1.8630630258288487e-05,
      "loss": 2.4998,
      "step": 13360
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9220976829528809,
      "learning_rate": 1.8630422284282767e-05,
      "loss": 2.497,
      "step": 13361
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0372546911239624,
      "learning_rate": 1.8630214295646135e-05,
      "loss": 2.6178,
      "step": 13362
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9952558279037476,
      "learning_rate": 1.8630006292378954e-05,
      "loss": 2.4021,
      "step": 13363
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0088560581207275,
      "learning_rate": 1.8629798274481562e-05,
      "loss": 2.4658,
      "step": 13364
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.033956527709961,
      "learning_rate": 1.8629590241954316e-05,
      "loss": 2.586,
      "step": 13365
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.028428554534912,
      "learning_rate": 1.8629382194797573e-05,
      "loss": 2.5349,
      "step": 13366
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1836374998092651,
      "learning_rate": 1.8629174133011688e-05,
      "loss": 2.5276,
      "step": 13367
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0732669830322266,
      "learning_rate": 1.8628966056597004e-05,
      "loss": 2.4774,
      "step": 13368
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0488847494125366,
      "learning_rate": 1.862875796555388e-05,
      "loss": 2.4362,
      "step": 13369
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0632466077804565,
      "learning_rate": 1.862854985988267e-05,
      "loss": 2.5511,
      "step": 13370
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8556069135665894,
      "learning_rate": 1.8628341739583723e-05,
      "loss": 2.642,
      "step": 13371
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1713234186172485,
      "learning_rate": 1.8628133604657392e-05,
      "loss": 2.6841,
      "step": 13372
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9389069080352783,
      "learning_rate": 1.8627925455104033e-05,
      "loss": 2.5536,
      "step": 13373
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1076767444610596,
      "learning_rate": 1.8627717290923994e-05,
      "loss": 2.458,
      "step": 13374
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9521125555038452,
      "learning_rate": 1.8627509112117633e-05,
      "loss": 2.4937,
      "step": 13375
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1329467296600342,
      "learning_rate": 1.86273009186853e-05,
      "loss": 2.5102,
      "step": 13376
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9946328401565552,
      "learning_rate": 1.8627092710627352e-05,
      "loss": 2.3067,
      "step": 13377
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.937160849571228,
      "learning_rate": 1.8626884487944136e-05,
      "loss": 2.6402,
      "step": 13378
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.03636634349823,
      "learning_rate": 1.8626676250636012e-05,
      "loss": 2.7604,
      "step": 13379
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9773697257041931,
      "learning_rate": 1.8626467998703322e-05,
      "loss": 2.4887,
      "step": 13380
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9666332602500916,
      "learning_rate": 1.862625973214643e-05,
      "loss": 2.5377,
      "step": 13381
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0455851554870605,
      "learning_rate": 1.8626051450965685e-05,
      "loss": 2.5384,
      "step": 13382
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1432693004608154,
      "learning_rate": 1.8625843155161438e-05,
      "loss": 2.3961,
      "step": 13383
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.160590410232544,
      "learning_rate": 1.8625634844734045e-05,
      "loss": 2.5628,
      "step": 13384
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0639822483062744,
      "learning_rate": 1.862542651968386e-05,
      "loss": 2.7194,
      "step": 13385
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9902997612953186,
      "learning_rate": 1.862521818001123e-05,
      "loss": 2.6408,
      "step": 13386
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.008638858795166,
      "learning_rate": 1.8625009825716516e-05,
      "loss": 2.5487,
      "step": 13387
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1051729917526245,
      "learning_rate": 1.8624801456800065e-05,
      "loss": 2.4434,
      "step": 13388
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9832531213760376,
      "learning_rate": 1.862459307326224e-05,
      "loss": 2.5813,
      "step": 13389
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0500990152359009,
      "learning_rate": 1.862438467510338e-05,
      "loss": 2.4711,
      "step": 13390
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9701523780822754,
      "learning_rate": 1.862417626232385e-05,
      "loss": 2.4818,
      "step": 13391
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9752910137176514,
      "learning_rate": 1.8623967834923994e-05,
      "loss": 2.4766,
      "step": 13392
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9599064588546753,
      "learning_rate": 1.8623759392904175e-05,
      "loss": 2.6018,
      "step": 13393
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9993948936462402,
      "learning_rate": 1.8623550936264737e-05,
      "loss": 2.3797,
      "step": 13394
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0487322807312012,
      "learning_rate": 1.862334246500604e-05,
      "loss": 2.651,
      "step": 13395
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9878994226455688,
      "learning_rate": 1.8623133979128442e-05,
      "loss": 2.7196,
      "step": 13396
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0158637762069702,
      "learning_rate": 1.8622925478632283e-05,
      "loss": 2.6259,
      "step": 13397
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4452998638153076,
      "learning_rate": 1.8622716963517925e-05,
      "loss": 2.3384,
      "step": 13398
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0385112762451172,
      "learning_rate": 1.862250843378572e-05,
      "loss": 2.5042,
      "step": 13399
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9798895716667175,
      "learning_rate": 1.862229988943602e-05,
      "loss": 2.5731,
      "step": 13400
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9455457329750061,
      "learning_rate": 1.862209133046918e-05,
      "loss": 2.4636,
      "step": 13401
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1462435722351074,
      "learning_rate": 1.8621882756885554e-05,
      "loss": 2.4386,
      "step": 13402
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9744657874107361,
      "learning_rate": 1.8621674168685494e-05,
      "loss": 2.6617,
      "step": 13403
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2021002769470215,
      "learning_rate": 1.8621465565869358e-05,
      "loss": 2.424,
      "step": 13404
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.91304612159729,
      "learning_rate": 1.8621256948437496e-05,
      "loss": 2.6262,
      "step": 13405
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9862588047981262,
      "learning_rate": 1.8621048316390256e-05,
      "loss": 2.4675,
      "step": 13406
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1740950345993042,
      "learning_rate": 1.8620839669728002e-05,
      "loss": 2.3961,
      "step": 13407
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.068034291267395,
      "learning_rate": 1.8620631008451086e-05,
      "loss": 2.4039,
      "step": 13408
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9599848389625549,
      "learning_rate": 1.8620422332559855e-05,
      "loss": 2.3983,
      "step": 13409
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0472826957702637,
      "learning_rate": 1.862021364205467e-05,
      "loss": 2.4623,
      "step": 13410
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9929919242858887,
      "learning_rate": 1.862000493693588e-05,
      "loss": 2.3643,
      "step": 13411
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.984538733959198,
      "learning_rate": 1.8619796217203842e-05,
      "loss": 2.5143,
      "step": 13412
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0592399835586548,
      "learning_rate": 1.861958748285891e-05,
      "loss": 2.4558,
      "step": 13413
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.033047080039978,
      "learning_rate": 1.8619378733901428e-05,
      "loss": 2.5543,
      "step": 13414
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0116733312606812,
      "learning_rate": 1.8619169970331764e-05,
      "loss": 2.6026,
      "step": 13415
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9853188991546631,
      "learning_rate": 1.8618961192150263e-05,
      "loss": 2.2807,
      "step": 13416
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9564499258995056,
      "learning_rate": 1.8618752399357285e-05,
      "loss": 2.5368,
      "step": 13417
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0210093259811401,
      "learning_rate": 1.861854359195318e-05,
      "loss": 2.5686,
      "step": 13418
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9107542037963867,
      "learning_rate": 1.8618334769938303e-05,
      "loss": 2.4135,
      "step": 13419
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0580202341079712,
      "learning_rate": 1.8618125933313006e-05,
      "loss": 2.5688,
      "step": 13420
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0763680934906006,
      "learning_rate": 1.861791708207765e-05,
      "loss": 2.5672,
      "step": 13421
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.057289958000183,
      "learning_rate": 1.8617708216232578e-05,
      "loss": 2.5976,
      "step": 13422
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3292672634124756,
      "learning_rate": 1.8617499335778155e-05,
      "loss": 2.4676,
      "step": 13423
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2438445091247559,
      "learning_rate": 1.8617290440714724e-05,
      "loss": 2.4457,
      "step": 13424
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0549577474594116,
      "learning_rate": 1.8617081531042647e-05,
      "loss": 2.3281,
      "step": 13425
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0641353130340576,
      "learning_rate": 1.8616872606762282e-05,
      "loss": 2.3832,
      "step": 13426
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9942260980606079,
      "learning_rate": 1.8616663667873972e-05,
      "loss": 2.6987,
      "step": 13427
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0391926765441895,
      "learning_rate": 1.861645471437808e-05,
      "loss": 2.5721,
      "step": 13428
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9921682476997375,
      "learning_rate": 1.8616245746274956e-05,
      "loss": 2.4047,
      "step": 13429
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0873092412948608,
      "learning_rate": 1.8616036763564953e-05,
      "loss": 2.5813,
      "step": 13430
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0204496383666992,
      "learning_rate": 1.861582776624843e-05,
      "loss": 2.4806,
      "step": 13431
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0128542184829712,
      "learning_rate": 1.8615618754325738e-05,
      "loss": 2.6807,
      "step": 13432
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.990178644657135,
      "learning_rate": 1.8615409727797233e-05,
      "loss": 2.7507,
      "step": 13433
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9264841675758362,
      "learning_rate": 1.861520068666327e-05,
      "loss": 2.4582,
      "step": 13434
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0811572074890137,
      "learning_rate": 1.86149916309242e-05,
      "loss": 2.4248,
      "step": 13435
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.175872564315796,
      "learning_rate": 1.861478256058038e-05,
      "loss": 2.3852,
      "step": 13436
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0917898416519165,
      "learning_rate": 1.8614573475632164e-05,
      "loss": 2.5536,
      "step": 13437
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.942371129989624,
      "learning_rate": 1.8614364376079902e-05,
      "loss": 2.5844,
      "step": 13438
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0762962102890015,
      "learning_rate": 1.861415526192396e-05,
      "loss": 2.6067,
      "step": 13439
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9592325091362,
      "learning_rate": 1.8613946133164683e-05,
      "loss": 2.4886,
      "step": 13440
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1355278491973877,
      "learning_rate": 1.8613736989802424e-05,
      "loss": 2.4603,
      "step": 13441
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0352338552474976,
      "learning_rate": 1.8613527831837547e-05,
      "loss": 2.6022,
      "step": 13442
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0202398300170898,
      "learning_rate": 1.8613318659270394e-05,
      "loss": 2.372,
      "step": 13443
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0683091878890991,
      "learning_rate": 1.8613109472101332e-05,
      "loss": 2.5146,
      "step": 13444
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9974420666694641,
      "learning_rate": 1.861290027033071e-05,
      "loss": 2.6132,
      "step": 13445
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9832022190093994,
      "learning_rate": 1.8612691053958876e-05,
      "loss": 2.6248,
      "step": 13446
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.022580623626709,
      "learning_rate": 1.86124818229862e-05,
      "loss": 2.4307,
      "step": 13447
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1119312047958374,
      "learning_rate": 1.8612272577413027e-05,
      "loss": 2.4278,
      "step": 13448
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9894348978996277,
      "learning_rate": 1.8612063317239707e-05,
      "loss": 2.5663,
      "step": 13449
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0902023315429688,
      "learning_rate": 1.8611854042466605e-05,
      "loss": 2.6283,
      "step": 13450
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1431374549865723,
      "learning_rate": 1.8611644753094073e-05,
      "loss": 2.5151,
      "step": 13451
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0850759744644165,
      "learning_rate": 1.8611435449122464e-05,
      "loss": 2.4274,
      "step": 13452
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9724858999252319,
      "learning_rate": 1.8611226130552128e-05,
      "loss": 2.5555,
      "step": 13453
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1007235050201416,
      "learning_rate": 1.861101679738343e-05,
      "loss": 2.7093,
      "step": 13454
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.8987116813659668,
      "learning_rate": 1.861080744961672e-05,
      "loss": 2.392,
      "step": 13455
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1301960945129395,
      "learning_rate": 1.861059808725235e-05,
      "loss": 2.4737,
      "step": 13456
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0515809059143066,
      "learning_rate": 1.861038871029068e-05,
      "loss": 2.1764,
      "step": 13457
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0410807132720947,
      "learning_rate": 1.8610179318732057e-05,
      "loss": 2.4533,
      "step": 13458
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1371327638626099,
      "learning_rate": 1.8609969912576846e-05,
      "loss": 2.5511,
      "step": 13459
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9956759214401245,
      "learning_rate": 1.8609760491825398e-05,
      "loss": 2.537,
      "step": 13460
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0602144002914429,
      "learning_rate": 1.8609551056478068e-05,
      "loss": 2.465,
      "step": 13461
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0344603061676025,
      "learning_rate": 1.8609341606535208e-05,
      "loss": 2.6624,
      "step": 13462
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0063787698745728,
      "learning_rate": 1.8609132141997178e-05,
      "loss": 2.5191,
      "step": 13463
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0538979768753052,
      "learning_rate": 1.860892266286433e-05,
      "loss": 2.3999,
      "step": 13464
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9685565233230591,
      "learning_rate": 1.8608713169137022e-05,
      "loss": 2.5521,
      "step": 13465
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9951242208480835,
      "learning_rate": 1.86085036608156e-05,
      "loss": 2.6675,
      "step": 13466
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1511435508728027,
      "learning_rate": 1.8608294137900433e-05,
      "loss": 2.6209,
      "step": 13467
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9691857099533081,
      "learning_rate": 1.860808460039187e-05,
      "loss": 2.4814,
      "step": 13468
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1221864223480225,
      "learning_rate": 1.860787504829026e-05,
      "loss": 2.7364,
      "step": 13469
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0024830102920532,
      "learning_rate": 1.860766548159597e-05,
      "loss": 2.391,
      "step": 13470
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9272534251213074,
      "learning_rate": 1.8607455900309346e-05,
      "loss": 2.2616,
      "step": 13471
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0605133771896362,
      "learning_rate": 1.8607246304430746e-05,
      "loss": 2.5228,
      "step": 13472
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3448904752731323,
      "learning_rate": 1.8607036693960526e-05,
      "loss": 2.502,
      "step": 13473
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.104547142982483,
      "learning_rate": 1.8606827068899042e-05,
      "loss": 2.6569,
      "step": 13474
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0979650020599365,
      "learning_rate": 1.860661742924665e-05,
      "loss": 2.3772,
      "step": 13475
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0312949419021606,
      "learning_rate": 1.86064077750037e-05,
      "loss": 2.5188,
      "step": 13476
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0991350412368774,
      "learning_rate": 1.8606198106170558e-05,
      "loss": 2.632,
      "step": 13477
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0265467166900635,
      "learning_rate": 1.8605988422747566e-05,
      "loss": 2.4528,
      "step": 13478
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0489414930343628,
      "learning_rate": 1.860577872473509e-05,
      "loss": 2.6786,
      "step": 13479
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0983102321624756,
      "learning_rate": 1.860556901213348e-05,
      "loss": 2.538,
      "step": 13480
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0191764831542969,
      "learning_rate": 1.8605359284943094e-05,
      "loss": 2.655,
      "step": 13481
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0219823122024536,
      "learning_rate": 1.860514954316429e-05,
      "loss": 2.3514,
      "step": 13482
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0472520589828491,
      "learning_rate": 1.8604939786797414e-05,
      "loss": 2.4686,
      "step": 13483
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9165725111961365,
      "learning_rate": 1.860473001584283e-05,
      "loss": 2.4406,
      "step": 13484
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9992127418518066,
      "learning_rate": 1.8604520230300895e-05,
      "loss": 2.3709,
      "step": 13485
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9648457765579224,
      "learning_rate": 1.860431043017196e-05,
      "loss": 2.3382,
      "step": 13486
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.957887589931488,
      "learning_rate": 1.860410061545638e-05,
      "loss": 2.5647,
      "step": 13487
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0176995992660522,
      "learning_rate": 1.8603890786154513e-05,
      "loss": 2.6903,
      "step": 13488
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9801107048988342,
      "learning_rate": 1.8603680942266713e-05,
      "loss": 2.4735,
      "step": 13489
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0873842239379883,
      "learning_rate": 1.860347108379334e-05,
      "loss": 2.7562,
      "step": 13490
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9883347153663635,
      "learning_rate": 1.8603261210734745e-05,
      "loss": 2.4995,
      "step": 13491
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9956435561180115,
      "learning_rate": 1.860305132309129e-05,
      "loss": 2.6055,
      "step": 13492
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.042381763458252,
      "learning_rate": 1.860284142086332e-05,
      "loss": 2.595,
      "step": 13493
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1087706089019775,
      "learning_rate": 1.86026315040512e-05,
      "loss": 2.6816,
      "step": 13494
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0573649406433105,
      "learning_rate": 1.8602421572655283e-05,
      "loss": 2.3037,
      "step": 13495
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0455883741378784,
      "learning_rate": 1.8602211626675926e-05,
      "loss": 2.6509,
      "step": 13496
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0216784477233887,
      "learning_rate": 1.8602001666113484e-05,
      "loss": 2.4888,
      "step": 13497
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.24809730052948,
      "learning_rate": 1.860179169096831e-05,
      "loss": 2.3414,
      "step": 13498
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0452929735183716,
      "learning_rate": 1.860158170124077e-05,
      "loss": 2.3038,
      "step": 13499
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9547039866447449,
      "learning_rate": 1.8601371696931206e-05,
      "loss": 2.5428,
      "step": 13500
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0345081090927124,
      "learning_rate": 1.860116167803998e-05,
      "loss": 2.4656,
      "step": 13501
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0638933181762695,
      "learning_rate": 1.8600951644567454e-05,
      "loss": 2.3977,
      "step": 13502
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0076894760131836,
      "learning_rate": 1.8600741596513976e-05,
      "loss": 2.5298,
      "step": 13503
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0899889469146729,
      "learning_rate": 1.8600531533879907e-05,
      "loss": 2.4317,
      "step": 13504
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0460150241851807,
      "learning_rate": 1.86003214566656e-05,
      "loss": 2.5275,
      "step": 13505
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9886406064033508,
      "learning_rate": 1.860011136487141e-05,
      "loss": 2.4239,
      "step": 13506
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0502715110778809,
      "learning_rate": 1.85999012584977e-05,
      "loss": 2.5386,
      "step": 13507
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.236365795135498,
      "learning_rate": 1.859969113754482e-05,
      "loss": 2.3284,
      "step": 13508
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0890947580337524,
      "learning_rate": 1.8599481002013125e-05,
      "loss": 2.3716,
      "step": 13509
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.071331262588501,
      "learning_rate": 1.8599270851902975e-05,
      "loss": 2.5022,
      "step": 13510
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1189601421356201,
      "learning_rate": 1.8599060687214727e-05,
      "loss": 2.4657,
      "step": 13511
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9661935567855835,
      "learning_rate": 1.8598850507948737e-05,
      "loss": 2.7003,
      "step": 13512
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9996966123580933,
      "learning_rate": 1.8598640314105358e-05,
      "loss": 2.6202,
      "step": 13513
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0301517248153687,
      "learning_rate": 1.8598430105684948e-05,
      "loss": 2.2591,
      "step": 13514
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0052651166915894,
      "learning_rate": 1.8598219882687867e-05,
      "loss": 2.5415,
      "step": 13515
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.074771523475647,
      "learning_rate": 1.8598009645114464e-05,
      "loss": 2.5218,
      "step": 13516
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0527634620666504,
      "learning_rate": 1.85977993929651e-05,
      "loss": 2.5243,
      "step": 13517
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1133301258087158,
      "learning_rate": 1.8597589126240133e-05,
      "loss": 2.4409,
      "step": 13518
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0167773962020874,
      "learning_rate": 1.8597378844939914e-05,
      "loss": 2.5578,
      "step": 13519
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0410935878753662,
      "learning_rate": 1.8597168549064806e-05,
      "loss": 2.2887,
      "step": 13520
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0015110969543457,
      "learning_rate": 1.8596958238615165e-05,
      "loss": 2.463,
      "step": 13521
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0067565441131592,
      "learning_rate": 1.859674791359134e-05,
      "loss": 2.5751,
      "step": 13522
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0217194557189941,
      "learning_rate": 1.859653757399369e-05,
      "loss": 2.5373,
      "step": 13523
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0072965621948242,
      "learning_rate": 1.859632721982258e-05,
      "loss": 2.8009,
      "step": 13524
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9294111132621765,
      "learning_rate": 1.859611685107836e-05,
      "loss": 2.698,
      "step": 13525
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2244911193847656,
      "learning_rate": 1.8595906467761384e-05,
      "loss": 2.5251,
      "step": 13526
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0183806419372559,
      "learning_rate": 1.8595696069872013e-05,
      "loss": 2.511,
      "step": 13527
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9837228655815125,
      "learning_rate": 1.8595485657410603e-05,
      "loss": 2.7964,
      "step": 13528
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1109905242919922,
      "learning_rate": 1.859527523037751e-05,
      "loss": 2.6182,
      "step": 13529
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.004221796989441,
      "learning_rate": 1.8595064788773092e-05,
      "loss": 2.6595,
      "step": 13530
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9328120350837708,
      "learning_rate": 1.8594854332597703e-05,
      "loss": 2.5753,
      "step": 13531
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9671854972839355,
      "learning_rate": 1.8594643861851705e-05,
      "loss": 2.6534,
      "step": 13532
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2963004112243652,
      "learning_rate": 1.859443337653545e-05,
      "loss": 2.6059,
      "step": 13533
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1020957231521606,
      "learning_rate": 1.8594222876649294e-05,
      "loss": 2.4537,
      "step": 13534
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.992057740688324,
      "learning_rate": 1.8594012362193597e-05,
      "loss": 2.6074,
      "step": 13535
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.112317442893982,
      "learning_rate": 1.8593801833168713e-05,
      "loss": 2.5156,
      "step": 13536
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9720344543457031,
      "learning_rate": 1.8593591289575004e-05,
      "loss": 2.5207,
      "step": 13537
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0169202089309692,
      "learning_rate": 1.8593380731412827e-05,
      "loss": 2.5278,
      "step": 13538
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0087209939956665,
      "learning_rate": 1.859317015868253e-05,
      "loss": 2.6038,
      "step": 13539
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0409671068191528,
      "learning_rate": 1.8592959571384476e-05,
      "loss": 2.3495,
      "step": 13540
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0075349807739258,
      "learning_rate": 1.8592748969519024e-05,
      "loss": 2.1522,
      "step": 13541
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9806574583053589,
      "learning_rate": 1.859253835308653e-05,
      "loss": 2.3723,
      "step": 13542
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0886143445968628,
      "learning_rate": 1.8592327722087346e-05,
      "loss": 2.5143,
      "step": 13543
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0314035415649414,
      "learning_rate": 1.8592117076521835e-05,
      "loss": 2.5708,
      "step": 13544
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0111539363861084,
      "learning_rate": 1.8591906416390352e-05,
      "loss": 2.2728,
      "step": 13545
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.029495358467102,
      "learning_rate": 1.8591695741693253e-05,
      "loss": 2.2605,
      "step": 13546
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0218441486358643,
      "learning_rate": 1.8591485052430898e-05,
      "loss": 2.6769,
      "step": 13547
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9877014756202698,
      "learning_rate": 1.859127434860364e-05,
      "loss": 2.4504,
      "step": 13548
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0168126821517944,
      "learning_rate": 1.859106363021184e-05,
      "loss": 2.739,
      "step": 13549
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9752545356750488,
      "learning_rate": 1.8590852897255852e-05,
      "loss": 2.2343,
      "step": 13550
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1601725816726685,
      "learning_rate": 1.859064214973604e-05,
      "loss": 2.7811,
      "step": 13551
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0201096534729004,
      "learning_rate": 1.8590431387652752e-05,
      "loss": 2.4463,
      "step": 13552
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0019755363464355,
      "learning_rate": 1.859022061100635e-05,
      "loss": 2.5251,
      "step": 13553
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0240768194198608,
      "learning_rate": 1.8590009819797194e-05,
      "loss": 2.5689,
      "step": 13554
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9556982517242432,
      "learning_rate": 1.8589799014025636e-05,
      "loss": 2.6186,
      "step": 13555
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1106948852539062,
      "learning_rate": 1.8589588193692037e-05,
      "loss": 2.5882,
      "step": 13556
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0045886039733887,
      "learning_rate": 1.8589377358796752e-05,
      "loss": 2.5593,
      "step": 13557
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9631462693214417,
      "learning_rate": 1.8589166509340137e-05,
      "loss": 2.6713,
      "step": 13558
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9872270822525024,
      "learning_rate": 1.858895564532256e-05,
      "loss": 2.575,
      "step": 13559
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.050171136856079,
      "learning_rate": 1.8588744766744362e-05,
      "loss": 2.7187,
      "step": 13560
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9204664826393127,
      "learning_rate": 1.8588533873605913e-05,
      "loss": 2.5813,
      "step": 13561
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0374705791473389,
      "learning_rate": 1.8588322965907563e-05,
      "loss": 2.5294,
      "step": 13562
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9585211277008057,
      "learning_rate": 1.8588112043649677e-05,
      "loss": 2.5987,
      "step": 13563
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9339240193367004,
      "learning_rate": 1.8587901106832608e-05,
      "loss": 2.6375,
      "step": 13564
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9400618076324463,
      "learning_rate": 1.8587690155456713e-05,
      "loss": 2.4025,
      "step": 13565
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0920512676239014,
      "learning_rate": 1.8587479189522352e-05,
      "loss": 2.4528,
      "step": 13566
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0562677383422852,
      "learning_rate": 1.858726820902988e-05,
      "loss": 2.5862,
      "step": 13567
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0400404930114746,
      "learning_rate": 1.8587057213979655e-05,
      "loss": 2.5044,
      "step": 13568
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0507729053497314,
      "learning_rate": 1.8586846204372037e-05,
      "loss": 2.5379,
      "step": 13569
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0306459665298462,
      "learning_rate": 1.8586635180207383e-05,
      "loss": 2.6093,
      "step": 13570
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9902704954147339,
      "learning_rate": 1.858642414148605e-05,
      "loss": 2.4368,
      "step": 13571
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.100639820098877,
      "learning_rate": 1.8586213088208396e-05,
      "loss": 2.4903,
      "step": 13572
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0279624462127686,
      "learning_rate": 1.858600202037478e-05,
      "loss": 2.7089,
      "step": 13573
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.986788809299469,
      "learning_rate": 1.8585790937985556e-05,
      "loss": 2.6397,
      "step": 13574
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9843562841415405,
      "learning_rate": 1.8585579841041087e-05,
      "loss": 2.3607,
      "step": 13575
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1693966388702393,
      "learning_rate": 1.8585368729541728e-05,
      "loss": 2.4956,
      "step": 13576
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0808480978012085,
      "learning_rate": 1.858515760348784e-05,
      "loss": 2.5713,
      "step": 13577
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1390246152877808,
      "learning_rate": 1.8584946462879773e-05,
      "loss": 2.5252,
      "step": 13578
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9702205061912537,
      "learning_rate": 1.858473530771789e-05,
      "loss": 2.5273,
      "step": 13579
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0361433029174805,
      "learning_rate": 1.8584524138002552e-05,
      "loss": 2.512,
      "step": 13580
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.039062261581421,
      "learning_rate": 1.8584312953734115e-05,
      "loss": 2.4796,
      "step": 13581
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0421451330184937,
      "learning_rate": 1.8584101754912934e-05,
      "loss": 2.5242,
      "step": 13582
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9245554804801941,
      "learning_rate": 1.8583890541539367e-05,
      "loss": 2.4085,
      "step": 13583
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0457721948623657,
      "learning_rate": 1.8583679313613778e-05,
      "loss": 2.3192,
      "step": 13584
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.041253924369812,
      "learning_rate": 1.8583468071136522e-05,
      "loss": 2.7253,
      "step": 13585
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0190532207489014,
      "learning_rate": 1.858325681410795e-05,
      "loss": 2.8149,
      "step": 13586
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.029199242591858,
      "learning_rate": 1.8583045542528433e-05,
      "loss": 2.7197,
      "step": 13587
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0177109241485596,
      "learning_rate": 1.858283425639832e-05,
      "loss": 2.5975,
      "step": 13588
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0130517482757568,
      "learning_rate": 1.858262295571797e-05,
      "loss": 2.6318,
      "step": 13589
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0661702156066895,
      "learning_rate": 1.8582411640487748e-05,
      "loss": 2.4916,
      "step": 13590
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9921836256980896,
      "learning_rate": 1.8582200310708005e-05,
      "loss": 2.5344,
      "step": 13591
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0167784690856934,
      "learning_rate": 1.85819889663791e-05,
      "loss": 2.6682,
      "step": 13592
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0124410390853882,
      "learning_rate": 1.8581777607501394e-05,
      "loss": 2.4695,
      "step": 13593
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0490349531173706,
      "learning_rate": 1.8581566234075246e-05,
      "loss": 2.6809,
      "step": 13594
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0397974252700806,
      "learning_rate": 1.858135484610101e-05,
      "loss": 2.4831,
      "step": 13595
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0224573612213135,
      "learning_rate": 1.8581143443579047e-05,
      "loss": 2.5776,
      "step": 13596
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9909846186637878,
      "learning_rate": 1.8580932026509717e-05,
      "loss": 2.5052,
      "step": 13597
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9882920980453491,
      "learning_rate": 1.8580720594893375e-05,
      "loss": 2.3652,
      "step": 13598
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9828038811683655,
      "learning_rate": 1.8580509148730384e-05,
      "loss": 2.3325,
      "step": 13599
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0268661975860596,
      "learning_rate": 1.8580297688021096e-05,
      "loss": 2.353,
      "step": 13600
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.063079833984375,
      "learning_rate": 1.8580086212765874e-05,
      "loss": 2.3858,
      "step": 13601
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9336090683937073,
      "learning_rate": 1.8579874722965077e-05,
      "loss": 2.5956,
      "step": 13602
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.016966700553894,
      "learning_rate": 1.857966321861906e-05,
      "loss": 2.6645,
      "step": 13603
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.028544545173645,
      "learning_rate": 1.8579451699728185e-05,
      "loss": 2.6767,
      "step": 13604
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0333306789398193,
      "learning_rate": 1.8579240166292808e-05,
      "loss": 2.3469,
      "step": 13605
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1387332677841187,
      "learning_rate": 1.8579028618313292e-05,
      "loss": 2.4454,
      "step": 13606
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0840494632720947,
      "learning_rate": 1.857881705578999e-05,
      "loss": 2.86,
      "step": 13607
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1136218309402466,
      "learning_rate": 1.857860547872326e-05,
      "loss": 2.5293,
      "step": 13608
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.018080472946167,
      "learning_rate": 1.8578393887113468e-05,
      "loss": 2.3279,
      "step": 13609
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1466455459594727,
      "learning_rate": 1.857818228096097e-05,
      "loss": 2.5156,
      "step": 13610
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.023206114768982,
      "learning_rate": 1.857797066026612e-05,
      "loss": 2.6545,
      "step": 13611
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9892317652702332,
      "learning_rate": 1.857775902502928e-05,
      "loss": 2.5321,
      "step": 13612
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0341308116912842,
      "learning_rate": 1.857754737525081e-05,
      "loss": 2.596,
      "step": 13613
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0795129537582397,
      "learning_rate": 1.8577335710931068e-05,
      "loss": 2.6279,
      "step": 13614
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0978021621704102,
      "learning_rate": 1.857712403207041e-05,
      "loss": 2.6377,
      "step": 13615
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4498867988586426,
      "learning_rate": 1.8576912338669198e-05,
      "loss": 2.4577,
      "step": 13616
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9086750149726868,
      "learning_rate": 1.8576700630727795e-05,
      "loss": 2.5091,
      "step": 13617
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9038055539131165,
      "learning_rate": 1.857648890824655e-05,
      "loss": 2.4242,
      "step": 13618
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.948725700378418,
      "learning_rate": 1.8576277171225827e-05,
      "loss": 2.7113,
      "step": 13619
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0154995918273926,
      "learning_rate": 1.8576065419665985e-05,
      "loss": 2.1458,
      "step": 13620
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0682824850082397,
      "learning_rate": 1.857585365356738e-05,
      "loss": 2.4919,
      "step": 13621
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9067275524139404,
      "learning_rate": 1.857564187293038e-05,
      "loss": 2.4889,
      "step": 13622
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.243470549583435,
      "learning_rate": 1.8575430077755338e-05,
      "loss": 2.6759,
      "step": 13623
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0404706001281738,
      "learning_rate": 1.857521826804261e-05,
      "loss": 2.3317,
      "step": 13624
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.94718337059021,
      "learning_rate": 1.857500644379256e-05,
      "loss": 2.5675,
      "step": 13625
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0967485904693604,
      "learning_rate": 1.8574794605005538e-05,
      "loss": 2.5216,
      "step": 13626
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0979712009429932,
      "learning_rate": 1.8574582751681917e-05,
      "loss": 2.4958,
      "step": 13627
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.022654414176941,
      "learning_rate": 1.857437088382205e-05,
      "loss": 2.6496,
      "step": 13628
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9837915897369385,
      "learning_rate": 1.8574159001426295e-05,
      "loss": 2.5991,
      "step": 13629
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9397380352020264,
      "learning_rate": 1.857394710449501e-05,
      "loss": 2.5631,
      "step": 13630
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.068333625793457,
      "learning_rate": 1.8573735193028553e-05,
      "loss": 2.488,
      "step": 13631
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9429948329925537,
      "learning_rate": 1.8573523267027293e-05,
      "loss": 2.5304,
      "step": 13632
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0042494535446167,
      "learning_rate": 1.8573311326491578e-05,
      "loss": 2.6488,
      "step": 13633
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1225155591964722,
      "learning_rate": 1.8573099371421773e-05,
      "loss": 2.7525,
      "step": 13634
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1274417638778687,
      "learning_rate": 1.8572887401818238e-05,
      "loss": 2.6262,
      "step": 13635
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0235035419464111,
      "learning_rate": 1.857267541768133e-05,
      "loss": 2.6944,
      "step": 13636
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9668083786964417,
      "learning_rate": 1.8572463419011404e-05,
      "loss": 2.4574,
      "step": 13637
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.062913417816162,
      "learning_rate": 1.857225140580883e-05,
      "loss": 2.6922,
      "step": 13638
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0569610595703125,
      "learning_rate": 1.857203937807396e-05,
      "loss": 2.76,
      "step": 13639
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1336702108383179,
      "learning_rate": 1.8571827335807155e-05,
      "loss": 2.4568,
      "step": 13640
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.108988881111145,
      "learning_rate": 1.8571615279008775e-05,
      "loss": 2.5085,
      "step": 13641
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0273370742797852,
      "learning_rate": 1.8571403207679178e-05,
      "loss": 2.1698,
      "step": 13642
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0456969738006592,
      "learning_rate": 1.8571191121818725e-05,
      "loss": 2.579,
      "step": 13643
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9971634149551392,
      "learning_rate": 1.8570979021427777e-05,
      "loss": 2.6104,
      "step": 13644
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0230352878570557,
      "learning_rate": 1.857076690650669e-05,
      "loss": 2.7937,
      "step": 13645
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9604034423828125,
      "learning_rate": 1.8570554777055824e-05,
      "loss": 2.3815,
      "step": 13646
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0729930400848389,
      "learning_rate": 1.857034263307554e-05,
      "loss": 2.4921,
      "step": 13647
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.211561679840088,
      "learning_rate": 1.85701304745662e-05,
      "loss": 2.5776,
      "step": 13648
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0859925746917725,
      "learning_rate": 1.8569918301528156e-05,
      "loss": 2.6991,
      "step": 13649
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9502233862876892,
      "learning_rate": 1.856970611396178e-05,
      "loss": 2.2908,
      "step": 13650
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0796771049499512,
      "learning_rate": 1.856949391186742e-05,
      "loss": 2.7266,
      "step": 13651
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1114485263824463,
      "learning_rate": 1.8569281695245443e-05,
      "loss": 2.7487,
      "step": 13652
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0180840492248535,
      "learning_rate": 1.8569069464096206e-05,
      "loss": 2.6848,
      "step": 13653
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.035373568534851,
      "learning_rate": 1.856885721842007e-05,
      "loss": 2.5174,
      "step": 13654
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.091227412223816,
      "learning_rate": 1.8568644958217386e-05,
      "loss": 2.6952,
      "step": 13655
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0248384475708008,
      "learning_rate": 1.856843268348853e-05,
      "loss": 2.533,
      "step": 13656
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.01960027217865,
      "learning_rate": 1.856822039423385e-05,
      "loss": 2.4491,
      "step": 13657
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0036900043487549,
      "learning_rate": 1.856800809045371e-05,
      "loss": 2.7096,
      "step": 13658
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0379307270050049,
      "learning_rate": 1.856779577214847e-05,
      "loss": 2.7168,
      "step": 13659
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.196238398551941,
      "learning_rate": 1.8567583439318488e-05,
      "loss": 2.5176,
      "step": 13660
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.976466178894043,
      "learning_rate": 1.8567371091964127e-05,
      "loss": 2.4449,
      "step": 13661
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0238276720046997,
      "learning_rate": 1.8567158730085743e-05,
      "loss": 2.5771,
      "step": 13662
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0248914957046509,
      "learning_rate": 1.85669463536837e-05,
      "loss": 2.5065,
      "step": 13663
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.988810122013092,
      "learning_rate": 1.8566733962758353e-05,
      "loss": 2.4251,
      "step": 13664
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2973036766052246,
      "learning_rate": 1.8566521557310068e-05,
      "loss": 2.5705,
      "step": 13665
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.139243245124817,
      "learning_rate": 1.8566309137339203e-05,
      "loss": 2.2316,
      "step": 13666
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9762417674064636,
      "learning_rate": 1.8566096702846117e-05,
      "loss": 2.669,
      "step": 13667
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0578564405441284,
      "learning_rate": 1.8565884253831167e-05,
      "loss": 2.4736,
      "step": 13668
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.013490080833435,
      "learning_rate": 1.8565671790294718e-05,
      "loss": 2.6249,
      "step": 13669
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9363232254981995,
      "learning_rate": 1.8565459312237132e-05,
      "loss": 2.5023,
      "step": 13670
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3770813941955566,
      "learning_rate": 1.8565246819658766e-05,
      "loss": 2.5388,
      "step": 13671
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0246474742889404,
      "learning_rate": 1.8565034312559978e-05,
      "loss": 2.3792,
      "step": 13672
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.022182822227478,
      "learning_rate": 1.856482179094113e-05,
      "loss": 2.3749,
      "step": 13673
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0249191522598267,
      "learning_rate": 1.8564609254802587e-05,
      "loss": 2.3437,
      "step": 13674
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.014477014541626,
      "learning_rate": 1.8564396704144703e-05,
      "loss": 2.7394,
      "step": 13675
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9734084606170654,
      "learning_rate": 1.8564184138967838e-05,
      "loss": 2.5213,
      "step": 13676
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1359466314315796,
      "learning_rate": 1.8563971559272356e-05,
      "loss": 2.3491,
      "step": 13677
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0186856985092163,
      "learning_rate": 1.8563758965058618e-05,
      "loss": 2.7489,
      "step": 13678
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9959302544593811,
      "learning_rate": 1.8563546356326983e-05,
      "loss": 2.4016,
      "step": 13679
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9377492666244507,
      "learning_rate": 1.856333373307781e-05,
      "loss": 2.6365,
      "step": 13680
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0957070589065552,
      "learning_rate": 1.856312109531146e-05,
      "loss": 2.5558,
      "step": 13681
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9700526595115662,
      "learning_rate": 1.8562908443028294e-05,
      "loss": 2.5908,
      "step": 13682
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1613595485687256,
      "learning_rate": 1.856269577622867e-05,
      "loss": 2.5802,
      "step": 13683
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0417972803115845,
      "learning_rate": 1.8562483094912956e-05,
      "loss": 2.3531,
      "step": 13684
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0196239948272705,
      "learning_rate": 1.8562270399081506e-05,
      "loss": 2.4078,
      "step": 13685
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9776376485824585,
      "learning_rate": 1.8562057688734682e-05,
      "loss": 2.5704,
      "step": 13686
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0084155797958374,
      "learning_rate": 1.8561844963872848e-05,
      "loss": 2.6623,
      "step": 13687
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.065015196800232,
      "learning_rate": 1.8561632224496355e-05,
      "loss": 2.3161,
      "step": 13688
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9604210257530212,
      "learning_rate": 1.8561419470605573e-05,
      "loss": 2.4468,
      "step": 13689
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0175347328186035,
      "learning_rate": 1.856120670220086e-05,
      "loss": 2.7423,
      "step": 13690
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9693474173545837,
      "learning_rate": 1.8560993919282576e-05,
      "loss": 2.7821,
      "step": 13691
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.000399112701416,
      "learning_rate": 1.8560781121851085e-05,
      "loss": 2.4603,
      "step": 13692
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2891472578048706,
      "learning_rate": 1.8560568309906742e-05,
      "loss": 2.283,
      "step": 13693
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0120347738265991,
      "learning_rate": 1.856035548344991e-05,
      "loss": 2.7135,
      "step": 13694
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0601418018341064,
      "learning_rate": 1.8560142642480952e-05,
      "loss": 2.5059,
      "step": 13695
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0097885131835938,
      "learning_rate": 1.8559929787000227e-05,
      "loss": 2.2935,
      "step": 13696
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.990449070930481,
      "learning_rate": 1.8559716917008096e-05,
      "loss": 2.4009,
      "step": 13697
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0527271032333374,
      "learning_rate": 1.8559504032504918e-05,
      "loss": 2.5741,
      "step": 13698
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9421226382255554,
      "learning_rate": 1.855929113349106e-05,
      "loss": 2.3433,
      "step": 13699
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0747311115264893,
      "learning_rate": 1.8559078219966873e-05,
      "loss": 2.4587,
      "step": 13700
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0049402713775635,
      "learning_rate": 1.855886529193273e-05,
      "loss": 2.3836,
      "step": 13701
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0299510955810547,
      "learning_rate": 1.8558652349388983e-05,
      "loss": 2.3089,
      "step": 13702
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0093724727630615,
      "learning_rate": 1.8558439392336e-05,
      "loss": 2.4433,
      "step": 13703
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.017310619354248,
      "learning_rate": 1.8558226420774132e-05,
      "loss": 2.5674,
      "step": 13704
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0900424718856812,
      "learning_rate": 1.855801343470375e-05,
      "loss": 2.5152,
      "step": 13705
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9827632904052734,
      "learning_rate": 1.8557800434125206e-05,
      "loss": 2.4411,
      "step": 13706
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1421928405761719,
      "learning_rate": 1.8557587419038872e-05,
      "loss": 2.5283,
      "step": 13707
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0577044486999512,
      "learning_rate": 1.8557374389445097e-05,
      "loss": 2.4844,
      "step": 13708
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.153832197189331,
      "learning_rate": 1.8557161345344253e-05,
      "loss": 2.4155,
      "step": 13709
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9921198487281799,
      "learning_rate": 1.8556948286736695e-05,
      "loss": 2.6014,
      "step": 13710
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2734930515289307,
      "learning_rate": 1.8556735213622784e-05,
      "loss": 2.5062,
      "step": 13711
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.02377450466156,
      "learning_rate": 1.8556522126002884e-05,
      "loss": 2.3253,
      "step": 13712
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0508278608322144,
      "learning_rate": 1.8556309023877356e-05,
      "loss": 2.4917,
      "step": 13713
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.029972791671753,
      "learning_rate": 1.8556095907246557e-05,
      "loss": 2.6173,
      "step": 13714
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9762114882469177,
      "learning_rate": 1.8555882776110855e-05,
      "loss": 2.3197,
      "step": 13715
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.047816276550293,
      "learning_rate": 1.855566963047061e-05,
      "loss": 2.7368,
      "step": 13716
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0476080179214478,
      "learning_rate": 1.8555456470326176e-05,
      "loss": 2.5961,
      "step": 13717
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0320361852645874,
      "learning_rate": 1.8555243295677924e-05,
      "loss": 2.4507,
      "step": 13718
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.130841851234436,
      "learning_rate": 1.8555030106526207e-05,
      "loss": 2.5555,
      "step": 13719
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1625958681106567,
      "learning_rate": 1.8554816902871393e-05,
      "loss": 2.5568,
      "step": 13720
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0545623302459717,
      "learning_rate": 1.855460368471384e-05,
      "loss": 2.4967,
      "step": 13721
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0731275081634521,
      "learning_rate": 1.855439045205391e-05,
      "loss": 2.3504,
      "step": 13722
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.957222580909729,
      "learning_rate": 1.8554177204891967e-05,
      "loss": 2.3845,
      "step": 13723
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1394282579421997,
      "learning_rate": 1.855396394322837e-05,
      "loss": 2.4581,
      "step": 13724
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.985804557800293,
      "learning_rate": 1.855375066706348e-05,
      "loss": 2.3934,
      "step": 13725
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9676510691642761,
      "learning_rate": 1.855353737639766e-05,
      "loss": 2.518,
      "step": 13726
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.113774061203003,
      "learning_rate": 1.855332407123127e-05,
      "loss": 2.3848,
      "step": 13727
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.120540976524353,
      "learning_rate": 1.855311075156467e-05,
      "loss": 2.6135,
      "step": 13728
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1420263051986694,
      "learning_rate": 1.855289741739823e-05,
      "loss": 2.3823,
      "step": 13729
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0530365705490112,
      "learning_rate": 1.85526840687323e-05,
      "loss": 2.4923,
      "step": 13730
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.012914776802063,
      "learning_rate": 1.8552470705567254e-05,
      "loss": 2.3245,
      "step": 13731
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9939793944358826,
      "learning_rate": 1.8552257327903443e-05,
      "loss": 2.511,
      "step": 13732
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0673493146896362,
      "learning_rate": 1.8552043935741233e-05,
      "loss": 2.4287,
      "step": 13733
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9436793923377991,
      "learning_rate": 1.8551830529080987e-05,
      "loss": 2.679,
      "step": 13734
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9705690741539001,
      "learning_rate": 1.8551617107923067e-05,
      "loss": 2.4314,
      "step": 13735
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9664095044136047,
      "learning_rate": 1.8551403672267832e-05,
      "loss": 2.6428,
      "step": 13736
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.048693299293518,
      "learning_rate": 1.8551190222115646e-05,
      "loss": 2.5131,
      "step": 13737
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1734650135040283,
      "learning_rate": 1.855097675746687e-05,
      "loss": 2.4154,
      "step": 13738
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9919093251228333,
      "learning_rate": 1.8550763278321864e-05,
      "loss": 2.2922,
      "step": 13739
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1430630683898926,
      "learning_rate": 1.8550549784680994e-05,
      "loss": 2.4379,
      "step": 13740
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2042346000671387,
      "learning_rate": 1.8550336276544622e-05,
      "loss": 2.437,
      "step": 13741
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9427896738052368,
      "learning_rate": 1.8550122753913104e-05,
      "loss": 2.3678,
      "step": 13742
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0769482851028442,
      "learning_rate": 1.8549909216786805e-05,
      "loss": 2.4858,
      "step": 13743
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9833381175994873,
      "learning_rate": 1.854969566516609e-05,
      "loss": 2.4389,
      "step": 13744
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.062645435333252,
      "learning_rate": 1.8549482099051318e-05,
      "loss": 2.6799,
      "step": 13745
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0913305282592773,
      "learning_rate": 1.854926851844285e-05,
      "loss": 2.5669,
      "step": 13746
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0809214115142822,
      "learning_rate": 1.8549054923341054e-05,
      "loss": 2.6361,
      "step": 13747
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9665805101394653,
      "learning_rate": 1.8548841313746286e-05,
      "loss": 2.5415,
      "step": 13748
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9955780506134033,
      "learning_rate": 1.854862768965891e-05,
      "loss": 2.4073,
      "step": 13749
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9092087149620056,
      "learning_rate": 1.8548414051079287e-05,
      "loss": 2.5754,
      "step": 13750
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2367430925369263,
      "learning_rate": 1.854820039800778e-05,
      "loss": 2.5774,
      "step": 13751
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.121954083442688,
      "learning_rate": 1.8547986730444755e-05,
      "loss": 2.3634,
      "step": 13752
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9716013073921204,
      "learning_rate": 1.854777304839057e-05,
      "loss": 2.3997,
      "step": 13753
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.006391167640686,
      "learning_rate": 1.8547559351845587e-05,
      "loss": 2.3742,
      "step": 13754
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9662158489227295,
      "learning_rate": 1.8547345640810173e-05,
      "loss": 2.4636,
      "step": 13755
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0497184991836548,
      "learning_rate": 1.8547131915284682e-05,
      "loss": 2.4405,
      "step": 13756
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0116333961486816,
      "learning_rate": 1.8546918175269483e-05,
      "loss": 2.4792,
      "step": 13757
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0508739948272705,
      "learning_rate": 1.8546704420764935e-05,
      "loss": 2.5015,
      "step": 13758
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0626779794692993,
      "learning_rate": 1.8546490651771404e-05,
      "loss": 2.2795,
      "step": 13759
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.440807819366455,
      "learning_rate": 1.854627686828925e-05,
      "loss": 2.5148,
      "step": 13760
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9905495047569275,
      "learning_rate": 1.8546063070318833e-05,
      "loss": 2.4453,
      "step": 13761
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9855846166610718,
      "learning_rate": 1.854584925786052e-05,
      "loss": 2.5127,
      "step": 13762
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2497421503067017,
      "learning_rate": 1.8545635430914673e-05,
      "loss": 2.5072,
      "step": 13763
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2570981979370117,
      "learning_rate": 1.854542158948165e-05,
      "loss": 2.455,
      "step": 13764
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0787248611450195,
      "learning_rate": 1.8545207733561817e-05,
      "loss": 2.5189,
      "step": 13765
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.091081142425537,
      "learning_rate": 1.8544993863155535e-05,
      "loss": 2.3565,
      "step": 13766
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.129654049873352,
      "learning_rate": 1.854477997826317e-05,
      "loss": 2.5329,
      "step": 13767
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9552907943725586,
      "learning_rate": 1.854456607888508e-05,
      "loss": 2.3851,
      "step": 13768
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.107631802558899,
      "learning_rate": 1.8544352165021632e-05,
      "loss": 2.4582,
      "step": 13769
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.068866491317749,
      "learning_rate": 1.8544138236673187e-05,
      "loss": 2.731,
      "step": 13770
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0217195749282837,
      "learning_rate": 1.8543924293840106e-05,
      "loss": 2.5827,
      "step": 13771
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0338984727859497,
      "learning_rate": 1.8543710336522752e-05,
      "loss": 2.7531,
      "step": 13772
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0161460638046265,
      "learning_rate": 1.854349636472149e-05,
      "loss": 2.6895,
      "step": 13773
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9841512441635132,
      "learning_rate": 1.854328237843668e-05,
      "loss": 2.5194,
      "step": 13774
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0556083917617798,
      "learning_rate": 1.8543068377668685e-05,
      "loss": 2.3451,
      "step": 13775
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0406686067581177,
      "learning_rate": 1.8542854362417872e-05,
      "loss": 2.3145,
      "step": 13776
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0495612621307373,
      "learning_rate": 1.8542640332684598e-05,
      "loss": 2.4431,
      "step": 13777
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0277858972549438,
      "learning_rate": 1.854242628846923e-05,
      "loss": 2.5607,
      "step": 13778
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.028851866722107,
      "learning_rate": 1.8542212229772127e-05,
      "loss": 2.5078,
      "step": 13779
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9490759372711182,
      "learning_rate": 1.854199815659366e-05,
      "loss": 2.6716,
      "step": 13780
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0324928760528564,
      "learning_rate": 1.854178406893418e-05,
      "loss": 2.3965,
      "step": 13781
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.131358027458191,
      "learning_rate": 1.854156996679406e-05,
      "loss": 2.5897,
      "step": 13782
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0139049291610718,
      "learning_rate": 1.8541355850173656e-05,
      "loss": 2.6275,
      "step": 13783
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.204034447669983,
      "learning_rate": 1.8541141719073333e-05,
      "loss": 2.5085,
      "step": 13784
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1097424030303955,
      "learning_rate": 1.854092757349346e-05,
      "loss": 2.5547,
      "step": 13785
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0915379524230957,
      "learning_rate": 1.8540713413434392e-05,
      "loss": 2.4918,
      "step": 13786
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.042398452758789,
      "learning_rate": 1.8540499238896495e-05,
      "loss": 2.543,
      "step": 13787
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2157005071640015,
      "learning_rate": 1.8540285049880132e-05,
      "loss": 2.7393,
      "step": 13788
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1199842691421509,
      "learning_rate": 1.854007084638567e-05,
      "loss": 2.4856,
      "step": 13789
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.968674898147583,
      "learning_rate": 1.8539856628413464e-05,
      "loss": 2.4144,
      "step": 13790
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9519121646881104,
      "learning_rate": 1.8539642395963884e-05,
      "loss": 2.4728,
      "step": 13791
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1310601234436035,
      "learning_rate": 1.853942814903729e-05,
      "loss": 2.6304,
      "step": 13792
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0082002878189087,
      "learning_rate": 1.8539213887634044e-05,
      "loss": 2.5693,
      "step": 13793
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0379189252853394,
      "learning_rate": 1.8538999611754517e-05,
      "loss": 2.3168,
      "step": 13794
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.990398108959198,
      "learning_rate": 1.853878532139906e-05,
      "loss": 2.5918,
      "step": 13795
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.242066502571106,
      "learning_rate": 1.8538571016568045e-05,
      "loss": 2.4584,
      "step": 13796
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0788792371749878,
      "learning_rate": 1.8538356697261837e-05,
      "loss": 2.3903,
      "step": 13797
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.189056158065796,
      "learning_rate": 1.853814236348079e-05,
      "loss": 2.7073,
      "step": 13798
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9318040609359741,
      "learning_rate": 1.8537928015225276e-05,
      "loss": 2.4568,
      "step": 13799
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.939230740070343,
      "learning_rate": 1.8537713652495655e-05,
      "loss": 2.7137,
      "step": 13800
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9861344695091248,
      "learning_rate": 1.8537499275292288e-05,
      "loss": 2.4795,
      "step": 13801
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0487004518508911,
      "learning_rate": 1.8537284883615544e-05,
      "loss": 2.5305,
      "step": 13802
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1241140365600586,
      "learning_rate": 1.8537070477465778e-05,
      "loss": 2.7747,
      "step": 13803
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9757043719291687,
      "learning_rate": 1.8536856056843366e-05,
      "loss": 2.3925,
      "step": 13804
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9545379877090454,
      "learning_rate": 1.853664162174866e-05,
      "loss": 2.5865,
      "step": 13805
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0999897718429565,
      "learning_rate": 1.853642717218203e-05,
      "loss": 2.273,
      "step": 13806
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9870830774307251,
      "learning_rate": 1.8536212708143837e-05,
      "loss": 2.6913,
      "step": 13807
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3724113702774048,
      "learning_rate": 1.8535998229634445e-05,
      "loss": 2.5239,
      "step": 13808
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0777809619903564,
      "learning_rate": 1.8535783736654217e-05,
      "loss": 2.6788,
      "step": 13809
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0873205661773682,
      "learning_rate": 1.8535569229203515e-05,
      "loss": 2.3604,
      "step": 13810
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1952613592147827,
      "learning_rate": 1.8535354707282712e-05,
      "loss": 2.5449,
      "step": 13811
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2907084226608276,
      "learning_rate": 1.8535140170892157e-05,
      "loss": 2.4886,
      "step": 13812
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.980699360370636,
      "learning_rate": 1.8534925620032223e-05,
      "loss": 2.6783,
      "step": 13813
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9446865320205688,
      "learning_rate": 1.8534711054703276e-05,
      "loss": 2.2911,
      "step": 13814
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0042754411697388,
      "learning_rate": 1.8534496474905674e-05,
      "loss": 2.3429,
      "step": 13815
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.077427864074707,
      "learning_rate": 1.853428188063978e-05,
      "loss": 2.6473,
      "step": 13816
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0665416717529297,
      "learning_rate": 1.8534067271905964e-05,
      "loss": 2.7148,
      "step": 13817
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0471711158752441,
      "learning_rate": 1.8533852648704585e-05,
      "loss": 2.6256,
      "step": 13818
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.109422206878662,
      "learning_rate": 1.8533638011036005e-05,
      "loss": 2.5837,
      "step": 13819
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.069140911102295,
      "learning_rate": 1.8533423358900596e-05,
      "loss": 2.4017,
      "step": 13820
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.006946086883545,
      "learning_rate": 1.8533208692298713e-05,
      "loss": 2.4784,
      "step": 13821
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0198025703430176,
      "learning_rate": 1.8532994011230726e-05,
      "loss": 2.6146,
      "step": 13822
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.055783748626709,
      "learning_rate": 1.8532779315696995e-05,
      "loss": 2.4713,
      "step": 13823
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.118528962135315,
      "learning_rate": 1.8532564605697886e-05,
      "loss": 2.5021,
      "step": 13824
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.369969367980957,
      "learning_rate": 1.8532349881233762e-05,
      "loss": 2.756,
      "step": 13825
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1044329404830933,
      "learning_rate": 1.8532135142304985e-05,
      "loss": 2.5213,
      "step": 13826
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0471919775009155,
      "learning_rate": 1.8531920388911924e-05,
      "loss": 2.5314,
      "step": 13827
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0922636985778809,
      "learning_rate": 1.8531705621054943e-05,
      "loss": 2.4641,
      "step": 13828
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0577070713043213,
      "learning_rate": 1.8531490838734403e-05,
      "loss": 2.7602,
      "step": 13829
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0019694566726685,
      "learning_rate": 1.8531276041950666e-05,
      "loss": 2.4795,
      "step": 13830
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1364164352416992,
      "learning_rate": 1.8531061230704103e-05,
      "loss": 2.5524,
      "step": 13831
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9143181443214417,
      "learning_rate": 1.8530846404995073e-05,
      "loss": 2.5908,
      "step": 13832
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0886832475662231,
      "learning_rate": 1.853063156482394e-05,
      "loss": 2.5645,
      "step": 13833
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9737259745597839,
      "learning_rate": 1.853041671019107e-05,
      "loss": 2.5431,
      "step": 13834
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0114816427230835,
      "learning_rate": 1.8530201841096825e-05,
      "loss": 2.4759,
      "step": 13835
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2084964513778687,
      "learning_rate": 1.8529986957541574e-05,
      "loss": 2.584,
      "step": 13836
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9568939208984375,
      "learning_rate": 1.8529772059525678e-05,
      "loss": 2.4322,
      "step": 13837
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0107742547988892,
      "learning_rate": 1.8529557147049502e-05,
      "loss": 2.1886,
      "step": 13838
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9517744779586792,
      "learning_rate": 1.8529342220113408e-05,
      "loss": 2.3592,
      "step": 13839
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0052404403686523,
      "learning_rate": 1.8529127278717766e-05,
      "loss": 2.4572,
      "step": 13840
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0317134857177734,
      "learning_rate": 1.8528912322862936e-05,
      "loss": 2.7667,
      "step": 13841
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1050649881362915,
      "learning_rate": 1.8528697352549278e-05,
      "loss": 2.4592,
      "step": 13842
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9898478388786316,
      "learning_rate": 1.8528482367777167e-05,
      "loss": 2.6252,
      "step": 13843
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.009041666984558,
      "learning_rate": 1.8528267368546963e-05,
      "loss": 2.4782,
      "step": 13844
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9907132983207703,
      "learning_rate": 1.8528052354859024e-05,
      "loss": 2.7351,
      "step": 13845
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.98476243019104,
      "learning_rate": 1.8527837326713723e-05,
      "loss": 2.6199,
      "step": 13846
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9474256634712219,
      "learning_rate": 1.8527622284111424e-05,
      "loss": 2.3082,
      "step": 13847
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0798821449279785,
      "learning_rate": 1.8527407227052485e-05,
      "loss": 2.4336,
      "step": 13848
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1093193292617798,
      "learning_rate": 1.8527192155537278e-05,
      "loss": 2.537,
      "step": 13849
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.5817654132843018,
      "learning_rate": 1.852697706956616e-05,
      "loss": 2.7033,
      "step": 13850
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0437334775924683,
      "learning_rate": 1.8526761969139503e-05,
      "loss": 2.4195,
      "step": 13851
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0524827241897583,
      "learning_rate": 1.8526546854257673e-05,
      "loss": 2.395,
      "step": 13852
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0545108318328857,
      "learning_rate": 1.8526331724921026e-05,
      "loss": 2.4571,
      "step": 13853
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0577198266983032,
      "learning_rate": 1.852611658112993e-05,
      "loss": 2.475,
      "step": 13854
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9447769522666931,
      "learning_rate": 1.852590142288475e-05,
      "loss": 2.4885,
      "step": 13855
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1827712059020996,
      "learning_rate": 1.852568625018585e-05,
      "loss": 2.4732,
      "step": 13856
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1124937534332275,
      "learning_rate": 1.8525471063033605e-05,
      "loss": 2.4828,
      "step": 13857
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0328752994537354,
      "learning_rate": 1.852525586142836e-05,
      "loss": 2.4362,
      "step": 13858
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9166346788406372,
      "learning_rate": 1.85250406453705e-05,
      "loss": 2.5902,
      "step": 13859
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9768039584159851,
      "learning_rate": 1.8524825414860377e-05,
      "loss": 2.5989,
      "step": 13860
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.004517912864685,
      "learning_rate": 1.852461016989836e-05,
      "loss": 2.3945,
      "step": 13861
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0446604490280151,
      "learning_rate": 1.852439491048481e-05,
      "loss": 2.4539,
      "step": 13862
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9426584839820862,
      "learning_rate": 1.8524179636620097e-05,
      "loss": 2.5145,
      "step": 13863
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5129836797714233,
      "learning_rate": 1.8523964348304592e-05,
      "loss": 2.4748,
      "step": 13864
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0957565307617188,
      "learning_rate": 1.8523749045538644e-05,
      "loss": 2.5747,
      "step": 13865
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.969735860824585,
      "learning_rate": 1.8523533728322628e-05,
      "loss": 2.3493,
      "step": 13866
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.219374418258667,
      "learning_rate": 1.852331839665691e-05,
      "loss": 2.5015,
      "step": 13867
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9990339279174805,
      "learning_rate": 1.852310305054185e-05,
      "loss": 2.7214,
      "step": 13868
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9446194767951965,
      "learning_rate": 1.8522887689977816e-05,
      "loss": 2.4611,
      "step": 13869
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9064151644706726,
      "learning_rate": 1.852267231496517e-05,
      "loss": 2.2674,
      "step": 13870
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.209175705909729,
      "learning_rate": 1.8522456925504286e-05,
      "loss": 2.2326,
      "step": 13871
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0280482769012451,
      "learning_rate": 1.8522241521595515e-05,
      "loss": 2.5763,
      "step": 13872
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4306354522705078,
      "learning_rate": 1.8522026103239236e-05,
      "loss": 2.4342,
      "step": 13873
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9602564573287964,
      "learning_rate": 1.8521810670435804e-05,
      "loss": 2.5501,
      "step": 13874
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0382978916168213,
      "learning_rate": 1.852159522318559e-05,
      "loss": 2.5004,
      "step": 13875
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9784868359565735,
      "learning_rate": 1.852137976148896e-05,
      "loss": 2.3991,
      "step": 13876
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9781507849693298,
      "learning_rate": 1.8521164285346277e-05,
      "loss": 2.4981,
      "step": 13877
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1152453422546387,
      "learning_rate": 1.85209487947579e-05,
      "loss": 2.6179,
      "step": 13878
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4128848314285278,
      "learning_rate": 1.852073328972421e-05,
      "loss": 2.6849,
      "step": 13879
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0191534757614136,
      "learning_rate": 1.8520517770245555e-05,
      "loss": 2.4288,
      "step": 13880
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9880427718162537,
      "learning_rate": 1.8520302236322313e-05,
      "loss": 2.8505,
      "step": 13881
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9044248461723328,
      "learning_rate": 1.852008668795484e-05,
      "loss": 2.6064,
      "step": 13882
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1131175756454468,
      "learning_rate": 1.851987112514351e-05,
      "loss": 2.2572,
      "step": 13883
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9873411655426025,
      "learning_rate": 1.8519655547888688e-05,
      "loss": 2.4812,
      "step": 13884
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0794159173965454,
      "learning_rate": 1.8519439956190728e-05,
      "loss": 2.3832,
      "step": 13885
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0929397344589233,
      "learning_rate": 1.8519224350050006e-05,
      "loss": 2.5738,
      "step": 13886
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.000065803527832,
      "learning_rate": 1.8519008729466884e-05,
      "loss": 2.5162,
      "step": 13887
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0658007860183716,
      "learning_rate": 1.8518793094441735e-05,
      "loss": 2.4007,
      "step": 13888
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9914517402648926,
      "learning_rate": 1.851857744497491e-05,
      "loss": 2.3843,
      "step": 13889
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1625293493270874,
      "learning_rate": 1.8518361781066786e-05,
      "loss": 2.4857,
      "step": 13890
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0112817287445068,
      "learning_rate": 1.8518146102717724e-05,
      "loss": 2.5604,
      "step": 13891
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1010968685150146,
      "learning_rate": 1.851793040992809e-05,
      "loss": 2.4323,
      "step": 13892
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9948228597640991,
      "learning_rate": 1.8517714702698254e-05,
      "loss": 2.4743,
      "step": 13893
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0361465215682983,
      "learning_rate": 1.851749898102858e-05,
      "loss": 2.6138,
      "step": 13894
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.991205632686615,
      "learning_rate": 1.8517283244919428e-05,
      "loss": 2.4412,
      "step": 13895
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0754314661026,
      "learning_rate": 1.8517067494371165e-05,
      "loss": 2.5244,
      "step": 13896
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9755176305770874,
      "learning_rate": 1.8516851729384165e-05,
      "loss": 2.5899,
      "step": 13897
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0548096895217896,
      "learning_rate": 1.8516635949958784e-05,
      "loss": 2.5636,
      "step": 13898
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1172529458999634,
      "learning_rate": 1.8516420156095395e-05,
      "loss": 2.5506,
      "step": 13899
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1267045736312866,
      "learning_rate": 1.851620434779436e-05,
      "loss": 2.4825,
      "step": 13900
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1094465255737305,
      "learning_rate": 1.8515988525056043e-05,
      "loss": 2.5255,
      "step": 13901
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0424580574035645,
      "learning_rate": 1.8515772687880816e-05,
      "loss": 2.6443,
      "step": 13902
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1114834547042847,
      "learning_rate": 1.851555683626904e-05,
      "loss": 2.5331,
      "step": 13903
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9658899307250977,
      "learning_rate": 1.8515340970221084e-05,
      "loss": 2.6395,
      "step": 13904
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1470657587051392,
      "learning_rate": 1.851512508973731e-05,
      "loss": 2.3335,
      "step": 13905
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0656650066375732,
      "learning_rate": 1.8514909194818085e-05,
      "loss": 2.671,
      "step": 13906
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9879010915756226,
      "learning_rate": 1.851469328546378e-05,
      "loss": 2.4687,
      "step": 13907
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.071947693824768,
      "learning_rate": 1.8514477361674755e-05,
      "loss": 2.5758,
      "step": 13908
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0482511520385742,
      "learning_rate": 1.8514261423451377e-05,
      "loss": 2.8058,
      "step": 13909
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0880649089813232,
      "learning_rate": 1.8514045470794016e-05,
      "loss": 2.3609,
      "step": 13910
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.201133370399475,
      "learning_rate": 1.8513829503703034e-05,
      "loss": 2.3103,
      "step": 13911
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9858090281486511,
      "learning_rate": 1.85136135221788e-05,
      "loss": 2.4711,
      "step": 13912
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9679577350616455,
      "learning_rate": 1.851339752622168e-05,
      "loss": 2.4377,
      "step": 13913
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1656277179718018,
      "learning_rate": 1.851318151583204e-05,
      "loss": 2.5276,
      "step": 13914
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0644898414611816,
      "learning_rate": 1.851296549101024e-05,
      "loss": 2.4828,
      "step": 13915
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9571747183799744,
      "learning_rate": 1.8512749451756654e-05,
      "loss": 2.7528,
      "step": 13916
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0597444772720337,
      "learning_rate": 1.8512533398071648e-05,
      "loss": 2.3655,
      "step": 13917
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9577142596244812,
      "learning_rate": 1.851231732995558e-05,
      "loss": 2.3356,
      "step": 13918
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9876158833503723,
      "learning_rate": 1.8512101247408828e-05,
      "loss": 2.6043,
      "step": 13919
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.059340000152588,
      "learning_rate": 1.851188515043175e-05,
      "loss": 2.4242,
      "step": 13920
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0858098268508911,
      "learning_rate": 1.8511669039024715e-05,
      "loss": 2.7227,
      "step": 13921
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0298196077346802,
      "learning_rate": 1.851145291318809e-05,
      "loss": 2.4922,
      "step": 13922
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0131202936172485,
      "learning_rate": 1.8511236772922242e-05,
      "loss": 2.5325,
      "step": 13923
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.037158727645874,
      "learning_rate": 1.8511020618227533e-05,
      "loss": 2.5408,
      "step": 13924
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1211131811141968,
      "learning_rate": 1.8510804449104333e-05,
      "loss": 2.5481,
      "step": 13925
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0492547750473022,
      "learning_rate": 1.851058826555301e-05,
      "loss": 2.4316,
      "step": 13926
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9980981945991516,
      "learning_rate": 1.8510372067573925e-05,
      "loss": 2.5165,
      "step": 13927
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9872399568557739,
      "learning_rate": 1.851015585516745e-05,
      "loss": 2.6634,
      "step": 13928
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9714349508285522,
      "learning_rate": 1.8509939628333947e-05,
      "loss": 2.5745,
      "step": 13929
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9611914157867432,
      "learning_rate": 1.850972338707379e-05,
      "loss": 2.4293,
      "step": 13930
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0361499786376953,
      "learning_rate": 1.8509507131387338e-05,
      "loss": 2.578,
      "step": 13931
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0059047937393188,
      "learning_rate": 1.850929086127496e-05,
      "loss": 2.333,
      "step": 13932
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9884565472602844,
      "learning_rate": 1.8509074576737023e-05,
      "loss": 2.7218,
      "step": 13933
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0481704473495483,
      "learning_rate": 1.8508858277773894e-05,
      "loss": 2.5921,
      "step": 13934
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0409239530563354,
      "learning_rate": 1.850864196438594e-05,
      "loss": 2.5524,
      "step": 13935
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0323517322540283,
      "learning_rate": 1.8508425636573526e-05,
      "loss": 2.3793,
      "step": 13936
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0490739345550537,
      "learning_rate": 1.8508209294337022e-05,
      "loss": 2.6761,
      "step": 13937
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9907083511352539,
      "learning_rate": 1.850799293767679e-05,
      "loss": 2.5663,
      "step": 13938
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0024378299713135,
      "learning_rate": 1.85077765665932e-05,
      "loss": 2.5003,
      "step": 13939
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1190310716629028,
      "learning_rate": 1.8507560181086614e-05,
      "loss": 2.5448,
      "step": 13940
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9452284574508667,
      "learning_rate": 1.850734378115741e-05,
      "loss": 2.5504,
      "step": 13941
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9902544021606445,
      "learning_rate": 1.8507127366805942e-05,
      "loss": 2.3823,
      "step": 13942
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9856080412864685,
      "learning_rate": 1.8506910938032585e-05,
      "loss": 2.5794,
      "step": 13943
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0269795656204224,
      "learning_rate": 1.8506694494837702e-05,
      "loss": 2.3115,
      "step": 13944
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.969538688659668,
      "learning_rate": 1.8506478037221663e-05,
      "loss": 2.431,
      "step": 13945
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1328142881393433,
      "learning_rate": 1.8506261565184834e-05,
      "loss": 2.5684,
      "step": 13946
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0259790420532227,
      "learning_rate": 1.850604507872758e-05,
      "loss": 2.3593,
      "step": 13947
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0048943758010864,
      "learning_rate": 1.850582857785027e-05,
      "loss": 2.4015,
      "step": 13948
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9295235872268677,
      "learning_rate": 1.8505612062553272e-05,
      "loss": 2.3137,
      "step": 13949
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.04998779296875,
      "learning_rate": 1.850539553283695e-05,
      "loss": 2.4764,
      "step": 13950
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.144703984260559,
      "learning_rate": 1.850517898870167e-05,
      "loss": 2.5165,
      "step": 13951
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0002739429473877,
      "learning_rate": 1.8504962430147806e-05,
      "loss": 2.584,
      "step": 13952
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9536712765693665,
      "learning_rate": 1.8504745857175715e-05,
      "loss": 2.3541,
      "step": 13953
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0708433389663696,
      "learning_rate": 1.850452926978578e-05,
      "loss": 2.4829,
      "step": 13954
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0570560693740845,
      "learning_rate": 1.850431266797835e-05,
      "loss": 2.3146,
      "step": 13955
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.060447335243225,
      "learning_rate": 1.85040960517538e-05,
      "loss": 2.5135,
      "step": 13956
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9364571571350098,
      "learning_rate": 1.85038794211125e-05,
      "loss": 2.4467,
      "step": 13957
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0018972158432007,
      "learning_rate": 1.8503662776054815e-05,
      "loss": 2.6646,
      "step": 13958
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.154001235961914,
      "learning_rate": 1.850344611658111e-05,
      "loss": 2.59,
      "step": 13959
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0223491191864014,
      "learning_rate": 1.8503229442691757e-05,
      "loss": 2.6088,
      "step": 13960
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9951496124267578,
      "learning_rate": 1.8503012754387118e-05,
      "loss": 2.236,
      "step": 13961
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1424660682678223,
      "learning_rate": 1.8502796051667564e-05,
      "loss": 2.4247,
      "step": 13962
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9850589036941528,
      "learning_rate": 1.850257933453346e-05,
      "loss": 2.6082,
      "step": 13963
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1026206016540527,
      "learning_rate": 1.8502362602985178e-05,
      "loss": 2.5055,
      "step": 13964
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.034222960472107,
      "learning_rate": 1.850214585702308e-05,
      "loss": 2.3962,
      "step": 13965
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.059678316116333,
      "learning_rate": 1.8501929096647536e-05,
      "loss": 2.5188,
      "step": 13966
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.103209376335144,
      "learning_rate": 1.8501712321858912e-05,
      "loss": 2.3317,
      "step": 13967
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0515764951705933,
      "learning_rate": 1.850149553265758e-05,
      "loss": 2.3891,
      "step": 13968
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.998396098613739,
      "learning_rate": 1.8501278729043902e-05,
      "loss": 2.7696,
      "step": 13969
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0689064264297485,
      "learning_rate": 1.8501061911018248e-05,
      "loss": 2.7829,
      "step": 13970
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0892817974090576,
      "learning_rate": 1.8500845078580985e-05,
      "loss": 2.3826,
      "step": 13971
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9730070233345032,
      "learning_rate": 1.850062823173248e-05,
      "loss": 2.5369,
      "step": 13972
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9850181937217712,
      "learning_rate": 1.8500411370473102e-05,
      "loss": 2.5415,
      "step": 13973
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0756962299346924,
      "learning_rate": 1.8500194494803217e-05,
      "loss": 2.7248,
      "step": 13974
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.145607829093933,
      "learning_rate": 1.8499977604723197e-05,
      "loss": 2.677,
      "step": 13975
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0190156698226929,
      "learning_rate": 1.8499760700233402e-05,
      "loss": 2.618,
      "step": 13976
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9796890020370483,
      "learning_rate": 1.8499543781334204e-05,
      "loss": 2.5132,
      "step": 13977
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.997871994972229,
      "learning_rate": 1.8499326848025977e-05,
      "loss": 2.4937,
      "step": 13978
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0825223922729492,
      "learning_rate": 1.849910990030908e-05,
      "loss": 2.5416,
      "step": 13979
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9667558670043945,
      "learning_rate": 1.8498892938183882e-05,
      "loss": 2.5153,
      "step": 13980
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.121747612953186,
      "learning_rate": 1.849867596165075e-05,
      "loss": 2.5959,
      "step": 13981
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0352764129638672,
      "learning_rate": 1.849845897071006e-05,
      "loss": 2.3696,
      "step": 13982
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0324286222457886,
      "learning_rate": 1.8498241965362167e-05,
      "loss": 2.3192,
      "step": 13983
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1892292499542236,
      "learning_rate": 1.849802494560745e-05,
      "loss": 2.5495,
      "step": 13984
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7479138374328613,
      "learning_rate": 1.849780791144627e-05,
      "loss": 2.4893,
      "step": 13985
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.8872804045677185,
      "learning_rate": 1.8497590862879e-05,
      "loss": 2.7029,
      "step": 13986
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1490598917007446,
      "learning_rate": 1.8497373799906006e-05,
      "loss": 2.6917,
      "step": 13987
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0476115942001343,
      "learning_rate": 1.8497156722527655e-05,
      "loss": 2.5383,
      "step": 13988
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9933100938796997,
      "learning_rate": 1.8496939630744313e-05,
      "loss": 2.4735,
      "step": 13989
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0285145044326782,
      "learning_rate": 1.8496722524556353e-05,
      "loss": 2.587,
      "step": 13990
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9687412977218628,
      "learning_rate": 1.849650540396414e-05,
      "loss": 2.3004,
      "step": 13991
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0790519714355469,
      "learning_rate": 1.8496288268968043e-05,
      "loss": 2.3266,
      "step": 13992
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.000101089477539,
      "learning_rate": 1.849607111956843e-05,
      "loss": 2.5484,
      "step": 13993
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.097325325012207,
      "learning_rate": 1.8495853955765667e-05,
      "loss": 2.4162,
      "step": 13994
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.084106206893921,
      "learning_rate": 1.8495636777560126e-05,
      "loss": 2.6232,
      "step": 13995
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1221373081207275,
      "learning_rate": 1.8495419584952176e-05,
      "loss": 2.5839,
      "step": 13996
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.000111699104309,
      "learning_rate": 1.849520237794218e-05,
      "loss": 2.4323,
      "step": 13997
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.011516809463501,
      "learning_rate": 1.8494985156530507e-05,
      "loss": 2.6257,
      "step": 13998
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9870070815086365,
      "learning_rate": 1.849476792071753e-05,
      "loss": 2.5551,
      "step": 13999
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1027106046676636,
      "learning_rate": 1.849455067050361e-05,
      "loss": 2.5325,
      "step": 14000
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0357223749160767,
      "learning_rate": 1.8494333405889122e-05,
      "loss": 2.3762,
      "step": 14001
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.987444281578064,
      "learning_rate": 1.849411612687443e-05,
      "loss": 2.4173,
      "step": 14002
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0781333446502686,
      "learning_rate": 1.8493898833459905e-05,
      "loss": 2.4374,
      "step": 14003
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.037648320198059,
      "learning_rate": 1.849368152564592e-05,
      "loss": 2.4972,
      "step": 14004
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9493960738182068,
      "learning_rate": 1.849346420343283e-05,
      "loss": 2.4451,
      "step": 14005
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1579781770706177,
      "learning_rate": 1.8493246866821016e-05,
      "loss": 2.5713,
      "step": 14006
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9476361274719238,
      "learning_rate": 1.849302951581084e-05,
      "loss": 2.603,
      "step": 14007
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9162507057189941,
      "learning_rate": 1.8492812150402673e-05,
      "loss": 2.5214,
      "step": 14008
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9954502582550049,
      "learning_rate": 1.8492594770596884e-05,
      "loss": 2.4622,
      "step": 14009
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0385032892227173,
      "learning_rate": 1.849237737639384e-05,
      "loss": 2.4832,
      "step": 14010
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0789949893951416,
      "learning_rate": 1.849215996779391e-05,
      "loss": 2.5359,
      "step": 14011
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9378491044044495,
      "learning_rate": 1.849194254479746e-05,
      "loss": 2.3558,
      "step": 14012
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9590149521827698,
      "learning_rate": 1.849172510740486e-05,
      "loss": 2.3419,
      "step": 14013
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1593565940856934,
      "learning_rate": 1.8491507655616484e-05,
      "loss": 2.5554,
      "step": 14014
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0216230154037476,
      "learning_rate": 1.8491290189432694e-05,
      "loss": 2.4555,
      "step": 14015
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9329771399497986,
      "learning_rate": 1.8491072708853864e-05,
      "loss": 2.6919,
      "step": 14016
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0071110725402832,
      "learning_rate": 1.8490855213880356e-05,
      "loss": 2.5092,
      "step": 14017
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9563032388687134,
      "learning_rate": 1.8490637704512542e-05,
      "loss": 2.446,
      "step": 14018
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.214130163192749,
      "learning_rate": 1.8490420180750793e-05,
      "loss": 2.5383,
      "step": 14019
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9442853927612305,
      "learning_rate": 1.8490202642595474e-05,
      "loss": 2.6846,
      "step": 14020
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9510801434516907,
      "learning_rate": 1.8489985090046958e-05,
      "loss": 2.5992,
      "step": 14021
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9650717973709106,
      "learning_rate": 1.848976752310561e-05,
      "loss": 2.5543,
      "step": 14022
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0181572437286377,
      "learning_rate": 1.8489549941771797e-05,
      "loss": 2.5121,
      "step": 14023
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0882530212402344,
      "learning_rate": 1.8489332346045895e-05,
      "loss": 2.5606,
      "step": 14024
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9283697605133057,
      "learning_rate": 1.848911473592827e-05,
      "loss": 2.2071,
      "step": 14025
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0800703763961792,
      "learning_rate": 1.8488897111419287e-05,
      "loss": 2.3796,
      "step": 14026
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9563822746276855,
      "learning_rate": 1.848867947251932e-05,
      "loss": 2.5407,
      "step": 14027
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3204840421676636,
      "learning_rate": 1.8488461819228736e-05,
      "loss": 2.5442,
      "step": 14028
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0005841255187988,
      "learning_rate": 1.8488244151547903e-05,
      "loss": 2.4054,
      "step": 14029
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0602432489395142,
      "learning_rate": 1.848802646947719e-05,
      "loss": 2.3941,
      "step": 14030
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0064088106155396,
      "learning_rate": 1.848780877301697e-05,
      "loss": 2.3182,
      "step": 14031
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0962238311767578,
      "learning_rate": 1.8487591062167605e-05,
      "loss": 2.5172,
      "step": 14032
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.055698037147522,
      "learning_rate": 1.8487373336929467e-05,
      "loss": 2.6704,
      "step": 14033
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.15523099899292,
      "learning_rate": 1.848715559730293e-05,
      "loss": 2.4793,
      "step": 14034
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0771498680114746,
      "learning_rate": 1.848693784328836e-05,
      "loss": 2.5926,
      "step": 14035
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.976463794708252,
      "learning_rate": 1.848672007488612e-05,
      "loss": 2.6357,
      "step": 14036
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0025172233581543,
      "learning_rate": 1.8486502292096587e-05,
      "loss": 2.5304,
      "step": 14037
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0338506698608398,
      "learning_rate": 1.8486284494920127e-05,
      "loss": 2.5779,
      "step": 14038
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0642577409744263,
      "learning_rate": 1.848606668335711e-05,
      "loss": 2.5587,
      "step": 14039
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0326883792877197,
      "learning_rate": 1.8485848857407908e-05,
      "loss": 2.5478,
      "step": 14040
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9608352780342102,
      "learning_rate": 1.8485631017072884e-05,
      "loss": 2.782,
      "step": 14041
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2063039541244507,
      "learning_rate": 1.8485413162352414e-05,
      "loss": 2.3318,
      "step": 14042
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0434601306915283,
      "learning_rate": 1.8485195293246862e-05,
      "loss": 2.4688,
      "step": 14043
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.187724232673645,
      "learning_rate": 1.84849774097566e-05,
      "loss": 2.6084,
      "step": 14044
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9462956786155701,
      "learning_rate": 1.8484759511881997e-05,
      "loss": 2.3011,
      "step": 14045
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1052714586257935,
      "learning_rate": 1.8484541599623423e-05,
      "loss": 2.5725,
      "step": 14046
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9982590675354004,
      "learning_rate": 1.8484323672981244e-05,
      "loss": 2.6389,
      "step": 14047
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.151613712310791,
      "learning_rate": 1.8484105731955836e-05,
      "loss": 2.5088,
      "step": 14048
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9543294906616211,
      "learning_rate": 1.8483887776547563e-05,
      "loss": 2.466,
      "step": 14049
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0353517532348633,
      "learning_rate": 1.848366980675679e-05,
      "loss": 2.3979,
      "step": 14050
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0080896615982056,
      "learning_rate": 1.84834518225839e-05,
      "loss": 2.4029,
      "step": 14051
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9529969096183777,
      "learning_rate": 1.8483233824029253e-05,
      "loss": 2.3538,
      "step": 14052
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0602895021438599,
      "learning_rate": 1.8483015811093217e-05,
      "loss": 2.5839,
      "step": 14053
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1376410722732544,
      "learning_rate": 1.848279778377617e-05,
      "loss": 2.704,
      "step": 14054
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1037144660949707,
      "learning_rate": 1.8482579742078478e-05,
      "loss": 2.6148,
      "step": 14055
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9398228526115417,
      "learning_rate": 1.8482361686000506e-05,
      "loss": 2.1523,
      "step": 14056
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9704081416130066,
      "learning_rate": 1.848214361554263e-05,
      "loss": 2.551,
      "step": 14057
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9512416124343872,
      "learning_rate": 1.8481925530705215e-05,
      "loss": 2.5855,
      "step": 14058
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0964001417160034,
      "learning_rate": 1.848170743148863e-05,
      "loss": 2.5256,
      "step": 14059
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0583107471466064,
      "learning_rate": 1.848148931789325e-05,
      "loss": 2.6825,
      "step": 14060
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.140560269355774,
      "learning_rate": 1.8481271189919443e-05,
      "loss": 2.6353,
      "step": 14061
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1364904642105103,
      "learning_rate": 1.8481053047567578e-05,
      "loss": 2.5555,
      "step": 14062
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9948729872703552,
      "learning_rate": 1.8480834890838023e-05,
      "loss": 2.4818,
      "step": 14063
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.087968111038208,
      "learning_rate": 1.848061671973115e-05,
      "loss": 2.58,
      "step": 14064
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1206598281860352,
      "learning_rate": 1.8480398534247327e-05,
      "loss": 2.5484,
      "step": 14065
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0388624668121338,
      "learning_rate": 1.848018033438693e-05,
      "loss": 2.3865,
      "step": 14066
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9369828701019287,
      "learning_rate": 1.8479962120150318e-05,
      "loss": 2.3576,
      "step": 14067
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1265910863876343,
      "learning_rate": 1.847974389153787e-05,
      "loss": 2.4552,
      "step": 14068
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9358604550361633,
      "learning_rate": 1.8479525648549953e-05,
      "loss": 2.5507,
      "step": 14069
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9721848964691162,
      "learning_rate": 1.8479307391186937e-05,
      "loss": 2.5425,
      "step": 14070
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0610079765319824,
      "learning_rate": 1.8479089119449193e-05,
      "loss": 2.4678,
      "step": 14071
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.994761049747467,
      "learning_rate": 1.8478870833337087e-05,
      "loss": 2.4663,
      "step": 14072
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0452607870101929,
      "learning_rate": 1.8478652532850994e-05,
      "loss": 2.6,
      "step": 14073
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0229160785675049,
      "learning_rate": 1.8478434217991283e-05,
      "loss": 2.6846,
      "step": 14074
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9882445931434631,
      "learning_rate": 1.8478215888758325e-05,
      "loss": 2.5263,
      "step": 14075
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0629684925079346,
      "learning_rate": 1.8477997545152485e-05,
      "loss": 2.4891,
      "step": 14076
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.01994788646698,
      "learning_rate": 1.8477779187174136e-05,
      "loss": 2.4217,
      "step": 14077
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.099860668182373,
      "learning_rate": 1.847756081482365e-05,
      "loss": 2.4936,
      "step": 14078
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1039175987243652,
      "learning_rate": 1.8477342428101397e-05,
      "loss": 2.5401,
      "step": 14079
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9603409767150879,
      "learning_rate": 1.8477124027007746e-05,
      "loss": 2.4774,
      "step": 14080
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0283178091049194,
      "learning_rate": 1.8476905611543066e-05,
      "loss": 2.5227,
      "step": 14081
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0795007944107056,
      "learning_rate": 1.847668718170773e-05,
      "loss": 2.4218,
      "step": 14082
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.053787112236023,
      "learning_rate": 1.8476468737502108e-05,
      "loss": 2.3823,
      "step": 14083
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0875762701034546,
      "learning_rate": 1.8476250278926565e-05,
      "loss": 2.6659,
      "step": 14084
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0321216583251953,
      "learning_rate": 1.8476031805981477e-05,
      "loss": 2.7426,
      "step": 14085
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2885979413986206,
      "learning_rate": 1.8475813318667218e-05,
      "loss": 2.493,
      "step": 14086
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3792693614959717,
      "learning_rate": 1.847559481698415e-05,
      "loss": 2.3694,
      "step": 14087
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.022022008895874,
      "learning_rate": 1.8475376300932643e-05,
      "loss": 2.6031,
      "step": 14088
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0905762910842896,
      "learning_rate": 1.8475157770513076e-05,
      "loss": 2.6903,
      "step": 14089
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0995315313339233,
      "learning_rate": 1.8474939225725815e-05,
      "loss": 2.2348,
      "step": 14090
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0111842155456543,
      "learning_rate": 1.847472066657123e-05,
      "loss": 2.4235,
      "step": 14091
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0030462741851807,
      "learning_rate": 1.847450209304969e-05,
      "loss": 2.4626,
      "step": 14092
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9706465601921082,
      "learning_rate": 1.8474283505161567e-05,
      "loss": 2.2124,
      "step": 14093
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9771750569343567,
      "learning_rate": 1.8474064902907233e-05,
      "loss": 2.5064,
      "step": 14094
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3212887048721313,
      "learning_rate": 1.8473846286287058e-05,
      "loss": 2.763,
      "step": 14095
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9911571741104126,
      "learning_rate": 1.847362765530141e-05,
      "loss": 2.695,
      "step": 14096
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0335255861282349,
      "learning_rate": 1.8473409009950665e-05,
      "loss": 2.3705,
      "step": 14097
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9829024076461792,
      "learning_rate": 1.847319035023519e-05,
      "loss": 2.6665,
      "step": 14098
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.052053689956665,
      "learning_rate": 1.847297167615535e-05,
      "loss": 2.5441,
      "step": 14099
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9963861107826233,
      "learning_rate": 1.8472752987711532e-05,
      "loss": 2.6942,
      "step": 14100
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.012911081314087,
      "learning_rate": 1.8472534284904088e-05,
      "loss": 2.4947,
      "step": 14101
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9508237838745117,
      "learning_rate": 1.84723155677334e-05,
      "loss": 2.6461,
      "step": 14102
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0013843774795532,
      "learning_rate": 1.8472096836199838e-05,
      "loss": 2.4104,
      "step": 14103
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.008884072303772,
      "learning_rate": 1.847187809030377e-05,
      "loss": 2.6384,
      "step": 14104
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0789802074432373,
      "learning_rate": 1.8471659330045564e-05,
      "loss": 2.4392,
      "step": 14105
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9976202845573425,
      "learning_rate": 1.8471440555425596e-05,
      "loss": 2.3446,
      "step": 14106
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1377012729644775,
      "learning_rate": 1.8471221766444238e-05,
      "loss": 2.6862,
      "step": 14107
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.146780014038086,
      "learning_rate": 1.847100296310186e-05,
      "loss": 2.388,
      "step": 14108
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1051992177963257,
      "learning_rate": 1.847078414539883e-05,
      "loss": 2.6929,
      "step": 14109
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0387645959854126,
      "learning_rate": 1.8470565313335515e-05,
      "loss": 2.7197,
      "step": 14110
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9523546099662781,
      "learning_rate": 1.8470346466912297e-05,
      "loss": 2.7354,
      "step": 14111
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9959710836410522,
      "learning_rate": 1.847012760612954e-05,
      "loss": 2.5236,
      "step": 14112
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0056349039077759,
      "learning_rate": 1.8469908730987613e-05,
      "loss": 2.4503,
      "step": 14113
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9661648869514465,
      "learning_rate": 1.8469689841486894e-05,
      "loss": 2.5591,
      "step": 14114
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.077040433883667,
      "learning_rate": 1.846947093762775e-05,
      "loss": 2.4043,
      "step": 14115
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9362578988075256,
      "learning_rate": 1.846925201941055e-05,
      "loss": 2.4626,
      "step": 14116
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0600535869598389,
      "learning_rate": 1.846903308683567e-05,
      "loss": 2.5416,
      "step": 14117
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9970381259918213,
      "learning_rate": 1.8468814139903477e-05,
      "loss": 2.3919,
      "step": 14118
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9935874938964844,
      "learning_rate": 1.8468595178614347e-05,
      "loss": 2.5563,
      "step": 14119
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0722177028656006,
      "learning_rate": 1.8468376202968646e-05,
      "loss": 2.4857,
      "step": 14120
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3631970882415771,
      "learning_rate": 1.8468157212966746e-05,
      "loss": 2.4563,
      "step": 14121
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.110094666481018,
      "learning_rate": 1.8467938208609023e-05,
      "loss": 2.4862,
      "step": 14122
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0591727495193481,
      "learning_rate": 1.846771918989584e-05,
      "loss": 2.4113,
      "step": 14123
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0066885948181152,
      "learning_rate": 1.8467500156827577e-05,
      "loss": 2.5249,
      "step": 14124
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.032286524772644,
      "learning_rate": 1.84672811094046e-05,
      "loss": 2.4846,
      "step": 14125
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0299633741378784,
      "learning_rate": 1.8467062047627284e-05,
      "loss": 2.3331,
      "step": 14126
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.095974326133728,
      "learning_rate": 1.8466842971495995e-05,
      "loss": 2.4513,
      "step": 14127
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9958102703094482,
      "learning_rate": 1.846662388101111e-05,
      "loss": 2.4383,
      "step": 14128
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0545589923858643,
      "learning_rate": 1.8466404776173e-05,
      "loss": 2.442,
      "step": 14129
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0988966226577759,
      "learning_rate": 1.846618565698203e-05,
      "loss": 2.7269,
      "step": 14130
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0236876010894775,
      "learning_rate": 1.8465966523438578e-05,
      "loss": 2.294,
      "step": 14131
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0810487270355225,
      "learning_rate": 1.846574737554301e-05,
      "loss": 2.6657,
      "step": 14132
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0220266580581665,
      "learning_rate": 1.8465528213295708e-05,
      "loss": 2.6287,
      "step": 14133
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0600489377975464,
      "learning_rate": 1.8465309036697034e-05,
      "loss": 2.5407,
      "step": 14134
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9802705645561218,
      "learning_rate": 1.8465089845747358e-05,
      "loss": 2.3765,
      "step": 14135
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0127304792404175,
      "learning_rate": 1.8464870640447057e-05,
      "loss": 2.5336,
      "step": 14136
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.8898575901985168,
      "learning_rate": 1.84646514207965e-05,
      "loss": 2.447,
      "step": 14137
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9505419135093689,
      "learning_rate": 1.8464432186796064e-05,
      "loss": 2.664,
      "step": 14138
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0330990552902222,
      "learning_rate": 1.8464212938446116e-05,
      "loss": 2.5279,
      "step": 14139
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5061064958572388,
      "learning_rate": 1.8463993675747028e-05,
      "loss": 2.3875,
      "step": 14140
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0512808561325073,
      "learning_rate": 1.846377439869917e-05,
      "loss": 2.605,
      "step": 14141
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0641471147537231,
      "learning_rate": 1.8463555107302914e-05,
      "loss": 2.6511,
      "step": 14142
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0551183223724365,
      "learning_rate": 1.846333580155864e-05,
      "loss": 2.5502,
      "step": 14143
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0660901069641113,
      "learning_rate": 1.8463116481466707e-05,
      "loss": 2.2881,
      "step": 14144
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.05013108253479,
      "learning_rate": 1.8462897147027495e-05,
      "loss": 2.6263,
      "step": 14145
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0283706188201904,
      "learning_rate": 1.8462677798241375e-05,
      "loss": 2.5001,
      "step": 14146
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.083648920059204,
      "learning_rate": 1.8462458435108716e-05,
      "loss": 2.5988,
      "step": 14147
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0630229711532593,
      "learning_rate": 1.8462239057629893e-05,
      "loss": 2.5681,
      "step": 14148
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0675034523010254,
      "learning_rate": 1.8462019665805277e-05,
      "loss": 2.445,
      "step": 14149
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0006356239318848,
      "learning_rate": 1.846180025963524e-05,
      "loss": 2.412,
      "step": 14150
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.022204041481018,
      "learning_rate": 1.8461580839120147e-05,
      "loss": 2.3501,
      "step": 14151
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0598100423812866,
      "learning_rate": 1.846136140426038e-05,
      "loss": 2.2889,
      "step": 14152
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0603327751159668,
      "learning_rate": 1.846114195505631e-05,
      "loss": 2.4149,
      "step": 14153
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0131468772888184,
      "learning_rate": 1.8460922491508305e-05,
      "loss": 2.8307,
      "step": 14154
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.8916856646537781,
      "learning_rate": 1.8460703013616738e-05,
      "loss": 2.5402,
      "step": 14155
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.144557237625122,
      "learning_rate": 1.8460483521381983e-05,
      "loss": 2.5732,
      "step": 14156
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0145654678344727,
      "learning_rate": 1.8460264014804406e-05,
      "loss": 2.5979,
      "step": 14157
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2348403930664062,
      "learning_rate": 1.846004449388439e-05,
      "loss": 2.5081,
      "step": 14158
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9397201538085938,
      "learning_rate": 1.84598249586223e-05,
      "loss": 2.6665,
      "step": 14159
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0562947988510132,
      "learning_rate": 1.8459605409018504e-05,
      "loss": 2.3545,
      "step": 14160
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0515308380126953,
      "learning_rate": 1.845938584507338e-05,
      "loss": 2.6086,
      "step": 14161
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0452606678009033,
      "learning_rate": 1.8459166266787302e-05,
      "loss": 2.7246,
      "step": 14162
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9469851851463318,
      "learning_rate": 1.845894667416064e-05,
      "loss": 2.5439,
      "step": 14163
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9797993302345276,
      "learning_rate": 1.8458727067193766e-05,
      "loss": 2.6275,
      "step": 14164
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.125062346458435,
      "learning_rate": 1.845850744588705e-05,
      "loss": 2.6169,
      "step": 14165
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.092812180519104,
      "learning_rate": 1.845828781024087e-05,
      "loss": 2.417,
      "step": 14166
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0636792182922363,
      "learning_rate": 1.8458068160255592e-05,
      "loss": 2.7049,
      "step": 14167
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9779411554336548,
      "learning_rate": 1.8457848495931593e-05,
      "loss": 2.7481,
      "step": 14168
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0134028196334839,
      "learning_rate": 1.845762881726924e-05,
      "loss": 2.3641,
      "step": 14169
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9612484574317932,
      "learning_rate": 1.8457409124268914e-05,
      "loss": 2.497,
      "step": 14170
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9746793508529663,
      "learning_rate": 1.8457189416930982e-05,
      "loss": 2.5388,
      "step": 14171
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9517659544944763,
      "learning_rate": 1.8456969695255815e-05,
      "loss": 2.4868,
      "step": 14172
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9759001135826111,
      "learning_rate": 1.8456749959243786e-05,
      "loss": 2.7054,
      "step": 14173
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0743430852890015,
      "learning_rate": 1.8456530208895273e-05,
      "loss": 2.5215,
      "step": 14174
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1076399087905884,
      "learning_rate": 1.8456310444210643e-05,
      "loss": 2.4187,
      "step": 14175
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0557185411453247,
      "learning_rate": 1.8456090665190268e-05,
      "loss": 2.4148,
      "step": 14176
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0153733491897583,
      "learning_rate": 1.8455870871834528e-05,
      "loss": 2.4542,
      "step": 14177
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.158904790878296,
      "learning_rate": 1.8455651064143785e-05,
      "loss": 2.5337,
      "step": 14178
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9816735982894897,
      "learning_rate": 1.8455431242118416e-05,
      "loss": 2.5545,
      "step": 14179
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9255999326705933,
      "learning_rate": 1.8455211405758796e-05,
      "loss": 2.6459,
      "step": 14180
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.276779055595398,
      "learning_rate": 1.84549915550653e-05,
      "loss": 2.6315,
      "step": 14181
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0300562381744385,
      "learning_rate": 1.8454771690038293e-05,
      "loss": 2.426,
      "step": 14182
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9978501796722412,
      "learning_rate": 1.8454551810678155e-05,
      "loss": 2.4316,
      "step": 14183
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.113525390625,
      "learning_rate": 1.8454331916985253e-05,
      "loss": 2.3783,
      "step": 14184
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9217022061347961,
      "learning_rate": 1.8454112008959965e-05,
      "loss": 2.435,
      "step": 14185
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0165960788726807,
      "learning_rate": 1.8453892086602655e-05,
      "loss": 2.5845,
      "step": 14186
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1377590894699097,
      "learning_rate": 1.8453672149913708e-05,
      "loss": 2.5475,
      "step": 14187
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0379835367202759,
      "learning_rate": 1.8453452198893488e-05,
      "loss": 2.5528,
      "step": 14188
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.006225347518921,
      "learning_rate": 1.845323223354237e-05,
      "loss": 2.7505,
      "step": 14189
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9539048671722412,
      "learning_rate": 1.8453012253860728e-05,
      "loss": 2.4395,
      "step": 14190
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0458660125732422,
      "learning_rate": 1.8452792259848934e-05,
      "loss": 2.4165,
      "step": 14191
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9894160628318787,
      "learning_rate": 1.8452572251507362e-05,
      "loss": 2.4069,
      "step": 14192
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0054399967193604,
      "learning_rate": 1.8452352228836385e-05,
      "loss": 2.5635,
      "step": 14193
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0574134588241577,
      "learning_rate": 1.8452132191836378e-05,
      "loss": 2.549,
      "step": 14194
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9460330605506897,
      "learning_rate": 1.8451912140507707e-05,
      "loss": 2.5806,
      "step": 14195
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9588295817375183,
      "learning_rate": 1.845169207485075e-05,
      "loss": 2.7354,
      "step": 14196
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1283693313598633,
      "learning_rate": 1.845147199486588e-05,
      "loss": 2.3402,
      "step": 14197
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0069292783737183,
      "learning_rate": 1.845125190055347e-05,
      "loss": 2.4997,
      "step": 14198
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9751546382904053,
      "learning_rate": 1.8451031791913892e-05,
      "loss": 2.6595,
      "step": 14199
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1134023666381836,
      "learning_rate": 1.8450811668947522e-05,
      "loss": 2.5193,
      "step": 14200
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0481102466583252,
      "learning_rate": 1.845059153165473e-05,
      "loss": 2.4346,
      "step": 14201
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9810603260993958,
      "learning_rate": 1.8450371380035887e-05,
      "loss": 2.3532,
      "step": 14202
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.055596113204956,
      "learning_rate": 1.8450151214091373e-05,
      "loss": 2.3754,
      "step": 14203
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9956697225570679,
      "learning_rate": 1.8449931033821554e-05,
      "loss": 2.7123,
      "step": 14204
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9848672747612,
      "learning_rate": 1.844971083922681e-05,
      "loss": 2.5066,
      "step": 14205
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9996389746665955,
      "learning_rate": 1.8449490630307512e-05,
      "loss": 2.2748,
      "step": 14206
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0152214765548706,
      "learning_rate": 1.8449270407064032e-05,
      "loss": 2.3928,
      "step": 14207
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.0327410697937012,
      "learning_rate": 1.8449050169496743e-05,
      "loss": 2.334,
      "step": 14208
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.006111741065979,
      "learning_rate": 1.844882991760602e-05,
      "loss": 2.453,
      "step": 14209
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0334393978118896,
      "learning_rate": 1.8448609651392236e-05,
      "loss": 2.5067,
      "step": 14210
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0256181955337524,
      "learning_rate": 1.8448389370855762e-05,
      "loss": 2.2818,
      "step": 14211
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1163643598556519,
      "learning_rate": 1.8448169075996974e-05,
      "loss": 2.3392,
      "step": 14212
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0427374839782715,
      "learning_rate": 1.8447948766816245e-05,
      "loss": 2.6167,
      "step": 14213
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0074982643127441,
      "learning_rate": 1.844772844331395e-05,
      "loss": 2.5902,
      "step": 14214
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.140596628189087,
      "learning_rate": 1.844750810549046e-05,
      "loss": 2.6373,
      "step": 14215
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.10335373878479,
      "learning_rate": 1.844728775334615e-05,
      "loss": 2.5112,
      "step": 14216
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.105147361755371,
      "learning_rate": 1.8447067386881392e-05,
      "loss": 2.4423,
      "step": 14217
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9452331066131592,
      "learning_rate": 1.844684700609656e-05,
      "loss": 2.3984,
      "step": 14218
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9546242952346802,
      "learning_rate": 1.844662661099203e-05,
      "loss": 2.504,
      "step": 14219
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9931342005729675,
      "learning_rate": 1.8446406201568173e-05,
      "loss": 2.3973,
      "step": 14220
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9459481239318848,
      "learning_rate": 1.8446185777825363e-05,
      "loss": 2.4793,
      "step": 14221
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2250792980194092,
      "learning_rate": 1.844596533976397e-05,
      "loss": 2.4589,
      "step": 14222
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9947899580001831,
      "learning_rate": 1.844574488738438e-05,
      "loss": 2.4782,
      "step": 14223
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9604560732841492,
      "learning_rate": 1.8445524420686954e-05,
      "loss": 2.2022,
      "step": 14224
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9757143259048462,
      "learning_rate": 1.8445303939672073e-05,
      "loss": 2.6746,
      "step": 14225
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.188727855682373,
      "learning_rate": 1.8445083444340105e-05,
      "loss": 2.2473,
      "step": 14226
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2706393003463745,
      "learning_rate": 1.8444862934691428e-05,
      "loss": 2.4185,
      "step": 14227
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9507459998130798,
      "learning_rate": 1.8444642410726413e-05,
      "loss": 2.516,
      "step": 14228
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0721542835235596,
      "learning_rate": 1.844442187244544e-05,
      "loss": 2.5,
      "step": 14229
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0581278800964355,
      "learning_rate": 1.8444201319848875e-05,
      "loss": 2.384,
      "step": 14230
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9951406121253967,
      "learning_rate": 1.8443980752937098e-05,
      "loss": 2.3616,
      "step": 14231
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0012754201889038,
      "learning_rate": 1.844376017171048e-05,
      "loss": 2.4776,
      "step": 14232
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0579534769058228,
      "learning_rate": 1.8443539576169392e-05,
      "loss": 2.3543,
      "step": 14233
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0486456155776978,
      "learning_rate": 1.8443318966314213e-05,
      "loss": 2.521,
      "step": 14234
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.955548882484436,
      "learning_rate": 1.8443098342145313e-05,
      "loss": 2.2973,
      "step": 14235
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0080817937850952,
      "learning_rate": 1.8442877703663073e-05,
      "loss": 2.3991,
      "step": 14236
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9365665912628174,
      "learning_rate": 1.844265705086786e-05,
      "loss": 2.5,
      "step": 14237
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0707800388336182,
      "learning_rate": 1.8442436383760047e-05,
      "loss": 2.4603,
      "step": 14238
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1258642673492432,
      "learning_rate": 1.8442215702340017e-05,
      "loss": 2.5386,
      "step": 14239
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1009873151779175,
      "learning_rate": 1.8441995006608135e-05,
      "loss": 2.5698,
      "step": 14240
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1662088632583618,
      "learning_rate": 1.8441774296564777e-05,
      "loss": 2.6134,
      "step": 14241
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.082529067993164,
      "learning_rate": 1.8441553572210324e-05,
      "loss": 2.5006,
      "step": 14242
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0547049045562744,
      "learning_rate": 1.844133283354514e-05,
      "loss": 2.3571,
      "step": 14243
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9976827502250671,
      "learning_rate": 1.844111208056961e-05,
      "loss": 2.5244,
      "step": 14244
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0231939554214478,
      "learning_rate": 1.8440891313284097e-05,
      "loss": 2.6045,
      "step": 14245
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9267946481704712,
      "learning_rate": 1.8440670531688984e-05,
      "loss": 2.4431,
      "step": 14246
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9318065643310547,
      "learning_rate": 1.844044973578464e-05,
      "loss": 2.421,
      "step": 14247
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0338979959487915,
      "learning_rate": 1.844022892557144e-05,
      "loss": 2.4322,
      "step": 14248
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1507006883621216,
      "learning_rate": 1.8440008101049765e-05,
      "loss": 2.5083,
      "step": 14249
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0840864181518555,
      "learning_rate": 1.8439787262219978e-05,
      "loss": 2.5999,
      "step": 14250
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.055724859237671,
      "learning_rate": 1.8439566409082462e-05,
      "loss": 2.4156,
      "step": 14251
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9659200310707092,
      "learning_rate": 1.843934554163759e-05,
      "loss": 2.3629,
      "step": 14252
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.008461594581604,
      "learning_rate": 1.843912465988573e-05,
      "loss": 2.5545,
      "step": 14253
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5230202674865723,
      "learning_rate": 1.843890376382727e-05,
      "loss": 2.3785,
      "step": 14254
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0549534559249878,
      "learning_rate": 1.8438682853462572e-05,
      "loss": 2.4843,
      "step": 14255
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2066901922225952,
      "learning_rate": 1.8438461928792013e-05,
      "loss": 2.5355,
      "step": 14256
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9424762725830078,
      "learning_rate": 1.8438240989815968e-05,
      "loss": 2.4062,
      "step": 14257
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0357025861740112,
      "learning_rate": 1.8438020036534818e-05,
      "loss": 2.1548,
      "step": 14258
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9138870239257812,
      "learning_rate": 1.8437799068948928e-05,
      "loss": 2.4964,
      "step": 14259
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.992909848690033,
      "learning_rate": 1.843757808705868e-05,
      "loss": 2.6564,
      "step": 14260
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0579537153244019,
      "learning_rate": 1.8437357090864446e-05,
      "loss": 2.5689,
      "step": 14261
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.029547095298767,
      "learning_rate": 1.8437136080366596e-05,
      "loss": 2.522,
      "step": 14262
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0036616325378418,
      "learning_rate": 1.843691505556551e-05,
      "loss": 2.6203,
      "step": 14263
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1006197929382324,
      "learning_rate": 1.8436694016461566e-05,
      "loss": 2.6186,
      "step": 14264
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2211641073226929,
      "learning_rate": 1.8436472963055128e-05,
      "loss": 2.7975,
      "step": 14265
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0192794799804688,
      "learning_rate": 1.8436251895346583e-05,
      "loss": 2.3635,
      "step": 14266
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2799460887908936,
      "learning_rate": 1.8436030813336296e-05,
      "loss": 2.4146,
      "step": 14267
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2081127166748047,
      "learning_rate": 1.8435809717024647e-05,
      "loss": 2.5566,
      "step": 14268
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9555680751800537,
      "learning_rate": 1.8435588606412006e-05,
      "loss": 2.6178,
      "step": 14269
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0243381261825562,
      "learning_rate": 1.8435367481498755e-05,
      "loss": 2.6333,
      "step": 14270
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.012215495109558,
      "learning_rate": 1.8435146342285263e-05,
      "loss": 2.4588,
      "step": 14271
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1477874517440796,
      "learning_rate": 1.843492518877191e-05,
      "loss": 2.2516,
      "step": 14272
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0185211896896362,
      "learning_rate": 1.8434704020959064e-05,
      "loss": 2.5221,
      "step": 14273
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9638640284538269,
      "learning_rate": 1.8434482838847104e-05,
      "loss": 2.4683,
      "step": 14274
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9814720153808594,
      "learning_rate": 1.8434261642436412e-05,
      "loss": 2.7466,
      "step": 14275
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9674534797668457,
      "learning_rate": 1.8434040431727346e-05,
      "loss": 2.5999,
      "step": 14276
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0492565631866455,
      "learning_rate": 1.8433819206720297e-05,
      "loss": 2.5494,
      "step": 14277
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1215978860855103,
      "learning_rate": 1.8433597967415632e-05,
      "loss": 2.3256,
      "step": 14278
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3827989101409912,
      "learning_rate": 1.843337671381373e-05,
      "loss": 2.5766,
      "step": 14279
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0534651279449463,
      "learning_rate": 1.843315544591496e-05,
      "loss": 2.4317,
      "step": 14280
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0503129959106445,
      "learning_rate": 1.8432934163719702e-05,
      "loss": 2.6339,
      "step": 14281
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0984442234039307,
      "learning_rate": 1.8432712867228334e-05,
      "loss": 2.6042,
      "step": 14282
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0883716344833374,
      "learning_rate": 1.8432491556441224e-05,
      "loss": 2.3907,
      "step": 14283
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0093209743499756,
      "learning_rate": 1.843227023135875e-05,
      "loss": 2.5831,
      "step": 14284
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1084113121032715,
      "learning_rate": 1.843204889198129e-05,
      "loss": 2.3544,
      "step": 14285
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.092668056488037,
      "learning_rate": 1.8431827538309214e-05,
      "loss": 2.5324,
      "step": 14286
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9482700824737549,
      "learning_rate": 1.84316061703429e-05,
      "loss": 2.6115,
      "step": 14287
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.083282232284546,
      "learning_rate": 1.8431384788082728e-05,
      "loss": 2.415,
      "step": 14288
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.8921733498573303,
      "learning_rate": 1.8431163391529067e-05,
      "loss": 2.414,
      "step": 14289
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0507872104644775,
      "learning_rate": 1.8430941980682294e-05,
      "loss": 2.4861,
      "step": 14290
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0198578834533691,
      "learning_rate": 1.8430720555542785e-05,
      "loss": 2.4522,
      "step": 14291
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0444791316986084,
      "learning_rate": 1.8430499116110914e-05,
      "loss": 2.4684,
      "step": 14292
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.329617977142334,
      "learning_rate": 1.843027766238706e-05,
      "loss": 2.4179,
      "step": 14293
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0999236106872559,
      "learning_rate": 1.8430056194371593e-05,
      "loss": 2.5365,
      "step": 14294
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2111891508102417,
      "learning_rate": 1.8429834712064892e-05,
      "loss": 2.5005,
      "step": 14295
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.163147211074829,
      "learning_rate": 1.842961321546733e-05,
      "loss": 2.4998,
      "step": 14296
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0193400382995605,
      "learning_rate": 1.8429391704579284e-05,
      "loss": 2.4516,
      "step": 14297
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1066367626190186,
      "learning_rate": 1.842917017940113e-05,
      "loss": 2.3249,
      "step": 14298
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0895251035690308,
      "learning_rate": 1.8428948639933246e-05,
      "loss": 2.4671,
      "step": 14299
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0749406814575195,
      "learning_rate": 1.8428727086176003e-05,
      "loss": 2.4902,
      "step": 14300
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0152685642242432,
      "learning_rate": 1.842850551812978e-05,
      "loss": 2.3625,
      "step": 14301
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.028812050819397,
      "learning_rate": 1.8428283935794947e-05,
      "loss": 2.4794,
      "step": 14302
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.078819751739502,
      "learning_rate": 1.8428062339171887e-05,
      "loss": 2.6713,
      "step": 14303
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.101564884185791,
      "learning_rate": 1.842784072826097e-05,
      "loss": 2.6317,
      "step": 14304
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9094209671020508,
      "learning_rate": 1.8427619103062576e-05,
      "loss": 2.3678,
      "step": 14305
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.939450204372406,
      "learning_rate": 1.842739746357708e-05,
      "loss": 2.3753,
      "step": 14306
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0909343957901,
      "learning_rate": 1.8427175809804853e-05,
      "loss": 2.4532,
      "step": 14307
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1648476123809814,
      "learning_rate": 1.8426954141746277e-05,
      "loss": 2.4294,
      "step": 14308
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9611251950263977,
      "learning_rate": 1.8426732459401724e-05,
      "loss": 2.3703,
      "step": 14309
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1731679439544678,
      "learning_rate": 1.8426510762771568e-05,
      "loss": 2.5167,
      "step": 14310
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0739790201187134,
      "learning_rate": 1.842628905185619e-05,
      "loss": 2.5582,
      "step": 14311
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9295178651809692,
      "learning_rate": 1.8426067326655965e-05,
      "loss": 2.5722,
      "step": 14312
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0171358585357666,
      "learning_rate": 1.8425845587171264e-05,
      "loss": 2.4053,
      "step": 14313
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1889433860778809,
      "learning_rate": 1.842562383340247e-05,
      "loss": 2.6836,
      "step": 14314
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0410497188568115,
      "learning_rate": 1.8425402065349954e-05,
      "loss": 2.3962,
      "step": 14315
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1687980890274048,
      "learning_rate": 1.8425180283014094e-05,
      "loss": 2.4307,
      "step": 14316
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.078590750694275,
      "learning_rate": 1.8424958486395262e-05,
      "loss": 2.5382,
      "step": 14317
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.072052001953125,
      "learning_rate": 1.842473667549384e-05,
      "loss": 2.4265,
      "step": 14318
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0670092105865479,
      "learning_rate": 1.84245148503102e-05,
      "loss": 2.3236,
      "step": 14319
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3014166355133057,
      "learning_rate": 1.8424293010844722e-05,
      "loss": 2.3987,
      "step": 14320
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9344671964645386,
      "learning_rate": 1.8424071157097777e-05,
      "loss": 2.3456,
      "step": 14321
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.102289080619812,
      "learning_rate": 1.842384928906974e-05,
      "loss": 2.5958,
      "step": 14322
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0118821859359741,
      "learning_rate": 1.8423627406760994e-05,
      "loss": 2.6654,
      "step": 14323
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0976296663284302,
      "learning_rate": 1.8423405510171912e-05,
      "loss": 2.572,
      "step": 14324
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9620150923728943,
      "learning_rate": 1.842318359930287e-05,
      "loss": 2.5405,
      "step": 14325
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3383134603500366,
      "learning_rate": 1.8422961674154244e-05,
      "loss": 2.422,
      "step": 14326
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0125999450683594,
      "learning_rate": 1.842273973472641e-05,
      "loss": 2.534,
      "step": 14327
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0015085935592651,
      "learning_rate": 1.8422517781019748e-05,
      "loss": 2.7444,
      "step": 14328
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9586878418922424,
      "learning_rate": 1.8422295813034625e-05,
      "loss": 2.4125,
      "step": 14329
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1187189817428589,
      "learning_rate": 1.8422073830771426e-05,
      "loss": 2.3659,
      "step": 14330
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3868210315704346,
      "learning_rate": 1.8421851834230522e-05,
      "loss": 2.3671,
      "step": 14331
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0335534811019897,
      "learning_rate": 1.8421629823412294e-05,
      "loss": 2.5984,
      "step": 14332
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.075850248336792,
      "learning_rate": 1.8421407798317116e-05,
      "loss": 2.5016,
      "step": 14333
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0339456796646118,
      "learning_rate": 1.8421185758945364e-05,
      "loss": 2.5266,
      "step": 14334
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0344489812850952,
      "learning_rate": 1.8420963705297417e-05,
      "loss": 2.6077,
      "step": 14335
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0641871690750122,
      "learning_rate": 1.8420741637373648e-05,
      "loss": 2.5135,
      "step": 14336
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1420725584030151,
      "learning_rate": 1.8420519555174433e-05,
      "loss": 2.5215,
      "step": 14337
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0243897438049316,
      "learning_rate": 1.8420297458700154e-05,
      "loss": 2.4683,
      "step": 14338
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.073936939239502,
      "learning_rate": 1.8420075347951182e-05,
      "loss": 2.5409,
      "step": 14339
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1431101560592651,
      "learning_rate": 1.8419853222927895e-05,
      "loss": 2.5725,
      "step": 14340
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1799185276031494,
      "learning_rate": 1.841963108363067e-05,
      "loss": 2.5589,
      "step": 14341
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1194970607757568,
      "learning_rate": 1.8419408930059886e-05,
      "loss": 2.7847,
      "step": 14342
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.057461142539978,
      "learning_rate": 1.8419186762215916e-05,
      "loss": 2.4753,
      "step": 14343
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.05705988407135,
      "learning_rate": 1.8418964580099136e-05,
      "loss": 2.257,
      "step": 14344
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1673436164855957,
      "learning_rate": 1.8418742383709925e-05,
      "loss": 2.6343,
      "step": 14345
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0844238996505737,
      "learning_rate": 1.841852017304866e-05,
      "loss": 2.3347,
      "step": 14346
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0528498888015747,
      "learning_rate": 1.8418297948115716e-05,
      "loss": 2.514,
      "step": 14347
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9968503713607788,
      "learning_rate": 1.8418075708911473e-05,
      "loss": 2.527,
      "step": 14348
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0906234979629517,
      "learning_rate": 1.84178534554363e-05,
      "loss": 2.5548,
      "step": 14349
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.042209506034851,
      "learning_rate": 1.8417631187690584e-05,
      "loss": 2.4779,
      "step": 14350
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0047932863235474,
      "learning_rate": 1.8417408905674694e-05,
      "loss": 2.5369,
      "step": 14351
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9657132029533386,
      "learning_rate": 1.8417186609389013e-05,
      "loss": 2.2722,
      "step": 14352
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0122276544570923,
      "learning_rate": 1.8416964298833913e-05,
      "loss": 2.5613,
      "step": 14353
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0135762691497803,
      "learning_rate": 1.8416741974009774e-05,
      "loss": 2.2858,
      "step": 14354
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9015774726867676,
      "learning_rate": 1.841651963491697e-05,
      "loss": 2.5225,
      "step": 14355
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0013126134872437,
      "learning_rate": 1.8416297281555876e-05,
      "loss": 2.4727,
      "step": 14356
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.144752860069275,
      "learning_rate": 1.8416074913926877e-05,
      "loss": 2.6585,
      "step": 14357
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9695423245429993,
      "learning_rate": 1.8415852532030342e-05,
      "loss": 2.5716,
      "step": 14358
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2316689491271973,
      "learning_rate": 1.8415630135866652e-05,
      "loss": 2.3662,
      "step": 14359
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.992457389831543,
      "learning_rate": 1.8415407725436186e-05,
      "loss": 2.4681,
      "step": 14360
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4700379371643066,
      "learning_rate": 1.8415185300739317e-05,
      "loss": 2.3624,
      "step": 14361
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2197599411010742,
      "learning_rate": 1.841496286177642e-05,
      "loss": 2.6145,
      "step": 14362
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0679917335510254,
      "learning_rate": 1.841474040854788e-05,
      "loss": 2.5003,
      "step": 14363
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0836076736450195,
      "learning_rate": 1.841451794105407e-05,
      "loss": 2.6407,
      "step": 14364
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.040937900543213,
      "learning_rate": 1.8414295459295363e-05,
      "loss": 2.4513,
      "step": 14365
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.908172070980072,
      "learning_rate": 1.8414072963272143e-05,
      "loss": 2.3143,
      "step": 14366
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2149982452392578,
      "learning_rate": 1.8413850452984784e-05,
      "loss": 2.5243,
      "step": 14367
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0625287294387817,
      "learning_rate": 1.841362792843366e-05,
      "loss": 2.6235,
      "step": 14368
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1418529748916626,
      "learning_rate": 1.8413405389619153e-05,
      "loss": 2.4959,
      "step": 14369
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1592366695404053,
      "learning_rate": 1.841318283654164e-05,
      "loss": 2.5304,
      "step": 14370
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0595813989639282,
      "learning_rate": 1.8412960269201496e-05,
      "loss": 2.6767,
      "step": 14371
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9778610467910767,
      "learning_rate": 1.8412737687599102e-05,
      "loss": 2.4406,
      "step": 14372
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9606781601905823,
      "learning_rate": 1.8412515091734828e-05,
      "loss": 2.4291,
      "step": 14373
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0237945318222046,
      "learning_rate": 1.841229248160906e-05,
      "loss": 2.4442,
      "step": 14374
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.043229341506958,
      "learning_rate": 1.841206985722217e-05,
      "loss": 2.5747,
      "step": 14375
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0202441215515137,
      "learning_rate": 1.8411847218574535e-05,
      "loss": 2.2053,
      "step": 14376
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1924773454666138,
      "learning_rate": 1.8411624565666536e-05,
      "loss": 2.4929,
      "step": 14377
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9632012844085693,
      "learning_rate": 1.841140189849855e-05,
      "loss": 2.6045,
      "step": 14378
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1840672492980957,
      "learning_rate": 1.8411179217070953e-05,
      "loss": 2.3303,
      "step": 14379
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9838675856590271,
      "learning_rate": 1.8410956521384124e-05,
      "loss": 2.3207,
      "step": 14380
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9285082817077637,
      "learning_rate": 1.8410733811438438e-05,
      "loss": 2.5431,
      "step": 14381
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0518767833709717,
      "learning_rate": 1.8410511087234273e-05,
      "loss": 2.3477,
      "step": 14382
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1160846948623657,
      "learning_rate": 1.841028834877201e-05,
      "loss": 2.4489,
      "step": 14383
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0867184400558472,
      "learning_rate": 1.841006559605202e-05,
      "loss": 2.6629,
      "step": 14384
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.088428258895874,
      "learning_rate": 1.8409842829074686e-05,
      "loss": 2.3782,
      "step": 14385
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1239356994628906,
      "learning_rate": 1.8409620047840386e-05,
      "loss": 2.4847,
      "step": 14386
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9893928170204163,
      "learning_rate": 1.840939725234949e-05,
      "loss": 2.564,
      "step": 14387
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.092481017112732,
      "learning_rate": 1.840917444260239e-05,
      "loss": 2.3868,
      "step": 14388
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0970680713653564,
      "learning_rate": 1.8408951618599453e-05,
      "loss": 2.5441,
      "step": 14389
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0972533226013184,
      "learning_rate": 1.8408728780341058e-05,
      "loss": 2.5475,
      "step": 14390
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1349462270736694,
      "learning_rate": 1.8408505927827583e-05,
      "loss": 2.5745,
      "step": 14391
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0030786991119385,
      "learning_rate": 1.8408283061059408e-05,
      "loss": 2.5585,
      "step": 14392
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0980615615844727,
      "learning_rate": 1.8408060180036912e-05,
      "loss": 2.7229,
      "step": 14393
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0469727516174316,
      "learning_rate": 1.8407837284760467e-05,
      "loss": 2.4329,
      "step": 14394
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9766099452972412,
      "learning_rate": 1.8407614375230457e-05,
      "loss": 2.5325,
      "step": 14395
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0022748708724976,
      "learning_rate": 1.8407391451447254e-05,
      "loss": 2.5771,
      "step": 14396
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0965547561645508,
      "learning_rate": 1.840716851341124e-05,
      "loss": 2.467,
      "step": 14397
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0558083057403564,
      "learning_rate": 1.840694556112279e-05,
      "loss": 2.192,
      "step": 14398
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.039056420326233,
      "learning_rate": 1.840672259458229e-05,
      "loss": 2.3661,
      "step": 14399
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.108176589012146,
      "learning_rate": 1.8406499613790107e-05,
      "loss": 2.6611,
      "step": 14400
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0122677087783813,
      "learning_rate": 1.8406276618746626e-05,
      "loss": 2.5898,
      "step": 14401
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9987766146659851,
      "learning_rate": 1.840605360945222e-05,
      "loss": 2.6508,
      "step": 14402
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0654069185256958,
      "learning_rate": 1.8405830585907273e-05,
      "loss": 2.6032,
      "step": 14403
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9780251979827881,
      "learning_rate": 1.8405607548112162e-05,
      "loss": 2.5256,
      "step": 14404
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0438232421875,
      "learning_rate": 1.840538449606726e-05,
      "loss": 2.6034,
      "step": 14405
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0019201040267944,
      "learning_rate": 1.8405161429772948e-05,
      "loss": 2.4451,
      "step": 14406
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.021721363067627,
      "learning_rate": 1.8404938349229605e-05,
      "loss": 2.3129,
      "step": 14407
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0507258176803589,
      "learning_rate": 1.840471525443761e-05,
      "loss": 2.2913,
      "step": 14408
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0284377336502075,
      "learning_rate": 1.840449214539734e-05,
      "loss": 2.6292,
      "step": 14409
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9652925729751587,
      "learning_rate": 1.8404269022109174e-05,
      "loss": 2.42,
      "step": 14410
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9795227646827698,
      "learning_rate": 1.8404045884573487e-05,
      "loss": 2.3494,
      "step": 14411
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3454087972640991,
      "learning_rate": 1.840382273279066e-05,
      "loss": 2.5227,
      "step": 14412
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0303505659103394,
      "learning_rate": 1.8403599566761073e-05,
      "loss": 2.609,
      "step": 14413
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.093674898147583,
      "learning_rate": 1.84033763864851e-05,
      "loss": 2.3908,
      "step": 14414
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9544721245765686,
      "learning_rate": 1.8403153191963126e-05,
      "loss": 2.3852,
      "step": 14415
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9641156196594238,
      "learning_rate": 1.840292998319552e-05,
      "loss": 2.4675,
      "step": 14416
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9465973377227783,
      "learning_rate": 1.8402706760182667e-05,
      "loss": 2.2783,
      "step": 14417
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0264967679977417,
      "learning_rate": 1.8402483522924947e-05,
      "loss": 2.5554,
      "step": 14418
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0649666786193848,
      "learning_rate": 1.8402260271422728e-05,
      "loss": 2.5249,
      "step": 14419
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0030092000961304,
      "learning_rate": 1.8402037005676404e-05,
      "loss": 2.59,
      "step": 14420
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.051159143447876,
      "learning_rate": 1.8401813725686343e-05,
      "loss": 2.6564,
      "step": 14421
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9625203013420105,
      "learning_rate": 1.840159043145292e-05,
      "loss": 2.4509,
      "step": 14422
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0624200105667114,
      "learning_rate": 1.840136712297653e-05,
      "loss": 2.5139,
      "step": 14423
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.984259843826294,
      "learning_rate": 1.840114380025753e-05,
      "loss": 2.2637,
      "step": 14424
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1374969482421875,
      "learning_rate": 1.8400920463296316e-05,
      "loss": 2.5448,
      "step": 14425
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9581896662712097,
      "learning_rate": 1.840069711209326e-05,
      "loss": 2.3754,
      "step": 14426
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0484755039215088,
      "learning_rate": 1.840047374664874e-05,
      "loss": 2.5692,
      "step": 14427
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.052796483039856,
      "learning_rate": 1.8400250366963134e-05,
      "loss": 2.3436,
      "step": 14428
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0998680591583252,
      "learning_rate": 1.840002697303682e-05,
      "loss": 2.4027,
      "step": 14429
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0323870182037354,
      "learning_rate": 1.8399803564870183e-05,
      "loss": 2.49,
      "step": 14430
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1612489223480225,
      "learning_rate": 1.83995801424636e-05,
      "loss": 2.375,
      "step": 14431
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9527267217636108,
      "learning_rate": 1.8399356705817442e-05,
      "loss": 2.3337,
      "step": 14432
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.044878363609314,
      "learning_rate": 1.8399133254932094e-05,
      "loss": 2.4043,
      "step": 14433
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0723819732666016,
      "learning_rate": 1.8398909789807936e-05,
      "loss": 2.4388,
      "step": 14434
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1057041883468628,
      "learning_rate": 1.8398686310445346e-05,
      "loss": 2.5188,
      "step": 14435
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9840799570083618,
      "learning_rate": 1.83984628168447e-05,
      "loss": 2.283,
      "step": 14436
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9566245079040527,
      "learning_rate": 1.8398239309006377e-05,
      "loss": 2.6307,
      "step": 14437
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9703491926193237,
      "learning_rate": 1.839801578693076e-05,
      "loss": 2.5313,
      "step": 14438
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.104609489440918,
      "learning_rate": 1.839779225061822e-05,
      "loss": 2.5223,
      "step": 14439
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0003893375396729,
      "learning_rate": 1.8397568700069147e-05,
      "loss": 2.5949,
      "step": 14440
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9461751580238342,
      "learning_rate": 1.8397345135283912e-05,
      "loss": 2.5427,
      "step": 14441
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2094296216964722,
      "learning_rate": 1.83971215562629e-05,
      "loss": 2.4918,
      "step": 14442
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1487499475479126,
      "learning_rate": 1.839689796300648e-05,
      "loss": 2.6867,
      "step": 14443
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0066181421279907,
      "learning_rate": 1.839667435551504e-05,
      "loss": 2.4512,
      "step": 14444
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.123823881149292,
      "learning_rate": 1.8396450733788958e-05,
      "loss": 2.6414,
      "step": 14445
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1189441680908203,
      "learning_rate": 1.8396227097828608e-05,
      "loss": 2.5139,
      "step": 14446
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1123095750808716,
      "learning_rate": 1.8396003447634378e-05,
      "loss": 2.5233,
      "step": 14447
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.038110613822937,
      "learning_rate": 1.839577978320664e-05,
      "loss": 2.433,
      "step": 14448
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1442846059799194,
      "learning_rate": 1.839555610454577e-05,
      "loss": 2.5042,
      "step": 14449
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0803699493408203,
      "learning_rate": 1.839533241165216e-05,
      "loss": 2.6004,
      "step": 14450
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9520859122276306,
      "learning_rate": 1.8395108704526177e-05,
      "loss": 2.6679,
      "step": 14451
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0212926864624023,
      "learning_rate": 1.8394884983168206e-05,
      "loss": 2.3152,
      "step": 14452
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0177277326583862,
      "learning_rate": 1.8394661247578622e-05,
      "loss": 2.5209,
      "step": 14453
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1347328424453735,
      "learning_rate": 1.839443749775781e-05,
      "loss": 2.2485,
      "step": 14454
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1452234983444214,
      "learning_rate": 1.839421373370615e-05,
      "loss": 2.626,
      "step": 14455
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1151090860366821,
      "learning_rate": 1.8393989955424013e-05,
      "loss": 2.3318,
      "step": 14456
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1928094625473022,
      "learning_rate": 1.8393766162911785e-05,
      "loss": 2.6832,
      "step": 14457
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9993653893470764,
      "learning_rate": 1.839354235616984e-05,
      "loss": 2.5396,
      "step": 14458
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0602344274520874,
      "learning_rate": 1.8393318535198562e-05,
      "loss": 2.4033,
      "step": 14459
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1212905645370483,
      "learning_rate": 1.8393094699998333e-05,
      "loss": 2.4554,
      "step": 14460
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1972495317459106,
      "learning_rate": 1.839287085056953e-05,
      "loss": 2.6695,
      "step": 14461
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.220873236656189,
      "learning_rate": 1.8392646986912527e-05,
      "loss": 2.5018,
      "step": 14462
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9771291017532349,
      "learning_rate": 1.839242310902771e-05,
      "loss": 2.4889,
      "step": 14463
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1086149215698242,
      "learning_rate": 1.8392199216915455e-05,
      "loss": 2.6273,
      "step": 14464
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2385202646255493,
      "learning_rate": 1.8391975310576144e-05,
      "loss": 2.3276,
      "step": 14465
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1933388710021973,
      "learning_rate": 1.8391751390010156e-05,
      "loss": 2.6674,
      "step": 14466
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9421413540840149,
      "learning_rate": 1.839152745521787e-05,
      "loss": 2.6022,
      "step": 14467
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.081607699394226,
      "learning_rate": 1.8391303506199665e-05,
      "loss": 2.4234,
      "step": 14468
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1710631847381592,
      "learning_rate": 1.8391079542955922e-05,
      "loss": 2.5484,
      "step": 14469
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9664244651794434,
      "learning_rate": 1.8390855565487023e-05,
      "loss": 2.5481,
      "step": 14470
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9950569868087769,
      "learning_rate": 1.839063157379334e-05,
      "loss": 2.5041,
      "step": 14471
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9508281350135803,
      "learning_rate": 1.839040756787526e-05,
      "loss": 2.5717,
      "step": 14472
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9501590132713318,
      "learning_rate": 1.8390183547733163e-05,
      "loss": 2.4975,
      "step": 14473
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0202853679656982,
      "learning_rate": 1.838995951336742e-05,
      "loss": 2.8095,
      "step": 14474
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.948245108127594,
      "learning_rate": 1.8389735464778423e-05,
      "loss": 2.4251,
      "step": 14475
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1411880254745483,
      "learning_rate": 1.8389511401966543e-05,
      "loss": 2.5897,
      "step": 14476
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1797367334365845,
      "learning_rate": 1.8389287324932164e-05,
      "loss": 2.3931,
      "step": 14477
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0232056379318237,
      "learning_rate": 1.8389063233675664e-05,
      "loss": 2.5447,
      "step": 14478
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9582566618919373,
      "learning_rate": 1.8388839128197423e-05,
      "loss": 2.4741,
      "step": 14479
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1035590171813965,
      "learning_rate": 1.8388615008497817e-05,
      "loss": 2.6154,
      "step": 14480
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1530510187149048,
      "learning_rate": 1.8388390874577237e-05,
      "loss": 2.6375,
      "step": 14481
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0134565830230713,
      "learning_rate": 1.8388166726436054e-05,
      "loss": 2.6488,
      "step": 14482
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0487332344055176,
      "learning_rate": 1.838794256407465e-05,
      "loss": 2.5571,
      "step": 14483
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0336312055587769,
      "learning_rate": 1.8387718387493405e-05,
      "loss": 2.6104,
      "step": 14484
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.039546251296997,
      "learning_rate": 1.83874941966927e-05,
      "loss": 2.5609,
      "step": 14485
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1346890926361084,
      "learning_rate": 1.838726999167291e-05,
      "loss": 2.6521,
      "step": 14486
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.980161726474762,
      "learning_rate": 1.8387045772434425e-05,
      "loss": 2.4478,
      "step": 14487
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.094592571258545,
      "learning_rate": 1.8386821538977617e-05,
      "loss": 2.5112,
      "step": 14488
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0564208030700684,
      "learning_rate": 1.838659729130287e-05,
      "loss": 2.518,
      "step": 14489
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0890799760818481,
      "learning_rate": 1.8386373029410563e-05,
      "loss": 2.4875,
      "step": 14490
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9286859035491943,
      "learning_rate": 1.8386148753301075e-05,
      "loss": 2.5289,
      "step": 14491
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0527281761169434,
      "learning_rate": 1.8385924462974784e-05,
      "loss": 2.598,
      "step": 14492
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.087567687034607,
      "learning_rate": 1.838570015843208e-05,
      "loss": 2.5845,
      "step": 14493
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1212263107299805,
      "learning_rate": 1.8385475839673332e-05,
      "loss": 2.4142,
      "step": 14494
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9783902764320374,
      "learning_rate": 1.8385251506698925e-05,
      "loss": 2.7003,
      "step": 14495
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9839465618133545,
      "learning_rate": 1.838502715950924e-05,
      "loss": 2.4581,
      "step": 14496
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9455553889274597,
      "learning_rate": 1.838480279810466e-05,
      "loss": 2.3669,
      "step": 14497
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0809029340744019,
      "learning_rate": 1.838457842248556e-05,
      "loss": 2.7395,
      "step": 14498
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3175160884857178,
      "learning_rate": 1.838435403265232e-05,
      "loss": 2.4239,
      "step": 14499
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.109739899635315,
      "learning_rate": 1.8384129628605324e-05,
      "loss": 2.4856,
      "step": 14500
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9966753125190735,
      "learning_rate": 1.838390521034495e-05,
      "loss": 2.3162,
      "step": 14501
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0166679620742798,
      "learning_rate": 1.8383680777871582e-05,
      "loss": 2.4349,
      "step": 14502
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1029943227767944,
      "learning_rate": 1.8383456331185597e-05,
      "loss": 2.2496,
      "step": 14503
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9155033230781555,
      "learning_rate": 1.8383231870287378e-05,
      "loss": 2.6015,
      "step": 14504
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9933594465255737,
      "learning_rate": 1.8383007395177305e-05,
      "loss": 2.5188,
      "step": 14505
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5521758794784546,
      "learning_rate": 1.8382782905855757e-05,
      "loss": 2.6272,
      "step": 14506
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0655887126922607,
      "learning_rate": 1.838255840232311e-05,
      "loss": 2.3825,
      "step": 14507
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1464155912399292,
      "learning_rate": 1.8382333884579755e-05,
      "loss": 2.4475,
      "step": 14508
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1651941537857056,
      "learning_rate": 1.8382109352626066e-05,
      "loss": 2.6237,
      "step": 14509
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9918188452720642,
      "learning_rate": 1.8381884806462428e-05,
      "loss": 2.6647,
      "step": 14510
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0051350593566895,
      "learning_rate": 1.8381660246089215e-05,
      "loss": 2.2779,
      "step": 14511
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9798467755317688,
      "learning_rate": 1.838143567150681e-05,
      "loss": 2.5628,
      "step": 14512
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9847376346588135,
      "learning_rate": 1.83812110827156e-05,
      "loss": 2.4535,
      "step": 14513
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9268010258674622,
      "learning_rate": 1.8380986479715955e-05,
      "loss": 2.5794,
      "step": 14514
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1184592247009277,
      "learning_rate": 1.8380761862508268e-05,
      "loss": 2.5274,
      "step": 14515
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.98749178647995,
      "learning_rate": 1.838053723109291e-05,
      "loss": 2.4599,
      "step": 14516
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0482243299484253,
      "learning_rate": 1.8380312585470264e-05,
      "loss": 2.2672,
      "step": 14517
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1810473203659058,
      "learning_rate": 1.8380087925640717e-05,
      "loss": 2.5217,
      "step": 14518
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0243613719940186,
      "learning_rate": 1.837986325160464e-05,
      "loss": 2.5582,
      "step": 14519
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1179046630859375,
      "learning_rate": 1.8379638563362423e-05,
      "loss": 2.3462,
      "step": 14520
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9916074872016907,
      "learning_rate": 1.8379413860914443e-05,
      "loss": 2.5199,
      "step": 14521
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0241880416870117,
      "learning_rate": 1.8379189144261076e-05,
      "loss": 2.6253,
      "step": 14522
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1850801706314087,
      "learning_rate": 1.837896441340271e-05,
      "loss": 2.4427,
      "step": 14523
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0232622623443604,
      "learning_rate": 1.8378739668339723e-05,
      "loss": 2.3372,
      "step": 14524
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0301612615585327,
      "learning_rate": 1.83785149090725e-05,
      "loss": 2.4994,
      "step": 14525
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0099393129348755,
      "learning_rate": 1.8378290135601412e-05,
      "loss": 2.5655,
      "step": 14526
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1197749376296997,
      "learning_rate": 1.837806534792685e-05,
      "loss": 2.1538,
      "step": 14527
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0072669982910156,
      "learning_rate": 1.8377840546049196e-05,
      "loss": 2.5475,
      "step": 14528
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0604056119918823,
      "learning_rate": 1.837761572996882e-05,
      "loss": 2.5455,
      "step": 14529
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0408819913864136,
      "learning_rate": 1.8377390899686115e-05,
      "loss": 2.5285,
      "step": 14530
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0698601007461548,
      "learning_rate": 1.8377166055201457e-05,
      "loss": 2.589,
      "step": 14531
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0919270515441895,
      "learning_rate": 1.8376941196515224e-05,
      "loss": 2.2261,
      "step": 14532
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.158293604850769,
      "learning_rate": 1.8376716323627803e-05,
      "loss": 2.5403,
      "step": 14533
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0651775598526,
      "learning_rate": 1.8376491436539573e-05,
      "loss": 2.6437,
      "step": 14534
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0399504899978638,
      "learning_rate": 1.8376266535250913e-05,
      "loss": 2.4729,
      "step": 14535
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0779500007629395,
      "learning_rate": 1.8376041619762207e-05,
      "loss": 2.5643,
      "step": 14536
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1024595499038696,
      "learning_rate": 1.8375816690073834e-05,
      "loss": 2.5663,
      "step": 14537
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0604814291000366,
      "learning_rate": 1.837559174618618e-05,
      "loss": 2.6257,
      "step": 14538
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1216872930526733,
      "learning_rate": 1.837536678809962e-05,
      "loss": 2.4479,
      "step": 14539
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0074971914291382,
      "learning_rate": 1.8375141815814544e-05,
      "loss": 2.6636,
      "step": 14540
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0175673961639404,
      "learning_rate": 1.8374916829331324e-05,
      "loss": 2.5596,
      "step": 14541
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2210485935211182,
      "learning_rate": 1.8374691828650344e-05,
      "loss": 2.4806,
      "step": 14542
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9851155877113342,
      "learning_rate": 1.837446681377199e-05,
      "loss": 2.4846,
      "step": 14543
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1607747077941895,
      "learning_rate": 1.837424178469664e-05,
      "loss": 2.492,
      "step": 14544
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0517321825027466,
      "learning_rate": 1.8374016741424674e-05,
      "loss": 2.6393,
      "step": 14545
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0576200485229492,
      "learning_rate": 1.8373791683956478e-05,
      "loss": 2.4877,
      "step": 14546
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0220972299575806,
      "learning_rate": 1.8373566612292426e-05,
      "loss": 2.4834,
      "step": 14547
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0693726539611816,
      "learning_rate": 1.837334152643291e-05,
      "loss": 2.375,
      "step": 14548
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0913751125335693,
      "learning_rate": 1.8373116426378302e-05,
      "loss": 2.6125,
      "step": 14549
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.994056224822998,
      "learning_rate": 1.8372891312128985e-05,
      "loss": 2.4623,
      "step": 14550
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9719409346580505,
      "learning_rate": 1.8372666183685347e-05,
      "loss": 2.2981,
      "step": 14551
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0359346866607666,
      "learning_rate": 1.8372441041047768e-05,
      "loss": 2.5469,
      "step": 14552
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0682570934295654,
      "learning_rate": 1.8372215884216627e-05,
      "loss": 2.4185,
      "step": 14553
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1205003261566162,
      "learning_rate": 1.83719907131923e-05,
      "loss": 2.4879,
      "step": 14554
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.002671241760254,
      "learning_rate": 1.837176552797518e-05,
      "loss": 2.6461,
      "step": 14555
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0670194625854492,
      "learning_rate": 1.8371540328565643e-05,
      "loss": 2.4679,
      "step": 14556
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9945303201675415,
      "learning_rate": 1.8371315114964074e-05,
      "loss": 2.5716,
      "step": 14557
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9689657688140869,
      "learning_rate": 1.837108988717085e-05,
      "loss": 2.5084,
      "step": 14558
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9113025665283203,
      "learning_rate": 1.8370864645186353e-05,
      "loss": 2.7177,
      "step": 14559
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0370393991470337,
      "learning_rate": 1.837063938901097e-05,
      "loss": 2.6536,
      "step": 14560
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0339672565460205,
      "learning_rate": 1.837041411864508e-05,
      "loss": 2.4103,
      "step": 14561
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0535163879394531,
      "learning_rate": 1.8370188834089064e-05,
      "loss": 2.5455,
      "step": 14562
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0190210342407227,
      "learning_rate": 1.83699635353433e-05,
      "loss": 2.7128,
      "step": 14563
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0046743154525757,
      "learning_rate": 1.836973822240818e-05,
      "loss": 2.3213,
      "step": 14564
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1715335845947266,
      "learning_rate": 1.8369512895284078e-05,
      "loss": 2.535,
      "step": 14565
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0770468711853027,
      "learning_rate": 1.836928755397138e-05,
      "loss": 2.4925,
      "step": 14566
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.920617938041687,
      "learning_rate": 1.8369062198470464e-05,
      "loss": 2.4906,
      "step": 14567
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9492910504341125,
      "learning_rate": 1.8368836828781716e-05,
      "loss": 2.6571,
      "step": 14568
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9488155245780945,
      "learning_rate": 1.8368611444905518e-05,
      "loss": 2.5233,
      "step": 14569
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0350292921066284,
      "learning_rate": 1.8368386046842254e-05,
      "loss": 2.5865,
      "step": 14570
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0188066959381104,
      "learning_rate": 1.83681606345923e-05,
      "loss": 2.2436,
      "step": 14571
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2621110677719116,
      "learning_rate": 1.836793520815604e-05,
      "loss": 2.2583,
      "step": 14572
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2019857168197632,
      "learning_rate": 1.8367709767533855e-05,
      "loss": 2.429,
      "step": 14573
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.366365909576416,
      "learning_rate": 1.836748431272613e-05,
      "loss": 2.773,
      "step": 14574
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0341821908950806,
      "learning_rate": 1.8367258843733247e-05,
      "loss": 2.5386,
      "step": 14575
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1088719367980957,
      "learning_rate": 1.836703336055559e-05,
      "loss": 2.4566,
      "step": 14576
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1055504083633423,
      "learning_rate": 1.8366807863193537e-05,
      "loss": 2.3835,
      "step": 14577
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0633115768432617,
      "learning_rate": 1.836658235164747e-05,
      "loss": 2.7408,
      "step": 14578
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1119660139083862,
      "learning_rate": 1.836635682591778e-05,
      "loss": 2.514,
      "step": 14579
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0723917484283447,
      "learning_rate": 1.8366131286004837e-05,
      "loss": 2.6253,
      "step": 14580
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0112049579620361,
      "learning_rate": 1.836590573190903e-05,
      "loss": 2.4295,
      "step": 14581
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9722815155982971,
      "learning_rate": 1.8365680163630744e-05,
      "loss": 2.4461,
      "step": 14582
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.149426817893982,
      "learning_rate": 1.8365454581170355e-05,
      "loss": 2.3439,
      "step": 14583
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0843178033828735,
      "learning_rate": 1.836522898452825e-05,
      "loss": 2.5525,
      "step": 14584
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0671279430389404,
      "learning_rate": 1.836500337370481e-05,
      "loss": 2.4331,
      "step": 14585
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1078102588653564,
      "learning_rate": 1.8364777748700416e-05,
      "loss": 2.4413,
      "step": 14586
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9558005332946777,
      "learning_rate": 1.8364552109515454e-05,
      "loss": 2.6723,
      "step": 14587
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9689010977745056,
      "learning_rate": 1.8364326456150303e-05,
      "loss": 2.5237,
      "step": 14588
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0195441246032715,
      "learning_rate": 1.8364100788605346e-05,
      "loss": 2.4688,
      "step": 14589
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.145313024520874,
      "learning_rate": 1.8363875106880966e-05,
      "loss": 2.6481,
      "step": 14590
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0145056247711182,
      "learning_rate": 1.836364941097755e-05,
      "loss": 2.6191,
      "step": 14591
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.979704737663269,
      "learning_rate": 1.836342370089547e-05,
      "loss": 2.5196,
      "step": 14592
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0661590099334717,
      "learning_rate": 1.8363197976635123e-05,
      "loss": 2.6869,
      "step": 14593
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9780865907669067,
      "learning_rate": 1.836297223819688e-05,
      "loss": 2.6495,
      "step": 14594
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1079347133636475,
      "learning_rate": 1.8362746485581128e-05,
      "loss": 2.2988,
      "step": 14595
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1700679063796997,
      "learning_rate": 1.836252071878825e-05,
      "loss": 2.4935,
      "step": 14596
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0655457973480225,
      "learning_rate": 1.836229493781863e-05,
      "loss": 2.5851,
      "step": 14597
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0405302047729492,
      "learning_rate": 1.8362069142672644e-05,
      "loss": 2.3885,
      "step": 14598
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1042429208755493,
      "learning_rate": 1.8361843333350686e-05,
      "loss": 2.5725,
      "step": 14599
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0643832683563232,
      "learning_rate": 1.8361617509853126e-05,
      "loss": 2.5482,
      "step": 14600
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0244321823120117,
      "learning_rate": 1.8361391672180358e-05,
      "loss": 2.2747,
      "step": 14601
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9875575304031372,
      "learning_rate": 1.836116582033276e-05,
      "loss": 2.2263,
      "step": 14602
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1020267009735107,
      "learning_rate": 1.836093995431071e-05,
      "loss": 2.3359,
      "step": 14603
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.056768774986267,
      "learning_rate": 1.83607140741146e-05,
      "loss": 2.4813,
      "step": 14604
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9100534319877625,
      "learning_rate": 1.836048817974481e-05,
      "loss": 2.6837,
      "step": 14605
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.540239691734314,
      "learning_rate": 1.8360262271201722e-05,
      "loss": 2.5992,
      "step": 14606
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.064691185951233,
      "learning_rate": 1.8360036348485715e-05,
      "loss": 2.6773,
      "step": 14607
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.061226487159729,
      "learning_rate": 1.835981041159718e-05,
      "loss": 2.7744,
      "step": 14608
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.196124792098999,
      "learning_rate": 1.835958446053649e-05,
      "loss": 2.3645,
      "step": 14609
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0345638990402222,
      "learning_rate": 1.835935849530404e-05,
      "loss": 2.5325,
      "step": 14610
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9427995681762695,
      "learning_rate": 1.8359132515900205e-05,
      "loss": 2.5695,
      "step": 14611
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0198960304260254,
      "learning_rate": 1.8358906522325372e-05,
      "loss": 2.603,
      "step": 14612
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0367748737335205,
      "learning_rate": 1.8358680514579918e-05,
      "loss": 2.6035,
      "step": 14613
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0581341981887817,
      "learning_rate": 1.8358454492664233e-05,
      "loss": 2.6514,
      "step": 14614
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0549970865249634,
      "learning_rate": 1.8358228456578697e-05,
      "loss": 2.4812,
      "step": 14615
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0166423320770264,
      "learning_rate": 1.8358002406323692e-05,
      "loss": 2.4345,
      "step": 14616
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1657460927963257,
      "learning_rate": 1.8357776341899604e-05,
      "loss": 2.3515,
      "step": 14617
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1938869953155518,
      "learning_rate": 1.8357550263306817e-05,
      "loss": 2.4401,
      "step": 14618
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1825339794158936,
      "learning_rate": 1.835732417054571e-05,
      "loss": 2.5913,
      "step": 14619
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.167305588722229,
      "learning_rate": 1.835709806361667e-05,
      "loss": 2.4623,
      "step": 14620
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9941480159759521,
      "learning_rate": 1.835687194252008e-05,
      "loss": 2.5696,
      "step": 14621
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9766229391098022,
      "learning_rate": 1.835664580725632e-05,
      "loss": 2.7217,
      "step": 14622
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.05789053440094,
      "learning_rate": 1.8356419657825778e-05,
      "loss": 2.5961,
      "step": 14623
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2512973546981812,
      "learning_rate": 1.8356193494228837e-05,
      "loss": 2.4805,
      "step": 14624
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.517870306968689,
      "learning_rate": 1.8355967316465873e-05,
      "loss": 2.6549,
      "step": 14625
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1715223789215088,
      "learning_rate": 1.8355741124537276e-05,
      "loss": 2.2484,
      "step": 14626
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3745249509811401,
      "learning_rate": 1.835551491844343e-05,
      "loss": 2.4261,
      "step": 14627
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9709398746490479,
      "learning_rate": 1.835528869818472e-05,
      "loss": 2.472,
      "step": 14628
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9880826473236084,
      "learning_rate": 1.8355062463761522e-05,
      "loss": 2.5057,
      "step": 14629
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0364539623260498,
      "learning_rate": 1.8354836215174227e-05,
      "loss": 2.7259,
      "step": 14630
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9766077399253845,
      "learning_rate": 1.8354609952423212e-05,
      "loss": 2.3667,
      "step": 14631
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9325932264328003,
      "learning_rate": 1.8354383675508865e-05,
      "loss": 2.3704,
      "step": 14632
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0770132541656494,
      "learning_rate": 1.835415738443157e-05,
      "loss": 2.5164,
      "step": 14633
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0486353635787964,
      "learning_rate": 1.8353931079191708e-05,
      "loss": 2.4106,
      "step": 14634
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3083231449127197,
      "learning_rate": 1.8353704759789663e-05,
      "loss": 2.4532,
      "step": 14635
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0856781005859375,
      "learning_rate": 1.835347842622582e-05,
      "loss": 2.4879,
      "step": 14636
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9624953866004944,
      "learning_rate": 1.8353252078500565e-05,
      "loss": 2.4343,
      "step": 14637
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0491960048675537,
      "learning_rate": 1.835302571661428e-05,
      "loss": 2.6101,
      "step": 14638
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0324958562850952,
      "learning_rate": 1.8352799340567342e-05,
      "loss": 2.3316,
      "step": 14639
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0442103147506714,
      "learning_rate": 1.8352572950360143e-05,
      "loss": 2.5674,
      "step": 14640
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2166346311569214,
      "learning_rate": 1.8352346545993067e-05,
      "loss": 2.4769,
      "step": 14641
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0385441780090332,
      "learning_rate": 1.8352120127466493e-05,
      "loss": 2.4189,
      "step": 14642
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9396117329597473,
      "learning_rate": 1.835189369478081e-05,
      "loss": 2.5567,
      "step": 14643
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0248463153839111,
      "learning_rate": 1.8351667247936392e-05,
      "loss": 2.5335,
      "step": 14644
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9289315938949585,
      "learning_rate": 1.8351440786933637e-05,
      "loss": 2.5045,
      "step": 14645
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2391250133514404,
      "learning_rate": 1.835121431177292e-05,
      "loss": 2.5804,
      "step": 14646
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3683245182037354,
      "learning_rate": 1.835098782245462e-05,
      "loss": 2.1277,
      "step": 14647
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0566121339797974,
      "learning_rate": 1.8350761318979137e-05,
      "loss": 2.6325,
      "step": 14648
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0343189239501953,
      "learning_rate": 1.835053480134684e-05,
      "loss": 2.6044,
      "step": 14649
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0146287679672241,
      "learning_rate": 1.8350308269558116e-05,
      "loss": 2.4227,
      "step": 14650
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0768282413482666,
      "learning_rate": 1.835008172361336e-05,
      "loss": 2.3584,
      "step": 14651
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0035836696624756,
      "learning_rate": 1.834985516351294e-05,
      "loss": 2.5592,
      "step": 14652
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.038144588470459,
      "learning_rate": 1.8349628589257248e-05,
      "loss": 2.3731,
      "step": 14653
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0363359451293945,
      "learning_rate": 1.8349402000846673e-05,
      "loss": 2.6537,
      "step": 14654
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9730335474014282,
      "learning_rate": 1.8349175398281592e-05,
      "loss": 2.2733,
      "step": 14655
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.039736270904541,
      "learning_rate": 1.8348948781562387e-05,
      "loss": 2.2288,
      "step": 14656
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2065536975860596,
      "learning_rate": 1.8348722150689455e-05,
      "loss": 2.6381,
      "step": 14657
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1434193849563599,
      "learning_rate": 1.8348495505663164e-05,
      "loss": 2.4345,
      "step": 14658
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.02695631980896,
      "learning_rate": 1.8348268846483908e-05,
      "loss": 2.4838,
      "step": 14659
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0385866165161133,
      "learning_rate": 1.8348042173152067e-05,
      "loss": 2.4968,
      "step": 14660
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.234924077987671,
      "learning_rate": 1.834781548566803e-05,
      "loss": 2.3147,
      "step": 14661
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.125353217124939,
      "learning_rate": 1.834758878403218e-05,
      "loss": 2.5766,
      "step": 14662
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.010425329208374,
      "learning_rate": 1.8347362068244895e-05,
      "loss": 2.5986,
      "step": 14663
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.985421359539032,
      "learning_rate": 1.8347135338306567e-05,
      "loss": 2.3606,
      "step": 14664
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9908804297447205,
      "learning_rate": 1.8346908594217577e-05,
      "loss": 2.1779,
      "step": 14665
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1745450496673584,
      "learning_rate": 1.834668183597831e-05,
      "loss": 2.5556,
      "step": 14666
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0959954261779785,
      "learning_rate": 1.8346455063589147e-05,
      "loss": 2.4633,
      "step": 14667
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.044555902481079,
      "learning_rate": 1.834622827705048e-05,
      "loss": 2.4631,
      "step": 14668
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1486469507217407,
      "learning_rate": 1.834600147636269e-05,
      "loss": 2.4799,
      "step": 14669
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9943580627441406,
      "learning_rate": 1.8345774661526157e-05,
      "loss": 2.5494,
      "step": 14670
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1029764413833618,
      "learning_rate": 1.834554783254127e-05,
      "loss": 2.5702,
      "step": 14671
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9732601046562195,
      "learning_rate": 1.8345320989408418e-05,
      "loss": 2.5946,
      "step": 14672
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0326905250549316,
      "learning_rate": 1.8345094132127977e-05,
      "loss": 2.5636,
      "step": 14673
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2778699398040771,
      "learning_rate": 1.8344867260700333e-05,
      "loss": 2.552,
      "step": 14674
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.968295693397522,
      "learning_rate": 1.8344640375125874e-05,
      "loss": 2.5013,
      "step": 14675
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.005433440208435,
      "learning_rate": 1.8344413475404982e-05,
      "loss": 2.2509,
      "step": 14676
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0505919456481934,
      "learning_rate": 1.8344186561538048e-05,
      "loss": 2.7697,
      "step": 14677
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0884321928024292,
      "learning_rate": 1.8343959633525445e-05,
      "loss": 2.3665,
      "step": 14678
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.029319167137146,
      "learning_rate": 1.8343732691367566e-05,
      "loss": 2.5241,
      "step": 14679
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.063157081604004,
      "learning_rate": 1.8343505735064795e-05,
      "loss": 2.5873,
      "step": 14680
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9099678993225098,
      "learning_rate": 1.8343278764617516e-05,
      "loss": 2.3881,
      "step": 14681
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0376454591751099,
      "learning_rate": 1.8343051780026112e-05,
      "loss": 2.6586,
      "step": 14682
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0009249448776245,
      "learning_rate": 1.8342824781290972e-05,
      "loss": 2.3672,
      "step": 14683
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0668607950210571,
      "learning_rate": 1.8342597768412475e-05,
      "loss": 2.42,
      "step": 14684
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1510162353515625,
      "learning_rate": 1.8342370741391012e-05,
      "loss": 2.5292,
      "step": 14685
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0088719129562378,
      "learning_rate": 1.834214370022696e-05,
      "loss": 2.2623,
      "step": 14686
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0082393884658813,
      "learning_rate": 1.8341916644920713e-05,
      "loss": 2.5593,
      "step": 14687
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1535791158676147,
      "learning_rate": 1.834168957547265e-05,
      "loss": 2.5318,
      "step": 14688
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.096707820892334,
      "learning_rate": 1.834146249188316e-05,
      "loss": 2.6375,
      "step": 14689
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.088403344154358,
      "learning_rate": 1.8341235394152622e-05,
      "loss": 2.5351,
      "step": 14690
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.050291895866394,
      "learning_rate": 1.834100828228143e-05,
      "loss": 2.6877,
      "step": 14691
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0018821954727173,
      "learning_rate": 1.834078115626996e-05,
      "loss": 2.4626,
      "step": 14692
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9483533501625061,
      "learning_rate": 1.83405540161186e-05,
      "loss": 2.4135,
      "step": 14693
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0375703573226929,
      "learning_rate": 1.8340326861827735e-05,
      "loss": 2.3799,
      "step": 14694
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1129475831985474,
      "learning_rate": 1.8340099693397754e-05,
      "loss": 2.3827,
      "step": 14695
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9743995666503906,
      "learning_rate": 1.8339872510829037e-05,
      "loss": 2.6432,
      "step": 14696
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.900540292263031,
      "learning_rate": 1.833964531412197e-05,
      "loss": 2.4798,
      "step": 14697
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.175925850868225,
      "learning_rate": 1.8339418103276937e-05,
      "loss": 2.3969,
      "step": 14698
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9867833852767944,
      "learning_rate": 1.833919087829433e-05,
      "loss": 2.6104,
      "step": 14699
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0546602010726929,
      "learning_rate": 1.833896363917453e-05,
      "loss": 2.4863,
      "step": 14700
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.127301812171936,
      "learning_rate": 1.8338736385917917e-05,
      "loss": 2.3469,
      "step": 14701
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9968746900558472,
      "learning_rate": 1.8338509118524885e-05,
      "loss": 2.4112,
      "step": 14702
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1793538331985474,
      "learning_rate": 1.8338281836995815e-05,
      "loss": 2.4254,
      "step": 14703
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.00773286819458,
      "learning_rate": 1.8338054541331093e-05,
      "loss": 2.5884,
      "step": 14704
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9880170226097107,
      "learning_rate": 1.8337827231531103e-05,
      "loss": 2.3656,
      "step": 14705
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1676790714263916,
      "learning_rate": 1.833759990759623e-05,
      "loss": 2.3935,
      "step": 14706
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0278352499008179,
      "learning_rate": 1.8337372569526863e-05,
      "loss": 2.5124,
      "step": 14707
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9504891633987427,
      "learning_rate": 1.8337145217323383e-05,
      "loss": 2.4665,
      "step": 14708
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0499807596206665,
      "learning_rate": 1.833691785098618e-05,
      "loss": 2.4288,
      "step": 14709
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9718074202537537,
      "learning_rate": 1.833669047051563e-05,
      "loss": 2.4518,
      "step": 14710
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0132181644439697,
      "learning_rate": 1.8336463075912135e-05,
      "loss": 2.3396,
      "step": 14711
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.042960286140442,
      "learning_rate": 1.8336235667176067e-05,
      "loss": 2.368,
      "step": 14712
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0284188985824585,
      "learning_rate": 1.8336008244307813e-05,
      "loss": 2.2855,
      "step": 14713
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0686569213867188,
      "learning_rate": 1.8335780807307763e-05,
      "loss": 2.237,
      "step": 14714
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9690371155738831,
      "learning_rate": 1.83355533561763e-05,
      "loss": 2.398,
      "step": 14715
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0189130306243896,
      "learning_rate": 1.833532589091381e-05,
      "loss": 2.4954,
      "step": 14716
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3926986455917358,
      "learning_rate": 1.8335098411520682e-05,
      "loss": 2.4965,
      "step": 14717
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9944481253623962,
      "learning_rate": 1.8334870917997297e-05,
      "loss": 2.3993,
      "step": 14718
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9184914827346802,
      "learning_rate": 1.833464341034404e-05,
      "loss": 2.5586,
      "step": 14719
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1935020685195923,
      "learning_rate": 1.83344158885613e-05,
      "loss": 2.6251,
      "step": 14720
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1627142429351807,
      "learning_rate": 1.8334188352649464e-05,
      "loss": 2.4158,
      "step": 14721
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0408823490142822,
      "learning_rate": 1.833396080260891e-05,
      "loss": 2.4867,
      "step": 14722
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9849345088005066,
      "learning_rate": 1.8333733238440033e-05,
      "loss": 2.5182,
      "step": 14723
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1316355466842651,
      "learning_rate": 1.8333505660143212e-05,
      "loss": 2.4807,
      "step": 14724
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0838370323181152,
      "learning_rate": 1.8333278067718837e-05,
      "loss": 2.5701,
      "step": 14725
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.03183913230896,
      "learning_rate": 1.8333050461167294e-05,
      "loss": 2.6124,
      "step": 14726
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9830767512321472,
      "learning_rate": 1.8332822840488966e-05,
      "loss": 2.5079,
      "step": 14727
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9324316382408142,
      "learning_rate": 1.8332595205684236e-05,
      "loss": 2.7248,
      "step": 14728
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0284366607666016,
      "learning_rate": 1.83323675567535e-05,
      "loss": 2.6881,
      "step": 14729
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.03180992603302,
      "learning_rate": 1.8332139893697134e-05,
      "loss": 2.449,
      "step": 14730
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9064574241638184,
      "learning_rate": 1.833191221651553e-05,
      "loss": 2.5727,
      "step": 14731
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0984607934951782,
      "learning_rate": 1.8331684525209073e-05,
      "loss": 2.386,
      "step": 14732
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9792357683181763,
      "learning_rate": 1.8331456819778143e-05,
      "loss": 2.5339,
      "step": 14733
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9486084580421448,
      "learning_rate": 1.8331229100223135e-05,
      "loss": 2.4409,
      "step": 14734
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9499660134315491,
      "learning_rate": 1.833100136654443e-05,
      "loss": 2.6102,
      "step": 14735
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9880542755126953,
      "learning_rate": 1.8330773618742416e-05,
      "loss": 2.3418,
      "step": 14736
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2884985208511353,
      "learning_rate": 1.833054585681748e-05,
      "loss": 2.3835,
      "step": 14737
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0243675708770752,
      "learning_rate": 1.833031808077e-05,
      "loss": 2.4158,
      "step": 14738
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9070566296577454,
      "learning_rate": 1.8330090290600374e-05,
      "loss": 2.4227,
      "step": 14739
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0135985612869263,
      "learning_rate": 1.832986248630898e-05,
      "loss": 2.5426,
      "step": 14740
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.024006724357605,
      "learning_rate": 1.8329634667896207e-05,
      "loss": 2.4803,
      "step": 14741
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9834938049316406,
      "learning_rate": 1.832940683536244e-05,
      "loss": 2.7187,
      "step": 14742
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0746104717254639,
      "learning_rate": 1.8329178988708065e-05,
      "loss": 2.5651,
      "step": 14743
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0189377069473267,
      "learning_rate": 1.8328951127933474e-05,
      "loss": 2.4535,
      "step": 14744
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0260848999023438,
      "learning_rate": 1.8328723253039044e-05,
      "loss": 2.6268,
      "step": 14745
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0405092239379883,
      "learning_rate": 1.832849536402517e-05,
      "loss": 2.33,
      "step": 14746
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.050045371055603,
      "learning_rate": 1.8328267460892234e-05,
      "loss": 2.4911,
      "step": 14747
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0157071352005005,
      "learning_rate": 1.832803954364062e-05,
      "loss": 2.6939,
      "step": 14748
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.041564702987671,
      "learning_rate": 1.832781161227072e-05,
      "loss": 2.437,
      "step": 14749
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9651273488998413,
      "learning_rate": 1.8327583666782917e-05,
      "loss": 2.5745,
      "step": 14750
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0861761569976807,
      "learning_rate": 1.8327355707177593e-05,
      "loss": 2.4308,
      "step": 14751
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1166757345199585,
      "learning_rate": 1.8327127733455146e-05,
      "loss": 2.7428,
      "step": 14752
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9577299952507019,
      "learning_rate": 1.8326899745615954e-05,
      "loss": 2.3725,
      "step": 14753
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9589025974273682,
      "learning_rate": 1.8326671743660405e-05,
      "loss": 2.4638,
      "step": 14754
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9965956807136536,
      "learning_rate": 1.8326443727588886e-05,
      "loss": 2.4651,
      "step": 14755
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0480066537857056,
      "learning_rate": 1.832621569740178e-05,
      "loss": 2.4714,
      "step": 14756
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0520000457763672,
      "learning_rate": 1.832598765309948e-05,
      "loss": 2.427,
      "step": 14757
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9677229523658752,
      "learning_rate": 1.8325759594682372e-05,
      "loss": 2.537,
      "step": 14758
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1111351251602173,
      "learning_rate": 1.8325531522150835e-05,
      "loss": 2.4952,
      "step": 14759
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1388921737670898,
      "learning_rate": 1.8325303435505267e-05,
      "loss": 2.6154,
      "step": 14760
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1026290655136108,
      "learning_rate": 1.8325075334746045e-05,
      "loss": 2.4109,
      "step": 14761
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2153791189193726,
      "learning_rate": 1.8324847219873558e-05,
      "loss": 2.4402,
      "step": 14762
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1908692121505737,
      "learning_rate": 1.83246190908882e-05,
      "loss": 2.2971,
      "step": 14763
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0145692825317383,
      "learning_rate": 1.8324390947790345e-05,
      "loss": 2.382,
      "step": 14764
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1467279195785522,
      "learning_rate": 1.832416279058039e-05,
      "loss": 2.5785,
      "step": 14765
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.15971040725708,
      "learning_rate": 1.8323934619258716e-05,
      "loss": 2.5108,
      "step": 14766
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9275482892990112,
      "learning_rate": 1.8323706433825713e-05,
      "loss": 2.7571,
      "step": 14767
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1679779291152954,
      "learning_rate": 1.8323478234281766e-05,
      "loss": 2.6126,
      "step": 14768
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.028109073638916,
      "learning_rate": 1.8323250020627263e-05,
      "loss": 2.6532,
      "step": 14769
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0223084688186646,
      "learning_rate": 1.8323021792862593e-05,
      "loss": 2.6898,
      "step": 14770
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0648664236068726,
      "learning_rate": 1.8322793550988137e-05,
      "loss": 2.5444,
      "step": 14771
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1131548881530762,
      "learning_rate": 1.832256529500429e-05,
      "loss": 2.4908,
      "step": 14772
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0280503034591675,
      "learning_rate": 1.8322337024911432e-05,
      "loss": 2.3933,
      "step": 14773
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2397112846374512,
      "learning_rate": 1.832210874070995e-05,
      "loss": 2.4345,
      "step": 14774
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1680653095245361,
      "learning_rate": 1.8321880442400236e-05,
      "loss": 2.4804,
      "step": 14775
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1104031801223755,
      "learning_rate": 1.8321652129982674e-05,
      "loss": 2.6571,
      "step": 14776
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9789363145828247,
      "learning_rate": 1.8321423803457656e-05,
      "loss": 2.4624,
      "step": 14777
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.051514983177185,
      "learning_rate": 1.8321195462825558e-05,
      "loss": 2.4114,
      "step": 14778
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.919732391834259,
      "learning_rate": 1.8320967108086777e-05,
      "loss": 2.5564,
      "step": 14779
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9847507476806641,
      "learning_rate": 1.8320738739241696e-05,
      "loss": 2.5944,
      "step": 14780
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.067941427230835,
      "learning_rate": 1.83205103562907e-05,
      "loss": 2.4403,
      "step": 14781
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.970307469367981,
      "learning_rate": 1.8320281959234183e-05,
      "loss": 2.4779,
      "step": 14782
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0364925861358643,
      "learning_rate": 1.832005354807253e-05,
      "loss": 2.5863,
      "step": 14783
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9382455945014954,
      "learning_rate": 1.8319825122806122e-05,
      "loss": 2.6675,
      "step": 14784
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0188918113708496,
      "learning_rate": 1.8319596683435353e-05,
      "loss": 2.5217,
      "step": 14785
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0995731353759766,
      "learning_rate": 1.8319368229960604e-05,
      "loss": 2.5822,
      "step": 14786
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9257434606552124,
      "learning_rate": 1.8319139762382272e-05,
      "loss": 2.4202,
      "step": 14787
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.118137001991272,
      "learning_rate": 1.8318911280700734e-05,
      "loss": 2.6758,
      "step": 14788
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0658977031707764,
      "learning_rate": 1.8318682784916388e-05,
      "loss": 2.6027,
      "step": 14789
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0607620477676392,
      "learning_rate": 1.831845427502961e-05,
      "loss": 2.6253,
      "step": 14790
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0642127990722656,
      "learning_rate": 1.8318225751040796e-05,
      "loss": 2.4638,
      "step": 14791
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0987508296966553,
      "learning_rate": 1.831799721295033e-05,
      "loss": 2.4152,
      "step": 14792
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0332670211791992,
      "learning_rate": 1.8317768660758596e-05,
      "loss": 2.7691,
      "step": 14793
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9742127060890198,
      "learning_rate": 1.8317540094465988e-05,
      "loss": 2.3747,
      "step": 14794
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0559436082839966,
      "learning_rate": 1.831731151407289e-05,
      "loss": 2.6929,
      "step": 14795
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.075576901435852,
      "learning_rate": 1.831708291957969e-05,
      "loss": 2.36,
      "step": 14796
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9758827090263367,
      "learning_rate": 1.8316854310986776e-05,
      "loss": 2.721,
      "step": 14797
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0690821409225464,
      "learning_rate": 1.8316625688294536e-05,
      "loss": 2.5459,
      "step": 14798
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.996540367603302,
      "learning_rate": 1.8316397051503355e-05,
      "loss": 2.5273,
      "step": 14799
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0165424346923828,
      "learning_rate": 1.8316168400613623e-05,
      "loss": 2.4684,
      "step": 14800
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0150691270828247,
      "learning_rate": 1.8315939735625728e-05,
      "loss": 2.5915,
      "step": 14801
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2251393795013428,
      "learning_rate": 1.8315711056540054e-05,
      "loss": 2.3184,
      "step": 14802
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0079976320266724,
      "learning_rate": 1.831548236335699e-05,
      "loss": 2.3073,
      "step": 14803
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0498284101486206,
      "learning_rate": 1.8315253656076927e-05,
      "loss": 2.6507,
      "step": 14804
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1706860065460205,
      "learning_rate": 1.8315024934700253e-05,
      "loss": 2.5466,
      "step": 14805
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0970096588134766,
      "learning_rate": 1.8314796199227352e-05,
      "loss": 2.4854,
      "step": 14806
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9685049057006836,
      "learning_rate": 1.8314567449658613e-05,
      "loss": 2.585,
      "step": 14807
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1525390148162842,
      "learning_rate": 1.8314338685994422e-05,
      "loss": 2.3544,
      "step": 14808
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9558642506599426,
      "learning_rate": 1.831410990823517e-05,
      "loss": 2.4159,
      "step": 14809
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0526233911514282,
      "learning_rate": 1.831388111638125e-05,
      "loss": 2.6723,
      "step": 14810
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0199313163757324,
      "learning_rate": 1.831365231043303e-05,
      "loss": 2.5933,
      "step": 14811
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0681793689727783,
      "learning_rate": 1.8313423490390924e-05,
      "loss": 2.5561,
      "step": 14812
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0390419960021973,
      "learning_rate": 1.83131946562553e-05,
      "loss": 2.3264,
      "step": 14813
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0928640365600586,
      "learning_rate": 1.8312965808026555e-05,
      "loss": 2.4698,
      "step": 14814
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9629400372505188,
      "learning_rate": 1.8312736945705072e-05,
      "loss": 2.8596,
      "step": 14815
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9863677620887756,
      "learning_rate": 1.831250806929125e-05,
      "loss": 2.5969,
      "step": 14816
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0263279676437378,
      "learning_rate": 1.8312279178785463e-05,
      "loss": 2.4808,
      "step": 14817
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0004322528839111,
      "learning_rate": 1.8312050274188106e-05,
      "loss": 2.6266,
      "step": 14818
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2131476402282715,
      "learning_rate": 1.831182135549957e-05,
      "loss": 2.5114,
      "step": 14819
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1872423887252808,
      "learning_rate": 1.8311592422720236e-05,
      "loss": 2.5617,
      "step": 14820
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0295500755310059,
      "learning_rate": 1.8311363475850497e-05,
      "loss": 2.5143,
      "step": 14821
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0048141479492188,
      "learning_rate": 1.8311134514890737e-05,
      "loss": 2.3648,
      "step": 14822
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.119630217552185,
      "learning_rate": 1.831090553984135e-05,
      "loss": 2.5665,
      "step": 14823
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9658954739570618,
      "learning_rate": 1.831067655070272e-05,
      "loss": 2.4021,
      "step": 14824
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1329779624938965,
      "learning_rate": 1.8310447547475234e-05,
      "loss": 2.3988,
      "step": 14825
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2080435752868652,
      "learning_rate": 1.8310218530159286e-05,
      "loss": 2.4127,
      "step": 14826
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.171177625656128,
      "learning_rate": 1.8309989498755255e-05,
      "loss": 2.5808,
      "step": 14827
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9583616852760315,
      "learning_rate": 1.830976045326354e-05,
      "loss": 2.5768,
      "step": 14828
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9790261387825012,
      "learning_rate": 1.830953139368452e-05,
      "loss": 2.5298,
      "step": 14829
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0295994281768799,
      "learning_rate": 1.830930232001859e-05,
      "loss": 2.6081,
      "step": 14830
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0260716676712036,
      "learning_rate": 1.8309073232266138e-05,
      "loss": 2.655,
      "step": 14831
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2457916736602783,
      "learning_rate": 1.8308844130427546e-05,
      "loss": 2.6425,
      "step": 14832
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0377600193023682,
      "learning_rate": 1.830861501450321e-05,
      "loss": 2.5456,
      "step": 14833
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5751571655273438,
      "learning_rate": 1.8308385884493513e-05,
      "loss": 2.6765,
      "step": 14834
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0053688287734985,
      "learning_rate": 1.8308156740398843e-05,
      "loss": 2.6017,
      "step": 14835
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0615237951278687,
      "learning_rate": 1.8307927582219594e-05,
      "loss": 2.6884,
      "step": 14836
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0767841339111328,
      "learning_rate": 1.8307698409956153e-05,
      "loss": 2.6291,
      "step": 14837
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1222196817398071,
      "learning_rate": 1.8307469223608904e-05,
      "loss": 2.6562,
      "step": 14838
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1261026859283447,
      "learning_rate": 1.830724002317824e-05,
      "loss": 2.5219,
      "step": 14839
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0545339584350586,
      "learning_rate": 1.8307010808664544e-05,
      "loss": 2.6077,
      "step": 14840
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.068467617034912,
      "learning_rate": 1.8306781580068214e-05,
      "loss": 2.5123,
      "step": 14841
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1175756454467773,
      "learning_rate": 1.8306552337389632e-05,
      "loss": 2.6368,
      "step": 14842
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0056705474853516,
      "learning_rate": 1.8306323080629186e-05,
      "loss": 2.7221,
      "step": 14843
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0307466983795166,
      "learning_rate": 1.8306093809787264e-05,
      "loss": 2.7572,
      "step": 14844
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9958429932594299,
      "learning_rate": 1.8305864524864262e-05,
      "loss": 2.3963,
      "step": 14845
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9001543521881104,
      "learning_rate": 1.830563522586056e-05,
      "loss": 2.4399,
      "step": 14846
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0844323635101318,
      "learning_rate": 1.8305405912776554e-05,
      "loss": 2.3963,
      "step": 14847
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9905623197555542,
      "learning_rate": 1.8305176585612626e-05,
      "loss": 2.3917,
      "step": 14848
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0665650367736816,
      "learning_rate": 1.830494724436917e-05,
      "loss": 2.4336,
      "step": 14849
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0542179346084595,
      "learning_rate": 1.8304717889046572e-05,
      "loss": 2.5813,
      "step": 14850
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.974090039730072,
      "learning_rate": 1.8304488519645224e-05,
      "loss": 2.5853,
      "step": 14851
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0596373081207275,
      "learning_rate": 1.830425913616551e-05,
      "loss": 2.3373,
      "step": 14852
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0511749982833862,
      "learning_rate": 1.8304029738607823e-05,
      "loss": 2.5026,
      "step": 14853
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0665563344955444,
      "learning_rate": 1.830380032697255e-05,
      "loss": 2.5665,
      "step": 14854
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0193583965301514,
      "learning_rate": 1.8303570901260074e-05,
      "loss": 2.5649,
      "step": 14855
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.069649338722229,
      "learning_rate": 1.8303341461470797e-05,
      "loss": 2.5238,
      "step": 14856
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.019666314125061,
      "learning_rate": 1.8303112007605103e-05,
      "loss": 2.66,
      "step": 14857
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0216302871704102,
      "learning_rate": 1.8302882539663374e-05,
      "loss": 2.5055,
      "step": 14858
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9834858179092407,
      "learning_rate": 1.8302653057646004e-05,
      "loss": 2.4153,
      "step": 14859
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.059098243713379,
      "learning_rate": 1.8302423561553383e-05,
      "loss": 2.5409,
      "step": 14860
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1690665483474731,
      "learning_rate": 1.83021940513859e-05,
      "loss": 2.6026,
      "step": 14861
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9715515971183777,
      "learning_rate": 1.8301964527143945e-05,
      "loss": 2.3301,
      "step": 14862
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9748697280883789,
      "learning_rate": 1.83017349888279e-05,
      "loss": 2.4521,
      "step": 14863
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9575968980789185,
      "learning_rate": 1.8301505436438166e-05,
      "loss": 2.5214,
      "step": 14864
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.097049593925476,
      "learning_rate": 1.830127586997512e-05,
      "loss": 2.5972,
      "step": 14865
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9455434083938599,
      "learning_rate": 1.8301046289439163e-05,
      "loss": 2.4134,
      "step": 14866
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3065645694732666,
      "learning_rate": 1.830081669483067e-05,
      "loss": 2.4182,
      "step": 14867
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.119637131690979,
      "learning_rate": 1.8300587086150044e-05,
      "loss": 2.8206,
      "step": 14868
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0795180797576904,
      "learning_rate": 1.8300357463397666e-05,
      "loss": 2.5462,
      "step": 14869
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9894168972969055,
      "learning_rate": 1.830012782657393e-05,
      "loss": 2.5926,
      "step": 14870
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0411336421966553,
      "learning_rate": 1.8299898175679223e-05,
      "loss": 2.4493,
      "step": 14871
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9362137317657471,
      "learning_rate": 1.8299668510713934e-05,
      "loss": 2.7451,
      "step": 14872
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0094249248504639,
      "learning_rate": 1.829943883167845e-05,
      "loss": 2.4548,
      "step": 14873
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9914431571960449,
      "learning_rate": 1.8299209138573168e-05,
      "loss": 2.4615,
      "step": 14874
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.87830650806427,
      "learning_rate": 1.8298979431398466e-05,
      "loss": 2.1283,
      "step": 14875
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0262248516082764,
      "learning_rate": 1.8298749710154743e-05,
      "loss": 2.386,
      "step": 14876
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9761455059051514,
      "learning_rate": 1.8298519974842385e-05,
      "loss": 2.5068,
      "step": 14877
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2204164266586304,
      "learning_rate": 1.8298290225461786e-05,
      "loss": 2.4985,
      "step": 14878
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1968995332717896,
      "learning_rate": 1.8298060462013326e-05,
      "loss": 2.647,
      "step": 14879
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.073293685913086,
      "learning_rate": 1.8297830684497403e-05,
      "loss": 2.5968,
      "step": 14880
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0760959386825562,
      "learning_rate": 1.82976008929144e-05,
      "loss": 2.4449,
      "step": 14881
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9913629293441772,
      "learning_rate": 1.829737108726471e-05,
      "loss": 2.5231,
      "step": 14882
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1098898649215698,
      "learning_rate": 1.8297141267548726e-05,
      "loss": 2.6384,
      "step": 14883
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1141051054000854,
      "learning_rate": 1.829691143376683e-05,
      "loss": 2.5911,
      "step": 14884
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0800011157989502,
      "learning_rate": 1.829668158591942e-05,
      "loss": 2.5668,
      "step": 14885
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0458970069885254,
      "learning_rate": 1.829645172400688e-05,
      "loss": 2.5659,
      "step": 14886
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9913390874862671,
      "learning_rate": 1.8296221848029597e-05,
      "loss": 2.5888,
      "step": 14887
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0109846591949463,
      "learning_rate": 1.8295991957987973e-05,
      "loss": 2.5652,
      "step": 14888
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.219260811805725,
      "learning_rate": 1.8295762053882383e-05,
      "loss": 2.3785,
      "step": 14889
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9951314926147461,
      "learning_rate": 1.8295532135713224e-05,
      "loss": 2.4943,
      "step": 14890
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.039023995399475,
      "learning_rate": 1.8295302203480885e-05,
      "loss": 2.4439,
      "step": 14891
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9675456881523132,
      "learning_rate": 1.829507225718576e-05,
      "loss": 2.4723,
      "step": 14892
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.377348780632019,
      "learning_rate": 1.829484229682823e-05,
      "loss": 2.702,
      "step": 14893
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9991065263748169,
      "learning_rate": 1.829461232240869e-05,
      "loss": 2.5043,
      "step": 14894
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0131107568740845,
      "learning_rate": 1.829438233392753e-05,
      "loss": 2.6785,
      "step": 14895
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9977772831916809,
      "learning_rate": 1.829415233138514e-05,
      "loss": 2.4742,
      "step": 14896
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0502943992614746,
      "learning_rate": 1.829392231478191e-05,
      "loss": 2.3358,
      "step": 14897
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0034213066101074,
      "learning_rate": 1.8293692284118227e-05,
      "loss": 2.3179,
      "step": 14898
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0269181728363037,
      "learning_rate": 1.8293462239394482e-05,
      "loss": 2.2898,
      "step": 14899
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0559712648391724,
      "learning_rate": 1.8293232180611067e-05,
      "loss": 2.2505,
      "step": 14900
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9407845139503479,
      "learning_rate": 1.8293002107768373e-05,
      "loss": 2.5628,
      "step": 14901
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9723266959190369,
      "learning_rate": 1.8292772020866784e-05,
      "loss": 2.391,
      "step": 14902
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1820794343948364,
      "learning_rate": 1.82925419199067e-05,
      "loss": 2.6421,
      "step": 14903
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9926676750183105,
      "learning_rate": 1.8292311804888502e-05,
      "loss": 2.5278,
      "step": 14904
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.118595838546753,
      "learning_rate": 1.8292081675812584e-05,
      "loss": 2.2108,
      "step": 14905
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9811825156211853,
      "learning_rate": 1.829185153267933e-05,
      "loss": 2.4004,
      "step": 14906
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0800378322601318,
      "learning_rate": 1.8291621375489143e-05,
      "loss": 2.5925,
      "step": 14907
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0120761394500732,
      "learning_rate": 1.8291391204242403e-05,
      "loss": 2.4863,
      "step": 14908
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1422886848449707,
      "learning_rate": 1.8291161018939503e-05,
      "loss": 2.4527,
      "step": 14909
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0056114196777344,
      "learning_rate": 1.8290930819580833e-05,
      "loss": 2.526,
      "step": 14910
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1260716915130615,
      "learning_rate": 1.8290700606166783e-05,
      "loss": 2.6793,
      "step": 14911
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.303468108177185,
      "learning_rate": 1.8290470378697746e-05,
      "loss": 2.4278,
      "step": 14912
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.087348461151123,
      "learning_rate": 1.829024013717411e-05,
      "loss": 2.6193,
      "step": 14913
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3521636724472046,
      "learning_rate": 1.8290009881596263e-05,
      "loss": 2.6594,
      "step": 14914
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0830401182174683,
      "learning_rate": 1.82897796119646e-05,
      "loss": 2.4941,
      "step": 14915
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0877323150634766,
      "learning_rate": 1.8289549328279507e-05,
      "loss": 2.6429,
      "step": 14916
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0237576961517334,
      "learning_rate": 1.8289319030541375e-05,
      "loss": 2.5624,
      "step": 14917
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0646297931671143,
      "learning_rate": 1.82890887187506e-05,
      "loss": 2.6309,
      "step": 14918
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.143430471420288,
      "learning_rate": 1.8288858392907568e-05,
      "loss": 2.3921,
      "step": 14919
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2111291885375977,
      "learning_rate": 1.8288628053012668e-05,
      "loss": 2.4977,
      "step": 14920
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0254818201065063,
      "learning_rate": 1.828839769906629e-05,
      "loss": 2.4954,
      "step": 14921
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1038808822631836,
      "learning_rate": 1.828816733106883e-05,
      "loss": 2.6346,
      "step": 14922
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.01466965675354,
      "learning_rate": 1.8287936949020678e-05,
      "loss": 2.5621,
      "step": 14923
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.984805703163147,
      "learning_rate": 1.8287706552922218e-05,
      "loss": 2.5417,
      "step": 14924
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0122772455215454,
      "learning_rate": 1.8287476142773846e-05,
      "loss": 2.4544,
      "step": 14925
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0399681329727173,
      "learning_rate": 1.828724571857595e-05,
      "loss": 2.3381,
      "step": 14926
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1132452487945557,
      "learning_rate": 1.8287015280328922e-05,
      "loss": 2.4886,
      "step": 14927
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9721402525901794,
      "learning_rate": 1.8286784828033153e-05,
      "loss": 2.5442,
      "step": 14928
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0903171300888062,
      "learning_rate": 1.828655436168903e-05,
      "loss": 2.302,
      "step": 14929
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0127757787704468,
      "learning_rate": 1.828632388129695e-05,
      "loss": 2.74,
      "step": 14930
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9688135981559753,
      "learning_rate": 1.82860933868573e-05,
      "loss": 2.5142,
      "step": 14931
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0798041820526123,
      "learning_rate": 1.828586287837047e-05,
      "loss": 2.5446,
      "step": 14932
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0294221639633179,
      "learning_rate": 1.8285632355836855e-05,
      "loss": 2.4161,
      "step": 14933
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0883054733276367,
      "learning_rate": 1.828540181925684e-05,
      "loss": 2.6622,
      "step": 14934
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0770339965820312,
      "learning_rate": 1.828517126863082e-05,
      "loss": 2.4289,
      "step": 14935
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.185238242149353,
      "learning_rate": 1.8284940703959183e-05,
      "loss": 2.5346,
      "step": 14936
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0730677843093872,
      "learning_rate": 1.828471012524232e-05,
      "loss": 2.5315,
      "step": 14937
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2073192596435547,
      "learning_rate": 1.828447953248063e-05,
      "loss": 2.4978,
      "step": 14938
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0354516506195068,
      "learning_rate": 1.828424892567449e-05,
      "loss": 2.4016,
      "step": 14939
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0703206062316895,
      "learning_rate": 1.82840183048243e-05,
      "loss": 2.5675,
      "step": 14940
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.161834955215454,
      "learning_rate": 1.828378766993045e-05,
      "loss": 2.6745,
      "step": 14941
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.031625509262085,
      "learning_rate": 1.828355702099333e-05,
      "loss": 2.6174,
      "step": 14942
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0140150785446167,
      "learning_rate": 1.828332635801333e-05,
      "loss": 2.6275,
      "step": 14943
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0073890686035156,
      "learning_rate": 1.8283095680990845e-05,
      "loss": 2.4794,
      "step": 14944
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0231349468231201,
      "learning_rate": 1.828286498992626e-05,
      "loss": 2.6673,
      "step": 14945
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9764547348022461,
      "learning_rate": 1.8282634284819968e-05,
      "loss": 2.3767,
      "step": 14946
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9970791935920715,
      "learning_rate": 1.8282403565672368e-05,
      "loss": 2.5678,
      "step": 14947
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0708541870117188,
      "learning_rate": 1.828217283248384e-05,
      "loss": 2.414,
      "step": 14948
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1223775148391724,
      "learning_rate": 1.8281942085254778e-05,
      "loss": 2.763,
      "step": 14949
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1059941053390503,
      "learning_rate": 1.8281711323985577e-05,
      "loss": 2.2401,
      "step": 14950
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0359867811203003,
      "learning_rate": 1.8281480548676626e-05,
      "loss": 2.5657,
      "step": 14951
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0312443971633911,
      "learning_rate": 1.8281249759328318e-05,
      "loss": 2.3649,
      "step": 14952
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0452181100845337,
      "learning_rate": 1.8281018955941038e-05,
      "loss": 2.4712,
      "step": 14953
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0155073404312134,
      "learning_rate": 1.8280788138515183e-05,
      "loss": 2.7209,
      "step": 14954
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0917613506317139,
      "learning_rate": 1.828055730705115e-05,
      "loss": 2.402,
      "step": 14955
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1148288249969482,
      "learning_rate": 1.8280326461549313e-05,
      "loss": 2.3938,
      "step": 14956
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9966578483581543,
      "learning_rate": 1.828009560201008e-05,
      "loss": 2.5825,
      "step": 14957
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9075813293457031,
      "learning_rate": 1.8279864728433833e-05,
      "loss": 2.7113,
      "step": 14958
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0870550870895386,
      "learning_rate": 1.827963384082097e-05,
      "loss": 2.3755,
      "step": 14959
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0418338775634766,
      "learning_rate": 1.8279402939171875e-05,
      "loss": 2.4496,
      "step": 14960
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.980664074420929,
      "learning_rate": 1.8279172023486946e-05,
      "loss": 2.4682,
      "step": 14961
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9951192736625671,
      "learning_rate": 1.8278941093766572e-05,
      "loss": 2.7174,
      "step": 14962
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9858148694038391,
      "learning_rate": 1.8278710150011143e-05,
      "loss": 2.39,
      "step": 14963
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1288272142410278,
      "learning_rate": 1.8278479192221053e-05,
      "loss": 2.4731,
      "step": 14964
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1811645030975342,
      "learning_rate": 1.827824822039669e-05,
      "loss": 2.6046,
      "step": 14965
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9201146960258484,
      "learning_rate": 1.827801723453845e-05,
      "loss": 2.5207,
      "step": 14966
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1148182153701782,
      "learning_rate": 1.8277786234646725e-05,
      "loss": 2.8038,
      "step": 14967
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9933161735534668,
      "learning_rate": 1.8277555220721898e-05,
      "loss": 2.5333,
      "step": 14968
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0022804737091064,
      "learning_rate": 1.827732419276437e-05,
      "loss": 2.4909,
      "step": 14969
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9639511108398438,
      "learning_rate": 1.8277093150774534e-05,
      "loss": 2.4011,
      "step": 14970
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2060027122497559,
      "learning_rate": 1.827686209475277e-05,
      "loss": 2.3452,
      "step": 14971
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9824743866920471,
      "learning_rate": 1.827663102469948e-05,
      "loss": 2.4039,
      "step": 14972
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9805877804756165,
      "learning_rate": 1.827639994061505e-05,
      "loss": 2.4342,
      "step": 14973
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1482969522476196,
      "learning_rate": 1.8276168842499876e-05,
      "loss": 2.7457,
      "step": 14974
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9497470259666443,
      "learning_rate": 1.827593773035435e-05,
      "loss": 2.6571,
      "step": 14975
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0869089365005493,
      "learning_rate": 1.827570660417886e-05,
      "loss": 2.4772,
      "step": 14976
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.087648868560791,
      "learning_rate": 1.8275475463973796e-05,
      "loss": 2.5581,
      "step": 14977
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0671062469482422,
      "learning_rate": 1.8275244309739557e-05,
      "loss": 2.4364,
      "step": 14978
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1440426111221313,
      "learning_rate": 1.827501314147653e-05,
      "loss": 2.4187,
      "step": 14979
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.056901454925537,
      "learning_rate": 1.827478195918511e-05,
      "loss": 2.444,
      "step": 14980
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9754975438117981,
      "learning_rate": 1.8274550762865688e-05,
      "loss": 2.4216,
      "step": 14981
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9734151363372803,
      "learning_rate": 1.8274319552518657e-05,
      "loss": 2.5641,
      "step": 14982
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0981694459915161,
      "learning_rate": 1.8274088328144405e-05,
      "loss": 2.3523,
      "step": 14983
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0772618055343628,
      "learning_rate": 1.8273857089743324e-05,
      "loss": 2.4024,
      "step": 14984
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.15542471408844,
      "learning_rate": 1.8273625837315808e-05,
      "loss": 2.4405,
      "step": 14985
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1488080024719238,
      "learning_rate": 1.827339457086225e-05,
      "loss": 2.4644,
      "step": 14986
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0842467546463013,
      "learning_rate": 1.8273163290383042e-05,
      "loss": 2.3748,
      "step": 14987
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9702255725860596,
      "learning_rate": 1.8272931995878575e-05,
      "loss": 2.534,
      "step": 14988
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0037455558776855,
      "learning_rate": 1.827270068734924e-05,
      "loss": 2.4888,
      "step": 14989
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9729002714157104,
      "learning_rate": 1.827246936479543e-05,
      "loss": 2.5107,
      "step": 14990
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0733039379119873,
      "learning_rate": 1.8272238028217542e-05,
      "loss": 2.7679,
      "step": 14991
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.063546061515808,
      "learning_rate": 1.827200667761596e-05,
      "loss": 2.5958,
      "step": 14992
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0407497882843018,
      "learning_rate": 1.8271775312991083e-05,
      "loss": 2.7459,
      "step": 14993
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1015820503234863,
      "learning_rate": 1.8271543934343298e-05,
      "loss": 2.5377,
      "step": 14994
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1127597093582153,
      "learning_rate": 1.8271312541672998e-05,
      "loss": 2.2613,
      "step": 14995
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0736838579177856,
      "learning_rate": 1.8271081134980582e-05,
      "loss": 2.6625,
      "step": 14996
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9977214336395264,
      "learning_rate": 1.8270849714266434e-05,
      "loss": 2.5262,
      "step": 14997
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4237728118896484,
      "learning_rate": 1.8270618279530946e-05,
      "loss": 2.5414,
      "step": 14998
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1299424171447754,
      "learning_rate": 1.8270386830774518e-05,
      "loss": 2.5924,
      "step": 14999
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9961338639259338,
      "learning_rate": 1.8270155367997537e-05,
      "loss": 2.6804,
      "step": 15000
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2112362384796143,
      "learning_rate": 1.8269923891200395e-05,
      "loss": 2.5803,
      "step": 15001
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1004256010055542,
      "learning_rate": 1.826969240038349e-05,
      "loss": 2.4362,
      "step": 15002
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.208972692489624,
      "learning_rate": 1.8269460895547206e-05,
      "loss": 2.6665,
      "step": 15003
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0475820302963257,
      "learning_rate": 1.8269229376691944e-05,
      "loss": 2.3849,
      "step": 15004
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.918197512626648,
      "learning_rate": 1.826899784381809e-05,
      "loss": 2.5413,
      "step": 15005
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0028294324874878,
      "learning_rate": 1.8268766296926038e-05,
      "loss": 2.3631,
      "step": 15006
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0328727960586548,
      "learning_rate": 1.8268534736016184e-05,
      "loss": 2.6181,
      "step": 15007
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9626501798629761,
      "learning_rate": 1.8268303161088913e-05,
      "loss": 2.4614,
      "step": 15008
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9887137413024902,
      "learning_rate": 1.8268071572144626e-05,
      "loss": 2.6229,
      "step": 15009
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9659159183502197,
      "learning_rate": 1.8267839969183712e-05,
      "loss": 2.4775,
      "step": 15010
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9654554128646851,
      "learning_rate": 1.8267608352206565e-05,
      "loss": 2.4566,
      "step": 15011
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0369256734848022,
      "learning_rate": 1.8267376721213574e-05,
      "loss": 2.6455,
      "step": 15012
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1223307847976685,
      "learning_rate": 1.8267145076205135e-05,
      "loss": 2.6034,
      "step": 15013
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9244855642318726,
      "learning_rate": 1.826691341718164e-05,
      "loss": 2.5143,
      "step": 15014
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0166388750076294,
      "learning_rate": 1.8266681744143483e-05,
      "loss": 2.5614,
      "step": 15015
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2104551792144775,
      "learning_rate": 1.826645005709105e-05,
      "loss": 2.5943,
      "step": 15016
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0978220701217651,
      "learning_rate": 1.8266218356024743e-05,
      "loss": 2.6751,
      "step": 15017
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0866774320602417,
      "learning_rate": 1.826598664094495e-05,
      "loss": 2.5548,
      "step": 15018
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0268381834030151,
      "learning_rate": 1.8265754911852064e-05,
      "loss": 2.6058,
      "step": 15019
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0425729751586914,
      "learning_rate": 1.8265523168746483e-05,
      "loss": 2.3945,
      "step": 15020
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.003387451171875,
      "learning_rate": 1.8265291411628588e-05,
      "loss": 2.3806,
      "step": 15021
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0085713863372803,
      "learning_rate": 1.8265059640498783e-05,
      "loss": 2.5255,
      "step": 15022
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9853740334510803,
      "learning_rate": 1.8264827855357457e-05,
      "loss": 2.54,
      "step": 15023
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0709731578826904,
      "learning_rate": 1.8264596056205004e-05,
      "loss": 2.6495,
      "step": 15024
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0704466104507446,
      "learning_rate": 1.8264364243041815e-05,
      "loss": 2.6636,
      "step": 15025
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.097009539604187,
      "learning_rate": 1.8264132415868286e-05,
      "loss": 2.5267,
      "step": 15026
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0624672174453735,
      "learning_rate": 1.8263900574684807e-05,
      "loss": 2.442,
      "step": 15027
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0505324602127075,
      "learning_rate": 1.8263668719491773e-05,
      "loss": 2.5549,
      "step": 15028
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0053861141204834,
      "learning_rate": 1.8263436850289573e-05,
      "loss": 2.3978,
      "step": 15029
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0777347087860107,
      "learning_rate": 1.8263204967078603e-05,
      "loss": 2.6119,
      "step": 15030
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0279179811477661,
      "learning_rate": 1.826297306985926e-05,
      "loss": 2.3266,
      "step": 15031
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2269623279571533,
      "learning_rate": 1.826274115863193e-05,
      "loss": 2.4692,
      "step": 15032
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0628994703292847,
      "learning_rate": 1.8262509233397013e-05,
      "loss": 2.8391,
      "step": 15033
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0532647371292114,
      "learning_rate": 1.8262277294154895e-05,
      "loss": 2.4782,
      "step": 15034
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9783628582954407,
      "learning_rate": 1.8262045340905972e-05,
      "loss": 2.3472,
      "step": 15035
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9276686310768127,
      "learning_rate": 1.8261813373650645e-05,
      "loss": 2.4597,
      "step": 15036
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.032509446144104,
      "learning_rate": 1.8261581392389296e-05,
      "loss": 2.5435,
      "step": 15037
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1887832880020142,
      "learning_rate": 1.8261349397122322e-05,
      "loss": 2.4591,
      "step": 15038
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.030125379562378,
      "learning_rate": 1.826111738785012e-05,
      "loss": 2.6193,
      "step": 15039
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9537376165390015,
      "learning_rate": 1.8260885364573076e-05,
      "loss": 2.4317,
      "step": 15040
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0174944400787354,
      "learning_rate": 1.8260653327291594e-05,
      "loss": 2.5654,
      "step": 15041
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0317245721817017,
      "learning_rate": 1.8260421276006055e-05,
      "loss": 2.4832,
      "step": 15042
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0084484815597534,
      "learning_rate": 1.8260189210716862e-05,
      "loss": 2.7976,
      "step": 15043
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.065403938293457,
      "learning_rate": 1.8259957131424403e-05,
      "loss": 2.399,
      "step": 15044
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0232255458831787,
      "learning_rate": 1.8259725038129075e-05,
      "loss": 2.6162,
      "step": 15045
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0320875644683838,
      "learning_rate": 1.8259492930831267e-05,
      "loss": 2.3759,
      "step": 15046
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9994348287582397,
      "learning_rate": 1.8259260809531377e-05,
      "loss": 2.4545,
      "step": 15047
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1239370107650757,
      "learning_rate": 1.82590286742298e-05,
      "loss": 2.2365,
      "step": 15048
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0333372354507446,
      "learning_rate": 1.8258796524926922e-05,
      "loss": 2.6701,
      "step": 15049
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9879000782966614,
      "learning_rate": 1.825856436162314e-05,
      "loss": 2.5265,
      "step": 15050
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1314091682434082,
      "learning_rate": 1.825833218431885e-05,
      "loss": 2.4518,
      "step": 15051
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0966049432754517,
      "learning_rate": 1.8258099993014445e-05,
      "loss": 2.6711,
      "step": 15052
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2164536714553833,
      "learning_rate": 1.8257867787710317e-05,
      "loss": 2.532,
      "step": 15053
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0818932056427002,
      "learning_rate": 1.825763556840686e-05,
      "loss": 2.3861,
      "step": 15054
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.083984613418579,
      "learning_rate": 1.8257403335104466e-05,
      "loss": 2.5751,
      "step": 15055
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9621016979217529,
      "learning_rate": 1.8257171087803533e-05,
      "loss": 2.4865,
      "step": 15056
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0122215747833252,
      "learning_rate": 1.8256938826504455e-05,
      "loss": 2.7178,
      "step": 15057
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.035749077796936,
      "learning_rate": 1.825670655120762e-05,
      "loss": 2.39,
      "step": 15058
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0057376623153687,
      "learning_rate": 1.8256474261913425e-05,
      "loss": 2.5688,
      "step": 15059
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9693965315818787,
      "learning_rate": 1.8256241958622266e-05,
      "loss": 2.4989,
      "step": 15060
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0594580173492432,
      "learning_rate": 1.825600964133453e-05,
      "loss": 2.5123,
      "step": 15061
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0719895362854004,
      "learning_rate": 1.8255777310050616e-05,
      "loss": 2.5649,
      "step": 15062
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.009407877922058,
      "learning_rate": 1.825554496477092e-05,
      "loss": 2.456,
      "step": 15063
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9801713228225708,
      "learning_rate": 1.825531260549583e-05,
      "loss": 2.4422,
      "step": 15064
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9492313861846924,
      "learning_rate": 1.8255080232225747e-05,
      "loss": 2.5145,
      "step": 15065
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0143349170684814,
      "learning_rate": 1.825484784496106e-05,
      "loss": 2.4961,
      "step": 15066
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0158427953720093,
      "learning_rate": 1.8254615443702164e-05,
      "loss": 2.4985,
      "step": 15067
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0075349807739258,
      "learning_rate": 1.825438302844945e-05,
      "loss": 2.5399,
      "step": 15068
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1162258386611938,
      "learning_rate": 1.8254150599203316e-05,
      "loss": 2.5283,
      "step": 15069
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9809932112693787,
      "learning_rate": 1.8253918155964157e-05,
      "loss": 2.487,
      "step": 15070
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0341730117797852,
      "learning_rate": 1.8253685698732366e-05,
      "loss": 2.3791,
      "step": 15071
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0827337503433228,
      "learning_rate": 1.8253453227508332e-05,
      "loss": 2.2792,
      "step": 15072
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0351718664169312,
      "learning_rate": 1.8253220742292456e-05,
      "loss": 2.5652,
      "step": 15073
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1144688129425049,
      "learning_rate": 1.8252988243085127e-05,
      "loss": 2.3455,
      "step": 15074
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9612565040588379,
      "learning_rate": 1.8252755729886745e-05,
      "loss": 2.4655,
      "step": 15075
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.037214756011963,
      "learning_rate": 1.82525232026977e-05,
      "loss": 2.4655,
      "step": 15076
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0185390710830688,
      "learning_rate": 1.825229066151838e-05,
      "loss": 2.6514,
      "step": 15077
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0164273977279663,
      "learning_rate": 1.8252058106349194e-05,
      "loss": 2.3945,
      "step": 15078
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9871421456336975,
      "learning_rate": 1.8251825537190523e-05,
      "loss": 2.4729,
      "step": 15079
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9350804090499878,
      "learning_rate": 1.8251592954042768e-05,
      "loss": 2.3449,
      "step": 15080
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0000765323638916,
      "learning_rate": 1.8251360356906324e-05,
      "loss": 2.5402,
      "step": 15081
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0185227394104004,
      "learning_rate": 1.8251127745781583e-05,
      "loss": 2.6574,
      "step": 15082
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0491151809692383,
      "learning_rate": 1.825089512066894e-05,
      "loss": 2.3034,
      "step": 15083
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9333990812301636,
      "learning_rate": 1.8250662481568785e-05,
      "loss": 2.7865,
      "step": 15084
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9892567992210388,
      "learning_rate": 1.825042982848152e-05,
      "loss": 2.4422,
      "step": 15085
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.01190984249115,
      "learning_rate": 1.8250197161407534e-05,
      "loss": 2.5021,
      "step": 15086
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9853807091712952,
      "learning_rate": 1.8249964480347226e-05,
      "loss": 2.4774,
      "step": 15087
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0201551914215088,
      "learning_rate": 1.8249731785300983e-05,
      "loss": 2.4577,
      "step": 15088
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0674262046813965,
      "learning_rate": 1.8249499076269207e-05,
      "loss": 2.4843,
      "step": 15089
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0522074699401855,
      "learning_rate": 1.8249266353252287e-05,
      "loss": 2.553,
      "step": 15090
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1151797771453857,
      "learning_rate": 1.824903361625062e-05,
      "loss": 2.6194,
      "step": 15091
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5271130800247192,
      "learning_rate": 1.8248800865264604e-05,
      "loss": 2.565,
      "step": 15092
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0812238454818726,
      "learning_rate": 1.824856810029463e-05,
      "loss": 2.695,
      "step": 15093
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0436820983886719,
      "learning_rate": 1.824833532134109e-05,
      "loss": 2.5294,
      "step": 15094
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0628494024276733,
      "learning_rate": 1.824810252840438e-05,
      "loss": 2.4603,
      "step": 15095
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0447221994400024,
      "learning_rate": 1.8247869721484898e-05,
      "loss": 2.3533,
      "step": 15096
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1376100778579712,
      "learning_rate": 1.8247636900583037e-05,
      "loss": 2.3738,
      "step": 15097
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9579091668128967,
      "learning_rate": 1.8247404065699194e-05,
      "loss": 2.5914,
      "step": 15098
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0107213258743286,
      "learning_rate": 1.8247171216833758e-05,
      "loss": 2.5364,
      "step": 15099
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1438868045806885,
      "learning_rate": 1.824693835398713e-05,
      "loss": 2.6622,
      "step": 15100
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9859461784362793,
      "learning_rate": 1.82467054771597e-05,
      "loss": 2.5449,
      "step": 15101
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1215810775756836,
      "learning_rate": 1.8246472586351862e-05,
      "loss": 2.3663,
      "step": 15102
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0841879844665527,
      "learning_rate": 1.8246239681564016e-05,
      "loss": 2.4463,
      "step": 15103
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9284913539886475,
      "learning_rate": 1.824600676279655e-05,
      "loss": 2.6217,
      "step": 15104
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9816442728042603,
      "learning_rate": 1.824577383004987e-05,
      "loss": 2.4556,
      "step": 15105
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0323383808135986,
      "learning_rate": 1.8245540883324358e-05,
      "loss": 2.5438,
      "step": 15106
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0333898067474365,
      "learning_rate": 1.8245307922620418e-05,
      "loss": 2.4359,
      "step": 15107
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0003201961517334,
      "learning_rate": 1.824507494793844e-05,
      "loss": 2.7236,
      "step": 15108
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0969269275665283,
      "learning_rate": 1.824484195927882e-05,
      "loss": 2.4209,
      "step": 15109
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5874882936477661,
      "learning_rate": 1.8244608956641955e-05,
      "loss": 2.5451,
      "step": 15110
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0525474548339844,
      "learning_rate": 1.824437594002824e-05,
      "loss": 2.5244,
      "step": 15111
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0511393547058105,
      "learning_rate": 1.8244142909438064e-05,
      "loss": 2.5594,
      "step": 15112
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.058042049407959,
      "learning_rate": 1.824390986487183e-05,
      "loss": 2.5662,
      "step": 15113
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9420033693313599,
      "learning_rate": 1.824367680632993e-05,
      "loss": 2.5303,
      "step": 15114
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9728195667266846,
      "learning_rate": 1.8243443733812757e-05,
      "loss": 2.7556,
      "step": 15115
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0378351211547852,
      "learning_rate": 1.8243210647320714e-05,
      "loss": 2.8074,
      "step": 15116
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0727455615997314,
      "learning_rate": 1.8242977546854183e-05,
      "loss": 2.574,
      "step": 15117
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.132112979888916,
      "learning_rate": 1.824274443241357e-05,
      "loss": 2.3529,
      "step": 15118
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1815459728240967,
      "learning_rate": 1.8242511303999265e-05,
      "loss": 2.5653,
      "step": 15119
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0076956748962402,
      "learning_rate": 1.8242278161611663e-05,
      "loss": 2.3203,
      "step": 15120
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0394351482391357,
      "learning_rate": 1.8242045005251163e-05,
      "loss": 2.4633,
      "step": 15121
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9757962226867676,
      "learning_rate": 1.824181183491816e-05,
      "loss": 2.3572,
      "step": 15122
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.964017927646637,
      "learning_rate": 1.8241578650613045e-05,
      "loss": 2.4158,
      "step": 15123
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3517690896987915,
      "learning_rate": 1.8241345452336215e-05,
      "loss": 2.6256,
      "step": 15124
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0402501821517944,
      "learning_rate": 1.8241112240088067e-05,
      "loss": 2.353,
      "step": 15125
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0866533517837524,
      "learning_rate": 1.8240879013868995e-05,
      "loss": 2.4653,
      "step": 15126
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9461703300476074,
      "learning_rate": 1.82406457736794e-05,
      "loss": 2.539,
      "step": 15127
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0061538219451904,
      "learning_rate": 1.8240412519519664e-05,
      "loss": 2.4102,
      "step": 15128
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0685290098190308,
      "learning_rate": 1.8240179251390198e-05,
      "loss": 2.3602,
      "step": 15129
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9292589426040649,
      "learning_rate": 1.8239945969291387e-05,
      "loss": 2.3677,
      "step": 15130
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.935633659362793,
      "learning_rate": 1.823971267322363e-05,
      "loss": 2.3618,
      "step": 15131
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1074284315109253,
      "learning_rate": 1.823947936318732e-05,
      "loss": 2.3883,
      "step": 15132
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0185763835906982,
      "learning_rate": 1.8239246039182855e-05,
      "loss": 2.4419,
      "step": 15133
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2226709127426147,
      "learning_rate": 1.8239012701210634e-05,
      "loss": 2.5715,
      "step": 15134
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1624414920806885,
      "learning_rate": 1.8238779349271046e-05,
      "loss": 2.7805,
      "step": 15135
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0725820064544678,
      "learning_rate": 1.8238545983364487e-05,
      "loss": 2.1755,
      "step": 15136
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0376112461090088,
      "learning_rate": 1.823831260349136e-05,
      "loss": 2.3998,
      "step": 15137
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0112295150756836,
      "learning_rate": 1.8238079209652052e-05,
      "loss": 2.5068,
      "step": 15138
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9270557165145874,
      "learning_rate": 1.8237845801846965e-05,
      "loss": 2.3693,
      "step": 15139
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.123376488685608,
      "learning_rate": 1.823761238007649e-05,
      "loss": 2.4305,
      "step": 15140
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1218081712722778,
      "learning_rate": 1.8237378944341023e-05,
      "loss": 2.4281,
      "step": 15141
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1666579246520996,
      "learning_rate": 1.8237145494640968e-05,
      "loss": 2.4749,
      "step": 15142
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1193550825119019,
      "learning_rate": 1.823691203097671e-05,
      "loss": 2.2951,
      "step": 15143
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0126510858535767,
      "learning_rate": 1.8236678553348648e-05,
      "loss": 2.6669,
      "step": 15144
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.017027735710144,
      "learning_rate": 1.823644506175718e-05,
      "loss": 2.4906,
      "step": 15145
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9455379843711853,
      "learning_rate": 1.82362115562027e-05,
      "loss": 2.671,
      "step": 15146
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0747910737991333,
      "learning_rate": 1.8235978036685606e-05,
      "loss": 2.3772,
      "step": 15147
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9432392716407776,
      "learning_rate": 1.823574450320629e-05,
      "loss": 2.5305,
      "step": 15148
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.050491452217102,
      "learning_rate": 1.8235510955765153e-05,
      "loss": 2.5809,
      "step": 15149
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.200979471206665,
      "learning_rate": 1.8235277394362587e-05,
      "loss": 2.4987,
      "step": 15150
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0495890378952026,
      "learning_rate": 1.8235043818998988e-05,
      "loss": 2.4489,
      "step": 15151
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9708746671676636,
      "learning_rate": 1.8234810229674757e-05,
      "loss": 2.4156,
      "step": 15152
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0001577138900757,
      "learning_rate": 1.823457662639028e-05,
      "loss": 2.5675,
      "step": 15153
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0427128076553345,
      "learning_rate": 1.8234343009145964e-05,
      "loss": 2.4691,
      "step": 15154
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9753251075744629,
      "learning_rate": 1.8234109377942198e-05,
      "loss": 2.6692,
      "step": 15155
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0754733085632324,
      "learning_rate": 1.823387573277938e-05,
      "loss": 2.4902,
      "step": 15156
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.074569821357727,
      "learning_rate": 1.8233642073657906e-05,
      "loss": 2.1183,
      "step": 15157
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.051965594291687,
      "learning_rate": 1.8233408400578177e-05,
      "loss": 2.3339,
      "step": 15158
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9509549736976624,
      "learning_rate": 1.823317471354058e-05,
      "loss": 2.5622,
      "step": 15159
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0216295719146729,
      "learning_rate": 1.823294101254552e-05,
      "loss": 2.3028,
      "step": 15160
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9679383039474487,
      "learning_rate": 1.8232707297593383e-05,
      "loss": 2.4994,
      "step": 15161
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1041947603225708,
      "learning_rate": 1.8232473568684573e-05,
      "loss": 2.0967,
      "step": 15162
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.12832772731781,
      "learning_rate": 1.8232239825819486e-05,
      "loss": 2.4515,
      "step": 15163
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9882268905639648,
      "learning_rate": 1.823200606899852e-05,
      "loss": 2.5309,
      "step": 15164
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9725661277770996,
      "learning_rate": 1.823177229822206e-05,
      "loss": 2.2027,
      "step": 15165
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9511997103691101,
      "learning_rate": 1.8231538513490513e-05,
      "loss": 2.4783,
      "step": 15166
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1151043176651,
      "learning_rate": 1.8231304714804277e-05,
      "loss": 2.5157,
      "step": 15167
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0054490566253662,
      "learning_rate": 1.823107090216374e-05,
      "loss": 2.4947,
      "step": 15168
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1228636503219604,
      "learning_rate": 1.82308370755693e-05,
      "loss": 2.5783,
      "step": 15169
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.8941424489021301,
      "learning_rate": 1.8230603235021363e-05,
      "loss": 2.2018,
      "step": 15170
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1464745998382568,
      "learning_rate": 1.823036938052031e-05,
      "loss": 2.3081,
      "step": 15171
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9998014569282532,
      "learning_rate": 1.823013551206655e-05,
      "loss": 2.5844,
      "step": 15172
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.072248935699463,
      "learning_rate": 1.8229901629660477e-05,
      "loss": 2.7485,
      "step": 15173
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.063008189201355,
      "learning_rate": 1.822966773330248e-05,
      "loss": 2.6563,
      "step": 15174
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.049279808998108,
      "learning_rate": 1.822943382299297e-05,
      "loss": 2.5814,
      "step": 15175
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0536000728607178,
      "learning_rate": 1.8229199898732327e-05,
      "loss": 2.5807,
      "step": 15176
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.949463963508606,
      "learning_rate": 1.8228965960520955e-05,
      "loss": 2.4798,
      "step": 15177
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9945673942565918,
      "learning_rate": 1.8228732008359254e-05,
      "loss": 2.4867,
      "step": 15178
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1176691055297852,
      "learning_rate": 1.8228498042247613e-05,
      "loss": 2.5759,
      "step": 15179
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.982911229133606,
      "learning_rate": 1.822826406218644e-05,
      "loss": 2.382,
      "step": 15180
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9072153568267822,
      "learning_rate": 1.8228030068176118e-05,
      "loss": 2.3955,
      "step": 15181
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9735156297683716,
      "learning_rate": 1.822779606021705e-05,
      "loss": 2.2552,
      "step": 15182
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9904422163963318,
      "learning_rate": 1.8227562038309637e-05,
      "loss": 2.5631,
      "step": 15183
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9253341555595398,
      "learning_rate": 1.8227328002454272e-05,
      "loss": 2.5249,
      "step": 15184
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1074261665344238,
      "learning_rate": 1.822709395265135e-05,
      "loss": 2.3799,
      "step": 15185
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0495479106903076,
      "learning_rate": 1.822685988890127e-05,
      "loss": 2.4218,
      "step": 15186
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0999679565429688,
      "learning_rate": 1.8226625811204427e-05,
      "loss": 2.7179,
      "step": 15187
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.021240234375,
      "learning_rate": 1.8226391719561222e-05,
      "loss": 2.4285,
      "step": 15188
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9420196413993835,
      "learning_rate": 1.8226157613972043e-05,
      "loss": 2.3863,
      "step": 15189
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9802827835083008,
      "learning_rate": 1.8225923494437296e-05,
      "loss": 2.5818,
      "step": 15190
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.954134464263916,
      "learning_rate": 1.8225689360957375e-05,
      "loss": 2.5503,
      "step": 15191
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9980998039245605,
      "learning_rate": 1.8225455213532676e-05,
      "loss": 2.4141,
      "step": 15192
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.17581307888031,
      "learning_rate": 1.8225221052163596e-05,
      "loss": 2.4766,
      "step": 15193
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0525299310684204,
      "learning_rate": 1.822498687685053e-05,
      "loss": 2.3849,
      "step": 15194
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0717781782150269,
      "learning_rate": 1.8224752687593883e-05,
      "loss": 2.6641,
      "step": 15195
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2013131380081177,
      "learning_rate": 1.8224518484394043e-05,
      "loss": 2.4391,
      "step": 15196
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9721249341964722,
      "learning_rate": 1.822428426725141e-05,
      "loss": 2.361,
      "step": 15197
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.296074628829956,
      "learning_rate": 1.8224050036166386e-05,
      "loss": 2.5594,
      "step": 15198
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0316762924194336,
      "learning_rate": 1.8223815791139355e-05,
      "loss": 2.6802,
      "step": 15199
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0100009441375732,
      "learning_rate": 1.822358153217073e-05,
      "loss": 2.3645,
      "step": 15200
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0825831890106201,
      "learning_rate": 1.82233472592609e-05,
      "loss": 2.6046,
      "step": 15201
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0830762386322021,
      "learning_rate": 1.822311297241026e-05,
      "loss": 2.5396,
      "step": 15202
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9602159857749939,
      "learning_rate": 1.822287867161921e-05,
      "loss": 2.6485,
      "step": 15203
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0668971538543701,
      "learning_rate": 1.822264435688815e-05,
      "loss": 2.4094,
      "step": 15204
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.951931893825531,
      "learning_rate": 1.8222410028217474e-05,
      "loss": 2.345,
      "step": 15205
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1059359312057495,
      "learning_rate": 1.8222175685607578e-05,
      "loss": 2.5722,
      "step": 15206
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1304328441619873,
      "learning_rate": 1.8221941329058864e-05,
      "loss": 2.3986,
      "step": 15207
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0964553356170654,
      "learning_rate": 1.8221706958571725e-05,
      "loss": 2.6477,
      "step": 15208
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.966454267501831,
      "learning_rate": 1.822147257414656e-05,
      "loss": 2.5022,
      "step": 15209
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9436203241348267,
      "learning_rate": 1.8221238175783763e-05,
      "loss": 2.5139,
      "step": 15210
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0619231462478638,
      "learning_rate": 1.8221003763483737e-05,
      "loss": 2.3742,
      "step": 15211
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.93550705909729,
      "learning_rate": 1.8220769337246878e-05,
      "loss": 2.7436,
      "step": 15212
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0132603645324707,
      "learning_rate": 1.8220534897073583e-05,
      "loss": 2.337,
      "step": 15213
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0895549058914185,
      "learning_rate": 1.8220300442964244e-05,
      "loss": 2.54,
      "step": 15214
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5903114080429077,
      "learning_rate": 1.822006597491927e-05,
      "loss": 2.6203,
      "step": 15215
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9548789262771606,
      "learning_rate": 1.8219831492939043e-05,
      "loss": 2.3627,
      "step": 15216
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0662580728530884,
      "learning_rate": 1.8219596997023976e-05,
      "loss": 2.4552,
      "step": 15217
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.968906581401825,
      "learning_rate": 1.8219362487174455e-05,
      "loss": 2.4075,
      "step": 15218
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9990350008010864,
      "learning_rate": 1.8219127963390887e-05,
      "loss": 2.5985,
      "step": 15219
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.965675413608551,
      "learning_rate": 1.8218893425673663e-05,
      "loss": 2.6703,
      "step": 15220
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9314917922019958,
      "learning_rate": 1.8218658874023182e-05,
      "loss": 2.3552,
      "step": 15221
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1046404838562012,
      "learning_rate": 1.821842430843984e-05,
      "loss": 2.6024,
      "step": 15222
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9952536225318909,
      "learning_rate": 1.821818972892404e-05,
      "loss": 2.4731,
      "step": 15223
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0066486597061157,
      "learning_rate": 1.8217955135476172e-05,
      "loss": 2.6588,
      "step": 15224
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0226675271987915,
      "learning_rate": 1.821772052809664e-05,
      "loss": 2.5283,
      "step": 15225
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9357298612594604,
      "learning_rate": 1.8217485906785845e-05,
      "loss": 2.5376,
      "step": 15226
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1039304733276367,
      "learning_rate": 1.8217251271544175e-05,
      "loss": 2.6086,
      "step": 15227
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.950406551361084,
      "learning_rate": 1.8217016622372033e-05,
      "loss": 2.6713,
      "step": 15228
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6982020139694214,
      "learning_rate": 1.8216781959269813e-05,
      "loss": 2.3193,
      "step": 15229
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0604490041732788,
      "learning_rate": 1.8216547282237923e-05,
      "loss": 2.5087,
      "step": 15230
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0756034851074219,
      "learning_rate": 1.8216312591276746e-05,
      "loss": 2.6693,
      "step": 15231
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.012954831123352,
      "learning_rate": 1.8216077886386695e-05,
      "loss": 2.3659,
      "step": 15232
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0498642921447754,
      "learning_rate": 1.8215843167568156e-05,
      "loss": 2.4612,
      "step": 15233
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9852861166000366,
      "learning_rate": 1.821560843482153e-05,
      "loss": 2.3983,
      "step": 15234
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1703088283538818,
      "learning_rate": 1.8215373688147216e-05,
      "loss": 2.5392,
      "step": 15235
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0471575260162354,
      "learning_rate": 1.821513892754562e-05,
      "loss": 2.3215,
      "step": 15236
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0477427244186401,
      "learning_rate": 1.8214904153017127e-05,
      "loss": 2.3542,
      "step": 15237
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1094802618026733,
      "learning_rate": 1.821466936456214e-05,
      "loss": 2.4888,
      "step": 15238
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9800442457199097,
      "learning_rate": 1.8214434562181055e-05,
      "loss": 2.5439,
      "step": 15239
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9768965840339661,
      "learning_rate": 1.8214199745874275e-05,
      "loss": 2.3334,
      "step": 15240
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1836761236190796,
      "learning_rate": 1.8213964915642196e-05,
      "loss": 2.3899,
      "step": 15241
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0045655965805054,
      "learning_rate": 1.8213730071485217e-05,
      "loss": 2.6083,
      "step": 15242
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0502636432647705,
      "learning_rate": 1.8213495213403735e-05,
      "loss": 2.5375,
      "step": 15243
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1769121885299683,
      "learning_rate": 1.821326034139814e-05,
      "loss": 2.5931,
      "step": 15244
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9784731864929199,
      "learning_rate": 1.821302545546885e-05,
      "loss": 2.5047,
      "step": 15245
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0134453773498535,
      "learning_rate": 1.821279055561624e-05,
      "loss": 2.342,
      "step": 15246
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2241066694259644,
      "learning_rate": 1.8212555641840726e-05,
      "loss": 2.5165,
      "step": 15247
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.025106430053711,
      "learning_rate": 1.82123207141427e-05,
      "loss": 2.6992,
      "step": 15248
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0873773097991943,
      "learning_rate": 1.821208577252256e-05,
      "loss": 2.4972,
      "step": 15249
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9740958213806152,
      "learning_rate": 1.82118508169807e-05,
      "loss": 2.65,
      "step": 15250
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0042743682861328,
      "learning_rate": 1.8211615847517526e-05,
      "loss": 2.6026,
      "step": 15251
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0623513460159302,
      "learning_rate": 1.8211380864133433e-05,
      "loss": 2.1487,
      "step": 15252
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0332205295562744,
      "learning_rate": 1.8211145866828818e-05,
      "loss": 2.4785,
      "step": 15253
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9478015899658203,
      "learning_rate": 1.8210910855604076e-05,
      "loss": 2.5046,
      "step": 15254
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9807761311531067,
      "learning_rate": 1.8210675830459616e-05,
      "loss": 2.4273,
      "step": 15255
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0024337768554688,
      "learning_rate": 1.8210440791395832e-05,
      "loss": 2.3117,
      "step": 15256
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9300947189331055,
      "learning_rate": 1.821020573841312e-05,
      "loss": 2.3919,
      "step": 15257
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1007851362228394,
      "learning_rate": 1.8209970671511876e-05,
      "loss": 2.3876,
      "step": 15258
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1007554531097412,
      "learning_rate": 1.8209735590692502e-05,
      "loss": 2.5251,
      "step": 15259
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2672817707061768,
      "learning_rate": 1.8209500495955398e-05,
      "loss": 2.6455,
      "step": 15260
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.116337537765503,
      "learning_rate": 1.8209265387300963e-05,
      "loss": 2.5368,
      "step": 15261
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0489871501922607,
      "learning_rate": 1.820903026472959e-05,
      "loss": 2.6368,
      "step": 15262
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0974730253219604,
      "learning_rate": 1.8208795128241683e-05,
      "loss": 2.7459,
      "step": 15263
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9340896606445312,
      "learning_rate": 1.8208559977837638e-05,
      "loss": 2.4559,
      "step": 15264
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9541897773742676,
      "learning_rate": 1.8208324813517855e-05,
      "loss": 2.4159,
      "step": 15265
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9882563948631287,
      "learning_rate": 1.8208089635282733e-05,
      "loss": 2.469,
      "step": 15266
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0140471458435059,
      "learning_rate": 1.820785444313267e-05,
      "loss": 2.5435,
      "step": 15267
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0396744012832642,
      "learning_rate": 1.8207619237068063e-05,
      "loss": 2.6634,
      "step": 15268
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1163684129714966,
      "learning_rate": 1.820738401708931e-05,
      "loss": 2.7615,
      "step": 15269
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0654397010803223,
      "learning_rate": 1.8207148783196816e-05,
      "loss": 2.435,
      "step": 15270
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.084494709968567,
      "learning_rate": 1.820691353539097e-05,
      "loss": 2.7817,
      "step": 15271
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9968613982200623,
      "learning_rate": 1.8206678273672184e-05,
      "loss": 2.5327,
      "step": 15272
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9636781215667725,
      "learning_rate": 1.8206442998040847e-05,
      "loss": 2.6012,
      "step": 15273
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9697801470756531,
      "learning_rate": 1.8206207708497362e-05,
      "loss": 2.5042,
      "step": 15274
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0397082567214966,
      "learning_rate": 1.820597240504212e-05,
      "loss": 2.6204,
      "step": 15275
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0058521032333374,
      "learning_rate": 1.8205737087675532e-05,
      "loss": 2.288,
      "step": 15276
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.047729253768921,
      "learning_rate": 1.820550175639799e-05,
      "loss": 2.7628,
      "step": 15277
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0620275735855103,
      "learning_rate": 1.8205266411209888e-05,
      "loss": 2.404,
      "step": 15278
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.03190279006958,
      "learning_rate": 1.820503105211164e-05,
      "loss": 2.5113,
      "step": 15279
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0110359191894531,
      "learning_rate": 1.820479567910363e-05,
      "loss": 2.5785,
      "step": 15280
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0194346904754639,
      "learning_rate": 1.8204560292186264e-05,
      "loss": 2.2853,
      "step": 15281
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0638337135314941,
      "learning_rate": 1.820432489135994e-05,
      "loss": 2.4642,
      "step": 15282
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0395957231521606,
      "learning_rate": 1.820408947662506e-05,
      "loss": 2.7674,
      "step": 15283
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9979626536369324,
      "learning_rate": 1.8203854047982015e-05,
      "loss": 2.5311,
      "step": 15284
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9880668520927429,
      "learning_rate": 1.8203618605431214e-05,
      "loss": 2.6417,
      "step": 15285
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0974822044372559,
      "learning_rate": 1.8203383148973047e-05,
      "loss": 2.7415,
      "step": 15286
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9817268252372742,
      "learning_rate": 1.820314767860792e-05,
      "loss": 2.4558,
      "step": 15287
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9246811866760254,
      "learning_rate": 1.820291219433623e-05,
      "loss": 2.7087,
      "step": 15288
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0711742639541626,
      "learning_rate": 1.8202676696158377e-05,
      "loss": 2.5436,
      "step": 15289
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0814599990844727,
      "learning_rate": 1.8202441184074756e-05,
      "loss": 2.5675,
      "step": 15290
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9411538243293762,
      "learning_rate": 1.8202205658085776e-05,
      "loss": 2.7061,
      "step": 15291
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2148141860961914,
      "learning_rate": 1.820197011819182e-05,
      "loss": 2.3692,
      "step": 15292
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0032519102096558,
      "learning_rate": 1.8201734564393305e-05,
      "loss": 2.6388,
      "step": 15293
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9267808794975281,
      "learning_rate": 1.8201498996690618e-05,
      "loss": 2.4732,
      "step": 15294
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1091904640197754,
      "learning_rate": 1.8201263415084166e-05,
      "loss": 2.4821,
      "step": 15295
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9725857973098755,
      "learning_rate": 1.820102781957434e-05,
      "loss": 2.416,
      "step": 15296
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.04741632938385,
      "learning_rate": 1.8200792210161548e-05,
      "loss": 2.6261,
      "step": 15297
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0477133989334106,
      "learning_rate": 1.8200556586846184e-05,
      "loss": 2.5563,
      "step": 15298
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0415488481521606,
      "learning_rate": 1.820032094962865e-05,
      "loss": 2.5358,
      "step": 15299
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.080003261566162,
      "learning_rate": 1.820008529850935e-05,
      "loss": 2.5747,
      "step": 15300
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.8971127867698669,
      "learning_rate": 1.8199849633488673e-05,
      "loss": 2.3414,
      "step": 15301
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.035749912261963,
      "learning_rate": 1.8199613954567022e-05,
      "loss": 2.3736,
      "step": 15302
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9637104272842407,
      "learning_rate": 1.8199378261744806e-05,
      "loss": 2.3288,
      "step": 15303
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0202258825302124,
      "learning_rate": 1.819914255502241e-05,
      "loss": 2.8072,
      "step": 15304
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9784793257713318,
      "learning_rate": 1.8198906834400243e-05,
      "loss": 2.7396,
      "step": 15305
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9847426414489746,
      "learning_rate": 1.81986710998787e-05,
      "loss": 2.4532,
      "step": 15306
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0126428604125977,
      "learning_rate": 1.8198435351458186e-05,
      "loss": 2.2636,
      "step": 15307
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.069313645362854,
      "learning_rate": 1.8198199589139094e-05,
      "loss": 2.4267,
      "step": 15308
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0410685539245605,
      "learning_rate": 1.819796381292183e-05,
      "loss": 2.5707,
      "step": 15309
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.010225772857666,
      "learning_rate": 1.819772802280679e-05,
      "loss": 2.4591,
      "step": 15310
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.042033076286316,
      "learning_rate": 1.8197492218794375e-05,
      "loss": 2.5184,
      "step": 15311
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0234153270721436,
      "learning_rate": 1.8197256400884982e-05,
      "loss": 2.546,
      "step": 15312
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9862517714500427,
      "learning_rate": 1.8197020569079015e-05,
      "loss": 2.5125,
      "step": 15313
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0079011917114258,
      "learning_rate": 1.8196784723376872e-05,
      "loss": 2.5187,
      "step": 15314
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0429036617279053,
      "learning_rate": 1.819654886377895e-05,
      "loss": 2.4304,
      "step": 15315
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9481370449066162,
      "learning_rate": 1.8196312990285653e-05,
      "loss": 2.4532,
      "step": 15316
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.086442232131958,
      "learning_rate": 1.8196077102897383e-05,
      "loss": 2.5961,
      "step": 15317
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0128915309906006,
      "learning_rate": 1.819584120161453e-05,
      "loss": 2.5148,
      "step": 15318
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.979505717754364,
      "learning_rate": 1.8195605286437506e-05,
      "loss": 2.3711,
      "step": 15319
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9631224870681763,
      "learning_rate": 1.8195369357366704e-05,
      "loss": 2.5037,
      "step": 15320
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9530642032623291,
      "learning_rate": 1.8195133414402523e-05,
      "loss": 2.7407,
      "step": 15321
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0125837326049805,
      "learning_rate": 1.8194897457545364e-05,
      "loss": 2.4535,
      "step": 15322
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9498614072799683,
      "learning_rate": 1.8194661486795632e-05,
      "loss": 2.5205,
      "step": 15323
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0952105522155762,
      "learning_rate": 1.819442550215372e-05,
      "loss": 2.3243,
      "step": 15324
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9624925255775452,
      "learning_rate": 1.8194189503620033e-05,
      "loss": 2.4147,
      "step": 15325
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1536779403686523,
      "learning_rate": 1.8193953491194966e-05,
      "loss": 2.4367,
      "step": 15326
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1308289766311646,
      "learning_rate": 1.8193717464878927e-05,
      "loss": 2.4776,
      "step": 15327
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0307410955429077,
      "learning_rate": 1.8193481424672307e-05,
      "loss": 2.396,
      "step": 15328
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.103164792060852,
      "learning_rate": 1.8193245370575515e-05,
      "loss": 2.266,
      "step": 15329
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0585646629333496,
      "learning_rate": 1.8193009302588946e-05,
      "loss": 2.6183,
      "step": 15330
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9591931700706482,
      "learning_rate": 1.8192773220713e-05,
      "loss": 2.5587,
      "step": 15331
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0198204517364502,
      "learning_rate": 1.8192537124948077e-05,
      "loss": 2.4653,
      "step": 15332
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2569406032562256,
      "learning_rate": 1.819230101529458e-05,
      "loss": 2.4743,
      "step": 15333
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0295794010162354,
      "learning_rate": 1.819206489175291e-05,
      "loss": 2.4096,
      "step": 15334
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9093390703201294,
      "learning_rate": 1.819182875432346e-05,
      "loss": 2.3649,
      "step": 15335
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0656888484954834,
      "learning_rate": 1.8191592603006636e-05,
      "loss": 2.5192,
      "step": 15336
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9531729817390442,
      "learning_rate": 1.8191356437802843e-05,
      "loss": 2.4444,
      "step": 15337
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0564537048339844,
      "learning_rate": 1.8191120258712472e-05,
      "loss": 2.3642,
      "step": 15338
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.118318796157837,
      "learning_rate": 1.819088406573593e-05,
      "loss": 2.505,
      "step": 15339
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1564372777938843,
      "learning_rate": 1.819064785887361e-05,
      "loss": 2.5801,
      "step": 15340
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9657266736030579,
      "learning_rate": 1.8190411638125923e-05,
      "loss": 2.4001,
      "step": 15341
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6490873098373413,
      "learning_rate": 1.819017540349326e-05,
      "loss": 2.5649,
      "step": 15342
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.951017439365387,
      "learning_rate": 1.8189939154976028e-05,
      "loss": 2.6028,
      "step": 15343
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0547279119491577,
      "learning_rate": 1.8189702892574624e-05,
      "loss": 2.4786,
      "step": 15344
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0054419040679932,
      "learning_rate": 1.8189466616289446e-05,
      "loss": 2.4282,
      "step": 15345
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9921283721923828,
      "learning_rate": 1.8189230326120902e-05,
      "loss": 2.491,
      "step": 15346
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9770616888999939,
      "learning_rate": 1.8188994022069388e-05,
      "loss": 2.3325,
      "step": 15347
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9123978018760681,
      "learning_rate": 1.81887577041353e-05,
      "loss": 2.5786,
      "step": 15348
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0622628927230835,
      "learning_rate": 1.8188521372319052e-05,
      "loss": 2.6989,
      "step": 15349
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0468246936798096,
      "learning_rate": 1.818828502662103e-05,
      "loss": 2.5323,
      "step": 15350
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9863309264183044,
      "learning_rate": 1.8188048667041645e-05,
      "loss": 2.7096,
      "step": 15351
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0500093698501587,
      "learning_rate": 1.818781229358129e-05,
      "loss": 2.4089,
      "step": 15352
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0541651248931885,
      "learning_rate": 1.818757590624037e-05,
      "loss": 2.5949,
      "step": 15353
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0106109380722046,
      "learning_rate": 1.818733950501929e-05,
      "loss": 2.5963,
      "step": 15354
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.087450385093689,
      "learning_rate": 1.818710308991844e-05,
      "loss": 2.5564,
      "step": 15355
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0170526504516602,
      "learning_rate": 1.818686666093823e-05,
      "loss": 2.6896,
      "step": 15356
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.113125205039978,
      "learning_rate": 1.8186630218079056e-05,
      "loss": 2.4848,
      "step": 15357
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9820645451545715,
      "learning_rate": 1.8186393761341318e-05,
      "loss": 2.3518,
      "step": 15358
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9666995406150818,
      "learning_rate": 1.8186157290725422e-05,
      "loss": 2.3302,
      "step": 15359
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.105427622795105,
      "learning_rate": 1.8185920806231766e-05,
      "loss": 2.5309,
      "step": 15360
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.106191873550415,
      "learning_rate": 1.8185684307860752e-05,
      "loss": 2.2498,
      "step": 15361
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0345577001571655,
      "learning_rate": 1.8185447795612778e-05,
      "loss": 2.5466,
      "step": 15362
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0471417903900146,
      "learning_rate": 1.818521126948825e-05,
      "loss": 2.352,
      "step": 15363
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9747099280357361,
      "learning_rate": 1.818497472948756e-05,
      "loss": 2.5548,
      "step": 15364
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0100898742675781,
      "learning_rate": 1.818473817561112e-05,
      "loss": 2.3456,
      "step": 15365
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9964667558670044,
      "learning_rate": 1.8184501607859325e-05,
      "loss": 2.3732,
      "step": 15366
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9977318048477173,
      "learning_rate": 1.8184265026232575e-05,
      "loss": 2.324,
      "step": 15367
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2502660751342773,
      "learning_rate": 1.8184028430731274e-05,
      "loss": 2.4593,
      "step": 15368
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.970074474811554,
      "learning_rate": 1.818379182135582e-05,
      "loss": 2.3644,
      "step": 15369
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9294451475143433,
      "learning_rate": 1.818355519810662e-05,
      "loss": 2.6332,
      "step": 15370
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0831153392791748,
      "learning_rate": 1.8183318560984068e-05,
      "loss": 2.5287,
      "step": 15371
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1138502359390259,
      "learning_rate": 1.818308190998857e-05,
      "loss": 2.4365,
      "step": 15372
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1372841596603394,
      "learning_rate": 1.8182845245120525e-05,
      "loss": 2.417,
      "step": 15373
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9806767106056213,
      "learning_rate": 1.8182608566380337e-05,
      "loss": 2.4106,
      "step": 15374
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9828526377677917,
      "learning_rate": 1.8182371873768402e-05,
      "loss": 2.6019,
      "step": 15375
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.238612413406372,
      "learning_rate": 1.8182135167285127e-05,
      "loss": 2.2448,
      "step": 15376
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.03500497341156,
      "learning_rate": 1.818189844693091e-05,
      "loss": 2.6795,
      "step": 15377
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0395482778549194,
      "learning_rate": 1.8181661712706152e-05,
      "loss": 2.6564,
      "step": 15378
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0716724395751953,
      "learning_rate": 1.8181424964611255e-05,
      "loss": 2.6441,
      "step": 15379
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0676071643829346,
      "learning_rate": 1.8181188202646624e-05,
      "loss": 2.6465,
      "step": 15380
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0410102605819702,
      "learning_rate": 1.8180951426812652e-05,
      "loss": 2.2881,
      "step": 15381
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0212619304656982,
      "learning_rate": 1.8180714637109748e-05,
      "loss": 2.6152,
      "step": 15382
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.099362850189209,
      "learning_rate": 1.8180477833538313e-05,
      "loss": 2.5246,
      "step": 15383
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9945508241653442,
      "learning_rate": 1.8180241016098743e-05,
      "loss": 2.4162,
      "step": 15384
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9876851439476013,
      "learning_rate": 1.818000418479144e-05,
      "loss": 2.5505,
      "step": 15385
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0233738422393799,
      "learning_rate": 1.8179767339616815e-05,
      "loss": 2.7299,
      "step": 15386
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.032386064529419,
      "learning_rate": 1.817953048057526e-05,
      "loss": 2.4914,
      "step": 15387
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1038790941238403,
      "learning_rate": 1.8179293607667177e-05,
      "loss": 2.5303,
      "step": 15388
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9401345252990723,
      "learning_rate": 1.8179056720892973e-05,
      "loss": 2.3576,
      "step": 15389
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.124463438987732,
      "learning_rate": 1.8178819820253047e-05,
      "loss": 2.5473,
      "step": 15390
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0420382022857666,
      "learning_rate": 1.8178582905747797e-05,
      "loss": 2.5534,
      "step": 15391
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1076912879943848,
      "learning_rate": 1.8178345977377628e-05,
      "loss": 2.5587,
      "step": 15392
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.000693678855896,
      "learning_rate": 1.8178109035142942e-05,
      "loss": 2.3475,
      "step": 15393
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9542815089225769,
      "learning_rate": 1.8177872079044143e-05,
      "loss": 2.5278,
      "step": 15394
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0142309665679932,
      "learning_rate": 1.8177635109081627e-05,
      "loss": 2.4527,
      "step": 15395
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9757984280586243,
      "learning_rate": 1.8177398125255798e-05,
      "loss": 2.3075,
      "step": 15396
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0278395414352417,
      "learning_rate": 1.8177161127567058e-05,
      "loss": 2.4417,
      "step": 15397
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9724344611167908,
      "learning_rate": 1.817692411601581e-05,
      "loss": 2.4418,
      "step": 15398
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9927520155906677,
      "learning_rate": 1.8176687090602455e-05,
      "loss": 2.4045,
      "step": 15399
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0362075567245483,
      "learning_rate": 1.8176450051327396e-05,
      "loss": 2.4134,
      "step": 15400
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.035683274269104,
      "learning_rate": 1.8176212998191032e-05,
      "loss": 2.5763,
      "step": 15401
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1596769094467163,
      "learning_rate": 1.8175975931193767e-05,
      "loss": 2.5069,
      "step": 15402
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.035369873046875,
      "learning_rate": 1.8175738850336e-05,
      "loss": 2.5,
      "step": 15403
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.082459568977356,
      "learning_rate": 1.8175501755618138e-05,
      "loss": 2.4808,
      "step": 15404
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0527818202972412,
      "learning_rate": 1.8175264647040574e-05,
      "loss": 2.3361,
      "step": 15405
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0701950788497925,
      "learning_rate": 1.8175027524603725e-05,
      "loss": 2.4236,
      "step": 15406
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9866486191749573,
      "learning_rate": 1.817479038830798e-05,
      "loss": 2.4873,
      "step": 15407
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9795156717300415,
      "learning_rate": 1.8174553238153744e-05,
      "loss": 2.434,
      "step": 15408
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.03984534740448,
      "learning_rate": 1.8174316074141424e-05,
      "loss": 2.6213,
      "step": 15409
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9289011359214783,
      "learning_rate": 1.8174078896271412e-05,
      "loss": 2.6327,
      "step": 15410
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9741863012313843,
      "learning_rate": 1.817384170454412e-05,
      "loss": 2.6755,
      "step": 15411
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9240131378173828,
      "learning_rate": 1.8173604498959946e-05,
      "loss": 2.3673,
      "step": 15412
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0450170040130615,
      "learning_rate": 1.817336727951929e-05,
      "loss": 2.6237,
      "step": 15413
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.068506121635437,
      "learning_rate": 1.817313004622256e-05,
      "loss": 2.5761,
      "step": 15414
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1233463287353516,
      "learning_rate": 1.8172892799070152e-05,
      "loss": 2.4307,
      "step": 15415
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.057056188583374,
      "learning_rate": 1.8172655538062474e-05,
      "loss": 2.5592,
      "step": 15416
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9305860996246338,
      "learning_rate": 1.817241826319992e-05,
      "loss": 2.4283,
      "step": 15417
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.082917332649231,
      "learning_rate": 1.8172180974482905e-05,
      "loss": 2.1955,
      "step": 15418
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0759140253067017,
      "learning_rate": 1.817194367191182e-05,
      "loss": 2.3972,
      "step": 15419
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0247538089752197,
      "learning_rate": 1.817170635548707e-05,
      "loss": 2.4355,
      "step": 15420
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0224246978759766,
      "learning_rate": 1.8171469025209058e-05,
      "loss": 2.4006,
      "step": 15421
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0517547130584717,
      "learning_rate": 1.817123168107819e-05,
      "loss": 2.4871,
      "step": 15422
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0076173543930054,
      "learning_rate": 1.8170994323094863e-05,
      "loss": 2.4211,
      "step": 15423
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0573465824127197,
      "learning_rate": 1.817075695125948e-05,
      "loss": 2.5162,
      "step": 15424
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9978101253509521,
      "learning_rate": 1.8170519565572444e-05,
      "loss": 2.6289,
      "step": 15425
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.284936547279358,
      "learning_rate": 1.8170282166034164e-05,
      "loss": 2.451,
      "step": 15426
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1405202150344849,
      "learning_rate": 1.817004475264503e-05,
      "loss": 2.4738,
      "step": 15427
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.180484414100647,
      "learning_rate": 1.8169807325405456e-05,
      "loss": 2.382,
      "step": 15428
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9954941868782043,
      "learning_rate": 1.8169569884315836e-05,
      "loss": 2.4419,
      "step": 15429
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0960402488708496,
      "learning_rate": 1.816933242937658e-05,
      "loss": 2.504,
      "step": 15430
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0273652076721191,
      "learning_rate": 1.8169094960588084e-05,
      "loss": 2.7341,
      "step": 15431
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0732547044754028,
      "learning_rate": 1.8168857477950755e-05,
      "loss": 2.6375,
      "step": 15432
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3340694904327393,
      "learning_rate": 1.8168619981464994e-05,
      "loss": 2.2389,
      "step": 15433
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.34523344039917,
      "learning_rate": 1.8168382471131202e-05,
      "loss": 2.5195,
      "step": 15434
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9754443168640137,
      "learning_rate": 1.8168144946949784e-05,
      "loss": 2.4906,
      "step": 15435
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1910004615783691,
      "learning_rate": 1.8167907408921142e-05,
      "loss": 2.4358,
      "step": 15436
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0798194408416748,
      "learning_rate": 1.8167669857045677e-05,
      "loss": 2.5305,
      "step": 15437
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0459829568862915,
      "learning_rate": 1.8167432291323795e-05,
      "loss": 2.4234,
      "step": 15438
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.017101526260376,
      "learning_rate": 1.81671947117559e-05,
      "loss": 2.4142,
      "step": 15439
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1300286054611206,
      "learning_rate": 1.8166957118342385e-05,
      "loss": 2.6235,
      "step": 15440
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2456765174865723,
      "learning_rate": 1.8166719511083663e-05,
      "loss": 2.4691,
      "step": 15441
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4667140245437622,
      "learning_rate": 1.816648188998013e-05,
      "loss": 2.4422,
      "step": 15442
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1632733345031738,
      "learning_rate": 1.81662442550322e-05,
      "loss": 2.4853,
      "step": 15443
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.02366042137146,
      "learning_rate": 1.8166006606240262e-05,
      "loss": 2.4917,
      "step": 15444
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.092997431755066,
      "learning_rate": 1.816576894360473e-05,
      "loss": 2.6352,
      "step": 15445
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.40869140625,
      "learning_rate": 1.8165531267125994e-05,
      "loss": 2.4815,
      "step": 15446
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1812700033187866,
      "learning_rate": 1.816529357680447e-05,
      "loss": 2.4908,
      "step": 15447
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.185840129852295,
      "learning_rate": 1.8165055872640556e-05,
      "loss": 2.3385,
      "step": 15448
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1189160346984863,
      "learning_rate": 1.8164818154634655e-05,
      "loss": 2.5409,
      "step": 15449
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9931011199951172,
      "learning_rate": 1.8164580422787166e-05,
      "loss": 2.6811,
      "step": 15450
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1343518495559692,
      "learning_rate": 1.81643426770985e-05,
      "loss": 2.2529,
      "step": 15451
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1546119451522827,
      "learning_rate": 1.8164104917569056e-05,
      "loss": 2.3282,
      "step": 15452
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0033996105194092,
      "learning_rate": 1.816386714419923e-05,
      "loss": 2.4997,
      "step": 15453
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1346241235733032,
      "learning_rate": 1.8163629356989438e-05,
      "loss": 2.4837,
      "step": 15454
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.029524326324463,
      "learning_rate": 1.8163391555940078e-05,
      "loss": 2.3988,
      "step": 15455
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9820697903633118,
      "learning_rate": 1.816315374105155e-05,
      "loss": 2.6081,
      "step": 15456
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.019057273864746,
      "learning_rate": 1.8162915912324256e-05,
      "loss": 2.3613,
      "step": 15457
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9525421261787415,
      "learning_rate": 1.8162678069758606e-05,
      "loss": 2.4626,
      "step": 15458
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0327529907226562,
      "learning_rate": 1.8162440213355e-05,
      "loss": 2.4345,
      "step": 15459
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.959821343421936,
      "learning_rate": 1.816220234311384e-05,
      "loss": 2.3024,
      "step": 15460
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0641396045684814,
      "learning_rate": 1.816196445903553e-05,
      "loss": 2.6194,
      "step": 15461
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.216025471687317,
      "learning_rate": 1.8161726561120473e-05,
      "loss": 2.4885,
      "step": 15462
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0548865795135498,
      "learning_rate": 1.8161488649369074e-05,
      "loss": 2.5621,
      "step": 15463
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0911169052124023,
      "learning_rate": 1.8161250723781737e-05,
      "loss": 2.5906,
      "step": 15464
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0977870225906372,
      "learning_rate": 1.816101278435886e-05,
      "loss": 2.5833,
      "step": 15465
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0689549446105957,
      "learning_rate": 1.816077483110085e-05,
      "loss": 2.4739,
      "step": 15466
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0365040302276611,
      "learning_rate": 1.8160536864008113e-05,
      "loss": 2.4116,
      "step": 15467
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1653822660446167,
      "learning_rate": 1.8160298883081047e-05,
      "loss": 2.3296,
      "step": 15468
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0802295207977295,
      "learning_rate": 1.816006088832006e-05,
      "loss": 2.5342,
      "step": 15469
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1503660678863525,
      "learning_rate": 1.815982287972555e-05,
      "loss": 2.5424,
      "step": 15470
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0498244762420654,
      "learning_rate": 1.815958485729793e-05,
      "loss": 2.5007,
      "step": 15471
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.064964771270752,
      "learning_rate": 1.8159346821037594e-05,
      "loss": 2.5969,
      "step": 15472
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2652140855789185,
      "learning_rate": 1.815910877094495e-05,
      "loss": 2.3918,
      "step": 15473
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.156647801399231,
      "learning_rate": 1.8158870707020398e-05,
      "loss": 2.5066,
      "step": 15474
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.078434705734253,
      "learning_rate": 1.8158632629264348e-05,
      "loss": 2.309,
      "step": 15475
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.136165738105774,
      "learning_rate": 1.8158394537677198e-05,
      "loss": 2.3861,
      "step": 15476
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.340492844581604,
      "learning_rate": 1.815815643225935e-05,
      "loss": 2.5451,
      "step": 15477
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1523375511169434,
      "learning_rate": 1.8157918313011217e-05,
      "loss": 2.5838,
      "step": 15478
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0670104026794434,
      "learning_rate": 1.8157680179933196e-05,
      "loss": 2.6714,
      "step": 15479
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9701367616653442,
      "learning_rate": 1.8157442033025687e-05,
      "loss": 2.583,
      "step": 15480
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0364940166473389,
      "learning_rate": 1.8157203872289103e-05,
      "loss": 2.2874,
      "step": 15481
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1857813596725464,
      "learning_rate": 1.815696569772384e-05,
      "loss": 2.36,
      "step": 15482
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.038158893585205,
      "learning_rate": 1.815672750933031e-05,
      "loss": 2.2988,
      "step": 15483
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1820122003555298,
      "learning_rate": 1.8156489307108903e-05,
      "loss": 2.631,
      "step": 15484
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0705945491790771,
      "learning_rate": 1.8156251091060037e-05,
      "loss": 2.6184,
      "step": 15485
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9096875786781311,
      "learning_rate": 1.815601286118411e-05,
      "loss": 2.3886,
      "step": 15486
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1559593677520752,
      "learning_rate": 1.8155774617481525e-05,
      "loss": 2.7065,
      "step": 15487
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1020607948303223,
      "learning_rate": 1.8155536359952686e-05,
      "loss": 2.7443,
      "step": 15488
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0836150646209717,
      "learning_rate": 1.8155298088598e-05,
      "loss": 2.4729,
      "step": 15489
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0348560810089111,
      "learning_rate": 1.8155059803417865e-05,
      "loss": 2.5608,
      "step": 15490
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9601361751556396,
      "learning_rate": 1.8154821504412693e-05,
      "loss": 2.744,
      "step": 15491
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0173519849777222,
      "learning_rate": 1.8154583191582882e-05,
      "loss": 2.4889,
      "step": 15492
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9817796945571899,
      "learning_rate": 1.8154344864928836e-05,
      "loss": 2.5359,
      "step": 15493
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0758291482925415,
      "learning_rate": 1.8154106524450962e-05,
      "loss": 2.5054,
      "step": 15494
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0571086406707764,
      "learning_rate": 1.8153868170149666e-05,
      "loss": 2.7085,
      "step": 15495
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0382959842681885,
      "learning_rate": 1.8153629802025348e-05,
      "loss": 2.4797,
      "step": 15496
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.035321593284607,
      "learning_rate": 1.815339142007841e-05,
      "loss": 2.4164,
      "step": 15497
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0715152025222778,
      "learning_rate": 1.8153153024309262e-05,
      "loss": 2.3526,
      "step": 15498
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0529240369796753,
      "learning_rate": 1.81529146147183e-05,
      "loss": 2.5906,
      "step": 15499
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9090850949287415,
      "learning_rate": 1.815267619130594e-05,
      "loss": 2.486,
      "step": 15500
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.19286048412323,
      "learning_rate": 1.8152437754072573e-05,
      "loss": 2.3637,
      "step": 15501
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0139464139938354,
      "learning_rate": 1.8152199303018616e-05,
      "loss": 2.6585,
      "step": 15502
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0477079153060913,
      "learning_rate": 1.8151960838144464e-05,
      "loss": 2.527,
      "step": 15503
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0501279830932617,
      "learning_rate": 1.8151722359450525e-05,
      "loss": 2.5839,
      "step": 15504
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.03048837184906,
      "learning_rate": 1.8151483866937202e-05,
      "loss": 2.5083,
      "step": 15505
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0190465450286865,
      "learning_rate": 1.81512453606049e-05,
      "loss": 2.219,
      "step": 15506
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2173399925231934,
      "learning_rate": 1.8151006840454025e-05,
      "loss": 2.2724,
      "step": 15507
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0085160732269287,
      "learning_rate": 1.815076830648498e-05,
      "loss": 2.6318,
      "step": 15508
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0632818937301636,
      "learning_rate": 1.8150529758698166e-05,
      "loss": 2.479,
      "step": 15509
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1218833923339844,
      "learning_rate": 1.8150291197093993e-05,
      "loss": 2.5039,
      "step": 15510
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0265295505523682,
      "learning_rate": 1.815005262167286e-05,
      "loss": 2.6444,
      "step": 15511
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0434367656707764,
      "learning_rate": 1.8149814032435177e-05,
      "loss": 2.3848,
      "step": 15512
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0868444442749023,
      "learning_rate": 1.8149575429381345e-05,
      "loss": 2.5789,
      "step": 15513
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1190476417541504,
      "learning_rate": 1.8149336812511768e-05,
      "loss": 2.5802,
      "step": 15514
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0087352991104126,
      "learning_rate": 1.8149098181826854e-05,
      "loss": 2.4508,
      "step": 15515
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0320632457733154,
      "learning_rate": 1.8148859537327005e-05,
      "loss": 2.5029,
      "step": 15516
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0672674179077148,
      "learning_rate": 1.814862087901262e-05,
      "loss": 2.3329,
      "step": 15517
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0215522050857544,
      "learning_rate": 1.8148382206884116e-05,
      "loss": 2.6285,
      "step": 15518
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9647033214569092,
      "learning_rate": 1.814814352094189e-05,
      "loss": 2.2414,
      "step": 15519
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0862642526626587,
      "learning_rate": 1.8147904821186347e-05,
      "loss": 2.6028,
      "step": 15520
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0460537672042847,
      "learning_rate": 1.8147666107617893e-05,
      "loss": 2.5923,
      "step": 15521
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0029551982879639,
      "learning_rate": 1.814742738023693e-05,
      "loss": 2.579,
      "step": 15522
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.007175087928772,
      "learning_rate": 1.814718863904387e-05,
      "loss": 2.6329,
      "step": 15523
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0050132274627686,
      "learning_rate": 1.8146949884039107e-05,
      "loss": 2.3164,
      "step": 15524
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2161906957626343,
      "learning_rate": 1.8146711115223054e-05,
      "loss": 2.5583,
      "step": 15525
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1959165334701538,
      "learning_rate": 1.8146472332596113e-05,
      "loss": 2.6006,
      "step": 15526
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0684642791748047,
      "learning_rate": 1.8146233536158685e-05,
      "loss": 2.5029,
      "step": 15527
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.979390561580658,
      "learning_rate": 1.8145994725911182e-05,
      "loss": 2.607,
      "step": 15528
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3131202459335327,
      "learning_rate": 1.814575590185401e-05,
      "loss": 2.6212,
      "step": 15529
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.158213496208191,
      "learning_rate": 1.814551706398756e-05,
      "loss": 2.4479,
      "step": 15530
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9938281774520874,
      "learning_rate": 1.814527821231225e-05,
      "loss": 2.5202,
      "step": 15531
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0914174318313599,
      "learning_rate": 1.8145039346828482e-05,
      "loss": 2.6796,
      "step": 15532
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9989704489707947,
      "learning_rate": 1.814480046753666e-05,
      "loss": 2.3698,
      "step": 15533
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1702684164047241,
      "learning_rate": 1.814456157443719e-05,
      "loss": 2.4397,
      "step": 15534
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1172741651535034,
      "learning_rate": 1.814432266753048e-05,
      "loss": 2.4272,
      "step": 15535
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2288155555725098,
      "learning_rate": 1.8144083746816923e-05,
      "loss": 2.5253,
      "step": 15536
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0883855819702148,
      "learning_rate": 1.8143844812296937e-05,
      "loss": 2.4841,
      "step": 15537
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0035936832427979,
      "learning_rate": 1.814360586397092e-05,
      "loss": 2.5947,
      "step": 15538
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0713436603546143,
      "learning_rate": 1.8143366901839282e-05,
      "loss": 2.5023,
      "step": 15539
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0768086910247803,
      "learning_rate": 1.814312792590242e-05,
      "loss": 2.5289,
      "step": 15540
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5005351305007935,
      "learning_rate": 1.8142888936160748e-05,
      "loss": 2.6727,
      "step": 15541
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9774800539016724,
      "learning_rate": 1.814264993261467e-05,
      "loss": 2.5636,
      "step": 15542
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0090223550796509,
      "learning_rate": 1.8142410915264587e-05,
      "loss": 2.4334,
      "step": 15543
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9576540589332581,
      "learning_rate": 1.8142171884110908e-05,
      "loss": 2.4287,
      "step": 15544
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0346986055374146,
      "learning_rate": 1.8141932839154034e-05,
      "loss": 2.5927,
      "step": 15545
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0058757066726685,
      "learning_rate": 1.814169378039437e-05,
      "loss": 2.3507,
      "step": 15546
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0415239334106445,
      "learning_rate": 1.8141454707832325e-05,
      "loss": 2.2366,
      "step": 15547
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9486332535743713,
      "learning_rate": 1.8141215621468307e-05,
      "loss": 2.3021,
      "step": 15548
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0307776927947998,
      "learning_rate": 1.8140976521302713e-05,
      "loss": 2.4574,
      "step": 15549
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9654401540756226,
      "learning_rate": 1.8140737407335954e-05,
      "loss": 2.5886,
      "step": 15550
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9313886165618896,
      "learning_rate": 1.8140498279568433e-05,
      "loss": 2.2743,
      "step": 15551
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.994285523891449,
      "learning_rate": 1.8140259138000564e-05,
      "loss": 2.5486,
      "step": 15552
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0107170343399048,
      "learning_rate": 1.814001998263274e-05,
      "loss": 2.687,
      "step": 15553
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0164999961853027,
      "learning_rate": 1.8139780813465368e-05,
      "loss": 2.467,
      "step": 15554
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0741316080093384,
      "learning_rate": 1.8139541630498858e-05,
      "loss": 2.6915,
      "step": 15555
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.033052921295166,
      "learning_rate": 1.8139302433733615e-05,
      "loss": 2.3458,
      "step": 15556
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9930043816566467,
      "learning_rate": 1.8139063223170047e-05,
      "loss": 2.4714,
      "step": 15557
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9837747812271118,
      "learning_rate": 1.8138823998808552e-05,
      "loss": 2.5992,
      "step": 15558
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0304286479949951,
      "learning_rate": 1.813858476064954e-05,
      "loss": 2.653,
      "step": 15559
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0442966222763062,
      "learning_rate": 1.8138345508693418e-05,
      "loss": 2.3772,
      "step": 15560
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9884058833122253,
      "learning_rate": 1.813810624294059e-05,
      "loss": 2.3755,
      "step": 15561
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0149263143539429,
      "learning_rate": 1.813786696339146e-05,
      "loss": 2.4451,
      "step": 15562
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.117828130722046,
      "learning_rate": 1.8137627670046437e-05,
      "loss": 2.4911,
      "step": 15563
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2456507682800293,
      "learning_rate": 1.8137388362905924e-05,
      "loss": 2.5754,
      "step": 15564
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0026710033416748,
      "learning_rate": 1.813714904197033e-05,
      "loss": 2.5472,
      "step": 15565
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.147000789642334,
      "learning_rate": 1.8136909707240056e-05,
      "loss": 2.6198,
      "step": 15566
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0493485927581787,
      "learning_rate": 1.813667035871551e-05,
      "loss": 2.5635,
      "step": 15567
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2417370080947876,
      "learning_rate": 1.8136430996397097e-05,
      "loss": 2.2474,
      "step": 15568
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0026193857192993,
      "learning_rate": 1.813619162028523e-05,
      "loss": 2.6323,
      "step": 15569
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0793476104736328,
      "learning_rate": 1.81359522303803e-05,
      "loss": 2.511,
      "step": 15570
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9955102205276489,
      "learning_rate": 1.8135712826682726e-05,
      "loss": 2.3461,
      "step": 15571
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0671740770339966,
      "learning_rate": 1.8135473409192908e-05,
      "loss": 2.4813,
      "step": 15572
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1394411325454712,
      "learning_rate": 1.8135233977911253e-05,
      "loss": 2.3442,
      "step": 15573
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0762102603912354,
      "learning_rate": 1.8134994532838166e-05,
      "loss": 2.4499,
      "step": 15574
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.120581030845642,
      "learning_rate": 1.8134755073974055e-05,
      "loss": 2.478,
      "step": 15575
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.140298843383789,
      "learning_rate": 1.8134515601319326e-05,
      "loss": 2.4532,
      "step": 15576
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1754603385925293,
      "learning_rate": 1.813427611487438e-05,
      "loss": 2.3974,
      "step": 15577
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0634530782699585,
      "learning_rate": 1.813403661463963e-05,
      "loss": 2.5485,
      "step": 15578
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.122944951057434,
      "learning_rate": 1.8133797100615477e-05,
      "loss": 2.6388,
      "step": 15579
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1374366283416748,
      "learning_rate": 1.8133557572802332e-05,
      "loss": 2.4598,
      "step": 15580
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0188220739364624,
      "learning_rate": 1.813331803120059e-05,
      "loss": 2.2627,
      "step": 15581
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0434914827346802,
      "learning_rate": 1.8133078475810676e-05,
      "loss": 2.38,
      "step": 15582
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1216084957122803,
      "learning_rate": 1.8132838906632978e-05,
      "loss": 2.4293,
      "step": 15583
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1071720123291016,
      "learning_rate": 1.8132599323667912e-05,
      "loss": 2.4473,
      "step": 15584
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0316747426986694,
      "learning_rate": 1.813235972691588e-05,
      "loss": 2.6424,
      "step": 15585
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1768735647201538,
      "learning_rate": 1.813212011637729e-05,
      "loss": 2.696,
      "step": 15586
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9500038027763367,
      "learning_rate": 1.8131880492052546e-05,
      "loss": 2.3651,
      "step": 15587
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0218878984451294,
      "learning_rate": 1.813164085394206e-05,
      "loss": 2.5201,
      "step": 15588
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1021844148635864,
      "learning_rate": 1.813140120204623e-05,
      "loss": 2.4419,
      "step": 15589
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9661592841148376,
      "learning_rate": 1.8131161536365467e-05,
      "loss": 2.4193,
      "step": 15590
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0216848850250244,
      "learning_rate": 1.813092185690018e-05,
      "loss": 2.7004,
      "step": 15591
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0551905632019043,
      "learning_rate": 1.8130682163650767e-05,
      "loss": 2.3716,
      "step": 15592
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9702919125556946,
      "learning_rate": 1.8130442456617644e-05,
      "loss": 2.6081,
      "step": 15593
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9827528595924377,
      "learning_rate": 1.813020273580121e-05,
      "loss": 2.5316,
      "step": 15594
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9421220421791077,
      "learning_rate": 1.8129963001201875e-05,
      "loss": 2.4522,
      "step": 15595
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1220405101776123,
      "learning_rate": 1.8129723252820046e-05,
      "loss": 2.6489,
      "step": 15596
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0902655124664307,
      "learning_rate": 1.8129483490656127e-05,
      "loss": 2.3307,
      "step": 15597
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0866920948028564,
      "learning_rate": 1.8129243714710528e-05,
      "loss": 2.5225,
      "step": 15598
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0354883670806885,
      "learning_rate": 1.812900392498365e-05,
      "loss": 2.513,
      "step": 15599
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1397030353546143,
      "learning_rate": 1.8128764121475905e-05,
      "loss": 2.3099,
      "step": 15600
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1660916805267334,
      "learning_rate": 1.8128524304187695e-05,
      "loss": 2.5855,
      "step": 15601
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.023472785949707,
      "learning_rate": 1.8128284473119428e-05,
      "loss": 2.5515,
      "step": 15602
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0417943000793457,
      "learning_rate": 1.8128044628271513e-05,
      "loss": 2.4864,
      "step": 15603
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2610582113265991,
      "learning_rate": 1.812780476964435e-05,
      "loss": 2.5053,
      "step": 15604
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0524163246154785,
      "learning_rate": 1.8127564897238356e-05,
      "loss": 2.3929,
      "step": 15605
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.127739429473877,
      "learning_rate": 1.8127325011053933e-05,
      "loss": 2.6521,
      "step": 15606
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9911603331565857,
      "learning_rate": 1.8127085111091485e-05,
      "loss": 2.4722,
      "step": 15607
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9648467898368835,
      "learning_rate": 1.812684519735142e-05,
      "loss": 2.2726,
      "step": 15608
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0614043474197388,
      "learning_rate": 1.8126605269834143e-05,
      "loss": 2.4335,
      "step": 15609
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0530738830566406,
      "learning_rate": 1.812636532854007e-05,
      "loss": 2.414,
      "step": 15610
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0580298900604248,
      "learning_rate": 1.812612537346959e-05,
      "loss": 2.2052,
      "step": 15611
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9468727707862854,
      "learning_rate": 1.8125885404623127e-05,
      "loss": 2.5628,
      "step": 15612
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.093856692314148,
      "learning_rate": 1.8125645422001082e-05,
      "loss": 2.321,
      "step": 15613
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9568798542022705,
      "learning_rate": 1.812540542560386e-05,
      "loss": 2.6211,
      "step": 15614
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0755326747894287,
      "learning_rate": 1.8125165415431868e-05,
      "loss": 2.678,
      "step": 15615
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.076744556427002,
      "learning_rate": 1.8124925391485514e-05,
      "loss": 2.6072,
      "step": 15616
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.01978600025177,
      "learning_rate": 1.8124685353765206e-05,
      "loss": 2.536,
      "step": 15617
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0711344480514526,
      "learning_rate": 1.8124445302271346e-05,
      "loss": 2.5684,
      "step": 15618
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0610687732696533,
      "learning_rate": 1.8124205237004347e-05,
      "loss": 2.5453,
      "step": 15619
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0495887994766235,
      "learning_rate": 1.8123965157964617e-05,
      "loss": 2.5203,
      "step": 15620
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.973307728767395,
      "learning_rate": 1.8123725065152557e-05,
      "loss": 2.5053,
      "step": 15621
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.018373966217041,
      "learning_rate": 1.8123484958568575e-05,
      "loss": 2.315,
      "step": 15622
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1005737781524658,
      "learning_rate": 1.8123244838213078e-05,
      "loss": 2.4068,
      "step": 15623
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0742461681365967,
      "learning_rate": 1.812300470408648e-05,
      "loss": 2.2656,
      "step": 15624
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0449817180633545,
      "learning_rate": 1.8122764556189182e-05,
      "loss": 2.5413,
      "step": 15625
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9235543012619019,
      "learning_rate": 1.8122524394521588e-05,
      "loss": 2.2469,
      "step": 15626
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.086987853050232,
      "learning_rate": 1.8122284219084113e-05,
      "loss": 2.5601,
      "step": 15627
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0440616607666016,
      "learning_rate": 1.8122044029877157e-05,
      "loss": 2.6412,
      "step": 15628
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1096854209899902,
      "learning_rate": 1.8121803826901134e-05,
      "loss": 2.4175,
      "step": 15629
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9180030226707458,
      "learning_rate": 1.8121563610156445e-05,
      "loss": 2.4789,
      "step": 15630
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9192672371864319,
      "learning_rate": 1.81213233796435e-05,
      "loss": 2.3518,
      "step": 15631
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9841811060905457,
      "learning_rate": 1.8121083135362707e-05,
      "loss": 2.3964,
      "step": 15632
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.984719455242157,
      "learning_rate": 1.8120842877314473e-05,
      "loss": 2.5087,
      "step": 15633
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.110965371131897,
      "learning_rate": 1.8120602605499208e-05,
      "loss": 2.5883,
      "step": 15634
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9814528226852417,
      "learning_rate": 1.812036231991731e-05,
      "loss": 2.5036,
      "step": 15635
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9962673187255859,
      "learning_rate": 1.8120122020569195e-05,
      "loss": 2.3426,
      "step": 15636
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0653512477874756,
      "learning_rate": 1.8119881707455267e-05,
      "loss": 2.3286,
      "step": 15637
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9541467428207397,
      "learning_rate": 1.8119641380575936e-05,
      "loss": 2.4182,
      "step": 15638
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9691005945205688,
      "learning_rate": 1.8119401039931604e-05,
      "loss": 2.6001,
      "step": 15639
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0412441492080688,
      "learning_rate": 1.8119160685522685e-05,
      "loss": 2.8731,
      "step": 15640
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9978856444358826,
      "learning_rate": 1.8118920317349583e-05,
      "loss": 2.465,
      "step": 15641
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9939695596694946,
      "learning_rate": 1.8118679935412706e-05,
      "loss": 2.5318,
      "step": 15642
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.071195363998413,
      "learning_rate": 1.8118439539712463e-05,
      "loss": 2.5042,
      "step": 15643
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1298103332519531,
      "learning_rate": 1.8118199130249255e-05,
      "loss": 2.4588,
      "step": 15644
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.8738776445388794,
      "learning_rate": 1.81179587070235e-05,
      "loss": 2.3644,
      "step": 15645
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9358712434768677,
      "learning_rate": 1.8117718270035598e-05,
      "loss": 2.5194,
      "step": 15646
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1056346893310547,
      "learning_rate": 1.8117477819285958e-05,
      "loss": 2.5338,
      "step": 15647
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0062780380249023,
      "learning_rate": 1.8117237354774987e-05,
      "loss": 2.4633,
      "step": 15648
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0094220638275146,
      "learning_rate": 1.8116996876503096e-05,
      "loss": 2.4116,
      "step": 15649
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0946125984191895,
      "learning_rate": 1.8116756384470693e-05,
      "loss": 2.549,
      "step": 15650
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0694684982299805,
      "learning_rate": 1.8116515878678182e-05,
      "loss": 2.4035,
      "step": 15651
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9918914437294006,
      "learning_rate": 1.811627535912597e-05,
      "loss": 2.573,
      "step": 15652
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1447559595108032,
      "learning_rate": 1.811603482581447e-05,
      "loss": 2.4766,
      "step": 15653
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1297657489776611,
      "learning_rate": 1.8115794278744086e-05,
      "loss": 2.7112,
      "step": 15654
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0079079866409302,
      "learning_rate": 1.8115553717915224e-05,
      "loss": 2.6463,
      "step": 15655
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0345507860183716,
      "learning_rate": 1.8115313143328296e-05,
      "loss": 2.344,
      "step": 15656
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1921602487564087,
      "learning_rate": 1.811507255498371e-05,
      "loss": 2.6403,
      "step": 15657
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9924434423446655,
      "learning_rate": 1.811483195288187e-05,
      "loss": 2.4011,
      "step": 15658
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.321997046470642,
      "learning_rate": 1.8114591337023188e-05,
      "loss": 2.524,
      "step": 15659
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0970240831375122,
      "learning_rate": 1.8114350707408067e-05,
      "loss": 2.4388,
      "step": 15660
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0348142385482788,
      "learning_rate": 1.8114110064036918e-05,
      "loss": 2.4054,
      "step": 15661
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3699207305908203,
      "learning_rate": 1.811386940691015e-05,
      "loss": 2.4507,
      "step": 15662
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0876946449279785,
      "learning_rate": 1.8113628736028167e-05,
      "loss": 2.4469,
      "step": 15663
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0573315620422363,
      "learning_rate": 1.8113388051391382e-05,
      "loss": 2.6866,
      "step": 15664
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0168403387069702,
      "learning_rate": 1.81131473530002e-05,
      "loss": 2.6797,
      "step": 15665
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9762415885925293,
      "learning_rate": 1.811290664085503e-05,
      "loss": 2.5383,
      "step": 15666
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0669399499893188,
      "learning_rate": 1.811266591495628e-05,
      "loss": 2.533,
      "step": 15667
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0981422662734985,
      "learning_rate": 1.811242517530436e-05,
      "loss": 2.7319,
      "step": 15668
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0660780668258667,
      "learning_rate": 1.8112184421899673e-05,
      "loss": 2.4382,
      "step": 15669
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.163413405418396,
      "learning_rate": 1.811194365474263e-05,
      "loss": 2.6448,
      "step": 15670
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.311729073524475,
      "learning_rate": 1.8111702873833643e-05,
      "loss": 2.492,
      "step": 15671
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9938216209411621,
      "learning_rate": 1.8111462079173112e-05,
      "loss": 2.3252,
      "step": 15672
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.125462293624878,
      "learning_rate": 1.8111221270761452e-05,
      "loss": 2.367,
      "step": 15673
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1129372119903564,
      "learning_rate": 1.811098044859907e-05,
      "loss": 2.5001,
      "step": 15674
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0821402072906494,
      "learning_rate": 1.811073961268637e-05,
      "loss": 2.2713,
      "step": 15675
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0649176836013794,
      "learning_rate": 1.8110498763023766e-05,
      "loss": 2.4374,
      "step": 15676
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0776643753051758,
      "learning_rate": 1.8110257899611664e-05,
      "loss": 2.6265,
      "step": 15677
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2283812761306763,
      "learning_rate": 1.8110017022450467e-05,
      "loss": 2.6516,
      "step": 15678
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0259326696395874,
      "learning_rate": 1.8109776131540596e-05,
      "loss": 2.3206,
      "step": 15679
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9732682704925537,
      "learning_rate": 1.810953522688245e-05,
      "loss": 2.6469,
      "step": 15680
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.041521430015564,
      "learning_rate": 1.8109294308476436e-05,
      "loss": 2.5414,
      "step": 15681
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9189499020576477,
      "learning_rate": 1.8109053376322966e-05,
      "loss": 2.4973,
      "step": 15682
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9191820025444031,
      "learning_rate": 1.810881243042245e-05,
      "loss": 2.4662,
      "step": 15683
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0710848569869995,
      "learning_rate": 1.8108571470775293e-05,
      "loss": 2.4824,
      "step": 15684
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0245901346206665,
      "learning_rate": 1.8108330497381907e-05,
      "loss": 2.4391,
      "step": 15685
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.265557050704956,
      "learning_rate": 1.81080895102427e-05,
      "loss": 2.2918,
      "step": 15686
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6945111751556396,
      "learning_rate": 1.8107848509358077e-05,
      "loss": 2.4658,
      "step": 15687
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0674277544021606,
      "learning_rate": 1.810760749472845e-05,
      "loss": 2.3002,
      "step": 15688
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0126925706863403,
      "learning_rate": 1.8107366466354222e-05,
      "loss": 2.7762,
      "step": 15689
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1135300397872925,
      "learning_rate": 1.810712542423581e-05,
      "loss": 2.361,
      "step": 15690
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0473201274871826,
      "learning_rate": 1.810688436837362e-05,
      "loss": 2.4394,
      "step": 15691
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0948690176010132,
      "learning_rate": 1.8106643298768058e-05,
      "loss": 2.5224,
      "step": 15692
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0772488117218018,
      "learning_rate": 1.810640221541953e-05,
      "loss": 2.6468,
      "step": 15693
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0804439783096313,
      "learning_rate": 1.810616111832845e-05,
      "loss": 2.7248,
      "step": 15694
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0334508419036865,
      "learning_rate": 1.8105920007495225e-05,
      "loss": 2.2733,
      "step": 15695
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0165752172470093,
      "learning_rate": 1.8105678882920268e-05,
      "loss": 2.326,
      "step": 15696
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9886704683303833,
      "learning_rate": 1.8105437744603983e-05,
      "loss": 2.6833,
      "step": 15697
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1080421209335327,
      "learning_rate": 1.8105196592546774e-05,
      "loss": 2.5831,
      "step": 15698
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9988494515419006,
      "learning_rate": 1.810495542674906e-05,
      "loss": 2.5563,
      "step": 15699
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9316617250442505,
      "learning_rate": 1.8104714247211246e-05,
      "loss": 2.4554,
      "step": 15700
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9972434639930725,
      "learning_rate": 1.810447305393374e-05,
      "loss": 2.5547,
      "step": 15701
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0472954511642456,
      "learning_rate": 1.810423184691695e-05,
      "loss": 2.4457,
      "step": 15702
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2049058675765991,
      "learning_rate": 1.8103990626161284e-05,
      "loss": 2.5184,
      "step": 15703
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2148818969726562,
      "learning_rate": 1.8103749391667155e-05,
      "loss": 2.3961,
      "step": 15704
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.986194372177124,
      "learning_rate": 1.810350814343497e-05,
      "loss": 2.4296,
      "step": 15705
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1286029815673828,
      "learning_rate": 1.810326688146514e-05,
      "loss": 2.2564,
      "step": 15706
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1550829410552979,
      "learning_rate": 1.8103025605758066e-05,
      "loss": 2.5056,
      "step": 15707
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0595667362213135,
      "learning_rate": 1.810278431631417e-05,
      "loss": 2.6956,
      "step": 15708
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0560522079467773,
      "learning_rate": 1.8102543013133848e-05,
      "loss": 2.487,
      "step": 15709
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0407334566116333,
      "learning_rate": 1.8102301696217515e-05,
      "loss": 2.4686,
      "step": 15710
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9590803980827332,
      "learning_rate": 1.8102060365565584e-05,
      "loss": 2.5016,
      "step": 15711
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0313007831573486,
      "learning_rate": 1.810181902117846e-05,
      "loss": 2.4522,
      "step": 15712
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0144399404525757,
      "learning_rate": 1.8101577663056548e-05,
      "loss": 2.5075,
      "step": 15713
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2173106670379639,
      "learning_rate": 1.8101336291200265e-05,
      "loss": 2.307,
      "step": 15714
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9649970531463623,
      "learning_rate": 1.8101094905610014e-05,
      "loss": 2.2921,
      "step": 15715
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9190672039985657,
      "learning_rate": 1.810085350628621e-05,
      "loss": 2.2319,
      "step": 15716
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.053470253944397,
      "learning_rate": 1.8100612093229258e-05,
      "loss": 2.3886,
      "step": 15717
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9572173357009888,
      "learning_rate": 1.8100370666439565e-05,
      "loss": 2.4545,
      "step": 15718
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0339035987854004,
      "learning_rate": 1.810012922591755e-05,
      "loss": 2.5735,
      "step": 15719
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1797168254852295,
      "learning_rate": 1.809988777166361e-05,
      "loss": 2.6149,
      "step": 15720
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1451923847198486,
      "learning_rate": 1.8099646303678165e-05,
      "loss": 2.5367,
      "step": 15721
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.165907382965088,
      "learning_rate": 1.8099404821961618e-05,
      "loss": 2.584,
      "step": 15722
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0085774660110474,
      "learning_rate": 1.809916332651438e-05,
      "loss": 2.3993,
      "step": 15723
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0664303302764893,
      "learning_rate": 1.809892181733686e-05,
      "loss": 2.4506,
      "step": 15724
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9428021311759949,
      "learning_rate": 1.8098680294429468e-05,
      "loss": 2.6412,
      "step": 15725
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9625028967857361,
      "learning_rate": 1.8098438757792612e-05,
      "loss": 2.38,
      "step": 15726
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.053187608718872,
      "learning_rate": 1.8098197207426708e-05,
      "loss": 2.7838,
      "step": 15727
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9481489062309265,
      "learning_rate": 1.8097955643332154e-05,
      "loss": 2.5692,
      "step": 15728
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1024082899093628,
      "learning_rate": 1.8097714065509372e-05,
      "loss": 2.5209,
      "step": 15729
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0266731977462769,
      "learning_rate": 1.809747247395876e-05,
      "loss": 2.6696,
      "step": 15730
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9229030609130859,
      "learning_rate": 1.8097230868680733e-05,
      "loss": 2.2876,
      "step": 15731
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0734689235687256,
      "learning_rate": 1.8096989249675704e-05,
      "loss": 2.7028,
      "step": 15732
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9099612236022949,
      "learning_rate": 1.8096747616944078e-05,
      "loss": 2.2326,
      "step": 15733
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1428289413452148,
      "learning_rate": 1.8096505970486264e-05,
      "loss": 2.6084,
      "step": 15734
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9798320531845093,
      "learning_rate": 1.8096264310302673e-05,
      "loss": 2.4951,
      "step": 15735
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2595930099487305,
      "learning_rate": 1.8096022636393717e-05,
      "loss": 2.4187,
      "step": 15736
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.902086615562439,
      "learning_rate": 1.80957809487598e-05,
      "loss": 2.2335,
      "step": 15737
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0398555994033813,
      "learning_rate": 1.8095539247401342e-05,
      "loss": 2.5535,
      "step": 15738
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1377112865447998,
      "learning_rate": 1.8095297532318745e-05,
      "loss": 2.5603,
      "step": 15739
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.102138638496399,
      "learning_rate": 1.8095055803512417e-05,
      "loss": 2.6732,
      "step": 15740
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9952211976051331,
      "learning_rate": 1.8094814060982774e-05,
      "loss": 2.4421,
      "step": 15741
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.0378203392028809,
      "learning_rate": 1.8094572304730218e-05,
      "loss": 2.4573,
      "step": 15742
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9480184316635132,
      "learning_rate": 1.8094330534755165e-05,
      "loss": 2.4717,
      "step": 15743
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.001844882965088,
      "learning_rate": 1.8094088751058026e-05,
      "loss": 2.3661,
      "step": 15744
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9934988617897034,
      "learning_rate": 1.8093846953639208e-05,
      "loss": 2.5175,
      "step": 15745
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2805551290512085,
      "learning_rate": 1.809360514249912e-05,
      "loss": 2.3399,
      "step": 15746
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9671261310577393,
      "learning_rate": 1.809336331763817e-05,
      "loss": 2.5263,
      "step": 15747
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0260257720947266,
      "learning_rate": 1.8093121479056775e-05,
      "loss": 2.4445,
      "step": 15748
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9234744906425476,
      "learning_rate": 1.809287962675534e-05,
      "loss": 2.4018,
      "step": 15749
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9235315322875977,
      "learning_rate": 1.8092637760734278e-05,
      "loss": 2.6782,
      "step": 15750
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0741490125656128,
      "learning_rate": 1.8092395880993994e-05,
      "loss": 2.3696,
      "step": 15751
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9862124919891357,
      "learning_rate": 1.8092153987534904e-05,
      "loss": 2.4976,
      "step": 15752
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1516118049621582,
      "learning_rate": 1.809191208035741e-05,
      "loss": 2.2707,
      "step": 15753
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.09818434715271,
      "learning_rate": 1.8091670159461934e-05,
      "loss": 2.4925,
      "step": 15754
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9823369979858398,
      "learning_rate": 1.8091428224848874e-05,
      "loss": 2.4274,
      "step": 15755
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0870373249053955,
      "learning_rate": 1.8091186276518648e-05,
      "loss": 2.5189,
      "step": 15756
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1330411434173584,
      "learning_rate": 1.8090944314471665e-05,
      "loss": 2.4512,
      "step": 15757
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0062421560287476,
      "learning_rate": 1.809070233870833e-05,
      "loss": 2.3832,
      "step": 15758
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0702193975448608,
      "learning_rate": 1.809046034922906e-05,
      "loss": 2.3798,
      "step": 15759
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9313051104545593,
      "learning_rate": 1.8090218346034264e-05,
      "loss": 2.3383,
      "step": 15760
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0381273031234741,
      "learning_rate": 1.8089976329124347e-05,
      "loss": 2.5445,
      "step": 15761
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0037587881088257,
      "learning_rate": 1.8089734298499727e-05,
      "loss": 2.5722,
      "step": 15762
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2140573263168335,
      "learning_rate": 1.8089492254160806e-05,
      "loss": 2.5628,
      "step": 15763
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1472902297973633,
      "learning_rate": 1.8089250196108e-05,
      "loss": 2.2246,
      "step": 15764
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9885084629058838,
      "learning_rate": 1.8089008124341717e-05,
      "loss": 2.2989,
      "step": 15765
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.050013780593872,
      "learning_rate": 1.808876603886237e-05,
      "loss": 2.495,
      "step": 15766
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.012412190437317,
      "learning_rate": 1.8088523939670367e-05,
      "loss": 2.2823,
      "step": 15767
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0371216535568237,
      "learning_rate": 1.8088281826766118e-05,
      "loss": 2.3233,
      "step": 15768
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0037102699279785,
      "learning_rate": 1.8088039700150038e-05,
      "loss": 2.5039,
      "step": 15769
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.966374933719635,
      "learning_rate": 1.8087797559822528e-05,
      "loss": 2.4923,
      "step": 15770
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3908675909042358,
      "learning_rate": 1.808755540578401e-05,
      "loss": 2.3658,
      "step": 15771
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0314693450927734,
      "learning_rate": 1.808731323803489e-05,
      "loss": 2.5786,
      "step": 15772
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9617568850517273,
      "learning_rate": 1.8087071056575572e-05,
      "loss": 2.492,
      "step": 15773
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.115143895149231,
      "learning_rate": 1.8086828861406477e-05,
      "loss": 2.35,
      "step": 15774
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.086576223373413,
      "learning_rate": 1.8086586652528008e-05,
      "loss": 2.6246,
      "step": 15775
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0695111751556396,
      "learning_rate": 1.808634442994058e-05,
      "loss": 2.4576,
      "step": 15776
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0104268789291382,
      "learning_rate": 1.8086102193644598e-05,
      "loss": 2.5545,
      "step": 15777
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0448116064071655,
      "learning_rate": 1.808585994364048e-05,
      "loss": 2.3976,
      "step": 15778
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0901457071304321,
      "learning_rate": 1.8085617679928632e-05,
      "loss": 2.462,
      "step": 15779
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9758607149124146,
      "learning_rate": 1.808537540250947e-05,
      "loss": 2.5532,
      "step": 15780
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9579604864120483,
      "learning_rate": 1.8085133111383397e-05,
      "loss": 2.5615,
      "step": 15781
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.015259861946106,
      "learning_rate": 1.8084890806550827e-05,
      "loss": 2.7235,
      "step": 15782
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9853534698486328,
      "learning_rate": 1.8084648488012172e-05,
      "loss": 2.3115,
      "step": 15783
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0173351764678955,
      "learning_rate": 1.8084406155767843e-05,
      "loss": 2.6705,
      "step": 15784
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0786799192428589,
      "learning_rate": 1.808416380981825e-05,
      "loss": 2.5446,
      "step": 15785
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9429572224617004,
      "learning_rate": 1.8083921450163804e-05,
      "loss": 2.5311,
      "step": 15786
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9390232563018799,
      "learning_rate": 1.8083679076804913e-05,
      "loss": 2.5365,
      "step": 15787
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0285178422927856,
      "learning_rate": 1.8083436689741994e-05,
      "loss": 2.5051,
      "step": 15788
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0916798114776611,
      "learning_rate": 1.808319428897545e-05,
      "loss": 2.6706,
      "step": 15789
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0056438446044922,
      "learning_rate": 1.80829518745057e-05,
      "loss": 2.4957,
      "step": 15790
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.025638222694397,
      "learning_rate": 1.808270944633315e-05,
      "loss": 2.4907,
      "step": 15791
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0412081480026245,
      "learning_rate": 1.8082467004458214e-05,
      "loss": 2.3539,
      "step": 15792
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.131462812423706,
      "learning_rate": 1.8082224548881297e-05,
      "loss": 2.5654,
      "step": 15793
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9600658416748047,
      "learning_rate": 1.808198207960282e-05,
      "loss": 2.3444,
      "step": 15794
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1481486558914185,
      "learning_rate": 1.8081739596623184e-05,
      "loss": 2.5109,
      "step": 15795
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0213192701339722,
      "learning_rate": 1.8081497099942808e-05,
      "loss": 2.6759,
      "step": 15796
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1294989585876465,
      "learning_rate": 1.8081254589562097e-05,
      "loss": 2.5331,
      "step": 15797
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1187984943389893,
      "learning_rate": 1.8081012065481462e-05,
      "loss": 2.5107,
      "step": 15798
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.152881145477295,
      "learning_rate": 1.808076952770132e-05,
      "loss": 2.6146,
      "step": 15799
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1821190118789673,
      "learning_rate": 1.8080526976222075e-05,
      "loss": 2.534,
      "step": 15800
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0177677869796753,
      "learning_rate": 1.808028441104415e-05,
      "loss": 2.6462,
      "step": 15801
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0031005144119263,
      "learning_rate": 1.808004183216794e-05,
      "loss": 2.5599,
      "step": 15802
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9788972735404968,
      "learning_rate": 1.8079799239593874e-05,
      "loss": 2.4745,
      "step": 15803
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3346697092056274,
      "learning_rate": 1.8079556633322345e-05,
      "loss": 2.5646,
      "step": 15804
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.006856083869934,
      "learning_rate": 1.807931401335378e-05,
      "loss": 2.2489,
      "step": 15805
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0575039386749268,
      "learning_rate": 1.807907137968858e-05,
      "loss": 2.544,
      "step": 15806
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.011279821395874,
      "learning_rate": 1.807882873232716e-05,
      "loss": 2.4859,
      "step": 15807
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1473232507705688,
      "learning_rate": 1.8078586071269927e-05,
      "loss": 2.5938,
      "step": 15808
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0089702606201172,
      "learning_rate": 1.8078343396517302e-05,
      "loss": 2.5929,
      "step": 15809
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.069891095161438,
      "learning_rate": 1.8078100708069686e-05,
      "loss": 2.4763,
      "step": 15810
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9469267725944519,
      "learning_rate": 1.8077858005927503e-05,
      "loss": 2.2582,
      "step": 15811
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0239698886871338,
      "learning_rate": 1.807761529009115e-05,
      "loss": 2.3309,
      "step": 15812
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9541184902191162,
      "learning_rate": 1.807737256056105e-05,
      "loss": 2.3729,
      "step": 15813
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0738242864608765,
      "learning_rate": 1.8077129817337608e-05,
      "loss": 2.4736,
      "step": 15814
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9579078555107117,
      "learning_rate": 1.8076887060421233e-05,
      "loss": 2.4849,
      "step": 15815
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.100557565689087,
      "learning_rate": 1.8076644289812346e-05,
      "loss": 2.6094,
      "step": 15816
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9546511173248291,
      "learning_rate": 1.807640150551135e-05,
      "loss": 2.3987,
      "step": 15817
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0891538858413696,
      "learning_rate": 1.8076158707518662e-05,
      "loss": 2.494,
      "step": 15818
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9263272881507874,
      "learning_rate": 1.807591589583469e-05,
      "loss": 2.43,
      "step": 15819
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0810290575027466,
      "learning_rate": 1.807567307045985e-05,
      "loss": 2.3732,
      "step": 15820
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0743074417114258,
      "learning_rate": 1.807543023139455e-05,
      "loss": 2.4618,
      "step": 15821
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0438302755355835,
      "learning_rate": 1.80751873786392e-05,
      "loss": 2.5609,
      "step": 15822
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0549421310424805,
      "learning_rate": 1.8074944512194215e-05,
      "loss": 2.4559,
      "step": 15823
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1401516199111938,
      "learning_rate": 1.8074701632060008e-05,
      "loss": 2.5207,
      "step": 15824
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9588012099266052,
      "learning_rate": 1.807445873823699e-05,
      "loss": 2.5862,
      "step": 15825
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1412889957427979,
      "learning_rate": 1.807421583072557e-05,
      "loss": 2.4785,
      "step": 15826
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9664754271507263,
      "learning_rate": 1.8073972909526157e-05,
      "loss": 2.5747,
      "step": 15827
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1420329809188843,
      "learning_rate": 1.8073729974639168e-05,
      "loss": 2.5266,
      "step": 15828
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.981135904788971,
      "learning_rate": 1.8073487026065017e-05,
      "loss": 2.6142,
      "step": 15829
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0866543054580688,
      "learning_rate": 1.807324406380411e-05,
      "loss": 2.3895,
      "step": 15830
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9230415225028992,
      "learning_rate": 1.8073001087856867e-05,
      "loss": 2.5514,
      "step": 15831
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.215823769569397,
      "learning_rate": 1.807275809822369e-05,
      "loss": 2.5013,
      "step": 15832
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.038639783859253,
      "learning_rate": 1.8072515094904998e-05,
      "loss": 2.4526,
      "step": 15833
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0538591146469116,
      "learning_rate": 1.80722720779012e-05,
      "loss": 2.4502,
      "step": 15834
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0230451822280884,
      "learning_rate": 1.8072029047212707e-05,
      "loss": 2.3802,
      "step": 15835
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0250191688537598,
      "learning_rate": 1.8071786002839932e-05,
      "loss": 2.3934,
      "step": 15836
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1026027202606201,
      "learning_rate": 1.8071542944783287e-05,
      "loss": 2.6589,
      "step": 15837
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6257160902023315,
      "learning_rate": 1.8071299873043186e-05,
      "loss": 2.4895,
      "step": 15838
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0017999410629272,
      "learning_rate": 1.807105678762004e-05,
      "loss": 2.4955,
      "step": 15839
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1150321960449219,
      "learning_rate": 1.807081368851426e-05,
      "loss": 2.3849,
      "step": 15840
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.955291748046875,
      "learning_rate": 1.807057057572626e-05,
      "loss": 2.5113,
      "step": 15841
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1098920106887817,
      "learning_rate": 1.807032744925645e-05,
      "loss": 2.7413,
      "step": 15842
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0828993320465088,
      "learning_rate": 1.807008430910524e-05,
      "loss": 2.7067,
      "step": 15843
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9583919048309326,
      "learning_rate": 1.8069841155273052e-05,
      "loss": 2.3167,
      "step": 15844
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0717966556549072,
      "learning_rate": 1.8069597987760284e-05,
      "loss": 2.6321,
      "step": 15845
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0006519556045532,
      "learning_rate": 1.8069354806567362e-05,
      "loss": 2.5159,
      "step": 15846
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0604221820831299,
      "learning_rate": 1.806911161169469e-05,
      "loss": 2.4242,
      "step": 15847
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1377524137496948,
      "learning_rate": 1.806886840314268e-05,
      "loss": 2.4239,
      "step": 15848
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1564109325408936,
      "learning_rate": 1.8068625180911747e-05,
      "loss": 2.6171,
      "step": 15849
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0846657752990723,
      "learning_rate": 1.8068381945002305e-05,
      "loss": 2.5697,
      "step": 15850
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0662344694137573,
      "learning_rate": 1.8068138695414763e-05,
      "loss": 2.7061,
      "step": 15851
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9703705906867981,
      "learning_rate": 1.8067895432149535e-05,
      "loss": 2.2506,
      "step": 15852
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0293079614639282,
      "learning_rate": 1.8067652155207033e-05,
      "loss": 2.4114,
      "step": 15853
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9740417003631592,
      "learning_rate": 1.806740886458767e-05,
      "loss": 2.4487,
      "step": 15854
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0762771368026733,
      "learning_rate": 1.8067165560291857e-05,
      "loss": 2.467,
      "step": 15855
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9744142293930054,
      "learning_rate": 1.8066922242320007e-05,
      "loss": 2.3632,
      "step": 15856
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.083756446838379,
      "learning_rate": 1.8066678910672536e-05,
      "loss": 2.3553,
      "step": 15857
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.041202425956726,
      "learning_rate": 1.806643556534985e-05,
      "loss": 2.4484,
      "step": 15858
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1649507284164429,
      "learning_rate": 1.8066192206352366e-05,
      "loss": 2.4254,
      "step": 15859
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9660435914993286,
      "learning_rate": 1.8065948833680497e-05,
      "loss": 2.3992,
      "step": 15860
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1944502592086792,
      "learning_rate": 1.8065705447334653e-05,
      "loss": 2.6264,
      "step": 15861
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0992684364318848,
      "learning_rate": 1.8065462047315248e-05,
      "loss": 2.2816,
      "step": 15862
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9837352633476257,
      "learning_rate": 1.8065218633622688e-05,
      "loss": 2.4206,
      "step": 15863
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1712286472320557,
      "learning_rate": 1.8064975206257404e-05,
      "loss": 2.3553,
      "step": 15864
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0350916385650635,
      "learning_rate": 1.8064731765219785e-05,
      "loss": 2.5537,
      "step": 15865
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9318796992301941,
      "learning_rate": 1.806448831051026e-05,
      "loss": 2.3084,
      "step": 15866
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2903510332107544,
      "learning_rate": 1.806424484212924e-05,
      "loss": 2.5698,
      "step": 15867
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9858142733573914,
      "learning_rate": 1.8064001360077133e-05,
      "loss": 2.5369,
      "step": 15868
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9889654517173767,
      "learning_rate": 1.806375786435435e-05,
      "loss": 2.3402,
      "step": 15869
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1607239246368408,
      "learning_rate": 1.8063514354961312e-05,
      "loss": 2.5784,
      "step": 15870
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.042607307434082,
      "learning_rate": 1.806327083189842e-05,
      "loss": 2.4727,
      "step": 15871
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2470277547836304,
      "learning_rate": 1.8063027295166103e-05,
      "loss": 2.597,
      "step": 15872
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.02424955368042,
      "learning_rate": 1.806278374476476e-05,
      "loss": 2.5886,
      "step": 15873
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0713459253311157,
      "learning_rate": 1.806254018069481e-05,
      "loss": 2.3049,
      "step": 15874
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0462923049926758,
      "learning_rate": 1.8062296602956664e-05,
      "loss": 2.3723,
      "step": 15875
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2777490615844727,
      "learning_rate": 1.8062053011550737e-05,
      "loss": 2.3229,
      "step": 15876
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0364941358566284,
      "learning_rate": 1.806180940647744e-05,
      "loss": 2.6073,
      "step": 15877
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0201112031936646,
      "learning_rate": 1.8061565787737186e-05,
      "loss": 2.417,
      "step": 15878
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9395750164985657,
      "learning_rate": 1.8061322155330385e-05,
      "loss": 2.5286,
      "step": 15879
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9863989353179932,
      "learning_rate": 1.806107850925746e-05,
      "loss": 2.5854,
      "step": 15880
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0925017595291138,
      "learning_rate": 1.8060834849518814e-05,
      "loss": 2.6209,
      "step": 15881
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0722541809082031,
      "learning_rate": 1.8060591176114863e-05,
      "loss": 2.5464,
      "step": 15882
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.024590015411377,
      "learning_rate": 1.806034748904602e-05,
      "loss": 2.3061,
      "step": 15883
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2264007329940796,
      "learning_rate": 1.80601037883127e-05,
      "loss": 2.4291,
      "step": 15884
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0240271091461182,
      "learning_rate": 1.8059860073915317e-05,
      "loss": 2.3115,
      "step": 15885
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.067731499671936,
      "learning_rate": 1.8059616345854282e-05,
      "loss": 2.5964,
      "step": 15886
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.186219334602356,
      "learning_rate": 1.805937260413001e-05,
      "loss": 2.3578,
      "step": 15887
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.015604853630066,
      "learning_rate": 1.8059128848742905e-05,
      "loss": 2.5121,
      "step": 15888
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1668181419372559,
      "learning_rate": 1.8058885079693393e-05,
      "loss": 2.4358,
      "step": 15889
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0361230373382568,
      "learning_rate": 1.8058641296981883e-05,
      "loss": 2.3471,
      "step": 15890
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9618847370147705,
      "learning_rate": 1.8058397500608786e-05,
      "loss": 2.323,
      "step": 15891
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0259006023406982,
      "learning_rate": 1.8058153690574514e-05,
      "loss": 2.4993,
      "step": 15892
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9425655007362366,
      "learning_rate": 1.8057909866879488e-05,
      "loss": 2.6332,
      "step": 15893
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9830043911933899,
      "learning_rate": 1.805766602952411e-05,
      "loss": 2.5266,
      "step": 15894
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0073974132537842,
      "learning_rate": 1.8057422178508807e-05,
      "loss": 2.3785,
      "step": 15895
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9960071444511414,
      "learning_rate": 1.8057178313833978e-05,
      "loss": 2.3968,
      "step": 15896
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.987906277179718,
      "learning_rate": 1.8056934435500048e-05,
      "loss": 2.4241,
      "step": 15897
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0509097576141357,
      "learning_rate": 1.8056690543507427e-05,
      "loss": 2.4555,
      "step": 15898
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9755575060844421,
      "learning_rate": 1.8056446637856522e-05,
      "loss": 2.3611,
      "step": 15899
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0431824922561646,
      "learning_rate": 1.8056202718547757e-05,
      "loss": 2.5554,
      "step": 15900
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0547488927841187,
      "learning_rate": 1.8055958785581537e-05,
      "loss": 2.4869,
      "step": 15901
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1224056482315063,
      "learning_rate": 1.805571483895828e-05,
      "loss": 2.4397,
      "step": 15902
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0802093744277954,
      "learning_rate": 1.8055470878678398e-05,
      "loss": 2.5992,
      "step": 15903
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1694307327270508,
      "learning_rate": 1.8055226904742305e-05,
      "loss": 2.6333,
      "step": 15904
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1460031270980835,
      "learning_rate": 1.8054982917150416e-05,
      "loss": 2.4411,
      "step": 15905
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.035198450088501,
      "learning_rate": 1.8054738915903142e-05,
      "loss": 2.3968,
      "step": 15906
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0692427158355713,
      "learning_rate": 1.8054494901000898e-05,
      "loss": 2.5329,
      "step": 15907
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5918116569519043,
      "learning_rate": 1.80542508724441e-05,
      "loss": 2.238,
      "step": 15908
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.067118525505066,
      "learning_rate": 1.8054006830233157e-05,
      "loss": 2.6012,
      "step": 15909
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9665466547012329,
      "learning_rate": 1.8053762774368487e-05,
      "loss": 2.4046,
      "step": 15910
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9881870746612549,
      "learning_rate": 1.8053518704850497e-05,
      "loss": 2.6923,
      "step": 15911
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1362851858139038,
      "learning_rate": 1.805327462167961e-05,
      "loss": 2.3404,
      "step": 15912
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9250501394271851,
      "learning_rate": 1.805303052485623e-05,
      "loss": 2.3031,
      "step": 15913
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0192105770111084,
      "learning_rate": 1.8052786414380782e-05,
      "loss": 2.5071,
      "step": 15914
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.009819507598877,
      "learning_rate": 1.8052542290253673e-05,
      "loss": 2.2905,
      "step": 15915
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9969422817230225,
      "learning_rate": 1.8052298152475317e-05,
      "loss": 2.4968,
      "step": 15916
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.8816255927085876,
      "learning_rate": 1.8052054001046127e-05,
      "loss": 2.3425,
      "step": 15917
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0289381742477417,
      "learning_rate": 1.805180983596652e-05,
      "loss": 2.5741,
      "step": 15918
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0701264142990112,
      "learning_rate": 1.8051565657236905e-05,
      "loss": 2.3863,
      "step": 15919
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1248512268066406,
      "learning_rate": 1.8051321464857704e-05,
      "loss": 2.9458,
      "step": 15920
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9603057503700256,
      "learning_rate": 1.8051077258829326e-05,
      "loss": 2.2922,
      "step": 15921
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9810751080513,
      "learning_rate": 1.8050833039152184e-05,
      "loss": 2.6309,
      "step": 15922
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0016332864761353,
      "learning_rate": 1.8050588805826695e-05,
      "loss": 2.3435,
      "step": 15923
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.917252242565155,
      "learning_rate": 1.805034455885327e-05,
      "loss": 2.5054,
      "step": 15924
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.125180721282959,
      "learning_rate": 1.8050100298232324e-05,
      "loss": 2.3529,
      "step": 15925
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.987048864364624,
      "learning_rate": 1.804985602396427e-05,
      "loss": 2.304,
      "step": 15926
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0721384286880493,
      "learning_rate": 1.8049611736049528e-05,
      "loss": 2.5158,
      "step": 15927
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9418948888778687,
      "learning_rate": 1.8049367434488508e-05,
      "loss": 2.7762,
      "step": 15928
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9525445699691772,
      "learning_rate": 1.804912311928162e-05,
      "loss": 2.3204,
      "step": 15929
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9925684332847595,
      "learning_rate": 1.8048878790429282e-05,
      "loss": 2.4228,
      "step": 15930
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9853650331497192,
      "learning_rate": 1.8048634447931912e-05,
      "loss": 2.5242,
      "step": 15931
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0718852281570435,
      "learning_rate": 1.804839009178992e-05,
      "loss": 2.3398,
      "step": 15932
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.031691312789917,
      "learning_rate": 1.804814572200372e-05,
      "loss": 2.598,
      "step": 15933
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0172206163406372,
      "learning_rate": 1.8047901338573726e-05,
      "loss": 2.4506,
      "step": 15934
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0238864421844482,
      "learning_rate": 1.8047656941500356e-05,
      "loss": 2.5882,
      "step": 15935
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9290102124214172,
      "learning_rate": 1.804741253078402e-05,
      "loss": 2.2872,
      "step": 15936
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.032028317451477,
      "learning_rate": 1.8047168106425138e-05,
      "loss": 2.2624,
      "step": 15937
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9814482927322388,
      "learning_rate": 1.8046923668424115e-05,
      "loss": 2.3358,
      "step": 15938
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0721904039382935,
      "learning_rate": 1.8046679216781373e-05,
      "loss": 2.5013,
      "step": 15939
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0571428537368774,
      "learning_rate": 1.8046434751497324e-05,
      "loss": 2.3887,
      "step": 15940
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0609790086746216,
      "learning_rate": 1.8046190272572383e-05,
      "loss": 2.312,
      "step": 15941
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.051933765411377,
      "learning_rate": 1.804594578000696e-05,
      "loss": 2.4831,
      "step": 15942
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.049854040145874,
      "learning_rate": 1.804570127380148e-05,
      "loss": 2.446,
      "step": 15943
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0268961191177368,
      "learning_rate": 1.804545675395635e-05,
      "loss": 2.5626,
      "step": 15944
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0826084613800049,
      "learning_rate": 1.8045212220471984e-05,
      "loss": 2.3385,
      "step": 15945
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1363356113433838,
      "learning_rate": 1.80449676733488e-05,
      "loss": 2.4113,
      "step": 15946
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0226236581802368,
      "learning_rate": 1.804472311258721e-05,
      "loss": 2.5685,
      "step": 15947
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3311582803726196,
      "learning_rate": 1.8044478538187628e-05,
      "loss": 2.6424,
      "step": 15948
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1082830429077148,
      "learning_rate": 1.804423395015047e-05,
      "loss": 2.3839,
      "step": 15949
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9847266674041748,
      "learning_rate": 1.8043989348476153e-05,
      "loss": 2.6678,
      "step": 15950
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0884414911270142,
      "learning_rate": 1.804374473316509e-05,
      "loss": 2.528,
      "step": 15951
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9962230324745178,
      "learning_rate": 1.804350010421769e-05,
      "loss": 2.5351,
      "step": 15952
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0256683826446533,
      "learning_rate": 1.8043255461634377e-05,
      "loss": 2.4795,
      "step": 15953
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.030898094177246,
      "learning_rate": 1.804301080541556e-05,
      "loss": 2.5652,
      "step": 15954
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1840980052947998,
      "learning_rate": 1.8042766135561656e-05,
      "loss": 2.4779,
      "step": 15955
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.086926817893982,
      "learning_rate": 1.8042521452073078e-05,
      "loss": 2.5479,
      "step": 15956
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9415382742881775,
      "learning_rate": 1.8042276754950243e-05,
      "loss": 2.4207,
      "step": 15957
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9727470874786377,
      "learning_rate": 1.8042032044193563e-05,
      "loss": 2.4722,
      "step": 15958
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0458495616912842,
      "learning_rate": 1.8041787319803457e-05,
      "loss": 2.4836,
      "step": 15959
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.093633770942688,
      "learning_rate": 1.8041542581780336e-05,
      "loss": 2.5983,
      "step": 15960
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1630383729934692,
      "learning_rate": 1.8041297830124616e-05,
      "loss": 2.3699,
      "step": 15961
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.235904335975647,
      "learning_rate": 1.8041053064836713e-05,
      "loss": 2.3522,
      "step": 15962
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.024982213973999,
      "learning_rate": 1.804080828591704e-05,
      "loss": 2.4586,
      "step": 15963
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0495591163635254,
      "learning_rate": 1.8040563493366012e-05,
      "loss": 2.359,
      "step": 15964
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0107303857803345,
      "learning_rate": 1.8040318687184047e-05,
      "loss": 2.2921,
      "step": 15965
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1311321258544922,
      "learning_rate": 1.804007386737156e-05,
      "loss": 2.7071,
      "step": 15966
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0053858757019043,
      "learning_rate": 1.803982903392896e-05,
      "loss": 2.582,
      "step": 15967
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.203049898147583,
      "learning_rate": 1.803958418685667e-05,
      "loss": 2.5033,
      "step": 15968
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1759246587753296,
      "learning_rate": 1.80393393261551e-05,
      "loss": 2.5491,
      "step": 15969
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1941187381744385,
      "learning_rate": 1.8039094451824665e-05,
      "loss": 2.4469,
      "step": 15970
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9701990485191345,
      "learning_rate": 1.8038849563865782e-05,
      "loss": 2.6635,
      "step": 15971
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9640706181526184,
      "learning_rate": 1.8038604662278864e-05,
      "loss": 2.4015,
      "step": 15972
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0252776145935059,
      "learning_rate": 1.803835974706433e-05,
      "loss": 2.4619,
      "step": 15973
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0799466371536255,
      "learning_rate": 1.8038114818222593e-05,
      "loss": 2.3264,
      "step": 15974
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.063597321510315,
      "learning_rate": 1.803786987575407e-05,
      "loss": 2.2502,
      "step": 15975
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0060133934020996,
      "learning_rate": 1.8037624919659172e-05,
      "loss": 2.575,
      "step": 15976
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1508862972259521,
      "learning_rate": 1.803737994993832e-05,
      "loss": 2.6759,
      "step": 15977
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.091932773590088,
      "learning_rate": 1.8037134966591922e-05,
      "loss": 2.4785,
      "step": 15978
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.059922456741333,
      "learning_rate": 1.80368899696204e-05,
      "loss": 2.4455,
      "step": 15979
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0856257677078247,
      "learning_rate": 1.8036644959024167e-05,
      "loss": 2.3213,
      "step": 15980
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.038881540298462,
      "learning_rate": 1.8036399934803636e-05,
      "loss": 2.5896,
      "step": 15981
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0225093364715576,
      "learning_rate": 1.8036154896959223e-05,
      "loss": 2.5445,
      "step": 15982
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0139553546905518,
      "learning_rate": 1.8035909845491346e-05,
      "loss": 2.3649,
      "step": 15983
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.8999399542808533,
      "learning_rate": 1.803566478040042e-05,
      "loss": 2.5358,
      "step": 15984
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1889654397964478,
      "learning_rate": 1.803541970168686e-05,
      "loss": 2.5301,
      "step": 15985
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1714836359024048,
      "learning_rate": 1.8035174609351083e-05,
      "loss": 2.4697,
      "step": 15986
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0203375816345215,
      "learning_rate": 1.80349295033935e-05,
      "loss": 2.554,
      "step": 15987
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9975656867027283,
      "learning_rate": 1.803468438381453e-05,
      "loss": 2.4282,
      "step": 15988
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9876423478126526,
      "learning_rate": 1.8034439250614587e-05,
      "loss": 2.4247,
      "step": 15989
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.028182029724121,
      "learning_rate": 1.803419410379409e-05,
      "loss": 2.6127,
      "step": 15990
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9607612490653992,
      "learning_rate": 1.803394894335345e-05,
      "loss": 2.6359,
      "step": 15991
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1143063306808472,
      "learning_rate": 1.8033703769293085e-05,
      "loss": 2.5037,
      "step": 15992
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.027402400970459,
      "learning_rate": 1.803345858161341e-05,
      "loss": 2.6209,
      "step": 15993
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.027495265007019,
      "learning_rate": 1.803321338031484e-05,
      "loss": 2.4734,
      "step": 15994
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1717360019683838,
      "learning_rate": 1.803296816539779e-05,
      "loss": 2.299,
      "step": 15995
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3035961389541626,
      "learning_rate": 1.803272293686268e-05,
      "loss": 2.2871,
      "step": 15996
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9729546308517456,
      "learning_rate": 1.8032477694709926e-05,
      "loss": 2.5214,
      "step": 15997
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9214858412742615,
      "learning_rate": 1.8032232438939935e-05,
      "loss": 2.3926,
      "step": 15998
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.024671196937561,
      "learning_rate": 1.803198716955313e-05,
      "loss": 2.504,
      "step": 15999
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0543190240859985,
      "learning_rate": 1.803174188654993e-05,
      "loss": 2.4411,
      "step": 16000
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2570711374282837,
      "learning_rate": 1.803149658993074e-05,
      "loss": 2.595,
      "step": 16001
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2380362749099731,
      "learning_rate": 1.8031251279695982e-05,
      "loss": 2.4911,
      "step": 16002
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.30463445186615,
      "learning_rate": 1.8031005955846074e-05,
      "loss": 2.4036,
      "step": 16003
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.183030366897583,
      "learning_rate": 1.8030760618381432e-05,
      "loss": 2.5116,
      "step": 16004
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.117952823638916,
      "learning_rate": 1.8030515267302467e-05,
      "loss": 2.5739,
      "step": 16005
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0113770961761475,
      "learning_rate": 1.8030269902609595e-05,
      "loss": 2.4185,
      "step": 16006
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1109540462493896,
      "learning_rate": 1.8030024524303238e-05,
      "loss": 2.4777,
      "step": 16007
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.024737000465393,
      "learning_rate": 1.802977913238381e-05,
      "loss": 2.3089,
      "step": 16008
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0293657779693604,
      "learning_rate": 1.8029533726851723e-05,
      "loss": 2.5107,
      "step": 16009
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0845677852630615,
      "learning_rate": 1.8029288307707396e-05,
      "loss": 2.6964,
      "step": 16010
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9653232097625732,
      "learning_rate": 1.8029042874951243e-05,
      "loss": 2.4196,
      "step": 16011
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.110918641090393,
      "learning_rate": 1.8028797428583683e-05,
      "loss": 2.2547,
      "step": 16012
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0963313579559326,
      "learning_rate": 1.802855196860513e-05,
      "loss": 2.2545,
      "step": 16013
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0183926820755005,
      "learning_rate": 1.8028306495016003e-05,
      "loss": 2.7536,
      "step": 16014
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.995953381061554,
      "learning_rate": 1.8028061007816716e-05,
      "loss": 2.4718,
      "step": 16015
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1108283996582031,
      "learning_rate": 1.8027815507007686e-05,
      "loss": 2.4223,
      "step": 16016
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1204687356948853,
      "learning_rate": 1.8027569992589324e-05,
      "loss": 2.5083,
      "step": 16017
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0848296880722046,
      "learning_rate": 1.8027324464562052e-05,
      "loss": 2.4689,
      "step": 16018
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9534109234809875,
      "learning_rate": 1.8027078922926288e-05,
      "loss": 2.491,
      "step": 16019
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9898174405097961,
      "learning_rate": 1.8026833367682442e-05,
      "loss": 2.4391,
      "step": 16020
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0567381381988525,
      "learning_rate": 1.8026587798830934e-05,
      "loss": 2.2756,
      "step": 16021
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9500789046287537,
      "learning_rate": 1.802634221637218e-05,
      "loss": 2.4041,
      "step": 16022
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0255792140960693,
      "learning_rate": 1.8026096620306597e-05,
      "loss": 2.779,
      "step": 16023
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0908324718475342,
      "learning_rate": 1.80258510106346e-05,
      "loss": 2.4707,
      "step": 16024
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0762951374053955,
      "learning_rate": 1.8025605387356603e-05,
      "loss": 2.7918,
      "step": 16025
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1175049543380737,
      "learning_rate": 1.8025359750473027e-05,
      "loss": 2.4317,
      "step": 16026
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1649492979049683,
      "learning_rate": 1.802511409998429e-05,
      "loss": 2.7751,
      "step": 16027
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.004614233970642,
      "learning_rate": 1.80248684358908e-05,
      "loss": 2.4464,
      "step": 16028
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9894861578941345,
      "learning_rate": 1.802462275819298e-05,
      "loss": 2.4385,
      "step": 16029
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.011486530303955,
      "learning_rate": 1.8024377066891246e-05,
      "loss": 2.4265,
      "step": 16030
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.983323872089386,
      "learning_rate": 1.8024131361986012e-05,
      "loss": 2.6611,
      "step": 16031
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0344661474227905,
      "learning_rate": 1.8023885643477697e-05,
      "loss": 2.4507,
      "step": 16032
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0065908432006836,
      "learning_rate": 1.8023639911366717e-05,
      "loss": 2.3987,
      "step": 16033
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0452145338058472,
      "learning_rate": 1.8023394165653486e-05,
      "loss": 2.4487,
      "step": 16034
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9683802127838135,
      "learning_rate": 1.8023148406338427e-05,
      "loss": 2.4903,
      "step": 16035
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1891570091247559,
      "learning_rate": 1.8022902633421946e-05,
      "loss": 2.5803,
      "step": 16036
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9288550019264221,
      "learning_rate": 1.8022656846904475e-05,
      "loss": 2.3946,
      "step": 16037
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9910513758659363,
      "learning_rate": 1.8022411046786416e-05,
      "loss": 2.5621,
      "step": 16038
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0422924757003784,
      "learning_rate": 1.802216523306819e-05,
      "loss": 2.4714,
      "step": 16039
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1135952472686768,
      "learning_rate": 1.8021919405750218e-05,
      "loss": 2.5994,
      "step": 16040
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9989737868309021,
      "learning_rate": 1.8021673564832913e-05,
      "loss": 2.728,
      "step": 16041
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0742875337600708,
      "learning_rate": 1.8021427710316694e-05,
      "loss": 2.2838,
      "step": 16042
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.013379454612732,
      "learning_rate": 1.802118184220197e-05,
      "loss": 2.3581,
      "step": 16043
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9683118462562561,
      "learning_rate": 1.8020935960489173e-05,
      "loss": 2.3391,
      "step": 16044
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9721899628639221,
      "learning_rate": 1.802069006517871e-05,
      "loss": 2.4497,
      "step": 16045
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0547384023666382,
      "learning_rate": 1.8020444156270994e-05,
      "loss": 2.5628,
      "step": 16046
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1649490594863892,
      "learning_rate": 1.802019823376645e-05,
      "loss": 2.445,
      "step": 16047
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9642187356948853,
      "learning_rate": 1.801995229766549e-05,
      "loss": 2.5961,
      "step": 16048
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.157904863357544,
      "learning_rate": 1.8019706347968534e-05,
      "loss": 2.5608,
      "step": 16049
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1178464889526367,
      "learning_rate": 1.8019460384675994e-05,
      "loss": 2.5815,
      "step": 16050
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0284901857376099,
      "learning_rate": 1.80192144077883e-05,
      "loss": 2.467,
      "step": 16051
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9810299277305603,
      "learning_rate": 1.801896841730585e-05,
      "loss": 2.519,
      "step": 16052
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0822674036026,
      "learning_rate": 1.8018722413229073e-05,
      "loss": 2.4953,
      "step": 16053
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9783962368965149,
      "learning_rate": 1.8018476395558384e-05,
      "loss": 2.5296,
      "step": 16054
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1508709192276,
      "learning_rate": 1.8018230364294202e-05,
      "loss": 2.3256,
      "step": 16055
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0349924564361572,
      "learning_rate": 1.801798431943694e-05,
      "loss": 2.2885,
      "step": 16056
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9372663497924805,
      "learning_rate": 1.8017738260987015e-05,
      "loss": 2.3556,
      "step": 16057
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0160865783691406,
      "learning_rate": 1.801749218894485e-05,
      "loss": 2.3693,
      "step": 16058
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.052148699760437,
      "learning_rate": 1.8017246103310854e-05,
      "loss": 2.7302,
      "step": 16059
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0997692346572876,
      "learning_rate": 1.801700000408545e-05,
      "loss": 2.4352,
      "step": 16060
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0148003101348877,
      "learning_rate": 1.8016753891269058e-05,
      "loss": 2.5372,
      "step": 16061
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9787150025367737,
      "learning_rate": 1.8016507764862086e-05,
      "loss": 2.8726,
      "step": 16062
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0115127563476562,
      "learning_rate": 1.8016261624864956e-05,
      "loss": 2.4464,
      "step": 16063
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1714285612106323,
      "learning_rate": 1.801601547127809e-05,
      "loss": 2.4157,
      "step": 16064
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9612406492233276,
      "learning_rate": 1.8015769304101894e-05,
      "loss": 2.3435,
      "step": 16065
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0807932615280151,
      "learning_rate": 1.8015523123336796e-05,
      "loss": 2.1856,
      "step": 16066
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0108784437179565,
      "learning_rate": 1.801527692898321e-05,
      "loss": 2.3841,
      "step": 16067
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9938440322875977,
      "learning_rate": 1.801503072104155e-05,
      "loss": 2.3507,
      "step": 16068
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9910404682159424,
      "learning_rate": 1.8014784499512238e-05,
      "loss": 2.6133,
      "step": 16069
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9823810458183289,
      "learning_rate": 1.801453826439569e-05,
      "loss": 2.5831,
      "step": 16070
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9778785705566406,
      "learning_rate": 1.801429201569232e-05,
      "loss": 2.4275,
      "step": 16071
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0613932609558105,
      "learning_rate": 1.801404575340255e-05,
      "loss": 2.4355,
      "step": 16072
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1537585258483887,
      "learning_rate": 1.8013799477526795e-05,
      "loss": 2.5702,
      "step": 16073
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9885426759719849,
      "learning_rate": 1.8013553188065476e-05,
      "loss": 2.634,
      "step": 16074
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1333982944488525,
      "learning_rate": 1.8013306885019006e-05,
      "loss": 2.6333,
      "step": 16075
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0389115810394287,
      "learning_rate": 1.8013060568387804e-05,
      "loss": 2.4636,
      "step": 16076
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.8982657194137573,
      "learning_rate": 1.8012814238172288e-05,
      "loss": 2.2977,
      "step": 16077
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9884929656982422,
      "learning_rate": 1.8012567894372877e-05,
      "loss": 2.4091,
      "step": 16078
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1654084920883179,
      "learning_rate": 1.801232153698999e-05,
      "loss": 2.4305,
      "step": 16079
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.042222023010254,
      "learning_rate": 1.8012075166024036e-05,
      "loss": 2.6099,
      "step": 16080
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1069309711456299,
      "learning_rate": 1.801182878147544e-05,
      "loss": 2.6965,
      "step": 16081
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9745842218399048,
      "learning_rate": 1.801158238334462e-05,
      "loss": 2.6289,
      "step": 16082
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0545777082443237,
      "learning_rate": 1.8011335971631992e-05,
      "loss": 2.5543,
      "step": 16083
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0526398420333862,
      "learning_rate": 1.8011089546337973e-05,
      "loss": 2.3247,
      "step": 16084
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.004722237586975,
      "learning_rate": 1.801084310746298e-05,
      "loss": 2.3151,
      "step": 16085
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9630823135375977,
      "learning_rate": 1.8010596655007436e-05,
      "loss": 2.3394,
      "step": 16086
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0342165231704712,
      "learning_rate": 1.801035018897175e-05,
      "loss": 2.6234,
      "step": 16087
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.070576786994934,
      "learning_rate": 1.8010103709356348e-05,
      "loss": 2.4331,
      "step": 16088
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9431965351104736,
      "learning_rate": 1.8009857216161647e-05,
      "loss": 2.595,
      "step": 16089
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.27364182472229,
      "learning_rate": 1.8009610709388056e-05,
      "loss": 2.293,
      "step": 16090
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9531055688858032,
      "learning_rate": 1.8009364189036004e-05,
      "loss": 2.3956,
      "step": 16091
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9343004822731018,
      "learning_rate": 1.8009117655105903e-05,
      "loss": 2.3331,
      "step": 16092
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9764643907546997,
      "learning_rate": 1.8008871107598172e-05,
      "loss": 2.5137,
      "step": 16093
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9256295561790466,
      "learning_rate": 1.800862454651323e-05,
      "loss": 2.4461,
      "step": 16094
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0459882020950317,
      "learning_rate": 1.8008377971851498e-05,
      "loss": 2.3845,
      "step": 16095
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1999274492263794,
      "learning_rate": 1.8008131383613384e-05,
      "loss": 2.6404,
      "step": 16096
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6756688356399536,
      "learning_rate": 1.8007884781799315e-05,
      "loss": 2.4535,
      "step": 16097
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0759047269821167,
      "learning_rate": 1.800763816640971e-05,
      "loss": 2.4814,
      "step": 16098
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0790400505065918,
      "learning_rate": 1.800739153744498e-05,
      "loss": 2.5377,
      "step": 16099
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0798606872558594,
      "learning_rate": 1.8007144894905545e-05,
      "loss": 2.67,
      "step": 16100
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0118850469589233,
      "learning_rate": 1.8006898238791825e-05,
      "loss": 2.4761,
      "step": 16101
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1913063526153564,
      "learning_rate": 1.800665156910424e-05,
      "loss": 2.268,
      "step": 16102
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0961353778839111,
      "learning_rate": 1.8006404885843203e-05,
      "loss": 2.4421,
      "step": 16103
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1828540563583374,
      "learning_rate": 1.8006158189009136e-05,
      "loss": 2.3256,
      "step": 16104
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1154816150665283,
      "learning_rate": 1.8005911478602458e-05,
      "loss": 2.5566,
      "step": 16105
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9772310853004456,
      "learning_rate": 1.8005664754623585e-05,
      "loss": 2.5299,
      "step": 16106
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.335479974746704,
      "learning_rate": 1.8005418017072936e-05,
      "loss": 2.4533,
      "step": 16107
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0957610607147217,
      "learning_rate": 1.8005171265950933e-05,
      "loss": 2.394,
      "step": 16108
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.154671549797058,
      "learning_rate": 1.8004924501257984e-05,
      "loss": 2.5989,
      "step": 16109
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.088151216506958,
      "learning_rate": 1.8004677722994518e-05,
      "loss": 2.5356,
      "step": 16110
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.007322072982788,
      "learning_rate": 1.800443093116095e-05,
      "loss": 2.5061,
      "step": 16111
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0588233470916748,
      "learning_rate": 1.8004184125757693e-05,
      "loss": 2.3849,
      "step": 16112
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0396803617477417,
      "learning_rate": 1.8003937306785173e-05,
      "loss": 2.7921,
      "step": 16113
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9201549887657166,
      "learning_rate": 1.8003690474243804e-05,
      "loss": 2.5363,
      "step": 16114
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9703960418701172,
      "learning_rate": 1.8003443628134008e-05,
      "loss": 2.4079,
      "step": 16115
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9953879714012146,
      "learning_rate": 1.80031967684562e-05,
      "loss": 2.4899,
      "step": 16116
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9875050783157349,
      "learning_rate": 1.80029498952108e-05,
      "loss": 2.5878,
      "step": 16117
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0121991634368896,
      "learning_rate": 1.8002703008398228e-05,
      "loss": 2.6641,
      "step": 16118
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1740361452102661,
      "learning_rate": 1.80024561080189e-05,
      "loss": 2.58,
      "step": 16119
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0063306093215942,
      "learning_rate": 1.8002209194073232e-05,
      "loss": 2.5072,
      "step": 16120
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.017735481262207,
      "learning_rate": 1.8001962266561652e-05,
      "loss": 2.6624,
      "step": 16121
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1346299648284912,
      "learning_rate": 1.800171532548457e-05,
      "loss": 2.4931,
      "step": 16122
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.062130331993103,
      "learning_rate": 1.8001468370842405e-05,
      "loss": 2.6011,
      "step": 16123
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9919609427452087,
      "learning_rate": 1.800122140263558e-05,
      "loss": 2.506,
      "step": 16124
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0086631774902344,
      "learning_rate": 1.8000974420864513e-05,
      "loss": 2.4194,
      "step": 16125
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0057299137115479,
      "learning_rate": 1.800072742552962e-05,
      "loss": 2.488,
      "step": 16126
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0746580362319946,
      "learning_rate": 1.800048041663132e-05,
      "loss": 2.3432,
      "step": 16127
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1090220212936401,
      "learning_rate": 1.8000233394170032e-05,
      "loss": 2.5294,
      "step": 16128
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9992746710777283,
      "learning_rate": 1.7999986358146176e-05,
      "loss": 2.5914,
      "step": 16129
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9057720899581909,
      "learning_rate": 1.7999739308560174e-05,
      "loss": 2.2443,
      "step": 16130
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0371103286743164,
      "learning_rate": 1.799949224541244e-05,
      "loss": 2.3923,
      "step": 16131
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0579721927642822,
      "learning_rate": 1.799924516870339e-05,
      "loss": 2.4571,
      "step": 16132
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1552419662475586,
      "learning_rate": 1.7998998078433448e-05,
      "loss": 2.4016,
      "step": 16133
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9934902191162109,
      "learning_rate": 1.7998750974603036e-05,
      "loss": 2.6618,
      "step": 16134
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1325547695159912,
      "learning_rate": 1.7998503857212564e-05,
      "loss": 2.8,
      "step": 16135
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0960872173309326,
      "learning_rate": 1.7998256726262457e-05,
      "loss": 2.3328,
      "step": 16136
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1043195724487305,
      "learning_rate": 1.7998009581753132e-05,
      "loss": 2.4789,
      "step": 16137
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1153703927993774,
      "learning_rate": 1.799776242368501e-05,
      "loss": 2.5774,
      "step": 16138
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9693607687950134,
      "learning_rate": 1.7997515252058512e-05,
      "loss": 2.6775,
      "step": 16139
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.050571322441101,
      "learning_rate": 1.7997268066874048e-05,
      "loss": 2.5471,
      "step": 16140
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0228511095046997,
      "learning_rate": 1.7997020868132045e-05,
      "loss": 2.3697,
      "step": 16141
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9611108899116516,
      "learning_rate": 1.7996773655832918e-05,
      "loss": 2.6919,
      "step": 16142
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9373955726623535,
      "learning_rate": 1.799652642997709e-05,
      "loss": 2.2924,
      "step": 16143
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.092394232749939,
      "learning_rate": 1.7996279190564976e-05,
      "loss": 2.5095,
      "step": 16144
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1980087757110596,
      "learning_rate": 1.7996031937597e-05,
      "loss": 2.6471,
      "step": 16145
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3528501987457275,
      "learning_rate": 1.7995784671073573e-05,
      "loss": 2.3444,
      "step": 16146
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.001859188079834,
      "learning_rate": 1.7995537390995123e-05,
      "loss": 2.7893,
      "step": 16147
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9641094207763672,
      "learning_rate": 1.7995290097362063e-05,
      "loss": 2.5089,
      "step": 16148
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2665579319000244,
      "learning_rate": 1.799504279017482e-05,
      "loss": 2.5706,
      "step": 16149
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0026428699493408,
      "learning_rate": 1.79947954694338e-05,
      "loss": 2.714,
      "step": 16150
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9919078350067139,
      "learning_rate": 1.7994548135139438e-05,
      "loss": 2.6904,
      "step": 16151
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9496887922286987,
      "learning_rate": 1.7994300787292142e-05,
      "loss": 2.613,
      "step": 16152
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9908765554428101,
      "learning_rate": 1.7994053425892336e-05,
      "loss": 2.6013,
      "step": 16153
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9775705933570862,
      "learning_rate": 1.7993806050940436e-05,
      "loss": 2.6044,
      "step": 16154
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1389265060424805,
      "learning_rate": 1.7993558662436867e-05,
      "loss": 2.4129,
      "step": 16155
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0449789762496948,
      "learning_rate": 1.7993311260382044e-05,
      "loss": 2.4148,
      "step": 16156
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0332682132720947,
      "learning_rate": 1.7993063844776385e-05,
      "loss": 2.469,
      "step": 16157
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.129561185836792,
      "learning_rate": 1.7992816415620313e-05,
      "loss": 2.6003,
      "step": 16158
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9782373309135437,
      "learning_rate": 1.7992568972914248e-05,
      "loss": 2.4694,
      "step": 16159
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0958895683288574,
      "learning_rate": 1.799232151665861e-05,
      "loss": 2.5063,
      "step": 16160
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.150015115737915,
      "learning_rate": 1.7992074046853813e-05,
      "loss": 2.4923,
      "step": 16161
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1895250082015991,
      "learning_rate": 1.799182656350028e-05,
      "loss": 2.4549,
      "step": 16162
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.132888913154602,
      "learning_rate": 1.7991579066598428e-05,
      "loss": 2.5773,
      "step": 16163
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1691603660583496,
      "learning_rate": 1.799133155614868e-05,
      "loss": 2.5151,
      "step": 16164
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.185279130935669,
      "learning_rate": 1.7991084032151457e-05,
      "loss": 2.2978,
      "step": 16165
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0354338884353638,
      "learning_rate": 1.799083649460718e-05,
      "loss": 2.343,
      "step": 16166
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9633072018623352,
      "learning_rate": 1.799058894351626e-05,
      "loss": 2.3603,
      "step": 16167
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3270938396453857,
      "learning_rate": 1.7990341378879116e-05,
      "loss": 2.5118,
      "step": 16168
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0408921241760254,
      "learning_rate": 1.7990093800696183e-05,
      "loss": 2.4767,
      "step": 16169
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.010020136833191,
      "learning_rate": 1.7989846208967864e-05,
      "loss": 2.6341,
      "step": 16170
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1765259504318237,
      "learning_rate": 1.798959860369459e-05,
      "loss": 2.5215,
      "step": 16171
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.108441710472107,
      "learning_rate": 1.7989350984876776e-05,
      "loss": 2.4119,
      "step": 16172
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.225368618965149,
      "learning_rate": 1.798910335251484e-05,
      "loss": 2.4018,
      "step": 16173
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.129703402519226,
      "learning_rate": 1.7988855706609206e-05,
      "loss": 2.366,
      "step": 16174
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.001623272895813,
      "learning_rate": 1.798860804716029e-05,
      "loss": 2.6586,
      "step": 16175
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1169859170913696,
      "learning_rate": 1.7988360374168516e-05,
      "loss": 2.6896,
      "step": 16176
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.102050542831421,
      "learning_rate": 1.79881126876343e-05,
      "loss": 2.4487,
      "step": 16177
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1886619329452515,
      "learning_rate": 1.7987864987558064e-05,
      "loss": 2.5418,
      "step": 16178
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0062445402145386,
      "learning_rate": 1.7987617273940227e-05,
      "loss": 2.4882,
      "step": 16179
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0114814043045044,
      "learning_rate": 1.798736954678121e-05,
      "loss": 2.4148,
      "step": 16180
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9605525135993958,
      "learning_rate": 1.7987121806081432e-05,
      "loss": 2.4573,
      "step": 16181
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9710890054702759,
      "learning_rate": 1.798687405184131e-05,
      "loss": 2.3968,
      "step": 16182
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0733436346054077,
      "learning_rate": 1.7986626284061273e-05,
      "loss": 2.5493,
      "step": 16183
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9504460096359253,
      "learning_rate": 1.7986378502741733e-05,
      "loss": 2.4047,
      "step": 16184
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.258117437362671,
      "learning_rate": 1.7986130707883112e-05,
      "loss": 2.6118,
      "step": 16185
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0559991598129272,
      "learning_rate": 1.7985882899485833e-05,
      "loss": 2.4907,
      "step": 16186
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0027576684951782,
      "learning_rate": 1.7985635077550307e-05,
      "loss": 2.4112,
      "step": 16187
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1033238172531128,
      "learning_rate": 1.7985387242076965e-05,
      "loss": 2.3539,
      "step": 16188
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0945292711257935,
      "learning_rate": 1.7985139393066224e-05,
      "loss": 2.3858,
      "step": 16189
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9808148741722107,
      "learning_rate": 1.79848915305185e-05,
      "loss": 2.4541,
      "step": 16190
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1878777742385864,
      "learning_rate": 1.798464365443422e-05,
      "loss": 2.577,
      "step": 16191
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0762126445770264,
      "learning_rate": 1.7984395764813797e-05,
      "loss": 2.5268,
      "step": 16192
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0418375730514526,
      "learning_rate": 1.7984147861657654e-05,
      "loss": 2.4257,
      "step": 16193
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.066454291343689,
      "learning_rate": 1.7983899944966215e-05,
      "loss": 2.4859,
      "step": 16194
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9449966549873352,
      "learning_rate": 1.7983652014739895e-05,
      "loss": 2.411,
      "step": 16195
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9879478812217712,
      "learning_rate": 1.7983404070979117e-05,
      "loss": 2.5322,
      "step": 16196
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1122392416000366,
      "learning_rate": 1.79831561136843e-05,
      "loss": 2.3353,
      "step": 16197
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1257990598678589,
      "learning_rate": 1.798290814285587e-05,
      "loss": 2.403,
      "step": 16198
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0035542249679565,
      "learning_rate": 1.7982660158494237e-05,
      "loss": 2.3243,
      "step": 16199
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0102696418762207,
      "learning_rate": 1.798241216059983e-05,
      "loss": 2.5153,
      "step": 16200
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0757720470428467,
      "learning_rate": 1.7982164149173063e-05,
      "loss": 2.473,
      "step": 16201
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9719799160957336,
      "learning_rate": 1.7981916124214365e-05,
      "loss": 2.4314,
      "step": 16202
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9862024188041687,
      "learning_rate": 1.7981668085724146e-05,
      "loss": 2.5077,
      "step": 16203
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0980494022369385,
      "learning_rate": 1.7981420033702836e-05,
      "loss": 2.6177,
      "step": 16204
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0436997413635254,
      "learning_rate": 1.7981171968150846e-05,
      "loss": 2.4741,
      "step": 16205
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0626921653747559,
      "learning_rate": 1.7980923889068606e-05,
      "loss": 2.2147,
      "step": 16206
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0810670852661133,
      "learning_rate": 1.7980675796456536e-05,
      "loss": 2.4955,
      "step": 16207
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.093652367591858,
      "learning_rate": 1.7980427690315046e-05,
      "loss": 2.3063,
      "step": 16208
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0790598392486572,
      "learning_rate": 1.7980179570644565e-05,
      "loss": 2.4756,
      "step": 16209
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9600595235824585,
      "learning_rate": 1.7979931437445516e-05,
      "loss": 2.5909,
      "step": 16210
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0126813650131226,
      "learning_rate": 1.7979683290718313e-05,
      "loss": 2.5038,
      "step": 16211
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9720807075500488,
      "learning_rate": 1.797943513046338e-05,
      "loss": 2.6429,
      "step": 16212
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9402466416358948,
      "learning_rate": 1.7979186956681136e-05,
      "loss": 2.2616,
      "step": 16213
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0836670398712158,
      "learning_rate": 1.7978938769372005e-05,
      "loss": 2.5515,
      "step": 16214
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0941416025161743,
      "learning_rate": 1.7978690568536406e-05,
      "loss": 2.469,
      "step": 16215
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.990752637386322,
      "learning_rate": 1.797844235417476e-05,
      "loss": 2.7066,
      "step": 16216
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1956746578216553,
      "learning_rate": 1.7978194126287483e-05,
      "loss": 2.619,
      "step": 16217
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0592085123062134,
      "learning_rate": 1.7977945884875004e-05,
      "loss": 2.3441,
      "step": 16218
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1322312355041504,
      "learning_rate": 1.797769762993774e-05,
      "loss": 2.4671,
      "step": 16219
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1055777072906494,
      "learning_rate": 1.797744936147611e-05,
      "loss": 2.544,
      "step": 16220
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1923480033874512,
      "learning_rate": 1.797720107949054e-05,
      "loss": 2.3132,
      "step": 16221
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0595849752426147,
      "learning_rate": 1.7976952783981444e-05,
      "loss": 2.4367,
      "step": 16222
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9190593361854553,
      "learning_rate": 1.7976704474949247e-05,
      "loss": 2.3831,
      "step": 16223
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0254358053207397,
      "learning_rate": 1.797645615239437e-05,
      "loss": 2.3666,
      "step": 16224
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0183974504470825,
      "learning_rate": 1.7976207816317234e-05,
      "loss": 2.3785,
      "step": 16225
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9692081809043884,
      "learning_rate": 1.7975959466718257e-05,
      "loss": 2.3918,
      "step": 16226
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0166136026382446,
      "learning_rate": 1.7975711103597864e-05,
      "loss": 2.4978,
      "step": 16227
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.945357084274292,
      "learning_rate": 1.7975462726956476e-05,
      "loss": 2.5052,
      "step": 16228
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1394171714782715,
      "learning_rate": 1.797521433679451e-05,
      "loss": 2.4901,
      "step": 16229
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0622661113739014,
      "learning_rate": 1.7974965933112393e-05,
      "loss": 2.3389,
      "step": 16230
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0892993211746216,
      "learning_rate": 1.7974717515910542e-05,
      "loss": 2.4586,
      "step": 16231
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9492627382278442,
      "learning_rate": 1.7974469085189377e-05,
      "loss": 2.3723,
      "step": 16232
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0680160522460938,
      "learning_rate": 1.7974220640949322e-05,
      "loss": 2.4882,
      "step": 16233
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1371335983276367,
      "learning_rate": 1.7973972183190795e-05,
      "loss": 2.6153,
      "step": 16234
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9316680431365967,
      "learning_rate": 1.7973723711914224e-05,
      "loss": 2.7083,
      "step": 16235
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.046645164489746,
      "learning_rate": 1.7973475227120025e-05,
      "loss": 2.6035,
      "step": 16236
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9725053310394287,
      "learning_rate": 1.7973226728808615e-05,
      "loss": 2.5994,
      "step": 16237
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9560186266899109,
      "learning_rate": 1.7972978216980424e-05,
      "loss": 2.5539,
      "step": 16238
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0257421731948853,
      "learning_rate": 1.7972729691635867e-05,
      "loss": 2.4226,
      "step": 16239
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9990925788879395,
      "learning_rate": 1.797248115277537e-05,
      "loss": 2.3781,
      "step": 16240
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0419455766677856,
      "learning_rate": 1.7972232600399353e-05,
      "loss": 2.5468,
      "step": 16241
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0390866994857788,
      "learning_rate": 1.7971984034508237e-05,
      "loss": 2.4363,
      "step": 16242
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2044148445129395,
      "learning_rate": 1.7971735455102442e-05,
      "loss": 2.4557,
      "step": 16243
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0717133283615112,
      "learning_rate": 1.7971486862182386e-05,
      "loss": 2.5412,
      "step": 16244
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0318015813827515,
      "learning_rate": 1.79712382557485e-05,
      "loss": 2.6521,
      "step": 16245
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.033818244934082,
      "learning_rate": 1.79709896358012e-05,
      "loss": 2.4037,
      "step": 16246
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2750701904296875,
      "learning_rate": 1.7970741002340904e-05,
      "loss": 2.4721,
      "step": 16247
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9981615543365479,
      "learning_rate": 1.797049235536804e-05,
      "loss": 2.4807,
      "step": 16248
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.101035714149475,
      "learning_rate": 1.7970243694883028e-05,
      "loss": 2.6115,
      "step": 16249
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0688393115997314,
      "learning_rate": 1.7969995020886283e-05,
      "loss": 2.433,
      "step": 16250
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0172556638717651,
      "learning_rate": 1.7969746333378237e-05,
      "loss": 2.651,
      "step": 16251
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.092968225479126,
      "learning_rate": 1.7969497632359304e-05,
      "loss": 2.6049,
      "step": 16252
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3377186059951782,
      "learning_rate": 1.7969248917829908e-05,
      "loss": 2.2466,
      "step": 16253
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0850849151611328,
      "learning_rate": 1.796900018979047e-05,
      "loss": 2.5379,
      "step": 16254
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.186514973640442,
      "learning_rate": 1.7968751448241415e-05,
      "loss": 2.4289,
      "step": 16255
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0072391033172607,
      "learning_rate": 1.796850269318316e-05,
      "loss": 2.523,
      "step": 16256
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0355345010757446,
      "learning_rate": 1.7968253924616127e-05,
      "loss": 2.5531,
      "step": 16257
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1376519203186035,
      "learning_rate": 1.7968005142540746e-05,
      "loss": 2.5834,
      "step": 16258
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2993348836898804,
      "learning_rate": 1.7967756346957425e-05,
      "loss": 2.5977,
      "step": 16259
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1534465551376343,
      "learning_rate": 1.79675075378666e-05,
      "loss": 2.5663,
      "step": 16260
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0306835174560547,
      "learning_rate": 1.796725871526868e-05,
      "loss": 2.4701,
      "step": 16261
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0082948207855225,
      "learning_rate": 1.7967009879164092e-05,
      "loss": 2.6739,
      "step": 16262
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.049932837486267,
      "learning_rate": 1.7966761029553263e-05,
      "loss": 2.3766,
      "step": 16263
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.8619074821472168,
      "learning_rate": 1.796651216643661e-05,
      "loss": 2.3807,
      "step": 16264
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0360794067382812,
      "learning_rate": 1.796626328981455e-05,
      "loss": 2.4511,
      "step": 16265
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0236746072769165,
      "learning_rate": 1.796601439968751e-05,
      "loss": 2.5213,
      "step": 16266
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0587464570999146,
      "learning_rate": 1.796576549605592e-05,
      "loss": 2.4655,
      "step": 16267
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.060841679573059,
      "learning_rate": 1.796551657892019e-05,
      "loss": 2.4795,
      "step": 16268
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0383048057556152,
      "learning_rate": 1.7965267648280744e-05,
      "loss": 2.5526,
      "step": 16269
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0717400312423706,
      "learning_rate": 1.796501870413801e-05,
      "loss": 2.4626,
      "step": 16270
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9663326740264893,
      "learning_rate": 1.79647697464924e-05,
      "loss": 2.444,
      "step": 16271
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9844931960105896,
      "learning_rate": 1.7964520775344346e-05,
      "loss": 2.3925,
      "step": 16272
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.092491626739502,
      "learning_rate": 1.7964271790694266e-05,
      "loss": 2.4465,
      "step": 16273
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9824325442314148,
      "learning_rate": 1.7964022792542582e-05,
      "loss": 2.4733,
      "step": 16274
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0622949600219727,
      "learning_rate": 1.7963773780889715e-05,
      "loss": 2.4698,
      "step": 16275
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2451863288879395,
      "learning_rate": 1.796352475573609e-05,
      "loss": 2.2798,
      "step": 16276
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.054148554801941,
      "learning_rate": 1.7963275717082128e-05,
      "loss": 2.3947,
      "step": 16277
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9580790400505066,
      "learning_rate": 1.7963026664928248e-05,
      "loss": 2.5328,
      "step": 16278
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0466402769088745,
      "learning_rate": 1.7962777599274882e-05,
      "loss": 2.367,
      "step": 16279
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2344499826431274,
      "learning_rate": 1.796252852012244e-05,
      "loss": 2.4966,
      "step": 16280
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0938003063201904,
      "learning_rate": 1.7962279427471347e-05,
      "loss": 2.3914,
      "step": 16281
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.009458065032959,
      "learning_rate": 1.7962030321322034e-05,
      "loss": 2.5898,
      "step": 16282
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0637543201446533,
      "learning_rate": 1.7961781201674915e-05,
      "loss": 2.36,
      "step": 16283
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.061208724975586,
      "learning_rate": 1.7961532068530412e-05,
      "loss": 2.6868,
      "step": 16284
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.044577717781067,
      "learning_rate": 1.7961282921888953e-05,
      "loss": 2.1914,
      "step": 16285
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0226131677627563,
      "learning_rate": 1.7961033761750956e-05,
      "loss": 2.625,
      "step": 16286
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.046112060546875,
      "learning_rate": 1.7960784588116844e-05,
      "loss": 2.5181,
      "step": 16287
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.264478325843811,
      "learning_rate": 1.7960535400987038e-05,
      "loss": 2.4388,
      "step": 16288
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0816453695297241,
      "learning_rate": 1.7960286200361964e-05,
      "loss": 2.5562,
      "step": 16289
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1124608516693115,
      "learning_rate": 1.7960036986242046e-05,
      "loss": 2.6979,
      "step": 16290
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0824248790740967,
      "learning_rate": 1.79597877586277e-05,
      "loss": 2.488,
      "step": 16291
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0122995376586914,
      "learning_rate": 1.7959538517519354e-05,
      "loss": 2.3985,
      "step": 16292
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1020922660827637,
      "learning_rate": 1.7959289262917427e-05,
      "loss": 2.5292,
      "step": 16293
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1265757083892822,
      "learning_rate": 1.7959039994822343e-05,
      "loss": 2.5435,
      "step": 16294
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0319617986679077,
      "learning_rate": 1.7958790713234526e-05,
      "loss": 2.4363,
      "step": 16295
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0143004655838013,
      "learning_rate": 1.7958541418154396e-05,
      "loss": 2.4399,
      "step": 16296
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9799516201019287,
      "learning_rate": 1.7958292109582376e-05,
      "loss": 2.441,
      "step": 16297
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.168175458908081,
      "learning_rate": 1.795804278751889e-05,
      "loss": 2.6029,
      "step": 16298
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1123051643371582,
      "learning_rate": 1.795779345196436e-05,
      "loss": 2.6057,
      "step": 16299
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9781900644302368,
      "learning_rate": 1.7957544102919212e-05,
      "loss": 2.3859,
      "step": 16300
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0176353454589844,
      "learning_rate": 1.795729474038386e-05,
      "loss": 2.4576,
      "step": 16301
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0043827295303345,
      "learning_rate": 1.7957045364358735e-05,
      "loss": 2.5196,
      "step": 16302
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0530049800872803,
      "learning_rate": 1.795679597484426e-05,
      "loss": 2.4479,
      "step": 16303
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.211540937423706,
      "learning_rate": 1.795654657184085e-05,
      "loss": 2.5616,
      "step": 16304
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.010830283164978,
      "learning_rate": 1.7956297155348937e-05,
      "loss": 2.4042,
      "step": 16305
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.934439480304718,
      "learning_rate": 1.7956047725368934e-05,
      "loss": 2.3328,
      "step": 16306
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.90666264295578,
      "learning_rate": 1.7955798281901272e-05,
      "loss": 2.3735,
      "step": 16307
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.081856369972229,
      "learning_rate": 1.7955548824946372e-05,
      "loss": 2.4138,
      "step": 16308
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9556505084037781,
      "learning_rate": 1.7955299354504655e-05,
      "loss": 2.4701,
      "step": 16309
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9727667570114136,
      "learning_rate": 1.7955049870576547e-05,
      "loss": 2.4971,
      "step": 16310
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9978852272033691,
      "learning_rate": 1.7954800373162465e-05,
      "loss": 2.4811,
      "step": 16311
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1298367977142334,
      "learning_rate": 1.795455086226284e-05,
      "loss": 2.3359,
      "step": 16312
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.398285150527954,
      "learning_rate": 1.7954301337878086e-05,
      "loss": 2.7295,
      "step": 16313
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9658121466636658,
      "learning_rate": 1.7954051800008633e-05,
      "loss": 2.5301,
      "step": 16314
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0310109853744507,
      "learning_rate": 1.7953802248654907e-05,
      "loss": 2.4485,
      "step": 16315
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0611586570739746,
      "learning_rate": 1.795355268381732e-05,
      "loss": 2.5439,
      "step": 16316
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0259712934494019,
      "learning_rate": 1.79533031054963e-05,
      "loss": 2.4096,
      "step": 16317
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9743700623512268,
      "learning_rate": 1.7953053513692276e-05,
      "loss": 2.7089,
      "step": 16318
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1881886720657349,
      "learning_rate": 1.795280390840566e-05,
      "loss": 2.5094,
      "step": 16319
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0889784097671509,
      "learning_rate": 1.7952554289636888e-05,
      "loss": 2.567,
      "step": 16320
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1065633296966553,
      "learning_rate": 1.7952304657386375e-05,
      "loss": 2.1324,
      "step": 16321
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0293997526168823,
      "learning_rate": 1.7952055011654547e-05,
      "loss": 2.3931,
      "step": 16322
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.089369773864746,
      "learning_rate": 1.7951805352441824e-05,
      "loss": 2.2945,
      "step": 16323
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0476148128509521,
      "learning_rate": 1.795155567974863e-05,
      "loss": 2.5107,
      "step": 16324
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0277879238128662,
      "learning_rate": 1.795130599357539e-05,
      "loss": 2.5702,
      "step": 16325
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9987704157829285,
      "learning_rate": 1.7951056293922528e-05,
      "loss": 2.5378,
      "step": 16326
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9788576364517212,
      "learning_rate": 1.7950806580790465e-05,
      "loss": 2.4575,
      "step": 16327
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0040228366851807,
      "learning_rate": 1.7950556854179625e-05,
      "loss": 2.7839,
      "step": 16328
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0451291799545288,
      "learning_rate": 1.7950307114090434e-05,
      "loss": 2.6793,
      "step": 16329
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9813165068626404,
      "learning_rate": 1.795005736052331e-05,
      "loss": 2.4832,
      "step": 16330
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0100642442703247,
      "learning_rate": 1.7949807593478684e-05,
      "loss": 2.4511,
      "step": 16331
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0608989000320435,
      "learning_rate": 1.7949557812956972e-05,
      "loss": 2.2634,
      "step": 16332
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0489466190338135,
      "learning_rate": 1.79493080189586e-05,
      "loss": 2.3863,
      "step": 16333
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.013771891593933,
      "learning_rate": 1.794905821148399e-05,
      "loss": 2.6118,
      "step": 16334
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0308564901351929,
      "learning_rate": 1.794880839053357e-05,
      "loss": 2.6304,
      "step": 16335
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0293152332305908,
      "learning_rate": 1.7948558556107763e-05,
      "loss": 2.3804,
      "step": 16336
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0984755754470825,
      "learning_rate": 1.794830870820699e-05,
      "loss": 2.3652,
      "step": 16337
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0031847953796387,
      "learning_rate": 1.7948058846831676e-05,
      "loss": 2.3397,
      "step": 16338
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1161233186721802,
      "learning_rate": 1.794780897198224e-05,
      "loss": 2.7192,
      "step": 16339
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0329629182815552,
      "learning_rate": 1.794755908365911e-05,
      "loss": 2.5144,
      "step": 16340
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0016438961029053,
      "learning_rate": 1.7947309181862705e-05,
      "loss": 2.6603,
      "step": 16341
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0002918243408203,
      "learning_rate": 1.794705926659346e-05,
      "loss": 2.5615,
      "step": 16342
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0348711013793945,
      "learning_rate": 1.7946809337851787e-05,
      "loss": 2.6457,
      "step": 16343
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0989211797714233,
      "learning_rate": 1.7946559395638115e-05,
      "loss": 2.446,
      "step": 16344
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.101603627204895,
      "learning_rate": 1.7946309439952866e-05,
      "loss": 2.44,
      "step": 16345
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0998588800430298,
      "learning_rate": 1.7946059470796466e-05,
      "loss": 2.4771,
      "step": 16346
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0589616298675537,
      "learning_rate": 1.7945809488169334e-05,
      "loss": 2.6381,
      "step": 16347
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9687404632568359,
      "learning_rate": 1.79455594920719e-05,
      "loss": 2.4314,
      "step": 16348
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0935035943984985,
      "learning_rate": 1.7945309482504584e-05,
      "loss": 2.3129,
      "step": 16349
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9745925068855286,
      "learning_rate": 1.7945059459467808e-05,
      "loss": 2.2873,
      "step": 16350
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9508561491966248,
      "learning_rate": 1.7944809422962e-05,
      "loss": 2.6889,
      "step": 16351
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0466670989990234,
      "learning_rate": 1.7944559372987583e-05,
      "loss": 2.4031,
      "step": 16352
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9947922229766846,
      "learning_rate": 1.7944309309544977e-05,
      "loss": 2.7236,
      "step": 16353
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0334010124206543,
      "learning_rate": 1.794405923263461e-05,
      "loss": 2.3891,
      "step": 16354
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1108710765838623,
      "learning_rate": 1.794380914225691e-05,
      "loss": 2.3187,
      "step": 16355
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0398653745651245,
      "learning_rate": 1.7943559038412294e-05,
      "loss": 2.4175,
      "step": 16356
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0302468538284302,
      "learning_rate": 1.7943308921101184e-05,
      "loss": 2.6862,
      "step": 16357
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1020480394363403,
      "learning_rate": 1.7943058790324013e-05,
      "loss": 2.4289,
      "step": 16358
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1656806468963623,
      "learning_rate": 1.7942808646081196e-05,
      "loss": 2.2906,
      "step": 16359
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0018974542617798,
      "learning_rate": 1.794255848837316e-05,
      "loss": 2.3579,
      "step": 16360
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0574942827224731,
      "learning_rate": 1.7942308317200335e-05,
      "loss": 2.3114,
      "step": 16361
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.015738606452942,
      "learning_rate": 1.7942058132563137e-05,
      "loss": 2.3604,
      "step": 16362
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9421388506889343,
      "learning_rate": 1.7941807934461998e-05,
      "loss": 2.7163,
      "step": 16363
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0956939458847046,
      "learning_rate": 1.794155772289733e-05,
      "loss": 2.5393,
      "step": 16364
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0892325639724731,
      "learning_rate": 1.794130749786957e-05,
      "loss": 2.4631,
      "step": 16365
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9989500641822815,
      "learning_rate": 1.7941057259379136e-05,
      "loss": 2.1856,
      "step": 16366
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0392041206359863,
      "learning_rate": 1.7940807007426455e-05,
      "loss": 2.1738,
      "step": 16367
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0977728366851807,
      "learning_rate": 1.7940556742011946e-05,
      "loss": 2.6438,
      "step": 16368
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9645183086395264,
      "learning_rate": 1.7940306463136038e-05,
      "loss": 2.5448,
      "step": 16369
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9644246101379395,
      "learning_rate": 1.7940056170799157e-05,
      "loss": 2.4636,
      "step": 16370
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9585068225860596,
      "learning_rate": 1.793980586500172e-05,
      "loss": 2.4801,
      "step": 16371
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9519110321998596,
      "learning_rate": 1.793955554574416e-05,
      "loss": 2.6251,
      "step": 16372
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1351083517074585,
      "learning_rate": 1.7939305213026892e-05,
      "loss": 2.3011,
      "step": 16373
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1250176429748535,
      "learning_rate": 1.7939054866850347e-05,
      "loss": 2.4797,
      "step": 16374
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0690075159072876,
      "learning_rate": 1.793880450721495e-05,
      "loss": 2.6424,
      "step": 16375
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0914068222045898,
      "learning_rate": 1.793855413412112e-05,
      "loss": 2.5203,
      "step": 16376
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9641558527946472,
      "learning_rate": 1.7938303747569286e-05,
      "loss": 2.4665,
      "step": 16377
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9973803758621216,
      "learning_rate": 1.7938053347559873e-05,
      "loss": 2.6054,
      "step": 16378
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.087765097618103,
      "learning_rate": 1.79378029340933e-05,
      "loss": 2.2942,
      "step": 16379
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0978032350540161,
      "learning_rate": 1.7937552507169998e-05,
      "loss": 2.7306,
      "step": 16380
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0756827592849731,
      "learning_rate": 1.793730206679039e-05,
      "loss": 2.4758,
      "step": 16381
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0556511878967285,
      "learning_rate": 1.7937051612954895e-05,
      "loss": 2.6528,
      "step": 16382
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.066401481628418,
      "learning_rate": 1.7936801145663944e-05,
      "loss": 2.3625,
      "step": 16383
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1086009740829468,
      "learning_rate": 1.793655066491796e-05,
      "loss": 2.3396,
      "step": 16384
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9565154910087585,
      "learning_rate": 1.7936300170717366e-05,
      "loss": 2.5384,
      "step": 16385
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0056099891662598,
      "learning_rate": 1.7936049663062588e-05,
      "loss": 2.4423,
      "step": 16386
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2270790338516235,
      "learning_rate": 1.7935799141954048e-05,
      "loss": 2.5503,
      "step": 16387
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9960904121398926,
      "learning_rate": 1.7935548607392177e-05,
      "loss": 2.455,
      "step": 16388
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0802974700927734,
      "learning_rate": 1.7935298059377395e-05,
      "loss": 2.381,
      "step": 16389
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0371476411819458,
      "learning_rate": 1.7935047497910126e-05,
      "loss": 2.6933,
      "step": 16390
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0564379692077637,
      "learning_rate": 1.79347969229908e-05,
      "loss": 2.5681,
      "step": 16391
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9978496432304382,
      "learning_rate": 1.7934546334619836e-05,
      "loss": 2.5987,
      "step": 16392
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0979968309402466,
      "learning_rate": 1.7934295732797657e-05,
      "loss": 2.3029,
      "step": 16393
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9858158826828003,
      "learning_rate": 1.7934045117524698e-05,
      "loss": 2.086,
      "step": 16394
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1714038848876953,
      "learning_rate": 1.7933794488801373e-05,
      "loss": 2.6717,
      "step": 16395
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1025713682174683,
      "learning_rate": 1.7933543846628114e-05,
      "loss": 2.6436,
      "step": 16396
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9860014915466309,
      "learning_rate": 1.7933293191005344e-05,
      "loss": 2.4804,
      "step": 16397
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2642537355422974,
      "learning_rate": 1.7933042521933487e-05,
      "loss": 2.5796,
      "step": 16398
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.954614520072937,
      "learning_rate": 1.7932791839412965e-05,
      "loss": 2.2904,
      "step": 16399
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1006180047988892,
      "learning_rate": 1.793254114344421e-05,
      "loss": 2.4129,
      "step": 16400
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0940368175506592,
      "learning_rate": 1.7932290434027645e-05,
      "loss": 2.6812,
      "step": 16401
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9360101819038391,
      "learning_rate": 1.793203971116369e-05,
      "loss": 2.6406,
      "step": 16402
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9582206606864929,
      "learning_rate": 1.7931788974852774e-05,
      "loss": 2.6918,
      "step": 16403
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9371283650398254,
      "learning_rate": 1.7931538225095323e-05,
      "loss": 2.4939,
      "step": 16404
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0229928493499756,
      "learning_rate": 1.793128746189176e-05,
      "loss": 2.3891,
      "step": 16405
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0543862581253052,
      "learning_rate": 1.7931036685242505e-05,
      "loss": 2.4536,
      "step": 16406
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0948727130889893,
      "learning_rate": 1.7930785895147997e-05,
      "loss": 2.7119,
      "step": 16407
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2239271402359009,
      "learning_rate": 1.793053509160865e-05,
      "loss": 2.7452,
      "step": 16408
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1277343034744263,
      "learning_rate": 1.7930284274624893e-05,
      "loss": 2.5619,
      "step": 16409
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0195153951644897,
      "learning_rate": 1.7930033444197145e-05,
      "loss": 2.3048,
      "step": 16410
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.021040916442871,
      "learning_rate": 1.7929782600325843e-05,
      "loss": 2.6158,
      "step": 16411
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9703651070594788,
      "learning_rate": 1.7929531743011402e-05,
      "loss": 2.2716,
      "step": 16412
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2017773389816284,
      "learning_rate": 1.792928087225425e-05,
      "loss": 2.533,
      "step": 16413
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0371646881103516,
      "learning_rate": 1.792902998805482e-05,
      "loss": 2.3672,
      "step": 16414
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9859558939933777,
      "learning_rate": 1.7928779090413523e-05,
      "loss": 2.5758,
      "step": 16415
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0609362125396729,
      "learning_rate": 1.7928528179330796e-05,
      "loss": 2.6445,
      "step": 16416
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0458014011383057,
      "learning_rate": 1.792827725480706e-05,
      "loss": 2.3909,
      "step": 16417
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9476436376571655,
      "learning_rate": 1.7928026316842743e-05,
      "loss": 2.4986,
      "step": 16418
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0587539672851562,
      "learning_rate": 1.7927775365438263e-05,
      "loss": 2.4942,
      "step": 16419
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0120646953582764,
      "learning_rate": 1.7927524400594054e-05,
      "loss": 2.5451,
      "step": 16420
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0400935411453247,
      "learning_rate": 1.792727342231054e-05,
      "loss": 2.6574,
      "step": 16421
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.007261872291565,
      "learning_rate": 1.7927022430588138e-05,
      "loss": 2.1683,
      "step": 16422
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1142586469650269,
      "learning_rate": 1.7926771425427285e-05,
      "loss": 2.4338,
      "step": 16423
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0849828720092773,
      "learning_rate": 1.7926520406828403e-05,
      "loss": 2.5023,
      "step": 16424
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.299919605255127,
      "learning_rate": 1.7926269374791913e-05,
      "loss": 2.7772,
      "step": 16425
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.987419605255127,
      "learning_rate": 1.792601832931824e-05,
      "loss": 2.4992,
      "step": 16426
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9179685115814209,
      "learning_rate": 1.7925767270407818e-05,
      "loss": 2.4262,
      "step": 16427
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.948706865310669,
      "learning_rate": 1.792551619806107e-05,
      "loss": 2.286,
      "step": 16428
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0514544248580933,
      "learning_rate": 1.7925265112278415e-05,
      "loss": 2.3246,
      "step": 16429
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9379002451896667,
      "learning_rate": 1.7925014013060286e-05,
      "loss": 2.4082,
      "step": 16430
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2975736856460571,
      "learning_rate": 1.7924762900407103e-05,
      "loss": 2.3141,
      "step": 16431
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1464923620224,
      "learning_rate": 1.79245117743193e-05,
      "loss": 2.496,
      "step": 16432
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9605399370193481,
      "learning_rate": 1.7924260634797293e-05,
      "loss": 2.3438,
      "step": 16433
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1700950860977173,
      "learning_rate": 1.792400948184151e-05,
      "loss": 2.3487,
      "step": 16434
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1837077140808105,
      "learning_rate": 1.7923758315452384e-05,
      "loss": 2.3322,
      "step": 16435
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2475941181182861,
      "learning_rate": 1.7923507135630332e-05,
      "loss": 2.3456,
      "step": 16436
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.982149064540863,
      "learning_rate": 1.7923255942375784e-05,
      "loss": 2.4787,
      "step": 16437
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1622304916381836,
      "learning_rate": 1.7923004735689167e-05,
      "loss": 2.4677,
      "step": 16438
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0840504169464111,
      "learning_rate": 1.79227535155709e-05,
      "loss": 2.5282,
      "step": 16439
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.065506100654602,
      "learning_rate": 1.7922502282021424e-05,
      "loss": 2.68,
      "step": 16440
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0444493293762207,
      "learning_rate": 1.7922251035041145e-05,
      "loss": 2.6741,
      "step": 16441
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0708898305892944,
      "learning_rate": 1.79219997746305e-05,
      "loss": 2.4854,
      "step": 16442
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3060777187347412,
      "learning_rate": 1.792174850078992e-05,
      "loss": 2.5339,
      "step": 16443
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0068011283874512,
      "learning_rate": 1.792149721351982e-05,
      "loss": 2.5399,
      "step": 16444
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9806472659111023,
      "learning_rate": 1.7921245912820634e-05,
      "loss": 2.2322,
      "step": 16445
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0906537771224976,
      "learning_rate": 1.7920994598692784e-05,
      "loss": 2.56,
      "step": 16446
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2107943296432495,
      "learning_rate": 1.7920743271136693e-05,
      "loss": 2.4635,
      "step": 16447
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0235540866851807,
      "learning_rate": 1.7920491930152796e-05,
      "loss": 2.5216,
      "step": 16448
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.045137882232666,
      "learning_rate": 1.792024057574151e-05,
      "loss": 2.6119,
      "step": 16449
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0025441646575928,
      "learning_rate": 1.791998920790327e-05,
      "loss": 2.6922,
      "step": 16450
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0000262260437012,
      "learning_rate": 1.791973782663849e-05,
      "loss": 2.5255,
      "step": 16451
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0946763753890991,
      "learning_rate": 1.7919486431947608e-05,
      "loss": 2.4471,
      "step": 16452
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9796499013900757,
      "learning_rate": 1.7919235023831043e-05,
      "loss": 2.4043,
      "step": 16453
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9776232838630676,
      "learning_rate": 1.7918983602289227e-05,
      "loss": 2.4239,
      "step": 16454
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0647047758102417,
      "learning_rate": 1.7918732167322583e-05,
      "loss": 2.5062,
      "step": 16455
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0266433954238892,
      "learning_rate": 1.7918480718931533e-05,
      "loss": 2.6205,
      "step": 16456
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9989307522773743,
      "learning_rate": 1.7918229257116512e-05,
      "loss": 2.4418,
      "step": 16457
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.89822918176651,
      "learning_rate": 1.7917977781877944e-05,
      "loss": 2.3503,
      "step": 16458
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0093802213668823,
      "learning_rate": 1.7917726293216247e-05,
      "loss": 2.4548,
      "step": 16459
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0064276456832886,
      "learning_rate": 1.7917474791131857e-05,
      "loss": 2.5465,
      "step": 16460
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9708204865455627,
      "learning_rate": 1.7917223275625197e-05,
      "loss": 2.5374,
      "step": 16461
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9964699149131775,
      "learning_rate": 1.791697174669669e-05,
      "loss": 2.3026,
      "step": 16462
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2051717042922974,
      "learning_rate": 1.791672020434677e-05,
      "loss": 2.5067,
      "step": 16463
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9678173661231995,
      "learning_rate": 1.7916468648575855e-05,
      "loss": 2.424,
      "step": 16464
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0115516185760498,
      "learning_rate": 1.7916217079384378e-05,
      "loss": 2.3851,
      "step": 16465
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2387728691101074,
      "learning_rate": 1.7915965496772762e-05,
      "loss": 2.5963,
      "step": 16466
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0653047561645508,
      "learning_rate": 1.7915713900741437e-05,
      "loss": 2.3751,
      "step": 16467
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9847724437713623,
      "learning_rate": 1.7915462291290823e-05,
      "loss": 2.3728,
      "step": 16468
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0130655765533447,
      "learning_rate": 1.791521066842135e-05,
      "loss": 2.7548,
      "step": 16469
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.061094880104065,
      "learning_rate": 1.791495903213345e-05,
      "loss": 2.4051,
      "step": 16470
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5675673484802246,
      "learning_rate": 1.7914707382427545e-05,
      "loss": 2.3789,
      "step": 16471
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.109907627105713,
      "learning_rate": 1.7914455719304058e-05,
      "loss": 2.4451,
      "step": 16472
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1678950786590576,
      "learning_rate": 1.791420404276342e-05,
      "loss": 2.3905,
      "step": 16473
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.121100664138794,
      "learning_rate": 1.7913952352806055e-05,
      "loss": 2.5,
      "step": 16474
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0062346458435059,
      "learning_rate": 1.7913700649432396e-05,
      "loss": 2.5657,
      "step": 16475
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0310736894607544,
      "learning_rate": 1.791344893264286e-05,
      "loss": 2.5404,
      "step": 16476
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0177817344665527,
      "learning_rate": 1.791319720243788e-05,
      "loss": 2.4485,
      "step": 16477
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1397724151611328,
      "learning_rate": 1.7912945458817885e-05,
      "loss": 2.3876,
      "step": 16478
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9841406941413879,
      "learning_rate": 1.7912693701783297e-05,
      "loss": 2.5381,
      "step": 16479
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1828526258468628,
      "learning_rate": 1.7912441931334542e-05,
      "loss": 2.5937,
      "step": 16480
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9625906348228455,
      "learning_rate": 1.7912190147472048e-05,
      "loss": 2.2602,
      "step": 16481
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9093290567398071,
      "learning_rate": 1.7911938350196246e-05,
      "loss": 2.5308,
      "step": 16482
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9895135164260864,
      "learning_rate": 1.7911686539507558e-05,
      "loss": 2.4772,
      "step": 16483
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0033116340637207,
      "learning_rate": 1.791143471540641e-05,
      "loss": 2.3688,
      "step": 16484
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0618813037872314,
      "learning_rate": 1.7911182877893237e-05,
      "loss": 2.3933,
      "step": 16485
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1107592582702637,
      "learning_rate": 1.7910931026968458e-05,
      "loss": 2.4162,
      "step": 16486
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1376993656158447,
      "learning_rate": 1.79106791626325e-05,
      "loss": 2.4188,
      "step": 16487
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9454181790351868,
      "learning_rate": 1.7910427284885797e-05,
      "loss": 2.4511,
      "step": 16488
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0390195846557617,
      "learning_rate": 1.791017539372877e-05,
      "loss": 2.2962,
      "step": 16489
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2110280990600586,
      "learning_rate": 1.7909923489161845e-05,
      "loss": 2.5439,
      "step": 16490
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.043379545211792,
      "learning_rate": 1.7909671571185453e-05,
      "loss": 2.4274,
      "step": 16491
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1481492519378662,
      "learning_rate": 1.7909419639800018e-05,
      "loss": 2.4713,
      "step": 16492
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9926930069923401,
      "learning_rate": 1.790916769500597e-05,
      "loss": 2.5694,
      "step": 16493
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9452211260795593,
      "learning_rate": 1.7908915736803734e-05,
      "loss": 2.5024,
      "step": 16494
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1910839080810547,
      "learning_rate": 1.7908663765193738e-05,
      "loss": 2.3995,
      "step": 16495
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.146598219871521,
      "learning_rate": 1.7908411780176406e-05,
      "loss": 2.566,
      "step": 16496
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0359231233596802,
      "learning_rate": 1.7908159781752172e-05,
      "loss": 2.4548,
      "step": 16497
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9854915142059326,
      "learning_rate": 1.7907907769921457e-05,
      "loss": 2.3967,
      "step": 16498
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1185179948806763,
      "learning_rate": 1.7907655744684694e-05,
      "loss": 2.6341,
      "step": 16499
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0277012586593628,
      "learning_rate": 1.7907403706042303e-05,
      "loss": 2.4552,
      "step": 16500
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1475199460983276,
      "learning_rate": 1.7907151653994717e-05,
      "loss": 2.3912,
      "step": 16501
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9996597766876221,
      "learning_rate": 1.790689958854236e-05,
      "loss": 2.5161,
      "step": 16502
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9483014941215515,
      "learning_rate": 1.790664750968566e-05,
      "loss": 2.4614,
      "step": 16503
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.970028817653656,
      "learning_rate": 1.7906395417425048e-05,
      "loss": 2.5419,
      "step": 16504
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.095757007598877,
      "learning_rate": 1.7906143311760948e-05,
      "loss": 2.0877,
      "step": 16505
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4518654346466064,
      "learning_rate": 1.7905891192693788e-05,
      "loss": 2.5859,
      "step": 16506
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.077605962753296,
      "learning_rate": 1.7905639060223995e-05,
      "loss": 2.3463,
      "step": 16507
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9484046101570129,
      "learning_rate": 1.7905386914351997e-05,
      "loss": 2.6013,
      "step": 16508
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9596984386444092,
      "learning_rate": 1.790513475507822e-05,
      "loss": 2.1379,
      "step": 16509
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9790652990341187,
      "learning_rate": 1.7904882582403093e-05,
      "loss": 2.5128,
      "step": 16510
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.0929735898971558,
      "learning_rate": 1.790463039632704e-05,
      "loss": 2.5672,
      "step": 16511
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9679946303367615,
      "learning_rate": 1.7904378196850495e-05,
      "loss": 2.8442,
      "step": 16512
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.000447392463684,
      "learning_rate": 1.790412598397388e-05,
      "loss": 2.6292,
      "step": 16513
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0590603351593018,
      "learning_rate": 1.790387375769763e-05,
      "loss": 2.5087,
      "step": 16514
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9867202639579773,
      "learning_rate": 1.7903621518022162e-05,
      "loss": 2.4434,
      "step": 16515
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1002956628799438,
      "learning_rate": 1.7903369264947912e-05,
      "loss": 2.3833,
      "step": 16516
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0638868808746338,
      "learning_rate": 1.79031169984753e-05,
      "loss": 2.3196,
      "step": 16517
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0805708169937134,
      "learning_rate": 1.790286471860476e-05,
      "loss": 2.5034,
      "step": 16518
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0750936269760132,
      "learning_rate": 1.790261242533672e-05,
      "loss": 2.4469,
      "step": 16519
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0312929153442383,
      "learning_rate": 1.7902360118671607e-05,
      "loss": 2.5031,
      "step": 16520
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0106086730957031,
      "learning_rate": 1.7902107798609844e-05,
      "loss": 2.5159,
      "step": 16521
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9822081923484802,
      "learning_rate": 1.7901855465151866e-05,
      "loss": 2.4553,
      "step": 16522
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0899549722671509,
      "learning_rate": 1.7901603118298095e-05,
      "loss": 2.5812,
      "step": 16523
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9624750018119812,
      "learning_rate": 1.7901350758048956e-05,
      "loss": 2.4777,
      "step": 16524
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.031894326210022,
      "learning_rate": 1.7901098384404888e-05,
      "loss": 2.5808,
      "step": 16525
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9818031191825867,
      "learning_rate": 1.790084599736631e-05,
      "loss": 2.4345,
      "step": 16526
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1367003917694092,
      "learning_rate": 1.7900593596933652e-05,
      "loss": 2.492,
      "step": 16527
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0938551425933838,
      "learning_rate": 1.790034118310734e-05,
      "loss": 2.3766,
      "step": 16528
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0405168533325195,
      "learning_rate": 1.7900088755887806e-05,
      "loss": 2.5173,
      "step": 16529
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9590237140655518,
      "learning_rate": 1.7899836315275478e-05,
      "loss": 2.3581,
      "step": 16530
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.150303602218628,
      "learning_rate": 1.789958386127078e-05,
      "loss": 2.6988,
      "step": 16531
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.030885934829712,
      "learning_rate": 1.7899331393874137e-05,
      "loss": 2.6448,
      "step": 16532
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9594902396202087,
      "learning_rate": 1.789907891308599e-05,
      "loss": 2.5958,
      "step": 16533
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9558054208755493,
      "learning_rate": 1.7898826418906754e-05,
      "loss": 2.4612,
      "step": 16534
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9927381277084351,
      "learning_rate": 1.7898573911336864e-05,
      "loss": 2.6113,
      "step": 16535
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0523227453231812,
      "learning_rate": 1.7898321390376745e-05,
      "loss": 2.395,
      "step": 16536
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0737417936325073,
      "learning_rate": 1.7898068856026824e-05,
      "loss": 2.3696,
      "step": 16537
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0373497009277344,
      "learning_rate": 1.7897816308287535e-05,
      "loss": 2.4641,
      "step": 16538
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.029044270515442,
      "learning_rate": 1.78975637471593e-05,
      "loss": 2.7343,
      "step": 16539
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0366084575653076,
      "learning_rate": 1.7897311172642552e-05,
      "loss": 2.4652,
      "step": 16540
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9035086631774902,
      "learning_rate": 1.7897058584737713e-05,
      "loss": 2.4898,
      "step": 16541
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0597079992294312,
      "learning_rate": 1.7896805983445216e-05,
      "loss": 2.5646,
      "step": 16542
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9758129119873047,
      "learning_rate": 1.7896553368765493e-05,
      "loss": 2.5161,
      "step": 16543
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2981815338134766,
      "learning_rate": 1.7896300740698962e-05,
      "loss": 2.513,
      "step": 16544
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0544064044952393,
      "learning_rate": 1.789604809924606e-05,
      "loss": 2.3527,
      "step": 16545
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0763436555862427,
      "learning_rate": 1.789579544440721e-05,
      "loss": 2.3801,
      "step": 16546
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0731385946273804,
      "learning_rate": 1.789554277618284e-05,
      "loss": 2.5198,
      "step": 16547
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9699040651321411,
      "learning_rate": 1.789529009457338e-05,
      "loss": 2.4017,
      "step": 16548
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9748300909996033,
      "learning_rate": 1.7895037399579263e-05,
      "loss": 2.4837,
      "step": 16549
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0213953256607056,
      "learning_rate": 1.7894784691200914e-05,
      "loss": 2.4067,
      "step": 16550
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0484814643859863,
      "learning_rate": 1.789453196943876e-05,
      "loss": 2.487,
      "step": 16551
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1203378438949585,
      "learning_rate": 1.7894279234293228e-05,
      "loss": 2.3642,
      "step": 16552
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9736982583999634,
      "learning_rate": 1.7894026485764748e-05,
      "loss": 2.6243,
      "step": 16553
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.104252576828003,
      "learning_rate": 1.789377372385375e-05,
      "loss": 2.4311,
      "step": 16554
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0284020900726318,
      "learning_rate": 1.7893520948560663e-05,
      "loss": 2.3814,
      "step": 16555
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.003273367881775,
      "learning_rate": 1.7893268159885915e-05,
      "loss": 2.5284,
      "step": 16556
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9637323617935181,
      "learning_rate": 1.7893015357829933e-05,
      "loss": 2.6131,
      "step": 16557
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0996137857437134,
      "learning_rate": 1.7892762542393145e-05,
      "loss": 2.4694,
      "step": 16558
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.8575043082237244,
      "learning_rate": 1.789250971357598e-05,
      "loss": 2.3561,
      "step": 16559
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9532966017723083,
      "learning_rate": 1.789225687137887e-05,
      "loss": 2.202,
      "step": 16560
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0777379274368286,
      "learning_rate": 1.789200401580224e-05,
      "loss": 2.4326,
      "step": 16561
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9193996787071228,
      "learning_rate": 1.789175114684652e-05,
      "loss": 2.3441,
      "step": 16562
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9127923846244812,
      "learning_rate": 1.7891498264512137e-05,
      "loss": 2.3999,
      "step": 16563
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0738005638122559,
      "learning_rate": 1.7891245368799523e-05,
      "loss": 2.7042,
      "step": 16564
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0980331897735596,
      "learning_rate": 1.7890992459709104e-05,
      "loss": 2.6751,
      "step": 16565
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.992872416973114,
      "learning_rate": 1.789073953724131e-05,
      "loss": 2.5317,
      "step": 16566
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9415580630302429,
      "learning_rate": 1.789048660139657e-05,
      "loss": 2.5144,
      "step": 16567
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0597329139709473,
      "learning_rate": 1.7890233652175312e-05,
      "loss": 2.2396,
      "step": 16568
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0460654497146606,
      "learning_rate": 1.7889980689577964e-05,
      "loss": 2.4466,
      "step": 16569
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9745272397994995,
      "learning_rate": 1.7889727713604953e-05,
      "loss": 2.5349,
      "step": 16570
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9808167219161987,
      "learning_rate": 1.7889474724256715e-05,
      "loss": 2.5069,
      "step": 16571
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2890805006027222,
      "learning_rate": 1.7889221721533675e-05,
      "loss": 2.4048,
      "step": 16572
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1399474143981934,
      "learning_rate": 1.788896870543626e-05,
      "loss": 2.2984,
      "step": 16573
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.520451545715332,
      "learning_rate": 1.7888715675964897e-05,
      "loss": 2.6074,
      "step": 16574
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0469375848770142,
      "learning_rate": 1.7888462633120024e-05,
      "loss": 2.7518,
      "step": 16575
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1339104175567627,
      "learning_rate": 1.7888209576902062e-05,
      "loss": 2.4632,
      "step": 16576
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0602470636367798,
      "learning_rate": 1.7887956507311443e-05,
      "loss": 2.3168,
      "step": 16577
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1035168170928955,
      "learning_rate": 1.7887703424348592e-05,
      "loss": 2.4595,
      "step": 16578
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9498723149299622,
      "learning_rate": 1.7887450328013947e-05,
      "loss": 2.5549,
      "step": 16579
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1480551958084106,
      "learning_rate": 1.7887197218307928e-05,
      "loss": 2.4495,
      "step": 16580
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1685376167297363,
      "learning_rate": 1.788694409523097e-05,
      "loss": 2.3707,
      "step": 16581
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1830743551254272,
      "learning_rate": 1.7886690958783497e-05,
      "loss": 2.3474,
      "step": 16582
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9776126146316528,
      "learning_rate": 1.788643780896594e-05,
      "loss": 2.5347,
      "step": 16583
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0223397016525269,
      "learning_rate": 1.7886184645778733e-05,
      "loss": 2.5895,
      "step": 16584
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2569390535354614,
      "learning_rate": 1.7885931469222296e-05,
      "loss": 2.4494,
      "step": 16585
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2037092447280884,
      "learning_rate": 1.788567827929707e-05,
      "loss": 2.4116,
      "step": 16586
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.989867627620697,
      "learning_rate": 1.7885425076003472e-05,
      "loss": 2.5329,
      "step": 16587
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0497229099273682,
      "learning_rate": 1.7885171859341942e-05,
      "loss": 2.4265,
      "step": 16588
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9931308627128601,
      "learning_rate": 1.7884918629312898e-05,
      "loss": 2.2585,
      "step": 16589
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9743249416351318,
      "learning_rate": 1.788466538591678e-05,
      "loss": 2.4485,
      "step": 16590
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.295374870300293,
      "learning_rate": 1.788441212915401e-05,
      "loss": 2.6345,
      "step": 16591
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9878767132759094,
      "learning_rate": 1.7884158859025022e-05,
      "loss": 2.3431,
      "step": 16592
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9675965309143066,
      "learning_rate": 1.788390557553024e-05,
      "loss": 2.3909,
      "step": 16593
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.20173978805542,
      "learning_rate": 1.78836522786701e-05,
      "loss": 2.3994,
      "step": 16594
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9923719763755798,
      "learning_rate": 1.788339896844503e-05,
      "loss": 2.6737,
      "step": 16595
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2100285291671753,
      "learning_rate": 1.7883145644855454e-05,
      "loss": 2.5772,
      "step": 16596
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1374412775039673,
      "learning_rate": 1.788289230790181e-05,
      "loss": 2.6126,
      "step": 16597
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0062665939331055,
      "learning_rate": 1.7882638957584514e-05,
      "loss": 2.5413,
      "step": 16598
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9628356099128723,
      "learning_rate": 1.788238559390401e-05,
      "loss": 2.5018,
      "step": 16599
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0099207162857056,
      "learning_rate": 1.7882132216860723e-05,
      "loss": 2.3983,
      "step": 16600
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0052982568740845,
      "learning_rate": 1.7881878826455077e-05,
      "loss": 2.4483,
      "step": 16601
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.093055009841919,
      "learning_rate": 1.7881625422687506e-05,
      "loss": 2.2237,
      "step": 16602
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0909606218338013,
      "learning_rate": 1.788137200555844e-05,
      "loss": 2.4321,
      "step": 16603
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0526591539382935,
      "learning_rate": 1.788111857506831e-05,
      "loss": 2.3189,
      "step": 16604
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1083601713180542,
      "learning_rate": 1.7880865131217544e-05,
      "loss": 2.5252,
      "step": 16605
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0433356761932373,
      "learning_rate": 1.7880611674006566e-05,
      "loss": 2.3668,
      "step": 16606
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1109347343444824,
      "learning_rate": 1.7880358203435814e-05,
      "loss": 2.3147,
      "step": 16607
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.124657392501831,
      "learning_rate": 1.7880104719505716e-05,
      "loss": 2.4981,
      "step": 16608
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.027361512184143,
      "learning_rate": 1.78798512222167e-05,
      "loss": 2.2287,
      "step": 16609
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0161991119384766,
      "learning_rate": 1.7879597711569197e-05,
      "loss": 2.4678,
      "step": 16610
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0980236530303955,
      "learning_rate": 1.7879344187563632e-05,
      "loss": 2.5989,
      "step": 16611
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9512976408004761,
      "learning_rate": 1.7879090650200443e-05,
      "loss": 2.557,
      "step": 16612
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0982624292373657,
      "learning_rate": 1.787883709948005e-05,
      "loss": 2.5208,
      "step": 16613
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0856651067733765,
      "learning_rate": 1.787858353540289e-05,
      "loss": 2.5305,
      "step": 16614
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0929301977157593,
      "learning_rate": 1.7878329957969395e-05,
      "loss": 2.614,
      "step": 16615
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1107275485992432,
      "learning_rate": 1.787807636717999e-05,
      "loss": 2.5522,
      "step": 16616
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.04756498336792,
      "learning_rate": 1.78778227630351e-05,
      "loss": 2.4959,
      "step": 16617
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.8999400734901428,
      "learning_rate": 1.7877569145535166e-05,
      "loss": 2.4916,
      "step": 16618
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1138776540756226,
      "learning_rate": 1.787731551468061e-05,
      "loss": 2.3197,
      "step": 16619
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.116763949394226,
      "learning_rate": 1.787706187047187e-05,
      "loss": 2.5414,
      "step": 16620
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.960970401763916,
      "learning_rate": 1.7876808212909368e-05,
      "loss": 2.5002,
      "step": 16621
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0649601221084595,
      "learning_rate": 1.7876554541993535e-05,
      "loss": 2.4796,
      "step": 16622
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0403929948806763,
      "learning_rate": 1.78763008577248e-05,
      "loss": 2.5531,
      "step": 16623
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9617592692375183,
      "learning_rate": 1.78760471601036e-05,
      "loss": 2.4845,
      "step": 16624
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0499277114868164,
      "learning_rate": 1.7875793449130364e-05,
      "loss": 2.5793,
      "step": 16625
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.052076816558838,
      "learning_rate": 1.7875539724805516e-05,
      "loss": 2.7313,
      "step": 16626
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0169519186019897,
      "learning_rate": 1.7875285987129487e-05,
      "loss": 2.3889,
      "step": 16627
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9619851112365723,
      "learning_rate": 1.7875032236102714e-05,
      "loss": 2.442,
      "step": 16628
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0564236640930176,
      "learning_rate": 1.7874778471725618e-05,
      "loss": 2.4432,
      "step": 16629
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0405877828598022,
      "learning_rate": 1.7874524693998636e-05,
      "loss": 2.3414,
      "step": 16630
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.007058024406433,
      "learning_rate": 1.7874270902922194e-05,
      "loss": 2.6201,
      "step": 16631
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0517984628677368,
      "learning_rate": 1.7874017098496727e-05,
      "loss": 2.2571,
      "step": 16632
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.029342770576477,
      "learning_rate": 1.787376328072266e-05,
      "loss": 2.4588,
      "step": 16633
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0551693439483643,
      "learning_rate": 1.787350944960043e-05,
      "loss": 2.3821,
      "step": 16634
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9755502343177795,
      "learning_rate": 1.787325560513046e-05,
      "loss": 2.5012,
      "step": 16635
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0781688690185547,
      "learning_rate": 1.787300174731318e-05,
      "loss": 2.4898,
      "step": 16636
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9621635675430298,
      "learning_rate": 1.7872747876149028e-05,
      "loss": 2.4206,
      "step": 16637
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.052852749824524,
      "learning_rate": 1.7872493991638432e-05,
      "loss": 2.5395,
      "step": 16638
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0801613330841064,
      "learning_rate": 1.7872240093781815e-05,
      "loss": 2.5015,
      "step": 16639
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1431514024734497,
      "learning_rate": 1.7871986182579615e-05,
      "loss": 2.7027,
      "step": 16640
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1961052417755127,
      "learning_rate": 1.787173225803226e-05,
      "loss": 2.2788,
      "step": 16641
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.918201744556427,
      "learning_rate": 1.7871478320140185e-05,
      "loss": 2.6299,
      "step": 16642
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0107349157333374,
      "learning_rate": 1.7871224368903816e-05,
      "loss": 2.4962,
      "step": 16643
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1683402061462402,
      "learning_rate": 1.7870970404323578e-05,
      "loss": 2.4777,
      "step": 16644
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9587785005569458,
      "learning_rate": 1.787071642639991e-05,
      "loss": 2.2505,
      "step": 16645
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0059596300125122,
      "learning_rate": 1.7870462435133243e-05,
      "loss": 2.5046,
      "step": 16646
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.12104070186615,
      "learning_rate": 1.7870208430524e-05,
      "loss": 2.5513,
      "step": 16647
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0394275188446045,
      "learning_rate": 1.7869954412572622e-05,
      "loss": 2.4273,
      "step": 16648
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.058518648147583,
      "learning_rate": 1.7869700381279527e-05,
      "loss": 2.4861,
      "step": 16649
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0093820095062256,
      "learning_rate": 1.786944633664516e-05,
      "loss": 2.4317,
      "step": 16650
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0492981672286987,
      "learning_rate": 1.7869192278669936e-05,
      "loss": 2.5464,
      "step": 16651
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0835227966308594,
      "learning_rate": 1.78689382073543e-05,
      "loss": 2.5921,
      "step": 16652
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9793615937232971,
      "learning_rate": 1.7868684122698675e-05,
      "loss": 2.7161,
      "step": 16653
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0525671243667603,
      "learning_rate": 1.786843002470349e-05,
      "loss": 2.5599,
      "step": 16654
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1751176118850708,
      "learning_rate": 1.7868175913369184e-05,
      "loss": 2.3665,
      "step": 16655
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1633433103561401,
      "learning_rate": 1.7867921788696182e-05,
      "loss": 2.2835,
      "step": 16656
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.28425931930542,
      "learning_rate": 1.786766765068491e-05,
      "loss": 2.422,
      "step": 16657
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9943771362304688,
      "learning_rate": 1.786741349933581e-05,
      "loss": 2.2208,
      "step": 16658
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1548701524734497,
      "learning_rate": 1.786715933464931e-05,
      "loss": 2.5001,
      "step": 16659
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1652474403381348,
      "learning_rate": 1.786690515662583e-05,
      "loss": 2.444,
      "step": 16660
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0934481620788574,
      "learning_rate": 1.7866650965265817e-05,
      "loss": 2.3397,
      "step": 16661
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.131771206855774,
      "learning_rate": 1.786639676056969e-05,
      "loss": 2.4896,
      "step": 16662
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0228391885757446,
      "learning_rate": 1.7866142542537884e-05,
      "loss": 2.3004,
      "step": 16663
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.079013705253601,
      "learning_rate": 1.786588831117083e-05,
      "loss": 2.3944,
      "step": 16664
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9371861219406128,
      "learning_rate": 1.786563406646896e-05,
      "loss": 2.4297,
      "step": 16665
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9973390102386475,
      "learning_rate": 1.7865379808432703e-05,
      "loss": 2.3603,
      "step": 16666
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0458420515060425,
      "learning_rate": 1.786512553706249e-05,
      "loss": 2.5889,
      "step": 16667
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9783229827880859,
      "learning_rate": 1.786487125235876e-05,
      "loss": 2.5118,
      "step": 16668
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4485317468643188,
      "learning_rate": 1.786461695432193e-05,
      "loss": 2.2354,
      "step": 16669
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.150823950767517,
      "learning_rate": 1.786436264295244e-05,
      "loss": 2.4099,
      "step": 16670
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0290029048919678,
      "learning_rate": 1.7864108318250718e-05,
      "loss": 2.6016,
      "step": 16671
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9932451248168945,
      "learning_rate": 1.7863853980217198e-05,
      "loss": 2.6259,
      "step": 16672
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0159716606140137,
      "learning_rate": 1.786359962885231e-05,
      "loss": 2.5967,
      "step": 16673
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9443920254707336,
      "learning_rate": 1.7863345264156486e-05,
      "loss": 2.5688,
      "step": 16674
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1002839803695679,
      "learning_rate": 1.7863090886130153e-05,
      "loss": 2.4833,
      "step": 16675
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1883609294891357,
      "learning_rate": 1.7862836494773747e-05,
      "loss": 2.4485,
      "step": 16676
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0419808626174927,
      "learning_rate": 1.78625820900877e-05,
      "loss": 2.5605,
      "step": 16677
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.339327335357666,
      "learning_rate": 1.7862327672072438e-05,
      "loss": 2.395,
      "step": 16678
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1362407207489014,
      "learning_rate": 1.7862073240728396e-05,
      "loss": 2.3075,
      "step": 16679
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.989420473575592,
      "learning_rate": 1.7861818796056006e-05,
      "loss": 2.5243,
      "step": 16680
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0644546747207642,
      "learning_rate": 1.7861564338055694e-05,
      "loss": 2.3273,
      "step": 16681
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.979661226272583,
      "learning_rate": 1.78613098667279e-05,
      "loss": 2.3563,
      "step": 16682
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0508040189743042,
      "learning_rate": 1.7861055382073047e-05,
      "loss": 2.5604,
      "step": 16683
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0183436870574951,
      "learning_rate": 1.786080088409157e-05,
      "loss": 2.5463,
      "step": 16684
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9651680588722229,
      "learning_rate": 1.78605463727839e-05,
      "loss": 2.6212,
      "step": 16685
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1505334377288818,
      "learning_rate": 1.7860291848150476e-05,
      "loss": 2.5139,
      "step": 16686
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9538118839263916,
      "learning_rate": 1.7860037310191715e-05,
      "loss": 2.3606,
      "step": 16687
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2090270519256592,
      "learning_rate": 1.7859782758908058e-05,
      "loss": 2.6014,
      "step": 16688
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9442409873008728,
      "learning_rate": 1.7859528194299936e-05,
      "loss": 2.521,
      "step": 16689
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2841932773590088,
      "learning_rate": 1.7859273616367776e-05,
      "loss": 2.448,
      "step": 16690
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0768249034881592,
      "learning_rate": 1.7859019025112018e-05,
      "loss": 2.6422,
      "step": 16691
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.18337881565094,
      "learning_rate": 1.7858764420533082e-05,
      "loss": 2.4172,
      "step": 16692
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0407911539077759,
      "learning_rate": 1.785850980263141e-05,
      "loss": 2.5278,
      "step": 16693
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9977492690086365,
      "learning_rate": 1.785825517140743e-05,
      "loss": 2.4195,
      "step": 16694
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0769504308700562,
      "learning_rate": 1.785800052686157e-05,
      "loss": 2.5275,
      "step": 16695
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0935403108596802,
      "learning_rate": 1.7857745868994265e-05,
      "loss": 2.4179,
      "step": 16696
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.155826449394226,
      "learning_rate": 1.785749119780595e-05,
      "loss": 2.5674,
      "step": 16697
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9966188073158264,
      "learning_rate": 1.785723651329705e-05,
      "loss": 2.4978,
      "step": 16698
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9730563759803772,
      "learning_rate": 1.7856981815468004e-05,
      "loss": 2.4844,
      "step": 16699
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0355232954025269,
      "learning_rate": 1.7856727104319235e-05,
      "loss": 2.3657,
      "step": 16700
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.008022427558899,
      "learning_rate": 1.7856472379851184e-05,
      "loss": 2.6318,
      "step": 16701
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.043631672859192,
      "learning_rate": 1.7856217642064276e-05,
      "loss": 2.275,
      "step": 16702
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0506254434585571,
      "learning_rate": 1.7855962890958943e-05,
      "loss": 2.6072,
      "step": 16703
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0936565399169922,
      "learning_rate": 1.7855708126535627e-05,
      "loss": 2.4978,
      "step": 16704
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0080608129501343,
      "learning_rate": 1.7855453348794744e-05,
      "loss": 2.4408,
      "step": 16705
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.025455117225647,
      "learning_rate": 1.7855198557736738e-05,
      "loss": 2.557,
      "step": 16706
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.898955225944519,
      "learning_rate": 1.785494375336204e-05,
      "loss": 2.487,
      "step": 16707
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0324580669403076,
      "learning_rate": 1.7854688935671073e-05,
      "loss": 2.493,
      "step": 16708
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.977179229259491,
      "learning_rate": 1.785443410466428e-05,
      "loss": 2.3335,
      "step": 16709
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9766834378242493,
      "learning_rate": 1.7854179260342083e-05,
      "loss": 2.2639,
      "step": 16710
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.990411102771759,
      "learning_rate": 1.785392440270492e-05,
      "loss": 2.3605,
      "step": 16711
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0347732305526733,
      "learning_rate": 1.7853669531753227e-05,
      "loss": 2.5702,
      "step": 16712
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.18936026096344,
      "learning_rate": 1.7853414647487427e-05,
      "loss": 2.2404,
      "step": 16713
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.016776204109192,
      "learning_rate": 1.7853159749907957e-05,
      "loss": 2.4932,
      "step": 16714
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0006508827209473,
      "learning_rate": 1.7852904839015246e-05,
      "loss": 2.5403,
      "step": 16715
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.36675226688385,
      "learning_rate": 1.7852649914809732e-05,
      "loss": 2.3238,
      "step": 16716
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.8908584713935852,
      "learning_rate": 1.785239497729184e-05,
      "loss": 2.4911,
      "step": 16717
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0218735933303833,
      "learning_rate": 1.785214002646201e-05,
      "loss": 2.4871,
      "step": 16718
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9523122310638428,
      "learning_rate": 1.785188506232067e-05,
      "loss": 2.5262,
      "step": 16719
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0779998302459717,
      "learning_rate": 1.785163008486825e-05,
      "loss": 2.6041,
      "step": 16720
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9669451713562012,
      "learning_rate": 1.7851375094105182e-05,
      "loss": 2.4522,
      "step": 16721
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0215147733688354,
      "learning_rate": 1.7851120090031905e-05,
      "loss": 2.6803,
      "step": 16722
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9695765972137451,
      "learning_rate": 1.7850865072648847e-05,
      "loss": 2.7257,
      "step": 16723
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9923195838928223,
      "learning_rate": 1.7850610041956438e-05,
      "loss": 2.6257,
      "step": 16724
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0565787553787231,
      "learning_rate": 1.7850354997955115e-05,
      "loss": 2.5016,
      "step": 16725
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9369799494743347,
      "learning_rate": 1.7850099940645305e-05,
      "loss": 2.5584,
      "step": 16726
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1019397974014282,
      "learning_rate": 1.7849844870027446e-05,
      "loss": 2.4595,
      "step": 16727
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1563613414764404,
      "learning_rate": 1.784958978610197e-05,
      "loss": 2.432,
      "step": 16728
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0820369720458984,
      "learning_rate": 1.7849334688869306e-05,
      "loss": 2.4187,
      "step": 16729
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0354856252670288,
      "learning_rate": 1.7849079578329886e-05,
      "loss": 2.5066,
      "step": 16730
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.020418643951416,
      "learning_rate": 1.7848824454484144e-05,
      "loss": 2.4061,
      "step": 16731
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.8971412777900696,
      "learning_rate": 1.7848569317332517e-05,
      "loss": 2.4782,
      "step": 16732
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1790056228637695,
      "learning_rate": 1.784831416687543e-05,
      "loss": 2.6559,
      "step": 16733
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0067265033721924,
      "learning_rate": 1.7848059003113318e-05,
      "loss": 2.5181,
      "step": 16734
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1115177869796753,
      "learning_rate": 1.7847803826046615e-05,
      "loss": 2.2325,
      "step": 16735
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.997751772403717,
      "learning_rate": 1.7847548635675753e-05,
      "loss": 2.4509,
      "step": 16736
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0972647666931152,
      "learning_rate": 1.7847293432001167e-05,
      "loss": 2.5354,
      "step": 16737
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.019171118736267,
      "learning_rate": 1.7847038215023286e-05,
      "loss": 2.5694,
      "step": 16738
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9900447726249695,
      "learning_rate": 1.7846782984742545e-05,
      "loss": 2.7152,
      "step": 16739
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0345571041107178,
      "learning_rate": 1.7846527741159375e-05,
      "loss": 2.3787,
      "step": 16740
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1654173135757446,
      "learning_rate": 1.7846272484274207e-05,
      "loss": 2.4219,
      "step": 16741
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0445221662521362,
      "learning_rate": 1.7846017214087477e-05,
      "loss": 2.4493,
      "step": 16742
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.09321129322052,
      "learning_rate": 1.784576193059962e-05,
      "loss": 2.5409,
      "step": 16743
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1280357837677002,
      "learning_rate": 1.7845506633811065e-05,
      "loss": 2.4397,
      "step": 16744
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2293154001235962,
      "learning_rate": 1.7845251323722243e-05,
      "loss": 2.5024,
      "step": 16745
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3279703855514526,
      "learning_rate": 1.784499600033359e-05,
      "loss": 2.472,
      "step": 16746
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1245065927505493,
      "learning_rate": 1.784474066364554e-05,
      "loss": 2.5064,
      "step": 16747
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9789196252822876,
      "learning_rate": 1.784448531365852e-05,
      "loss": 2.579,
      "step": 16748
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0132583379745483,
      "learning_rate": 1.784422995037297e-05,
      "loss": 2.5611,
      "step": 16749
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0348352193832397,
      "learning_rate": 1.784397457378932e-05,
      "loss": 2.736,
      "step": 16750
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.044174313545227,
      "learning_rate": 1.7843719183908002e-05,
      "loss": 2.3725,
      "step": 16751
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1162620782852173,
      "learning_rate": 1.784346378072945e-05,
      "loss": 2.4547,
      "step": 16752
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9906265139579773,
      "learning_rate": 1.7843208364254095e-05,
      "loss": 2.44,
      "step": 16753
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0633788108825684,
      "learning_rate": 1.7842952934482374e-05,
      "loss": 2.497,
      "step": 16754
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.087101697921753,
      "learning_rate": 1.7842697491414716e-05,
      "loss": 2.4159,
      "step": 16755
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9417968392372131,
      "learning_rate": 1.7842442035051557e-05,
      "loss": 2.6164,
      "step": 16756
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0388343334197998,
      "learning_rate": 1.7842186565393325e-05,
      "loss": 2.5284,
      "step": 16757
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0328658819198608,
      "learning_rate": 1.7841931082440464e-05,
      "loss": 2.6088,
      "step": 16758
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1659808158874512,
      "learning_rate": 1.7841675586193394e-05,
      "loss": 2.6412,
      "step": 16759
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9716154336929321,
      "learning_rate": 1.7841420076652555e-05,
      "loss": 2.3707,
      "step": 16760
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0127512216567993,
      "learning_rate": 1.784116455381838e-05,
      "loss": 2.4917,
      "step": 16761
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0357410907745361,
      "learning_rate": 1.78409090176913e-05,
      "loss": 2.4997,
      "step": 16762
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9881979823112488,
      "learning_rate": 1.7840653468271755e-05,
      "loss": 2.6412,
      "step": 16763
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0199568271636963,
      "learning_rate": 1.7840397905560167e-05,
      "loss": 2.3571,
      "step": 16764
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0456568002700806,
      "learning_rate": 1.7840142329556976e-05,
      "loss": 2.4003,
      "step": 16765
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.077288031578064,
      "learning_rate": 1.7839886740262615e-05,
      "loss": 2.4854,
      "step": 16766
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.019639015197754,
      "learning_rate": 1.783963113767752e-05,
      "loss": 2.2893,
      "step": 16767
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1052782535552979,
      "learning_rate": 1.7839375521802117e-05,
      "loss": 2.4779,
      "step": 16768
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9993960857391357,
      "learning_rate": 1.7839119892636844e-05,
      "loss": 2.4054,
      "step": 16769
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.191580891609192,
      "learning_rate": 1.7838864250182136e-05,
      "loss": 2.6158,
      "step": 16770
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1628743410110474,
      "learning_rate": 1.7838608594438423e-05,
      "loss": 2.5646,
      "step": 16771
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0034561157226562,
      "learning_rate": 1.7838352925406138e-05,
      "loss": 2.3507,
      "step": 16772
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0197564363479614,
      "learning_rate": 1.7838097243085716e-05,
      "loss": 2.5611,
      "step": 16773
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9963411092758179,
      "learning_rate": 1.7837841547477593e-05,
      "loss": 2.5091,
      "step": 16774
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0648036003112793,
      "learning_rate": 1.78375858385822e-05,
      "loss": 2.5788,
      "step": 16775
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9953762888908386,
      "learning_rate": 1.7837330116399964e-05,
      "loss": 2.6747,
      "step": 16776
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0360503196716309,
      "learning_rate": 1.783707438093133e-05,
      "loss": 2.497,
      "step": 16777
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0234025716781616,
      "learning_rate": 1.7836818632176726e-05,
      "loss": 2.3202,
      "step": 16778
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3697854280471802,
      "learning_rate": 1.7836562870136588e-05,
      "loss": 2.5201,
      "step": 16779
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1079943180084229,
      "learning_rate": 1.7836307094811345e-05,
      "loss": 2.5047,
      "step": 16780
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9351114630699158,
      "learning_rate": 1.7836051306201434e-05,
      "loss": 2.6232,
      "step": 16781
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1170001029968262,
      "learning_rate": 1.783579550430729e-05,
      "loss": 2.6399,
      "step": 16782
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1880069971084595,
      "learning_rate": 1.783553968912934e-05,
      "loss": 2.5739,
      "step": 16783
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.192041039466858,
      "learning_rate": 1.783528386066802e-05,
      "loss": 2.2859,
      "step": 16784
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9781436920166016,
      "learning_rate": 1.783502801892377e-05,
      "loss": 2.2759,
      "step": 16785
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1964679956436157,
      "learning_rate": 1.783477216389702e-05,
      "loss": 2.283,
      "step": 16786
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0924627780914307,
      "learning_rate": 1.7834516295588204e-05,
      "loss": 2.5034,
      "step": 16787
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1923329830169678,
      "learning_rate": 1.783426041399775e-05,
      "loss": 2.4255,
      "step": 16788
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.055336594581604,
      "learning_rate": 1.7834004519126104e-05,
      "loss": 2.3709,
      "step": 16789
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0904128551483154,
      "learning_rate": 1.783374861097369e-05,
      "loss": 2.3724,
      "step": 16790
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9957032203674316,
      "learning_rate": 1.783349268954094e-05,
      "loss": 2.4848,
      "step": 16791
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0066783428192139,
      "learning_rate": 1.7833236754828297e-05,
      "loss": 2.5426,
      "step": 16792
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.003376841545105,
      "learning_rate": 1.7832980806836188e-05,
      "loss": 2.2047,
      "step": 16793
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9109139442443848,
      "learning_rate": 1.783272484556505e-05,
      "loss": 2.2443,
      "step": 16794
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.028557300567627,
      "learning_rate": 1.7832468871015315e-05,
      "loss": 2.3798,
      "step": 16795
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1338826417922974,
      "learning_rate": 1.783221288318742e-05,
      "loss": 2.5058,
      "step": 16796
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.988947868347168,
      "learning_rate": 1.78319568820818e-05,
      "loss": 2.5823,
      "step": 16797
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0346412658691406,
      "learning_rate": 1.783170086769888e-05,
      "loss": 2.6479,
      "step": 16798
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0609548091888428,
      "learning_rate": 1.78314448400391e-05,
      "loss": 2.4863,
      "step": 16799
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3330392837524414,
      "learning_rate": 1.7831188799102897e-05,
      "loss": 2.5242,
      "step": 16800
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1004854440689087,
      "learning_rate": 1.7830932744890704e-05,
      "loss": 2.4684,
      "step": 16801
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1642342805862427,
      "learning_rate": 1.7830676677402947e-05,
      "loss": 2.5487,
      "step": 16802
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1426746845245361,
      "learning_rate": 1.7830420596640073e-05,
      "loss": 2.4731,
      "step": 16803
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0359135866165161,
      "learning_rate": 1.7830164502602506e-05,
      "loss": 2.4845,
      "step": 16804
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0274145603179932,
      "learning_rate": 1.782990839529068e-05,
      "loss": 2.4716,
      "step": 16805
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0052721500396729,
      "learning_rate": 1.7829652274705038e-05,
      "loss": 2.4899,
      "step": 16806
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0683612823486328,
      "learning_rate": 1.7829396140846005e-05,
      "loss": 2.6279,
      "step": 16807
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1183902025222778,
      "learning_rate": 1.782913999371402e-05,
      "loss": 2.5831,
      "step": 16808
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1669727563858032,
      "learning_rate": 1.7828883833309517e-05,
      "loss": 2.4117,
      "step": 16809
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3819178342819214,
      "learning_rate": 1.7828627659632933e-05,
      "loss": 2.4539,
      "step": 16810
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.037570834159851,
      "learning_rate": 1.7828371472684694e-05,
      "loss": 2.4012,
      "step": 16811
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0399441719055176,
      "learning_rate": 1.782811527246524e-05,
      "loss": 2.6766,
      "step": 16812
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9341514110565186,
      "learning_rate": 1.7827859058975008e-05,
      "loss": 2.4689,
      "step": 16813
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0035091638565063,
      "learning_rate": 1.7827602832214425e-05,
      "loss": 2.348,
      "step": 16814
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1404576301574707,
      "learning_rate": 1.7827346592183932e-05,
      "loss": 2.6459,
      "step": 16815
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9993805289268494,
      "learning_rate": 1.782709033888396e-05,
      "loss": 2.505,
      "step": 16816
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0536302328109741,
      "learning_rate": 1.7826834072314943e-05,
      "loss": 2.5418,
      "step": 16817
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0516114234924316,
      "learning_rate": 1.7826577792477317e-05,
      "loss": 2.5682,
      "step": 16818
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0609791278839111,
      "learning_rate": 1.7826321499371516e-05,
      "loss": 2.451,
      "step": 16819
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0458420515060425,
      "learning_rate": 1.7826065192997978e-05,
      "loss": 2.468,
      "step": 16820
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0813170671463013,
      "learning_rate": 1.782580887335713e-05,
      "loss": 2.6166,
      "step": 16821
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9168940186500549,
      "learning_rate": 1.782555254044941e-05,
      "loss": 2.4925,
      "step": 16822
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1373608112335205,
      "learning_rate": 1.7825296194275252e-05,
      "loss": 2.6608,
      "step": 16823
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.093998670578003,
      "learning_rate": 1.7825039834835096e-05,
      "loss": 2.6166,
      "step": 16824
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9779574275016785,
      "learning_rate": 1.7824783462129368e-05,
      "loss": 2.4595,
      "step": 16825
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.035032868385315,
      "learning_rate": 1.782452707615851e-05,
      "loss": 2.525,
      "step": 16826
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0096662044525146,
      "learning_rate": 1.7824270676922954e-05,
      "loss": 2.7042,
      "step": 16827
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.364587426185608,
      "learning_rate": 1.782401426442313e-05,
      "loss": 2.558,
      "step": 16828
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0094313621520996,
      "learning_rate": 1.782375783865948e-05,
      "loss": 2.6679,
      "step": 16829
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9366636276245117,
      "learning_rate": 1.7823501399632434e-05,
      "loss": 2.4816,
      "step": 16830
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0809516906738281,
      "learning_rate": 1.782324494734243e-05,
      "loss": 2.2905,
      "step": 16831
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.107295274734497,
      "learning_rate": 1.7822988481789897e-05,
      "loss": 2.5007,
      "step": 16832
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.915917694568634,
      "learning_rate": 1.782273200297528e-05,
      "loss": 2.4652,
      "step": 16833
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1303832530975342,
      "learning_rate": 1.7822475510899004e-05,
      "loss": 2.8559,
      "step": 16834
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1108717918395996,
      "learning_rate": 1.782221900556151e-05,
      "loss": 2.4887,
      "step": 16835
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.134260654449463,
      "learning_rate": 1.782196248696323e-05,
      "loss": 2.4663,
      "step": 16836
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2176839113235474,
      "learning_rate": 1.7821705955104593e-05,
      "loss": 2.2155,
      "step": 16837
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.054738163948059,
      "learning_rate": 1.7821449409986043e-05,
      "loss": 2.4849,
      "step": 16838
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2160197496414185,
      "learning_rate": 1.7821192851608016e-05,
      "loss": 2.5375,
      "step": 16839
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1275231838226318,
      "learning_rate": 1.782093627997094e-05,
      "loss": 2.5946,
      "step": 16840
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2755614519119263,
      "learning_rate": 1.7820679695075253e-05,
      "loss": 2.4573,
      "step": 16841
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.980398416519165,
      "learning_rate": 1.7820423096921392e-05,
      "loss": 2.0791,
      "step": 16842
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0178720951080322,
      "learning_rate": 1.7820166485509788e-05,
      "loss": 2.4746,
      "step": 16843
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0994811058044434,
      "learning_rate": 1.7819909860840875e-05,
      "loss": 2.4308,
      "step": 16844
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1671574115753174,
      "learning_rate": 1.7819653222915097e-05,
      "loss": 2.3774,
      "step": 16845
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0154509544372559,
      "learning_rate": 1.781939657173288e-05,
      "loss": 2.5208,
      "step": 16846
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4177340269088745,
      "learning_rate": 1.781913990729466e-05,
      "loss": 2.5658,
      "step": 16847
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0629736185073853,
      "learning_rate": 1.7818883229600877e-05,
      "loss": 2.4519,
      "step": 16848
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9628344178199768,
      "learning_rate": 1.781862653865196e-05,
      "loss": 2.3975,
      "step": 16849
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0970839262008667,
      "learning_rate": 1.7818369834448352e-05,
      "loss": 2.5094,
      "step": 16850
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1857370138168335,
      "learning_rate": 1.781811311699048e-05,
      "loss": 2.3914,
      "step": 16851
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.102247714996338,
      "learning_rate": 1.7817856386278785e-05,
      "loss": 2.4396,
      "step": 16852
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2328522205352783,
      "learning_rate": 1.78175996423137e-05,
      "loss": 2.205,
      "step": 16853
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9504004716873169,
      "learning_rate": 1.7817342885095658e-05,
      "loss": 2.4081,
      "step": 16854
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9429080486297607,
      "learning_rate": 1.78170861146251e-05,
      "loss": 2.4743,
      "step": 16855
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9975416660308838,
      "learning_rate": 1.7816829330902455e-05,
      "loss": 2.5104,
      "step": 16856
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0213680267333984,
      "learning_rate": 1.781657253392817e-05,
      "loss": 2.4225,
      "step": 16857
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.10774564743042,
      "learning_rate": 1.7816315723702658e-05,
      "loss": 2.5401,
      "step": 16858
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.157515048980713,
      "learning_rate": 1.7816058900226375e-05,
      "loss": 2.3813,
      "step": 16859
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0666167736053467,
      "learning_rate": 1.781580206349975e-05,
      "loss": 2.5658,
      "step": 16860
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0678833723068237,
      "learning_rate": 1.781554521352322e-05,
      "loss": 2.2183,
      "step": 16861
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9898846745491028,
      "learning_rate": 1.7815288350297213e-05,
      "loss": 2.4636,
      "step": 16862
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9193816781044006,
      "learning_rate": 1.781503147382217e-05,
      "loss": 2.3725,
      "step": 16863
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9984208345413208,
      "learning_rate": 1.7814774584098528e-05,
      "loss": 2.7652,
      "step": 16864
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0022908449172974,
      "learning_rate": 1.7814517681126718e-05,
      "loss": 2.5468,
      "step": 16865
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9265182018280029,
      "learning_rate": 1.781426076490718e-05,
      "loss": 2.3694,
      "step": 16866
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2480496168136597,
      "learning_rate": 1.781400383544035e-05,
      "loss": 2.5548,
      "step": 16867
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.023568034172058,
      "learning_rate": 1.781374689272666e-05,
      "loss": 2.3732,
      "step": 16868
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9784900546073914,
      "learning_rate": 1.7813489936766545e-05,
      "loss": 2.4733,
      "step": 16869
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.073119044303894,
      "learning_rate": 1.7813232967560442e-05,
      "loss": 2.5221,
      "step": 16870
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0427073240280151,
      "learning_rate": 1.7812975985108793e-05,
      "loss": 2.321,
      "step": 16871
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9564824104309082,
      "learning_rate": 1.7812718989412022e-05,
      "loss": 2.5524,
      "step": 16872
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9952777028083801,
      "learning_rate": 1.781246198047057e-05,
      "loss": 2.5353,
      "step": 16873
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1212767362594604,
      "learning_rate": 1.7812204958284877e-05,
      "loss": 2.6474,
      "step": 16874
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.061995029449463,
      "learning_rate": 1.781194792285537e-05,
      "loss": 2.488,
      "step": 16875
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0137100219726562,
      "learning_rate": 1.7811690874182495e-05,
      "loss": 2.6028,
      "step": 16876
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9640584588050842,
      "learning_rate": 1.7811433812266677e-05,
      "loss": 2.4437,
      "step": 16877
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.021130084991455,
      "learning_rate": 1.781117673710836e-05,
      "loss": 2.4035,
      "step": 16878
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0208948850631714,
      "learning_rate": 1.7810919648707977e-05,
      "loss": 2.5421,
      "step": 16879
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0508135557174683,
      "learning_rate": 1.7810662547065965e-05,
      "loss": 2.4482,
      "step": 16880
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9473710656166077,
      "learning_rate": 1.7810405432182754e-05,
      "loss": 2.5889,
      "step": 16881
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0316908359527588,
      "learning_rate": 1.781014830405879e-05,
      "loss": 2.4948,
      "step": 16882
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1196215152740479,
      "learning_rate": 1.7809891162694503e-05,
      "loss": 2.505,
      "step": 16883
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0053452253341675,
      "learning_rate": 1.7809634008090323e-05,
      "loss": 2.4267,
      "step": 16884
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1059941053390503,
      "learning_rate": 1.7809376840246696e-05,
      "loss": 2.3493,
      "step": 16885
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.069139003753662,
      "learning_rate": 1.780911965916406e-05,
      "loss": 2.5196,
      "step": 16886
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1245601177215576,
      "learning_rate": 1.7808862464842836e-05,
      "loss": 2.5833,
      "step": 16887
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0941057205200195,
      "learning_rate": 1.7808605257283474e-05,
      "loss": 2.3278,
      "step": 16888
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.906262218952179,
      "learning_rate": 1.7808348036486403e-05,
      "loss": 2.146,
      "step": 16889
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0165510177612305,
      "learning_rate": 1.7808090802452064e-05,
      "loss": 2.5011,
      "step": 16890
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.986976146697998,
      "learning_rate": 1.780783355518089e-05,
      "loss": 2.3935,
      "step": 16891
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9911192059516907,
      "learning_rate": 1.7807576294673316e-05,
      "loss": 2.4038,
      "step": 16892
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0805253982543945,
      "learning_rate": 1.780731902092978e-05,
      "loss": 2.5266,
      "step": 16893
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.091357946395874,
      "learning_rate": 1.7807061733950723e-05,
      "loss": 2.5337,
      "step": 16894
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.945124626159668,
      "learning_rate": 1.7806804433736568e-05,
      "loss": 2.5465,
      "step": 16895
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1450746059417725,
      "learning_rate": 1.7806547120287764e-05,
      "loss": 2.6863,
      "step": 16896
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.070335030555725,
      "learning_rate": 1.780628979360474e-05,
      "loss": 2.4769,
      "step": 16897
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0319942235946655,
      "learning_rate": 1.7806032453687936e-05,
      "loss": 2.5547,
      "step": 16898
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.091274380683899,
      "learning_rate": 1.7805775100537786e-05,
      "loss": 2.4378,
      "step": 16899
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0193110704421997,
      "learning_rate": 1.780551773415473e-05,
      "loss": 2.5187,
      "step": 16900
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9509557485580444,
      "learning_rate": 1.7805260354539197e-05,
      "loss": 2.34,
      "step": 16901
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9587776064872742,
      "learning_rate": 1.7805002961691632e-05,
      "loss": 2.2214,
      "step": 16902
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2268379926681519,
      "learning_rate": 1.7804745555612463e-05,
      "loss": 2.3008,
      "step": 16903
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0906541347503662,
      "learning_rate": 1.7804488136302132e-05,
      "loss": 2.2117,
      "step": 16904
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0706876516342163,
      "learning_rate": 1.780423070376108e-05,
      "loss": 2.5399,
      "step": 16905
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9423045516014099,
      "learning_rate": 1.780397325798973e-05,
      "loss": 2.4911,
      "step": 16906
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9897993803024292,
      "learning_rate": 1.780371579898853e-05,
      "loss": 2.5558,
      "step": 16907
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0111942291259766,
      "learning_rate": 1.7803458326757906e-05,
      "loss": 2.4488,
      "step": 16908
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0060601234436035,
      "learning_rate": 1.7803200841298306e-05,
      "loss": 2.4028,
      "step": 16909
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.075857400894165,
      "learning_rate": 1.780294334261016e-05,
      "loss": 2.6157,
      "step": 16910
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0599305629730225,
      "learning_rate": 1.7802685830693906e-05,
      "loss": 2.4097,
      "step": 16911
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0843254327774048,
      "learning_rate": 1.7802428305549982e-05,
      "loss": 2.5391,
      "step": 16912
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0978786945343018,
      "learning_rate": 1.780217076717882e-05,
      "loss": 2.3832,
      "step": 16913
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0494366884231567,
      "learning_rate": 1.7801913215580858e-05,
      "loss": 2.4967,
      "step": 16914
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.043521523475647,
      "learning_rate": 1.780165565075654e-05,
      "loss": 2.7006,
      "step": 16915
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0295917987823486,
      "learning_rate": 1.7801398072706293e-05,
      "loss": 2.5247,
      "step": 16916
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9562649130821228,
      "learning_rate": 1.7801140481430556e-05,
      "loss": 2.4215,
      "step": 16917
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9500450491905212,
      "learning_rate": 1.7800882876929773e-05,
      "loss": 2.4559,
      "step": 16918
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0957337617874146,
      "learning_rate": 1.7800625259204373e-05,
      "loss": 2.4391,
      "step": 16919
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9607177972793579,
      "learning_rate": 1.780036762825479e-05,
      "loss": 2.3191,
      "step": 16920
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.105085015296936,
      "learning_rate": 1.780010998408147e-05,
      "loss": 2.5641,
      "step": 16921
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.10271155834198,
      "learning_rate": 1.779985232668484e-05,
      "loss": 2.4718,
      "step": 16922
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0260322093963623,
      "learning_rate": 1.7799594656065346e-05,
      "loss": 2.3341,
      "step": 16923
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0111072063446045,
      "learning_rate": 1.779933697222342e-05,
      "loss": 2.6037,
      "step": 16924
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0090688467025757,
      "learning_rate": 1.7799079275159496e-05,
      "loss": 2.5121,
      "step": 16925
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5037028789520264,
      "learning_rate": 1.779882156487402e-05,
      "loss": 2.4345,
      "step": 16926
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0943851470947266,
      "learning_rate": 1.779856384136742e-05,
      "loss": 2.3953,
      "step": 16927
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0549260377883911,
      "learning_rate": 1.7798306104640138e-05,
      "loss": 2.401,
      "step": 16928
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.157326102256775,
      "learning_rate": 1.7798048354692607e-05,
      "loss": 2.6443,
      "step": 16929
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9643399715423584,
      "learning_rate": 1.779779059152527e-05,
      "loss": 2.487,
      "step": 16930
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.035191535949707,
      "learning_rate": 1.7797532815138556e-05,
      "loss": 2.4257,
      "step": 16931
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9788632392883301,
      "learning_rate": 1.779727502553291e-05,
      "loss": 2.537,
      "step": 16932
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0952244997024536,
      "learning_rate": 1.779701722270876e-05,
      "loss": 2.508,
      "step": 16933
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0482620000839233,
      "learning_rate": 1.7796759406666552e-05,
      "loss": 2.4176,
      "step": 16934
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1097164154052734,
      "learning_rate": 1.7796501577406718e-05,
      "loss": 2.3935,
      "step": 16935
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0880663394927979,
      "learning_rate": 1.7796243734929697e-05,
      "loss": 2.2921,
      "step": 16936
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0211751461029053,
      "learning_rate": 1.7795985879235923e-05,
      "loss": 2.4413,
      "step": 16937
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9605996012687683,
      "learning_rate": 1.779572801032584e-05,
      "loss": 2.5859,
      "step": 16938
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9933070540428162,
      "learning_rate": 1.7795470128199877e-05,
      "loss": 2.2044,
      "step": 16939
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1237715482711792,
      "learning_rate": 1.7795212232858476e-05,
      "loss": 2.3928,
      "step": 16940
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9770937561988831,
      "learning_rate": 1.779495432430207e-05,
      "loss": 2.3573,
      "step": 16941
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9176480174064636,
      "learning_rate": 1.7794696402531104e-05,
      "loss": 2.6222,
      "step": 16942
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9487994909286499,
      "learning_rate": 1.779443846754601e-05,
      "loss": 2.399,
      "step": 16943
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0940693616867065,
      "learning_rate": 1.7794180519347225e-05,
      "loss": 2.5256,
      "step": 16944
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0755565166473389,
      "learning_rate": 1.7793922557935187e-05,
      "loss": 2.1938,
      "step": 16945
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1963707208633423,
      "learning_rate": 1.7793664583310334e-05,
      "loss": 2.6435,
      "step": 16946
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1265946626663208,
      "learning_rate": 1.77934065954731e-05,
      "loss": 2.3677,
      "step": 16947
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9614124894142151,
      "learning_rate": 1.779314859442393e-05,
      "loss": 2.6328,
      "step": 16948
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0074183940887451,
      "learning_rate": 1.7792890580163257e-05,
      "loss": 2.3844,
      "step": 16949
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1612848043441772,
      "learning_rate": 1.7792632552691513e-05,
      "loss": 2.4121,
      "step": 16950
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0522687435150146,
      "learning_rate": 1.7792374512009145e-05,
      "loss": 2.4542,
      "step": 16951
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9827010631561279,
      "learning_rate": 1.7792116458116583e-05,
      "loss": 2.3851,
      "step": 16952
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0791244506835938,
      "learning_rate": 1.7791858391014264e-05,
      "loss": 2.5054,
      "step": 16953
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0246058702468872,
      "learning_rate": 1.7791600310702633e-05,
      "loss": 2.5979,
      "step": 16954
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0812779664993286,
      "learning_rate": 1.7791342217182123e-05,
      "loss": 2.4115,
      "step": 16955
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0960097312927246,
      "learning_rate": 1.779108411045317e-05,
      "loss": 2.3849,
      "step": 16956
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0697048902511597,
      "learning_rate": 1.7790825990516216e-05,
      "loss": 2.5066,
      "step": 16957
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9659015536308289,
      "learning_rate": 1.7790567857371694e-05,
      "loss": 2.6813,
      "step": 16958
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0218455791473389,
      "learning_rate": 1.7790309711020045e-05,
      "loss": 2.4243,
      "step": 16959
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0163830518722534,
      "learning_rate": 1.7790051551461705e-05,
      "loss": 2.4611,
      "step": 16960
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0341445207595825,
      "learning_rate": 1.7789793378697112e-05,
      "loss": 2.7349,
      "step": 16961
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1015963554382324,
      "learning_rate": 1.7789535192726703e-05,
      "loss": 2.3235,
      "step": 16962
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0476305484771729,
      "learning_rate": 1.7789276993550918e-05,
      "loss": 2.4738,
      "step": 16963
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0440598726272583,
      "learning_rate": 1.778901878117019e-05,
      "loss": 2.2884,
      "step": 16964
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2134830951690674,
      "learning_rate": 1.7788760555584962e-05,
      "loss": 2.6564,
      "step": 16965
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0313125848770142,
      "learning_rate": 1.7788502316795665e-05,
      "loss": 2.4804,
      "step": 16966
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9672722816467285,
      "learning_rate": 1.7788244064802747e-05,
      "loss": 2.5057,
      "step": 16967
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9393357038497925,
      "learning_rate": 1.7787985799606636e-05,
      "loss": 2.318,
      "step": 16968
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1054686307907104,
      "learning_rate": 1.7787727521207778e-05,
      "loss": 2.6876,
      "step": 16969
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9209042191505432,
      "learning_rate": 1.7787469229606604e-05,
      "loss": 2.3638,
      "step": 16970
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0857607126235962,
      "learning_rate": 1.7787210924803554e-05,
      "loss": 2.5846,
      "step": 16971
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0771260261535645,
      "learning_rate": 1.778695260679907e-05,
      "loss": 2.3556,
      "step": 16972
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0420029163360596,
      "learning_rate": 1.778669427559358e-05,
      "loss": 2.5425,
      "step": 16973
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1997709274291992,
      "learning_rate": 1.7786435931187536e-05,
      "loss": 2.2453,
      "step": 16974
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0661004781723022,
      "learning_rate": 1.7786177573581364e-05,
      "loss": 2.6026,
      "step": 16975
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2007334232330322,
      "learning_rate": 1.7785919202775507e-05,
      "loss": 2.5834,
      "step": 16976
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9983443021774292,
      "learning_rate": 1.77856608187704e-05,
      "loss": 2.3578,
      "step": 16977
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.082098126411438,
      "learning_rate": 1.7785402421566488e-05,
      "loss": 2.5462,
      "step": 16978
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0791561603546143,
      "learning_rate": 1.7785144011164203e-05,
      "loss": 2.411,
      "step": 16979
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.015129566192627,
      "learning_rate": 1.778488558756398e-05,
      "loss": 2.5012,
      "step": 16980
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1528351306915283,
      "learning_rate": 1.7784627150766267e-05,
      "loss": 2.447,
      "step": 16981
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1394727230072021,
      "learning_rate": 1.7784368700771494e-05,
      "loss": 2.3084,
      "step": 16982
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.131532073020935,
      "learning_rate": 1.7784110237580103e-05,
      "loss": 2.3172,
      "step": 16983
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.310691237449646,
      "learning_rate": 1.778385176119253e-05,
      "loss": 2.5875,
      "step": 16984
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.979317843914032,
      "learning_rate": 1.7783593271609217e-05,
      "loss": 2.3636,
      "step": 16985
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0634208917617798,
      "learning_rate": 1.7783334768830592e-05,
      "loss": 2.5258,
      "step": 16986
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0561373233795166,
      "learning_rate": 1.778307625285711e-05,
      "loss": 2.2846,
      "step": 16987
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1198406219482422,
      "learning_rate": 1.7782817723689194e-05,
      "loss": 2.4426,
      "step": 16988
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0056341886520386,
      "learning_rate": 1.778255918132729e-05,
      "loss": 2.5705,
      "step": 16989
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1119565963745117,
      "learning_rate": 1.7782300625771832e-05,
      "loss": 2.4789,
      "step": 16990
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1712924242019653,
      "learning_rate": 1.7782042057023266e-05,
      "loss": 2.4005,
      "step": 16991
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0960432291030884,
      "learning_rate": 1.7781783475082023e-05,
      "loss": 2.5321,
      "step": 16992
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0836434364318848,
      "learning_rate": 1.7781524879948544e-05,
      "loss": 2.3507,
      "step": 16993
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.037360668182373,
      "learning_rate": 1.7781266271623265e-05,
      "loss": 2.2901,
      "step": 16994
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0112030506134033,
      "learning_rate": 1.7781007650106627e-05,
      "loss": 2.7444,
      "step": 16995
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9709484577178955,
      "learning_rate": 1.7780749015399067e-05,
      "loss": 2.4112,
      "step": 16996
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0791997909545898,
      "learning_rate": 1.7780490367501026e-05,
      "loss": 2.3586,
      "step": 16997
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1461501121520996,
      "learning_rate": 1.778023170641294e-05,
      "loss": 2.5954,
      "step": 16998
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0952234268188477,
      "learning_rate": 1.7779973032135246e-05,
      "loss": 2.5788,
      "step": 16999
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.062638759613037,
      "learning_rate": 1.7779714344668386e-05,
      "loss": 2.3933,
      "step": 17000
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1337339878082275,
      "learning_rate": 1.77794556440128e-05,
      "loss": 2.6851,
      "step": 17001
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0589638948440552,
      "learning_rate": 1.777919693016892e-05,
      "loss": 2.5845,
      "step": 17002
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.036293864250183,
      "learning_rate": 1.777893820313719e-05,
      "loss": 2.6126,
      "step": 17003
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.962522566318512,
      "learning_rate": 1.7778679462918046e-05,
      "loss": 2.399,
      "step": 17004
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1032658815383911,
      "learning_rate": 1.777842070951193e-05,
      "loss": 2.5688,
      "step": 17005
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.085292935371399,
      "learning_rate": 1.7778161942919278e-05,
      "loss": 2.3016,
      "step": 17006
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1251046657562256,
      "learning_rate": 1.7777903163140527e-05,
      "loss": 2.3711,
      "step": 17007
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.12636399269104,
      "learning_rate": 1.777764437017612e-05,
      "loss": 2.5054,
      "step": 17008
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0491282939910889,
      "learning_rate": 1.7777385564026493e-05,
      "loss": 2.3511,
      "step": 17009
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0026510953903198,
      "learning_rate": 1.7777126744692083e-05,
      "loss": 2.5102,
      "step": 17010
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1424375772476196,
      "learning_rate": 1.7776867912173334e-05,
      "loss": 2.4452,
      "step": 17011
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3339769840240479,
      "learning_rate": 1.7776609066470677e-05,
      "loss": 2.5349,
      "step": 17012
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0583899021148682,
      "learning_rate": 1.777635020758456e-05,
      "loss": 2.4796,
      "step": 17013
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1543940305709839,
      "learning_rate": 1.7776091335515415e-05,
      "loss": 2.2905,
      "step": 17014
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0851984024047852,
      "learning_rate": 1.7775832450263685e-05,
      "loss": 2.6989,
      "step": 17015
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.095678448677063,
      "learning_rate": 1.7775573551829804e-05,
      "loss": 2.4798,
      "step": 17016
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9819660186767578,
      "learning_rate": 1.7775314640214218e-05,
      "loss": 2.3743,
      "step": 17017
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.077292799949646,
      "learning_rate": 1.777505571541736e-05,
      "loss": 2.4779,
      "step": 17018
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0673335790634155,
      "learning_rate": 1.777479677743967e-05,
      "loss": 2.3379,
      "step": 17019
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0474213361740112,
      "learning_rate": 1.7774537826281587e-05,
      "loss": 2.4159,
      "step": 17020
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9593267440795898,
      "learning_rate": 1.7774278861943556e-05,
      "loss": 2.4617,
      "step": 17021
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9650455117225647,
      "learning_rate": 1.7774019884426004e-05,
      "loss": 2.4388,
      "step": 17022
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.120420217514038,
      "learning_rate": 1.7773760893729382e-05,
      "loss": 2.4122,
      "step": 17023
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9967633485794067,
      "learning_rate": 1.7773501889854123e-05,
      "loss": 2.4392,
      "step": 17024
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0839512348175049,
      "learning_rate": 1.7773242872800668e-05,
      "loss": 2.6138,
      "step": 17025
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0319348573684692,
      "learning_rate": 1.777298384256945e-05,
      "loss": 2.4662,
      "step": 17026
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.066993236541748,
      "learning_rate": 1.7772724799160918e-05,
      "loss": 2.3718,
      "step": 17027
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0727479457855225,
      "learning_rate": 1.7772465742575506e-05,
      "loss": 2.2968,
      "step": 17028
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9182437658309937,
      "learning_rate": 1.777220667281365e-05,
      "loss": 2.5044,
      "step": 17029
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1422536373138428,
      "learning_rate": 1.7771947589875798e-05,
      "loss": 2.423,
      "step": 17030
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0034904479980469,
      "learning_rate": 1.777168849376238e-05,
      "loss": 2.5733,
      "step": 17031
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2519402503967285,
      "learning_rate": 1.7771429384473845e-05,
      "loss": 2.42,
      "step": 17032
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1527647972106934,
      "learning_rate": 1.777117026201062e-05,
      "loss": 2.5448,
      "step": 17033
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1796875,
      "learning_rate": 1.7770911126373155e-05,
      "loss": 2.2356,
      "step": 17034
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.141095757484436,
      "learning_rate": 1.7770651977561885e-05,
      "loss": 2.7147,
      "step": 17035
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.061400055885315,
      "learning_rate": 1.7770392815577247e-05,
      "loss": 2.3917,
      "step": 17036
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2230195999145508,
      "learning_rate": 1.777013364041968e-05,
      "loss": 2.4664,
      "step": 17037
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.007562518119812,
      "learning_rate": 1.7769874452089632e-05,
      "loss": 2.4801,
      "step": 17038
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.235674500465393,
      "learning_rate": 1.7769615250587533e-05,
      "loss": 2.235,
      "step": 17039
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0808099508285522,
      "learning_rate": 1.7769356035913827e-05,
      "loss": 2.7759,
      "step": 17040
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0416001081466675,
      "learning_rate": 1.7769096808068952e-05,
      "loss": 2.5801,
      "step": 17041
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1093586683273315,
      "learning_rate": 1.7768837567053348e-05,
      "loss": 2.5109,
      "step": 17042
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0962867736816406,
      "learning_rate": 1.7768578312867456e-05,
      "loss": 2.5938,
      "step": 17043
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0044527053833008,
      "learning_rate": 1.7768319045511713e-05,
      "loss": 2.5342,
      "step": 17044
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9404666423797607,
      "learning_rate": 1.776805976498656e-05,
      "loss": 2.5657,
      "step": 17045
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0444625616073608,
      "learning_rate": 1.7767800471292435e-05,
      "loss": 2.2901,
      "step": 17046
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0146623849868774,
      "learning_rate": 1.7767541164429777e-05,
      "loss": 2.247,
      "step": 17047
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0937540531158447,
      "learning_rate": 1.776728184439903e-05,
      "loss": 2.4907,
      "step": 17048
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.006917953491211,
      "learning_rate": 1.7767022511200628e-05,
      "loss": 2.5356,
      "step": 17049
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0268422365188599,
      "learning_rate": 1.7766763164835013e-05,
      "loss": 2.7263,
      "step": 17050
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3349336385726929,
      "learning_rate": 1.7766503805302627e-05,
      "loss": 2.5545,
      "step": 17051
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.019849181175232,
      "learning_rate": 1.7766244432603907e-05,
      "loss": 2.3382,
      "step": 17052
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0261216163635254,
      "learning_rate": 1.7765985046739293e-05,
      "loss": 2.49,
      "step": 17053
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2267584800720215,
      "learning_rate": 1.7765725647709225e-05,
      "loss": 2.2535,
      "step": 17054
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0466866493225098,
      "learning_rate": 1.7765466235514145e-05,
      "loss": 2.5442,
      "step": 17055
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0859618186950684,
      "learning_rate": 1.776520681015449e-05,
      "loss": 2.3538,
      "step": 17056
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0261458158493042,
      "learning_rate": 1.77649473716307e-05,
      "loss": 2.3378,
      "step": 17057
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.024741768836975,
      "learning_rate": 1.7764687919943213e-05,
      "loss": 2.5256,
      "step": 17058
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9880181550979614,
      "learning_rate": 1.7764428455092475e-05,
      "loss": 2.5384,
      "step": 17059
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1286089420318604,
      "learning_rate": 1.776416897707892e-05,
      "loss": 2.6704,
      "step": 17060
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0605096817016602,
      "learning_rate": 1.776390948590299e-05,
      "loss": 2.5084,
      "step": 17061
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9939549565315247,
      "learning_rate": 1.7763649981565125e-05,
      "loss": 2.2763,
      "step": 17062
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1448495388031006,
      "learning_rate": 1.7763390464065763e-05,
      "loss": 2.333,
      "step": 17063
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0798522233963013,
      "learning_rate": 1.7763130933405346e-05,
      "loss": 2.3104,
      "step": 17064
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1206305027008057,
      "learning_rate": 1.7762871389584316e-05,
      "loss": 2.4046,
      "step": 17065
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9977906942367554,
      "learning_rate": 1.776261183260311e-05,
      "loss": 2.3062,
      "step": 17066
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0896652936935425,
      "learning_rate": 1.7762352262462168e-05,
      "loss": 2.617,
      "step": 17067
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0900195837020874,
      "learning_rate": 1.7762092679161932e-05,
      "loss": 2.2288,
      "step": 17068
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0819514989852905,
      "learning_rate": 1.7761833082702837e-05,
      "loss": 2.4807,
      "step": 17069
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0423533916473389,
      "learning_rate": 1.776157347308533e-05,
      "loss": 2.4866,
      "step": 17070
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0659794807434082,
      "learning_rate": 1.7761313850309848e-05,
      "loss": 2.4631,
      "step": 17071
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9247516989707947,
      "learning_rate": 1.7761054214376828e-05,
      "loss": 2.3712,
      "step": 17072
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0266921520233154,
      "learning_rate": 1.7760794565286717e-05,
      "loss": 2.3793,
      "step": 17073
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1274057626724243,
      "learning_rate": 1.776053490303995e-05,
      "loss": 2.4727,
      "step": 17074
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0596410036087036,
      "learning_rate": 1.776027522763697e-05,
      "loss": 2.3417,
      "step": 17075
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9209871292114258,
      "learning_rate": 1.7760015539078212e-05,
      "loss": 2.3734,
      "step": 17076
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1112895011901855,
      "learning_rate": 1.775975583736412e-05,
      "loss": 2.6193,
      "step": 17077
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9861702919006348,
      "learning_rate": 1.7759496122495137e-05,
      "loss": 2.2669,
      "step": 17078
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9491966366767883,
      "learning_rate": 1.7759236394471703e-05,
      "loss": 2.6244,
      "step": 17079
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0088753700256348,
      "learning_rate": 1.7758976653294254e-05,
      "loss": 2.6699,
      "step": 17080
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0179880857467651,
      "learning_rate": 1.775871689896323e-05,
      "loss": 2.4135,
      "step": 17081
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9975343942642212,
      "learning_rate": 1.7758457131479075e-05,
      "loss": 2.4221,
      "step": 17082
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.052731990814209,
      "learning_rate": 1.7758197350842226e-05,
      "loss": 2.3293,
      "step": 17083
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.095786690711975,
      "learning_rate": 1.775793755705313e-05,
      "loss": 2.3469,
      "step": 17084
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0731958150863647,
      "learning_rate": 1.775767775011222e-05,
      "loss": 2.2387,
      "step": 17085
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0332703590393066,
      "learning_rate": 1.775741793001994e-05,
      "loss": 2.3543,
      "step": 17086
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1525799036026,
      "learning_rate": 1.775715809677673e-05,
      "loss": 2.5357,
      "step": 17087
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.087526559829712,
      "learning_rate": 1.775689825038303e-05,
      "loss": 2.6194,
      "step": 17088
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.030117154121399,
      "learning_rate": 1.775663839083928e-05,
      "loss": 2.5391,
      "step": 17089
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1170953512191772,
      "learning_rate": 1.7756378518145922e-05,
      "loss": 2.4593,
      "step": 17090
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0431016683578491,
      "learning_rate": 1.7756118632303394e-05,
      "loss": 2.573,
      "step": 17091
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0204041004180908,
      "learning_rate": 1.7755858733312142e-05,
      "loss": 2.5118,
      "step": 17092
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0703309774398804,
      "learning_rate": 1.77555988211726e-05,
      "loss": 2.3155,
      "step": 17093
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9372531771659851,
      "learning_rate": 1.775533889588521e-05,
      "loss": 2.4019,
      "step": 17094
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0847446918487549,
      "learning_rate": 1.775507895745042e-05,
      "loss": 2.5757,
      "step": 17095
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.005376935005188,
      "learning_rate": 1.775481900586866e-05,
      "loss": 2.357,
      "step": 17096
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.004555106163025,
      "learning_rate": 1.7754559041140376e-05,
      "loss": 2.3569,
      "step": 17097
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.12364661693573,
      "learning_rate": 1.775429906326601e-05,
      "loss": 2.3615,
      "step": 17098
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.126628041267395,
      "learning_rate": 1.7754039072246e-05,
      "loss": 2.3389,
      "step": 17099
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9001815319061279,
      "learning_rate": 1.7753779068080787e-05,
      "loss": 2.4969,
      "step": 17100
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0578186511993408,
      "learning_rate": 1.7753519050770817e-05,
      "loss": 2.4844,
      "step": 17101
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0591087341308594,
      "learning_rate": 1.7753259020316522e-05,
      "loss": 2.4889,
      "step": 17102
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.950859546661377,
      "learning_rate": 1.775299897671835e-05,
      "loss": 2.4381,
      "step": 17103
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0410689115524292,
      "learning_rate": 1.7752738919976735e-05,
      "loss": 2.3988,
      "step": 17104
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0396077632904053,
      "learning_rate": 1.7752478850092125e-05,
      "loss": 2.4366,
      "step": 17105
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0261682271957397,
      "learning_rate": 1.7752218767064958e-05,
      "loss": 2.4798,
      "step": 17106
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.058958888053894,
      "learning_rate": 1.7751958670895672e-05,
      "loss": 2.387,
      "step": 17107
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.04402756690979,
      "learning_rate": 1.7751698561584714e-05,
      "loss": 2.4612,
      "step": 17108
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2000898122787476,
      "learning_rate": 1.775143843913252e-05,
      "loss": 2.6234,
      "step": 17109
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1011604070663452,
      "learning_rate": 1.7751178303539532e-05,
      "loss": 2.5707,
      "step": 17110
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4969209432601929,
      "learning_rate": 1.7750918154806193e-05,
      "loss": 2.2961,
      "step": 17111
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1211655139923096,
      "learning_rate": 1.775065799293294e-05,
      "loss": 2.3318,
      "step": 17112
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9284581542015076,
      "learning_rate": 1.775039781792022e-05,
      "loss": 2.4312,
      "step": 17113
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.226804256439209,
      "learning_rate": 1.775013762976847e-05,
      "loss": 2.4062,
      "step": 17114
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9943402409553528,
      "learning_rate": 1.774987742847813e-05,
      "loss": 2.5167,
      "step": 17115
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0454238653182983,
      "learning_rate": 1.7749617214049647e-05,
      "loss": 2.6402,
      "step": 17116
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9717212319374084,
      "learning_rate": 1.7749356986483454e-05,
      "loss": 2.2881,
      "step": 17117
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.12819504737854,
      "learning_rate": 1.7749096745779996e-05,
      "loss": 2.2473,
      "step": 17118
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0802606344223022,
      "learning_rate": 1.7748836491939718e-05,
      "loss": 2.4587,
      "step": 17119
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2075804471969604,
      "learning_rate": 1.7748576224963053e-05,
      "loss": 2.3539,
      "step": 17120
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0202008485794067,
      "learning_rate": 1.774831594485045e-05,
      "loss": 2.6486,
      "step": 17121
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.096200704574585,
      "learning_rate": 1.7748055651602345e-05,
      "loss": 2.6457,
      "step": 17122
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.111270546913147,
      "learning_rate": 1.7747795345219183e-05,
      "loss": 2.6124,
      "step": 17123
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1172676086425781,
      "learning_rate": 1.7747535025701404e-05,
      "loss": 2.5569,
      "step": 17124
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0425435304641724,
      "learning_rate": 1.7747274693049446e-05,
      "loss": 2.3405,
      "step": 17125
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1207568645477295,
      "learning_rate": 1.774701434726376e-05,
      "loss": 2.4443,
      "step": 17126
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0313987731933594,
      "learning_rate": 1.7746753988344774e-05,
      "loss": 2.3775,
      "step": 17127
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0195175409317017,
      "learning_rate": 1.7746493616292937e-05,
      "loss": 2.4001,
      "step": 17128
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9648410081863403,
      "learning_rate": 1.774623323110869e-05,
      "loss": 2.6112,
      "step": 17129
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9737508893013,
      "learning_rate": 1.7745972832792473e-05,
      "loss": 2.5258,
      "step": 17130
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1123476028442383,
      "learning_rate": 1.774571242134473e-05,
      "loss": 2.4369,
      "step": 17131
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1142969131469727,
      "learning_rate": 1.77454519967659e-05,
      "loss": 2.3837,
      "step": 17132
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1885666847229004,
      "learning_rate": 1.7745191559056425e-05,
      "loss": 2.593,
      "step": 17133
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.160961389541626,
      "learning_rate": 1.7744931108216748e-05,
      "loss": 2.4626,
      "step": 17134
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9972589015960693,
      "learning_rate": 1.7744670644247308e-05,
      "loss": 2.3781,
      "step": 17135
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.173027515411377,
      "learning_rate": 1.774441016714855e-05,
      "loss": 2.5695,
      "step": 17136
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0485070943832397,
      "learning_rate": 1.774414967692091e-05,
      "loss": 2.3733,
      "step": 17137
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.95119708776474,
      "learning_rate": 1.7743889173564835e-05,
      "loss": 2.5074,
      "step": 17138
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1630899906158447,
      "learning_rate": 1.7743628657080765e-05,
      "loss": 2.2143,
      "step": 17139
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0719587802886963,
      "learning_rate": 1.774336812746914e-05,
      "loss": 2.5103,
      "step": 17140
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1184433698654175,
      "learning_rate": 1.7743107584730403e-05,
      "loss": 2.515,
      "step": 17141
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.96441650390625,
      "learning_rate": 1.7742847028864997e-05,
      "loss": 2.5676,
      "step": 17142
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1465016603469849,
      "learning_rate": 1.774258645987336e-05,
      "loss": 2.4992,
      "step": 17143
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0141606330871582,
      "learning_rate": 1.774232587775594e-05,
      "loss": 2.4464,
      "step": 17144
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0054782629013062,
      "learning_rate": 1.774206528251317e-05,
      "loss": 2.5957,
      "step": 17145
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0908552408218384,
      "learning_rate": 1.77418046741455e-05,
      "loss": 2.4787,
      "step": 17146
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.050301194190979,
      "learning_rate": 1.7741544052653367e-05,
      "loss": 2.5885,
      "step": 17147
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0115212202072144,
      "learning_rate": 1.7741283418037217e-05,
      "loss": 2.5023,
      "step": 17148
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.053640365600586,
      "learning_rate": 1.7741022770297487e-05,
      "loss": 2.6075,
      "step": 17149
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0383422374725342,
      "learning_rate": 1.7740762109434618e-05,
      "loss": 2.2811,
      "step": 17150
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2847025394439697,
      "learning_rate": 1.7740501435449057e-05,
      "loss": 2.4305,
      "step": 17151
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1418118476867676,
      "learning_rate": 1.7740240748341245e-05,
      "loss": 2.3739,
      "step": 17152
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1484248638153076,
      "learning_rate": 1.7739980048111625e-05,
      "loss": 2.6221,
      "step": 17153
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0891852378845215,
      "learning_rate": 1.7739719334760632e-05,
      "loss": 2.5464,
      "step": 17154
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0990829467773438,
      "learning_rate": 1.7739458608288715e-05,
      "loss": 2.503,
      "step": 17155
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.041175127029419,
      "learning_rate": 1.7739197868696313e-05,
      "loss": 2.5301,
      "step": 17156
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4626120328903198,
      "learning_rate": 1.773893711598387e-05,
      "loss": 2.4053,
      "step": 17157
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.049806833267212,
      "learning_rate": 1.7738676350151824e-05,
      "loss": 2.4681,
      "step": 17158
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9958004951477051,
      "learning_rate": 1.7738415571200622e-05,
      "loss": 2.2143,
      "step": 17159
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.164915680885315,
      "learning_rate": 1.7738154779130707e-05,
      "loss": 2.6283,
      "step": 17160
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9666568636894226,
      "learning_rate": 1.7737893973942514e-05,
      "loss": 2.722,
      "step": 17161
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0667724609375,
      "learning_rate": 1.7737633155636488e-05,
      "loss": 2.5047,
      "step": 17162
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0646047592163086,
      "learning_rate": 1.7737372324213073e-05,
      "loss": 2.8162,
      "step": 17163
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9991000294685364,
      "learning_rate": 1.773711147967271e-05,
      "loss": 2.4035,
      "step": 17164
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0432360172271729,
      "learning_rate": 1.7736850622015846e-05,
      "loss": 2.4038,
      "step": 17165
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1028554439544678,
      "learning_rate": 1.7736589751242915e-05,
      "loss": 2.6281,
      "step": 17166
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.086071491241455,
      "learning_rate": 1.773632886735436e-05,
      "loss": 2.4977,
      "step": 17167
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.067352533340454,
      "learning_rate": 1.7736067970350632e-05,
      "loss": 2.3511,
      "step": 17168
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0039602518081665,
      "learning_rate": 1.7735807060232167e-05,
      "loss": 2.586,
      "step": 17169
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1465426683425903,
      "learning_rate": 1.7735546136999406e-05,
      "loss": 2.5584,
      "step": 17170
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0063174962997437,
      "learning_rate": 1.7735285200652793e-05,
      "loss": 2.4467,
      "step": 17171
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9642667770385742,
      "learning_rate": 1.7735024251192775e-05,
      "loss": 2.5819,
      "step": 17172
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9752554893493652,
      "learning_rate": 1.7734763288619786e-05,
      "loss": 2.3809,
      "step": 17173
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.953887403011322,
      "learning_rate": 1.7734502312934272e-05,
      "loss": 2.4774,
      "step": 17174
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2097411155700684,
      "learning_rate": 1.7734241324136682e-05,
      "loss": 2.4265,
      "step": 17175
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.175927758216858,
      "learning_rate": 1.7733980322227446e-05,
      "loss": 2.5152,
      "step": 17176
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1540430784225464,
      "learning_rate": 1.7733719307207013e-05,
      "loss": 2.5063,
      "step": 17177
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.064663290977478,
      "learning_rate": 1.7733458279075825e-05,
      "loss": 2.4385,
      "step": 17178
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0147420167922974,
      "learning_rate": 1.773319723783433e-05,
      "loss": 2.4626,
      "step": 17179
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0158884525299072,
      "learning_rate": 1.7732936183482962e-05,
      "loss": 2.4863,
      "step": 17180
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.095887303352356,
      "learning_rate": 1.7732675116022167e-05,
      "loss": 2.4938,
      "step": 17181
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0530388355255127,
      "learning_rate": 1.773241403545239e-05,
      "loss": 2.3294,
      "step": 17182
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2562311887741089,
      "learning_rate": 1.7732152941774065e-05,
      "loss": 2.7258,
      "step": 17183
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.981901228427887,
      "learning_rate": 1.7731891834987648e-05,
      "loss": 2.1818,
      "step": 17184
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0058032274246216,
      "learning_rate": 1.773163071509357e-05,
      "loss": 2.6845,
      "step": 17185
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9783816933631897,
      "learning_rate": 1.7731369582092276e-05,
      "loss": 2.4996,
      "step": 17186
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0017218589782715,
      "learning_rate": 1.7731108435984215e-05,
      "loss": 2.3713,
      "step": 17187
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1043126583099365,
      "learning_rate": 1.7730847276769826e-05,
      "loss": 2.6095,
      "step": 17188
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9332748651504517,
      "learning_rate": 1.773058610444955e-05,
      "loss": 2.5873,
      "step": 17189
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.081884741783142,
      "learning_rate": 1.773032491902383e-05,
      "loss": 2.6105,
      "step": 17190
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.053049087524414,
      "learning_rate": 1.773006372049311e-05,
      "loss": 2.6002,
      "step": 17191
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.965815007686615,
      "learning_rate": 1.772980250885783e-05,
      "loss": 2.6685,
      "step": 17192
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.113474726676941,
      "learning_rate": 1.7729541284118438e-05,
      "loss": 2.1865,
      "step": 17193
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.118756890296936,
      "learning_rate": 1.7729280046275373e-05,
      "loss": 2.5711,
      "step": 17194
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0744550228118896,
      "learning_rate": 1.7729018795329086e-05,
      "loss": 2.3278,
      "step": 17195
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2097817659378052,
      "learning_rate": 1.7728757531280004e-05,
      "loss": 2.425,
      "step": 17196
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0453587770462036,
      "learning_rate": 1.7728496254128584e-05,
      "loss": 2.7544,
      "step": 17197
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1460578441619873,
      "learning_rate": 1.772823496387526e-05,
      "loss": 2.4196,
      "step": 17198
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9689169526100159,
      "learning_rate": 1.772797366052048e-05,
      "loss": 2.3851,
      "step": 17199
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.006287932395935,
      "learning_rate": 1.7727712344064687e-05,
      "loss": 2.4226,
      "step": 17200
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0111079216003418,
      "learning_rate": 1.7727451014508324e-05,
      "loss": 2.5496,
      "step": 17201
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2447540760040283,
      "learning_rate": 1.772718967185183e-05,
      "loss": 2.5784,
      "step": 17202
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.98261958360672,
      "learning_rate": 1.772692831609565e-05,
      "loss": 2.4841,
      "step": 17203
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0673699378967285,
      "learning_rate": 1.7726666947240233e-05,
      "loss": 2.6473,
      "step": 17204
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0083905458450317,
      "learning_rate": 1.7726405565286015e-05,
      "loss": 2.3199,
      "step": 17205
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1674275398254395,
      "learning_rate": 1.772614417023344e-05,
      "loss": 2.6666,
      "step": 17206
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1180095672607422,
      "learning_rate": 1.772588276208295e-05,
      "loss": 2.4648,
      "step": 17207
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.219139814376831,
      "learning_rate": 1.772562134083499e-05,
      "loss": 2.6035,
      "step": 17208
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9694907665252686,
      "learning_rate": 1.7725359906490007e-05,
      "loss": 2.5404,
      "step": 17209
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9568260312080383,
      "learning_rate": 1.772509845904844e-05,
      "loss": 2.6979,
      "step": 17210
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0332562923431396,
      "learning_rate": 1.7724836998510734e-05,
      "loss": 2.505,
      "step": 17211
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0443249940872192,
      "learning_rate": 1.772457552487733e-05,
      "loss": 2.5113,
      "step": 17212
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.13179349899292,
      "learning_rate": 1.7724314038148674e-05,
      "loss": 2.5469,
      "step": 17213
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.051967978477478,
      "learning_rate": 1.7724052538325204e-05,
      "loss": 2.4273,
      "step": 17214
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1025875806808472,
      "learning_rate": 1.772379102540737e-05,
      "loss": 2.4239,
      "step": 17215
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1709626913070679,
      "learning_rate": 1.7723529499395613e-05,
      "loss": 2.7193,
      "step": 17216
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9806209802627563,
      "learning_rate": 1.7723267960290376e-05,
      "loss": 2.4675,
      "step": 17217
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.079443335533142,
      "learning_rate": 1.77230064080921e-05,
      "loss": 2.4172,
      "step": 17218
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9515694379806519,
      "learning_rate": 1.772274484280123e-05,
      "loss": 2.4833,
      "step": 17219
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9853333234786987,
      "learning_rate": 1.7722483264418213e-05,
      "loss": 2.7305,
      "step": 17220
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9989022612571716,
      "learning_rate": 1.772222167294349e-05,
      "loss": 2.6281,
      "step": 17221
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9135178923606873,
      "learning_rate": 1.7721960068377502e-05,
      "loss": 2.4822,
      "step": 17222
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.971084475517273,
      "learning_rate": 1.7721698450720693e-05,
      "loss": 2.5872,
      "step": 17223
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.147329568862915,
      "learning_rate": 1.772143681997351e-05,
      "loss": 2.4519,
      "step": 17224
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9304136037826538,
      "learning_rate": 1.7721175176136392e-05,
      "loss": 2.605,
      "step": 17225
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.050666332244873,
      "learning_rate": 1.7720913519209788e-05,
      "loss": 2.3076,
      "step": 17226
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3019466400146484,
      "learning_rate": 1.7720651849194135e-05,
      "loss": 2.3052,
      "step": 17227
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.101728916168213,
      "learning_rate": 1.7720390166089886e-05,
      "loss": 2.4371,
      "step": 17228
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8117116689682007,
      "learning_rate": 1.7720128469897474e-05,
      "loss": 2.286,
      "step": 17229
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0206892490386963,
      "learning_rate": 1.771986676061735e-05,
      "loss": 2.598,
      "step": 17230
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.023402214050293,
      "learning_rate": 1.7719605038249957e-05,
      "loss": 2.3552,
      "step": 17231
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1256321668624878,
      "learning_rate": 1.771934330279573e-05,
      "loss": 2.7375,
      "step": 17232
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1202625036239624,
      "learning_rate": 1.7719081554255124e-05,
      "loss": 2.4222,
      "step": 17233
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1456482410430908,
      "learning_rate": 1.771881979262858e-05,
      "loss": 2.3636,
      "step": 17234
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.929766833782196,
      "learning_rate": 1.7718558017916538e-05,
      "loss": 2.4418,
      "step": 17235
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9879403114318848,
      "learning_rate": 1.7718296230119443e-05,
      "loss": 2.3258,
      "step": 17236
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.015349268913269,
      "learning_rate": 1.771803442923774e-05,
      "loss": 2.4326,
      "step": 17237
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9926488399505615,
      "learning_rate": 1.7717772615271873e-05,
      "loss": 2.4721,
      "step": 17238
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.002255916595459,
      "learning_rate": 1.7717510788222286e-05,
      "loss": 2.5722,
      "step": 17239
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0435280799865723,
      "learning_rate": 1.771724894808942e-05,
      "loss": 2.3879,
      "step": 17240
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0622901916503906,
      "learning_rate": 1.7716987094873722e-05,
      "loss": 2.5504,
      "step": 17241
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9947127103805542,
      "learning_rate": 1.7716725228575637e-05,
      "loss": 2.3119,
      "step": 17242
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1348624229431152,
      "learning_rate": 1.7716463349195605e-05,
      "loss": 2.2667,
      "step": 17243
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0091487169265747,
      "learning_rate": 1.771620145673407e-05,
      "loss": 2.4147,
      "step": 17244
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0787965059280396,
      "learning_rate": 1.7715939551191482e-05,
      "loss": 2.5221,
      "step": 17245
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0234999656677246,
      "learning_rate": 1.771567763256828e-05,
      "loss": 2.5946,
      "step": 17246
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1011532545089722,
      "learning_rate": 1.7715415700864907e-05,
      "loss": 2.4885,
      "step": 17247
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0134421586990356,
      "learning_rate": 1.771515375608181e-05,
      "loss": 2.5181,
      "step": 17248
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0907399654388428,
      "learning_rate": 1.7714891798219432e-05,
      "loss": 2.5668,
      "step": 17249
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0722471475601196,
      "learning_rate": 1.7714629827278215e-05,
      "loss": 2.499,
      "step": 17250
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9905266761779785,
      "learning_rate": 1.771436784325861e-05,
      "loss": 2.4177,
      "step": 17251
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9836798906326294,
      "learning_rate": 1.771410584616105e-05,
      "loss": 2.3506,
      "step": 17252
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1538316011428833,
      "learning_rate": 1.7713843835985988e-05,
      "loss": 2.2593,
      "step": 17253
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0537748336791992,
      "learning_rate": 1.771358181273387e-05,
      "loss": 2.4419,
      "step": 17254
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0277161598205566,
      "learning_rate": 1.7713319776405128e-05,
      "loss": 2.5034,
      "step": 17255
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0173609256744385,
      "learning_rate": 1.771305772700022e-05,
      "loss": 2.4014,
      "step": 17256
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.061394453048706,
      "learning_rate": 1.771279566451958e-05,
      "loss": 2.4415,
      "step": 17257
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9930486679077148,
      "learning_rate": 1.7712533588963662e-05,
      "loss": 2.2913,
      "step": 17258
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1016520261764526,
      "learning_rate": 1.77122715003329e-05,
      "loss": 2.7402,
      "step": 17259
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0428789854049683,
      "learning_rate": 1.7712009398627746e-05,
      "loss": 2.3053,
      "step": 17260
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0253909826278687,
      "learning_rate": 1.7711747283848642e-05,
      "loss": 2.6505,
      "step": 17261
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9377718567848206,
      "learning_rate": 1.771148515599603e-05,
      "loss": 2.4686,
      "step": 17262
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6017831563949585,
      "learning_rate": 1.7711223015070356e-05,
      "loss": 2.493,
      "step": 17263
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9596558809280396,
      "learning_rate": 1.7710960861072065e-05,
      "loss": 2.334,
      "step": 17264
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.976516604423523,
      "learning_rate": 1.7710698694001603e-05,
      "loss": 2.5495,
      "step": 17265
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0030245780944824,
      "learning_rate": 1.771043651385941e-05,
      "loss": 2.7316,
      "step": 17266
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.987290620803833,
      "learning_rate": 1.7710174320645932e-05,
      "loss": 2.3734,
      "step": 17267
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0929882526397705,
      "learning_rate": 1.770991211436162e-05,
      "loss": 2.415,
      "step": 17268
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9649865627288818,
      "learning_rate": 1.7709649895006906e-05,
      "loss": 2.3987,
      "step": 17269
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1981091499328613,
      "learning_rate": 1.7709387662582248e-05,
      "loss": 2.3577,
      "step": 17270
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9362917542457581,
      "learning_rate": 1.770912541708808e-05,
      "loss": 2.4537,
      "step": 17271
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.998814582824707,
      "learning_rate": 1.770886315852485e-05,
      "loss": 2.6627,
      "step": 17272
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0762470960617065,
      "learning_rate": 1.7708600886893005e-05,
      "loss": 2.3756,
      "step": 17273
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.029980182647705,
      "learning_rate": 1.770833860219299e-05,
      "loss": 2.3646,
      "step": 17274
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.0601149797439575,
      "learning_rate": 1.7708076304425246e-05,
      "loss": 2.721,
      "step": 17275
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9974309802055359,
      "learning_rate": 1.7707813993590215e-05,
      "loss": 2.5089,
      "step": 17276
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9292002320289612,
      "learning_rate": 1.7707551669688354e-05,
      "loss": 2.7321,
      "step": 17277
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.085128664970398,
      "learning_rate": 1.7707289332720094e-05,
      "loss": 2.2987,
      "step": 17278
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9972686171531677,
      "learning_rate": 1.7707026982685883e-05,
      "loss": 2.5225,
      "step": 17279
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.9640325903892517,
      "learning_rate": 1.7706764619586172e-05,
      "loss": 2.5893,
      "step": 17280
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0063191652297974,
      "learning_rate": 1.7706502243421402e-05,
      "loss": 2.4067,
      "step": 17281
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1103194952011108,
      "learning_rate": 1.7706239854192016e-05,
      "loss": 2.3966,
      "step": 17282
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.139939546585083,
      "learning_rate": 1.770597745189846e-05,
      "loss": 2.4092,
      "step": 17283
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1252305507659912,
      "learning_rate": 1.770571503654118e-05,
      "loss": 2.4135,
      "step": 17284
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9639040231704712,
      "learning_rate": 1.770545260812062e-05,
      "loss": 2.341,
      "step": 17285
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.167913556098938,
      "learning_rate": 1.770519016663723e-05,
      "loss": 2.4491,
      "step": 17286
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1809360980987549,
      "learning_rate": 1.7704927712091445e-05,
      "loss": 2.377,
      "step": 17287
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0286577939987183,
      "learning_rate": 1.7704665244483715e-05,
      "loss": 2.1727,
      "step": 17288
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0374308824539185,
      "learning_rate": 1.7704402763814486e-05,
      "loss": 2.4289,
      "step": 17289
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0447638034820557,
      "learning_rate": 1.7704140270084202e-05,
      "loss": 2.4114,
      "step": 17290
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1127450466156006,
      "learning_rate": 1.7703877763293307e-05,
      "loss": 2.4377,
      "step": 17291
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9612603783607483,
      "learning_rate": 1.7703615243442245e-05,
      "loss": 2.5556,
      "step": 17292
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0857914686203003,
      "learning_rate": 1.7703352710531466e-05,
      "loss": 2.4036,
      "step": 17293
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0075960159301758,
      "learning_rate": 1.770309016456141e-05,
      "loss": 2.7926,
      "step": 17294
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0262292623519897,
      "learning_rate": 1.7702827605532524e-05,
      "loss": 2.3812,
      "step": 17295
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9802611470222473,
      "learning_rate": 1.7702565033445255e-05,
      "loss": 2.4971,
      "step": 17296
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9983265995979309,
      "learning_rate": 1.7702302448300045e-05,
      "loss": 2.4671,
      "step": 17297
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9862893223762512,
      "learning_rate": 1.770203985009734e-05,
      "loss": 2.4939,
      "step": 17298
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0994293689727783,
      "learning_rate": 1.7701777238837586e-05,
      "loss": 2.451,
      "step": 17299
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9868623614311218,
      "learning_rate": 1.7701514614521232e-05,
      "loss": 2.556,
      "step": 17300
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.168972373008728,
      "learning_rate": 1.7701251977148716e-05,
      "loss": 2.714,
      "step": 17301
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9735391139984131,
      "learning_rate": 1.7700989326720485e-05,
      "loss": 2.6742,
      "step": 17302
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9382818937301636,
      "learning_rate": 1.7700726663236985e-05,
      "loss": 2.4194,
      "step": 17303
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0284911394119263,
      "learning_rate": 1.770046398669866e-05,
      "loss": 2.6179,
      "step": 17304
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.021375060081482,
      "learning_rate": 1.770020129710596e-05,
      "loss": 2.8369,
      "step": 17305
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9495466947555542,
      "learning_rate": 1.7699938594459327e-05,
      "loss": 2.6049,
      "step": 17306
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9413308501243591,
      "learning_rate": 1.7699675878759208e-05,
      "loss": 2.3819,
      "step": 17307
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0313947200775146,
      "learning_rate": 1.7699413150006046e-05,
      "loss": 2.5648,
      "step": 17308
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.026604175567627,
      "learning_rate": 1.769915040820029e-05,
      "loss": 2.4807,
      "step": 17309
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.060907244682312,
      "learning_rate": 1.769888765334238e-05,
      "loss": 2.5003,
      "step": 17310
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0624926090240479,
      "learning_rate": 1.7698624885432765e-05,
      "loss": 2.6867,
      "step": 17311
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1332372426986694,
      "learning_rate": 1.7698362104471888e-05,
      "loss": 2.6974,
      "step": 17312
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9679806232452393,
      "learning_rate": 1.76980993104602e-05,
      "loss": 2.4124,
      "step": 17313
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0828427076339722,
      "learning_rate": 1.7697836503398142e-05,
      "loss": 2.4349,
      "step": 17314
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9805865287780762,
      "learning_rate": 1.769757368328616e-05,
      "loss": 2.4072,
      "step": 17315
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1661417484283447,
      "learning_rate": 1.7697310850124704e-05,
      "loss": 2.2993,
      "step": 17316
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9831217527389526,
      "learning_rate": 1.769704800391421e-05,
      "loss": 2.6479,
      "step": 17317
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.019398808479309,
      "learning_rate": 1.769678514465513e-05,
      "loss": 2.4174,
      "step": 17318
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.148786187171936,
      "learning_rate": 1.769652227234791e-05,
      "loss": 2.4637,
      "step": 17319
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5954978466033936,
      "learning_rate": 1.7696259386992992e-05,
      "loss": 2.5201,
      "step": 17320
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0322083234786987,
      "learning_rate": 1.7695996488590824e-05,
      "loss": 2.4299,
      "step": 17321
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0814138650894165,
      "learning_rate": 1.7695733577141854e-05,
      "loss": 2.5524,
      "step": 17322
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1188055276870728,
      "learning_rate": 1.7695470652646528e-05,
      "loss": 2.4377,
      "step": 17323
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0375640392303467,
      "learning_rate": 1.7695207715105284e-05,
      "loss": 2.7024,
      "step": 17324
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9432722330093384,
      "learning_rate": 1.7694944764518575e-05,
      "loss": 2.5241,
      "step": 17325
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0998495817184448,
      "learning_rate": 1.7694681800886846e-05,
      "loss": 2.3803,
      "step": 17326
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1560457944869995,
      "learning_rate": 1.7694418824210543e-05,
      "loss": 2.3647,
      "step": 17327
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9966299533843994,
      "learning_rate": 1.7694155834490105e-05,
      "loss": 2.4975,
      "step": 17328
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1095240116119385,
      "learning_rate": 1.7693892831725986e-05,
      "loss": 2.5921,
      "step": 17329
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.152627944946289,
      "learning_rate": 1.769362981591863e-05,
      "loss": 2.4976,
      "step": 17330
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9947757124900818,
      "learning_rate": 1.769336678706848e-05,
      "loss": 2.7459,
      "step": 17331
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9765461087226868,
      "learning_rate": 1.7693103745175985e-05,
      "loss": 2.5664,
      "step": 17332
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9494974613189697,
      "learning_rate": 1.769284069024159e-05,
      "loss": 2.4869,
      "step": 17333
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9632716774940491,
      "learning_rate": 1.769257762226574e-05,
      "loss": 2.8076,
      "step": 17334
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9768856167793274,
      "learning_rate": 1.769231454124888e-05,
      "loss": 2.5645,
      "step": 17335
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.012442708015442,
      "learning_rate": 1.769205144719146e-05,
      "loss": 2.4776,
      "step": 17336
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1068953275680542,
      "learning_rate": 1.7691788340093923e-05,
      "loss": 2.2061,
      "step": 17337
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1015523672103882,
      "learning_rate": 1.7691525219956716e-05,
      "loss": 2.3991,
      "step": 17338
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0322989225387573,
      "learning_rate": 1.7691262086780284e-05,
      "loss": 2.6024,
      "step": 17339
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0549739599227905,
      "learning_rate": 1.7690998940565075e-05,
      "loss": 2.3371,
      "step": 17340
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9834441542625427,
      "learning_rate": 1.769073578131153e-05,
      "loss": 2.3425,
      "step": 17341
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9429751634597778,
      "learning_rate": 1.7690472609020104e-05,
      "loss": 2.3574,
      "step": 17342
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9600780010223389,
      "learning_rate": 1.7690209423691237e-05,
      "loss": 2.3658,
      "step": 17343
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0222833156585693,
      "learning_rate": 1.7689946225325374e-05,
      "loss": 2.4845,
      "step": 17344
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0215891599655151,
      "learning_rate": 1.7689683013922966e-05,
      "loss": 2.4552,
      "step": 17345
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0379303693771362,
      "learning_rate": 1.7689419789484457e-05,
      "loss": 2.2735,
      "step": 17346
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9776690006256104,
      "learning_rate": 1.7689156552010294e-05,
      "loss": 2.5474,
      "step": 17347
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9462987780570984,
      "learning_rate": 1.768889330150092e-05,
      "loss": 2.5958,
      "step": 17348
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0054547786712646,
      "learning_rate": 1.7688630037956784e-05,
      "loss": 2.5278,
      "step": 17349
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.061690092086792,
      "learning_rate": 1.768836676137833e-05,
      "loss": 2.3801,
      "step": 17350
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.095315933227539,
      "learning_rate": 1.768810347176601e-05,
      "loss": 2.502,
      "step": 17351
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9879916906356812,
      "learning_rate": 1.7687840169120265e-05,
      "loss": 2.5469,
      "step": 17352
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9186292886734009,
      "learning_rate": 1.7687576853441545e-05,
      "loss": 2.4264,
      "step": 17353
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0148324966430664,
      "learning_rate": 1.768731352473029e-05,
      "loss": 2.4626,
      "step": 17354
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9850149154663086,
      "learning_rate": 1.7687050182986952e-05,
      "loss": 2.3339,
      "step": 17355
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9913727641105652,
      "learning_rate": 1.7686786828211977e-05,
      "loss": 2.4481,
      "step": 17356
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0084196329116821,
      "learning_rate": 1.7686523460405813e-05,
      "loss": 2.6287,
      "step": 17357
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0367767810821533,
      "learning_rate": 1.76862600795689e-05,
      "loss": 2.453,
      "step": 17358
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.018746018409729,
      "learning_rate": 1.768599668570169e-05,
      "loss": 2.3535,
      "step": 17359
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0421594381332397,
      "learning_rate": 1.7685733278804628e-05,
      "loss": 2.6206,
      "step": 17360
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.103203296661377,
      "learning_rate": 1.768546985887816e-05,
      "loss": 2.6661,
      "step": 17361
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.016556739807129,
      "learning_rate": 1.7685206425922736e-05,
      "loss": 2.4893,
      "step": 17362
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.002409815788269,
      "learning_rate": 1.76849429799388e-05,
      "loss": 2.6244,
      "step": 17363
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0864630937576294,
      "learning_rate": 1.7684679520926797e-05,
      "loss": 2.4884,
      "step": 17364
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0531197786331177,
      "learning_rate": 1.768441604888717e-05,
      "loss": 2.4849,
      "step": 17365
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2710325717926025,
      "learning_rate": 1.768415256382038e-05,
      "loss": 2.5251,
      "step": 17366
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0367870330810547,
      "learning_rate": 1.7683889065726858e-05,
      "loss": 2.5838,
      "step": 17367
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9203677773475647,
      "learning_rate": 1.7683625554607056e-05,
      "loss": 2.4418,
      "step": 17368
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9398068785667419,
      "learning_rate": 1.768336203046143e-05,
      "loss": 2.4207,
      "step": 17369
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0912609100341797,
      "learning_rate": 1.768309849329041e-05,
      "loss": 2.7106,
      "step": 17370
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1050916910171509,
      "learning_rate": 1.7682834943094455e-05,
      "loss": 2.5579,
      "step": 17371
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9368932843208313,
      "learning_rate": 1.768257137987401e-05,
      "loss": 2.5065,
      "step": 17372
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9727880954742432,
      "learning_rate": 1.7682307803629516e-05,
      "loss": 2.491,
      "step": 17373
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9913049936294556,
      "learning_rate": 1.7682044214361428e-05,
      "loss": 2.5001,
      "step": 17374
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0048977136611938,
      "learning_rate": 1.7681780612070185e-05,
      "loss": 2.582,
      "step": 17375
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0156288146972656,
      "learning_rate": 1.768151699675624e-05,
      "loss": 2.6181,
      "step": 17376
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.005406379699707,
      "learning_rate": 1.7681253368420038e-05,
      "loss": 2.3413,
      "step": 17377
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9686049222946167,
      "learning_rate": 1.7680989727062022e-05,
      "loss": 2.5095,
      "step": 17378
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.043878197669983,
      "learning_rate": 1.7680726072682643e-05,
      "loss": 2.4078,
      "step": 17379
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.974137008190155,
      "learning_rate": 1.768046240528235e-05,
      "loss": 2.5356,
      "step": 17380
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9778615236282349,
      "learning_rate": 1.7680198724861582e-05,
      "loss": 2.4327,
      "step": 17381
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9651901125907898,
      "learning_rate": 1.7679935031420795e-05,
      "loss": 2.425,
      "step": 17382
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0324784517288208,
      "learning_rate": 1.7679671324960433e-05,
      "loss": 2.5937,
      "step": 17383
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9845941662788391,
      "learning_rate": 1.767940760548094e-05,
      "loss": 2.5038,
      "step": 17384
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1189067363739014,
      "learning_rate": 1.7679143872982766e-05,
      "loss": 2.3922,
      "step": 17385
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0693612098693848,
      "learning_rate": 1.767888012746636e-05,
      "loss": 2.4369,
      "step": 17386
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.155863881111145,
      "learning_rate": 1.7678616368932164e-05,
      "loss": 2.4298,
      "step": 17387
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1143189668655396,
      "learning_rate": 1.767835259738063e-05,
      "loss": 2.3555,
      "step": 17388
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0448235273361206,
      "learning_rate": 1.7678088812812203e-05,
      "loss": 2.5034,
      "step": 17389
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.15091872215271,
      "learning_rate": 1.767782501522733e-05,
      "loss": 2.5439,
      "step": 17390
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1471874713897705,
      "learning_rate": 1.767756120462645e-05,
      "loss": 2.3487,
      "step": 17391
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1344804763793945,
      "learning_rate": 1.767729738101003e-05,
      "loss": 2.4943,
      "step": 17392
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.079332709312439,
      "learning_rate": 1.7677033544378498e-05,
      "loss": 2.4872,
      "step": 17393
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0219255685806274,
      "learning_rate": 1.7676769694732315e-05,
      "loss": 2.2386,
      "step": 17394
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0377577543258667,
      "learning_rate": 1.767650583207192e-05,
      "loss": 2.3456,
      "step": 17395
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0433963537216187,
      "learning_rate": 1.7676241956397763e-05,
      "loss": 2.4619,
      "step": 17396
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0032005310058594,
      "learning_rate": 1.7675978067710287e-05,
      "loss": 2.4601,
      "step": 17397
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.130057692527771,
      "learning_rate": 1.767571416600995e-05,
      "loss": 2.5534,
      "step": 17398
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9046450257301331,
      "learning_rate": 1.767545025129719e-05,
      "loss": 2.3358,
      "step": 17399
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0385838747024536,
      "learning_rate": 1.7675186323572456e-05,
      "loss": 2.6091,
      "step": 17400
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0517281293869019,
      "learning_rate": 1.7674922382836198e-05,
      "loss": 2.5905,
      "step": 17401
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9367307424545288,
      "learning_rate": 1.767465842908886e-05,
      "loss": 2.4028,
      "step": 17402
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9613519906997681,
      "learning_rate": 1.7674394462330893e-05,
      "loss": 2.3927,
      "step": 17403
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0659418106079102,
      "learning_rate": 1.7674130482562747e-05,
      "loss": 2.4881,
      "step": 17404
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.05719792842865,
      "learning_rate": 1.767386648978486e-05,
      "loss": 2.7522,
      "step": 17405
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0586812496185303,
      "learning_rate": 1.7673602483997684e-05,
      "loss": 2.6569,
      "step": 17406
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.034346103668213,
      "learning_rate": 1.7673338465201672e-05,
      "loss": 2.4558,
      "step": 17407
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9255325794219971,
      "learning_rate": 1.7673074433397266e-05,
      "loss": 2.4782,
      "step": 17408
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9008747935295105,
      "learning_rate": 1.7672810388584915e-05,
      "loss": 2.4981,
      "step": 17409
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0845264196395874,
      "learning_rate": 1.7672546330765068e-05,
      "loss": 2.5687,
      "step": 17410
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9243091344833374,
      "learning_rate": 1.7672282259938166e-05,
      "loss": 2.2625,
      "step": 17411
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1143276691436768,
      "learning_rate": 1.7672018176104667e-05,
      "loss": 2.6254,
      "step": 17412
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.086405634880066,
      "learning_rate": 1.767175407926501e-05,
      "loss": 2.3597,
      "step": 17413
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.048890233039856,
      "learning_rate": 1.767148996941965e-05,
      "loss": 2.4305,
      "step": 17414
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0345877408981323,
      "learning_rate": 1.7671225846569027e-05,
      "loss": 2.5531,
      "step": 17415
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1859815120697021,
      "learning_rate": 1.7670961710713593e-05,
      "loss": 2.6751,
      "step": 17416
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0389323234558105,
      "learning_rate": 1.76706975618538e-05,
      "loss": 2.4952,
      "step": 17417
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9994369745254517,
      "learning_rate": 1.767043339999009e-05,
      "loss": 2.542,
      "step": 17418
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0331677198410034,
      "learning_rate": 1.767016922512291e-05,
      "loss": 2.3518,
      "step": 17419
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0611509084701538,
      "learning_rate": 1.7669905037252708e-05,
      "loss": 2.3238,
      "step": 17420
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1252328157424927,
      "learning_rate": 1.7669640836379936e-05,
      "loss": 2.6696,
      "step": 17421
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0638427734375,
      "learning_rate": 1.766937662250504e-05,
      "loss": 2.5602,
      "step": 17422
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1273412704467773,
      "learning_rate": 1.766911239562847e-05,
      "loss": 2.4642,
      "step": 17423
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1356862783432007,
      "learning_rate": 1.7668848155750667e-05,
      "loss": 2.4345,
      "step": 17424
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9934941530227661,
      "learning_rate": 1.7668583902872086e-05,
      "loss": 2.3529,
      "step": 17425
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0097169876098633,
      "learning_rate": 1.766831963699317e-05,
      "loss": 2.6209,
      "step": 17426
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9668060541152954,
      "learning_rate": 1.7668055358114374e-05,
      "loss": 2.5967,
      "step": 17427
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0499296188354492,
      "learning_rate": 1.766779106623614e-05,
      "loss": 2.4325,
      "step": 17428
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.999610185623169,
      "learning_rate": 1.7667526761358916e-05,
      "loss": 2.2913,
      "step": 17429
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9991300702095032,
      "learning_rate": 1.7667262443483153e-05,
      "loss": 2.8174,
      "step": 17430
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2061823606491089,
      "learning_rate": 1.7666998112609297e-05,
      "loss": 2.3115,
      "step": 17431
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.343781590461731,
      "learning_rate": 1.7666733768737797e-05,
      "loss": 2.4752,
      "step": 17432
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1357899904251099,
      "learning_rate": 1.7666469411869103e-05,
      "loss": 2.4454,
      "step": 17433
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1617426872253418,
      "learning_rate": 1.766620504200366e-05,
      "loss": 2.6049,
      "step": 17434
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0729655027389526,
      "learning_rate": 1.7665940659141916e-05,
      "loss": 2.584,
      "step": 17435
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0760281085968018,
      "learning_rate": 1.7665676263284322e-05,
      "loss": 2.562,
      "step": 17436
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.037741780281067,
      "learning_rate": 1.766541185443132e-05,
      "loss": 2.4884,
      "step": 17437
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0698481798171997,
      "learning_rate": 1.7665147432583368e-05,
      "loss": 2.5077,
      "step": 17438
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1249173879623413,
      "learning_rate": 1.766488299774091e-05,
      "loss": 2.4593,
      "step": 17439
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0768706798553467,
      "learning_rate": 1.766461854990439e-05,
      "loss": 2.2028,
      "step": 17440
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1611032485961914,
      "learning_rate": 1.7664354089074263e-05,
      "loss": 2.4177,
      "step": 17441
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1403148174285889,
      "learning_rate": 1.7664089615250976e-05,
      "loss": 2.2849,
      "step": 17442
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0130090713500977,
      "learning_rate": 1.766382512843497e-05,
      "loss": 2.4944,
      "step": 17443
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0199992656707764,
      "learning_rate": 1.76635606286267e-05,
      "loss": 2.4843,
      "step": 17444
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0972471237182617,
      "learning_rate": 1.766329611582662e-05,
      "loss": 2.708,
      "step": 17445
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0885424613952637,
      "learning_rate": 1.7663031590035163e-05,
      "loss": 2.2067,
      "step": 17446
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0557953119277954,
      "learning_rate": 1.766276705125279e-05,
      "loss": 2.4357,
      "step": 17447
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0635457038879395,
      "learning_rate": 1.7662502499479945e-05,
      "loss": 2.4793,
      "step": 17448
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9666638970375061,
      "learning_rate": 1.7662237934717076e-05,
      "loss": 2.3088,
      "step": 17449
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0667054653167725,
      "learning_rate": 1.766197335696464e-05,
      "loss": 2.5484,
      "step": 17450
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9752022624015808,
      "learning_rate": 1.766170876622307e-05,
      "loss": 2.5302,
      "step": 17451
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2604308128356934,
      "learning_rate": 1.7661444162492826e-05,
      "loss": 2.458,
      "step": 17452
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0301570892333984,
      "learning_rate": 1.766117954577435e-05,
      "loss": 2.4723,
      "step": 17453
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0950229167938232,
      "learning_rate": 1.7660914916068098e-05,
      "loss": 2.4752,
      "step": 17454
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1185146570205688,
      "learning_rate": 1.7660650273374512e-05,
      "loss": 2.5468,
      "step": 17455
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1209156513214111,
      "learning_rate": 1.7660385617694044e-05,
      "loss": 2.4705,
      "step": 17456
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9962239265441895,
      "learning_rate": 1.766012094902714e-05,
      "loss": 2.3984,
      "step": 17457
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9234871864318848,
      "learning_rate": 1.7659856267374254e-05,
      "loss": 2.5072,
      "step": 17458
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0012625455856323,
      "learning_rate": 1.7659591572735833e-05,
      "loss": 2.4415,
      "step": 17459
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0069934129714966,
      "learning_rate": 1.765932686511232e-05,
      "loss": 2.4404,
      "step": 17460
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9720304608345032,
      "learning_rate": 1.7659062144504168e-05,
      "loss": 2.5109,
      "step": 17461
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.036932349205017,
      "learning_rate": 1.7658797410911827e-05,
      "loss": 2.3342,
      "step": 17462
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9613587856292725,
      "learning_rate": 1.765853266433574e-05,
      "loss": 2.448,
      "step": 17463
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0374763011932373,
      "learning_rate": 1.7658267904776365e-05,
      "loss": 2.4373,
      "step": 17464
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0473575592041016,
      "learning_rate": 1.7658003132234146e-05,
      "loss": 2.3681,
      "step": 17465
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1284294128417969,
      "learning_rate": 1.7657738346709528e-05,
      "loss": 2.5729,
      "step": 17466
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0932884216308594,
      "learning_rate": 1.7657473548202967e-05,
      "loss": 2.4254,
      "step": 17467
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9855012893676758,
      "learning_rate": 1.7657208736714905e-05,
      "loss": 2.3925,
      "step": 17468
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.00078284740448,
      "learning_rate": 1.7656943912245798e-05,
      "loss": 2.5695,
      "step": 17469
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0301600694656372,
      "learning_rate": 1.765667907479609e-05,
      "loss": 2.38,
      "step": 17470
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1184194087982178,
      "learning_rate": 1.765641422436623e-05,
      "loss": 2.4131,
      "step": 17471
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0895376205444336,
      "learning_rate": 1.7656149360956673e-05,
      "loss": 2.5668,
      "step": 17472
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1552197933197021,
      "learning_rate": 1.7655884484567862e-05,
      "loss": 2.6412,
      "step": 17473
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1817212104797363,
      "learning_rate": 1.7655619595200243e-05,
      "loss": 2.4789,
      "step": 17474
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0014159679412842,
      "learning_rate": 1.7655354692854273e-05,
      "loss": 2.4129,
      "step": 17475
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.038488745689392,
      "learning_rate": 1.7655089777530397e-05,
      "loss": 2.3153,
      "step": 17476
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.090195894241333,
      "learning_rate": 1.7654824849229068e-05,
      "loss": 2.5279,
      "step": 17477
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0593390464782715,
      "learning_rate": 1.765455990795073e-05,
      "loss": 2.6076,
      "step": 17478
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0410548448562622,
      "learning_rate": 1.765429495369583e-05,
      "loss": 2.6425,
      "step": 17479
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1417337656021118,
      "learning_rate": 1.7654029986464823e-05,
      "loss": 2.493,
      "step": 17480
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0647920370101929,
      "learning_rate": 1.765376500625816e-05,
      "loss": 2.7641,
      "step": 17481
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9617193937301636,
      "learning_rate": 1.7653500013076284e-05,
      "loss": 2.5931,
      "step": 17482
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.045417308807373,
      "learning_rate": 1.7653235006919647e-05,
      "loss": 2.4405,
      "step": 17483
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0668408870697021,
      "learning_rate": 1.76529699877887e-05,
      "loss": 2.4096,
      "step": 17484
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1214563846588135,
      "learning_rate": 1.765270495568389e-05,
      "loss": 2.7528,
      "step": 17485
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9628027081489563,
      "learning_rate": 1.765243991060566e-05,
      "loss": 2.2497,
      "step": 17486
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.095557689666748,
      "learning_rate": 1.7652174852554473e-05,
      "loss": 2.345,
      "step": 17487
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0482008457183838,
      "learning_rate": 1.765190978153077e-05,
      "loss": 2.5447,
      "step": 17488
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1525262594223022,
      "learning_rate": 1.7651644697535e-05,
      "loss": 2.4238,
      "step": 17489
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0785093307495117,
      "learning_rate": 1.7651379600567614e-05,
      "loss": 2.6205,
      "step": 17490
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0135178565979004,
      "learning_rate": 1.7651114490629065e-05,
      "loss": 2.2671,
      "step": 17491
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0043584108352661,
      "learning_rate": 1.7650849367719796e-05,
      "loss": 2.4172,
      "step": 17492
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.949711263179779,
      "learning_rate": 1.765058423184026e-05,
      "loss": 2.3865,
      "step": 17493
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9802866578102112,
      "learning_rate": 1.7650319082990904e-05,
      "loss": 2.5281,
      "step": 17494
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0281832218170166,
      "learning_rate": 1.7650053921172183e-05,
      "loss": 2.4234,
      "step": 17495
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0089137554168701,
      "learning_rate": 1.764978874638454e-05,
      "loss": 2.4739,
      "step": 17496
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0387959480285645,
      "learning_rate": 1.764952355862843e-05,
      "loss": 2.5085,
      "step": 17497
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9912595152854919,
      "learning_rate": 1.7649258357904298e-05,
      "loss": 2.4671,
      "step": 17498
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0859789848327637,
      "learning_rate": 1.7648993144212595e-05,
      "loss": 2.6097,
      "step": 17499
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.106294870376587,
      "learning_rate": 1.7648727917553773e-05,
      "loss": 2.5146,
      "step": 17500
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9914864897727966,
      "learning_rate": 1.764846267792828e-05,
      "loss": 2.4923,
      "step": 17501
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.06602144241333,
      "learning_rate": 1.764819742533656e-05,
      "loss": 2.4261,
      "step": 17502
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1235811710357666,
      "learning_rate": 1.7647932159779072e-05,
      "loss": 2.4232,
      "step": 17503
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1057711839675903,
      "learning_rate": 1.7647666881256264e-05,
      "loss": 2.5099,
      "step": 17504
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9259084463119507,
      "learning_rate": 1.7647401589768583e-05,
      "loss": 2.4168,
      "step": 17505
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9803548455238342,
      "learning_rate": 1.7647136285316476e-05,
      "loss": 2.6054,
      "step": 17506
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9787020683288574,
      "learning_rate": 1.7646870967900402e-05,
      "loss": 2.4685,
      "step": 17507
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0235280990600586,
      "learning_rate": 1.76466056375208e-05,
      "loss": 2.4832,
      "step": 17508
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9473618865013123,
      "learning_rate": 1.7646340294178127e-05,
      "loss": 2.4969,
      "step": 17509
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0061064958572388,
      "learning_rate": 1.764607493787283e-05,
      "loss": 2.3355,
      "step": 17510
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1469923257827759,
      "learning_rate": 1.7645809568605358e-05,
      "loss": 2.4914,
      "step": 17511
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0614840984344482,
      "learning_rate": 1.7645544186376162e-05,
      "loss": 2.3456,
      "step": 17512
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3744947910308838,
      "learning_rate": 1.7645278791185695e-05,
      "loss": 2.4267,
      "step": 17513
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0414068698883057,
      "learning_rate": 1.76450133830344e-05,
      "loss": 2.3833,
      "step": 17514
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9802369475364685,
      "learning_rate": 1.7644747961922734e-05,
      "loss": 2.3993,
      "step": 17515
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9676508903503418,
      "learning_rate": 1.764448252785114e-05,
      "loss": 2.6356,
      "step": 17516
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9926956295967102,
      "learning_rate": 1.7644217080820075e-05,
      "loss": 2.6192,
      "step": 17517
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9729257822036743,
      "learning_rate": 1.7643951620829985e-05,
      "loss": 2.3304,
      "step": 17518
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1112117767333984,
      "learning_rate": 1.7643686147881324e-05,
      "loss": 2.486,
      "step": 17519
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0754412412643433,
      "learning_rate": 1.7643420661974537e-05,
      "loss": 2.4827,
      "step": 17520
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9863398671150208,
      "learning_rate": 1.7643155163110073e-05,
      "loss": 2.2365,
      "step": 17521
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0858150720596313,
      "learning_rate": 1.764288965128839e-05,
      "loss": 2.5735,
      "step": 17522
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.043434977531433,
      "learning_rate": 1.7642624126509932e-05,
      "loss": 2.4541,
      "step": 17523
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.019716739654541,
      "learning_rate": 1.7642358588775147e-05,
      "loss": 2.5226,
      "step": 17524
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9959703087806702,
      "learning_rate": 1.7642093038084494e-05,
      "loss": 2.4494,
      "step": 17525
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0684117078781128,
      "learning_rate": 1.7641827474438412e-05,
      "loss": 2.6921,
      "step": 17526
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.029737114906311,
      "learning_rate": 1.764156189783736e-05,
      "loss": 2.3583,
      "step": 17527
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9966765642166138,
      "learning_rate": 1.7641296308281787e-05,
      "loss": 2.2819,
      "step": 17528
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1256129741668701,
      "learning_rate": 1.7641030705772136e-05,
      "loss": 2.6733,
      "step": 17529
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1028636693954468,
      "learning_rate": 1.7640765090308867e-05,
      "loss": 2.2696,
      "step": 17530
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0474047660827637,
      "learning_rate": 1.7640499461892425e-05,
      "loss": 2.1775,
      "step": 17531
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9381707906723022,
      "learning_rate": 1.7640233820523263e-05,
      "loss": 2.7803,
      "step": 17532
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.930486798286438,
      "learning_rate": 1.7639968166201823e-05,
      "loss": 2.5003,
      "step": 17533
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1419063806533813,
      "learning_rate": 1.7639702498928568e-05,
      "loss": 2.5515,
      "step": 17534
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9747002720832825,
      "learning_rate": 1.7639436818703945e-05,
      "loss": 2.4887,
      "step": 17535
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0898256301879883,
      "learning_rate": 1.7639171125528395e-05,
      "loss": 2.3009,
      "step": 17536
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0546921491622925,
      "learning_rate": 1.763890541940238e-05,
      "loss": 2.4521,
      "step": 17537
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1122045516967773,
      "learning_rate": 1.7638639700326344e-05,
      "loss": 2.3448,
      "step": 17538
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1039165258407593,
      "learning_rate": 1.7638373968300736e-05,
      "loss": 2.4251,
      "step": 17539
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1330584287643433,
      "learning_rate": 1.763810822332601e-05,
      "loss": 2.4902,
      "step": 17540
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9757827520370483,
      "learning_rate": 1.7637842465402623e-05,
      "loss": 2.3487,
      "step": 17541
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9733985066413879,
      "learning_rate": 1.763757669453101e-05,
      "loss": 2.3533,
      "step": 17542
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0122677087783813,
      "learning_rate": 1.7637310910711636e-05,
      "loss": 2.4673,
      "step": 17543
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1004778146743774,
      "learning_rate": 1.7637045113944946e-05,
      "loss": 2.4107,
      "step": 17544
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0685656070709229,
      "learning_rate": 1.7636779304231385e-05,
      "loss": 2.485,
      "step": 17545
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.302999496459961,
      "learning_rate": 1.7636513481571412e-05,
      "loss": 2.526,
      "step": 17546
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0200343132019043,
      "learning_rate": 1.7636247645965477e-05,
      "loss": 2.5761,
      "step": 17547
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0585367679595947,
      "learning_rate": 1.7635981797414025e-05,
      "loss": 2.4694,
      "step": 17548
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9573979377746582,
      "learning_rate": 1.763571593591751e-05,
      "loss": 2.4132,
      "step": 17549
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0100252628326416,
      "learning_rate": 1.7635450061476388e-05,
      "loss": 2.4042,
      "step": 17550
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1179455518722534,
      "learning_rate": 1.7635184174091098e-05,
      "loss": 2.5553,
      "step": 17551
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9959757328033447,
      "learning_rate": 1.76349182737621e-05,
      "loss": 2.4087,
      "step": 17552
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9491451978683472,
      "learning_rate": 1.763465236048984e-05,
      "loss": 2.5487,
      "step": 17553
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0199960470199585,
      "learning_rate": 1.763438643427477e-05,
      "loss": 2.4828,
      "step": 17554
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0750021934509277,
      "learning_rate": 1.7634120495117347e-05,
      "loss": 2.6557,
      "step": 17555
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0665684938430786,
      "learning_rate": 1.763385454301801e-05,
      "loss": 2.241,
      "step": 17556
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0350620746612549,
      "learning_rate": 1.7633588577977218e-05,
      "loss": 2.417,
      "step": 17557
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2145721912384033,
      "learning_rate": 1.7633322599995422e-05,
      "loss": 2.8,
      "step": 17558
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1887167692184448,
      "learning_rate": 1.7633056609073073e-05,
      "loss": 2.5914,
      "step": 17559
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0868548154830933,
      "learning_rate": 1.7632790605210615e-05,
      "loss": 2.2365,
      "step": 17560
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1406474113464355,
      "learning_rate": 1.7632524588408503e-05,
      "loss": 2.6762,
      "step": 17561
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9396053552627563,
      "learning_rate": 1.7632258558667193e-05,
      "loss": 2.5926,
      "step": 17562
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0088380575180054,
      "learning_rate": 1.763199251598713e-05,
      "loss": 2.4608,
      "step": 17563
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1268117427825928,
      "learning_rate": 1.763172646036877e-05,
      "loss": 2.3279,
      "step": 17564
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9913615584373474,
      "learning_rate": 1.7631460391812555e-05,
      "loss": 2.3792,
      "step": 17565
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4674053192138672,
      "learning_rate": 1.763119431031895e-05,
      "loss": 2.529,
      "step": 17566
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.018248200416565,
      "learning_rate": 1.763092821588839e-05,
      "loss": 2.3576,
      "step": 17567
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9654186367988586,
      "learning_rate": 1.763066210852134e-05,
      "loss": 2.4712,
      "step": 17568
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.053540825843811,
      "learning_rate": 1.763039598821824e-05,
      "loss": 2.5477,
      "step": 17569
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0618172883987427,
      "learning_rate": 1.763012985497955e-05,
      "loss": 2.5538,
      "step": 17570
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0441008806228638,
      "learning_rate": 1.7629863708805716e-05,
      "loss": 2.7441,
      "step": 17571
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3486639261245728,
      "learning_rate": 1.7629597549697193e-05,
      "loss": 2.482,
      "step": 17572
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2395771741867065,
      "learning_rate": 1.7629331377654426e-05,
      "loss": 2.2099,
      "step": 17573
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0842951536178589,
      "learning_rate": 1.7629065192677876e-05,
      "loss": 2.3295,
      "step": 17574
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0485116243362427,
      "learning_rate": 1.7628798994767983e-05,
      "loss": 2.3567,
      "step": 17575
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0059831142425537,
      "learning_rate": 1.7628532783925205e-05,
      "loss": 2.2848,
      "step": 17576
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0735048055648804,
      "learning_rate": 1.7628266560149995e-05,
      "loss": 2.4649,
      "step": 17577
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9068537950515747,
      "learning_rate": 1.76280003234428e-05,
      "loss": 2.5564,
      "step": 17578
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1002804040908813,
      "learning_rate": 1.762773407380407e-05,
      "loss": 2.6394,
      "step": 17579
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0885541439056396,
      "learning_rate": 1.762746781123426e-05,
      "loss": 2.6323,
      "step": 17580
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.121634602546692,
      "learning_rate": 1.7627201535733823e-05,
      "loss": 2.6191,
      "step": 17581
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9788912534713745,
      "learning_rate": 1.7626935247303207e-05,
      "loss": 2.7342,
      "step": 17582
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1744561195373535,
      "learning_rate": 1.7626668945942862e-05,
      "loss": 2.488,
      "step": 17583
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1329797506332397,
      "learning_rate": 1.7626402631653245e-05,
      "loss": 2.4737,
      "step": 17584
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0078352689743042,
      "learning_rate": 1.76261363044348e-05,
      "loss": 2.2966,
      "step": 17585
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9983193278312683,
      "learning_rate": 1.7625869964287988e-05,
      "loss": 2.4137,
      "step": 17586
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0674693584442139,
      "learning_rate": 1.7625603611213253e-05,
      "loss": 2.4908,
      "step": 17587
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1081901788711548,
      "learning_rate": 1.762533724521105e-05,
      "loss": 2.7764,
      "step": 17588
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1224101781845093,
      "learning_rate": 1.7625070866281825e-05,
      "loss": 2.5197,
      "step": 17589
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0238335132598877,
      "learning_rate": 1.762480447442604e-05,
      "loss": 2.4901,
      "step": 17590
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.012082576751709,
      "learning_rate": 1.7624538069644134e-05,
      "loss": 2.2725,
      "step": 17591
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9499267339706421,
      "learning_rate": 1.7624271651936567e-05,
      "loss": 2.627,
      "step": 17592
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0431299209594727,
      "learning_rate": 1.7624005221303796e-05,
      "loss": 2.4958,
      "step": 17593
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.021013617515564,
      "learning_rate": 1.7623738777746258e-05,
      "loss": 2.3562,
      "step": 17594
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0041847229003906,
      "learning_rate": 1.7623472321264413e-05,
      "loss": 2.6352,
      "step": 17595
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9310562610626221,
      "learning_rate": 1.7623205851858712e-05,
      "loss": 2.6035,
      "step": 17596
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9552452564239502,
      "learning_rate": 1.762293936952961e-05,
      "loss": 2.3434,
      "step": 17597
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0269263982772827,
      "learning_rate": 1.7622672874277552e-05,
      "loss": 2.3969,
      "step": 17598
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1032041311264038,
      "learning_rate": 1.7622406366102994e-05,
      "loss": 2.2929,
      "step": 17599
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0054022073745728,
      "learning_rate": 1.762213984500639e-05,
      "loss": 2.5078,
      "step": 17600
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0145758390426636,
      "learning_rate": 1.762187331098818e-05,
      "loss": 2.7157,
      "step": 17601
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9713075160980225,
      "learning_rate": 1.7621606764048835e-05,
      "loss": 2.3873,
      "step": 17602
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0885746479034424,
      "learning_rate": 1.7621340204188794e-05,
      "loss": 2.3687,
      "step": 17603
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1242096424102783,
      "learning_rate": 1.7621073631408506e-05,
      "loss": 2.409,
      "step": 17604
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0859251022338867,
      "learning_rate": 1.7620807045708434e-05,
      "loss": 2.4848,
      "step": 17605
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0190613269805908,
      "learning_rate": 1.7620540447089022e-05,
      "loss": 2.4512,
      "step": 17606
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1176661252975464,
      "learning_rate": 1.7620273835550725e-05,
      "loss": 2.6971,
      "step": 17607
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0168415307998657,
      "learning_rate": 1.7620007211093993e-05,
      "loss": 2.5455,
      "step": 17608
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0951610803604126,
      "learning_rate": 1.761974057371928e-05,
      "loss": 2.4012,
      "step": 17609
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0755077600479126,
      "learning_rate": 1.7619473923427038e-05,
      "loss": 2.377,
      "step": 17610
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9806486964225769,
      "learning_rate": 1.7619207260217718e-05,
      "loss": 2.7352,
      "step": 17611
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.042367696762085,
      "learning_rate": 1.761894058409177e-05,
      "loss": 2.5757,
      "step": 17612
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0446560382843018,
      "learning_rate": 1.761867389504965e-05,
      "loss": 2.2813,
      "step": 17613
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2453625202178955,
      "learning_rate": 1.7618407193091812e-05,
      "loss": 2.4715,
      "step": 17614
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3024126291275024,
      "learning_rate": 1.76181404782187e-05,
      "loss": 2.3921,
      "step": 17615
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1153881549835205,
      "learning_rate": 1.7617873750430773e-05,
      "loss": 2.4884,
      "step": 17616
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.884592592716217,
      "learning_rate": 1.7617607009728476e-05,
      "loss": 2.4888,
      "step": 17617
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0010353326797485,
      "learning_rate": 1.761734025611227e-05,
      "loss": 2.414,
      "step": 17618
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.136229395866394,
      "learning_rate": 1.7617073489582605e-05,
      "loss": 2.448,
      "step": 17619
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1710152626037598,
      "learning_rate": 1.761680671013993e-05,
      "loss": 2.5852,
      "step": 17620
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0395963191986084,
      "learning_rate": 1.76165399177847e-05,
      "loss": 2.4057,
      "step": 17621
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0084729194641113,
      "learning_rate": 1.761627311251737e-05,
      "loss": 2.5337,
      "step": 17622
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0178128480911255,
      "learning_rate": 1.761600629433838e-05,
      "loss": 2.6503,
      "step": 17623
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1471410989761353,
      "learning_rate": 1.76157394632482e-05,
      "loss": 2.5139,
      "step": 17624
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9648178815841675,
      "learning_rate": 1.7615472619247266e-05,
      "loss": 2.4993,
      "step": 17625
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.174386739730835,
      "learning_rate": 1.761520576233604e-05,
      "loss": 2.483,
      "step": 17626
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2608431577682495,
      "learning_rate": 1.761493889251497e-05,
      "loss": 2.2218,
      "step": 17627
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2022902965545654,
      "learning_rate": 1.7614672009784514e-05,
      "loss": 2.6936,
      "step": 17628
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9966155886650085,
      "learning_rate": 1.7614405114145115e-05,
      "loss": 2.7011,
      "step": 17629
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0675338506698608,
      "learning_rate": 1.761413820559724e-05,
      "loss": 2.7519,
      "step": 17630
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.084348201751709,
      "learning_rate": 1.7613871284141327e-05,
      "loss": 2.5307,
      "step": 17631
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9829381704330444,
      "learning_rate": 1.7613604349777835e-05,
      "loss": 2.4482,
      "step": 17632
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.035283088684082,
      "learning_rate": 1.7613337402507215e-05,
      "loss": 2.4167,
      "step": 17633
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0292836427688599,
      "learning_rate": 1.7613070442329923e-05,
      "loss": 2.4166,
      "step": 17634
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.040513038635254,
      "learning_rate": 1.7612803469246407e-05,
      "loss": 2.4046,
      "step": 17635
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.017520785331726,
      "learning_rate": 1.761253648325712e-05,
      "loss": 2.4315,
      "step": 17636
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0042539834976196,
      "learning_rate": 1.7612269484362522e-05,
      "loss": 2.3221,
      "step": 17637
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0203739404678345,
      "learning_rate": 1.7612002472563055e-05,
      "loss": 2.4171,
      "step": 17638
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0927857160568237,
      "learning_rate": 1.7611735447859176e-05,
      "loss": 2.5156,
      "step": 17639
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1923739910125732,
      "learning_rate": 1.761146841025134e-05,
      "loss": 2.3004,
      "step": 17640
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0173171758651733,
      "learning_rate": 1.7611201359739996e-05,
      "loss": 2.5937,
      "step": 17641
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0969170331954956,
      "learning_rate": 1.76109342963256e-05,
      "loss": 2.3213,
      "step": 17642
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0584025382995605,
      "learning_rate": 1.7610667220008602e-05,
      "loss": 2.3976,
      "step": 17643
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.006723165512085,
      "learning_rate": 1.761040013078946e-05,
      "loss": 2.4934,
      "step": 17644
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.011023998260498,
      "learning_rate": 1.7610133028668616e-05,
      "loss": 2.5059,
      "step": 17645
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0287411212921143,
      "learning_rate": 1.7609865913646535e-05,
      "loss": 2.3602,
      "step": 17646
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0649244785308838,
      "learning_rate": 1.7609598785723662e-05,
      "loss": 2.3774,
      "step": 17647
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0066226720809937,
      "learning_rate": 1.7609331644900453e-05,
      "loss": 2.4395,
      "step": 17648
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0724977254867554,
      "learning_rate": 1.7609064491177362e-05,
      "loss": 2.5111,
      "step": 17649
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9496122598648071,
      "learning_rate": 1.7608797324554838e-05,
      "loss": 2.6151,
      "step": 17650
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0714001655578613,
      "learning_rate": 1.7608530145033336e-05,
      "loss": 2.4737,
      "step": 17651
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0177383422851562,
      "learning_rate": 1.7608262952613312e-05,
      "loss": 2.4606,
      "step": 17652
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9885332584381104,
      "learning_rate": 1.7607995747295212e-05,
      "loss": 2.4682,
      "step": 17653
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.075646996498108,
      "learning_rate": 1.7607728529079494e-05,
      "loss": 2.5403,
      "step": 17654
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0011736154556274,
      "learning_rate": 1.760746129796661e-05,
      "loss": 2.4447,
      "step": 17655
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.016196370124817,
      "learning_rate": 1.7607194053957015e-05,
      "loss": 2.3271,
      "step": 17656
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1722499132156372,
      "learning_rate": 1.7606926797051156e-05,
      "loss": 2.4444,
      "step": 17657
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.000331997871399,
      "learning_rate": 1.760665952724949e-05,
      "loss": 2.6588,
      "step": 17658
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0066267251968384,
      "learning_rate": 1.7606392244552475e-05,
      "loss": 2.6412,
      "step": 17659
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1085138320922852,
      "learning_rate": 1.760612494896056e-05,
      "loss": 2.3458,
      "step": 17660
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0267943143844604,
      "learning_rate": 1.760585764047419e-05,
      "loss": 2.1641,
      "step": 17661
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0353426933288574,
      "learning_rate": 1.760559031909383e-05,
      "loss": 2.5254,
      "step": 17662
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9764001369476318,
      "learning_rate": 1.760532298481993e-05,
      "loss": 2.3371,
      "step": 17663
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0931029319763184,
      "learning_rate": 1.760505563765294e-05,
      "loss": 2.5674,
      "step": 17664
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9487898945808411,
      "learning_rate": 1.7604788277593315e-05,
      "loss": 2.4719,
      "step": 17665
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.185349464416504,
      "learning_rate": 1.760452090464151e-05,
      "loss": 2.569,
      "step": 17666
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1475037336349487,
      "learning_rate": 1.7604253518797975e-05,
      "loss": 2.5529,
      "step": 17667
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.11565363407135,
      "learning_rate": 1.7603986120063164e-05,
      "loss": 2.3265,
      "step": 17668
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9707863926887512,
      "learning_rate": 1.7603718708437535e-05,
      "loss": 2.5704,
      "step": 17669
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2400892972946167,
      "learning_rate": 1.7603451283921535e-05,
      "loss": 2.3994,
      "step": 17670
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1153329610824585,
      "learning_rate": 1.760318384651562e-05,
      "loss": 2.5278,
      "step": 17671
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0933575630187988,
      "learning_rate": 1.7602916396220245e-05,
      "loss": 2.61,
      "step": 17672
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0481094121932983,
      "learning_rate": 1.7602648933035863e-05,
      "loss": 2.4203,
      "step": 17673
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0558233261108398,
      "learning_rate": 1.7602381456962922e-05,
      "loss": 2.3741,
      "step": 17674
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.038364052772522,
      "learning_rate": 1.760211396800188e-05,
      "loss": 2.6586,
      "step": 17675
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0803773403167725,
      "learning_rate": 1.7601846466153196e-05,
      "loss": 2.4382,
      "step": 17676
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9471609592437744,
      "learning_rate": 1.760157895141731e-05,
      "loss": 2.5152,
      "step": 17677
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.001344084739685,
      "learning_rate": 1.7601311423794687e-05,
      "loss": 2.466,
      "step": 17678
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0852899551391602,
      "learning_rate": 1.7601043883285775e-05,
      "loss": 2.3556,
      "step": 17679
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0999952554702759,
      "learning_rate": 1.7600776329891032e-05,
      "loss": 2.3809,
      "step": 17680
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1591488122940063,
      "learning_rate": 1.7600508763610906e-05,
      "loss": 2.366,
      "step": 17681
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0188424587249756,
      "learning_rate": 1.7600241184445853e-05,
      "loss": 2.5101,
      "step": 17682
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1118601560592651,
      "learning_rate": 1.759997359239633e-05,
      "loss": 2.6695,
      "step": 17683
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0099326372146606,
      "learning_rate": 1.7599705987462788e-05,
      "loss": 2.3489,
      "step": 17684
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1071381568908691,
      "learning_rate": 1.759943836964568e-05,
      "loss": 2.6032,
      "step": 17685
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0432307720184326,
      "learning_rate": 1.7599170738945455e-05,
      "loss": 2.5236,
      "step": 17686
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1298829317092896,
      "learning_rate": 1.7598903095362578e-05,
      "loss": 2.5064,
      "step": 17687
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0447043180465698,
      "learning_rate": 1.759863543889749e-05,
      "loss": 2.638,
      "step": 17688
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1731832027435303,
      "learning_rate": 1.7598367769550656e-05,
      "loss": 2.5351,
      "step": 17689
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1971789598464966,
      "learning_rate": 1.7598100087322524e-05,
      "loss": 2.3901,
      "step": 17690
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0431873798370361,
      "learning_rate": 1.7597832392213548e-05,
      "loss": 2.4032,
      "step": 17691
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0822707414627075,
      "learning_rate": 1.7597564684224183e-05,
      "loss": 2.4478,
      "step": 17692
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0282745361328125,
      "learning_rate": 1.759729696335488e-05,
      "loss": 2.4911,
      "step": 17693
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0017650127410889,
      "learning_rate": 1.75970292296061e-05,
      "loss": 2.6035,
      "step": 17694
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2042198181152344,
      "learning_rate": 1.7596761482978292e-05,
      "loss": 2.4549,
      "step": 17695
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.256097674369812,
      "learning_rate": 1.7596493723471905e-05,
      "loss": 2.4538,
      "step": 17696
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0283912420272827,
      "learning_rate": 1.7596225951087402e-05,
      "loss": 2.4388,
      "step": 17697
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.000221848487854,
      "learning_rate": 1.759595816582523e-05,
      "loss": 2.5413,
      "step": 17698
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9988972544670105,
      "learning_rate": 1.759569036768585e-05,
      "loss": 2.4875,
      "step": 17699
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1785520315170288,
      "learning_rate": 1.759542255666971e-05,
      "loss": 2.629,
      "step": 17700
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.269737720489502,
      "learning_rate": 1.7595154732777264e-05,
      "loss": 2.4335,
      "step": 17701
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2105103731155396,
      "learning_rate": 1.759488689600897e-05,
      "loss": 2.5531,
      "step": 17702
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1531367301940918,
      "learning_rate": 1.7594619046365277e-05,
      "loss": 2.4945,
      "step": 17703
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9930555820465088,
      "learning_rate": 1.7594351183846643e-05,
      "loss": 2.5113,
      "step": 17704
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0401445627212524,
      "learning_rate": 1.759408330845352e-05,
      "loss": 2.5914,
      "step": 17705
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0335619449615479,
      "learning_rate": 1.7593815420186372e-05,
      "loss": 2.4125,
      "step": 17706
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0432093143463135,
      "learning_rate": 1.7593547519045634e-05,
      "loss": 2.2153,
      "step": 17707
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9131659269332886,
      "learning_rate": 1.7593279605031774e-05,
      "loss": 2.4315,
      "step": 17708
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0210539102554321,
      "learning_rate": 1.7593011678145243e-05,
      "loss": 2.7451,
      "step": 17709
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9093205332756042,
      "learning_rate": 1.7592743738386497e-05,
      "loss": 2.4557,
      "step": 17710
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.043511986732483,
      "learning_rate": 1.7592475785755988e-05,
      "loss": 2.3615,
      "step": 17711
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.123060941696167,
      "learning_rate": 1.7592207820254167e-05,
      "loss": 2.5693,
      "step": 17712
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0614279508590698,
      "learning_rate": 1.7591939841881496e-05,
      "loss": 2.4414,
      "step": 17713
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0995526313781738,
      "learning_rate": 1.7591671850638425e-05,
      "loss": 2.5393,
      "step": 17714
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.029807209968567,
      "learning_rate": 1.7591403846525402e-05,
      "loss": 2.3694,
      "step": 17715
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0040462017059326,
      "learning_rate": 1.759113582954289e-05,
      "loss": 2.3863,
      "step": 17716
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.42929208278656,
      "learning_rate": 1.759086779969135e-05,
      "loss": 2.5529,
      "step": 17717
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2457553148269653,
      "learning_rate": 1.7590599756971215e-05,
      "loss": 2.5443,
      "step": 17718
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0135623216629028,
      "learning_rate": 1.7590331701382957e-05,
      "loss": 2.4163,
      "step": 17719
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0620808601379395,
      "learning_rate": 1.7590063632927025e-05,
      "loss": 2.4997,
      "step": 17720
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0192015171051025,
      "learning_rate": 1.7589795551603873e-05,
      "loss": 2.4084,
      "step": 17721
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0899531841278076,
      "learning_rate": 1.758952745741396e-05,
      "loss": 2.3231,
      "step": 17722
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0461554527282715,
      "learning_rate": 1.7589259350357732e-05,
      "loss": 2.3769,
      "step": 17723
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.151421070098877,
      "learning_rate": 1.758899123043565e-05,
      "loss": 2.4246,
      "step": 17724
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9843660593032837,
      "learning_rate": 1.7588723097648167e-05,
      "loss": 2.428,
      "step": 17725
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9444342255592346,
      "learning_rate": 1.7588454951995736e-05,
      "loss": 2.4221,
      "step": 17726
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9867327809333801,
      "learning_rate": 1.7588186793478815e-05,
      "loss": 2.2636,
      "step": 17727
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0245862007141113,
      "learning_rate": 1.7587918622097853e-05,
      "loss": 2.649,
      "step": 17728
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9002285003662109,
      "learning_rate": 1.7587650437853314e-05,
      "loss": 2.3855,
      "step": 17729
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9791778326034546,
      "learning_rate": 1.7587382240745643e-05,
      "loss": 2.4041,
      "step": 17730
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0589944124221802,
      "learning_rate": 1.7587114030775297e-05,
      "loss": 2.5732,
      "step": 17731
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0180569887161255,
      "learning_rate": 1.7586845807942735e-05,
      "loss": 2.4144,
      "step": 17732
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9227378368377686,
      "learning_rate": 1.7586577572248408e-05,
      "loss": 2.1627,
      "step": 17733
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0050100088119507,
      "learning_rate": 1.7586309323692773e-05,
      "loss": 2.3423,
      "step": 17734
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1314960718154907,
      "learning_rate": 1.7586041062276278e-05,
      "loss": 2.4197,
      "step": 17735
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0967868566513062,
      "learning_rate": 1.7585772787999386e-05,
      "loss": 2.4005,
      "step": 17736
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0078564882278442,
      "learning_rate": 1.7585504500862553e-05,
      "loss": 2.3241,
      "step": 17737
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0317987203598022,
      "learning_rate": 1.7585236200866224e-05,
      "loss": 2.6552,
      "step": 17738
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2006515264511108,
      "learning_rate": 1.758496788801086e-05,
      "loss": 2.4315,
      "step": 17739
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9563610553741455,
      "learning_rate": 1.7584699562296915e-05,
      "loss": 2.4637,
      "step": 17740
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.213577389717102,
      "learning_rate": 1.758443122372485e-05,
      "loss": 2.2662,
      "step": 17741
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.013324499130249,
      "learning_rate": 1.7584162872295108e-05,
      "loss": 2.4254,
      "step": 17742
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0108307600021362,
      "learning_rate": 1.7583894508008152e-05,
      "loss": 2.3829,
      "step": 17743
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0230040550231934,
      "learning_rate": 1.7583626130864435e-05,
      "loss": 2.4255,
      "step": 17744
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9939620494842529,
      "learning_rate": 1.7583357740864415e-05,
      "loss": 2.8114,
      "step": 17745
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9530280828475952,
      "learning_rate": 1.7583089338008543e-05,
      "loss": 2.5954,
      "step": 17746
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0273008346557617,
      "learning_rate": 1.758282092229727e-05,
      "loss": 2.4291,
      "step": 17747
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2931392192840576,
      "learning_rate": 1.758255249373106e-05,
      "loss": 2.4248,
      "step": 17748
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1317771673202515,
      "learning_rate": 1.7582284052310362e-05,
      "loss": 2.5957,
      "step": 17749
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1305054426193237,
      "learning_rate": 1.758201559803564e-05,
      "loss": 2.7129,
      "step": 17750
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9714049696922302,
      "learning_rate": 1.7581747130907333e-05,
      "loss": 2.3396,
      "step": 17751
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9542351961135864,
      "learning_rate": 1.758147865092591e-05,
      "loss": 2.4812,
      "step": 17752
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0452574491500854,
      "learning_rate": 1.7581210158091823e-05,
      "loss": 2.3784,
      "step": 17753
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0116363763809204,
      "learning_rate": 1.7580941652405523e-05,
      "loss": 2.5842,
      "step": 17754
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1360336542129517,
      "learning_rate": 1.7580673133867468e-05,
      "loss": 2.4648,
      "step": 17755
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0154454708099365,
      "learning_rate": 1.7580404602478113e-05,
      "loss": 2.4387,
      "step": 17756
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.933097243309021,
      "learning_rate": 1.7580136058237916e-05,
      "loss": 2.2953,
      "step": 17757
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0330935716629028,
      "learning_rate": 1.757986750114733e-05,
      "loss": 2.4477,
      "step": 17758
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0043179988861084,
      "learning_rate": 1.7579598931206807e-05,
      "loss": 2.3669,
      "step": 17759
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2177209854125977,
      "learning_rate": 1.7579330348416803e-05,
      "loss": 2.407,
      "step": 17760
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1272934675216675,
      "learning_rate": 1.757906175277778e-05,
      "loss": 2.4995,
      "step": 17761
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9949588179588318,
      "learning_rate": 1.7578793144290188e-05,
      "loss": 2.3574,
      "step": 17762
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0755959749221802,
      "learning_rate": 1.7578524522954483e-05,
      "loss": 2.7063,
      "step": 17763
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9981552362442017,
      "learning_rate": 1.757825588877112e-05,
      "loss": 2.3413,
      "step": 17764
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0822325944900513,
      "learning_rate": 1.7577987241740558e-05,
      "loss": 2.3231,
      "step": 17765
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.086159110069275,
      "learning_rate": 1.7577718581863244e-05,
      "loss": 2.2745,
      "step": 17766
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0084609985351562,
      "learning_rate": 1.7577449909139642e-05,
      "loss": 2.3473,
      "step": 17767
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9389176964759827,
      "learning_rate": 1.7577181223570206e-05,
      "loss": 2.3861,
      "step": 17768
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0594440698623657,
      "learning_rate": 1.7576912525155386e-05,
      "loss": 2.3816,
      "step": 17769
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9840093851089478,
      "learning_rate": 1.7576643813895648e-05,
      "loss": 2.3673,
      "step": 17770
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9995139837265015,
      "learning_rate": 1.7576375089791433e-05,
      "loss": 2.3187,
      "step": 17771
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.016891360282898,
      "learning_rate": 1.7576106352843208e-05,
      "loss": 2.538,
      "step": 17772
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0303131341934204,
      "learning_rate": 1.7575837603051426e-05,
      "loss": 2.6037,
      "step": 17773
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0085444450378418,
      "learning_rate": 1.757556884041654e-05,
      "loss": 2.5356,
      "step": 17774
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9933316111564636,
      "learning_rate": 1.757530006493901e-05,
      "loss": 2.4383,
      "step": 17775
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0584837198257446,
      "learning_rate": 1.7575031276619285e-05,
      "loss": 2.6696,
      "step": 17776
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0520994663238525,
      "learning_rate": 1.7574762475457827e-05,
      "loss": 2.2561,
      "step": 17777
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1201622486114502,
      "learning_rate": 1.757449366145509e-05,
      "loss": 2.4181,
      "step": 17778
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9626697897911072,
      "learning_rate": 1.757422483461153e-05,
      "loss": 2.4368,
      "step": 17779
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9708638787269592,
      "learning_rate": 1.75739559949276e-05,
      "loss": 2.3324,
      "step": 17780
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1802284717559814,
      "learning_rate": 1.7573687142403754e-05,
      "loss": 2.6084,
      "step": 17781
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1245602369308472,
      "learning_rate": 1.7573418277040458e-05,
      "loss": 2.431,
      "step": 17782
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1603020429611206,
      "learning_rate": 1.7573149398838155e-05,
      "loss": 2.4688,
      "step": 17783
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0325595140457153,
      "learning_rate": 1.7572880507797313e-05,
      "loss": 2.4116,
      "step": 17784
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0340656042099,
      "learning_rate": 1.7572611603918376e-05,
      "loss": 2.745,
      "step": 17785
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.124454379081726,
      "learning_rate": 1.7572342687201807e-05,
      "loss": 2.3703,
      "step": 17786
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0506728887557983,
      "learning_rate": 1.7572073757648064e-05,
      "loss": 2.3524,
      "step": 17787
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0901861190795898,
      "learning_rate": 1.7571804815257598e-05,
      "loss": 2.4815,
      "step": 17788
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9904453158378601,
      "learning_rate": 1.7571535860030863e-05,
      "loss": 2.5458,
      "step": 17789
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0214072465896606,
      "learning_rate": 1.7571266891968322e-05,
      "loss": 2.4639,
      "step": 17790
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3369227647781372,
      "learning_rate": 1.7570997911070427e-05,
      "loss": 2.662,
      "step": 17791
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0711989402770996,
      "learning_rate": 1.7570728917337635e-05,
      "loss": 2.4974,
      "step": 17792
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3812589645385742,
      "learning_rate": 1.75704599107704e-05,
      "loss": 2.3387,
      "step": 17793
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9518117308616638,
      "learning_rate": 1.7570190891369175e-05,
      "loss": 2.5021,
      "step": 17794
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1114287376403809,
      "learning_rate": 1.7569921859134426e-05,
      "loss": 2.4526,
      "step": 17795
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0561174154281616,
      "learning_rate": 1.7569652814066602e-05,
      "loss": 2.5451,
      "step": 17796
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2429627180099487,
      "learning_rate": 1.756938375616616e-05,
      "loss": 2.3816,
      "step": 17797
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.941524088382721,
      "learning_rate": 1.7569114685433558e-05,
      "loss": 2.2825,
      "step": 17798
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0202809572219849,
      "learning_rate": 1.756884560186925e-05,
      "loss": 2.3535,
      "step": 17799
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9777190685272217,
      "learning_rate": 1.7568576505473692e-05,
      "loss": 2.3083,
      "step": 17800
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1884325742721558,
      "learning_rate": 1.7568307396247344e-05,
      "loss": 2.5843,
      "step": 17801
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0354374647140503,
      "learning_rate": 1.756803827419066e-05,
      "loss": 2.6518,
      "step": 17802
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1224751472473145,
      "learning_rate": 1.7567769139304092e-05,
      "loss": 2.4338,
      "step": 17803
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2010812759399414,
      "learning_rate": 1.7567499991588103e-05,
      "loss": 2.365,
      "step": 17804
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0014718770980835,
      "learning_rate": 1.7567230831043143e-05,
      "loss": 2.6159,
      "step": 17805
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.058444857597351,
      "learning_rate": 1.7566961657669672e-05,
      "loss": 2.4635,
      "step": 17806
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0405945777893066,
      "learning_rate": 1.7566692471468148e-05,
      "loss": 2.3123,
      "step": 17807
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1384034156799316,
      "learning_rate": 1.7566423272439025e-05,
      "loss": 2.361,
      "step": 17808
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.8970932960510254,
      "learning_rate": 1.7566154060582756e-05,
      "loss": 2.6367,
      "step": 17809
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.120107650756836,
      "learning_rate": 1.7565884835899804e-05,
      "loss": 2.822,
      "step": 17810
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1162471771240234,
      "learning_rate": 1.7565615598390625e-05,
      "loss": 2.4995,
      "step": 17811
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9637694954872131,
      "learning_rate": 1.756534634805567e-05,
      "loss": 2.6373,
      "step": 17812
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.013606309890747,
      "learning_rate": 1.7565077084895398e-05,
      "loss": 2.5372,
      "step": 17813
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0231248140335083,
      "learning_rate": 1.7564807808910265e-05,
      "loss": 2.4045,
      "step": 17814
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1800998449325562,
      "learning_rate": 1.756453852010073e-05,
      "loss": 2.324,
      "step": 17815
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9972763657569885,
      "learning_rate": 1.7564269218467245e-05,
      "loss": 2.4553,
      "step": 17816
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.031684398651123,
      "learning_rate": 1.756399990401027e-05,
      "loss": 2.4399,
      "step": 17817
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9841057062149048,
      "learning_rate": 1.756373057673026e-05,
      "loss": 2.5824,
      "step": 17818
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0705069303512573,
      "learning_rate": 1.7563461236627675e-05,
      "loss": 2.5049,
      "step": 17819
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0018919706344604,
      "learning_rate": 1.756319188370297e-05,
      "loss": 2.4511,
      "step": 17820
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1618913412094116,
      "learning_rate": 1.7562922517956596e-05,
      "loss": 2.5687,
      "step": 17821
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0254806280136108,
      "learning_rate": 1.7562653139389018e-05,
      "loss": 2.526,
      "step": 17822
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.114478588104248,
      "learning_rate": 1.7562383748000687e-05,
      "loss": 2.4145,
      "step": 17823
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9859544634819031,
      "learning_rate": 1.7562114343792066e-05,
      "loss": 2.5345,
      "step": 17824
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0669773817062378,
      "learning_rate": 1.7561844926763602e-05,
      "loss": 2.4565,
      "step": 17825
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0881805419921875,
      "learning_rate": 1.7561575496915758e-05,
      "loss": 2.4668,
      "step": 17826
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9871941208839417,
      "learning_rate": 1.756130605424899e-05,
      "loss": 2.0921,
      "step": 17827
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.050424337387085,
      "learning_rate": 1.7561036598763754e-05,
      "loss": 2.3366,
      "step": 17828
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0713553428649902,
      "learning_rate": 1.756076713046051e-05,
      "loss": 2.7285,
      "step": 17829
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0805624723434448,
      "learning_rate": 1.756049764933971e-05,
      "loss": 2.5221,
      "step": 17830
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.050838589668274,
      "learning_rate": 1.7560228155401816e-05,
      "loss": 2.5345,
      "step": 17831
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9701719880104065,
      "learning_rate": 1.755995864864728e-05,
      "loss": 2.4733,
      "step": 17832
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0602580308914185,
      "learning_rate": 1.755968912907656e-05,
      "loss": 2.2687,
      "step": 17833
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1247680187225342,
      "learning_rate": 1.7559419596690115e-05,
      "loss": 2.3446,
      "step": 17834
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9930896162986755,
      "learning_rate": 1.75591500514884e-05,
      "loss": 2.5176,
      "step": 17835
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9891589879989624,
      "learning_rate": 1.7558880493471875e-05,
      "loss": 2.639,
      "step": 17836
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0619240999221802,
      "learning_rate": 1.755861092264099e-05,
      "loss": 2.5613,
      "step": 17837
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1078524589538574,
      "learning_rate": 1.755834133899621e-05,
      "loss": 2.3524,
      "step": 17838
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0127344131469727,
      "learning_rate": 1.7558071742537987e-05,
      "loss": 2.5629,
      "step": 17839
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0395818948745728,
      "learning_rate": 1.755780213326678e-05,
      "loss": 2.638,
      "step": 17840
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9922409653663635,
      "learning_rate": 1.755753251118305e-05,
      "loss": 2.265,
      "step": 17841
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0525007247924805,
      "learning_rate": 1.755726287628724e-05,
      "loss": 2.436,
      "step": 17842
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0781382322311401,
      "learning_rate": 1.7556993228579826e-05,
      "loss": 2.3944,
      "step": 17843
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9325955510139465,
      "learning_rate": 1.7556723568061254e-05,
      "loss": 2.5134,
      "step": 17844
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9711316227912903,
      "learning_rate": 1.7556453894731983e-05,
      "loss": 2.495,
      "step": 17845
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9391016364097595,
      "learning_rate": 1.7556184208592467e-05,
      "loss": 2.687,
      "step": 17846
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0321482419967651,
      "learning_rate": 1.755591450964317e-05,
      "loss": 2.3494,
      "step": 17847
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.21364426612854,
      "learning_rate": 1.7555644797884544e-05,
      "loss": 2.5001,
      "step": 17848
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0472347736358643,
      "learning_rate": 1.7555375073317047e-05,
      "loss": 2.4249,
      "step": 17849
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9809859395027161,
      "learning_rate": 1.7555105335941142e-05,
      "loss": 2.2665,
      "step": 17850
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.049651861190796,
      "learning_rate": 1.7554835585757275e-05,
      "loss": 2.4684,
      "step": 17851
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0830413103103638,
      "learning_rate": 1.7554565822765915e-05,
      "loss": 2.3368,
      "step": 17852
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0207691192626953,
      "learning_rate": 1.755429604696751e-05,
      "loss": 2.4529,
      "step": 17853
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0992008447647095,
      "learning_rate": 1.7554026258362524e-05,
      "loss": 2.2631,
      "step": 17854
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0663484334945679,
      "learning_rate": 1.7553756456951413e-05,
      "loss": 2.4764,
      "step": 17855
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9408169984817505,
      "learning_rate": 1.755348664273463e-05,
      "loss": 2.5291,
      "step": 17856
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0707322359085083,
      "learning_rate": 1.755321681571264e-05,
      "loss": 2.4535,
      "step": 17857
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0136088132858276,
      "learning_rate": 1.7552946975885895e-05,
      "loss": 2.5117,
      "step": 17858
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0265123844146729,
      "learning_rate": 1.755267712325485e-05,
      "loss": 2.5265,
      "step": 17859
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0062416791915894,
      "learning_rate": 1.7552407257819967e-05,
      "loss": 2.6936,
      "step": 17860
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0209589004516602,
      "learning_rate": 1.7552137379581707e-05,
      "loss": 2.4659,
      "step": 17861
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0069595575332642,
      "learning_rate": 1.7551867488540517e-05,
      "loss": 2.5575,
      "step": 17862
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2007695436477661,
      "learning_rate": 1.7551597584696864e-05,
      "loss": 2.4324,
      "step": 17863
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0204548835754395,
      "learning_rate": 1.7551327668051202e-05,
      "loss": 2.5108,
      "step": 17864
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1583210229873657,
      "learning_rate": 1.7551057738603986e-05,
      "loss": 2.5104,
      "step": 17865
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9171802997589111,
      "learning_rate": 1.755078779635568e-05,
      "loss": 2.3687,
      "step": 17866
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0625638961791992,
      "learning_rate": 1.7550517841306734e-05,
      "loss": 2.5131,
      "step": 17867
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0050867795944214,
      "learning_rate": 1.7550247873457613e-05,
      "loss": 2.4582,
      "step": 17868
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9725878834724426,
      "learning_rate": 1.754997789280877e-05,
      "loss": 2.5873,
      "step": 17869
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0543744564056396,
      "learning_rate": 1.7549707899360662e-05,
      "loss": 2.6457,
      "step": 17870
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1105258464813232,
      "learning_rate": 1.754943789311375e-05,
      "loss": 2.326,
      "step": 17871
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0474048852920532,
      "learning_rate": 1.7549167874068495e-05,
      "loss": 2.4171,
      "step": 17872
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9362286329269409,
      "learning_rate": 1.7548897842225347e-05,
      "loss": 2.4512,
      "step": 17873
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0283688306808472,
      "learning_rate": 1.7548627797584764e-05,
      "loss": 2.5856,
      "step": 17874
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.032363772392273,
      "learning_rate": 1.7548357740147207e-05,
      "loss": 2.6692,
      "step": 17875
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1082608699798584,
      "learning_rate": 1.7548087669913134e-05,
      "loss": 2.4938,
      "step": 17876
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0942072868347168,
      "learning_rate": 1.7547817586883005e-05,
      "loss": 2.3049,
      "step": 17877
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0919406414031982,
      "learning_rate": 1.7547547491057274e-05,
      "loss": 2.4114,
      "step": 17878
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.012446403503418,
      "learning_rate": 1.75472773824364e-05,
      "loss": 2.2459,
      "step": 17879
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0365445613861084,
      "learning_rate": 1.7547007261020842e-05,
      "loss": 2.3801,
      "step": 17880
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9950250387191772,
      "learning_rate": 1.7546737126811055e-05,
      "loss": 2.2812,
      "step": 17881
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1052963733673096,
      "learning_rate": 1.75464669798075e-05,
      "loss": 2.4681,
      "step": 17882
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0103726387023926,
      "learning_rate": 1.754619682001063e-05,
      "loss": 2.3364,
      "step": 17883
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0109463930130005,
      "learning_rate": 1.7545926647420914e-05,
      "loss": 2.6336,
      "step": 17884
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0157054662704468,
      "learning_rate": 1.7545656462038797e-05,
      "loss": 2.5234,
      "step": 17885
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0036494731903076,
      "learning_rate": 1.7545386263864744e-05,
      "loss": 2.3769,
      "step": 17886
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.514411211013794,
      "learning_rate": 1.7545116052899216e-05,
      "loss": 2.3435,
      "step": 17887
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.34391450881958,
      "learning_rate": 1.7544845829142664e-05,
      "loss": 2.6398,
      "step": 17888
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0623215436935425,
      "learning_rate": 1.754457559259555e-05,
      "loss": 2.2493,
      "step": 17889
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1576343774795532,
      "learning_rate": 1.754430534325833e-05,
      "loss": 2.3376,
      "step": 17890
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0632144212722778,
      "learning_rate": 1.7544035081131464e-05,
      "loss": 2.3669,
      "step": 17891
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0775314569473267,
      "learning_rate": 1.754376480621541e-05,
      "loss": 2.6501,
      "step": 17892
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9757794141769409,
      "learning_rate": 1.7543494518510625e-05,
      "loss": 2.4586,
      "step": 17893
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.105898141860962,
      "learning_rate": 1.754322421801757e-05,
      "loss": 2.5726,
      "step": 17894
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.982085645198822,
      "learning_rate": 1.75429539047367e-05,
      "loss": 2.6183,
      "step": 17895
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.085695743560791,
      "learning_rate": 1.7542683578668475e-05,
      "loss": 2.4418,
      "step": 17896
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1436920166015625,
      "learning_rate": 1.7542413239813353e-05,
      "loss": 2.3174,
      "step": 17897
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.090476393699646,
      "learning_rate": 1.754214288817179e-05,
      "loss": 2.7551,
      "step": 17898
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2117500305175781,
      "learning_rate": 1.7541872523744245e-05,
      "loss": 2.5855,
      "step": 17899
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2038593292236328,
      "learning_rate": 1.754160214653118e-05,
      "loss": 2.6108,
      "step": 17900
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0365427732467651,
      "learning_rate": 1.7541331756533053e-05,
      "loss": 2.4256,
      "step": 17901
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0164239406585693,
      "learning_rate": 1.754106135375032e-05,
      "loss": 2.3909,
      "step": 17902
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0747421979904175,
      "learning_rate": 1.754079093818344e-05,
      "loss": 2.4151,
      "step": 17903
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9486657381057739,
      "learning_rate": 1.754052050983287e-05,
      "loss": 2.2421,
      "step": 17904
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9822675585746765,
      "learning_rate": 1.754025006869907e-05,
      "loss": 2.5364,
      "step": 17905
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3388240337371826,
      "learning_rate": 1.7539979614782497e-05,
      "loss": 2.351,
      "step": 17906
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.008707046508789,
      "learning_rate": 1.753970914808361e-05,
      "loss": 2.3764,
      "step": 17907
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0645735263824463,
      "learning_rate": 1.7539438668602873e-05,
      "loss": 2.2968,
      "step": 17908
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9768733978271484,
      "learning_rate": 1.7539168176340735e-05,
      "loss": 2.5462,
      "step": 17909
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9606707096099854,
      "learning_rate": 1.753889767129766e-05,
      "loss": 2.5127,
      "step": 17910
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0557100772857666,
      "learning_rate": 1.753862715347411e-05,
      "loss": 2.6009,
      "step": 17911
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0444691181182861,
      "learning_rate": 1.7538356622870534e-05,
      "loss": 2.3989,
      "step": 17912
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0835459232330322,
      "learning_rate": 1.75380860794874e-05,
      "loss": 2.5348,
      "step": 17913
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.056070327758789,
      "learning_rate": 1.753781552332516e-05,
      "loss": 2.5517,
      "step": 17914
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0131933689117432,
      "learning_rate": 1.7537544954384276e-05,
      "loss": 2.5297,
      "step": 17915
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9847881197929382,
      "learning_rate": 1.753727437266521e-05,
      "loss": 2.3441,
      "step": 17916
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0567376613616943,
      "learning_rate": 1.7537003778168413e-05,
      "loss": 2.4071,
      "step": 17917
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.057254672050476,
      "learning_rate": 1.753673317089435e-05,
      "loss": 2.5262,
      "step": 17918
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0726391077041626,
      "learning_rate": 1.7536462550843472e-05,
      "loss": 2.5391,
      "step": 17919
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2479203939437866,
      "learning_rate": 1.7536191918016248e-05,
      "loss": 2.4626,
      "step": 17920
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0830167531967163,
      "learning_rate": 1.753592127241313e-05,
      "loss": 2.7537,
      "step": 17921
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1799588203430176,
      "learning_rate": 1.7535650614034582e-05,
      "loss": 2.3922,
      "step": 17922
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0635414123535156,
      "learning_rate": 1.7535379942881055e-05,
      "loss": 2.5647,
      "step": 17923
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5288426876068115,
      "learning_rate": 1.7535109258953017e-05,
      "loss": 2.4355,
      "step": 17924
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.017897129058838,
      "learning_rate": 1.7534838562250917e-05,
      "loss": 2.4458,
      "step": 17925
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9693016409873962,
      "learning_rate": 1.753456785277522e-05,
      "loss": 2.4355,
      "step": 17926
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0147762298583984,
      "learning_rate": 1.753429713052639e-05,
      "loss": 2.389,
      "step": 17927
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.215104103088379,
      "learning_rate": 1.7534026395504873e-05,
      "loss": 2.5661,
      "step": 17928
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0802174806594849,
      "learning_rate": 1.753375564771114e-05,
      "loss": 2.3543,
      "step": 17929
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1616612672805786,
      "learning_rate": 1.7533484887145643e-05,
      "loss": 2.4918,
      "step": 17930
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9190435409545898,
      "learning_rate": 1.7533214113808843e-05,
      "loss": 2.4367,
      "step": 17931
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1938698291778564,
      "learning_rate": 1.7532943327701198e-05,
      "loss": 2.4594,
      "step": 17932
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0707817077636719,
      "learning_rate": 1.7532672528823173e-05,
      "loss": 2.3104,
      "step": 17933
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1020987033843994,
      "learning_rate": 1.753240171717522e-05,
      "loss": 2.5401,
      "step": 17934
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1342394351959229,
      "learning_rate": 1.7532130892757798e-05,
      "loss": 2.5218,
      "step": 17935
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.8774539828300476,
      "learning_rate": 1.753186005557137e-05,
      "loss": 2.3166,
      "step": 17936
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0911073684692383,
      "learning_rate": 1.753158920561639e-05,
      "loss": 2.3888,
      "step": 17937
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.062957525253296,
      "learning_rate": 1.7531318342893325e-05,
      "loss": 2.5285,
      "step": 17938
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0858005285263062,
      "learning_rate": 1.753104746740263e-05,
      "loss": 2.21,
      "step": 17939
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1519213914871216,
      "learning_rate": 1.753077657914476e-05,
      "loss": 2.3329,
      "step": 17940
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2298355102539062,
      "learning_rate": 1.7530505678120183e-05,
      "loss": 2.4282,
      "step": 17941
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0296502113342285,
      "learning_rate": 1.753023476432935e-05,
      "loss": 2.4737,
      "step": 17942
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4627076387405396,
      "learning_rate": 1.7529963837772728e-05,
      "loss": 2.5516,
      "step": 17943
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1099588871002197,
      "learning_rate": 1.752969289845077e-05,
      "loss": 2.4906,
      "step": 17944
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0839217901229858,
      "learning_rate": 1.7529421946363938e-05,
      "loss": 2.6422,
      "step": 17945
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0911513566970825,
      "learning_rate": 1.7529150981512687e-05,
      "loss": 2.4946,
      "step": 17946
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9810398817062378,
      "learning_rate": 1.7528880003897486e-05,
      "loss": 2.4888,
      "step": 17947
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0232588052749634,
      "learning_rate": 1.7528609013518786e-05,
      "loss": 2.4933,
      "step": 17948
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1199179887771606,
      "learning_rate": 1.752833801037705e-05,
      "loss": 2.5279,
      "step": 17949
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0657305717468262,
      "learning_rate": 1.7528066994472733e-05,
      "loss": 2.4985,
      "step": 17950
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.945216178894043,
      "learning_rate": 1.7527795965806304e-05,
      "loss": 2.3657,
      "step": 17951
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2912299633026123,
      "learning_rate": 1.752752492437821e-05,
      "loss": 2.4901,
      "step": 17952
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1873537302017212,
      "learning_rate": 1.752725387018892e-05,
      "loss": 2.5497,
      "step": 17953
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0158157348632812,
      "learning_rate": 1.752698280323889e-05,
      "loss": 2.0521,
      "step": 17954
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9519787430763245,
      "learning_rate": 1.7526711723528577e-05,
      "loss": 2.234,
      "step": 17955
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1336477994918823,
      "learning_rate": 1.7526440631058446e-05,
      "loss": 2.547,
      "step": 17956
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0107357501983643,
      "learning_rate": 1.7526169525828957e-05,
      "loss": 2.5482,
      "step": 17957
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0849418640136719,
      "learning_rate": 1.7525898407840562e-05,
      "loss": 2.3143,
      "step": 17958
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1181572675704956,
      "learning_rate": 1.7525627277093728e-05,
      "loss": 2.7051,
      "step": 17959
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.059476375579834,
      "learning_rate": 1.752535613358891e-05,
      "loss": 2.3578,
      "step": 17960
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9871358275413513,
      "learning_rate": 1.7525084977326573e-05,
      "loss": 2.4342,
      "step": 17961
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0778650045394897,
      "learning_rate": 1.7524813808307168e-05,
      "loss": 2.4976,
      "step": 17962
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9383625984191895,
      "learning_rate": 1.752454262653116e-05,
      "loss": 2.5342,
      "step": 17963
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2144865989685059,
      "learning_rate": 1.7524271431999012e-05,
      "loss": 2.5217,
      "step": 17964
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0031758546829224,
      "learning_rate": 1.752400022471118e-05,
      "loss": 2.2914,
      "step": 17965
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1197099685668945,
      "learning_rate": 1.7523729004668123e-05,
      "loss": 2.6421,
      "step": 17966
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9855912923812866,
      "learning_rate": 1.75234577718703e-05,
      "loss": 2.4504,
      "step": 17967
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9575836658477783,
      "learning_rate": 1.7523186526318176e-05,
      "loss": 2.5026,
      "step": 17968
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9856534600257874,
      "learning_rate": 1.7522915268012208e-05,
      "loss": 2.5297,
      "step": 17969
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9701752662658691,
      "learning_rate": 1.7522643996952854e-05,
      "loss": 2.4353,
      "step": 17970
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9898843765258789,
      "learning_rate": 1.7522372713140572e-05,
      "loss": 2.4816,
      "step": 17971
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9662222862243652,
      "learning_rate": 1.752210141657583e-05,
      "loss": 2.4364,
      "step": 17972
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.01853346824646,
      "learning_rate": 1.7521830107259082e-05,
      "loss": 2.7748,
      "step": 17973
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9426591992378235,
      "learning_rate": 1.7521558785190787e-05,
      "loss": 2.4327,
      "step": 17974
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0337380170822144,
      "learning_rate": 1.7521287450371408e-05,
      "loss": 2.5342,
      "step": 17975
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0161880254745483,
      "learning_rate": 1.7521016102801405e-05,
      "loss": 2.5081,
      "step": 17976
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0888023376464844,
      "learning_rate": 1.7520744742481238e-05,
      "loss": 2.4593,
      "step": 17977
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0296025276184082,
      "learning_rate": 1.7520473369411365e-05,
      "loss": 2.3252,
      "step": 17978
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0845905542373657,
      "learning_rate": 1.7520201983592248e-05,
      "loss": 2.4626,
      "step": 17979
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0450570583343506,
      "learning_rate": 1.7519930585024342e-05,
      "loss": 2.643,
      "step": 17980
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9942348003387451,
      "learning_rate": 1.7519659173708116e-05,
      "loss": 2.2557,
      "step": 17981
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3101741075515747,
      "learning_rate": 1.7519387749644024e-05,
      "loss": 2.4841,
      "step": 17982
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2032151222229004,
      "learning_rate": 1.7519116312832527e-05,
      "loss": 2.3822,
      "step": 17983
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0648170709609985,
      "learning_rate": 1.7518844863274085e-05,
      "loss": 2.2783,
      "step": 17984
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0498918294906616,
      "learning_rate": 1.751857340096916e-05,
      "loss": 2.5983,
      "step": 17985
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1021167039871216,
      "learning_rate": 1.751830192591821e-05,
      "loss": 2.5475,
      "step": 17986
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2615551948547363,
      "learning_rate": 1.75180304381217e-05,
      "loss": 2.4463,
      "step": 17987
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.330415964126587,
      "learning_rate": 1.751775893758008e-05,
      "loss": 2.3284,
      "step": 17988
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0254557132720947,
      "learning_rate": 1.7517487424293824e-05,
      "loss": 2.4289,
      "step": 17989
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9429880380630493,
      "learning_rate": 1.751721589826338e-05,
      "loss": 2.3971,
      "step": 17990
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0200142860412598,
      "learning_rate": 1.7516944359489214e-05,
      "loss": 2.6117,
      "step": 17991
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9789559841156006,
      "learning_rate": 1.7516672807971787e-05,
      "loss": 2.3327,
      "step": 17992
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.988828182220459,
      "learning_rate": 1.751640124371156e-05,
      "loss": 2.5262,
      "step": 17993
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0652580261230469,
      "learning_rate": 1.7516129666708987e-05,
      "loss": 2.6089,
      "step": 17994
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9512333273887634,
      "learning_rate": 1.7515858076964535e-05,
      "loss": 2.5002,
      "step": 17995
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0817564725875854,
      "learning_rate": 1.7515586474478662e-05,
      "loss": 2.4038,
      "step": 17996
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0891364812850952,
      "learning_rate": 1.7515314859251828e-05,
      "loss": 2.6973,
      "step": 17997
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.100612998008728,
      "learning_rate": 1.7515043231284496e-05,
      "loss": 2.2008,
      "step": 17998
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0544893741607666,
      "learning_rate": 1.7514771590577125e-05,
      "loss": 2.4054,
      "step": 17999
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0426037311553955,
      "learning_rate": 1.7514499937130175e-05,
      "loss": 2.4829,
      "step": 18000
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.082360863685608,
      "learning_rate": 1.7514228270944104e-05,
      "loss": 2.335,
      "step": 18001
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1282609701156616,
      "learning_rate": 1.7513956592019375e-05,
      "loss": 2.7993,
      "step": 18002
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0061397552490234,
      "learning_rate": 1.7513684900356454e-05,
      "loss": 2.3288,
      "step": 18003
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0546993017196655,
      "learning_rate": 1.751341319595579e-05,
      "loss": 2.3273,
      "step": 18004
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0400632619857788,
      "learning_rate": 1.7513141478817854e-05,
      "loss": 2.4283,
      "step": 18005
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9600652456283569,
      "learning_rate": 1.7512869748943105e-05,
      "loss": 2.3077,
      "step": 18006
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2162853479385376,
      "learning_rate": 1.7512598006331995e-05,
      "loss": 2.4109,
      "step": 18007
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.171525478363037,
      "learning_rate": 1.7512326250984996e-05,
      "loss": 2.4937,
      "step": 18008
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.178314208984375,
      "learning_rate": 1.7512054482902562e-05,
      "loss": 2.7554,
      "step": 18009
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1321028470993042,
      "learning_rate": 1.7511782702085157e-05,
      "loss": 2.7524,
      "step": 18010
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.089987874031067,
      "learning_rate": 1.751151090853324e-05,
      "loss": 2.6053,
      "step": 18011
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.076233983039856,
      "learning_rate": 1.7511239102247267e-05,
      "loss": 2.491,
      "step": 18012
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0179308652877808,
      "learning_rate": 1.7510967283227708e-05,
      "loss": 2.2272,
      "step": 18013
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0246739387512207,
      "learning_rate": 1.7510695451475017e-05,
      "loss": 2.3758,
      "step": 18014
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.038542628288269,
      "learning_rate": 1.751042360698966e-05,
      "loss": 2.683,
      "step": 18015
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9624679088592529,
      "learning_rate": 1.7510151749772094e-05,
      "loss": 2.6308,
      "step": 18016
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1232023239135742,
      "learning_rate": 1.750987987982278e-05,
      "loss": 2.5333,
      "step": 18017
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9308458566665649,
      "learning_rate": 1.7509607997142183e-05,
      "loss": 2.2928,
      "step": 18018
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.139171838760376,
      "learning_rate": 1.7509336101730755e-05,
      "loss": 2.5854,
      "step": 18019
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1731325387954712,
      "learning_rate": 1.750906419358897e-05,
      "loss": 2.6657,
      "step": 18020
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2171496152877808,
      "learning_rate": 1.7508792272717276e-05,
      "loss": 2.4621,
      "step": 18021
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.931846559047699,
      "learning_rate": 1.7508520339116144e-05,
      "loss": 2.6609,
      "step": 18022
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9601398706436157,
      "learning_rate": 1.7508248392786028e-05,
      "loss": 2.5331,
      "step": 18023
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.999665379524231,
      "learning_rate": 1.7507976433727395e-05,
      "loss": 2.5448,
      "step": 18024
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0764435529708862,
      "learning_rate": 1.75077044619407e-05,
      "loss": 2.7572,
      "step": 18025
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9663171768188477,
      "learning_rate": 1.7507432477426406e-05,
      "loss": 2.3724,
      "step": 18026
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1957706212997437,
      "learning_rate": 1.7507160480184978e-05,
      "loss": 2.5187,
      "step": 18027
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.251888394355774,
      "learning_rate": 1.750688847021687e-05,
      "loss": 2.5454,
      "step": 18028
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0533136129379272,
      "learning_rate": 1.750661644752255e-05,
      "loss": 2.52,
      "step": 18029
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0288556814193726,
      "learning_rate": 1.7506344412102477e-05,
      "loss": 2.4258,
      "step": 18030
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1034330129623413,
      "learning_rate": 1.750607236395711e-05,
      "loss": 2.7519,
      "step": 18031
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0788811445236206,
      "learning_rate": 1.7505800303086912e-05,
      "loss": 2.4817,
      "step": 18032
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9767287969589233,
      "learning_rate": 1.7505528229492346e-05,
      "loss": 2.4114,
      "step": 18033
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9984802007675171,
      "learning_rate": 1.750525614317387e-05,
      "loss": 2.4749,
      "step": 18034
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9863404631614685,
      "learning_rate": 1.7504984044131947e-05,
      "loss": 2.6689,
      "step": 18035
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0199579000473022,
      "learning_rate": 1.7504711932367033e-05,
      "loss": 2.4331,
      "step": 18036
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0041331052780151,
      "learning_rate": 1.75044398078796e-05,
      "loss": 2.6347,
      "step": 18037
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1275819540023804,
      "learning_rate": 1.75041676706701e-05,
      "loss": 2.583,
      "step": 18038
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.9740435481071472,
      "learning_rate": 1.7503895520739e-05,
      "loss": 2.4073,
      "step": 18039
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.036985158920288,
      "learning_rate": 1.7503623358086755e-05,
      "loss": 2.478,
      "step": 18040
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.062360405921936,
      "learning_rate": 1.750335118271383e-05,
      "loss": 2.6436,
      "step": 18041
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0685265064239502,
      "learning_rate": 1.750307899462069e-05,
      "loss": 2.3705,
      "step": 18042
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0872904062271118,
      "learning_rate": 1.7502806793807797e-05,
      "loss": 2.5196,
      "step": 18043
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0194965600967407,
      "learning_rate": 1.7502534580275604e-05,
      "loss": 2.6051,
      "step": 18044
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0573852062225342,
      "learning_rate": 1.750226235402458e-05,
      "loss": 2.4588,
      "step": 18045
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0276281833648682,
      "learning_rate": 1.750199011505518e-05,
      "loss": 2.2208,
      "step": 18046
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2081561088562012,
      "learning_rate": 1.750171786336787e-05,
      "loss": 2.2706,
      "step": 18047
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.0530356168746948,
      "learning_rate": 1.7501445598963114e-05,
      "loss": 2.428,
      "step": 18048
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0406569242477417,
      "learning_rate": 1.7501173321841367e-05,
      "loss": 2.4701,
      "step": 18049
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0640826225280762,
      "learning_rate": 1.7500901032003093e-05,
      "loss": 2.569,
      "step": 18050
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0020668506622314,
      "learning_rate": 1.7500628729448758e-05,
      "loss": 2.732,
      "step": 18051
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1293247938156128,
      "learning_rate": 1.7500356414178822e-05,
      "loss": 2.5502,
      "step": 18052
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9940037727355957,
      "learning_rate": 1.750008408619374e-05,
      "loss": 2.5433,
      "step": 18053
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.940627932548523,
      "learning_rate": 1.7499811745493983e-05,
      "loss": 2.373,
      "step": 18054
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.012046456336975,
      "learning_rate": 1.7499539392080005e-05,
      "loss": 2.3646,
      "step": 18055
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0481232404708862,
      "learning_rate": 1.749926702595227e-05,
      "loss": 2.5415,
      "step": 18056
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0230919122695923,
      "learning_rate": 1.749899464711124e-05,
      "loss": 2.6852,
      "step": 18057
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.045365810394287,
      "learning_rate": 1.749872225555738e-05,
      "loss": 2.4645,
      "step": 18058
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.969443142414093,
      "learning_rate": 1.7498449851291147e-05,
      "loss": 2.4156,
      "step": 18059
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0491764545440674,
      "learning_rate": 1.7498177434313008e-05,
      "loss": 2.4039,
      "step": 18060
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1063309907913208,
      "learning_rate": 1.749790500462342e-05,
      "loss": 2.4898,
      "step": 18061
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3214415311813354,
      "learning_rate": 1.7497632562222846e-05,
      "loss": 2.556,
      "step": 18062
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0362666845321655,
      "learning_rate": 1.749736010711175e-05,
      "loss": 2.4075,
      "step": 18063
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0632680654525757,
      "learning_rate": 1.7497087639290592e-05,
      "loss": 2.6747,
      "step": 18064
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0067051649093628,
      "learning_rate": 1.7496815158759835e-05,
      "loss": 2.4149,
      "step": 18065
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9672788977622986,
      "learning_rate": 1.749654266551994e-05,
      "loss": 2.4551,
      "step": 18066
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.13023042678833,
      "learning_rate": 1.7496270159571363e-05,
      "loss": 2.5827,
      "step": 18067
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1888725757598877,
      "learning_rate": 1.7495997640914583e-05,
      "loss": 2.4807,
      "step": 18068
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9468830823898315,
      "learning_rate": 1.7495725109550045e-05,
      "loss": 2.552,
      "step": 18069
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0153332948684692,
      "learning_rate": 1.7495452565478217e-05,
      "loss": 2.4297,
      "step": 18070
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.042523980140686,
      "learning_rate": 1.749518000869956e-05,
      "loss": 2.5106,
      "step": 18071
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0288946628570557,
      "learning_rate": 1.749490743921454e-05,
      "loss": 2.5505,
      "step": 18072
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1621633768081665,
      "learning_rate": 1.7494634857023616e-05,
      "loss": 2.636,
      "step": 18073
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4725786447525024,
      "learning_rate": 1.749436226212725e-05,
      "loss": 2.3156,
      "step": 18074
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1002601385116577,
      "learning_rate": 1.74940896545259e-05,
      "loss": 2.3498,
      "step": 18075
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1141587495803833,
      "learning_rate": 1.749381703422004e-05,
      "loss": 2.5623,
      "step": 18076
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9932968616485596,
      "learning_rate": 1.749354440121012e-05,
      "loss": 2.2634,
      "step": 18077
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1111751794815063,
      "learning_rate": 1.749327175549661e-05,
      "loss": 2.6887,
      "step": 18078
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1466861963272095,
      "learning_rate": 1.7492999097079968e-05,
      "loss": 2.2997,
      "step": 18079
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9915410876274109,
      "learning_rate": 1.749272642596066e-05,
      "loss": 2.4389,
      "step": 18080
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.033167839050293,
      "learning_rate": 1.749245374213914e-05,
      "loss": 2.4379,
      "step": 18081
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.8671227693557739,
      "learning_rate": 1.7492181045615878e-05,
      "loss": 2.3785,
      "step": 18082
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0137299299240112,
      "learning_rate": 1.7491908336391335e-05,
      "loss": 2.5954,
      "step": 18083
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9788252711296082,
      "learning_rate": 1.7491635614465973e-05,
      "loss": 2.5594,
      "step": 18084
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1511075496673584,
      "learning_rate": 1.7491362879840257e-05,
      "loss": 2.4288,
      "step": 18085
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9590762257575989,
      "learning_rate": 1.749109013251464e-05,
      "loss": 2.3739,
      "step": 18086
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0148773193359375,
      "learning_rate": 1.7490817372489595e-05,
      "loss": 2.55,
      "step": 18087
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9951735734939575,
      "learning_rate": 1.749054459976558e-05,
      "loss": 2.4589,
      "step": 18088
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4566893577575684,
      "learning_rate": 1.7490271814343054e-05,
      "loss": 2.3852,
      "step": 18089
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0797526836395264,
      "learning_rate": 1.7489999016222484e-05,
      "loss": 2.445,
      "step": 18090
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9494506120681763,
      "learning_rate": 1.7489726205404335e-05,
      "loss": 2.4243,
      "step": 18091
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1116504669189453,
      "learning_rate": 1.7489453381889064e-05,
      "loss": 2.2651,
      "step": 18092
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0722087621688843,
      "learning_rate": 1.7489180545677134e-05,
      "loss": 2.5047,
      "step": 18093
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9995684623718262,
      "learning_rate": 1.748890769676901e-05,
      "loss": 2.4521,
      "step": 18094
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0828474760055542,
      "learning_rate": 1.7488634835165155e-05,
      "loss": 2.4388,
      "step": 18095
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0673093795776367,
      "learning_rate": 1.748836196086603e-05,
      "loss": 2.1574,
      "step": 18096
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9549334049224854,
      "learning_rate": 1.7488089073872094e-05,
      "loss": 2.5331,
      "step": 18097
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9429392218589783,
      "learning_rate": 1.7487816174183817e-05,
      "loss": 2.3949,
      "step": 18098
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.053930401802063,
      "learning_rate": 1.7487543261801657e-05,
      "loss": 2.5073,
      "step": 18099
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1044038534164429,
      "learning_rate": 1.7487270336726075e-05,
      "loss": 2.4568,
      "step": 18100
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9977974891662598,
      "learning_rate": 1.7486997398957538e-05,
      "loss": 2.3573,
      "step": 18101
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.034929633140564,
      "learning_rate": 1.748672444849651e-05,
      "loss": 2.6115,
      "step": 18102
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9752976298332214,
      "learning_rate": 1.7486451485343447e-05,
      "loss": 2.5207,
      "step": 18103
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9401324391365051,
      "learning_rate": 1.7486178509498817e-05,
      "loss": 2.3228,
      "step": 18104
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0438066720962524,
      "learning_rate": 1.748590552096308e-05,
      "loss": 2.3626,
      "step": 18105
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1576683521270752,
      "learning_rate": 1.74856325197367e-05,
      "loss": 2.3191,
      "step": 18106
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1499626636505127,
      "learning_rate": 1.7485359505820142e-05,
      "loss": 2.5458,
      "step": 18107
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9574012756347656,
      "learning_rate": 1.7485086479213865e-05,
      "loss": 2.4123,
      "step": 18108
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9755229949951172,
      "learning_rate": 1.7484813439918333e-05,
      "loss": 2.2672,
      "step": 18109
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.993628978729248,
      "learning_rate": 1.7484540387934006e-05,
      "loss": 2.5182,
      "step": 18110
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0973047018051147,
      "learning_rate": 1.7484267323261357e-05,
      "loss": 2.3159,
      "step": 18111
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1158101558685303,
      "learning_rate": 1.748399424590084e-05,
      "loss": 2.4114,
      "step": 18112
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1234285831451416,
      "learning_rate": 1.748372115585292e-05,
      "loss": 2.4684,
      "step": 18113
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9615909457206726,
      "learning_rate": 1.748344805311806e-05,
      "loss": 2.3008,
      "step": 18114
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0891138315200806,
      "learning_rate": 1.7483174937696722e-05,
      "loss": 2.5373,
      "step": 18115
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.223757266998291,
      "learning_rate": 1.7482901809589372e-05,
      "loss": 2.3046,
      "step": 18116
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.106035828590393,
      "learning_rate": 1.7482628668796467e-05,
      "loss": 2.1804,
      "step": 18117
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1334834098815918,
      "learning_rate": 1.7482355515318478e-05,
      "loss": 2.4861,
      "step": 18118
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0932719707489014,
      "learning_rate": 1.7482082349155862e-05,
      "loss": 2.6616,
      "step": 18119
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0696333646774292,
      "learning_rate": 1.7481809170309087e-05,
      "loss": 2.4506,
      "step": 18120
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0893741846084595,
      "learning_rate": 1.7481535978778613e-05,
      "loss": 2.3881,
      "step": 18121
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0562082529067993,
      "learning_rate": 1.74812627745649e-05,
      "loss": 2.4221,
      "step": 18122
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2012660503387451,
      "learning_rate": 1.748098955766842e-05,
      "loss": 2.6466,
      "step": 18123
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.982452392578125,
      "learning_rate": 1.7480716328089627e-05,
      "loss": 2.4489,
      "step": 18124
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1619230508804321,
      "learning_rate": 1.7480443085828992e-05,
      "loss": 2.4264,
      "step": 18125
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0257378816604614,
      "learning_rate": 1.748016983088697e-05,
      "loss": 2.7356,
      "step": 18126
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.114749789237976,
      "learning_rate": 1.747989656326403e-05,
      "loss": 2.4586,
      "step": 18127
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0007284879684448,
      "learning_rate": 1.7479623282960633e-05,
      "loss": 2.6141,
      "step": 18128
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1098443269729614,
      "learning_rate": 1.7479349989977246e-05,
      "loss": 2.4347,
      "step": 18129
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0193675756454468,
      "learning_rate": 1.7479076684314328e-05,
      "loss": 2.4643,
      "step": 18130
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0405439138412476,
      "learning_rate": 1.7478803365972343e-05,
      "loss": 2.4656,
      "step": 18131
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2061936855316162,
      "learning_rate": 1.747853003495175e-05,
      "loss": 2.5305,
      "step": 18132
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9652414917945862,
      "learning_rate": 1.7478256691253025e-05,
      "loss": 2.4979,
      "step": 18133
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9218205809593201,
      "learning_rate": 1.747798333487662e-05,
      "loss": 2.5525,
      "step": 18134
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0976128578186035,
      "learning_rate": 1.7477709965823004e-05,
      "loss": 2.3594,
      "step": 18135
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0530470609664917,
      "learning_rate": 1.747743658409264e-05,
      "loss": 2.3512,
      "step": 18136
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1036267280578613,
      "learning_rate": 1.747716318968599e-05,
      "loss": 2.5441,
      "step": 18137
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0055158138275146,
      "learning_rate": 1.7476889782603517e-05,
      "loss": 2.3375,
      "step": 18138
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1471693515777588,
      "learning_rate": 1.747661636284568e-05,
      "loss": 2.3406,
      "step": 18139
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.022592306137085,
      "learning_rate": 1.7476342930412955e-05,
      "loss": 2.4834,
      "step": 18140
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9762154221534729,
      "learning_rate": 1.7476069485305795e-05,
      "loss": 2.4203,
      "step": 18141
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.004625678062439,
      "learning_rate": 1.7475796027524666e-05,
      "loss": 2.3958,
      "step": 18142
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1255993843078613,
      "learning_rate": 1.7475522557070035e-05,
      "loss": 2.4247,
      "step": 18143
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1132923364639282,
      "learning_rate": 1.747524907394236e-05,
      "loss": 2.2889,
      "step": 18144
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1056777238845825,
      "learning_rate": 1.7474975578142107e-05,
      "loss": 2.5296,
      "step": 18145
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9704470634460449,
      "learning_rate": 1.7474702069669743e-05,
      "loss": 2.3957,
      "step": 18146
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.052722692489624,
      "learning_rate": 1.747442854852573e-05,
      "loss": 2.4062,
      "step": 18147
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0577373504638672,
      "learning_rate": 1.7474155014710526e-05,
      "loss": 2.4439,
      "step": 18148
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9985060095787048,
      "learning_rate": 1.7473881468224604e-05,
      "loss": 2.5551,
      "step": 18149
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.013558030128479,
      "learning_rate": 1.747360790906842e-05,
      "loss": 2.4677,
      "step": 18150
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0939815044403076,
      "learning_rate": 1.747333433724244e-05,
      "loss": 2.4398,
      "step": 18151
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0147223472595215,
      "learning_rate": 1.7473060752747133e-05,
      "loss": 2.5171,
      "step": 18152
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.063515543937683,
      "learning_rate": 1.7472787155582955e-05,
      "loss": 2.2252,
      "step": 18153
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.938361644744873,
      "learning_rate": 1.7472513545750373e-05,
      "loss": 2.4285,
      "step": 18154
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0324667692184448,
      "learning_rate": 1.747223992324985e-05,
      "loss": 2.2675,
      "step": 18155
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9939338564872742,
      "learning_rate": 1.7471966288081853e-05,
      "loss": 2.4563,
      "step": 18156
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.134986162185669,
      "learning_rate": 1.7471692640246844e-05,
      "loss": 2.2848,
      "step": 18157
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1527234315872192,
      "learning_rate": 1.7471418979745286e-05,
      "loss": 2.8472,
      "step": 18158
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1066209077835083,
      "learning_rate": 1.7471145306577645e-05,
      "loss": 2.5994,
      "step": 18159
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.124747633934021,
      "learning_rate": 1.7470871620744382e-05,
      "loss": 2.2988,
      "step": 18160
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0107238292694092,
      "learning_rate": 1.7470597922245963e-05,
      "loss": 2.5335,
      "step": 18161
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0071512460708618,
      "learning_rate": 1.7470324211082853e-05,
      "loss": 2.3997,
      "step": 18162
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.086607575416565,
      "learning_rate": 1.747005048725551e-05,
      "loss": 2.5471,
      "step": 18163
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0172920227050781,
      "learning_rate": 1.7469776750764405e-05,
      "loss": 2.5732,
      "step": 18164
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1683462858200073,
      "learning_rate": 1.746950300161e-05,
      "loss": 2.5007,
      "step": 18165
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.043958067893982,
      "learning_rate": 1.746922923979276e-05,
      "loss": 2.4972,
      "step": 18166
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0056548118591309,
      "learning_rate": 1.7468955465313147e-05,
      "loss": 2.4274,
      "step": 18167
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0839534997940063,
      "learning_rate": 1.7468681678171626e-05,
      "loss": 2.2931,
      "step": 18168
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0447295904159546,
      "learning_rate": 1.746840787836866e-05,
      "loss": 2.7069,
      "step": 18169
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.093341588973999,
      "learning_rate": 1.7468134065904716e-05,
      "loss": 2.396,
      "step": 18170
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9683196544647217,
      "learning_rate": 1.7467860240780255e-05,
      "loss": 2.6602,
      "step": 18171
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.05503511428833,
      "learning_rate": 1.7467586402995742e-05,
      "loss": 2.6465,
      "step": 18172
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.051872968673706,
      "learning_rate": 1.7467312552551644e-05,
      "loss": 2.649,
      "step": 18173
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0961729288101196,
      "learning_rate": 1.746703868944842e-05,
      "loss": 2.4064,
      "step": 18174
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9773066639900208,
      "learning_rate": 1.746676481368654e-05,
      "loss": 2.5621,
      "step": 18175
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.400272011756897,
      "learning_rate": 1.7466490925266468e-05,
      "loss": 2.5312,
      "step": 18176
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0300016403198242,
      "learning_rate": 1.746621702418866e-05,
      "loss": 2.2371,
      "step": 18177
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0144346952438354,
      "learning_rate": 1.746594311045359e-05,
      "loss": 2.2425,
      "step": 18178
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9982336759567261,
      "learning_rate": 1.7465669184061716e-05,
      "loss": 2.4691,
      "step": 18179
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9694673418998718,
      "learning_rate": 1.7465395245013513e-05,
      "loss": 2.4825,
      "step": 18180
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9922929406166077,
      "learning_rate": 1.746512129330943e-05,
      "loss": 2.7402,
      "step": 18181
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2060965299606323,
      "learning_rate": 1.7464847328949942e-05,
      "loss": 2.3546,
      "step": 18182
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9976485371589661,
      "learning_rate": 1.7464573351935506e-05,
      "loss": 2.5586,
      "step": 18183
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9604226350784302,
      "learning_rate": 1.7464299362266595e-05,
      "loss": 2.4854,
      "step": 18184
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0242626667022705,
      "learning_rate": 1.746402535994367e-05,
      "loss": 2.4927,
      "step": 18185
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.071376085281372,
      "learning_rate": 1.746375134496719e-05,
      "loss": 2.7453,
      "step": 18186
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0613633394241333,
      "learning_rate": 1.7463477317337628e-05,
      "loss": 2.5568,
      "step": 18187
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0068806409835815,
      "learning_rate": 1.7463203277055445e-05,
      "loss": 2.5332,
      "step": 18188
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.990720808506012,
      "learning_rate": 1.7462929224121104e-05,
      "loss": 2.3636,
      "step": 18189
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9959872364997864,
      "learning_rate": 1.7462655158535072e-05,
      "loss": 2.3296,
      "step": 18190
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0286918878555298,
      "learning_rate": 1.746238108029781e-05,
      "loss": 2.542,
      "step": 18191
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9140909314155579,
      "learning_rate": 1.746210698940979e-05,
      "loss": 2.4415,
      "step": 18192
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.081947684288025,
      "learning_rate": 1.746183288587147e-05,
      "loss": 2.4494,
      "step": 18193
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9815682768821716,
      "learning_rate": 1.746155876968332e-05,
      "loss": 2.5612,
      "step": 18194
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9840854406356812,
      "learning_rate": 1.7461284640845793e-05,
      "loss": 2.374,
      "step": 18195
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9989175200462341,
      "learning_rate": 1.746101049935937e-05,
      "loss": 2.6314,
      "step": 18196
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.112926721572876,
      "learning_rate": 1.7460736345224502e-05,
      "loss": 2.4351,
      "step": 18197
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2446051836013794,
      "learning_rate": 1.7460462178441666e-05,
      "loss": 2.3334,
      "step": 18198
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0308706760406494,
      "learning_rate": 1.7460187999011316e-05,
      "loss": 2.6163,
      "step": 18199
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0220484733581543,
      "learning_rate": 1.745991380693392e-05,
      "loss": 2.3811,
      "step": 18200
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.274165153503418,
      "learning_rate": 1.7459639602209947e-05,
      "loss": 2.4328,
      "step": 18201
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1025089025497437,
      "learning_rate": 1.7459365384839853e-05,
      "loss": 2.4993,
      "step": 18202
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9424461126327515,
      "learning_rate": 1.7459091154824117e-05,
      "loss": 2.497,
      "step": 18203
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0976471900939941,
      "learning_rate": 1.745881691216319e-05,
      "loss": 2.1633,
      "step": 18204
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1374167203903198,
      "learning_rate": 1.7458542656857544e-05,
      "loss": 2.4871,
      "step": 18205
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0214096307754517,
      "learning_rate": 1.7458268388907644e-05,
      "loss": 2.3722,
      "step": 18206
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.051496982574463,
      "learning_rate": 1.7457994108313953e-05,
      "loss": 2.3362,
      "step": 18207
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0917141437530518,
      "learning_rate": 1.7457719815076938e-05,
      "loss": 2.6603,
      "step": 18208
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.003157377243042,
      "learning_rate": 1.745744550919706e-05,
      "loss": 2.3533,
      "step": 18209
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0198348760604858,
      "learning_rate": 1.7457171190674787e-05,
      "loss": 2.6836,
      "step": 18210
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0087308883666992,
      "learning_rate": 1.7456896859510584e-05,
      "loss": 2.5,
      "step": 18211
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0658726692199707,
      "learning_rate": 1.7456622515704912e-05,
      "loss": 2.4544,
      "step": 18212
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0744928121566772,
      "learning_rate": 1.7456348159258243e-05,
      "loss": 2.5864,
      "step": 18213
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2026375532150269,
      "learning_rate": 1.7456073790171036e-05,
      "loss": 2.3217,
      "step": 18214
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0812511444091797,
      "learning_rate": 1.7455799408443763e-05,
      "loss": 2.4,
      "step": 18215
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0015393495559692,
      "learning_rate": 1.7455525014076885e-05,
      "loss": 2.583,
      "step": 18216
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1183998584747314,
      "learning_rate": 1.7455250607070863e-05,
      "loss": 2.5912,
      "step": 18217
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1170510053634644,
      "learning_rate": 1.745497618742617e-05,
      "loss": 2.6522,
      "step": 18218
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.045111060142517,
      "learning_rate": 1.7454701755143266e-05,
      "loss": 2.4164,
      "step": 18219
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.04723060131073,
      "learning_rate": 1.745442731022262e-05,
      "loss": 2.4791,
      "step": 18220
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9777174592018127,
      "learning_rate": 1.745415285266469e-05,
      "loss": 2.6584,
      "step": 18221
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1909573078155518,
      "learning_rate": 1.745387838246995e-05,
      "loss": 2.5069,
      "step": 18222
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.173094391822815,
      "learning_rate": 1.7453603899638864e-05,
      "loss": 2.5337,
      "step": 18223
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0623154640197754,
      "learning_rate": 1.745332940417189e-05,
      "loss": 2.3444,
      "step": 18224
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9822328090667725,
      "learning_rate": 1.7453054896069502e-05,
      "loss": 2.4998,
      "step": 18225
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1074377298355103,
      "learning_rate": 1.7452780375332163e-05,
      "loss": 2.4966,
      "step": 18226
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9351662397384644,
      "learning_rate": 1.7452505841960334e-05,
      "loss": 2.3548,
      "step": 18227
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9628039598464966,
      "learning_rate": 1.7452231295954484e-05,
      "loss": 2.5619,
      "step": 18228
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0356639623641968,
      "learning_rate": 1.7451956737315077e-05,
      "loss": 2.3397,
      "step": 18229
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9365836977958679,
      "learning_rate": 1.745168216604258e-05,
      "loss": 2.3888,
      "step": 18230
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9880267977714539,
      "learning_rate": 1.745140758213746e-05,
      "loss": 2.4785,
      "step": 18231
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9974142909049988,
      "learning_rate": 1.7451132985600176e-05,
      "loss": 2.4617,
      "step": 18232
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.03094482421875,
      "learning_rate": 1.7450858376431204e-05,
      "loss": 2.277,
      "step": 18233
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0856711864471436,
      "learning_rate": 1.7450583754631e-05,
      "loss": 2.5436,
      "step": 18234
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9701048731803894,
      "learning_rate": 1.7450309120200032e-05,
      "loss": 2.6884,
      "step": 18235
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1125288009643555,
      "learning_rate": 1.7450034473138768e-05,
      "loss": 2.4216,
      "step": 18236
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.045606255531311,
      "learning_rate": 1.7449759813447672e-05,
      "loss": 2.5816,
      "step": 18237
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.110792875289917,
      "learning_rate": 1.744948514112721e-05,
      "loss": 2.4692,
      "step": 18238
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9730973839759827,
      "learning_rate": 1.7449210456177847e-05,
      "loss": 2.369,
      "step": 18239
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1080527305603027,
      "learning_rate": 1.744893575860005e-05,
      "loss": 2.4142,
      "step": 18240
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0942391157150269,
      "learning_rate": 1.744866104839428e-05,
      "loss": 2.5615,
      "step": 18241
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9569087624549866,
      "learning_rate": 1.7448386325561012e-05,
      "loss": 2.5214,
      "step": 18242
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3284821510314941,
      "learning_rate": 1.7448111590100702e-05,
      "loss": 2.3205,
      "step": 18243
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2042112350463867,
      "learning_rate": 1.7447836842013824e-05,
      "loss": 2.7394,
      "step": 18244
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0038795471191406,
      "learning_rate": 1.7447562081300836e-05,
      "loss": 2.3114,
      "step": 18245
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9858425259590149,
      "learning_rate": 1.744728730796221e-05,
      "loss": 2.4559,
      "step": 18246
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9914580583572388,
      "learning_rate": 1.7447012521998406e-05,
      "loss": 2.5547,
      "step": 18247
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0353130102157593,
      "learning_rate": 1.7446737723409896e-05,
      "loss": 2.5408,
      "step": 18248
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2961342334747314,
      "learning_rate": 1.7446462912197143e-05,
      "loss": 2.3452,
      "step": 18249
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0222530364990234,
      "learning_rate": 1.7446188088360613e-05,
      "loss": 2.8786,
      "step": 18250
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0929754972457886,
      "learning_rate": 1.744591325190077e-05,
      "loss": 2.3112,
      "step": 18251
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0450208187103271,
      "learning_rate": 1.744563840281808e-05,
      "loss": 2.3432,
      "step": 18252
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0728168487548828,
      "learning_rate": 1.744536354111302e-05,
      "loss": 2.3696,
      "step": 18253
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1129308938980103,
      "learning_rate": 1.7445088666786037e-05,
      "loss": 2.3445,
      "step": 18254
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0525996685028076,
      "learning_rate": 1.7444813779837612e-05,
      "loss": 2.5282,
      "step": 18255
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0283925533294678,
      "learning_rate": 1.7444538880268202e-05,
      "loss": 2.4544,
      "step": 18256
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1026204824447632,
      "learning_rate": 1.744426396807828e-05,
      "loss": 2.4101,
      "step": 18257
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9475536346435547,
      "learning_rate": 1.7443989043268304e-05,
      "loss": 2.4116,
      "step": 18258
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0389349460601807,
      "learning_rate": 1.744371410583875e-05,
      "loss": 2.6549,
      "step": 18259
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0331487655639648,
      "learning_rate": 1.7443439155790077e-05,
      "loss": 2.4291,
      "step": 18260
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0053683519363403,
      "learning_rate": 1.7443164193122754e-05,
      "loss": 2.4882,
      "step": 18261
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0714088678359985,
      "learning_rate": 1.7442889217837242e-05,
      "loss": 2.4053,
      "step": 18262
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0231949090957642,
      "learning_rate": 1.7442614229934015e-05,
      "loss": 2.5812,
      "step": 18263
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0106407403945923,
      "learning_rate": 1.7442339229413535e-05,
      "loss": 2.525,
      "step": 18264
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9559572339057922,
      "learning_rate": 1.744206421627627e-05,
      "loss": 2.553,
      "step": 18265
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9858956336975098,
      "learning_rate": 1.7441789190522683e-05,
      "loss": 2.5491,
      "step": 18266
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9886865019798279,
      "learning_rate": 1.7441514152153244e-05,
      "loss": 2.5947,
      "step": 18267
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0071145296096802,
      "learning_rate": 1.7441239101168414e-05,
      "loss": 2.4133,
      "step": 18268
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1086928844451904,
      "learning_rate": 1.7440964037568665e-05,
      "loss": 2.5925,
      "step": 18269
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9954926371574402,
      "learning_rate": 1.744068896135446e-05,
      "loss": 2.4604,
      "step": 18270
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0038750171661377,
      "learning_rate": 1.7440413872526266e-05,
      "loss": 2.3601,
      "step": 18271
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0988184213638306,
      "learning_rate": 1.744013877108455e-05,
      "loss": 2.5607,
      "step": 18272
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0962114334106445,
      "learning_rate": 1.7439863657029784e-05,
      "loss": 2.4987,
      "step": 18273
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1757066249847412,
      "learning_rate": 1.743958853036242e-05,
      "loss": 2.5116,
      "step": 18274
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.356452226638794,
      "learning_rate": 1.7439313391082938e-05,
      "loss": 2.4652,
      "step": 18275
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0821088552474976,
      "learning_rate": 1.7439038239191795e-05,
      "loss": 2.2961,
      "step": 18276
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1884444952011108,
      "learning_rate": 1.7438763074689467e-05,
      "loss": 2.4705,
      "step": 18277
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9879838824272156,
      "learning_rate": 1.743848789757641e-05,
      "loss": 2.4833,
      "step": 18278
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9684444665908813,
      "learning_rate": 1.74382127078531e-05,
      "loss": 2.5897,
      "step": 18279
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0786798000335693,
      "learning_rate": 1.743793750552e-05,
      "loss": 2.4313,
      "step": 18280
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9888889193534851,
      "learning_rate": 1.743766229057757e-05,
      "loss": 2.5253,
      "step": 18281
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9755116701126099,
      "learning_rate": 1.7437387063026286e-05,
      "loss": 2.6685,
      "step": 18282
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9749469757080078,
      "learning_rate": 1.7437111822866612e-05,
      "loss": 2.4884,
      "step": 18283
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0768651962280273,
      "learning_rate": 1.7436836570099012e-05,
      "loss": 2.8254,
      "step": 18284
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1234797239303589,
      "learning_rate": 1.7436561304723953e-05,
      "loss": 2.1329,
      "step": 18285
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0016123056411743,
      "learning_rate": 1.7436286026741907e-05,
      "loss": 2.7476,
      "step": 18286
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0452289581298828,
      "learning_rate": 1.7436010736153334e-05,
      "loss": 2.7893,
      "step": 18287
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0633769035339355,
      "learning_rate": 1.74357354329587e-05,
      "loss": 2.4324,
      "step": 18288
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.976448655128479,
      "learning_rate": 1.743546011715848e-05,
      "loss": 2.6977,
      "step": 18289
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9978240728378296,
      "learning_rate": 1.743518478875313e-05,
      "loss": 2.5066,
      "step": 18290
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0316067934036255,
      "learning_rate": 1.743490944774313e-05,
      "loss": 2.2952,
      "step": 18291
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0279489755630493,
      "learning_rate": 1.7434634094128935e-05,
      "loss": 2.2985,
      "step": 18292
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0253870487213135,
      "learning_rate": 1.7434358727911016e-05,
      "loss": 2.5833,
      "step": 18293
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9920907020568848,
      "learning_rate": 1.743408334908984e-05,
      "loss": 2.3217,
      "step": 18294
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0990666151046753,
      "learning_rate": 1.7433807957665874e-05,
      "loss": 2.4552,
      "step": 18295
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0533552169799805,
      "learning_rate": 1.7433532553639585e-05,
      "loss": 2.6455,
      "step": 18296
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9894964694976807,
      "learning_rate": 1.7433257137011435e-05,
      "loss": 2.4797,
      "step": 18297
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0703784227371216,
      "learning_rate": 1.74329817077819e-05,
      "loss": 2.7505,
      "step": 18298
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.018533706665039,
      "learning_rate": 1.7432706265951442e-05,
      "loss": 2.3956,
      "step": 18299
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.930849015712738,
      "learning_rate": 1.743243081152053e-05,
      "loss": 2.5399,
      "step": 18300
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.001406192779541,
      "learning_rate": 1.7432155344489624e-05,
      "loss": 2.6973,
      "step": 18301
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.079820156097412,
      "learning_rate": 1.74318798648592e-05,
      "loss": 2.1805,
      "step": 18302
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1175066232681274,
      "learning_rate": 1.7431604372629718e-05,
      "loss": 2.4514,
      "step": 18303
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0044103860855103,
      "learning_rate": 1.743132886780165e-05,
      "loss": 2.7877,
      "step": 18304
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9521180987358093,
      "learning_rate": 1.743105335037546e-05,
      "loss": 2.8321,
      "step": 18305
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.014618158340454,
      "learning_rate": 1.7430777820351622e-05,
      "loss": 2.3553,
      "step": 18306
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.09416663646698,
      "learning_rate": 1.743050227773059e-05,
      "loss": 2.6497,
      "step": 18307
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0308061838150024,
      "learning_rate": 1.7430226722512842e-05,
      "loss": 2.7418,
      "step": 18308
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0232828855514526,
      "learning_rate": 1.742995115469884e-05,
      "loss": 2.4044,
      "step": 18309
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9993467330932617,
      "learning_rate": 1.7429675574289055e-05,
      "loss": 2.4829,
      "step": 18310
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9979612827301025,
      "learning_rate": 1.742939998128395e-05,
      "loss": 2.5606,
      "step": 18311
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.043647289276123,
      "learning_rate": 1.7429124375683996e-05,
      "loss": 2.4684,
      "step": 18312
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9828768372535706,
      "learning_rate": 1.7428848757489662e-05,
      "loss": 2.5137,
      "step": 18313
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.084053635597229,
      "learning_rate": 1.7428573126701405e-05,
      "loss": 2.4851,
      "step": 18314
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.962415337562561,
      "learning_rate": 1.7428297483319702e-05,
      "loss": 2.3715,
      "step": 18315
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2490235567092896,
      "learning_rate": 1.7428021827345015e-05,
      "loss": 2.5558,
      "step": 18316
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0920039415359497,
      "learning_rate": 1.7427746158777815e-05,
      "loss": 2.5298,
      "step": 18317
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9814844727516174,
      "learning_rate": 1.742747047761857e-05,
      "loss": 2.4021,
      "step": 18318
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0680419206619263,
      "learning_rate": 1.7427194783867744e-05,
      "loss": 2.1832,
      "step": 18319
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.984738826751709,
      "learning_rate": 1.7426919077525805e-05,
      "loss": 2.5043,
      "step": 18320
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.083748698234558,
      "learning_rate": 1.742664335859322e-05,
      "loss": 2.4701,
      "step": 18321
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0578504800796509,
      "learning_rate": 1.7426367627070457e-05,
      "loss": 2.4591,
      "step": 18322
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.062435269355774,
      "learning_rate": 1.7426091882957987e-05,
      "loss": 2.4468,
      "step": 18323
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3212237358093262,
      "learning_rate": 1.7425816126256274e-05,
      "loss": 2.5538,
      "step": 18324
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9277303814888,
      "learning_rate": 1.7425540356965783e-05,
      "loss": 2.4492,
      "step": 18325
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9326233267784119,
      "learning_rate": 1.7425264575086988e-05,
      "loss": 2.4114,
      "step": 18326
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0430898666381836,
      "learning_rate": 1.742498878062035e-05,
      "loss": 2.4296,
      "step": 18327
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.015486478805542,
      "learning_rate": 1.742471297356634e-05,
      "loss": 2.472,
      "step": 18328
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9295063018798828,
      "learning_rate": 1.7424437153925426e-05,
      "loss": 2.6619,
      "step": 18329
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1048051118850708,
      "learning_rate": 1.7424161321698073e-05,
      "loss": 2.5723,
      "step": 18330
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1833701133728027,
      "learning_rate": 1.7423885476884753e-05,
      "loss": 2.6364,
      "step": 18331
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.045590877532959,
      "learning_rate": 1.742360961948593e-05,
      "loss": 2.4915,
      "step": 18332
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0365747213363647,
      "learning_rate": 1.742333374950207e-05,
      "loss": 2.3441,
      "step": 18333
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.058792233467102,
      "learning_rate": 1.7423057866933642e-05,
      "loss": 2.3682,
      "step": 18334
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.100021481513977,
      "learning_rate": 1.742278197178112e-05,
      "loss": 2.3655,
      "step": 18335
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.080878734588623,
      "learning_rate": 1.7422506064044965e-05,
      "loss": 2.2797,
      "step": 18336
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9802884459495544,
      "learning_rate": 1.742223014372564e-05,
      "loss": 2.4612,
      "step": 18337
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9849364757537842,
      "learning_rate": 1.7421954210823625e-05,
      "loss": 2.5738,
      "step": 18338
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9835231304168701,
      "learning_rate": 1.7421678265339382e-05,
      "loss": 2.4153,
      "step": 18339
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0862888097763062,
      "learning_rate": 1.742140230727338e-05,
      "loss": 2.3092,
      "step": 18340
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.032254695892334,
      "learning_rate": 1.7421126336626085e-05,
      "loss": 2.5668,
      "step": 18341
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0087215900421143,
      "learning_rate": 1.742085035339796e-05,
      "loss": 2.4123,
      "step": 18342
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.281313419342041,
      "learning_rate": 1.7420574357589483e-05,
      "loss": 2.55,
      "step": 18343
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9238954186439514,
      "learning_rate": 1.7420298349201114e-05,
      "loss": 2.4038,
      "step": 18344
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9967833161354065,
      "learning_rate": 1.7420022328233327e-05,
      "loss": 2.516,
      "step": 18345
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.03684663772583,
      "learning_rate": 1.7419746294686585e-05,
      "loss": 2.6987,
      "step": 18346
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0616658926010132,
      "learning_rate": 1.741947024856136e-05,
      "loss": 2.6415,
      "step": 18347
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9360396265983582,
      "learning_rate": 1.7419194189858116e-05,
      "loss": 2.525,
      "step": 18348
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2712585926055908,
      "learning_rate": 1.7418918118577324e-05,
      "loss": 2.3747,
      "step": 18349
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1399909257888794,
      "learning_rate": 1.7418642034719448e-05,
      "loss": 2.5571,
      "step": 18350
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.121970295906067,
      "learning_rate": 1.741836593828496e-05,
      "loss": 2.404,
      "step": 18351
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0561081171035767,
      "learning_rate": 1.741808982927433e-05,
      "loss": 2.5402,
      "step": 18352
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0536298751831055,
      "learning_rate": 1.741781370768802e-05,
      "loss": 2.4277,
      "step": 18353
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.090012788772583,
      "learning_rate": 1.74175375735265e-05,
      "loss": 2.6796,
      "step": 18354
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0157039165496826,
      "learning_rate": 1.7417261426790242e-05,
      "loss": 2.3781,
      "step": 18355
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0487784147262573,
      "learning_rate": 1.741698526747971e-05,
      "loss": 2.426,
      "step": 18356
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2125024795532227,
      "learning_rate": 1.7416709095595376e-05,
      "loss": 2.3018,
      "step": 18357
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9777871370315552,
      "learning_rate": 1.7416432911137704e-05,
      "loss": 2.4761,
      "step": 18358
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.973179280757904,
      "learning_rate": 1.7416156714107163e-05,
      "loss": 2.1685,
      "step": 18359
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.104445457458496,
      "learning_rate": 1.7415880504504223e-05,
      "loss": 2.6493,
      "step": 18360
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0318955183029175,
      "learning_rate": 1.7415604282329353e-05,
      "loss": 2.507,
      "step": 18361
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1009012460708618,
      "learning_rate": 1.7415328047583017e-05,
      "loss": 2.4564,
      "step": 18362
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1281085014343262,
      "learning_rate": 1.7415051800265688e-05,
      "loss": 2.5758,
      "step": 18363
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9587419629096985,
      "learning_rate": 1.741477554037783e-05,
      "loss": 2.4376,
      "step": 18364
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1651829481124878,
      "learning_rate": 1.7414499267919914e-05,
      "loss": 2.4866,
      "step": 18365
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.027154564857483,
      "learning_rate": 1.741422298289241e-05,
      "loss": 2.5959,
      "step": 18366
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.961856484413147,
      "learning_rate": 1.7413946685295782e-05,
      "loss": 2.4161,
      "step": 18367
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2121407985687256,
      "learning_rate": 1.7413670375130506e-05,
      "loss": 2.4005,
      "step": 18368
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0018665790557861,
      "learning_rate": 1.741339405239704e-05,
      "loss": 2.2086,
      "step": 18369
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9540544152259827,
      "learning_rate": 1.7413117717095858e-05,
      "loss": 2.3276,
      "step": 18370
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1233335733413696,
      "learning_rate": 1.7412841369227427e-05,
      "loss": 2.505,
      "step": 18371
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0031969547271729,
      "learning_rate": 1.7412565008792223e-05,
      "loss": 2.6041,
      "step": 18372
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0430790185928345,
      "learning_rate": 1.7412288635790702e-05,
      "loss": 2.4393,
      "step": 18373
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1076308488845825,
      "learning_rate": 1.741201225022334e-05,
      "loss": 2.4809,
      "step": 18374
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0589828491210938,
      "learning_rate": 1.7411735852090602e-05,
      "loss": 2.4174,
      "step": 18375
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.04970121383667,
      "learning_rate": 1.7411459441392964e-05,
      "loss": 2.3577,
      "step": 18376
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0467463731765747,
      "learning_rate": 1.7411183018130886e-05,
      "loss": 2.2733,
      "step": 18377
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0156174898147583,
      "learning_rate": 1.741090658230484e-05,
      "loss": 2.6857,
      "step": 18378
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.024985432624817,
      "learning_rate": 1.7410630133915297e-05,
      "loss": 2.6155,
      "step": 18379
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9941433668136597,
      "learning_rate": 1.741035367296272e-05,
      "loss": 2.4458,
      "step": 18380
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0074371099472046,
      "learning_rate": 1.7410077199447584e-05,
      "loss": 2.6328,
      "step": 18381
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.006638526916504,
      "learning_rate": 1.740980071337035e-05,
      "loss": 2.5985,
      "step": 18382
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2356975078582764,
      "learning_rate": 1.7409524214731494e-05,
      "loss": 2.3584,
      "step": 18383
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9066047668457031,
      "learning_rate": 1.7409247703531483e-05,
      "loss": 2.4637,
      "step": 18384
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9598872661590576,
      "learning_rate": 1.7408971179770782e-05,
      "loss": 2.5487,
      "step": 18385
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1127535104751587,
      "learning_rate": 1.7408694643449868e-05,
      "loss": 2.5184,
      "step": 18386
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9361423850059509,
      "learning_rate": 1.74084180945692e-05,
      "loss": 2.5537,
      "step": 18387
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0309066772460938,
      "learning_rate": 1.740814153312925e-05,
      "loss": 2.4107,
      "step": 18388
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.072019100189209,
      "learning_rate": 1.740786495913049e-05,
      "loss": 2.4697,
      "step": 18389
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0154109001159668,
      "learning_rate": 1.7407588372573388e-05,
      "loss": 2.4688,
      "step": 18390
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1722689867019653,
      "learning_rate": 1.740731177345841e-05,
      "loss": 2.345,
      "step": 18391
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.942128598690033,
      "learning_rate": 1.740703516178603e-05,
      "loss": 2.4397,
      "step": 18392
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0613269805908203,
      "learning_rate": 1.7406758537556713e-05,
      "loss": 2.2714,
      "step": 18393
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0596809387207031,
      "learning_rate": 1.7406481900770925e-05,
      "loss": 2.5155,
      "step": 18394
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.949733316898346,
      "learning_rate": 1.740620525142914e-05,
      "loss": 2.2693,
      "step": 18395
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1187024116516113,
      "learning_rate": 1.7405928589531826e-05,
      "loss": 2.7703,
      "step": 18396
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.036088466644287,
      "learning_rate": 1.7405651915079452e-05,
      "loss": 2.549,
      "step": 18397
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0585006475448608,
      "learning_rate": 1.7405375228072487e-05,
      "loss": 2.4607,
      "step": 18398
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9540553092956543,
      "learning_rate": 1.74050985285114e-05,
      "loss": 2.464,
      "step": 18399
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.019453763961792,
      "learning_rate": 1.7404821816396657e-05,
      "loss": 2.324,
      "step": 18400
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0479481220245361,
      "learning_rate": 1.7404545091728734e-05,
      "loss": 2.4065,
      "step": 18401
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1784616708755493,
      "learning_rate": 1.7404268354508094e-05,
      "loss": 2.3098,
      "step": 18402
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0498828887939453,
      "learning_rate": 1.740399160473521e-05,
      "loss": 2.4581,
      "step": 18403
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0084376335144043,
      "learning_rate": 1.7403714842410545e-05,
      "loss": 2.4745,
      "step": 18404
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.019572377204895,
      "learning_rate": 1.7403438067534578e-05,
      "loss": 2.495,
      "step": 18405
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0498056411743164,
      "learning_rate": 1.7403161280107768e-05,
      "loss": 2.4045,
      "step": 18406
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0748348236083984,
      "learning_rate": 1.740288448013059e-05,
      "loss": 2.3834,
      "step": 18407
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.392254114151001,
      "learning_rate": 1.7402607667603513e-05,
      "loss": 2.5048,
      "step": 18408
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.05880868434906,
      "learning_rate": 1.7402330842527003e-05,
      "loss": 2.4795,
      "step": 18409
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9427338242530823,
      "learning_rate": 1.7402054004901537e-05,
      "loss": 2.3944,
      "step": 18410
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2149641513824463,
      "learning_rate": 1.7401777154727573e-05,
      "loss": 2.4556,
      "step": 18411
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0927996635437012,
      "learning_rate": 1.7401500292005592e-05,
      "loss": 2.4533,
      "step": 18412
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.091859221458435,
      "learning_rate": 1.7401223416736054e-05,
      "loss": 2.3524,
      "step": 18413
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0655509233474731,
      "learning_rate": 1.7400946528919435e-05,
      "loss": 2.6192,
      "step": 18414
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0548956394195557,
      "learning_rate": 1.74006696285562e-05,
      "loss": 2.5595,
      "step": 18415
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0175007581710815,
      "learning_rate": 1.7400392715646823e-05,
      "loss": 2.4058,
      "step": 18416
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0690449476242065,
      "learning_rate": 1.7400115790191765e-05,
      "loss": 2.4225,
      "step": 18417
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0728205442428589,
      "learning_rate": 1.7399838852191503e-05,
      "loss": 2.4925,
      "step": 18418
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0222786664962769,
      "learning_rate": 1.7399561901646506e-05,
      "loss": 2.2506,
      "step": 18419
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0230004787445068,
      "learning_rate": 1.739928493855724e-05,
      "loss": 2.4341,
      "step": 18420
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0827823877334595,
      "learning_rate": 1.7399007962924178e-05,
      "loss": 2.3875,
      "step": 18421
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1192060708999634,
      "learning_rate": 1.7398730974747788e-05,
      "loss": 2.3466,
      "step": 18422
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0833483934402466,
      "learning_rate": 1.7398453974028537e-05,
      "loss": 2.3446,
      "step": 18423
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2088638544082642,
      "learning_rate": 1.73981769607669e-05,
      "loss": 2.3137,
      "step": 18424
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3189152479171753,
      "learning_rate": 1.739789993496334e-05,
      "loss": 2.5765,
      "step": 18425
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1086399555206299,
      "learning_rate": 1.7397622896618335e-05,
      "loss": 2.5794,
      "step": 18426
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9789160490036011,
      "learning_rate": 1.739734584573235e-05,
      "loss": 2.5642,
      "step": 18427
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9951139092445374,
      "learning_rate": 1.739706878230585e-05,
      "loss": 2.4759,
      "step": 18428
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.004582405090332,
      "learning_rate": 1.7396791706339312e-05,
      "loss": 2.6203,
      "step": 18429
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.071293830871582,
      "learning_rate": 1.73965146178332e-05,
      "loss": 2.6002,
      "step": 18430
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9978718757629395,
      "learning_rate": 1.7396237516787994e-05,
      "loss": 2.5646,
      "step": 18431
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3685113191604614,
      "learning_rate": 1.7395960403204154e-05,
      "loss": 2.3692,
      "step": 18432
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0629066228866577,
      "learning_rate": 1.7395683277082146e-05,
      "loss": 2.4785,
      "step": 18433
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0152069330215454,
      "learning_rate": 1.7395406138422456e-05,
      "loss": 2.5117,
      "step": 18434
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9573256969451904,
      "learning_rate": 1.739512898722554e-05,
      "loss": 2.5656,
      "step": 18435
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0868769884109497,
      "learning_rate": 1.7394851823491866e-05,
      "loss": 2.4288,
      "step": 18436
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1502810716629028,
      "learning_rate": 1.7394574647221917e-05,
      "loss": 2.5192,
      "step": 18437
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.084532618522644,
      "learning_rate": 1.739429745841615e-05,
      "loss": 2.5494,
      "step": 18438
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9536959528923035,
      "learning_rate": 1.739402025707505e-05,
      "loss": 2.5258,
      "step": 18439
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1167027950286865,
      "learning_rate": 1.739374304319907e-05,
      "loss": 2.4853,
      "step": 18440
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0156289339065552,
      "learning_rate": 1.7393465816788687e-05,
      "loss": 2.4459,
      "step": 18441
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9950023889541626,
      "learning_rate": 1.7393188577844374e-05,
      "loss": 2.4327,
      "step": 18442
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0846413373947144,
      "learning_rate": 1.7392911326366596e-05,
      "loss": 2.4898,
      "step": 18443
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.165528655052185,
      "learning_rate": 1.7392634062355828e-05,
      "loss": 2.4607,
      "step": 18444
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0299118757247925,
      "learning_rate": 1.7392356785812536e-05,
      "loss": 2.4365,
      "step": 18445
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1037765741348267,
      "learning_rate": 1.739207949673719e-05,
      "loss": 2.337,
      "step": 18446
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0130349397659302,
      "learning_rate": 1.7391802195130266e-05,
      "loss": 2.3607,
      "step": 18447
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9249504208564758,
      "learning_rate": 1.7391524880992226e-05,
      "loss": 2.1303,
      "step": 18448
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0384520292282104,
      "learning_rate": 1.739124755432355e-05,
      "loss": 2.5845,
      "step": 18449
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0710148811340332,
      "learning_rate": 1.7390970215124693e-05,
      "loss": 2.7569,
      "step": 18450
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9991264939308167,
      "learning_rate": 1.739069286339614e-05,
      "loss": 2.6447,
      "step": 18451
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0610917806625366,
      "learning_rate": 1.7390415499138352e-05,
      "loss": 2.6052,
      "step": 18452
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1403477191925049,
      "learning_rate": 1.7390138122351804e-05,
      "loss": 2.2621,
      "step": 18453
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.069185495376587,
      "learning_rate": 1.7389860733036968e-05,
      "loss": 2.6017,
      "step": 18454
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0158685445785522,
      "learning_rate": 1.7389583331194308e-05,
      "loss": 2.4387,
      "step": 18455
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0918020009994507,
      "learning_rate": 1.7389305916824295e-05,
      "loss": 2.518,
      "step": 18456
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0886250734329224,
      "learning_rate": 1.7389028489927403e-05,
      "loss": 2.2566,
      "step": 18457
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9772047400474548,
      "learning_rate": 1.7388751050504104e-05,
      "loss": 2.229,
      "step": 18458
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3675810098648071,
      "learning_rate": 1.7388473598554864e-05,
      "loss": 2.7174,
      "step": 18459
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0120980739593506,
      "learning_rate": 1.7388196134080152e-05,
      "loss": 2.4236,
      "step": 18460
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1108317375183105,
      "learning_rate": 1.738791865708044e-05,
      "loss": 2.6269,
      "step": 18461
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.178192377090454,
      "learning_rate": 1.7387641167556203e-05,
      "loss": 2.6899,
      "step": 18462
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0249345302581787,
      "learning_rate": 1.7387363665507906e-05,
      "loss": 2.6006,
      "step": 18463
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9588302373886108,
      "learning_rate": 1.7387086150936023e-05,
      "loss": 2.5297,
      "step": 18464
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0409448146820068,
      "learning_rate": 1.7386808623841022e-05,
      "loss": 2.588,
      "step": 18465
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0918830633163452,
      "learning_rate": 1.738653108422337e-05,
      "loss": 2.4066,
      "step": 18466
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1326802968978882,
      "learning_rate": 1.7386253532083547e-05,
      "loss": 2.3905,
      "step": 18467
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9345942735671997,
      "learning_rate": 1.7385975967422015e-05,
      "loss": 2.5167,
      "step": 18468
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.035485029220581,
      "learning_rate": 1.738569839023925e-05,
      "loss": 2.5666,
      "step": 18469
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1022861003875732,
      "learning_rate": 1.738542080053572e-05,
      "loss": 2.6285,
      "step": 18470
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0738505125045776,
      "learning_rate": 1.7385143198311895e-05,
      "loss": 2.5712,
      "step": 18471
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0881998538970947,
      "learning_rate": 1.7384865583568246e-05,
      "loss": 2.4887,
      "step": 18472
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0760024785995483,
      "learning_rate": 1.7384587956305246e-05,
      "loss": 2.4942,
      "step": 18473
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1522778272628784,
      "learning_rate": 1.7384310316523364e-05,
      "loss": 2.4464,
      "step": 18474
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.272805094718933,
      "learning_rate": 1.7384032664223067e-05,
      "loss": 2.4825,
      "step": 18475
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0461819171905518,
      "learning_rate": 1.738375499940483e-05,
      "loss": 2.6243,
      "step": 18476
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9754101037979126,
      "learning_rate": 1.7383477322069127e-05,
      "loss": 2.3042,
      "step": 18477
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9431880116462708,
      "learning_rate": 1.7383199632216417e-05,
      "loss": 2.4022,
      "step": 18478
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0848485231399536,
      "learning_rate": 1.7382921929847186e-05,
      "loss": 2.7278,
      "step": 18479
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0333893299102783,
      "learning_rate": 1.7382644214961894e-05,
      "loss": 2.3948,
      "step": 18480
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1579389572143555,
      "learning_rate": 1.7382366487561016e-05,
      "loss": 2.5655,
      "step": 18481
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9671226739883423,
      "learning_rate": 1.738208874764502e-05,
      "loss": 2.3663,
      "step": 18482
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1582591533660889,
      "learning_rate": 1.738181099521438e-05,
      "loss": 2.4127,
      "step": 18483
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0805466175079346,
      "learning_rate": 1.7381533230269566e-05,
      "loss": 2.4606,
      "step": 18484
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0131773948669434,
      "learning_rate": 1.7381255452811046e-05,
      "loss": 2.2822,
      "step": 18485
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0028516054153442,
      "learning_rate": 1.7380977662839296e-05,
      "loss": 2.5673,
      "step": 18486
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0187464952468872,
      "learning_rate": 1.738069986035478e-05,
      "loss": 2.4924,
      "step": 18487
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9614427089691162,
      "learning_rate": 1.7380422045357978e-05,
      "loss": 2.2368,
      "step": 18488
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2653971910476685,
      "learning_rate": 1.7380144217849356e-05,
      "loss": 2.3457,
      "step": 18489
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0463663339614868,
      "learning_rate": 1.7379866377829383e-05,
      "loss": 2.3257,
      "step": 18490
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8431756496429443,
      "learning_rate": 1.7379588525298534e-05,
      "loss": 2.5387,
      "step": 18491
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0794039964675903,
      "learning_rate": 1.7379310660257275e-05,
      "loss": 2.374,
      "step": 18492
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1318018436431885,
      "learning_rate": 1.7379032782706082e-05,
      "loss": 2.3306,
      "step": 18493
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9782496690750122,
      "learning_rate": 1.7378754892645425e-05,
      "loss": 2.4602,
      "step": 18494
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2321608066558838,
      "learning_rate": 1.7378476990075772e-05,
      "loss": 2.4104,
      "step": 18495
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0694022178649902,
      "learning_rate": 1.7378199074997598e-05,
      "loss": 2.4428,
      "step": 18496
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9819068312644958,
      "learning_rate": 1.7377921147411372e-05,
      "loss": 2.4279,
      "step": 18497
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9811705350875854,
      "learning_rate": 1.7377643207317567e-05,
      "loss": 2.8283,
      "step": 18498
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1350123882293701,
      "learning_rate": 1.7377365254716653e-05,
      "loss": 2.5122,
      "step": 18499
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3627349138259888,
      "learning_rate": 1.7377087289609104e-05,
      "loss": 2.3373,
      "step": 18500
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1133893728256226,
      "learning_rate": 1.7376809311995385e-05,
      "loss": 2.5125,
      "step": 18501
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9769538640975952,
      "learning_rate": 1.737653132187597e-05,
      "loss": 2.3072,
      "step": 18502
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0283467769622803,
      "learning_rate": 1.7376253319251332e-05,
      "loss": 2.4067,
      "step": 18503
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.991385281085968,
      "learning_rate": 1.737597530412194e-05,
      "loss": 2.5296,
      "step": 18504
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0127291679382324,
      "learning_rate": 1.7375697276488266e-05,
      "loss": 2.3435,
      "step": 18505
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0320758819580078,
      "learning_rate": 1.7375419236350786e-05,
      "loss": 2.7318,
      "step": 18506
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0515819787979126,
      "learning_rate": 1.737514118370997e-05,
      "loss": 2.4345,
      "step": 18507
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0338127613067627,
      "learning_rate": 1.737486311856628e-05,
      "loss": 2.201,
      "step": 18508
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9981393814086914,
      "learning_rate": 1.7374585040920192e-05,
      "loss": 2.5221,
      "step": 18509
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.107062816619873,
      "learning_rate": 1.7374306950772183e-05,
      "loss": 2.4038,
      "step": 18510
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0954855680465698,
      "learning_rate": 1.7374028848122723e-05,
      "loss": 2.4794,
      "step": 18511
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9366088509559631,
      "learning_rate": 1.737375073297228e-05,
      "loss": 2.5279,
      "step": 18512
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9889116883277893,
      "learning_rate": 1.7373472605321327e-05,
      "loss": 2.3712,
      "step": 18513
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9879065155982971,
      "learning_rate": 1.7373194465170334e-05,
      "loss": 2.0426,
      "step": 18514
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.239251732826233,
      "learning_rate": 1.7372916312519773e-05,
      "loss": 2.293,
      "step": 18515
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0542548894882202,
      "learning_rate": 1.7372638147370117e-05,
      "loss": 2.4588,
      "step": 18516
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.017411470413208,
      "learning_rate": 1.737235996972184e-05,
      "loss": 2.5184,
      "step": 18517
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1585320234298706,
      "learning_rate": 1.7372081779575407e-05,
      "loss": 2.4102,
      "step": 18518
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0367170572280884,
      "learning_rate": 1.7371803576931295e-05,
      "loss": 2.2616,
      "step": 18519
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9602668285369873,
      "learning_rate": 1.7371525361789974e-05,
      "loss": 2.5858,
      "step": 18520
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1471319198608398,
      "learning_rate": 1.7371247134151915e-05,
      "loss": 2.5997,
      "step": 18521
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0039845705032349,
      "learning_rate": 1.7370968894017586e-05,
      "loss": 2.4382,
      "step": 18522
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9780462384223938,
      "learning_rate": 1.737069064138747e-05,
      "loss": 2.35,
      "step": 18523
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9864062666893005,
      "learning_rate": 1.737041237626203e-05,
      "loss": 2.2236,
      "step": 18524
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0840411186218262,
      "learning_rate": 1.7370134098641735e-05,
      "loss": 2.5177,
      "step": 18525
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.029544711112976,
      "learning_rate": 1.7369855808527065e-05,
      "loss": 2.2187,
      "step": 18526
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0811569690704346,
      "learning_rate": 1.7369577505918486e-05,
      "loss": 2.4248,
      "step": 18527
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0211713314056396,
      "learning_rate": 1.7369299190816473e-05,
      "loss": 2.7504,
      "step": 18528
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0084972381591797,
      "learning_rate": 1.7369020863221492e-05,
      "loss": 2.4878,
      "step": 18529
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1450544595718384,
      "learning_rate": 1.7368742523134023e-05,
      "loss": 2.427,
      "step": 18530
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.04267156124115,
      "learning_rate": 1.7368464170554532e-05,
      "loss": 2.5055,
      "step": 18531
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0074949264526367,
      "learning_rate": 1.7368185805483497e-05,
      "loss": 2.527,
      "step": 18532
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1110368967056274,
      "learning_rate": 1.7367907427921383e-05,
      "loss": 2.4202,
      "step": 18533
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9449881911277771,
      "learning_rate": 1.7367629037868667e-05,
      "loss": 2.2487,
      "step": 18534
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0454820394515991,
      "learning_rate": 1.7367350635325816e-05,
      "loss": 2.315,
      "step": 18535
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0723203420639038,
      "learning_rate": 1.7367072220293305e-05,
      "loss": 2.599,
      "step": 18536
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0874159336090088,
      "learning_rate": 1.7366793792771605e-05,
      "loss": 2.3824,
      "step": 18537
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.028960943222046,
      "learning_rate": 1.736651535276119e-05,
      "loss": 2.6284,
      "step": 18538
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0460805892944336,
      "learning_rate": 1.7366236900262532e-05,
      "loss": 2.348,
      "step": 18539
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.61411714553833,
      "learning_rate": 1.73659584352761e-05,
      "loss": 2.5767,
      "step": 18540
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.053758978843689,
      "learning_rate": 1.736567995780237e-05,
      "loss": 2.4626,
      "step": 18541
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0248489379882812,
      "learning_rate": 1.736540146784181e-05,
      "loss": 2.8298,
      "step": 18542
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0152889490127563,
      "learning_rate": 1.7365122965394893e-05,
      "loss": 2.4305,
      "step": 18543
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9602042436599731,
      "learning_rate": 1.7364844450462097e-05,
      "loss": 2.4809,
      "step": 18544
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9780274629592896,
      "learning_rate": 1.7364565923043882e-05,
      "loss": 2.4965,
      "step": 18545
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9704350829124451,
      "learning_rate": 1.7364287383140734e-05,
      "loss": 2.2784,
      "step": 18546
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0721838474273682,
      "learning_rate": 1.7364008830753117e-05,
      "loss": 2.4166,
      "step": 18547
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0596590042114258,
      "learning_rate": 1.7363730265881504e-05,
      "loss": 2.2947,
      "step": 18548
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9598661065101624,
      "learning_rate": 1.736345168852637e-05,
      "loss": 2.4686,
      "step": 18549
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9615408182144165,
      "learning_rate": 1.7363173098688184e-05,
      "loss": 2.5225,
      "step": 18550
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9968069791793823,
      "learning_rate": 1.736289449636742e-05,
      "loss": 2.4004,
      "step": 18551
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0614640712738037,
      "learning_rate": 1.736261588156455e-05,
      "loss": 2.1329,
      "step": 18552
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.079903244972229,
      "learning_rate": 1.736233725428005e-05,
      "loss": 2.5752,
      "step": 18553
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9961888790130615,
      "learning_rate": 1.7362058614514383e-05,
      "loss": 2.4465,
      "step": 18554
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0162992477416992,
      "learning_rate": 1.736177996226803e-05,
      "loss": 2.5113,
      "step": 18555
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1420902013778687,
      "learning_rate": 1.736150129754146e-05,
      "loss": 2.5557,
      "step": 18556
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.976032018661499,
      "learning_rate": 1.7361222620335146e-05,
      "loss": 2.4119,
      "step": 18557
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9622450470924377,
      "learning_rate": 1.736094393064956e-05,
      "loss": 2.4576,
      "step": 18558
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.045630931854248,
      "learning_rate": 1.7360665228485175e-05,
      "loss": 2.437,
      "step": 18559
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1237225532531738,
      "learning_rate": 1.7360386513842464e-05,
      "loss": 2.5921,
      "step": 18560
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1404277086257935,
      "learning_rate": 1.7360107786721897e-05,
      "loss": 2.527,
      "step": 18561
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0293962955474854,
      "learning_rate": 1.7359829047123948e-05,
      "loss": 2.2853,
      "step": 18562
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0141115188598633,
      "learning_rate": 1.7359550295049096e-05,
      "loss": 2.191,
      "step": 18563
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2074451446533203,
      "learning_rate": 1.73592715304978e-05,
      "loss": 2.4908,
      "step": 18564
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9729033708572388,
      "learning_rate": 1.7358992753470544e-05,
      "loss": 2.4404,
      "step": 18565
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0493149757385254,
      "learning_rate": 1.7358713963967796e-05,
      "loss": 2.2861,
      "step": 18566
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9638898372650146,
      "learning_rate": 1.735843516199003e-05,
      "loss": 2.2243,
      "step": 18567
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1004915237426758,
      "learning_rate": 1.7358156347537715e-05,
      "loss": 2.475,
      "step": 18568
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9706053733825684,
      "learning_rate": 1.7357877520611327e-05,
      "loss": 2.5016,
      "step": 18569
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9803573489189148,
      "learning_rate": 1.735759868121134e-05,
      "loss": 2.4403,
      "step": 18570
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0819309949874878,
      "learning_rate": 1.7357319829338225e-05,
      "loss": 2.5162,
      "step": 18571
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.007212519645691,
      "learning_rate": 1.7357040964992452e-05,
      "loss": 2.5284,
      "step": 18572
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2508805990219116,
      "learning_rate": 1.73567620881745e-05,
      "loss": 2.4109,
      "step": 18573
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9885020852088928,
      "learning_rate": 1.7356483198884836e-05,
      "loss": 2.2772,
      "step": 18574
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2666666507720947,
      "learning_rate": 1.7356204297123934e-05,
      "loss": 2.451,
      "step": 18575
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.168365716934204,
      "learning_rate": 1.735592538289227e-05,
      "loss": 2.2283,
      "step": 18576
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0878878831863403,
      "learning_rate": 1.7355646456190315e-05,
      "loss": 2.6401,
      "step": 18577
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.087361454963684,
      "learning_rate": 1.735536751701854e-05,
      "loss": 2.457,
      "step": 18578
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0884819030761719,
      "learning_rate": 1.735508856537742e-05,
      "loss": 2.4948,
      "step": 18579
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.072594404220581,
      "learning_rate": 1.735480960126743e-05,
      "loss": 2.4902,
      "step": 18580
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0970146656036377,
      "learning_rate": 1.7354530624689038e-05,
      "loss": 2.5723,
      "step": 18581
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0884746313095093,
      "learning_rate": 1.735425163564272e-05,
      "loss": 2.4919,
      "step": 18582
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0503249168395996,
      "learning_rate": 1.7353972634128943e-05,
      "loss": 2.5541,
      "step": 18583
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0467491149902344,
      "learning_rate": 1.735369362014819e-05,
      "loss": 2.338,
      "step": 18584
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0393472909927368,
      "learning_rate": 1.7353414593700924e-05,
      "loss": 2.2987,
      "step": 18585
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0459054708480835,
      "learning_rate": 1.7353135554787628e-05,
      "loss": 2.4917,
      "step": 18586
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0481600761413574,
      "learning_rate": 1.735285650340877e-05,
      "loss": 2.278,
      "step": 18587
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0096421241760254,
      "learning_rate": 1.735257743956482e-05,
      "loss": 2.3883,
      "step": 18588
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0102365016937256,
      "learning_rate": 1.735229836325626e-05,
      "loss": 2.4787,
      "step": 18589
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0393216609954834,
      "learning_rate": 1.7352019274483552e-05,
      "loss": 2.5595,
      "step": 18590
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0577865839004517,
      "learning_rate": 1.7351740173247177e-05,
      "loss": 2.5511,
      "step": 18591
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.709688663482666,
      "learning_rate": 1.7351461059547604e-05,
      "loss": 2.4961,
      "step": 18592
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1036851406097412,
      "learning_rate": 1.735118193338531e-05,
      "loss": 2.5032,
      "step": 18593
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0720784664154053,
      "learning_rate": 1.7350902794760768e-05,
      "loss": 2.5117,
      "step": 18594
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0713493824005127,
      "learning_rate": 1.7350623643674444e-05,
      "loss": 2.542,
      "step": 18595
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0868951082229614,
      "learning_rate": 1.735034448012682e-05,
      "loss": 2.5614,
      "step": 18596
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9990379214286804,
      "learning_rate": 1.7350065304118363e-05,
      "loss": 2.4412,
      "step": 18597
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9574803113937378,
      "learning_rate": 1.734978611564955e-05,
      "loss": 2.3473,
      "step": 18598
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2288715839385986,
      "learning_rate": 1.7349506914720855e-05,
      "loss": 2.5087,
      "step": 18599
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0496809482574463,
      "learning_rate": 1.734922770133275e-05,
      "loss": 2.5354,
      "step": 18600
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.245185375213623,
      "learning_rate": 1.7348948475485706e-05,
      "loss": 2.3588,
      "step": 18601
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1426142454147339,
      "learning_rate": 1.73486692371802e-05,
      "loss": 2.6329,
      "step": 18602
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9339738488197327,
      "learning_rate": 1.7348389986416703e-05,
      "loss": 2.5843,
      "step": 18603
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.219882607460022,
      "learning_rate": 1.734811072319569e-05,
      "loss": 2.6634,
      "step": 18604
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.096582293510437,
      "learning_rate": 1.7347831447517635e-05,
      "loss": 2.4711,
      "step": 18605
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9645026326179504,
      "learning_rate": 1.7347552159383007e-05,
      "loss": 2.6671,
      "step": 18606
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1694633960723877,
      "learning_rate": 1.734727285879228e-05,
      "loss": 2.5256,
      "step": 18607
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.073343276977539,
      "learning_rate": 1.7346993545745933e-05,
      "loss": 2.2934,
      "step": 18608
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9147890210151672,
      "learning_rate": 1.734671422024444e-05,
      "loss": 2.5783,
      "step": 18609
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0008753538131714,
      "learning_rate": 1.7346434882288265e-05,
      "loss": 2.3443,
      "step": 18610
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9939671754837036,
      "learning_rate": 1.7346155531877894e-05,
      "loss": 2.5871,
      "step": 18611
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0491034984588623,
      "learning_rate": 1.734587616901379e-05,
      "loss": 2.5704,
      "step": 18612
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9772005081176758,
      "learning_rate": 1.7345596793696432e-05,
      "loss": 2.4646,
      "step": 18613
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.003055453300476,
      "learning_rate": 1.734531740592629e-05,
      "loss": 2.6089,
      "step": 18614
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0563626289367676,
      "learning_rate": 1.7345038005703843e-05,
      "loss": 2.5213,
      "step": 18615
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9597843885421753,
      "learning_rate": 1.734475859302956e-05,
      "loss": 2.5152,
      "step": 18616
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0804240703582764,
      "learning_rate": 1.7344479167903915e-05,
      "loss": 2.4796,
      "step": 18617
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.143899917602539,
      "learning_rate": 1.7344199730327385e-05,
      "loss": 2.5733,
      "step": 18618
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0362434387207031,
      "learning_rate": 1.7343920280300444e-05,
      "loss": 2.7334,
      "step": 18619
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9958499073982239,
      "learning_rate": 1.7343640817823562e-05,
      "loss": 2.6616,
      "step": 18620
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.8866865038871765,
      "learning_rate": 1.7343361342897213e-05,
      "loss": 2.5496,
      "step": 18621
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9661852121353149,
      "learning_rate": 1.7343081855521872e-05,
      "loss": 2.3014,
      "step": 18622
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.004095196723938,
      "learning_rate": 1.7342802355698017e-05,
      "loss": 2.3397,
      "step": 18623
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9540286660194397,
      "learning_rate": 1.7342522843426114e-05,
      "loss": 2.6011,
      "step": 18624
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1117562055587769,
      "learning_rate": 1.734224331870664e-05,
      "loss": 2.6588,
      "step": 18625
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9547342658042908,
      "learning_rate": 1.734196378154007e-05,
      "loss": 2.5795,
      "step": 18626
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2785186767578125,
      "learning_rate": 1.7341684231926877e-05,
      "loss": 2.545,
      "step": 18627
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9731422662734985,
      "learning_rate": 1.734140466986754e-05,
      "loss": 2.3131,
      "step": 18628
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0856722593307495,
      "learning_rate": 1.734112509536252e-05,
      "loss": 2.334,
      "step": 18629
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9514279961585999,
      "learning_rate": 1.7340845508412304e-05,
      "loss": 2.5954,
      "step": 18630
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0104984045028687,
      "learning_rate": 1.734056590901736e-05,
      "loss": 2.3889,
      "step": 18631
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0524705648422241,
      "learning_rate": 1.7340286297178164e-05,
      "loss": 2.5842,
      "step": 18632
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1636990308761597,
      "learning_rate": 1.7340006672895187e-05,
      "loss": 2.6239,
      "step": 18633
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9470835328102112,
      "learning_rate": 1.733972703616891e-05,
      "loss": 2.4715,
      "step": 18634
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9991010427474976,
      "learning_rate": 1.73394473869998e-05,
      "loss": 2.3726,
      "step": 18635
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.00057852268219,
      "learning_rate": 1.733916772538833e-05,
      "loss": 2.6291,
      "step": 18636
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0831397771835327,
      "learning_rate": 1.7338888051334982e-05,
      "loss": 2.2885,
      "step": 18637
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.055449366569519,
      "learning_rate": 1.7338608364840223e-05,
      "loss": 2.6858,
      "step": 18638
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0832769870758057,
      "learning_rate": 1.733832866590453e-05,
      "loss": 2.4124,
      "step": 18639
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2448822259902954,
      "learning_rate": 1.7338048954528378e-05,
      "loss": 2.5046,
      "step": 18640
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0621334314346313,
      "learning_rate": 1.733776923071224e-05,
      "loss": 2.5252,
      "step": 18641
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0386865139007568,
      "learning_rate": 1.733748949445659e-05,
      "loss": 2.4837,
      "step": 18642
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9971072673797607,
      "learning_rate": 1.73372097457619e-05,
      "loss": 2.5407,
      "step": 18643
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0472385883331299,
      "learning_rate": 1.7336929984628648e-05,
      "loss": 2.3749,
      "step": 18644
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.055206060409546,
      "learning_rate": 1.7336650211057306e-05,
      "loss": 2.3191,
      "step": 18645
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9764320254325867,
      "learning_rate": 1.7336370425048353e-05,
      "loss": 2.5726,
      "step": 18646
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2175793647766113,
      "learning_rate": 1.7336090626602256e-05,
      "loss": 2.3453,
      "step": 18647
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0564249753952026,
      "learning_rate": 1.7335810815719493e-05,
      "loss": 2.575,
      "step": 18648
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.00495183467865,
      "learning_rate": 1.733553099240054e-05,
      "loss": 2.5192,
      "step": 18649
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1622921228408813,
      "learning_rate": 1.733525115664587e-05,
      "loss": 2.4662,
      "step": 18650
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.037247657775879,
      "learning_rate": 1.733497130845596e-05,
      "loss": 2.3512,
      "step": 18651
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0412161350250244,
      "learning_rate": 1.7334691447831273e-05,
      "loss": 2.434,
      "step": 18652
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0107157230377197,
      "learning_rate": 1.7334411574772297e-05,
      "loss": 2.3966,
      "step": 18653
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0802102088928223,
      "learning_rate": 1.73341316892795e-05,
      "loss": 2.6238,
      "step": 18654
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0431305170059204,
      "learning_rate": 1.733385179135336e-05,
      "loss": 2.426,
      "step": 18655
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9930128455162048,
      "learning_rate": 1.7333571880994347e-05,
      "loss": 2.506,
      "step": 18656
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0027803182601929,
      "learning_rate": 1.7333291958202943e-05,
      "loss": 2.353,
      "step": 18657
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0439163446426392,
      "learning_rate": 1.7333012022979612e-05,
      "loss": 2.2187,
      "step": 18658
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9874308705329895,
      "learning_rate": 1.7332732075324833e-05,
      "loss": 2.4907,
      "step": 18659
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.049653172492981,
      "learning_rate": 1.7332452115239086e-05,
      "loss": 2.5058,
      "step": 18660
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0819332599639893,
      "learning_rate": 1.733217214272284e-05,
      "loss": 2.3504,
      "step": 18661
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1677147150039673,
      "learning_rate": 1.733189215777657e-05,
      "loss": 2.4418,
      "step": 18662
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1130471229553223,
      "learning_rate": 1.7331612160400754e-05,
      "loss": 2.3687,
      "step": 18663
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0497969388961792,
      "learning_rate": 1.733133215059586e-05,
      "loss": 2.4692,
      "step": 18664
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0723342895507812,
      "learning_rate": 1.7331052128362367e-05,
      "loss": 2.3774,
      "step": 18665
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0159050226211548,
      "learning_rate": 1.7330772093700753e-05,
      "loss": 2.3909,
      "step": 18666
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.040262222290039,
      "learning_rate": 1.733049204661149e-05,
      "loss": 2.4761,
      "step": 18667
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0500741004943848,
      "learning_rate": 1.7330211987095048e-05,
      "loss": 2.3233,
      "step": 18668
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.941419243812561,
      "learning_rate": 1.7329931915151906e-05,
      "loss": 2.5202,
      "step": 18669
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0864320993423462,
      "learning_rate": 1.7329651830782543e-05,
      "loss": 2.7862,
      "step": 18670
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1252386569976807,
      "learning_rate": 1.7329371733987423e-05,
      "loss": 2.4272,
      "step": 18671
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9831497669219971,
      "learning_rate": 1.732909162476703e-05,
      "loss": 2.5417,
      "step": 18672
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1871058940887451,
      "learning_rate": 1.732881150312184e-05,
      "loss": 2.4986,
      "step": 18673
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1157481670379639,
      "learning_rate": 1.732853136905232e-05,
      "loss": 2.3006,
      "step": 18674
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1598197221755981,
      "learning_rate": 1.7328251222558948e-05,
      "loss": 2.3621,
      "step": 18675
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0179669857025146,
      "learning_rate": 1.7327971063642205e-05,
      "loss": 2.4243,
      "step": 18676
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0240488052368164,
      "learning_rate": 1.7327690892302555e-05,
      "loss": 2.6869,
      "step": 18677
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.092612624168396,
      "learning_rate": 1.7327410708540482e-05,
      "loss": 2.6983,
      "step": 18678
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1972967386245728,
      "learning_rate": 1.7327130512356455e-05,
      "loss": 2.3488,
      "step": 18679
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2231169939041138,
      "learning_rate": 1.7326850303750956e-05,
      "loss": 2.6597,
      "step": 18680
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9796091914176941,
      "learning_rate": 1.7326570082724452e-05,
      "loss": 2.5531,
      "step": 18681
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0003762245178223,
      "learning_rate": 1.7326289849277426e-05,
      "loss": 2.3983,
      "step": 18682
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0317517518997192,
      "learning_rate": 1.7326009603410345e-05,
      "loss": 2.3257,
      "step": 18683
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0385204553604126,
      "learning_rate": 1.732572934512369e-05,
      "loss": 2.4,
      "step": 18684
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0835410356521606,
      "learning_rate": 1.7325449074417932e-05,
      "loss": 2.6778,
      "step": 18685
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4448026418685913,
      "learning_rate": 1.732516879129355e-05,
      "loss": 2.3736,
      "step": 18686
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1799919605255127,
      "learning_rate": 1.7324888495751018e-05,
      "loss": 2.4726,
      "step": 18687
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0240877866744995,
      "learning_rate": 1.732460818779081e-05,
      "loss": 2.3344,
      "step": 18688
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.048005223274231,
      "learning_rate": 1.73243278674134e-05,
      "loss": 2.4833,
      "step": 18689
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1222597360610962,
      "learning_rate": 1.7324047534619265e-05,
      "loss": 2.5826,
      "step": 18690
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0517024993896484,
      "learning_rate": 1.7323767189408884e-05,
      "loss": 2.4144,
      "step": 18691
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1306761503219604,
      "learning_rate": 1.732348683178273e-05,
      "loss": 2.4383,
      "step": 18692
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.058595061302185,
      "learning_rate": 1.7323206461741268e-05,
      "loss": 2.4787,
      "step": 18693
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0343457460403442,
      "learning_rate": 1.732292607928499e-05,
      "loss": 2.3473,
      "step": 18694
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0679442882537842,
      "learning_rate": 1.732264568441436e-05,
      "loss": 2.4997,
      "step": 18695
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1604857444763184,
      "learning_rate": 1.7322365277129857e-05,
      "loss": 2.5915,
      "step": 18696
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2908968925476074,
      "learning_rate": 1.7322084857431955e-05,
      "loss": 2.3628,
      "step": 18697
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.079911708831787,
      "learning_rate": 1.7321804425321133e-05,
      "loss": 2.3924,
      "step": 18698
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.068267822265625,
      "learning_rate": 1.7321523980797864e-05,
      "loss": 2.6585,
      "step": 18699
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2166051864624023,
      "learning_rate": 1.732124352386262e-05,
      "loss": 2.4797,
      "step": 18700
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2352707386016846,
      "learning_rate": 1.7320963054515885e-05,
      "loss": 2.2909,
      "step": 18701
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9831930994987488,
      "learning_rate": 1.7320682572758125e-05,
      "loss": 2.4598,
      "step": 18702
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0622618198394775,
      "learning_rate": 1.732040207858982e-05,
      "loss": 2.4733,
      "step": 18703
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0326313972473145,
      "learning_rate": 1.7320121572011447e-05,
      "loss": 2.4808,
      "step": 18704
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0458790063858032,
      "learning_rate": 1.7319841053023482e-05,
      "loss": 2.4308,
      "step": 18705
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.041669249534607,
      "learning_rate": 1.7319560521626392e-05,
      "loss": 2.5376,
      "step": 18706
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.068967342376709,
      "learning_rate": 1.7319279977820663e-05,
      "loss": 2.4075,
      "step": 18707
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9578573107719421,
      "learning_rate": 1.7318999421606765e-05,
      "loss": 2.3095,
      "step": 18708
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0024687051773071,
      "learning_rate": 1.7318718852985176e-05,
      "loss": 2.5749,
      "step": 18709
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0868725776672363,
      "learning_rate": 1.731843827195637e-05,
      "loss": 2.3163,
      "step": 18710
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9219399690628052,
      "learning_rate": 1.7318157678520827e-05,
      "loss": 2.3362,
      "step": 18711
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9556685090065002,
      "learning_rate": 1.7317877072679018e-05,
      "loss": 2.3946,
      "step": 18712
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0235648155212402,
      "learning_rate": 1.7317596454431415e-05,
      "loss": 2.3082,
      "step": 18713
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1349564790725708,
      "learning_rate": 1.7317315823778503e-05,
      "loss": 2.4095,
      "step": 18714
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.070556402206421,
      "learning_rate": 1.7317035180720754e-05,
      "loss": 2.4948,
      "step": 18715
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0381659269332886,
      "learning_rate": 1.731675452525864e-05,
      "loss": 2.3364,
      "step": 18716
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9791591167449951,
      "learning_rate": 1.7316473857392643e-05,
      "loss": 2.5343,
      "step": 18717
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0331472158432007,
      "learning_rate": 1.7316193177123235e-05,
      "loss": 2.7431,
      "step": 18718
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9853424429893494,
      "learning_rate": 1.731591248445089e-05,
      "loss": 2.4409,
      "step": 18719
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.998532235622406,
      "learning_rate": 1.7315631779376088e-05,
      "loss": 2.4848,
      "step": 18720
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9550837874412537,
      "learning_rate": 1.7315351061899304e-05,
      "loss": 2.4977,
      "step": 18721
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0281767845153809,
      "learning_rate": 1.7315070332021012e-05,
      "loss": 2.3995,
      "step": 18722
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0020036697387695,
      "learning_rate": 1.7314789589741688e-05,
      "loss": 2.333,
      "step": 18723
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.981257975101471,
      "learning_rate": 1.731450883506181e-05,
      "loss": 2.366,
      "step": 18724
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0237528085708618,
      "learning_rate": 1.7314228067981856e-05,
      "loss": 2.3342,
      "step": 18725
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0729796886444092,
      "learning_rate": 1.7313947288502294e-05,
      "loss": 2.5644,
      "step": 18726
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1216552257537842,
      "learning_rate": 1.731366649662361e-05,
      "loss": 2.4733,
      "step": 18727
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0036617517471313,
      "learning_rate": 1.731338569234627e-05,
      "loss": 2.5215,
      "step": 18728
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9778857827186584,
      "learning_rate": 1.7313104875670758e-05,
      "loss": 2.3762,
      "step": 18729
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.960684597492218,
      "learning_rate": 1.7312824046597546e-05,
      "loss": 2.5048,
      "step": 18730
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0224552154541016,
      "learning_rate": 1.731254320512711e-05,
      "loss": 2.5271,
      "step": 18731
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0261969566345215,
      "learning_rate": 1.7312262351259928e-05,
      "loss": 2.4636,
      "step": 18732
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.114219069480896,
      "learning_rate": 1.7311981484996474e-05,
      "loss": 2.4836,
      "step": 18733
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9742188453674316,
      "learning_rate": 1.7311700606337228e-05,
      "loss": 2.5895,
      "step": 18734
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.13925302028656,
      "learning_rate": 1.731141971528266e-05,
      "loss": 2.2281,
      "step": 18735
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9813451766967773,
      "learning_rate": 1.7311138811833255e-05,
      "loss": 2.3375,
      "step": 18736
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.039566993713379,
      "learning_rate": 1.7310857895989483e-05,
      "loss": 2.2694,
      "step": 18737
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0477824211120605,
      "learning_rate": 1.731057696775182e-05,
      "loss": 2.385,
      "step": 18738
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1019364595413208,
      "learning_rate": 1.731029602712074e-05,
      "loss": 2.5187,
      "step": 18739
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9764646887779236,
      "learning_rate": 1.7310015074096727e-05,
      "loss": 2.3497,
      "step": 18740
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9720853567123413,
      "learning_rate": 1.7309734108680254e-05,
      "loss": 2.4621,
      "step": 18741
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9860519766807556,
      "learning_rate": 1.7309453130871793e-05,
      "loss": 2.3437,
      "step": 18742
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0131219625473022,
      "learning_rate": 1.7309172140671827e-05,
      "loss": 2.5236,
      "step": 18743
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.967574954032898,
      "learning_rate": 1.730889113808083e-05,
      "loss": 2.4226,
      "step": 18744
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9563689231872559,
      "learning_rate": 1.730861012309927e-05,
      "loss": 2.4766,
      "step": 18745
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9685707688331604,
      "learning_rate": 1.730832909572764e-05,
      "loss": 2.6613,
      "step": 18746
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9690569639205933,
      "learning_rate": 1.7308048055966403e-05,
      "loss": 2.3834,
      "step": 18747
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0881199836730957,
      "learning_rate": 1.7307767003816036e-05,
      "loss": 2.5637,
      "step": 18748
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2890156507492065,
      "learning_rate": 1.7307485939277023e-05,
      "loss": 2.5476,
      "step": 18749
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9922603964805603,
      "learning_rate": 1.7307204862349837e-05,
      "loss": 2.4832,
      "step": 18750
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0963152647018433,
      "learning_rate": 1.7306923773034955e-05,
      "loss": 2.7338,
      "step": 18751
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9285978078842163,
      "learning_rate": 1.7306642671332848e-05,
      "loss": 2.558,
      "step": 18752
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0166025161743164,
      "learning_rate": 1.7306361557244003e-05,
      "loss": 2.4121,
      "step": 18753
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0385587215423584,
      "learning_rate": 1.730608043076889e-05,
      "loss": 2.502,
      "step": 18754
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1742733716964722,
      "learning_rate": 1.7305799291907984e-05,
      "loss": 2.4688,
      "step": 18755
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0528966188430786,
      "learning_rate": 1.7305518140661762e-05,
      "loss": 2.4456,
      "step": 18756
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1137584447860718,
      "learning_rate": 1.730523697703071e-05,
      "loss": 2.6597,
      "step": 18757
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0922927856445312,
      "learning_rate": 1.7304955801015292e-05,
      "loss": 2.5385,
      "step": 18758
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0041359663009644,
      "learning_rate": 1.730467461261599e-05,
      "loss": 2.4823,
      "step": 18759
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.040832281112671,
      "learning_rate": 1.730439341183328e-05,
      "loss": 2.1957,
      "step": 18760
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.068199634552002,
      "learning_rate": 1.730411219866764e-05,
      "loss": 2.4393,
      "step": 18761
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0803375244140625,
      "learning_rate": 1.7303830973119545e-05,
      "loss": 2.4035,
      "step": 18762
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0697563886642456,
      "learning_rate": 1.7303549735189475e-05,
      "loss": 2.4729,
      "step": 18763
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0475106239318848,
      "learning_rate": 1.7303268484877902e-05,
      "loss": 2.6224,
      "step": 18764
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0256258249282837,
      "learning_rate": 1.730298722218531e-05,
      "loss": 2.5343,
      "step": 18765
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.8639145493507385,
      "learning_rate": 1.7302705947112167e-05,
      "loss": 2.3323,
      "step": 18766
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0984036922454834,
      "learning_rate": 1.7302424659658954e-05,
      "loss": 2.4189,
      "step": 18767
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0935004949569702,
      "learning_rate": 1.730214335982615e-05,
      "loss": 2.5049,
      "step": 18768
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.003949522972107,
      "learning_rate": 1.730186204761423e-05,
      "loss": 2.3475,
      "step": 18769
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.096461534500122,
      "learning_rate": 1.730158072302367e-05,
      "loss": 2.2985,
      "step": 18770
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0490487813949585,
      "learning_rate": 1.7301299386054944e-05,
      "loss": 2.6293,
      "step": 18771
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9830255508422852,
      "learning_rate": 1.730101803670854e-05,
      "loss": 2.3789,
      "step": 18772
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0012636184692383,
      "learning_rate": 1.730073667498492e-05,
      "loss": 2.4749,
      "step": 18773
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.065785527229309,
      "learning_rate": 1.7300455300884573e-05,
      "loss": 2.6779,
      "step": 18774
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.044405460357666,
      "learning_rate": 1.730017391440797e-05,
      "loss": 2.491,
      "step": 18775
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.076399326324463,
      "learning_rate": 1.729989251555559e-05,
      "loss": 2.3497,
      "step": 18776
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9342994689941406,
      "learning_rate": 1.729961110432791e-05,
      "loss": 2.4007,
      "step": 18777
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0564221143722534,
      "learning_rate": 1.729932968072541e-05,
      "loss": 2.4766,
      "step": 18778
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0379570722579956,
      "learning_rate": 1.7299048244748557e-05,
      "loss": 2.3467,
      "step": 18779
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1307872533798218,
      "learning_rate": 1.7298766796397838e-05,
      "loss": 2.1356,
      "step": 18780
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9797676205635071,
      "learning_rate": 1.7298485335673725e-05,
      "loss": 2.6516,
      "step": 18781
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.079411506652832,
      "learning_rate": 1.7298203862576698e-05,
      "loss": 2.5436,
      "step": 18782
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0105868577957153,
      "learning_rate": 1.7297922377107236e-05,
      "loss": 2.3037,
      "step": 18783
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9660491943359375,
      "learning_rate": 1.729764087926581e-05,
      "loss": 2.2996,
      "step": 18784
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0252575874328613,
      "learning_rate": 1.72973593690529e-05,
      "loss": 2.572,
      "step": 18785
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1100304126739502,
      "learning_rate": 1.7297077846468987e-05,
      "loss": 2.5243,
      "step": 18786
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.187946081161499,
      "learning_rate": 1.7296796311514546e-05,
      "loss": 2.4372,
      "step": 18787
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0520401000976562,
      "learning_rate": 1.729651476419005e-05,
      "loss": 2.4904,
      "step": 18788
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.058287501335144,
      "learning_rate": 1.7296233204495982e-05,
      "loss": 2.5551,
      "step": 18789
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0928184986114502,
      "learning_rate": 1.7295951632432818e-05,
      "loss": 2.5621,
      "step": 18790
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0043537616729736,
      "learning_rate": 1.729567004800103e-05,
      "loss": 2.4627,
      "step": 18791
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1254522800445557,
      "learning_rate": 1.7295388451201106e-05,
      "loss": 2.6481,
      "step": 18792
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0653986930847168,
      "learning_rate": 1.7295106842033514e-05,
      "loss": 2.2258,
      "step": 18793
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1079132556915283,
      "learning_rate": 1.7294825220498733e-05,
      "loss": 2.6099,
      "step": 18794
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1634166240692139,
      "learning_rate": 1.7294543586597244e-05,
      "loss": 2.4738,
      "step": 18795
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.097368597984314,
      "learning_rate": 1.729426194032952e-05,
      "loss": 2.4601,
      "step": 18796
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.027320146560669,
      "learning_rate": 1.7293980281696042e-05,
      "loss": 2.5245,
      "step": 18797
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0271885395050049,
      "learning_rate": 1.7293698610697292e-05,
      "loss": 2.4684,
      "step": 18798
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.655765414237976,
      "learning_rate": 1.7293416927333737e-05,
      "loss": 2.592,
      "step": 18799
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0811892747879028,
      "learning_rate": 1.729313523160586e-05,
      "loss": 2.5276,
      "step": 18800
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0510621070861816,
      "learning_rate": 1.7292853523514134e-05,
      "loss": 2.3506,
      "step": 18801
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0548371076583862,
      "learning_rate": 1.7292571803059047e-05,
      "loss": 2.3076,
      "step": 18802
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1624886989593506,
      "learning_rate": 1.7292290070241067e-05,
      "loss": 2.6155,
      "step": 18803
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1366240978240967,
      "learning_rate": 1.7292008325060676e-05,
      "loss": 2.4611,
      "step": 18804
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9910300374031067,
      "learning_rate": 1.7291726567518346e-05,
      "loss": 2.4668,
      "step": 18805
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0192314386367798,
      "learning_rate": 1.7291444797614564e-05,
      "loss": 2.203,
      "step": 18806
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0016076564788818,
      "learning_rate": 1.7291163015349803e-05,
      "loss": 2.3774,
      "step": 18807
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9982733130455017,
      "learning_rate": 1.7290881220724537e-05,
      "loss": 2.4273,
      "step": 18808
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0671665668487549,
      "learning_rate": 1.729059941373925e-05,
      "loss": 2.2962,
      "step": 18809
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0710891485214233,
      "learning_rate": 1.7290317594394416e-05,
      "loss": 2.3469,
      "step": 18810
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9427964091300964,
      "learning_rate": 1.7290035762690514e-05,
      "loss": 2.5587,
      "step": 18811
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1652435064315796,
      "learning_rate": 1.728975391862802e-05,
      "loss": 2.4419,
      "step": 18812
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9843260645866394,
      "learning_rate": 1.7289472062207415e-05,
      "loss": 2.4011,
      "step": 18813
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1650750637054443,
      "learning_rate": 1.7289190193429175e-05,
      "loss": 2.4657,
      "step": 18814
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.0770915746688843,
      "learning_rate": 1.728890831229378e-05,
      "loss": 2.3006,
      "step": 18815
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.042328119277954,
      "learning_rate": 1.72886264188017e-05,
      "loss": 2.5505,
      "step": 18816
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0363458395004272,
      "learning_rate": 1.728834451295342e-05,
      "loss": 2.5534,
      "step": 18817
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.053104281425476,
      "learning_rate": 1.7288062594749416e-05,
      "loss": 2.5758,
      "step": 18818
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0485590696334839,
      "learning_rate": 1.728778066419017e-05,
      "loss": 2.4594,
      "step": 18819
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1154003143310547,
      "learning_rate": 1.7287498721276156e-05,
      "loss": 2.2679,
      "step": 18820
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0636178255081177,
      "learning_rate": 1.728721676600785e-05,
      "loss": 2.638,
      "step": 18821
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0434492826461792,
      "learning_rate": 1.7286934798385736e-05,
      "loss": 2.2842,
      "step": 18822
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3381121158599854,
      "learning_rate": 1.7286652818410285e-05,
      "loss": 2.3991,
      "step": 18823
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1558091640472412,
      "learning_rate": 1.7286370826081983e-05,
      "loss": 2.4107,
      "step": 18824
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.004269003868103,
      "learning_rate": 1.7286088821401298e-05,
      "loss": 2.8208,
      "step": 18825
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0771751403808594,
      "learning_rate": 1.7285806804368714e-05,
      "loss": 2.2909,
      "step": 18826
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9929501414299011,
      "learning_rate": 1.7285524774984712e-05,
      "loss": 2.4397,
      "step": 18827
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9692512154579163,
      "learning_rate": 1.7285242733249765e-05,
      "loss": 2.3282,
      "step": 18828
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9867382645606995,
      "learning_rate": 1.7284960679164355e-05,
      "loss": 2.3709,
      "step": 18829
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0544214248657227,
      "learning_rate": 1.7284678612728957e-05,
      "loss": 2.5349,
      "step": 18830
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.032820224761963,
      "learning_rate": 1.7284396533944046e-05,
      "loss": 2.0815,
      "step": 18831
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.031168818473816,
      "learning_rate": 1.728411444281011e-05,
      "loss": 2.339,
      "step": 18832
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0583045482635498,
      "learning_rate": 1.728383233932762e-05,
      "loss": 2.5299,
      "step": 18833
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9732760190963745,
      "learning_rate": 1.7283550223497055e-05,
      "loss": 2.4337,
      "step": 18834
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9889435768127441,
      "learning_rate": 1.728326809531889e-05,
      "loss": 2.5541,
      "step": 18835
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0591639280319214,
      "learning_rate": 1.7282985954793617e-05,
      "loss": 2.5789,
      "step": 18836
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.026357889175415,
      "learning_rate": 1.7282703801921702e-05,
      "loss": 2.5734,
      "step": 18837
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.170860767364502,
      "learning_rate": 1.728242163670362e-05,
      "loss": 2.3161,
      "step": 18838
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0017876625061035,
      "learning_rate": 1.7282139459139862e-05,
      "loss": 2.5098,
      "step": 18839
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1049392223358154,
      "learning_rate": 1.7281857269230896e-05,
      "loss": 2.4459,
      "step": 18840
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0570695400238037,
      "learning_rate": 1.7281575066977208e-05,
      "loss": 2.4895,
      "step": 18841
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1352248191833496,
      "learning_rate": 1.7281292852379268e-05,
      "loss": 2.2255,
      "step": 18842
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2374217510223389,
      "learning_rate": 1.728101062543756e-05,
      "loss": 2.3229,
      "step": 18843
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1023143529891968,
      "learning_rate": 1.7280728386152565e-05,
      "loss": 2.4128,
      "step": 18844
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0884500741958618,
      "learning_rate": 1.7280446134524753e-05,
      "loss": 2.4094,
      "step": 18845
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3039838075637817,
      "learning_rate": 1.7280163870554615e-05,
      "loss": 2.5787,
      "step": 18846
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.079221487045288,
      "learning_rate": 1.7279881594242613e-05,
      "loss": 2.4566,
      "step": 18847
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0100680589675903,
      "learning_rate": 1.727959930558924e-05,
      "loss": 2.5317,
      "step": 18848
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.036921739578247,
      "learning_rate": 1.7279317004594966e-05,
      "loss": 2.2356,
      "step": 18849
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0267337560653687,
      "learning_rate": 1.7279034691260276e-05,
      "loss": 2.1752,
      "step": 18850
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1034446954727173,
      "learning_rate": 1.7278752365585644e-05,
      "loss": 2.3821,
      "step": 18851
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1539803743362427,
      "learning_rate": 1.7278470027571548e-05,
      "loss": 2.3659,
      "step": 18852
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.119420051574707,
      "learning_rate": 1.7278187677218472e-05,
      "loss": 2.3017,
      "step": 18853
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0055497884750366,
      "learning_rate": 1.7277905314526887e-05,
      "loss": 2.6242,
      "step": 18854
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1562902927398682,
      "learning_rate": 1.7277622939497282e-05,
      "loss": 2.3396,
      "step": 18855
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0924975872039795,
      "learning_rate": 1.7277340552130123e-05,
      "loss": 2.3389,
      "step": 18856
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0081088542938232,
      "learning_rate": 1.72770581524259e-05,
      "loss": 2.5341,
      "step": 18857
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0406414270401,
      "learning_rate": 1.7276775740385084e-05,
      "loss": 2.5491,
      "step": 18858
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2071325778961182,
      "learning_rate": 1.7276493316008156e-05,
      "loss": 2.3149,
      "step": 18859
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0494986772537231,
      "learning_rate": 1.7276210879295595e-05,
      "loss": 2.3861,
      "step": 18860
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0901681184768677,
      "learning_rate": 1.7275928430247883e-05,
      "loss": 2.4164,
      "step": 18861
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1209791898727417,
      "learning_rate": 1.72756459688655e-05,
      "loss": 2.242,
      "step": 18862
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0261783599853516,
      "learning_rate": 1.7275363495148914e-05,
      "loss": 2.4117,
      "step": 18863
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9751890301704407,
      "learning_rate": 1.7275081009098612e-05,
      "loss": 2.5621,
      "step": 18864
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1997395753860474,
      "learning_rate": 1.7274798510715074e-05,
      "loss": 2.5104,
      "step": 18865
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.010748028755188,
      "learning_rate": 1.727451599999878e-05,
      "loss": 2.3497,
      "step": 18866
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0073463916778564,
      "learning_rate": 1.7274233476950197e-05,
      "loss": 2.368,
      "step": 18867
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0684616565704346,
      "learning_rate": 1.7273950941569818e-05,
      "loss": 2.3891,
      "step": 18868
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.966644823551178,
      "learning_rate": 1.7273668393858116e-05,
      "loss": 2.4514,
      "step": 18869
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.030943751335144,
      "learning_rate": 1.727338583381557e-05,
      "loss": 2.5545,
      "step": 18870
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0408284664154053,
      "learning_rate": 1.727310326144266e-05,
      "loss": 2.3184,
      "step": 18871
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2191423177719116,
      "learning_rate": 1.7272820676739866e-05,
      "loss": 2.388,
      "step": 18872
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9612571001052856,
      "learning_rate": 1.7272538079707666e-05,
      "loss": 2.58,
      "step": 18873
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1145572662353516,
      "learning_rate": 1.7272255470346534e-05,
      "loss": 2.4807,
      "step": 18874
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9489688277244568,
      "learning_rate": 1.727197284865696e-05,
      "loss": 2.2609,
      "step": 18875
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.104446291923523,
      "learning_rate": 1.7271690214639415e-05,
      "loss": 2.2605,
      "step": 18876
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9873368740081787,
      "learning_rate": 1.7271407568294375e-05,
      "loss": 2.6548,
      "step": 18877
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1235265731811523,
      "learning_rate": 1.727112490962233e-05,
      "loss": 2.5286,
      "step": 18878
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0796687602996826,
      "learning_rate": 1.727084223862375e-05,
      "loss": 2.4771,
      "step": 18879
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0150967836380005,
      "learning_rate": 1.727055955529912e-05,
      "loss": 2.2485,
      "step": 18880
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0773098468780518,
      "learning_rate": 1.7270276859648918e-05,
      "loss": 2.4921,
      "step": 18881
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.013648509979248,
      "learning_rate": 1.7269994151673622e-05,
      "loss": 2.4778,
      "step": 18882
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9942681193351746,
      "learning_rate": 1.726971143137371e-05,
      "loss": 2.3166,
      "step": 18883
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0276190042495728,
      "learning_rate": 1.7269428698749663e-05,
      "loss": 2.4888,
      "step": 18884
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0492644309997559,
      "learning_rate": 1.726914595380196e-05,
      "loss": 2.37,
      "step": 18885
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.04745614528656,
      "learning_rate": 1.7268863196531077e-05,
      "loss": 2.6463,
      "step": 18886
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0003304481506348,
      "learning_rate": 1.72685804269375e-05,
      "loss": 2.5218,
      "step": 18887
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0775885581970215,
      "learning_rate": 1.7268297645021705e-05,
      "loss": 2.4829,
      "step": 18888
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9931278824806213,
      "learning_rate": 1.726801485078417e-05,
      "loss": 2.3565,
      "step": 18889
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1180050373077393,
      "learning_rate": 1.7267732044225377e-05,
      "loss": 2.6306,
      "step": 18890
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0828181505203247,
      "learning_rate": 1.7267449225345805e-05,
      "loss": 2.3557,
      "step": 18891
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.484462022781372,
      "learning_rate": 1.7267166394145935e-05,
      "loss": 2.5431,
      "step": 18892
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9616051912307739,
      "learning_rate": 1.726688355062624e-05,
      "loss": 2.5008,
      "step": 18893
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.08330237865448,
      "learning_rate": 1.7266600694787206e-05,
      "loss": 2.5402,
      "step": 18894
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.944202721118927,
      "learning_rate": 1.726631782662931e-05,
      "loss": 2.3341,
      "step": 18895
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1014708280563354,
      "learning_rate": 1.726603494615303e-05,
      "loss": 2.6211,
      "step": 18896
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0488741397857666,
      "learning_rate": 1.726575205335885e-05,
      "loss": 2.4481,
      "step": 18897
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9803187847137451,
      "learning_rate": 1.7265469148247243e-05,
      "loss": 2.449,
      "step": 18898
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1591408252716064,
      "learning_rate": 1.7265186230818694e-05,
      "loss": 2.3851,
      "step": 18899
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.052661657333374,
      "learning_rate": 1.7264903301073683e-05,
      "loss": 2.3061,
      "step": 18900
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0529502630233765,
      "learning_rate": 1.7264620359012687e-05,
      "loss": 2.5218,
      "step": 18901
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6282035112380981,
      "learning_rate": 1.7264337404636185e-05,
      "loss": 2.2881,
      "step": 18902
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0086820125579834,
      "learning_rate": 1.726405443794466e-05,
      "loss": 2.5095,
      "step": 18903
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9855378270149231,
      "learning_rate": 1.7263771458938588e-05,
      "loss": 2.3396,
      "step": 18904
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0005725622177124,
      "learning_rate": 1.7263488467618456e-05,
      "loss": 2.514,
      "step": 18905
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0071539878845215,
      "learning_rate": 1.726320546398473e-05,
      "loss": 2.444,
      "step": 18906
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.034440279006958,
      "learning_rate": 1.7262922448037902e-05,
      "loss": 2.3188,
      "step": 18907
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.8570227026939392,
      "learning_rate": 1.726263941977845e-05,
      "loss": 2.5896,
      "step": 18908
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1159696578979492,
      "learning_rate": 1.7262356379206848e-05,
      "loss": 2.4093,
      "step": 18909
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0253052711486816,
      "learning_rate": 1.726207332632358e-05,
      "loss": 2.3844,
      "step": 18910
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.017957329750061,
      "learning_rate": 1.7261790261129128e-05,
      "loss": 2.3034,
      "step": 18911
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9739671349525452,
      "learning_rate": 1.726150718362397e-05,
      "loss": 2.3896,
      "step": 18912
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0016616582870483,
      "learning_rate": 1.7261224093808583e-05,
      "loss": 2.5383,
      "step": 18913
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1969619989395142,
      "learning_rate": 1.726094099168345e-05,
      "loss": 2.4968,
      "step": 18914
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0700491666793823,
      "learning_rate": 1.7260657877249046e-05,
      "loss": 2.4389,
      "step": 18915
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0041977167129517,
      "learning_rate": 1.726037475050586e-05,
      "loss": 2.6747,
      "step": 18916
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0020132064819336,
      "learning_rate": 1.7260091611454365e-05,
      "loss": 2.612,
      "step": 18917
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0587365627288818,
      "learning_rate": 1.725980846009504e-05,
      "loss": 2.3853,
      "step": 18918
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0762391090393066,
      "learning_rate": 1.7259525296428373e-05,
      "loss": 2.4061,
      "step": 18919
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1268221139907837,
      "learning_rate": 1.7259242120454835e-05,
      "loss": 2.5855,
      "step": 18920
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0073071718215942,
      "learning_rate": 1.725895893217491e-05,
      "loss": 2.3899,
      "step": 18921
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0570701360702515,
      "learning_rate": 1.725867573158908e-05,
      "loss": 2.7123,
      "step": 18922
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.064603328704834,
      "learning_rate": 1.7258392518697826e-05,
      "loss": 2.6213,
      "step": 18923
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9972318410873413,
      "learning_rate": 1.725810929350162e-05,
      "loss": 2.4181,
      "step": 18924
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0592551231384277,
      "learning_rate": 1.7257826056000954e-05,
      "loss": 2.5239,
      "step": 18925
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1736650466918945,
      "learning_rate": 1.7257542806196293e-05,
      "loss": 2.5545,
      "step": 18926
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0189741849899292,
      "learning_rate": 1.7257259544088133e-05,
      "loss": 2.5703,
      "step": 18927
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0471413135528564,
      "learning_rate": 1.7256976269676942e-05,
      "loss": 2.4937,
      "step": 18928
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.020572543144226,
      "learning_rate": 1.7256692982963207e-05,
      "loss": 2.3019,
      "step": 18929
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0191917419433594,
      "learning_rate": 1.725640968394741e-05,
      "loss": 2.3498,
      "step": 18930
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9886236786842346,
      "learning_rate": 1.725612637263002e-05,
      "loss": 2.4241,
      "step": 18931
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.990722119808197,
      "learning_rate": 1.7255843049011535e-05,
      "loss": 2.5512,
      "step": 18932
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1035958528518677,
      "learning_rate": 1.725555971309242e-05,
      "loss": 2.4655,
      "step": 18933
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0174305438995361,
      "learning_rate": 1.725527636487316e-05,
      "loss": 2.6031,
      "step": 18934
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2828073501586914,
      "learning_rate": 1.7254993004354235e-05,
      "loss": 2.5146,
      "step": 18935
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.985551118850708,
      "learning_rate": 1.7254709631536128e-05,
      "loss": 2.4971,
      "step": 18936
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.000506043434143,
      "learning_rate": 1.725442624641932e-05,
      "loss": 2.3626,
      "step": 18937
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9744448661804199,
      "learning_rate": 1.7254142849004284e-05,
      "loss": 2.5177,
      "step": 18938
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0652637481689453,
      "learning_rate": 1.7253859439291512e-05,
      "loss": 2.3376,
      "step": 18939
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0495586395263672,
      "learning_rate": 1.7253576017281476e-05,
      "loss": 2.405,
      "step": 18940
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1512700319290161,
      "learning_rate": 1.7253292582974655e-05,
      "loss": 2.553,
      "step": 18941
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0327359437942505,
      "learning_rate": 1.7253009136371536e-05,
      "loss": 2.4695,
      "step": 18942
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0729639530181885,
      "learning_rate": 1.7252725677472598e-05,
      "loss": 2.6204,
      "step": 18943
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0431902408599854,
      "learning_rate": 1.725244220627832e-05,
      "loss": 2.5731,
      "step": 18944
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9910880327224731,
      "learning_rate": 1.725215872278918e-05,
      "loss": 2.5358,
      "step": 18945
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1080152988433838,
      "learning_rate": 1.7251875227005665e-05,
      "loss": 2.3185,
      "step": 18946
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1942414045333862,
      "learning_rate": 1.7251591718928248e-05,
      "loss": 2.2068,
      "step": 18947
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0311977863311768,
      "learning_rate": 1.7251308198557415e-05,
      "loss": 2.6085,
      "step": 18948
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2576663494110107,
      "learning_rate": 1.7251024665893644e-05,
      "loss": 2.6035,
      "step": 18949
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0071507692337036,
      "learning_rate": 1.725074112093742e-05,
      "loss": 2.6865,
      "step": 18950
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0767256021499634,
      "learning_rate": 1.7250457563689218e-05,
      "loss": 2.5654,
      "step": 18951
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.000999093055725,
      "learning_rate": 1.725017399414952e-05,
      "loss": 2.3836,
      "step": 18952
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2479894161224365,
      "learning_rate": 1.724989041231881e-05,
      "loss": 2.4437,
      "step": 18953
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.194395899772644,
      "learning_rate": 1.724960681819757e-05,
      "loss": 2.3999,
      "step": 18954
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9965362548828125,
      "learning_rate": 1.7249323211786274e-05,
      "loss": 2.3578,
      "step": 18955
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1099739074707031,
      "learning_rate": 1.7249039593085406e-05,
      "loss": 2.6035,
      "step": 18956
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9628105759620667,
      "learning_rate": 1.7248755962095445e-05,
      "loss": 2.3152,
      "step": 18957
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9258610606193542,
      "learning_rate": 1.724847231881688e-05,
      "loss": 2.4116,
      "step": 18958
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0214364528656006,
      "learning_rate": 1.7248188663250182e-05,
      "loss": 2.5289,
      "step": 18959
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9983512759208679,
      "learning_rate": 1.7247904995395832e-05,
      "loss": 2.3899,
      "step": 18960
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0362834930419922,
      "learning_rate": 1.724762131525432e-05,
      "loss": 2.5352,
      "step": 18961
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0266869068145752,
      "learning_rate": 1.724733762282612e-05,
      "loss": 2.5851,
      "step": 18962
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9538334608078003,
      "learning_rate": 1.7247053918111715e-05,
      "loss": 2.5367,
      "step": 18963
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2483634948730469,
      "learning_rate": 1.7246770201111582e-05,
      "loss": 2.5709,
      "step": 18964
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0042024850845337,
      "learning_rate": 1.724648647182621e-05,
      "loss": 2.373,
      "step": 18965
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9312552809715271,
      "learning_rate": 1.7246202730256072e-05,
      "loss": 2.6045,
      "step": 18966
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.169386625289917,
      "learning_rate": 1.724591897640165e-05,
      "loss": 2.5362,
      "step": 18967
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0267640352249146,
      "learning_rate": 1.7245635210263432e-05,
      "loss": 2.4788,
      "step": 18968
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2092252969741821,
      "learning_rate": 1.7245351431841895e-05,
      "loss": 2.3299,
      "step": 18969
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9560943245887756,
      "learning_rate": 1.724506764113752e-05,
      "loss": 2.4995,
      "step": 18970
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4387022256851196,
      "learning_rate": 1.7244783838150785e-05,
      "loss": 2.2649,
      "step": 18971
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.101580023765564,
      "learning_rate": 1.7244500022882172e-05,
      "loss": 2.438,
      "step": 18972
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.149004340171814,
      "learning_rate": 1.7244216195332168e-05,
      "loss": 2.5385,
      "step": 18973
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0100942850112915,
      "learning_rate": 1.7243932355501246e-05,
      "loss": 2.3634,
      "step": 18974
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1677485704421997,
      "learning_rate": 1.7243648503389895e-05,
      "loss": 2.2831,
      "step": 18975
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9796234369277954,
      "learning_rate": 1.724336463899859e-05,
      "loss": 2.4507,
      "step": 18976
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0327714681625366,
      "learning_rate": 1.7243080762327812e-05,
      "loss": 2.4843,
      "step": 18977
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0026218891143799,
      "learning_rate": 1.7242796873378053e-05,
      "loss": 2.473,
      "step": 18978
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0314832925796509,
      "learning_rate": 1.724251297214978e-05,
      "loss": 2.429,
      "step": 18979
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0026942491531372,
      "learning_rate": 1.724222905864348e-05,
      "loss": 2.6465,
      "step": 18980
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0450772047042847,
      "learning_rate": 1.724194513285964e-05,
      "loss": 2.4857,
      "step": 18981
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0483286380767822,
      "learning_rate": 1.724166119479873e-05,
      "loss": 2.7613,
      "step": 18982
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2022426128387451,
      "learning_rate": 1.7241377244461238e-05,
      "loss": 2.4463,
      "step": 18983
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9590951800346375,
      "learning_rate": 1.724109328184765e-05,
      "loss": 2.3759,
      "step": 18984
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0053538084030151,
      "learning_rate": 1.724080930695844e-05,
      "loss": 2.4522,
      "step": 18985
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0780789852142334,
      "learning_rate": 1.724052531979409e-05,
      "loss": 2.3751,
      "step": 18986
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9890566468238831,
      "learning_rate": 1.7240241320355084e-05,
      "loss": 2.5372,
      "step": 18987
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1127244234085083,
      "learning_rate": 1.7239957308641903e-05,
      "loss": 2.4702,
      "step": 18988
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2114734649658203,
      "learning_rate": 1.7239673284655027e-05,
      "loss": 2.6062,
      "step": 18989
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0238579511642456,
      "learning_rate": 1.723938924839494e-05,
      "loss": 2.1956,
      "step": 18990
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0669656991958618,
      "learning_rate": 1.723910519986212e-05,
      "loss": 2.7082,
      "step": 18991
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9983647465705872,
      "learning_rate": 1.7238821139057052e-05,
      "loss": 2.6219,
      "step": 18992
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0023442506790161,
      "learning_rate": 1.7238537065980216e-05,
      "loss": 2.555,
      "step": 18993
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9179344773292542,
      "learning_rate": 1.7238252980632096e-05,
      "loss": 2.4938,
      "step": 18994
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.102594256401062,
      "learning_rate": 1.7237968883013168e-05,
      "loss": 2.4006,
      "step": 18995
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1127156019210815,
      "learning_rate": 1.7237684773123917e-05,
      "loss": 2.3139,
      "step": 18996
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0528900623321533,
      "learning_rate": 1.7237400650964826e-05,
      "loss": 2.4575,
      "step": 18997
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2488864660263062,
      "learning_rate": 1.7237116516536373e-05,
      "loss": 2.6475,
      "step": 18998
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0040037631988525,
      "learning_rate": 1.7236832369839045e-05,
      "loss": 2.3845,
      "step": 18999
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0192854404449463,
      "learning_rate": 1.723654821087332e-05,
      "loss": 2.328,
      "step": 19000
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0823900699615479,
      "learning_rate": 1.7236264039639676e-05,
      "loss": 2.3572,
      "step": 19001
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9440797567367554,
      "learning_rate": 1.7235979856138604e-05,
      "loss": 2.4284,
      "step": 19002
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2258378267288208,
      "learning_rate": 1.723569566037058e-05,
      "loss": 2.4175,
      "step": 19003
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1317225694656372,
      "learning_rate": 1.723541145233609e-05,
      "loss": 2.394,
      "step": 19004
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0442007780075073,
      "learning_rate": 1.7235127232035607e-05,
      "loss": 2.3615,
      "step": 19005
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.070751667022705,
      "learning_rate": 1.7234842999469616e-05,
      "loss": 2.6209,
      "step": 19006
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0248994827270508,
      "learning_rate": 1.7234558754638605e-05,
      "loss": 2.5278,
      "step": 19007
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0483248233795166,
      "learning_rate": 1.7234274497543054e-05,
      "loss": 2.4779,
      "step": 19008
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.100180745124817,
      "learning_rate": 1.7233990228183443e-05,
      "loss": 2.4252,
      "step": 19009
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0479280948638916,
      "learning_rate": 1.723370594656025e-05,
      "loss": 2.2428,
      "step": 19010
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9808290600776672,
      "learning_rate": 1.7233421652673964e-05,
      "loss": 2.6241,
      "step": 19011
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0596871376037598,
      "learning_rate": 1.7233137346525062e-05,
      "loss": 2.5914,
      "step": 19012
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9647108912467957,
      "learning_rate": 1.7232853028114027e-05,
      "loss": 2.2558,
      "step": 19013
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.045051097869873,
      "learning_rate": 1.7232568697441343e-05,
      "loss": 2.3059,
      "step": 19014
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0186572074890137,
      "learning_rate": 1.723228435450749e-05,
      "loss": 2.3747,
      "step": 19015
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9863811135292053,
      "learning_rate": 1.723199999931295e-05,
      "loss": 2.4861,
      "step": 19016
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1897556781768799,
      "learning_rate": 1.723171563185821e-05,
      "loss": 2.3755,
      "step": 19017
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1001954078674316,
      "learning_rate": 1.7231431252143745e-05,
      "loss": 2.4454,
      "step": 19018
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0230326652526855,
      "learning_rate": 1.723114686017004e-05,
      "loss": 2.3178,
      "step": 19019
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.093091607093811,
      "learning_rate": 1.723086245593758e-05,
      "loss": 2.3953,
      "step": 19020
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0780967473983765,
      "learning_rate": 1.723057803944684e-05,
      "loss": 2.5485,
      "step": 19021
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9983813166618347,
      "learning_rate": 1.7230293610698304e-05,
      "loss": 2.3909,
      "step": 19022
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.064296841621399,
      "learning_rate": 1.7230009169692464e-05,
      "loss": 2.3626,
      "step": 19023
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1292765140533447,
      "learning_rate": 1.722972471642979e-05,
      "loss": 2.4375,
      "step": 19024
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.551717519760132,
      "learning_rate": 1.722944025091077e-05,
      "loss": 2.4257,
      "step": 19025
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9814096093177795,
      "learning_rate": 1.7229155773135887e-05,
      "loss": 2.4454,
      "step": 19026
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9374780058860779,
      "learning_rate": 1.722887128310562e-05,
      "loss": 2.4094,
      "step": 19027
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0368306636810303,
      "learning_rate": 1.7228586780820454e-05,
      "loss": 2.4564,
      "step": 19028
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9418562054634094,
      "learning_rate": 1.7228302266280868e-05,
      "loss": 2.4263,
      "step": 19029
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0292893648147583,
      "learning_rate": 1.7228017739487347e-05,
      "loss": 2.5062,
      "step": 19030
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1363950967788696,
      "learning_rate": 1.7227733200440374e-05,
      "loss": 2.2917,
      "step": 19031
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3085064888000488,
      "learning_rate": 1.7227448649140428e-05,
      "loss": 2.2158,
      "step": 19032
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0830178260803223,
      "learning_rate": 1.7227164085587995e-05,
      "loss": 2.5263,
      "step": 19033
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.118077039718628,
      "learning_rate": 1.722687950978356e-05,
      "loss": 2.4501,
      "step": 19034
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.082033634185791,
      "learning_rate": 1.7226594921727595e-05,
      "loss": 2.4436,
      "step": 19035
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9731752276420593,
      "learning_rate": 1.7226310321420593e-05,
      "loss": 2.5746,
      "step": 19036
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9821657538414001,
      "learning_rate": 1.722602570886303e-05,
      "loss": 2.5677,
      "step": 19037
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9260851740837097,
      "learning_rate": 1.722574108405539e-05,
      "loss": 2.402,
      "step": 19038
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9186335802078247,
      "learning_rate": 1.722545644699816e-05,
      "loss": 2.2669,
      "step": 19039
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1732802391052246,
      "learning_rate": 1.7225171797691817e-05,
      "loss": 2.609,
      "step": 19040
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0319314002990723,
      "learning_rate": 1.7224887136136846e-05,
      "loss": 2.4964,
      "step": 19041
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0007413625717163,
      "learning_rate": 1.7224602462333727e-05,
      "loss": 2.631,
      "step": 19042
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.009262204170227,
      "learning_rate": 1.7224317776282945e-05,
      "loss": 2.4121,
      "step": 19043
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.083469271659851,
      "learning_rate": 1.722403307798499e-05,
      "loss": 2.4485,
      "step": 19044
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0510343313217163,
      "learning_rate": 1.7223748367440328e-05,
      "loss": 2.4593,
      "step": 19045
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9996203184127808,
      "learning_rate": 1.722346364464945e-05,
      "loss": 2.3804,
      "step": 19046
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9677892327308655,
      "learning_rate": 1.7223178909612844e-05,
      "loss": 2.5101,
      "step": 19047
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1807736158370972,
      "learning_rate": 1.722289416233099e-05,
      "loss": 2.5663,
      "step": 19048
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1394295692443848,
      "learning_rate": 1.722260940280436e-05,
      "loss": 2.7709,
      "step": 19049
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1383700370788574,
      "learning_rate": 1.7222324631033452e-05,
      "loss": 2.5877,
      "step": 19050
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0129748582839966,
      "learning_rate": 1.722203984701874e-05,
      "loss": 2.7358,
      "step": 19051
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0399043560028076,
      "learning_rate": 1.7221755050760708e-05,
      "loss": 2.3203,
      "step": 19052
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.075103998184204,
      "learning_rate": 1.7221470242259845e-05,
      "loss": 2.3931,
      "step": 19053
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0042897462844849,
      "learning_rate": 1.7221185421516625e-05,
      "loss": 2.3366,
      "step": 19054
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.036149740219116,
      "learning_rate": 1.7220900588531532e-05,
      "loss": 2.4116,
      "step": 19055
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2635319232940674,
      "learning_rate": 1.7220615743305052e-05,
      "loss": 2.4083,
      "step": 19056
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.015795111656189,
      "learning_rate": 1.7220330885837672e-05,
      "loss": 2.3864,
      "step": 19057
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1170843839645386,
      "learning_rate": 1.7220046016129866e-05,
      "loss": 2.6995,
      "step": 19058
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9361132979393005,
      "learning_rate": 1.7219761134182122e-05,
      "loss": 2.393,
      "step": 19059
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3605326414108276,
      "learning_rate": 1.7219476239994925e-05,
      "loss": 2.6039,
      "step": 19060
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.030211329460144,
      "learning_rate": 1.721919133356875e-05,
      "loss": 2.405,
      "step": 19061
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0835955142974854,
      "learning_rate": 1.7218906414904085e-05,
      "loss": 2.428,
      "step": 19062
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.081078052520752,
      "learning_rate": 1.7218621484001418e-05,
      "loss": 2.5923,
      "step": 19063
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9208466410636902,
      "learning_rate": 1.7218336540861226e-05,
      "loss": 2.304,
      "step": 19064
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1952705383300781,
      "learning_rate": 1.721805158548399e-05,
      "loss": 2.6028,
      "step": 19065
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9866287112236023,
      "learning_rate": 1.7217766617870196e-05,
      "loss": 2.5059,
      "step": 19066
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9595381617546082,
      "learning_rate": 1.7217481638020328e-05,
      "loss": 2.4052,
      "step": 19067
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0194939374923706,
      "learning_rate": 1.7217196645934868e-05,
      "loss": 2.4689,
      "step": 19068
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2115122079849243,
      "learning_rate": 1.7216911641614303e-05,
      "loss": 2.7731,
      "step": 19069
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.124710202217102,
      "learning_rate": 1.721662662505911e-05,
      "loss": 2.6666,
      "step": 19070
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2308521270751953,
      "learning_rate": 1.7216341596269774e-05,
      "loss": 2.4129,
      "step": 19071
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0541387796401978,
      "learning_rate": 1.721605655524678e-05,
      "loss": 2.3478,
      "step": 19072
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7428033351898193,
      "learning_rate": 1.7215771501990613e-05,
      "loss": 2.5393,
      "step": 19073
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2573509216308594,
      "learning_rate": 1.721548643650175e-05,
      "loss": 2.6752,
      "step": 19074
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.940246820449829,
      "learning_rate": 1.7215201358780676e-05,
      "loss": 2.6087,
      "step": 19075
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0674471855163574,
      "learning_rate": 1.7214916268827882e-05,
      "loss": 2.4544,
      "step": 19076
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9564023017883301,
      "learning_rate": 1.721463116664384e-05,
      "loss": 2.5555,
      "step": 19077
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0154011249542236,
      "learning_rate": 1.7214346052229042e-05,
      "loss": 2.5536,
      "step": 19078
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1800576448440552,
      "learning_rate": 1.7214060925583966e-05,
      "loss": 2.3525,
      "step": 19079
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1303415298461914,
      "learning_rate": 1.72137757867091e-05,
      "loss": 2.2487,
      "step": 19080
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0876128673553467,
      "learning_rate": 1.7213490635604924e-05,
      "loss": 2.518,
      "step": 19081
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9590672254562378,
      "learning_rate": 1.721320547227192e-05,
      "loss": 2.6007,
      "step": 19082
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.135466456413269,
      "learning_rate": 1.7212920296710577e-05,
      "loss": 2.7124,
      "step": 19083
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2323341369628906,
      "learning_rate": 1.721263510892137e-05,
      "loss": 2.3165,
      "step": 19084
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.046205759048462,
      "learning_rate": 1.721234990890479e-05,
      "loss": 2.4983,
      "step": 19085
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.012181282043457,
      "learning_rate": 1.721206469666132e-05,
      "loss": 2.3527,
      "step": 19086
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0359511375427246,
      "learning_rate": 1.721177947219144e-05,
      "loss": 2.6902,
      "step": 19087
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1552011966705322,
      "learning_rate": 1.7211494235495637e-05,
      "loss": 2.5273,
      "step": 19088
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0076041221618652,
      "learning_rate": 1.721120898657439e-05,
      "loss": 2.5115,
      "step": 19089
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0501084327697754,
      "learning_rate": 1.721092372542819e-05,
      "loss": 2.4912,
      "step": 19090
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9839075803756714,
      "learning_rate": 1.7210638452057508e-05,
      "loss": 2.4747,
      "step": 19091
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.04574716091156,
      "learning_rate": 1.721035316646284e-05,
      "loss": 2.5677,
      "step": 19092
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9932891726493835,
      "learning_rate": 1.7210067868644667e-05,
      "loss": 2.3348,
      "step": 19093
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0705056190490723,
      "learning_rate": 1.720978255860347e-05,
      "loss": 2.2154,
      "step": 19094
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.082445502281189,
      "learning_rate": 1.720949723633973e-05,
      "loss": 2.4684,
      "step": 19095
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.069030523300171,
      "learning_rate": 1.7209211901853934e-05,
      "loss": 2.6068,
      "step": 19096
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0768370628356934,
      "learning_rate": 1.720892655514657e-05,
      "loss": 2.5498,
      "step": 19097
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9979273080825806,
      "learning_rate": 1.7208641196218115e-05,
      "loss": 2.1612,
      "step": 19098
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9936947822570801,
      "learning_rate": 1.7208355825069057e-05,
      "loss": 2.3506,
      "step": 19099
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0865073204040527,
      "learning_rate": 1.7208070441699876e-05,
      "loss": 2.6483,
      "step": 19100
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9537297487258911,
      "learning_rate": 1.7207785046111058e-05,
      "loss": 2.5469,
      "step": 19101
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0763534307479858,
      "learning_rate": 1.720749963830309e-05,
      "loss": 2.4567,
      "step": 19102
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1300227642059326,
      "learning_rate": 1.7207214218276445e-05,
      "loss": 2.3303,
      "step": 19103
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9803057312965393,
      "learning_rate": 1.720692878603162e-05,
      "loss": 2.2289,
      "step": 19104
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9671974778175354,
      "learning_rate": 1.7206643341569095e-05,
      "loss": 2.457,
      "step": 19105
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0909615755081177,
      "learning_rate": 1.7206357884889348e-05,
      "loss": 2.4206,
      "step": 19106
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2693244218826294,
      "learning_rate": 1.720607241599287e-05,
      "loss": 2.1851,
      "step": 19107
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0665361881256104,
      "learning_rate": 1.7205786934880146e-05,
      "loss": 2.557,
      "step": 19108
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0051134824752808,
      "learning_rate": 1.7205501441551647e-05,
      "loss": 2.571,
      "step": 19109
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0091509819030762,
      "learning_rate": 1.720521593600787e-05,
      "loss": 2.4122,
      "step": 19110
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1147587299346924,
      "learning_rate": 1.7204930418249298e-05,
      "loss": 2.5231,
      "step": 19111
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1012321710586548,
      "learning_rate": 1.720464488827641e-05,
      "loss": 2.4652,
      "step": 19112
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9059781432151794,
      "learning_rate": 1.7204359346089692e-05,
      "loss": 2.5094,
      "step": 19113
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0547575950622559,
      "learning_rate": 1.7204073791689627e-05,
      "loss": 2.5658,
      "step": 19114
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1036276817321777,
      "learning_rate": 1.72037882250767e-05,
      "loss": 2.2628,
      "step": 19115
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9796242713928223,
      "learning_rate": 1.72035026462514e-05,
      "loss": 2.3991,
      "step": 19116
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9879648089408875,
      "learning_rate": 1.7203217055214203e-05,
      "loss": 2.4524,
      "step": 19117
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.115709900856018,
      "learning_rate": 1.7202931451965595e-05,
      "loss": 2.4833,
      "step": 19118
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1272683143615723,
      "learning_rate": 1.7202645836506066e-05,
      "loss": 2.2716,
      "step": 19119
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2586954832077026,
      "learning_rate": 1.7202360208836092e-05,
      "loss": 2.4284,
      "step": 19120
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1675668954849243,
      "learning_rate": 1.7202074568956166e-05,
      "loss": 2.4847,
      "step": 19121
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.054654598236084,
      "learning_rate": 1.7201788916866764e-05,
      "loss": 2.463,
      "step": 19122
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9356778264045715,
      "learning_rate": 1.7201503252568375e-05,
      "loss": 2.4866,
      "step": 19123
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1002652645111084,
      "learning_rate": 1.720121757606148e-05,
      "loss": 2.5212,
      "step": 19124
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0431500673294067,
      "learning_rate": 1.7200931887346567e-05,
      "loss": 2.5544,
      "step": 19125
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2076640129089355,
      "learning_rate": 1.720064618642412e-05,
      "loss": 2.5653,
      "step": 19126
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.8721925616264343,
      "learning_rate": 1.720036047329462e-05,
      "loss": 2.2124,
      "step": 19127
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0208556652069092,
      "learning_rate": 1.7200074747958557e-05,
      "loss": 2.4815,
      "step": 19128
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9586196541786194,
      "learning_rate": 1.719978901041641e-05,
      "loss": 2.2783,
      "step": 19129
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1065365076065063,
      "learning_rate": 1.7199503260668665e-05,
      "loss": 2.3176,
      "step": 19130
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9652266502380371,
      "learning_rate": 1.7199217498715804e-05,
      "loss": 2.4501,
      "step": 19131
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9959518313407898,
      "learning_rate": 1.7198931724558316e-05,
      "loss": 2.4072,
      "step": 19132
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9706032276153564,
      "learning_rate": 1.7198645938196682e-05,
      "loss": 2.4386,
      "step": 19133
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9970710873603821,
      "learning_rate": 1.719836013963139e-05,
      "loss": 2.5511,
      "step": 19134
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9554152488708496,
      "learning_rate": 1.7198074328862923e-05,
      "loss": 2.3561,
      "step": 19135
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1621460914611816,
      "learning_rate": 1.7197788505891763e-05,
      "loss": 2.6308,
      "step": 19136
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1457182168960571,
      "learning_rate": 1.71975026707184e-05,
      "loss": 2.2899,
      "step": 19137
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9365442395210266,
      "learning_rate": 1.7197216823343312e-05,
      "loss": 2.4244,
      "step": 19138
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0327039957046509,
      "learning_rate": 1.7196930963766986e-05,
      "loss": 2.3223,
      "step": 19139
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0119850635528564,
      "learning_rate": 1.719664509198991e-05,
      "loss": 2.3009,
      "step": 19140
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.998757541179657,
      "learning_rate": 1.7196359208012564e-05,
      "loss": 2.2278,
      "step": 19141
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0691370964050293,
      "learning_rate": 1.7196073311835436e-05,
      "loss": 2.4885,
      "step": 19142
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0875728130340576,
      "learning_rate": 1.719578740345901e-05,
      "loss": 2.2481,
      "step": 19143
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.231016755104065,
      "learning_rate": 1.7195501482883767e-05,
      "loss": 2.5399,
      "step": 19144
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0756117105484009,
      "learning_rate": 1.7195215550110198e-05,
      "loss": 2.5343,
      "step": 19145
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0308891534805298,
      "learning_rate": 1.7194929605138782e-05,
      "loss": 2.6013,
      "step": 19146
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9099777340888977,
      "learning_rate": 1.7194643647970007e-05,
      "loss": 2.3037,
      "step": 19147
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0626555681228638,
      "learning_rate": 1.7194357678604357e-05,
      "loss": 2.4599,
      "step": 19148
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.130656361579895,
      "learning_rate": 1.719407169704232e-05,
      "loss": 2.5567,
      "step": 19149
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1099454164505005,
      "learning_rate": 1.7193785703284375e-05,
      "loss": 2.3987,
      "step": 19150
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.117092490196228,
      "learning_rate": 1.719349969733101e-05,
      "loss": 2.2127,
      "step": 19151
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1345707178115845,
      "learning_rate": 1.719321367918271e-05,
      "loss": 2.3841,
      "step": 19152
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0539720058441162,
      "learning_rate": 1.7192927648839953e-05,
      "loss": 2.3264,
      "step": 19153
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0981262922286987,
      "learning_rate": 1.7192641606303237e-05,
      "loss": 2.3966,
      "step": 19154
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1681276559829712,
      "learning_rate": 1.7192355551573037e-05,
      "loss": 2.4261,
      "step": 19155
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1555978059768677,
      "learning_rate": 1.7192069484649843e-05,
      "loss": 2.4828,
      "step": 19156
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9617100358009338,
      "learning_rate": 1.7191783405534134e-05,
      "loss": 2.4199,
      "step": 19157
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9696418046951294,
      "learning_rate": 1.7191497314226402e-05,
      "loss": 2.4143,
      "step": 19158
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.994714081287384,
      "learning_rate": 1.719121121072713e-05,
      "loss": 2.533,
      "step": 19159
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.04056978225708,
      "learning_rate": 1.7190925095036795e-05,
      "loss": 2.4836,
      "step": 19160
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.150924563407898,
      "learning_rate": 1.71906389671559e-05,
      "loss": 2.5988,
      "step": 19161
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0739686489105225,
      "learning_rate": 1.7190352827084907e-05,
      "loss": 2.4027,
      "step": 19162
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9508327841758728,
      "learning_rate": 1.7190066674824322e-05,
      "loss": 2.2688,
      "step": 19163
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.02578604221344,
      "learning_rate": 1.7189780510374618e-05,
      "loss": 2.3696,
      "step": 19164
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0997940301895142,
      "learning_rate": 1.7189494333736283e-05,
      "loss": 2.5062,
      "step": 19165
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.031312346458435,
      "learning_rate": 1.7189208144909804e-05,
      "loss": 2.4021,
      "step": 19166
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.041845679283142,
      "learning_rate": 1.7188921943895662e-05,
      "loss": 2.5283,
      "step": 19167
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0379244089126587,
      "learning_rate": 1.7188635730694347e-05,
      "loss": 2.5724,
      "step": 19168
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0286496877670288,
      "learning_rate": 1.718834950530634e-05,
      "loss": 2.4551,
      "step": 19169
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.162881851196289,
      "learning_rate": 1.7188063267732133e-05,
      "loss": 2.464,
      "step": 19170
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0778465270996094,
      "learning_rate": 1.71877770179722e-05,
      "loss": 2.4392,
      "step": 19171
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9803428053855896,
      "learning_rate": 1.718749075602704e-05,
      "loss": 2.5709,
      "step": 19172
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9595296382904053,
      "learning_rate": 1.7187204481897123e-05,
      "loss": 2.6385,
      "step": 19173
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.13444983959198,
      "learning_rate": 1.718691819558295e-05,
      "loss": 2.5043,
      "step": 19174
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9721261262893677,
      "learning_rate": 1.7186631897084995e-05,
      "loss": 2.3898,
      "step": 19175
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0307697057724,
      "learning_rate": 1.7186345586403747e-05,
      "loss": 2.4774,
      "step": 19176
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0742567777633667,
      "learning_rate": 1.718605926353969e-05,
      "loss": 2.3552,
      "step": 19177
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9248135089874268,
      "learning_rate": 1.7185772928493313e-05,
      "loss": 2.3301,
      "step": 19178
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0494446754455566,
      "learning_rate": 1.71854865812651e-05,
      "loss": 2.2208,
      "step": 19179
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0890130996704102,
      "learning_rate": 1.7185200221855533e-05,
      "loss": 2.458,
      "step": 19180
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0857760906219482,
      "learning_rate": 1.71849138502651e-05,
      "loss": 2.4121,
      "step": 19181
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.071571946144104,
      "learning_rate": 1.7184627466494294e-05,
      "loss": 2.4165,
      "step": 19182
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9996612071990967,
      "learning_rate": 1.7184341070543583e-05,
      "loss": 2.521,
      "step": 19183
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0666898488998413,
      "learning_rate": 1.7184054662413468e-05,
      "loss": 2.4734,
      "step": 19184
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2611753940582275,
      "learning_rate": 1.7183768242104428e-05,
      "loss": 2.325,
      "step": 19185
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.08482027053833,
      "learning_rate": 1.718348180961695e-05,
      "loss": 2.3744,
      "step": 19186
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0592597723007202,
      "learning_rate": 1.718319536495152e-05,
      "loss": 2.4091,
      "step": 19187
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.042653203010559,
      "learning_rate": 1.7182908908108623e-05,
      "loss": 2.5268,
      "step": 19188
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.210882544517517,
      "learning_rate": 1.7182622439088745e-05,
      "loss": 2.3096,
      "step": 19189
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.937564492225647,
      "learning_rate": 1.7182335957892368e-05,
      "loss": 2.3354,
      "step": 19190
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0862327814102173,
      "learning_rate": 1.7182049464519983e-05,
      "loss": 2.3742,
      "step": 19191
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4207301139831543,
      "learning_rate": 1.7181762958972076e-05,
      "loss": 2.5984,
      "step": 19192
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1508768796920776,
      "learning_rate": 1.7181476441249124e-05,
      "loss": 2.4186,
      "step": 19193
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0086034536361694,
      "learning_rate": 1.7181189911351622e-05,
      "loss": 2.4639,
      "step": 19194
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1230045557022095,
      "learning_rate": 1.7180903369280054e-05,
      "loss": 2.4629,
      "step": 19195
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.029590368270874,
      "learning_rate": 1.7180616815034905e-05,
      "loss": 2.4694,
      "step": 19196
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9684159159660339,
      "learning_rate": 1.718033024861666e-05,
      "loss": 2.5032,
      "step": 19197
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0543075799942017,
      "learning_rate": 1.7180043670025805e-05,
      "loss": 2.4294,
      "step": 19198
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.085553765296936,
      "learning_rate": 1.7179757079262823e-05,
      "loss": 2.4339,
      "step": 19199
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0208145380020142,
      "learning_rate": 1.7179470476328207e-05,
      "loss": 2.4532,
      "step": 19200
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.060508131980896,
      "learning_rate": 1.7179183861222433e-05,
      "loss": 2.5034,
      "step": 19201
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6299504041671753,
      "learning_rate": 1.7178897233945998e-05,
      "loss": 2.2846,
      "step": 19202
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1078852415084839,
      "learning_rate": 1.717861059449938e-05,
      "loss": 2.5951,
      "step": 19203
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0338751077651978,
      "learning_rate": 1.7178323942883068e-05,
      "loss": 2.6234,
      "step": 19204
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.010818362236023,
      "learning_rate": 1.7178037279097548e-05,
      "loss": 2.4862,
      "step": 19205
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1440041065216064,
      "learning_rate": 1.71777506031433e-05,
      "loss": 2.4481,
      "step": 19206
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.078026533126831,
      "learning_rate": 1.717746391502082e-05,
      "loss": 2.3503,
      "step": 19207
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0484856367111206,
      "learning_rate": 1.7177177214730593e-05,
      "loss": 2.4782,
      "step": 19208
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9822837710380554,
      "learning_rate": 1.7176890502273096e-05,
      "loss": 2.4012,
      "step": 19209
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2569835186004639,
      "learning_rate": 1.7176603777648822e-05,
      "loss": 2.355,
      "step": 19210
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.107077717781067,
      "learning_rate": 1.7176317040858254e-05,
      "loss": 2.6238,
      "step": 19211
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0620859861373901,
      "learning_rate": 1.717603029190188e-05,
      "loss": 2.3751,
      "step": 19212
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9718373417854309,
      "learning_rate": 1.7175743530780186e-05,
      "loss": 2.5528,
      "step": 19213
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0003348588943481,
      "learning_rate": 1.7175456757493658e-05,
      "loss": 2.3718,
      "step": 19214
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0206350088119507,
      "learning_rate": 1.7175169972042782e-05,
      "loss": 2.3092,
      "step": 19215
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1913090944290161,
      "learning_rate": 1.7174883174428045e-05,
      "loss": 2.4218,
      "step": 19216
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0652748346328735,
      "learning_rate": 1.7174596364649932e-05,
      "loss": 2.5817,
      "step": 19217
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0746607780456543,
      "learning_rate": 1.7174309542708928e-05,
      "loss": 2.7006,
      "step": 19218
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2412763833999634,
      "learning_rate": 1.7174022708605523e-05,
      "loss": 2.4727,
      "step": 19219
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0967295169830322,
      "learning_rate": 1.71737358623402e-05,
      "loss": 2.5736,
      "step": 19220
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9759241342544556,
      "learning_rate": 1.7173449003913446e-05,
      "loss": 2.4073,
      "step": 19221
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9855167865753174,
      "learning_rate": 1.7173162133325747e-05,
      "loss": 2.2957,
      "step": 19222
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1539065837860107,
      "learning_rate": 1.7172875250577595e-05,
      "loss": 2.5372,
      "step": 19223
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0068230628967285,
      "learning_rate": 1.7172588355669466e-05,
      "loss": 2.3074,
      "step": 19224
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1529455184936523,
      "learning_rate": 1.717230144860185e-05,
      "loss": 2.3798,
      "step": 19225
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9946436285972595,
      "learning_rate": 1.7172014529375242e-05,
      "loss": 2.6042,
      "step": 19226
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1360267400741577,
      "learning_rate": 1.7171727597990117e-05,
      "loss": 2.3444,
      "step": 19227
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1054667234420776,
      "learning_rate": 1.7171440654446968e-05,
      "loss": 2.2893,
      "step": 19228
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0579113960266113,
      "learning_rate": 1.7171153698746275e-05,
      "loss": 2.3469,
      "step": 19229
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0334274768829346,
      "learning_rate": 1.717086673088853e-05,
      "loss": 2.448,
      "step": 19230
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1581038236618042,
      "learning_rate": 1.717057975087422e-05,
      "loss": 2.3716,
      "step": 19231
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9966490864753723,
      "learning_rate": 1.7170292758703832e-05,
      "loss": 2.4387,
      "step": 19232
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0632957220077515,
      "learning_rate": 1.7170005754377846e-05,
      "loss": 2.2579,
      "step": 19233
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0130468606948853,
      "learning_rate": 1.7169718737896755e-05,
      "loss": 2.5607,
      "step": 19234
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.043441891670227,
      "learning_rate": 1.7169431709261046e-05,
      "loss": 2.467,
      "step": 19235
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0100030899047852,
      "learning_rate": 1.7169144668471197e-05,
      "loss": 2.6417,
      "step": 19236
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9686528444290161,
      "learning_rate": 1.7168857615527706e-05,
      "loss": 2.5376,
      "step": 19237
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.057739496231079,
      "learning_rate": 1.7168570550431054e-05,
      "loss": 2.6052,
      "step": 19238
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0531195402145386,
      "learning_rate": 1.7168283473181725e-05,
      "loss": 2.5972,
      "step": 19239
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0729800462722778,
      "learning_rate": 1.716799638378021e-05,
      "loss": 2.6758,
      "step": 19240
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2049870491027832,
      "learning_rate": 1.7167709282226997e-05,
      "loss": 2.4465,
      "step": 19241
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9853296279907227,
      "learning_rate": 1.7167422168522566e-05,
      "loss": 2.2481,
      "step": 19242
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1076542139053345,
      "learning_rate": 1.716713504266741e-05,
      "loss": 2.5049,
      "step": 19243
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.293910026550293,
      "learning_rate": 1.716684790466201e-05,
      "loss": 2.2421,
      "step": 19244
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1273467540740967,
      "learning_rate": 1.7166560754506863e-05,
      "loss": 2.7055,
      "step": 19245
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1145107746124268,
      "learning_rate": 1.7166273592202443e-05,
      "loss": 2.6186,
      "step": 19246
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9940698146820068,
      "learning_rate": 1.7165986417749246e-05,
      "loss": 2.4006,
      "step": 19247
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0457979440689087,
      "learning_rate": 1.7165699231147754e-05,
      "loss": 2.6614,
      "step": 19248
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0779314041137695,
      "learning_rate": 1.7165412032398457e-05,
      "loss": 2.3922,
      "step": 19249
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9962737560272217,
      "learning_rate": 1.716512482150184e-05,
      "loss": 2.4705,
      "step": 19250
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.035420298576355,
      "learning_rate": 1.716483759845839e-05,
      "loss": 2.3861,
      "step": 19251
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0401403903961182,
      "learning_rate": 1.7164550363268594e-05,
      "loss": 2.5573,
      "step": 19252
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.000849723815918,
      "learning_rate": 1.7164263115932942e-05,
      "loss": 2.4554,
      "step": 19253
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4283862113952637,
      "learning_rate": 1.716397585645192e-05,
      "loss": 2.2993,
      "step": 19254
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0290846824645996,
      "learning_rate": 1.7163688584826004e-05,
      "loss": 2.7085,
      "step": 19255
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1116856336593628,
      "learning_rate": 1.7163401301055698e-05,
      "loss": 2.377,
      "step": 19256
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.082988977432251,
      "learning_rate": 1.716311400514148e-05,
      "loss": 2.5129,
      "step": 19257
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9695484638214111,
      "learning_rate": 1.7162826697083835e-05,
      "loss": 2.4357,
      "step": 19258
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9568454027175903,
      "learning_rate": 1.7162539376883255e-05,
      "loss": 2.3198,
      "step": 19259
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.031934380531311,
      "learning_rate": 1.7162252044540227e-05,
      "loss": 2.2864,
      "step": 19260
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0165917873382568,
      "learning_rate": 1.7161964700055235e-05,
      "loss": 2.3947,
      "step": 19261
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0679324865341187,
      "learning_rate": 1.716167734342877e-05,
      "loss": 2.4957,
      "step": 19262
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0426005125045776,
      "learning_rate": 1.716138997466131e-05,
      "loss": 2.3824,
      "step": 19263
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.014295220375061,
      "learning_rate": 1.7161102593753358e-05,
      "loss": 2.4164,
      "step": 19264
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3317447900772095,
      "learning_rate": 1.7160815200705386e-05,
      "loss": 2.5161,
      "step": 19265
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1367645263671875,
      "learning_rate": 1.716052779551789e-05,
      "loss": 2.4171,
      "step": 19266
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0306141376495361,
      "learning_rate": 1.7160240378191356e-05,
      "loss": 2.3896,
      "step": 19267
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0533182621002197,
      "learning_rate": 1.715995294872627e-05,
      "loss": 2.5533,
      "step": 19268
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9745203852653503,
      "learning_rate": 1.7159665507123117e-05,
      "loss": 2.5568,
      "step": 19269
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0118318796157837,
      "learning_rate": 1.7159378053382385e-05,
      "loss": 2.4187,
      "step": 19270
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.096299171447754,
      "learning_rate": 1.7159090587504565e-05,
      "loss": 2.4133,
      "step": 19271
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0233434438705444,
      "learning_rate": 1.7158803109490142e-05,
      "loss": 2.3672,
      "step": 19272
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1361790895462036,
      "learning_rate": 1.71585156193396e-05,
      "loss": 2.4694,
      "step": 19273
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0220263004302979,
      "learning_rate": 1.7158228117053436e-05,
      "loss": 2.3781,
      "step": 19274
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0443180799484253,
      "learning_rate": 1.7157940602632127e-05,
      "loss": 2.5811,
      "step": 19275
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2709647417068481,
      "learning_rate": 1.7157653076076164e-05,
      "loss": 2.2543,
      "step": 19276
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.017795205116272,
      "learning_rate": 1.7157365537386037e-05,
      "loss": 2.4854,
      "step": 19277
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0782665014266968,
      "learning_rate": 1.715707798656223e-05,
      "loss": 2.3572,
      "step": 19278
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9859654307365417,
      "learning_rate": 1.7156790423605233e-05,
      "loss": 2.3472,
      "step": 19279
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1314501762390137,
      "learning_rate": 1.7156502848515532e-05,
      "loss": 2.5991,
      "step": 19280
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.336032748222351,
      "learning_rate": 1.7156215261293616e-05,
      "loss": 2.4288,
      "step": 19281
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0545207262039185,
      "learning_rate": 1.7155927661939972e-05,
      "loss": 2.53,
      "step": 19282
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9869299530982971,
      "learning_rate": 1.7155640050455088e-05,
      "loss": 2.2897,
      "step": 19283
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9797654151916504,
      "learning_rate": 1.7155352426839445e-05,
      "loss": 2.3324,
      "step": 19284
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0900167226791382,
      "learning_rate": 1.7155064791093542e-05,
      "loss": 2.3723,
      "step": 19285
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1281681060791016,
      "learning_rate": 1.7154777143217856e-05,
      "loss": 2.5558,
      "step": 19286
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.033445954322815,
      "learning_rate": 1.7154489483212886e-05,
      "loss": 2.4645,
      "step": 19287
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0411542654037476,
      "learning_rate": 1.715420181107911e-05,
      "loss": 2.5952,
      "step": 19288
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0517337322235107,
      "learning_rate": 1.7153914126817017e-05,
      "loss": 2.5676,
      "step": 19289
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1814780235290527,
      "learning_rate": 1.7153626430427096e-05,
      "loss": 2.4928,
      "step": 19290
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.895103394985199,
      "learning_rate": 1.715333872190984e-05,
      "loss": 2.5787,
      "step": 19291
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0384941101074219,
      "learning_rate": 1.7153051001265728e-05,
      "loss": 2.5879,
      "step": 19292
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1549724340438843,
      "learning_rate": 1.7152763268495253e-05,
      "loss": 2.3296,
      "step": 19293
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9565714597702026,
      "learning_rate": 1.7152475523598902e-05,
      "loss": 2.4523,
      "step": 19294
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0082836151123047,
      "learning_rate": 1.7152187766577163e-05,
      "loss": 2.6043,
      "step": 19295
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0817205905914307,
      "learning_rate": 1.7151899997430523e-05,
      "loss": 2.3797,
      "step": 19296
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0198497772216797,
      "learning_rate": 1.715161221615947e-05,
      "loss": 2.4879,
      "step": 19297
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0857816934585571,
      "learning_rate": 1.715132442276449e-05,
      "loss": 2.7066,
      "step": 19298
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0220448970794678,
      "learning_rate": 1.7151036617246076e-05,
      "loss": 2.4446,
      "step": 19299
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9800035357475281,
      "learning_rate": 1.715074879960471e-05,
      "loss": 2.4881,
      "step": 19300
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.05914306640625,
      "learning_rate": 1.7150460969840885e-05,
      "loss": 2.3818,
      "step": 19301
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.028050422668457,
      "learning_rate": 1.7150173127955084e-05,
      "loss": 2.26,
      "step": 19302
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0268138647079468,
      "learning_rate": 1.7149885273947798e-05,
      "loss": 2.276,
      "step": 19303
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1084296703338623,
      "learning_rate": 1.7149597407819518e-05,
      "loss": 2.5869,
      "step": 19304
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.026491641998291,
      "learning_rate": 1.7149309529570724e-05,
      "loss": 2.3002,
      "step": 19305
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0241154432296753,
      "learning_rate": 1.7149021639201913e-05,
      "loss": 2.3339,
      "step": 19306
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9534628391265869,
      "learning_rate": 1.7148733736713564e-05,
      "loss": 2.2145,
      "step": 19307
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.049015760421753,
      "learning_rate": 1.7148445822106173e-05,
      "loss": 2.5472,
      "step": 19308
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0269923210144043,
      "learning_rate": 1.7148157895380224e-05,
      "loss": 2.5136,
      "step": 19309
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9533423781394958,
      "learning_rate": 1.714786995653621e-05,
      "loss": 2.661,
      "step": 19310
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0900160074234009,
      "learning_rate": 1.7147582005574607e-05,
      "loss": 2.5774,
      "step": 19311
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0316534042358398,
      "learning_rate": 1.7147294042495915e-05,
      "loss": 2.5608,
      "step": 19312
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0946860313415527,
      "learning_rate": 1.714700606730062e-05,
      "loss": 2.3585,
      "step": 19313
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.016800045967102,
      "learning_rate": 1.714671807998921e-05,
      "loss": 2.1986,
      "step": 19314
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1293184757232666,
      "learning_rate": 1.7146430080562165e-05,
      "loss": 2.5437,
      "step": 19315
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0888139009475708,
      "learning_rate": 1.7146142069019985e-05,
      "loss": 2.274,
      "step": 19316
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.050944447517395,
      "learning_rate": 1.7145854045363153e-05,
      "loss": 2.4843,
      "step": 19317
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0512357950210571,
      "learning_rate": 1.7145566009592158e-05,
      "loss": 2.4017,
      "step": 19318
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0594727993011475,
      "learning_rate": 1.7145277961707483e-05,
      "loss": 2.3213,
      "step": 19319
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1678967475891113,
      "learning_rate": 1.714498990170963e-05,
      "loss": 2.3411,
      "step": 19320
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.411213994026184,
      "learning_rate": 1.7144701829599073e-05,
      "loss": 2.4937,
      "step": 19321
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0270406007766724,
      "learning_rate": 1.714441374537631e-05,
      "loss": 2.3997,
      "step": 19322
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.175814151763916,
      "learning_rate": 1.7144125649041817e-05,
      "loss": 2.5138,
      "step": 19323
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0827348232269287,
      "learning_rate": 1.7143837540596096e-05,
      "loss": 2.3219,
      "step": 19324
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.083796739578247,
      "learning_rate": 1.714354942003963e-05,
      "loss": 2.4304,
      "step": 19325
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0009886026382446,
      "learning_rate": 1.714326128737291e-05,
      "loss": 2.6218,
      "step": 19326
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1525601148605347,
      "learning_rate": 1.714297314259642e-05,
      "loss": 2.3031,
      "step": 19327
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2172821760177612,
      "learning_rate": 1.7142684985710646e-05,
      "loss": 2.4613,
      "step": 19328
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9352683424949646,
      "learning_rate": 1.7142396816716086e-05,
      "loss": 2.3872,
      "step": 19329
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0573451519012451,
      "learning_rate": 1.7142108635613224e-05,
      "loss": 2.5529,
      "step": 19330
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0923562049865723,
      "learning_rate": 1.7141820442402548e-05,
      "loss": 2.5517,
      "step": 19331
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0418152809143066,
      "learning_rate": 1.7141532237084545e-05,
      "loss": 2.4365,
      "step": 19332
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0918183326721191,
      "learning_rate": 1.7141244019659704e-05,
      "loss": 2.4413,
      "step": 19333
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1661293506622314,
      "learning_rate": 1.7140955790128516e-05,
      "loss": 2.4807,
      "step": 19334
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0200207233428955,
      "learning_rate": 1.714066754849147e-05,
      "loss": 2.4912,
      "step": 19335
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9894397258758545,
      "learning_rate": 1.714037929474905e-05,
      "loss": 2.4644,
      "step": 19336
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9072953462600708,
      "learning_rate": 1.714009102890175e-05,
      "loss": 2.4279,
      "step": 19337
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0083398818969727,
      "learning_rate": 1.713980275095006e-05,
      "loss": 2.4509,
      "step": 19338
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9563644528388977,
      "learning_rate": 1.7139514460894458e-05,
      "loss": 2.516,
      "step": 19339
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9945822358131409,
      "learning_rate": 1.7139226158735446e-05,
      "loss": 2.4504,
      "step": 19340
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1289246082305908,
      "learning_rate": 1.7138937844473506e-05,
      "loss": 2.3918,
      "step": 19341
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9858494400978088,
      "learning_rate": 1.713864951810912e-05,
      "loss": 2.2586,
      "step": 19342
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0135465860366821,
      "learning_rate": 1.713836117964279e-05,
      "loss": 2.5056,
      "step": 19343
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0634199380874634,
      "learning_rate": 1.7138072829075003e-05,
      "loss": 2.5415,
      "step": 19344
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0413994789123535,
      "learning_rate": 1.7137784466406238e-05,
      "loss": 2.5414,
      "step": 19345
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0928298234939575,
      "learning_rate": 1.713749609163699e-05,
      "loss": 2.3682,
      "step": 19346
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0145251750946045,
      "learning_rate": 1.713720770476775e-05,
      "loss": 2.575,
      "step": 19347
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0080327987670898,
      "learning_rate": 1.7136919305799003e-05,
      "loss": 2.461,
      "step": 19348
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.064523458480835,
      "learning_rate": 1.713663089473124e-05,
      "loss": 2.4304,
      "step": 19349
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9901105165481567,
      "learning_rate": 1.713634247156495e-05,
      "loss": 2.4788,
      "step": 19350
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1586358547210693,
      "learning_rate": 1.713605403630062e-05,
      "loss": 2.5294,
      "step": 19351
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9887171983718872,
      "learning_rate": 1.7135765588938742e-05,
      "loss": 2.4862,
      "step": 19352
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1393795013427734,
      "learning_rate": 1.71354771294798e-05,
      "loss": 2.4195,
      "step": 19353
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.230517029762268,
      "learning_rate": 1.7135188657924292e-05,
      "loss": 2.2962,
      "step": 19354
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0200492143630981,
      "learning_rate": 1.7134900174272696e-05,
      "loss": 2.4769,
      "step": 19355
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9497652053833008,
      "learning_rate": 1.7134611678525507e-05,
      "loss": 2.3349,
      "step": 19356
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9939903616905212,
      "learning_rate": 1.7134323170683216e-05,
      "loss": 2.6549,
      "step": 19357
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3260842561721802,
      "learning_rate": 1.7134034650746308e-05,
      "loss": 2.2548,
      "step": 19358
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0950884819030762,
      "learning_rate": 1.7133746118715273e-05,
      "loss": 2.2727,
      "step": 19359
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0623042583465576,
      "learning_rate": 1.7133457574590604e-05,
      "loss": 2.5075,
      "step": 19360
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1328274011611938,
      "learning_rate": 1.7133169018372784e-05,
      "loss": 2.4016,
      "step": 19361
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.240228295326233,
      "learning_rate": 1.7132880450062305e-05,
      "loss": 2.4821,
      "step": 19362
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1153368949890137,
      "learning_rate": 1.713259186965966e-05,
      "loss": 2.7143,
      "step": 19363
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0618517398834229,
      "learning_rate": 1.713230327716533e-05,
      "loss": 2.388,
      "step": 19364
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9555812478065491,
      "learning_rate": 1.713201467257981e-05,
      "loss": 2.6821,
      "step": 19365
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0439550876617432,
      "learning_rate": 1.713172605590359e-05,
      "loss": 2.3178,
      "step": 19366
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.014830231666565,
      "learning_rate": 1.7131437427137158e-05,
      "loss": 2.5664,
      "step": 19367
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0398426055908203,
      "learning_rate": 1.7131148786280997e-05,
      "loss": 2.4902,
      "step": 19368
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.107405662536621,
      "learning_rate": 1.7130860133335606e-05,
      "loss": 2.4585,
      "step": 19369
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.937962532043457,
      "learning_rate": 1.7130571468301473e-05,
      "loss": 2.5283,
      "step": 19370
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0952527523040771,
      "learning_rate": 1.713028279117908e-05,
      "loss": 2.5824,
      "step": 19371
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0801868438720703,
      "learning_rate": 1.7129994101968927e-05,
      "loss": 2.3912,
      "step": 19372
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1334030628204346,
      "learning_rate": 1.7129705400671494e-05,
      "loss": 2.6089,
      "step": 19373
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9601690173149109,
      "learning_rate": 1.712941668728727e-05,
      "loss": 2.3636,
      "step": 19374
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1704505681991577,
      "learning_rate": 1.7129127961816753e-05,
      "loss": 2.313,
      "step": 19375
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0163174867630005,
      "learning_rate": 1.7128839224260427e-05,
      "loss": 2.4843,
      "step": 19376
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0180493593215942,
      "learning_rate": 1.712855047461878e-05,
      "loss": 2.4449,
      "step": 19377
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0805563926696777,
      "learning_rate": 1.7128261712892307e-05,
      "loss": 2.5877,
      "step": 19378
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0298492908477783,
      "learning_rate": 1.7127972939081494e-05,
      "loss": 2.5743,
      "step": 19379
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0028810501098633,
      "learning_rate": 1.712768415318683e-05,
      "loss": 2.3966,
      "step": 19380
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.234479546546936,
      "learning_rate": 1.7127395355208807e-05,
      "loss": 2.354,
      "step": 19381
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.132946491241455,
      "learning_rate": 1.712710654514791e-05,
      "loss": 2.4575,
      "step": 19382
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0733234882354736,
      "learning_rate": 1.7126817723004637e-05,
      "loss": 2.4733,
      "step": 19383
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0521477460861206,
      "learning_rate": 1.7126528888779467e-05,
      "loss": 2.4105,
      "step": 19384
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9735271334648132,
      "learning_rate": 1.7126240042472897e-05,
      "loss": 2.3494,
      "step": 19385
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0325814485549927,
      "learning_rate": 1.7125951184085417e-05,
      "loss": 2.4371,
      "step": 19386
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0109856128692627,
      "learning_rate": 1.712566231361751e-05,
      "loss": 2.7065,
      "step": 19387
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0834790468215942,
      "learning_rate": 1.7125373431069673e-05,
      "loss": 2.4407,
      "step": 19388
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9923285841941833,
      "learning_rate": 1.712508453644239e-05,
      "loss": 2.2664,
      "step": 19389
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.8721644878387451,
      "learning_rate": 1.7124795629736157e-05,
      "loss": 2.1872,
      "step": 19390
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1278427839279175,
      "learning_rate": 1.7124506710951458e-05,
      "loss": 2.2799,
      "step": 19391
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9931561350822449,
      "learning_rate": 1.7124217780088787e-05,
      "loss": 2.5936,
      "step": 19392
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9480710625648499,
      "learning_rate": 1.7123928837148632e-05,
      "loss": 2.4441,
      "step": 19393
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.996461033821106,
      "learning_rate": 1.712363988213148e-05,
      "loss": 2.3022,
      "step": 19394
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0743461847305298,
      "learning_rate": 1.7123350915037824e-05,
      "loss": 2.5361,
      "step": 19395
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0541118383407593,
      "learning_rate": 1.7123061935868156e-05,
      "loss": 2.4321,
      "step": 19396
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9955079555511475,
      "learning_rate": 1.7122772944622962e-05,
      "loss": 2.1958,
      "step": 19397
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0998055934906006,
      "learning_rate": 1.712248394130273e-05,
      "loss": 2.394,
      "step": 19398
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.050005555152893,
      "learning_rate": 1.712219492590796e-05,
      "loss": 2.4274,
      "step": 19399
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.018892526626587,
      "learning_rate": 1.7121905898439136e-05,
      "loss": 2.4656,
      "step": 19400
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.224130630493164,
      "learning_rate": 1.712161685889674e-05,
      "loss": 2.571,
      "step": 19401
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.151471495628357,
      "learning_rate": 1.7121327807281273e-05,
      "loss": 2.4058,
      "step": 19402
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0457344055175781,
      "learning_rate": 1.712103874359322e-05,
      "loss": 2.5985,
      "step": 19403
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0344593524932861,
      "learning_rate": 1.7120749667833076e-05,
      "loss": 2.6966,
      "step": 19404
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1291612386703491,
      "learning_rate": 1.7120460580001325e-05,
      "loss": 2.4163,
      "step": 19405
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.066408395767212,
      "learning_rate": 1.7120171480098457e-05,
      "loss": 2.59,
      "step": 19406
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.223425269126892,
      "learning_rate": 1.7119882368124967e-05,
      "loss": 2.6984,
      "step": 19407
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9933330416679382,
      "learning_rate": 1.711959324408134e-05,
      "loss": 2.4596,
      "step": 19408
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0675938129425049,
      "learning_rate": 1.7119304107968073e-05,
      "loss": 2.6602,
      "step": 19409
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0939370393753052,
      "learning_rate": 1.7119014959785648e-05,
      "loss": 2.6319,
      "step": 19410
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0297431945800781,
      "learning_rate": 1.7118725799534563e-05,
      "loss": 2.6075,
      "step": 19411
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.000841498374939,
      "learning_rate": 1.71184366272153e-05,
      "loss": 2.539,
      "step": 19412
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1482559442520142,
      "learning_rate": 1.7118147442828357e-05,
      "loss": 2.422,
      "step": 19413
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9512112736701965,
      "learning_rate": 1.711785824637422e-05,
      "loss": 2.4499,
      "step": 19414
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.140769362449646,
      "learning_rate": 1.711756903785338e-05,
      "loss": 2.5007,
      "step": 19415
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9822385311126709,
      "learning_rate": 1.7117279817266328e-05,
      "loss": 2.6281,
      "step": 19416
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9659852385520935,
      "learning_rate": 1.7116990584613553e-05,
      "loss": 2.4432,
      "step": 19417
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2245545387268066,
      "learning_rate": 1.7116701339895546e-05,
      "loss": 2.6902,
      "step": 19418
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0216948986053467,
      "learning_rate": 1.7116412083112798e-05,
      "loss": 2.2255,
      "step": 19419
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0229709148406982,
      "learning_rate": 1.7116122814265797e-05,
      "loss": 2.4316,
      "step": 19420
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0538232326507568,
      "learning_rate": 1.7115833533355034e-05,
      "loss": 2.3724,
      "step": 19421
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0068249702453613,
      "learning_rate": 1.7115544240381004e-05,
      "loss": 2.4973,
      "step": 19422
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0221365690231323,
      "learning_rate": 1.7115254935344192e-05,
      "loss": 2.4425,
      "step": 19423
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0499708652496338,
      "learning_rate": 1.711496561824509e-05,
      "loss": 2.3846,
      "step": 19424
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0466492176055908,
      "learning_rate": 1.711467628908419e-05,
      "loss": 2.699,
      "step": 19425
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0244263410568237,
      "learning_rate": 1.7114386947861982e-05,
      "loss": 2.7524,
      "step": 19426
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0109444856643677,
      "learning_rate": 1.7114097594578956e-05,
      "loss": 2.2427,
      "step": 19427
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0315723419189453,
      "learning_rate": 1.7113808229235604e-05,
      "loss": 2.3347,
      "step": 19428
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9289405345916748,
      "learning_rate": 1.711351885183241e-05,
      "loss": 2.4926,
      "step": 19429
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0290383100509644,
      "learning_rate": 1.711322946236987e-05,
      "loss": 2.4218,
      "step": 19430
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0154467821121216,
      "learning_rate": 1.7112940060848478e-05,
      "loss": 2.5735,
      "step": 19431
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9965695142745972,
      "learning_rate": 1.711265064726872e-05,
      "loss": 2.1607,
      "step": 19432
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0088485479354858,
      "learning_rate": 1.7112361221631086e-05,
      "loss": 2.4425,
      "step": 19433
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9902868270874023,
      "learning_rate": 1.7112071783936066e-05,
      "loss": 2.5971,
      "step": 19434
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.053063154220581,
      "learning_rate": 1.7111782334184156e-05,
      "loss": 2.3218,
      "step": 19435
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0380234718322754,
      "learning_rate": 1.7111492872375843e-05,
      "loss": 2.4835,
      "step": 19436
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0205154418945312,
      "learning_rate": 1.711120339851162e-05,
      "loss": 2.6749,
      "step": 19437
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9979801774024963,
      "learning_rate": 1.7110913912591967e-05,
      "loss": 2.4679,
      "step": 19438
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0788410902023315,
      "learning_rate": 1.7110624414617394e-05,
      "loss": 2.4344,
      "step": 19439
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1799311637878418,
      "learning_rate": 1.7110334904588376e-05,
      "loss": 2.5098,
      "step": 19440
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0365413427352905,
      "learning_rate": 1.7110045382505412e-05,
      "loss": 2.2811,
      "step": 19441
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.002720594406128,
      "learning_rate": 1.710975584836899e-05,
      "loss": 2.5267,
      "step": 19442
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0042881965637207,
      "learning_rate": 1.7109466302179597e-05,
      "loss": 2.5878,
      "step": 19443
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9475572109222412,
      "learning_rate": 1.710917674393773e-05,
      "loss": 2.4894,
      "step": 19444
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9435242414474487,
      "learning_rate": 1.7108887173643878e-05,
      "loss": 2.5239,
      "step": 19445
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.134060025215149,
      "learning_rate": 1.710859759129853e-05,
      "loss": 2.4128,
      "step": 19446
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9562133550643921,
      "learning_rate": 1.7108307996902182e-05,
      "loss": 2.4014,
      "step": 19447
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9884717464447021,
      "learning_rate": 1.710801839045532e-05,
      "loss": 2.6391,
      "step": 19448
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.153627872467041,
      "learning_rate": 1.7107728771958437e-05,
      "loss": 2.4495,
      "step": 19449
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.429404616355896,
      "learning_rate": 1.710743914141202e-05,
      "loss": 2.6022,
      "step": 19450
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9846726655960083,
      "learning_rate": 1.7107149498816566e-05,
      "loss": 2.3598,
      "step": 19451
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0122828483581543,
      "learning_rate": 1.710685984417256e-05,
      "loss": 2.2975,
      "step": 19452
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6493592262268066,
      "learning_rate": 1.71065701774805e-05,
      "loss": 2.4978,
      "step": 19453
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.205524206161499,
      "learning_rate": 1.710628049874087e-05,
      "loss": 2.5919,
      "step": 19454
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.029909372329712,
      "learning_rate": 1.710599080795417e-05,
      "loss": 2.3745,
      "step": 19455
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.04775071144104,
      "learning_rate": 1.7105701105120882e-05,
      "loss": 2.3609,
      "step": 19456
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0763664245605469,
      "learning_rate": 1.71054113902415e-05,
      "loss": 2.4162,
      "step": 19457
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0586347579956055,
      "learning_rate": 1.7105121663316516e-05,
      "loss": 2.3319,
      "step": 19458
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0267785787582397,
      "learning_rate": 1.7104831924346422e-05,
      "loss": 2.5237,
      "step": 19459
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0221227407455444,
      "learning_rate": 1.710454217333171e-05,
      "loss": 2.4456,
      "step": 19460
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0543473958969116,
      "learning_rate": 1.7104252410272866e-05,
      "loss": 2.4522,
      "step": 19461
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0709502696990967,
      "learning_rate": 1.7103962635170387e-05,
      "loss": 2.6157,
      "step": 19462
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.45789635181427,
      "learning_rate": 1.7103672848024762e-05,
      "loss": 2.3652,
      "step": 19463
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.961950957775116,
      "learning_rate": 1.710338304883648e-05,
      "loss": 2.5213,
      "step": 19464
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1051912307739258,
      "learning_rate": 1.7103093237606037e-05,
      "loss": 2.2636,
      "step": 19465
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0308172702789307,
      "learning_rate": 1.7102803414333918e-05,
      "loss": 2.3089,
      "step": 19466
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9476022720336914,
      "learning_rate": 1.7102513579020623e-05,
      "loss": 2.2929,
      "step": 19467
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.062069296836853,
      "learning_rate": 1.7102223731666636e-05,
      "loss": 2.4555,
      "step": 19468
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0112470388412476,
      "learning_rate": 1.7101933872272452e-05,
      "loss": 2.46,
      "step": 19469
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.103376865386963,
      "learning_rate": 1.710164400083856e-05,
      "loss": 2.2818,
      "step": 19470
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0537457466125488,
      "learning_rate": 1.710135411736545e-05,
      "loss": 2.5017,
      "step": 19471
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1174544095993042,
      "learning_rate": 1.710106422185362e-05,
      "loss": 2.4314,
      "step": 19472
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9956257939338684,
      "learning_rate": 1.710077431430356e-05,
      "loss": 2.6595,
      "step": 19473
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0260260105133057,
      "learning_rate": 1.7100484394715754e-05,
      "loss": 2.5231,
      "step": 19474
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0011123418807983,
      "learning_rate": 1.71001944630907e-05,
      "loss": 2.3767,
      "step": 19475
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1367042064666748,
      "learning_rate": 1.709990451942889e-05,
      "loss": 2.662,
      "step": 19476
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9722318053245544,
      "learning_rate": 1.709961456373081e-05,
      "loss": 2.7989,
      "step": 19477
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9498370289802551,
      "learning_rate": 1.7099324595996956e-05,
      "loss": 2.2171,
      "step": 19478
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0711687803268433,
      "learning_rate": 1.7099034616227823e-05,
      "loss": 2.3116,
      "step": 19479
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0256872177124023,
      "learning_rate": 1.709874462442389e-05,
      "loss": 2.4602,
      "step": 19480
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9285393357276917,
      "learning_rate": 1.709845462058566e-05,
      "loss": 2.3842,
      "step": 19481
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0703675746917725,
      "learning_rate": 1.7098164604713626e-05,
      "loss": 2.4824,
      "step": 19482
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0268441438674927,
      "learning_rate": 1.709787457680827e-05,
      "loss": 2.4263,
      "step": 19483
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9884957671165466,
      "learning_rate": 1.7097584536870092e-05,
      "loss": 2.5385,
      "step": 19484
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0384750366210938,
      "learning_rate": 1.709729448489958e-05,
      "loss": 2.3566,
      "step": 19485
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0511095523834229,
      "learning_rate": 1.7097004420897224e-05,
      "loss": 2.4158,
      "step": 19486
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9800772070884705,
      "learning_rate": 1.7096714344863522e-05,
      "loss": 2.4786,
      "step": 19487
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0207977294921875,
      "learning_rate": 1.709642425679896e-05,
      "loss": 2.4465,
      "step": 19488
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0847691297531128,
      "learning_rate": 1.7096134156704033e-05,
      "loss": 2.5319,
      "step": 19489
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0152041912078857,
      "learning_rate": 1.7095844044579226e-05,
      "loss": 2.2825,
      "step": 19490
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9892999529838562,
      "learning_rate": 1.709555392042504e-05,
      "loss": 2.338,
      "step": 19491
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2210439443588257,
      "learning_rate": 1.7095263784241966e-05,
      "loss": 2.3722,
      "step": 19492
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0522584915161133,
      "learning_rate": 1.7094973636030485e-05,
      "loss": 2.4892,
      "step": 19493
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0270805358886719,
      "learning_rate": 1.7094683475791103e-05,
      "loss": 2.2642,
      "step": 19494
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.081471562385559,
      "learning_rate": 1.70943933035243e-05,
      "loss": 2.6149,
      "step": 19495
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.962712287902832,
      "learning_rate": 1.7094103119230578e-05,
      "loss": 2.2781,
      "step": 19496
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1378021240234375,
      "learning_rate": 1.7093812922910425e-05,
      "loss": 2.4089,
      "step": 19497
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1953226327896118,
      "learning_rate": 1.709352271456433e-05,
      "loss": 2.335,
      "step": 19498
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0662298202514648,
      "learning_rate": 1.709323249419279e-05,
      "loss": 2.2532,
      "step": 19499
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9334585070610046,
      "learning_rate": 1.7092942261796293e-05,
      "loss": 2.5198,
      "step": 19500
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9745309948921204,
      "learning_rate": 1.7092652017375334e-05,
      "loss": 2.197,
      "step": 19501
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0216608047485352,
      "learning_rate": 1.70923617609304e-05,
      "loss": 2.3995,
      "step": 19502
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1383509635925293,
      "learning_rate": 1.7092071492461987e-05,
      "loss": 2.4728,
      "step": 19503
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9777595400810242,
      "learning_rate": 1.7091781211970587e-05,
      "loss": 2.6417,
      "step": 19504
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9714184403419495,
      "learning_rate": 1.7091490919456693e-05,
      "loss": 2.4968,
      "step": 19505
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9276347160339355,
      "learning_rate": 1.7091200614920797e-05,
      "loss": 2.4059,
      "step": 19506
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0296474695205688,
      "learning_rate": 1.7090910298363388e-05,
      "loss": 2.644,
      "step": 19507
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9499043226242065,
      "learning_rate": 1.7090619969784963e-05,
      "loss": 2.3494,
      "step": 19508
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0733819007873535,
      "learning_rate": 1.7090329629186008e-05,
      "loss": 2.5642,
      "step": 19509
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.02082359790802,
      "learning_rate": 1.709003927656702e-05,
      "loss": 2.3953,
      "step": 19510
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9507620930671692,
      "learning_rate": 1.708974891192849e-05,
      "loss": 2.4791,
      "step": 19511
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9983687400817871,
      "learning_rate": 1.708945853527091e-05,
      "loss": 2.5609,
      "step": 19512
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9826143383979797,
      "learning_rate": 1.7089168146594774e-05,
      "loss": 2.4113,
      "step": 19513
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.017203688621521,
      "learning_rate": 1.7088877745900566e-05,
      "loss": 2.5524,
      "step": 19514
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0276615619659424,
      "learning_rate": 1.7088587333188794e-05,
      "loss": 2.8352,
      "step": 19515
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.013162612915039,
      "learning_rate": 1.7088296908459935e-05,
      "loss": 2.5001,
      "step": 19516
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0815542936325073,
      "learning_rate": 1.7088006471714488e-05,
      "loss": 2.5733,
      "step": 19517
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0699506998062134,
      "learning_rate": 1.7087716022952946e-05,
      "loss": 2.2703,
      "step": 19518
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0614076852798462,
      "learning_rate": 1.70874255621758e-05,
      "loss": 2.5512,
      "step": 19519
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1722259521484375,
      "learning_rate": 1.7087135089383545e-05,
      "loss": 2.2986,
      "step": 19520
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.03995943069458,
      "learning_rate": 1.7086844604576672e-05,
      "loss": 2.6386,
      "step": 19521
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0960391759872437,
      "learning_rate": 1.708655410775567e-05,
      "loss": 2.4796,
      "step": 19522
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0663783550262451,
      "learning_rate": 1.708626359892103e-05,
      "loss": 2.4549,
      "step": 19523
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.034589171409607,
      "learning_rate": 1.7085973078073254e-05,
      "loss": 2.499,
      "step": 19524
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.979672372341156,
      "learning_rate": 1.708568254521283e-05,
      "loss": 2.6178,
      "step": 19525
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0758347511291504,
      "learning_rate": 1.7085392000340245e-05,
      "loss": 2.4839,
      "step": 19526
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3117228746414185,
      "learning_rate": 1.7085101443456e-05,
      "loss": 2.3944,
      "step": 19527
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0621554851531982,
      "learning_rate": 1.7084810874560583e-05,
      "loss": 2.5422,
      "step": 19528
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1607935428619385,
      "learning_rate": 1.7084520293654488e-05,
      "loss": 2.5769,
      "step": 19529
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0857582092285156,
      "learning_rate": 1.7084229700738205e-05,
      "loss": 2.2589,
      "step": 19530
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0203192234039307,
      "learning_rate": 1.708393909581223e-05,
      "loss": 2.3492,
      "step": 19531
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0075052976608276,
      "learning_rate": 1.7083648478877054e-05,
      "loss": 2.4351,
      "step": 19532
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0378016233444214,
      "learning_rate": 1.708335784993317e-05,
      "loss": 2.3333,
      "step": 19533
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0798919200897217,
      "learning_rate": 1.708306720898107e-05,
      "loss": 2.3438,
      "step": 19534
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1924707889556885,
      "learning_rate": 1.7082776556021246e-05,
      "loss": 2.6009,
      "step": 19535
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9526392221450806,
      "learning_rate": 1.7082485891054195e-05,
      "loss": 2.497,
      "step": 19536
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0918364524841309,
      "learning_rate": 1.7082195214080405e-05,
      "loss": 2.4608,
      "step": 19537
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0255929231643677,
      "learning_rate": 1.7081904525100373e-05,
      "loss": 2.4352,
      "step": 19538
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1041219234466553,
      "learning_rate": 1.7081613824114585e-05,
      "loss": 2.4075,
      "step": 19539
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9505926966667175,
      "learning_rate": 1.708132311112354e-05,
      "loss": 2.401,
      "step": 19540
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0545305013656616,
      "learning_rate": 1.708103238612773e-05,
      "loss": 2.4437,
      "step": 19541
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0184223651885986,
      "learning_rate": 1.708074164912765e-05,
      "loss": 2.3987,
      "step": 19542
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.087177038192749,
      "learning_rate": 1.7080450900123783e-05,
      "loss": 2.6657,
      "step": 19543
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0537155866622925,
      "learning_rate": 1.7080160139116634e-05,
      "loss": 2.3942,
      "step": 19544
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0118812322616577,
      "learning_rate": 1.707986936610669e-05,
      "loss": 2.6223,
      "step": 19545
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.077588438987732,
      "learning_rate": 1.7079578581094443e-05,
      "loss": 2.2685,
      "step": 19546
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9608778953552246,
      "learning_rate": 1.7079287784080387e-05,
      "loss": 2.3961,
      "step": 19547
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1970888376235962,
      "learning_rate": 1.707899697506502e-05,
      "loss": 2.4053,
      "step": 19548
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0837743282318115,
      "learning_rate": 1.7078706154048824e-05,
      "loss": 2.2366,
      "step": 19549
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.056698203086853,
      "learning_rate": 1.70784153210323e-05,
      "loss": 2.5391,
      "step": 19550
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9684407711029053,
      "learning_rate": 1.7078124476015942e-05,
      "loss": 2.6167,
      "step": 19551
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0696829557418823,
      "learning_rate": 1.707783361900024e-05,
      "loss": 2.3519,
      "step": 19552
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0142136812210083,
      "learning_rate": 1.707754274998569e-05,
      "loss": 2.3789,
      "step": 19553
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.170663833618164,
      "learning_rate": 1.7077251868972777e-05,
      "loss": 2.5361,
      "step": 19554
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0872275829315186,
      "learning_rate": 1.7076960975962004e-05,
      "loss": 2.4195,
      "step": 19555
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.102466344833374,
      "learning_rate": 1.707667007095386e-05,
      "loss": 2.4109,
      "step": 19556
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9678661227226257,
      "learning_rate": 1.7076379153948835e-05,
      "loss": 2.1543,
      "step": 19557
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1391690969467163,
      "learning_rate": 1.7076088224947432e-05,
      "loss": 2.6029,
      "step": 19558
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.025292158126831,
      "learning_rate": 1.7075797283950132e-05,
      "loss": 2.3383,
      "step": 19559
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.5318264961242676,
      "learning_rate": 1.7075506330957435e-05,
      "loss": 2.4388,
      "step": 19560
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1501405239105225,
      "learning_rate": 1.7075215365969834e-05,
      "loss": 2.4806,
      "step": 19561
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2187600135803223,
      "learning_rate": 1.707492438898782e-05,
      "loss": 2.3257,
      "step": 19562
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0563547611236572,
      "learning_rate": 1.707463340001189e-05,
      "loss": 2.2554,
      "step": 19563
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.027426838874817,
      "learning_rate": 1.7074342399042532e-05,
      "loss": 2.3335,
      "step": 19564
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0025819540023804,
      "learning_rate": 1.7074051386080242e-05,
      "loss": 2.5004,
      "step": 19565
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9866046905517578,
      "learning_rate": 1.7073760361125517e-05,
      "loss": 2.572,
      "step": 19566
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0491760969161987,
      "learning_rate": 1.7073469324178845e-05,
      "loss": 2.2591,
      "step": 19567
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2131356000900269,
      "learning_rate": 1.707317827524072e-05,
      "loss": 2.3452,
      "step": 19568
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.218794345855713,
      "learning_rate": 1.707288721431164e-05,
      "loss": 2.3518,
      "step": 19569
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.010567545890808,
      "learning_rate": 1.707259614139209e-05,
      "loss": 2.4898,
      "step": 19570
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.084911584854126,
      "learning_rate": 1.7072305056482576e-05,
      "loss": 2.3451,
      "step": 19571
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0906935930252075,
      "learning_rate": 1.707201395958358e-05,
      "loss": 2.5531,
      "step": 19572
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9474388957023621,
      "learning_rate": 1.70717228506956e-05,
      "loss": 2.383,
      "step": 19573
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0467884540557861,
      "learning_rate": 1.7071431729819127e-05,
      "loss": 2.5132,
      "step": 19574
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9680212140083313,
      "learning_rate": 1.707114059695466e-05,
      "loss": 2.5073,
      "step": 19575
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0635948181152344,
      "learning_rate": 1.707084945210269e-05,
      "loss": 2.6138,
      "step": 19576
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9532974362373352,
      "learning_rate": 1.707055829526371e-05,
      "loss": 2.2451,
      "step": 19577
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0179113149642944,
      "learning_rate": 1.707026712643821e-05,
      "loss": 2.5837,
      "step": 19578
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.9944773316383362,
      "learning_rate": 1.7069975945626684e-05,
      "loss": 2.5067,
      "step": 19579
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0476770401000977,
      "learning_rate": 1.7069684752829632e-05,
      "loss": 2.5755,
      "step": 19580
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0659115314483643,
      "learning_rate": 1.7069393548047546e-05,
      "loss": 2.5178,
      "step": 19581
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0323373079299927,
      "learning_rate": 1.706910233128092e-05,
      "loss": 2.296,
      "step": 19582
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0562469959259033,
      "learning_rate": 1.706881110253024e-05,
      "loss": 2.3202,
      "step": 19583
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.0552794933319092,
      "learning_rate": 1.7068519861796006e-05,
      "loss": 2.5054,
      "step": 19584
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1171038150787354,
      "learning_rate": 1.7068228609078712e-05,
      "loss": 2.415,
      "step": 19585
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0354137420654297,
      "learning_rate": 1.7067937344378854e-05,
      "loss": 2.4136,
      "step": 19586
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9377217292785645,
      "learning_rate": 1.7067646067696918e-05,
      "loss": 2.4975,
      "step": 19587
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1094286441802979,
      "learning_rate": 1.7067354779033406e-05,
      "loss": 2.269,
      "step": 19588
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.110865831375122,
      "learning_rate": 1.7067063478388806e-05,
      "loss": 2.3203,
      "step": 19589
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0206255912780762,
      "learning_rate": 1.7066772165763613e-05,
      "loss": 2.4826,
      "step": 19590
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.051867127418518,
      "learning_rate": 1.7066480841158325e-05,
      "loss": 2.3848,
      "step": 19591
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0624921321868896,
      "learning_rate": 1.7066189504573433e-05,
      "loss": 2.7032,
      "step": 19592
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9731799364089966,
      "learning_rate": 1.7065898156009425e-05,
      "loss": 2.5046,
      "step": 19593
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9621379971504211,
      "learning_rate": 1.7065606795466806e-05,
      "loss": 2.4063,
      "step": 19594
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.364750623703003,
      "learning_rate": 1.7065315422946063e-05,
      "loss": 2.647,
      "step": 19595
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0597178936004639,
      "learning_rate": 1.7065024038447692e-05,
      "loss": 2.51,
      "step": 19596
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0734530687332153,
      "learning_rate": 1.7064732641972187e-05,
      "loss": 2.42,
      "step": 19597
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5194896459579468,
      "learning_rate": 1.706444123352004e-05,
      "loss": 2.3943,
      "step": 19598
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0119121074676514,
      "learning_rate": 1.7064149813091746e-05,
      "loss": 2.691,
      "step": 19599
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1706827878952026,
      "learning_rate": 1.70638583806878e-05,
      "loss": 2.3925,
      "step": 19600
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.033975601196289,
      "learning_rate": 1.7063566936308696e-05,
      "loss": 2.4949,
      "step": 19601
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0704278945922852,
      "learning_rate": 1.7063275479954927e-05,
      "loss": 2.27,
      "step": 19602
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0810128450393677,
      "learning_rate": 1.706298401162699e-05,
      "loss": 2.3875,
      "step": 19603
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1420321464538574,
      "learning_rate": 1.7062692531325374e-05,
      "loss": 2.6666,
      "step": 19604
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.189291000366211,
      "learning_rate": 1.7062401039050574e-05,
      "loss": 2.5054,
      "step": 19605
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1109758615493774,
      "learning_rate": 1.706210953480309e-05,
      "loss": 2.3358,
      "step": 19606
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.042988896369934,
      "learning_rate": 1.706181801858341e-05,
      "loss": 2.3516,
      "step": 19607
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0355594158172607,
      "learning_rate": 1.7061526490392032e-05,
      "loss": 2.5142,
      "step": 19608
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.162522315979004,
      "learning_rate": 1.706123495022945e-05,
      "loss": 2.5674,
      "step": 19609
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0937235355377197,
      "learning_rate": 1.7060943398096153e-05,
      "loss": 2.6135,
      "step": 19610
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0970754623413086,
      "learning_rate": 1.706065183399264e-05,
      "loss": 2.5073,
      "step": 19611
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9681509137153625,
      "learning_rate": 1.706036025791941e-05,
      "loss": 2.2423,
      "step": 19612
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0545454025268555,
      "learning_rate": 1.7060068669876947e-05,
      "loss": 2.3259,
      "step": 19613
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0094623565673828,
      "learning_rate": 1.705977706986575e-05,
      "loss": 2.588,
      "step": 19614
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1136682033538818,
      "learning_rate": 1.7059485457886317e-05,
      "loss": 2.6642,
      "step": 19615
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0294153690338135,
      "learning_rate": 1.7059193833939134e-05,
      "loss": 2.5573,
      "step": 19616
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9677990078926086,
      "learning_rate": 1.70589021980247e-05,
      "loss": 2.3062,
      "step": 19617
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1315113306045532,
      "learning_rate": 1.7058610550143515e-05,
      "loss": 2.4041,
      "step": 19618
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9817639589309692,
      "learning_rate": 1.7058318890296065e-05,
      "loss": 2.2653,
      "step": 19619
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.019295334815979,
      "learning_rate": 1.7058027218482847e-05,
      "loss": 2.3559,
      "step": 19620
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9059426784515381,
      "learning_rate": 1.7057735534704358e-05,
      "loss": 2.392,
      "step": 19621
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1533926725387573,
      "learning_rate": 1.705744383896109e-05,
      "loss": 2.3865,
      "step": 19622
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0757393836975098,
      "learning_rate": 1.7057152131253537e-05,
      "loss": 2.6653,
      "step": 19623
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9608213305473328,
      "learning_rate": 1.7056860411582193e-05,
      "loss": 2.5407,
      "step": 19624
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1784478425979614,
      "learning_rate": 1.7056568679947554e-05,
      "loss": 2.5913,
      "step": 19625
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.065641164779663,
      "learning_rate": 1.7056276936350114e-05,
      "loss": 2.6212,
      "step": 19626
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0360536575317383,
      "learning_rate": 1.705598518079037e-05,
      "loss": 2.596,
      "step": 19627
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9661299586296082,
      "learning_rate": 1.7055693413268816e-05,
      "loss": 2.4923,
      "step": 19628
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.116987943649292,
      "learning_rate": 1.7055401633785944e-05,
      "loss": 2.5533,
      "step": 19629
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0020085573196411,
      "learning_rate": 1.705510984234225e-05,
      "loss": 2.5136,
      "step": 19630
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9708606600761414,
      "learning_rate": 1.705481803893823e-05,
      "loss": 2.3264,
      "step": 19631
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9485307335853577,
      "learning_rate": 1.7054526223574375e-05,
      "loss": 2.5838,
      "step": 19632
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0391063690185547,
      "learning_rate": 1.7054234396251184e-05,
      "loss": 2.2511,
      "step": 19633
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3250927925109863,
      "learning_rate": 1.7053942556969148e-05,
      "loss": 2.5261,
      "step": 19634
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0694760084152222,
      "learning_rate": 1.7053650705728765e-05,
      "loss": 2.5671,
      "step": 19635
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0048097372055054,
      "learning_rate": 1.7053358842530527e-05,
      "loss": 2.4543,
      "step": 19636
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9771038889884949,
      "learning_rate": 1.705306696737493e-05,
      "loss": 2.4784,
      "step": 19637
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.043581247329712,
      "learning_rate": 1.7052775080262472e-05,
      "loss": 2.3907,
      "step": 19638
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1513348817825317,
      "learning_rate": 1.705248318119364e-05,
      "loss": 2.7192,
      "step": 19639
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.015255093574524,
      "learning_rate": 1.7052191270168935e-05,
      "loss": 2.4535,
      "step": 19640
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2166221141815186,
      "learning_rate": 1.7051899347188853e-05,
      "loss": 2.3919,
      "step": 19641
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1037403345108032,
      "learning_rate": 1.7051607412253887e-05,
      "loss": 2.4292,
      "step": 19642
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.020795464515686,
      "learning_rate": 1.7051315465364528e-05,
      "loss": 2.5431,
      "step": 19643
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4147204160690308,
      "learning_rate": 1.7051023506521277e-05,
      "loss": 2.3738,
      "step": 19644
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1300603151321411,
      "learning_rate": 1.705073153572462e-05,
      "loss": 2.2913,
      "step": 19645
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1817584037780762,
      "learning_rate": 1.7050439552975063e-05,
      "loss": 2.6728,
      "step": 19646
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9840680956840515,
      "learning_rate": 1.7050147558273097e-05,
      "loss": 2.2591,
      "step": 19647
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1198300123214722,
      "learning_rate": 1.7049855551619214e-05,
      "loss": 2.491,
      "step": 19648
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0986435413360596,
      "learning_rate": 1.7049563533013912e-05,
      "loss": 2.5109,
      "step": 19649
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9254279732704163,
      "learning_rate": 1.7049271502457685e-05,
      "loss": 2.3874,
      "step": 19650
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1192783117294312,
      "learning_rate": 1.7048979459951028e-05,
      "loss": 2.5652,
      "step": 19651
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0050126314163208,
      "learning_rate": 1.7048687405494438e-05,
      "loss": 2.5061,
      "step": 19652
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2701059579849243,
      "learning_rate": 1.7048395339088408e-05,
      "loss": 2.5726,
      "step": 19653
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.070789098739624,
      "learning_rate": 1.704810326073343e-05,
      "loss": 2.4609,
      "step": 19654
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9982205033302307,
      "learning_rate": 1.7047811170430005e-05,
      "loss": 2.5809,
      "step": 19655
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1343278884887695,
      "learning_rate": 1.7047519068178625e-05,
      "loss": 2.4667,
      "step": 19656
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.093969702720642,
      "learning_rate": 1.704722695397979e-05,
      "loss": 2.4198,
      "step": 19657
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9039372205734253,
      "learning_rate": 1.7046934827833987e-05,
      "loss": 2.4436,
      "step": 19658
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1028988361358643,
      "learning_rate": 1.7046642689741716e-05,
      "loss": 2.3927,
      "step": 19659
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9616323709487915,
      "learning_rate": 1.7046350539703475e-05,
      "loss": 2.2971,
      "step": 19660
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9294474720954895,
      "learning_rate": 1.7046058377719755e-05,
      "loss": 2.2781,
      "step": 19661
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1570161581039429,
      "learning_rate": 1.704576620379105e-05,
      "loss": 2.3353,
      "step": 19662
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0511455535888672,
      "learning_rate": 1.704547401791786e-05,
      "loss": 2.4183,
      "step": 19663
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9762261509895325,
      "learning_rate": 1.704518182010068e-05,
      "loss": 2.5385,
      "step": 19664
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9919660091400146,
      "learning_rate": 1.7044889610339995e-05,
      "loss": 2.5622,
      "step": 19665
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9385251998901367,
      "learning_rate": 1.7044597388636315e-05,
      "loss": 2.3155,
      "step": 19666
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.153551459312439,
      "learning_rate": 1.704430515499013e-05,
      "loss": 2.395,
      "step": 19667
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.056396722793579,
      "learning_rate": 1.7044012909401932e-05,
      "loss": 2.4879,
      "step": 19668
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9536473751068115,
      "learning_rate": 1.704372065187222e-05,
      "loss": 2.4993,
      "step": 19669
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0711305141448975,
      "learning_rate": 1.704342838240149e-05,
      "loss": 2.5416,
      "step": 19670
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0180487632751465,
      "learning_rate": 1.7043136100990235e-05,
      "loss": 2.6159,
      "step": 19671
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9837331771850586,
      "learning_rate": 1.7042843807638946e-05,
      "loss": 2.4527,
      "step": 19672
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0243052244186401,
      "learning_rate": 1.704255150234813e-05,
      "loss": 2.3634,
      "step": 19673
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9856877326965332,
      "learning_rate": 1.7042259185118274e-05,
      "loss": 2.2273,
      "step": 19674
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0093945264816284,
      "learning_rate": 1.704196685594988e-05,
      "loss": 2.3561,
      "step": 19675
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0475045442581177,
      "learning_rate": 1.7041674514843436e-05,
      "loss": 2.3634,
      "step": 19676
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2013955116271973,
      "learning_rate": 1.704138216179944e-05,
      "loss": 2.5468,
      "step": 19677
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.100009560585022,
      "learning_rate": 1.7041089796818393e-05,
      "loss": 2.4675,
      "step": 19678
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0010077953338623,
      "learning_rate": 1.7040797419900784e-05,
      "loss": 2.297,
      "step": 19679
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0991623401641846,
      "learning_rate": 1.704050503104711e-05,
      "loss": 2.5829,
      "step": 19680
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.038140892982483,
      "learning_rate": 1.704021263025787e-05,
      "loss": 2.5933,
      "step": 19681
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0823322534561157,
      "learning_rate": 1.7039920217533555e-05,
      "loss": 2.3464,
      "step": 19682
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9868897199630737,
      "learning_rate": 1.7039627792874666e-05,
      "loss": 2.4614,
      "step": 19683
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1457386016845703,
      "learning_rate": 1.7039335356281696e-05,
      "loss": 2.4006,
      "step": 19684
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1968828439712524,
      "learning_rate": 1.703904290775514e-05,
      "loss": 2.4192,
      "step": 19685
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1788872480392456,
      "learning_rate": 1.703875044729549e-05,
      "loss": 2.5987,
      "step": 19686
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1886017322540283,
      "learning_rate": 1.7038457974903252e-05,
      "loss": 2.3937,
      "step": 19687
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1536118984222412,
      "learning_rate": 1.7038165490578915e-05,
      "loss": 2.5196,
      "step": 19688
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1793140172958374,
      "learning_rate": 1.7037872994322976e-05,
      "loss": 2.5365,
      "step": 19689
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9429978132247925,
      "learning_rate": 1.703758048613593e-05,
      "loss": 2.4616,
      "step": 19690
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0056829452514648,
      "learning_rate": 1.7037287966018272e-05,
      "loss": 2.3221,
      "step": 19691
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0062159299850464,
      "learning_rate": 1.7036995433970506e-05,
      "loss": 2.4556,
      "step": 19692
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1848713159561157,
      "learning_rate": 1.7036702889993114e-05,
      "loss": 2.5185,
      "step": 19693
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0999759435653687,
      "learning_rate": 1.7036410334086602e-05,
      "loss": 2.5308,
      "step": 19694
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1924396753311157,
      "learning_rate": 1.7036117766251467e-05,
      "loss": 2.4192,
      "step": 19695
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8829460144042969,
      "learning_rate": 1.7035825186488193e-05,
      "loss": 2.4745,
      "step": 19696
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1693658828735352,
      "learning_rate": 1.7035532594797294e-05,
      "loss": 2.3761,
      "step": 19697
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0784026384353638,
      "learning_rate": 1.703523999117925e-05,
      "loss": 2.4838,
      "step": 19698
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.038295030593872,
      "learning_rate": 1.703494737563457e-05,
      "loss": 2.5473,
      "step": 19699
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0006518363952637,
      "learning_rate": 1.7034654748163733e-05,
      "loss": 2.2335,
      "step": 19700
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0330665111541748,
      "learning_rate": 1.7034362108767254e-05,
      "loss": 2.5102,
      "step": 19701
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9750328063964844,
      "learning_rate": 1.703406945744562e-05,
      "loss": 2.3217,
      "step": 19702
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2126320600509644,
      "learning_rate": 1.7033776794199325e-05,
      "loss": 2.3494,
      "step": 19703
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9966474771499634,
      "learning_rate": 1.703348411902887e-05,
      "loss": 2.3234,
      "step": 19704
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.11421799659729,
      "learning_rate": 1.7033191431934745e-05,
      "loss": 2.5793,
      "step": 19705
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0401525497436523,
      "learning_rate": 1.7032898732917455e-05,
      "loss": 2.3772,
      "step": 19706
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0262805223464966,
      "learning_rate": 1.7032606021977493e-05,
      "loss": 2.4783,
      "step": 19707
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0563325881958008,
      "learning_rate": 1.703231329911535e-05,
      "loss": 2.4716,
      "step": 19708
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9777311682701111,
      "learning_rate": 1.7032020564331527e-05,
      "loss": 2.7499,
      "step": 19709
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1042554378509521,
      "learning_rate": 1.703172781762652e-05,
      "loss": 2.5602,
      "step": 19710
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9776485562324524,
      "learning_rate": 1.7031435059000822e-05,
      "loss": 2.3001,
      "step": 19711
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0505784749984741,
      "learning_rate": 1.7031142288454934e-05,
      "loss": 2.6895,
      "step": 19712
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9645839333534241,
      "learning_rate": 1.7030849505989348e-05,
      "loss": 2.118,
      "step": 19713
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9858279228210449,
      "learning_rate": 1.703055671160457e-05,
      "loss": 2.2532,
      "step": 19714
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0602362155914307,
      "learning_rate": 1.703026390530108e-05,
      "loss": 2.4036,
      "step": 19715
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0154780149459839,
      "learning_rate": 1.7029971087079387e-05,
      "loss": 2.6005,
      "step": 19716
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0326451063156128,
      "learning_rate": 1.7029678256939984e-05,
      "loss": 2.5276,
      "step": 19717
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9968867897987366,
      "learning_rate": 1.7029385414883367e-05,
      "loss": 2.4394,
      "step": 19718
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9881001710891724,
      "learning_rate": 1.702909256091003e-05,
      "loss": 2.48,
      "step": 19719
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1091876029968262,
      "learning_rate": 1.7028799695020473e-05,
      "loss": 2.5442,
      "step": 19720
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9509945511817932,
      "learning_rate": 1.7028506817215197e-05,
      "loss": 2.3251,
      "step": 19721
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0526973009109497,
      "learning_rate": 1.7028213927494687e-05,
      "loss": 2.4163,
      "step": 19722
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0628302097320557,
      "learning_rate": 1.7027921025859446e-05,
      "loss": 2.5061,
      "step": 19723
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9936439990997314,
      "learning_rate": 1.702762811230997e-05,
      "loss": 2.3605,
      "step": 19724
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0669392347335815,
      "learning_rate": 1.702733518684676e-05,
      "loss": 2.5089,
      "step": 19725
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9862927198410034,
      "learning_rate": 1.70270422494703e-05,
      "loss": 2.2274,
      "step": 19726
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1338162422180176,
      "learning_rate": 1.7026749300181105e-05,
      "loss": 2.4435,
      "step": 19727
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0318946838378906,
      "learning_rate": 1.7026456338979655e-05,
      "loss": 2.4991,
      "step": 19728
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9709929823875427,
      "learning_rate": 1.7026163365866454e-05,
      "loss": 2.2396,
      "step": 19729
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0471705198287964,
      "learning_rate": 1.7025870380842e-05,
      "loss": 2.7197,
      "step": 19730
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0011707544326782,
      "learning_rate": 1.7025577383906786e-05,
      "loss": 2.4151,
      "step": 19731
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0656871795654297,
      "learning_rate": 1.702528437506131e-05,
      "loss": 2.5255,
      "step": 19732
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0370755195617676,
      "learning_rate": 1.7024991354306068e-05,
      "loss": 2.4984,
      "step": 19733
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9624440670013428,
      "learning_rate": 1.7024698321641562e-05,
      "loss": 2.8304,
      "step": 19734
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1093887090682983,
      "learning_rate": 1.7024405277068278e-05,
      "loss": 2.5767,
      "step": 19735
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0868659019470215,
      "learning_rate": 1.7024112220586724e-05,
      "loss": 2.5317,
      "step": 19736
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0755431652069092,
      "learning_rate": 1.702381915219739e-05,
      "loss": 2.2393,
      "step": 19737
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3149700164794922,
      "learning_rate": 1.7023526071900776e-05,
      "loss": 2.3245,
      "step": 19738
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1664611101150513,
      "learning_rate": 1.7023232979697376e-05,
      "loss": 2.7076,
      "step": 19739
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.009658932685852,
      "learning_rate": 1.702293987558769e-05,
      "loss": 2.4025,
      "step": 19740
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.011564016342163,
      "learning_rate": 1.7022646759572216e-05,
      "loss": 2.3356,
      "step": 19741
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.011323094367981,
      "learning_rate": 1.7022353631651445e-05,
      "loss": 2.3623,
      "step": 19742
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.219926118850708,
      "learning_rate": 1.7022060491825877e-05,
      "loss": 2.4251,
      "step": 19743
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.039425253868103,
      "learning_rate": 1.702176734009601e-05,
      "loss": 2.4162,
      "step": 19744
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0086826086044312,
      "learning_rate": 1.7021474176462342e-05,
      "loss": 2.603,
      "step": 19745
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.062883734703064,
      "learning_rate": 1.7021181000925367e-05,
      "loss": 2.4748,
      "step": 19746
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0569688081741333,
      "learning_rate": 1.7020887813485583e-05,
      "loss": 2.4375,
      "step": 19747
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0755993127822876,
      "learning_rate": 1.702059461414349e-05,
      "loss": 2.3699,
      "step": 19748
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0502371788024902,
      "learning_rate": 1.7020301402899576e-05,
      "loss": 2.4857,
      "step": 19749
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.027451753616333,
      "learning_rate": 1.702000817975435e-05,
      "loss": 2.4899,
      "step": 19750
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0090328454971313,
      "learning_rate": 1.70197149447083e-05,
      "loss": 2.4137,
      "step": 19751
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0325957536697388,
      "learning_rate": 1.701942169776193e-05,
      "loss": 2.4877,
      "step": 19752
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1030539274215698,
      "learning_rate": 1.7019128438915734e-05,
      "loss": 2.5308,
      "step": 19753
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2270675897598267,
      "learning_rate": 1.7018835168170205e-05,
      "loss": 2.2929,
      "step": 19754
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0900697708129883,
      "learning_rate": 1.7018541885525847e-05,
      "loss": 2.2919,
      "step": 19755
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1569494009017944,
      "learning_rate": 1.7018248590983155e-05,
      "loss": 2.5541,
      "step": 19756
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1731274127960205,
      "learning_rate": 1.7017955284542625e-05,
      "loss": 2.5746,
      "step": 19757
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0423128604888916,
      "learning_rate": 1.701766196620475e-05,
      "loss": 2.4621,
      "step": 19758
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9673646092414856,
      "learning_rate": 1.7017368635970036e-05,
      "loss": 2.2834,
      "step": 19759
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9267380833625793,
      "learning_rate": 1.701707529383898e-05,
      "loss": 2.5641,
      "step": 19760
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0299485921859741,
      "learning_rate": 1.7016781939812072e-05,
      "loss": 2.4171,
      "step": 19761
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0028400421142578,
      "learning_rate": 1.7016488573889812e-05,
      "loss": 2.4828,
      "step": 19762
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0598787069320679,
      "learning_rate": 1.70161951960727e-05,
      "loss": 2.4566,
      "step": 19763
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9873338937759399,
      "learning_rate": 1.7015901806361227e-05,
      "loss": 2.6429,
      "step": 19764
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9723201394081116,
      "learning_rate": 1.70156084047559e-05,
      "loss": 2.4902,
      "step": 19765
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9667088389396667,
      "learning_rate": 1.701531499125721e-05,
      "loss": 2.4451,
      "step": 19766
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0005357265472412,
      "learning_rate": 1.7015021565865656e-05,
      "loss": 2.3388,
      "step": 19767
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.055787205696106,
      "learning_rate": 1.701472812858173e-05,
      "loss": 2.6131,
      "step": 19768
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2718989849090576,
      "learning_rate": 1.701443467940594e-05,
      "loss": 2.3938,
      "step": 19769
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.967931866645813,
      "learning_rate": 1.701414121833878e-05,
      "loss": 2.6237,
      "step": 19770
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1279678344726562,
      "learning_rate": 1.701384774538074e-05,
      "loss": 2.3728,
      "step": 19771
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.119339942932129,
      "learning_rate": 1.701355426053233e-05,
      "loss": 2.3841,
      "step": 19772
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0187398195266724,
      "learning_rate": 1.7013260763794036e-05,
      "loss": 2.4904,
      "step": 19773
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1158329248428345,
      "learning_rate": 1.7012967255166362e-05,
      "loss": 2.5483,
      "step": 19774
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0633870363235474,
      "learning_rate": 1.7012673734649802e-05,
      "loss": 2.3056,
      "step": 19775
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1566441059112549,
      "learning_rate": 1.7012380202244856e-05,
      "loss": 2.4082,
      "step": 19776
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0466419458389282,
      "learning_rate": 1.701208665795202e-05,
      "loss": 2.3912,
      "step": 19777
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9516410827636719,
      "learning_rate": 1.7011793101771794e-05,
      "loss": 2.1317,
      "step": 19778
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0754345655441284,
      "learning_rate": 1.7011499533704678e-05,
      "loss": 2.4574,
      "step": 19779
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0934847593307495,
      "learning_rate": 1.701120595375116e-05,
      "loss": 2.4601,
      "step": 19780
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.128649353981018,
      "learning_rate": 1.7010912361911747e-05,
      "loss": 2.3704,
      "step": 19781
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1211750507354736,
      "learning_rate": 1.701061875818693e-05,
      "loss": 2.3626,
      "step": 19782
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0708363056182861,
      "learning_rate": 1.7010325142577214e-05,
      "loss": 2.1345,
      "step": 19783
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1857002973556519,
      "learning_rate": 1.7010031515083093e-05,
      "loss": 2.455,
      "step": 19784
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0610406398773193,
      "learning_rate": 1.7009737875705062e-05,
      "loss": 2.3393,
      "step": 19785
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0476816892623901,
      "learning_rate": 1.700944422444362e-05,
      "loss": 2.7018,
      "step": 19786
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0017321109771729,
      "learning_rate": 1.7009150561299274e-05,
      "loss": 2.4019,
      "step": 19787
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0315265655517578,
      "learning_rate": 1.7008856886272506e-05,
      "loss": 2.4235,
      "step": 19788
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0903472900390625,
      "learning_rate": 1.7008563199363824e-05,
      "loss": 2.4426,
      "step": 19789
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.282122015953064,
      "learning_rate": 1.7008269500573727e-05,
      "loss": 2.5149,
      "step": 19790
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9413230419158936,
      "learning_rate": 1.7007975789902705e-05,
      "loss": 2.5092,
      "step": 19791
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0962680578231812,
      "learning_rate": 1.7007682067351266e-05,
      "loss": 2.5287,
      "step": 19792
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9830994606018066,
      "learning_rate": 1.70073883329199e-05,
      "loss": 2.7703,
      "step": 19793
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9689701199531555,
      "learning_rate": 1.7007094586609105e-05,
      "loss": 2.423,
      "step": 19794
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0662319660186768,
      "learning_rate": 1.7006800828419386e-05,
      "loss": 2.3983,
      "step": 19795
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9932522177696228,
      "learning_rate": 1.7006507058351232e-05,
      "loss": 2.4969,
      "step": 19796
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2662591934204102,
      "learning_rate": 1.7006213276405152e-05,
      "loss": 2.4915,
      "step": 19797
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1453131437301636,
      "learning_rate": 1.700591948258163e-05,
      "loss": 2.4332,
      "step": 19798
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9975748062133789,
      "learning_rate": 1.7005625676881176e-05,
      "loss": 2.5438,
      "step": 19799
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1241683959960938,
      "learning_rate": 1.7005331859304286e-05,
      "loss": 2.4048,
      "step": 19800
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0534306764602661,
      "learning_rate": 1.7005038029851455e-05,
      "loss": 2.286,
      "step": 19801
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0250191688537598,
      "learning_rate": 1.7004744188523178e-05,
      "loss": 2.4551,
      "step": 19802
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0754766464233398,
      "learning_rate": 1.700445033531996e-05,
      "loss": 2.3883,
      "step": 19803
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9678317904472351,
      "learning_rate": 1.7004156470242296e-05,
      "loss": 2.4767,
      "step": 19804
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.928065836429596,
      "learning_rate": 1.7003862593290683e-05,
      "loss": 2.511,
      "step": 19805
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.040157437324524,
      "learning_rate": 1.7003568704465623e-05,
      "loss": 2.3978,
      "step": 19806
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1014128923416138,
      "learning_rate": 1.700327480376761e-05,
      "loss": 2.3471,
      "step": 19807
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.082168698310852,
      "learning_rate": 1.7002980891197145e-05,
      "loss": 2.5746,
      "step": 19808
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0162081718444824,
      "learning_rate": 1.7002686966754726e-05,
      "loss": 2.5865,
      "step": 19809
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9968216419219971,
      "learning_rate": 1.700239303044085e-05,
      "loss": 2.374,
      "step": 19810
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9977436661720276,
      "learning_rate": 1.7002099082256015e-05,
      "loss": 2.2529,
      "step": 19811
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.132248878479004,
      "learning_rate": 1.7001805122200722e-05,
      "loss": 2.4072,
      "step": 19812
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.001625418663025,
      "learning_rate": 1.7001511150275466e-05,
      "loss": 2.5526,
      "step": 19813
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9642870426177979,
      "learning_rate": 1.700121716648075e-05,
      "loss": 2.3248,
      "step": 19814
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.017037034034729,
      "learning_rate": 1.7000923170817067e-05,
      "loss": 2.4238,
      "step": 19815
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.029272198677063,
      "learning_rate": 1.700062916328492e-05,
      "loss": 2.4908,
      "step": 19816
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9928924441337585,
      "learning_rate": 1.7000335143884802e-05,
      "loss": 2.3835,
      "step": 19817
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1006747484207153,
      "learning_rate": 1.7000041112617217e-05,
      "loss": 2.4687,
      "step": 19818
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0258888006210327,
      "learning_rate": 1.699974706948266e-05,
      "loss": 2.4603,
      "step": 19819
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9966445565223694,
      "learning_rate": 1.699945301448163e-05,
      "loss": 2.4303,
      "step": 19820
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1830439567565918,
      "learning_rate": 1.6999158947614627e-05,
      "loss": 2.3263,
      "step": 19821
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0266048908233643,
      "learning_rate": 1.6998864868882148e-05,
      "loss": 2.4363,
      "step": 19822
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2916967868804932,
      "learning_rate": 1.6998570778284692e-05,
      "loss": 2.5342,
      "step": 19823
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.009971022605896,
      "learning_rate": 1.699827667582276e-05,
      "loss": 2.3177,
      "step": 19824
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0842218399047852,
      "learning_rate": 1.6997982561496846e-05,
      "loss": 2.3853,
      "step": 19825
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0004264116287231,
      "learning_rate": 1.6997688435307452e-05,
      "loss": 2.7199,
      "step": 19826
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0705111026763916,
      "learning_rate": 1.6997394297255077e-05,
      "loss": 2.4767,
      "step": 19827
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0175243616104126,
      "learning_rate": 1.6997100147340216e-05,
      "loss": 2.3871,
      "step": 19828
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0515094995498657,
      "learning_rate": 1.699680598556337e-05,
      "loss": 2.5738,
      "step": 19829
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0696215629577637,
      "learning_rate": 1.6996511811925037e-05,
      "loss": 2.4435,
      "step": 19830
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0258264541625977,
      "learning_rate": 1.6996217626425718e-05,
      "loss": 2.4564,
      "step": 19831
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0248018503189087,
      "learning_rate": 1.699592342906591e-05,
      "loss": 2.3552,
      "step": 19832
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9934043288230896,
      "learning_rate": 1.699562921984611e-05,
      "loss": 2.6179,
      "step": 19833
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0553920269012451,
      "learning_rate": 1.699533499876682e-05,
      "loss": 2.5318,
      "step": 19834
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.1916284561157227,
      "learning_rate": 1.6995040765828537e-05,
      "loss": 2.3389,
      "step": 19835
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0636471509933472,
      "learning_rate": 1.699474652103176e-05,
      "loss": 2.4274,
      "step": 19836
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9737499952316284,
      "learning_rate": 1.6994452264376987e-05,
      "loss": 2.3799,
      "step": 19837
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0031630992889404,
      "learning_rate": 1.6994157995864717e-05,
      "loss": 2.4721,
      "step": 19838
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.009770393371582,
      "learning_rate": 1.699386371549545e-05,
      "loss": 2.5357,
      "step": 19839
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0301564931869507,
      "learning_rate": 1.699356942326969e-05,
      "loss": 2.2859,
      "step": 19840
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0960527658462524,
      "learning_rate": 1.6993275119187924e-05,
      "loss": 2.4152,
      "step": 19841
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0064760446548462,
      "learning_rate": 1.699298080325066e-05,
      "loss": 2.4424,
      "step": 19842
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0458976030349731,
      "learning_rate": 1.6992686475458393e-05,
      "loss": 2.549,
      "step": 19843
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0667134523391724,
      "learning_rate": 1.699239213581162e-05,
      "loss": 2.3258,
      "step": 19844
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0136926174163818,
      "learning_rate": 1.6992097784310847e-05,
      "loss": 2.5386,
      "step": 19845
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9906461238861084,
      "learning_rate": 1.6991803420956574e-05,
      "loss": 2.447,
      "step": 19846
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.021142601966858,
      "learning_rate": 1.699150904574929e-05,
      "loss": 2.3598,
      "step": 19847
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0785541534423828,
      "learning_rate": 1.6991214658689495e-05,
      "loss": 2.4946,
      "step": 19848
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0834122896194458,
      "learning_rate": 1.6990920259777702e-05,
      "loss": 2.4854,
      "step": 19849
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0909547805786133,
      "learning_rate": 1.6990625849014392e-05,
      "loss": 2.6875,
      "step": 19850
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0073639154434204,
      "learning_rate": 1.6990331426400076e-05,
      "loss": 2.4708,
      "step": 19851
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0037935972213745,
      "learning_rate": 1.6990036991935253e-05,
      "loss": 2.3892,
      "step": 19852
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0099185705184937,
      "learning_rate": 1.6989742545620416e-05,
      "loss": 2.4987,
      "step": 19853
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0034518241882324,
      "learning_rate": 1.6989448087456065e-05,
      "loss": 2.5817,
      "step": 19854
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9862201809883118,
      "learning_rate": 1.6989153617442703e-05,
      "loss": 2.424,
      "step": 19855
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0518534183502197,
      "learning_rate": 1.6988859135580827e-05,
      "loss": 2.6257,
      "step": 19856
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9391754269599915,
      "learning_rate": 1.6988564641870936e-05,
      "loss": 2.2493,
      "step": 19857
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1372904777526855,
      "learning_rate": 1.6988270136313533e-05,
      "loss": 2.4533,
      "step": 19858
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0607571601867676,
      "learning_rate": 1.698797561890911e-05,
      "loss": 2.3188,
      "step": 19859
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.135660171508789,
      "learning_rate": 1.698768108965817e-05,
      "loss": 2.678,
      "step": 19860
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.058485746383667,
      "learning_rate": 1.6987386548561218e-05,
      "loss": 2.5269,
      "step": 19861
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.261439561843872,
      "learning_rate": 1.6987091995618743e-05,
      "loss": 2.1877,
      "step": 19862
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0768404006958008,
      "learning_rate": 1.6986797430831253e-05,
      "loss": 2.3486,
      "step": 19863
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2129278182983398,
      "learning_rate": 1.6986502854199242e-05,
      "loss": 2.4424,
      "step": 19864
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.993282675743103,
      "learning_rate": 1.698620826572321e-05,
      "loss": 2.3401,
      "step": 19865
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0362340211868286,
      "learning_rate": 1.698591366540366e-05,
      "loss": 2.4037,
      "step": 19866
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1876800060272217,
      "learning_rate": 1.698561905324109e-05,
      "loss": 2.4803,
      "step": 19867
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9241699576377869,
      "learning_rate": 1.6985324429235993e-05,
      "loss": 2.4247,
      "step": 19868
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2142091989517212,
      "learning_rate": 1.6985029793388877e-05,
      "loss": 2.4775,
      "step": 19869
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.043400526046753,
      "learning_rate": 1.698473514570024e-05,
      "loss": 2.5048,
      "step": 19870
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0014301538467407,
      "learning_rate": 1.6984440486170578e-05,
      "loss": 2.4578,
      "step": 19871
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1097906827926636,
      "learning_rate": 1.6984145814800395e-05,
      "loss": 2.4136,
      "step": 19872
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9514955878257751,
      "learning_rate": 1.6983851131590185e-05,
      "loss": 2.5807,
      "step": 19873
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0054421424865723,
      "learning_rate": 1.6983556436540453e-05,
      "loss": 2.269,
      "step": 19874
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1497573852539062,
      "learning_rate": 1.698326172965169e-05,
      "loss": 2.3893,
      "step": 19875
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9905301332473755,
      "learning_rate": 1.6982967010924407e-05,
      "loss": 2.2137,
      "step": 19876
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1048088073730469,
      "learning_rate": 1.69826722803591e-05,
      "loss": 2.4304,
      "step": 19877
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0027469396591187,
      "learning_rate": 1.698237753795626e-05,
      "loss": 2.5052,
      "step": 19878
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.172706127166748,
      "learning_rate": 1.6982082783716403e-05,
      "loss": 2.4601,
      "step": 19879
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.017020344734192,
      "learning_rate": 1.6981788017640015e-05,
      "loss": 2.5669,
      "step": 19880
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1841601133346558,
      "learning_rate": 1.6981493239727598e-05,
      "loss": 2.5028,
      "step": 19881
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9292911291122437,
      "learning_rate": 1.6981198449979657e-05,
      "loss": 2.324,
      "step": 19882
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9908191561698914,
      "learning_rate": 1.6980903648396688e-05,
      "loss": 2.4877,
      "step": 19883
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0564885139465332,
      "learning_rate": 1.6980608834979187e-05,
      "loss": 2.4627,
      "step": 19884
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0835962295532227,
      "learning_rate": 1.6980314009727664e-05,
      "loss": 2.5589,
      "step": 19885
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9778081774711609,
      "learning_rate": 1.6980019172642613e-05,
      "loss": 2.4051,
      "step": 19886
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.160843849182129,
      "learning_rate": 1.697972432372453e-05,
      "loss": 2.6147,
      "step": 19887
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1435812711715698,
      "learning_rate": 1.697942946297392e-05,
      "loss": 2.3508,
      "step": 19888
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.028691291809082,
      "learning_rate": 1.697913459039128e-05,
      "loss": 2.5373,
      "step": 19889
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9816784858703613,
      "learning_rate": 1.6978839705977114e-05,
      "loss": 2.3741,
      "step": 19890
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.05491042137146,
      "learning_rate": 1.6978544809731917e-05,
      "loss": 2.5891,
      "step": 19891
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.118944764137268,
      "learning_rate": 1.697824990165619e-05,
      "loss": 2.5851,
      "step": 19892
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1581255197525024,
      "learning_rate": 1.6977954981750437e-05,
      "loss": 2.3023,
      "step": 19893
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0584744215011597,
      "learning_rate": 1.6977660050015157e-05,
      "loss": 2.4581,
      "step": 19894
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0039982795715332,
      "learning_rate": 1.6977365106450844e-05,
      "loss": 2.3695,
      "step": 19895
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0310862064361572,
      "learning_rate": 1.6977070151058004e-05,
      "loss": 2.4196,
      "step": 19896
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9689860939979553,
      "learning_rate": 1.6976775183837135e-05,
      "loss": 2.3745,
      "step": 19897
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0214406251907349,
      "learning_rate": 1.6976480204788736e-05,
      "loss": 2.4665,
      "step": 19898
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0527750253677368,
      "learning_rate": 1.697618521391331e-05,
      "loss": 2.3974,
      "step": 19899
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2977173328399658,
      "learning_rate": 1.6975890211211356e-05,
      "loss": 2.658,
      "step": 19900
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.033333659172058,
      "learning_rate": 1.697559519668337e-05,
      "loss": 2.4546,
      "step": 19901
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0578949451446533,
      "learning_rate": 1.697530017032986e-05,
      "loss": 2.5047,
      "step": 19902
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.007784366607666,
      "learning_rate": 1.697500513215132e-05,
      "loss": 2.371,
      "step": 19903
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.041791319847107,
      "learning_rate": 1.6974710082148253e-05,
      "loss": 2.5174,
      "step": 19904
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.094564437866211,
      "learning_rate": 1.6974415020321156e-05,
      "loss": 2.3942,
      "step": 19905
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.03611421585083,
      "learning_rate": 1.697411994667053e-05,
      "loss": 2.2482,
      "step": 19906
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0554964542388916,
      "learning_rate": 1.697382486119688e-05,
      "loss": 2.5324,
      "step": 19907
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0764437913894653,
      "learning_rate": 1.6973529763900697e-05,
      "loss": 2.4858,
      "step": 19908
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1062654256820679,
      "learning_rate": 1.6973234654782494e-05,
      "loss": 2.2989,
      "step": 19909
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.120803952217102,
      "learning_rate": 1.697293953384276e-05,
      "loss": 2.4972,
      "step": 19910
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9669756889343262,
      "learning_rate": 1.6972644401082004e-05,
      "loss": 2.4517,
      "step": 19911
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0281885862350464,
      "learning_rate": 1.6972349256500717e-05,
      "loss": 2.2419,
      "step": 19912
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2507026195526123,
      "learning_rate": 1.6972054100099407e-05,
      "loss": 2.3073,
      "step": 19913
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9683802127838135,
      "learning_rate": 1.697175893187857e-05,
      "loss": 2.3105,
      "step": 19914
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9689227938652039,
      "learning_rate": 1.6971463751838713e-05,
      "loss": 2.4468,
      "step": 19915
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2209742069244385,
      "learning_rate": 1.6971168559980326e-05,
      "loss": 2.4739,
      "step": 19916
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2367119789123535,
      "learning_rate": 1.6970873356303918e-05,
      "loss": 2.3838,
      "step": 19917
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.071905255317688,
      "learning_rate": 1.6970578140809984e-05,
      "loss": 2.2645,
      "step": 19918
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0434372425079346,
      "learning_rate": 1.697028291349903e-05,
      "loss": 2.527,
      "step": 19919
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2146530151367188,
      "learning_rate": 1.6969987674371552e-05,
      "loss": 2.589,
      "step": 19920
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1971536874771118,
      "learning_rate": 1.696969242342805e-05,
      "loss": 2.5506,
      "step": 19921
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1348071098327637,
      "learning_rate": 1.6969397160669027e-05,
      "loss": 2.6028,
      "step": 19922
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0827229022979736,
      "learning_rate": 1.6969101886094982e-05,
      "loss": 2.5753,
      "step": 19923
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9685366153717041,
      "learning_rate": 1.696880659970642e-05,
      "loss": 2.4753,
      "step": 19924
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2948126792907715,
      "learning_rate": 1.6968511301503836e-05,
      "loss": 2.4396,
      "step": 19925
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0017108917236328,
      "learning_rate": 1.6968215991487734e-05,
      "loss": 2.5887,
      "step": 19926
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0511888265609741,
      "learning_rate": 1.6967920669658612e-05,
      "loss": 2.4757,
      "step": 19927
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.062458872795105,
      "learning_rate": 1.6967625336016972e-05,
      "loss": 2.5239,
      "step": 19928
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0099570751190186,
      "learning_rate": 1.6967329990563316e-05,
      "loss": 2.2095,
      "step": 19929
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1353628635406494,
      "learning_rate": 1.696703463329814e-05,
      "loss": 2.5787,
      "step": 19930
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.007110357284546,
      "learning_rate": 1.696673926422195e-05,
      "loss": 2.3567,
      "step": 19931
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.038338541984558,
      "learning_rate": 1.696644388333525e-05,
      "loss": 2.2768,
      "step": 19932
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9867348074913025,
      "learning_rate": 1.696614849063853e-05,
      "loss": 2.4495,
      "step": 19933
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0341370105743408,
      "learning_rate": 1.6965853086132295e-05,
      "loss": 2.4815,
      "step": 19934
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1457704305648804,
      "learning_rate": 1.696555766981705e-05,
      "loss": 2.7396,
      "step": 19935
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1005935668945312,
      "learning_rate": 1.6965262241693294e-05,
      "loss": 2.5175,
      "step": 19936
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.030397891998291,
      "learning_rate": 1.696496680176152e-05,
      "loss": 2.46,
      "step": 19937
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1489797830581665,
      "learning_rate": 1.6964671350022245e-05,
      "loss": 2.6358,
      "step": 19938
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0577532052993774,
      "learning_rate": 1.6964375886475956e-05,
      "loss": 2.1275,
      "step": 19939
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2135480642318726,
      "learning_rate": 1.696408041112316e-05,
      "loss": 2.4009,
      "step": 19940
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0897213220596313,
      "learning_rate": 1.6963784923964354e-05,
      "loss": 2.2919,
      "step": 19941
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9563769698143005,
      "learning_rate": 1.6963489425000043e-05,
      "loss": 2.505,
      "step": 19942
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.043409824371338,
      "learning_rate": 1.6963193914230722e-05,
      "loss": 2.2667,
      "step": 19943
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0411690473556519,
      "learning_rate": 1.6962898391656902e-05,
      "loss": 2.3924,
      "step": 19944
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.064154863357544,
      "learning_rate": 1.6962602857279076e-05,
      "loss": 2.5414,
      "step": 19945
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0816409587860107,
      "learning_rate": 1.6962307311097747e-05,
      "loss": 2.3833,
      "step": 19946
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9828272461891174,
      "learning_rate": 1.6962011753113414e-05,
      "loss": 2.4577,
      "step": 19947
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0929330587387085,
      "learning_rate": 1.696171618332658e-05,
      "loss": 2.1923,
      "step": 19948
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0757150650024414,
      "learning_rate": 1.696142060173775e-05,
      "loss": 2.222,
      "step": 19949
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.093790054321289,
      "learning_rate": 1.696112500834742e-05,
      "loss": 2.5737,
      "step": 19950
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0039012432098389,
      "learning_rate": 1.6960829403156093e-05,
      "loss": 2.4696,
      "step": 19951
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0711593627929688,
      "learning_rate": 1.6960533786164267e-05,
      "loss": 2.4547,
      "step": 19952
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0743191242218018,
      "learning_rate": 1.6960238157372448e-05,
      "loss": 2.5011,
      "step": 19953
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0735228061676025,
      "learning_rate": 1.6959942516781135e-05,
      "loss": 2.3533,
      "step": 19954
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9225029945373535,
      "learning_rate": 1.695964686439083e-05,
      "loss": 2.3227,
      "step": 19955
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.127116084098816,
      "learning_rate": 1.6959351200202032e-05,
      "loss": 2.3172,
      "step": 19956
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1177219152450562,
      "learning_rate": 1.6959055524215242e-05,
      "loss": 2.4918,
      "step": 19957
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1636794805526733,
      "learning_rate": 1.6958759836430963e-05,
      "loss": 2.6008,
      "step": 19958
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0255217552185059,
      "learning_rate": 1.69584641368497e-05,
      "loss": 2.487,
      "step": 19959
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1177005767822266,
      "learning_rate": 1.6958168425471946e-05,
      "loss": 2.319,
      "step": 19960
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9375619292259216,
      "learning_rate": 1.695787270229821e-05,
      "loss": 2.2479,
      "step": 19961
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.169183373451233,
      "learning_rate": 1.695757696732899e-05,
      "loss": 2.2097,
      "step": 19962
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0552233457565308,
      "learning_rate": 1.6957281220564783e-05,
      "loss": 2.6093,
      "step": 19963
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.296549677848816,
      "learning_rate": 1.69569854620061e-05,
      "loss": 2.2429,
      "step": 19964
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0745195150375366,
      "learning_rate": 1.6956689691653437e-05,
      "loss": 2.5918,
      "step": 19965
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1042122840881348,
      "learning_rate": 1.6956393909507292e-05,
      "loss": 2.7746,
      "step": 19966
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.025646448135376,
      "learning_rate": 1.6956098115568172e-05,
      "loss": 2.4969,
      "step": 19967
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1537563800811768,
      "learning_rate": 1.6955802309836575e-05,
      "loss": 2.4484,
      "step": 19968
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0825319290161133,
      "learning_rate": 1.6955506492313e-05,
      "loss": 2.4619,
      "step": 19969
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0061126947402954,
      "learning_rate": 1.6955210662997964e-05,
      "loss": 2.3998,
      "step": 19970
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9512633681297302,
      "learning_rate": 1.6954914821891946e-05,
      "loss": 2.2561,
      "step": 19971
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1339360475540161,
      "learning_rate": 1.6954618968995464e-05,
      "loss": 2.3491,
      "step": 19972
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0558933019638062,
      "learning_rate": 1.6954323104309015e-05,
      "loss": 2.4039,
      "step": 19973
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9683630466461182,
      "learning_rate": 1.6954027227833094e-05,
      "loss": 2.3686,
      "step": 19974
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0260756015777588,
      "learning_rate": 1.6953731339568212e-05,
      "loss": 2.6499,
      "step": 19975
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9114206433296204,
      "learning_rate": 1.6953435439514864e-05,
      "loss": 2.3935,
      "step": 19976
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0453511476516724,
      "learning_rate": 1.695313952767356e-05,
      "loss": 2.401,
      "step": 19977
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.04683256149292,
      "learning_rate": 1.6952843604044794e-05,
      "loss": 2.4353,
      "step": 19978
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.928512454032898,
      "learning_rate": 1.6952547668629068e-05,
      "loss": 2.5705,
      "step": 19979
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.978641152381897,
      "learning_rate": 1.6952251721426885e-05,
      "loss": 2.7267,
      "step": 19980
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0478523969650269,
      "learning_rate": 1.6951955762438747e-05,
      "loss": 2.3292,
      "step": 19981
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0416929721832275,
      "learning_rate": 1.6951659791665155e-05,
      "loss": 2.2485,
      "step": 19982
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1393969058990479,
      "learning_rate": 1.6951363809106617e-05,
      "loss": 2.6613,
      "step": 19983
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0726310014724731,
      "learning_rate": 1.6951067814763625e-05,
      "loss": 2.4637,
      "step": 19984
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0264601707458496,
      "learning_rate": 1.6950771808636687e-05,
      "loss": 2.8004,
      "step": 19985
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1903293132781982,
      "learning_rate": 1.6950475790726298e-05,
      "loss": 2.5821,
      "step": 19986
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.109150767326355,
      "learning_rate": 1.695017976103297e-05,
      "loss": 2.7576,
      "step": 19987
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1929957866668701,
      "learning_rate": 1.6949883719557198e-05,
      "loss": 2.4202,
      "step": 19988
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.13847017288208,
      "learning_rate": 1.6949587666299487e-05,
      "loss": 2.5408,
      "step": 19989
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0417985916137695,
      "learning_rate": 1.6949291601260336e-05,
      "loss": 2.5602,
      "step": 19990
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0872243642807007,
      "learning_rate": 1.694899552444025e-05,
      "loss": 2.4906,
      "step": 19991
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.148675799369812,
      "learning_rate": 1.6948699435839725e-05,
      "loss": 2.3662,
      "step": 19992
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0465731620788574,
      "learning_rate": 1.694840333545927e-05,
      "loss": 2.4582,
      "step": 19993
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3012372255325317,
      "learning_rate": 1.6948107223299386e-05,
      "loss": 2.4788,
      "step": 19994
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9345462918281555,
      "learning_rate": 1.694781109936057e-05,
      "loss": 2.5774,
      "step": 19995
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0361981391906738,
      "learning_rate": 1.694751496364333e-05,
      "loss": 2.3551,
      "step": 19996
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0290614366531372,
      "learning_rate": 1.6947218816148163e-05,
      "loss": 2.6922,
      "step": 19997
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0702859163284302,
      "learning_rate": 1.6946922656875574e-05,
      "loss": 2.2839,
      "step": 19998
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.042736291885376,
      "learning_rate": 1.6946626485826066e-05,
      "loss": 2.4552,
      "step": 19999
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1093695163726807,
      "learning_rate": 1.6946330303000137e-05,
      "loss": 2.4455,
      "step": 20000
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0895657539367676,
      "learning_rate": 1.6946034108398292e-05,
      "loss": 2.275,
      "step": 20001
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0817216634750366,
      "learning_rate": 1.6945737902021033e-05,
      "loss": 2.5783,
      "step": 20002
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9210947155952454,
      "learning_rate": 1.6945441683868863e-05,
      "loss": 2.2277,
      "step": 20003
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9182254076004028,
      "learning_rate": 1.694514545394228e-05,
      "loss": 2.3105,
      "step": 20004
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0845203399658203,
      "learning_rate": 1.6944849212241793e-05,
      "loss": 2.5182,
      "step": 20005
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9408392310142517,
      "learning_rate": 1.6944552958767898e-05,
      "loss": 2.4568,
      "step": 20006
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2360939979553223,
      "learning_rate": 1.6944256693521103e-05,
      "loss": 2.2877,
      "step": 20007
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9406389594078064,
      "learning_rate": 1.6943960416501902e-05,
      "loss": 2.4603,
      "step": 20008
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9672709107398987,
      "learning_rate": 1.6943664127710803e-05,
      "loss": 2.5536,
      "step": 20009
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1091790199279785,
      "learning_rate": 1.6943367827148308e-05,
      "loss": 2.4755,
      "step": 20010
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.066462755203247,
      "learning_rate": 1.694307151481492e-05,
      "loss": 2.4237,
      "step": 20011
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0610538721084595,
      "learning_rate": 1.694277519071114e-05,
      "loss": 2.4665,
      "step": 20012
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1258739233016968,
      "learning_rate": 1.694247885483747e-05,
      "loss": 2.5194,
      "step": 20013
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9717193841934204,
      "learning_rate": 1.694218250719441e-05,
      "loss": 2.4637,
      "step": 20014
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0029189586639404,
      "learning_rate": 1.6941886147782468e-05,
      "loss": 2.4443,
      "step": 20015
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1362833976745605,
      "learning_rate": 1.6941589776602145e-05,
      "loss": 2.4996,
      "step": 20016
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0334420204162598,
      "learning_rate": 1.6941293393653938e-05,
      "loss": 2.3506,
      "step": 20017
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1557503938674927,
      "learning_rate": 1.694099699893836e-05,
      "loss": 2.587,
      "step": 20018
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9771904349327087,
      "learning_rate": 1.69407005924559e-05,
      "loss": 2.5273,
      "step": 20019
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0110104084014893,
      "learning_rate": 1.6940404174207073e-05,
      "loss": 2.3866,
      "step": 20020
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0344220399856567,
      "learning_rate": 1.694010774419237e-05,
      "loss": 2.5524,
      "step": 20021
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1691175699234009,
      "learning_rate": 1.6939811302412304e-05,
      "loss": 2.4561,
      "step": 20022
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0207171440124512,
      "learning_rate": 1.693951484886737e-05,
      "loss": 2.6511,
      "step": 20023
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.096563696861267,
      "learning_rate": 1.6939218383558075e-05,
      "loss": 2.4655,
      "step": 20024
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1011388301849365,
      "learning_rate": 1.693892190648492e-05,
      "loss": 2.4429,
      "step": 20025
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9980815649032593,
      "learning_rate": 1.6938625417648408e-05,
      "loss": 2.3839,
      "step": 20026
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.061776041984558,
      "learning_rate": 1.6938328917049043e-05,
      "loss": 2.2459,
      "step": 20027
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1158571243286133,
      "learning_rate": 1.6938032404687325e-05,
      "loss": 2.3822,
      "step": 20028
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9942694306373596,
      "learning_rate": 1.693773588056376e-05,
      "loss": 2.2442,
      "step": 20029
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1503371000289917,
      "learning_rate": 1.6937439344678846e-05,
      "loss": 2.4173,
      "step": 20030
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.038611650466919,
      "learning_rate": 1.6937142797033086e-05,
      "loss": 2.5602,
      "step": 20031
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1838278770446777,
      "learning_rate": 1.6936846237626988e-05,
      "loss": 2.0962,
      "step": 20032
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.00998854637146,
      "learning_rate": 1.6936549666461048e-05,
      "loss": 2.4949,
      "step": 20033
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9854083061218262,
      "learning_rate": 1.6936253083535776e-05,
      "loss": 2.678,
      "step": 20034
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1178916692733765,
      "learning_rate": 1.6935956488851667e-05,
      "loss": 2.4035,
      "step": 20035
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9571695327758789,
      "learning_rate": 1.6935659882409233e-05,
      "loss": 2.4873,
      "step": 20036
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.8973312973976135,
      "learning_rate": 1.693536326420897e-05,
      "loss": 2.4405,
      "step": 20037
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0202258825302124,
      "learning_rate": 1.6935066634251383e-05,
      "loss": 2.5415,
      "step": 20038
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0546238422393799,
      "learning_rate": 1.6934769992536973e-05,
      "loss": 2.5813,
      "step": 20039
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.06270432472229,
      "learning_rate": 1.6934473339066247e-05,
      "loss": 2.3481,
      "step": 20040
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.063645601272583,
      "learning_rate": 1.6934176673839703e-05,
      "loss": 2.2834,
      "step": 20041
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.011141300201416,
      "learning_rate": 1.6933879996857846e-05,
      "loss": 2.4567,
      "step": 20042
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.020039677619934,
      "learning_rate": 1.6933583308121177e-05,
      "loss": 2.4196,
      "step": 20043
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0815147161483765,
      "learning_rate": 1.6933286607630206e-05,
      "loss": 2.3934,
      "step": 20044
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1605896949768066,
      "learning_rate": 1.693298989538543e-05,
      "loss": 2.4248,
      "step": 20045
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9504368901252747,
      "learning_rate": 1.6932693171387355e-05,
      "loss": 2.4231,
      "step": 20046
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9556418657302856,
      "learning_rate": 1.6932396435636475e-05,
      "loss": 2.4052,
      "step": 20047
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0547010898590088,
      "learning_rate": 1.6932099688133304e-05,
      "loss": 2.3925,
      "step": 20048
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0738736391067505,
      "learning_rate": 1.6931802928878344e-05,
      "loss": 2.4271,
      "step": 20049
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.053735613822937,
      "learning_rate": 1.6931506157872096e-05,
      "loss": 2.6295,
      "step": 20050
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1393860578536987,
      "learning_rate": 1.693120937511506e-05,
      "loss": 2.6063,
      "step": 20051
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.065169334411621,
      "learning_rate": 1.693091258060774e-05,
      "loss": 2.4477,
      "step": 20052
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0086989402770996,
      "learning_rate": 1.6930615774350644e-05,
      "loss": 2.56,
      "step": 20053
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0175107717514038,
      "learning_rate": 1.6930318956344266e-05,
      "loss": 2.4803,
      "step": 20054
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0723330974578857,
      "learning_rate": 1.6930022126589124e-05,
      "loss": 2.5689,
      "step": 20055
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.094054937362671,
      "learning_rate": 1.6929725285085706e-05,
      "loss": 2.4545,
      "step": 20056
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2739107608795166,
      "learning_rate": 1.6929428431834524e-05,
      "loss": 2.2995,
      "step": 20057
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0634522438049316,
      "learning_rate": 1.692913156683608e-05,
      "loss": 2.2846,
      "step": 20058
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2204809188842773,
      "learning_rate": 1.692883469009087e-05,
      "loss": 2.7294,
      "step": 20059
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0415911674499512,
      "learning_rate": 1.6928537801599408e-05,
      "loss": 2.5788,
      "step": 20060
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1876778602600098,
      "learning_rate": 1.6928240901362194e-05,
      "loss": 2.3684,
      "step": 20061
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.211641788482666,
      "learning_rate": 1.6927943989379726e-05,
      "loss": 2.6155,
      "step": 20062
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0962849855422974,
      "learning_rate": 1.6927647065652516e-05,
      "loss": 2.4346,
      "step": 20063
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1430517435073853,
      "learning_rate": 1.6927350130181063e-05,
      "loss": 2.4012,
      "step": 20064
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.055404543876648,
      "learning_rate": 1.6927053182965863e-05,
      "loss": 2.39,
      "step": 20065
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9375126957893372,
      "learning_rate": 1.692675622400743e-05,
      "loss": 2.5383,
      "step": 20066
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1588059663772583,
      "learning_rate": 1.6926459253306266e-05,
      "loss": 2.3745,
      "step": 20067
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0656684637069702,
      "learning_rate": 1.692616227086287e-05,
      "loss": 2.3713,
      "step": 20068
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0844001770019531,
      "learning_rate": 1.692586527667775e-05,
      "loss": 2.478,
      "step": 20069
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2199888229370117,
      "learning_rate": 1.6925568270751407e-05,
      "loss": 2.5328,
      "step": 20070
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.063316822052002,
      "learning_rate": 1.6925271253084347e-05,
      "loss": 2.4707,
      "step": 20071
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0117799043655396,
      "learning_rate": 1.6924974223677068e-05,
      "loss": 2.4156,
      "step": 20072
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0594884157180786,
      "learning_rate": 1.6924677182530075e-05,
      "loss": 2.4296,
      "step": 20073
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.005616307258606,
      "learning_rate": 1.6924380129643877e-05,
      "loss": 2.2749,
      "step": 20074
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.8954735994338989,
      "learning_rate": 1.6924083065018974e-05,
      "loss": 2.3864,
      "step": 20075
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1207869052886963,
      "learning_rate": 1.6923785988655864e-05,
      "loss": 2.2252,
      "step": 20076
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9362987875938416,
      "learning_rate": 1.6923488900555064e-05,
      "loss": 2.66,
      "step": 20077
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.026125192642212,
      "learning_rate": 1.6923191800717063e-05,
      "loss": 2.3642,
      "step": 20078
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9612524509429932,
      "learning_rate": 1.6922894689142378e-05,
      "loss": 2.4849,
      "step": 20079
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9480193257331848,
      "learning_rate": 1.69225975658315e-05,
      "loss": 2.4751,
      "step": 20080
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1285173892974854,
      "learning_rate": 1.6922300430784947e-05,
      "loss": 2.3439,
      "step": 20081
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9727270603179932,
      "learning_rate": 1.6922003284003208e-05,
      "loss": 2.5869,
      "step": 20082
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1605881452560425,
      "learning_rate": 1.6921706125486797e-05,
      "loss": 2.4652,
      "step": 20083
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.189444661140442,
      "learning_rate": 1.6921408955236214e-05,
      "loss": 2.4062,
      "step": 20084
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9859983921051025,
      "learning_rate": 1.692111177325196e-05,
      "loss": 2.446,
      "step": 20085
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1900168657302856,
      "learning_rate": 1.6920814579534542e-05,
      "loss": 2.329,
      "step": 20086
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2601492404937744,
      "learning_rate": 1.6920517374084466e-05,
      "loss": 2.4216,
      "step": 20087
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0131893157958984,
      "learning_rate": 1.692022015690223e-05,
      "loss": 2.5127,
      "step": 20088
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0784931182861328,
      "learning_rate": 1.6919922927988345e-05,
      "loss": 2.7132,
      "step": 20089
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9388710856437683,
      "learning_rate": 1.691962568734331e-05,
      "loss": 2.3941,
      "step": 20090
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9939112663269043,
      "learning_rate": 1.6919328434967633e-05,
      "loss": 2.5652,
      "step": 20091
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.02913498878479,
      "learning_rate": 1.6919031170861812e-05,
      "loss": 2.2939,
      "step": 20092
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9725632667541504,
      "learning_rate": 1.6918733895026354e-05,
      "loss": 2.2788,
      "step": 20093
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0150203704833984,
      "learning_rate": 1.6918436607461763e-05,
      "loss": 2.5515,
      "step": 20094
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.013318419456482,
      "learning_rate": 1.6918139308168543e-05,
      "loss": 2.4521,
      "step": 20095
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.142350673675537,
      "learning_rate": 1.69178419971472e-05,
      "loss": 2.438,
      "step": 20096
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0252501964569092,
      "learning_rate": 1.6917544674398233e-05,
      "loss": 2.4717,
      "step": 20097
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.076417088508606,
      "learning_rate": 1.6917247339922148e-05,
      "loss": 2.308,
      "step": 20098
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.091545820236206,
      "learning_rate": 1.6916949993719455e-05,
      "loss": 2.788,
      "step": 20099
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0552372932434082,
      "learning_rate": 1.691665263579065e-05,
      "loss": 2.7172,
      "step": 20100
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0716979503631592,
      "learning_rate": 1.691635526613624e-05,
      "loss": 2.7732,
      "step": 20101
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0248149633407593,
      "learning_rate": 1.691605788475673e-05,
      "loss": 2.4638,
      "step": 20102
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1519038677215576,
      "learning_rate": 1.6915760491652623e-05,
      "loss": 2.3133,
      "step": 20103
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1800878047943115,
      "learning_rate": 1.6915463086824428e-05,
      "loss": 2.4286,
      "step": 20104
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.14567232131958,
      "learning_rate": 1.691516567027264e-05,
      "loss": 2.3283,
      "step": 20105
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1290794610977173,
      "learning_rate": 1.691486824199777e-05,
      "loss": 2.3491,
      "step": 20106
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2397547960281372,
      "learning_rate": 1.6914570802000315e-05,
      "loss": 2.2625,
      "step": 20107
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0095444917678833,
      "learning_rate": 1.691427335028079e-05,
      "loss": 2.5709,
      "step": 20108
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0867241621017456,
      "learning_rate": 1.6913975886839694e-05,
      "loss": 2.5336,
      "step": 20109
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9933764934539795,
      "learning_rate": 1.6913678411677528e-05,
      "loss": 2.2814,
      "step": 20110
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1405686140060425,
      "learning_rate": 1.6913380924794802e-05,
      "loss": 2.6641,
      "step": 20111
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9252893328666687,
      "learning_rate": 1.6913083426192016e-05,
      "loss": 2.2986,
      "step": 20112
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0433530807495117,
      "learning_rate": 1.6912785915869676e-05,
      "loss": 2.3512,
      "step": 20113
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4742945432662964,
      "learning_rate": 1.691248839382829e-05,
      "loss": 2.396,
      "step": 20114
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0874979496002197,
      "learning_rate": 1.6912190860068354e-05,
      "loss": 2.4912,
      "step": 20115
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9671890735626221,
      "learning_rate": 1.691189331459038e-05,
      "loss": 2.4006,
      "step": 20116
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3089910745620728,
      "learning_rate": 1.6911595757394868e-05,
      "loss": 2.4975,
      "step": 20117
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9946532249450684,
      "learning_rate": 1.6911298188482327e-05,
      "loss": 2.6135,
      "step": 20118
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0544970035552979,
      "learning_rate": 1.6911000607853253e-05,
      "loss": 2.5466,
      "step": 20119
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.916995644569397,
      "learning_rate": 1.691070301550816e-05,
      "loss": 2.2682,
      "step": 20120
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1954022645950317,
      "learning_rate": 1.691040541144755e-05,
      "loss": 2.4738,
      "step": 20121
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0619841814041138,
      "learning_rate": 1.691010779567192e-05,
      "loss": 2.3504,
      "step": 20122
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3788375854492188,
      "learning_rate": 1.6909810168181786e-05,
      "loss": 2.3671,
      "step": 20123
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.018096685409546,
      "learning_rate": 1.6909512528977646e-05,
      "loss": 2.3649,
      "step": 20124
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9805083870887756,
      "learning_rate": 1.6909214878060004e-05,
      "loss": 2.649,
      "step": 20125
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.975053608417511,
      "learning_rate": 1.690891721542937e-05,
      "loss": 2.415,
      "step": 20126
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0935087203979492,
      "learning_rate": 1.6908619541086237e-05,
      "loss": 2.4449,
      "step": 20127
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1099390983581543,
      "learning_rate": 1.6908321855031123e-05,
      "loss": 2.6415,
      "step": 20128
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1389093399047852,
      "learning_rate": 1.690802415726453e-05,
      "loss": 2.473,
      "step": 20129
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.998711347579956,
      "learning_rate": 1.6907726447786954e-05,
      "loss": 2.5231,
      "step": 20130
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1393554210662842,
      "learning_rate": 1.690742872659891e-05,
      "loss": 2.4267,
      "step": 20131
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2304775714874268,
      "learning_rate": 1.6907130993700895e-05,
      "loss": 2.4805,
      "step": 20132
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.111505150794983,
      "learning_rate": 1.690683324909342e-05,
      "loss": 2.4083,
      "step": 20133
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1106284856796265,
      "learning_rate": 1.6906535492776983e-05,
      "loss": 2.469,
      "step": 20134
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2913775444030762,
      "learning_rate": 1.6906237724752095e-05,
      "loss": 2.3981,
      "step": 20135
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9837782382965088,
      "learning_rate": 1.690593994501926e-05,
      "loss": 2.3827,
      "step": 20136
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0164512395858765,
      "learning_rate": 1.6905642153578977e-05,
      "loss": 2.2823,
      "step": 20137
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.925619900226593,
      "learning_rate": 1.690534435043176e-05,
      "loss": 2.3317,
      "step": 20138
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9762798547744751,
      "learning_rate": 1.6905046535578103e-05,
      "loss": 2.6177,
      "step": 20139
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0121554136276245,
      "learning_rate": 1.6904748709018523e-05,
      "loss": 2.3927,
      "step": 20140
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.026271104812622,
      "learning_rate": 1.6904450870753512e-05,
      "loss": 2.4513,
      "step": 20141
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0469995737075806,
      "learning_rate": 1.6904153020783588e-05,
      "loss": 2.578,
      "step": 20142
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9770066142082214,
      "learning_rate": 1.6903855159109247e-05,
      "loss": 2.308,
      "step": 20143
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9651886820793152,
      "learning_rate": 1.6903557285730997e-05,
      "loss": 2.3962,
      "step": 20144
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.8828914761543274,
      "learning_rate": 1.6903259400649338e-05,
      "loss": 2.3293,
      "step": 20145
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1252870559692383,
      "learning_rate": 1.6902961503864783e-05,
      "loss": 2.5518,
      "step": 20146
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1114437580108643,
      "learning_rate": 1.6902663595377834e-05,
      "loss": 2.484,
      "step": 20147
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0567786693572998,
      "learning_rate": 1.6902365675188994e-05,
      "loss": 2.4188,
      "step": 20148
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0943604707717896,
      "learning_rate": 1.6902067743298772e-05,
      "loss": 2.3764,
      "step": 20149
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2294726371765137,
      "learning_rate": 1.6901769799707668e-05,
      "loss": 2.544,
      "step": 20150
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9752048254013062,
      "learning_rate": 1.690147184441619e-05,
      "loss": 2.5259,
      "step": 20151
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1036087274551392,
      "learning_rate": 1.6901173877424845e-05,
      "loss": 2.5094,
      "step": 20152
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.017473816871643,
      "learning_rate": 1.6900875898734132e-05,
      "loss": 2.5911,
      "step": 20153
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0749579668045044,
      "learning_rate": 1.6900577908344562e-05,
      "loss": 2.4106,
      "step": 20154
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0617451667785645,
      "learning_rate": 1.690027990625664e-05,
      "loss": 2.4859,
      "step": 20155
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0784213542938232,
      "learning_rate": 1.6899981892470866e-05,
      "loss": 2.3782,
      "step": 20156
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0315735340118408,
      "learning_rate": 1.6899683866987753e-05,
      "loss": 2.5468,
      "step": 20157
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0422254800796509,
      "learning_rate": 1.68993858298078e-05,
      "loss": 2.623,
      "step": 20158
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9787502288818359,
      "learning_rate": 1.6899087780931512e-05,
      "loss": 2.3114,
      "step": 20159
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0579297542572021,
      "learning_rate": 1.68987897203594e-05,
      "loss": 2.3761,
      "step": 20160
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0857034921646118,
      "learning_rate": 1.6898491648091965e-05,
      "loss": 2.235,
      "step": 20161
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.059565544128418,
      "learning_rate": 1.6898193564129714e-05,
      "loss": 2.4597,
      "step": 20162
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0921071767807007,
      "learning_rate": 1.6897895468473146e-05,
      "loss": 2.5394,
      "step": 20163
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0896787643432617,
      "learning_rate": 1.6897597361122774e-05,
      "loss": 2.5001,
      "step": 20164
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9465306401252747,
      "learning_rate": 1.6897299242079105e-05,
      "loss": 2.5788,
      "step": 20165
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1085320711135864,
      "learning_rate": 1.6897001111342637e-05,
      "loss": 2.3852,
      "step": 20166
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2134535312652588,
      "learning_rate": 1.689670296891388e-05,
      "loss": 2.5737,
      "step": 20167
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1228772401809692,
      "learning_rate": 1.6896404814793336e-05,
      "loss": 2.5167,
      "step": 20168
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3150633573532104,
      "learning_rate": 1.689610664898152e-05,
      "loss": 2.2459,
      "step": 20169
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0245881080627441,
      "learning_rate": 1.6895808471478922e-05,
      "loss": 2.2082,
      "step": 20170
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1863139867782593,
      "learning_rate": 1.689551028228606e-05,
      "loss": 2.5943,
      "step": 20171
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9734266996383667,
      "learning_rate": 1.6895212081403434e-05,
      "loss": 2.5157,
      "step": 20172
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0128973722457886,
      "learning_rate": 1.689491386883155e-05,
      "loss": 2.4686,
      "step": 20173
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0665044784545898,
      "learning_rate": 1.6894615644570914e-05,
      "loss": 2.4816,
      "step": 20174
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0011327266693115,
      "learning_rate": 1.6894317408622038e-05,
      "loss": 2.6927,
      "step": 20175
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9912912845611572,
      "learning_rate": 1.6894019160985415e-05,
      "loss": 2.3539,
      "step": 20176
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.060441255569458,
      "learning_rate": 1.6893720901661556e-05,
      "loss": 2.5884,
      "step": 20177
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0849579572677612,
      "learning_rate": 1.6893422630650975e-05,
      "loss": 2.2368,
      "step": 20178
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0241613388061523,
      "learning_rate": 1.6893124347954164e-05,
      "loss": 2.3926,
      "step": 20179
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1018650531768799,
      "learning_rate": 1.6892826053571635e-05,
      "loss": 2.3508,
      "step": 20180
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0031834840774536,
      "learning_rate": 1.6892527747503898e-05,
      "loss": 2.3787,
      "step": 20181
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0101414918899536,
      "learning_rate": 1.6892229429751453e-05,
      "loss": 2.4518,
      "step": 20182
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.128858208656311,
      "learning_rate": 1.6891931100314808e-05,
      "loss": 2.6404,
      "step": 20183
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1954344511032104,
      "learning_rate": 1.6891632759194465e-05,
      "loss": 2.4221,
      "step": 20184
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0508344173431396,
      "learning_rate": 1.6891334406390933e-05,
      "loss": 2.3898,
      "step": 20185
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9788734316825867,
      "learning_rate": 1.689103604190472e-05,
      "loss": 2.4319,
      "step": 20186
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1048930883407593,
      "learning_rate": 1.6890737665736328e-05,
      "loss": 2.2832,
      "step": 20187
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9412927031517029,
      "learning_rate": 1.6890439277886265e-05,
      "loss": 2.3864,
      "step": 20188
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0790140628814697,
      "learning_rate": 1.6890140878355035e-05,
      "loss": 2.3178,
      "step": 20189
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2039909362792969,
      "learning_rate": 1.6889842467143147e-05,
      "loss": 2.3978,
      "step": 20190
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9881388545036316,
      "learning_rate": 1.6889544044251104e-05,
      "loss": 2.5568,
      "step": 20191
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9711332321166992,
      "learning_rate": 1.6889245609679408e-05,
      "loss": 2.5398,
      "step": 20192
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.146925926208496,
      "learning_rate": 1.6888947163428575e-05,
      "loss": 2.3633,
      "step": 20193
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0449353456497192,
      "learning_rate": 1.6888648705499103e-05,
      "loss": 2.5149,
      "step": 20194
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9925093650817871,
      "learning_rate": 1.6888350235891506e-05,
      "loss": 2.5145,
      "step": 20195
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0621811151504517,
      "learning_rate": 1.688805175460628e-05,
      "loss": 2.3495,
      "step": 20196
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0377528667449951,
      "learning_rate": 1.688775326164393e-05,
      "loss": 2.407,
      "step": 20197
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0045286417007446,
      "learning_rate": 1.6887454757004974e-05,
      "loss": 2.4133,
      "step": 20198
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.072920560836792,
      "learning_rate": 1.688715624068991e-05,
      "loss": 2.3976,
      "step": 20199
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3492928743362427,
      "learning_rate": 1.6886857712699246e-05,
      "loss": 2.3874,
      "step": 20200
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1153662204742432,
      "learning_rate": 1.6886559173033486e-05,
      "loss": 2.3079,
      "step": 20201
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9555715322494507,
      "learning_rate": 1.6886260621693145e-05,
      "loss": 2.4609,
      "step": 20202
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1591155529022217,
      "learning_rate": 1.6885962058678712e-05,
      "loss": 2.6508,
      "step": 20203
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0026469230651855,
      "learning_rate": 1.6885663483990708e-05,
      "loss": 2.5584,
      "step": 20204
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0691885948181152,
      "learning_rate": 1.6885364897629638e-05,
      "loss": 2.3989,
      "step": 20205
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.101315975189209,
      "learning_rate": 1.6885066299596e-05,
      "loss": 2.5754,
      "step": 20206
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0491175651550293,
      "learning_rate": 1.6884767689890305e-05,
      "loss": 2.4467,
      "step": 20207
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1693857908248901,
      "learning_rate": 1.688446906851306e-05,
      "loss": 2.6384,
      "step": 20208
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9377223253250122,
      "learning_rate": 1.688417043546477e-05,
      "loss": 2.3473,
      "step": 20209
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1023778915405273,
      "learning_rate": 1.6883871790745938e-05,
      "loss": 2.5415,
      "step": 20210
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9788236618041992,
      "learning_rate": 1.6883573134357076e-05,
      "loss": 2.2977,
      "step": 20211
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9743611812591553,
      "learning_rate": 1.688327446629869e-05,
      "loss": 2.4646,
      "step": 20212
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0844309329986572,
      "learning_rate": 1.688297578657128e-05,
      "loss": 2.3266,
      "step": 20213
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.220558762550354,
      "learning_rate": 1.688267709517536e-05,
      "loss": 2.4818,
      "step": 20214
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0797704458236694,
      "learning_rate": 1.6882378392111432e-05,
      "loss": 2.4599,
      "step": 20215
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0812013149261475,
      "learning_rate": 1.6882079677380005e-05,
      "loss": 2.6184,
      "step": 20216
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0142024755477905,
      "learning_rate": 1.688178095098158e-05,
      "loss": 2.4058,
      "step": 20217
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1049550771713257,
      "learning_rate": 1.688148221291667e-05,
      "loss": 2.4871,
      "step": 20218
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0817476511001587,
      "learning_rate": 1.6881183463185778e-05,
      "loss": 2.581,
      "step": 20219
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9837813377380371,
      "learning_rate": 1.6880884701789412e-05,
      "loss": 2.3537,
      "step": 20220
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9261224269866943,
      "learning_rate": 1.6880585928728077e-05,
      "loss": 2.3011,
      "step": 20221
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.00931715965271,
      "learning_rate": 1.6880287144002282e-05,
      "loss": 2.4712,
      "step": 20222
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.951724112033844,
      "learning_rate": 1.687998834761253e-05,
      "loss": 2.6107,
      "step": 20223
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0418927669525146,
      "learning_rate": 1.6879689539559327e-05,
      "loss": 2.4268,
      "step": 20224
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0833460092544556,
      "learning_rate": 1.6879390719843184e-05,
      "loss": 2.4938,
      "step": 20225
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.016205906867981,
      "learning_rate": 1.6879091888464606e-05,
      "loss": 2.5176,
      "step": 20226
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9556963443756104,
      "learning_rate": 1.68787930454241e-05,
      "loss": 2.3359,
      "step": 20227
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9754902124404907,
      "learning_rate": 1.6878494190722168e-05,
      "loss": 2.4017,
      "step": 20228
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0210736989974976,
      "learning_rate": 1.687819532435932e-05,
      "loss": 2.3752,
      "step": 20229
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0895296335220337,
      "learning_rate": 1.6877896446336065e-05,
      "loss": 2.2951,
      "step": 20230
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.015554428100586,
      "learning_rate": 1.6877597556652907e-05,
      "loss": 2.3879,
      "step": 20231
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0717390775680542,
      "learning_rate": 1.6877298655310353e-05,
      "loss": 2.7169,
      "step": 20232
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0571247339248657,
      "learning_rate": 1.687699974230891e-05,
      "loss": 2.4987,
      "step": 20233
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.071325421333313,
      "learning_rate": 1.6876700817649087e-05,
      "loss": 2.4537,
      "step": 20234
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0677201747894287,
      "learning_rate": 1.6876401881331384e-05,
      "loss": 2.3142,
      "step": 20235
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1794919967651367,
      "learning_rate": 1.6876102933356316e-05,
      "loss": 2.445,
      "step": 20236
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.124107003211975,
      "learning_rate": 1.6875803973724384e-05,
      "loss": 2.2806,
      "step": 20237
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0016554594039917,
      "learning_rate": 1.6875505002436096e-05,
      "loss": 2.3611,
      "step": 20238
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9782299399375916,
      "learning_rate": 1.687520601949196e-05,
      "loss": 2.6209,
      "step": 20239
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5648300647735596,
      "learning_rate": 1.6874907024892485e-05,
      "loss": 2.5522,
      "step": 20240
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3714590072631836,
      "learning_rate": 1.6874608018638175e-05,
      "loss": 2.4123,
      "step": 20241
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0363025665283203,
      "learning_rate": 1.6874309000729536e-05,
      "loss": 2.3453,
      "step": 20242
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.198757529258728,
      "learning_rate": 1.6874009971167075e-05,
      "loss": 2.676,
      "step": 20243
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0370875597000122,
      "learning_rate": 1.68737109299513e-05,
      "loss": 2.51,
      "step": 20244
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1319345235824585,
      "learning_rate": 1.6873411877082722e-05,
      "loss": 2.3552,
      "step": 20245
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0363054275512695,
      "learning_rate": 1.687311281256184e-05,
      "loss": 2.5907,
      "step": 20246
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0573177337646484,
      "learning_rate": 1.6872813736389168e-05,
      "loss": 2.4412,
      "step": 20247
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0866022109985352,
      "learning_rate": 1.687251464856521e-05,
      "loss": 2.5615,
      "step": 20248
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0065478086471558,
      "learning_rate": 1.687221554909047e-05,
      "loss": 2.5114,
      "step": 20249
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9708865880966187,
      "learning_rate": 1.687191643796546e-05,
      "loss": 2.5454,
      "step": 20250
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1239595413208008,
      "learning_rate": 1.6871617315190686e-05,
      "loss": 2.4002,
      "step": 20251
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0306782722473145,
      "learning_rate": 1.6871318180766655e-05,
      "loss": 2.3436,
      "step": 20252
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0563126802444458,
      "learning_rate": 1.6871019034693873e-05,
      "loss": 2.3973,
      "step": 20253
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.092081904411316,
      "learning_rate": 1.6870719876972844e-05,
      "loss": 2.2273,
      "step": 20254
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0022740364074707,
      "learning_rate": 1.6870420707604082e-05,
      "loss": 2.5412,
      "step": 20255
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.085883378982544,
      "learning_rate": 1.687012152658809e-05,
      "loss": 2.372,
      "step": 20256
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.124799370765686,
      "learning_rate": 1.6869822333925378e-05,
      "loss": 2.2697,
      "step": 20257
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9822704792022705,
      "learning_rate": 1.686952312961645e-05,
      "loss": 2.5199,
      "step": 20258
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0478826761245728,
      "learning_rate": 1.6869223913661813e-05,
      "loss": 2.5199,
      "step": 20259
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.083482265472412,
      "learning_rate": 1.686892468606198e-05,
      "loss": 2.4225,
      "step": 20260
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0708034038543701,
      "learning_rate": 1.6868625446817447e-05,
      "loss": 2.6765,
      "step": 20261
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1077455282211304,
      "learning_rate": 1.6868326195928736e-05,
      "loss": 2.2265,
      "step": 20262
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0629361867904663,
      "learning_rate": 1.686802693339634e-05,
      "loss": 2.4497,
      "step": 20263
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.8829553723335266,
      "learning_rate": 1.6867727659220777e-05,
      "loss": 2.42,
      "step": 20264
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1582577228546143,
      "learning_rate": 1.6867428373402552e-05,
      "loss": 2.5082,
      "step": 20265
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0341795682907104,
      "learning_rate": 1.6867129075942168e-05,
      "loss": 2.3049,
      "step": 20266
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9926713109016418,
      "learning_rate": 1.6866829766840135e-05,
      "loss": 2.6281,
      "step": 20267
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.067716121673584,
      "learning_rate": 1.6866530446096963e-05,
      "loss": 2.6492,
      "step": 20268
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.118117332458496,
      "learning_rate": 1.6866231113713153e-05,
      "loss": 2.5203,
      "step": 20269
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.02214515209198,
      "learning_rate": 1.686593176968922e-05,
      "loss": 2.4702,
      "step": 20270
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0663925409317017,
      "learning_rate": 1.6865632414025667e-05,
      "loss": 2.5737,
      "step": 20271
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1007145643234253,
      "learning_rate": 1.6865333046722998e-05,
      "loss": 2.3382,
      "step": 20272
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0990029573440552,
      "learning_rate": 1.686503366778173e-05,
      "loss": 2.3653,
      "step": 20273
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0143696069717407,
      "learning_rate": 1.6864734277202365e-05,
      "loss": 2.3224,
      "step": 20274
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.093997836112976,
      "learning_rate": 1.6864434874985408e-05,
      "loss": 2.5202,
      "step": 20275
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0226168632507324,
      "learning_rate": 1.6864135461131373e-05,
      "loss": 2.3354,
      "step": 20276
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9821670055389404,
      "learning_rate": 1.6863836035640762e-05,
      "loss": 2.2298,
      "step": 20277
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9813532829284668,
      "learning_rate": 1.6863536598514088e-05,
      "loss": 2.4187,
      "step": 20278
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0284572839736938,
      "learning_rate": 1.6863237149751853e-05,
      "loss": 2.4764,
      "step": 20279
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.220491886138916,
      "learning_rate": 1.6862937689354564e-05,
      "loss": 2.3706,
      "step": 20280
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0325146913528442,
      "learning_rate": 1.6862638217322736e-05,
      "loss": 2.4051,
      "step": 20281
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9562644958496094,
      "learning_rate": 1.6862338733656872e-05,
      "loss": 2.4585,
      "step": 20282
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.052238941192627,
      "learning_rate": 1.686203923835748e-05,
      "loss": 2.3607,
      "step": 20283
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0499415397644043,
      "learning_rate": 1.6861739731425064e-05,
      "loss": 2.2494,
      "step": 20284
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9997202754020691,
      "learning_rate": 1.686144021286014e-05,
      "loss": 2.4337,
      "step": 20285
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1691548824310303,
      "learning_rate": 1.6861140682663205e-05,
      "loss": 2.6253,
      "step": 20286
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1381831169128418,
      "learning_rate": 1.686084114083478e-05,
      "loss": 2.5056,
      "step": 20287
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0855814218521118,
      "learning_rate": 1.6860541587375364e-05,
      "loss": 2.2817,
      "step": 20288
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.53104829788208,
      "learning_rate": 1.6860242022285467e-05,
      "loss": 2.4036,
      "step": 20289
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0421333312988281,
      "learning_rate": 1.6859942445565594e-05,
      "loss": 2.3879,
      "step": 20290
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0233936309814453,
      "learning_rate": 1.6859642857216257e-05,
      "loss": 2.3332,
      "step": 20291
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.053291916847229,
      "learning_rate": 1.685934325723796e-05,
      "loss": 2.569,
      "step": 20292
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9549680948257446,
      "learning_rate": 1.685904364563122e-05,
      "loss": 2.3143,
      "step": 20293
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1558289527893066,
      "learning_rate": 1.6858744022396532e-05,
      "loss": 2.4586,
      "step": 20294
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.120137333869934,
      "learning_rate": 1.685844438753441e-05,
      "loss": 2.4272,
      "step": 20295
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0851596593856812,
      "learning_rate": 1.685814474104537e-05,
      "loss": 2.3986,
      "step": 20296
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0959830284118652,
      "learning_rate": 1.6857845082929902e-05,
      "loss": 2.5793,
      "step": 20297
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.049623727798462,
      "learning_rate": 1.685754541318853e-05,
      "loss": 2.4422,
      "step": 20298
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2105563879013062,
      "learning_rate": 1.6857245731821753e-05,
      "loss": 2.3122,
      "step": 20299
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.137046456336975,
      "learning_rate": 1.685694603883008e-05,
      "loss": 2.3881,
      "step": 20300
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9165040850639343,
      "learning_rate": 1.6856646334214027e-05,
      "loss": 2.42,
      "step": 20301
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0461246967315674,
      "learning_rate": 1.6856346617974092e-05,
      "loss": 2.5933,
      "step": 20302
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.034085750579834,
      "learning_rate": 1.685604689011079e-05,
      "loss": 2.3917,
      "step": 20303
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1521813869476318,
      "learning_rate": 1.6855747150624627e-05,
      "loss": 2.5671,
      "step": 20304
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9922257661819458,
      "learning_rate": 1.6855447399516107e-05,
      "loss": 2.4093,
      "step": 20305
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0264272689819336,
      "learning_rate": 1.6855147636785742e-05,
      "loss": 2.5011,
      "step": 20306
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0838544368743896,
      "learning_rate": 1.6854847862434044e-05,
      "loss": 2.5611,
      "step": 20307
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1057542562484741,
      "learning_rate": 1.6854548076461516e-05,
      "loss": 2.3671,
      "step": 20308
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0868993997573853,
      "learning_rate": 1.6854248278868664e-05,
      "loss": 2.4149,
      "step": 20309
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.081740140914917,
      "learning_rate": 1.6853948469656e-05,
      "loss": 2.3355,
      "step": 20310
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9673563241958618,
      "learning_rate": 1.6853648648824034e-05,
      "loss": 2.3695,
      "step": 20311
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9631599187850952,
      "learning_rate": 1.6853348816373273e-05,
      "loss": 2.2358,
      "step": 20312
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0629074573516846,
      "learning_rate": 1.685304897230422e-05,
      "loss": 2.3066,
      "step": 20313
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1302587985992432,
      "learning_rate": 1.685274911661739e-05,
      "loss": 2.4492,
      "step": 20314
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0159531831741333,
      "learning_rate": 1.685244924931329e-05,
      "loss": 2.3464,
      "step": 20315
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.067245602607727,
      "learning_rate": 1.685214937039243e-05,
      "loss": 2.5228,
      "step": 20316
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.062921166419983,
      "learning_rate": 1.6851849479855312e-05,
      "loss": 2.5822,
      "step": 20317
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0853818655014038,
      "learning_rate": 1.6851549577702447e-05,
      "loss": 2.5279,
      "step": 20318
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1140466928482056,
      "learning_rate": 1.6851249663934348e-05,
      "loss": 2.3756,
      "step": 20319
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9457863569259644,
      "learning_rate": 1.6850949738551522e-05,
      "loss": 2.219,
      "step": 20320
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.045204520225525,
      "learning_rate": 1.685064980155447e-05,
      "loss": 2.5782,
      "step": 20321
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2200943231582642,
      "learning_rate": 1.6850349852943707e-05,
      "loss": 2.503,
      "step": 20322
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0751383304595947,
      "learning_rate": 1.6850049892719743e-05,
      "loss": 2.3451,
      "step": 20323
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0863350629806519,
      "learning_rate": 1.684974992088308e-05,
      "loss": 2.3983,
      "step": 20324
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1363989114761353,
      "learning_rate": 1.6849449937434235e-05,
      "loss": 2.4239,
      "step": 20325
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2210029363632202,
      "learning_rate": 1.6849149942373707e-05,
      "loss": 2.3849,
      "step": 20326
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0426853895187378,
      "learning_rate": 1.6848849935702017e-05,
      "loss": 2.5232,
      "step": 20327
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9638765454292297,
      "learning_rate": 1.684854991741966e-05,
      "loss": 2.3929,
      "step": 20328
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0497043132781982,
      "learning_rate": 1.6848249887527156e-05,
      "loss": 2.3555,
      "step": 20329
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0602551698684692,
      "learning_rate": 1.6847949846025e-05,
      "loss": 2.383,
      "step": 20330
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0288680791854858,
      "learning_rate": 1.6847649792913717e-05,
      "loss": 2.2035,
      "step": 20331
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1972942352294922,
      "learning_rate": 1.6847349728193806e-05,
      "loss": 2.294,
      "step": 20332
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9327647686004639,
      "learning_rate": 1.6847049651865776e-05,
      "loss": 2.319,
      "step": 20333
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0715450048446655,
      "learning_rate": 1.684674956393014e-05,
      "loss": 2.3111,
      "step": 20334
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0481367111206055,
      "learning_rate": 1.68464494643874e-05,
      "loss": 2.3144,
      "step": 20335
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.970705509185791,
      "learning_rate": 1.6846149353238074e-05,
      "loss": 2.4151,
      "step": 20336
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.194566011428833,
      "learning_rate": 1.684584923048266e-05,
      "loss": 2.4563,
      "step": 20337
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9345659017562866,
      "learning_rate": 1.6845549096121676e-05,
      "loss": 2.4423,
      "step": 20338
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.012795090675354,
      "learning_rate": 1.6845248950155625e-05,
      "loss": 2.4104,
      "step": 20339
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0339010953903198,
      "learning_rate": 1.6844948792585015e-05,
      "loss": 2.4771,
      "step": 20340
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9577798247337341,
      "learning_rate": 1.6844648623410363e-05,
      "loss": 2.5052,
      "step": 20341
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.4519083499908447,
      "learning_rate": 1.6844348442632173e-05,
      "loss": 2.3593,
      "step": 20342
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0350359678268433,
      "learning_rate": 1.684404825025095e-05,
      "loss": 2.562,
      "step": 20343
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.06389319896698,
      "learning_rate": 1.6843748046267207e-05,
      "loss": 2.5696,
      "step": 20344
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0818792581558228,
      "learning_rate": 1.6843447830681455e-05,
      "loss": 2.3843,
      "step": 20345
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.052343726158142,
      "learning_rate": 1.6843147603494197e-05,
      "loss": 2.271,
      "step": 20346
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9625726342201233,
      "learning_rate": 1.684284736470595e-05,
      "loss": 2.3128,
      "step": 20347
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9636698365211487,
      "learning_rate": 1.684254711431721e-05,
      "loss": 2.4757,
      "step": 20348
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0434901714324951,
      "learning_rate": 1.6842246852328498e-05,
      "loss": 2.4616,
      "step": 20349
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9840830564498901,
      "learning_rate": 1.684194657874032e-05,
      "loss": 2.4939,
      "step": 20350
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.0296968221664429,
      "learning_rate": 1.6841646293553187e-05,
      "loss": 2.3257,
      "step": 20351
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.9851583242416382,
      "learning_rate": 1.68413459967676e-05,
      "loss": 2.3833,
      "step": 20352
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0123752355575562,
      "learning_rate": 1.6841045688384076e-05,
      "loss": 2.6479,
      "step": 20353
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1216726303100586,
      "learning_rate": 1.6840745368403126e-05,
      "loss": 2.4592,
      "step": 20354
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.955668032169342,
      "learning_rate": 1.6840445036825247e-05,
      "loss": 2.5068,
      "step": 20355
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0955374240875244,
      "learning_rate": 1.6840144693650963e-05,
      "loss": 2.1881,
      "step": 20356
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.07232666015625,
      "learning_rate": 1.683984433888077e-05,
      "loss": 2.5368,
      "step": 20357
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0014386177062988,
      "learning_rate": 1.6839543972515184e-05,
      "loss": 2.4639,
      "step": 20358
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0091204643249512,
      "learning_rate": 1.6839243594554713e-05,
      "loss": 2.4436,
      "step": 20359
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9470067024230957,
      "learning_rate": 1.6838943204999873e-05,
      "loss": 2.4914,
      "step": 20360
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.087320327758789,
      "learning_rate": 1.683864280385116e-05,
      "loss": 2.5199,
      "step": 20361
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0315707921981812,
      "learning_rate": 1.6838342391109094e-05,
      "loss": 2.7798,
      "step": 20362
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9448443055152893,
      "learning_rate": 1.683804196677418e-05,
      "loss": 2.4336,
      "step": 20363
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0174635648727417,
      "learning_rate": 1.6837741530846924e-05,
      "loss": 2.3746,
      "step": 20364
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0839896202087402,
      "learning_rate": 1.683744108332784e-05,
      "loss": 2.263,
      "step": 20365
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.030916690826416,
      "learning_rate": 1.683714062421744e-05,
      "loss": 2.3877,
      "step": 20366
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9271106123924255,
      "learning_rate": 1.6836840153516228e-05,
      "loss": 2.3354,
      "step": 20367
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9920828342437744,
      "learning_rate": 1.683653967122471e-05,
      "loss": 2.6217,
      "step": 20368
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9511493444442749,
      "learning_rate": 1.683623917734341e-05,
      "loss": 2.4154,
      "step": 20369
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0426366329193115,
      "learning_rate": 1.6835938671872822e-05,
      "loss": 2.5307,
      "step": 20370
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9412859678268433,
      "learning_rate": 1.683563815481346e-05,
      "loss": 2.6362,
      "step": 20371
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.054054856300354,
      "learning_rate": 1.6835337626165836e-05,
      "loss": 2.303,
      "step": 20372
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0758501291275024,
      "learning_rate": 1.683503708593046e-05,
      "loss": 2.4752,
      "step": 20373
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.097619652748108,
      "learning_rate": 1.683473653410784e-05,
      "loss": 2.4649,
      "step": 20374
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9933717846870422,
      "learning_rate": 1.6834435970698485e-05,
      "loss": 2.4326,
      "step": 20375
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.036234736442566,
      "learning_rate": 1.6834135395702904e-05,
      "loss": 2.4949,
      "step": 20376
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.120665192604065,
      "learning_rate": 1.6833834809121607e-05,
      "loss": 2.6564,
      "step": 20377
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.138443946838379,
      "learning_rate": 1.6833534210955104e-05,
      "loss": 2.2433,
      "step": 20378
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0259417295455933,
      "learning_rate": 1.6833233601203908e-05,
      "loss": 2.51,
      "step": 20379
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0258852243423462,
      "learning_rate": 1.683293297986852e-05,
      "loss": 2.47,
      "step": 20380
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.084492802619934,
      "learning_rate": 1.683263234694946e-05,
      "loss": 2.3294,
      "step": 20381
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0924161672592163,
      "learning_rate": 1.6832331702447228e-05,
      "loss": 2.4324,
      "step": 20382
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9536674618721008,
      "learning_rate": 1.6832031046362337e-05,
      "loss": 2.4442,
      "step": 20383
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.14572274684906,
      "learning_rate": 1.6831730378695304e-05,
      "loss": 2.6108,
      "step": 20384
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.071776032447815,
      "learning_rate": 1.6831429699446627e-05,
      "loss": 2.4396,
      "step": 20385
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9167341589927673,
      "learning_rate": 1.6831129008616824e-05,
      "loss": 2.4987,
      "step": 20386
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.00710129737854,
      "learning_rate": 1.6830828306206403e-05,
      "loss": 2.3599,
      "step": 20387
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2871142625808716,
      "learning_rate": 1.683052759221587e-05,
      "loss": 2.2262,
      "step": 20388
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1878608465194702,
      "learning_rate": 1.683022686664574e-05,
      "loss": 2.3849,
      "step": 20389
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4513214826583862,
      "learning_rate": 1.6829926129496515e-05,
      "loss": 2.4837,
      "step": 20390
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1193785667419434,
      "learning_rate": 1.6829625380768714e-05,
      "loss": 2.4599,
      "step": 20391
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9571153521537781,
      "learning_rate": 1.6829324620462846e-05,
      "loss": 2.5635,
      "step": 20392
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0616531372070312,
      "learning_rate": 1.6829023848579417e-05,
      "loss": 2.2456,
      "step": 20393
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1059967279434204,
      "learning_rate": 1.6828723065118934e-05,
      "loss": 2.5371,
      "step": 20394
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.028403639793396,
      "learning_rate": 1.6828422270081913e-05,
      "loss": 2.3777,
      "step": 20395
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1105272769927979,
      "learning_rate": 1.682812146346886e-05,
      "loss": 2.6425,
      "step": 20396
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0672008991241455,
      "learning_rate": 1.6827820645280287e-05,
      "loss": 2.3804,
      "step": 20397
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0291519165039062,
      "learning_rate": 1.6827519815516708e-05,
      "loss": 2.2829,
      "step": 20398
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0764814615249634,
      "learning_rate": 1.6827218974178623e-05,
      "loss": 2.4786,
      "step": 20399
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0952688455581665,
      "learning_rate": 1.6826918121266552e-05,
      "loss": 2.542,
      "step": 20400
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0609219074249268,
      "learning_rate": 1.6826617256780998e-05,
      "loss": 2.8478,
      "step": 20401
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9432307481765747,
      "learning_rate": 1.6826316380722474e-05,
      "loss": 2.2795,
      "step": 20402
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.158624529838562,
      "learning_rate": 1.682601549309149e-05,
      "loss": 2.5752,
      "step": 20403
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0026413202285767,
      "learning_rate": 1.6825714593888554e-05,
      "loss": 2.25,
      "step": 20404
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9503651857376099,
      "learning_rate": 1.682541368311418e-05,
      "loss": 2.4981,
      "step": 20405
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.098945140838623,
      "learning_rate": 1.6825112760768878e-05,
      "loss": 2.2628,
      "step": 20406
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1580493450164795,
      "learning_rate": 1.6824811826853155e-05,
      "loss": 2.4973,
      "step": 20407
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1397700309753418,
      "learning_rate": 1.6824510881367518e-05,
      "loss": 2.4156,
      "step": 20408
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9832690358161926,
      "learning_rate": 1.6824209924312485e-05,
      "loss": 2.5083,
      "step": 20409
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0116654634475708,
      "learning_rate": 1.6823908955688564e-05,
      "loss": 2.3082,
      "step": 20410
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0626466274261475,
      "learning_rate": 1.682360797549626e-05,
      "loss": 2.6911,
      "step": 20411
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9854717254638672,
      "learning_rate": 1.682330698373609e-05,
      "loss": 2.3783,
      "step": 20412
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3559201955795288,
      "learning_rate": 1.6823005980408563e-05,
      "loss": 2.545,
      "step": 20413
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1926487684249878,
      "learning_rate": 1.6822704965514186e-05,
      "loss": 2.1824,
      "step": 20414
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3298487663269043,
      "learning_rate": 1.6822403939053473e-05,
      "loss": 2.3571,
      "step": 20415
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9957705140113831,
      "learning_rate": 1.682210290102693e-05,
      "loss": 2.4692,
      "step": 20416
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0121203660964966,
      "learning_rate": 1.682180185143507e-05,
      "loss": 2.5453,
      "step": 20417
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0544023513793945,
      "learning_rate": 1.68215007902784e-05,
      "loss": 2.6029,
      "step": 20418
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9157084822654724,
      "learning_rate": 1.682119971755744e-05,
      "loss": 2.4591,
      "step": 20419
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.086582899093628,
      "learning_rate": 1.6820898633272692e-05,
      "loss": 2.0846,
      "step": 20420
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1018673181533813,
      "learning_rate": 1.6820597537424666e-05,
      "loss": 2.5435,
      "step": 20421
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9442548155784607,
      "learning_rate": 1.6820296430013875e-05,
      "loss": 2.2838,
      "step": 20422
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0185261964797974,
      "learning_rate": 1.681999531104083e-05,
      "loss": 2.3633,
      "step": 20423
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0180166959762573,
      "learning_rate": 1.681969418050604e-05,
      "loss": 2.6235,
      "step": 20424
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0017937421798706,
      "learning_rate": 1.6819393038410018e-05,
      "loss": 2.4251,
      "step": 20425
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0763497352600098,
      "learning_rate": 1.681909188475327e-05,
      "loss": 2.515,
      "step": 20426
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9993857145309448,
      "learning_rate": 1.681879071953631e-05,
      "loss": 2.4123,
      "step": 20427
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1880970001220703,
      "learning_rate": 1.681848954275965e-05,
      "loss": 2.293,
      "step": 20428
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.10097074508667,
      "learning_rate": 1.6818188354423798e-05,
      "loss": 2.3399,
      "step": 20429
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9777666926383972,
      "learning_rate": 1.681788715452926e-05,
      "loss": 2.4865,
      "step": 20430
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0128326416015625,
      "learning_rate": 1.6817585943076555e-05,
      "loss": 2.4356,
      "step": 20431
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0917588472366333,
      "learning_rate": 1.6817284720066193e-05,
      "loss": 2.3555,
      "step": 20432
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0350481271743774,
      "learning_rate": 1.6816983485498677e-05,
      "loss": 2.4313,
      "step": 20433
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1345577239990234,
      "learning_rate": 1.6816682239374525e-05,
      "loss": 2.4946,
      "step": 20434
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.058653712272644,
      "learning_rate": 1.6816380981694244e-05,
      "loss": 2.5933,
      "step": 20435
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.130637764930725,
      "learning_rate": 1.6816079712458347e-05,
      "loss": 2.3378,
      "step": 20436
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0793275833129883,
      "learning_rate": 1.6815778431667345e-05,
      "loss": 2.5474,
      "step": 20437
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0391782522201538,
      "learning_rate": 1.6815477139321747e-05,
      "loss": 2.3701,
      "step": 20438
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0843009948730469,
      "learning_rate": 1.6815175835422063e-05,
      "loss": 2.449,
      "step": 20439
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0701689720153809,
      "learning_rate": 1.6814874519968803e-05,
      "loss": 2.4304,
      "step": 20440
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0871635675430298,
      "learning_rate": 1.6814573192962484e-05,
      "loss": 2.4789,
      "step": 20441
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1032458543777466,
      "learning_rate": 1.681427185440361e-05,
      "loss": 2.6222,
      "step": 20442
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.130738377571106,
      "learning_rate": 1.6813970504292693e-05,
      "loss": 2.2879,
      "step": 20443
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1295725107192993,
      "learning_rate": 1.6813669142630248e-05,
      "loss": 2.4143,
      "step": 20444
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9838663339614868,
      "learning_rate": 1.681336776941678e-05,
      "loss": 2.4567,
      "step": 20445
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.01084303855896,
      "learning_rate": 1.681306638465281e-05,
      "loss": 2.2041,
      "step": 20446
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3060407638549805,
      "learning_rate": 1.6812764988338837e-05,
      "loss": 2.393,
      "step": 20447
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.361148715019226,
      "learning_rate": 1.6812463580475376e-05,
      "loss": 2.4789,
      "step": 20448
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.170864224433899,
      "learning_rate": 1.6812162161062943e-05,
      "loss": 2.3359,
      "step": 20449
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.038572072982788,
      "learning_rate": 1.6811860730102042e-05,
      "loss": 2.4293,
      "step": 20450
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9813218712806702,
      "learning_rate": 1.681155928759319e-05,
      "loss": 2.2874,
      "step": 20451
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9626728296279907,
      "learning_rate": 1.6811257833536892e-05,
      "loss": 2.491,
      "step": 20452
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0742753744125366,
      "learning_rate": 1.6810956367933662e-05,
      "loss": 2.7326,
      "step": 20453
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1703099012374878,
      "learning_rate": 1.6810654890784014e-05,
      "loss": 2.4009,
      "step": 20454
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0542190074920654,
      "learning_rate": 1.6810353402088453e-05,
      "loss": 2.4466,
      "step": 20455
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0673739910125732,
      "learning_rate": 1.681005190184749e-05,
      "loss": 2.5999,
      "step": 20456
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0835195779800415,
      "learning_rate": 1.6809750390061646e-05,
      "loss": 2.4415,
      "step": 20457
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0129992961883545,
      "learning_rate": 1.6809448866731424e-05,
      "loss": 2.6216,
      "step": 20458
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0516612529754639,
      "learning_rate": 1.6809147331857338e-05,
      "loss": 2.3069,
      "step": 20459
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9353633522987366,
      "learning_rate": 1.6808845785439897e-05,
      "loss": 2.399,
      "step": 20460
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0544867515563965,
      "learning_rate": 1.6808544227479613e-05,
      "loss": 2.6097,
      "step": 20461
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.016123652458191,
      "learning_rate": 1.6808242657976994e-05,
      "loss": 2.3647,
      "step": 20462
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1411532163619995,
      "learning_rate": 1.6807941076932558e-05,
      "loss": 2.4991,
      "step": 20463
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9917075037956238,
      "learning_rate": 1.680763948434681e-05,
      "loss": 2.5355,
      "step": 20464
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.049572229385376,
      "learning_rate": 1.680733788022027e-05,
      "loss": 2.4584,
      "step": 20465
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9851558804512024,
      "learning_rate": 1.680703626455344e-05,
      "loss": 2.5336,
      "step": 20466
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0407274961471558,
      "learning_rate": 1.6806734637346833e-05,
      "loss": 2.2837,
      "step": 20467
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9300239086151123,
      "learning_rate": 1.6806432998600965e-05,
      "loss": 2.5708,
      "step": 20468
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0512967109680176,
      "learning_rate": 1.6806131348316343e-05,
      "loss": 2.2974,
      "step": 20469
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.111498475074768,
      "learning_rate": 1.680582968649348e-05,
      "loss": 2.4548,
      "step": 20470
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2325626611709595,
      "learning_rate": 1.6805528013132887e-05,
      "loss": 2.4424,
      "step": 20471
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.274810552597046,
      "learning_rate": 1.6805226328235073e-05,
      "loss": 2.4068,
      "step": 20472
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.103704571723938,
      "learning_rate": 1.680492463180056e-05,
      "loss": 2.4563,
      "step": 20473
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9612347483634949,
      "learning_rate": 1.6804622923829845e-05,
      "loss": 2.4037,
      "step": 20474
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9789583086967468,
      "learning_rate": 1.6804321204323448e-05,
      "loss": 2.3581,
      "step": 20475
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4206136465072632,
      "learning_rate": 1.6804019473281878e-05,
      "loss": 2.4447,
      "step": 20476
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9979057312011719,
      "learning_rate": 1.6803717730705643e-05,
      "loss": 2.4916,
      "step": 20477
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.285251498222351,
      "learning_rate": 1.6803415976595263e-05,
      "loss": 2.575,
      "step": 20478
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1153241395950317,
      "learning_rate": 1.6803114210951248e-05,
      "loss": 2.4336,
      "step": 20479
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0477581024169922,
      "learning_rate": 1.6802812433774104e-05,
      "loss": 2.4875,
      "step": 20480
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1468372344970703,
      "learning_rate": 1.6802510645064342e-05,
      "loss": 2.4534,
      "step": 20481
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.102835774421692,
      "learning_rate": 1.680220884482248e-05,
      "loss": 2.5618,
      "step": 20482
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8793156147003174,
      "learning_rate": 1.6801907033049025e-05,
      "loss": 2.4769,
      "step": 20483
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0498303174972534,
      "learning_rate": 1.6801605209744493e-05,
      "loss": 2.5727,
      "step": 20484
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9791820049285889,
      "learning_rate": 1.6801303374909393e-05,
      "loss": 2.386,
      "step": 20485
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.105729103088379,
      "learning_rate": 1.6801001528544232e-05,
      "loss": 2.5085,
      "step": 20486
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.009493112564087,
      "learning_rate": 1.680069967064953e-05,
      "loss": 2.4753,
      "step": 20487
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0758050680160522,
      "learning_rate": 1.6800397801225796e-05,
      "loss": 2.5025,
      "step": 20488
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.008723258972168,
      "learning_rate": 1.680009592027354e-05,
      "loss": 2.6596,
      "step": 20489
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0002148151397705,
      "learning_rate": 1.6799794027793274e-05,
      "loss": 2.443,
      "step": 20490
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.989169180393219,
      "learning_rate": 1.679949212378551e-05,
      "loss": 2.4569,
      "step": 20491
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1728789806365967,
      "learning_rate": 1.679919020825076e-05,
      "loss": 2.1172,
      "step": 20492
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.052878499031067,
      "learning_rate": 1.6798888281189538e-05,
      "loss": 2.4313,
      "step": 20493
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0012365579605103,
      "learning_rate": 1.679858634260235e-05,
      "loss": 2.3627,
      "step": 20494
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0061819553375244,
      "learning_rate": 1.6798284392489712e-05,
      "loss": 2.5798,
      "step": 20495
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0634005069732666,
      "learning_rate": 1.6797982430852136e-05,
      "loss": 2.555,
      "step": 20496
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0385527610778809,
      "learning_rate": 1.6797680457690132e-05,
      "loss": 2.4462,
      "step": 20497
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.209336519241333,
      "learning_rate": 1.679737847300422e-05,
      "loss": 2.6767,
      "step": 20498
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0615078210830688,
      "learning_rate": 1.6797076476794903e-05,
      "loss": 2.4548,
      "step": 20499
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9521887302398682,
      "learning_rate": 1.6796774469062693e-05,
      "loss": 2.4292,
      "step": 20500
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0951118469238281,
      "learning_rate": 1.6796472449808103e-05,
      "loss": 2.7082,
      "step": 20501
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0225554704666138,
      "learning_rate": 1.679617041903165e-05,
      "loss": 2.3967,
      "step": 20502
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3561931848526,
      "learning_rate": 1.679586837673384e-05,
      "loss": 2.5083,
      "step": 20503
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9453416466712952,
      "learning_rate": 1.6795566322915186e-05,
      "loss": 2.4747,
      "step": 20504
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.060867428779602,
      "learning_rate": 1.6795264257576204e-05,
      "loss": 2.4519,
      "step": 20505
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1451247930526733,
      "learning_rate": 1.6794962180717404e-05,
      "loss": 2.5001,
      "step": 20506
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0028557777404785,
      "learning_rate": 1.6794660092339294e-05,
      "loss": 2.5348,
      "step": 20507
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0695738792419434,
      "learning_rate": 1.6794357992442394e-05,
      "loss": 2.372,
      "step": 20508
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9676730632781982,
      "learning_rate": 1.6794055881027208e-05,
      "loss": 2.6939,
      "step": 20509
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0799100399017334,
      "learning_rate": 1.6793753758094257e-05,
      "loss": 2.3901,
      "step": 20510
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.054695963859558,
      "learning_rate": 1.6793451623644044e-05,
      "loss": 2.335,
      "step": 20511
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.043318748474121,
      "learning_rate": 1.6793149477677086e-05,
      "loss": 2.3827,
      "step": 20512
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.061394214630127,
      "learning_rate": 1.6792847320193895e-05,
      "loss": 2.489,
      "step": 20513
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9969852566719055,
      "learning_rate": 1.6792545151194987e-05,
      "loss": 2.3736,
      "step": 20514
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.043308973312378,
      "learning_rate": 1.6792242970680862e-05,
      "loss": 2.5126,
      "step": 20515
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0769044160842896,
      "learning_rate": 1.6791940778652046e-05,
      "loss": 2.3569,
      "step": 20516
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0254600048065186,
      "learning_rate": 1.6791638575109046e-05,
      "loss": 2.267,
      "step": 20517
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0279037952423096,
      "learning_rate": 1.6791336360052372e-05,
      "loss": 2.4885,
      "step": 20518
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.003448486328125,
      "learning_rate": 1.679103413348254e-05,
      "loss": 2.32,
      "step": 20519
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0892130136489868,
      "learning_rate": 1.679073189540006e-05,
      "loss": 2.3451,
      "step": 20520
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0637074708938599,
      "learning_rate": 1.6790429645805445e-05,
      "loss": 2.4609,
      "step": 20521
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.994841456413269,
      "learning_rate": 1.6790127384699208e-05,
      "loss": 2.4442,
      "step": 20522
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.127539038658142,
      "learning_rate": 1.6789825112081857e-05,
      "loss": 2.4154,
      "step": 20523
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0555561780929565,
      "learning_rate": 1.678952282795391e-05,
      "loss": 2.4541,
      "step": 20524
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1461323499679565,
      "learning_rate": 1.678922053231588e-05,
      "loss": 2.4986,
      "step": 20525
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1042864322662354,
      "learning_rate": 1.6788918225168276e-05,
      "loss": 2.3427,
      "step": 20526
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1242161989212036,
      "learning_rate": 1.6788615906511613e-05,
      "loss": 2.4465,
      "step": 20527
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1456291675567627,
      "learning_rate": 1.6788313576346403e-05,
      "loss": 2.7063,
      "step": 20528
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1154948472976685,
      "learning_rate": 1.6788011234673157e-05,
      "loss": 2.5323,
      "step": 20529
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0144224166870117,
      "learning_rate": 1.678770888149239e-05,
      "loss": 2.6454,
      "step": 20530
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1260432004928589,
      "learning_rate": 1.6787406516804608e-05,
      "loss": 2.5651,
      "step": 20531
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1707793474197388,
      "learning_rate": 1.6787104140610334e-05,
      "loss": 2.5742,
      "step": 20532
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.073479175567627,
      "learning_rate": 1.6786801752910073e-05,
      "loss": 2.4759,
      "step": 20533
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.166131615638733,
      "learning_rate": 1.6786499353704336e-05,
      "loss": 2.4273,
      "step": 20534
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9437723755836487,
      "learning_rate": 1.6786196942993643e-05,
      "loss": 2.5293,
      "step": 20535
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9613697528839111,
      "learning_rate": 1.67858945207785e-05,
      "loss": 2.5433,
      "step": 20536
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0170376300811768,
      "learning_rate": 1.6785592087059427e-05,
      "loss": 2.2873,
      "step": 20537
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2268532514572144,
      "learning_rate": 1.6785289641836932e-05,
      "loss": 2.6052,
      "step": 20538
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0420904159545898,
      "learning_rate": 1.6784987185111526e-05,
      "loss": 2.5083,
      "step": 20539
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.09946608543396,
      "learning_rate": 1.6784684716883724e-05,
      "loss": 2.7116,
      "step": 20540
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1827470064163208,
      "learning_rate": 1.678438223715404e-05,
      "loss": 2.7924,
      "step": 20541
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0430570840835571,
      "learning_rate": 1.6784079745922987e-05,
      "loss": 2.431,
      "step": 20542
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9980714917182922,
      "learning_rate": 1.6783777243191074e-05,
      "loss": 2.6461,
      "step": 20543
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0786126852035522,
      "learning_rate": 1.6783474728958816e-05,
      "loss": 2.2586,
      "step": 20544
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0650990009307861,
      "learning_rate": 1.6783172203226727e-05,
      "loss": 2.4271,
      "step": 20545
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0106267929077148,
      "learning_rate": 1.678286966599532e-05,
      "loss": 2.4711,
      "step": 20546
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0347124338150024,
      "learning_rate": 1.6782567117265105e-05,
      "loss": 2.4442,
      "step": 20547
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0660992860794067,
      "learning_rate": 1.6782264557036593e-05,
      "loss": 2.6656,
      "step": 20548
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0053333044052124,
      "learning_rate": 1.6781961985310306e-05,
      "loss": 2.465,
      "step": 20549
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2536791563034058,
      "learning_rate": 1.678165940208675e-05,
      "loss": 2.4496,
      "step": 20550
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0097568035125732,
      "learning_rate": 1.678135680736644e-05,
      "loss": 2.5715,
      "step": 20551
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7981632947921753,
      "learning_rate": 1.6781054201149886e-05,
      "loss": 2.413,
      "step": 20552
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1748175621032715,
      "learning_rate": 1.6780751583437608e-05,
      "loss": 2.6039,
      "step": 20553
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0640207529067993,
      "learning_rate": 1.6780448954230107e-05,
      "loss": 2.259,
      "step": 20554
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0992406606674194,
      "learning_rate": 1.678014631352791e-05,
      "loss": 2.4623,
      "step": 20555
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0666007995605469,
      "learning_rate": 1.677984366133152e-05,
      "loss": 2.0559,
      "step": 20556
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9581518173217773,
      "learning_rate": 1.6779540997641457e-05,
      "loss": 2.2448,
      "step": 20557
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9114850163459778,
      "learning_rate": 1.677923832245823e-05,
      "loss": 2.3509,
      "step": 20558
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9652993083000183,
      "learning_rate": 1.677893563578235e-05,
      "loss": 2.5071,
      "step": 20559
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1400443315505981,
      "learning_rate": 1.6778632937614335e-05,
      "loss": 2.5978,
      "step": 20560
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9804372191429138,
      "learning_rate": 1.6778330227954696e-05,
      "loss": 2.2335,
      "step": 20561
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9546053409576416,
      "learning_rate": 1.6778027506803948e-05,
      "loss": 2.3663,
      "step": 20562
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.20782470703125,
      "learning_rate": 1.6777724774162597e-05,
      "loss": 2.2176,
      "step": 20563
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.342592477798462,
      "learning_rate": 1.6777422030031167e-05,
      "loss": 2.3739,
      "step": 20564
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9625648856163025,
      "learning_rate": 1.6777119274410165e-05,
      "loss": 2.3763,
      "step": 20565
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9538732767105103,
      "learning_rate": 1.6776816507300103e-05,
      "loss": 2.5335,
      "step": 20566
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.083768606185913,
      "learning_rate": 1.67765137287015e-05,
      "loss": 2.5344,
      "step": 20567
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.053162693977356,
      "learning_rate": 1.6776210938614862e-05,
      "loss": 2.4495,
      "step": 20568
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2069636583328247,
      "learning_rate": 1.6775908137040707e-05,
      "loss": 2.1463,
      "step": 20569
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9987471103668213,
      "learning_rate": 1.6775605323979548e-05,
      "loss": 2.4267,
      "step": 20570
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1213198900222778,
      "learning_rate": 1.6775302499431897e-05,
      "loss": 2.4393,
      "step": 20571
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1388170719146729,
      "learning_rate": 1.677499966339827e-05,
      "loss": 2.4932,
      "step": 20572
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0346852540969849,
      "learning_rate": 1.6774696815879177e-05,
      "loss": 2.3885,
      "step": 20573
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9580057263374329,
      "learning_rate": 1.6774393956875134e-05,
      "loss": 2.5839,
      "step": 20574
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9844440221786499,
      "learning_rate": 1.6774091086386652e-05,
      "loss": 2.3776,
      "step": 20575
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0776240825653076,
      "learning_rate": 1.6773788204414247e-05,
      "loss": 2.6218,
      "step": 20576
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0613800287246704,
      "learning_rate": 1.677348531095843e-05,
      "loss": 2.6184,
      "step": 20577
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.147631287574768,
      "learning_rate": 1.6773182406019715e-05,
      "loss": 2.2501,
      "step": 20578
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.970589816570282,
      "learning_rate": 1.677287948959862e-05,
      "loss": 2.6219,
      "step": 20579
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0495885610580444,
      "learning_rate": 1.677257656169565e-05,
      "loss": 2.3825,
      "step": 20580
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3102359771728516,
      "learning_rate": 1.6772273622311327e-05,
      "loss": 2.388,
      "step": 20581
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4826960563659668,
      "learning_rate": 1.6771970671446157e-05,
      "loss": 2.3668,
      "step": 20582
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.014308214187622,
      "learning_rate": 1.6771667709100663e-05,
      "loss": 2.48,
      "step": 20583
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0350900888442993,
      "learning_rate": 1.677136473527535e-05,
      "loss": 2.4437,
      "step": 20584
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.168624997138977,
      "learning_rate": 1.6771061749970736e-05,
      "loss": 2.524,
      "step": 20585
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0471221208572388,
      "learning_rate": 1.6770758753187332e-05,
      "loss": 2.5203,
      "step": 20586
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0208593606948853,
      "learning_rate": 1.6770455744925655e-05,
      "loss": 2.4055,
      "step": 20587
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0873674154281616,
      "learning_rate": 1.6770152725186214e-05,
      "loss": 2.5278,
      "step": 20588
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.093393325805664,
      "learning_rate": 1.6769849693969524e-05,
      "loss": 2.3916,
      "step": 20589
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0223281383514404,
      "learning_rate": 1.6769546651276103e-05,
      "loss": 2.4366,
      "step": 20590
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9534496068954468,
      "learning_rate": 1.6769243597106464e-05,
      "loss": 2.478,
      "step": 20591
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9573884606361389,
      "learning_rate": 1.6768940531461117e-05,
      "loss": 2.2505,
      "step": 20592
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9942792057991028,
      "learning_rate": 1.6768637454340574e-05,
      "loss": 2.2009,
      "step": 20593
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0140759944915771,
      "learning_rate": 1.6768334365745353e-05,
      "loss": 2.3902,
      "step": 20594
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1504623889923096,
      "learning_rate": 1.6768031265675967e-05,
      "loss": 2.4463,
      "step": 20595
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1544790267944336,
      "learning_rate": 1.676772815413293e-05,
      "loss": 2.6109,
      "step": 20596
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.041768193244934,
      "learning_rate": 1.6767425031116758e-05,
      "loss": 2.3484,
      "step": 20597
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0071537494659424,
      "learning_rate": 1.6767121896627965e-05,
      "loss": 2.2542,
      "step": 20598
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.956360399723053,
      "learning_rate": 1.676681875066706e-05,
      "loss": 2.4869,
      "step": 20599
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0289344787597656,
      "learning_rate": 1.6766515593234555e-05,
      "loss": 2.4391,
      "step": 20600
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1277775764465332,
      "learning_rate": 1.6766212424330972e-05,
      "loss": 2.3305,
      "step": 20601
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1742905378341675,
      "learning_rate": 1.6765909243956818e-05,
      "loss": 2.3387,
      "step": 20602
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0477560758590698,
      "learning_rate": 1.6765606052112615e-05,
      "loss": 2.5694,
      "step": 20603
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2512547969818115,
      "learning_rate": 1.676530284879887e-05,
      "loss": 2.6541,
      "step": 20604
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3400474786758423,
      "learning_rate": 1.67649996340161e-05,
      "loss": 2.494,
      "step": 20605
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.075015902519226,
      "learning_rate": 1.6764696407764817e-05,
      "loss": 2.2605,
      "step": 20606
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9704580903053284,
      "learning_rate": 1.676439317004554e-05,
      "loss": 2.3714,
      "step": 20607
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0149394273757935,
      "learning_rate": 1.6764089920858775e-05,
      "loss": 2.6288,
      "step": 20608
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0044503211975098,
      "learning_rate": 1.6763786660205044e-05,
      "loss": 2.5405,
      "step": 20609
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0948736667633057,
      "learning_rate": 1.6763483388084855e-05,
      "loss": 2.5778,
      "step": 20610
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0233620405197144,
      "learning_rate": 1.6763180104498723e-05,
      "loss": 2.4802,
      "step": 20611
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2573200464248657,
      "learning_rate": 1.6762876809447167e-05,
      "loss": 2.5321,
      "step": 20612
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1205259561538696,
      "learning_rate": 1.67625735029307e-05,
      "loss": 2.4254,
      "step": 20613
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9761547446250916,
      "learning_rate": 1.676227018494983e-05,
      "loss": 2.2527,
      "step": 20614
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0084980726242065,
      "learning_rate": 1.6761966855505076e-05,
      "loss": 2.72,
      "step": 20615
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0325859785079956,
      "learning_rate": 1.6761663514596952e-05,
      "loss": 2.648,
      "step": 20616
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0745151042938232,
      "learning_rate": 1.6761360162225972e-05,
      "loss": 2.5154,
      "step": 20617
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0458511114120483,
      "learning_rate": 1.6761056798392654e-05,
      "loss": 2.677,
      "step": 20618
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.106036901473999,
      "learning_rate": 1.6760753423097506e-05,
      "loss": 2.388,
      "step": 20619
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.297278642654419,
      "learning_rate": 1.676045003634104e-05,
      "loss": 2.2141,
      "step": 20620
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0904086828231812,
      "learning_rate": 1.676014663812378e-05,
      "loss": 2.5442,
      "step": 20621
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0573656558990479,
      "learning_rate": 1.6759843228446232e-05,
      "loss": 2.57,
      "step": 20622
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.971686065196991,
      "learning_rate": 1.675953980730892e-05,
      "loss": 2.2809,
      "step": 20623
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1096844673156738,
      "learning_rate": 1.6759236374712345e-05,
      "loss": 2.4499,
      "step": 20624
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.100963830947876,
      "learning_rate": 1.675893293065703e-05,
      "loss": 2.6869,
      "step": 20625
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.014233112335205,
      "learning_rate": 1.6758629475143493e-05,
      "loss": 2.5042,
      "step": 20626
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9871883988380432,
      "learning_rate": 1.675832600817224e-05,
      "loss": 2.6284,
      "step": 20627
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9883226752281189,
      "learning_rate": 1.6758022529743788e-05,
      "loss": 2.6456,
      "step": 20628
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0050995349884033,
      "learning_rate": 1.6757719039858656e-05,
      "loss": 2.2601,
      "step": 20629
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1298284530639648,
      "learning_rate": 1.6757415538517352e-05,
      "loss": 2.4022,
      "step": 20630
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9483985304832458,
      "learning_rate": 1.6757112025720395e-05,
      "loss": 2.3301,
      "step": 20631
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0681596994400024,
      "learning_rate": 1.6756808501468293e-05,
      "loss": 2.5992,
      "step": 20632
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9962413907051086,
      "learning_rate": 1.6756504965761575e-05,
      "loss": 2.4933,
      "step": 20633
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.031766414642334,
      "learning_rate": 1.6756201418600735e-05,
      "loss": 2.2655,
      "step": 20634
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1239464282989502,
      "learning_rate": 1.6755897859986305e-05,
      "loss": 2.443,
      "step": 20635
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1641569137573242,
      "learning_rate": 1.6755594289918794e-05,
      "loss": 2.2961,
      "step": 20636
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.209571123123169,
      "learning_rate": 1.6755290708398714e-05,
      "loss": 2.3109,
      "step": 20637
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0367822647094727,
      "learning_rate": 1.6754987115426578e-05,
      "loss": 2.2963,
      "step": 20638
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1068450212478638,
      "learning_rate": 1.675468351100291e-05,
      "loss": 2.401,
      "step": 20639
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.069258451461792,
      "learning_rate": 1.6754379895128216e-05,
      "loss": 2.5573,
      "step": 20640
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9906785488128662,
      "learning_rate": 1.6754076267803016e-05,
      "loss": 2.3499,
      "step": 20641
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9328372478485107,
      "learning_rate": 1.6753772629027818e-05,
      "loss": 2.4646,
      "step": 20642
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.122695803642273,
      "learning_rate": 1.6753468978803146e-05,
      "loss": 2.4226,
      "step": 20643
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0158289670944214,
      "learning_rate": 1.6753165317129507e-05,
      "loss": 2.5773,
      "step": 20644
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9812787175178528,
      "learning_rate": 1.675286164400742e-05,
      "loss": 2.3142,
      "step": 20645
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9993320107460022,
      "learning_rate": 1.6752557959437396e-05,
      "loss": 2.2294,
      "step": 20646
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2586966753005981,
      "learning_rate": 1.6752254263419955e-05,
      "loss": 2.3572,
      "step": 20647
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9911119937896729,
      "learning_rate": 1.6751950555955606e-05,
      "loss": 2.3102,
      "step": 20648
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1849192380905151,
      "learning_rate": 1.6751646837044867e-05,
      "loss": 2.4772,
      "step": 20649
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0267753601074219,
      "learning_rate": 1.675134310668826e-05,
      "loss": 2.4152,
      "step": 20650
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.029703974723816,
      "learning_rate": 1.6751039364886286e-05,
      "loss": 2.3877,
      "step": 20651
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0491224527359009,
      "learning_rate": 1.6750735611639467e-05,
      "loss": 2.4689,
      "step": 20652
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1345574855804443,
      "learning_rate": 1.6750431846948318e-05,
      "loss": 2.3243,
      "step": 20653
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0435870885849,
      "learning_rate": 1.6750128070813354e-05,
      "loss": 2.5721,
      "step": 20654
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0552886724472046,
      "learning_rate": 1.674982428323509e-05,
      "loss": 2.4474,
      "step": 20655
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.078864574432373,
      "learning_rate": 1.6749520484214042e-05,
      "loss": 2.5455,
      "step": 20656
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1177037954330444,
      "learning_rate": 1.6749216673750724e-05,
      "loss": 2.4546,
      "step": 20657
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9832263588905334,
      "learning_rate": 1.674891285184565e-05,
      "loss": 2.1583,
      "step": 20658
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1313717365264893,
      "learning_rate": 1.6748609018499334e-05,
      "loss": 2.3898,
      "step": 20659
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1099445819854736,
      "learning_rate": 1.6748305173712292e-05,
      "loss": 2.2135,
      "step": 20660
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0280401706695557,
      "learning_rate": 1.6748001317485044e-05,
      "loss": 2.485,
      "step": 20661
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0138123035430908,
      "learning_rate": 1.6747697449818098e-05,
      "loss": 2.4943,
      "step": 20662
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9837760329246521,
      "learning_rate": 1.6747393570711975e-05,
      "loss": 2.3763,
      "step": 20663
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0219382047653198,
      "learning_rate": 1.6747089680167183e-05,
      "loss": 2.4966,
      "step": 20664
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0970759391784668,
      "learning_rate": 1.6746785778184245e-05,
      "loss": 2.7314,
      "step": 20665
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0657258033752441,
      "learning_rate": 1.6746481864763672e-05,
      "loss": 2.4877,
      "step": 20666
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.034260869026184,
      "learning_rate": 1.674617793990598e-05,
      "loss": 2.4717,
      "step": 20667
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0769290924072266,
      "learning_rate": 1.674587400361168e-05,
      "loss": 2.4997,
      "step": 20668
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.081681728363037,
      "learning_rate": 1.6745570055881293e-05,
      "loss": 2.3796,
      "step": 20669
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0436484813690186,
      "learning_rate": 1.6745266096715337e-05,
      "loss": 2.6171,
      "step": 20670
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.013816237449646,
      "learning_rate": 1.674496212611432e-05,
      "loss": 2.2898,
      "step": 20671
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.957231342792511,
      "learning_rate": 1.6744658144078764e-05,
      "loss": 2.3452,
      "step": 20672
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9689329862594604,
      "learning_rate": 1.6744354150609176e-05,
      "loss": 2.5603,
      "step": 20673
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0935229063034058,
      "learning_rate": 1.6744050145706077e-05,
      "loss": 2.3651,
      "step": 20674
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9770079255104065,
      "learning_rate": 1.674374612936998e-05,
      "loss": 2.4397,
      "step": 20675
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0637109279632568,
      "learning_rate": 1.6743442101601407e-05,
      "loss": 2.5653,
      "step": 20676
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.938400149345398,
      "learning_rate": 1.6743138062400863e-05,
      "loss": 2.3518,
      "step": 20677
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.066892385482788,
      "learning_rate": 1.674283401176887e-05,
      "loss": 2.3049,
      "step": 20678
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2041455507278442,
      "learning_rate": 1.6742529949705945e-05,
      "loss": 2.4657,
      "step": 20679
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1036738157272339,
      "learning_rate": 1.6742225876212597e-05,
      "loss": 2.3396,
      "step": 20680
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0523505210876465,
      "learning_rate": 1.6741921791289344e-05,
      "loss": 2.5407,
      "step": 20681
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.234415054321289,
      "learning_rate": 1.6741617694936705e-05,
      "loss": 2.4911,
      "step": 20682
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.059378981590271,
      "learning_rate": 1.6741313587155193e-05,
      "loss": 2.3562,
      "step": 20683
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.035888433456421,
      "learning_rate": 1.6741009467945323e-05,
      "loss": 2.5075,
      "step": 20684
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0122469663619995,
      "learning_rate": 1.674070533730761e-05,
      "loss": 2.4953,
      "step": 20685
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0092905759811401,
      "learning_rate": 1.674040119524257e-05,
      "loss": 2.367,
      "step": 20686
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0531182289123535,
      "learning_rate": 1.674009704175072e-05,
      "loss": 2.3147,
      "step": 20687
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1084221601486206,
      "learning_rate": 1.6739792876832576e-05,
      "loss": 2.5918,
      "step": 20688
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0062179565429688,
      "learning_rate": 1.6739488700488656e-05,
      "loss": 2.3213,
      "step": 20689
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.032962679862976,
      "learning_rate": 1.6739184512719465e-05,
      "loss": 2.5616,
      "step": 20690
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0750468969345093,
      "learning_rate": 1.673888031352553e-05,
      "loss": 2.5483,
      "step": 20691
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0151621103286743,
      "learning_rate": 1.6738576102907365e-05,
      "loss": 2.6039,
      "step": 20692
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0748955011367798,
      "learning_rate": 1.673827188086548e-05,
      "loss": 2.4046,
      "step": 20693
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1258684396743774,
      "learning_rate": 1.673796764740039e-05,
      "loss": 2.2154,
      "step": 20694
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0673115253448486,
      "learning_rate": 1.6737663402512624e-05,
      "loss": 2.3994,
      "step": 20695
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0930043458938599,
      "learning_rate": 1.6737359146202682e-05,
      "loss": 2.4463,
      "step": 20696
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0380672216415405,
      "learning_rate": 1.673705487847109e-05,
      "loss": 2.4374,
      "step": 20697
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0995689630508423,
      "learning_rate": 1.673675059931836e-05,
      "loss": 2.3636,
      "step": 20698
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.058086633682251,
      "learning_rate": 1.6736446308745004e-05,
      "loss": 2.353,
      "step": 20699
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.230216383934021,
      "learning_rate": 1.6736142006751546e-05,
      "loss": 2.4412,
      "step": 20700
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9904165267944336,
      "learning_rate": 1.6735837693338495e-05,
      "loss": 2.298,
      "step": 20701
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9966022968292236,
      "learning_rate": 1.6735533368506372e-05,
      "loss": 2.3022,
      "step": 20702
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.984052836894989,
      "learning_rate": 1.673522903225569e-05,
      "loss": 2.5006,
      "step": 20703
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0567556619644165,
      "learning_rate": 1.6734924684586963e-05,
      "loss": 2.5946,
      "step": 20704
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0913571119308472,
      "learning_rate": 1.6734620325500713e-05,
      "loss": 2.4141,
      "step": 20705
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.439378261566162,
      "learning_rate": 1.673431595499745e-05,
      "loss": 2.5928,
      "step": 20706
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9949067831039429,
      "learning_rate": 1.6734011573077694e-05,
      "loss": 2.5832,
      "step": 20707
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0641392469406128,
      "learning_rate": 1.673370717974196e-05,
      "loss": 2.676,
      "step": 20708
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9560983180999756,
      "learning_rate": 1.6733402774990762e-05,
      "loss": 2.385,
      "step": 20709
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1950453519821167,
      "learning_rate": 1.673309835882462e-05,
      "loss": 2.551,
      "step": 20710
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0397263765335083,
      "learning_rate": 1.6732793931244046e-05,
      "loss": 2.2353,
      "step": 20711
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0201104879379272,
      "learning_rate": 1.6732489492249558e-05,
      "loss": 2.4842,
      "step": 20712
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0165350437164307,
      "learning_rate": 1.673218504184167e-05,
      "loss": 2.6597,
      "step": 20713
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.957262396812439,
      "learning_rate": 1.67318805800209e-05,
      "loss": 2.3366,
      "step": 20714
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0847724676132202,
      "learning_rate": 1.6731576106787763e-05,
      "loss": 2.3577,
      "step": 20715
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1297366619110107,
      "learning_rate": 1.673127162214278e-05,
      "loss": 2.6763,
      "step": 20716
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.049733281135559,
      "learning_rate": 1.673096712608646e-05,
      "loss": 2.5293,
      "step": 20717
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9636937975883484,
      "learning_rate": 1.6730662618619327e-05,
      "loss": 2.2922,
      "step": 20718
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9468119144439697,
      "learning_rate": 1.6730358099741887e-05,
      "loss": 2.3833,
      "step": 20719
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1804678440093994,
      "learning_rate": 1.673005356945467e-05,
      "loss": 2.4825,
      "step": 20720
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0040136575698853,
      "learning_rate": 1.672974902775818e-05,
      "loss": 2.4102,
      "step": 20721
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9800973534584045,
      "learning_rate": 1.6729444474652937e-05,
      "loss": 2.6634,
      "step": 20722
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0057868957519531,
      "learning_rate": 1.6729139910139455e-05,
      "loss": 2.3481,
      "step": 20723
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.937482476234436,
      "learning_rate": 1.672883533421826e-05,
      "loss": 2.4937,
      "step": 20724
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0665504932403564,
      "learning_rate": 1.6728530746889858e-05,
      "loss": 2.4549,
      "step": 20725
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0988579988479614,
      "learning_rate": 1.6728226148154765e-05,
      "loss": 2.5426,
      "step": 20726
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.183945894241333,
      "learning_rate": 1.672792153801351e-05,
      "loss": 2.5407,
      "step": 20727
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0172585248947144,
      "learning_rate": 1.6727616916466592e-05,
      "loss": 2.3814,
      "step": 20728
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9235591292381287,
      "learning_rate": 1.672731228351454e-05,
      "loss": 2.4173,
      "step": 20729
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9596076607704163,
      "learning_rate": 1.6727007639157865e-05,
      "loss": 2.5099,
      "step": 20730
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0756022930145264,
      "learning_rate": 1.672670298339709e-05,
      "loss": 2.6781,
      "step": 20731
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1563260555267334,
      "learning_rate": 1.672639831623272e-05,
      "loss": 2.404,
      "step": 20732
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9582912921905518,
      "learning_rate": 1.672609363766528e-05,
      "loss": 2.4247,
      "step": 20733
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1162481307983398,
      "learning_rate": 1.672578894769529e-05,
      "loss": 2.5083,
      "step": 20734
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.116249918937683,
      "learning_rate": 1.6725484246323257e-05,
      "loss": 2.4396,
      "step": 20735
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0208345651626587,
      "learning_rate": 1.67251795335497e-05,
      "loss": 2.4875,
      "step": 20736
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0992631912231445,
      "learning_rate": 1.672487480937514e-05,
      "loss": 2.4604,
      "step": 20737
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0720733404159546,
      "learning_rate": 1.6724570073800087e-05,
      "loss": 2.5684,
      "step": 20738
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9584482908248901,
      "learning_rate": 1.6724265326825063e-05,
      "loss": 2.8513,
      "step": 20739
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0306872129440308,
      "learning_rate": 1.6723960568450586e-05,
      "loss": 2.3303,
      "step": 20740
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9776647090911865,
      "learning_rate": 1.6723655798677165e-05,
      "loss": 2.2709,
      "step": 20741
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.164586067199707,
      "learning_rate": 1.6723351017505324e-05,
      "loss": 2.5765,
      "step": 20742
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0069808959960938,
      "learning_rate": 1.6723046224935577e-05,
      "loss": 2.5514,
      "step": 20743
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1310762166976929,
      "learning_rate": 1.672274142096844e-05,
      "loss": 2.5063,
      "step": 20744
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1365931034088135,
      "learning_rate": 1.672243660560443e-05,
      "loss": 2.2496,
      "step": 20745
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0165444612503052,
      "learning_rate": 1.6722131778844066e-05,
      "loss": 2.4687,
      "step": 20746
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.991619348526001,
      "learning_rate": 1.672182694068786e-05,
      "loss": 2.6768,
      "step": 20747
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0305722951889038,
      "learning_rate": 1.6721522091136333e-05,
      "loss": 2.5092,
      "step": 20748
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0502287149429321,
      "learning_rate": 1.672121723019e-05,
      "loss": 2.4332,
      "step": 20749
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0363707542419434,
      "learning_rate": 1.672091235784938e-05,
      "loss": 2.4156,
      "step": 20750
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0346072912216187,
      "learning_rate": 1.672060747411499e-05,
      "loss": 2.265,
      "step": 20751
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9119462966918945,
      "learning_rate": 1.672030257898734e-05,
      "loss": 2.3281,
      "step": 20752
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1231498718261719,
      "learning_rate": 1.6719997672466956e-05,
      "loss": 2.253,
      "step": 20753
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2635304927825928,
      "learning_rate": 1.6719692754554347e-05,
      "loss": 2.6643,
      "step": 20754
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0273115634918213,
      "learning_rate": 1.671938782525004e-05,
      "loss": 2.4535,
      "step": 20755
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9732064008712769,
      "learning_rate": 1.671908288455454e-05,
      "loss": 2.4841,
      "step": 20756
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.088244915008545,
      "learning_rate": 1.671877793246837e-05,
      "loss": 2.375,
      "step": 20757
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1439985036849976,
      "learning_rate": 1.6718472968992047e-05,
      "loss": 2.5796,
      "step": 20758
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9414029717445374,
      "learning_rate": 1.671816799412609e-05,
      "loss": 2.3068,
      "step": 20759
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1315299272537231,
      "learning_rate": 1.671786300787101e-05,
      "loss": 2.3751,
      "step": 20760
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.129254937171936,
      "learning_rate": 1.671755801022733e-05,
      "loss": 2.6513,
      "step": 20761
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0880508422851562,
      "learning_rate": 1.6717253001195565e-05,
      "loss": 2.5415,
      "step": 20762
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0962857007980347,
      "learning_rate": 1.6716947980776232e-05,
      "loss": 2.6496,
      "step": 20763
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.06198251247406,
      "learning_rate": 1.6716642948969846e-05,
      "loss": 2.5215,
      "step": 20764
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0725575685501099,
      "learning_rate": 1.6716337905776928e-05,
      "loss": 2.3536,
      "step": 20765
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.001057744026184,
      "learning_rate": 1.6716032851197993e-05,
      "loss": 2.2746,
      "step": 20766
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9416478872299194,
      "learning_rate": 1.6715727785233556e-05,
      "loss": 2.4883,
      "step": 20767
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0398063659667969,
      "learning_rate": 1.6715422707884136e-05,
      "loss": 2.5817,
      "step": 20768
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9988871812820435,
      "learning_rate": 1.6715117619150258e-05,
      "loss": 2.5923,
      "step": 20769
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0227113962173462,
      "learning_rate": 1.6714812519032423e-05,
      "loss": 2.3135,
      "step": 20770
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0090609788894653,
      "learning_rate": 1.671450740753116e-05,
      "loss": 2.4611,
      "step": 20771
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9730395674705505,
      "learning_rate": 1.6714202284646983e-05,
      "loss": 2.5069,
      "step": 20772
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9908173680305481,
      "learning_rate": 1.671389715038041e-05,
      "loss": 2.3593,
      "step": 20773
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9946169257164001,
      "learning_rate": 1.6713592004731956e-05,
      "loss": 2.4163,
      "step": 20774
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9691417217254639,
      "learning_rate": 1.6713286847702147e-05,
      "loss": 2.4497,
      "step": 20775
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.988133430480957,
      "learning_rate": 1.6712981679291488e-05,
      "loss": 2.668,
      "step": 20776
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0733190774917603,
      "learning_rate": 1.67126764995005e-05,
      "loss": 2.4879,
      "step": 20777
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0311557054519653,
      "learning_rate": 1.6712371308329707e-05,
      "loss": 2.324,
      "step": 20778
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0945497751235962,
      "learning_rate": 1.671206610577962e-05,
      "loss": 2.4838,
      "step": 20779
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0332624912261963,
      "learning_rate": 1.6711760891850757e-05,
      "loss": 2.4361,
      "step": 20780
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0460838079452515,
      "learning_rate": 1.6711455666543638e-05,
      "loss": 2.4447,
      "step": 20781
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9320278167724609,
      "learning_rate": 1.671115042985878e-05,
      "loss": 2.4112,
      "step": 20782
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0243325233459473,
      "learning_rate": 1.6710845181796695e-05,
      "loss": 2.4729,
      "step": 20783
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0673693418502808,
      "learning_rate": 1.6710539922357905e-05,
      "loss": 2.328,
      "step": 20784
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0911974906921387,
      "learning_rate": 1.6710234651542928e-05,
      "loss": 2.4704,
      "step": 20785
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.015189528465271,
      "learning_rate": 1.670992936935228e-05,
      "loss": 2.331,
      "step": 20786
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1013678312301636,
      "learning_rate": 1.6709624075786483e-05,
      "loss": 2.5791,
      "step": 20787
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.266258716583252,
      "learning_rate": 1.670931877084605e-05,
      "loss": 2.4601,
      "step": 20788
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0278456211090088,
      "learning_rate": 1.67090134545315e-05,
      "loss": 2.4458,
      "step": 20789
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0501205921173096,
      "learning_rate": 1.6708708126843346e-05,
      "loss": 2.401,
      "step": 20790
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4194531440734863,
      "learning_rate": 1.6708402787782115e-05,
      "loss": 2.4377,
      "step": 20791
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1128450632095337,
      "learning_rate": 1.6708097437348314e-05,
      "loss": 2.5594,
      "step": 20792
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1414668560028076,
      "learning_rate": 1.670779207554247e-05,
      "loss": 2.2619,
      "step": 20793
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0191073417663574,
      "learning_rate": 1.6707486702365094e-05,
      "loss": 2.4337,
      "step": 20794
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9594138860702515,
      "learning_rate": 1.670718131781671e-05,
      "loss": 2.4648,
      "step": 20795
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0788921117782593,
      "learning_rate": 1.6706875921897828e-05,
      "loss": 2.523,
      "step": 20796
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0225872993469238,
      "learning_rate": 1.670657051460897e-05,
      "loss": 2.4056,
      "step": 20797
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0351735353469849,
      "learning_rate": 1.6706265095950653e-05,
      "loss": 2.5667,
      "step": 20798
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0071607828140259,
      "learning_rate": 1.67059596659234e-05,
      "loss": 2.3348,
      "step": 20799
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0236444473266602,
      "learning_rate": 1.670565422452772e-05,
      "loss": 2.6659,
      "step": 20800
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0797054767608643,
      "learning_rate": 1.6705348771764135e-05,
      "loss": 2.3344,
      "step": 20801
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.161712884902954,
      "learning_rate": 1.6705043307633165e-05,
      "loss": 2.4022,
      "step": 20802
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.026211142539978,
      "learning_rate": 1.6704737832135322e-05,
      "loss": 2.4121,
      "step": 20803
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.032384991645813,
      "learning_rate": 1.670443234527113e-05,
      "loss": 2.4449,
      "step": 20804
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1525598764419556,
      "learning_rate": 1.6704126847041102e-05,
      "loss": 2.3189,
      "step": 20805
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.085302710533142,
      "learning_rate": 1.670382133744576e-05,
      "loss": 2.4111,
      "step": 20806
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0181231498718262,
      "learning_rate": 1.670351581648562e-05,
      "loss": 2.2679,
      "step": 20807
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9762760996818542,
      "learning_rate": 1.67032102841612e-05,
      "loss": 2.3693,
      "step": 20808
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0623551607131958,
      "learning_rate": 1.670290474047302e-05,
      "loss": 2.6113,
      "step": 20809
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0118603706359863,
      "learning_rate": 1.6702599185421596e-05,
      "loss": 2.5143,
      "step": 20810
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0657973289489746,
      "learning_rate": 1.670229361900744e-05,
      "loss": 2.2925,
      "step": 20811
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0868231058120728,
      "learning_rate": 1.670198804123108e-05,
      "loss": 2.4578,
      "step": 20812
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0667849779129028,
      "learning_rate": 1.670168245209303e-05,
      "loss": 2.5492,
      "step": 20813
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2062112092971802,
      "learning_rate": 1.670137685159381e-05,
      "loss": 2.6252,
      "step": 20814
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7714112997055054,
      "learning_rate": 1.670107123973393e-05,
      "loss": 2.2329,
      "step": 20815
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.05355703830719,
      "learning_rate": 1.6700765616513922e-05,
      "loss": 2.6691,
      "step": 20816
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0895378589630127,
      "learning_rate": 1.6700459981934293e-05,
      "loss": 2.3197,
      "step": 20817
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2942341566085815,
      "learning_rate": 1.6700154335995562e-05,
      "loss": 2.3436,
      "step": 20818
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9622281193733215,
      "learning_rate": 1.6699848678698255e-05,
      "loss": 2.5194,
      "step": 20819
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0646659135818481,
      "learning_rate": 1.669954301004288e-05,
      "loss": 2.3509,
      "step": 20820
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1709506511688232,
      "learning_rate": 1.669923733002996e-05,
      "loss": 2.5935,
      "step": 20821
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9880228638648987,
      "learning_rate": 1.669893163866002e-05,
      "loss": 2.4571,
      "step": 20822
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9668338298797607,
      "learning_rate": 1.6698625935933566e-05,
      "loss": 2.4932,
      "step": 20823
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0731656551361084,
      "learning_rate": 1.669832022185112e-05,
      "loss": 2.48,
      "step": 20824
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.123013973236084,
      "learning_rate": 1.6698014496413203e-05,
      "loss": 2.6158,
      "step": 20825
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9989885091781616,
      "learning_rate": 1.6697708759620336e-05,
      "loss": 2.4783,
      "step": 20826
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.005294919013977,
      "learning_rate": 1.669740301147303e-05,
      "loss": 2.4813,
      "step": 20827
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0616347789764404,
      "learning_rate": 1.669709725197181e-05,
      "loss": 2.5896,
      "step": 20828
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9950824975967407,
      "learning_rate": 1.669679148111719e-05,
      "loss": 2.6389,
      "step": 20829
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.044063925743103,
      "learning_rate": 1.6696485698909688e-05,
      "loss": 2.5759,
      "step": 20830
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.065769910812378,
      "learning_rate": 1.6696179905349824e-05,
      "loss": 2.4511,
      "step": 20831
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0341144800186157,
      "learning_rate": 1.6695874100438117e-05,
      "loss": 2.2998,
      "step": 20832
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1031936407089233,
      "learning_rate": 1.6695568284175084e-05,
      "loss": 2.6352,
      "step": 20833
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9662905335426331,
      "learning_rate": 1.6695262456561247e-05,
      "loss": 2.5231,
      "step": 20834
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9550092220306396,
      "learning_rate": 1.669495661759712e-05,
      "loss": 2.4253,
      "step": 20835
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2090502977371216,
      "learning_rate": 1.669465076728322e-05,
      "loss": 2.5662,
      "step": 20836
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.8971286416053772,
      "learning_rate": 1.6694344905620076e-05,
      "loss": 2.4994,
      "step": 20837
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0454366207122803,
      "learning_rate": 1.6694039032608197e-05,
      "loss": 2.3585,
      "step": 20838
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9582996368408203,
      "learning_rate": 1.66937331482481e-05,
      "loss": 2.3821,
      "step": 20839
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.135218858718872,
      "learning_rate": 1.6693427252540307e-05,
      "loss": 2.6075,
      "step": 20840
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0580099821090698,
      "learning_rate": 1.6693121345485344e-05,
      "loss": 2.6898,
      "step": 20841
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0932302474975586,
      "learning_rate": 1.6692815427083714e-05,
      "loss": 2.5232,
      "step": 20842
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0379207134246826,
      "learning_rate": 1.669250949733595e-05,
      "loss": 2.3551,
      "step": 20843
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.029721736907959,
      "learning_rate": 1.669220355624256e-05,
      "loss": 2.448,
      "step": 20844
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1586737632751465,
      "learning_rate": 1.669189760380407e-05,
      "loss": 2.3786,
      "step": 20845
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0549578666687012,
      "learning_rate": 1.6691591640020995e-05,
      "loss": 2.4349,
      "step": 20846
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0094823837280273,
      "learning_rate": 1.6691285664893856e-05,
      "loss": 2.5496,
      "step": 20847
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0661277770996094,
      "learning_rate": 1.6690979678423174e-05,
      "loss": 2.4362,
      "step": 20848
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0332704782485962,
      "learning_rate": 1.669067368060946e-05,
      "loss": 2.7337,
      "step": 20849
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9289594888687134,
      "learning_rate": 1.6690367671453234e-05,
      "loss": 2.5481,
      "step": 20850
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0429131984710693,
      "learning_rate": 1.6690061650955023e-05,
      "loss": 2.4263,
      "step": 20851
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.007062315940857,
      "learning_rate": 1.668975561911534e-05,
      "loss": 2.5983,
      "step": 20852
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0080065727233887,
      "learning_rate": 1.6689449575934703e-05,
      "loss": 2.3655,
      "step": 20853
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9625645875930786,
      "learning_rate": 1.6689143521413635e-05,
      "loss": 2.5315,
      "step": 20854
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2384827136993408,
      "learning_rate": 1.6688837455552645e-05,
      "loss": 2.5637,
      "step": 20855
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9512273073196411,
      "learning_rate": 1.6688531378352263e-05,
      "loss": 2.3916,
      "step": 20856
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.240060567855835,
      "learning_rate": 1.6688225289813006e-05,
      "loss": 2.4263,
      "step": 20857
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1130973100662231,
      "learning_rate": 1.6687919189935387e-05,
      "loss": 2.7045,
      "step": 20858
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.105594515800476,
      "learning_rate": 1.668761307871993e-05,
      "loss": 2.5485,
      "step": 20859
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9377493858337402,
      "learning_rate": 1.668730695616715e-05,
      "loss": 2.5557,
      "step": 20860
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9768975973129272,
      "learning_rate": 1.6687000822277574e-05,
      "loss": 2.3184,
      "step": 20861
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.036379098892212,
      "learning_rate": 1.668669467705171e-05,
      "loss": 2.4439,
      "step": 20862
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9531053900718689,
      "learning_rate": 1.6686388520490085e-05,
      "loss": 2.4429,
      "step": 20863
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0309702157974243,
      "learning_rate": 1.6686082352593216e-05,
      "loss": 2.2743,
      "step": 20864
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0608243942260742,
      "learning_rate": 1.668577617336162e-05,
      "loss": 2.3575,
      "step": 20865
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0138252973556519,
      "learning_rate": 1.6685469982795818e-05,
      "loss": 2.4799,
      "step": 20866
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.139405608177185,
      "learning_rate": 1.668516378089633e-05,
      "loss": 2.3396,
      "step": 20867
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0663847923278809,
      "learning_rate": 1.6684857567663672e-05,
      "loss": 2.5826,
      "step": 20868
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.089253544807434,
      "learning_rate": 1.6684551343098365e-05,
      "loss": 2.4769,
      "step": 20869
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1669695377349854,
      "learning_rate": 1.668424510720093e-05,
      "loss": 2.4949,
      "step": 20870
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2294505834579468,
      "learning_rate": 1.6683938859971882e-05,
      "loss": 2.441,
      "step": 20871
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9548177123069763,
      "learning_rate": 1.6683632601411744e-05,
      "loss": 2.4852,
      "step": 20872
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.055115818977356,
      "learning_rate": 1.668332633152103e-05,
      "loss": 2.5013,
      "step": 20873
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9478784799575806,
      "learning_rate": 1.6683020050300266e-05,
      "loss": 2.4538,
      "step": 20874
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0106602907180786,
      "learning_rate": 1.668271375774997e-05,
      "loss": 2.5499,
      "step": 20875
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0775288343429565,
      "learning_rate": 1.6682407453870652e-05,
      "loss": 2.5636,
      "step": 20876
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0131354331970215,
      "learning_rate": 1.6682101138662844e-05,
      "loss": 2.2511,
      "step": 20877
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9335513114929199,
      "learning_rate": 1.6681794812127055e-05,
      "loss": 2.2717,
      "step": 20878
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0456246137619019,
      "learning_rate": 1.6681488474263813e-05,
      "loss": 2.5836,
      "step": 20879
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.023918867111206,
      "learning_rate": 1.6681182125073637e-05,
      "loss": 2.5256,
      "step": 20880
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0057427883148193,
      "learning_rate": 1.6680875764557037e-05,
      "loss": 2.3623,
      "step": 20881
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0225154161453247,
      "learning_rate": 1.668056939271454e-05,
      "loss": 2.5279,
      "step": 20882
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1379189491271973,
      "learning_rate": 1.6680263009546663e-05,
      "loss": 2.4371,
      "step": 20883
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0801582336425781,
      "learning_rate": 1.6679956615053926e-05,
      "loss": 2.2363,
      "step": 20884
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1190392971038818,
      "learning_rate": 1.6679650209236846e-05,
      "loss": 2.3574,
      "step": 20885
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.178382396697998,
      "learning_rate": 1.6679343792095948e-05,
      "loss": 2.4188,
      "step": 20886
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1426591873168945,
      "learning_rate": 1.6679037363631748e-05,
      "loss": 2.4796,
      "step": 20887
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0511913299560547,
      "learning_rate": 1.6678730923844763e-05,
      "loss": 2.4953,
      "step": 20888
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0588157176971436,
      "learning_rate": 1.6678424472735517e-05,
      "loss": 2.418,
      "step": 20889
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1150096654891968,
      "learning_rate": 1.6678118010304526e-05,
      "loss": 2.2269,
      "step": 20890
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9464039206504822,
      "learning_rate": 1.6677811536552314e-05,
      "loss": 2.5773,
      "step": 20891
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.929006814956665,
      "learning_rate": 1.6677505051479398e-05,
      "loss": 2.2741,
      "step": 20892
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0452526807785034,
      "learning_rate": 1.6677198555086292e-05,
      "loss": 2.5334,
      "step": 20893
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.077197790145874,
      "learning_rate": 1.6676892047373527e-05,
      "loss": 2.3857,
      "step": 20894
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4648820161819458,
      "learning_rate": 1.6676585528341617e-05,
      "loss": 2.2693,
      "step": 20895
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.008958339691162,
      "learning_rate": 1.667627899799108e-05,
      "loss": 2.5507,
      "step": 20896
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0320700407028198,
      "learning_rate": 1.6675972456322433e-05,
      "loss": 2.6589,
      "step": 20897
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9276434779167175,
      "learning_rate": 1.66756659033362e-05,
      "loss": 2.249,
      "step": 20898
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.135933756828308,
      "learning_rate": 1.6675359339032903e-05,
      "loss": 2.4926,
      "step": 20899
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0809617042541504,
      "learning_rate": 1.667505276341306e-05,
      "loss": 2.183,
      "step": 20900
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9592372179031372,
      "learning_rate": 1.6674746176477187e-05,
      "loss": 2.4632,
      "step": 20901
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0429002046585083,
      "learning_rate": 1.6674439578225806e-05,
      "loss": 2.5536,
      "step": 20902
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9224057197570801,
      "learning_rate": 1.667413296865944e-05,
      "loss": 2.3925,
      "step": 20903
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0782402753829956,
      "learning_rate": 1.6673826347778603e-05,
      "loss": 2.5425,
      "step": 20904
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3488695621490479,
      "learning_rate": 1.667351971558382e-05,
      "loss": 2.3873,
      "step": 20905
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.008278727531433,
      "learning_rate": 1.6673213072075606e-05,
      "loss": 2.4559,
      "step": 20906
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0474591255187988,
      "learning_rate": 1.6672906417254485e-05,
      "loss": 2.4553,
      "step": 20907
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.002640724182129,
      "learning_rate": 1.6672599751120975e-05,
      "loss": 2.3433,
      "step": 20908
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0684549808502197,
      "learning_rate": 1.6672293073675597e-05,
      "loss": 2.3405,
      "step": 20909
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0522481203079224,
      "learning_rate": 1.667198638491887e-05,
      "loss": 2.2986,
      "step": 20910
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9038280248641968,
      "learning_rate": 1.6671679684851313e-05,
      "loss": 2.3745,
      "step": 20911
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.977512538433075,
      "learning_rate": 1.6671372973473446e-05,
      "loss": 2.5993,
      "step": 20912
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0602045059204102,
      "learning_rate": 1.6671066250785792e-05,
      "loss": 2.6192,
      "step": 20913
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.8847701549530029,
      "learning_rate": 1.667075951678887e-05,
      "loss": 2.4899,
      "step": 20914
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0524476766586304,
      "learning_rate": 1.6670452771483196e-05,
      "loss": 2.7029,
      "step": 20915
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0174574851989746,
      "learning_rate": 1.6670146014869295e-05,
      "loss": 2.6085,
      "step": 20916
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0153955221176147,
      "learning_rate": 1.6669839246947683e-05,
      "loss": 2.4772,
      "step": 20917
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1961331367492676,
      "learning_rate": 1.6669532467718882e-05,
      "loss": 2.277,
      "step": 20918
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0181175470352173,
      "learning_rate": 1.6669225677183415e-05,
      "loss": 2.4194,
      "step": 20919
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0245004892349243,
      "learning_rate": 1.6668918875341796e-05,
      "loss": 2.5111,
      "step": 20920
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9743157625198364,
      "learning_rate": 1.666861206219455e-05,
      "loss": 2.4355,
      "step": 20921
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1888461112976074,
      "learning_rate": 1.6668305237742194e-05,
      "loss": 2.5191,
      "step": 20922
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9366768002510071,
      "learning_rate": 1.666799840198525e-05,
      "loss": 2.4099,
      "step": 20923
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0529537200927734,
      "learning_rate": 1.666769155492424e-05,
      "loss": 2.5355,
      "step": 20924
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0798425674438477,
      "learning_rate": 1.6667384696559677e-05,
      "loss": 2.538,
      "step": 20925
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0793828964233398,
      "learning_rate": 1.6667077826892088e-05,
      "loss": 2.5868,
      "step": 20926
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9646422266960144,
      "learning_rate": 1.6666770945921996e-05,
      "loss": 2.3159,
      "step": 20927
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.041578769683838,
      "learning_rate": 1.666646405364991e-05,
      "loss": 2.386,
      "step": 20928
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0909446477890015,
      "learning_rate": 1.666615715007636e-05,
      "loss": 2.4562,
      "step": 20929
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1038553714752197,
      "learning_rate": 1.6665850235201865e-05,
      "loss": 2.3831,
      "step": 20930
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.009360432624817,
      "learning_rate": 1.666554330902694e-05,
      "loss": 2.4399,
      "step": 20931
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0655797719955444,
      "learning_rate": 1.666523637155211e-05,
      "loss": 2.4388,
      "step": 20932
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9852789640426636,
      "learning_rate": 1.6664929422777894e-05,
      "loss": 2.439,
      "step": 20933
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1949371099472046,
      "learning_rate": 1.6664622462704812e-05,
      "loss": 2.4634,
      "step": 20934
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9547277688980103,
      "learning_rate": 1.6664315491333387e-05,
      "loss": 2.4085,
      "step": 20935
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0073764324188232,
      "learning_rate": 1.6664008508664132e-05,
      "loss": 2.2988,
      "step": 20936
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9742031693458557,
      "learning_rate": 1.666370151469758e-05,
      "loss": 2.7076,
      "step": 20937
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9919570684432983,
      "learning_rate": 1.6663394509434238e-05,
      "loss": 2.2941,
      "step": 20938
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.115283727645874,
      "learning_rate": 1.6663087492874636e-05,
      "loss": 2.3217,
      "step": 20939
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1525092124938965,
      "learning_rate": 1.6662780465019287e-05,
      "loss": 2.5998,
      "step": 20940
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0363668203353882,
      "learning_rate": 1.6662473425868723e-05,
      "loss": 2.2662,
      "step": 20941
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0517898797988892,
      "learning_rate": 1.6662166375423452e-05,
      "loss": 2.2629,
      "step": 20942
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.086177945137024,
      "learning_rate": 1.6661859313684e-05,
      "loss": 2.4732,
      "step": 20943
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0455939769744873,
      "learning_rate": 1.666155224065089e-05,
      "loss": 2.4237,
      "step": 20944
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0830227136611938,
      "learning_rate": 1.6661245156324632e-05,
      "loss": 2.5017,
      "step": 20945
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0483702421188354,
      "learning_rate": 1.666093806070576e-05,
      "loss": 2.3693,
      "step": 20946
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1048674583435059,
      "learning_rate": 1.666063095379479e-05,
      "loss": 2.4939,
      "step": 20947
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9872008562088013,
      "learning_rate": 1.666032383559224e-05,
      "loss": 2.5547,
      "step": 20948
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9813193082809448,
      "learning_rate": 1.666001670609863e-05,
      "loss": 2.3191,
      "step": 20949
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.08998441696167,
      "learning_rate": 1.6659709565314482e-05,
      "loss": 2.6187,
      "step": 20950
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0289360284805298,
      "learning_rate": 1.665940241324032e-05,
      "loss": 2.5209,
      "step": 20951
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.217702865600586,
      "learning_rate": 1.6659095249876665e-05,
      "loss": 2.5247,
      "step": 20952
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1122339963912964,
      "learning_rate": 1.665878807522403e-05,
      "loss": 2.4129,
      "step": 20953
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0002832412719727,
      "learning_rate": 1.6658480889282946e-05,
      "loss": 2.533,
      "step": 20954
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1343106031417847,
      "learning_rate": 1.6658173692053923e-05,
      "loss": 2.4222,
      "step": 20955
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.070124626159668,
      "learning_rate": 1.665786648353749e-05,
      "loss": 2.0766,
      "step": 20956
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0432387590408325,
      "learning_rate": 1.6657559263734166e-05,
      "loss": 2.4299,
      "step": 20957
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1640474796295166,
      "learning_rate": 1.665725203264447e-05,
      "loss": 2.4817,
      "step": 20958
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.049872636795044,
      "learning_rate": 1.6656944790268923e-05,
      "loss": 2.4923,
      "step": 20959
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9195289611816406,
      "learning_rate": 1.6656637536608047e-05,
      "loss": 2.6204,
      "step": 20960
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0555835962295532,
      "learning_rate": 1.6656330271662362e-05,
      "loss": 2.2652,
      "step": 20961
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9900016188621521,
      "learning_rate": 1.665602299543239e-05,
      "loss": 2.4312,
      "step": 20962
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3025240898132324,
      "learning_rate": 1.6655715707918653e-05,
      "loss": 2.4472,
      "step": 20963
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3047568798065186,
      "learning_rate": 1.665540840912167e-05,
      "loss": 2.3353,
      "step": 20964
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3515344858169556,
      "learning_rate": 1.665510109904196e-05,
      "loss": 2.3221,
      "step": 20965
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1896929740905762,
      "learning_rate": 1.6654793777680044e-05,
      "loss": 2.3931,
      "step": 20966
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9866147041320801,
      "learning_rate": 1.665448644503645e-05,
      "loss": 2.7049,
      "step": 20967
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9628532528877258,
      "learning_rate": 1.6654179101111688e-05,
      "loss": 2.3286,
      "step": 20968
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0323405265808105,
      "learning_rate": 1.665387174590629e-05,
      "loss": 2.613,
      "step": 20969
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0131101608276367,
      "learning_rate": 1.6653564379420773e-05,
      "loss": 2.4059,
      "step": 20970
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0488473176956177,
      "learning_rate": 1.6653257001655652e-05,
      "loss": 2.5967,
      "step": 20971
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1850829124450684,
      "learning_rate": 1.665294961261146e-05,
      "loss": 2.5119,
      "step": 20972
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0655226707458496,
      "learning_rate": 1.665264221228871e-05,
      "loss": 2.4516,
      "step": 20973
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0638099908828735,
      "learning_rate": 1.6652334800687922e-05,
      "loss": 2.4125,
      "step": 20974
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.977677583694458,
      "learning_rate": 1.665202737780962e-05,
      "loss": 2.3441,
      "step": 20975
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0504448413848877,
      "learning_rate": 1.6651719943654328e-05,
      "loss": 2.5297,
      "step": 20976
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0035524368286133,
      "learning_rate": 1.665141249822256e-05,
      "loss": 2.39,
      "step": 20977
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9505007266998291,
      "learning_rate": 1.6651105041514844e-05,
      "loss": 2.5853,
      "step": 20978
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9733124375343323,
      "learning_rate": 1.66507975735317e-05,
      "loss": 2.3322,
      "step": 20979
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1817646026611328,
      "learning_rate": 1.6650490094273644e-05,
      "loss": 2.3656,
      "step": 20980
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0838477611541748,
      "learning_rate": 1.66501826037412e-05,
      "loss": 2.3225,
      "step": 20981
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.09553861618042,
      "learning_rate": 1.6649875101934892e-05,
      "loss": 2.4932,
      "step": 20982
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0857479572296143,
      "learning_rate": 1.6649567588855242e-05,
      "loss": 2.4942,
      "step": 20983
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.167015790939331,
      "learning_rate": 1.664926006450277e-05,
      "loss": 2.5785,
      "step": 20984
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0478332042694092,
      "learning_rate": 1.6648952528877992e-05,
      "loss": 2.2972,
      "step": 20985
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0430485010147095,
      "learning_rate": 1.6648644981981436e-05,
      "loss": 2.5341,
      "step": 20986
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.047072410583496,
      "learning_rate": 1.6648337423813622e-05,
      "loss": 2.8281,
      "step": 20987
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1150072813034058,
      "learning_rate": 1.6648029854375066e-05,
      "loss": 2.5703,
      "step": 20988
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0588434934616089,
      "learning_rate": 1.66477222736663e-05,
      "loss": 2.485,
      "step": 20989
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2202829122543335,
      "learning_rate": 1.664741468168783e-05,
      "loss": 2.3748,
      "step": 20990
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2132227420806885,
      "learning_rate": 1.6647107078440195e-05,
      "loss": 2.4746,
      "step": 20991
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0573453903198242,
      "learning_rate": 1.6646799463923904e-05,
      "loss": 2.3375,
      "step": 20992
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0270676612854004,
      "learning_rate": 1.6646491838139487e-05,
      "loss": 2.6588,
      "step": 20993
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.210595726966858,
      "learning_rate": 1.6646184201087458e-05,
      "loss": 2.3633,
      "step": 20994
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9859777092933655,
      "learning_rate": 1.6645876552768342e-05,
      "loss": 2.3005,
      "step": 20995
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1123579740524292,
      "learning_rate": 1.664556889318266e-05,
      "loss": 2.6106,
      "step": 20996
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5661817789077759,
      "learning_rate": 1.6645261222330934e-05,
      "loss": 2.441,
      "step": 20997
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1381059885025024,
      "learning_rate": 1.6644953540213686e-05,
      "loss": 2.3486,
      "step": 20998
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9507586359977722,
      "learning_rate": 1.664464584683144e-05,
      "loss": 2.3176,
      "step": 20999
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9891719222068787,
      "learning_rate": 1.664433814218471e-05,
      "loss": 2.2665,
      "step": 21000
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1193487644195557,
      "learning_rate": 1.6644030426274024e-05,
      "loss": 2.4163,
      "step": 21001
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0301451683044434,
      "learning_rate": 1.66437226990999e-05,
      "loss": 2.2223,
      "step": 21002
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.006895899772644,
      "learning_rate": 1.6643414960662863e-05,
      "loss": 2.6563,
      "step": 21003
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9685174226760864,
      "learning_rate": 1.6643107210963434e-05,
      "loss": 2.6075,
      "step": 21004
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0836577415466309,
      "learning_rate": 1.6642799450002133e-05,
      "loss": 2.7356,
      "step": 21005
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9268020987510681,
      "learning_rate": 1.6642491677779485e-05,
      "loss": 2.5497,
      "step": 21006
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0939874649047852,
      "learning_rate": 1.6642183894296008e-05,
      "loss": 2.5849,
      "step": 21007
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3075867891311646,
      "learning_rate": 1.6641876099552224e-05,
      "loss": 2.3531,
      "step": 21008
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0153412818908691,
      "learning_rate": 1.6641568293548656e-05,
      "loss": 2.2604,
      "step": 21009
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0588608980178833,
      "learning_rate": 1.6641260476285827e-05,
      "loss": 2.3922,
      "step": 21010
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.088093638420105,
      "learning_rate": 1.664095264776426e-05,
      "loss": 2.5937,
      "step": 21011
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9849609732627869,
      "learning_rate": 1.6640644807984472e-05,
      "loss": 2.3,
      "step": 21012
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0865460634231567,
      "learning_rate": 1.6640336956946987e-05,
      "loss": 2.4569,
      "step": 21013
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.040010929107666,
      "learning_rate": 1.664002909465233e-05,
      "loss": 2.4776,
      "step": 21014
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0280568599700928,
      "learning_rate": 1.6639721221101017e-05,
      "loss": 2.2197,
      "step": 21015
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1526068449020386,
      "learning_rate": 1.6639413336293575e-05,
      "loss": 2.4544,
      "step": 21016
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9745588302612305,
      "learning_rate": 1.6639105440230526e-05,
      "loss": 2.4606,
      "step": 21017
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9916432499885559,
      "learning_rate": 1.6638797532912384e-05,
      "loss": 2.3043,
      "step": 21018
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9852439761161804,
      "learning_rate": 1.6638489614339685e-05,
      "loss": 2.5783,
      "step": 21019
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0508641004562378,
      "learning_rate": 1.663818168451294e-05,
      "loss": 2.6054,
      "step": 21020
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1008596420288086,
      "learning_rate": 1.6637873743432674e-05,
      "loss": 2.4167,
      "step": 21021
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1075353622436523,
      "learning_rate": 1.6637565791099407e-05,
      "loss": 2.6625,
      "step": 21022
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9925011396408081,
      "learning_rate": 1.6637257827513666e-05,
      "loss": 2.3484,
      "step": 21023
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.010189175605774,
      "learning_rate": 1.6636949852675966e-05,
      "loss": 2.3912,
      "step": 21024
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0806349515914917,
      "learning_rate": 1.6636641866586836e-05,
      "loss": 2.4852,
      "step": 21025
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1140758991241455,
      "learning_rate": 1.66363338692468e-05,
      "loss": 2.4105,
      "step": 21026
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1040953397750854,
      "learning_rate": 1.663602586065637e-05,
      "loss": 2.605,
      "step": 21027
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2788336277008057,
      "learning_rate": 1.6635717840816075e-05,
      "loss": 2.4436,
      "step": 21028
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0261032581329346,
      "learning_rate": 1.6635409809726436e-05,
      "loss": 2.5135,
      "step": 21029
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1603286266326904,
      "learning_rate": 1.6635101767387976e-05,
      "loss": 2.6238,
      "step": 21030
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1152539253234863,
      "learning_rate": 1.6634793713801217e-05,
      "loss": 2.4888,
      "step": 21031
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9415915608406067,
      "learning_rate": 1.663448564896668e-05,
      "loss": 2.4924,
      "step": 21032
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0609241724014282,
      "learning_rate": 1.6634177572884887e-05,
      "loss": 2.413,
      "step": 21033
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0716923475265503,
      "learning_rate": 1.6633869485556364e-05,
      "loss": 2.4424,
      "step": 21034
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.221219778060913,
      "learning_rate": 1.6633561386981628e-05,
      "loss": 2.3828,
      "step": 21035
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9594178795814514,
      "learning_rate": 1.6633253277161205e-05,
      "loss": 2.5516,
      "step": 21036
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2133537530899048,
      "learning_rate": 1.6632945156095613e-05,
      "loss": 2.3046,
      "step": 21037
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9935908913612366,
      "learning_rate": 1.663263702378538e-05,
      "loss": 2.5236,
      "step": 21038
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0382182598114014,
      "learning_rate": 1.663232888023103e-05,
      "loss": 2.4993,
      "step": 21039
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1603195667266846,
      "learning_rate": 1.6632020725433074e-05,
      "loss": 2.5391,
      "step": 21040
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.102698802947998,
      "learning_rate": 1.663171255939205e-05,
      "loss": 2.4211,
      "step": 21041
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0883169174194336,
      "learning_rate": 1.6631404382108464e-05,
      "loss": 2.4961,
      "step": 21042
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0451093912124634,
      "learning_rate": 1.6631096193582848e-05,
      "loss": 2.3367,
      "step": 21043
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0220743417739868,
      "learning_rate": 1.6630787993815725e-05,
      "loss": 2.3286,
      "step": 21044
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0329996347427368,
      "learning_rate": 1.6630479782807615e-05,
      "loss": 2.3741,
      "step": 21045
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9832659363746643,
      "learning_rate": 1.663017156055904e-05,
      "loss": 2.492,
      "step": 21046
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0396008491516113,
      "learning_rate": 1.6629863327070524e-05,
      "loss": 2.5078,
      "step": 21047
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2261382341384888,
      "learning_rate": 1.662955508234259e-05,
      "loss": 2.5136,
      "step": 21048
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0804190635681152,
      "learning_rate": 1.6629246826375758e-05,
      "loss": 2.4912,
      "step": 21049
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0192288160324097,
      "learning_rate": 1.6628938559170552e-05,
      "loss": 2.4843,
      "step": 21050
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9784860610961914,
      "learning_rate": 1.66286302807275e-05,
      "loss": 2.4735,
      "step": 21051
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1126850843429565,
      "learning_rate": 1.6628321991047114e-05,
      "loss": 2.333,
      "step": 21052
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.255236029624939,
      "learning_rate": 1.6628013690129922e-05,
      "loss": 2.5986,
      "step": 21053
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9810110330581665,
      "learning_rate": 1.662770537797645e-05,
      "loss": 2.303,
      "step": 21054
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2549123764038086,
      "learning_rate": 1.6627397054587212e-05,
      "loss": 2.1951,
      "step": 21055
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2851746082305908,
      "learning_rate": 1.6627088719962738e-05,
      "loss": 2.4379,
      "step": 21056
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0619515180587769,
      "learning_rate": 1.662678037410355e-05,
      "loss": 2.4085,
      "step": 21057
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0093252658843994,
      "learning_rate": 1.662647201701017e-05,
      "loss": 2.4273,
      "step": 21058
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.972115695476532,
      "learning_rate": 1.662616364868312e-05,
      "loss": 2.3796,
      "step": 21059
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.230453610420227,
      "learning_rate": 1.6625855269122923e-05,
      "loss": 2.5269,
      "step": 21060
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9963583946228027,
      "learning_rate": 1.66255468783301e-05,
      "loss": 2.4924,
      "step": 21061
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9852562546730042,
      "learning_rate": 1.6625238476305178e-05,
      "loss": 2.3727,
      "step": 21062
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0178827047348022,
      "learning_rate": 1.6624930063048673e-05,
      "loss": 2.4279,
      "step": 21063
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0386741161346436,
      "learning_rate": 1.6624621638561118e-05,
      "loss": 2.3971,
      "step": 21064
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9682234525680542,
      "learning_rate": 1.662431320284303e-05,
      "loss": 2.2181,
      "step": 21065
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.055665135383606,
      "learning_rate": 1.6624004755894926e-05,
      "loss": 2.4265,
      "step": 21066
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.073472499847412,
      "learning_rate": 1.662369629771734e-05,
      "loss": 2.4022,
      "step": 21067
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.031826376914978,
      "learning_rate": 1.662338782831079e-05,
      "loss": 2.2703,
      "step": 21068
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1432124376296997,
      "learning_rate": 1.6623079347675796e-05,
      "loss": 2.6586,
      "step": 21069
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9959329962730408,
      "learning_rate": 1.662277085581288e-05,
      "loss": 2.6212,
      "step": 21070
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0491069555282593,
      "learning_rate": 1.6622462352722576e-05,
      "loss": 2.488,
      "step": 21071
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9485949277877808,
      "learning_rate": 1.6622153838405398e-05,
      "loss": 2.3915,
      "step": 21072
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.068142294883728,
      "learning_rate": 1.662184531286187e-05,
      "loss": 2.528,
      "step": 21073
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0242236852645874,
      "learning_rate": 1.6621536776092513e-05,
      "loss": 2.4302,
      "step": 21074
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.166024923324585,
      "learning_rate": 1.6621228228097857e-05,
      "loss": 2.4868,
      "step": 21075
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2349956035614014,
      "learning_rate": 1.6620919668878416e-05,
      "loss": 2.2892,
      "step": 21076
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1397743225097656,
      "learning_rate": 1.6620611098434724e-05,
      "loss": 2.3577,
      "step": 21077
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.056701421737671,
      "learning_rate": 1.6620302516767296e-05,
      "loss": 2.323,
      "step": 21078
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9169910550117493,
      "learning_rate": 1.6619993923876655e-05,
      "loss": 2.4838,
      "step": 21079
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0507433414459229,
      "learning_rate": 1.661968531976333e-05,
      "loss": 2.5721,
      "step": 21080
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0531662702560425,
      "learning_rate": 1.6619376704427835e-05,
      "loss": 2.368,
      "step": 21081
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1025651693344116,
      "learning_rate": 1.66190680778707e-05,
      "loss": 2.3983,
      "step": 21082
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0378447771072388,
      "learning_rate": 1.661875944009245e-05,
      "loss": 2.2389,
      "step": 21083
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0102299451828003,
      "learning_rate": 1.6618450791093604e-05,
      "loss": 2.6019,
      "step": 21084
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0270562171936035,
      "learning_rate": 1.661814213087469e-05,
      "loss": 2.6679,
      "step": 21085
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0385355949401855,
      "learning_rate": 1.6617833459436223e-05,
      "loss": 2.5015,
      "step": 21086
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0579077005386353,
      "learning_rate": 1.6617524776778727e-05,
      "loss": 2.2492,
      "step": 21087
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1051137447357178,
      "learning_rate": 1.6617216082902738e-05,
      "loss": 2.4009,
      "step": 21088
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.053104281425476,
      "learning_rate": 1.6616907377808766e-05,
      "loss": 2.2359,
      "step": 21089
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0100362300872803,
      "learning_rate": 1.6616598661497337e-05,
      "loss": 2.3216,
      "step": 21090
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0362178087234497,
      "learning_rate": 1.6616289933968982e-05,
      "loss": 2.3389,
      "step": 21091
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.075384259223938,
      "learning_rate": 1.661598119522421e-05,
      "loss": 2.4828,
      "step": 21092
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1412692070007324,
      "learning_rate": 1.661567244526356e-05,
      "loss": 2.2076,
      "step": 21093
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0361369848251343,
      "learning_rate": 1.6615363684087548e-05,
      "loss": 2.5762,
      "step": 21094
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.056391954421997,
      "learning_rate": 1.6615054911696696e-05,
      "loss": 2.3516,
      "step": 21095
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0604908466339111,
      "learning_rate": 1.661474612809153e-05,
      "loss": 2.5841,
      "step": 21096
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0965611934661865,
      "learning_rate": 1.661443733327257e-05,
      "loss": 2.402,
      "step": 21097
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0219420194625854,
      "learning_rate": 1.6614128527240344e-05,
      "loss": 2.451,
      "step": 21098
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0683152675628662,
      "learning_rate": 1.6613819709995375e-05,
      "loss": 2.2785,
      "step": 21099
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9495127201080322,
      "learning_rate": 1.6613510881538183e-05,
      "loss": 2.4115,
      "step": 21100
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.97344970703125,
      "learning_rate": 1.6613202041869297e-05,
      "loss": 2.5554,
      "step": 21101
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1824373006820679,
      "learning_rate": 1.6612893190989235e-05,
      "loss": 2.5066,
      "step": 21102
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1289551258087158,
      "learning_rate": 1.6612584328898527e-05,
      "loss": 2.3742,
      "step": 21103
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1651891469955444,
      "learning_rate": 1.661227545559769e-05,
      "loss": 2.2876,
      "step": 21104
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0200073719024658,
      "learning_rate": 1.6611966571087248e-05,
      "loss": 2.3069,
      "step": 21105
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0195633172988892,
      "learning_rate": 1.661165767536773e-05,
      "loss": 2.1725,
      "step": 21106
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0750802755355835,
      "learning_rate": 1.6611348768439656e-05,
      "loss": 2.4478,
      "step": 21107
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0290824174880981,
      "learning_rate": 1.661103985030355e-05,
      "loss": 2.3126,
      "step": 21108
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9813759326934814,
      "learning_rate": 1.6610730920959932e-05,
      "loss": 2.349,
      "step": 21109
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.8865393400192261,
      "learning_rate": 1.6610421980409334e-05,
      "loss": 2.3631,
      "step": 21110
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9785754084587097,
      "learning_rate": 1.6610113028652276e-05,
      "loss": 2.4916,
      "step": 21111
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0122811794281006,
      "learning_rate": 1.6609804065689278e-05,
      "loss": 2.4332,
      "step": 21112
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0888543128967285,
      "learning_rate": 1.660949509152087e-05,
      "loss": 2.3256,
      "step": 21113
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.022689938545227,
      "learning_rate": 1.660918610614757e-05,
      "loss": 2.4272,
      "step": 21114
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.131312608718872,
      "learning_rate": 1.6608877109569905e-05,
      "loss": 2.3683,
      "step": 21115
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1200158596038818,
      "learning_rate": 1.66085681017884e-05,
      "loss": 2.5098,
      "step": 21116
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.075471043586731,
      "learning_rate": 1.6608259082803575e-05,
      "loss": 2.2781,
      "step": 21117
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.9523522853851318,
      "learning_rate": 1.6607950052615956e-05,
      "loss": 2.3651,
      "step": 21118
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.0202364921569824,
      "learning_rate": 1.660764101122607e-05,
      "loss": 2.4154,
      "step": 21119
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5237195491790771,
      "learning_rate": 1.6607331958634435e-05,
      "loss": 2.5753,
      "step": 21120
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0363410711288452,
      "learning_rate": 1.660702289484158e-05,
      "loss": 2.3224,
      "step": 21121
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4347492456436157,
      "learning_rate": 1.6606713819848025e-05,
      "loss": 2.3486,
      "step": 21122
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2025011777877808,
      "learning_rate": 1.6606404733654296e-05,
      "loss": 2.332,
      "step": 21123
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0676684379577637,
      "learning_rate": 1.6606095636260917e-05,
      "loss": 2.5215,
      "step": 21124
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0528416633605957,
      "learning_rate": 1.6605786527668412e-05,
      "loss": 2.526,
      "step": 21125
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9626641273498535,
      "learning_rate": 1.6605477407877307e-05,
      "loss": 1.9673,
      "step": 21126
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0776301622390747,
      "learning_rate": 1.660516827688812e-05,
      "loss": 2.4157,
      "step": 21127
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.052677035331726,
      "learning_rate": 1.6604859134701378e-05,
      "loss": 2.2903,
      "step": 21128
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1137878894805908,
      "learning_rate": 1.660454998131761e-05,
      "loss": 2.304,
      "step": 21129
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.037142276763916,
      "learning_rate": 1.6604240816737332e-05,
      "loss": 2.4932,
      "step": 21130
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1375924348831177,
      "learning_rate": 1.6603931640961077e-05,
      "loss": 2.6312,
      "step": 21131
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0374709367752075,
      "learning_rate": 1.660362245398936e-05,
      "loss": 2.5515,
      "step": 21132
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1635738611221313,
      "learning_rate": 1.6603313255822713e-05,
      "loss": 2.2687,
      "step": 21133
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2602503299713135,
      "learning_rate": 1.6603004046461652e-05,
      "loss": 2.5737,
      "step": 21134
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9137157201766968,
      "learning_rate": 1.660269482590671e-05,
      "loss": 2.5278,
      "step": 21135
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9417598247528076,
      "learning_rate": 1.6602385594158404e-05,
      "loss": 2.3476,
      "step": 21136
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0354154109954834,
      "learning_rate": 1.660207635121726e-05,
      "loss": 2.5053,
      "step": 21137
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9801291823387146,
      "learning_rate": 1.660176709708381e-05,
      "loss": 2.4057,
      "step": 21138
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0791763067245483,
      "learning_rate": 1.6601457831758567e-05,
      "loss": 2.3174,
      "step": 21139
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.126045823097229,
      "learning_rate": 1.6601148555242058e-05,
      "loss": 2.4374,
      "step": 21140
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0601153373718262,
      "learning_rate": 1.6600839267534813e-05,
      "loss": 2.4624,
      "step": 21141
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4650338888168335,
      "learning_rate": 1.6600529968637355e-05,
      "loss": 2.3082,
      "step": 21142
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0375555753707886,
      "learning_rate": 1.66002206585502e-05,
      "loss": 2.4657,
      "step": 21143
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0225855112075806,
      "learning_rate": 1.6599911337273883e-05,
      "loss": 2.4601,
      "step": 21144
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9647702574729919,
      "learning_rate": 1.6599602004808923e-05,
      "loss": 2.5093,
      "step": 21145
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.025935411453247,
      "learning_rate": 1.6599292661155843e-05,
      "loss": 2.5128,
      "step": 21146
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.041235089302063,
      "learning_rate": 1.659898330631517e-05,
      "loss": 2.554,
      "step": 21147
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0574628114700317,
      "learning_rate": 1.659867394028743e-05,
      "loss": 2.7789,
      "step": 21148
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9968448877334595,
      "learning_rate": 1.6598364563073143e-05,
      "loss": 2.4428,
      "step": 21149
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9693108201026917,
      "learning_rate": 1.6598055174672837e-05,
      "loss": 2.5458,
      "step": 21150
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0029016733169556,
      "learning_rate": 1.6597745775087038e-05,
      "loss": 2.3303,
      "step": 21151
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.106335997581482,
      "learning_rate": 1.6597436364316263e-05,
      "loss": 2.3312,
      "step": 21152
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0701472759246826,
      "learning_rate": 1.6597126942361046e-05,
      "loss": 2.5022,
      "step": 21153
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.047654390335083,
      "learning_rate": 1.6596817509221903e-05,
      "loss": 2.4026,
      "step": 21154
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0533396005630493,
      "learning_rate": 1.6596508064899368e-05,
      "loss": 2.4992,
      "step": 21155
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9951097369194031,
      "learning_rate": 1.6596198609393958e-05,
      "loss": 2.415,
      "step": 21156
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.007570743560791,
      "learning_rate": 1.6595889142706195e-05,
      "loss": 2.3911,
      "step": 21157
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1085885763168335,
      "learning_rate": 1.6595579664836618e-05,
      "loss": 2.4269,
      "step": 21158
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0760154724121094,
      "learning_rate": 1.6595270175785735e-05,
      "loss": 2.2848,
      "step": 21159
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0583131313323975,
      "learning_rate": 1.6594960675554078e-05,
      "loss": 2.3225,
      "step": 21160
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1125134229660034,
      "learning_rate": 1.6594651164142173e-05,
      "loss": 2.132,
      "step": 21161
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0456974506378174,
      "learning_rate": 1.6594341641550544e-05,
      "loss": 2.3103,
      "step": 21162
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9870567917823792,
      "learning_rate": 1.6594032107779713e-05,
      "loss": 2.5025,
      "step": 21163
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0691375732421875,
      "learning_rate": 1.659372256283021e-05,
      "loss": 2.4279,
      "step": 21164
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9624147415161133,
      "learning_rate": 1.6593413006702553e-05,
      "loss": 2.6845,
      "step": 21165
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1165090799331665,
      "learning_rate": 1.659310343939727e-05,
      "loss": 2.4978,
      "step": 21166
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0894756317138672,
      "learning_rate": 1.6592793860914888e-05,
      "loss": 2.488,
      "step": 21167
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1189115047454834,
      "learning_rate": 1.659248427125593e-05,
      "loss": 2.4168,
      "step": 21168
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1531398296356201,
      "learning_rate": 1.659217467042092e-05,
      "loss": 2.4138,
      "step": 21169
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0850309133529663,
      "learning_rate": 1.6591865058410383e-05,
      "loss": 2.4006,
      "step": 21170
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0285824537277222,
      "learning_rate": 1.6591555435224843e-05,
      "loss": 2.4249,
      "step": 21171
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0608917474746704,
      "learning_rate": 1.6591245800864828e-05,
      "loss": 2.3534,
      "step": 21172
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9807969927787781,
      "learning_rate": 1.6590936155330863e-05,
      "loss": 2.5173,
      "step": 21173
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.436251163482666,
      "learning_rate": 1.659062649862347e-05,
      "loss": 2.5883,
      "step": 21174
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0157877206802368,
      "learning_rate": 1.6590316830743175e-05,
      "loss": 2.5172,
      "step": 21175
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.079170823097229,
      "learning_rate": 1.65900071516905e-05,
      "loss": 2.4788,
      "step": 21176
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.05293869972229,
      "learning_rate": 1.6589697461465975e-05,
      "loss": 2.3712,
      "step": 21177
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0181176662445068,
      "learning_rate": 1.6589387760070126e-05,
      "loss": 2.451,
      "step": 21178
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.005450963973999,
      "learning_rate": 1.6589078047503474e-05,
      "loss": 2.537,
      "step": 21179
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.931546151638031,
      "learning_rate": 1.6588768323766542e-05,
      "loss": 2.3057,
      "step": 21180
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.004850149154663,
      "learning_rate": 1.658845858885986e-05,
      "loss": 2.5794,
      "step": 21181
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0826680660247803,
      "learning_rate": 1.658814884278395e-05,
      "loss": 2.593,
      "step": 21182
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.034756064414978,
      "learning_rate": 1.658783908553934e-05,
      "loss": 2.335,
      "step": 21183
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9641644358634949,
      "learning_rate": 1.6587529317126556e-05,
      "loss": 2.461,
      "step": 21184
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1392556428909302,
      "learning_rate": 1.6587219537546117e-05,
      "loss": 2.5443,
      "step": 21185
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0257538557052612,
      "learning_rate": 1.658690974679855e-05,
      "loss": 2.4302,
      "step": 21186
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2166001796722412,
      "learning_rate": 1.658659994488439e-05,
      "loss": 2.4029,
      "step": 21187
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9502843618392944,
      "learning_rate": 1.6586290131804147e-05,
      "loss": 2.3028,
      "step": 21188
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.082059621810913,
      "learning_rate": 1.6585980307558356e-05,
      "loss": 2.4051,
      "step": 21189
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0760693550109863,
      "learning_rate": 1.658567047214754e-05,
      "loss": 2.4652,
      "step": 21190
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.02787446975708,
      "learning_rate": 1.6585360625572222e-05,
      "loss": 2.4674,
      "step": 21191
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9766426086425781,
      "learning_rate": 1.6585050767832932e-05,
      "loss": 2.5179,
      "step": 21192
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0289757251739502,
      "learning_rate": 1.658474089893019e-05,
      "loss": 2.3343,
      "step": 21193
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0451433658599854,
      "learning_rate": 1.6584431018864523e-05,
      "loss": 2.3563,
      "step": 21194
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0061911344528198,
      "learning_rate": 1.6584121127636457e-05,
      "loss": 2.2133,
      "step": 21195
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0036643743515015,
      "learning_rate": 1.658381122524652e-05,
      "loss": 2.5284,
      "step": 21196
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0152901411056519,
      "learning_rate": 1.6583501311695237e-05,
      "loss": 2.4077,
      "step": 21197
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9791262149810791,
      "learning_rate": 1.6583191386983126e-05,
      "loss": 2.3813,
      "step": 21198
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0854555368423462,
      "learning_rate": 1.6582881451110718e-05,
      "loss": 2.6256,
      "step": 21199
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.127716064453125,
      "learning_rate": 1.6582571504078543e-05,
      "loss": 2.5089,
      "step": 21200
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0063140392303467,
      "learning_rate": 1.6582261545887116e-05,
      "loss": 2.4462,
      "step": 21201
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.001678228378296,
      "learning_rate": 1.658195157653697e-05,
      "loss": 2.3283,
      "step": 21202
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.112884521484375,
      "learning_rate": 1.6581641596028627e-05,
      "loss": 2.5751,
      "step": 21203
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0581398010253906,
      "learning_rate": 1.6581331604362617e-05,
      "loss": 2.5408,
      "step": 21204
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.019364356994629,
      "learning_rate": 1.6581021601539462e-05,
      "loss": 2.4403,
      "step": 21205
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0210827589035034,
      "learning_rate": 1.6580711587559686e-05,
      "loss": 2.3591,
      "step": 21206
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.320672631263733,
      "learning_rate": 1.658040156242382e-05,
      "loss": 2.2046,
      "step": 21207
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0147733688354492,
      "learning_rate": 1.6580091526132383e-05,
      "loss": 2.3658,
      "step": 21208
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0588390827178955,
      "learning_rate": 1.6579781478685903e-05,
      "loss": 2.3995,
      "step": 21209
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.076305866241455,
      "learning_rate": 1.657947142008491e-05,
      "loss": 2.5158,
      "step": 21210
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0020495653152466,
      "learning_rate": 1.6579161350329925e-05,
      "loss": 2.3141,
      "step": 21211
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.136116623878479,
      "learning_rate": 1.6578851269421473e-05,
      "loss": 2.6536,
      "step": 21212
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.075736403465271,
      "learning_rate": 1.6578541177360083e-05,
      "loss": 2.5407,
      "step": 21213
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0163973569869995,
      "learning_rate": 1.6578231074146277e-05,
      "loss": 2.4239,
      "step": 21214
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9432401657104492,
      "learning_rate": 1.6577920959780582e-05,
      "loss": 2.2361,
      "step": 21215
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0805954933166504,
      "learning_rate": 1.6577610834263525e-05,
      "loss": 2.64,
      "step": 21216
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0973914861679077,
      "learning_rate": 1.6577300697595632e-05,
      "loss": 2.5008,
      "step": 21217
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.033388376235962,
      "learning_rate": 1.657699054977743e-05,
      "loss": 2.3918,
      "step": 21218
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0280020236968994,
      "learning_rate": 1.657668039080944e-05,
      "loss": 2.436,
      "step": 21219
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5150550603866577,
      "learning_rate": 1.6576370220692192e-05,
      "loss": 2.3533,
      "step": 21220
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0126595497131348,
      "learning_rate": 1.657606003942621e-05,
      "loss": 2.2916,
      "step": 21221
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0021748542785645,
      "learning_rate": 1.657574984701202e-05,
      "loss": 2.2421,
      "step": 21222
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0847417116165161,
      "learning_rate": 1.6575439643450148e-05,
      "loss": 2.4485,
      "step": 21223
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2186870574951172,
      "learning_rate": 1.6575129428741118e-05,
      "loss": 2.6806,
      "step": 21224
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0681034326553345,
      "learning_rate": 1.6574819202885463e-05,
      "loss": 2.3719,
      "step": 21225
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0642207860946655,
      "learning_rate": 1.6574508965883698e-05,
      "loss": 2.4745,
      "step": 21226
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0240203142166138,
      "learning_rate": 1.6574198717736355e-05,
      "loss": 2.4712,
      "step": 21227
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.945344090461731,
      "learning_rate": 1.6573888458443966e-05,
      "loss": 2.3149,
      "step": 21228
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1037653684616089,
      "learning_rate": 1.6573578188007043e-05,
      "loss": 2.2073,
      "step": 21229
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0307122468948364,
      "learning_rate": 1.6573267906426124e-05,
      "loss": 2.3172,
      "step": 21230
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0412812232971191,
      "learning_rate": 1.657295761370173e-05,
      "loss": 2.4089,
      "step": 21231
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0643365383148193,
      "learning_rate": 1.657264730983439e-05,
      "loss": 2.4436,
      "step": 21232
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9845615029335022,
      "learning_rate": 1.6572336994824624e-05,
      "loss": 2.2969,
      "step": 21233
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.161006212234497,
      "learning_rate": 1.657202666867296e-05,
      "loss": 2.5461,
      "step": 21234
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1227807998657227,
      "learning_rate": 1.657171633137993e-05,
      "loss": 2.3315,
      "step": 21235
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1221956014633179,
      "learning_rate": 1.6571405982946057e-05,
      "loss": 2.795,
      "step": 21236
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9631191492080688,
      "learning_rate": 1.6571095623371862e-05,
      "loss": 2.5421,
      "step": 21237
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9771989583969116,
      "learning_rate": 1.6570785252657876e-05,
      "loss": 2.5906,
      "step": 21238
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2890124320983887,
      "learning_rate": 1.6570474870804628e-05,
      "loss": 2.3982,
      "step": 21239
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9934001564979553,
      "learning_rate": 1.6570164477812635e-05,
      "loss": 2.3408,
      "step": 21240
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9418749213218689,
      "learning_rate": 1.6569854073682433e-05,
      "loss": 2.2093,
      "step": 21241
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1150871515274048,
      "learning_rate": 1.656954365841454e-05,
      "loss": 2.356,
      "step": 21242
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2338314056396484,
      "learning_rate": 1.656923323200949e-05,
      "loss": 2.6167,
      "step": 21243
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9547841548919678,
      "learning_rate": 1.6568922794467807e-05,
      "loss": 2.3709,
      "step": 21244
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0832312107086182,
      "learning_rate": 1.6568612345790012e-05,
      "loss": 2.3947,
      "step": 21245
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.31833815574646,
      "learning_rate": 1.6568301885976637e-05,
      "loss": 2.5089,
      "step": 21246
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.033618450164795,
      "learning_rate": 1.6567991415028204e-05,
      "loss": 2.4203,
      "step": 21247
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.007426381111145,
      "learning_rate": 1.6567680932945242e-05,
      "loss": 2.4885,
      "step": 21248
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1648145914077759,
      "learning_rate": 1.656737043972828e-05,
      "loss": 2.3418,
      "step": 21249
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0812302827835083,
      "learning_rate": 1.656705993537784e-05,
      "loss": 2.489,
      "step": 21250
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.019657015800476,
      "learning_rate": 1.656674941989445e-05,
      "loss": 2.2534,
      "step": 21251
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1392979621887207,
      "learning_rate": 1.6566438893278637e-05,
      "loss": 2.5332,
      "step": 21252
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1115374565124512,
      "learning_rate": 1.6566128355530926e-05,
      "loss": 2.5818,
      "step": 21253
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1526039838790894,
      "learning_rate": 1.6565817806651844e-05,
      "loss": 2.442,
      "step": 21254
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9497405886650085,
      "learning_rate": 1.6565507246641915e-05,
      "loss": 2.6596,
      "step": 21255
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0623106956481934,
      "learning_rate": 1.656519667550167e-05,
      "loss": 2.3425,
      "step": 21256
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0114853382110596,
      "learning_rate": 1.6564886093231635e-05,
      "loss": 2.6768,
      "step": 21257
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1912606954574585,
      "learning_rate": 1.6564575499832335e-05,
      "loss": 2.4803,
      "step": 21258
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1391829252243042,
      "learning_rate": 1.6564264895304293e-05,
      "loss": 2.5032,
      "step": 21259
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.114026427268982,
      "learning_rate": 1.6563954279648043e-05,
      "loss": 2.4294,
      "step": 21260
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0717428922653198,
      "learning_rate": 1.6563643652864105e-05,
      "loss": 2.4654,
      "step": 21261
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9801156520843506,
      "learning_rate": 1.656333301495301e-05,
      "loss": 2.4102,
      "step": 21262
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0128167867660522,
      "learning_rate": 1.6563022365915283e-05,
      "loss": 2.2197,
      "step": 21263
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0652943849563599,
      "learning_rate": 1.656271170575145e-05,
      "loss": 2.228,
      "step": 21264
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0523663759231567,
      "learning_rate": 1.6562401034462042e-05,
      "loss": 2.515,
      "step": 21265
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0861326456069946,
      "learning_rate": 1.6562090352047575e-05,
      "loss": 2.5931,
      "step": 21266
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0329409837722778,
      "learning_rate": 1.6561779658508584e-05,
      "loss": 2.5064,
      "step": 21267
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0635405778884888,
      "learning_rate": 1.6561468953845597e-05,
      "loss": 2.5437,
      "step": 21268
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9822238683700562,
      "learning_rate": 1.656115823805914e-05,
      "loss": 2.4037,
      "step": 21269
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9980190396308899,
      "learning_rate": 1.6560847511149734e-05,
      "loss": 2.4927,
      "step": 21270
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1498239040374756,
      "learning_rate": 1.656053677311791e-05,
      "loss": 2.0678,
      "step": 21271
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1061580181121826,
      "learning_rate": 1.6560226023964197e-05,
      "loss": 2.3507,
      "step": 21272
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2129658460617065,
      "learning_rate": 1.6559915263689117e-05,
      "loss": 2.3553,
      "step": 21273
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0019320249557495,
      "learning_rate": 1.65596044922932e-05,
      "loss": 2.4422,
      "step": 21274
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1292332410812378,
      "learning_rate": 1.655929370977697e-05,
      "loss": 2.3891,
      "step": 21275
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.176749587059021,
      "learning_rate": 1.655898291614096e-05,
      "loss": 2.5767,
      "step": 21276
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0080382823944092,
      "learning_rate": 1.6558672111385688e-05,
      "loss": 2.7815,
      "step": 21277
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1518876552581787,
      "learning_rate": 1.6558361295511683e-05,
      "loss": 2.594,
      "step": 21278
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1464520692825317,
      "learning_rate": 1.6558050468519483e-05,
      "loss": 2.2364,
      "step": 21279
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.068037509918213,
      "learning_rate": 1.65577396304096e-05,
      "loss": 2.2466,
      "step": 21280
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9882639646530151,
      "learning_rate": 1.6557428781182568e-05,
      "loss": 2.3803,
      "step": 21281
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9707863926887512,
      "learning_rate": 1.6557117920838913e-05,
      "loss": 2.2687,
      "step": 21282
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9800567626953125,
      "learning_rate": 1.6556807049379168e-05,
      "loss": 2.3482,
      "step": 21283
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0045444965362549,
      "learning_rate": 1.6556496166803846e-05,
      "loss": 2.627,
      "step": 21284
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0571157932281494,
      "learning_rate": 1.6556185273113487e-05,
      "loss": 2.47,
      "step": 21285
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9808715581893921,
      "learning_rate": 1.6555874368308612e-05,
      "loss": 2.3051,
      "step": 21286
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3669708967208862,
      "learning_rate": 1.655556345238975e-05,
      "loss": 2.5436,
      "step": 21287
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1806429624557495,
      "learning_rate": 1.6555252525357426e-05,
      "loss": 2.5543,
      "step": 21288
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1393882036209106,
      "learning_rate": 1.6554941587212172e-05,
      "loss": 2.4656,
      "step": 21289
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0335286855697632,
      "learning_rate": 1.6554630637954507e-05,
      "loss": 2.4787,
      "step": 21290
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9228387475013733,
      "learning_rate": 1.6554319677584965e-05,
      "loss": 2.244,
      "step": 21291
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2032177448272705,
      "learning_rate": 1.6554008706104073e-05,
      "loss": 2.4409,
      "step": 21292
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.168964147567749,
      "learning_rate": 1.6553697723512354e-05,
      "loss": 2.2544,
      "step": 21293
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1567401885986328,
      "learning_rate": 1.6553386729810338e-05,
      "loss": 2.4597,
      "step": 21294
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0254276990890503,
      "learning_rate": 1.655307572499855e-05,
      "loss": 2.6295,
      "step": 21295
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2238181829452515,
      "learning_rate": 1.655276470907752e-05,
      "loss": 2.3839,
      "step": 21296
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0772178173065186,
      "learning_rate": 1.6552453682047774e-05,
      "loss": 2.5931,
      "step": 21297
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0060495138168335,
      "learning_rate": 1.655214264390984e-05,
      "loss": 2.4495,
      "step": 21298
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.95113605260849,
      "learning_rate": 1.6551831594664245e-05,
      "loss": 2.3425,
      "step": 21299
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.965610682964325,
      "learning_rate": 1.6551520534311518e-05,
      "loss": 2.3137,
      "step": 21300
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0718806982040405,
      "learning_rate": 1.6551209462852182e-05,
      "loss": 2.4871,
      "step": 21301
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1889358758926392,
      "learning_rate": 1.6550898380286767e-05,
      "loss": 2.325,
      "step": 21302
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9727417230606079,
      "learning_rate": 1.6550587286615795e-05,
      "loss": 2.4779,
      "step": 21303
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1141847372055054,
      "learning_rate": 1.6550276181839807e-05,
      "loss": 2.5248,
      "step": 21304
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0281800031661987,
      "learning_rate": 1.6549965065959316e-05,
      "loss": 2.4027,
      "step": 21305
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1396924257278442,
      "learning_rate": 1.6549653938974857e-05,
      "loss": 2.5181,
      "step": 21306
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1858209371566772,
      "learning_rate": 1.6549342800886954e-05,
      "loss": 2.4851,
      "step": 21307
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9778310060501099,
      "learning_rate": 1.654903165169614e-05,
      "loss": 2.7528,
      "step": 21308
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0338199138641357,
      "learning_rate": 1.6548720491402936e-05,
      "loss": 2.4677,
      "step": 21309
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2333844900131226,
      "learning_rate": 1.6548409320007875e-05,
      "loss": 2.4744,
      "step": 21310
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1714533567428589,
      "learning_rate": 1.6548098137511475e-05,
      "loss": 2.185,
      "step": 21311
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9903383851051331,
      "learning_rate": 1.6547786943914278e-05,
      "loss": 2.0498,
      "step": 21312
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.178025484085083,
      "learning_rate": 1.65474757392168e-05,
      "loss": 2.5836,
      "step": 21313
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0538073778152466,
      "learning_rate": 1.654716452341957e-05,
      "loss": 2.4909,
      "step": 21314
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0600577592849731,
      "learning_rate": 1.654685329652312e-05,
      "loss": 2.532,
      "step": 21315
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0757553577423096,
      "learning_rate": 1.6546542058527977e-05,
      "loss": 2.2871,
      "step": 21316
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.536111831665039,
      "learning_rate": 1.6546230809434665e-05,
      "loss": 2.4668,
      "step": 21317
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0173323154449463,
      "learning_rate": 1.6545919549243718e-05,
      "loss": 2.4256,
      "step": 21318
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0547796487808228,
      "learning_rate": 1.6545608277955655e-05,
      "loss": 2.3496,
      "step": 21319
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0194685459136963,
      "learning_rate": 1.654529699557101e-05,
      "loss": 2.3254,
      "step": 21320
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0522112846374512,
      "learning_rate": 1.654498570209031e-05,
      "loss": 2.308,
      "step": 21321
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0615506172180176,
      "learning_rate": 1.6544674397514083e-05,
      "loss": 2.7218,
      "step": 21322
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0902736186981201,
      "learning_rate": 1.6544363081842853e-05,
      "loss": 2.3179,
      "step": 21323
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.079078197479248,
      "learning_rate": 1.654405175507715e-05,
      "loss": 2.5142,
      "step": 21324
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.081834316253662,
      "learning_rate": 1.65437404172175e-05,
      "loss": 2.5404,
      "step": 21325
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0145741701126099,
      "learning_rate": 1.6543429068264436e-05,
      "loss": 2.2534,
      "step": 21326
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0311959981918335,
      "learning_rate": 1.654311770821848e-05,
      "loss": 2.5535,
      "step": 21327
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0376884937286377,
      "learning_rate": 1.6542806337080163e-05,
      "loss": 2.5883,
      "step": 21328
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0779869556427002,
      "learning_rate": 1.6542494954850016e-05,
      "loss": 2.3829,
      "step": 21329
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9282994270324707,
      "learning_rate": 1.654218356152856e-05,
      "loss": 2.1614,
      "step": 21330
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0111643075942993,
      "learning_rate": 1.6541872157116326e-05,
      "loss": 2.3493,
      "step": 21331
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0502556562423706,
      "learning_rate": 1.6541560741613844e-05,
      "loss": 2.4899,
      "step": 21332
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0665830373764038,
      "learning_rate": 1.6541249315021634e-05,
      "loss": 2.3029,
      "step": 21333
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0206961631774902,
      "learning_rate": 1.6540937877340234e-05,
      "loss": 2.2527,
      "step": 21334
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9701202511787415,
      "learning_rate": 1.6540626428570167e-05,
      "loss": 2.2843,
      "step": 21335
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2310594320297241,
      "learning_rate": 1.6540314968711964e-05,
      "loss": 2.5235,
      "step": 21336
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9929507374763489,
      "learning_rate": 1.6540003497766148e-05,
      "loss": 2.311,
      "step": 21337
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9638239741325378,
      "learning_rate": 1.6539692015733252e-05,
      "loss": 2.5921,
      "step": 21338
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0143073797225952,
      "learning_rate": 1.65393805226138e-05,
      "loss": 2.6435,
      "step": 21339
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1224154233932495,
      "learning_rate": 1.6539069018408324e-05,
      "loss": 2.4521,
      "step": 21340
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0077061653137207,
      "learning_rate": 1.6538757503117348e-05,
      "loss": 2.1668,
      "step": 21341
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9940555095672607,
      "learning_rate": 1.6538445976741404e-05,
      "loss": 2.3212,
      "step": 21342
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1013370752334595,
      "learning_rate": 1.653813443928102e-05,
      "loss": 2.2825,
      "step": 21343
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1974248886108398,
      "learning_rate": 1.653782289073672e-05,
      "loss": 2.3212,
      "step": 21344
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2492992877960205,
      "learning_rate": 1.653751133110903e-05,
      "loss": 2.5582,
      "step": 21345
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.026842713356018,
      "learning_rate": 1.6537199760398487e-05,
      "loss": 2.6166,
      "step": 21346
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.062941551208496,
      "learning_rate": 1.6536888178605616e-05,
      "loss": 2.3601,
      "step": 21347
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0099421739578247,
      "learning_rate": 1.6536576585730945e-05,
      "loss": 2.3535,
      "step": 21348
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0716018676757812,
      "learning_rate": 1.6536264981775e-05,
      "loss": 2.3453,
      "step": 21349
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1713718175888062,
      "learning_rate": 1.6535953366738307e-05,
      "loss": 2.3465,
      "step": 21350
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.041853904724121,
      "learning_rate": 1.65356417406214e-05,
      "loss": 2.2315,
      "step": 21351
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0719490051269531,
      "learning_rate": 1.6535330103424806e-05,
      "loss": 2.4421,
      "step": 21352
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1765960454940796,
      "learning_rate": 1.6535018455149054e-05,
      "loss": 2.4737,
      "step": 21353
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0063812732696533,
      "learning_rate": 1.653470679579467e-05,
      "loss": 2.4118,
      "step": 21354
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.128050446510315,
      "learning_rate": 1.6534395125362182e-05,
      "loss": 2.4553,
      "step": 21355
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0625661611557007,
      "learning_rate": 1.6534083443852124e-05,
      "loss": 2.3302,
      "step": 21356
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.114469051361084,
      "learning_rate": 1.6533771751265012e-05,
      "loss": 2.3494,
      "step": 21357
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.961524248123169,
      "learning_rate": 1.6533460047601386e-05,
      "loss": 1.9919,
      "step": 21358
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.112311601638794,
      "learning_rate": 1.6533148332861774e-05,
      "loss": 2.3597,
      "step": 21359
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1130183935165405,
      "learning_rate": 1.6532836607046698e-05,
      "loss": 2.5038,
      "step": 21360
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0555638074874878,
      "learning_rate": 1.653252487015669e-05,
      "loss": 2.4339,
      "step": 21361
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.116606593132019,
      "learning_rate": 1.6532213122192278e-05,
      "loss": 2.4783,
      "step": 21362
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0801496505737305,
      "learning_rate": 1.653190136315399e-05,
      "loss": 2.6973,
      "step": 21363
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0218398571014404,
      "learning_rate": 1.6531589593042355e-05,
      "loss": 2.2939,
      "step": 21364
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2821952104568481,
      "learning_rate": 1.6531277811857904e-05,
      "loss": 2.5449,
      "step": 21365
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.997289776802063,
      "learning_rate": 1.653096601960116e-05,
      "loss": 2.4706,
      "step": 21366
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4604796171188354,
      "learning_rate": 1.653065421627266e-05,
      "loss": 2.2888,
      "step": 21367
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1309630870819092,
      "learning_rate": 1.6530342401872923e-05,
      "loss": 2.4875,
      "step": 21368
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1209779977798462,
      "learning_rate": 1.6530030576402483e-05,
      "loss": 2.4428,
      "step": 21369
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0471285581588745,
      "learning_rate": 1.6529718739861868e-05,
      "loss": 2.4372,
      "step": 21370
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.022208571434021,
      "learning_rate": 1.6529406892251606e-05,
      "loss": 2.4948,
      "step": 21371
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0332019329071045,
      "learning_rate": 1.6529095033572226e-05,
      "loss": 2.5874,
      "step": 21372
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0464667081832886,
      "learning_rate": 1.6528783163824256e-05,
      "loss": 2.4439,
      "step": 21373
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0704823732376099,
      "learning_rate": 1.6528471283008226e-05,
      "loss": 2.7087,
      "step": 21374
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9959074258804321,
      "learning_rate": 1.6528159391124666e-05,
      "loss": 2.4194,
      "step": 21375
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9493032097816467,
      "learning_rate": 1.65278474881741e-05,
      "loss": 2.5451,
      "step": 21376
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9966594576835632,
      "learning_rate": 1.6527535574157058e-05,
      "loss": 2.3069,
      "step": 21377
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1104801893234253,
      "learning_rate": 1.6527223649074073e-05,
      "loss": 2.5234,
      "step": 21378
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3042348623275757,
      "learning_rate": 1.6526911712925673e-05,
      "loss": 2.4268,
      "step": 21379
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0813175439834595,
      "learning_rate": 1.652659976571238e-05,
      "loss": 2.4108,
      "step": 21380
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.92948979139328,
      "learning_rate": 1.6526287807434732e-05,
      "loss": 2.5984,
      "step": 21381
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1401690244674683,
      "learning_rate": 1.6525975838093252e-05,
      "loss": 2.4471,
      "step": 21382
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9925780296325684,
      "learning_rate": 1.6525663857688475e-05,
      "loss": 2.576,
      "step": 21383
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1463252305984497,
      "learning_rate": 1.652535186622092e-05,
      "loss": 2.3828,
      "step": 21384
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0129776000976562,
      "learning_rate": 1.6525039863691123e-05,
      "loss": 2.3949,
      "step": 21385
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.020054578781128,
      "learning_rate": 1.6524727850099608e-05,
      "loss": 2.3446,
      "step": 21386
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9884892106056213,
      "learning_rate": 1.652441582544691e-05,
      "loss": 2.554,
      "step": 21387
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.018746018409729,
      "learning_rate": 1.6524103789733555e-05,
      "loss": 2.6515,
      "step": 21388
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1222739219665527,
      "learning_rate": 1.6523791742960074e-05,
      "loss": 2.4212,
      "step": 21389
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0197207927703857,
      "learning_rate": 1.652347968512699e-05,
      "loss": 2.5023,
      "step": 21390
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0383949279785156,
      "learning_rate": 1.652316761623484e-05,
      "loss": 2.3349,
      "step": 21391
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0615234375,
      "learning_rate": 1.652285553628415e-05,
      "loss": 2.3697,
      "step": 21392
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0133246183395386,
      "learning_rate": 1.6522543445275446e-05,
      "loss": 2.4323,
      "step": 21393
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.038041353225708,
      "learning_rate": 1.6522231343209257e-05,
      "loss": 2.232,
      "step": 21394
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0751850605010986,
      "learning_rate": 1.652191923008612e-05,
      "loss": 2.4986,
      "step": 21395
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9888918995857239,
      "learning_rate": 1.6521607105906556e-05,
      "loss": 2.2968,
      "step": 21396
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0784908533096313,
      "learning_rate": 1.6521294970671095e-05,
      "loss": 2.4235,
      "step": 21397
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1036807298660278,
      "learning_rate": 1.6520982824380268e-05,
      "loss": 2.5971,
      "step": 21398
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0420743227005005,
      "learning_rate": 1.6520670667034608e-05,
      "loss": 2.5717,
      "step": 21399
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.139678716659546,
      "learning_rate": 1.6520358498634635e-05,
      "loss": 2.5762,
      "step": 21400
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.154052972793579,
      "learning_rate": 1.652004631918089e-05,
      "loss": 2.1863,
      "step": 21401
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.092521071434021,
      "learning_rate": 1.6519734128673886e-05,
      "loss": 2.5611,
      "step": 21402
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1709232330322266,
      "learning_rate": 1.651942192711417e-05,
      "loss": 2.4075,
      "step": 21403
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0708850622177124,
      "learning_rate": 1.6519109714502263e-05,
      "loss": 2.4536,
      "step": 21404
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9478073716163635,
      "learning_rate": 1.651879749083869e-05,
      "loss": 2.5026,
      "step": 21405
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9233174324035645,
      "learning_rate": 1.6518485256123986e-05,
      "loss": 2.5585,
      "step": 21406
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.026363492012024,
      "learning_rate": 1.6518173010358678e-05,
      "loss": 2.4379,
      "step": 21407
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1001492738723755,
      "learning_rate": 1.65178607535433e-05,
      "loss": 2.45,
      "step": 21408
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9975847005844116,
      "learning_rate": 1.651754848567838e-05,
      "loss": 2.2393,
      "step": 21409
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2361972332000732,
      "learning_rate": 1.651723620676444e-05,
      "loss": 2.4227,
      "step": 21410
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0573644638061523,
      "learning_rate": 1.6516923916802015e-05,
      "loss": 2.5394,
      "step": 21411
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0022767782211304,
      "learning_rate": 1.6516611615791632e-05,
      "loss": 2.5216,
      "step": 21412
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9747188091278076,
      "learning_rate": 1.6516299303733828e-05,
      "loss": 2.5554,
      "step": 21413
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1962270736694336,
      "learning_rate": 1.6515986980629122e-05,
      "loss": 2.3784,
      "step": 21414
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0551648139953613,
      "learning_rate": 1.6515674646478053e-05,
      "loss": 2.3462,
      "step": 21415
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0722033977508545,
      "learning_rate": 1.6515362301281143e-05,
      "loss": 2.1875,
      "step": 21416
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9407935738563538,
      "learning_rate": 1.6515049945038926e-05,
      "loss": 2.445,
      "step": 21417
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0987521409988403,
      "learning_rate": 1.6514737577751928e-05,
      "loss": 2.3884,
      "step": 21418
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0835835933685303,
      "learning_rate": 1.651442519942068e-05,
      "loss": 2.4599,
      "step": 21419
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9984152317047119,
      "learning_rate": 1.6514112810045713e-05,
      "loss": 2.3751,
      "step": 21420
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0594851970672607,
      "learning_rate": 1.6513800409627557e-05,
      "loss": 2.55,
      "step": 21421
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1152969598770142,
      "learning_rate": 1.651348799816674e-05,
      "loss": 2.3448,
      "step": 21422
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0703108310699463,
      "learning_rate": 1.651317557566379e-05,
      "loss": 2.3526,
      "step": 21423
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1008375883102417,
      "learning_rate": 1.6512863142119238e-05,
      "loss": 2.4881,
      "step": 21424
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9731556177139282,
      "learning_rate": 1.6512550697533616e-05,
      "loss": 2.3824,
      "step": 21425
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.958699643611908,
      "learning_rate": 1.651223824190745e-05,
      "loss": 2.3711,
      "step": 21426
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9951733350753784,
      "learning_rate": 1.6511925775241273e-05,
      "loss": 2.4348,
      "step": 21427
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1201473474502563,
      "learning_rate": 1.6511613297535615e-05,
      "loss": 2.3081,
      "step": 21428
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9879754185676575,
      "learning_rate": 1.6511300808791e-05,
      "loss": 2.4338,
      "step": 21429
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0483094453811646,
      "learning_rate": 1.6510988309007962e-05,
      "loss": 2.4911,
      "step": 21430
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0330073833465576,
      "learning_rate": 1.6510675798187033e-05,
      "loss": 2.5161,
      "step": 21431
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0225034952163696,
      "learning_rate": 1.6510363276328737e-05,
      "loss": 2.5497,
      "step": 21432
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0896607637405396,
      "learning_rate": 1.651005074343361e-05,
      "loss": 2.628,
      "step": 21433
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0247082710266113,
      "learning_rate": 1.6509738199502178e-05,
      "loss": 2.2857,
      "step": 21434
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.114264726638794,
      "learning_rate": 1.6509425644534973e-05,
      "loss": 2.3777,
      "step": 21435
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0514087677001953,
      "learning_rate": 1.650911307853252e-05,
      "loss": 2.3081,
      "step": 21436
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0692918300628662,
      "learning_rate": 1.6508800501495357e-05,
      "loss": 2.264,
      "step": 21437
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.045140027999878,
      "learning_rate": 1.6508487913424004e-05,
      "loss": 2.4725,
      "step": 21438
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.108862280845642,
      "learning_rate": 1.6508175314319003e-05,
      "loss": 2.5705,
      "step": 21439
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0433489084243774,
      "learning_rate": 1.6507862704180872e-05,
      "loss": 2.3292,
      "step": 21440
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2025251388549805,
      "learning_rate": 1.6507550083010148e-05,
      "loss": 2.4979,
      "step": 21441
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0105807781219482,
      "learning_rate": 1.650723745080736e-05,
      "loss": 2.6161,
      "step": 21442
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.058951735496521,
      "learning_rate": 1.6506924807573038e-05,
      "loss": 2.481,
      "step": 21443
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1410495042800903,
      "learning_rate": 1.6506612153307707e-05,
      "loss": 2.5115,
      "step": 21444
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1243033409118652,
      "learning_rate": 1.6506299488011902e-05,
      "loss": 2.3332,
      "step": 21445
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.102310061454773,
      "learning_rate": 1.6505986811686153e-05,
      "loss": 2.5761,
      "step": 21446
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.104254126548767,
      "learning_rate": 1.6505674124330995e-05,
      "loss": 2.4604,
      "step": 21447
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1287753582000732,
      "learning_rate": 1.6505361425946946e-05,
      "loss": 2.3523,
      "step": 21448
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.006005048751831,
      "learning_rate": 1.6505048716534545e-05,
      "loss": 2.3872,
      "step": 21449
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.034942388534546,
      "learning_rate": 1.6504735996094317e-05,
      "loss": 2.4161,
      "step": 21450
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9587268233299255,
      "learning_rate": 1.6504423264626798e-05,
      "loss": 2.4703,
      "step": 21451
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.239855408668518,
      "learning_rate": 1.6504110522132512e-05,
      "loss": 2.377,
      "step": 21452
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.964124321937561,
      "learning_rate": 1.6503797768611996e-05,
      "loss": 2.3773,
      "step": 21453
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9889848232269287,
      "learning_rate": 1.6503485004065773e-05,
      "loss": 2.4745,
      "step": 21454
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0291597843170166,
      "learning_rate": 1.650317222849438e-05,
      "loss": 2.6131,
      "step": 21455
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0220849514007568,
      "learning_rate": 1.650285944189834e-05,
      "loss": 2.3721,
      "step": 21456
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0040041208267212,
      "learning_rate": 1.650254664427819e-05,
      "loss": 2.248,
      "step": 21457
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0987708568572998,
      "learning_rate": 1.6502233835634455e-05,
      "loss": 2.4055,
      "step": 21458
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.104014277458191,
      "learning_rate": 1.6501921015967668e-05,
      "loss": 2.2976,
      "step": 21459
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1445996761322021,
      "learning_rate": 1.650160818527836e-05,
      "loss": 2.4768,
      "step": 21460
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.059468150138855,
      "learning_rate": 1.6501295343567064e-05,
      "loss": 2.5836,
      "step": 21461
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1000730991363525,
      "learning_rate": 1.65009824908343e-05,
      "loss": 2.3772,
      "step": 21462
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0492955446243286,
      "learning_rate": 1.6500669627080606e-05,
      "loss": 2.3023,
      "step": 21463
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.229366660118103,
      "learning_rate": 1.6500356752306514e-05,
      "loss": 2.5463,
      "step": 21464
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0743674039840698,
      "learning_rate": 1.6500043866512552e-05,
      "loss": 2.1795,
      "step": 21465
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9978476166725159,
      "learning_rate": 1.649973096969925e-05,
      "loss": 2.2873,
      "step": 21466
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1199482679367065,
      "learning_rate": 1.649941806186714e-05,
      "loss": 2.4497,
      "step": 21467
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.045691967010498,
      "learning_rate": 1.649910514301675e-05,
      "loss": 2.5341,
      "step": 21468
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.010589599609375,
      "learning_rate": 1.6498792213148608e-05,
      "loss": 2.4636,
      "step": 21469
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.035145878791809,
      "learning_rate": 1.649847927226325e-05,
      "loss": 2.4999,
      "step": 21470
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9692746996879578,
      "learning_rate": 1.6498166320361208e-05,
      "loss": 2.5202,
      "step": 21471
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0940169095993042,
      "learning_rate": 1.6497853357443006e-05,
      "loss": 2.479,
      "step": 21472
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0409181118011475,
      "learning_rate": 1.6497540383509178e-05,
      "loss": 2.2881,
      "step": 21473
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0915195941925049,
      "learning_rate": 1.6497227398560256e-05,
      "loss": 2.5043,
      "step": 21474
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1921128034591675,
      "learning_rate": 1.649691440259677e-05,
      "loss": 2.5707,
      "step": 21475
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1598654985427856,
      "learning_rate": 1.6496601395619247e-05,
      "loss": 2.5168,
      "step": 21476
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.131198763847351,
      "learning_rate": 1.649628837762822e-05,
      "loss": 2.528,
      "step": 21477
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1567343473434448,
      "learning_rate": 1.649597534862422e-05,
      "loss": 2.6512,
      "step": 21478
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.041171908378601,
      "learning_rate": 1.649566230860778e-05,
      "loss": 2.4115,
      "step": 21479
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0410833358764648,
      "learning_rate": 1.6495349257579424e-05,
      "loss": 2.2586,
      "step": 21480
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0296425819396973,
      "learning_rate": 1.649503619553969e-05,
      "loss": 2.4759,
      "step": 21481
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9966782331466675,
      "learning_rate": 1.6494723122489105e-05,
      "loss": 2.4315,
      "step": 21482
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.122349739074707,
      "learning_rate": 1.64944100384282e-05,
      "loss": 2.4835,
      "step": 21483
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1743892431259155,
      "learning_rate": 1.6494096943357508e-05,
      "loss": 2.5324,
      "step": 21484
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1281468868255615,
      "learning_rate": 1.6493783837277555e-05,
      "loss": 2.6498,
      "step": 21485
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1738946437835693,
      "learning_rate": 1.6493470720188874e-05,
      "loss": 2.4327,
      "step": 21486
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2017557621002197,
      "learning_rate": 1.6493157592092e-05,
      "loss": 2.4101,
      "step": 21487
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1114665269851685,
      "learning_rate": 1.6492844452987456e-05,
      "loss": 2.6302,
      "step": 21488
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9237337708473206,
      "learning_rate": 1.6492531302875783e-05,
      "loss": 2.3513,
      "step": 21489
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1172857284545898,
      "learning_rate": 1.6492218141757503e-05,
      "loss": 2.2405,
      "step": 21490
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.060068964958191,
      "learning_rate": 1.649190496963315e-05,
      "loss": 2.4503,
      "step": 21491
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0941120386123657,
      "learning_rate": 1.6491591786503253e-05,
      "loss": 2.4773,
      "step": 21492
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1296780109405518,
      "learning_rate": 1.649127859236835e-05,
      "loss": 2.2019,
      "step": 21493
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9877352118492126,
      "learning_rate": 1.649096538722896e-05,
      "loss": 2.187,
      "step": 21494
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.01417875289917,
      "learning_rate": 1.6490652171085625e-05,
      "loss": 2.4828,
      "step": 21495
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.516711950302124,
      "learning_rate": 1.6490338943938874e-05,
      "loss": 2.3017,
      "step": 21496
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2991894483566284,
      "learning_rate": 1.649002570578923e-05,
      "loss": 2.3602,
      "step": 21497
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.093154788017273,
      "learning_rate": 1.6489712456637232e-05,
      "loss": 2.4274,
      "step": 21498
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9601559042930603,
      "learning_rate": 1.648939919648341e-05,
      "loss": 2.4052,
      "step": 21499
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2624597549438477,
      "learning_rate": 1.6489085925328295e-05,
      "loss": 2.3396,
      "step": 21500
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.198005199432373,
      "learning_rate": 1.6488772643172416e-05,
      "loss": 2.1186,
      "step": 21501
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9669891595840454,
      "learning_rate": 1.6488459350016302e-05,
      "loss": 2.3369,
      "step": 21502
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1406298875808716,
      "learning_rate": 1.6488146045860493e-05,
      "loss": 2.377,
      "step": 21503
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.080248475074768,
      "learning_rate": 1.6487832730705507e-05,
      "loss": 2.8965,
      "step": 21504
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.03147292137146,
      "learning_rate": 1.6487519404551887e-05,
      "loss": 2.2196,
      "step": 21505
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0362492799758911,
      "learning_rate": 1.6487206067400157e-05,
      "loss": 2.2529,
      "step": 21506
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.11024010181427,
      "learning_rate": 1.6486892719250852e-05,
      "loss": 2.6042,
      "step": 21507
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0451112985610962,
      "learning_rate": 1.6486579360104506e-05,
      "loss": 2.3621,
      "step": 21508
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.197532296180725,
      "learning_rate": 1.6486265989961642e-05,
      "loss": 2.7475,
      "step": 21509
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1246033906936646,
      "learning_rate": 1.6485952608822796e-05,
      "loss": 2.7481,
      "step": 21510
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9404135942459106,
      "learning_rate": 1.6485639216688497e-05,
      "loss": 2.6405,
      "step": 21511
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0486434698104858,
      "learning_rate": 1.6485325813559284e-05,
      "loss": 2.3212,
      "step": 21512
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9806674718856812,
      "learning_rate": 1.6485012399435678e-05,
      "loss": 2.4777,
      "step": 21513
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0602409839630127,
      "learning_rate": 1.6484698974318214e-05,
      "loss": 2.4003,
      "step": 21514
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2543671131134033,
      "learning_rate": 1.6484385538207425e-05,
      "loss": 2.3501,
      "step": 21515
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0551786422729492,
      "learning_rate": 1.6484072091103842e-05,
      "loss": 2.324,
      "step": 21516
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1594274044036865,
      "learning_rate": 1.6483758633007994e-05,
      "loss": 2.5257,
      "step": 21517
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1513478755950928,
      "learning_rate": 1.6483445163920417e-05,
      "loss": 2.357,
      "step": 21518
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0445146560668945,
      "learning_rate": 1.6483131683841634e-05,
      "loss": 2.436,
      "step": 21519
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0709233283996582,
      "learning_rate": 1.648281819277219e-05,
      "loss": 2.3536,
      "step": 21520
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.189353108406067,
      "learning_rate": 1.6482504690712605e-05,
      "loss": 2.7058,
      "step": 21521
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9704275727272034,
      "learning_rate": 1.6482191177663414e-05,
      "loss": 2.4187,
      "step": 21522
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0926889181137085,
      "learning_rate": 1.6481877653625143e-05,
      "loss": 2.4113,
      "step": 21523
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0848987102508545,
      "learning_rate": 1.6481564118598338e-05,
      "loss": 2.2821,
      "step": 21524
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1776902675628662,
      "learning_rate": 1.6481250572583512e-05,
      "loss": 2.4787,
      "step": 21525
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.153328537940979,
      "learning_rate": 1.6480937015581216e-05,
      "loss": 2.2119,
      "step": 21526
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9973009824752808,
      "learning_rate": 1.6480623447591968e-05,
      "loss": 2.7213,
      "step": 21527
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9453223943710327,
      "learning_rate": 1.64803098686163e-05,
      "loss": 2.465,
      "step": 21528
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0645167827606201,
      "learning_rate": 1.6479996278654748e-05,
      "loss": 2.6749,
      "step": 21529
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9632896184921265,
      "learning_rate": 1.6479682677707844e-05,
      "loss": 2.4035,
      "step": 21530
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2663657665252686,
      "learning_rate": 1.6479369065776115e-05,
      "loss": 2.4055,
      "step": 21531
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.031290888786316,
      "learning_rate": 1.64790554428601e-05,
      "loss": 2.5669,
      "step": 21532
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0862985849380493,
      "learning_rate": 1.6478741808960325e-05,
      "loss": 2.4836,
      "step": 21533
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0139946937561035,
      "learning_rate": 1.647842816407732e-05,
      "loss": 2.3261,
      "step": 21534
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1444453001022339,
      "learning_rate": 1.6478114508211622e-05,
      "loss": 2.346,
      "step": 21535
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0216478109359741,
      "learning_rate": 1.647780084136376e-05,
      "loss": 2.541,
      "step": 21536
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.103692889213562,
      "learning_rate": 1.6477487163534266e-05,
      "loss": 2.2952,
      "step": 21537
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1699920892715454,
      "learning_rate": 1.6477173474723672e-05,
      "loss": 2.3677,
      "step": 21538
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.044580340385437,
      "learning_rate": 1.647685977493251e-05,
      "loss": 2.3362,
      "step": 21539
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0593664646148682,
      "learning_rate": 1.6476546064161312e-05,
      "loss": 2.6961,
      "step": 21540
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.982008695602417,
      "learning_rate": 1.6476232342410613e-05,
      "loss": 2.5269,
      "step": 21541
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0410823822021484,
      "learning_rate": 1.6475918609680938e-05,
      "loss": 2.4906,
      "step": 21542
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0952916145324707,
      "learning_rate": 1.647560486597282e-05,
      "loss": 2.4653,
      "step": 21543
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9733126163482666,
      "learning_rate": 1.6475291111286794e-05,
      "loss": 2.3908,
      "step": 21544
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0099700689315796,
      "learning_rate": 1.647497734562339e-05,
      "loss": 2.3285,
      "step": 21545
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9737483859062195,
      "learning_rate": 1.6474663568983146e-05,
      "loss": 2.5004,
      "step": 21546
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0495169162750244,
      "learning_rate": 1.6474349781366586e-05,
      "loss": 2.2887,
      "step": 21547
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0289392471313477,
      "learning_rate": 1.647403598277424e-05,
      "loss": 2.3442,
      "step": 21548
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0528974533081055,
      "learning_rate": 1.6473722173206648e-05,
      "loss": 2.3801,
      "step": 21549
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9610058069229126,
      "learning_rate": 1.6473408352664342e-05,
      "loss": 2.2458,
      "step": 21550
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0216383934020996,
      "learning_rate": 1.6473094521147846e-05,
      "loss": 2.4146,
      "step": 21551
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1041325330734253,
      "learning_rate": 1.64727806786577e-05,
      "loss": 2.3621,
      "step": 21552
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4139678478240967,
      "learning_rate": 1.6472466825194434e-05,
      "loss": 2.3659,
      "step": 21553
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9901540279388428,
      "learning_rate": 1.6472152960758572e-05,
      "loss": 2.3408,
      "step": 21554
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0568184852600098,
      "learning_rate": 1.647183908535066e-05,
      "loss": 2.6504,
      "step": 21555
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9870189428329468,
      "learning_rate": 1.6471525198971215e-05,
      "loss": 2.5219,
      "step": 21556
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9568642377853394,
      "learning_rate": 1.6471211301620788e-05,
      "loss": 2.6851,
      "step": 21557
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0753509998321533,
      "learning_rate": 1.647089739329989e-05,
      "loss": 2.4225,
      "step": 21558
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0389515161514282,
      "learning_rate": 1.647058347400907e-05,
      "loss": 2.5274,
      "step": 21559
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0456533432006836,
      "learning_rate": 1.6470269543748852e-05,
      "loss": 2.5153,
      "step": 21560
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0681663751602173,
      "learning_rate": 1.646995560251977e-05,
      "loss": 2.5615,
      "step": 21561
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0550013780593872,
      "learning_rate": 1.6469641650322357e-05,
      "loss": 2.5237,
      "step": 21562
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0002498626708984,
      "learning_rate": 1.646932768715714e-05,
      "loss": 2.3992,
      "step": 21563
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.986835777759552,
      "learning_rate": 1.646901371302466e-05,
      "loss": 2.4032,
      "step": 21564
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0543447732925415,
      "learning_rate": 1.6468699727925442e-05,
      "loss": 2.6575,
      "step": 21565
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0725544691085815,
      "learning_rate": 1.6468385731860023e-05,
      "loss": 2.5527,
      "step": 21566
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0106463432312012,
      "learning_rate": 1.6468071724828932e-05,
      "loss": 2.448,
      "step": 21567
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.969695508480072,
      "learning_rate": 1.6467757706832702e-05,
      "loss": 2.5782,
      "step": 21568
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3151620626449585,
      "learning_rate": 1.646744367787187e-05,
      "loss": 2.4975,
      "step": 21569
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0463389158248901,
      "learning_rate": 1.646712963794696e-05,
      "loss": 2.4139,
      "step": 21570
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.254008173942566,
      "learning_rate": 1.6466815587058513e-05,
      "loss": 2.4741,
      "step": 21571
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9975503087043762,
      "learning_rate": 1.6466501525207053e-05,
      "loss": 2.3843,
      "step": 21572
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.107689380645752,
      "learning_rate": 1.6466187452393117e-05,
      "loss": 2.362,
      "step": 21573
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0423420667648315,
      "learning_rate": 1.6465873368617243e-05,
      "loss": 2.2514,
      "step": 21574
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0674257278442383,
      "learning_rate": 1.6465559273879953e-05,
      "loss": 2.3008,
      "step": 21575
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0230305194854736,
      "learning_rate": 1.646524516818178e-05,
      "loss": 2.6673,
      "step": 21576
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9539461731910706,
      "learning_rate": 1.6464931051523266e-05,
      "loss": 2.4586,
      "step": 21577
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0144734382629395,
      "learning_rate": 1.6464616923904937e-05,
      "loss": 2.4341,
      "step": 21578
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3410704135894775,
      "learning_rate": 1.6464302785327325e-05,
      "loss": 2.3101,
      "step": 21579
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9870290756225586,
      "learning_rate": 1.6463988635790968e-05,
      "loss": 2.5985,
      "step": 21580
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.06253182888031,
      "learning_rate": 1.646367447529639e-05,
      "loss": 2.443,
      "step": 21581
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1695678234100342,
      "learning_rate": 1.646336030384413e-05,
      "loss": 2.4148,
      "step": 21582
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.066623330116272,
      "learning_rate": 1.6463046121434716e-05,
      "loss": 2.4327,
      "step": 21583
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1814343929290771,
      "learning_rate": 1.646273192806869e-05,
      "loss": 2.6211,
      "step": 21584
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.942387044429779,
      "learning_rate": 1.6462417723746577e-05,
      "loss": 2.4187,
      "step": 21585
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.079146146774292,
      "learning_rate": 1.646210350846891e-05,
      "loss": 2.4026,
      "step": 21586
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1148322820663452,
      "learning_rate": 1.646178928223622e-05,
      "loss": 2.4739,
      "step": 21587
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9362660050392151,
      "learning_rate": 1.6461475045049044e-05,
      "loss": 2.2734,
      "step": 21588
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1137094497680664,
      "learning_rate": 1.646116079690791e-05,
      "loss": 2.609,
      "step": 21589
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1104620695114136,
      "learning_rate": 1.6460846537813354e-05,
      "loss": 2.6433,
      "step": 21590
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9848492741584778,
      "learning_rate": 1.646053226776591e-05,
      "loss": 2.2976,
      "step": 21591
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.104840874671936,
      "learning_rate": 1.646021798676611e-05,
      "loss": 2.4295,
      "step": 21592
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.045318365097046,
      "learning_rate": 1.6459903694814486e-05,
      "loss": 2.2783,
      "step": 21593
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0539569854736328,
      "learning_rate": 1.6459589391911568e-05,
      "loss": 2.5343,
      "step": 21594
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1154844760894775,
      "learning_rate": 1.6459275078057896e-05,
      "loss": 2.4125,
      "step": 21595
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2342008352279663,
      "learning_rate": 1.6458960753253993e-05,
      "loss": 2.5612,
      "step": 21596
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.966357946395874,
      "learning_rate": 1.64586464175004e-05,
      "loss": 2.4534,
      "step": 21597
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1685855388641357,
      "learning_rate": 1.645833207079765e-05,
      "loss": 2.4638,
      "step": 21598
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1408361196517944,
      "learning_rate": 1.645801771314627e-05,
      "loss": 2.3585,
      "step": 21599
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1377838850021362,
      "learning_rate": 1.6457703344546794e-05,
      "loss": 2.3624,
      "step": 21600
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1250914335250854,
      "learning_rate": 1.6457388964999758e-05,
      "loss": 2.5936,
      "step": 21601
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2081968784332275,
      "learning_rate": 1.6457074574505698e-05,
      "loss": 2.5555,
      "step": 21602
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1951078176498413,
      "learning_rate": 1.645676017306514e-05,
      "loss": 2.3623,
      "step": 21603
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9222654104232788,
      "learning_rate": 1.645644576067862e-05,
      "loss": 2.3747,
      "step": 21604
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1273411512374878,
      "learning_rate": 1.645613133734667e-05,
      "loss": 2.648,
      "step": 21605
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9521977305412292,
      "learning_rate": 1.6455816903069824e-05,
      "loss": 2.6299,
      "step": 21606
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0268824100494385,
      "learning_rate": 1.6455502457848617e-05,
      "loss": 2.4971,
      "step": 21607
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0493345260620117,
      "learning_rate": 1.6455188001683578e-05,
      "loss": 2.4041,
      "step": 21608
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1484512090682983,
      "learning_rate": 1.6454873534575243e-05,
      "loss": 2.7019,
      "step": 21609
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2504764795303345,
      "learning_rate": 1.6454559056524143e-05,
      "loss": 2.3377,
      "step": 21610
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9936128258705139,
      "learning_rate": 1.6454244567530814e-05,
      "loss": 2.2968,
      "step": 21611
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0512571334838867,
      "learning_rate": 1.6453930067595788e-05,
      "loss": 2.605,
      "step": 21612
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0539836883544922,
      "learning_rate": 1.6453615556719596e-05,
      "loss": 2.3276,
      "step": 21613
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.091620683670044,
      "learning_rate": 1.6453301034902772e-05,
      "loss": 2.537,
      "step": 21614
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2351843118667603,
      "learning_rate": 1.6452986502145853e-05,
      "loss": 2.4557,
      "step": 21615
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1604619026184082,
      "learning_rate": 1.6452671958449367e-05,
      "loss": 2.5368,
      "step": 21616
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.172798991203308,
      "learning_rate": 1.645235740381385e-05,
      "loss": 2.3307,
      "step": 21617
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1558691263198853,
      "learning_rate": 1.6452042838239836e-05,
      "loss": 2.436,
      "step": 21618
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1672296524047852,
      "learning_rate": 1.6451728261727858e-05,
      "loss": 2.4273,
      "step": 21619
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0891032218933105,
      "learning_rate": 1.645141367427845e-05,
      "loss": 2.4619,
      "step": 21620
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2140939235687256,
      "learning_rate": 1.645109907589214e-05,
      "loss": 2.6415,
      "step": 21621
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0694514513015747,
      "learning_rate": 1.6450784466569462e-05,
      "loss": 2.4924,
      "step": 21622
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.081289529800415,
      "learning_rate": 1.6450469846310956e-05,
      "loss": 2.2727,
      "step": 21623
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0141096115112305,
      "learning_rate": 1.645015521511715e-05,
      "loss": 2.4959,
      "step": 21624
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1097859144210815,
      "learning_rate": 1.6449840572988582e-05,
      "loss": 2.4343,
      "step": 21625
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2201265096664429,
      "learning_rate": 1.644952591992578e-05,
      "loss": 2.31,
      "step": 21626
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1326202154159546,
      "learning_rate": 1.644921125592928e-05,
      "loss": 2.275,
      "step": 21627
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0609710216522217,
      "learning_rate": 1.6448896580999618e-05,
      "loss": 2.5004,
      "step": 21628
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.091833233833313,
      "learning_rate": 1.6448581895137322e-05,
      "loss": 2.4402,
      "step": 21629
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0834386348724365,
      "learning_rate": 1.644826719834293e-05,
      "loss": 2.1572,
      "step": 21630
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9716445207595825,
      "learning_rate": 1.644795249061697e-05,
      "loss": 2.4993,
      "step": 21631
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2715049982070923,
      "learning_rate": 1.6447637771959983e-05,
      "loss": 2.5627,
      "step": 21632
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1265321969985962,
      "learning_rate": 1.64473230423725e-05,
      "loss": 2.4191,
      "step": 21633
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0560448169708252,
      "learning_rate": 1.644700830185505e-05,
      "loss": 2.5488,
      "step": 21634
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0381795167922974,
      "learning_rate": 1.644669355040817e-05,
      "loss": 2.2747,
      "step": 21635
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9724236130714417,
      "learning_rate": 1.6446378788032395e-05,
      "loss": 2.4456,
      "step": 21636
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9741635322570801,
      "learning_rate": 1.644606401472826e-05,
      "loss": 2.1818,
      "step": 21637
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9280269145965576,
      "learning_rate": 1.644574923049629e-05,
      "loss": 2.3609,
      "step": 21638
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0416486263275146,
      "learning_rate": 1.6445434435337028e-05,
      "loss": 2.568,
      "step": 21639
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0417752265930176,
      "learning_rate": 1.6445119629251003e-05,
      "loss": 2.6659,
      "step": 21640
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9709153175354004,
      "learning_rate": 1.6444804812238748e-05,
      "loss": 2.36,
      "step": 21641
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0751620531082153,
      "learning_rate": 1.64444899843008e-05,
      "loss": 2.3895,
      "step": 21642
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0595462322235107,
      "learning_rate": 1.6444175145437694e-05,
      "loss": 2.3915,
      "step": 21643
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0646077394485474,
      "learning_rate": 1.644386029564996e-05,
      "loss": 2.4612,
      "step": 21644
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.038680911064148,
      "learning_rate": 1.644354543493813e-05,
      "loss": 2.5684,
      "step": 21645
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1354140043258667,
      "learning_rate": 1.644323056330274e-05,
      "loss": 2.4789,
      "step": 21646
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1528071165084839,
      "learning_rate": 1.6442915680744326e-05,
      "loss": 2.202,
      "step": 21647
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9247350096702576,
      "learning_rate": 1.644260078726342e-05,
      "loss": 2.3497,
      "step": 21648
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.057134747505188,
      "learning_rate": 1.6442285882860556e-05,
      "loss": 2.4197,
      "step": 21649
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.109771728515625,
      "learning_rate": 1.6441970967536266e-05,
      "loss": 2.3424,
      "step": 21650
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0884095430374146,
      "learning_rate": 1.6441656041291086e-05,
      "loss": 2.5191,
      "step": 21651
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9608280062675476,
      "learning_rate": 1.6441341104125554e-05,
      "loss": 2.3728,
      "step": 21652
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9623680710792542,
      "learning_rate": 1.6441026156040198e-05,
      "loss": 2.2703,
      "step": 21653
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0214054584503174,
      "learning_rate": 1.6440711197035547e-05,
      "loss": 2.3439,
      "step": 21654
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0548015832901,
      "learning_rate": 1.6440396227112144e-05,
      "loss": 2.5422,
      "step": 21655
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1358747482299805,
      "learning_rate": 1.6440081246270523e-05,
      "loss": 2.4095,
      "step": 21656
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2101794481277466,
      "learning_rate": 1.6439766254511217e-05,
      "loss": 2.628,
      "step": 21657
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0813039541244507,
      "learning_rate": 1.6439451251834754e-05,
      "loss": 2.5404,
      "step": 21658
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1410846710205078,
      "learning_rate": 1.6439136238241672e-05,
      "loss": 2.41,
      "step": 21659
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.15265953540802,
      "learning_rate": 1.6438821213732508e-05,
      "loss": 2.4488,
      "step": 21660
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0507903099060059,
      "learning_rate": 1.6438506178307793e-05,
      "loss": 2.6048,
      "step": 21661
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0602598190307617,
      "learning_rate": 1.643819113196806e-05,
      "loss": 2.2792,
      "step": 21662
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1844733953475952,
      "learning_rate": 1.643787607471384e-05,
      "loss": 2.3961,
      "step": 21663
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2941337823867798,
      "learning_rate": 1.6437561006545677e-05,
      "loss": 2.3452,
      "step": 21664
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.023528814315796,
      "learning_rate": 1.64372459274641e-05,
      "loss": 2.4411,
      "step": 21665
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0457756519317627,
      "learning_rate": 1.643693083746964e-05,
      "loss": 2.1441,
      "step": 21666
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9848031997680664,
      "learning_rate": 1.6436615736562838e-05,
      "loss": 2.7127,
      "step": 21667
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0148433446884155,
      "learning_rate": 1.643630062474422e-05,
      "loss": 2.4779,
      "step": 21668
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1353774070739746,
      "learning_rate": 1.643598550201433e-05,
      "loss": 2.5322,
      "step": 21669
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0509825944900513,
      "learning_rate": 1.6435670368373687e-05,
      "loss": 2.3766,
      "step": 21670
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1151773929595947,
      "learning_rate": 1.6435355223822844e-05,
      "loss": 2.5113,
      "step": 21671
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0222715139389038,
      "learning_rate": 1.6435040068362322e-05,
      "loss": 2.1131,
      "step": 21672
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9807103872299194,
      "learning_rate": 1.643472490199266e-05,
      "loss": 2.3802,
      "step": 21673
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1024435758590698,
      "learning_rate": 1.643440972471439e-05,
      "loss": 2.3819,
      "step": 21674
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.052983045578003,
      "learning_rate": 1.6434094536528053e-05,
      "loss": 2.4857,
      "step": 21675
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0394762754440308,
      "learning_rate": 1.6433779337434176e-05,
      "loss": 2.569,
      "step": 21676
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0052026510238647,
      "learning_rate": 1.643346412743329e-05,
      "loss": 2.3706,
      "step": 21677
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2053890228271484,
      "learning_rate": 1.643314890652594e-05,
      "loss": 2.3298,
      "step": 21678
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2069964408874512,
      "learning_rate": 1.643283367471266e-05,
      "loss": 2.5371,
      "step": 21679
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0038833618164062,
      "learning_rate": 1.643251843199397e-05,
      "loss": 2.4838,
      "step": 21680
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1184521913528442,
      "learning_rate": 1.643220317837042e-05,
      "loss": 2.3351,
      "step": 21681
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0049344301223755,
      "learning_rate": 1.6431887913842538e-05,
      "loss": 2.3801,
      "step": 21682
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0242222547531128,
      "learning_rate": 1.6431572638410857e-05,
      "loss": 2.3626,
      "step": 21683
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1177345514297485,
      "learning_rate": 1.6431257352075914e-05,
      "loss": 2.2876,
      "step": 21684
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.988236665725708,
      "learning_rate": 1.643094205483825e-05,
      "loss": 2.444,
      "step": 21685
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0192984342575073,
      "learning_rate": 1.6430626746698384e-05,
      "loss": 2.4677,
      "step": 21686
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.954248309135437,
      "learning_rate": 1.6430311427656863e-05,
      "loss": 2.5718,
      "step": 21687
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0260038375854492,
      "learning_rate": 1.6429996097714215e-05,
      "loss": 2.3466,
      "step": 21688
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1048301458358765,
      "learning_rate": 1.6429680756870977e-05,
      "loss": 2.4763,
      "step": 21689
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1297686100006104,
      "learning_rate": 1.6429365405127686e-05,
      "loss": 2.6317,
      "step": 21690
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0061509609222412,
      "learning_rate": 1.6429050042484877e-05,
      "loss": 2.465,
      "step": 21691
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9686765670776367,
      "learning_rate": 1.642873466894308e-05,
      "loss": 2.2546,
      "step": 21692
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.100501537322998,
      "learning_rate": 1.642841928450283e-05,
      "loss": 2.543,
      "step": 21693
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0470703840255737,
      "learning_rate": 1.6428103889164665e-05,
      "loss": 2.4845,
      "step": 21694
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1232025623321533,
      "learning_rate": 1.6427788482929116e-05,
      "loss": 2.4453,
      "step": 21695
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0459046363830566,
      "learning_rate": 1.642747306579672e-05,
      "loss": 2.469,
      "step": 21696
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1240874528884888,
      "learning_rate": 1.6427157637768013e-05,
      "loss": 2.7685,
      "step": 21697
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9826968908309937,
      "learning_rate": 1.642684219884353e-05,
      "loss": 2.5736,
      "step": 21698
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0706970691680908,
      "learning_rate": 1.6426526749023802e-05,
      "loss": 2.2191,
      "step": 21699
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0724655389785767,
      "learning_rate": 1.6426211288309363e-05,
      "loss": 2.2062,
      "step": 21700
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9840382933616638,
      "learning_rate": 1.6425895816700753e-05,
      "loss": 2.4928,
      "step": 21701
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.995265781879425,
      "learning_rate": 1.6425580334198505e-05,
      "loss": 2.473,
      "step": 21702
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.192855715751648,
      "learning_rate": 1.6425264840803154e-05,
      "loss": 2.5382,
      "step": 21703
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1489789485931396,
      "learning_rate": 1.6424949336515236e-05,
      "loss": 2.4861,
      "step": 21704
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0239946842193604,
      "learning_rate": 1.6424633821335278e-05,
      "loss": 2.4607,
      "step": 21705
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0000455379486084,
      "learning_rate": 1.6424318295263824e-05,
      "loss": 2.4236,
      "step": 21706
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9967013597488403,
      "learning_rate": 1.6424002758301404e-05,
      "loss": 2.5011,
      "step": 21707
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.045462727546692,
      "learning_rate": 1.6423687210448556e-05,
      "loss": 2.3574,
      "step": 21708
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1755470037460327,
      "learning_rate": 1.642337165170581e-05,
      "loss": 2.4144,
      "step": 21709
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.996780514717102,
      "learning_rate": 1.642305608207371e-05,
      "loss": 2.3288,
      "step": 21710
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1008598804473877,
      "learning_rate": 1.6422740501552783e-05,
      "loss": 2.4124,
      "step": 21711
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9716846942901611,
      "learning_rate": 1.642242491014357e-05,
      "loss": 2.5618,
      "step": 21712
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0420900583267212,
      "learning_rate": 1.6422109307846597e-05,
      "loss": 2.6117,
      "step": 21713
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0554500818252563,
      "learning_rate": 1.6421793694662403e-05,
      "loss": 2.5075,
      "step": 21714
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.005100131034851,
      "learning_rate": 1.642147807059153e-05,
      "loss": 2.5306,
      "step": 21715
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9739109873771667,
      "learning_rate": 1.6421162435634507e-05,
      "loss": 2.4432,
      "step": 21716
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0211652517318726,
      "learning_rate": 1.6420846789791866e-05,
      "loss": 2.4014,
      "step": 21717
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0373393297195435,
      "learning_rate": 1.6420531133064147e-05,
      "loss": 2.1845,
      "step": 21718
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.980364978313446,
      "learning_rate": 1.642021546545189e-05,
      "loss": 2.2614,
      "step": 21719
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0810998678207397,
      "learning_rate": 1.6419899786955617e-05,
      "loss": 2.6618,
      "step": 21720
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0654821395874023,
      "learning_rate": 1.6419584097575872e-05,
      "loss": 2.4218,
      "step": 21721
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.046200156211853,
      "learning_rate": 1.6419268397313187e-05,
      "loss": 2.4925,
      "step": 21722
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1029000282287598,
      "learning_rate": 1.64189526861681e-05,
      "loss": 2.5191,
      "step": 21723
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.933463454246521,
      "learning_rate": 1.641863696414115e-05,
      "loss": 2.3614,
      "step": 21724
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.007222294807434,
      "learning_rate": 1.6418321231232858e-05,
      "loss": 2.3703,
      "step": 21725
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9894136190414429,
      "learning_rate": 1.6418005487443776e-05,
      "loss": 2.6108,
      "step": 21726
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0406920909881592,
      "learning_rate": 1.6417689732774427e-05,
      "loss": 2.5752,
      "step": 21727
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.012969970703125,
      "learning_rate": 1.6417373967225353e-05,
      "loss": 2.2151,
      "step": 21728
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5198595523834229,
      "learning_rate": 1.6417058190797086e-05,
      "loss": 2.2247,
      "step": 21729
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.970538854598999,
      "learning_rate": 1.641674240349016e-05,
      "loss": 2.3941,
      "step": 21730
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0016194581985474,
      "learning_rate": 1.6416426605305118e-05,
      "loss": 2.359,
      "step": 21731
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0156314373016357,
      "learning_rate": 1.6416110796242488e-05,
      "loss": 2.6617,
      "step": 21732
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.972725510597229,
      "learning_rate": 1.641579497630281e-05,
      "loss": 2.4999,
      "step": 21733
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9818602204322815,
      "learning_rate": 1.641547914548661e-05,
      "loss": 2.2999,
      "step": 21734
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0393345355987549,
      "learning_rate": 1.641516330379444e-05,
      "loss": 2.4447,
      "step": 21735
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9995039701461792,
      "learning_rate": 1.6414847451226817e-05,
      "loss": 2.2887,
      "step": 21736
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1892061233520508,
      "learning_rate": 1.6414531587784292e-05,
      "loss": 2.4466,
      "step": 21737
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9684745073318481,
      "learning_rate": 1.641421571346739e-05,
      "loss": 2.5124,
      "step": 21738
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0427559614181519,
      "learning_rate": 1.641389982827665e-05,
      "loss": 2.7148,
      "step": 21739
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0305519104003906,
      "learning_rate": 1.6413583932212608e-05,
      "loss": 2.4337,
      "step": 21740
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0074536800384521,
      "learning_rate": 1.6413268025275802e-05,
      "loss": 2.2542,
      "step": 21741
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.979614794254303,
      "learning_rate": 1.6412952107466762e-05,
      "loss": 2.4035,
      "step": 21742
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0499544143676758,
      "learning_rate": 1.6412636178786027e-05,
      "loss": 2.501,
      "step": 21743
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0522555112838745,
      "learning_rate": 1.6412320239234136e-05,
      "loss": 2.5068,
      "step": 21744
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1664553880691528,
      "learning_rate": 1.6412004288811617e-05,
      "loss": 2.466,
      "step": 21745
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.169779658317566,
      "learning_rate": 1.641168832751901e-05,
      "loss": 2.3189,
      "step": 21746
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0017166137695312,
      "learning_rate": 1.641137235535685e-05,
      "loss": 2.4853,
      "step": 21747
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9308629631996155,
      "learning_rate": 1.6411056372325667e-05,
      "loss": 2.4334,
      "step": 21748
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0339088439941406,
      "learning_rate": 1.6410740378426008e-05,
      "loss": 2.5455,
      "step": 21749
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.117541790008545,
      "learning_rate": 1.6410424373658404e-05,
      "loss": 2.4026,
      "step": 21750
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0802847146987915,
      "learning_rate": 1.641010835802339e-05,
      "loss": 2.5099,
      "step": 21751
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9671151638031006,
      "learning_rate": 1.64097923315215e-05,
      "loss": 2.3805,
      "step": 21752
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2085890769958496,
      "learning_rate": 1.640947629415327e-05,
      "loss": 2.3688,
      "step": 21753
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9877644777297974,
      "learning_rate": 1.6409160245919234e-05,
      "loss": 2.2787,
      "step": 21754
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0472716093063354,
      "learning_rate": 1.6408844186819935e-05,
      "loss": 2.6345,
      "step": 21755
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5101832151412964,
      "learning_rate": 1.6408528116855904e-05,
      "loss": 2.5981,
      "step": 21756
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0336171388626099,
      "learning_rate": 1.640821203602768e-05,
      "loss": 2.4521,
      "step": 21757
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2543914318084717,
      "learning_rate": 1.640789594433579e-05,
      "loss": 2.4146,
      "step": 21758
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.968419075012207,
      "learning_rate": 1.640757984178078e-05,
      "loss": 2.3981,
      "step": 21759
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.019063115119934,
      "learning_rate": 1.6407263728363177e-05,
      "loss": 2.3768,
      "step": 21760
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9237931966781616,
      "learning_rate": 1.6406947604083527e-05,
      "loss": 2.4151,
      "step": 21761
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0877686738967896,
      "learning_rate": 1.640663146894236e-05,
      "loss": 2.4704,
      "step": 21762
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9734703898429871,
      "learning_rate": 1.6406315322940212e-05,
      "loss": 2.4156,
      "step": 21763
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0060220956802368,
      "learning_rate": 1.640599916607762e-05,
      "loss": 2.4844,
      "step": 21764
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1576855182647705,
      "learning_rate": 1.6405682998355118e-05,
      "loss": 2.1719,
      "step": 21765
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1005370616912842,
      "learning_rate": 1.6405366819773246e-05,
      "loss": 2.5027,
      "step": 21766
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.093969464302063,
      "learning_rate": 1.6405050630332534e-05,
      "loss": 2.5069,
      "step": 21767
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.089980959892273,
      "learning_rate": 1.6404734430033523e-05,
      "loss": 2.4007,
      "step": 21768
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9877281188964844,
      "learning_rate": 1.640441821887675e-05,
      "loss": 2.3023,
      "step": 21769
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1744158267974854,
      "learning_rate": 1.640410199686275e-05,
      "loss": 2.5756,
      "step": 21770
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0923840999603271,
      "learning_rate": 1.6403785763992056e-05,
      "loss": 2.5931,
      "step": 21771
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9798142910003662,
      "learning_rate": 1.6403469520265202e-05,
      "loss": 2.3279,
      "step": 21772
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.041397213935852,
      "learning_rate": 1.6403153265682733e-05,
      "loss": 2.5177,
      "step": 21773
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9840621948242188,
      "learning_rate": 1.6402837000245175e-05,
      "loss": 2.4234,
      "step": 21774
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9904937148094177,
      "learning_rate": 1.6402520723953072e-05,
      "loss": 2.5056,
      "step": 21775
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9724318981170654,
      "learning_rate": 1.640220443680696e-05,
      "loss": 2.5007,
      "step": 21776
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0578984022140503,
      "learning_rate": 1.640188813880737e-05,
      "loss": 2.4573,
      "step": 21777
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1816908121109009,
      "learning_rate": 1.640157182995484e-05,
      "loss": 2.261,
      "step": 21778
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0037038326263428,
      "learning_rate": 1.640125551024991e-05,
      "loss": 2.6387,
      "step": 21779
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0120385885238647,
      "learning_rate": 1.640093917969311e-05,
      "loss": 2.4247,
      "step": 21780
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0025677680969238,
      "learning_rate": 1.640062283828498e-05,
      "loss": 2.449,
      "step": 21781
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.062515377998352,
      "learning_rate": 1.6400306486026058e-05,
      "loss": 2.5988,
      "step": 21782
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0136370658874512,
      "learning_rate": 1.639999012291688e-05,
      "loss": 2.4031,
      "step": 21783
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9844813942909241,
      "learning_rate": 1.6399673748957975e-05,
      "loss": 2.6304,
      "step": 21784
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0151480436325073,
      "learning_rate": 1.639935736414989e-05,
      "loss": 2.3567,
      "step": 21785
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9983945488929749,
      "learning_rate": 1.6399040968493155e-05,
      "loss": 2.5041,
      "step": 21786
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0526161193847656,
      "learning_rate": 1.6398724561988303e-05,
      "loss": 2.2295,
      "step": 21787
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9412708878517151,
      "learning_rate": 1.639840814463588e-05,
      "loss": 2.2093,
      "step": 21788
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0828309059143066,
      "learning_rate": 1.6398091716436418e-05,
      "loss": 2.2866,
      "step": 21789
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1173306703567505,
      "learning_rate": 1.639777527739045e-05,
      "loss": 2.4595,
      "step": 21790
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0029443502426147,
      "learning_rate": 1.6397458827498516e-05,
      "loss": 2.2858,
      "step": 21791
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2980427742004395,
      "learning_rate": 1.639714236676115e-05,
      "loss": 2.6355,
      "step": 21792
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0676404237747192,
      "learning_rate": 1.6396825895178893e-05,
      "loss": 2.6451,
      "step": 21793
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.071925401687622,
      "learning_rate": 1.639650941275228e-05,
      "loss": 2.4255,
      "step": 21794
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9773152470588684,
      "learning_rate": 1.6396192919481842e-05,
      "loss": 2.4404,
      "step": 21795
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1411124467849731,
      "learning_rate": 1.6395876415368123e-05,
      "loss": 2.531,
      "step": 21796
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1064320802688599,
      "learning_rate": 1.6395559900411656e-05,
      "loss": 2.4005,
      "step": 21797
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0427578687667847,
      "learning_rate": 1.639524337461298e-05,
      "loss": 2.3694,
      "step": 21798
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2037335634231567,
      "learning_rate": 1.6394926837972624e-05,
      "loss": 2.5814,
      "step": 21799
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0880672931671143,
      "learning_rate": 1.6394610290491133e-05,
      "loss": 2.3945,
      "step": 21800
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0788081884384155,
      "learning_rate": 1.6394293732169043e-05,
      "loss": 2.4494,
      "step": 21801
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9933058619499207,
      "learning_rate": 1.6393977163006886e-05,
      "loss": 2.4505,
      "step": 21802
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0539525747299194,
      "learning_rate": 1.63936605830052e-05,
      "loss": 2.5458,
      "step": 21803
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9911499619483948,
      "learning_rate": 1.6393343992164527e-05,
      "loss": 2.4246,
      "step": 21804
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.204398274421692,
      "learning_rate": 1.6393027390485395e-05,
      "loss": 2.3849,
      "step": 21805
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1592926979064941,
      "learning_rate": 1.6392710777968348e-05,
      "loss": 2.5189,
      "step": 21806
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.033380389213562,
      "learning_rate": 1.639239415461392e-05,
      "loss": 2.3082,
      "step": 21807
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1567708253860474,
      "learning_rate": 1.6392077520422646e-05,
      "loss": 2.3949,
      "step": 21808
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0744540691375732,
      "learning_rate": 1.6391760875395065e-05,
      "loss": 2.4769,
      "step": 21809
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0883711576461792,
      "learning_rate": 1.6391444219531716e-05,
      "loss": 2.5097,
      "step": 21810
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0276319980621338,
      "learning_rate": 1.639112755283313e-05,
      "loss": 2.5466,
      "step": 21811
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1586203575134277,
      "learning_rate": 1.6390810875299846e-05,
      "loss": 2.4527,
      "step": 21812
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4302481412887573,
      "learning_rate": 1.6390494186932404e-05,
      "loss": 2.4252,
      "step": 21813
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1137975454330444,
      "learning_rate": 1.6390177487731337e-05,
      "loss": 2.5982,
      "step": 21814
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4595109224319458,
      "learning_rate": 1.6389860777697186e-05,
      "loss": 2.4809,
      "step": 21815
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9238116145133972,
      "learning_rate": 1.6389544056830486e-05,
      "loss": 2.3885,
      "step": 21816
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9838993549346924,
      "learning_rate": 1.6389227325131773e-05,
      "loss": 2.3466,
      "step": 21817
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9391265511512756,
      "learning_rate": 1.6388910582601583e-05,
      "loss": 2.5474,
      "step": 21818
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2005038261413574,
      "learning_rate": 1.6388593829240453e-05,
      "loss": 2.1954,
      "step": 21819
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0604074001312256,
      "learning_rate": 1.6388277065048922e-05,
      "loss": 2.4158,
      "step": 21820
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0324008464813232,
      "learning_rate": 1.638796029002753e-05,
      "loss": 2.1531,
      "step": 21821
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0893632173538208,
      "learning_rate": 1.6387643504176805e-05,
      "loss": 2.4912,
      "step": 21822
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0630866289138794,
      "learning_rate": 1.6387326707497296e-05,
      "loss": 2.5834,
      "step": 21823
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9871872067451477,
      "learning_rate": 1.6387009899989527e-05,
      "loss": 2.5067,
      "step": 21824
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.043119192123413,
      "learning_rate": 1.6386693081654045e-05,
      "loss": 2.5855,
      "step": 21825
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0012270212173462,
      "learning_rate": 1.6386376252491384e-05,
      "loss": 2.6293,
      "step": 21826
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.8909401297569275,
      "learning_rate": 1.6386059412502077e-05,
      "loss": 2.4228,
      "step": 21827
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0865360498428345,
      "learning_rate": 1.638574256168667e-05,
      "loss": 2.4195,
      "step": 21828
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1210699081420898,
      "learning_rate": 1.6385425700045693e-05,
      "loss": 2.4381,
      "step": 21829
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0130436420440674,
      "learning_rate": 1.6385108827579682e-05,
      "loss": 2.6274,
      "step": 21830
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.225856900215149,
      "learning_rate": 1.6384791944289184e-05,
      "loss": 2.4005,
      "step": 21831
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0067129135131836,
      "learning_rate": 1.6384475050174722e-05,
      "loss": 2.4706,
      "step": 21832
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9940140247344971,
      "learning_rate": 1.6384158145236843e-05,
      "loss": 2.3492,
      "step": 21833
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2078847885131836,
      "learning_rate": 1.6383841229476085e-05,
      "loss": 2.749,
      "step": 21834
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0781331062316895,
      "learning_rate": 1.638352430289298e-05,
      "loss": 2.3651,
      "step": 21835
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2765628099441528,
      "learning_rate": 1.638320736548807e-05,
      "loss": 2.495,
      "step": 21836
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.193546175956726,
      "learning_rate": 1.638289041726189e-05,
      "loss": 2.4382,
      "step": 21837
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4951732158660889,
      "learning_rate": 1.6382573458214976e-05,
      "loss": 2.2662,
      "step": 21838
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9784257411956787,
      "learning_rate": 1.6382256488347864e-05,
      "loss": 2.5079,
      "step": 21839
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9960077404975891,
      "learning_rate": 1.6381939507661096e-05,
      "loss": 2.5547,
      "step": 21840
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.058476209640503,
      "learning_rate": 1.6381622516155206e-05,
      "loss": 2.4524,
      "step": 21841
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0603721141815186,
      "learning_rate": 1.6381305513830734e-05,
      "loss": 2.5339,
      "step": 21842
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0365269184112549,
      "learning_rate": 1.6380988500688214e-05,
      "loss": 2.3105,
      "step": 21843
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.032364845275879,
      "learning_rate": 1.638067147672819e-05,
      "loss": 2.3857,
      "step": 21844
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9856956005096436,
      "learning_rate": 1.638035444195119e-05,
      "loss": 2.511,
      "step": 21845
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0039600133895874,
      "learning_rate": 1.638003739635776e-05,
      "loss": 2.3486,
      "step": 21846
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1782697439193726,
      "learning_rate": 1.637972033994843e-05,
      "loss": 2.5319,
      "step": 21847
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9728999137878418,
      "learning_rate": 1.637940327272374e-05,
      "loss": 2.3115,
      "step": 21848
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9570441842079163,
      "learning_rate": 1.6379086194684237e-05,
      "loss": 2.499,
      "step": 21849
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.151537537574768,
      "learning_rate": 1.6378769105830446e-05,
      "loss": 2.4417,
      "step": 21850
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2208466529846191,
      "learning_rate": 1.6378452006162904e-05,
      "loss": 2.5078,
      "step": 21851
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.046377420425415,
      "learning_rate": 1.637813489568216e-05,
      "loss": 2.7588,
      "step": 21852
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0355993509292603,
      "learning_rate": 1.6377817774388743e-05,
      "loss": 2.3705,
      "step": 21853
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0669901371002197,
      "learning_rate": 1.6377500642283194e-05,
      "loss": 2.5918,
      "step": 21854
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0357673168182373,
      "learning_rate": 1.6377183499366048e-05,
      "loss": 2.5062,
      "step": 21855
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0165013074874878,
      "learning_rate": 1.637686634563784e-05,
      "loss": 2.4117,
      "step": 21856
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1189544200897217,
      "learning_rate": 1.6376549181099116e-05,
      "loss": 2.4679,
      "step": 21857
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9374314546585083,
      "learning_rate": 1.6376232005750414e-05,
      "loss": 2.3241,
      "step": 21858
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0031553506851196,
      "learning_rate": 1.637591481959226e-05,
      "loss": 2.3122,
      "step": 21859
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9919041991233826,
      "learning_rate": 1.63755976226252e-05,
      "loss": 2.6479,
      "step": 21860
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.125408411026001,
      "learning_rate": 1.637528041484977e-05,
      "loss": 2.4887,
      "step": 21861
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0828337669372559,
      "learning_rate": 1.6374963196266506e-05,
      "loss": 2.2278,
      "step": 21862
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.125827670097351,
      "learning_rate": 1.6374645966875957e-05,
      "loss": 2.4117,
      "step": 21863
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9834740161895752,
      "learning_rate": 1.6374328726678642e-05,
      "loss": 2.5736,
      "step": 21864
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0439612865447998,
      "learning_rate": 1.6374011475675112e-05,
      "loss": 2.4334,
      "step": 21865
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0821146965026855,
      "learning_rate": 1.63736942138659e-05,
      "loss": 2.4318,
      "step": 21866
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.94432532787323,
      "learning_rate": 1.6373376941251552e-05,
      "loss": 2.5601,
      "step": 21867
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9568341374397278,
      "learning_rate": 1.6373059657832592e-05,
      "loss": 2.4693,
      "step": 21868
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9323294758796692,
      "learning_rate": 1.637274236360957e-05,
      "loss": 2.5689,
      "step": 21869
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1556830406188965,
      "learning_rate": 1.6372425058583016e-05,
      "loss": 2.562,
      "step": 21870
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.958034873008728,
      "learning_rate": 1.637210774275347e-05,
      "loss": 2.6596,
      "step": 21871
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3649959564208984,
      "learning_rate": 1.637179041612147e-05,
      "loss": 2.6619,
      "step": 21872
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.115424394607544,
      "learning_rate": 1.6371473078687555e-05,
      "loss": 2.4662,
      "step": 21873
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0349358320236206,
      "learning_rate": 1.6371155730452266e-05,
      "loss": 2.4555,
      "step": 21874
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0868983268737793,
      "learning_rate": 1.6370838371416134e-05,
      "loss": 2.2864,
      "step": 21875
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9732229113578796,
      "learning_rate": 1.6370521001579702e-05,
      "loss": 2.3416,
      "step": 21876
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0613211393356323,
      "learning_rate": 1.637020362094351e-05,
      "loss": 2.3953,
      "step": 21877
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9806767106056213,
      "learning_rate": 1.636988622950809e-05,
      "loss": 2.3373,
      "step": 21878
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3126775026321411,
      "learning_rate": 1.6369568827273982e-05,
      "loss": 2.4576,
      "step": 21879
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9550053477287292,
      "learning_rate": 1.6369251414241722e-05,
      "loss": 2.4764,
      "step": 21880
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0948566198349,
      "learning_rate": 1.6368933990411856e-05,
      "loss": 2.353,
      "step": 21881
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9727603793144226,
      "learning_rate": 1.6368616555784915e-05,
      "loss": 2.4721,
      "step": 21882
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9449460506439209,
      "learning_rate": 1.636829911036144e-05,
      "loss": 2.2764,
      "step": 21883
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0404353141784668,
      "learning_rate": 1.6367981654141967e-05,
      "loss": 2.5342,
      "step": 21884
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.990459680557251,
      "learning_rate": 1.6367664187127038e-05,
      "loss": 2.5849,
      "step": 21885
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.0672900676727295,
      "learning_rate": 1.636734670931719e-05,
      "loss": 2.37,
      "step": 21886
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.9490339756011963,
      "learning_rate": 1.6367029220712954e-05,
      "loss": 2.5826,
      "step": 21887
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1083801984786987,
      "learning_rate": 1.6366711721314878e-05,
      "loss": 2.2393,
      "step": 21888
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0549958944320679,
      "learning_rate": 1.6366394211123497e-05,
      "loss": 2.4202,
      "step": 21889
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9454377889633179,
      "learning_rate": 1.636607669013935e-05,
      "loss": 2.4592,
      "step": 21890
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0990008115768433,
      "learning_rate": 1.6365759158362967e-05,
      "loss": 2.2992,
      "step": 21891
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9538518190383911,
      "learning_rate": 1.63654416157949e-05,
      "loss": 2.4349,
      "step": 21892
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0459955930709839,
      "learning_rate": 1.636512406243568e-05,
      "loss": 2.5557,
      "step": 21893
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0275118350982666,
      "learning_rate": 1.6364806498285843e-05,
      "loss": 2.4043,
      "step": 21894
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1858701705932617,
      "learning_rate": 1.6364488923345933e-05,
      "loss": 2.3682,
      "step": 21895
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9117500185966492,
      "learning_rate": 1.6364171337616483e-05,
      "loss": 2.4883,
      "step": 21896
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4363621473312378,
      "learning_rate": 1.636385374109804e-05,
      "loss": 2.3866,
      "step": 21897
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9740520715713501,
      "learning_rate": 1.636353613379113e-05,
      "loss": 2.5446,
      "step": 21898
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.139566421508789,
      "learning_rate": 1.63632185156963e-05,
      "loss": 2.6406,
      "step": 21899
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.00473153591156,
      "learning_rate": 1.6362900886814086e-05,
      "loss": 2.6334,
      "step": 21900
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0329185724258423,
      "learning_rate": 1.636258324714503e-05,
      "loss": 2.5016,
      "step": 21901
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9715197682380676,
      "learning_rate": 1.6362265596689665e-05,
      "loss": 2.5549,
      "step": 21902
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2479063272476196,
      "learning_rate": 1.636194793544853e-05,
      "loss": 2.4485,
      "step": 21903
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0932499170303345,
      "learning_rate": 1.636163026342217e-05,
      "loss": 2.4284,
      "step": 21904
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9644758105278015,
      "learning_rate": 1.6361312580611112e-05,
      "loss": 2.3574,
      "step": 21905
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1655336618423462,
      "learning_rate": 1.6360994887015906e-05,
      "loss": 2.589,
      "step": 21906
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.10191011428833,
      "learning_rate": 1.6360677182637083e-05,
      "loss": 2.3201,
      "step": 21907
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.08524751663208,
      "learning_rate": 1.6360359467475188e-05,
      "loss": 2.3921,
      "step": 21908
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0951520204544067,
      "learning_rate": 1.636004174153076e-05,
      "loss": 2.4348,
      "step": 21909
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9773969650268555,
      "learning_rate": 1.6359724004804328e-05,
      "loss": 2.3008,
      "step": 21910
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.250999927520752,
      "learning_rate": 1.6359406257296437e-05,
      "loss": 2.1746,
      "step": 21911
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9773257970809937,
      "learning_rate": 1.6359088499007626e-05,
      "loss": 2.5705,
      "step": 21912
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1193815469741821,
      "learning_rate": 1.6358770729938433e-05,
      "loss": 2.4524,
      "step": 21913
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0743563175201416,
      "learning_rate": 1.6358452950089394e-05,
      "loss": 2.6435,
      "step": 21914
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.05963933467865,
      "learning_rate": 1.6358135159461055e-05,
      "loss": 2.4245,
      "step": 21915
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9895790219306946,
      "learning_rate": 1.6357817358053947e-05,
      "loss": 2.3499,
      "step": 21916
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.051961898803711,
      "learning_rate": 1.6357499545868612e-05,
      "loss": 2.3342,
      "step": 21917
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9833573698997498,
      "learning_rate": 1.635718172290559e-05,
      "loss": 2.3618,
      "step": 21918
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.059064269065857,
      "learning_rate": 1.6356863889165418e-05,
      "loss": 2.6418,
      "step": 21919
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2273279428482056,
      "learning_rate": 1.6356546044648632e-05,
      "loss": 2.3828,
      "step": 21920
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0015419721603394,
      "learning_rate": 1.6356228189355778e-05,
      "loss": 2.3834,
      "step": 21921
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0401215553283691,
      "learning_rate": 1.635591032328739e-05,
      "loss": 2.356,
      "step": 21922
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9859806895256042,
      "learning_rate": 1.635559244644401e-05,
      "loss": 2.3276,
      "step": 21923
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0976694822311401,
      "learning_rate": 1.635527455882617e-05,
      "loss": 2.4686,
      "step": 21924
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9765834808349609,
      "learning_rate": 1.635495666043442e-05,
      "loss": 2.6726,
      "step": 21925
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0898979902267456,
      "learning_rate": 1.6354638751269284e-05,
      "loss": 2.4306,
      "step": 21926
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.15639066696167,
      "learning_rate": 1.6354320831331313e-05,
      "loss": 2.5466,
      "step": 21927
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9286717772483826,
      "learning_rate": 1.6354002900621047e-05,
      "loss": 2.1656,
      "step": 21928
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0557160377502441,
      "learning_rate": 1.6353684959139017e-05,
      "loss": 2.6493,
      "step": 21929
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9681809544563293,
      "learning_rate": 1.6353367006885763e-05,
      "loss": 2.4157,
      "step": 21930
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.169266700744629,
      "learning_rate": 1.6353049043861827e-05,
      "loss": 2.5583,
      "step": 21931
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0193344354629517,
      "learning_rate": 1.635273107006775e-05,
      "loss": 2.3327,
      "step": 21932
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.098464012145996,
      "learning_rate": 1.6352413085504064e-05,
      "loss": 2.5406,
      "step": 21933
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0840743780136108,
      "learning_rate": 1.6352095090171318e-05,
      "loss": 2.5639,
      "step": 21934
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1778560876846313,
      "learning_rate": 1.6351777084070045e-05,
      "loss": 2.4306,
      "step": 21935
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1175652742385864,
      "learning_rate": 1.635145906720078e-05,
      "loss": 2.552,
      "step": 21936
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9806419014930725,
      "learning_rate": 1.6351141039564068e-05,
      "loss": 2.4923,
      "step": 21937
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0304629802703857,
      "learning_rate": 1.635082300116045e-05,
      "loss": 2.3189,
      "step": 21938
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1321930885314941,
      "learning_rate": 1.6350504951990463e-05,
      "loss": 2.5213,
      "step": 21939
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.027817964553833,
      "learning_rate": 1.6350186892054642e-05,
      "loss": 2.2448,
      "step": 21940
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0096855163574219,
      "learning_rate": 1.634986882135353e-05,
      "loss": 2.4225,
      "step": 21941
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0340261459350586,
      "learning_rate": 1.634955073988767e-05,
      "loss": 2.2968,
      "step": 21942
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.09810471534729,
      "learning_rate": 1.634923264765759e-05,
      "loss": 2.2388,
      "step": 21943
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1354666948318481,
      "learning_rate": 1.6348914544663842e-05,
      "loss": 2.3721,
      "step": 21944
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1779948472976685,
      "learning_rate": 1.6348596430906955e-05,
      "loss": 2.2205,
      "step": 21945
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1023577451705933,
      "learning_rate": 1.6348278306387474e-05,
      "loss": 2.3199,
      "step": 21946
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0127371549606323,
      "learning_rate": 1.6347960171105938e-05,
      "loss": 2.4828,
      "step": 21947
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0377131700515747,
      "learning_rate": 1.634764202506289e-05,
      "loss": 2.2333,
      "step": 21948
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.051548719406128,
      "learning_rate": 1.634732386825886e-05,
      "loss": 2.3575,
      "step": 21949
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0926477909088135,
      "learning_rate": 1.634700570069439e-05,
      "loss": 2.5199,
      "step": 21950
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0384935140609741,
      "learning_rate": 1.6346687522370025e-05,
      "loss": 2.4202,
      "step": 21951
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.193247675895691,
      "learning_rate": 1.6346369333286297e-05,
      "loss": 2.4625,
      "step": 21952
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0843346118927002,
      "learning_rate": 1.6346051133443754e-05,
      "loss": 2.534,
      "step": 21953
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1247586011886597,
      "learning_rate": 1.634573292284293e-05,
      "loss": 2.4588,
      "step": 21954
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.408515453338623,
      "learning_rate": 1.6345414701484365e-05,
      "loss": 2.0616,
      "step": 21955
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0021854639053345,
      "learning_rate": 1.63450964693686e-05,
      "loss": 2.5577,
      "step": 21956
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7800859212875366,
      "learning_rate": 1.6344778226496166e-05,
      "loss": 2.4045,
      "step": 21957
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0432732105255127,
      "learning_rate": 1.6344459972867617e-05,
      "loss": 2.409,
      "step": 21958
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0332003831863403,
      "learning_rate": 1.634414170848348e-05,
      "loss": 2.4513,
      "step": 21959
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3361417055130005,
      "learning_rate": 1.6343823433344303e-05,
      "loss": 2.4516,
      "step": 21960
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0935612916946411,
      "learning_rate": 1.6343505147450625e-05,
      "loss": 2.5057,
      "step": 21961
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0290688276290894,
      "learning_rate": 1.634318685080298e-05,
      "loss": 2.5146,
      "step": 21962
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4104710817337036,
      "learning_rate": 1.634286854340191e-05,
      "loss": 2.5335,
      "step": 21963
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.049145221710205,
      "learning_rate": 1.6342550225247956e-05,
      "loss": 2.605,
      "step": 21964
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0226024389266968,
      "learning_rate": 1.6342231896341655e-05,
      "loss": 2.1466,
      "step": 21965
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0972310304641724,
      "learning_rate": 1.6341913556683547e-05,
      "loss": 2.5531,
      "step": 21966
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.066969394683838,
      "learning_rate": 1.6341595206274175e-05,
      "loss": 2.6203,
      "step": 21967
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0633280277252197,
      "learning_rate": 1.634127684511408e-05,
      "loss": 2.5471,
      "step": 21968
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1449429988861084,
      "learning_rate": 1.6340958473203796e-05,
      "loss": 2.5392,
      "step": 21969
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.120081901550293,
      "learning_rate": 1.6340640090543868e-05,
      "loss": 2.3951,
      "step": 21970
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.034571886062622,
      "learning_rate": 1.634032169713483e-05,
      "loss": 2.4704,
      "step": 21971
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.097249150276184,
      "learning_rate": 1.6340003292977225e-05,
      "loss": 2.2197,
      "step": 21972
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9820137023925781,
      "learning_rate": 1.633968487807159e-05,
      "loss": 2.4766,
      "step": 21973
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0324078798294067,
      "learning_rate": 1.633936645241847e-05,
      "loss": 2.4634,
      "step": 21974
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0878634452819824,
      "learning_rate": 1.6339048016018405e-05,
      "loss": 2.5069,
      "step": 21975
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0374282598495483,
      "learning_rate": 1.6338729568871924e-05,
      "loss": 2.4118,
      "step": 21976
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0319865942001343,
      "learning_rate": 1.633841111097958e-05,
      "loss": 2.4067,
      "step": 21977
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.005091667175293,
      "learning_rate": 1.633809264234191e-05,
      "loss": 2.549,
      "step": 21978
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4559134244918823,
      "learning_rate": 1.633777416295945e-05,
      "loss": 2.4258,
      "step": 21979
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0242416858673096,
      "learning_rate": 1.6337455672832735e-05,
      "loss": 2.4697,
      "step": 21980
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.152679204940796,
      "learning_rate": 1.633713717196232e-05,
      "loss": 2.4394,
      "step": 21981
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.121469259262085,
      "learning_rate": 1.6336818660348734e-05,
      "loss": 2.8402,
      "step": 21982
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.013352394104004,
      "learning_rate": 1.6336500137992518e-05,
      "loss": 2.3208,
      "step": 21983
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.14595627784729,
      "learning_rate": 1.6336181604894213e-05,
      "loss": 2.3941,
      "step": 21984
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0150341987609863,
      "learning_rate": 1.633586306105436e-05,
      "loss": 2.3672,
      "step": 21985
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0153839588165283,
      "learning_rate": 1.6335544506473498e-05,
      "loss": 2.5658,
      "step": 21986
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1095095872879028,
      "learning_rate": 1.633522594115217e-05,
      "loss": 2.5251,
      "step": 21987
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9850435853004456,
      "learning_rate": 1.633490736509091e-05,
      "loss": 2.2589,
      "step": 21988
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0996043682098389,
      "learning_rate": 1.6334588778290262e-05,
      "loss": 2.4008,
      "step": 21989
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.051480770111084,
      "learning_rate": 1.6334270180750765e-05,
      "loss": 2.3528,
      "step": 21990
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0573458671569824,
      "learning_rate": 1.6333951572472963e-05,
      "loss": 2.4519,
      "step": 21991
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0237889289855957,
      "learning_rate": 1.633363295345739e-05,
      "loss": 2.5794,
      "step": 21992
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1403613090515137,
      "learning_rate": 1.6333314323704588e-05,
      "loss": 2.4542,
      "step": 21993
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9597291350364685,
      "learning_rate": 1.63329956832151e-05,
      "loss": 2.6347,
      "step": 21994
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.024739146232605,
      "learning_rate": 1.6332677031989464e-05,
      "loss": 2.3889,
      "step": 21995
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9694358706474304,
      "learning_rate": 1.633235837002822e-05,
      "loss": 2.4507,
      "step": 21996
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2075541019439697,
      "learning_rate": 1.633203969733191e-05,
      "loss": 2.3716,
      "step": 21997
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0321760177612305,
      "learning_rate": 1.6331721013901073e-05,
      "loss": 2.773,
      "step": 21998
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0220370292663574,
      "learning_rate": 1.633140231973625e-05,
      "loss": 2.2342,
      "step": 21999
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1154311895370483,
      "learning_rate": 1.6331083614837978e-05,
      "loss": 2.3433,
      "step": 22000
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.110227346420288,
      "learning_rate": 1.63307648992068e-05,
      "loss": 2.351,
      "step": 22001
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1952028274536133,
      "learning_rate": 1.6330446172843254e-05,
      "loss": 2.3295,
      "step": 22002
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0098317861557007,
      "learning_rate": 1.6330127435747885e-05,
      "loss": 2.3789,
      "step": 22003
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0959173440933228,
      "learning_rate": 1.6329808687921233e-05,
      "loss": 2.3197,
      "step": 22004
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.151191234588623,
      "learning_rate": 1.6329489929363835e-05,
      "loss": 2.4421,
      "step": 22005
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0902870893478394,
      "learning_rate": 1.632917116007623e-05,
      "loss": 2.5992,
      "step": 22006
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9466076493263245,
      "learning_rate": 1.6328852380058958e-05,
      "loss": 2.4073,
      "step": 22007
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.128250002861023,
      "learning_rate": 1.6328533589312573e-05,
      "loss": 2.3829,
      "step": 22008
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0477263927459717,
      "learning_rate": 1.6328214787837598e-05,
      "loss": 2.3468,
      "step": 22009
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0072070360183716,
      "learning_rate": 1.6327895975634578e-05,
      "loss": 2.4814,
      "step": 22010
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.036844253540039,
      "learning_rate": 1.632757715270406e-05,
      "loss": 2.5981,
      "step": 22011
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0294941663742065,
      "learning_rate": 1.6327258319046578e-05,
      "loss": 2.4708,
      "step": 22012
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0875426530838013,
      "learning_rate": 1.6326939474662676e-05,
      "loss": 2.188,
      "step": 22013
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0731925964355469,
      "learning_rate": 1.632662061955289e-05,
      "loss": 2.291,
      "step": 22014
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1964675188064575,
      "learning_rate": 1.632630175371777e-05,
      "loss": 2.3107,
      "step": 22015
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1073368787765503,
      "learning_rate": 1.6325982877157843e-05,
      "loss": 2.2554,
      "step": 22016
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0716341733932495,
      "learning_rate": 1.632566398987366e-05,
      "loss": 2.2797,
      "step": 22017
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0981518030166626,
      "learning_rate": 1.632534509186576e-05,
      "loss": 2.395,
      "step": 22018
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0646398067474365,
      "learning_rate": 1.632502618313468e-05,
      "loss": 2.3182,
      "step": 22019
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9706388115882874,
      "learning_rate": 1.6324707263680966e-05,
      "loss": 2.3659,
      "step": 22020
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1788424253463745,
      "learning_rate": 1.6324388333505155e-05,
      "loss": 2.5199,
      "step": 22021
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9864081740379333,
      "learning_rate": 1.6324069392607784e-05,
      "loss": 2.4405,
      "step": 22022
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1012678146362305,
      "learning_rate": 1.63237504409894e-05,
      "loss": 2.4651,
      "step": 22023
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1784324645996094,
      "learning_rate": 1.632343147865054e-05,
      "loss": 2.298,
      "step": 22024
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0325641632080078,
      "learning_rate": 1.632311250559175e-05,
      "loss": 2.1485,
      "step": 22025
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.973721444606781,
      "learning_rate": 1.6322793521813563e-05,
      "loss": 2.4347,
      "step": 22026
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1029411554336548,
      "learning_rate": 1.6322474527316527e-05,
      "loss": 2.4103,
      "step": 22027
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0436912775039673,
      "learning_rate": 1.6322155522101176e-05,
      "loss": 2.4014,
      "step": 22028
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0039149522781372,
      "learning_rate": 1.632183650616806e-05,
      "loss": 2.4231,
      "step": 22029
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0764954090118408,
      "learning_rate": 1.6321517479517707e-05,
      "loss": 2.4469,
      "step": 22030
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0999984741210938,
      "learning_rate": 1.632119844215067e-05,
      "loss": 2.4647,
      "step": 22031
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0617709159851074,
      "learning_rate": 1.6320879394067483e-05,
      "loss": 2.3932,
      "step": 22032
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0889233350753784,
      "learning_rate": 1.632056033526869e-05,
      "loss": 2.4226,
      "step": 22033
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9991604089736938,
      "learning_rate": 1.632024126575483e-05,
      "loss": 2.5958,
      "step": 22034
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1215450763702393,
      "learning_rate": 1.6319922185526446e-05,
      "loss": 2.4575,
      "step": 22035
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1865218877792358,
      "learning_rate": 1.6319603094584073e-05,
      "loss": 2.4421,
      "step": 22036
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1029114723205566,
      "learning_rate": 1.631928399292826e-05,
      "loss": 2.5612,
      "step": 22037
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.015553593635559,
      "learning_rate": 1.6318964880559544e-05,
      "loss": 2.3299,
      "step": 22038
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0671870708465576,
      "learning_rate": 1.631864575747846e-05,
      "loss": 2.3686,
      "step": 22039
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1730961799621582,
      "learning_rate": 1.6318326623685565e-05,
      "loss": 2.4896,
      "step": 22040
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.8936828374862671,
      "learning_rate": 1.6318007479181386e-05,
      "loss": 2.5912,
      "step": 22041
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0425541400909424,
      "learning_rate": 1.631768832396647e-05,
      "loss": 2.4645,
      "step": 22042
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9618951082229614,
      "learning_rate": 1.6317369158041355e-05,
      "loss": 2.4325,
      "step": 22043
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0963977575302124,
      "learning_rate": 1.6317049981406583e-05,
      "loss": 2.6098,
      "step": 22044
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0934908390045166,
      "learning_rate": 1.6316730794062693e-05,
      "loss": 2.6745,
      "step": 22045
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0402432680130005,
      "learning_rate": 1.6316411596010232e-05,
      "loss": 2.3719,
      "step": 22046
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0341471433639526,
      "learning_rate": 1.631609238724974e-05,
      "loss": 2.5365,
      "step": 22047
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0701258182525635,
      "learning_rate": 1.6315773167781753e-05,
      "loss": 2.4952,
      "step": 22048
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0069022178649902,
      "learning_rate": 1.6315453937606815e-05,
      "loss": 2.228,
      "step": 22049
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1728472709655762,
      "learning_rate": 1.631513469672547e-05,
      "loss": 2.388,
      "step": 22050
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0139694213867188,
      "learning_rate": 1.6314815445138253e-05,
      "loss": 2.3457,
      "step": 22051
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0395063161849976,
      "learning_rate": 1.6314496182845713e-05,
      "loss": 2.3143,
      "step": 22052
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1907461881637573,
      "learning_rate": 1.6314176909848384e-05,
      "loss": 2.6662,
      "step": 22053
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9824894666671753,
      "learning_rate": 1.631385762614681e-05,
      "loss": 2.504,
      "step": 22054
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9743613600730896,
      "learning_rate": 1.631353833174153e-05,
      "loss": 2.5159,
      "step": 22055
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.121774435043335,
      "learning_rate": 1.6313219026633095e-05,
      "loss": 2.3131,
      "step": 22056
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0801982879638672,
      "learning_rate": 1.6312899710822035e-05,
      "loss": 2.2447,
      "step": 22057
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2782394886016846,
      "learning_rate": 1.6312580384308893e-05,
      "loss": 2.3339,
      "step": 22058
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0090526342391968,
      "learning_rate": 1.6312261047094215e-05,
      "loss": 2.3007,
      "step": 22059
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.072175145149231,
      "learning_rate": 1.6311941699178543e-05,
      "loss": 2.5918,
      "step": 22060
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0210862159729004,
      "learning_rate": 1.631162234056241e-05,
      "loss": 2.7086,
      "step": 22061
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0143378973007202,
      "learning_rate": 1.6311302971246368e-05,
      "loss": 2.6928,
      "step": 22062
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1569737195968628,
      "learning_rate": 1.6310983591230952e-05,
      "loss": 2.3812,
      "step": 22063
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9887821674346924,
      "learning_rate": 1.63106642005167e-05,
      "loss": 2.2242,
      "step": 22064
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0694606304168701,
      "learning_rate": 1.6310344799104163e-05,
      "loss": 2.4446,
      "step": 22065
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1073113679885864,
      "learning_rate": 1.6310025386993876e-05,
      "loss": 2.4859,
      "step": 22066
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0357112884521484,
      "learning_rate": 1.6309705964186384e-05,
      "loss": 2.5476,
      "step": 22067
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0872513055801392,
      "learning_rate": 1.6309386530682227e-05,
      "loss": 2.4624,
      "step": 22068
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.049585223197937,
      "learning_rate": 1.6309067086481944e-05,
      "loss": 2.3296,
      "step": 22069
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.897733747959137,
      "learning_rate": 1.630874763158608e-05,
      "loss": 2.3251,
      "step": 22070
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4139915704727173,
      "learning_rate": 1.6308428165995172e-05,
      "loss": 2.2748,
      "step": 22071
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1077252626419067,
      "learning_rate": 1.630810868970977e-05,
      "loss": 2.5362,
      "step": 22072
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.015544056892395,
      "learning_rate": 1.6307789202730407e-05,
      "loss": 2.3501,
      "step": 22073
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0356733798980713,
      "learning_rate": 1.6307469705057633e-05,
      "loss": 2.4984,
      "step": 22074
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1330684423446655,
      "learning_rate": 1.630715019669198e-05,
      "loss": 2.4104,
      "step": 22075
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0006762742996216,
      "learning_rate": 1.6306830677633995e-05,
      "loss": 2.1991,
      "step": 22076
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2460179328918457,
      "learning_rate": 1.6306511147884218e-05,
      "loss": 2.4459,
      "step": 22077
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9973840117454529,
      "learning_rate": 1.6306191607443195e-05,
      "loss": 2.5181,
      "step": 22078
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0324923992156982,
      "learning_rate": 1.630587205631146e-05,
      "loss": 2.36,
      "step": 22079
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9841777682304382,
      "learning_rate": 1.6305552494489568e-05,
      "loss": 2.4451,
      "step": 22080
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1410291194915771,
      "learning_rate": 1.6305232921978045e-05,
      "loss": 2.6147,
      "step": 22081
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0537464618682861,
      "learning_rate": 1.630491333877744e-05,
      "loss": 2.4828,
      "step": 22082
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2977080345153809,
      "learning_rate": 1.6304593744888296e-05,
      "loss": 2.4646,
      "step": 22083
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0581965446472168,
      "learning_rate": 1.6304274140311155e-05,
      "loss": 2.3822,
      "step": 22084
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.150730848312378,
      "learning_rate": 1.630395452504655e-05,
      "loss": 2.4179,
      "step": 22085
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1139631271362305,
      "learning_rate": 1.630363489909504e-05,
      "loss": 2.3815,
      "step": 22086
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.995677649974823,
      "learning_rate": 1.6303315262457148e-05,
      "loss": 2.2685,
      "step": 22087
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0434284210205078,
      "learning_rate": 1.6302995615133432e-05,
      "loss": 2.3136,
      "step": 22088
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0142738819122314,
      "learning_rate": 1.6302675957124422e-05,
      "loss": 2.4043,
      "step": 22089
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0569186210632324,
      "learning_rate": 1.6302356288430664e-05,
      "loss": 2.4555,
      "step": 22090
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.062340259552002,
      "learning_rate": 1.63020366090527e-05,
      "loss": 2.3218,
      "step": 22091
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0532729625701904,
      "learning_rate": 1.6301716918991077e-05,
      "loss": 2.5021,
      "step": 22092
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0734509229660034,
      "learning_rate": 1.6301397218246328e-05,
      "loss": 2.5843,
      "step": 22093
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.159125566482544,
      "learning_rate": 1.6301077506819002e-05,
      "loss": 2.2091,
      "step": 22094
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1364115476608276,
      "learning_rate": 1.6300757784709636e-05,
      "loss": 2.3214,
      "step": 22095
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.307349443435669,
      "learning_rate": 1.6300438051918774e-05,
      "loss": 2.3696,
      "step": 22096
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0992974042892456,
      "learning_rate": 1.630011830844696e-05,
      "loss": 2.6612,
      "step": 22097
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0378819704055786,
      "learning_rate": 1.6299798554294733e-05,
      "loss": 2.3158,
      "step": 22098
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1165393590927124,
      "learning_rate": 1.629947878946264e-05,
      "loss": 2.5307,
      "step": 22099
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9367683529853821,
      "learning_rate": 1.6299159013951215e-05,
      "loss": 2.5056,
      "step": 22100
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.684872031211853,
      "learning_rate": 1.6298839227761007e-05,
      "loss": 2.2973,
      "step": 22101
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9881401062011719,
      "learning_rate": 1.6298519430892557e-05,
      "loss": 2.4155,
      "step": 22102
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3078651428222656,
      "learning_rate": 1.6298199623346403e-05,
      "loss": 2.5786,
      "step": 22103
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1084315776824951,
      "learning_rate": 1.629787980512309e-05,
      "loss": 2.6208,
      "step": 22104
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9749717116355896,
      "learning_rate": 1.6297559976223164e-05,
      "loss": 2.1815,
      "step": 22105
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9534570574760437,
      "learning_rate": 1.629724013664716e-05,
      "loss": 2.5969,
      "step": 22106
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1733273267745972,
      "learning_rate": 1.6296920286395624e-05,
      "loss": 2.5395,
      "step": 22107
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.093610405921936,
      "learning_rate": 1.62966004254691e-05,
      "loss": 2.4595,
      "step": 22108
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2366108894348145,
      "learning_rate": 1.6296280553868126e-05,
      "loss": 2.3051,
      "step": 22109
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.044779658317566,
      "learning_rate": 1.629596067159325e-05,
      "loss": 2.4488,
      "step": 22110
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.952022910118103,
      "learning_rate": 1.6295640778645004e-05,
      "loss": 2.2877,
      "step": 22111
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1157528162002563,
      "learning_rate": 1.629532087502394e-05,
      "loss": 2.4701,
      "step": 22112
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0890767574310303,
      "learning_rate": 1.62950009607306e-05,
      "loss": 2.8069,
      "step": 22113
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1356266736984253,
      "learning_rate": 1.6294681035765522e-05,
      "loss": 2.4222,
      "step": 22114
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9653523564338684,
      "learning_rate": 1.6294361100129246e-05,
      "loss": 2.6549,
      "step": 22115
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.233664631843567,
      "learning_rate": 1.6294041153822324e-05,
      "loss": 2.4486,
      "step": 22116
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0424270629882812,
      "learning_rate": 1.629372119684529e-05,
      "loss": 2.4687,
      "step": 22117
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0715057849884033,
      "learning_rate": 1.629340122919869e-05,
      "loss": 2.5166,
      "step": 22118
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1752381324768066,
      "learning_rate": 1.6293081250883068e-05,
      "loss": 2.6426,
      "step": 22119
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0353574752807617,
      "learning_rate": 1.6292761261898963e-05,
      "loss": 2.4722,
      "step": 22120
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1054341793060303,
      "learning_rate": 1.6292441262246915e-05,
      "loss": 2.5712,
      "step": 22121
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9843377470970154,
      "learning_rate": 1.6292121251927473e-05,
      "loss": 2.5624,
      "step": 22122
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9513181447982788,
      "learning_rate": 1.6291801230941176e-05,
      "loss": 2.3553,
      "step": 22123
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.07746422290802,
      "learning_rate": 1.629148119928857e-05,
      "loss": 2.3698,
      "step": 22124
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0178792476654053,
      "learning_rate": 1.629116115697019e-05,
      "loss": 2.4392,
      "step": 22125
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1046912670135498,
      "learning_rate": 1.6290841103986588e-05,
      "loss": 2.527,
      "step": 22126
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0884534120559692,
      "learning_rate": 1.6290521040338298e-05,
      "loss": 2.3068,
      "step": 22127
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9581174254417419,
      "learning_rate": 1.6290200966025866e-05,
      "loss": 2.3736,
      "step": 22128
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.987586498260498,
      "learning_rate": 1.6289880881049834e-05,
      "loss": 2.3351,
      "step": 22129
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0872323513031006,
      "learning_rate": 1.6289560785410753e-05,
      "loss": 2.5535,
      "step": 22130
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0831475257873535,
      "learning_rate": 1.628924067910915e-05,
      "loss": 2.3616,
      "step": 22131
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0788726806640625,
      "learning_rate": 1.628892056214558e-05,
      "loss": 2.538,
      "step": 22132
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1072516441345215,
      "learning_rate": 1.6288600434520582e-05,
      "loss": 2.5272,
      "step": 22133
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1041691303253174,
      "learning_rate": 1.6288280296234696e-05,
      "loss": 2.5014,
      "step": 22134
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0275901556015015,
      "learning_rate": 1.6287960147288467e-05,
      "loss": 2.4561,
      "step": 22135
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0198289155960083,
      "learning_rate": 1.628763998768244e-05,
      "loss": 2.4315,
      "step": 22136
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.030890941619873,
      "learning_rate": 1.6287319817417153e-05,
      "loss": 2.3546,
      "step": 22137
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0644978284835815,
      "learning_rate": 1.6286999636493153e-05,
      "loss": 2.491,
      "step": 22138
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.204229712486267,
      "learning_rate": 1.628667944491098e-05,
      "loss": 2.7653,
      "step": 22139
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.003727912902832,
      "learning_rate": 1.628635924267118e-05,
      "loss": 2.4501,
      "step": 22140
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0564630031585693,
      "learning_rate": 1.628603902977429e-05,
      "loss": 2.5805,
      "step": 22141
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0696159601211548,
      "learning_rate": 1.6285718806220862e-05,
      "loss": 2.1218,
      "step": 22142
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9563384652137756,
      "learning_rate": 1.6285398572011425e-05,
      "loss": 2.4244,
      "step": 22143
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1401431560516357,
      "learning_rate": 1.6285078327146538e-05,
      "loss": 2.3655,
      "step": 22144
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.051163673400879,
      "learning_rate": 1.6284758071626734e-05,
      "loss": 2.2632,
      "step": 22145
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2215782403945923,
      "learning_rate": 1.628443780545256e-05,
      "loss": 2.6263,
      "step": 22146
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3002088069915771,
      "learning_rate": 1.6284117528624554e-05,
      "loss": 2.6025,
      "step": 22147
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0464162826538086,
      "learning_rate": 1.6283797241143263e-05,
      "loss": 2.435,
      "step": 22148
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0150734186172485,
      "learning_rate": 1.628347694300923e-05,
      "loss": 2.478,
      "step": 22149
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.082146167755127,
      "learning_rate": 1.6283156634222994e-05,
      "loss": 2.3889,
      "step": 22150
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9624238014221191,
      "learning_rate": 1.6282836314785102e-05,
      "loss": 2.426,
      "step": 22151
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1466511487960815,
      "learning_rate": 1.6282515984696096e-05,
      "loss": 2.5696,
      "step": 22152
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0462204217910767,
      "learning_rate": 1.628219564395652e-05,
      "loss": 2.3228,
      "step": 22153
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1855045557022095,
      "learning_rate": 1.628187529256692e-05,
      "loss": 2.5099,
      "step": 22154
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.030889630317688,
      "learning_rate": 1.6281554930527832e-05,
      "loss": 2.3693,
      "step": 22155
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9722461104393005,
      "learning_rate": 1.62812345578398e-05,
      "loss": 2.3483,
      "step": 22156
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.935454785823822,
      "learning_rate": 1.628091417450337e-05,
      "loss": 2.5417,
      "step": 22157
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9737971425056458,
      "learning_rate": 1.628059378051909e-05,
      "loss": 2.4132,
      "step": 22158
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9863804578781128,
      "learning_rate": 1.6280273375887493e-05,
      "loss": 2.4357,
      "step": 22159
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0079389810562134,
      "learning_rate": 1.6279952960609127e-05,
      "loss": 2.5484,
      "step": 22160
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2418458461761475,
      "learning_rate": 1.6279632534684537e-05,
      "loss": 2.4607,
      "step": 22161
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0085417032241821,
      "learning_rate": 1.6279312098114264e-05,
      "loss": 2.3841,
      "step": 22162
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9885546565055847,
      "learning_rate": 1.627899165089885e-05,
      "loss": 2.4565,
      "step": 22163
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1129775047302246,
      "learning_rate": 1.627867119303884e-05,
      "loss": 2.6088,
      "step": 22164
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0550720691680908,
      "learning_rate": 1.6278350724534778e-05,
      "loss": 2.4094,
      "step": 22165
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0199313163757324,
      "learning_rate": 1.6278030245387208e-05,
      "loss": 2.3624,
      "step": 22166
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.053659200668335,
      "learning_rate": 1.627770975559667e-05,
      "loss": 2.5233,
      "step": 22167
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0222595930099487,
      "learning_rate": 1.6277389255163706e-05,
      "loss": 2.3423,
      "step": 22168
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.071037769317627,
      "learning_rate": 1.6277068744088865e-05,
      "loss": 2.4325,
      "step": 22169
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0864553451538086,
      "learning_rate": 1.6276748222372685e-05,
      "loss": 2.3455,
      "step": 22170
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0780445337295532,
      "learning_rate": 1.627642769001572e-05,
      "loss": 2.4218,
      "step": 22171
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9872550368309021,
      "learning_rate": 1.6276107147018496e-05,
      "loss": 2.4647,
      "step": 22172
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.163795828819275,
      "learning_rate": 1.627578659338157e-05,
      "loss": 2.4274,
      "step": 22173
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9857648015022278,
      "learning_rate": 1.627546602910548e-05,
      "loss": 2.5294,
      "step": 22174
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.041908621788025,
      "learning_rate": 1.627514545419077e-05,
      "loss": 2.5976,
      "step": 22175
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0478965044021606,
      "learning_rate": 1.6274824868637987e-05,
      "loss": 2.499,
      "step": 22176
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1343663930892944,
      "learning_rate": 1.6274504272447672e-05,
      "loss": 2.1677,
      "step": 22177
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1676878929138184,
      "learning_rate": 1.6274183665620368e-05,
      "loss": 2.4658,
      "step": 22178
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1343525648117065,
      "learning_rate": 1.6273863048156617e-05,
      "loss": 2.3292,
      "step": 22179
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.089012622833252,
      "learning_rate": 1.6273542420056965e-05,
      "loss": 2.4158,
      "step": 22180
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1184089183807373,
      "learning_rate": 1.6273221781321953e-05,
      "loss": 2.4538,
      "step": 22181
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.005850076675415,
      "learning_rate": 1.6272901131952128e-05,
      "loss": 2.3936,
      "step": 22182
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2027318477630615,
      "learning_rate": 1.627258047194803e-05,
      "loss": 2.6322,
      "step": 22183
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0867116451263428,
      "learning_rate": 1.6272259801310205e-05,
      "loss": 2.285,
      "step": 22184
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1156747341156006,
      "learning_rate": 1.62719391200392e-05,
      "loss": 2.4263,
      "step": 22185
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0406529903411865,
      "learning_rate": 1.6271618428135552e-05,
      "loss": 2.6353,
      "step": 22186
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1977896690368652,
      "learning_rate": 1.6271297725599807e-05,
      "loss": 2.304,
      "step": 22187
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9823915362358093,
      "learning_rate": 1.6270977012432507e-05,
      "loss": 2.3789,
      "step": 22188
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9671448469161987,
      "learning_rate": 1.62706562886342e-05,
      "loss": 2.3453,
      "step": 22189
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1739988327026367,
      "learning_rate": 1.627033555420543e-05,
      "loss": 2.4567,
      "step": 22190
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9615138173103333,
      "learning_rate": 1.6270014809146736e-05,
      "loss": 2.5587,
      "step": 22191
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.002695083618164,
      "learning_rate": 1.6269694053458663e-05,
      "loss": 2.3867,
      "step": 22192
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1912577152252197,
      "learning_rate": 1.6269373287141757e-05,
      "loss": 2.5,
      "step": 22193
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0404077768325806,
      "learning_rate": 1.626905251019656e-05,
      "loss": 2.4427,
      "step": 22194
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1618205308914185,
      "learning_rate": 1.6268731722623615e-05,
      "loss": 2.4265,
      "step": 22195
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1248714923858643,
      "learning_rate": 1.626841092442347e-05,
      "loss": 2.4986,
      "step": 22196
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2160851955413818,
      "learning_rate": 1.6268090115596664e-05,
      "loss": 2.562,
      "step": 22197
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.91785728931427,
      "learning_rate": 1.626776929614374e-05,
      "loss": 2.2983,
      "step": 22198
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9823681116104126,
      "learning_rate": 1.626744846606525e-05,
      "loss": 2.3563,
      "step": 22199
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1395395994186401,
      "learning_rate": 1.626712762536173e-05,
      "loss": 2.4344,
      "step": 22200
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9986022710800171,
      "learning_rate": 1.626680677403373e-05,
      "loss": 2.3684,
      "step": 22201
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.149058222770691,
      "learning_rate": 1.6266485912081788e-05,
      "loss": 2.3397,
      "step": 22202
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0501377582550049,
      "learning_rate": 1.626616503950645e-05,
      "loss": 2.2947,
      "step": 22203
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9975852966308594,
      "learning_rate": 1.6265844156308258e-05,
      "loss": 2.4151,
      "step": 22204
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0219913721084595,
      "learning_rate": 1.6265523262487763e-05,
      "loss": 2.3949,
      "step": 22205
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0952961444854736,
      "learning_rate": 1.6265202358045503e-05,
      "loss": 2.417,
      "step": 22206
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.077236533164978,
      "learning_rate": 1.626488144298202e-05,
      "loss": 2.5057,
      "step": 22207
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.010582685470581,
      "learning_rate": 1.6264560517297865e-05,
      "loss": 2.3915,
      "step": 22208
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.160131812095642,
      "learning_rate": 1.6264239580993576e-05,
      "loss": 2.3939,
      "step": 22209
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.023026943206787,
      "learning_rate": 1.6263918634069706e-05,
      "loss": 2.319,
      "step": 22210
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.062642216682434,
      "learning_rate": 1.6263597676526787e-05,
      "loss": 2.4378,
      "step": 22211
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0685815811157227,
      "learning_rate": 1.6263276708365368e-05,
      "loss": 2.3771,
      "step": 22212
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0839862823486328,
      "learning_rate": 1.6262955729585995e-05,
      "loss": 2.4532,
      "step": 22213
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.974543035030365,
      "learning_rate": 1.6262634740189213e-05,
      "loss": 2.1876,
      "step": 22214
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9913249015808105,
      "learning_rate": 1.6262313740175564e-05,
      "loss": 2.5468,
      "step": 22215
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0775235891342163,
      "learning_rate": 1.6261992729545587e-05,
      "loss": 2.4326,
      "step": 22216
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0594205856323242,
      "learning_rate": 1.6261671708299838e-05,
      "loss": 2.4008,
      "step": 22217
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0224798917770386,
      "learning_rate": 1.6261350676438855e-05,
      "loss": 2.4778,
      "step": 22218
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0353361368179321,
      "learning_rate": 1.626102963396318e-05,
      "loss": 2.5863,
      "step": 22219
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9864069819450378,
      "learning_rate": 1.626070858087336e-05,
      "loss": 2.2429,
      "step": 22220
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0641772747039795,
      "learning_rate": 1.6260387517169934e-05,
      "loss": 2.2384,
      "step": 22221
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.968723475933075,
      "learning_rate": 1.6260066442853454e-05,
      "loss": 2.5883,
      "step": 22222
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0195953845977783,
      "learning_rate": 1.6259745357924464e-05,
      "loss": 2.3384,
      "step": 22223
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0772008895874023,
      "learning_rate": 1.62594242623835e-05,
      "loss": 2.4266,
      "step": 22224
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0507781505584717,
      "learning_rate": 1.6259103156231117e-05,
      "loss": 2.3038,
      "step": 22225
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0890629291534424,
      "learning_rate": 1.625878203946785e-05,
      "loss": 2.2147,
      "step": 22226
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0325449705123901,
      "learning_rate": 1.625846091209425e-05,
      "loss": 2.6852,
      "step": 22227
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0620418787002563,
      "learning_rate": 1.625813977411086e-05,
      "loss": 2.2299,
      "step": 22228
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0122133493423462,
      "learning_rate": 1.625781862551822e-05,
      "loss": 2.3348,
      "step": 22229
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1049895286560059,
      "learning_rate": 1.625749746631688e-05,
      "loss": 2.319,
      "step": 22230
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0075364112854004,
      "learning_rate": 1.6257176296507385e-05,
      "loss": 2.3867,
      "step": 22231
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1233919858932495,
      "learning_rate": 1.6256855116090274e-05,
      "loss": 2.283,
      "step": 22232
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9441902041435242,
      "learning_rate": 1.6256533925066094e-05,
      "loss": 2.2402,
      "step": 22233
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0195879936218262,
      "learning_rate": 1.625621272343539e-05,
      "loss": 2.579,
      "step": 22234
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9374440908432007,
      "learning_rate": 1.6255891511198705e-05,
      "loss": 2.5317,
      "step": 22235
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0548778772354126,
      "learning_rate": 1.6255570288356587e-05,
      "loss": 2.3978,
      "step": 22236
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1367156505584717,
      "learning_rate": 1.6255249054909575e-05,
      "loss": 2.5579,
      "step": 22237
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1559009552001953,
      "learning_rate": 1.6254927810858223e-05,
      "loss": 2.3072,
      "step": 22238
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9941149950027466,
      "learning_rate": 1.6254606556203065e-05,
      "loss": 2.4015,
      "step": 22239
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.010606050491333,
      "learning_rate": 1.625428529094465e-05,
      "loss": 2.5082,
      "step": 22240
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1754013299942017,
      "learning_rate": 1.6253964015083524e-05,
      "loss": 2.6353,
      "step": 22241
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9351605772972107,
      "learning_rate": 1.625364272862023e-05,
      "loss": 2.2073,
      "step": 22242
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.152686595916748,
      "learning_rate": 1.6253321431555314e-05,
      "loss": 2.4101,
      "step": 22243
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9754710793495178,
      "learning_rate": 1.625300012388932e-05,
      "loss": 2.3722,
      "step": 22244
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.083736777305603,
      "learning_rate": 1.625267880562279e-05,
      "loss": 2.2927,
      "step": 22245
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1556428670883179,
      "learning_rate": 1.625235747675627e-05,
      "loss": 2.4144,
      "step": 22246
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1439493894577026,
      "learning_rate": 1.6252036137290313e-05,
      "loss": 2.2291,
      "step": 22247
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9812400341033936,
      "learning_rate": 1.6251714787225452e-05,
      "loss": 2.4149,
      "step": 22248
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1396305561065674,
      "learning_rate": 1.6251393426562237e-05,
      "loss": 2.4452,
      "step": 22249
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.101387858390808,
      "learning_rate": 1.625107205530121e-05,
      "loss": 2.5645,
      "step": 22250
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9379792213439941,
      "learning_rate": 1.6250750673442923e-05,
      "loss": 2.4556,
      "step": 22251
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0889664888381958,
      "learning_rate": 1.6250429280987915e-05,
      "loss": 2.3921,
      "step": 22252
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.012251377105713,
      "learning_rate": 1.625010787793673e-05,
      "loss": 2.249,
      "step": 22253
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1262282133102417,
      "learning_rate": 1.6249786464289916e-05,
      "loss": 2.3428,
      "step": 22254
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1014184951782227,
      "learning_rate": 1.624946504004801e-05,
      "loss": 2.5245,
      "step": 22255
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0284675359725952,
      "learning_rate": 1.624914360521157e-05,
      "loss": 2.5997,
      "step": 22256
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.168277382850647,
      "learning_rate": 1.6248822159781134e-05,
      "loss": 2.4292,
      "step": 22257
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0645225048065186,
      "learning_rate": 1.6248500703757245e-05,
      "loss": 2.3657,
      "step": 22258
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0806055068969727,
      "learning_rate": 1.624817923714045e-05,
      "loss": 2.2994,
      "step": 22259
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1168137788772583,
      "learning_rate": 1.62478577599313e-05,
      "loss": 2.459,
      "step": 22260
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9803699254989624,
      "learning_rate": 1.624753627213033e-05,
      "loss": 2.2566,
      "step": 22261
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0051649808883667,
      "learning_rate": 1.6247214773738086e-05,
      "loss": 2.4549,
      "step": 22262
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0259124040603638,
      "learning_rate": 1.6246893264755123e-05,
      "loss": 2.6636,
      "step": 22263
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.159591555595398,
      "learning_rate": 1.6246571745181974e-05,
      "loss": 2.4553,
      "step": 22264
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.082260251045227,
      "learning_rate": 1.624625021501919e-05,
      "loss": 2.5473,
      "step": 22265
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0083643198013306,
      "learning_rate": 1.6245928674267316e-05,
      "loss": 2.5314,
      "step": 22266
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0212675333023071,
      "learning_rate": 1.6245607122926898e-05,
      "loss": 2.2278,
      "step": 22267
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0463640689849854,
      "learning_rate": 1.6245285560998477e-05,
      "loss": 2.483,
      "step": 22268
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.008895754814148,
      "learning_rate": 1.6244963988482604e-05,
      "loss": 2.6001,
      "step": 22269
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.03701913356781,
      "learning_rate": 1.624464240537982e-05,
      "loss": 2.5896,
      "step": 22270
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0167115926742554,
      "learning_rate": 1.624432081169067e-05,
      "loss": 2.3906,
      "step": 22271
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0161142349243164,
      "learning_rate": 1.6243999207415702e-05,
      "loss": 2.4716,
      "step": 22272
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0445648431777954,
      "learning_rate": 1.6243677592555456e-05,
      "loss": 2.5091,
      "step": 22273
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1111061573028564,
      "learning_rate": 1.6243355967110484e-05,
      "loss": 2.644,
      "step": 22274
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.458767294883728,
      "learning_rate": 1.6243034331081326e-05,
      "loss": 2.6228,
      "step": 22275
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9535022974014282,
      "learning_rate": 1.624271268446853e-05,
      "loss": 2.4496,
      "step": 22276
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1154292821884155,
      "learning_rate": 1.6242391027272642e-05,
      "loss": 2.3204,
      "step": 22277
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0999575853347778,
      "learning_rate": 1.6242069359494203e-05,
      "loss": 2.448,
      "step": 22278
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0634465217590332,
      "learning_rate": 1.6241747681133765e-05,
      "loss": 2.3753,
      "step": 22279
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.034841775894165,
      "learning_rate": 1.6241425992191865e-05,
      "loss": 2.5747,
      "step": 22280
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1227242946624756,
      "learning_rate": 1.6241104292669057e-05,
      "loss": 2.5653,
      "step": 22281
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0367159843444824,
      "learning_rate": 1.6240782582565876e-05,
      "loss": 2.5281,
      "step": 22282
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0933473110198975,
      "learning_rate": 1.624046086188288e-05,
      "loss": 2.5328,
      "step": 22283
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0749253034591675,
      "learning_rate": 1.6240139130620606e-05,
      "loss": 2.3923,
      "step": 22284
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0140619277954102,
      "learning_rate": 1.6239817388779598e-05,
      "loss": 2.3309,
      "step": 22285
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9693902730941772,
      "learning_rate": 1.6239495636360412e-05,
      "loss": 2.7155,
      "step": 22286
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1335153579711914,
      "learning_rate": 1.6239173873363578e-05,
      "loss": 2.512,
      "step": 22287
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0350878238677979,
      "learning_rate": 1.6238852099789656e-05,
      "loss": 2.3462,
      "step": 22288
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1018617153167725,
      "learning_rate": 1.6238530315639183e-05,
      "loss": 2.4224,
      "step": 22289
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.021977186203003,
      "learning_rate": 1.6238208520912704e-05,
      "loss": 2.6014,
      "step": 22290
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0610321760177612,
      "learning_rate": 1.6237886715610772e-05,
      "loss": 2.5771,
      "step": 22291
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9396573305130005,
      "learning_rate": 1.6237564899733928e-05,
      "loss": 2.3599,
      "step": 22292
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9900801777839661,
      "learning_rate": 1.6237243073282714e-05,
      "loss": 2.345,
      "step": 22293
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0821565389633179,
      "learning_rate": 1.623692123625768e-05,
      "loss": 2.3642,
      "step": 22294
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.111624002456665,
      "learning_rate": 1.623659938865937e-05,
      "loss": 2.233,
      "step": 22295
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0694056749343872,
      "learning_rate": 1.6236277530488332e-05,
      "loss": 2.5465,
      "step": 22296
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1395680904388428,
      "learning_rate": 1.623595566174511e-05,
      "loss": 2.3233,
      "step": 22297
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9708985090255737,
      "learning_rate": 1.6235633782430247e-05,
      "loss": 2.3908,
      "step": 22298
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3621468544006348,
      "learning_rate": 1.6235311892544292e-05,
      "loss": 2.3235,
      "step": 22299
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9928140044212341,
      "learning_rate": 1.623498999208779e-05,
      "loss": 2.5196,
      "step": 22300
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0636223554611206,
      "learning_rate": 1.6234668081061288e-05,
      "loss": 2.3818,
      "step": 22301
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0135431289672852,
      "learning_rate": 1.6234346159465327e-05,
      "loss": 2.487,
      "step": 22302
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1264939308166504,
      "learning_rate": 1.6234024227300458e-05,
      "loss": 2.4499,
      "step": 22303
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1263659000396729,
      "learning_rate": 1.6233702284567226e-05,
      "loss": 2.2214,
      "step": 22304
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0737906694412231,
      "learning_rate": 1.6233380331266174e-05,
      "loss": 2.4485,
      "step": 22305
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0540001392364502,
      "learning_rate": 1.6233058367397848e-05,
      "loss": 2.4453,
      "step": 22306
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9839097857475281,
      "learning_rate": 1.62327363929628e-05,
      "loss": 2.6168,
      "step": 22307
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9783893823623657,
      "learning_rate": 1.6232414407961566e-05,
      "loss": 2.4845,
      "step": 22308
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0523738861083984,
      "learning_rate": 1.62320924123947e-05,
      "loss": 2.5305,
      "step": 22309
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.099229335784912,
      "learning_rate": 1.6231770406262745e-05,
      "loss": 2.4413,
      "step": 22310
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0496773719787598,
      "learning_rate": 1.6231448389566248e-05,
      "loss": 2.3671,
      "step": 22311
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1657960414886475,
      "learning_rate": 1.623112636230575e-05,
      "loss": 2.1938,
      "step": 22312
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0620901584625244,
      "learning_rate": 1.62308043244818e-05,
      "loss": 2.3211,
      "step": 22313
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0056452751159668,
      "learning_rate": 1.6230482276094947e-05,
      "loss": 2.3987,
      "step": 22314
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1695133447647095,
      "learning_rate": 1.6230160217145732e-05,
      "loss": 2.445,
      "step": 22315
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0603653192520142,
      "learning_rate": 1.62298381476347e-05,
      "loss": 2.4182,
      "step": 22316
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.058024525642395,
      "learning_rate": 1.622951606756241e-05,
      "loss": 2.5041,
      "step": 22317
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0022627115249634,
      "learning_rate": 1.622919397692939e-05,
      "loss": 2.2217,
      "step": 22318
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9631438255310059,
      "learning_rate": 1.6228871875736197e-05,
      "loss": 2.4196,
      "step": 22319
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.063797116279602,
      "learning_rate": 1.6228549763983376e-05,
      "loss": 2.4099,
      "step": 22320
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9794148802757263,
      "learning_rate": 1.622822764167147e-05,
      "loss": 2.571,
      "step": 22321
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1097630262374878,
      "learning_rate": 1.622790550880103e-05,
      "loss": 2.3109,
      "step": 22322
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0630987882614136,
      "learning_rate": 1.6227583365372595e-05,
      "loss": 2.3665,
      "step": 22323
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1064770221710205,
      "learning_rate": 1.6227261211386718e-05,
      "loss": 2.3472,
      "step": 22324
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9629043340682983,
      "learning_rate": 1.6226939046843936e-05,
      "loss": 2.4201,
      "step": 22325
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2191697359085083,
      "learning_rate": 1.6226616871744807e-05,
      "loss": 2.416,
      "step": 22326
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0440548658370972,
      "learning_rate": 1.6226294686089868e-05,
      "loss": 2.2964,
      "step": 22327
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0736216306686401,
      "learning_rate": 1.622597248987967e-05,
      "loss": 2.3584,
      "step": 22328
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0272531509399414,
      "learning_rate": 1.6225650283114755e-05,
      "loss": 2.3579,
      "step": 22329
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0152915716171265,
      "learning_rate": 1.6225328065795678e-05,
      "loss": 2.5249,
      "step": 22330
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0609922409057617,
      "learning_rate": 1.6225005837922974e-05,
      "loss": 2.4467,
      "step": 22331
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9767152070999146,
      "learning_rate": 1.6224683599497195e-05,
      "loss": 2.3443,
      "step": 22332
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0372474193572998,
      "learning_rate": 1.6224361350518886e-05,
      "loss": 2.3886,
      "step": 22333
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.002870798110962,
      "learning_rate": 1.6224039090988593e-05,
      "loss": 2.3758,
      "step": 22334
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.971019446849823,
      "learning_rate": 1.622371682090687e-05,
      "loss": 2.2598,
      "step": 22335
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9460346698760986,
      "learning_rate": 1.6223394540274248e-05,
      "loss": 2.3386,
      "step": 22336
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2391598224639893,
      "learning_rate": 1.622307224909129e-05,
      "loss": 2.3846,
      "step": 22337
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0633565187454224,
      "learning_rate": 1.622274994735853e-05,
      "loss": 2.411,
      "step": 22338
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1130785942077637,
      "learning_rate": 1.6222427635076518e-05,
      "loss": 2.5682,
      "step": 22339
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0979578495025635,
      "learning_rate": 1.6222105312245803e-05,
      "loss": 2.4433,
      "step": 22340
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0693391561508179,
      "learning_rate": 1.622178297886693e-05,
      "loss": 2.3934,
      "step": 22341
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0396184921264648,
      "learning_rate": 1.6221460634940442e-05,
      "loss": 2.4454,
      "step": 22342
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0433498620986938,
      "learning_rate": 1.6221138280466893e-05,
      "loss": 2.5298,
      "step": 22343
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.147965431213379,
      "learning_rate": 1.6220815915446823e-05,
      "loss": 2.6375,
      "step": 22344
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0632898807525635,
      "learning_rate": 1.6220493539880778e-05,
      "loss": 2.3581,
      "step": 22345
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9538928270339966,
      "learning_rate": 1.622017115376931e-05,
      "loss": 2.4474,
      "step": 22346
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.034987211227417,
      "learning_rate": 1.621984875711296e-05,
      "loss": 2.4407,
      "step": 22347
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.040866494178772,
      "learning_rate": 1.621952634991228e-05,
      "loss": 2.2953,
      "step": 22348
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0209628343582153,
      "learning_rate": 1.6219203932167812e-05,
      "loss": 2.367,
      "step": 22349
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.980409562587738,
      "learning_rate": 1.6218881503880106e-05,
      "loss": 2.4646,
      "step": 22350
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2384614944458008,
      "learning_rate": 1.6218559065049703e-05,
      "loss": 2.3096,
      "step": 22351
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9100879430770874,
      "learning_rate": 1.621823661567716e-05,
      "loss": 2.5902,
      "step": 22352
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.05472731590271,
      "learning_rate": 1.621791415576301e-05,
      "loss": 2.4737,
      "step": 22353
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1393392086029053,
      "learning_rate": 1.621759168530781e-05,
      "loss": 2.367,
      "step": 22354
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0759363174438477,
      "learning_rate": 1.6217269204312102e-05,
      "loss": 2.3057,
      "step": 22355
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.011222243309021,
      "learning_rate": 1.6216946712776436e-05,
      "loss": 2.2666,
      "step": 22356
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2879477739334106,
      "learning_rate": 1.621662421070136e-05,
      "loss": 2.3492,
      "step": 22357
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0474333763122559,
      "learning_rate": 1.621630169808741e-05,
      "loss": 2.2235,
      "step": 22358
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9578617215156555,
      "learning_rate": 1.6215979174935144e-05,
      "loss": 2.4117,
      "step": 22359
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1210838556289673,
      "learning_rate": 1.6215656641245104e-05,
      "loss": 2.3397,
      "step": 22360
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9972339272499084,
      "learning_rate": 1.621533409701784e-05,
      "loss": 2.3697,
      "step": 22361
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0449726581573486,
      "learning_rate": 1.6215011542253894e-05,
      "loss": 2.6352,
      "step": 22362
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0578622817993164,
      "learning_rate": 1.6214688976953818e-05,
      "loss": 2.2338,
      "step": 22363
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0138447284698486,
      "learning_rate": 1.6214366401118153e-05,
      "loss": 2.372,
      "step": 22364
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1751165390014648,
      "learning_rate": 1.6214043814747455e-05,
      "loss": 2.3177,
      "step": 22365
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0036592483520508,
      "learning_rate": 1.621372121784226e-05,
      "loss": 2.6992,
      "step": 22366
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.064202070236206,
      "learning_rate": 1.621339861040312e-05,
      "loss": 2.5494,
      "step": 22367
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9646264314651489,
      "learning_rate": 1.621307599243058e-05,
      "loss": 2.5206,
      "step": 22368
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0226455926895142,
      "learning_rate": 1.6212753363925195e-05,
      "loss": 2.4783,
      "step": 22369
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2622458934783936,
      "learning_rate": 1.6212430724887506e-05,
      "loss": 2.5533,
      "step": 22370
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1897717714309692,
      "learning_rate": 1.6212108075318053e-05,
      "loss": 2.5402,
      "step": 22371
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9625776410102844,
      "learning_rate": 1.6211785415217392e-05,
      "loss": 2.468,
      "step": 22372
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0078423023223877,
      "learning_rate": 1.621146274458607e-05,
      "loss": 2.223,
      "step": 22373
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9878655076026917,
      "learning_rate": 1.621114006342463e-05,
      "loss": 2.2741,
      "step": 22374
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0235238075256348,
      "learning_rate": 1.621081737173362e-05,
      "loss": 2.5593,
      "step": 22375
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0699628591537476,
      "learning_rate": 1.621049466951359e-05,
      "loss": 2.396,
      "step": 22376
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.033658742904663,
      "learning_rate": 1.6210171956765083e-05,
      "loss": 2.5123,
      "step": 22377
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9876248240470886,
      "learning_rate": 1.6209849233488646e-05,
      "loss": 2.1612,
      "step": 22378
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9565757513046265,
      "learning_rate": 1.620952649968483e-05,
      "loss": 2.4509,
      "step": 22379
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9241650700569153,
      "learning_rate": 1.6209203755354183e-05,
      "loss": 2.4659,
      "step": 22380
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2651293277740479,
      "learning_rate": 1.6208881000497244e-05,
      "loss": 2.5807,
      "step": 22381
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0570814609527588,
      "learning_rate": 1.6208558235114565e-05,
      "loss": 2.4838,
      "step": 22382
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.02248215675354,
      "learning_rate": 1.62082354592067e-05,
      "loss": 2.3603,
      "step": 22383
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1386483907699585,
      "learning_rate": 1.6207912672774188e-05,
      "loss": 2.3875,
      "step": 22384
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.05609929561615,
      "learning_rate": 1.6207589875817573e-05,
      "loss": 2.3731,
      "step": 22385
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0943087339401245,
      "learning_rate": 1.620726706833741e-05,
      "loss": 2.2711,
      "step": 22386
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.085927128791809,
      "learning_rate": 1.6206944250334244e-05,
      "loss": 2.3523,
      "step": 22387
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2204437255859375,
      "learning_rate": 1.620662142180862e-05,
      "loss": 2.5024,
      "step": 22388
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.017637848854065,
      "learning_rate": 1.6206298582761092e-05,
      "loss": 2.4676,
      "step": 22389
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0450631380081177,
      "learning_rate": 1.62059757331922e-05,
      "loss": 2.2768,
      "step": 22390
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.099440097808838,
      "learning_rate": 1.6205652873102492e-05,
      "loss": 2.3671,
      "step": 22391
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9581465125083923,
      "learning_rate": 1.6205330002492514e-05,
      "loss": 2.5421,
      "step": 22392
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0753494501113892,
      "learning_rate": 1.620500712136282e-05,
      "loss": 2.5143,
      "step": 22393
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.090171456336975,
      "learning_rate": 1.6204684229713953e-05,
      "loss": 2.4312,
      "step": 22394
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1025948524475098,
      "learning_rate": 1.6204361327546462e-05,
      "loss": 2.4076,
      "step": 22395
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2026526927947998,
      "learning_rate": 1.6204038414860895e-05,
      "loss": 2.4783,
      "step": 22396
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3453736305236816,
      "learning_rate": 1.6203715491657794e-05,
      "loss": 2.5339,
      "step": 22397
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0457329750061035,
      "learning_rate": 1.6203392557937712e-05,
      "loss": 2.3884,
      "step": 22398
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1241592168807983,
      "learning_rate": 1.6203069613701194e-05,
      "loss": 2.3193,
      "step": 22399
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9434928297996521,
      "learning_rate": 1.620274665894879e-05,
      "loss": 2.5318,
      "step": 22400
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1719447374343872,
      "learning_rate": 1.6202423693681043e-05,
      "loss": 2.6126,
      "step": 22401
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0614861249923706,
      "learning_rate": 1.6202100717898507e-05,
      "loss": 2.5429,
      "step": 22402
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9916620850563049,
      "learning_rate": 1.6201777731601725e-05,
      "loss": 2.3007,
      "step": 22403
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0165979862213135,
      "learning_rate": 1.6201454734791244e-05,
      "loss": 2.6067,
      "step": 22404
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9756540060043335,
      "learning_rate": 1.6201131727467614e-05,
      "loss": 2.4623,
      "step": 22405
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1651571989059448,
      "learning_rate": 1.620080870963138e-05,
      "loss": 2.321,
      "step": 22406
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9233918190002441,
      "learning_rate": 1.620048568128309e-05,
      "loss": 2.4373,
      "step": 22407
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1368930339813232,
      "learning_rate": 1.6200162642423296e-05,
      "loss": 2.3781,
      "step": 22408
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0177571773529053,
      "learning_rate": 1.619983959305254e-05,
      "loss": 2.2658,
      "step": 22409
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0843331813812256,
      "learning_rate": 1.6199516533171374e-05,
      "loss": 2.398,
      "step": 22410
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9710825085639954,
      "learning_rate": 1.6199193462780344e-05,
      "loss": 2.3888,
      "step": 22411
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9487689733505249,
      "learning_rate": 1.6198870381879993e-05,
      "loss": 2.5293,
      "step": 22412
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1099300384521484,
      "learning_rate": 1.6198547290470877e-05,
      "loss": 2.5855,
      "step": 22413
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0342826843261719,
      "learning_rate": 1.6198224188553538e-05,
      "loss": 2.5172,
      "step": 22414
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.041099190711975,
      "learning_rate": 1.619790107612853e-05,
      "loss": 2.4327,
      "step": 22415
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9835171103477478,
      "learning_rate": 1.6197577953196394e-05,
      "loss": 2.4393,
      "step": 22416
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9576781392097473,
      "learning_rate": 1.6197254819757676e-05,
      "loss": 2.4185,
      "step": 22417
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.047974705696106,
      "learning_rate": 1.6196931675812934e-05,
      "loss": 2.3605,
      "step": 22418
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0111552476882935,
      "learning_rate": 1.6196608521362703e-05,
      "loss": 2.6672,
      "step": 22419
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2864617109298706,
      "learning_rate": 1.6196285356407542e-05,
      "loss": 2.5335,
      "step": 22420
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1060978174209595,
      "learning_rate": 1.6195962180947994e-05,
      "loss": 2.4161,
      "step": 22421
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2256269454956055,
      "learning_rate": 1.6195638994984607e-05,
      "loss": 2.5585,
      "step": 22422
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0468748807907104,
      "learning_rate": 1.619531579851793e-05,
      "loss": 2.4926,
      "step": 22423
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9609246253967285,
      "learning_rate": 1.619499259154851e-05,
      "loss": 2.6209,
      "step": 22424
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0412393808364868,
      "learning_rate": 1.6194669374076893e-05,
      "loss": 2.5017,
      "step": 22425
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0032464265823364,
      "learning_rate": 1.6194346146103633e-05,
      "loss": 2.324,
      "step": 22426
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.957397997379303,
      "learning_rate": 1.6194022907629272e-05,
      "loss": 2.3586,
      "step": 22427
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1365935802459717,
      "learning_rate": 1.6193699658654356e-05,
      "loss": 2.3612,
      "step": 22428
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9875077605247498,
      "learning_rate": 1.6193376399179443e-05,
      "loss": 2.5512,
      "step": 22429
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.037300705909729,
      "learning_rate": 1.619305312920507e-05,
      "loss": 2.5321,
      "step": 22430
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9695734977722168,
      "learning_rate": 1.619272984873179e-05,
      "loss": 2.263,
      "step": 22431
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0029857158660889,
      "learning_rate": 1.6192406557760158e-05,
      "loss": 2.4352,
      "step": 22432
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.125654697418213,
      "learning_rate": 1.619208325629071e-05,
      "loss": 2.5677,
      "step": 22433
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.080880045890808,
      "learning_rate": 1.6191759944323996e-05,
      "loss": 2.4171,
      "step": 22434
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.6864094734191895,
      "learning_rate": 1.619143662186057e-05,
      "loss": 2.4913,
      "step": 22435
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0327531099319458,
      "learning_rate": 1.619111328890098e-05,
      "loss": 2.4507,
      "step": 22436
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9974413514137268,
      "learning_rate": 1.619078994544577e-05,
      "loss": 2.1681,
      "step": 22437
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.063564658164978,
      "learning_rate": 1.6190466591495488e-05,
      "loss": 2.262,
      "step": 22438
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1729322671890259,
      "learning_rate": 1.6190143227050685e-05,
      "loss": 2.4649,
      "step": 22439
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9978073835372925,
      "learning_rate": 1.6189819852111907e-05,
      "loss": 2.4388,
      "step": 22440
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.119464635848999,
      "learning_rate": 1.6189496466679704e-05,
      "loss": 2.514,
      "step": 22441
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0196609497070312,
      "learning_rate": 1.6189173070754624e-05,
      "loss": 2.6748,
      "step": 22442
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.078912615776062,
      "learning_rate": 1.6188849664337214e-05,
      "loss": 2.3507,
      "step": 22443
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1396757364273071,
      "learning_rate": 1.618852624742802e-05,
      "loss": 2.183,
      "step": 22444
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0862632989883423,
      "learning_rate": 1.61882028200276e-05,
      "loss": 2.3891,
      "step": 22445
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1599935293197632,
      "learning_rate": 1.618787938213649e-05,
      "loss": 2.3438,
      "step": 22446
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0170990228652954,
      "learning_rate": 1.6187555933755244e-05,
      "loss": 2.3516,
      "step": 22447
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0529478788375854,
      "learning_rate": 1.6187232474884415e-05,
      "loss": 2.4777,
      "step": 22448
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1004480123519897,
      "learning_rate": 1.6186909005524542e-05,
      "loss": 2.4538,
      "step": 22449
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0051356554031372,
      "learning_rate": 1.618658552567618e-05,
      "loss": 2.3823,
      "step": 22450
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.235256314277649,
      "learning_rate": 1.6186262035339875e-05,
      "loss": 2.5089,
      "step": 22451
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.054071068763733,
      "learning_rate": 1.618593853451617e-05,
      "loss": 2.5604,
      "step": 22452
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.025760293006897,
      "learning_rate": 1.6185615023205627e-05,
      "loss": 2.3774,
      "step": 22453
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.004733681678772,
      "learning_rate": 1.6185291501408788e-05,
      "loss": 2.5699,
      "step": 22454
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0538586378097534,
      "learning_rate": 1.6184967969126192e-05,
      "loss": 2.3623,
      "step": 22455
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9687589406967163,
      "learning_rate": 1.61846444263584e-05,
      "loss": 2.5583,
      "step": 22456
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0263296365737915,
      "learning_rate": 1.6184320873105958e-05,
      "loss": 2.4172,
      "step": 22457
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9657585620880127,
      "learning_rate": 1.6183997309369407e-05,
      "loss": 2.574,
      "step": 22458
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1167932748794556,
      "learning_rate": 1.6183673735149302e-05,
      "loss": 2.5757,
      "step": 22459
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0782480239868164,
      "learning_rate": 1.6183350150446196e-05,
      "loss": 2.6149,
      "step": 22460
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0396853685379028,
      "learning_rate": 1.618302655526063e-05,
      "loss": 2.4152,
      "step": 22461
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0438826084136963,
      "learning_rate": 1.6182702949593152e-05,
      "loss": 2.6433,
      "step": 22462
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1026479005813599,
      "learning_rate": 1.6182379333444313e-05,
      "loss": 2.5343,
      "step": 22463
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9661657214164734,
      "learning_rate": 1.6182055706814665e-05,
      "loss": 2.4307,
      "step": 22464
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.029678463935852,
      "learning_rate": 1.618173206970475e-05,
      "loss": 2.4347,
      "step": 22465
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0927015542984009,
      "learning_rate": 1.618140842211512e-05,
      "loss": 2.4273,
      "step": 22466
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3490123748779297,
      "learning_rate": 1.618108476404633e-05,
      "loss": 2.5026,
      "step": 22467
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0271438360214233,
      "learning_rate": 1.618076109549892e-05,
      "loss": 2.4771,
      "step": 22468
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9696090817451477,
      "learning_rate": 1.618043741647344e-05,
      "loss": 2.4071,
      "step": 22469
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0808743238449097,
      "learning_rate": 1.618011372697044e-05,
      "loss": 2.5079,
      "step": 22470
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0475821495056152,
      "learning_rate": 1.6179790026990464e-05,
      "loss": 2.3118,
      "step": 22471
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.047263503074646,
      "learning_rate": 1.617946631653407e-05,
      "loss": 2.6429,
      "step": 22472
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0508395433425903,
      "learning_rate": 1.6179142595601805e-05,
      "loss": 2.2801,
      "step": 22473
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1191006898880005,
      "learning_rate": 1.6178818864194212e-05,
      "loss": 2.2259,
      "step": 22474
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1210578680038452,
      "learning_rate": 1.6178495122311843e-05,
      "loss": 2.4556,
      "step": 22475
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0647385120391846,
      "learning_rate": 1.6178171369955248e-05,
      "loss": 2.4799,
      "step": 22476
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0843299627304077,
      "learning_rate": 1.617784760712497e-05,
      "loss": 2.4793,
      "step": 22477
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9299989342689514,
      "learning_rate": 1.6177523833821566e-05,
      "loss": 2.3531,
      "step": 22478
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1049683094024658,
      "learning_rate": 1.617720005004558e-05,
      "loss": 2.4363,
      "step": 22479
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1183807849884033,
      "learning_rate": 1.6176876255797565e-05,
      "loss": 2.5926,
      "step": 22480
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.009536623954773,
      "learning_rate": 1.6176552451078067e-05,
      "loss": 2.5719,
      "step": 22481
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0196672677993774,
      "learning_rate": 1.6176228635887633e-05,
      "loss": 2.5172,
      "step": 22482
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0152511596679688,
      "learning_rate": 1.6175904810226818e-05,
      "loss": 2.5529,
      "step": 22483
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9972910284996033,
      "learning_rate": 1.6175580974096163e-05,
      "loss": 2.4315,
      "step": 22484
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1996549367904663,
      "learning_rate": 1.617525712749622e-05,
      "loss": 2.3567,
      "step": 22485
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1678961515426636,
      "learning_rate": 1.617493327042754e-05,
      "loss": 2.5905,
      "step": 22486
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0740952491760254,
      "learning_rate": 1.617460940289067e-05,
      "loss": 2.2076,
      "step": 22487
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0742461681365967,
      "learning_rate": 1.6174285524886166e-05,
      "loss": 2.534,
      "step": 22488
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0428568124771118,
      "learning_rate": 1.6173961636414567e-05,
      "loss": 2.5993,
      "step": 22489
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.119859218597412,
      "learning_rate": 1.6173637737476427e-05,
      "loss": 2.561,
      "step": 22490
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9665414690971375,
      "learning_rate": 1.6173313828072293e-05,
      "loss": 2.3771,
      "step": 22491
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.072698950767517,
      "learning_rate": 1.617298990820271e-05,
      "loss": 2.4694,
      "step": 22492
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0330629348754883,
      "learning_rate": 1.617266597786824e-05,
      "loss": 1.9904,
      "step": 22493
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0916097164154053,
      "learning_rate": 1.6172342037069424e-05,
      "loss": 2.6046,
      "step": 22494
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.012237548828125,
      "learning_rate": 1.6172018085806812e-05,
      "loss": 2.3415,
      "step": 22495
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.029574990272522,
      "learning_rate": 1.6171694124080952e-05,
      "loss": 2.3666,
      "step": 22496
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.094387412071228,
      "learning_rate": 1.6171370151892395e-05,
      "loss": 2.5428,
      "step": 22497
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0693403482437134,
      "learning_rate": 1.617104616924169e-05,
      "loss": 2.4282,
      "step": 22498
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1185048818588257,
      "learning_rate": 1.6170722176129383e-05,
      "loss": 2.2424,
      "step": 22499
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.134263038635254,
      "learning_rate": 1.6170398172556025e-05,
      "loss": 2.1908,
      "step": 22500
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0950274467468262,
      "learning_rate": 1.617007415852217e-05,
      "loss": 2.7888,
      "step": 22501
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0286539793014526,
      "learning_rate": 1.6169750134028363e-05,
      "loss": 2.6413,
      "step": 22502
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0914705991744995,
      "learning_rate": 1.6169426099075153e-05,
      "loss": 2.6027,
      "step": 22503
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1276029348373413,
      "learning_rate": 1.6169102053663088e-05,
      "loss": 2.4875,
      "step": 22504
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0086930990219116,
      "learning_rate": 1.6168777997792724e-05,
      "loss": 2.4271,
      "step": 22505
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0734456777572632,
      "learning_rate": 1.6168453931464603e-05,
      "loss": 2.3798,
      "step": 22506
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9757960438728333,
      "learning_rate": 1.6168129854679278e-05,
      "loss": 2.4429,
      "step": 22507
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9941748976707458,
      "learning_rate": 1.6167805767437297e-05,
      "loss": 2.4974,
      "step": 22508
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0603337287902832,
      "learning_rate": 1.6167481669739213e-05,
      "loss": 2.6015,
      "step": 22509
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0359801054000854,
      "learning_rate": 1.6167157561585567e-05,
      "loss": 2.5035,
      "step": 22510
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0826750993728638,
      "learning_rate": 1.6166833442976916e-05,
      "loss": 2.4977,
      "step": 22511
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.067716360092163,
      "learning_rate": 1.6166509313913808e-05,
      "loss": 2.2937,
      "step": 22512
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1994634866714478,
      "learning_rate": 1.6166185174396796e-05,
      "loss": 2.3507,
      "step": 22513
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0549196004867554,
      "learning_rate": 1.6165861024426418e-05,
      "loss": 2.3271,
      "step": 22514
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9432564973831177,
      "learning_rate": 1.6165536864003237e-05,
      "loss": 2.3692,
      "step": 22515
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1048022508621216,
      "learning_rate": 1.6165212693127794e-05,
      "loss": 2.3672,
      "step": 22516
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0758880376815796,
      "learning_rate": 1.6164888511800643e-05,
      "loss": 2.5173,
      "step": 22517
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.053205132484436,
      "learning_rate": 1.6164564320022328e-05,
      "loss": 2.4415,
      "step": 22518
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1313073635101318,
      "learning_rate": 1.6164240117793406e-05,
      "loss": 2.5104,
      "step": 22519
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2023673057556152,
      "learning_rate": 1.6163915905114417e-05,
      "loss": 2.6169,
      "step": 22520
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2786986827850342,
      "learning_rate": 1.6163591681985922e-05,
      "loss": 2.4019,
      "step": 22521
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0042989253997803,
      "learning_rate": 1.6163267448408463e-05,
      "loss": 2.3588,
      "step": 22522
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0884292125701904,
      "learning_rate": 1.616294320438259e-05,
      "loss": 2.6093,
      "step": 22523
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0576452016830444,
      "learning_rate": 1.6162618949908855e-05,
      "loss": 2.3768,
      "step": 22524
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0855287313461304,
      "learning_rate": 1.6162294684987812e-05,
      "loss": 2.4144,
      "step": 22525
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1913186311721802,
      "learning_rate": 1.6161970409620004e-05,
      "loss": 2.3407,
      "step": 22526
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1817610263824463,
      "learning_rate": 1.6161646123805978e-05,
      "loss": 2.5096,
      "step": 22527
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.02740478515625,
      "learning_rate": 1.616132182754629e-05,
      "loss": 2.4374,
      "step": 22528
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0210224390029907,
      "learning_rate": 1.6160997520841496e-05,
      "loss": 2.4166,
      "step": 22529
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.231755256652832,
      "learning_rate": 1.6160673203692127e-05,
      "loss": 2.4502,
      "step": 22530
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0354570150375366,
      "learning_rate": 1.616034887609875e-05,
      "loss": 2.5253,
      "step": 22531
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0086942911148071,
      "learning_rate": 1.6160024538061905e-05,
      "loss": 2.228,
      "step": 22532
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.044076681137085,
      "learning_rate": 1.615970018958215e-05,
      "loss": 2.2704,
      "step": 22533
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.050433874130249,
      "learning_rate": 1.6159375830660025e-05,
      "loss": 2.3827,
      "step": 22534
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0920627117156982,
      "learning_rate": 1.615905146129609e-05,
      "loss": 2.464,
      "step": 22535
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0486974716186523,
      "learning_rate": 1.6158727081490887e-05,
      "loss": 2.6549,
      "step": 22536
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.015778660774231,
      "learning_rate": 1.6158402691244967e-05,
      "loss": 2.3799,
      "step": 22537
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9723415970802307,
      "learning_rate": 1.6158078290558884e-05,
      "loss": 2.5897,
      "step": 22538
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.126632809638977,
      "learning_rate": 1.6157753879433185e-05,
      "loss": 2.3816,
      "step": 22539
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.917354941368103,
      "learning_rate": 1.6157429457868425e-05,
      "loss": 2.398,
      "step": 22540
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.035391092300415,
      "learning_rate": 1.6157105025865146e-05,
      "loss": 2.2658,
      "step": 22541
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0263419151306152,
      "learning_rate": 1.61567805834239e-05,
      "loss": 2.3709,
      "step": 22542
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1416847705841064,
      "learning_rate": 1.6156456130545243e-05,
      "loss": 2.2886,
      "step": 22543
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0447279214859009,
      "learning_rate": 1.6156131667229717e-05,
      "loss": 2.4643,
      "step": 22544
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9481898546218872,
      "learning_rate": 1.6155807193477878e-05,
      "loss": 2.3593,
      "step": 22545
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.032110571861267,
      "learning_rate": 1.6155482709290274e-05,
      "loss": 2.1637,
      "step": 22546
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0785548686981201,
      "learning_rate": 1.6155158214667452e-05,
      "loss": 2.4109,
      "step": 22547
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9293796420097351,
      "learning_rate": 1.615483370960997e-05,
      "loss": 2.3708,
      "step": 22548
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9918692708015442,
      "learning_rate": 1.615450919411837e-05,
      "loss": 2.4848,
      "step": 22549
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9597554802894592,
      "learning_rate": 1.6154184668193208e-05,
      "loss": 2.425,
      "step": 22550
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0585817098617554,
      "learning_rate": 1.615386013183503e-05,
      "loss": 2.4319,
      "step": 22551
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0851109027862549,
      "learning_rate": 1.6153535585044386e-05,
      "loss": 2.4647,
      "step": 22552
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0605725049972534,
      "learning_rate": 1.615321102782183e-05,
      "loss": 2.4506,
      "step": 22553
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2246642112731934,
      "learning_rate": 1.615288646016791e-05,
      "loss": 2.3008,
      "step": 22554
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0200670957565308,
      "learning_rate": 1.6152561882083176e-05,
      "loss": 2.4349,
      "step": 22555
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9865121245384216,
      "learning_rate": 1.6152237293568178e-05,
      "loss": 2.3812,
      "step": 22556
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1203413009643555,
      "learning_rate": 1.6151912694623466e-05,
      "loss": 2.4554,
      "step": 22557
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1973270177841187,
      "learning_rate": 1.615158808524959e-05,
      "loss": 2.6047,
      "step": 22558
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0389021635055542,
      "learning_rate": 1.6151263465447106e-05,
      "loss": 2.5212,
      "step": 22559
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0703898668289185,
      "learning_rate": 1.6150938835216555e-05,
      "loss": 2.6158,
      "step": 22560
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0758774280548096,
      "learning_rate": 1.6150614194558497e-05,
      "loss": 2.332,
      "step": 22561
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.080957055091858,
      "learning_rate": 1.6150289543473476e-05,
      "loss": 2.3877,
      "step": 22562
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2361254692077637,
      "learning_rate": 1.6149964881962043e-05,
      "loss": 2.4655,
      "step": 22563
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0172182321548462,
      "learning_rate": 1.6149640210024748e-05,
      "loss": 2.526,
      "step": 22564
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1039427518844604,
      "learning_rate": 1.6149315527662145e-05,
      "loss": 2.4839,
      "step": 22565
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0817224979400635,
      "learning_rate": 1.614899083487478e-05,
      "loss": 2.4965,
      "step": 22566
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0419816970825195,
      "learning_rate": 1.6148666131663205e-05,
      "loss": 2.4745,
      "step": 22567
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0210119485855103,
      "learning_rate": 1.6148341418027973e-05,
      "loss": 2.4029,
      "step": 22568
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0015612840652466,
      "learning_rate": 1.6148016693969634e-05,
      "loss": 2.52,
      "step": 22569
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0311472415924072,
      "learning_rate": 1.614769195948873e-05,
      "loss": 2.4485,
      "step": 22570
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1382746696472168,
      "learning_rate": 1.6147367214585826e-05,
      "loss": 2.3552,
      "step": 22571
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1204279661178589,
      "learning_rate": 1.6147042459261462e-05,
      "loss": 2.368,
      "step": 22572
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9932334423065186,
      "learning_rate": 1.6146717693516193e-05,
      "loss": 2.7994,
      "step": 22573
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0217623710632324,
      "learning_rate": 1.6146392917350566e-05,
      "loss": 2.3869,
      "step": 22574
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.03736412525177,
      "learning_rate": 1.6146068130765136e-05,
      "loss": 2.4984,
      "step": 22575
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2008403539657593,
      "learning_rate": 1.614574333376045e-05,
      "loss": 2.6192,
      "step": 22576
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9774875640869141,
      "learning_rate": 1.6145418526337057e-05,
      "loss": 2.3307,
      "step": 22577
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.006423830986023,
      "learning_rate": 1.614509370849551e-05,
      "loss": 2.4209,
      "step": 22578
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.8994424939155579,
      "learning_rate": 1.6144768880236365e-05,
      "loss": 2.5584,
      "step": 22579
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1466552019119263,
      "learning_rate": 1.6144444041560164e-05,
      "loss": 2.4234,
      "step": 22580
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9614232182502747,
      "learning_rate": 1.6144119192467467e-05,
      "loss": 2.4574,
      "step": 22581
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0032031536102295,
      "learning_rate": 1.6143794332958816e-05,
      "loss": 2.3536,
      "step": 22582
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0250393152236938,
      "learning_rate": 1.6143469463034766e-05,
      "loss": 2.5525,
      "step": 22583
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1137737035751343,
      "learning_rate": 1.6143144582695864e-05,
      "loss": 2.354,
      "step": 22584
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.07180655002594,
      "learning_rate": 1.6142819691942664e-05,
      "loss": 2.3973,
      "step": 22585
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0716402530670166,
      "learning_rate": 1.614249479077572e-05,
      "loss": 2.4336,
      "step": 22586
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0116711854934692,
      "learning_rate": 1.6142169879195576e-05,
      "loss": 2.3771,
      "step": 22587
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9741569757461548,
      "learning_rate": 1.6141844957202784e-05,
      "loss": 2.5816,
      "step": 22588
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1208029985427856,
      "learning_rate": 1.6141520024797902e-05,
      "loss": 2.6589,
      "step": 22589
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0692130327224731,
      "learning_rate": 1.614119508198147e-05,
      "loss": 2.5665,
      "step": 22590
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9829134345054626,
      "learning_rate": 1.6140870128754047e-05,
      "loss": 2.3339,
      "step": 22591
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9890807271003723,
      "learning_rate": 1.614054516511618e-05,
      "loss": 2.6311,
      "step": 22592
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0754411220550537,
      "learning_rate": 1.6140220191068423e-05,
      "loss": 2.5706,
      "step": 22593
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9649567008018494,
      "learning_rate": 1.6139895206611327e-05,
      "loss": 2.4058,
      "step": 22594
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.073779582977295,
      "learning_rate": 1.6139570211745438e-05,
      "loss": 2.4678,
      "step": 22595
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2456715106964111,
      "learning_rate": 1.6139245206471307e-05,
      "loss": 2.4128,
      "step": 22596
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.263990879058838,
      "learning_rate": 1.6138920190789493e-05,
      "loss": 2.4016,
      "step": 22597
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0402626991271973,
      "learning_rate": 1.613859516470054e-05,
      "loss": 2.4951,
      "step": 22598
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0789198875427246,
      "learning_rate": 1.6138270128205e-05,
      "loss": 2.3192,
      "step": 22599
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0836782455444336,
      "learning_rate": 1.6137945081303426e-05,
      "loss": 2.4454,
      "step": 22600
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9941078424453735,
      "learning_rate": 1.613762002399637e-05,
      "loss": 2.4609,
      "step": 22601
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1268571615219116,
      "learning_rate": 1.613729495628438e-05,
      "loss": 2.5192,
      "step": 22602
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1101317405700684,
      "learning_rate": 1.6136969878168008e-05,
      "loss": 2.5917,
      "step": 22603
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9747645854949951,
      "learning_rate": 1.6136644789647804e-05,
      "loss": 2.1815,
      "step": 22604
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4164637327194214,
      "learning_rate": 1.613631969072432e-05,
      "loss": 2.4392,
      "step": 22605
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0947346687316895,
      "learning_rate": 1.6135994581398112e-05,
      "loss": 2.4587,
      "step": 22606
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0519846677780151,
      "learning_rate": 1.6135669461669725e-05,
      "loss": 2.5732,
      "step": 22607
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1532336473464966,
      "learning_rate": 1.613534433153971e-05,
      "loss": 2.4829,
      "step": 22608
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1403454542160034,
      "learning_rate": 1.613501919100862e-05,
      "loss": 2.2973,
      "step": 22609
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9948816895484924,
      "learning_rate": 1.6134694040077005e-05,
      "loss": 2.6121,
      "step": 22610
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9735896587371826,
      "learning_rate": 1.613436887874542e-05,
      "loss": 2.5401,
      "step": 22611
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1124118566513062,
      "learning_rate": 1.6134043707014413e-05,
      "loss": 2.4088,
      "step": 22612
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9882761240005493,
      "learning_rate": 1.6133718524884535e-05,
      "loss": 2.47,
      "step": 22613
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9788326025009155,
      "learning_rate": 1.613339333235634e-05,
      "loss": 2.3961,
      "step": 22614
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0175554752349854,
      "learning_rate": 1.6133068129430377e-05,
      "loss": 2.7082,
      "step": 22615
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0326025485992432,
      "learning_rate": 1.6132742916107197e-05,
      "loss": 2.5739,
      "step": 22616
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2937464714050293,
      "learning_rate": 1.6132417692387348e-05,
      "loss": 2.5061,
      "step": 22617
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.062251329421997,
      "learning_rate": 1.613209245827139e-05,
      "loss": 2.5882,
      "step": 22618
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2029075622558594,
      "learning_rate": 1.6131767213759874e-05,
      "loss": 2.3772,
      "step": 22619
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0922716856002808,
      "learning_rate": 1.6131441958853343e-05,
      "loss": 2.3485,
      "step": 22620
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0609087944030762,
      "learning_rate": 1.6131116693552352e-05,
      "loss": 2.3185,
      "step": 22621
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4344377517700195,
      "learning_rate": 1.6130791417857453e-05,
      "loss": 2.2537,
      "step": 22622
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0381832122802734,
      "learning_rate": 1.6130466131769197e-05,
      "loss": 2.3608,
      "step": 22623
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.087341547012329,
      "learning_rate": 1.6130140835288137e-05,
      "loss": 2.5487,
      "step": 22624
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0096591711044312,
      "learning_rate": 1.6129815528414823e-05,
      "loss": 2.5463,
      "step": 22625
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9885003566741943,
      "learning_rate": 1.6129490211149808e-05,
      "loss": 2.3215,
      "step": 22626
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.082521677017212,
      "learning_rate": 1.612916488349364e-05,
      "loss": 2.4249,
      "step": 22627
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0798251628875732,
      "learning_rate": 1.6128839545446874e-05,
      "loss": 2.775,
      "step": 22628
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.101255178451538,
      "learning_rate": 1.612851419701006e-05,
      "loss": 2.2832,
      "step": 22629
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0015789270401,
      "learning_rate": 1.6128188838183753e-05,
      "loss": 2.5341,
      "step": 22630
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1067441701889038,
      "learning_rate": 1.61278634689685e-05,
      "loss": 2.4553,
      "step": 22631
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9768927693367004,
      "learning_rate": 1.612753808936485e-05,
      "loss": 2.3483,
      "step": 22632
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1010127067565918,
      "learning_rate": 1.6127212699373363e-05,
      "loss": 2.494,
      "step": 22633
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.002450704574585,
      "learning_rate": 1.6126887298994588e-05,
      "loss": 2.3857,
      "step": 22634
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0408426523208618,
      "learning_rate": 1.612656188822907e-05,
      "loss": 2.5444,
      "step": 22635
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0051822662353516,
      "learning_rate": 1.612623646707737e-05,
      "loss": 2.6336,
      "step": 22636
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0770069360733032,
      "learning_rate": 1.6125911035540033e-05,
      "loss": 2.4652,
      "step": 22637
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2365890741348267,
      "learning_rate": 1.612558559361761e-05,
      "loss": 2.2004,
      "step": 22638
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.008244514465332,
      "learning_rate": 1.6125260141310663e-05,
      "loss": 2.4476,
      "step": 22639
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.165785312652588,
      "learning_rate": 1.612493467861973e-05,
      "loss": 2.2832,
      "step": 22640
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1218600273132324,
      "learning_rate": 1.612460920554537e-05,
      "loss": 2.4874,
      "step": 22641
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.020000696182251,
      "learning_rate": 1.6124283722088138e-05,
      "loss": 2.3224,
      "step": 22642
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1203360557556152,
      "learning_rate": 1.612395822824858e-05,
      "loss": 2.3518,
      "step": 22643
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0326156616210938,
      "learning_rate": 1.6123632724027246e-05,
      "loss": 2.431,
      "step": 22644
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0206235647201538,
      "learning_rate": 1.6123307209424696e-05,
      "loss": 2.7271,
      "step": 22645
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.072473168373108,
      "learning_rate": 1.6122981684441476e-05,
      "loss": 2.3768,
      "step": 22646
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1406697034835815,
      "learning_rate": 1.612265614907814e-05,
      "loss": 2.5583,
      "step": 22647
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0383453369140625,
      "learning_rate": 1.6122330603335238e-05,
      "loss": 2.5065,
      "step": 22648
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9443625211715698,
      "learning_rate": 1.612200504721332e-05,
      "loss": 2.302,
      "step": 22649
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0492908954620361,
      "learning_rate": 1.6121679480712944e-05,
      "loss": 2.6138,
      "step": 22650
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2146425247192383,
      "learning_rate": 1.6121353903834654e-05,
      "loss": 2.3528,
      "step": 22651
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4017705917358398,
      "learning_rate": 1.612102831657901e-05,
      "loss": 2.3893,
      "step": 22652
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2075927257537842,
      "learning_rate": 1.6120702718946565e-05,
      "loss": 2.3105,
      "step": 22653
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.9980134963989258,
      "learning_rate": 1.6120377110937863e-05,
      "loss": 2.198,
      "step": 22654
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.0448917150497437,
      "learning_rate": 1.6120051492553458e-05,
      "loss": 2.5191,
      "step": 22655
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.031493067741394,
      "learning_rate": 1.6119725863793906e-05,
      "loss": 2.5211,
      "step": 22656
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0339415073394775,
      "learning_rate": 1.6119400224659755e-05,
      "loss": 2.4237,
      "step": 22657
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0378320217132568,
      "learning_rate": 1.6119074575151558e-05,
      "loss": 2.3015,
      "step": 22658
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0224329233169556,
      "learning_rate": 1.6118748915269867e-05,
      "loss": 2.3694,
      "step": 22659
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9755315780639648,
      "learning_rate": 1.6118423245015238e-05,
      "loss": 2.4219,
      "step": 22660
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9957746267318726,
      "learning_rate": 1.6118097564388216e-05,
      "loss": 2.5761,
      "step": 22661
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1186479330062866,
      "learning_rate": 1.611777187338936e-05,
      "loss": 2.3674,
      "step": 22662
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.333084225654602,
      "learning_rate": 1.6117446172019217e-05,
      "loss": 2.3251,
      "step": 22663
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0614718198776245,
      "learning_rate": 1.6117120460278344e-05,
      "loss": 2.2844,
      "step": 22664
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9568861126899719,
      "learning_rate": 1.6116794738167287e-05,
      "loss": 2.218,
      "step": 22665
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0364394187927246,
      "learning_rate": 1.6116469005686602e-05,
      "loss": 2.5646,
      "step": 22666
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0487754344940186,
      "learning_rate": 1.6116143262836845e-05,
      "loss": 2.3045,
      "step": 22667
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9669418931007385,
      "learning_rate": 1.611581750961856e-05,
      "loss": 2.2567,
      "step": 22668
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9893560409545898,
      "learning_rate": 1.6115491746032305e-05,
      "loss": 2.3774,
      "step": 22669
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0237019062042236,
      "learning_rate": 1.611516597207863e-05,
      "loss": 2.409,
      "step": 22670
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0151112079620361,
      "learning_rate": 1.611484018775809e-05,
      "loss": 2.5792,
      "step": 22671
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1016324758529663,
      "learning_rate": 1.611451439307123e-05,
      "loss": 2.3019,
      "step": 22672
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0283808708190918,
      "learning_rate": 1.6114188588018613e-05,
      "loss": 2.2303,
      "step": 22673
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0356953144073486,
      "learning_rate": 1.6113862772600782e-05,
      "loss": 2.525,
      "step": 22674
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1317224502563477,
      "learning_rate": 1.6113536946818296e-05,
      "loss": 2.4837,
      "step": 22675
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2909361124038696,
      "learning_rate": 1.61132111106717e-05,
      "loss": 2.5075,
      "step": 22676
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0337562561035156,
      "learning_rate": 1.6112885264161555e-05,
      "loss": 2.4767,
      "step": 22677
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0785294771194458,
      "learning_rate": 1.6112559407288407e-05,
      "loss": 2.2519,
      "step": 22678
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0634417533874512,
      "learning_rate": 1.611223354005281e-05,
      "loss": 2.2602,
      "step": 22679
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1961872577667236,
      "learning_rate": 1.6111907662455323e-05,
      "loss": 2.3574,
      "step": 22680
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9850155115127563,
      "learning_rate": 1.6111581774496486e-05,
      "loss": 2.5004,
      "step": 22681
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0164374113082886,
      "learning_rate": 1.611125587617686e-05,
      "loss": 2.5967,
      "step": 22682
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9766104817390442,
      "learning_rate": 1.6110929967496995e-05,
      "loss": 2.5128,
      "step": 22683
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1914464235305786,
      "learning_rate": 1.6110604048457446e-05,
      "loss": 2.4714,
      "step": 22684
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.88270103931427,
      "learning_rate": 1.6110278119058762e-05,
      "loss": 2.1842,
      "step": 22685
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1306852102279663,
      "learning_rate": 1.6109952179301498e-05,
      "loss": 2.3858,
      "step": 22686
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.5173208713531494,
      "learning_rate": 1.6109626229186204e-05,
      "loss": 2.4251,
      "step": 22687
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9819201231002808,
      "learning_rate": 1.6109300268713435e-05,
      "loss": 2.3568,
      "step": 22688
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9692687392234802,
      "learning_rate": 1.6108974297883744e-05,
      "loss": 2.4229,
      "step": 22689
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.975682258605957,
      "learning_rate": 1.6108648316697682e-05,
      "loss": 2.5113,
      "step": 22690
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0594301223754883,
      "learning_rate": 1.61083223251558e-05,
      "loss": 2.3892,
      "step": 22691
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1572271585464478,
      "learning_rate": 1.6107996323258653e-05,
      "loss": 2.557,
      "step": 22692
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0534558296203613,
      "learning_rate": 1.6107670311006794e-05,
      "loss": 2.6416,
      "step": 22693
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.005811333656311,
      "learning_rate": 1.6107344288400778e-05,
      "loss": 2.5615,
      "step": 22694
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.033551812171936,
      "learning_rate": 1.6107018255441148e-05,
      "loss": 2.4863,
      "step": 22695
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0154699087142944,
      "learning_rate": 1.610669221212847e-05,
      "loss": 2.4177,
      "step": 22696
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0446172952651978,
      "learning_rate": 1.6106366158463285e-05,
      "loss": 2.6122,
      "step": 22697
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0506523847579956,
      "learning_rate": 1.6106040094446153e-05,
      "loss": 2.4801,
      "step": 22698
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0826971530914307,
      "learning_rate": 1.6105714020077627e-05,
      "loss": 2.2439,
      "step": 22699
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.081583023071289,
      "learning_rate": 1.6105387935358258e-05,
      "loss": 2.3534,
      "step": 22700
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0530756711959839,
      "learning_rate": 1.6105061840288592e-05,
      "loss": 2.5354,
      "step": 22701
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.041831374168396,
      "learning_rate": 1.610473573486919e-05,
      "loss": 2.3444,
      "step": 22702
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0524224042892456,
      "learning_rate": 1.610440961910061e-05,
      "loss": 2.3953,
      "step": 22703
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9986395239830017,
      "learning_rate": 1.6104083492983388e-05,
      "loss": 2.39,
      "step": 22704
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0697180032730103,
      "learning_rate": 1.6103757356518094e-05,
      "loss": 2.3649,
      "step": 22705
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0097631216049194,
      "learning_rate": 1.610343120970527e-05,
      "loss": 2.4313,
      "step": 22706
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1375354528427124,
      "learning_rate": 1.6103105052545474e-05,
      "loss": 2.5534,
      "step": 22707
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9979559779167175,
      "learning_rate": 1.6102778885039255e-05,
      "loss": 2.3564,
      "step": 22708
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0990753173828125,
      "learning_rate": 1.610245270718717e-05,
      "loss": 2.4648,
      "step": 22709
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0211471319198608,
      "learning_rate": 1.610212651898977e-05,
      "loss": 2.3605,
      "step": 22710
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0666331052780151,
      "learning_rate": 1.6101800320447606e-05,
      "loss": 2.4741,
      "step": 22711
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1024165153503418,
      "learning_rate": 1.6101474111561236e-05,
      "loss": 2.4414,
      "step": 22712
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9594614505767822,
      "learning_rate": 1.610114789233121e-05,
      "loss": 2.1446,
      "step": 22713
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9915030598640442,
      "learning_rate": 1.610082166275808e-05,
      "loss": 2.4211,
      "step": 22714
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0449576377868652,
      "learning_rate": 1.6100495422842404e-05,
      "loss": 2.5052,
      "step": 22715
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.430566430091858,
      "learning_rate": 1.610016917258473e-05,
      "loss": 2.3998,
      "step": 22716
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1584584712982178,
      "learning_rate": 1.6099842911985608e-05,
      "loss": 2.4316,
      "step": 22717
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0963506698608398,
      "learning_rate": 1.60995166410456e-05,
      "loss": 2.6552,
      "step": 22718
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.066362977027893,
      "learning_rate": 1.6099190359765253e-05,
      "loss": 2.5503,
      "step": 22719
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1358052492141724,
      "learning_rate": 1.6098864068145123e-05,
      "loss": 2.4247,
      "step": 22720
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0277934074401855,
      "learning_rate": 1.609853776618576e-05,
      "loss": 2.342,
      "step": 22721
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.063570499420166,
      "learning_rate": 1.609821145388772e-05,
      "loss": 2.227,
      "step": 22722
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0051218271255493,
      "learning_rate": 1.6097885131251556e-05,
      "loss": 2.3403,
      "step": 22723
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0732016563415527,
      "learning_rate": 1.609755879827782e-05,
      "loss": 2.6348,
      "step": 22724
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1006066799163818,
      "learning_rate": 1.6097232454967064e-05,
      "loss": 2.3649,
      "step": 22725
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0914061069488525,
      "learning_rate": 1.6096906101319846e-05,
      "loss": 2.5284,
      "step": 22726
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0110766887664795,
      "learning_rate": 1.6096579737336716e-05,
      "loss": 2.1515,
      "step": 22727
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0407031774520874,
      "learning_rate": 1.6096253363018228e-05,
      "loss": 2.5591,
      "step": 22728
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0658800601959229,
      "learning_rate": 1.6095926978364932e-05,
      "loss": 2.464,
      "step": 22729
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0434460639953613,
      "learning_rate": 1.6095600583377387e-05,
      "loss": 2.4626,
      "step": 22730
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9224490523338318,
      "learning_rate": 1.609527417805614e-05,
      "loss": 2.2528,
      "step": 22731
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1124637126922607,
      "learning_rate": 1.609494776240175e-05,
      "loss": 2.4476,
      "step": 22732
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9915932416915894,
      "learning_rate": 1.609462133641477e-05,
      "loss": 2.4071,
      "step": 22733
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0097711086273193,
      "learning_rate": 1.609429490009575e-05,
      "loss": 2.389,
      "step": 22734
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1014763116836548,
      "learning_rate": 1.6093968453445246e-05,
      "loss": 2.4241,
      "step": 22735
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1174919605255127,
      "learning_rate": 1.6093641996463806e-05,
      "loss": 2.3167,
      "step": 22736
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0156769752502441,
      "learning_rate": 1.609331552915199e-05,
      "loss": 2.3474,
      "step": 22737
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.072195053100586,
      "learning_rate": 1.609298905151035e-05,
      "loss": 2.4351,
      "step": 22738
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2629637718200684,
      "learning_rate": 1.609266256353944e-05,
      "loss": 2.3049,
      "step": 22739
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0353847742080688,
      "learning_rate": 1.6092336065239813e-05,
      "loss": 2.4589,
      "step": 22740
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.153300166130066,
      "learning_rate": 1.6092009556612017e-05,
      "loss": 2.4284,
      "step": 22741
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9631807208061218,
      "learning_rate": 1.6091683037656616e-05,
      "loss": 2.5248,
      "step": 22742
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0612058639526367,
      "learning_rate": 1.6091356508374155e-05,
      "loss": 2.3483,
      "step": 22743
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1850374937057495,
      "learning_rate": 1.6091029968765187e-05,
      "loss": 2.3875,
      "step": 22744
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1481842994689941,
      "learning_rate": 1.6090703418830274e-05,
      "loss": 2.4248,
      "step": 22745
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2524951696395874,
      "learning_rate": 1.609037685856996e-05,
      "loss": 2.373,
      "step": 22746
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.049735188484192,
      "learning_rate": 1.609005028798481e-05,
      "loss": 2.3689,
      "step": 22747
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9894663691520691,
      "learning_rate": 1.6089723707075365e-05,
      "loss": 2.2869,
      "step": 22748
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0921233892440796,
      "learning_rate": 1.6089397115842185e-05,
      "loss": 2.4269,
      "step": 22749
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.052706241607666,
      "learning_rate": 1.608907051428582e-05,
      "loss": 2.5238,
      "step": 22750
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9988442659378052,
      "learning_rate": 1.6088743902406833e-05,
      "loss": 2.4752,
      "step": 22751
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.02199387550354,
      "learning_rate": 1.608841728020577e-05,
      "loss": 2.6209,
      "step": 22752
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2102174758911133,
      "learning_rate": 1.6088090647683186e-05,
      "loss": 2.2718,
      "step": 22753
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.042685627937317,
      "learning_rate": 1.608776400483963e-05,
      "loss": 2.5503,
      "step": 22754
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0296459197998047,
      "learning_rate": 1.6087437351675665e-05,
      "loss": 2.3569,
      "step": 22755
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0963746309280396,
      "learning_rate": 1.608711068819184e-05,
      "loss": 2.3192,
      "step": 22756
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9946972727775574,
      "learning_rate": 1.608678401438871e-05,
      "loss": 2.5102,
      "step": 22757
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.022391438484192,
      "learning_rate": 1.6086457330266828e-05,
      "loss": 2.2856,
      "step": 22758
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1276503801345825,
      "learning_rate": 1.6086130635826746e-05,
      "loss": 2.3376,
      "step": 22759
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0802007913589478,
      "learning_rate": 1.6085803931069023e-05,
      "loss": 2.3522,
      "step": 22760
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0662602186203003,
      "learning_rate": 1.6085477215994205e-05,
      "loss": 2.3185,
      "step": 22761
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1601300239562988,
      "learning_rate": 1.608515049060285e-05,
      "loss": 2.5626,
      "step": 22762
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0809907913208008,
      "learning_rate": 1.6084823754895514e-05,
      "loss": 2.4219,
      "step": 22763
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.027541160583496,
      "learning_rate": 1.608449700887275e-05,
      "loss": 2.3632,
      "step": 22764
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1593172550201416,
      "learning_rate": 1.6084170252535113e-05,
      "loss": 2.5755,
      "step": 22765
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1286778450012207,
      "learning_rate": 1.608384348588315e-05,
      "loss": 2.5028,
      "step": 22766
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9630451202392578,
      "learning_rate": 1.608351670891742e-05,
      "loss": 2.6054,
      "step": 22767
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0697987079620361,
      "learning_rate": 1.6083189921638483e-05,
      "loss": 2.2576,
      "step": 22768
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0885695219039917,
      "learning_rate": 1.608286312404688e-05,
      "loss": 2.5379,
      "step": 22769
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2350291013717651,
      "learning_rate": 1.6082536316143178e-05,
      "loss": 2.389,
      "step": 22770
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1135157346725464,
      "learning_rate": 1.608220949792792e-05,
      "loss": 2.1816,
      "step": 22771
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0053995847702026,
      "learning_rate": 1.6081882669401668e-05,
      "loss": 2.1655,
      "step": 22772
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2209172248840332,
      "learning_rate": 1.6081555830564972e-05,
      "loss": 2.3289,
      "step": 22773
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9950939416885376,
      "learning_rate": 1.6081228981418388e-05,
      "loss": 2.3861,
      "step": 22774
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0726239681243896,
      "learning_rate": 1.6080902121962467e-05,
      "loss": 2.3641,
      "step": 22775
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1096757650375366,
      "learning_rate": 1.6080575252197766e-05,
      "loss": 2.5442,
      "step": 22776
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9693042635917664,
      "learning_rate": 1.608024837212484e-05,
      "loss": 2.3308,
      "step": 22777
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0787901878356934,
      "learning_rate": 1.607992148174424e-05,
      "loss": 2.4909,
      "step": 22778
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9774138331413269,
      "learning_rate": 1.6079594581056526e-05,
      "loss": 2.6659,
      "step": 22779
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0409183502197266,
      "learning_rate": 1.6079267670062245e-05,
      "loss": 2.3068,
      "step": 22780
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1479310989379883,
      "learning_rate": 1.6078940748761952e-05,
      "loss": 2.2938,
      "step": 22781
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.007996916770935,
      "learning_rate": 1.6078613817156206e-05,
      "loss": 2.6744,
      "step": 22782
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1518120765686035,
      "learning_rate": 1.6078286875245558e-05,
      "loss": 2.5742,
      "step": 22783
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0938799381256104,
      "learning_rate": 1.607795992303056e-05,
      "loss": 2.399,
      "step": 22784
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0735902786254883,
      "learning_rate": 1.6077632960511775e-05,
      "loss": 2.2752,
      "step": 22785
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.078163504600525,
      "learning_rate": 1.6077305987689747e-05,
      "loss": 2.2497,
      "step": 22786
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.069793939590454,
      "learning_rate": 1.6076979004565038e-05,
      "loss": 2.3132,
      "step": 22787
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9952090382575989,
      "learning_rate": 1.6076652011138195e-05,
      "loss": 2.4708,
      "step": 22788
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0925909280776978,
      "learning_rate": 1.6076325007409782e-05,
      "loss": 2.175,
      "step": 22789
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2406728267669678,
      "learning_rate": 1.6075997993380344e-05,
      "loss": 2.4538,
      "step": 22790
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1145131587982178,
      "learning_rate": 1.607567096905044e-05,
      "loss": 2.5872,
      "step": 22791
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9901350736618042,
      "learning_rate": 1.6075343934420624e-05,
      "loss": 2.2348,
      "step": 22792
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0000355243682861,
      "learning_rate": 1.6075016889491448e-05,
      "loss": 2.4845,
      "step": 22793
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9853417873382568,
      "learning_rate": 1.6074689834263472e-05,
      "loss": 2.4174,
      "step": 22794
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1243709325790405,
      "learning_rate": 1.6074362768737242e-05,
      "loss": 2.6149,
      "step": 22795
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0554243326187134,
      "learning_rate": 1.6074035692913322e-05,
      "loss": 2.4046,
      "step": 22796
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0672597885131836,
      "learning_rate": 1.607370860679226e-05,
      "loss": 2.6001,
      "step": 22797
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.142536997795105,
      "learning_rate": 1.6073381510374612e-05,
      "loss": 2.3646,
      "step": 22798
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0921638011932373,
      "learning_rate": 1.607305440366093e-05,
      "loss": 2.3118,
      "step": 22799
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.06419038772583,
      "learning_rate": 1.6072727286651775e-05,
      "loss": 2.6732,
      "step": 22800
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1669832468032837,
      "learning_rate": 1.6072400159347696e-05,
      "loss": 2.3928,
      "step": 22801
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.078264832496643,
      "learning_rate": 1.6072073021749252e-05,
      "loss": 2.6355,
      "step": 22802
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9573271870613098,
      "learning_rate": 1.6071745873856994e-05,
      "loss": 2.2837,
      "step": 22803
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0011999607086182,
      "learning_rate": 1.607141871567148e-05,
      "loss": 2.4761,
      "step": 22804
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9812731146812439,
      "learning_rate": 1.6071091547193257e-05,
      "loss": 2.3854,
      "step": 22805
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9560190439224243,
      "learning_rate": 1.607076436842289e-05,
      "loss": 2.4263,
      "step": 22806
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2181341648101807,
      "learning_rate": 1.6070437179360926e-05,
      "loss": 2.2435,
      "step": 22807
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9913486242294312,
      "learning_rate": 1.6070109980007923e-05,
      "loss": 2.229,
      "step": 22808
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1010980606079102,
      "learning_rate": 1.606978277036443e-05,
      "loss": 2.6419,
      "step": 22809
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1390149593353271,
      "learning_rate": 1.6069455550431014e-05,
      "loss": 2.3757,
      "step": 22810
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9844598174095154,
      "learning_rate": 1.606912832020822e-05,
      "loss": 2.4841,
      "step": 22811
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0886681079864502,
      "learning_rate": 1.6068801079696605e-05,
      "loss": 2.2855,
      "step": 22812
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0460238456726074,
      "learning_rate": 1.606847382889672e-05,
      "loss": 2.4628,
      "step": 22813
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1023941040039062,
      "learning_rate": 1.606814656780913e-05,
      "loss": 2.2982,
      "step": 22814
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.089589238166809,
      "learning_rate": 1.606781929643438e-05,
      "loss": 2.4572,
      "step": 22815
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1644816398620605,
      "learning_rate": 1.606749201477303e-05,
      "loss": 2.5197,
      "step": 22816
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0497021675109863,
      "learning_rate": 1.6067164722825634e-05,
      "loss": 2.3468,
      "step": 22817
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9603865742683411,
      "learning_rate": 1.6066837420592744e-05,
      "loss": 2.3554,
      "step": 22818
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1617674827575684,
      "learning_rate": 1.6066510108074916e-05,
      "loss": 2.4348,
      "step": 22819
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.949641764163971,
      "learning_rate": 1.606618278527271e-05,
      "loss": 2.3662,
      "step": 22820
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2927149534225464,
      "learning_rate": 1.606585545218667e-05,
      "loss": 2.4046,
      "step": 22821
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.093300223350525,
      "learning_rate": 1.6065528108817364e-05,
      "loss": 2.3742,
      "step": 22822
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.066523790359497,
      "learning_rate": 1.6065200755165336e-05,
      "loss": 2.523,
      "step": 22823
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9993771314620972,
      "learning_rate": 1.6064873391231148e-05,
      "loss": 2.4933,
      "step": 22824
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1378761529922485,
      "learning_rate": 1.606454601701535e-05,
      "loss": 2.4377,
      "step": 22825
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.059194803237915,
      "learning_rate": 1.6064218632518503e-05,
      "loss": 2.5521,
      "step": 22826
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.065817952156067,
      "learning_rate": 1.6063891237741155e-05,
      "loss": 2.3437,
      "step": 22827
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0131858587265015,
      "learning_rate": 1.6063563832683866e-05,
      "loss": 2.2325,
      "step": 22828
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0338448286056519,
      "learning_rate": 1.6063236417347184e-05,
      "loss": 2.4873,
      "step": 22829
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.053397297859192,
      "learning_rate": 1.6062908991731677e-05,
      "loss": 2.2895,
      "step": 22830
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1069778203964233,
      "learning_rate": 1.606258155583789e-05,
      "loss": 2.5933,
      "step": 22831
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0762717723846436,
      "learning_rate": 1.6062254109666383e-05,
      "loss": 2.3225,
      "step": 22832
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0708808898925781,
      "learning_rate": 1.6061926653217705e-05,
      "loss": 2.5798,
      "step": 22833
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1000654697418213,
      "learning_rate": 1.6061599186492416e-05,
      "loss": 2.3694,
      "step": 22834
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0827401876449585,
      "learning_rate": 1.6061271709491072e-05,
      "loss": 2.3362,
      "step": 22835
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.921654999256134,
      "learning_rate": 1.6060944222214226e-05,
      "loss": 2.5436,
      "step": 22836
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0938527584075928,
      "learning_rate": 1.606061672466243e-05,
      "loss": 2.1814,
      "step": 22837
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9937676787376404,
      "learning_rate": 1.6060289216836246e-05,
      "loss": 2.5193,
      "step": 22838
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9652112722396851,
      "learning_rate": 1.6059961698736227e-05,
      "loss": 2.1813,
      "step": 22839
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0428626537322998,
      "learning_rate": 1.6059634170362924e-05,
      "loss": 2.5985,
      "step": 22840
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.022973656654358,
      "learning_rate": 1.6059306631716895e-05,
      "loss": 2.4877,
      "step": 22841
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9656156897544861,
      "learning_rate": 1.6058979082798693e-05,
      "loss": 2.2734,
      "step": 22842
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1646499633789062,
      "learning_rate": 1.605865152360888e-05,
      "loss": 2.4724,
      "step": 22843
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.003791332244873,
      "learning_rate": 1.605832395414801e-05,
      "loss": 2.3955,
      "step": 22844
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0508965253829956,
      "learning_rate": 1.605799637441663e-05,
      "loss": 2.3453,
      "step": 22845
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9271605610847473,
      "learning_rate": 1.6057668784415304e-05,
      "loss": 2.2664,
      "step": 22846
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0446581840515137,
      "learning_rate": 1.6057341184144585e-05,
      "loss": 2.5295,
      "step": 22847
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1572705507278442,
      "learning_rate": 1.6057013573605024e-05,
      "loss": 2.2963,
      "step": 22848
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9998410940170288,
      "learning_rate": 1.605668595279718e-05,
      "loss": 2.3793,
      "step": 22849
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.098824381828308,
      "learning_rate": 1.605635832172161e-05,
      "loss": 2.4692,
      "step": 22850
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0583175420761108,
      "learning_rate": 1.6056030680378868e-05,
      "loss": 2.4479,
      "step": 22851
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9198792576789856,
      "learning_rate": 1.605570302876951e-05,
      "loss": 2.6336,
      "step": 22852
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1385656595230103,
      "learning_rate": 1.605537536689409e-05,
      "loss": 2.4961,
      "step": 22853
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9859219789505005,
      "learning_rate": 1.6055047694753164e-05,
      "loss": 2.3844,
      "step": 22854
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0623362064361572,
      "learning_rate": 1.6054720012347286e-05,
      "loss": 2.4251,
      "step": 22855
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9724410772323608,
      "learning_rate": 1.6054392319677012e-05,
      "loss": 2.402,
      "step": 22856
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.042405605316162,
      "learning_rate": 1.6054064616742904e-05,
      "loss": 2.0952,
      "step": 22857
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1256061792373657,
      "learning_rate": 1.605373690354551e-05,
      "loss": 2.4119,
      "step": 22858
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1529901027679443,
      "learning_rate": 1.6053409180085387e-05,
      "loss": 2.3448,
      "step": 22859
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.013232946395874,
      "learning_rate": 1.605308144636309e-05,
      "loss": 2.6129,
      "step": 22860
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1885014772415161,
      "learning_rate": 1.605275370237918e-05,
      "loss": 2.3604,
      "step": 22861
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0767062902450562,
      "learning_rate": 1.6052425948134204e-05,
      "loss": 2.4702,
      "step": 22862
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0887062549591064,
      "learning_rate": 1.6052098183628723e-05,
      "loss": 2.6178,
      "step": 22863
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9067549109458923,
      "learning_rate": 1.6051770408863294e-05,
      "loss": 2.5277,
      "step": 22864
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.917161226272583,
      "learning_rate": 1.605144262383847e-05,
      "loss": 2.3614,
      "step": 22865
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1303293704986572,
      "learning_rate": 1.6051114828554806e-05,
      "loss": 2.2456,
      "step": 22866
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1194316148757935,
      "learning_rate": 1.605078702301286e-05,
      "loss": 2.4304,
      "step": 22867
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9576285481452942,
      "learning_rate": 1.6050459207213186e-05,
      "loss": 2.3843,
      "step": 22868
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.012046456336975,
      "learning_rate": 1.605013138115634e-05,
      "loss": 2.689,
      "step": 22869
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9774076342582703,
      "learning_rate": 1.604980354484288e-05,
      "loss": 2.6436,
      "step": 22870
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.021242618560791,
      "learning_rate": 1.604947569827336e-05,
      "loss": 2.5281,
      "step": 22871
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0640113353729248,
      "learning_rate": 1.604914784144833e-05,
      "loss": 2.4474,
      "step": 22872
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0009393692016602,
      "learning_rate": 1.6048819974368355e-05,
      "loss": 2.1922,
      "step": 22873
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9915861487388611,
      "learning_rate": 1.6048492097033992e-05,
      "loss": 2.4659,
      "step": 22874
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2371846437454224,
      "learning_rate": 1.6048164209445788e-05,
      "loss": 2.3224,
      "step": 22875
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9665484428405762,
      "learning_rate": 1.6047836311604303e-05,
      "loss": 2.2864,
      "step": 22876
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.053433895111084,
      "learning_rate": 1.6047508403510094e-05,
      "loss": 2.6931,
      "step": 22877
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1844263076782227,
      "learning_rate": 1.6047180485163714e-05,
      "loss": 2.4533,
      "step": 22878
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2004919052124023,
      "learning_rate": 1.604685255656572e-05,
      "loss": 2.4363,
      "step": 22879
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0501673221588135,
      "learning_rate": 1.604652461771667e-05,
      "loss": 2.5482,
      "step": 22880
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0987014770507812,
      "learning_rate": 1.6046196668617124e-05,
      "loss": 2.4526,
      "step": 22881
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1332110166549683,
      "learning_rate": 1.6045868709267623e-05,
      "loss": 2.4365,
      "step": 22882
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1377946138381958,
      "learning_rate": 1.6045540739668736e-05,
      "loss": 2.6854,
      "step": 22883
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0921393632888794,
      "learning_rate": 1.604521275982102e-05,
      "loss": 2.5186,
      "step": 22884
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0448042154312134,
      "learning_rate": 1.604488476972502e-05,
      "loss": 2.2742,
      "step": 22885
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.010886549949646,
      "learning_rate": 1.6044556769381302e-05,
      "loss": 2.4307,
      "step": 22886
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.959034264087677,
      "learning_rate": 1.6044228758790418e-05,
      "loss": 2.3493,
      "step": 22887
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0222111940383911,
      "learning_rate": 1.6043900737952926e-05,
      "loss": 2.3131,
      "step": 22888
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0704479217529297,
      "learning_rate": 1.6043572706869378e-05,
      "loss": 2.3931,
      "step": 22889
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0868066549301147,
      "learning_rate": 1.6043244665540336e-05,
      "loss": 2.5735,
      "step": 22890
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2830779552459717,
      "learning_rate": 1.6042916613966352e-05,
      "loss": 2.3761,
      "step": 22891
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1135423183441162,
      "learning_rate": 1.604258855214798e-05,
      "loss": 2.3471,
      "step": 22892
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0923441648483276,
      "learning_rate": 1.6042260480085784e-05,
      "loss": 2.5664,
      "step": 22893
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9566693902015686,
      "learning_rate": 1.604193239778031e-05,
      "loss": 2.574,
      "step": 22894
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9780458211898804,
      "learning_rate": 1.6041604305232124e-05,
      "loss": 2.3273,
      "step": 22895
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9987219572067261,
      "learning_rate": 1.6041276202441775e-05,
      "loss": 2.5779,
      "step": 22896
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0000126361846924,
      "learning_rate": 1.6040948089409825e-05,
      "loss": 2.5127,
      "step": 22897
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0290329456329346,
      "learning_rate": 1.6040619966136827e-05,
      "loss": 2.2945,
      "step": 22898
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.026015281677246,
      "learning_rate": 1.6040291832623336e-05,
      "loss": 2.5447,
      "step": 22899
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0053948163986206,
      "learning_rate": 1.6039963688869908e-05,
      "loss": 2.5784,
      "step": 22900
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2280359268188477,
      "learning_rate": 1.6039635534877102e-05,
      "loss": 2.4509,
      "step": 22901
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0138407945632935,
      "learning_rate": 1.6039307370645477e-05,
      "loss": 2.4121,
      "step": 22902
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4237595796585083,
      "learning_rate": 1.603897919617558e-05,
      "loss": 2.5179,
      "step": 22903
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0648752450942993,
      "learning_rate": 1.603865101146798e-05,
      "loss": 2.322,
      "step": 22904
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.031514048576355,
      "learning_rate": 1.6038322816523222e-05,
      "loss": 2.4342,
      "step": 22905
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0121636390686035,
      "learning_rate": 1.603799461134187e-05,
      "loss": 2.5766,
      "step": 22906
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9629814028739929,
      "learning_rate": 1.6037666395924475e-05,
      "loss": 2.3548,
      "step": 22907
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0330742597579956,
      "learning_rate": 1.6037338170271593e-05,
      "loss": 2.3578,
      "step": 22908
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0072094202041626,
      "learning_rate": 1.6037009934383786e-05,
      "loss": 2.5286,
      "step": 22909
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2122268676757812,
      "learning_rate": 1.603668168826161e-05,
      "loss": 2.4649,
      "step": 22910
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.983074963092804,
      "learning_rate": 1.6036353431905616e-05,
      "loss": 2.4821,
      "step": 22911
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0263826847076416,
      "learning_rate": 1.603602516531636e-05,
      "loss": 2.3312,
      "step": 22912
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1905628442764282,
      "learning_rate": 1.603569688849441e-05,
      "loss": 2.3373,
      "step": 22913
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2165648937225342,
      "learning_rate": 1.603536860144031e-05,
      "loss": 2.2903,
      "step": 22914
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.042691946029663,
      "learning_rate": 1.6035040304154618e-05,
      "loss": 2.2612,
      "step": 22915
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0211317539215088,
      "learning_rate": 1.6034711996637896e-05,
      "loss": 2.2351,
      "step": 22916
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0743532180786133,
      "learning_rate": 1.60343836788907e-05,
      "loss": 2.584,
      "step": 22917
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0028325319290161,
      "learning_rate": 1.6034055350913583e-05,
      "loss": 2.6621,
      "step": 22918
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.205326795578003,
      "learning_rate": 1.6033727012707106e-05,
      "loss": 2.4767,
      "step": 22919
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.094810128211975,
      "learning_rate": 1.6033398664271822e-05,
      "loss": 2.2832,
      "step": 22920
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0141363143920898,
      "learning_rate": 1.6033070305608287e-05,
      "loss": 2.4158,
      "step": 22921
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0466421842575073,
      "learning_rate": 1.603274193671706e-05,
      "loss": 2.2085,
      "step": 22922
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0389751195907593,
      "learning_rate": 1.6032413557598697e-05,
      "loss": 2.5542,
      "step": 22923
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.005527377128601,
      "learning_rate": 1.6032085168253755e-05,
      "loss": 2.363,
      "step": 22924
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1042171716690063,
      "learning_rate": 1.603175676868279e-05,
      "loss": 2.4231,
      "step": 22925
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0741363763809204,
      "learning_rate": 1.603142835888636e-05,
      "loss": 2.5716,
      "step": 22926
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0222384929656982,
      "learning_rate": 1.603109993886502e-05,
      "loss": 2.3753,
      "step": 22927
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9415779709815979,
      "learning_rate": 1.6030771508619327e-05,
      "loss": 2.3014,
      "step": 22928
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9974366426467896,
      "learning_rate": 1.6030443068149837e-05,
      "loss": 2.6137,
      "step": 22929
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0875043869018555,
      "learning_rate": 1.603011461745711e-05,
      "loss": 2.3709,
      "step": 22930
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.006628155708313,
      "learning_rate": 1.6029786156541703e-05,
      "loss": 2.5193,
      "step": 22931
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0433907508850098,
      "learning_rate": 1.602945768540417e-05,
      "loss": 2.3711,
      "step": 22932
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9752472043037415,
      "learning_rate": 1.6029129204045065e-05,
      "loss": 2.5451,
      "step": 22933
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0402332544326782,
      "learning_rate": 1.602880071246495e-05,
      "loss": 2.5439,
      "step": 22934
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0178184509277344,
      "learning_rate": 1.6028472210664383e-05,
      "loss": 2.3322,
      "step": 22935
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0319558382034302,
      "learning_rate": 1.6028143698643916e-05,
      "loss": 2.3077,
      "step": 22936
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0880048274993896,
      "learning_rate": 1.602781517640411e-05,
      "loss": 2.6842,
      "step": 22937
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0768232345581055,
      "learning_rate": 1.602748664394552e-05,
      "loss": 2.5174,
      "step": 22938
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0687404870986938,
      "learning_rate": 1.60271581012687e-05,
      "loss": 2.5464,
      "step": 22939
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.131867527961731,
      "learning_rate": 1.6026829548374212e-05,
      "loss": 2.2334,
      "step": 22940
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.115212082862854,
      "learning_rate": 1.6026500985262612e-05,
      "loss": 2.4577,
      "step": 22941
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3190690279006958,
      "learning_rate": 1.6026172411934453e-05,
      "loss": 2.5816,
      "step": 22942
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0546698570251465,
      "learning_rate": 1.60258438283903e-05,
      "loss": 2.4149,
      "step": 22943
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1518068313598633,
      "learning_rate": 1.60255152346307e-05,
      "loss": 2.4282,
      "step": 22944
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1950181722640991,
      "learning_rate": 1.602518663065622e-05,
      "loss": 2.5964,
      "step": 22945
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0489025115966797,
      "learning_rate": 1.602485801646741e-05,
      "loss": 2.3181,
      "step": 22946
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0542751550674438,
      "learning_rate": 1.6024529392064827e-05,
      "loss": 2.2876,
      "step": 22947
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0874584913253784,
      "learning_rate": 1.6024200757449033e-05,
      "loss": 2.2919,
      "step": 22948
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0016883611679077,
      "learning_rate": 1.6023872112620583e-05,
      "loss": 2.2602,
      "step": 22949
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1016956567764282,
      "learning_rate": 1.6023543457580033e-05,
      "loss": 2.476,
      "step": 22950
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1767650842666626,
      "learning_rate": 1.602321479232794e-05,
      "loss": 2.4144,
      "step": 22951
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1114598512649536,
      "learning_rate": 1.602288611686486e-05,
      "loss": 2.3333,
      "step": 22952
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9961403608322144,
      "learning_rate": 1.6022557431191358e-05,
      "loss": 2.4603,
      "step": 22953
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1636170148849487,
      "learning_rate": 1.6022228735307984e-05,
      "loss": 2.395,
      "step": 22954
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.99995356798172,
      "learning_rate": 1.602190002921529e-05,
      "loss": 2.5247,
      "step": 22955
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.933034360408783,
      "learning_rate": 1.6021571312913845e-05,
      "loss": 2.392,
      "step": 22956
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1664350032806396,
      "learning_rate": 1.6021242586404206e-05,
      "loss": 2.2403,
      "step": 22957
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1143877506256104,
      "learning_rate": 1.6020913849686918e-05,
      "loss": 2.5507,
      "step": 22958
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1118524074554443,
      "learning_rate": 1.6020585102762548e-05,
      "loss": 2.5548,
      "step": 22959
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2816098928451538,
      "learning_rate": 1.602025634563165e-05,
      "loss": 2.3502,
      "step": 22960
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0041354894638062,
      "learning_rate": 1.6019927578294787e-05,
      "loss": 2.5401,
      "step": 22961
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0487964153289795,
      "learning_rate": 1.6019598800752506e-05,
      "loss": 2.2949,
      "step": 22962
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0378358364105225,
      "learning_rate": 1.6019270013005373e-05,
      "loss": 2.4049,
      "step": 22963
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.10503351688385,
      "learning_rate": 1.6018941215053946e-05,
      "loss": 2.7212,
      "step": 22964
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2875384092330933,
      "learning_rate": 1.6018612406898774e-05,
      "loss": 2.4959,
      "step": 22965
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.101193904876709,
      "learning_rate": 1.601828358854042e-05,
      "loss": 2.4843,
      "step": 22966
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1790375709533691,
      "learning_rate": 1.601795475997944e-05,
      "loss": 2.6527,
      "step": 22967
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9876298308372498,
      "learning_rate": 1.6017625921216397e-05,
      "loss": 2.4271,
      "step": 22968
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.128352165222168,
      "learning_rate": 1.6017297072251838e-05,
      "loss": 2.3277,
      "step": 22969
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9937124848365784,
      "learning_rate": 1.601696821308633e-05,
      "loss": 2.2431,
      "step": 22970
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1499367952346802,
      "learning_rate": 1.6016639343720422e-05,
      "loss": 2.6057,
      "step": 22971
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9708899259567261,
      "learning_rate": 1.6016310464154684e-05,
      "loss": 2.31,
      "step": 22972
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2342473268508911,
      "learning_rate": 1.601598157438966e-05,
      "loss": 2.2097,
      "step": 22973
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0183887481689453,
      "learning_rate": 1.6015652674425913e-05,
      "loss": 2.4843,
      "step": 22974
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1627846956253052,
      "learning_rate": 1.6015323764264e-05,
      "loss": 2.2856,
      "step": 22975
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2562423944473267,
      "learning_rate": 1.6014994843904487e-05,
      "loss": 2.2294,
      "step": 22976
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2538115978240967,
      "learning_rate": 1.6014665913347918e-05,
      "loss": 2.4454,
      "step": 22977
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0342170000076294,
      "learning_rate": 1.6014336972594853e-05,
      "loss": 2.3515,
      "step": 22978
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0010507106781006,
      "learning_rate": 1.601400802164586e-05,
      "loss": 2.5496,
      "step": 22979
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2529516220092773,
      "learning_rate": 1.601367906050149e-05,
      "loss": 2.3129,
      "step": 22980
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0404565334320068,
      "learning_rate": 1.6013350089162298e-05,
      "loss": 2.3105,
      "step": 22981
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0266755819320679,
      "learning_rate": 1.6013021107628843e-05,
      "loss": 2.4125,
      "step": 22982
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0456185340881348,
      "learning_rate": 1.6012692115901688e-05,
      "loss": 2.453,
      "step": 22983
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0173912048339844,
      "learning_rate": 1.6012363113981386e-05,
      "loss": 2.6541,
      "step": 22984
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9774788022041321,
      "learning_rate": 1.6012034101868493e-05,
      "loss": 2.4386,
      "step": 22985
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.021627426147461,
      "learning_rate": 1.601170507956357e-05,
      "loss": 2.416,
      "step": 22986
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1106089353561401,
      "learning_rate": 1.6011376047067176e-05,
      "loss": 2.3274,
      "step": 22987
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0117230415344238,
      "learning_rate": 1.6011047004379867e-05,
      "loss": 2.7286,
      "step": 22988
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.208559513092041,
      "learning_rate": 1.6010717951502202e-05,
      "loss": 2.3733,
      "step": 22989
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0162051916122437,
      "learning_rate": 1.6010388888434734e-05,
      "loss": 2.3741,
      "step": 22990
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.323998212814331,
      "learning_rate": 1.6010059815178026e-05,
      "loss": 2.5133,
      "step": 22991
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0694161653518677,
      "learning_rate": 1.6009730731732633e-05,
      "loss": 2.467,
      "step": 22992
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0335185527801514,
      "learning_rate": 1.6009401638099117e-05,
      "loss": 2.3974,
      "step": 22993
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0980464220046997,
      "learning_rate": 1.600907253427803e-05,
      "loss": 2.4096,
      "step": 22994
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9898965954780579,
      "learning_rate": 1.6008743420269934e-05,
      "loss": 2.5495,
      "step": 22995
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9279645085334778,
      "learning_rate": 1.600841429607539e-05,
      "loss": 2.3329,
      "step": 22996
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0742450952529907,
      "learning_rate": 1.6008085161694946e-05,
      "loss": 2.4884,
      "step": 22997
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1183308362960815,
      "learning_rate": 1.6007756017129167e-05,
      "loss": 2.4764,
      "step": 22998
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0402926206588745,
      "learning_rate": 1.6007426862378612e-05,
      "loss": 2.3629,
      "step": 22999
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0367432832717896,
      "learning_rate": 1.6007097697443835e-05,
      "loss": 2.4885,
      "step": 23000
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1424986124038696,
      "learning_rate": 1.6006768522325398e-05,
      "loss": 2.347,
      "step": 23001
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0418980121612549,
      "learning_rate": 1.6006439337023855e-05,
      "loss": 2.4023,
      "step": 23002
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0077989101409912,
      "learning_rate": 1.6006110141539768e-05,
      "loss": 2.1353,
      "step": 23003
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0984761714935303,
      "learning_rate": 1.600578093587369e-05,
      "loss": 2.5719,
      "step": 23004
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1342283487319946,
      "learning_rate": 1.6005451720026185e-05,
      "loss": 2.4978,
      "step": 23005
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.010247826576233,
      "learning_rate": 1.6005122493997808e-05,
      "loss": 2.4619,
      "step": 23006
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1824491024017334,
      "learning_rate": 1.6004793257789115e-05,
      "loss": 2.4733,
      "step": 23007
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.131751537322998,
      "learning_rate": 1.6004464011400668e-05,
      "loss": 2.524,
      "step": 23008
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.957985520362854,
      "learning_rate": 1.6004134754833026e-05,
      "loss": 2.2936,
      "step": 23009
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3047093152999878,
      "learning_rate": 1.600380548808674e-05,
      "loss": 2.4566,
      "step": 23010
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9831101894378662,
      "learning_rate": 1.6003476211162377e-05,
      "loss": 2.314,
      "step": 23011
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0267531871795654,
      "learning_rate": 1.6003146924060492e-05,
      "loss": 2.3253,
      "step": 23012
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9914640188217163,
      "learning_rate": 1.6002817626781638e-05,
      "loss": 2.5221,
      "step": 23013
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0339776277542114,
      "learning_rate": 1.600248831932638e-05,
      "loss": 2.5505,
      "step": 23014
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0870507955551147,
      "learning_rate": 1.6002159001695274e-05,
      "loss": 2.4573,
      "step": 23015
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9446941614151001,
      "learning_rate": 1.600182967388888e-05,
      "loss": 2.2815,
      "step": 23016
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9785939455032349,
      "learning_rate": 1.6001500335907757e-05,
      "loss": 2.2923,
      "step": 23017
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0922867059707642,
      "learning_rate": 1.6001170987752455e-05,
      "loss": 2.3162,
      "step": 23018
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1224024295806885,
      "learning_rate": 1.6000841629423542e-05,
      "loss": 2.3061,
      "step": 23019
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2029787302017212,
      "learning_rate": 1.600051226092157e-05,
      "loss": 2.206,
      "step": 23020
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0482536554336548,
      "learning_rate": 1.60001828822471e-05,
      "loss": 2.3934,
      "step": 23021
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1080210208892822,
      "learning_rate": 1.5999853493400694e-05,
      "loss": 2.502,
      "step": 23022
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9988849759101868,
      "learning_rate": 1.5999524094382907e-05,
      "loss": 2.5693,
      "step": 23023
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.090037226676941,
      "learning_rate": 1.5999194685194293e-05,
      "loss": 2.3273,
      "step": 23024
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0557451248168945,
      "learning_rate": 1.5998865265835413e-05,
      "loss": 2.4882,
      "step": 23025
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2074017524719238,
      "learning_rate": 1.5998535836306835e-05,
      "loss": 2.4734,
      "step": 23026
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0552846193313599,
      "learning_rate": 1.5998206396609105e-05,
      "loss": 2.4645,
      "step": 23027
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0655988454818726,
      "learning_rate": 1.599787694674278e-05,
      "loss": 2.4208,
      "step": 23028
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0345852375030518,
      "learning_rate": 1.5997547486708434e-05,
      "loss": 2.6534,
      "step": 23029
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0542367696762085,
      "learning_rate": 1.5997218016506615e-05,
      "loss": 2.4958,
      "step": 23030
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0738734006881714,
      "learning_rate": 1.599688853613788e-05,
      "loss": 2.3147,
      "step": 23031
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.16536545753479,
      "learning_rate": 1.599655904560279e-05,
      "loss": 2.4589,
      "step": 23032
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2280341386795044,
      "learning_rate": 1.5996229544901906e-05,
      "loss": 2.3323,
      "step": 23033
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0174877643585205,
      "learning_rate": 1.599590003403578e-05,
      "loss": 2.2471,
      "step": 23034
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0440974235534668,
      "learning_rate": 1.5995570513004977e-05,
      "loss": 2.4679,
      "step": 23035
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.132738709449768,
      "learning_rate": 1.5995240981810057e-05,
      "loss": 2.4371,
      "step": 23036
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1690870523452759,
      "learning_rate": 1.599491144045157e-05,
      "loss": 2.5169,
      "step": 23037
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1652636528015137,
      "learning_rate": 1.5994581888930082e-05,
      "loss": 2.3141,
      "step": 23038
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0287352800369263,
      "learning_rate": 1.5994252327246154e-05,
      "loss": 2.3082,
      "step": 23039
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0352827310562134,
      "learning_rate": 1.5993922755400337e-05,
      "loss": 2.2481,
      "step": 23040
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0510612726211548,
      "learning_rate": 1.599359317339319e-05,
      "loss": 2.2804,
      "step": 23041
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1671295166015625,
      "learning_rate": 1.5993263581225277e-05,
      "loss": 2.3698,
      "step": 23042
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0948817729949951,
      "learning_rate": 1.5992933978897157e-05,
      "loss": 2.2858,
      "step": 23043
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0429171323776245,
      "learning_rate": 1.5992604366409385e-05,
      "loss": 2.4819,
      "step": 23044
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.157906174659729,
      "learning_rate": 1.5992274743762516e-05,
      "loss": 2.3888,
      "step": 23045
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0520005226135254,
      "learning_rate": 1.599194511095712e-05,
      "loss": 2.2848,
      "step": 23046
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.072127342224121,
      "learning_rate": 1.5991615467993746e-05,
      "loss": 2.5428,
      "step": 23047
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0766018629074097,
      "learning_rate": 1.5991285814872956e-05,
      "loss": 2.4967,
      "step": 23048
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9589725732803345,
      "learning_rate": 1.5990956151595312e-05,
      "loss": 2.2542,
      "step": 23049
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0697264671325684,
      "learning_rate": 1.599062647816137e-05,
      "loss": 2.5004,
      "step": 23050
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0522538423538208,
      "learning_rate": 1.5990296794571688e-05,
      "loss": 2.3924,
      "step": 23051
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0594733953475952,
      "learning_rate": 1.5989967100826825e-05,
      "loss": 2.4355,
      "step": 23052
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.8566722869873047,
      "learning_rate": 1.5989637396927345e-05,
      "loss": 2.4401,
      "step": 23053
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.977705717086792,
      "learning_rate": 1.59893076828738e-05,
      "loss": 2.0889,
      "step": 23054
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0930126905441284,
      "learning_rate": 1.5988977958666753e-05,
      "loss": 2.4766,
      "step": 23055
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0797332525253296,
      "learning_rate": 1.5988648224306758e-05,
      "loss": 2.5585,
      "step": 23056
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0540010929107666,
      "learning_rate": 1.5988318479794386e-05,
      "loss": 2.3203,
      "step": 23057
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1688088178634644,
      "learning_rate": 1.598798872513018e-05,
      "loss": 2.4521,
      "step": 23058
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.037753701210022,
      "learning_rate": 1.5987658960314708e-05,
      "loss": 2.425,
      "step": 23059
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1105436086654663,
      "learning_rate": 1.5987329185348527e-05,
      "loss": 2.4477,
      "step": 23060
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9539536833763123,
      "learning_rate": 1.59869994002322e-05,
      "loss": 2.4933,
      "step": 23061
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5535588264465332,
      "learning_rate": 1.598666960496628e-05,
      "loss": 2.3486,
      "step": 23062
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0246741771697998,
      "learning_rate": 1.5986339799551336e-05,
      "loss": 2.3886,
      "step": 23063
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1394755840301514,
      "learning_rate": 1.5986009983987913e-05,
      "loss": 2.4171,
      "step": 23064
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9994028806686401,
      "learning_rate": 1.598568015827658e-05,
      "loss": 2.375,
      "step": 23065
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0669090747833252,
      "learning_rate": 1.598535032241789e-05,
      "loss": 2.4404,
      "step": 23066
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0375821590423584,
      "learning_rate": 1.5985020476412412e-05,
      "loss": 2.3385,
      "step": 23067
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1576402187347412,
      "learning_rate": 1.5984690620260695e-05,
      "loss": 2.4189,
      "step": 23068
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1322888135910034,
      "learning_rate": 1.59843607539633e-05,
      "loss": 2.3776,
      "step": 23069
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0603407621383667,
      "learning_rate": 1.5984030877520794e-05,
      "loss": 2.4603,
      "step": 23070
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2562334537506104,
      "learning_rate": 1.5983700990933722e-05,
      "loss": 2.5908,
      "step": 23071
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.200202226638794,
      "learning_rate": 1.5983371094202658e-05,
      "loss": 2.2962,
      "step": 23072
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1623886823654175,
      "learning_rate": 1.5983041187328155e-05,
      "loss": 2.4625,
      "step": 23073
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0056012868881226,
      "learning_rate": 1.598271127031077e-05,
      "loss": 2.2933,
      "step": 23074
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.01290762424469,
      "learning_rate": 1.5982381343151064e-05,
      "loss": 2.334,
      "step": 23075
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1186586618423462,
      "learning_rate": 1.59820514058496e-05,
      "loss": 2.2457,
      "step": 23076
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0050758123397827,
      "learning_rate": 1.5981721458406934e-05,
      "loss": 2.2868,
      "step": 23077
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1426149606704712,
      "learning_rate": 1.598139150082362e-05,
      "loss": 2.5358,
      "step": 23078
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9208628535270691,
      "learning_rate": 1.5981061533100225e-05,
      "loss": 2.5684,
      "step": 23079
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0781687498092651,
      "learning_rate": 1.598073155523731e-05,
      "loss": 2.4095,
      "step": 23080
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0079282522201538,
      "learning_rate": 1.598040156723543e-05,
      "loss": 2.5459,
      "step": 23081
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2705986499786377,
      "learning_rate": 1.5980071569095142e-05,
      "loss": 2.4609,
      "step": 23082
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0763269662857056,
      "learning_rate": 1.5979741560817012e-05,
      "loss": 2.5345,
      "step": 23083
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.99160236120224,
      "learning_rate": 1.597941154240159e-05,
      "loss": 2.3223,
      "step": 23084
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.096498966217041,
      "learning_rate": 1.597908151384945e-05,
      "loss": 2.5614,
      "step": 23085
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0409187078475952,
      "learning_rate": 1.597875147516114e-05,
      "loss": 2.6111,
      "step": 23086
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1520134210586548,
      "learning_rate": 1.597842142633722e-05,
      "loss": 2.603,
      "step": 23087
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1651973724365234,
      "learning_rate": 1.597809136737825e-05,
      "loss": 2.3632,
      "step": 23088
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1149907112121582,
      "learning_rate": 1.59777612982848e-05,
      "loss": 2.325,
      "step": 23089
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0330209732055664,
      "learning_rate": 1.597743121905741e-05,
      "loss": 2.2881,
      "step": 23090
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.064933180809021,
      "learning_rate": 1.5977101129696658e-05,
      "loss": 2.3908,
      "step": 23091
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.954908013343811,
      "learning_rate": 1.5976771030203094e-05,
      "loss": 2.5357,
      "step": 23092
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0395839214324951,
      "learning_rate": 1.597644092057728e-05,
      "loss": 2.3625,
      "step": 23093
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0955204963684082,
      "learning_rate": 1.5976110800819773e-05,
      "loss": 2.4995,
      "step": 23094
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1441539525985718,
      "learning_rate": 1.597578067093114e-05,
      "loss": 2.4379,
      "step": 23095
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.01997709274292,
      "learning_rate": 1.5975450530911933e-05,
      "loss": 2.5419,
      "step": 23096
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0851739645004272,
      "learning_rate": 1.5975120380762713e-05,
      "loss": 2.3341,
      "step": 23097
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1850405931472778,
      "learning_rate": 1.5974790220484046e-05,
      "loss": 2.4899,
      "step": 23098
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9695395827293396,
      "learning_rate": 1.5974460050076484e-05,
      "loss": 2.0202,
      "step": 23099
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.06459379196167,
      "learning_rate": 1.5974129869540587e-05,
      "loss": 2.3864,
      "step": 23100
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.959473192691803,
      "learning_rate": 1.597379967887692e-05,
      "loss": 2.3754,
      "step": 23101
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1171406507492065,
      "learning_rate": 1.597346947808604e-05,
      "loss": 2.2991,
      "step": 23102
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.066798210144043,
      "learning_rate": 1.5973139267168508e-05,
      "loss": 2.5534,
      "step": 23103
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0154320001602173,
      "learning_rate": 1.5972809046124883e-05,
      "loss": 2.0806,
      "step": 23104
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1529977321624756,
      "learning_rate": 1.597247881495572e-05,
      "loss": 2.5627,
      "step": 23105
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1382445096969604,
      "learning_rate": 1.597214857366159e-05,
      "loss": 2.3822,
      "step": 23106
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0202211141586304,
      "learning_rate": 1.5971818322243037e-05,
      "loss": 2.3796,
      "step": 23107
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.029131293296814,
      "learning_rate": 1.597148806070064e-05,
      "loss": 2.3534,
      "step": 23108
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1052590608596802,
      "learning_rate": 1.5971157789034944e-05,
      "loss": 2.2329,
      "step": 23109
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1567926406860352,
      "learning_rate": 1.597082750724651e-05,
      "loss": 2.2888,
      "step": 23110
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0714335441589355,
      "learning_rate": 1.5970497215335908e-05,
      "loss": 2.5429,
      "step": 23111
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0904690027236938,
      "learning_rate": 1.5970166913303692e-05,
      "loss": 2.4296,
      "step": 23112
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1794801950454712,
      "learning_rate": 1.5969836601150415e-05,
      "loss": 2.4473,
      "step": 23113
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9930815100669861,
      "learning_rate": 1.5969506278876646e-05,
      "loss": 2.4348,
      "step": 23114
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9605984091758728,
      "learning_rate": 1.5969175946482948e-05,
      "loss": 2.4412,
      "step": 23115
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9253600835800171,
      "learning_rate": 1.5968845603969872e-05,
      "loss": 2.4764,
      "step": 23116
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0649073123931885,
      "learning_rate": 1.5968515251337978e-05,
      "loss": 2.3209,
      "step": 23117
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3596994876861572,
      "learning_rate": 1.5968184888587835e-05,
      "loss": 2.5007,
      "step": 23118
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0144296884536743,
      "learning_rate": 1.5967854515719995e-05,
      "loss": 2.2955,
      "step": 23119
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0205930471420288,
      "learning_rate": 1.596752413273502e-05,
      "loss": 2.5287,
      "step": 23120
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0231680870056152,
      "learning_rate": 1.5967193739633472e-05,
      "loss": 2.4188,
      "step": 23121
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0085595846176147,
      "learning_rate": 1.5966863336415913e-05,
      "loss": 2.5267,
      "step": 23122
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0766648054122925,
      "learning_rate": 1.5966532923082897e-05,
      "loss": 2.8321,
      "step": 23123
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.074213981628418,
      "learning_rate": 1.5966202499634986e-05,
      "loss": 2.5458,
      "step": 23124
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0397896766662598,
      "learning_rate": 1.5965872066072743e-05,
      "loss": 2.5228,
      "step": 23125
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0277323722839355,
      "learning_rate": 1.5965541622396727e-05,
      "loss": 2.6369,
      "step": 23126
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0918986797332764,
      "learning_rate": 1.5965211168607496e-05,
      "loss": 2.5275,
      "step": 23127
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9581888318061829,
      "learning_rate": 1.5964880704705614e-05,
      "loss": 2.2399,
      "step": 23128
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9681990742683411,
      "learning_rate": 1.596455023069164e-05,
      "loss": 2.3289,
      "step": 23129
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0644489526748657,
      "learning_rate": 1.596421974656613e-05,
      "loss": 2.2846,
      "step": 23130
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2364909648895264,
      "learning_rate": 1.5963889252329646e-05,
      "loss": 2.4809,
      "step": 23131
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1635851860046387,
      "learning_rate": 1.5963558747982757e-05,
      "loss": 2.3301,
      "step": 23132
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0389044284820557,
      "learning_rate": 1.596322823352601e-05,
      "loss": 2.4682,
      "step": 23133
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9663707613945007,
      "learning_rate": 1.5962897708959974e-05,
      "loss": 2.3042,
      "step": 23134
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2301990985870361,
      "learning_rate": 1.596256717428521e-05,
      "loss": 2.497,
      "step": 23135
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1207990646362305,
      "learning_rate": 1.596223662950227e-05,
      "loss": 2.4591,
      "step": 23136
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2983399629592896,
      "learning_rate": 1.596190607461172e-05,
      "loss": 2.198,
      "step": 23137
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.034115195274353,
      "learning_rate": 1.5961575509614125e-05,
      "loss": 2.453,
      "step": 23138
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9920558333396912,
      "learning_rate": 1.5961244934510034e-05,
      "loss": 2.2587,
      "step": 23139
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9903026223182678,
      "learning_rate": 1.596091434930002e-05,
      "loss": 2.5523,
      "step": 23140
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.010436773300171,
      "learning_rate": 1.5960583753984634e-05,
      "loss": 2.7248,
      "step": 23141
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0710598230361938,
      "learning_rate": 1.5960253148564438e-05,
      "loss": 2.6805,
      "step": 23142
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9796727299690247,
      "learning_rate": 1.5959922533039995e-05,
      "loss": 2.5121,
      "step": 23143
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.969180166721344,
      "learning_rate": 1.5959591907411866e-05,
      "loss": 2.3875,
      "step": 23144
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9009659886360168,
      "learning_rate": 1.5959261271680612e-05,
      "loss": 2.6289,
      "step": 23145
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0103394985198975,
      "learning_rate": 1.5958930625846786e-05,
      "loss": 2.4591,
      "step": 23146
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.020735740661621,
      "learning_rate": 1.5958599969910957e-05,
      "loss": 2.3847,
      "step": 23147
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0345115661621094,
      "learning_rate": 1.5958269303873684e-05,
      "loss": 2.3657,
      "step": 23148
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0635095834732056,
      "learning_rate": 1.5957938627735527e-05,
      "loss": 2.3063,
      "step": 23149
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1528531312942505,
      "learning_rate": 1.5957607941497043e-05,
      "loss": 2.202,
      "step": 23150
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9554731845855713,
      "learning_rate": 1.5957277245158797e-05,
      "loss": 2.4684,
      "step": 23151
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0046064853668213,
      "learning_rate": 1.595694653872135e-05,
      "loss": 2.323,
      "step": 23152
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1452274322509766,
      "learning_rate": 1.5956615822185256e-05,
      "loss": 2.3638,
      "step": 23153
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9891022443771362,
      "learning_rate": 1.5956285095551084e-05,
      "loss": 2.3851,
      "step": 23154
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0619198083877563,
      "learning_rate": 1.595595435881939e-05,
      "loss": 2.5561,
      "step": 23155
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1418803930282593,
      "learning_rate": 1.5955623611990737e-05,
      "loss": 2.399,
      "step": 23156
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0260770320892334,
      "learning_rate": 1.595529285506568e-05,
      "loss": 2.2477,
      "step": 23157
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0480331182479858,
      "learning_rate": 1.5954962088044787e-05,
      "loss": 2.4641,
      "step": 23158
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1055831909179688,
      "learning_rate": 1.5954631310928617e-05,
      "loss": 2.2828,
      "step": 23159
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9889081120491028,
      "learning_rate": 1.5954300523717728e-05,
      "loss": 2.4878,
      "step": 23160
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9982499480247498,
      "learning_rate": 1.5953969726412686e-05,
      "loss": 2.663,
      "step": 23161
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9170729517936707,
      "learning_rate": 1.5953638919014044e-05,
      "loss": 2.4168,
      "step": 23162
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0527771711349487,
      "learning_rate": 1.5953308101522368e-05,
      "loss": 2.4594,
      "step": 23163
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0080066919326782,
      "learning_rate": 1.595297727393822e-05,
      "loss": 2.2142,
      "step": 23164
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.174414038658142,
      "learning_rate": 1.5952646436262157e-05,
      "loss": 2.4889,
      "step": 23165
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.048312783241272,
      "learning_rate": 1.5952315588494742e-05,
      "loss": 2.3359,
      "step": 23166
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0418843030929565,
      "learning_rate": 1.5951984730636534e-05,
      "loss": 2.3723,
      "step": 23167
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0069940090179443,
      "learning_rate": 1.5951653862688098e-05,
      "loss": 2.411,
      "step": 23168
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0242013931274414,
      "learning_rate": 1.595132298464999e-05,
      "loss": 2.3564,
      "step": 23169
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9710043668746948,
      "learning_rate": 1.5950992096522774e-05,
      "loss": 2.2893,
      "step": 23170
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0454975366592407,
      "learning_rate": 1.595066119830701e-05,
      "loss": 2.4078,
      "step": 23171
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0017391443252563,
      "learning_rate": 1.595033029000326e-05,
      "loss": 2.4609,
      "step": 23172
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.373009204864502,
      "learning_rate": 1.5949999371612082e-05,
      "loss": 2.2867,
      "step": 23173
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.171263575553894,
      "learning_rate": 1.594966844313404e-05,
      "loss": 2.5305,
      "step": 23174
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0089970827102661,
      "learning_rate": 1.5949337504569698e-05,
      "loss": 2.2306,
      "step": 23175
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9582048654556274,
      "learning_rate": 1.5949006555919608e-05,
      "loss": 2.1724,
      "step": 23176
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0091376304626465,
      "learning_rate": 1.5948675597184337e-05,
      "loss": 2.4623,
      "step": 23177
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0944303274154663,
      "learning_rate": 1.5948344628364444e-05,
      "loss": 2.4314,
      "step": 23178
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0331171751022339,
      "learning_rate": 1.5948013649460496e-05,
      "loss": 2.312,
      "step": 23179
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0691766738891602,
      "learning_rate": 1.5947682660473047e-05,
      "loss": 2.5086,
      "step": 23180
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0552122592926025,
      "learning_rate": 1.594735166140266e-05,
      "loss": 2.2988,
      "step": 23181
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.008092999458313,
      "learning_rate": 1.5947020652249897e-05,
      "loss": 2.3112,
      "step": 23182
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0175381898880005,
      "learning_rate": 1.594668963301532e-05,
      "loss": 2.4235,
      "step": 23183
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9922297596931458,
      "learning_rate": 1.5946358603699486e-05,
      "loss": 2.4587,
      "step": 23184
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1619306802749634,
      "learning_rate": 1.5946027564302958e-05,
      "loss": 2.4126,
      "step": 23185
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9774122834205627,
      "learning_rate": 1.5945696514826305e-05,
      "loss": 2.4696,
      "step": 23186
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1026438474655151,
      "learning_rate": 1.5945365455270078e-05,
      "loss": 2.6432,
      "step": 23187
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0685847997665405,
      "learning_rate": 1.594503438563484e-05,
      "loss": 2.5528,
      "step": 23188
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9986814260482788,
      "learning_rate": 1.5944703305921153e-05,
      "loss": 2.3349,
      "step": 23189
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0857291221618652,
      "learning_rate": 1.5944372216129584e-05,
      "loss": 2.3023,
      "step": 23190
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0974289178848267,
      "learning_rate": 1.594404111626069e-05,
      "loss": 2.3858,
      "step": 23191
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0237064361572266,
      "learning_rate": 1.594371000631503e-05,
      "loss": 2.3917,
      "step": 23192
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0205475091934204,
      "learning_rate": 1.5943378886293165e-05,
      "loss": 2.477,
      "step": 23193
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0876317024230957,
      "learning_rate": 1.594304775619566e-05,
      "loss": 2.538,
      "step": 23194
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.65041184425354,
      "learning_rate": 1.5942716616023074e-05,
      "loss": 2.3676,
      "step": 23195
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1850582361221313,
      "learning_rate": 1.594238546577597e-05,
      "loss": 2.2801,
      "step": 23196
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.065470814704895,
      "learning_rate": 1.594205430545491e-05,
      "loss": 2.2669,
      "step": 23197
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1589457988739014,
      "learning_rate": 1.594172313506045e-05,
      "loss": 2.4196,
      "step": 23198
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3249084949493408,
      "learning_rate": 1.594139195459316e-05,
      "loss": 2.5799,
      "step": 23199
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0686956644058228,
      "learning_rate": 1.59410607640536e-05,
      "loss": 2.4431,
      "step": 23200
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.004713535308838,
      "learning_rate": 1.5940729563442323e-05,
      "loss": 2.6539,
      "step": 23201
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2103904485702515,
      "learning_rate": 1.5940398352759897e-05,
      "loss": 2.4268,
      "step": 23202
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9748518466949463,
      "learning_rate": 1.5940067132006883e-05,
      "loss": 2.3926,
      "step": 23203
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2619870901107788,
      "learning_rate": 1.5939735901183843e-05,
      "loss": 2.3932,
      "step": 23204
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.256441354751587,
      "learning_rate": 1.5939404660291337e-05,
      "loss": 2.3423,
      "step": 23205
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0375005006790161,
      "learning_rate": 1.5939073409329923e-05,
      "loss": 2.3911,
      "step": 23206
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1433067321777344,
      "learning_rate": 1.593874214830017e-05,
      "loss": 2.4246,
      "step": 23207
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0854970216751099,
      "learning_rate": 1.5938410877202635e-05,
      "loss": 2.2237,
      "step": 23208
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0760202407836914,
      "learning_rate": 1.593807959603788e-05,
      "loss": 2.4881,
      "step": 23209
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9356218576431274,
      "learning_rate": 1.593774830480647e-05,
      "loss": 2.4704,
      "step": 23210
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.100300908088684,
      "learning_rate": 1.593741700350896e-05,
      "loss": 2.5339,
      "step": 23211
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2547519207000732,
      "learning_rate": 1.5937085692145918e-05,
      "loss": 2.5103,
      "step": 23212
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0438940525054932,
      "learning_rate": 1.5936754370717905e-05,
      "loss": 2.5351,
      "step": 23213
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1415940523147583,
      "learning_rate": 1.593642303922548e-05,
      "loss": 2.174,
      "step": 23214
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9879971742630005,
      "learning_rate": 1.5936091697669207e-05,
      "loss": 2.5847,
      "step": 23215
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0437684059143066,
      "learning_rate": 1.5935760346049644e-05,
      "loss": 2.4292,
      "step": 23216
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0935670137405396,
      "learning_rate": 1.5935428984367356e-05,
      "loss": 2.5109,
      "step": 23217
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0365822315216064,
      "learning_rate": 1.5935097612622902e-05,
      "loss": 2.5215,
      "step": 23218
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.168443202972412,
      "learning_rate": 1.5934766230816846e-05,
      "loss": 2.5889,
      "step": 23219
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9729253649711609,
      "learning_rate": 1.5934434838949753e-05,
      "loss": 2.3061,
      "step": 23220
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9867241978645325,
      "learning_rate": 1.593410343702218e-05,
      "loss": 2.3764,
      "step": 23221
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.275418996810913,
      "learning_rate": 1.5933772025034685e-05,
      "loss": 2.3936,
      "step": 23222
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0385318994522095,
      "learning_rate": 1.593344060298784e-05,
      "loss": 2.3949,
      "step": 23223
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9797679781913757,
      "learning_rate": 1.59331091708822e-05,
      "loss": 2.5156,
      "step": 23224
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0651676654815674,
      "learning_rate": 1.5932777728718326e-05,
      "loss": 2.2748,
      "step": 23225
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0159924030303955,
      "learning_rate": 1.5932446276496788e-05,
      "loss": 2.5136,
      "step": 23226
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1898674964904785,
      "learning_rate": 1.5932114814218136e-05,
      "loss": 2.5094,
      "step": 23227
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.130520224571228,
      "learning_rate": 1.5931783341882944e-05,
      "loss": 2.2501,
      "step": 23228
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1448163986206055,
      "learning_rate": 1.5931451859491766e-05,
      "loss": 2.4095,
      "step": 23229
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0312411785125732,
      "learning_rate": 1.5931120367045165e-05,
      "loss": 2.3266,
      "step": 23230
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0177768468856812,
      "learning_rate": 1.5930788864543707e-05,
      "loss": 2.4446,
      "step": 23231
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9670421481132507,
      "learning_rate": 1.5930457351987948e-05,
      "loss": 2.4204,
      "step": 23232
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0417746305465698,
      "learning_rate": 1.5930125829378457e-05,
      "loss": 2.3399,
      "step": 23233
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9225179553031921,
      "learning_rate": 1.5929794296715785e-05,
      "loss": 2.5693,
      "step": 23234
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9741840958595276,
      "learning_rate": 1.592946275400051e-05,
      "loss": 2.3117,
      "step": 23235
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.287458896636963,
      "learning_rate": 1.592913120123318e-05,
      "loss": 2.4798,
      "step": 23236
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0834711790084839,
      "learning_rate": 1.592879963841436e-05,
      "loss": 2.4454,
      "step": 23237
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0600357055664062,
      "learning_rate": 1.5928468065544616e-05,
      "loss": 2.0334,
      "step": 23238
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.035947322845459,
      "learning_rate": 1.592813648262451e-05,
      "loss": 2.2388,
      "step": 23239
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0405969619750977,
      "learning_rate": 1.5927804889654606e-05,
      "loss": 2.6463,
      "step": 23240
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9579251408576965,
      "learning_rate": 1.5927473286635457e-05,
      "loss": 2.5011,
      "step": 23241
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.010689377784729,
      "learning_rate": 1.592714167356763e-05,
      "loss": 2.5584,
      "step": 23242
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1898865699768066,
      "learning_rate": 1.5926810050451697e-05,
      "loss": 2.5186,
      "step": 23243
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.254925012588501,
      "learning_rate": 1.5926478417288204e-05,
      "loss": 2.6655,
      "step": 23244
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.197342872619629,
      "learning_rate": 1.592614677407772e-05,
      "loss": 2.2663,
      "step": 23245
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0286709070205688,
      "learning_rate": 1.592581512082081e-05,
      "loss": 2.3116,
      "step": 23246
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9705555438995361,
      "learning_rate": 1.5925483457518035e-05,
      "loss": 2.548,
      "step": 23247
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9820535778999329,
      "learning_rate": 1.5925151784169954e-05,
      "loss": 2.3712,
      "step": 23248
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0939959287643433,
      "learning_rate": 1.5924820100777132e-05,
      "loss": 2.3794,
      "step": 23249
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0841118097305298,
      "learning_rate": 1.592448840734013e-05,
      "loss": 2.6443,
      "step": 23250
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.042500615119934,
      "learning_rate": 1.5924156703859512e-05,
      "loss": 2.7247,
      "step": 23251
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0110138654708862,
      "learning_rate": 1.592382499033584e-05,
      "loss": 2.5456,
      "step": 23252
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0260703563690186,
      "learning_rate": 1.5923493266769678e-05,
      "loss": 2.4156,
      "step": 23253
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.024276614189148,
      "learning_rate": 1.592316153316158e-05,
      "loss": 2.3175,
      "step": 23254
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0260783433914185,
      "learning_rate": 1.5922829789512118e-05,
      "loss": 2.4388,
      "step": 23255
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.019613265991211,
      "learning_rate": 1.5922498035821853e-05,
      "loss": 2.2043,
      "step": 23256
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9773991703987122,
      "learning_rate": 1.5922166272091342e-05,
      "loss": 2.2536,
      "step": 23257
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0840693712234497,
      "learning_rate": 1.5921834498321152e-05,
      "loss": 2.4439,
      "step": 23258
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2331533432006836,
      "learning_rate": 1.592150271451184e-05,
      "loss": 2.4661,
      "step": 23259
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0713695287704468,
      "learning_rate": 1.592117092066398e-05,
      "loss": 2.2821,
      "step": 23260
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0987825393676758,
      "learning_rate": 1.592083911677812e-05,
      "loss": 2.3009,
      "step": 23261
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1549360752105713,
      "learning_rate": 1.5920507302854838e-05,
      "loss": 2.4632,
      "step": 23262
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9269607663154602,
      "learning_rate": 1.592017547889468e-05,
      "loss": 2.425,
      "step": 23263
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3928359746932983,
      "learning_rate": 1.591984364489822e-05,
      "loss": 2.4434,
      "step": 23264
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.055582880973816,
      "learning_rate": 1.5919511800866015e-05,
      "loss": 2.3768,
      "step": 23265
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9618104100227356,
      "learning_rate": 1.5919179946798637e-05,
      "loss": 2.3077,
      "step": 23266
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2752490043640137,
      "learning_rate": 1.5918848082696635e-05,
      "loss": 2.2007,
      "step": 23267
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1767959594726562,
      "learning_rate": 1.591851620856058e-05,
      "loss": 2.3763,
      "step": 23268
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1132086515426636,
      "learning_rate": 1.5918184324391033e-05,
      "loss": 2.3765,
      "step": 23269
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9418795704841614,
      "learning_rate": 1.5917852430188553e-05,
      "loss": 2.4998,
      "step": 23270
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9785919785499573,
      "learning_rate": 1.591752052595371e-05,
      "loss": 2.3553,
      "step": 23271
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9630037546157837,
      "learning_rate": 1.591718861168706e-05,
      "loss": 2.421,
      "step": 23272
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0774414539337158,
      "learning_rate": 1.5916856687389168e-05,
      "loss": 2.4314,
      "step": 23273
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2265050411224365,
      "learning_rate": 1.59165247530606e-05,
      "loss": 2.42,
      "step": 23274
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0436198711395264,
      "learning_rate": 1.591619280870191e-05,
      "loss": 2.6367,
      "step": 23275
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2045878171920776,
      "learning_rate": 1.591586085431367e-05,
      "loss": 2.424,
      "step": 23276
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0364282131195068,
      "learning_rate": 1.591552888989644e-05,
      "loss": 2.5588,
      "step": 23277
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.987155556678772,
      "learning_rate": 1.5915196915450782e-05,
      "loss": 2.2877,
      "step": 23278
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1395790576934814,
      "learning_rate": 1.5914864930977257e-05,
      "loss": 2.4078,
      "step": 23279
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.049812912940979,
      "learning_rate": 1.591453293647643e-05,
      "loss": 2.3597,
      "step": 23280
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1884586811065674,
      "learning_rate": 1.591420093194886e-05,
      "loss": 2.3152,
      "step": 23281
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9820212125778198,
      "learning_rate": 1.591386891739512e-05,
      "loss": 2.2224,
      "step": 23282
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0910136699676514,
      "learning_rate": 1.591353689281576e-05,
      "loss": 2.4909,
      "step": 23283
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.10969078540802,
      "learning_rate": 1.591320485821135e-05,
      "loss": 2.4783,
      "step": 23284
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1562747955322266,
      "learning_rate": 1.5912872813582456e-05,
      "loss": 2.4958,
      "step": 23285
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0281059741973877,
      "learning_rate": 1.5912540758929632e-05,
      "loss": 2.4213,
      "step": 23286
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2947293519973755,
      "learning_rate": 1.5912208694253447e-05,
      "loss": 2.2299,
      "step": 23287
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1495684385299683,
      "learning_rate": 1.5911876619554464e-05,
      "loss": 2.3471,
      "step": 23288
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2105785608291626,
      "learning_rate": 1.5911544534833244e-05,
      "loss": 2.5127,
      "step": 23289
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0676531791687012,
      "learning_rate": 1.5911212440090348e-05,
      "loss": 2.5622,
      "step": 23290
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.069166660308838,
      "learning_rate": 1.5910880335326343e-05,
      "loss": 2.5532,
      "step": 23291
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1847805976867676,
      "learning_rate": 1.591054822054179e-05,
      "loss": 2.3411,
      "step": 23292
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0979046821594238,
      "learning_rate": 1.5910216095737253e-05,
      "loss": 2.3976,
      "step": 23293
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1008164882659912,
      "learning_rate": 1.5909883960913297e-05,
      "loss": 2.3136,
      "step": 23294
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0376174449920654,
      "learning_rate": 1.5909551816070478e-05,
      "loss": 2.3897,
      "step": 23295
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1429293155670166,
      "learning_rate": 1.5909219661209367e-05,
      "loss": 2.328,
      "step": 23296
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0665777921676636,
      "learning_rate": 1.5908887496330523e-05,
      "loss": 2.3454,
      "step": 23297
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.990010678768158,
      "learning_rate": 1.5908555321434508e-05,
      "loss": 2.5225,
      "step": 23298
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0629559755325317,
      "learning_rate": 1.590822313652189e-05,
      "loss": 2.2714,
      "step": 23299
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9658728241920471,
      "learning_rate": 1.590789094159323e-05,
      "loss": 2.5005,
      "step": 23300
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.01566481590271,
      "learning_rate": 1.5907558736649086e-05,
      "loss": 2.2333,
      "step": 23301
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0382189750671387,
      "learning_rate": 1.5907226521690025e-05,
      "loss": 2.4028,
      "step": 23302
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9599143862724304,
      "learning_rate": 1.5906894296716616e-05,
      "loss": 2.4251,
      "step": 23303
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.091657280921936,
      "learning_rate": 1.590656206172941e-05,
      "loss": 2.4914,
      "step": 23304
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9982537031173706,
      "learning_rate": 1.5906229816728985e-05,
      "loss": 2.4478,
      "step": 23305
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9751210808753967,
      "learning_rate": 1.590589756171589e-05,
      "loss": 2.4663,
      "step": 23306
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9911282658576965,
      "learning_rate": 1.5905565296690698e-05,
      "loss": 2.4448,
      "step": 23307
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1829172372817993,
      "learning_rate": 1.590523302165397e-05,
      "loss": 2.4135,
      "step": 23308
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0916780233383179,
      "learning_rate": 1.5904900736606266e-05,
      "loss": 2.3492,
      "step": 23309
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1117455959320068,
      "learning_rate": 1.590456844154815e-05,
      "loss": 2.1694,
      "step": 23310
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.079970121383667,
      "learning_rate": 1.5904236136480187e-05,
      "loss": 2.4078,
      "step": 23311
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0046486854553223,
      "learning_rate": 1.5903903821402943e-05,
      "loss": 2.4884,
      "step": 23312
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0510402917861938,
      "learning_rate": 1.590357149631698e-05,
      "loss": 2.3559,
      "step": 23313
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0448366403579712,
      "learning_rate": 1.5903239161222856e-05,
      "loss": 2.3901,
      "step": 23314
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1739403009414673,
      "learning_rate": 1.5902906816121137e-05,
      "loss": 2.4202,
      "step": 23315
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.153135061264038,
      "learning_rate": 1.590257446101239e-05,
      "loss": 2.3369,
      "step": 23316
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0830262899398804,
      "learning_rate": 1.5902242095897177e-05,
      "loss": 2.2615,
      "step": 23317
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.052417278289795,
      "learning_rate": 1.5901909720776058e-05,
      "loss": 2.5159,
      "step": 23318
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.044471263885498,
      "learning_rate": 1.5901577335649603e-05,
      "loss": 2.2781,
      "step": 23319
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9950944185256958,
      "learning_rate": 1.5901244940518368e-05,
      "loss": 2.3411,
      "step": 23320
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0760433673858643,
      "learning_rate": 1.5900912535382924e-05,
      "loss": 2.3211,
      "step": 23321
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0601764917373657,
      "learning_rate": 1.590058012024383e-05,
      "loss": 2.5118,
      "step": 23322
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1041799783706665,
      "learning_rate": 1.5900247695101646e-05,
      "loss": 2.5381,
      "step": 23323
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0858415365219116,
      "learning_rate": 1.589991525995694e-05,
      "loss": 2.4843,
      "step": 23324
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1195980310440063,
      "learning_rate": 1.5899582814810276e-05,
      "loss": 2.3997,
      "step": 23325
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0292034149169922,
      "learning_rate": 1.5899250359662223e-05,
      "loss": 2.4376,
      "step": 23326
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4092580080032349,
      "learning_rate": 1.589891789451333e-05,
      "loss": 2.4073,
      "step": 23327
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0105270147323608,
      "learning_rate": 1.5898585419364175e-05,
      "loss": 2.5257,
      "step": 23328
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9965664744377136,
      "learning_rate": 1.589825293421531e-05,
      "loss": 2.5618,
      "step": 23329
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.983049750328064,
      "learning_rate": 1.5897920439067308e-05,
      "loss": 2.3033,
      "step": 23330
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9840854406356812,
      "learning_rate": 1.589758793392073e-05,
      "loss": 2.4939,
      "step": 23331
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1322100162506104,
      "learning_rate": 1.589725541877614e-05,
      "loss": 2.4042,
      "step": 23332
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.017087459564209,
      "learning_rate": 1.58969228936341e-05,
      "loss": 2.3447,
      "step": 23333
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0876508951187134,
      "learning_rate": 1.5896590358495167e-05,
      "loss": 2.2053,
      "step": 23334
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1151560544967651,
      "learning_rate": 1.5896257813359916e-05,
      "loss": 2.2902,
      "step": 23335
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0273959636688232,
      "learning_rate": 1.5895925258228907e-05,
      "loss": 2.495,
      "step": 23336
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.107996940612793,
      "learning_rate": 1.5895592693102702e-05,
      "loss": 2.5336,
      "step": 23337
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9739854335784912,
      "learning_rate": 1.589526011798187e-05,
      "loss": 2.3742,
      "step": 23338
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0419723987579346,
      "learning_rate": 1.589492753286697e-05,
      "loss": 2.465,
      "step": 23339
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9807815551757812,
      "learning_rate": 1.5894594937758564e-05,
      "loss": 2.5703,
      "step": 23340
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1014983654022217,
      "learning_rate": 1.589426233265722e-05,
      "loss": 2.3977,
      "step": 23341
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9964693188667297,
      "learning_rate": 1.58939297175635e-05,
      "loss": 2.3666,
      "step": 23342
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9813518524169922,
      "learning_rate": 1.589359709247797e-05,
      "loss": 2.226,
      "step": 23343
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0476233959197998,
      "learning_rate": 1.589326445740119e-05,
      "loss": 2.2461,
      "step": 23344
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1196542978286743,
      "learning_rate": 1.589293181233373e-05,
      "loss": 2.5884,
      "step": 23345
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0286606550216675,
      "learning_rate": 1.5892599157276144e-05,
      "loss": 2.4077,
      "step": 23346
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0582525730133057,
      "learning_rate": 1.589226649222901e-05,
      "loss": 2.5059,
      "step": 23347
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3336509466171265,
      "learning_rate": 1.5891933817192877e-05,
      "loss": 2.3833,
      "step": 23348
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1341861486434937,
      "learning_rate": 1.589160113216832e-05,
      "loss": 2.4942,
      "step": 23349
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1075583696365356,
      "learning_rate": 1.5891268437155897e-05,
      "loss": 2.2726,
      "step": 23350
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1038672924041748,
      "learning_rate": 1.5890935732156174e-05,
      "loss": 2.3604,
      "step": 23351
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9893890619277954,
      "learning_rate": 1.5890603017169715e-05,
      "loss": 2.408,
      "step": 23352
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2666252851486206,
      "learning_rate": 1.5890270292197084e-05,
      "loss": 2.4971,
      "step": 23353
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9525572061538696,
      "learning_rate": 1.5889937557238845e-05,
      "loss": 2.4112,
      "step": 23354
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.067856788635254,
      "learning_rate": 1.5889604812295562e-05,
      "loss": 2.3624,
      "step": 23355
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1294447183609009,
      "learning_rate": 1.58892720573678e-05,
      "loss": 2.4668,
      "step": 23356
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1261271238327026,
      "learning_rate": 1.5888939292456122e-05,
      "loss": 2.4835,
      "step": 23357
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0715256929397583,
      "learning_rate": 1.588860651756109e-05,
      "loss": 2.3597,
      "step": 23358
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1348739862442017,
      "learning_rate": 1.5888273732683277e-05,
      "loss": 2.4715,
      "step": 23359
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.131168246269226,
      "learning_rate": 1.5887940937823238e-05,
      "loss": 2.3381,
      "step": 23360
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9772473573684692,
      "learning_rate": 1.588760813298154e-05,
      "loss": 2.3673,
      "step": 23361
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9180372357368469,
      "learning_rate": 1.5887275318158746e-05,
      "loss": 2.5188,
      "step": 23362
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.986332893371582,
      "learning_rate": 1.588694249335542e-05,
      "loss": 2.6824,
      "step": 23363
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0198822021484375,
      "learning_rate": 1.588660965857213e-05,
      "loss": 2.3872,
      "step": 23364
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0997095108032227,
      "learning_rate": 1.5886276813809437e-05,
      "loss": 2.3693,
      "step": 23365
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1144722700119019,
      "learning_rate": 1.5885943959067905e-05,
      "loss": 2.3888,
      "step": 23366
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5524342060089111,
      "learning_rate": 1.5885611094348102e-05,
      "loss": 2.2757,
      "step": 23367
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1005582809448242,
      "learning_rate": 1.5885278219650587e-05,
      "loss": 2.4467,
      "step": 23368
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0667918920516968,
      "learning_rate": 1.5884945334975927e-05,
      "loss": 2.5096,
      "step": 23369
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1868878602981567,
      "learning_rate": 1.5884612440324687e-05,
      "loss": 2.8463,
      "step": 23370
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0812915563583374,
      "learning_rate": 1.588427953569743e-05,
      "loss": 2.2848,
      "step": 23371
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0557128190994263,
      "learning_rate": 1.5883946621094726e-05,
      "loss": 2.4337,
      "step": 23372
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2731878757476807,
      "learning_rate": 1.5883613696517128e-05,
      "loss": 2.4777,
      "step": 23373
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1039681434631348,
      "learning_rate": 1.588328076196521e-05,
      "loss": 2.3483,
      "step": 23374
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1033259630203247,
      "learning_rate": 1.5882947817439532e-05,
      "loss": 2.4503,
      "step": 23375
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0715140104293823,
      "learning_rate": 1.588261486294066e-05,
      "loss": 2.4936,
      "step": 23376
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1436647176742554,
      "learning_rate": 1.5882281898469162e-05,
      "loss": 2.6054,
      "step": 23377
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0190833806991577,
      "learning_rate": 1.5881948924025595e-05,
      "loss": 2.4146,
      "step": 23378
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1637911796569824,
      "learning_rate": 1.5881615939610523e-05,
      "loss": 2.2998,
      "step": 23379
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.045180082321167,
      "learning_rate": 1.588128294522452e-05,
      "loss": 2.4239,
      "step": 23380
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0736308097839355,
      "learning_rate": 1.5880949940868148e-05,
      "loss": 2.3524,
      "step": 23381
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0923367738723755,
      "learning_rate": 1.588061692654196e-05,
      "loss": 2.5762,
      "step": 23382
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.010817289352417,
      "learning_rate": 1.5880283902246534e-05,
      "loss": 2.2034,
      "step": 23383
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0812406539916992,
      "learning_rate": 1.5879950867982427e-05,
      "loss": 2.6041,
      "step": 23384
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0351725816726685,
      "learning_rate": 1.5879617823750213e-05,
      "loss": 2.3695,
      "step": 23385
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0131070613861084,
      "learning_rate": 1.5879284769550444e-05,
      "loss": 2.3006,
      "step": 23386
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9560803771018982,
      "learning_rate": 1.5878951705383693e-05,
      "loss": 2.4319,
      "step": 23387
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2986842393875122,
      "learning_rate": 1.587861863125052e-05,
      "loss": 2.4005,
      "step": 23388
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2021958827972412,
      "learning_rate": 1.5878285547151492e-05,
      "loss": 2.3101,
      "step": 23389
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1846662759780884,
      "learning_rate": 1.5877952453087175e-05,
      "loss": 2.5396,
      "step": 23390
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0335729122161865,
      "learning_rate": 1.5877619349058134e-05,
      "loss": 2.2203,
      "step": 23391
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.969176173210144,
      "learning_rate": 1.5877286235064927e-05,
      "loss": 2.7896,
      "step": 23392
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.108529806137085,
      "learning_rate": 1.587695311110813e-05,
      "loss": 2.1469,
      "step": 23393
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.954560399055481,
      "learning_rate": 1.5876619977188296e-05,
      "loss": 2.3244,
      "step": 23394
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.097164273262024,
      "learning_rate": 1.5876286833305993e-05,
      "loss": 2.5475,
      "step": 23395
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.134584665298462,
      "learning_rate": 1.5875953679461793e-05,
      "loss": 2.6427,
      "step": 23396
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9455372095108032,
      "learning_rate": 1.5875620515656252e-05,
      "loss": 2.2961,
      "step": 23397
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0375062227249146,
      "learning_rate": 1.5875287341889943e-05,
      "loss": 2.2875,
      "step": 23398
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0633208751678467,
      "learning_rate": 1.5874954158163424e-05,
      "loss": 2.5796,
      "step": 23399
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1076126098632812,
      "learning_rate": 1.5874620964477257e-05,
      "loss": 2.3879,
      "step": 23400
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0511573553085327,
      "learning_rate": 1.587428776083202e-05,
      "loss": 2.3972,
      "step": 23401
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1818509101867676,
      "learning_rate": 1.5873954547228266e-05,
      "loss": 2.2238,
      "step": 23402
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1135773658752441,
      "learning_rate": 1.587362132366656e-05,
      "loss": 2.5162,
      "step": 23403
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0107524394989014,
      "learning_rate": 1.5873288090147477e-05,
      "loss": 2.2976,
      "step": 23404
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1266707181930542,
      "learning_rate": 1.5872954846671572e-05,
      "loss": 2.3352,
      "step": 23405
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0758272409439087,
      "learning_rate": 1.5872621593239418e-05,
      "loss": 2.3501,
      "step": 23406
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0511428117752075,
      "learning_rate": 1.587228832985157e-05,
      "loss": 2.5741,
      "step": 23407
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1544835567474365,
      "learning_rate": 1.5871955056508604e-05,
      "loss": 2.4301,
      "step": 23408
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1221117973327637,
      "learning_rate": 1.5871621773211075e-05,
      "loss": 2.3521,
      "step": 23409
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0797611474990845,
      "learning_rate": 1.5871288479959553e-05,
      "loss": 2.5586,
      "step": 23410
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3007537126541138,
      "learning_rate": 1.58709551767546e-05,
      "loss": 2.6174,
      "step": 23411
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5548443794250488,
      "learning_rate": 1.587062186359679e-05,
      "loss": 2.2459,
      "step": 23412
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9738179445266724,
      "learning_rate": 1.5870288540486675e-05,
      "loss": 2.3469,
      "step": 23413
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0989532470703125,
      "learning_rate": 1.586995520742483e-05,
      "loss": 2.523,
      "step": 23414
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1294481754302979,
      "learning_rate": 1.5869621864411816e-05,
      "loss": 2.5181,
      "step": 23415
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0033241510391235,
      "learning_rate": 1.58692885114482e-05,
      "loss": 2.476,
      "step": 23416
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.053589940071106,
      "learning_rate": 1.5868955148534543e-05,
      "loss": 2.5039,
      "step": 23417
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0903761386871338,
      "learning_rate": 1.5868621775671414e-05,
      "loss": 2.4362,
      "step": 23418
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0018706321716309,
      "learning_rate": 1.586828839285938e-05,
      "loss": 2.1893,
      "step": 23419
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1120150089263916,
      "learning_rate": 1.5867955000099e-05,
      "loss": 2.6191,
      "step": 23420
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0537523031234741,
      "learning_rate": 1.586762159739084e-05,
      "loss": 2.2541,
      "step": 23421
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.196123719215393,
      "learning_rate": 1.5867288184735474e-05,
      "loss": 2.5533,
      "step": 23422
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.0740491151809692,
      "learning_rate": 1.5866954762133456e-05,
      "loss": 2.404,
      "step": 23423
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9983194470405579,
      "learning_rate": 1.586662132958536e-05,
      "loss": 2.3528,
      "step": 23424
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0074745416641235,
      "learning_rate": 1.5866287887091746e-05,
      "loss": 2.2294,
      "step": 23425
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1987557411193848,
      "learning_rate": 1.5865954434653183e-05,
      "loss": 2.3485,
      "step": 23426
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0954581499099731,
      "learning_rate": 1.5865620972270232e-05,
      "loss": 2.3302,
      "step": 23427
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9760012030601501,
      "learning_rate": 1.5865287499943457e-05,
      "loss": 2.3401,
      "step": 23428
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9730210900306702,
      "learning_rate": 1.5864954017673433e-05,
      "loss": 2.6725,
      "step": 23429
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9349336624145508,
      "learning_rate": 1.5864620525460716e-05,
      "loss": 2.4384,
      "step": 23430
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.170257329940796,
      "learning_rate": 1.5864287023305872e-05,
      "loss": 2.1573,
      "step": 23431
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0581222772598267,
      "learning_rate": 1.5863953511209473e-05,
      "loss": 2.4585,
      "step": 23432
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0207328796386719,
      "learning_rate": 1.586361998917208e-05,
      "loss": 2.2323,
      "step": 23433
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0156269073486328,
      "learning_rate": 1.5863286457194256e-05,
      "loss": 2.4281,
      "step": 23434
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.973897397518158,
      "learning_rate": 1.586295291527657e-05,
      "loss": 2.231,
      "step": 23435
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1158485412597656,
      "learning_rate": 1.5862619363419586e-05,
      "loss": 2.4996,
      "step": 23436
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9968024492263794,
      "learning_rate": 1.5862285801623868e-05,
      "loss": 2.2234,
      "step": 23437
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.017985463142395,
      "learning_rate": 1.586195222988999e-05,
      "loss": 2.577,
      "step": 23438
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0265382528305054,
      "learning_rate": 1.5861618648218505e-05,
      "loss": 2.4699,
      "step": 23439
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0151481628417969,
      "learning_rate": 1.5861285056609986e-05,
      "loss": 2.3302,
      "step": 23440
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0007531642913818,
      "learning_rate": 1.5860951455064997e-05,
      "loss": 2.2719,
      "step": 23441
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9756132960319519,
      "learning_rate": 1.5860617843584102e-05,
      "loss": 2.2483,
      "step": 23442
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9815118908882141,
      "learning_rate": 1.586028422216787e-05,
      "loss": 2.264,
      "step": 23443
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9945678114891052,
      "learning_rate": 1.5859950590816865e-05,
      "loss": 2.4788,
      "step": 23444
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1067668199539185,
      "learning_rate": 1.5859616949531652e-05,
      "loss": 2.4673,
      "step": 23445
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1248995065689087,
      "learning_rate": 1.5859283298312796e-05,
      "loss": 2.5503,
      "step": 23446
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1380504369735718,
      "learning_rate": 1.5858949637160862e-05,
      "loss": 2.5224,
      "step": 23447
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3511099815368652,
      "learning_rate": 1.585861596607642e-05,
      "loss": 2.2432,
      "step": 23448
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.101069450378418,
      "learning_rate": 1.585828228506003e-05,
      "loss": 2.4527,
      "step": 23449
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2225359678268433,
      "learning_rate": 1.585794859411226e-05,
      "loss": 2.3375,
      "step": 23450
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2319916486740112,
      "learning_rate": 1.585761489323368e-05,
      "loss": 2.3651,
      "step": 23451
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0320860147476196,
      "learning_rate": 1.5857281182424852e-05,
      "loss": 2.5309,
      "step": 23452
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9645834565162659,
      "learning_rate": 1.585694746168634e-05,
      "loss": 2.3359,
      "step": 23453
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9571994543075562,
      "learning_rate": 1.585661373101871e-05,
      "loss": 2.4524,
      "step": 23454
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0429755449295044,
      "learning_rate": 1.585627999042253e-05,
      "loss": 2.2603,
      "step": 23455
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0930472612380981,
      "learning_rate": 1.5855946239898363e-05,
      "loss": 2.357,
      "step": 23456
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0869001150131226,
      "learning_rate": 1.585561247944678e-05,
      "loss": 2.3606,
      "step": 23457
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0609468221664429,
      "learning_rate": 1.5855278709068343e-05,
      "loss": 2.2446,
      "step": 23458
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.053519368171692,
      "learning_rate": 1.5854944928763614e-05,
      "loss": 2.4721,
      "step": 23459
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9780715107917786,
      "learning_rate": 1.5854611138533166e-05,
      "loss": 2.3556,
      "step": 23460
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0431915521621704,
      "learning_rate": 1.5854277338377562e-05,
      "loss": 2.4485,
      "step": 23461
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0496017932891846,
      "learning_rate": 1.5853943528297368e-05,
      "loss": 2.3343,
      "step": 23462
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0480239391326904,
      "learning_rate": 1.5853609708293153e-05,
      "loss": 2.5335,
      "step": 23463
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1591506004333496,
      "learning_rate": 1.5853275878365476e-05,
      "loss": 2.3853,
      "step": 23464
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9538268446922302,
      "learning_rate": 1.5852942038514907e-05,
      "loss": 2.2649,
      "step": 23465
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.211371898651123,
      "learning_rate": 1.5852608188742014e-05,
      "loss": 2.3267,
      "step": 23466
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0528993606567383,
      "learning_rate": 1.5852274329047356e-05,
      "loss": 2.4811,
      "step": 23467
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0597914457321167,
      "learning_rate": 1.5851940459431507e-05,
      "loss": 2.3363,
      "step": 23468
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0489217042922974,
      "learning_rate": 1.5851606579895028e-05,
      "loss": 2.2784,
      "step": 23469
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0800403356552124,
      "learning_rate": 1.5851272690438485e-05,
      "loss": 2.5266,
      "step": 23470
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0593583583831787,
      "learning_rate": 1.5850938791062455e-05,
      "loss": 2.7067,
      "step": 23471
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0374906063079834,
      "learning_rate": 1.5850604881767483e-05,
      "loss": 2.4516,
      "step": 23472
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0101842880249023,
      "learning_rate": 1.5850270962554154e-05,
      "loss": 2.0842,
      "step": 23473
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0824602842330933,
      "learning_rate": 1.5849937033423022e-05,
      "loss": 2.5917,
      "step": 23474
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.078011393547058,
      "learning_rate": 1.5849603094374662e-05,
      "loss": 2.3673,
      "step": 23475
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1062734127044678,
      "learning_rate": 1.584926914540963e-05,
      "loss": 2.4666,
      "step": 23476
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2614502906799316,
      "learning_rate": 1.5848935186528506e-05,
      "loss": 2.4025,
      "step": 23477
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0386511087417603,
      "learning_rate": 1.5848601217731844e-05,
      "loss": 2.4269,
      "step": 23478
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.465178370475769,
      "learning_rate": 1.5848267239020214e-05,
      "loss": 2.5111,
      "step": 23479
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0660748481750488,
      "learning_rate": 1.5847933250394186e-05,
      "loss": 2.2564,
      "step": 23480
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0156093835830688,
      "learning_rate": 1.584759925185432e-05,
      "loss": 2.4202,
      "step": 23481
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0936180353164673,
      "learning_rate": 1.5847265243401183e-05,
      "loss": 2.4636,
      "step": 23482
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9813528060913086,
      "learning_rate": 1.5846931225035347e-05,
      "loss": 2.4124,
      "step": 23483
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0302989482879639,
      "learning_rate": 1.5846597196757375e-05,
      "loss": 2.2678,
      "step": 23484
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9567240476608276,
      "learning_rate": 1.584626315856783e-05,
      "loss": 2.2336,
      "step": 23485
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.026126742362976,
      "learning_rate": 1.5845929110467282e-05,
      "loss": 2.3827,
      "step": 23486
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.144897222518921,
      "learning_rate": 1.58455950524563e-05,
      "loss": 2.2446,
      "step": 23487
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4450418949127197,
      "learning_rate": 1.584526098453544e-05,
      "loss": 2.3533,
      "step": 23488
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0093897581100464,
      "learning_rate": 1.584492690670528e-05,
      "loss": 2.3771,
      "step": 23489
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9581822752952576,
      "learning_rate": 1.584459281896638e-05,
      "loss": 2.4492,
      "step": 23490
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9578421711921692,
      "learning_rate": 1.5844258721319307e-05,
      "loss": 2.3714,
      "step": 23491
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.100836157798767,
      "learning_rate": 1.584392461376463e-05,
      "loss": 2.4403,
      "step": 23492
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0232467651367188,
      "learning_rate": 1.584359049630291e-05,
      "loss": 2.2415,
      "step": 23493
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1224775314331055,
      "learning_rate": 1.5843256368934722e-05,
      "loss": 2.52,
      "step": 23494
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0927907228469849,
      "learning_rate": 1.584292223166063e-05,
      "loss": 2.472,
      "step": 23495
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0648808479309082,
      "learning_rate": 1.584258808448119e-05,
      "loss": 2.3145,
      "step": 23496
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.007025957107544,
      "learning_rate": 1.5842253927396977e-05,
      "loss": 2.1482,
      "step": 23497
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.000165343284607,
      "learning_rate": 1.584191976040856e-05,
      "loss": 2.5111,
      "step": 23498
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0345218181610107,
      "learning_rate": 1.5841585583516505e-05,
      "loss": 2.3929,
      "step": 23499
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.985310971736908,
      "learning_rate": 1.584125139672137e-05,
      "loss": 2.5602,
      "step": 23500
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0110493898391724,
      "learning_rate": 1.584091720002373e-05,
      "loss": 2.1498,
      "step": 23501
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9831143617630005,
      "learning_rate": 1.5840582993424147e-05,
      "loss": 2.3335,
      "step": 23502
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9728401303291321,
      "learning_rate": 1.5840248776923193e-05,
      "loss": 2.4389,
      "step": 23503
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0764501094818115,
      "learning_rate": 1.583991455052143e-05,
      "loss": 2.431,
      "step": 23504
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0508465766906738,
      "learning_rate": 1.5839580314219423e-05,
      "loss": 2.4806,
      "step": 23505
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1131281852722168,
      "learning_rate": 1.5839246068017747e-05,
      "loss": 2.388,
      "step": 23506
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0631824731826782,
      "learning_rate": 1.583891181191696e-05,
      "loss": 2.4982,
      "step": 23507
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1886793375015259,
      "learning_rate": 1.5838577545917632e-05,
      "loss": 2.5895,
      "step": 23508
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.130560278892517,
      "learning_rate": 1.5838243270020325e-05,
      "loss": 2.4948,
      "step": 23509
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0005550384521484,
      "learning_rate": 1.5837908984225615e-05,
      "loss": 2.3158,
      "step": 23510
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1494264602661133,
      "learning_rate": 1.5837574688534063e-05,
      "loss": 2.2888,
      "step": 23511
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0802404880523682,
      "learning_rate": 1.5837240382946236e-05,
      "loss": 2.5958,
      "step": 23512
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.02457594871521,
      "learning_rate": 1.5836906067462702e-05,
      "loss": 2.2902,
      "step": 23513
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.093187689781189,
      "learning_rate": 1.5836571742084024e-05,
      "loss": 2.4176,
      "step": 23514
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9417826533317566,
      "learning_rate": 1.5836237406810775e-05,
      "loss": 2.4282,
      "step": 23515
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.098689079284668,
      "learning_rate": 1.5835903061643517e-05,
      "loss": 2.4927,
      "step": 23516
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9854452013969421,
      "learning_rate": 1.5835568706582817e-05,
      "loss": 2.6518,
      "step": 23517
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0438404083251953,
      "learning_rate": 1.5835234341629245e-05,
      "loss": 2.5338,
      "step": 23518
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1070785522460938,
      "learning_rate": 1.5834899966783366e-05,
      "loss": 2.4372,
      "step": 23519
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0523351430892944,
      "learning_rate": 1.5834565582045747e-05,
      "loss": 2.32,
      "step": 23520
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0245634317398071,
      "learning_rate": 1.5834231187416956e-05,
      "loss": 2.3466,
      "step": 23521
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.101410150527954,
      "learning_rate": 1.5833896782897554e-05,
      "loss": 2.5813,
      "step": 23522
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0665584802627563,
      "learning_rate": 1.5833562368488117e-05,
      "loss": 2.2779,
      "step": 23523
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0061947107315063,
      "learning_rate": 1.5833227944189205e-05,
      "loss": 2.4193,
      "step": 23524
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0718084573745728,
      "learning_rate": 1.5832893510001387e-05,
      "loss": 2.6092,
      "step": 23525
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0134108066558838,
      "learning_rate": 1.5832559065925234e-05,
      "loss": 2.4843,
      "step": 23526
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0379910469055176,
      "learning_rate": 1.5832224611961304e-05,
      "loss": 2.5746,
      "step": 23527
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.968446671962738,
      "learning_rate": 1.583189014811017e-05,
      "loss": 2.321,
      "step": 23528
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1479439735412598,
      "learning_rate": 1.5831555674372402e-05,
      "loss": 2.4814,
      "step": 23529
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0194284915924072,
      "learning_rate": 1.5831221190748563e-05,
      "loss": 2.4586,
      "step": 23530
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1684439182281494,
      "learning_rate": 1.583088669723922e-05,
      "loss": 2.4892,
      "step": 23531
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9883757829666138,
      "learning_rate": 1.5830552193844936e-05,
      "loss": 2.4137,
      "step": 23532
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0238898992538452,
      "learning_rate": 1.5830217680566287e-05,
      "loss": 2.4695,
      "step": 23533
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1946001052856445,
      "learning_rate": 1.582988315740383e-05,
      "loss": 2.5986,
      "step": 23534
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1826224327087402,
      "learning_rate": 1.5829548624358146e-05,
      "loss": 2.2879,
      "step": 23535
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1091248989105225,
      "learning_rate": 1.582921408142979e-05,
      "loss": 2.5048,
      "step": 23536
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0246747732162476,
      "learning_rate": 1.5828879528619333e-05,
      "loss": 2.4491,
      "step": 23537
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0072928667068481,
      "learning_rate": 1.582854496592734e-05,
      "loss": 2.3852,
      "step": 23538
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3085476160049438,
      "learning_rate": 1.5828210393354382e-05,
      "loss": 2.2332,
      "step": 23539
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9808730483055115,
      "learning_rate": 1.5827875810901025e-05,
      "loss": 2.2596,
      "step": 23540
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0942270755767822,
      "learning_rate": 1.5827541218567836e-05,
      "loss": 2.5541,
      "step": 23541
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.005165457725525,
      "learning_rate": 1.582720661635538e-05,
      "loss": 2.4716,
      "step": 23542
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1510380506515503,
      "learning_rate": 1.5826872004264227e-05,
      "loss": 2.5545,
      "step": 23543
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0278054475784302,
      "learning_rate": 1.582653738229494e-05,
      "loss": 2.5814,
      "step": 23544
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1313607692718506,
      "learning_rate": 1.5826202750448098e-05,
      "loss": 2.4117,
      "step": 23545
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.285022497177124,
      "learning_rate": 1.5825868108724254e-05,
      "loss": 2.5419,
      "step": 23546
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9523142576217651,
      "learning_rate": 1.5825533457123983e-05,
      "loss": 2.3915,
      "step": 23547
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1391797065734863,
      "learning_rate": 1.5825198795647846e-05,
      "loss": 2.3814,
      "step": 23548
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1554973125457764,
      "learning_rate": 1.582486412429642e-05,
      "loss": 2.3406,
      "step": 23549
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9890043139457703,
      "learning_rate": 1.582452944307027e-05,
      "loss": 2.313,
      "step": 23550
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0379273891448975,
      "learning_rate": 1.5824194751969955e-05,
      "loss": 2.4747,
      "step": 23551
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9690259099006653,
      "learning_rate": 1.582386005099605e-05,
      "loss": 2.1947,
      "step": 23552
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0357822179794312,
      "learning_rate": 1.5823525340149123e-05,
      "loss": 2.2796,
      "step": 23553
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0437943935394287,
      "learning_rate": 1.5823190619429733e-05,
      "loss": 2.5085,
      "step": 23554
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9527068138122559,
      "learning_rate": 1.5822855888838456e-05,
      "loss": 2.1764,
      "step": 23555
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0223902463912964,
      "learning_rate": 1.5822521148375856e-05,
      "loss": 2.2806,
      "step": 23556
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9455779194831848,
      "learning_rate": 1.5822186398042507e-05,
      "loss": 2.3489,
      "step": 23557
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1202560663223267,
      "learning_rate": 1.5821851637838965e-05,
      "loss": 2.5201,
      "step": 23558
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1399651765823364,
      "learning_rate": 1.5821516867765805e-05,
      "loss": 2.3213,
      "step": 23559
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.05216383934021,
      "learning_rate": 1.5821182087823592e-05,
      "loss": 2.4281,
      "step": 23560
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1387596130371094,
      "learning_rate": 1.5820847298012894e-05,
      "loss": 2.6358,
      "step": 23561
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0192548036575317,
      "learning_rate": 1.5820512498334282e-05,
      "loss": 2.4285,
      "step": 23562
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1454719305038452,
      "learning_rate": 1.582017768878832e-05,
      "loss": 2.3107,
      "step": 23563
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1701875925064087,
      "learning_rate": 1.581984286937557e-05,
      "loss": 2.3637,
      "step": 23564
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1551133394241333,
      "learning_rate": 1.581950804009661e-05,
      "loss": 2.563,
      "step": 23565
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9626572132110596,
      "learning_rate": 1.5819173200952006e-05,
      "loss": 2.3952,
      "step": 23566
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9117618799209595,
      "learning_rate": 1.581883835194232e-05,
      "loss": 2.4219,
      "step": 23567
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0935084819793701,
      "learning_rate": 1.5818503493068124e-05,
      "loss": 2.473,
      "step": 23568
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0787384510040283,
      "learning_rate": 1.5818168624329984e-05,
      "loss": 2.4969,
      "step": 23569
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0882409811019897,
      "learning_rate": 1.5817833745728467e-05,
      "loss": 2.4301,
      "step": 23570
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1213040351867676,
      "learning_rate": 1.5817498857264143e-05,
      "loss": 2.4536,
      "step": 23571
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3444323539733887,
      "learning_rate": 1.5817163958937577e-05,
      "loss": 2.6016,
      "step": 23572
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1633625030517578,
      "learning_rate": 1.5816829050749335e-05,
      "loss": 2.3851,
      "step": 23573
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0917247533798218,
      "learning_rate": 1.5816494132699994e-05,
      "loss": 2.3222,
      "step": 23574
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1931549310684204,
      "learning_rate": 1.581615920479011e-05,
      "loss": 2.4379,
      "step": 23575
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1134135723114014,
      "learning_rate": 1.5815824267020263e-05,
      "loss": 2.5237,
      "step": 23576
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0449217557907104,
      "learning_rate": 1.581548931939101e-05,
      "loss": 2.6467,
      "step": 23577
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1681174039840698,
      "learning_rate": 1.5815154361902926e-05,
      "loss": 2.4861,
      "step": 23578
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1870783567428589,
      "learning_rate": 1.5814819394556575e-05,
      "loss": 2.5159,
      "step": 23579
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.066234827041626,
      "learning_rate": 1.5814484417352522e-05,
      "loss": 2.2642,
      "step": 23580
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.003672480583191,
      "learning_rate": 1.581414943029134e-05,
      "loss": 2.3093,
      "step": 23581
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0419764518737793,
      "learning_rate": 1.58138144333736e-05,
      "loss": 2.243,
      "step": 23582
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9941858649253845,
      "learning_rate": 1.5813479426599863e-05,
      "loss": 2.3735,
      "step": 23583
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0021892786026,
      "learning_rate": 1.58131444099707e-05,
      "loss": 2.4243,
      "step": 23584
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1355750560760498,
      "learning_rate": 1.581280938348668e-05,
      "loss": 2.24,
      "step": 23585
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0823148488998413,
      "learning_rate": 1.5812474347148366e-05,
      "loss": 2.6689,
      "step": 23586
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9958847761154175,
      "learning_rate": 1.581213930095633e-05,
      "loss": 2.5492,
      "step": 23587
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0529568195343018,
      "learning_rate": 1.5811804244911138e-05,
      "loss": 2.4534,
      "step": 23588
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0248501300811768,
      "learning_rate": 1.5811469179013364e-05,
      "loss": 2.329,
      "step": 23589
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.054461121559143,
      "learning_rate": 1.5811134103263567e-05,
      "loss": 2.3938,
      "step": 23590
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.047289252281189,
      "learning_rate": 1.581079901766232e-05,
      "loss": 2.5158,
      "step": 23591
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9815401434898376,
      "learning_rate": 1.581046392221019e-05,
      "loss": 2.6115,
      "step": 23592
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1472643613815308,
      "learning_rate": 1.5810128816907746e-05,
      "loss": 2.3227,
      "step": 23593
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9921955466270447,
      "learning_rate": 1.5809793701755558e-05,
      "loss": 2.3893,
      "step": 23594
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3432213068008423,
      "learning_rate": 1.580945857675419e-05,
      "loss": 2.5631,
      "step": 23595
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2918044328689575,
      "learning_rate": 1.5809123441904214e-05,
      "loss": 2.2056,
      "step": 23596
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.996932864189148,
      "learning_rate": 1.5808788297206193e-05,
      "loss": 2.4316,
      "step": 23597
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1123825311660767,
      "learning_rate": 1.5808453142660702e-05,
      "loss": 2.5579,
      "step": 23598
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.037676215171814,
      "learning_rate": 1.58081179782683e-05,
      "loss": 2.4687,
      "step": 23599
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2409049272537231,
      "learning_rate": 1.5807782804029564e-05,
      "loss": 2.5225,
      "step": 23600
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9863846302032471,
      "learning_rate": 1.5807447619945056e-05,
      "loss": 2.3594,
      "step": 23601
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.193156361579895,
      "learning_rate": 1.5807112426015352e-05,
      "loss": 2.1891,
      "step": 23602
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0860143899917603,
      "learning_rate": 1.580677722224101e-05,
      "loss": 2.4375,
      "step": 23603
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1455800533294678,
      "learning_rate": 1.5806442008622606e-05,
      "loss": 2.7017,
      "step": 23604
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0369863510131836,
      "learning_rate": 1.5806106785160708e-05,
      "loss": 2.2984,
      "step": 23605
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0215389728546143,
      "learning_rate": 1.580577155185588e-05,
      "loss": 2.5184,
      "step": 23606
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1848106384277344,
      "learning_rate": 1.580543630870869e-05,
      "loss": 2.3745,
      "step": 23607
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1584086418151855,
      "learning_rate": 1.5805101055719713e-05,
      "loss": 2.746,
      "step": 23608
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.014805555343628,
      "learning_rate": 1.5804765792889513e-05,
      "loss": 2.5074,
      "step": 23609
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0461119413375854,
      "learning_rate": 1.5804430520218654e-05,
      "loss": 2.2868,
      "step": 23610
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9547132253646851,
      "learning_rate": 1.5804095237707707e-05,
      "loss": 2.2923,
      "step": 23611
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.997256338596344,
      "learning_rate": 1.580375994535725e-05,
      "loss": 2.5176,
      "step": 23612
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0250928401947021,
      "learning_rate": 1.580342464316784e-05,
      "loss": 2.5547,
      "step": 23613
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0739306211471558,
      "learning_rate": 1.5803089331140047e-05,
      "loss": 2.4755,
      "step": 23614
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.028067708015442,
      "learning_rate": 1.5802754009274446e-05,
      "loss": 2.5095,
      "step": 23615
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0385637283325195,
      "learning_rate": 1.5802418677571598e-05,
      "loss": 2.3176,
      "step": 23616
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9589214324951172,
      "learning_rate": 1.5802083336032074e-05,
      "loss": 2.3012,
      "step": 23617
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9851626753807068,
      "learning_rate": 1.5801747984656445e-05,
      "loss": 2.3416,
      "step": 23618
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2781869173049927,
      "learning_rate": 1.5801412623445272e-05,
      "loss": 2.4744,
      "step": 23619
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0963125228881836,
      "learning_rate": 1.5801077252399136e-05,
      "loss": 2.3919,
      "step": 23620
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0366113185882568,
      "learning_rate": 1.5800741871518598e-05,
      "loss": 2.5033,
      "step": 23621
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5570013523101807,
      "learning_rate": 1.580040648080422e-05,
      "loss": 2.3682,
      "step": 23622
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0318551063537598,
      "learning_rate": 1.5800071080256584e-05,
      "loss": 2.4809,
      "step": 23623
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0189809799194336,
      "learning_rate": 1.5799735669876253e-05,
      "loss": 2.3317,
      "step": 23624
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0764293670654297,
      "learning_rate": 1.579940024966379e-05,
      "loss": 2.2797,
      "step": 23625
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1918132305145264,
      "learning_rate": 1.5799064819619774e-05,
      "loss": 2.2395,
      "step": 23626
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0505130290985107,
      "learning_rate": 1.5798729379744763e-05,
      "loss": 2.5347,
      "step": 23627
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9779052138328552,
      "learning_rate": 1.5798393930039332e-05,
      "loss": 2.3749,
      "step": 23628
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9549375176429749,
      "learning_rate": 1.579805847050405e-05,
      "loss": 2.4545,
      "step": 23629
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9284440279006958,
      "learning_rate": 1.5797723001139485e-05,
      "loss": 2.2363,
      "step": 23630
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0576975345611572,
      "learning_rate": 1.5797387521946205e-05,
      "loss": 2.6477,
      "step": 23631
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0035836696624756,
      "learning_rate": 1.5797052032924774e-05,
      "loss": 2.5717,
      "step": 23632
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4719860553741455,
      "learning_rate": 1.579671653407577e-05,
      "loss": 2.6141,
      "step": 23633
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.163068413734436,
      "learning_rate": 1.5796381025399755e-05,
      "loss": 2.5654,
      "step": 23634
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9950299859046936,
      "learning_rate": 1.57960455068973e-05,
      "loss": 2.3323,
      "step": 23635
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0429165363311768,
      "learning_rate": 1.5795709978568975e-05,
      "loss": 2.464,
      "step": 23636
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0277694463729858,
      "learning_rate": 1.5795374440415347e-05,
      "loss": 2.3178,
      "step": 23637
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0220577716827393,
      "learning_rate": 1.5795038892436984e-05,
      "loss": 2.3074,
      "step": 23638
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0161389112472534,
      "learning_rate": 1.5794703334634455e-05,
      "loss": 2.4948,
      "step": 23639
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1648495197296143,
      "learning_rate": 1.5794367767008333e-05,
      "loss": 2.2839,
      "step": 23640
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9954005479812622,
      "learning_rate": 1.5794032189559183e-05,
      "loss": 2.3461,
      "step": 23641
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.992474377155304,
      "learning_rate": 1.5793696602287575e-05,
      "loss": 2.6099,
      "step": 23642
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0794591903686523,
      "learning_rate": 1.5793361005194077e-05,
      "loss": 2.4304,
      "step": 23643
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9651526808738708,
      "learning_rate": 1.579302539827926e-05,
      "loss": 2.4032,
      "step": 23644
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9474751949310303,
      "learning_rate": 1.5792689781543692e-05,
      "loss": 2.2853,
      "step": 23645
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9830265045166016,
      "learning_rate": 1.579235415498794e-05,
      "loss": 2.5177,
      "step": 23646
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4096475839614868,
      "learning_rate": 1.579201851861257e-05,
      "loss": 2.4862,
      "step": 23647
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.676164150238037,
      "learning_rate": 1.5791682872418163e-05,
      "loss": 2.4105,
      "step": 23648
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.967477560043335,
      "learning_rate": 1.5791347216405277e-05,
      "loss": 2.7345,
      "step": 23649
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.304062843322754,
      "learning_rate": 1.5791011550574487e-05,
      "loss": 2.0582,
      "step": 23650
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.043954849243164,
      "learning_rate": 1.5790675874926358e-05,
      "loss": 2.4521,
      "step": 23651
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.049623966217041,
      "learning_rate": 1.579034018946146e-05,
      "loss": 2.2503,
      "step": 23652
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3537254333496094,
      "learning_rate": 1.579000449418036e-05,
      "loss": 2.7836,
      "step": 23653
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.104781985282898,
      "learning_rate": 1.5789668789083637e-05,
      "loss": 2.5301,
      "step": 23654
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1352945566177368,
      "learning_rate": 1.5789333074171848e-05,
      "loss": 2.4167,
      "step": 23655
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1024399995803833,
      "learning_rate": 1.5788997349445566e-05,
      "loss": 2.4163,
      "step": 23656
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.147680401802063,
      "learning_rate": 1.5788661614905367e-05,
      "loss": 2.3293,
      "step": 23657
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9880415201187134,
      "learning_rate": 1.578832587055181e-05,
      "loss": 2.4762,
      "step": 23658
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.168675184249878,
      "learning_rate": 1.578799011638547e-05,
      "loss": 2.3926,
      "step": 23659
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0682233572006226,
      "learning_rate": 1.5787654352406915e-05,
      "loss": 2.3086,
      "step": 23660
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2218542098999023,
      "learning_rate": 1.578731857861671e-05,
      "loss": 2.3007,
      "step": 23661
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1585588455200195,
      "learning_rate": 1.5786982795015435e-05,
      "loss": 2.5174,
      "step": 23662
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.080993413925171,
      "learning_rate": 1.5786647001603648e-05,
      "loss": 2.4579,
      "step": 23663
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1313835382461548,
      "learning_rate": 1.578631119838192e-05,
      "loss": 2.7144,
      "step": 23664
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0185935497283936,
      "learning_rate": 1.578597538535083e-05,
      "loss": 2.4829,
      "step": 23665
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1710971593856812,
      "learning_rate": 1.5785639562510935e-05,
      "loss": 2.4999,
      "step": 23666
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0615475177764893,
      "learning_rate": 1.578530372986281e-05,
      "loss": 2.4878,
      "step": 23667
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0052677392959595,
      "learning_rate": 1.578496788740703e-05,
      "loss": 2.3769,
      "step": 23668
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9716368913650513,
      "learning_rate": 1.5784632035144148e-05,
      "loss": 2.4863,
      "step": 23669
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.164594292640686,
      "learning_rate": 1.578429617307475e-05,
      "loss": 2.3487,
      "step": 23670
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1393133401870728,
      "learning_rate": 1.57839603011994e-05,
      "loss": 2.5221,
      "step": 23671
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0046828985214233,
      "learning_rate": 1.5783624419518663e-05,
      "loss": 2.4472,
      "step": 23672
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.083832025527954,
      "learning_rate": 1.5783288528033115e-05,
      "loss": 2.4064,
      "step": 23673
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1273497343063354,
      "learning_rate": 1.578295262674332e-05,
      "loss": 2.4618,
      "step": 23674
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0088692903518677,
      "learning_rate": 1.578261671564985e-05,
      "loss": 2.2253,
      "step": 23675
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2452011108398438,
      "learning_rate": 1.5782280794753276e-05,
      "loss": 2.3581,
      "step": 23676
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9751777052879333,
      "learning_rate": 1.5781944864054165e-05,
      "loss": 2.2429,
      "step": 23677
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1080915927886963,
      "learning_rate": 1.5781608923553084e-05,
      "loss": 2.3582,
      "step": 23678
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9795856475830078,
      "learning_rate": 1.5781272973250605e-05,
      "loss": 2.4948,
      "step": 23679
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.037365198135376,
      "learning_rate": 1.57809370131473e-05,
      "loss": 2.4058,
      "step": 23680
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0453859567642212,
      "learning_rate": 1.578060104324374e-05,
      "loss": 2.2862,
      "step": 23681
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0507997274398804,
      "learning_rate": 1.578026506354049e-05,
      "loss": 2.5411,
      "step": 23682
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9969083070755005,
      "learning_rate": 1.5779929074038117e-05,
      "loss": 2.6285,
      "step": 23683
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1149487495422363,
      "learning_rate": 1.5779593074737198e-05,
      "loss": 2.3342,
      "step": 23684
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1166191101074219,
      "learning_rate": 1.57792570656383e-05,
      "loss": 2.3584,
      "step": 23685
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2150377035140991,
      "learning_rate": 1.5778921046741985e-05,
      "loss": 2.6667,
      "step": 23686
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0236366987228394,
      "learning_rate": 1.5778585018048836e-05,
      "loss": 2.3395,
      "step": 23687
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0137674808502197,
      "learning_rate": 1.5778248979559413e-05,
      "loss": 2.4418,
      "step": 23688
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0147061347961426,
      "learning_rate": 1.577791293127429e-05,
      "loss": 2.5237,
      "step": 23689
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.294367790222168,
      "learning_rate": 1.5777576873194035e-05,
      "loss": 2.4193,
      "step": 23690
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1496609449386597,
      "learning_rate": 1.577724080531922e-05,
      "loss": 2.4688,
      "step": 23691
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.193888545036316,
      "learning_rate": 1.5776904727650408e-05,
      "loss": 2.3327,
      "step": 23692
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2625305652618408,
      "learning_rate": 1.5776568640188176e-05,
      "loss": 2.503,
      "step": 23693
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0184838771820068,
      "learning_rate": 1.5776232542933094e-05,
      "loss": 2.5164,
      "step": 23694
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.150160551071167,
      "learning_rate": 1.5775896435885724e-05,
      "loss": 2.4327,
      "step": 23695
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0065481662750244,
      "learning_rate": 1.5775560319046644e-05,
      "loss": 2.3099,
      "step": 23696
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.10236394405365,
      "learning_rate": 1.577522419241642e-05,
      "loss": 2.4366,
      "step": 23697
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9258880019187927,
      "learning_rate": 1.577488805599562e-05,
      "loss": 2.3071,
      "step": 23698
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0140687227249146,
      "learning_rate": 1.577455190978482e-05,
      "loss": 2.6689,
      "step": 23699
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9900055527687073,
      "learning_rate": 1.5774215753784584e-05,
      "loss": 2.555,
      "step": 23700
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0806093215942383,
      "learning_rate": 1.5773879587995487e-05,
      "loss": 2.4171,
      "step": 23701
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1185400485992432,
      "learning_rate": 1.5773543412418094e-05,
      "loss": 2.3936,
      "step": 23702
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0840470790863037,
      "learning_rate": 1.5773207227052974e-05,
      "loss": 2.7583,
      "step": 23703
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9871712327003479,
      "learning_rate": 1.5772871031900704e-05,
      "loss": 2.4759,
      "step": 23704
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.098521113395691,
      "learning_rate": 1.5772534826961846e-05,
      "loss": 2.2966,
      "step": 23705
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1596465110778809,
      "learning_rate": 1.577219861223698e-05,
      "loss": 2.5171,
      "step": 23706
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.026412010192871,
      "learning_rate": 1.5771862387726662e-05,
      "loss": 2.3221,
      "step": 23707
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3722132444381714,
      "learning_rate": 1.5771526153431474e-05,
      "loss": 2.3274,
      "step": 23708
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.149655818939209,
      "learning_rate": 1.577118990935198e-05,
      "loss": 2.5129,
      "step": 23709
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9925386309623718,
      "learning_rate": 1.5770853655488752e-05,
      "loss": 2.4718,
      "step": 23710
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0010526180267334,
      "learning_rate": 1.5770517391842358e-05,
      "loss": 2.2756,
      "step": 23711
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9910935759544373,
      "learning_rate": 1.5770181118413375e-05,
      "loss": 2.3684,
      "step": 23712
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9179955124855042,
      "learning_rate": 1.5769844835202363e-05,
      "loss": 2.5116,
      "step": 23713
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2134684324264526,
      "learning_rate": 1.57695085422099e-05,
      "loss": 2.0999,
      "step": 23714
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0145504474639893,
      "learning_rate": 1.576917223943655e-05,
      "loss": 2.489,
      "step": 23715
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0956943035125732,
      "learning_rate": 1.576883592688289e-05,
      "loss": 2.385,
      "step": 23716
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1002665758132935,
      "learning_rate": 1.5768499604549482e-05,
      "loss": 2.5888,
      "step": 23717
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0385652780532837,
      "learning_rate": 1.5768163272436904e-05,
      "loss": 2.4674,
      "step": 23718
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1771363019943237,
      "learning_rate": 1.576782693054572e-05,
      "loss": 2.3295,
      "step": 23719
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.013424277305603,
      "learning_rate": 1.5767490578876503e-05,
      "loss": 2.2675,
      "step": 23720
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.095630407333374,
      "learning_rate": 1.5767154217429825e-05,
      "loss": 2.2873,
      "step": 23721
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.009678602218628,
      "learning_rate": 1.5766817846206253e-05,
      "loss": 2.3437,
      "step": 23722
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.093610167503357,
      "learning_rate": 1.576648146520636e-05,
      "loss": 2.5006,
      "step": 23723
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9641068577766418,
      "learning_rate": 1.5766145074430714e-05,
      "loss": 2.3959,
      "step": 23724
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0070106983184814,
      "learning_rate": 1.5765808673879886e-05,
      "loss": 2.5338,
      "step": 23725
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0197607278823853,
      "learning_rate": 1.5765472263554444e-05,
      "loss": 2.3746,
      "step": 23726
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0880310535430908,
      "learning_rate": 1.5765135843454963e-05,
      "loss": 2.5455,
      "step": 23727
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0203933715820312,
      "learning_rate": 1.5764799413582013e-05,
      "loss": 2.3587,
      "step": 23728
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0474352836608887,
      "learning_rate": 1.576446297393616e-05,
      "loss": 2.327,
      "step": 23729
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.131187915802002,
      "learning_rate": 1.5764126524517975e-05,
      "loss": 2.2939,
      "step": 23730
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.012543797492981,
      "learning_rate": 1.5763790065328032e-05,
      "loss": 2.4175,
      "step": 23731
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0445228815078735,
      "learning_rate": 1.5763453596366896e-05,
      "loss": 2.6468,
      "step": 23732
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0418684482574463,
      "learning_rate": 1.5763117117635144e-05,
      "loss": 2.3764,
      "step": 23733
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1133229732513428,
      "learning_rate": 1.5762780629133345e-05,
      "loss": 2.3888,
      "step": 23734
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9355971813201904,
      "learning_rate": 1.5762444130862068e-05,
      "loss": 2.3951,
      "step": 23735
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9981017708778381,
      "learning_rate": 1.5762107622821878e-05,
      "loss": 2.2751,
      "step": 23736
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3082530498504639,
      "learning_rate": 1.5761771105013352e-05,
      "loss": 2.3106,
      "step": 23737
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.036035180091858,
      "learning_rate": 1.5761434577437062e-05,
      "loss": 2.5532,
      "step": 23738
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0440694093704224,
      "learning_rate": 1.576109804009357e-05,
      "loss": 2.4289,
      "step": 23739
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9954226016998291,
      "learning_rate": 1.5760761492983458e-05,
      "loss": 2.2694,
      "step": 23740
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.058036208152771,
      "learning_rate": 1.576042493610729e-05,
      "loss": 2.5362,
      "step": 23741
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0882259607315063,
      "learning_rate": 1.5760088369465637e-05,
      "loss": 2.3151,
      "step": 23742
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1951782703399658,
      "learning_rate": 1.575975179305907e-05,
      "loss": 2.5815,
      "step": 23743
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0587197542190552,
      "learning_rate": 1.5759415206888158e-05,
      "loss": 2.3931,
      "step": 23744
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1478592157363892,
      "learning_rate": 1.5759078610953473e-05,
      "loss": 2.6049,
      "step": 23745
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1837869882583618,
      "learning_rate": 1.5758742005255586e-05,
      "loss": 2.5682,
      "step": 23746
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2420438528060913,
      "learning_rate": 1.5758405389795067e-05,
      "loss": 2.5086,
      "step": 23747
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0297513008117676,
      "learning_rate": 1.5758068764572487e-05,
      "loss": 2.4078,
      "step": 23748
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1277109384536743,
      "learning_rate": 1.5757732129588415e-05,
      "loss": 2.5347,
      "step": 23749
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9091359972953796,
      "learning_rate": 1.5757395484843427e-05,
      "loss": 2.3259,
      "step": 23750
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.013015627861023,
      "learning_rate": 1.5757058830338088e-05,
      "loss": 2.4348,
      "step": 23751
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.960442304611206,
      "learning_rate": 1.575672216607297e-05,
      "loss": 2.3619,
      "step": 23752
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1386197805404663,
      "learning_rate": 1.5756385492048645e-05,
      "loss": 2.714,
      "step": 23753
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0886770486831665,
      "learning_rate": 1.5756048808265683e-05,
      "loss": 2.3561,
      "step": 23754
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2326139211654663,
      "learning_rate": 1.5755712114724655e-05,
      "loss": 2.3879,
      "step": 23755
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.00566828250885,
      "learning_rate": 1.5755375411426132e-05,
      "loss": 2.1815,
      "step": 23756
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0058376789093018,
      "learning_rate": 1.5755038698370687e-05,
      "loss": 2.4406,
      "step": 23757
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9929758906364441,
      "learning_rate": 1.5754701975558884e-05,
      "loss": 2.6586,
      "step": 23758
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9746505618095398,
      "learning_rate": 1.57543652429913e-05,
      "loss": 2.3139,
      "step": 23759
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0999705791473389,
      "learning_rate": 1.5754028500668502e-05,
      "loss": 2.4653,
      "step": 23760
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.104691982269287,
      "learning_rate": 1.5753691748591066e-05,
      "loss": 2.6602,
      "step": 23761
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.117170810699463,
      "learning_rate": 1.5753354986759558e-05,
      "loss": 2.2542,
      "step": 23762
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1042726039886475,
      "learning_rate": 1.575301821517455e-05,
      "loss": 2.3541,
      "step": 23763
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0692485570907593,
      "learning_rate": 1.575268143383662e-05,
      "loss": 2.3696,
      "step": 23764
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3700587749481201,
      "learning_rate": 1.5752344642746326e-05,
      "loss": 2.1946,
      "step": 23765
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9324142932891846,
      "learning_rate": 1.5752007841904244e-05,
      "loss": 2.3682,
      "step": 23766
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.127622365951538,
      "learning_rate": 1.5751671031310953e-05,
      "loss": 2.3262,
      "step": 23767
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0276967287063599,
      "learning_rate": 1.5751334210967012e-05,
      "loss": 2.5545,
      "step": 23768
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2601946592330933,
      "learning_rate": 1.5750997380873e-05,
      "loss": 2.3118,
      "step": 23769
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1457468271255493,
      "learning_rate": 1.5750660541029487e-05,
      "loss": 2.2813,
      "step": 23770
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.056211233139038,
      "learning_rate": 1.575032369143704e-05,
      "loss": 2.2671,
      "step": 23771
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1021041870117188,
      "learning_rate": 1.574998683209623e-05,
      "loss": 2.2941,
      "step": 23772
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0342708826065063,
      "learning_rate": 1.5749649963007638e-05,
      "loss": 2.5911,
      "step": 23773
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1852948665618896,
      "learning_rate": 1.5749313084171823e-05,
      "loss": 2.4614,
      "step": 23774
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0954970121383667,
      "learning_rate": 1.574897619558936e-05,
      "loss": 2.3837,
      "step": 23775
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.130579948425293,
      "learning_rate": 1.5748639297260823e-05,
      "loss": 2.7618,
      "step": 23776
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0597786903381348,
      "learning_rate": 1.574830238918678e-05,
      "loss": 2.5718,
      "step": 23777
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.037940263748169,
      "learning_rate": 1.5747965471367804e-05,
      "loss": 2.323,
      "step": 23778
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2461724281311035,
      "learning_rate": 1.5747628543804464e-05,
      "loss": 2.3386,
      "step": 23779
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2097176313400269,
      "learning_rate": 1.5747291606497335e-05,
      "loss": 2.2919,
      "step": 23780
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0737777948379517,
      "learning_rate": 1.5746954659446982e-05,
      "loss": 2.4394,
      "step": 23781
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9801036715507507,
      "learning_rate": 1.5746617702653985e-05,
      "loss": 2.2683,
      "step": 23782
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0568795204162598,
      "learning_rate": 1.5746280736118906e-05,
      "loss": 2.2085,
      "step": 23783
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1229748725891113,
      "learning_rate": 1.5745943759842322e-05,
      "loss": 2.3623,
      "step": 23784
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1535537242889404,
      "learning_rate": 1.5745606773824805e-05,
      "loss": 2.2927,
      "step": 23785
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0094558000564575,
      "learning_rate": 1.574526977806692e-05,
      "loss": 2.4423,
      "step": 23786
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0019636154174805,
      "learning_rate": 1.5744932772569247e-05,
      "loss": 2.4053,
      "step": 23787
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0608114004135132,
      "learning_rate": 1.5744595757332346e-05,
      "loss": 2.5438,
      "step": 23788
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0801173448562622,
      "learning_rate": 1.57442587323568e-05,
      "loss": 2.3582,
      "step": 23789
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.327255129814148,
      "learning_rate": 1.5743921697643174e-05,
      "loss": 2.3694,
      "step": 23790
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0619486570358276,
      "learning_rate": 1.574358465319204e-05,
      "loss": 2.4105,
      "step": 23791
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1213959455490112,
      "learning_rate": 1.574324759900397e-05,
      "loss": 2.39,
      "step": 23792
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9719067811965942,
      "learning_rate": 1.5742910535079538e-05,
      "loss": 2.3126,
      "step": 23793
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9921202063560486,
      "learning_rate": 1.574257346141931e-05,
      "loss": 2.3835,
      "step": 23794
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9784013628959656,
      "learning_rate": 1.5742236378023864e-05,
      "loss": 2.2218,
      "step": 23795
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.168824315071106,
      "learning_rate": 1.5741899284893765e-05,
      "loss": 2.3801,
      "step": 23796
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9845526218414307,
      "learning_rate": 1.5741562182029587e-05,
      "loss": 2.4073,
      "step": 23797
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.031687617301941,
      "learning_rate": 1.5741225069431903e-05,
      "loss": 2.3972,
      "step": 23798
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9364480972290039,
      "learning_rate": 1.5740887947101282e-05,
      "loss": 2.1577,
      "step": 23799
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0406850576400757,
      "learning_rate": 1.5740550815038297e-05,
      "loss": 2.5492,
      "step": 23800
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1425671577453613,
      "learning_rate": 1.574021367324352e-05,
      "loss": 2.2348,
      "step": 23801
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9970740079879761,
      "learning_rate": 1.5739876521717522e-05,
      "loss": 2.3686,
      "step": 23802
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0519229173660278,
      "learning_rate": 1.5739539360460875e-05,
      "loss": 2.5644,
      "step": 23803
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.13430917263031,
      "learning_rate": 1.5739202189474147e-05,
      "loss": 2.381,
      "step": 23804
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0572478771209717,
      "learning_rate": 1.5738865008757914e-05,
      "loss": 2.2234,
      "step": 23805
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0680220127105713,
      "learning_rate": 1.5738527818312746e-05,
      "loss": 2.411,
      "step": 23806
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.102508544921875,
      "learning_rate": 1.5738190618139214e-05,
      "loss": 2.3753,
      "step": 23807
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1620174646377563,
      "learning_rate": 1.5737853408237892e-05,
      "loss": 2.4357,
      "step": 23808
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1886085271835327,
      "learning_rate": 1.573751618860935e-05,
      "loss": 2.4079,
      "step": 23809
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0227614641189575,
      "learning_rate": 1.573717895925416e-05,
      "loss": 2.3477,
      "step": 23810
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0079562664031982,
      "learning_rate": 1.5736841720172894e-05,
      "loss": 2.3395,
      "step": 23811
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0859805345535278,
      "learning_rate": 1.5736504471366122e-05,
      "loss": 2.5411,
      "step": 23812
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0560399293899536,
      "learning_rate": 1.5736167212834416e-05,
      "loss": 2.3766,
      "step": 23813
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.975915789604187,
      "learning_rate": 1.5735829944578348e-05,
      "loss": 2.1347,
      "step": 23814
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0804638862609863,
      "learning_rate": 1.5735492666598494e-05,
      "loss": 2.4613,
      "step": 23815
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.018328070640564,
      "learning_rate": 1.573515537889542e-05,
      "loss": 2.2921,
      "step": 23816
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0311400890350342,
      "learning_rate": 1.5734818081469702e-05,
      "loss": 2.1848,
      "step": 23817
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.092124342918396,
      "learning_rate": 1.5734480774321906e-05,
      "loss": 2.353,
      "step": 23818
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9935644268989563,
      "learning_rate": 1.573414345745261e-05,
      "loss": 2.3688,
      "step": 23819
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.006601333618164,
      "learning_rate": 1.5733806130862387e-05,
      "loss": 2.4319,
      "step": 23820
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0136079788208008,
      "learning_rate": 1.57334687945518e-05,
      "loss": 2.5459,
      "step": 23821
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.007817268371582,
      "learning_rate": 1.5733131448521427e-05,
      "loss": 2.3275,
      "step": 23822
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0829728841781616,
      "learning_rate": 1.5732794092771836e-05,
      "loss": 2.3635,
      "step": 23823
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0584604740142822,
      "learning_rate": 1.573245672730361e-05,
      "loss": 2.424,
      "step": 23824
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.108232855796814,
      "learning_rate": 1.5732119352117305e-05,
      "loss": 2.5218,
      "step": 23825
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0894882678985596,
      "learning_rate": 1.5731781967213508e-05,
      "loss": 2.291,
      "step": 23826
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1354858875274658,
      "learning_rate": 1.573144457259278e-05,
      "loss": 2.5719,
      "step": 23827
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1016746759414673,
      "learning_rate": 1.5731107168255694e-05,
      "loss": 2.5159,
      "step": 23828
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1200453042984009,
      "learning_rate": 1.573076975420283e-05,
      "loss": 2.5191,
      "step": 23829
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0732842683792114,
      "learning_rate": 1.573043233043475e-05,
      "loss": 2.3414,
      "step": 23830
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.208446741104126,
      "learning_rate": 1.5730094896952035e-05,
      "loss": 2.4191,
      "step": 23831
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0192070007324219,
      "learning_rate": 1.5729757453755254e-05,
      "loss": 2.4851,
      "step": 23832
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0809279680252075,
      "learning_rate": 1.5729420000844973e-05,
      "loss": 2.1552,
      "step": 23833
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1207829713821411,
      "learning_rate": 1.5729082538221773e-05,
      "loss": 2.1276,
      "step": 23834
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9494044780731201,
      "learning_rate": 1.572874506588622e-05,
      "loss": 2.397,
      "step": 23835
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0286946296691895,
      "learning_rate": 1.5728407583838888e-05,
      "loss": 2.4983,
      "step": 23836
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1979105472564697,
      "learning_rate": 1.572807009208035e-05,
      "loss": 2.5385,
      "step": 23837
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.108026385307312,
      "learning_rate": 1.5727732590611178e-05,
      "loss": 2.6593,
      "step": 23838
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1003473997116089,
      "learning_rate": 1.5727395079431942e-05,
      "loss": 2.252,
      "step": 23839
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.078738808631897,
      "learning_rate": 1.572705755854322e-05,
      "loss": 2.3139,
      "step": 23840
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0022358894348145,
      "learning_rate": 1.5726720027945576e-05,
      "loss": 2.4414,
      "step": 23841
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.050884485244751,
      "learning_rate": 1.5726382487639588e-05,
      "loss": 2.4061,
      "step": 23842
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0046544075012207,
      "learning_rate": 1.5726044937625827e-05,
      "loss": 2.6093,
      "step": 23843
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1013898849487305,
      "learning_rate": 1.572570737790486e-05,
      "loss": 2.3406,
      "step": 23844
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9809035658836365,
      "learning_rate": 1.572536980847727e-05,
      "loss": 2.5129,
      "step": 23845
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1082377433776855,
      "learning_rate": 1.572503222934362e-05,
      "loss": 2.403,
      "step": 23846
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4652693271636963,
      "learning_rate": 1.5724694640504484e-05,
      "loss": 2.3736,
      "step": 23847
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0192110538482666,
      "learning_rate": 1.5724357041960437e-05,
      "loss": 2.4731,
      "step": 23848
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.158952236175537,
      "learning_rate": 1.5724019433712053e-05,
      "loss": 2.49,
      "step": 23849
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.038826823234558,
      "learning_rate": 1.57236818157599e-05,
      "loss": 2.5402,
      "step": 23850
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.002052903175354,
      "learning_rate": 1.572334418810455e-05,
      "loss": 2.4329,
      "step": 23851
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0453721284866333,
      "learning_rate": 1.5723006550746577e-05,
      "loss": 2.4805,
      "step": 23852
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0493154525756836,
      "learning_rate": 1.5722668903686556e-05,
      "loss": 2.3184,
      "step": 23853
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9942375421524048,
      "learning_rate": 1.5722331246925054e-05,
      "loss": 2.4373,
      "step": 23854
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2146910429000854,
      "learning_rate": 1.572199358046265e-05,
      "loss": 2.309,
      "step": 23855
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.09824800491333,
      "learning_rate": 1.572165590429991e-05,
      "loss": 2.5432,
      "step": 23856
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0722559690475464,
      "learning_rate": 1.572131821843741e-05,
      "loss": 2.2059,
      "step": 23857
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.053243637084961,
      "learning_rate": 1.572098052287572e-05,
      "loss": 2.6149,
      "step": 23858
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1547960042953491,
      "learning_rate": 1.572064281761542e-05,
      "loss": 2.5918,
      "step": 23859
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0494744777679443,
      "learning_rate": 1.572030510265707e-05,
      "loss": 2.3216,
      "step": 23860
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.07566499710083,
      "learning_rate": 1.571996737800125e-05,
      "loss": 2.3946,
      "step": 23861
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0803041458129883,
      "learning_rate": 1.5719629643648536e-05,
      "loss": 2.5403,
      "step": 23862
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.007627248764038,
      "learning_rate": 1.5719291899599493e-05,
      "loss": 2.4204,
      "step": 23863
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2639321088790894,
      "learning_rate": 1.57189541458547e-05,
      "loss": 2.4388,
      "step": 23864
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0124391317367554,
      "learning_rate": 1.5718616382414724e-05,
      "loss": 2.4348,
      "step": 23865
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.152459979057312,
      "learning_rate": 1.571827860928014e-05,
      "loss": 2.4919,
      "step": 23866
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9857677817344666,
      "learning_rate": 1.5717940826451524e-05,
      "loss": 2.3867,
      "step": 23867
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.053997278213501,
      "learning_rate": 1.571760303392944e-05,
      "loss": 2.3532,
      "step": 23868
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0561954975128174,
      "learning_rate": 1.5717265231714466e-05,
      "loss": 2.3381,
      "step": 23869
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.120037317276001,
      "learning_rate": 1.571692741980718e-05,
      "loss": 2.2683,
      "step": 23870
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9222534894943237,
      "learning_rate": 1.5716589598208147e-05,
      "loss": 2.4501,
      "step": 23871
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0794605016708374,
      "learning_rate": 1.571625176691794e-05,
      "loss": 2.4274,
      "step": 23872
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0400493144989014,
      "learning_rate": 1.5715913925937138e-05,
      "loss": 2.2852,
      "step": 23873
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0691624879837036,
      "learning_rate": 1.5715576075266304e-05,
      "loss": 2.44,
      "step": 23874
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1378670930862427,
      "learning_rate": 1.571523821490602e-05,
      "loss": 2.5866,
      "step": 23875
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.241544246673584,
      "learning_rate": 1.5714900344856855e-05,
      "loss": 2.2577,
      "step": 23876
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0696920156478882,
      "learning_rate": 1.5714562465119377e-05,
      "loss": 2.5607,
      "step": 23877
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0284645557403564,
      "learning_rate": 1.5714224575694167e-05,
      "loss": 2.471,
      "step": 23878
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1032302379608154,
      "learning_rate": 1.5713886676581797e-05,
      "loss": 2.2025,
      "step": 23879
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0280581712722778,
      "learning_rate": 1.5713548767782836e-05,
      "loss": 2.3831,
      "step": 23880
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.003503680229187,
      "learning_rate": 1.5713210849297857e-05,
      "loss": 2.2796,
      "step": 23881
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0697596073150635,
      "learning_rate": 1.5712872921127432e-05,
      "loss": 2.5436,
      "step": 23882
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.074165940284729,
      "learning_rate": 1.5712534983272138e-05,
      "loss": 2.4458,
      "step": 23883
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0020509958267212,
      "learning_rate": 1.5712197035732546e-05,
      "loss": 2.4006,
      "step": 23884
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1441341638565063,
      "learning_rate": 1.5711859078509226e-05,
      "loss": 2.3969,
      "step": 23885
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2255853414535522,
      "learning_rate": 1.5711521111602758e-05,
      "loss": 2.4007,
      "step": 23886
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9535006880760193,
      "learning_rate": 1.5711183135013704e-05,
      "loss": 2.3784,
      "step": 23887
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0249398946762085,
      "learning_rate": 1.571084514874265e-05,
      "loss": 2.4449,
      "step": 23888
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.301559567451477,
      "learning_rate": 1.571050715279016e-05,
      "loss": 2.4179,
      "step": 23889
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0276809930801392,
      "learning_rate": 1.571016914715681e-05,
      "loss": 2.2753,
      "step": 23890
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.043724775314331,
      "learning_rate": 1.5709831131843173e-05,
      "loss": 2.4272,
      "step": 23891
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0022555589675903,
      "learning_rate": 1.570949310684982e-05,
      "loss": 2.5065,
      "step": 23892
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.055110216140747,
      "learning_rate": 1.5709155072177323e-05,
      "loss": 2.4597,
      "step": 23893
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9834873080253601,
      "learning_rate": 1.5708817027826263e-05,
      "loss": 2.3947,
      "step": 23894
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1702877283096313,
      "learning_rate": 1.5708478973797205e-05,
      "loss": 2.6166,
      "step": 23895
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9632502794265747,
      "learning_rate": 1.5708140910090724e-05,
      "loss": 2.5175,
      "step": 23896
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0249853134155273,
      "learning_rate": 1.5707802836707393e-05,
      "loss": 2.5414,
      "step": 23897
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9749595522880554,
      "learning_rate": 1.5707464753647785e-05,
      "loss": 2.5694,
      "step": 23898
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.260732889175415,
      "learning_rate": 1.570712666091248e-05,
      "loss": 2.4379,
      "step": 23899
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0167872905731201,
      "learning_rate": 1.5706788558502044e-05,
      "loss": 2.505,
      "step": 23900
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0987519025802612,
      "learning_rate": 1.5706450446417045e-05,
      "loss": 2.4018,
      "step": 23901
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0106133222579956,
      "learning_rate": 1.5706112324658067e-05,
      "loss": 2.1879,
      "step": 23902
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1604387760162354,
      "learning_rate": 1.570577419322568e-05,
      "loss": 2.4219,
      "step": 23903
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0390137434005737,
      "learning_rate": 1.5705436052120454e-05,
      "loss": 2.1836,
      "step": 23904
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9679672122001648,
      "learning_rate": 1.5705097901342967e-05,
      "loss": 2.4862,
      "step": 23905
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9617632627487183,
      "learning_rate": 1.5704759740893787e-05,
      "loss": 2.4109,
      "step": 23906
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.936309278011322,
      "learning_rate": 1.5704421570773493e-05,
      "loss": 2.4101,
      "step": 23907
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9754409193992615,
      "learning_rate": 1.570408339098265e-05,
      "loss": 2.2949,
      "step": 23908
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9262354373931885,
      "learning_rate": 1.5703745201521837e-05,
      "loss": 2.3487,
      "step": 23909
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9755936861038208,
      "learning_rate": 1.570340700239163e-05,
      "loss": 2.2996,
      "step": 23910
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9960100054740906,
      "learning_rate": 1.57030687935926e-05,
      "loss": 2.4033,
      "step": 23911
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0089064836502075,
      "learning_rate": 1.570273057512532e-05,
      "loss": 2.4868,
      "step": 23912
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0514609813690186,
      "learning_rate": 1.570239234699036e-05,
      "loss": 2.3384,
      "step": 23913
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0918561220169067,
      "learning_rate": 1.5702054109188294e-05,
      "loss": 2.469,
      "step": 23914
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9819031953811646,
      "learning_rate": 1.57017158617197e-05,
      "loss": 2.3875,
      "step": 23915
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1365077495574951,
      "learning_rate": 1.5701377604585147e-05,
      "loss": 2.469,
      "step": 23916
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0413084030151367,
      "learning_rate": 1.5701039337785214e-05,
      "loss": 2.3937,
      "step": 23917
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1063127517700195,
      "learning_rate": 1.5700701061320472e-05,
      "loss": 2.7292,
      "step": 23918
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.004205584526062,
      "learning_rate": 1.570036277519149e-05,
      "loss": 2.3301,
      "step": 23919
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0885419845581055,
      "learning_rate": 1.5700024479398847e-05,
      "loss": 2.3568,
      "step": 23920
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1714564561843872,
      "learning_rate": 1.569968617394311e-05,
      "loss": 2.2922,
      "step": 23921
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.158029317855835,
      "learning_rate": 1.569934785882486e-05,
      "loss": 2.274,
      "step": 23922
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1139166355133057,
      "learning_rate": 1.5699009534044673e-05,
      "loss": 2.7232,
      "step": 23923
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0646837949752808,
      "learning_rate": 1.569867119960311e-05,
      "loss": 2.33,
      "step": 23924
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0682752132415771,
      "learning_rate": 1.5698332855500752e-05,
      "loss": 2.3491,
      "step": 23925
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0819625854492188,
      "learning_rate": 1.5697994501738175e-05,
      "loss": 2.508,
      "step": 23926
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1596766710281372,
      "learning_rate": 1.5697656138315947e-05,
      "loss": 2.2681,
      "step": 23927
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0659629106521606,
      "learning_rate": 1.5697317765234646e-05,
      "loss": 2.5589,
      "step": 23928
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0703696012496948,
      "learning_rate": 1.5696979382494842e-05,
      "loss": 2.4918,
      "step": 23929
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9526802897453308,
      "learning_rate": 1.5696640990097114e-05,
      "loss": 2.375,
      "step": 23930
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0307037830352783,
      "learning_rate": 1.5696302588042032e-05,
      "loss": 2.3673,
      "step": 23931
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0571939945220947,
      "learning_rate": 1.569596417633017e-05,
      "loss": 2.3965,
      "step": 23932
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9557752013206482,
      "learning_rate": 1.5695625754962097e-05,
      "loss": 2.4893,
      "step": 23933
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2116416692733765,
      "learning_rate": 1.5695287323938396e-05,
      "loss": 2.3278,
      "step": 23934
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0462181568145752,
      "learning_rate": 1.5694948883259637e-05,
      "loss": 2.309,
      "step": 23935
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.150915265083313,
      "learning_rate": 1.569461043292639e-05,
      "loss": 2.1578,
      "step": 23936
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9817917943000793,
      "learning_rate": 1.5694271972939232e-05,
      "loss": 2.546,
      "step": 23937
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2234097719192505,
      "learning_rate": 1.569393350329874e-05,
      "loss": 2.3897,
      "step": 23938
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.057155966758728,
      "learning_rate": 1.5693595024005477e-05,
      "loss": 2.6381,
      "step": 23939
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1560450792312622,
      "learning_rate": 1.569325653506003e-05,
      "loss": 2.2438,
      "step": 23940
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9932188391685486,
      "learning_rate": 1.5692918036462965e-05,
      "loss": 2.2515,
      "step": 23941
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1368093490600586,
      "learning_rate": 1.5692579528214857e-05,
      "loss": 2.5991,
      "step": 23942
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1162434816360474,
      "learning_rate": 1.5692241010316283e-05,
      "loss": 2.6919,
      "step": 23943
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0396759510040283,
      "learning_rate": 1.5691902482767814e-05,
      "loss": 2.3983,
      "step": 23944
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1714063882827759,
      "learning_rate": 1.5691563945570024e-05,
      "loss": 2.4021,
      "step": 23945
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.127048373222351,
      "learning_rate": 1.5691225398723485e-05,
      "loss": 2.364,
      "step": 23946
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0249860286712646,
      "learning_rate": 1.5690886842228777e-05,
      "loss": 2.4562,
      "step": 23947
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.007112979888916,
      "learning_rate": 1.5690548276086465e-05,
      "loss": 2.3986,
      "step": 23948
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1065587997436523,
      "learning_rate": 1.5690209700297136e-05,
      "loss": 2.381,
      "step": 23949
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9971520304679871,
      "learning_rate": 1.568987111486135e-05,
      "loss": 2.4155,
      "step": 23950
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.185606598854065,
      "learning_rate": 1.5689532519779688e-05,
      "loss": 2.3514,
      "step": 23951
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1392325162887573,
      "learning_rate": 1.5689193915052725e-05,
      "loss": 2.6429,
      "step": 23952
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.067449688911438,
      "learning_rate": 1.568885530068103e-05,
      "loss": 2.367,
      "step": 23953
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1601015329360962,
      "learning_rate": 1.5688516676665183e-05,
      "loss": 2.2733,
      "step": 23954
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.082078456878662,
      "learning_rate": 1.5688178043005753e-05,
      "loss": 2.2068,
      "step": 23955
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2820563316345215,
      "learning_rate": 1.5687839399703317e-05,
      "loss": 2.2799,
      "step": 23956
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5428647994995117,
      "learning_rate": 1.568750074675845e-05,
      "loss": 2.1886,
      "step": 23957
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0454928874969482,
      "learning_rate": 1.5687162084171725e-05,
      "loss": 2.4311,
      "step": 23958
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9938676953315735,
      "learning_rate": 1.5686823411943713e-05,
      "loss": 2.3723,
      "step": 23959
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1389482021331787,
      "learning_rate": 1.5686484730074993e-05,
      "loss": 2.4,
      "step": 23960
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1381161212921143,
      "learning_rate": 1.5686146038566136e-05,
      "loss": 2.3008,
      "step": 23961
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0240846872329712,
      "learning_rate": 1.5685807337417716e-05,
      "loss": 2.3802,
      "step": 23962
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0005124807357788,
      "learning_rate": 1.5685468626630312e-05,
      "loss": 2.4343,
      "step": 23963
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.967801034450531,
      "learning_rate": 1.568512990620449e-05,
      "loss": 2.3455,
      "step": 23964
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.015476942062378,
      "learning_rate": 1.568479117614083e-05,
      "loss": 2.4043,
      "step": 23965
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1030473709106445,
      "learning_rate": 1.568445243643991e-05,
      "loss": 2.532,
      "step": 23966
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4878138303756714,
      "learning_rate": 1.5684113687102295e-05,
      "loss": 2.2319,
      "step": 23967
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0101245641708374,
      "learning_rate": 1.5683774928128562e-05,
      "loss": 2.1953,
      "step": 23968
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1572829484939575,
      "learning_rate": 1.5683436159519287e-05,
      "loss": 2.3698,
      "step": 23969
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0723340511322021,
      "learning_rate": 1.5683097381275046e-05,
      "loss": 2.4855,
      "step": 23970
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.004269003868103,
      "learning_rate": 1.5682758593396412e-05,
      "loss": 2.3457,
      "step": 23971
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0131272077560425,
      "learning_rate": 1.5682419795883958e-05,
      "loss": 2.4652,
      "step": 23972
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9981458187103271,
      "learning_rate": 1.5682080988738258e-05,
      "loss": 2.5634,
      "step": 23973
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1055201292037964,
      "learning_rate": 1.568174217195989e-05,
      "loss": 2.3095,
      "step": 23974
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0684090852737427,
      "learning_rate": 1.5681403345549422e-05,
      "loss": 2.3698,
      "step": 23975
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.061102032661438,
      "learning_rate": 1.5681064509507434e-05,
      "loss": 2.541,
      "step": 23976
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.089725136756897,
      "learning_rate": 1.56807256638345e-05,
      "loss": 2.5241,
      "step": 23977
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.090002417564392,
      "learning_rate": 1.5680386808531195e-05,
      "loss": 2.6248,
      "step": 23978
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0392889976501465,
      "learning_rate": 1.5680047943598085e-05,
      "loss": 2.6097,
      "step": 23979
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9997532963752747,
      "learning_rate": 1.5679709069035756e-05,
      "loss": 2.4663,
      "step": 23980
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0634939670562744,
      "learning_rate": 1.5679370184844777e-05,
      "loss": 2.3776,
      "step": 23981
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1804465055465698,
      "learning_rate": 1.5679031291025723e-05,
      "loss": 2.318,
      "step": 23982
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0742812156677246,
      "learning_rate": 1.5678692387579167e-05,
      "loss": 2.3226,
      "step": 23983
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0200154781341553,
      "learning_rate": 1.5678353474505686e-05,
      "loss": 2.4659,
      "step": 23984
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9726473093032837,
      "learning_rate": 1.5678014551805857e-05,
      "loss": 2.5286,
      "step": 23985
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.124159812927246,
      "learning_rate": 1.5677675619480245e-05,
      "loss": 2.6011,
      "step": 23986
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.074388861656189,
      "learning_rate": 1.5677336677529432e-05,
      "loss": 2.6966,
      "step": 23987
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0133532285690308,
      "learning_rate": 1.5676997725953996e-05,
      "loss": 2.407,
      "step": 23988
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0363887548446655,
      "learning_rate": 1.5676658764754505e-05,
      "loss": 2.3915,
      "step": 23989
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0281893014907837,
      "learning_rate": 1.5676319793931533e-05,
      "loss": 2.4934,
      "step": 23990
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.064711570739746,
      "learning_rate": 1.567598081348566e-05,
      "loss": 2.3469,
      "step": 23991
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1772278547286987,
      "learning_rate": 1.5675641823417457e-05,
      "loss": 2.3351,
      "step": 23992
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0016404390335083,
      "learning_rate": 1.5675302823727497e-05,
      "loss": 2.4734,
      "step": 23993
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9937394261360168,
      "learning_rate": 1.5674963814416365e-05,
      "loss": 2.4115,
      "step": 23994
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9738433361053467,
      "learning_rate": 1.567462479548462e-05,
      "loss": 2.5227,
      "step": 23995
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0112708806991577,
      "learning_rate": 1.567428576693285e-05,
      "loss": 2.4908,
      "step": 23996
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9485402703285217,
      "learning_rate": 1.567394672876162e-05,
      "loss": 2.2666,
      "step": 23997
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2797185182571411,
      "learning_rate": 1.5673607680971512e-05,
      "loss": 2.278,
      "step": 23998
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.968065083026886,
      "learning_rate": 1.5673268623563098e-05,
      "loss": 2.1558,
      "step": 23999
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0839205980300903,
      "learning_rate": 1.567292955653695e-05,
      "loss": 2.5919,
      "step": 24000
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.057989239692688,
      "learning_rate": 1.567259047989365e-05,
      "loss": 2.538,
      "step": 24001
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1523065567016602,
      "learning_rate": 1.5672251393633766e-05,
      "loss": 2.3976,
      "step": 24002
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1547187566757202,
      "learning_rate": 1.5671912297757876e-05,
      "loss": 2.3601,
      "step": 24003
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1717451810836792,
      "learning_rate": 1.5671573192266552e-05,
      "loss": 2.5085,
      "step": 24004
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0573265552520752,
      "learning_rate": 1.5671234077160375e-05,
      "loss": 2.3432,
      "step": 24005
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.133152723312378,
      "learning_rate": 1.5670894952439916e-05,
      "loss": 2.4192,
      "step": 24006
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2427523136138916,
      "learning_rate": 1.5670555818105748e-05,
      "loss": 2.3808,
      "step": 24007
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0995492935180664,
      "learning_rate": 1.5670216674158447e-05,
      "loss": 2.3911,
      "step": 24008
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0893311500549316,
      "learning_rate": 1.566987752059859e-05,
      "loss": 2.3944,
      "step": 24009
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0164321660995483,
      "learning_rate": 1.566953835742675e-05,
      "loss": 2.5711,
      "step": 24010
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9359325170516968,
      "learning_rate": 1.5669199184643506e-05,
      "loss": 2.2932,
      "step": 24011
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0577317476272583,
      "learning_rate": 1.566886000224942e-05,
      "loss": 2.4792,
      "step": 24012
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9839690327644348,
      "learning_rate": 1.5668520810245085e-05,
      "loss": 2.2034,
      "step": 24013
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0720182657241821,
      "learning_rate": 1.5668181608631067e-05,
      "loss": 2.3534,
      "step": 24014
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0924400091171265,
      "learning_rate": 1.566784239740794e-05,
      "loss": 2.3419,
      "step": 24015
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0075116157531738,
      "learning_rate": 1.5667503176576283e-05,
      "loss": 2.5377,
      "step": 24016
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9942338466644287,
      "learning_rate": 1.5667163946136666e-05,
      "loss": 2.4769,
      "step": 24017
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1323726177215576,
      "learning_rate": 1.5666824706089666e-05,
      "loss": 2.3293,
      "step": 24018
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0892069339752197,
      "learning_rate": 1.5666485456435862e-05,
      "loss": 2.3778,
      "step": 24019
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0701755285263062,
      "learning_rate": 1.5666146197175828e-05,
      "loss": 2.2536,
      "step": 24020
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2311911582946777,
      "learning_rate": 1.566580692831013e-05,
      "loss": 2.5525,
      "step": 24021
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.137596607208252,
      "learning_rate": 1.5665467649839358e-05,
      "loss": 2.3742,
      "step": 24022
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0092782974243164,
      "learning_rate": 1.5665128361764075e-05,
      "loss": 2.3331,
      "step": 24023
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0162659883499146,
      "learning_rate": 1.5664789064084866e-05,
      "loss": 2.2621,
      "step": 24024
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.070493221282959,
      "learning_rate": 1.5664449756802294e-05,
      "loss": 2.3702,
      "step": 24025
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0054988861083984,
      "learning_rate": 1.5664110439916948e-05,
      "loss": 2.3359,
      "step": 24026
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0007740259170532,
      "learning_rate": 1.5663771113429392e-05,
      "loss": 2.615,
      "step": 24027
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.068734884262085,
      "learning_rate": 1.5663431777340206e-05,
      "loss": 2.3448,
      "step": 24028
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1157162189483643,
      "learning_rate": 1.5663092431649966e-05,
      "loss": 2.4565,
      "step": 24029
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.159563660621643,
      "learning_rate": 1.5662753076359244e-05,
      "loss": 2.6674,
      "step": 24030
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9810140132904053,
      "learning_rate": 1.5662413711468624e-05,
      "loss": 2.4386,
      "step": 24031
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1342946290969849,
      "learning_rate": 1.5662074336978667e-05,
      "loss": 2.4868,
      "step": 24032
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9738770723342896,
      "learning_rate": 1.566173495288996e-05,
      "loss": 2.3606,
      "step": 24033
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0925856828689575,
      "learning_rate": 1.5661395559203073e-05,
      "loss": 2.394,
      "step": 24034
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1688092947006226,
      "learning_rate": 1.5661056155918584e-05,
      "loss": 2.4296,
      "step": 24035
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9339770674705505,
      "learning_rate": 1.566071674303707e-05,
      "loss": 2.2821,
      "step": 24036
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.061388611793518,
      "learning_rate": 1.5660377320559098e-05,
      "loss": 2.3635,
      "step": 24037
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.06948721408844,
      "learning_rate": 1.5660037888485256e-05,
      "loss": 2.3361,
      "step": 24038
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0331107378005981,
      "learning_rate": 1.565969844681611e-05,
      "loss": 2.5118,
      "step": 24039
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0690937042236328,
      "learning_rate": 1.5659358995552232e-05,
      "loss": 2.4375,
      "step": 24040
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0662306547164917,
      "learning_rate": 1.5659019534694212e-05,
      "loss": 2.4946,
      "step": 24041
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.131378173828125,
      "learning_rate": 1.565868006424261e-05,
      "loss": 2.5853,
      "step": 24042
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0706453323364258,
      "learning_rate": 1.5658340584198012e-05,
      "loss": 2.677,
      "step": 24043
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9738879799842834,
      "learning_rate": 1.565800109456099e-05,
      "loss": 2.4618,
      "step": 24044
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9970343708992004,
      "learning_rate": 1.565766159533212e-05,
      "loss": 2.48,
      "step": 24045
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1912904977798462,
      "learning_rate": 1.5657322086511975e-05,
      "loss": 2.3867,
      "step": 24046
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0689836740493774,
      "learning_rate": 1.5656982568101132e-05,
      "loss": 2.2047,
      "step": 24047
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1440770626068115,
      "learning_rate": 1.565664304010017e-05,
      "loss": 2.4883,
      "step": 24048
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9410773515701294,
      "learning_rate": 1.5656303502509658e-05,
      "loss": 2.4965,
      "step": 24049
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1821037530899048,
      "learning_rate": 1.565596395533018e-05,
      "loss": 2.3178,
      "step": 24050
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0762903690338135,
      "learning_rate": 1.5655624398562306e-05,
      "loss": 2.2739,
      "step": 24051
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1071619987487793,
      "learning_rate": 1.565528483220661e-05,
      "loss": 2.4961,
      "step": 24052
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3637882471084595,
      "learning_rate": 1.565494525626367e-05,
      "loss": 2.369,
      "step": 24053
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1303731203079224,
      "learning_rate": 1.5654605670734065e-05,
      "loss": 2.2012,
      "step": 24054
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0738104581832886,
      "learning_rate": 1.5654266075618365e-05,
      "loss": 2.4531,
      "step": 24055
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0918900966644287,
      "learning_rate": 1.565392647091715e-05,
      "loss": 2.4777,
      "step": 24056
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0016264915466309,
      "learning_rate": 1.5653586856630993e-05,
      "loss": 2.523,
      "step": 24057
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9959909319877625,
      "learning_rate": 1.5653247232760473e-05,
      "loss": 2.6128,
      "step": 24058
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.044270396232605,
      "learning_rate": 1.565290759930616e-05,
      "loss": 2.6219,
      "step": 24059
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.270764708518982,
      "learning_rate": 1.5652567956268633e-05,
      "loss": 2.4027,
      "step": 24060
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0237016677856445,
      "learning_rate": 1.5652228303648473e-05,
      "loss": 2.2565,
      "step": 24061
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9722432494163513,
      "learning_rate": 1.5651888641446246e-05,
      "loss": 2.4323,
      "step": 24062
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1167813539505005,
      "learning_rate": 1.5651548969662534e-05,
      "loss": 2.2735,
      "step": 24063
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0990653038024902,
      "learning_rate": 1.5651209288297914e-05,
      "loss": 2.6676,
      "step": 24064
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0859674215316772,
      "learning_rate": 1.565086959735296e-05,
      "loss": 2.0665,
      "step": 24065
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9229402542114258,
      "learning_rate": 1.565052989682824e-05,
      "loss": 2.5215,
      "step": 24066
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0378040075302124,
      "learning_rate": 1.5650190186724345e-05,
      "loss": 2.4933,
      "step": 24067
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.083482265472412,
      "learning_rate": 1.5649850467041838e-05,
      "loss": 2.6141,
      "step": 24068
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0101161003112793,
      "learning_rate": 1.56495107377813e-05,
      "loss": 2.3479,
      "step": 24069
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0215308666229248,
      "learning_rate": 1.5649170998943316e-05,
      "loss": 2.3239,
      "step": 24070
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0239646434783936,
      "learning_rate": 1.5648831250528442e-05,
      "loss": 2.4751,
      "step": 24071
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0481785535812378,
      "learning_rate": 1.564849149253727e-05,
      "loss": 2.5409,
      "step": 24072
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0353792905807495,
      "learning_rate": 1.564815172497037e-05,
      "loss": 2.3389,
      "step": 24073
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.075048565864563,
      "learning_rate": 1.5647811947828316e-05,
      "loss": 2.4281,
      "step": 24074
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9693998098373413,
      "learning_rate": 1.564747216111169e-05,
      "loss": 2.2995,
      "step": 24075
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0782705545425415,
      "learning_rate": 1.5647132364821064e-05,
      "loss": 2.6342,
      "step": 24076
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0819662809371948,
      "learning_rate": 1.5646792558957017e-05,
      "loss": 2.2084,
      "step": 24077
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9889410734176636,
      "learning_rate": 1.564645274352012e-05,
      "loss": 2.2523,
      "step": 24078
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2252682447433472,
      "learning_rate": 1.5646112918510953e-05,
      "loss": 2.436,
      "step": 24079
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0687880516052246,
      "learning_rate": 1.564577308393009e-05,
      "loss": 2.5412,
      "step": 24080
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1900123357772827,
      "learning_rate": 1.564543323977811e-05,
      "loss": 2.4828,
      "step": 24081
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2869502305984497,
      "learning_rate": 1.5645093386055586e-05,
      "loss": 2.5043,
      "step": 24082
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0009212493896484,
      "learning_rate": 1.5644753522763095e-05,
      "loss": 2.4807,
      "step": 24083
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0029937028884888,
      "learning_rate": 1.564441364990122e-05,
      "loss": 2.528,
      "step": 24084
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.018070936203003,
      "learning_rate": 1.5644073767470524e-05,
      "loss": 2.4762,
      "step": 24085
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1047160625457764,
      "learning_rate": 1.564373387547159e-05,
      "loss": 2.4036,
      "step": 24086
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2806308269500732,
      "learning_rate": 1.5643393973904995e-05,
      "loss": 2.4405,
      "step": 24087
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0509039163589478,
      "learning_rate": 1.5643054062771318e-05,
      "loss": 2.4656,
      "step": 24088
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0747259855270386,
      "learning_rate": 1.5642714142071125e-05,
      "loss": 2.4475,
      "step": 24089
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0907496213912964,
      "learning_rate": 1.5642374211805007e-05,
      "loss": 2.6235,
      "step": 24090
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0477105379104614,
      "learning_rate": 1.5642034271973528e-05,
      "loss": 2.4136,
      "step": 24091
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0486626625061035,
      "learning_rate": 1.5641694322577268e-05,
      "loss": 2.3617,
      "step": 24092
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.02476167678833,
      "learning_rate": 1.5641354363616804e-05,
      "loss": 2.5253,
      "step": 24093
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9299671649932861,
      "learning_rate": 1.5641014395092714e-05,
      "loss": 2.4374,
      "step": 24094
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0484070777893066,
      "learning_rate": 1.564067441700557e-05,
      "loss": 2.3253,
      "step": 24095
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0465383529663086,
      "learning_rate": 1.5640334429355956e-05,
      "loss": 2.4484,
      "step": 24096
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1161893606185913,
      "learning_rate": 1.563999443214444e-05,
      "loss": 2.5896,
      "step": 24097
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1214299201965332,
      "learning_rate": 1.56396544253716e-05,
      "loss": 2.4042,
      "step": 24098
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1792560815811157,
      "learning_rate": 1.5639314409038017e-05,
      "loss": 2.225,
      "step": 24099
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0070074796676636,
      "learning_rate": 1.563897438314426e-05,
      "loss": 2.401,
      "step": 24100
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.056375503540039,
      "learning_rate": 1.5638634347690914e-05,
      "loss": 2.471,
      "step": 24101
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.084411382675171,
      "learning_rate": 1.563829430267855e-05,
      "loss": 2.4454,
      "step": 24102
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0183300971984863,
      "learning_rate": 1.563795424810775e-05,
      "loss": 2.4699,
      "step": 24103
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0948163270950317,
      "learning_rate": 1.563761418397908e-05,
      "loss": 2.1952,
      "step": 24104
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.082139253616333,
      "learning_rate": 1.5637274110293125e-05,
      "loss": 2.4282,
      "step": 24105
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0128270387649536,
      "learning_rate": 1.563693402705046e-05,
      "loss": 2.4428,
      "step": 24106
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.947189211845398,
      "learning_rate": 1.5636593934251658e-05,
      "loss": 2.2638,
      "step": 24107
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1534063816070557,
      "learning_rate": 1.56362538318973e-05,
      "loss": 2.2795,
      "step": 24108
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.025179386138916,
      "learning_rate": 1.5635913719987962e-05,
      "loss": 2.6039,
      "step": 24109
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.149201512336731,
      "learning_rate": 1.563557359852422e-05,
      "loss": 2.2347,
      "step": 24110
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.031543254852295,
      "learning_rate": 1.563523346750665e-05,
      "loss": 2.349,
      "step": 24111
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4089711904525757,
      "learning_rate": 1.5634893326935828e-05,
      "loss": 2.349,
      "step": 24112
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5810707807540894,
      "learning_rate": 1.5634553176812333e-05,
      "loss": 2.4746,
      "step": 24113
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1517560482025146,
      "learning_rate": 1.5634213017136736e-05,
      "loss": 2.4068,
      "step": 24114
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9749888181686401,
      "learning_rate": 1.5633872847909623e-05,
      "loss": 2.4808,
      "step": 24115
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9765186905860901,
      "learning_rate": 1.5633532669131562e-05,
      "loss": 2.1538,
      "step": 24116
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1812347173690796,
      "learning_rate": 1.563319248080313e-05,
      "loss": 2.558,
      "step": 24117
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9468687772750854,
      "learning_rate": 1.5632852282924914e-05,
      "loss": 2.2672,
      "step": 24118
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.172654628753662,
      "learning_rate": 1.563251207549748e-05,
      "loss": 2.4839,
      "step": 24119
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0747944116592407,
      "learning_rate": 1.5632171858521407e-05,
      "loss": 2.4667,
      "step": 24120
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0299476385116577,
      "learning_rate": 1.5631831631997275e-05,
      "loss": 2.5308,
      "step": 24121
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2479554414749146,
      "learning_rate": 1.563149139592566e-05,
      "loss": 2.3733,
      "step": 24122
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.119027018547058,
      "learning_rate": 1.5631151150307135e-05,
      "loss": 2.5565,
      "step": 24123
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2065210342407227,
      "learning_rate": 1.5630810895142278e-05,
      "loss": 2.4938,
      "step": 24124
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1116052865982056,
      "learning_rate": 1.5630470630431668e-05,
      "loss": 2.3288,
      "step": 24125
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9331900477409363,
      "learning_rate": 1.5630130356175884e-05,
      "loss": 2.2808,
      "step": 24126
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0143160820007324,
      "learning_rate": 1.56297900723755e-05,
      "loss": 2.32,
      "step": 24127
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.133355736732483,
      "learning_rate": 1.5629449779031093e-05,
      "loss": 2.5964,
      "step": 24128
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1497986316680908,
      "learning_rate": 1.5629109476143238e-05,
      "loss": 2.3872,
      "step": 24129
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9355082511901855,
      "learning_rate": 1.5628769163712512e-05,
      "loss": 2.722,
      "step": 24130
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9930064082145691,
      "learning_rate": 1.5628428841739494e-05,
      "loss": 2.0224,
      "step": 24131
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0756173133850098,
      "learning_rate": 1.562808851022476e-05,
      "loss": 2.4534,
      "step": 24132
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0331218242645264,
      "learning_rate": 1.562774816916889e-05,
      "loss": 2.5008,
      "step": 24133
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9766289591789246,
      "learning_rate": 1.5627407818572455e-05,
      "loss": 2.5673,
      "step": 24134
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0200449228286743,
      "learning_rate": 1.562706745843604e-05,
      "loss": 2.3112,
      "step": 24135
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.997567892074585,
      "learning_rate": 1.5626727088760218e-05,
      "loss": 2.3472,
      "step": 24136
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1006611585617065,
      "learning_rate": 1.562638670954556e-05,
      "loss": 2.3992,
      "step": 24137
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0295339822769165,
      "learning_rate": 1.562604632079265e-05,
      "loss": 2.4256,
      "step": 24138
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0233319997787476,
      "learning_rate": 1.5625705922502064e-05,
      "loss": 2.5917,
      "step": 24139
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1149334907531738,
      "learning_rate": 1.562536551467438e-05,
      "loss": 2.4915,
      "step": 24140
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9999695420265198,
      "learning_rate": 1.562502509731017e-05,
      "loss": 2.39,
      "step": 24141
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9901835918426514,
      "learning_rate": 1.5624684670410016e-05,
      "loss": 2.5445,
      "step": 24142
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0195410251617432,
      "learning_rate": 1.5624344233974497e-05,
      "loss": 2.4632,
      "step": 24143
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3300508260726929,
      "learning_rate": 1.5624003788004186e-05,
      "loss": 2.3945,
      "step": 24144
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1972408294677734,
      "learning_rate": 1.562366333249966e-05,
      "loss": 2.1962,
      "step": 24145
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1000016927719116,
      "learning_rate": 1.56233228674615e-05,
      "loss": 2.2358,
      "step": 24146
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1051589250564575,
      "learning_rate": 1.5622982392890273e-05,
      "loss": 2.4611,
      "step": 24147
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1586941480636597,
      "learning_rate": 1.562264190878657e-05,
      "loss": 2.2268,
      "step": 24148
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0500147342681885,
      "learning_rate": 1.562230141515096e-05,
      "loss": 2.6632,
      "step": 24149
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0717723369598389,
      "learning_rate": 1.562196091198402e-05,
      "loss": 2.5523,
      "step": 24150
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0171470642089844,
      "learning_rate": 1.562162039928633e-05,
      "loss": 2.1954,
      "step": 24151
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.158572793006897,
      "learning_rate": 1.5621279877058474e-05,
      "loss": 2.4973,
      "step": 24152
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0738611221313477,
      "learning_rate": 1.5620939345301012e-05,
      "loss": 2.4738,
      "step": 24153
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.137303113937378,
      "learning_rate": 1.5620598804014538e-05,
      "loss": 2.3288,
      "step": 24154
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0096595287322998,
      "learning_rate": 1.5620258253199617e-05,
      "loss": 2.5696,
      "step": 24155
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1254799365997314,
      "learning_rate": 1.5619917692856836e-05,
      "loss": 2.4016,
      "step": 24156
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0146797895431519,
      "learning_rate": 1.5619577122986767e-05,
      "loss": 2.5164,
      "step": 24157
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0197973251342773,
      "learning_rate": 1.5619236543589987e-05,
      "loss": 2.5619,
      "step": 24158
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9797958135604858,
      "learning_rate": 1.561889595466708e-05,
      "loss": 2.4174,
      "step": 24159
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9486579298973083,
      "learning_rate": 1.561855535621861e-05,
      "loss": 2.499,
      "step": 24160
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9564541578292847,
      "learning_rate": 1.561821474824517e-05,
      "loss": 2.4066,
      "step": 24161
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9240351915359497,
      "learning_rate": 1.5617874130747325e-05,
      "loss": 2.3625,
      "step": 24162
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9690752029418945,
      "learning_rate": 1.5617533503725663e-05,
      "loss": 2.1935,
      "step": 24163
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.049857258796692,
      "learning_rate": 1.5617192867180752e-05,
      "loss": 2.6437,
      "step": 24164
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0980197191238403,
      "learning_rate": 1.5616852221113175e-05,
      "loss": 2.2894,
      "step": 24165
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9459423422813416,
      "learning_rate": 1.5616511565523508e-05,
      "loss": 2.3258,
      "step": 24166
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9070435166358948,
      "learning_rate": 1.5616170900412326e-05,
      "loss": 2.188,
      "step": 24167
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.152032732963562,
      "learning_rate": 1.561583022578021e-05,
      "loss": 2.5348,
      "step": 24168
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0887305736541748,
      "learning_rate": 1.561548954162774e-05,
      "loss": 2.344,
      "step": 24169
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9784386157989502,
      "learning_rate": 1.561514884795549e-05,
      "loss": 2.4093,
      "step": 24170
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1842563152313232,
      "learning_rate": 1.5614808144764033e-05,
      "loss": 2.4662,
      "step": 24171
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0568981170654297,
      "learning_rate": 1.5614467432053953e-05,
      "loss": 2.6116,
      "step": 24172
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9524630308151245,
      "learning_rate": 1.5614126709825825e-05,
      "loss": 2.4832,
      "step": 24173
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0464060306549072,
      "learning_rate": 1.561378597808023e-05,
      "loss": 2.6463,
      "step": 24174
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0642744302749634,
      "learning_rate": 1.5613445236817742e-05,
      "loss": 2.1499,
      "step": 24175
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.24574613571167,
      "learning_rate": 1.561310448603894e-05,
      "loss": 2.3877,
      "step": 24176
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1671123504638672,
      "learning_rate": 1.5612763725744398e-05,
      "loss": 2.388,
      "step": 24177
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.9864892363548279,
      "learning_rate": 1.5612422955934703e-05,
      "loss": 2.5177,
      "step": 24178
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1283692121505737,
      "learning_rate": 1.5612082176610425e-05,
      "loss": 2.4466,
      "step": 24179
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0981985330581665,
      "learning_rate": 1.5611741387772138e-05,
      "loss": 2.5914,
      "step": 24180
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0141420364379883,
      "learning_rate": 1.561140058942043e-05,
      "loss": 2.3708,
      "step": 24181
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.107426404953003,
      "learning_rate": 1.5611059781555876e-05,
      "loss": 2.3829,
      "step": 24182
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.079338788986206,
      "learning_rate": 1.561071896417905e-05,
      "loss": 2.4473,
      "step": 24183
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3732013702392578,
      "learning_rate": 1.561037813729053e-05,
      "loss": 2.4522,
      "step": 24184
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1017807722091675,
      "learning_rate": 1.5610037300890893e-05,
      "loss": 2.5763,
      "step": 24185
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.201764702796936,
      "learning_rate": 1.5609696454980722e-05,
      "loss": 2.1041,
      "step": 24186
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0677753686904907,
      "learning_rate": 1.5609355599560593e-05,
      "loss": 2.4806,
      "step": 24187
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.960845410823822,
      "learning_rate": 1.5609014734631083e-05,
      "loss": 2.368,
      "step": 24188
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1424702405929565,
      "learning_rate": 1.5608673860192768e-05,
      "loss": 2.6023,
      "step": 24189
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1848684549331665,
      "learning_rate": 1.5608332976246226e-05,
      "loss": 2.5119,
      "step": 24190
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1045721769332886,
      "learning_rate": 1.560799208279204e-05,
      "loss": 2.604,
      "step": 24191
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.0822323560714722,
      "learning_rate": 1.560765117983078e-05,
      "loss": 2.265,
      "step": 24192
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1439698934555054,
      "learning_rate": 1.5607310267363032e-05,
      "loss": 2.4125,
      "step": 24193
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9339233040809631,
      "learning_rate": 1.560696934538937e-05,
      "loss": 2.3967,
      "step": 24194
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0133484601974487,
      "learning_rate": 1.5606628413910372e-05,
      "loss": 2.3795,
      "step": 24195
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0874520540237427,
      "learning_rate": 1.5606287472926617e-05,
      "loss": 2.3311,
      "step": 24196
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1287238597869873,
      "learning_rate": 1.5605946522438678e-05,
      "loss": 2.4273,
      "step": 24197
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0934034585952759,
      "learning_rate": 1.5605605562447138e-05,
      "loss": 2.5581,
      "step": 24198
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0134764909744263,
      "learning_rate": 1.5605264592952576e-05,
      "loss": 2.3917,
      "step": 24199
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1047230958938599,
      "learning_rate": 1.560492361395557e-05,
      "loss": 2.4002,
      "step": 24200
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1436302661895752,
      "learning_rate": 1.560458262545669e-05,
      "loss": 2.3743,
      "step": 24201
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3048893213272095,
      "learning_rate": 1.560424162745653e-05,
      "loss": 2.2837,
      "step": 24202
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0237776041030884,
      "learning_rate": 1.560390061995565e-05,
      "loss": 2.41,
      "step": 24203
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0697113275527954,
      "learning_rate": 1.560355960295464e-05,
      "loss": 2.6512,
      "step": 24204
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0943527221679688,
      "learning_rate": 1.5603218576454072e-05,
      "loss": 2.3342,
      "step": 24205
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2633354663848877,
      "learning_rate": 1.5602877540454525e-05,
      "loss": 2.3885,
      "step": 24206
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1121692657470703,
      "learning_rate": 1.5602536494956585e-05,
      "loss": 2.4571,
      "step": 24207
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2300058603286743,
      "learning_rate": 1.560219543996082e-05,
      "loss": 2.462,
      "step": 24208
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.034761667251587,
      "learning_rate": 1.5601854375467813e-05,
      "loss": 2.2174,
      "step": 24209
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0651742219924927,
      "learning_rate": 1.5601513301478144e-05,
      "loss": 2.6706,
      "step": 24210
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0123108625411987,
      "learning_rate": 1.5601172217992385e-05,
      "loss": 2.4302,
      "step": 24211
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0304275751113892,
      "learning_rate": 1.560083112501112e-05,
      "loss": 2.2276,
      "step": 24212
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.92754727602005,
      "learning_rate": 1.5600490022534925e-05,
      "loss": 2.6554,
      "step": 24213
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0073792934417725,
      "learning_rate": 1.5600148910564374e-05,
      "loss": 2.2842,
      "step": 24214
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0209856033325195,
      "learning_rate": 1.5599807789100053e-05,
      "loss": 2.4646,
      "step": 24215
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0025478601455688,
      "learning_rate": 1.5599466658142542e-05,
      "loss": 2.4821,
      "step": 24216
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0083304643630981,
      "learning_rate": 1.559912551769241e-05,
      "loss": 2.6581,
      "step": 24217
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.047029972076416,
      "learning_rate": 1.5598784367750233e-05,
      "loss": 2.2658,
      "step": 24218
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0334181785583496,
      "learning_rate": 1.5598443208316604e-05,
      "loss": 2.3429,
      "step": 24219
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0378634929656982,
      "learning_rate": 1.5598102039392088e-05,
      "loss": 2.3723,
      "step": 24220
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.135887622833252,
      "learning_rate": 1.5597760860977273e-05,
      "loss": 2.3505,
      "step": 24221
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0631715059280396,
      "learning_rate": 1.5597419673072732e-05,
      "loss": 2.4614,
      "step": 24222
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.982447624206543,
      "learning_rate": 1.5597078475679043e-05,
      "loss": 2.3647,
      "step": 24223
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0271364450454712,
      "learning_rate": 1.559673726879679e-05,
      "loss": 2.5188,
      "step": 24224
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0910052061080933,
      "learning_rate": 1.5596396052426543e-05,
      "loss": 2.4683,
      "step": 24225
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0543826818466187,
      "learning_rate": 1.5596054826568884e-05,
      "loss": 2.4308,
      "step": 24226
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0707489252090454,
      "learning_rate": 1.5595713591224395e-05,
      "loss": 2.3328,
      "step": 24227
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0324041843414307,
      "learning_rate": 1.5595372346393653e-05,
      "loss": 2.3033,
      "step": 24228
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0670830011367798,
      "learning_rate": 1.5595031092077232e-05,
      "loss": 2.4708,
      "step": 24229
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0760561227798462,
      "learning_rate": 1.5594689828275713e-05,
      "loss": 2.3409,
      "step": 24230
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0812268257141113,
      "learning_rate": 1.5594348554989677e-05,
      "loss": 2.504,
      "step": 24231
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0826284885406494,
      "learning_rate": 1.55940072722197e-05,
      "loss": 2.4526,
      "step": 24232
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0288982391357422,
      "learning_rate": 1.5593665979966363e-05,
      "loss": 2.5268,
      "step": 24233
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1357189416885376,
      "learning_rate": 1.5593324678230242e-05,
      "loss": 2.406,
      "step": 24234
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1097595691680908,
      "learning_rate": 1.5592983367011917e-05,
      "loss": 2.5267,
      "step": 24235
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.080044150352478,
      "learning_rate": 1.5592642046311965e-05,
      "loss": 2.3098,
      "step": 24236
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.042868971824646,
      "learning_rate": 1.5592300716130968e-05,
      "loss": 2.2865,
      "step": 24237
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1037596464157104,
      "learning_rate": 1.55919593764695e-05,
      "loss": 2.5204,
      "step": 24238
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0551735162734985,
      "learning_rate": 1.559161802732814e-05,
      "loss": 2.3926,
      "step": 24239
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.011553406715393,
      "learning_rate": 1.5591276668707472e-05,
      "loss": 2.3475,
      "step": 24240
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.043572187423706,
      "learning_rate": 1.5590935300608072e-05,
      "loss": 2.2949,
      "step": 24241
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0175906419754028,
      "learning_rate": 1.559059392303052e-05,
      "loss": 2.5632,
      "step": 24242
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9619408845901489,
      "learning_rate": 1.559025253597539e-05,
      "loss": 2.2611,
      "step": 24243
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9653187990188599,
      "learning_rate": 1.5589911139443264e-05,
      "loss": 2.3292,
      "step": 24244
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2148374319076538,
      "learning_rate": 1.558956973343472e-05,
      "loss": 2.2934,
      "step": 24245
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9729490280151367,
      "learning_rate": 1.5589228317950337e-05,
      "loss": 2.5372,
      "step": 24246
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0294443368911743,
      "learning_rate": 1.5588886892990696e-05,
      "loss": 2.6049,
      "step": 24247
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.04388427734375,
      "learning_rate": 1.558854545855637e-05,
      "loss": 2.3811,
      "step": 24248
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9848626852035522,
      "learning_rate": 1.5588204014647946e-05,
      "loss": 2.5661,
      "step": 24249
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0422083139419556,
      "learning_rate": 1.5587862561265996e-05,
      "loss": 2.4828,
      "step": 24250
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.047980785369873,
      "learning_rate": 1.5587521098411104e-05,
      "loss": 2.3129,
      "step": 24251
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.182592749595642,
      "learning_rate": 1.5587179626083842e-05,
      "loss": 2.3935,
      "step": 24252
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9552311897277832,
      "learning_rate": 1.5586838144284794e-05,
      "loss": 2.5015,
      "step": 24253
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1546227931976318,
      "learning_rate": 1.558649665301454e-05,
      "loss": 2.545,
      "step": 24254
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.094667673110962,
      "learning_rate": 1.5586155152273658e-05,
      "loss": 2.6353,
      "step": 24255
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1586873531341553,
      "learning_rate": 1.5585813642062722e-05,
      "loss": 2.3068,
      "step": 24256
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0688395500183105,
      "learning_rate": 1.5585472122382314e-05,
      "loss": 2.4293,
      "step": 24257
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1066205501556396,
      "learning_rate": 1.558513059323302e-05,
      "loss": 2.4279,
      "step": 24258
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0609889030456543,
      "learning_rate": 1.5584789054615407e-05,
      "loss": 2.1919,
      "step": 24259
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.028663158416748,
      "learning_rate": 1.5584447506530064e-05,
      "loss": 2.19,
      "step": 24260
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9919793009757996,
      "learning_rate": 1.5584105948977563e-05,
      "loss": 2.4229,
      "step": 24261
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1328084468841553,
      "learning_rate": 1.5583764381958486e-05,
      "loss": 2.3749,
      "step": 24262
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0852327346801758,
      "learning_rate": 1.5583422805473412e-05,
      "loss": 2.6296,
      "step": 24263
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2161412239074707,
      "learning_rate": 1.558308121952292e-05,
      "loss": 2.2413,
      "step": 24264
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1920239925384521,
      "learning_rate": 1.558273962410759e-05,
      "loss": 2.4728,
      "step": 24265
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0228941440582275,
      "learning_rate": 1.5582398019227996e-05,
      "loss": 2.5861,
      "step": 24266
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.020524501800537,
      "learning_rate": 1.5582056404884728e-05,
      "loss": 2.3207,
      "step": 24267
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9506626725196838,
      "learning_rate": 1.5581714781078353e-05,
      "loss": 2.3481,
      "step": 24268
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.222224473953247,
      "learning_rate": 1.5581373147809457e-05,
      "loss": 2.4112,
      "step": 24269
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9897230863571167,
      "learning_rate": 1.5581031505078618e-05,
      "loss": 2.3436,
      "step": 24270
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1016123294830322,
      "learning_rate": 1.5580689852886414e-05,
      "loss": 2.6055,
      "step": 24271
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.16618013381958,
      "learning_rate": 1.5580348191233428e-05,
      "loss": 2.5266,
      "step": 24272
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0111185312271118,
      "learning_rate": 1.5580006520120232e-05,
      "loss": 2.3388,
      "step": 24273
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.028045654296875,
      "learning_rate": 1.5579664839547413e-05,
      "loss": 2.4438,
      "step": 24274
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0552992820739746,
      "learning_rate": 1.5579323149515543e-05,
      "loss": 2.2406,
      "step": 24275
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9773495197296143,
      "learning_rate": 1.557898145002521e-05,
      "loss": 2.3846,
      "step": 24276
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9506726861000061,
      "learning_rate": 1.5578639741076983e-05,
      "loss": 2.1933,
      "step": 24277
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9714831709861755,
      "learning_rate": 1.5578298022671447e-05,
      "loss": 2.4917,
      "step": 24278
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.09647536277771,
      "learning_rate": 1.5577956294809182e-05,
      "loss": 2.4351,
      "step": 24279
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0566211938858032,
      "learning_rate": 1.5577614557490767e-05,
      "loss": 2.2968,
      "step": 24280
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0151091814041138,
      "learning_rate": 1.5577272810716783e-05,
      "loss": 2.4773,
      "step": 24281
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0951703786849976,
      "learning_rate": 1.55769310544878e-05,
      "loss": 2.3322,
      "step": 24282
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0358155965805054,
      "learning_rate": 1.557658928880441e-05,
      "loss": 2.2524,
      "step": 24283
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0190128087997437,
      "learning_rate": 1.5576247513667183e-05,
      "loss": 2.3816,
      "step": 24284
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.963744044303894,
      "learning_rate": 1.5575905729076706e-05,
      "loss": 2.4042,
      "step": 24285
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9849660992622375,
      "learning_rate": 1.5575563935033554e-05,
      "loss": 2.506,
      "step": 24286
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.001465916633606,
      "learning_rate": 1.5575222131538304e-05,
      "loss": 2.3647,
      "step": 24287
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1314061880111694,
      "learning_rate": 1.557488031859154e-05,
      "loss": 2.4797,
      "step": 24288
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1731666326522827,
      "learning_rate": 1.5574538496193837e-05,
      "loss": 2.4905,
      "step": 24289
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9345918297767639,
      "learning_rate": 1.557419666434578e-05,
      "loss": 2.2259,
      "step": 24290
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1824641227722168,
      "learning_rate": 1.5573854823047943e-05,
      "loss": 2.2906,
      "step": 24291
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9493207931518555,
      "learning_rate": 1.557351297230091e-05,
      "loss": 2.3174,
      "step": 24292
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3117016553878784,
      "learning_rate": 1.5573171112105257e-05,
      "loss": 2.3288,
      "step": 24293
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1047977209091187,
      "learning_rate": 1.557282924246157e-05,
      "loss": 2.6713,
      "step": 24294
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0540504455566406,
      "learning_rate": 1.557248736337042e-05,
      "loss": 2.4336,
      "step": 24295
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9721533060073853,
      "learning_rate": 1.5572145474832392e-05,
      "loss": 2.4611,
      "step": 24296
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1401690244674683,
      "learning_rate": 1.5571803576848062e-05,
      "loss": 2.6902,
      "step": 24297
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0351569652557373,
      "learning_rate": 1.5571461669418014e-05,
      "loss": 2.6162,
      "step": 24298
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9989266991615295,
      "learning_rate": 1.5571119752542823e-05,
      "loss": 2.2781,
      "step": 24299
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0770647525787354,
      "learning_rate": 1.5570777826223073e-05,
      "loss": 2.3146,
      "step": 24300
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.793081283569336,
      "learning_rate": 1.5570435890459342e-05,
      "loss": 2.0813,
      "step": 24301
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0248106718063354,
      "learning_rate": 1.557009394525221e-05,
      "loss": 2.6836,
      "step": 24302
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0475412607192993,
      "learning_rate": 1.5569751990602254e-05,
      "loss": 2.5863,
      "step": 24303
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0444142818450928,
      "learning_rate": 1.5569410026510055e-05,
      "loss": 2.5158,
      "step": 24304
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0014550685882568,
      "learning_rate": 1.5569068052976194e-05,
      "loss": 2.2338,
      "step": 24305
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0976626873016357,
      "learning_rate": 1.556872607000125e-05,
      "loss": 2.4501,
      "step": 24306
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.056597352027893,
      "learning_rate": 1.5568384077585803e-05,
      "loss": 2.394,
      "step": 24307
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0693033933639526,
      "learning_rate": 1.5568042075730433e-05,
      "loss": 2.3113,
      "step": 24308
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0395795106887817,
      "learning_rate": 1.5567700064435717e-05,
      "loss": 2.3805,
      "step": 24309
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.097318410873413,
      "learning_rate": 1.5567358043702243e-05,
      "loss": 2.3501,
      "step": 24310
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0057412385940552,
      "learning_rate": 1.556701601353058e-05,
      "loss": 2.3674,
      "step": 24311
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.8866073489189148,
      "learning_rate": 1.5566673973921313e-05,
      "loss": 2.278,
      "step": 24312
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0882599353790283,
      "learning_rate": 1.5566331924875023e-05,
      "loss": 2.2526,
      "step": 24313
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9821467399597168,
      "learning_rate": 1.556598986639229e-05,
      "loss": 2.2742,
      "step": 24314
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0235562324523926,
      "learning_rate": 1.556564779847369e-05,
      "loss": 2.4738,
      "step": 24315
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.978018581867218,
      "learning_rate": 1.5565305721119804e-05,
      "loss": 2.3293,
      "step": 24316
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0818142890930176,
      "learning_rate": 1.556496363433122e-05,
      "loss": 2.5044,
      "step": 24317
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0409430265426636,
      "learning_rate": 1.5564621538108506e-05,
      "loss": 2.3568,
      "step": 24318
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1183717250823975,
      "learning_rate": 1.5564279432452246e-05,
      "loss": 2.4572,
      "step": 24319
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9812202453613281,
      "learning_rate": 1.5563937317363025e-05,
      "loss": 2.3763,
      "step": 24320
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0796717405319214,
      "learning_rate": 1.5563595192841417e-05,
      "loss": 2.3724,
      "step": 24321
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1881914138793945,
      "learning_rate": 1.5563253058888006e-05,
      "loss": 2.3458,
      "step": 24322
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2985305786132812,
      "learning_rate": 1.5562910915503368e-05,
      "loss": 2.3018,
      "step": 24323
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.073665738105774,
      "learning_rate": 1.5562568762688085e-05,
      "loss": 2.2864,
      "step": 24324
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0408306121826172,
      "learning_rate": 1.556222660044274e-05,
      "loss": 2.4169,
      "step": 24325
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0753180980682373,
      "learning_rate": 1.5561884428767906e-05,
      "loss": 2.6294,
      "step": 24326
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9387456178665161,
      "learning_rate": 1.556154224766417e-05,
      "loss": 2.3,
      "step": 24327
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.08022940158844,
      "learning_rate": 1.5561200057132107e-05,
      "loss": 2.3758,
      "step": 24328
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0981531143188477,
      "learning_rate": 1.5560857857172305e-05,
      "loss": 2.6235,
      "step": 24329
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9906651973724365,
      "learning_rate": 1.5560515647785332e-05,
      "loss": 2.3177,
      "step": 24330
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9569207429885864,
      "learning_rate": 1.556017342897178e-05,
      "loss": 2.3617,
      "step": 24331
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1626758575439453,
      "learning_rate": 1.555983120073222e-05,
      "loss": 2.3651,
      "step": 24332
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1197474002838135,
      "learning_rate": 1.5559488963067238e-05,
      "loss": 2.3183,
      "step": 24333
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1004762649536133,
      "learning_rate": 1.5559146715977415e-05,
      "loss": 2.4948,
      "step": 24334
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1089426279067993,
      "learning_rate": 1.5558804459463324e-05,
      "loss": 2.3868,
      "step": 24335
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2282862663269043,
      "learning_rate": 1.555846219352555e-05,
      "loss": 2.3236,
      "step": 24336
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.079408049583435,
      "learning_rate": 1.5558119918164675e-05,
      "loss": 2.5956,
      "step": 24337
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1877415180206299,
      "learning_rate": 1.5557777633381276e-05,
      "loss": 2.5207,
      "step": 24338
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0756617784500122,
      "learning_rate": 1.5557435339175933e-05,
      "loss": 2.4664,
      "step": 24339
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1420449018478394,
      "learning_rate": 1.5557093035549232e-05,
      "loss": 2.2693,
      "step": 24340
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1832165718078613,
      "learning_rate": 1.5556750722501743e-05,
      "loss": 2.4411,
      "step": 24341
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.041029691696167,
      "learning_rate": 1.5556408400034058e-05,
      "loss": 2.2848,
      "step": 24342
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9689757823944092,
      "learning_rate": 1.5556066068146747e-05,
      "loss": 2.4598,
      "step": 24343
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1443192958831787,
      "learning_rate": 1.55557237268404e-05,
      "loss": 2.3614,
      "step": 24344
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0766221284866333,
      "learning_rate": 1.555538137611559e-05,
      "loss": 2.4666,
      "step": 24345
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.14182448387146,
      "learning_rate": 1.5555039015972896e-05,
      "loss": 2.4685,
      "step": 24346
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0457279682159424,
      "learning_rate": 1.5554696646412907e-05,
      "loss": 2.2929,
      "step": 24347
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0488812923431396,
      "learning_rate": 1.5554354267436198e-05,
      "loss": 2.62,
      "step": 24348
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0499109029769897,
      "learning_rate": 1.5554011879043353e-05,
      "loss": 2.7138,
      "step": 24349
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0954188108444214,
      "learning_rate": 1.5553669481234944e-05,
      "loss": 2.4606,
      "step": 24350
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0275931358337402,
      "learning_rate": 1.5553327074011556e-05,
      "loss": 2.3935,
      "step": 24351
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0425894260406494,
      "learning_rate": 1.5552984657373774e-05,
      "loss": 2.3901,
      "step": 24352
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1558334827423096,
      "learning_rate": 1.5552642231322173e-05,
      "loss": 2.3753,
      "step": 24353
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1802369356155396,
      "learning_rate": 1.555229979585734e-05,
      "loss": 2.4621,
      "step": 24354
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0882378816604614,
      "learning_rate": 1.5551957350979843e-05,
      "loss": 2.3374,
      "step": 24355
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0934027433395386,
      "learning_rate": 1.5551614896690274e-05,
      "loss": 2.2923,
      "step": 24356
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9742549657821655,
      "learning_rate": 1.5551272432989207e-05,
      "loss": 2.4299,
      "step": 24357
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.5812551975250244,
      "learning_rate": 1.555092995987723e-05,
      "loss": 2.3387,
      "step": 24358
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1662980318069458,
      "learning_rate": 1.5550587477354922e-05,
      "loss": 2.5079,
      "step": 24359
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1145390272140503,
      "learning_rate": 1.5550244985422856e-05,
      "loss": 2.3157,
      "step": 24360
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9999150633811951,
      "learning_rate": 1.5549902484081617e-05,
      "loss": 2.3979,
      "step": 24361
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1628830432891846,
      "learning_rate": 1.5549559973331786e-05,
      "loss": 2.2774,
      "step": 24362
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.298309803009033,
      "learning_rate": 1.5549217453173943e-05,
      "loss": 2.3536,
      "step": 24363
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.975938618183136,
      "learning_rate": 1.554887492360867e-05,
      "loss": 2.438,
      "step": 24364
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9865092039108276,
      "learning_rate": 1.554853238463655e-05,
      "loss": 2.4099,
      "step": 24365
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1850615739822388,
      "learning_rate": 1.5548189836258157e-05,
      "loss": 2.478,
      "step": 24366
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2007958889007568,
      "learning_rate": 1.554784727847408e-05,
      "loss": 2.5008,
      "step": 24367
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.042198896408081,
      "learning_rate": 1.554750471128489e-05,
      "loss": 2.4478,
      "step": 24368
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.049216628074646,
      "learning_rate": 1.5547162134691176e-05,
      "loss": 2.4283,
      "step": 24369
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1374108791351318,
      "learning_rate": 1.5546819548693513e-05,
      "loss": 2.4294,
      "step": 24370
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0814011096954346,
      "learning_rate": 1.5546476953292484e-05,
      "loss": 2.43,
      "step": 24371
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0456335544586182,
      "learning_rate": 1.554613434848867e-05,
      "loss": 2.543,
      "step": 24372
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2186516523361206,
      "learning_rate": 1.5545791734282655e-05,
      "loss": 2.4451,
      "step": 24373
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0515069961547852,
      "learning_rate": 1.5545449110675018e-05,
      "loss": 2.5602,
      "step": 24374
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0127638578414917,
      "learning_rate": 1.5545106477666336e-05,
      "loss": 2.6305,
      "step": 24375
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.056924819946289,
      "learning_rate": 1.5544763835257193e-05,
      "loss": 2.4737,
      "step": 24376
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1729074716567993,
      "learning_rate": 1.5544421183448167e-05,
      "loss": 2.2824,
      "step": 24377
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0057127475738525,
      "learning_rate": 1.5544078522239843e-05,
      "loss": 2.3114,
      "step": 24378
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0027556419372559,
      "learning_rate": 1.55437358516328e-05,
      "loss": 2.2991,
      "step": 24379
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.354971170425415,
      "learning_rate": 1.554339317162762e-05,
      "loss": 2.4293,
      "step": 24380
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0175232887268066,
      "learning_rate": 1.5543050482224883e-05,
      "loss": 2.4623,
      "step": 24381
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1335701942443848,
      "learning_rate": 1.554270778342517e-05,
      "loss": 2.4115,
      "step": 24382
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0333644151687622,
      "learning_rate": 1.554236507522906e-05,
      "loss": 2.6131,
      "step": 24383
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0638874769210815,
      "learning_rate": 1.5542022357637136e-05,
      "loss": 2.2902,
      "step": 24384
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0263397693634033,
      "learning_rate": 1.554167963064998e-05,
      "loss": 2.46,
      "step": 24385
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.057218313217163,
      "learning_rate": 1.554133689426817e-05,
      "loss": 2.3417,
      "step": 24386
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0657461881637573,
      "learning_rate": 1.5540994148492295e-05,
      "loss": 2.4209,
      "step": 24387
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0959070920944214,
      "learning_rate": 1.5540651393322925e-05,
      "loss": 2.5648,
      "step": 24388
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0331668853759766,
      "learning_rate": 1.554030862876065e-05,
      "loss": 2.4622,
      "step": 24389
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0750160217285156,
      "learning_rate": 1.5539965854806044e-05,
      "loss": 2.2789,
      "step": 24390
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0802905559539795,
      "learning_rate": 1.553962307145969e-05,
      "loss": 2.4962,
      "step": 24391
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0562636852264404,
      "learning_rate": 1.5539280278722172e-05,
      "loss": 2.3594,
      "step": 24392
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0465432405471802,
      "learning_rate": 1.5538937476594068e-05,
      "loss": 2.3533,
      "step": 24393
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0136897563934326,
      "learning_rate": 1.5538594665075963e-05,
      "loss": 2.4286,
      "step": 24394
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0653843879699707,
      "learning_rate": 1.5538251844168434e-05,
      "loss": 2.4376,
      "step": 24395
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0157073736190796,
      "learning_rate": 1.5537909013872066e-05,
      "loss": 2.3053,
      "step": 24396
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.8832012414932251,
      "learning_rate": 1.553756617418744e-05,
      "loss": 2.3042,
      "step": 24397
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0056629180908203,
      "learning_rate": 1.553722332511513e-05,
      "loss": 2.4024,
      "step": 24398
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.978483259677887,
      "learning_rate": 1.553688046665572e-05,
      "loss": 2.4206,
      "step": 24399
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0377427339553833,
      "learning_rate": 1.5536537598809804e-05,
      "loss": 2.2998,
      "step": 24400
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9497395753860474,
      "learning_rate": 1.5536194721577948e-05,
      "loss": 2.3153,
      "step": 24401
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1422016620635986,
      "learning_rate": 1.5535851834960736e-05,
      "loss": 2.3656,
      "step": 24402
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1283291578292847,
      "learning_rate": 1.5535508938958752e-05,
      "loss": 2.4243,
      "step": 24403
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0197834968566895,
      "learning_rate": 1.553516603357258e-05,
      "loss": 2.5405,
      "step": 24404
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0443249940872192,
      "learning_rate": 1.5534823118802797e-05,
      "loss": 2.5221,
      "step": 24405
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9722735285758972,
      "learning_rate": 1.5534480194649984e-05,
      "loss": 2.4059,
      "step": 24406
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.042064905166626,
      "learning_rate": 1.5534137261114727e-05,
      "loss": 2.5762,
      "step": 24407
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9533015489578247,
      "learning_rate": 1.55337943181976e-05,
      "loss": 2.3357,
      "step": 24408
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0706888437271118,
      "learning_rate": 1.5533451365899194e-05,
      "loss": 2.3652,
      "step": 24409
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0931099653244019,
      "learning_rate": 1.5533108404220085e-05,
      "loss": 2.5012,
      "step": 24410
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.032732367515564,
      "learning_rate": 1.5532765433160847e-05,
      "loss": 2.4164,
      "step": 24411
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2539970874786377,
      "learning_rate": 1.5532422452722076e-05,
      "loss": 2.5318,
      "step": 24412
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1492033004760742,
      "learning_rate": 1.5532079462904345e-05,
      "loss": 2.426,
      "step": 24413
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0823044776916504,
      "learning_rate": 1.5531736463708232e-05,
      "loss": 2.1181,
      "step": 24414
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1062614917755127,
      "learning_rate": 1.553139345513433e-05,
      "loss": 2.2397,
      "step": 24415
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9299767017364502,
      "learning_rate": 1.553105043718321e-05,
      "loss": 2.4991,
      "step": 24416
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2265759706497192,
      "learning_rate": 1.5530707409855458e-05,
      "loss": 2.389,
      "step": 24417
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.134102463722229,
      "learning_rate": 1.5530364373151655e-05,
      "loss": 2.4874,
      "step": 24418
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0259337425231934,
      "learning_rate": 1.5530021327072384e-05,
      "loss": 2.5415,
      "step": 24419
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0538285970687866,
      "learning_rate": 1.5529678271618222e-05,
      "loss": 2.3301,
      "step": 24420
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9772950410842896,
      "learning_rate": 1.5529335206789755e-05,
      "loss": 2.4131,
      "step": 24421
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.03200364112854,
      "learning_rate": 1.5528992132587563e-05,
      "loss": 2.332,
      "step": 24422
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1182972192764282,
      "learning_rate": 1.5528649049012226e-05,
      "loss": 2.3355,
      "step": 24423
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.164705753326416,
      "learning_rate": 1.5528305956064328e-05,
      "loss": 2.4429,
      "step": 24424
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1123244762420654,
      "learning_rate": 1.5527962853744454e-05,
      "loss": 2.554,
      "step": 24425
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1131539344787598,
      "learning_rate": 1.552761974205318e-05,
      "loss": 2.4513,
      "step": 24426
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0170141458511353,
      "learning_rate": 1.5527276620991086e-05,
      "loss": 2.5409,
      "step": 24427
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1053647994995117,
      "learning_rate": 1.552693349055876e-05,
      "loss": 2.5515,
      "step": 24428
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9759210348129272,
      "learning_rate": 1.5526590350756778e-05,
      "loss": 2.2709,
      "step": 24429
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.093094825744629,
      "learning_rate": 1.5526247201585726e-05,
      "loss": 2.5367,
      "step": 24430
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1215059757232666,
      "learning_rate": 1.552590404304618e-05,
      "loss": 2.4615,
      "step": 24431
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0525506734848022,
      "learning_rate": 1.5525560875138734e-05,
      "loss": 2.4736,
      "step": 24432
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.096909999847412,
      "learning_rate": 1.5525217697863955e-05,
      "loss": 2.4199,
      "step": 24433
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1343181133270264,
      "learning_rate": 1.5524874511222436e-05,
      "loss": 2.5965,
      "step": 24434
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1466137170791626,
      "learning_rate": 1.552453131521475e-05,
      "loss": 2.3976,
      "step": 24435
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0513406991958618,
      "learning_rate": 1.5524188109841484e-05,
      "loss": 2.2829,
      "step": 24436
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9760900735855103,
      "learning_rate": 1.5523844895103218e-05,
      "loss": 2.5645,
      "step": 24437
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0450236797332764,
      "learning_rate": 1.5523501671000535e-05,
      "loss": 2.4464,
      "step": 24438
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2565948963165283,
      "learning_rate": 1.552315843753402e-05,
      "loss": 2.3414,
      "step": 24439
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.019405484199524,
      "learning_rate": 1.552281519470425e-05,
      "loss": 2.4039,
      "step": 24440
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1077508926391602,
      "learning_rate": 1.552247194251181e-05,
      "loss": 2.4644,
      "step": 24441
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.136916995048523,
      "learning_rate": 1.5522128680957272e-05,
      "loss": 2.4402,
      "step": 24442
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.102736473083496,
      "learning_rate": 1.5521785410041233e-05,
      "loss": 2.4442,
      "step": 24443
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1987873315811157,
      "learning_rate": 1.5521442129764267e-05,
      "loss": 2.2638,
      "step": 24444
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2112354040145874,
      "learning_rate": 1.5521098840126956e-05,
      "loss": 2.3644,
      "step": 24445
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.162532091140747,
      "learning_rate": 1.5520755541129884e-05,
      "loss": 2.4483,
      "step": 24446
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0320138931274414,
      "learning_rate": 1.5520412232773633e-05,
      "loss": 2.5107,
      "step": 24447
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9758789539337158,
      "learning_rate": 1.552006891505878e-05,
      "loss": 2.5552,
      "step": 24448
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.048105001449585,
      "learning_rate": 1.5519725587985913e-05,
      "loss": 2.8343,
      "step": 24449
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0890815258026123,
      "learning_rate": 1.5519382251555612e-05,
      "loss": 2.241,
      "step": 24450
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0816301107406616,
      "learning_rate": 1.551903890576846e-05,
      "loss": 2.5408,
      "step": 24451
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0948054790496826,
      "learning_rate": 1.551869555062504e-05,
      "loss": 2.5351,
      "step": 24452
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1512765884399414,
      "learning_rate": 1.5518352186125928e-05,
      "loss": 2.4182,
      "step": 24453
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0393145084381104,
      "learning_rate": 1.5518008812271713e-05,
      "loss": 2.652,
      "step": 24454
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.017014741897583,
      "learning_rate": 1.551766542906297e-05,
      "loss": 2.4103,
      "step": 24455
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9929613471031189,
      "learning_rate": 1.551732203650029e-05,
      "loss": 2.3494,
      "step": 24456
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0495352745056152,
      "learning_rate": 1.551697863458425e-05,
      "loss": 2.4342,
      "step": 24457
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3097904920578003,
      "learning_rate": 1.551663522331543e-05,
      "loss": 2.3387,
      "step": 24458
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.126552700996399,
      "learning_rate": 1.551629180269442e-05,
      "loss": 2.4583,
      "step": 24459
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0492216348648071,
      "learning_rate": 1.5515948372721795e-05,
      "loss": 2.333,
      "step": 24460
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1843785047531128,
      "learning_rate": 1.551560493339814e-05,
      "loss": 2.7669,
      "step": 24461
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9497133493423462,
      "learning_rate": 1.5515261484724034e-05,
      "loss": 2.2186,
      "step": 24462
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9197787046432495,
      "learning_rate": 1.5514918026700066e-05,
      "loss": 2.3608,
      "step": 24463
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.08781898021698,
      "learning_rate": 1.551457455932681e-05,
      "loss": 2.3324,
      "step": 24464
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0757009983062744,
      "learning_rate": 1.5514231082604855e-05,
      "loss": 2.3033,
      "step": 24465
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1424410343170166,
      "learning_rate": 1.551388759653478e-05,
      "loss": 2.2695,
      "step": 24466
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9851672649383545,
      "learning_rate": 1.5513544101117167e-05,
      "loss": 2.3976,
      "step": 24467
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.021224856376648,
      "learning_rate": 1.55132005963526e-05,
      "loss": 2.4689,
      "step": 24468
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0470675230026245,
      "learning_rate": 1.551285708224166e-05,
      "loss": 2.604,
      "step": 24469
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.075774908065796,
      "learning_rate": 1.5512513558784928e-05,
      "loss": 2.2706,
      "step": 24470
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0574495792388916,
      "learning_rate": 1.551217002598299e-05,
      "loss": 2.6631,
      "step": 24471
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0620566606521606,
      "learning_rate": 1.551182648383643e-05,
      "loss": 2.6677,
      "step": 24472
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0074161291122437,
      "learning_rate": 1.5511482932345824e-05,
      "loss": 2.4528,
      "step": 24473
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9481361508369446,
      "learning_rate": 1.551113937151176e-05,
      "loss": 2.442,
      "step": 24474
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9825778007507324,
      "learning_rate": 1.5510795801334815e-05,
      "loss": 2.4059,
      "step": 24475
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9680204391479492,
      "learning_rate": 1.5510452221815576e-05,
      "loss": 2.2084,
      "step": 24476
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2178008556365967,
      "learning_rate": 1.551010863295462e-05,
      "loss": 2.3337,
      "step": 24477
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4076848030090332,
      "learning_rate": 1.5509765034752538e-05,
      "loss": 2.3379,
      "step": 24478
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.03337562084198,
      "learning_rate": 1.550942142720991e-05,
      "loss": 2.412,
      "step": 24479
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.154661774635315,
      "learning_rate": 1.550907781032731e-05,
      "loss": 2.4287,
      "step": 24480
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9784912467002869,
      "learning_rate": 1.550873418410533e-05,
      "loss": 2.5669,
      "step": 24481
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0328447818756104,
      "learning_rate": 1.550839054854455e-05,
      "loss": 2.3789,
      "step": 24482
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.015744686126709,
      "learning_rate": 1.550804690364555e-05,
      "loss": 2.3858,
      "step": 24483
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1075689792633057,
      "learning_rate": 1.5507703249408918e-05,
      "loss": 2.3085,
      "step": 24484
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.194788932800293,
      "learning_rate": 1.5507359585835228e-05,
      "loss": 2.4152,
      "step": 24485
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0180201530456543,
      "learning_rate": 1.5507015912925074e-05,
      "loss": 2.2367,
      "step": 24486
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.955734133720398,
      "learning_rate": 1.5506672230679027e-05,
      "loss": 2.6805,
      "step": 24487
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9786337614059448,
      "learning_rate": 1.5506328539097678e-05,
      "loss": 2.6299,
      "step": 24488
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1010924577713013,
      "learning_rate": 1.5505984838181602e-05,
      "loss": 2.5011,
      "step": 24489
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.059924840927124,
      "learning_rate": 1.550564112793139e-05,
      "loss": 2.3038,
      "step": 24490
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2977558374404907,
      "learning_rate": 1.5505297408347622e-05,
      "loss": 2.5836,
      "step": 24491
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1525139808654785,
      "learning_rate": 1.550495367943088e-05,
      "loss": 2.4112,
      "step": 24492
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0639965534210205,
      "learning_rate": 1.5504609941181742e-05,
      "loss": 2.464,
      "step": 24493
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9900172352790833,
      "learning_rate": 1.5504266193600802e-05,
      "loss": 2.5245,
      "step": 24494
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0224709510803223,
      "learning_rate": 1.550392243668863e-05,
      "loss": 2.6898,
      "step": 24495
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9470133185386658,
      "learning_rate": 1.5503578670445817e-05,
      "loss": 2.2155,
      "step": 24496
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.012549877166748,
      "learning_rate": 1.5503234894872942e-05,
      "loss": 2.3563,
      "step": 24497
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.93782639503479,
      "learning_rate": 1.5502891109970588e-05,
      "loss": 2.6405,
      "step": 24498
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0463072061538696,
      "learning_rate": 1.5502547315739342e-05,
      "loss": 2.1902,
      "step": 24499
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2391462326049805,
      "learning_rate": 1.5502203512179783e-05,
      "loss": 2.4435,
      "step": 24500
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0105245113372803,
      "learning_rate": 1.5501859699292494e-05,
      "loss": 2.5083,
      "step": 24501
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9867286086082458,
      "learning_rate": 1.5501515877078058e-05,
      "loss": 2.3893,
      "step": 24502
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.076230764389038,
      "learning_rate": 1.550117204553706e-05,
      "loss": 2.3768,
      "step": 24503
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.985437273979187,
      "learning_rate": 1.550082820467008e-05,
      "loss": 2.7019,
      "step": 24504
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1634275913238525,
      "learning_rate": 1.5500484354477706e-05,
      "loss": 2.3953,
      "step": 24505
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0580990314483643,
      "learning_rate": 1.5500140494960515e-05,
      "loss": 2.4564,
      "step": 24506
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0500935316085815,
      "learning_rate": 1.5499796626119087e-05,
      "loss": 2.397,
      "step": 24507
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0652185678482056,
      "learning_rate": 1.5499452747954014e-05,
      "loss": 2.3919,
      "step": 24508
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0227302312850952,
      "learning_rate": 1.5499108860465877e-05,
      "loss": 2.3615,
      "step": 24509
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.054439663887024,
      "learning_rate": 1.5498764963655255e-05,
      "loss": 2.271,
      "step": 24510
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9852930307388306,
      "learning_rate": 1.5498421057522734e-05,
      "loss": 2.4623,
      "step": 24511
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9669661521911621,
      "learning_rate": 1.5498077142068893e-05,
      "loss": 2.3451,
      "step": 24512
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0790424346923828,
      "learning_rate": 1.549773321729432e-05,
      "loss": 2.318,
      "step": 24513
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0607306957244873,
      "learning_rate": 1.5497389283199596e-05,
      "loss": 2.371,
      "step": 24514
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0085117816925049,
      "learning_rate": 1.5497045339785302e-05,
      "loss": 2.2235,
      "step": 24515
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.058218240737915,
      "learning_rate": 1.5496701387052027e-05,
      "loss": 2.5986,
      "step": 24516
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0062092542648315,
      "learning_rate": 1.549635742500035e-05,
      "loss": 2.4253,
      "step": 24517
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0034594535827637,
      "learning_rate": 1.549601345363085e-05,
      "loss": 2.2749,
      "step": 24518
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2113100290298462,
      "learning_rate": 1.549566947294412e-05,
      "loss": 2.395,
      "step": 24519
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0875202417373657,
      "learning_rate": 1.5495325482940733e-05,
      "loss": 2.5852,
      "step": 24520
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0324355363845825,
      "learning_rate": 1.549498148362128e-05,
      "loss": 2.4699,
      "step": 24521
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.21431565284729,
      "learning_rate": 1.549463747498634e-05,
      "loss": 2.5002,
      "step": 24522
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1883028745651245,
      "learning_rate": 1.5494293457036494e-05,
      "loss": 2.3766,
      "step": 24523
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9910076260566711,
      "learning_rate": 1.5493949429772332e-05,
      "loss": 2.2982,
      "step": 24524
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9695068597793579,
      "learning_rate": 1.5493605393194436e-05,
      "loss": 2.4641,
      "step": 24525
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.045579195022583,
      "learning_rate": 1.5493261347303383e-05,
      "loss": 2.3668,
      "step": 24526
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9634893536567688,
      "learning_rate": 1.5492917292099758e-05,
      "loss": 2.2861,
      "step": 24527
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1938611268997192,
      "learning_rate": 1.549257322758415e-05,
      "loss": 2.4028,
      "step": 24528
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.262477159500122,
      "learning_rate": 1.5492229153757137e-05,
      "loss": 2.3632,
      "step": 24529
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0771739482879639,
      "learning_rate": 1.5491885070619304e-05,
      "loss": 2.478,
      "step": 24530
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0373167991638184,
      "learning_rate": 1.5491540978171237e-05,
      "loss": 2.4255,
      "step": 24531
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0465960502624512,
      "learning_rate": 1.5491196876413512e-05,
      "loss": 2.4724,
      "step": 24532
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2493466138839722,
      "learning_rate": 1.549085276534672e-05,
      "loss": 2.2383,
      "step": 24533
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0739060640335083,
      "learning_rate": 1.549050864497144e-05,
      "loss": 2.3649,
      "step": 24534
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0814108848571777,
      "learning_rate": 1.5490164515288256e-05,
      "loss": 2.3578,
      "step": 24535
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0039904117584229,
      "learning_rate": 1.5489820376297755e-05,
      "loss": 2.2047,
      "step": 24536
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0679264068603516,
      "learning_rate": 1.5489476228000516e-05,
      "loss": 2.342,
      "step": 24537
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.125673532485962,
      "learning_rate": 1.5489132070397123e-05,
      "loss": 2.2521,
      "step": 24538
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9710956811904907,
      "learning_rate": 1.5488787903488162e-05,
      "loss": 2.4486,
      "step": 24539
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9893857836723328,
      "learning_rate": 1.5488443727274215e-05,
      "loss": 2.2575,
      "step": 24540
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0539411306381226,
      "learning_rate": 1.548809954175586e-05,
      "loss": 2.4481,
      "step": 24541
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1388201713562012,
      "learning_rate": 1.548775534693369e-05,
      "loss": 2.3056,
      "step": 24542
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0582858324050903,
      "learning_rate": 1.548741114280828e-05,
      "loss": 2.5355,
      "step": 24543
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0615350008010864,
      "learning_rate": 1.5487066929380223e-05,
      "loss": 2.4477,
      "step": 24544
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0078788995742798,
      "learning_rate": 1.54867227066501e-05,
      "loss": 2.4066,
      "step": 24545
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9836481809616089,
      "learning_rate": 1.5486378474618487e-05,
      "loss": 2.4555,
      "step": 24546
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0183907747268677,
      "learning_rate": 1.5486034233285972e-05,
      "loss": 2.5124,
      "step": 24547
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9545769691467285,
      "learning_rate": 1.548568998265314e-05,
      "loss": 2.5987,
      "step": 24548
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0237295627593994,
      "learning_rate": 1.548534572272057e-05,
      "loss": 2.4093,
      "step": 24549
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1284099817276,
      "learning_rate": 1.5485001453488857e-05,
      "loss": 2.578,
      "step": 24550
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1188936233520508,
      "learning_rate": 1.5484657174958572e-05,
      "loss": 2.3269,
      "step": 24551
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.14493727684021,
      "learning_rate": 1.5484312887130304e-05,
      "loss": 2.4326,
      "step": 24552
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0160436630249023,
      "learning_rate": 1.5483968590004637e-05,
      "loss": 2.6489,
      "step": 24553
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.145888090133667,
      "learning_rate": 1.5483624283582153e-05,
      "loss": 2.4665,
      "step": 24554
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0159590244293213,
      "learning_rate": 1.5483279967863434e-05,
      "loss": 2.3228,
      "step": 24555
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.229299783706665,
      "learning_rate": 1.548293564284907e-05,
      "loss": 2.3803,
      "step": 24556
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0539566278457642,
      "learning_rate": 1.548259130853964e-05,
      "loss": 2.4339,
      "step": 24557
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0409196615219116,
      "learning_rate": 1.548224696493573e-05,
      "loss": 2.5139,
      "step": 24558
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0803792476654053,
      "learning_rate": 1.548190261203792e-05,
      "loss": 2.2244,
      "step": 24559
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.170121669769287,
      "learning_rate": 1.5481558249846797e-05,
      "loss": 2.4164,
      "step": 24560
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9743191599845886,
      "learning_rate": 1.5481213878362943e-05,
      "loss": 2.5118,
      "step": 24561
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2011336088180542,
      "learning_rate": 1.5480869497586945e-05,
      "loss": 2.256,
      "step": 24562
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4361954927444458,
      "learning_rate": 1.548052510751938e-05,
      "loss": 2.4092,
      "step": 24563
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.059593915939331,
      "learning_rate": 1.5480180708160842e-05,
      "loss": 2.3433,
      "step": 24564
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.147837519645691,
      "learning_rate": 1.5479836299511906e-05,
      "loss": 2.2843,
      "step": 24565
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9588504433631897,
      "learning_rate": 1.547949188157316e-05,
      "loss": 2.357,
      "step": 24566
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0977915525436401,
      "learning_rate": 1.547914745434519e-05,
      "loss": 2.3092,
      "step": 24567
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0931556224822998,
      "learning_rate": 1.5478803017828573e-05,
      "loss": 2.4687,
      "step": 24568
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0035123825073242,
      "learning_rate": 1.54784585720239e-05,
      "loss": 2.5926,
      "step": 24569
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1357982158660889,
      "learning_rate": 1.547811411693175e-05,
      "loss": 2.4472,
      "step": 24570
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.046011209487915,
      "learning_rate": 1.547776965255271e-05,
      "loss": 2.3689,
      "step": 24571
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1154248714447021,
      "learning_rate": 1.547742517888736e-05,
      "loss": 2.4668,
      "step": 24572
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9953426122665405,
      "learning_rate": 1.547708069593629e-05,
      "loss": 2.3246,
      "step": 24573
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.048155665397644,
      "learning_rate": 1.5476736203700076e-05,
      "loss": 2.4385,
      "step": 24574
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0339020490646362,
      "learning_rate": 1.547639170217931e-05,
      "loss": 2.4589,
      "step": 24575
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9816077947616577,
      "learning_rate": 1.5476047191374573e-05,
      "loss": 2.3056,
      "step": 24576
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0705158710479736,
      "learning_rate": 1.5475702671286445e-05,
      "loss": 2.3151,
      "step": 24577
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.046279788017273,
      "learning_rate": 1.547535814191552e-05,
      "loss": 2.3239,
      "step": 24578
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1094738245010376,
      "learning_rate": 1.5475013603262373e-05,
      "loss": 2.1996,
      "step": 24579
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9973315000534058,
      "learning_rate": 1.547466905532759e-05,
      "loss": 2.3996,
      "step": 24580
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.959220290184021,
      "learning_rate": 1.5474324498111756e-05,
      "loss": 2.3581,
      "step": 24581
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0427719354629517,
      "learning_rate": 1.5473979931615456e-05,
      "loss": 2.4192,
      "step": 24582
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0817490816116333,
      "learning_rate": 1.5473635355839272e-05,
      "loss": 2.5879,
      "step": 24583
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.02628493309021,
      "learning_rate": 1.5473290770783793e-05,
      "loss": 2.4502,
      "step": 24584
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0548781156539917,
      "learning_rate": 1.54729461764496e-05,
      "loss": 2.619,
      "step": 24585
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0166614055633545,
      "learning_rate": 1.547260157283727e-05,
      "loss": 2.2211,
      "step": 24586
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1373236179351807,
      "learning_rate": 1.5472256959947397e-05,
      "loss": 2.5683,
      "step": 24587
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1598507165908813,
      "learning_rate": 1.5471912337780564e-05,
      "loss": 2.4614,
      "step": 24588
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1276992559432983,
      "learning_rate": 1.5471567706337352e-05,
      "loss": 2.6012,
      "step": 24589
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1059659719467163,
      "learning_rate": 1.5471223065618346e-05,
      "loss": 2.4556,
      "step": 24590
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0328755378723145,
      "learning_rate": 1.5470878415624137e-05,
      "loss": 2.4553,
      "step": 24591
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0318485498428345,
      "learning_rate": 1.5470533756355296e-05,
      "loss": 2.5306,
      "step": 24592
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0894235372543335,
      "learning_rate": 1.5470189087812413e-05,
      "loss": 2.3837,
      "step": 24593
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0621033906936646,
      "learning_rate": 1.546984440999608e-05,
      "loss": 2.2574,
      "step": 24594
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1527774333953857,
      "learning_rate": 1.5469499722906874e-05,
      "loss": 2.3729,
      "step": 24595
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0141263008117676,
      "learning_rate": 1.546915502654538e-05,
      "loss": 2.3861,
      "step": 24596
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0820375680923462,
      "learning_rate": 1.5468810320912182e-05,
      "loss": 2.3998,
      "step": 24597
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4526370763778687,
      "learning_rate": 1.5468465606007866e-05,
      "loss": 2.3259,
      "step": 24598
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0524410009384155,
      "learning_rate": 1.5468120881833015e-05,
      "loss": 2.4099,
      "step": 24599
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4103344678878784,
      "learning_rate": 1.5467776148388215e-05,
      "loss": 2.265,
      "step": 24600
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9280675649642944,
      "learning_rate": 1.5467431405674045e-05,
      "loss": 2.3998,
      "step": 24601
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9920411109924316,
      "learning_rate": 1.5467086653691095e-05,
      "loss": 2.5097,
      "step": 24602
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1147128343582153,
      "learning_rate": 1.5466741892439952e-05,
      "loss": 2.5316,
      "step": 24603
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0565582513809204,
      "learning_rate": 1.5466397121921197e-05,
      "loss": 2.4935,
      "step": 24604
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1527169942855835,
      "learning_rate": 1.546605234213541e-05,
      "loss": 2.6094,
      "step": 24605
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0385011434555054,
      "learning_rate": 1.5465707553083183e-05,
      "loss": 2.2682,
      "step": 24606
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0517148971557617,
      "learning_rate": 1.5465362754765095e-05,
      "loss": 2.36,
      "step": 24607
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0477739572525024,
      "learning_rate": 1.5465017947181734e-05,
      "loss": 2.6158,
      "step": 24608
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.041355848312378,
      "learning_rate": 1.5464673130333685e-05,
      "loss": 2.4157,
      "step": 24609
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.98304682970047,
      "learning_rate": 1.546432830422153e-05,
      "loss": 2.5585,
      "step": 24610
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9898975491523743,
      "learning_rate": 1.5463983468845855e-05,
      "loss": 2.4164,
      "step": 24611
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.8936141729354858,
      "learning_rate": 1.5463638624207242e-05,
      "loss": 2.4884,
      "step": 24612
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2551217079162598,
      "learning_rate": 1.546329377030628e-05,
      "loss": 2.3739,
      "step": 24613
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0515168905258179,
      "learning_rate": 1.5462948907143547e-05,
      "loss": 2.3782,
      "step": 24614
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1155035495758057,
      "learning_rate": 1.5462604034719634e-05,
      "loss": 2.3744,
      "step": 24615
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0541119575500488,
      "learning_rate": 1.5462259153035124e-05,
      "loss": 2.2652,
      "step": 24616
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1544370651245117,
      "learning_rate": 1.54619142620906e-05,
      "loss": 2.5104,
      "step": 24617
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0178744792938232,
      "learning_rate": 1.546156936188665e-05,
      "loss": 2.3875,
      "step": 24618
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0398045778274536,
      "learning_rate": 1.5461224452423857e-05,
      "loss": 2.4561,
      "step": 24619
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0355467796325684,
      "learning_rate": 1.5460879533702804e-05,
      "loss": 2.3728,
      "step": 24620
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.094096064567566,
      "learning_rate": 1.5460534605724077e-05,
      "loss": 2.492,
      "step": 24621
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.118952751159668,
      "learning_rate": 1.5460189668488263e-05,
      "loss": 2.5582,
      "step": 24622
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0527870655059814,
      "learning_rate": 1.545984472199594e-05,
      "loss": 2.5657,
      "step": 24623
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9656544923782349,
      "learning_rate": 1.54594997662477e-05,
      "loss": 2.3086,
      "step": 24624
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3143264055252075,
      "learning_rate": 1.545915480124412e-05,
      "loss": 2.5791,
      "step": 24625
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0131479501724243,
      "learning_rate": 1.5458809826985798e-05,
      "loss": 2.5376,
      "step": 24626
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0277670621871948,
      "learning_rate": 1.5458464843473308e-05,
      "loss": 2.4609,
      "step": 24627
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0460790395736694,
      "learning_rate": 1.545811985070724e-05,
      "loss": 2.3708,
      "step": 24628
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4177751541137695,
      "learning_rate": 1.545777484868817e-05,
      "loss": 2.4328,
      "step": 24629
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1133488416671753,
      "learning_rate": 1.5457429837416695e-05,
      "loss": 2.2616,
      "step": 24630
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2167997360229492,
      "learning_rate": 1.545708481689339e-05,
      "loss": 2.5234,
      "step": 24631
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0737401247024536,
      "learning_rate": 1.5456739787118848e-05,
      "loss": 2.2662,
      "step": 24632
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.040328025817871,
      "learning_rate": 1.5456394748093648e-05,
      "loss": 2.6386,
      "step": 24633
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0478750467300415,
      "learning_rate": 1.5456049699818377e-05,
      "loss": 2.3471,
      "step": 24634
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1899291276931763,
      "learning_rate": 1.5455704642293618e-05,
      "loss": 2.3581,
      "step": 24635
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0440609455108643,
      "learning_rate": 1.545535957551996e-05,
      "loss": 2.3778,
      "step": 24636
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1661149263381958,
      "learning_rate": 1.5455014499497987e-05,
      "loss": 2.5426,
      "step": 24637
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0624761581420898,
      "learning_rate": 1.545466941422828e-05,
      "loss": 2.3013,
      "step": 24638
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0250861644744873,
      "learning_rate": 1.545432431971143e-05,
      "loss": 2.4918,
      "step": 24639
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.146193504333496,
      "learning_rate": 1.5453979215948018e-05,
      "loss": 2.7302,
      "step": 24640
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.171111822128296,
      "learning_rate": 1.545363410293863e-05,
      "loss": 2.3378,
      "step": 24641
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1275105476379395,
      "learning_rate": 1.545328898068385e-05,
      "loss": 2.3815,
      "step": 24642
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9296724796295166,
      "learning_rate": 1.5452943849184262e-05,
      "loss": 2.3805,
      "step": 24643
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1129189729690552,
      "learning_rate": 1.545259870844046e-05,
      "loss": 2.4115,
      "step": 24644
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0464730262756348,
      "learning_rate": 1.5452253558453016e-05,
      "loss": 2.6255,
      "step": 24645
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0550884008407593,
      "learning_rate": 1.5451908399222524e-05,
      "loss": 2.4791,
      "step": 24646
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2665801048278809,
      "learning_rate": 1.5451563230749568e-05,
      "loss": 2.4738,
      "step": 24647
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0157161951065063,
      "learning_rate": 1.5451218053034726e-05,
      "loss": 2.6329,
      "step": 24648
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0289593935012817,
      "learning_rate": 1.5450872866078593e-05,
      "loss": 2.3905,
      "step": 24649
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.184050440788269,
      "learning_rate": 1.545052766988175e-05,
      "loss": 2.4017,
      "step": 24650
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0422940254211426,
      "learning_rate": 1.5450182464444784e-05,
      "loss": 2.5733,
      "step": 24651
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9510631561279297,
      "learning_rate": 1.5449837249768275e-05,
      "loss": 2.578,
      "step": 24652
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0314440727233887,
      "learning_rate": 1.544949202585281e-05,
      "loss": 2.5451,
      "step": 24653
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.032791256904602,
      "learning_rate": 1.5449146792698982e-05,
      "loss": 2.5865,
      "step": 24654
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0906641483306885,
      "learning_rate": 1.5448801550307365e-05,
      "loss": 2.3327,
      "step": 24655
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.009609580039978,
      "learning_rate": 1.5448456298678555e-05,
      "loss": 2.1963,
      "step": 24656
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.007867455482483,
      "learning_rate": 1.544811103781313e-05,
      "loss": 2.5341,
      "step": 24657
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.033044695854187,
      "learning_rate": 1.5447765767711672e-05,
      "loss": 2.32,
      "step": 24658
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0131148099899292,
      "learning_rate": 1.5447420488374774e-05,
      "loss": 2.4297,
      "step": 24659
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1198874711990356,
      "learning_rate": 1.5447075199803022e-05,
      "loss": 2.3208,
      "step": 24660
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0167104005813599,
      "learning_rate": 1.5446729901996995e-05,
      "loss": 2.3706,
      "step": 24661
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0311696529388428,
      "learning_rate": 1.5446384594957282e-05,
      "loss": 2.3394,
      "step": 24662
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.13943350315094,
      "learning_rate": 1.544603927868447e-05,
      "loss": 2.3935,
      "step": 24663
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.076831579208374,
      "learning_rate": 1.544569395317914e-05,
      "loss": 2.473,
      "step": 24664
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9219655990600586,
      "learning_rate": 1.5445348618441876e-05,
      "loss": 2.5897,
      "step": 24665
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0607181787490845,
      "learning_rate": 1.5445003274473275e-05,
      "loss": 2.2144,
      "step": 24666
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0063648223876953,
      "learning_rate": 1.544465792127391e-05,
      "loss": 2.4574,
      "step": 24667
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0003504753112793,
      "learning_rate": 1.5444312558844373e-05,
      "loss": 2.3036,
      "step": 24668
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.999903678894043,
      "learning_rate": 1.5443967187185248e-05,
      "loss": 2.485,
      "step": 24669
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.8976708054542542,
      "learning_rate": 1.5443621806297117e-05,
      "loss": 2.053,
      "step": 24670
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9616270661354065,
      "learning_rate": 1.544327641618057e-05,
      "loss": 2.4624,
      "step": 24671
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9944702982902527,
      "learning_rate": 1.544293101683619e-05,
      "loss": 2.1934,
      "step": 24672
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0576921701431274,
      "learning_rate": 1.5442585608264567e-05,
      "loss": 2.3613,
      "step": 24673
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.210018277168274,
      "learning_rate": 1.5442240190466284e-05,
      "loss": 2.474,
      "step": 24674
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2251977920532227,
      "learning_rate": 1.544189476344192e-05,
      "loss": 2.235,
      "step": 24675
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1163225173950195,
      "learning_rate": 1.5441549327192068e-05,
      "loss": 2.4093,
      "step": 24676
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0405269861221313,
      "learning_rate": 1.5441203881717315e-05,
      "loss": 2.366,
      "step": 24677
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9426149129867554,
      "learning_rate": 1.5440858427018245e-05,
      "loss": 2.3205,
      "step": 24678
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0700420141220093,
      "learning_rate": 1.5440512963095436e-05,
      "loss": 2.2595,
      "step": 24679
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.103452205657959,
      "learning_rate": 1.5440167489949486e-05,
      "loss": 2.3374,
      "step": 24680
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0812864303588867,
      "learning_rate": 1.543982200758097e-05,
      "loss": 2.2742,
      "step": 24681
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2099556922912598,
      "learning_rate": 1.543947651599048e-05,
      "loss": 2.3239,
      "step": 24682
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.157556414604187,
      "learning_rate": 1.5439131015178597e-05,
      "loss": 2.3671,
      "step": 24683
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.049687147140503,
      "learning_rate": 1.5438785505145917e-05,
      "loss": 2.5087,
      "step": 24684
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0399954319000244,
      "learning_rate": 1.5438439985893016e-05,
      "loss": 2.4496,
      "step": 24685
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.082256555557251,
      "learning_rate": 1.543809445742048e-05,
      "loss": 2.1536,
      "step": 24686
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0013974905014038,
      "learning_rate": 1.5437748919728894e-05,
      "loss": 2.5852,
      "step": 24687
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1412849426269531,
      "learning_rate": 1.543740337281885e-05,
      "loss": 2.4609,
      "step": 24688
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0382283926010132,
      "learning_rate": 1.5437057816690927e-05,
      "loss": 2.3463,
      "step": 24689
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9835050702095032,
      "learning_rate": 1.543671225134572e-05,
      "loss": 2.3139,
      "step": 24690
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1031169891357422,
      "learning_rate": 1.5436366676783808e-05,
      "loss": 2.2325,
      "step": 24691
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0804674625396729,
      "learning_rate": 1.5436021093005776e-05,
      "loss": 2.4641,
      "step": 24692
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0853298902511597,
      "learning_rate": 1.5435675500012212e-05,
      "loss": 2.37,
      "step": 24693
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0732625722885132,
      "learning_rate": 1.5435329897803703e-05,
      "loss": 2.3919,
      "step": 24694
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0679969787597656,
      "learning_rate": 1.5434984286380833e-05,
      "loss": 2.2859,
      "step": 24695
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0984289646148682,
      "learning_rate": 1.543463866574419e-05,
      "loss": 2.5584,
      "step": 24696
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.05858314037323,
      "learning_rate": 1.5434293035894356e-05,
      "loss": 2.3043,
      "step": 24697
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9802029132843018,
      "learning_rate": 1.543394739683192e-05,
      "loss": 2.3748,
      "step": 24698
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2681764364242554,
      "learning_rate": 1.5433601748557466e-05,
      "loss": 2.514,
      "step": 24699
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0830585956573486,
      "learning_rate": 1.5433256091071583e-05,
      "loss": 2.5635,
      "step": 24700
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.999519407749176,
      "learning_rate": 1.5432910424374857e-05,
      "loss": 2.3571,
      "step": 24701
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0749043226242065,
      "learning_rate": 1.5432564748467867e-05,
      "loss": 2.6121,
      "step": 24702
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9857485294342041,
      "learning_rate": 1.5432219063351213e-05,
      "loss": 2.265,
      "step": 24703
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2793514728546143,
      "learning_rate": 1.5431873369025465e-05,
      "loss": 2.4204,
      "step": 24704
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9397540092468262,
      "learning_rate": 1.543152766549122e-05,
      "loss": 2.3097,
      "step": 24705
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.022855520248413,
      "learning_rate": 1.543118195274906e-05,
      "loss": 2.6954,
      "step": 24706
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.987834095954895,
      "learning_rate": 1.5430836230799568e-05,
      "loss": 2.5934,
      "step": 24707
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0911234617233276,
      "learning_rate": 1.5430490499643336e-05,
      "loss": 2.35,
      "step": 24708
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.046857237815857,
      "learning_rate": 1.5430144759280947e-05,
      "loss": 2.3748,
      "step": 24709
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9873499870300293,
      "learning_rate": 1.5429799009712988e-05,
      "loss": 2.5395,
      "step": 24710
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0807515382766724,
      "learning_rate": 1.5429453250940046e-05,
      "loss": 2.401,
      "step": 24711
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0395394563674927,
      "learning_rate": 1.5429107482962706e-05,
      "loss": 2.262,
      "step": 24712
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1338576078414917,
      "learning_rate": 1.5428761705781554e-05,
      "loss": 2.4305,
      "step": 24713
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.129228115081787,
      "learning_rate": 1.5428415919397175e-05,
      "loss": 2.4052,
      "step": 24714
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0185294151306152,
      "learning_rate": 1.542807012381016e-05,
      "loss": 2.4677,
      "step": 24715
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0456225872039795,
      "learning_rate": 1.5427724319021092e-05,
      "loss": 2.6196,
      "step": 24716
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.016151785850525,
      "learning_rate": 1.5427378505030554e-05,
      "loss": 2.2675,
      "step": 24717
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9461421966552734,
      "learning_rate": 1.5427032681839134e-05,
      "loss": 2.4124,
      "step": 24718
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.032380223274231,
      "learning_rate": 1.5426686849447422e-05,
      "loss": 2.396,
      "step": 24719
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.012477993965149,
      "learning_rate": 1.5426341007856004e-05,
      "loss": 2.5748,
      "step": 24720
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1326255798339844,
      "learning_rate": 1.5425995157065462e-05,
      "loss": 2.3363,
      "step": 24721
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9709953665733337,
      "learning_rate": 1.542564929707638e-05,
      "loss": 2.6565,
      "step": 24722
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0234850645065308,
      "learning_rate": 1.5425303427889357e-05,
      "loss": 2.4398,
      "step": 24723
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1208018064498901,
      "learning_rate": 1.542495754950497e-05,
      "loss": 2.2334,
      "step": 24724
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1032600402832031,
      "learning_rate": 1.54246116619238e-05,
      "loss": 2.4482,
      "step": 24725
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0211365222930908,
      "learning_rate": 1.5424265765146445e-05,
      "loss": 2.3846,
      "step": 24726
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.065514326095581,
      "learning_rate": 1.5423919859173486e-05,
      "loss": 2.2407,
      "step": 24727
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.454626202583313,
      "learning_rate": 1.542357394400551e-05,
      "loss": 2.5045,
      "step": 24728
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0083187818527222,
      "learning_rate": 1.5423228019643102e-05,
      "loss": 2.2453,
      "step": 24729
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0406142473220825,
      "learning_rate": 1.5422882086086854e-05,
      "loss": 2.5451,
      "step": 24730
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1422066688537598,
      "learning_rate": 1.5422536143337343e-05,
      "loss": 2.2099,
      "step": 24731
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9223593473434448,
      "learning_rate": 1.542219019139516e-05,
      "loss": 2.376,
      "step": 24732
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9847553372383118,
      "learning_rate": 1.542184423026089e-05,
      "loss": 1.9947,
      "step": 24733
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.114640474319458,
      "learning_rate": 1.542149825993513e-05,
      "loss": 2.288,
      "step": 24734
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9821996092796326,
      "learning_rate": 1.542115228041845e-05,
      "loss": 2.2773,
      "step": 24735
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9914050698280334,
      "learning_rate": 1.5420806291711448e-05,
      "loss": 2.5306,
      "step": 24736
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0663057565689087,
      "learning_rate": 1.5420460293814706e-05,
      "loss": 2.3649,
      "step": 24737
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.113905429840088,
      "learning_rate": 1.5420114286728815e-05,
      "loss": 2.4819,
      "step": 24738
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.171276569366455,
      "learning_rate": 1.5419768270454354e-05,
      "loss": 2.3328,
      "step": 24739
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0508644580841064,
      "learning_rate": 1.5419422244991914e-05,
      "loss": 2.2087,
      "step": 24740
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9890509247779846,
      "learning_rate": 1.5419076210342083e-05,
      "loss": 2.398,
      "step": 24741
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0827924013137817,
      "learning_rate": 1.5418730166505448e-05,
      "loss": 2.4028,
      "step": 24742
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9822116494178772,
      "learning_rate": 1.541838411348259e-05,
      "loss": 2.2894,
      "step": 24743
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0350234508514404,
      "learning_rate": 1.54180380512741e-05,
      "loss": 2.5662,
      "step": 24744
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1852588653564453,
      "learning_rate": 1.5417691979880567e-05,
      "loss": 2.4388,
      "step": 24745
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1458972692489624,
      "learning_rate": 1.5417345899302573e-05,
      "loss": 2.4265,
      "step": 24746
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0988510847091675,
      "learning_rate": 1.5416999809540703e-05,
      "loss": 2.4228,
      "step": 24747
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0892406702041626,
      "learning_rate": 1.541665371059555e-05,
      "loss": 2.3835,
      "step": 24748
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0537896156311035,
      "learning_rate": 1.5416307602467697e-05,
      "loss": 2.4899,
      "step": 24749
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1132878065109253,
      "learning_rate": 1.5415961485157734e-05,
      "loss": 2.5726,
      "step": 24750
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0931005477905273,
      "learning_rate": 1.541561535866624e-05,
      "loss": 2.26,
      "step": 24751
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1467974185943604,
      "learning_rate": 1.541526922299381e-05,
      "loss": 2.5977,
      "step": 24752
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9488571882247925,
      "learning_rate": 1.5414923078141032e-05,
      "loss": 2.5193,
      "step": 24753
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1336416006088257,
      "learning_rate": 1.541457692410848e-05,
      "loss": 2.2994,
      "step": 24754
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1429837942123413,
      "learning_rate": 1.541423076089676e-05,
      "loss": 2.386,
      "step": 24755
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0099380016326904,
      "learning_rate": 1.541388458850644e-05,
      "loss": 2.214,
      "step": 24756
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0929557085037231,
      "learning_rate": 1.541353840693812e-05,
      "loss": 2.1296,
      "step": 24757
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0920991897583008,
      "learning_rate": 1.541319221619238e-05,
      "loss": 2.6118,
      "step": 24758
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1374175548553467,
      "learning_rate": 1.541284601626981e-05,
      "loss": 2.2536,
      "step": 24759
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9749190211296082,
      "learning_rate": 1.5412499807170993e-05,
      "loss": 2.228,
      "step": 24760
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0765633583068848,
      "learning_rate": 1.5412153588896522e-05,
      "loss": 2.4592,
      "step": 24761
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0416624546051025,
      "learning_rate": 1.541180736144698e-05,
      "loss": 2.2567,
      "step": 24762
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0449328422546387,
      "learning_rate": 1.5411461124822956e-05,
      "loss": 2.1898,
      "step": 24763
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0516384840011597,
      "learning_rate": 1.5411114879025033e-05,
      "loss": 2.409,
      "step": 24764
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.160552740097046,
      "learning_rate": 1.54107686240538e-05,
      "loss": 2.4032,
      "step": 24765
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0169233083724976,
      "learning_rate": 1.5410422359909847e-05,
      "loss": 2.3534,
      "step": 24766
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9805094003677368,
      "learning_rate": 1.5410076086593758e-05,
      "loss": 2.2801,
      "step": 24767
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.182374119758606,
      "learning_rate": 1.5409729804106122e-05,
      "loss": 2.5148,
      "step": 24768
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0897228717803955,
      "learning_rate": 1.5409383512447525e-05,
      "loss": 2.5925,
      "step": 24769
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0601173639297485,
      "learning_rate": 1.5409037211618553e-05,
      "loss": 2.4402,
      "step": 24770
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1385480165481567,
      "learning_rate": 1.5408690901619792e-05,
      "loss": 2.4553,
      "step": 24771
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1480047702789307,
      "learning_rate": 1.540834458245183e-05,
      "loss": 2.5411,
      "step": 24772
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.115902304649353,
      "learning_rate": 1.5407998254115258e-05,
      "loss": 2.3499,
      "step": 24773
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9918823838233948,
      "learning_rate": 1.540765191661066e-05,
      "loss": 2.3288,
      "step": 24774
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.127892017364502,
      "learning_rate": 1.5407305569938622e-05,
      "loss": 2.523,
      "step": 24775
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.083779215812683,
      "learning_rate": 1.5406959214099732e-05,
      "loss": 2.2966,
      "step": 24776
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0566134452819824,
      "learning_rate": 1.540661284909458e-05,
      "loss": 2.5297,
      "step": 24777
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.044473648071289,
      "learning_rate": 1.540626647492375e-05,
      "loss": 2.3684,
      "step": 24778
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0486806631088257,
      "learning_rate": 1.540592009158783e-05,
      "loss": 2.2288,
      "step": 24779
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9527358412742615,
      "learning_rate": 1.5405573699087404e-05,
      "loss": 2.4907,
      "step": 24780
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2503947019577026,
      "learning_rate": 1.5405227297423063e-05,
      "loss": 2.437,
      "step": 24781
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0710307359695435,
      "learning_rate": 1.54048808865954e-05,
      "loss": 2.2081,
      "step": 24782
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0102026462554932,
      "learning_rate": 1.5404534466604993e-05,
      "loss": 2.3296,
      "step": 24783
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0482367277145386,
      "learning_rate": 1.5404188037452427e-05,
      "loss": 2.3576,
      "step": 24784
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0582464933395386,
      "learning_rate": 1.54038415991383e-05,
      "loss": 2.4578,
      "step": 24785
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0366184711456299,
      "learning_rate": 1.5403495151663196e-05,
      "loss": 2.2316,
      "step": 24786
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0667634010314941,
      "learning_rate": 1.5403148695027694e-05,
      "loss": 2.281,
      "step": 24787
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0061447620391846,
      "learning_rate": 1.5402802229232387e-05,
      "loss": 2.589,
      "step": 24788
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.051447868347168,
      "learning_rate": 1.540245575427787e-05,
      "loss": 2.4361,
      "step": 24789
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0153443813323975,
      "learning_rate": 1.5402109270164717e-05,
      "loss": 2.4538,
      "step": 24790
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0666722059249878,
      "learning_rate": 1.540176277689352e-05,
      "loss": 2.3741,
      "step": 24791
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0227516889572144,
      "learning_rate": 1.5401416274464877e-05,
      "loss": 2.4672,
      "step": 24792
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9773373007774353,
      "learning_rate": 1.5401069762879363e-05,
      "loss": 2.3814,
      "step": 24793
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0195926427841187,
      "learning_rate": 1.5400723242137564e-05,
      "loss": 2.3912,
      "step": 24794
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1159974336624146,
      "learning_rate": 1.5400376712240075e-05,
      "loss": 2.2739,
      "step": 24795
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9889277219772339,
      "learning_rate": 1.5400030173187484e-05,
      "loss": 2.2454,
      "step": 24796
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0654021501541138,
      "learning_rate": 1.539968362498037e-05,
      "loss": 2.2799,
      "step": 24797
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9490382075309753,
      "learning_rate": 1.5399337067619326e-05,
      "loss": 2.5945,
      "step": 24798
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0622555017471313,
      "learning_rate": 1.539899050110494e-05,
      "loss": 2.5078,
      "step": 24799
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.961022675037384,
      "learning_rate": 1.5398643925437803e-05,
      "loss": 2.2297,
      "step": 24800
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0269885063171387,
      "learning_rate": 1.5398297340618494e-05,
      "loss": 2.2502,
      "step": 24801
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9389598965644836,
      "learning_rate": 1.5397950746647604e-05,
      "loss": 2.5036,
      "step": 24802
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9577478170394897,
      "learning_rate": 1.5397604143525725e-05,
      "loss": 2.3758,
      "step": 24803
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0487935543060303,
      "learning_rate": 1.539725753125344e-05,
      "loss": 2.467,
      "step": 24804
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0781326293945312,
      "learning_rate": 1.5396910909831335e-05,
      "loss": 2.3569,
      "step": 24805
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0343506336212158,
      "learning_rate": 1.5396564279260006e-05,
      "loss": 2.4651,
      "step": 24806
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9669490456581116,
      "learning_rate": 1.5396217639540026e-05,
      "loss": 2.2532,
      "step": 24807
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9879887104034424,
      "learning_rate": 1.5395870990672e-05,
      "loss": 2.3868,
      "step": 24808
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.048732876777649,
      "learning_rate": 1.5395524332656504e-05,
      "loss": 2.3184,
      "step": 24809
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0026973485946655,
      "learning_rate": 1.539517766549413e-05,
      "loss": 2.6153,
      "step": 24810
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0407524108886719,
      "learning_rate": 1.539483098918546e-05,
      "loss": 2.2663,
      "step": 24811
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9272372126579285,
      "learning_rate": 1.539448430373109e-05,
      "loss": 2.3738,
      "step": 24812
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.042980670928955,
      "learning_rate": 1.5394137609131604e-05,
      "loss": 2.2587,
      "step": 24813
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2807743549346924,
      "learning_rate": 1.5393790905387586e-05,
      "loss": 2.3341,
      "step": 24814
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1354026794433594,
      "learning_rate": 1.5393444192499636e-05,
      "loss": 2.3562,
      "step": 24815
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9618578553199768,
      "learning_rate": 1.5393097470468325e-05,
      "loss": 2.6479,
      "step": 24816
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.06671142578125,
      "learning_rate": 1.5392750739294252e-05,
      "loss": 2.3234,
      "step": 24817
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0535234212875366,
      "learning_rate": 1.5392403998978003e-05,
      "loss": 2.4927,
      "step": 24818
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3147053718566895,
      "learning_rate": 1.5392057249520162e-05,
      "loss": 2.4919,
      "step": 24819
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.036876916885376,
      "learning_rate": 1.539171049092132e-05,
      "loss": 2.3151,
      "step": 24820
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.257368803024292,
      "learning_rate": 1.539136372318207e-05,
      "loss": 2.2382,
      "step": 24821
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1246682405471802,
      "learning_rate": 1.539101694630299e-05,
      "loss": 2.4957,
      "step": 24822
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2850180864334106,
      "learning_rate": 1.5390670160284667e-05,
      "loss": 2.4373,
      "step": 24823
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9709763526916504,
      "learning_rate": 1.53903233651277e-05,
      "loss": 2.3109,
      "step": 24824
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9793746471405029,
      "learning_rate": 1.5389976560832672e-05,
      "loss": 2.3118,
      "step": 24825
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1604124307632446,
      "learning_rate": 1.5389629747400167e-05,
      "loss": 2.4653,
      "step": 24826
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1402679681777954,
      "learning_rate": 1.538928292483077e-05,
      "loss": 2.2623,
      "step": 24827
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2614696025848389,
      "learning_rate": 1.5388936093125085e-05,
      "loss": 2.5477,
      "step": 24828
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2346218824386597,
      "learning_rate": 1.5388589252283685e-05,
      "loss": 2.356,
      "step": 24829
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0930047035217285,
      "learning_rate": 1.5388242402307163e-05,
      "loss": 2.3439,
      "step": 24830
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.122819423675537,
      "learning_rate": 1.538789554319611e-05,
      "loss": 2.2371,
      "step": 24831
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1376416683197021,
      "learning_rate": 1.5387548674951105e-05,
      "loss": 2.6693,
      "step": 24832
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0400866270065308,
      "learning_rate": 1.5387201797572745e-05,
      "loss": 2.4246,
      "step": 24833
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.014142632484436,
      "learning_rate": 1.5386854911061612e-05,
      "loss": 2.2269,
      "step": 24834
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1515858173370361,
      "learning_rate": 1.53865080154183e-05,
      "loss": 2.5088,
      "step": 24835
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0150316953659058,
      "learning_rate": 1.5386161110643393e-05,
      "loss": 2.3315,
      "step": 24836
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0539627075195312,
      "learning_rate": 1.5385814196737482e-05,
      "loss": 2.411,
      "step": 24837
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.990115225315094,
      "learning_rate": 1.538546727370115e-05,
      "loss": 2.5915,
      "step": 24838
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9625438451766968,
      "learning_rate": 1.5385120341534988e-05,
      "loss": 2.2972,
      "step": 24839
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.026196837425232,
      "learning_rate": 1.5384773400239586e-05,
      "loss": 2.3739,
      "step": 24840
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0377943515777588,
      "learning_rate": 1.5384426449815534e-05,
      "loss": 2.26,
      "step": 24841
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0925029516220093,
      "learning_rate": 1.5384079490263412e-05,
      "loss": 2.4076,
      "step": 24842
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1968739032745361,
      "learning_rate": 1.5383732521583814e-05,
      "loss": 2.2664,
      "step": 24843
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3764783143997192,
      "learning_rate": 1.538338554377733e-05,
      "loss": 2.4791,
      "step": 24844
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4655275344848633,
      "learning_rate": 1.538303855684454e-05,
      "loss": 2.4171,
      "step": 24845
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.948539137840271,
      "learning_rate": 1.538269156078604e-05,
      "loss": 2.2255,
      "step": 24846
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1448513269424438,
      "learning_rate": 1.538234455560242e-05,
      "loss": 2.1224,
      "step": 24847
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9608450531959534,
      "learning_rate": 1.5381997541294258e-05,
      "loss": 2.4139,
      "step": 24848
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0870991945266724,
      "learning_rate": 1.5381650517862153e-05,
      "loss": 2.4731,
      "step": 24849
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0071804523468018,
      "learning_rate": 1.5381303485306683e-05,
      "loss": 2.5979,
      "step": 24850
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0708708763122559,
      "learning_rate": 1.5380956443628448e-05,
      "loss": 2.4503,
      "step": 24851
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1914206743240356,
      "learning_rate": 1.538060939282803e-05,
      "loss": 2.4794,
      "step": 24852
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9940510988235474,
      "learning_rate": 1.5380262332906014e-05,
      "loss": 2.2151,
      "step": 24853
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.056402325630188,
      "learning_rate": 1.5379915263862993e-05,
      "loss": 2.4824,
      "step": 24854
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0287487506866455,
      "learning_rate": 1.537956818569956e-05,
      "loss": 2.487,
      "step": 24855
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0576578378677368,
      "learning_rate": 1.537922109841629e-05,
      "loss": 2.4132,
      "step": 24856
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0297420024871826,
      "learning_rate": 1.5378874002013785e-05,
      "loss": 2.602,
      "step": 24857
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0187395811080933,
      "learning_rate": 1.5378526896492624e-05,
      "loss": 2.2357,
      "step": 24858
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.250307321548462,
      "learning_rate": 1.53781797818534e-05,
      "loss": 2.5243,
      "step": 24859
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2746607065200806,
      "learning_rate": 1.53778326580967e-05,
      "loss": 2.313,
      "step": 24860
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2254064083099365,
      "learning_rate": 1.5377485525223117e-05,
      "loss": 2.3549,
      "step": 24861
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0672454833984375,
      "learning_rate": 1.537713838323323e-05,
      "loss": 2.4475,
      "step": 24862
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1215673685073853,
      "learning_rate": 1.5376791232127635e-05,
      "loss": 2.277,
      "step": 24863
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.063697099685669,
      "learning_rate": 1.537644407190692e-05,
      "loss": 2.3347,
      "step": 24864
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0686360597610474,
      "learning_rate": 1.537609690257167e-05,
      "loss": 2.354,
      "step": 24865
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1317061185836792,
      "learning_rate": 1.5375749724122476e-05,
      "loss": 2.5915,
      "step": 24866
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1596229076385498,
      "learning_rate": 1.5375402536559926e-05,
      "loss": 2.4504,
      "step": 24867
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1386641263961792,
      "learning_rate": 1.5375055339884616e-05,
      "loss": 1.9736,
      "step": 24868
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1514747142791748,
      "learning_rate": 1.537470813409712e-05,
      "loss": 2.5,
      "step": 24869
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.138034701347351,
      "learning_rate": 1.5374360919198034e-05,
      "loss": 2.5898,
      "step": 24870
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1172465085983276,
      "learning_rate": 1.5374013695187947e-05,
      "loss": 2.4301,
      "step": 24871
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0732461214065552,
      "learning_rate": 1.5373666462067445e-05,
      "loss": 2.5335,
      "step": 24872
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.019209623336792,
      "learning_rate": 1.537331921983712e-05,
      "loss": 2.2465,
      "step": 24873
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0318762063980103,
      "learning_rate": 1.5372971968497567e-05,
      "loss": 2.2548,
      "step": 24874
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1111022233963013,
      "learning_rate": 1.537262470804936e-05,
      "loss": 2.0902,
      "step": 24875
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4285725355148315,
      "learning_rate": 1.5372277438493096e-05,
      "loss": 2.5544,
      "step": 24876
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2753888368606567,
      "learning_rate": 1.5371930159829364e-05,
      "loss": 2.6762,
      "step": 24877
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1366193294525146,
      "learning_rate": 1.537158287205875e-05,
      "loss": 2.5588,
      "step": 24878
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1414718627929688,
      "learning_rate": 1.5371235575181845e-05,
      "loss": 2.388,
      "step": 24879
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4915965795516968,
      "learning_rate": 1.5370888269199237e-05,
      "loss": 2.2983,
      "step": 24880
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1786060333251953,
      "learning_rate": 1.5370540954111516e-05,
      "loss": 2.5758,
      "step": 24881
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3389428853988647,
      "learning_rate": 1.5370193629919265e-05,
      "loss": 2.4589,
      "step": 24882
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3187025785446167,
      "learning_rate": 1.536984629662308e-05,
      "loss": 2.358,
      "step": 24883
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3525699377059937,
      "learning_rate": 1.536949895422355e-05,
      "loss": 2.384,
      "step": 24884
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9814115166664124,
      "learning_rate": 1.5369151602721257e-05,
      "loss": 2.3688,
      "step": 24885
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1246150732040405,
      "learning_rate": 1.5368804242116794e-05,
      "loss": 2.3753,
      "step": 24886
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0638238191604614,
      "learning_rate": 1.5368456872410752e-05,
      "loss": 2.4169,
      "step": 24887
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.454817533493042,
      "learning_rate": 1.5368109493603714e-05,
      "loss": 2.3399,
      "step": 24888
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0684514045715332,
      "learning_rate": 1.5367762105696276e-05,
      "loss": 2.4196,
      "step": 24889
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1975054740905762,
      "learning_rate": 1.536741470868902e-05,
      "loss": 2.3887,
      "step": 24890
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0167921781539917,
      "learning_rate": 1.536706730258254e-05,
      "loss": 2.4783,
      "step": 24891
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0382860898971558,
      "learning_rate": 1.5366719887377423e-05,
      "loss": 2.3888,
      "step": 24892
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0635426044464111,
      "learning_rate": 1.536637246307426e-05,
      "loss": 2.6795,
      "step": 24893
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9707269668579102,
      "learning_rate": 1.5366025029673638e-05,
      "loss": 2.3085,
      "step": 24894
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0712660551071167,
      "learning_rate": 1.5365677587176146e-05,
      "loss": 2.6129,
      "step": 24895
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3153526782989502,
      "learning_rate": 1.5365330135582367e-05,
      "loss": 2.5216,
      "step": 24896
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.046417474746704,
      "learning_rate": 1.5364982674892905e-05,
      "loss": 2.5117,
      "step": 24897
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0029453039169312,
      "learning_rate": 1.5364635205108335e-05,
      "loss": 2.4351,
      "step": 24898
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1653271913528442,
      "learning_rate": 1.5364287726229253e-05,
      "loss": 2.4377,
      "step": 24899
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1094690561294556,
      "learning_rate": 1.5363940238256246e-05,
      "loss": 2.3418,
      "step": 24900
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0295162200927734,
      "learning_rate": 1.5363592741189904e-05,
      "loss": 2.4093,
      "step": 24901
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9921897649765015,
      "learning_rate": 1.5363245235030815e-05,
      "loss": 2.4362,
      "step": 24902
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0505355596542358,
      "learning_rate": 1.536289771977957e-05,
      "loss": 2.6252,
      "step": 24903
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.064784288406372,
      "learning_rate": 1.5362550195436753e-05,
      "loss": 2.6533,
      "step": 24904
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.176456332206726,
      "learning_rate": 1.536220266200296e-05,
      "loss": 2.2841,
      "step": 24905
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1188263893127441,
      "learning_rate": 1.5361855119478778e-05,
      "loss": 2.375,
      "step": 24906
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0759328603744507,
      "learning_rate": 1.5361507567864794e-05,
      "loss": 2.4769,
      "step": 24907
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1676099300384521,
      "learning_rate": 1.53611600071616e-05,
      "loss": 2.2665,
      "step": 24908
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.988959789276123,
      "learning_rate": 1.5360812437369782e-05,
      "loss": 2.0598,
      "step": 24909
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.019027590751648,
      "learning_rate": 1.5360464858489933e-05,
      "loss": 2.3232,
      "step": 24910
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9260450601577759,
      "learning_rate": 1.5360117270522637e-05,
      "loss": 2.6592,
      "step": 24911
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0181970596313477,
      "learning_rate": 1.5359769673468487e-05,
      "loss": 2.3359,
      "step": 24912
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9775312542915344,
      "learning_rate": 1.5359422067328075e-05,
      "loss": 2.5108,
      "step": 24913
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0644056797027588,
      "learning_rate": 1.5359074452101985e-05,
      "loss": 2.4927,
      "step": 24914
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0996835231781006,
      "learning_rate": 1.535872682779081e-05,
      "loss": 2.3201,
      "step": 24915
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1004191637039185,
      "learning_rate": 1.5358379194395133e-05,
      "loss": 2.4148,
      "step": 24916
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0456264019012451,
      "learning_rate": 1.535803155191555e-05,
      "loss": 2.4356,
      "step": 24917
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0386457443237305,
      "learning_rate": 1.5357683900352652e-05,
      "loss": 2.4564,
      "step": 24918
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.025339961051941,
      "learning_rate": 1.535733623970702e-05,
      "loss": 2.4498,
      "step": 24919
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1523476839065552,
      "learning_rate": 1.535698856997925e-05,
      "loss": 2.4804,
      "step": 24920
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1500338315963745,
      "learning_rate": 1.535664089116993e-05,
      "loss": 2.4843,
      "step": 24921
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0428858995437622,
      "learning_rate": 1.5356293203279645e-05,
      "loss": 2.513,
      "step": 24922
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0330661535263062,
      "learning_rate": 1.5355945506308994e-05,
      "loss": 2.3473,
      "step": 24923
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0545498132705688,
      "learning_rate": 1.5355597800258556e-05,
      "loss": 2.4819,
      "step": 24924
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1333630084991455,
      "learning_rate": 1.5355250085128924e-05,
      "loss": 2.4211,
      "step": 24925
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0133177042007446,
      "learning_rate": 1.5354902360920695e-05,
      "loss": 2.328,
      "step": 24926
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9948081374168396,
      "learning_rate": 1.5354554627634447e-05,
      "loss": 2.4439,
      "step": 24927
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9711465239524841,
      "learning_rate": 1.535420688527078e-05,
      "loss": 2.5915,
      "step": 24928
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0278828144073486,
      "learning_rate": 1.5353859133830274e-05,
      "loss": 2.3649,
      "step": 24929
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9461520314216614,
      "learning_rate": 1.535351137331352e-05,
      "loss": 2.3282,
      "step": 24930
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9796367287635803,
      "learning_rate": 1.5353163603721116e-05,
      "loss": 2.5397,
      "step": 24931
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0589126348495483,
      "learning_rate": 1.5352815825053643e-05,
      "loss": 2.7754,
      "step": 24932
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0802111625671387,
      "learning_rate": 1.535246803731169e-05,
      "loss": 2.2695,
      "step": 24933
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0644466876983643,
      "learning_rate": 1.5352120240495855e-05,
      "loss": 2.1253,
      "step": 24934
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.071786880493164,
      "learning_rate": 1.535177243460672e-05,
      "loss": 2.3183,
      "step": 24935
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1313077211380005,
      "learning_rate": 1.535142461964488e-05,
      "loss": 2.5305,
      "step": 24936
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0367199182510376,
      "learning_rate": 1.535107679561092e-05,
      "loss": 2.5712,
      "step": 24937
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.045644760131836,
      "learning_rate": 1.5350728962505434e-05,
      "loss": 2.2706,
      "step": 24938
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.028635859489441,
      "learning_rate": 1.5350381120329005e-05,
      "loss": 2.2429,
      "step": 24939
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0673311948776245,
      "learning_rate": 1.535003326908223e-05,
      "loss": 2.4309,
      "step": 24940
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0071173906326294,
      "learning_rate": 1.5349685408765695e-05,
      "loss": 2.3875,
      "step": 24941
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0697205066680908,
      "learning_rate": 1.534933753937999e-05,
      "loss": 2.2307,
      "step": 24942
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0634247064590454,
      "learning_rate": 1.5348989660925705e-05,
      "loss": 2.3197,
      "step": 24943
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0380221605300903,
      "learning_rate": 1.534864177340343e-05,
      "loss": 2.5424,
      "step": 24944
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1324557065963745,
      "learning_rate": 1.5348293876813752e-05,
      "loss": 2.3949,
      "step": 24945
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1692090034484863,
      "learning_rate": 1.5347945971157266e-05,
      "loss": 2.4855,
      "step": 24946
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0509419441223145,
      "learning_rate": 1.534759805643456e-05,
      "loss": 2.5511,
      "step": 24947
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9912024736404419,
      "learning_rate": 1.5347250132646218e-05,
      "loss": 2.409,
      "step": 24948
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0068163871765137,
      "learning_rate": 1.534690219979284e-05,
      "loss": 2.4968,
      "step": 24949
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0159151554107666,
      "learning_rate": 1.5346554257875007e-05,
      "loss": 2.2088,
      "step": 24950
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0169075727462769,
      "learning_rate": 1.5346206306893313e-05,
      "loss": 2.4649,
      "step": 24951
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9973908066749573,
      "learning_rate": 1.534585834684835e-05,
      "loss": 2.4005,
      "step": 24952
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0690971612930298,
      "learning_rate": 1.5345510377740704e-05,
      "loss": 2.4544,
      "step": 24953
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0019973516464233,
      "learning_rate": 1.5345162399570967e-05,
      "loss": 2.4377,
      "step": 24954
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0675519704818726,
      "learning_rate": 1.5344814412339726e-05,
      "loss": 2.2556,
      "step": 24955
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9138646125793457,
      "learning_rate": 1.5344466416047573e-05,
      "loss": 2.3301,
      "step": 24956
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0127778053283691,
      "learning_rate": 1.5344118410695095e-05,
      "loss": 2.335,
      "step": 24957
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0283647775650024,
      "learning_rate": 1.534377039628289e-05,
      "loss": 2.5221,
      "step": 24958
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.9508605599403381,
      "learning_rate": 1.534342237281154e-05,
      "loss": 2.2628,
      "step": 24959
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0214704275131226,
      "learning_rate": 1.534307434028164e-05,
      "loss": 2.4054,
      "step": 24960
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9854013323783875,
      "learning_rate": 1.5342726298693778e-05,
      "loss": 2.3442,
      "step": 24961
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0971434116363525,
      "learning_rate": 1.534237824804854e-05,
      "loss": 2.5079,
      "step": 24962
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0387095212936401,
      "learning_rate": 1.5342030188346523e-05,
      "loss": 2.4459,
      "step": 24963
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1153459548950195,
      "learning_rate": 1.5341682119588313e-05,
      "loss": 2.2441,
      "step": 24964
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.127265214920044,
      "learning_rate": 1.53413340417745e-05,
      "loss": 2.2959,
      "step": 24965
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9950647950172424,
      "learning_rate": 1.534098595490568e-05,
      "loss": 2.2431,
      "step": 24966
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0898250341415405,
      "learning_rate": 1.5340637858982435e-05,
      "loss": 2.2087,
      "step": 24967
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0708686113357544,
      "learning_rate": 1.5340289754005358e-05,
      "loss": 2.2002,
      "step": 24968
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0880887508392334,
      "learning_rate": 1.533994163997504e-05,
      "loss": 2.4137,
      "step": 24969
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0126385688781738,
      "learning_rate": 1.533959351689207e-05,
      "loss": 2.3817,
      "step": 24970
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0685518980026245,
      "learning_rate": 1.533924538475704e-05,
      "loss": 2.4576,
      "step": 24971
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2064517736434937,
      "learning_rate": 1.5338897243570536e-05,
      "loss": 2.0862,
      "step": 24972
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0501413345336914,
      "learning_rate": 1.5338549093333154e-05,
      "loss": 2.3901,
      "step": 24973
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1127378940582275,
      "learning_rate": 1.5338200934045482e-05,
      "loss": 2.2534,
      "step": 24974
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.072610855102539,
      "learning_rate": 1.5337852765708107e-05,
      "loss": 2.6131,
      "step": 24975
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9681482315063477,
      "learning_rate": 1.5337504588321622e-05,
      "loss": 2.3209,
      "step": 24976
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9838489294052124,
      "learning_rate": 1.5337156401886616e-05,
      "loss": 2.3131,
      "step": 24977
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9855980277061462,
      "learning_rate": 1.533680820640368e-05,
      "loss": 2.4555,
      "step": 24978
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9744206666946411,
      "learning_rate": 1.533646000187341e-05,
      "loss": 2.3874,
      "step": 24979
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.068648338317871,
      "learning_rate": 1.533611178829639e-05,
      "loss": 2.5927,
      "step": 24980
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.081563115119934,
      "learning_rate": 1.533576356567321e-05,
      "loss": 2.3754,
      "step": 24981
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.087618350982666,
      "learning_rate": 1.5335415334004458e-05,
      "loss": 2.3015,
      "step": 24982
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1124260425567627,
      "learning_rate": 1.533506709329073e-05,
      "loss": 2.3,
      "step": 24983
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0408891439437866,
      "learning_rate": 1.5334718843532614e-05,
      "loss": 2.3418,
      "step": 24984
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0290271043777466,
      "learning_rate": 1.53343705847307e-05,
      "loss": 2.565,
      "step": 24985
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9948166608810425,
      "learning_rate": 1.5334022316885582e-05,
      "loss": 2.4672,
      "step": 24986
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.210681676864624,
      "learning_rate": 1.533367403999785e-05,
      "loss": 2.5082,
      "step": 24987
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0135927200317383,
      "learning_rate": 1.5333325754068086e-05,
      "loss": 2.6496,
      "step": 24988
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0893303155899048,
      "learning_rate": 1.5332977459096886e-05,
      "loss": 2.5335,
      "step": 24989
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0417975187301636,
      "learning_rate": 1.5332629155084843e-05,
      "loss": 2.5167,
      "step": 24990
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3256075382232666,
      "learning_rate": 1.5332280842032547e-05,
      "loss": 2.3766,
      "step": 24991
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1123697757720947,
      "learning_rate": 1.5331932519940586e-05,
      "loss": 2.3318,
      "step": 24992
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.202338457107544,
      "learning_rate": 1.533158418880955e-05,
      "loss": 2.4927,
      "step": 24993
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1682500839233398,
      "learning_rate": 1.533123584864003e-05,
      "loss": 2.4476,
      "step": 24994
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0861488580703735,
      "learning_rate": 1.533088749943262e-05,
      "loss": 2.4089,
      "step": 24995
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0240674018859863,
      "learning_rate": 1.5330539141187902e-05,
      "loss": 2.1234,
      "step": 24996
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1329323053359985,
      "learning_rate": 1.5330190773906476e-05,
      "loss": 2.3962,
      "step": 24997
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1241482496261597,
      "learning_rate": 1.5329842397588932e-05,
      "loss": 2.5271,
      "step": 24998
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1396617889404297,
      "learning_rate": 1.5329494012235855e-05,
      "loss": 2.3969,
      "step": 24999
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9809417724609375,
      "learning_rate": 1.532914561784784e-05,
      "loss": 2.5242,
      "step": 25000
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9664525985717773,
      "learning_rate": 1.5328797214425476e-05,
      "loss": 2.3168,
      "step": 25001
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1112020015716553,
      "learning_rate": 1.532844880196935e-05,
      "loss": 2.2136,
      "step": 25002
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0423879623413086,
      "learning_rate": 1.5328100380480058e-05,
      "loss": 2.4916,
      "step": 25003
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0092668533325195,
      "learning_rate": 1.5327751949958186e-05,
      "loss": 2.3517,
      "step": 25004
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.069808006286621,
      "learning_rate": 1.5327403510404334e-05,
      "loss": 2.3395,
      "step": 25005
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0114673376083374,
      "learning_rate": 1.532705506181908e-05,
      "loss": 2.5485,
      "step": 25006
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.161266803741455,
      "learning_rate": 1.5326706604203024e-05,
      "loss": 2.6211,
      "step": 25007
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.958458423614502,
      "learning_rate": 1.5326358137556753e-05,
      "loss": 2.5527,
      "step": 25008
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9815175533294678,
      "learning_rate": 1.5326009661880855e-05,
      "loss": 2.3201,
      "step": 25009
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9580057263374329,
      "learning_rate": 1.532566117717593e-05,
      "loss": 2.2683,
      "step": 25010
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1195926666259766,
      "learning_rate": 1.5325312683442557e-05,
      "loss": 2.417,
      "step": 25011
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9597085118293762,
      "learning_rate": 1.5324964180681338e-05,
      "loss": 2.3669,
      "step": 25012
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.065137505531311,
      "learning_rate": 1.5324615668892855e-05,
      "loss": 2.2964,
      "step": 25013
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9601421356201172,
      "learning_rate": 1.53242671480777e-05,
      "loss": 2.4265,
      "step": 25014
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.218752384185791,
      "learning_rate": 1.5323918618236473e-05,
      "loss": 2.2956,
      "step": 25015
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0819764137268066,
      "learning_rate": 1.5323570079369753e-05,
      "loss": 2.2274,
      "step": 25016
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.052349328994751,
      "learning_rate": 1.5323221531478138e-05,
      "loss": 2.3317,
      "step": 25017
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2998278141021729,
      "learning_rate": 1.5322872974562213e-05,
      "loss": 2.376,
      "step": 25018
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.094862937927246,
      "learning_rate": 1.5322524408622575e-05,
      "loss": 2.2192,
      "step": 25019
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.039408564567566,
      "learning_rate": 1.5322175833659817e-05,
      "loss": 2.5066,
      "step": 25020
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0303194522857666,
      "learning_rate": 1.532182724967452e-05,
      "loss": 2.3067,
      "step": 25021
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1597176790237427,
      "learning_rate": 1.5321478656667285e-05,
      "loss": 2.4488,
      "step": 25022
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9827515482902527,
      "learning_rate": 1.5321130054638694e-05,
      "loss": 2.6301,
      "step": 25023
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0827146768569946,
      "learning_rate": 1.532078144358934e-05,
      "loss": 2.5309,
      "step": 25024
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9969695806503296,
      "learning_rate": 1.532043282351982e-05,
      "loss": 2.4184,
      "step": 25025
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9395972490310669,
      "learning_rate": 1.5320084194430723e-05,
      "loss": 2.2813,
      "step": 25026
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0037145614624023,
      "learning_rate": 1.5319735556322638e-05,
      "loss": 2.311,
      "step": 25027
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0199476480484009,
      "learning_rate": 1.5319386909196156e-05,
      "loss": 2.3791,
      "step": 25028
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0054749250411987,
      "learning_rate": 1.5319038253051866e-05,
      "loss": 2.2197,
      "step": 25029
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1199995279312134,
      "learning_rate": 1.5318689587890365e-05,
      "loss": 2.5393,
      "step": 25030
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1499583721160889,
      "learning_rate": 1.531834091371224e-05,
      "loss": 2.4411,
      "step": 25031
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.006988763809204,
      "learning_rate": 1.531799223051808e-05,
      "loss": 2.5809,
      "step": 25032
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.068461298942566,
      "learning_rate": 1.531764353830848e-05,
      "loss": 2.532,
      "step": 25033
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0865857601165771,
      "learning_rate": 1.531729483708403e-05,
      "loss": 2.5233,
      "step": 25034
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9991288781166077,
      "learning_rate": 1.5316946126845323e-05,
      "loss": 2.66,
      "step": 25035
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0780266523361206,
      "learning_rate": 1.5316597407592946e-05,
      "loss": 2.5517,
      "step": 25036
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0535467863082886,
      "learning_rate": 1.5316248679327494e-05,
      "loss": 2.1639,
      "step": 25037
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1615153551101685,
      "learning_rate": 1.5315899942049553e-05,
      "loss": 2.5722,
      "step": 25038
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.077346920967102,
      "learning_rate": 1.5315551195759725e-05,
      "loss": 2.1444,
      "step": 25039
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0053890943527222,
      "learning_rate": 1.5315202440458587e-05,
      "loss": 2.3804,
      "step": 25040
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.15482497215271,
      "learning_rate": 1.531485367614674e-05,
      "loss": 2.492,
      "step": 25041
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0249426364898682,
      "learning_rate": 1.5314504902824773e-05,
      "loss": 2.5734,
      "step": 25042
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9902834296226501,
      "learning_rate": 1.5314156120493272e-05,
      "loss": 2.406,
      "step": 25043
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0363026857376099,
      "learning_rate": 1.531380732915284e-05,
      "loss": 2.624,
      "step": 25044
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.072224497795105,
      "learning_rate": 1.5313458528804058e-05,
      "loss": 2.3374,
      "step": 25045
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9383209347724915,
      "learning_rate": 1.5313109719447524e-05,
      "loss": 2.3822,
      "step": 25046
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0528453588485718,
      "learning_rate": 1.5312760901083825e-05,
      "loss": 2.4385,
      "step": 25047
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.097810983657837,
      "learning_rate": 1.531241207371355e-05,
      "loss": 2.5052,
      "step": 25048
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0322719812393188,
      "learning_rate": 1.5312063237337294e-05,
      "loss": 2.303,
      "step": 25049
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0065668821334839,
      "learning_rate": 1.531171439195565e-05,
      "loss": 2.3846,
      "step": 25050
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9892202615737915,
      "learning_rate": 1.5311365537569204e-05,
      "loss": 2.5986,
      "step": 25051
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9816063642501831,
      "learning_rate": 1.5311016674178554e-05,
      "loss": 2.3348,
      "step": 25052
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.014654278755188,
      "learning_rate": 1.531066780178429e-05,
      "loss": 2.1045,
      "step": 25053
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.090570330619812,
      "learning_rate": 1.5310318920387e-05,
      "loss": 2.3251,
      "step": 25054
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2524223327636719,
      "learning_rate": 1.5309970029987276e-05,
      "loss": 2.5249,
      "step": 25055
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9853727221488953,
      "learning_rate": 1.530962113058571e-05,
      "loss": 2.6724,
      "step": 25056
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2081501483917236,
      "learning_rate": 1.5309272222182898e-05,
      "loss": 2.2329,
      "step": 25057
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1478407382965088,
      "learning_rate": 1.5308923304779427e-05,
      "loss": 2.4971,
      "step": 25058
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0882282257080078,
      "learning_rate": 1.5308574378375886e-05,
      "loss": 2.3976,
      "step": 25059
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9683533906936646,
      "learning_rate": 1.530822544297287e-05,
      "loss": 2.4681,
      "step": 25060
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0272479057312012,
      "learning_rate": 1.5307876498570973e-05,
      "loss": 2.3879,
      "step": 25061
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9736842513084412,
      "learning_rate": 1.530752754517078e-05,
      "loss": 2.3287,
      "step": 25062
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4628067016601562,
      "learning_rate": 1.530717858277289e-05,
      "loss": 2.4201,
      "step": 25063
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9413877129554749,
      "learning_rate": 1.5306829611377888e-05,
      "loss": 2.0999,
      "step": 25064
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.091257095336914,
      "learning_rate": 1.530648063098637e-05,
      "loss": 2.5428,
      "step": 25065
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1007930040359497,
      "learning_rate": 1.5306131641598927e-05,
      "loss": 2.3551,
      "step": 25066
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0372064113616943,
      "learning_rate": 1.5305782643216148e-05,
      "loss": 2.5893,
      "step": 25067
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2252088785171509,
      "learning_rate": 1.5305433635838625e-05,
      "loss": 2.3669,
      "step": 25068
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.136888027191162,
      "learning_rate": 1.5305084619466954e-05,
      "loss": 2.2507,
      "step": 25069
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1714249849319458,
      "learning_rate": 1.530473559410172e-05,
      "loss": 2.4648,
      "step": 25070
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0200021266937256,
      "learning_rate": 1.5304386559743523e-05,
      "loss": 2.5152,
      "step": 25071
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0739707946777344,
      "learning_rate": 1.5304037516392948e-05,
      "loss": 2.4028,
      "step": 25072
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0688719749450684,
      "learning_rate": 1.530368846405059e-05,
      "loss": 2.4429,
      "step": 25073
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.028400182723999,
      "learning_rate": 1.530333940271704e-05,
      "loss": 2.2822,
      "step": 25074
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1870360374450684,
      "learning_rate": 1.5302990332392888e-05,
      "loss": 2.5046,
      "step": 25075
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.038901686668396,
      "learning_rate": 1.5302641253078724e-05,
      "loss": 2.6013,
      "step": 25076
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0743621587753296,
      "learning_rate": 1.5302292164775144e-05,
      "loss": 2.1323,
      "step": 25077
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2102867364883423,
      "learning_rate": 1.5301943067482743e-05,
      "loss": 2.3804,
      "step": 25078
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9628194570541382,
      "learning_rate": 1.530159396120211e-05,
      "loss": 2.507,
      "step": 25079
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3112105131149292,
      "learning_rate": 1.530124484593383e-05,
      "loss": 2.4052,
      "step": 25080
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0446795225143433,
      "learning_rate": 1.53008957216785e-05,
      "loss": 2.4184,
      "step": 25081
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3067457675933838,
      "learning_rate": 1.5300546588436712e-05,
      "loss": 2.5409,
      "step": 25082
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2864348888397217,
      "learning_rate": 1.530019744620906e-05,
      "loss": 2.3872,
      "step": 25083
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0914485454559326,
      "learning_rate": 1.5299848294996134e-05,
      "loss": 2.4687,
      "step": 25084
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9814939498901367,
      "learning_rate": 1.5299499134798526e-05,
      "loss": 2.5333,
      "step": 25085
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0156713724136353,
      "learning_rate": 1.529914996561683e-05,
      "loss": 2.2406,
      "step": 25086
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0783976316452026,
      "learning_rate": 1.529880078745163e-05,
      "loss": 2.2337,
      "step": 25087
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3152183294296265,
      "learning_rate": 1.5298451600303527e-05,
      "loss": 2.5762,
      "step": 25088
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.946254312992096,
      "learning_rate": 1.5298102404173108e-05,
      "loss": 2.3691,
      "step": 25089
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1321684122085571,
      "learning_rate": 1.5297753199060968e-05,
      "loss": 2.2412,
      "step": 25090
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.030260682106018,
      "learning_rate": 1.5297403984967694e-05,
      "loss": 2.1875,
      "step": 25091
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1048816442489624,
      "learning_rate": 1.529705476189389e-05,
      "loss": 2.3794,
      "step": 25092
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0460059642791748,
      "learning_rate": 1.529670552984013e-05,
      "loss": 2.2886,
      "step": 25093
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1143008470535278,
      "learning_rate": 1.5296356288807018e-05,
      "loss": 2.2888,
      "step": 25094
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9977063536643982,
      "learning_rate": 1.5296007038795145e-05,
      "loss": 2.3827,
      "step": 25095
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0094850063323975,
      "learning_rate": 1.5295657779805103e-05,
      "loss": 2.2942,
      "step": 25096
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9907174706459045,
      "learning_rate": 1.5295308511837478e-05,
      "loss": 2.3107,
      "step": 25097
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2386749982833862,
      "learning_rate": 1.529495923489287e-05,
      "loss": 2.3919,
      "step": 25098
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9869843125343323,
      "learning_rate": 1.529460994897187e-05,
      "loss": 2.3877,
      "step": 25099
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1675264835357666,
      "learning_rate": 1.5294260654075065e-05,
      "loss": 2.6137,
      "step": 25100
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0219193696975708,
      "learning_rate": 1.529391135020305e-05,
      "loss": 2.3248,
      "step": 25101
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.135164737701416,
      "learning_rate": 1.529356203735642e-05,
      "loss": 2.5844,
      "step": 25102
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1856324672698975,
      "learning_rate": 1.5293212715535762e-05,
      "loss": 2.3722,
      "step": 25103
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1506990194320679,
      "learning_rate": 1.5292863384741673e-05,
      "loss": 2.4699,
      "step": 25104
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1997337341308594,
      "learning_rate": 1.5292514044974744e-05,
      "loss": 2.2963,
      "step": 25105
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0004067420959473,
      "learning_rate": 1.5292164696235564e-05,
      "loss": 2.4039,
      "step": 25106
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0873475074768066,
      "learning_rate": 1.529181533852473e-05,
      "loss": 2.5063,
      "step": 25107
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1369893550872803,
      "learning_rate": 1.529146597184283e-05,
      "loss": 2.5431,
      "step": 25108
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0080459117889404,
      "learning_rate": 1.5291116596190456e-05,
      "loss": 2.431,
      "step": 25109
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9896987080574036,
      "learning_rate": 1.5290767211568206e-05,
      "loss": 2.4621,
      "step": 25110
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1494723558425903,
      "learning_rate": 1.5290417817976666e-05,
      "loss": 2.353,
      "step": 25111
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0356485843658447,
      "learning_rate": 1.529006841541643e-05,
      "loss": 2.3552,
      "step": 25112
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0340200662612915,
      "learning_rate": 1.5289719003888094e-05,
      "loss": 2.478,
      "step": 25113
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0206953287124634,
      "learning_rate": 1.528936958339225e-05,
      "loss": 2.2151,
      "step": 25114
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0892531871795654,
      "learning_rate": 1.528902015392948e-05,
      "loss": 2.648,
      "step": 25115
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1183403730392456,
      "learning_rate": 1.5288670715500392e-05,
      "loss": 2.5465,
      "step": 25116
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1081207990646362,
      "learning_rate": 1.5288321268105567e-05,
      "loss": 2.4218,
      "step": 25117
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9974584579467773,
      "learning_rate": 1.5287971811745603e-05,
      "loss": 2.3528,
      "step": 25118
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.106201410293579,
      "learning_rate": 1.528762234642109e-05,
      "loss": 2.6038,
      "step": 25119
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9434003829956055,
      "learning_rate": 1.5287272872132622e-05,
      "loss": 2.4634,
      "step": 25120
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2091420888900757,
      "learning_rate": 1.5286923388880785e-05,
      "loss": 2.3521,
      "step": 25121
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.030410885810852,
      "learning_rate": 1.528657389666618e-05,
      "loss": 2.4959,
      "step": 25122
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0594360828399658,
      "learning_rate": 1.52862243954894e-05,
      "loss": 2.3142,
      "step": 25123
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0290782451629639,
      "learning_rate": 1.528587488535103e-05,
      "loss": 2.5163,
      "step": 25124
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0374854803085327,
      "learning_rate": 1.528552536625167e-05,
      "loss": 2.0628,
      "step": 25125
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0483418703079224,
      "learning_rate": 1.5285175838191908e-05,
      "loss": 2.5501,
      "step": 25126
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.048364281654358,
      "learning_rate": 1.5284826301172337e-05,
      "loss": 2.5828,
      "step": 25127
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0571049451828003,
      "learning_rate": 1.5284476755193546e-05,
      "loss": 2.3693,
      "step": 25128
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0406824350357056,
      "learning_rate": 1.5284127200256135e-05,
      "loss": 2.4021,
      "step": 25129
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2342922687530518,
      "learning_rate": 1.5283777636360696e-05,
      "loss": 2.5913,
      "step": 25130
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9415925145149231,
      "learning_rate": 1.5283428063507816e-05,
      "loss": 2.1867,
      "step": 25131
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0974799394607544,
      "learning_rate": 1.5283078481698092e-05,
      "loss": 2.3104,
      "step": 25132
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0264981985092163,
      "learning_rate": 1.528272889093211e-05,
      "loss": 2.4921,
      "step": 25133
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0461523532867432,
      "learning_rate": 1.5282379291210473e-05,
      "loss": 2.4198,
      "step": 25134
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.04114830493927,
      "learning_rate": 1.5282029682533765e-05,
      "loss": 2.3986,
      "step": 25135
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.11564040184021,
      "learning_rate": 1.5281680064902583e-05,
      "loss": 2.4608,
      "step": 25136
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0721009969711304,
      "learning_rate": 1.528133043831752e-05,
      "loss": 2.293,
      "step": 25137
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2518779039382935,
      "learning_rate": 1.528098080277917e-05,
      "loss": 2.3834,
      "step": 25138
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.016131043434143,
      "learning_rate": 1.528063115828812e-05,
      "loss": 2.5564,
      "step": 25139
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0546011924743652,
      "learning_rate": 1.5280281504844967e-05,
      "loss": 2.5674,
      "step": 25140
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0750364065170288,
      "learning_rate": 1.5279931842450303e-05,
      "loss": 2.3491,
      "step": 25141
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9863946437835693,
      "learning_rate": 1.527958217110472e-05,
      "loss": 2.5056,
      "step": 25142
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0315947532653809,
      "learning_rate": 1.5279232490808808e-05,
      "loss": 2.4557,
      "step": 25143
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0527424812316895,
      "learning_rate": 1.527888280156317e-05,
      "loss": 2.4106,
      "step": 25144
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0194889307022095,
      "learning_rate": 1.5278533103368387e-05,
      "loss": 2.2347,
      "step": 25145
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2061259746551514,
      "learning_rate": 1.5278183396225058e-05,
      "loss": 2.3955,
      "step": 25146
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0973060131072998,
      "learning_rate": 1.5277833680133777e-05,
      "loss": 2.4993,
      "step": 25147
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9622814655303955,
      "learning_rate": 1.527748395509513e-05,
      "loss": 2.3561,
      "step": 25148
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2240817546844482,
      "learning_rate": 1.5277134221109714e-05,
      "loss": 2.3735,
      "step": 25149
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1844308376312256,
      "learning_rate": 1.5276784478178127e-05,
      "loss": 2.3957,
      "step": 25150
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.91034334897995,
      "learning_rate": 1.5276434726300955e-05,
      "loss": 2.4093,
      "step": 25151
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1140720844268799,
      "learning_rate": 1.5276084965478793e-05,
      "loss": 2.334,
      "step": 25152
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9996068477630615,
      "learning_rate": 1.5275735195712234e-05,
      "loss": 2.4843,
      "step": 25153
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0887399911880493,
      "learning_rate": 1.527538541700187e-05,
      "loss": 2.4831,
      "step": 25154
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0929185152053833,
      "learning_rate": 1.5275035629348296e-05,
      "loss": 2.3493,
      "step": 25155
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0107320547103882,
      "learning_rate": 1.5274685832752104e-05,
      "loss": 2.2498,
      "step": 25156
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1223872900009155,
      "learning_rate": 1.5274336027213887e-05,
      "loss": 2.4208,
      "step": 25157
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0580605268478394,
      "learning_rate": 1.5273986212734237e-05,
      "loss": 2.3211,
      "step": 25158
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1274394989013672,
      "learning_rate": 1.5273636389313748e-05,
      "loss": 2.4268,
      "step": 25159
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2467012405395508,
      "learning_rate": 1.5273286556953016e-05,
      "loss": 2.4901,
      "step": 25160
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0490931272506714,
      "learning_rate": 1.5272936715652627e-05,
      "loss": 2.4347,
      "step": 25161
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0445398092269897,
      "learning_rate": 1.5272586865413178e-05,
      "loss": 2.2532,
      "step": 25162
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0943825244903564,
      "learning_rate": 1.5272237006235267e-05,
      "loss": 2.4752,
      "step": 25163
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1341067552566528,
      "learning_rate": 1.527188713811948e-05,
      "loss": 2.39,
      "step": 25164
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0304415225982666,
      "learning_rate": 1.527153726106641e-05,
      "loss": 2.1763,
      "step": 25165
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0764036178588867,
      "learning_rate": 1.5271187375076653e-05,
      "loss": 2.5747,
      "step": 25166
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9918897747993469,
      "learning_rate": 1.5270837480150805e-05,
      "loss": 2.5319,
      "step": 25167
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1266285181045532,
      "learning_rate": 1.5270487576289454e-05,
      "loss": 2.3791,
      "step": 25168
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0424995422363281,
      "learning_rate": 1.5270137663493196e-05,
      "loss": 2.4782,
      "step": 25169
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1573357582092285,
      "learning_rate": 1.526978774176262e-05,
      "loss": 2.4663,
      "step": 25170
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0707647800445557,
      "learning_rate": 1.5269437811098328e-05,
      "loss": 2.3735,
      "step": 25171
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9706248641014099,
      "learning_rate": 1.5269087871500904e-05,
      "loss": 2.344,
      "step": 25172
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0665456056594849,
      "learning_rate": 1.5268737922970945e-05,
      "loss": 2.4894,
      "step": 25173
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1623883247375488,
      "learning_rate": 1.5268387965509047e-05,
      "loss": 2.3618,
      "step": 25174
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1181285381317139,
      "learning_rate": 1.52680379991158e-05,
      "loss": 2.2774,
      "step": 25175
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.213637351989746,
      "learning_rate": 1.5267688023791794e-05,
      "loss": 2.5561,
      "step": 25176
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9949375987052917,
      "learning_rate": 1.526733803953763e-05,
      "loss": 2.2025,
      "step": 25177
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0927523374557495,
      "learning_rate": 1.5266988046353897e-05,
      "loss": 2.1906,
      "step": 25178
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0872907638549805,
      "learning_rate": 1.5266638044241186e-05,
      "loss": 2.25,
      "step": 25179
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1494134664535522,
      "learning_rate": 1.5266288033200097e-05,
      "loss": 2.5006,
      "step": 25180
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9531768560409546,
      "learning_rate": 1.5265938013231217e-05,
      "loss": 2.3933,
      "step": 25181
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2897093296051025,
      "learning_rate": 1.526558798433514e-05,
      "loss": 2.5592,
      "step": 25182
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1005367040634155,
      "learning_rate": 1.5265237946512463e-05,
      "loss": 2.6094,
      "step": 25183
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9971022605895996,
      "learning_rate": 1.5264887899763782e-05,
      "loss": 2.3348,
      "step": 25184
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0472737550735474,
      "learning_rate": 1.526453784408968e-05,
      "loss": 2.5896,
      "step": 25185
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.338263750076294,
      "learning_rate": 1.526418777949076e-05,
      "loss": 2.6327,
      "step": 25186
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.96612548828125,
      "learning_rate": 1.526383770596761e-05,
      "loss": 2.2264,
      "step": 25187
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1140954494476318,
      "learning_rate": 1.5263487623520825e-05,
      "loss": 2.5073,
      "step": 25188
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9982485771179199,
      "learning_rate": 1.5263137532151e-05,
      "loss": 2.3862,
      "step": 25189
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2415162324905396,
      "learning_rate": 1.526278743185873e-05,
      "loss": 2.4868,
      "step": 25190
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0717135667800903,
      "learning_rate": 1.5262437322644603e-05,
      "loss": 2.5831,
      "step": 25191
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0769914388656616,
      "learning_rate": 1.5262087204509216e-05,
      "loss": 2.409,
      "step": 25192
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0209928750991821,
      "learning_rate": 1.526173707745316e-05,
      "loss": 2.4425,
      "step": 25193
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.114974021911621,
      "learning_rate": 1.5261386941477034e-05,
      "loss": 2.3576,
      "step": 25194
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1204122304916382,
      "learning_rate": 1.5261036796581426e-05,
      "loss": 2.4332,
      "step": 25195
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0435466766357422,
      "learning_rate": 1.5260686642766933e-05,
      "loss": 2.5792,
      "step": 25196
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0143094062805176,
      "learning_rate": 1.526033648003415e-05,
      "loss": 2.3862,
      "step": 25197
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.8929325342178345,
      "learning_rate": 1.525998630838366e-05,
      "loss": 2.173,
      "step": 25198
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.159150242805481,
      "learning_rate": 1.525963612781607e-05,
      "loss": 2.486,
      "step": 25199
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9982780814170837,
      "learning_rate": 1.525928593833197e-05,
      "loss": 2.2128,
      "step": 25200
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0152084827423096,
      "learning_rate": 1.525893573993195e-05,
      "loss": 2.3217,
      "step": 25201
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0778900384902954,
      "learning_rate": 1.5258585532616603e-05,
      "loss": 2.5468,
      "step": 25202
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0793243646621704,
      "learning_rate": 1.525823531638653e-05,
      "loss": 2.3347,
      "step": 25203
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0638697147369385,
      "learning_rate": 1.5257885091242317e-05,
      "loss": 2.5861,
      "step": 25204
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0219526290893555,
      "learning_rate": 1.5257534857184561e-05,
      "loss": 2.6365,
      "step": 25205
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0253018140792847,
      "learning_rate": 1.5257184614213856e-05,
      "loss": 2.699,
      "step": 25206
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9602543711662292,
      "learning_rate": 1.5256834362330795e-05,
      "loss": 2.3667,
      "step": 25207
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.106624960899353,
      "learning_rate": 1.5256484101535974e-05,
      "loss": 2.4562,
      "step": 25208
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.161547064781189,
      "learning_rate": 1.5256133831829983e-05,
      "loss": 2.4733,
      "step": 25209
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2405891418457031,
      "learning_rate": 1.5255783553213417e-05,
      "loss": 2.2662,
      "step": 25210
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.965229332447052,
      "learning_rate": 1.5255433265686871e-05,
      "loss": 2.3868,
      "step": 25211
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.065738320350647,
      "learning_rate": 1.5255082969250938e-05,
      "loss": 2.3483,
      "step": 25212
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0208537578582764,
      "learning_rate": 1.525473266390621e-05,
      "loss": 2.2039,
      "step": 25213
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.140626311302185,
      "learning_rate": 1.5254382349653288e-05,
      "loss": 2.4997,
      "step": 25214
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9841623306274414,
      "learning_rate": 1.5254032026492755e-05,
      "loss": 2.4358,
      "step": 25215
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0063049793243408,
      "learning_rate": 1.5253681694425216e-05,
      "loss": 2.4454,
      "step": 25216
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0297411680221558,
      "learning_rate": 1.5253331353451257e-05,
      "loss": 2.3005,
      "step": 25217
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1051301956176758,
      "learning_rate": 1.5252981003571476e-05,
      "loss": 2.2765,
      "step": 25218
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0721153020858765,
      "learning_rate": 1.5252630644786464e-05,
      "loss": 2.3526,
      "step": 25219
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0402368307113647,
      "learning_rate": 1.5252280277096817e-05,
      "loss": 2.5543,
      "step": 25220
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9942357540130615,
      "learning_rate": 1.5251929900503128e-05,
      "loss": 2.1755,
      "step": 25221
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9650282263755798,
      "learning_rate": 1.5251579515005992e-05,
      "loss": 2.4425,
      "step": 25222
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0091676712036133,
      "learning_rate": 1.5251229120606e-05,
      "loss": 2.386,
      "step": 25223
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1507148742675781,
      "learning_rate": 1.5250878717303754e-05,
      "loss": 2.4077,
      "step": 25224
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2477816343307495,
      "learning_rate": 1.5250528305099839e-05,
      "loss": 2.3371,
      "step": 25225
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.11184561252594,
      "learning_rate": 1.5250177883994852e-05,
      "loss": 2.3725,
      "step": 25226
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0479236841201782,
      "learning_rate": 1.5249827453989388e-05,
      "loss": 2.3853,
      "step": 25227
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0366904735565186,
      "learning_rate": 1.524947701508404e-05,
      "loss": 2.3766,
      "step": 25228
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1447962522506714,
      "learning_rate": 1.5249126567279404e-05,
      "loss": 2.4249,
      "step": 25229
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9346897602081299,
      "learning_rate": 1.5248776110576073e-05,
      "loss": 2.4285,
      "step": 25230
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9967412948608398,
      "learning_rate": 1.524842564497464e-05,
      "loss": 2.3052,
      "step": 25231
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2474309206008911,
      "learning_rate": 1.5248075170475703e-05,
      "loss": 2.3361,
      "step": 25232
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1325727701187134,
      "learning_rate": 1.524772468707985e-05,
      "loss": 2.2853,
      "step": 25233
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.007310390472412,
      "learning_rate": 1.5247374194787679e-05,
      "loss": 2.6378,
      "step": 25234
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.148620367050171,
      "learning_rate": 1.5247023693599783e-05,
      "loss": 2.4126,
      "step": 25235
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1029196977615356,
      "learning_rate": 1.5246673183516758e-05,
      "loss": 2.4162,
      "step": 25236
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9654858112335205,
      "learning_rate": 1.52463226645392e-05,
      "loss": 2.3242,
      "step": 25237
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1139291524887085,
      "learning_rate": 1.5245972136667693e-05,
      "loss": 2.4442,
      "step": 25238
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.948514997959137,
      "learning_rate": 1.5245621599902843e-05,
      "loss": 2.4411,
      "step": 25239
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9619917273521423,
      "learning_rate": 1.5245271054245239e-05,
      "loss": 2.2925,
      "step": 25240
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.305511474609375,
      "learning_rate": 1.5244920499695474e-05,
      "loss": 2.4572,
      "step": 25241
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0468755960464478,
      "learning_rate": 1.5244569936254148e-05,
      "loss": 2.2166,
      "step": 25242
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0663973093032837,
      "learning_rate": 1.524421936392185e-05,
      "loss": 2.4375,
      "step": 25243
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0368725061416626,
      "learning_rate": 1.5243868782699176e-05,
      "loss": 2.3456,
      "step": 25244
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.051856517791748,
      "learning_rate": 1.524351819258672e-05,
      "loss": 2.3751,
      "step": 25245
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.055776596069336,
      "learning_rate": 1.5243167593585073e-05,
      "loss": 2.3631,
      "step": 25246
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2715330123901367,
      "learning_rate": 1.5242816985694836e-05,
      "loss": 2.3821,
      "step": 25247
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0115717649459839,
      "learning_rate": 1.5242466368916602e-05,
      "loss": 2.3988,
      "step": 25248
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.092916488647461,
      "learning_rate": 1.524211574325096e-05,
      "loss": 2.4923,
      "step": 25249
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.075594186782837,
      "learning_rate": 1.5241765108698511e-05,
      "loss": 2.512,
      "step": 25250
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0303691625595093,
      "learning_rate": 1.5241414465259846e-05,
      "loss": 2.4658,
      "step": 25251
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0463767051696777,
      "learning_rate": 1.5241063812935559e-05,
      "loss": 2.4395,
      "step": 25252
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0503627061843872,
      "learning_rate": 1.5240713151726246e-05,
      "loss": 2.3649,
      "step": 25253
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0704962015151978,
      "learning_rate": 1.52403624816325e-05,
      "loss": 2.545,
      "step": 25254
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1781724691390991,
      "learning_rate": 1.5240011802654917e-05,
      "loss": 2.4681,
      "step": 25255
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3765192031860352,
      "learning_rate": 1.523966111479409e-05,
      "loss": 2.3618,
      "step": 25256
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2947207689285278,
      "learning_rate": 1.5239310418050616e-05,
      "loss": 2.2418,
      "step": 25257
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0697928667068481,
      "learning_rate": 1.5238959712425087e-05,
      "loss": 2.2391,
      "step": 25258
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0236479043960571,
      "learning_rate": 1.5238608997918098e-05,
      "loss": 2.3856,
      "step": 25259
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0493581295013428,
      "learning_rate": 1.5238258274530244e-05,
      "loss": 2.1227,
      "step": 25260
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0586191415786743,
      "learning_rate": 1.523790754226212e-05,
      "loss": 2.3866,
      "step": 25261
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0330162048339844,
      "learning_rate": 1.5237556801114321e-05,
      "loss": 2.5219,
      "step": 25262
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.988008975982666,
      "learning_rate": 1.523720605108744e-05,
      "loss": 2.4844,
      "step": 25263
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1812325716018677,
      "learning_rate": 1.523685529218207e-05,
      "loss": 2.4208,
      "step": 25264
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0304657220840454,
      "learning_rate": 1.5236504524398814e-05,
      "loss": 2.3495,
      "step": 25265
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0094270706176758,
      "learning_rate": 1.5236153747738258e-05,
      "loss": 2.2519,
      "step": 25266
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0904749631881714,
      "learning_rate": 1.5235802962200996e-05,
      "loss": 2.3386,
      "step": 25267
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.073683500289917,
      "learning_rate": 1.5235452167787627e-05,
      "loss": 2.5995,
      "step": 25268
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.070665955543518,
      "learning_rate": 1.5235101364498745e-05,
      "loss": 2.6009,
      "step": 25269
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.042470932006836,
      "learning_rate": 1.5234750552334947e-05,
      "loss": 2.5198,
      "step": 25270
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9440035223960876,
      "learning_rate": 1.5234399731296825e-05,
      "loss": 2.2937,
      "step": 25271
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.094860315322876,
      "learning_rate": 1.5234048901384971e-05,
      "loss": 2.3773,
      "step": 25272
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.189352035522461,
      "learning_rate": 1.5233698062599983e-05,
      "loss": 2.4576,
      "step": 25273
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.030055046081543,
      "learning_rate": 1.5233347214942456e-05,
      "loss": 2.4833,
      "step": 25274
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1186953783035278,
      "learning_rate": 1.5232996358412981e-05,
      "loss": 2.6695,
      "step": 25275
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0918655395507812,
      "learning_rate": 1.5232645493012161e-05,
      "loss": 2.3878,
      "step": 25276
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0356218814849854,
      "learning_rate": 1.5232294618740583e-05,
      "loss": 2.5433,
      "step": 25277
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9798769950866699,
      "learning_rate": 1.5231943735598846e-05,
      "loss": 2.5124,
      "step": 25278
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.024970531463623,
      "learning_rate": 1.5231592843587542e-05,
      "loss": 2.3893,
      "step": 25279
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0618623495101929,
      "learning_rate": 1.5231241942707266e-05,
      "loss": 2.4023,
      "step": 25280
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0729601383209229,
      "learning_rate": 1.5230891032958617e-05,
      "loss": 2.466,
      "step": 25281
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0954195261001587,
      "learning_rate": 1.5230540114342184e-05,
      "loss": 2.6036,
      "step": 25282
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1063463687896729,
      "learning_rate": 1.5230189186858569e-05,
      "loss": 2.1903,
      "step": 25283
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.056789517402649,
      "learning_rate": 1.5229838250508361e-05,
      "loss": 2.3577,
      "step": 25284
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.063380241394043,
      "learning_rate": 1.5229487305292155e-05,
      "loss": 2.5724,
      "step": 25285
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9585954546928406,
      "learning_rate": 1.522913635121055e-05,
      "loss": 2.5505,
      "step": 25286
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.278176188468933,
      "learning_rate": 1.5228785388264138e-05,
      "loss": 2.3555,
      "step": 25287
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0011273622512817,
      "learning_rate": 1.5228434416453513e-05,
      "loss": 2.4499,
      "step": 25288
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9685754179954529,
      "learning_rate": 1.5228083435779275e-05,
      "loss": 2.5272,
      "step": 25289
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2807344198226929,
      "learning_rate": 1.5227732446242014e-05,
      "loss": 2.2547,
      "step": 25290
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0180410146713257,
      "learning_rate": 1.5227381447842324e-05,
      "loss": 2.4289,
      "step": 25291
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0577553510665894,
      "learning_rate": 1.5227030440580803e-05,
      "loss": 2.4512,
      "step": 25292
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1314013004302979,
      "learning_rate": 1.5226679424458053e-05,
      "loss": 2.4737,
      "step": 25293
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1252206563949585,
      "learning_rate": 1.5226328399474655e-05,
      "loss": 2.4528,
      "step": 25294
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1102628707885742,
      "learning_rate": 1.5225977365631211e-05,
      "loss": 2.4008,
      "step": 25295
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0241334438323975,
      "learning_rate": 1.522562632292832e-05,
      "loss": 2.3229,
      "step": 25296
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0090745687484741,
      "learning_rate": 1.5225275271366569e-05,
      "loss": 2.5242,
      "step": 25297
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0427050590515137,
      "learning_rate": 1.5224924210946559e-05,
      "loss": 2.4393,
      "step": 25298
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1825342178344727,
      "learning_rate": 1.5224573141668884e-05,
      "loss": 2.2702,
      "step": 25299
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1442852020263672,
      "learning_rate": 1.5224222063534138e-05,
      "loss": 2.3812,
      "step": 25300
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.160407304763794,
      "learning_rate": 1.5223870976542916e-05,
      "loss": 2.4817,
      "step": 25301
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1475398540496826,
      "learning_rate": 1.5223519880695815e-05,
      "loss": 2.3308,
      "step": 25302
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0855013132095337,
      "learning_rate": 1.5223168775993428e-05,
      "loss": 2.3138,
      "step": 25303
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.579413890838623,
      "learning_rate": 1.5222817662436352e-05,
      "loss": 2.4833,
      "step": 25304
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1720789670944214,
      "learning_rate": 1.522246654002518e-05,
      "loss": 2.4907,
      "step": 25305
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0661777257919312,
      "learning_rate": 1.5222115408760511e-05,
      "loss": 2.4696,
      "step": 25306
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0325342416763306,
      "learning_rate": 1.5221764268642937e-05,
      "loss": 2.5125,
      "step": 25307
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.149891972541809,
      "learning_rate": 1.5221413119673054e-05,
      "loss": 2.3338,
      "step": 25308
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1033110618591309,
      "learning_rate": 1.522106196185146e-05,
      "loss": 2.5146,
      "step": 25309
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0503101348876953,
      "learning_rate": 1.5220710795178746e-05,
      "loss": 2.348,
      "step": 25310
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0407063961029053,
      "learning_rate": 1.5220359619655511e-05,
      "loss": 2.3007,
      "step": 25311
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0661698579788208,
      "learning_rate": 1.5220008435282348e-05,
      "loss": 2.4502,
      "step": 25312
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0342698097229004,
      "learning_rate": 1.521965724205985e-05,
      "loss": 2.4186,
      "step": 25313
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0894581079483032,
      "learning_rate": 1.5219306039988617e-05,
      "loss": 2.2972,
      "step": 25314
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3748955726623535,
      "learning_rate": 1.521895482906924e-05,
      "loss": 2.3468,
      "step": 25315
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0220115184783936,
      "learning_rate": 1.5218603609302324e-05,
      "loss": 2.3546,
      "step": 25316
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.12906014919281,
      "learning_rate": 1.5218252380688455e-05,
      "loss": 2.3748,
      "step": 25317
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0231316089630127,
      "learning_rate": 1.5217901143228228e-05,
      "loss": 2.5694,
      "step": 25318
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0605610609054565,
      "learning_rate": 1.5217549896922243e-05,
      "loss": 2.3222,
      "step": 25319
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0336347818374634,
      "learning_rate": 1.5217198641771096e-05,
      "loss": 2.4368,
      "step": 25320
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2464581727981567,
      "learning_rate": 1.5216847377775378e-05,
      "loss": 2.5362,
      "step": 25321
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.008421778678894,
      "learning_rate": 1.5216496104935688e-05,
      "loss": 2.4782,
      "step": 25322
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2475231885910034,
      "learning_rate": 1.521614482325262e-05,
      "loss": 2.3407,
      "step": 25323
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0469379425048828,
      "learning_rate": 1.5215793532726771e-05,
      "loss": 2.4641,
      "step": 25324
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1763759851455688,
      "learning_rate": 1.521544223335873e-05,
      "loss": 2.3518,
      "step": 25325
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1591124534606934,
      "learning_rate": 1.5215090925149103e-05,
      "loss": 2.5592,
      "step": 25326
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0656753778457642,
      "learning_rate": 1.5214739608098478e-05,
      "loss": 2.3482,
      "step": 25327
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1345700025558472,
      "learning_rate": 1.5214388282207454e-05,
      "loss": 2.4464,
      "step": 25328
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1078165769577026,
      "learning_rate": 1.5214036947476629e-05,
      "loss": 2.3919,
      "step": 25329
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.326353669166565,
      "learning_rate": 1.5213685603906592e-05,
      "loss": 2.1568,
      "step": 25330
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0690817832946777,
      "learning_rate": 1.521333425149794e-05,
      "loss": 2.3958,
      "step": 25331
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0601871013641357,
      "learning_rate": 1.5212982890251273e-05,
      "loss": 2.395,
      "step": 25332
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3581271171569824,
      "learning_rate": 1.5212631520167182e-05,
      "loss": 2.3905,
      "step": 25333
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1769189834594727,
      "learning_rate": 1.5212280141246268e-05,
      "loss": 2.4092,
      "step": 25334
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.054648756980896,
      "learning_rate": 1.5211928753489123e-05,
      "loss": 2.2966,
      "step": 25335
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9791249632835388,
      "learning_rate": 1.5211577356896344e-05,
      "loss": 2.35,
      "step": 25336
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0729180574417114,
      "learning_rate": 1.5211225951468523e-05,
      "loss": 2.2702,
      "step": 25337
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1198999881744385,
      "learning_rate": 1.521087453720626e-05,
      "loss": 2.3491,
      "step": 25338
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1520711183547974,
      "learning_rate": 1.521052311411015e-05,
      "loss": 2.4372,
      "step": 25339
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1824443340301514,
      "learning_rate": 1.5210171682180786e-05,
      "loss": 2.2171,
      "step": 25340
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0117661952972412,
      "learning_rate": 1.5209820241418767e-05,
      "loss": 2.4945,
      "step": 25341
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0528535842895508,
      "learning_rate": 1.5209468791824688e-05,
      "loss": 2.4639,
      "step": 25342
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.095330834388733,
      "learning_rate": 1.5209117333399146e-05,
      "loss": 2.3085,
      "step": 25343
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.071647047996521,
      "learning_rate": 1.5208765866142732e-05,
      "loss": 2.2388,
      "step": 25344
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0571502447128296,
      "learning_rate": 1.5208414390056048e-05,
      "loss": 2.5094,
      "step": 25345
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2072781324386597,
      "learning_rate": 1.5208062905139684e-05,
      "loss": 2.3288,
      "step": 25346
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1529396772384644,
      "learning_rate": 1.5207711411394241e-05,
      "loss": 2.5049,
      "step": 25347
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.430148720741272,
      "learning_rate": 1.5207359908820313e-05,
      "loss": 2.6084,
      "step": 25348
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9893866181373596,
      "learning_rate": 1.5207008397418496e-05,
      "loss": 2.4173,
      "step": 25349
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0201301574707031,
      "learning_rate": 1.5206656877189384e-05,
      "loss": 2.3872,
      "step": 25350
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9591367244720459,
      "learning_rate": 1.520630534813358e-05,
      "loss": 2.3246,
      "step": 25351
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.252649188041687,
      "learning_rate": 1.5205953810251668e-05,
      "loss": 2.2569,
      "step": 25352
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0640581846237183,
      "learning_rate": 1.5205602263544252e-05,
      "loss": 2.3882,
      "step": 25353
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1200636625289917,
      "learning_rate": 1.5205250708011926e-05,
      "loss": 2.3688,
      "step": 25354
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9986798763275146,
      "learning_rate": 1.5204899143655292e-05,
      "loss": 2.6383,
      "step": 25355
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.100549578666687,
      "learning_rate": 1.5204547570474932e-05,
      "loss": 2.2854,
      "step": 25356
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0654507875442505,
      "learning_rate": 1.5204195988471455e-05,
      "loss": 2.4393,
      "step": 25357
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0341321229934692,
      "learning_rate": 1.520384439764545e-05,
      "loss": 2.3568,
      "step": 25358
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1634776592254639,
      "learning_rate": 1.5203492797997516e-05,
      "loss": 2.5921,
      "step": 25359
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9749113321304321,
      "learning_rate": 1.5203141189528249e-05,
      "loss": 2.4653,
      "step": 25360
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.951664388179779,
      "learning_rate": 1.5202789572238245e-05,
      "loss": 2.4451,
      "step": 25361
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.013371467590332,
      "learning_rate": 1.52024379461281e-05,
      "loss": 2.4303,
      "step": 25362
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.067258596420288,
      "learning_rate": 1.5202086311198407e-05,
      "loss": 2.5498,
      "step": 25363
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0539013147354126,
      "learning_rate": 1.5201734667449769e-05,
      "loss": 2.6259,
      "step": 25364
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9803617596626282,
      "learning_rate": 1.5201383014882774e-05,
      "loss": 2.3826,
      "step": 25365
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0492247343063354,
      "learning_rate": 1.5201031353498024e-05,
      "loss": 2.5356,
      "step": 25366
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1828199625015259,
      "learning_rate": 1.5200679683296111e-05,
      "loss": 2.468,
      "step": 25367
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0308647155761719,
      "learning_rate": 1.5200328004277636e-05,
      "loss": 2.3593,
      "step": 25368
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9660583734512329,
      "learning_rate": 1.5199976316443192e-05,
      "loss": 2.4332,
      "step": 25369
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0601956844329834,
      "learning_rate": 1.5199624619793376e-05,
      "loss": 2.5483,
      "step": 25370
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1674127578735352,
      "learning_rate": 1.5199272914328784e-05,
      "loss": 2.3045,
      "step": 25371
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0459452867507935,
      "learning_rate": 1.5198921200050012e-05,
      "loss": 2.3463,
      "step": 25372
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2564905881881714,
      "learning_rate": 1.5198569476957657e-05,
      "loss": 2.6513,
      "step": 25373
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2987966537475586,
      "learning_rate": 1.5198217745052314e-05,
      "loss": 2.3672,
      "step": 25374
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9918949604034424,
      "learning_rate": 1.5197866004334582e-05,
      "loss": 2.4703,
      "step": 25375
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0337544679641724,
      "learning_rate": 1.5197514254805054e-05,
      "loss": 2.4685,
      "step": 25376
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0836352109909058,
      "learning_rate": 1.5197162496464328e-05,
      "loss": 2.5658,
      "step": 25377
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0670980215072632,
      "learning_rate": 1.5196810729313e-05,
      "loss": 2.6141,
      "step": 25378
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1609467267990112,
      "learning_rate": 1.5196458953351665e-05,
      "loss": 2.4906,
      "step": 25379
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9915605187416077,
      "learning_rate": 1.5196107168580923e-05,
      "loss": 2.5108,
      "step": 25380
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9484374523162842,
      "learning_rate": 1.5195755375001367e-05,
      "loss": 2.2926,
      "step": 25381
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.174567699432373,
      "learning_rate": 1.5195403572613593e-05,
      "loss": 2.0771,
      "step": 25382
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1690446138381958,
      "learning_rate": 1.5195051761418205e-05,
      "loss": 2.4048,
      "step": 25383
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.089444637298584,
      "learning_rate": 1.5194699941415787e-05,
      "loss": 2.578,
      "step": 25384
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9822553992271423,
      "learning_rate": 1.5194348112606943e-05,
      "loss": 2.404,
      "step": 25385
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9228809475898743,
      "learning_rate": 1.5193996274992268e-05,
      "loss": 2.5204,
      "step": 25386
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1582132577896118,
      "learning_rate": 1.5193644428572359e-05,
      "loss": 2.5535,
      "step": 25387
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1637564897537231,
      "learning_rate": 1.5193292573347815e-05,
      "loss": 2.2362,
      "step": 25388
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0978647470474243,
      "learning_rate": 1.5192940709319226e-05,
      "loss": 2.3866,
      "step": 25389
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9488703608512878,
      "learning_rate": 1.5192588836487194e-05,
      "loss": 2.2194,
      "step": 25390
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1382660865783691,
      "learning_rate": 1.519223695485231e-05,
      "loss": 2.4389,
      "step": 25391
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.999199390411377,
      "learning_rate": 1.519188506441518e-05,
      "loss": 2.4324,
      "step": 25392
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0810874700546265,
      "learning_rate": 1.519153316517639e-05,
      "loss": 2.5361,
      "step": 25393
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9727210402488708,
      "learning_rate": 1.5191181257136542e-05,
      "loss": 2.424,
      "step": 25394
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0256445407867432,
      "learning_rate": 1.5190829340296237e-05,
      "loss": 2.3228,
      "step": 25395
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0362269878387451,
      "learning_rate": 1.519047741465606e-05,
      "loss": 2.4751,
      "step": 25396
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0147302150726318,
      "learning_rate": 1.5190125480216616e-05,
      "loss": 2.6019,
      "step": 25397
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2898485660552979,
      "learning_rate": 1.51897735369785e-05,
      "loss": 2.4111,
      "step": 25398
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0124294757843018,
      "learning_rate": 1.5189421584942308e-05,
      "loss": 2.3612,
      "step": 25399
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.067976951599121,
      "learning_rate": 1.518906962410864e-05,
      "loss": 2.4146,
      "step": 25400
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.064858078956604,
      "learning_rate": 1.5188717654478086e-05,
      "loss": 2.3653,
      "step": 25401
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1113237142562866,
      "learning_rate": 1.5188365676051247e-05,
      "loss": 2.3845,
      "step": 25402
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0494791269302368,
      "learning_rate": 1.5188013688828717e-05,
      "loss": 2.4931,
      "step": 25403
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.14508056640625,
      "learning_rate": 1.5187661692811099e-05,
      "loss": 2.3488,
      "step": 25404
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0742223262786865,
      "learning_rate": 1.5187309687998983e-05,
      "loss": 2.2718,
      "step": 25405
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0499217510223389,
      "learning_rate": 1.5186957674392968e-05,
      "loss": 2.3973,
      "step": 25406
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.04816472530365,
      "learning_rate": 1.5186605651993653e-05,
      "loss": 2.3188,
      "step": 25407
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0754271745681763,
      "learning_rate": 1.5186253620801635e-05,
      "loss": 2.5682,
      "step": 25408
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9501593112945557,
      "learning_rate": 1.5185901580817501e-05,
      "loss": 2.4771,
      "step": 25409
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2105532884597778,
      "learning_rate": 1.5185549532041861e-05,
      "loss": 2.6038,
      "step": 25410
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.070881962776184,
      "learning_rate": 1.5185197474475306e-05,
      "loss": 2.4391,
      "step": 25411
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1806107759475708,
      "learning_rate": 1.5184845408118432e-05,
      "loss": 2.4374,
      "step": 25412
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1514867544174194,
      "learning_rate": 1.5184493332971837e-05,
      "loss": 2.3943,
      "step": 25413
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9390139579772949,
      "learning_rate": 1.5184141249036116e-05,
      "loss": 2.37,
      "step": 25414
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0966895818710327,
      "learning_rate": 1.518378915631187e-05,
      "loss": 2.337,
      "step": 25415
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0657517910003662,
      "learning_rate": 1.5183437054799692e-05,
      "loss": 2.4199,
      "step": 25416
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.891967236995697,
      "learning_rate": 1.5183084944500181e-05,
      "loss": 2.2556,
      "step": 25417
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0456645488739014,
      "learning_rate": 1.5182732825413933e-05,
      "loss": 2.3698,
      "step": 25418
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0695044994354248,
      "learning_rate": 1.5182380697541543e-05,
      "loss": 2.3994,
      "step": 25419
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1738001108169556,
      "learning_rate": 1.5182028560883613e-05,
      "loss": 2.4696,
      "step": 25420
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2081843614578247,
      "learning_rate": 1.5181676415440738e-05,
      "loss": 2.3653,
      "step": 25421
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.122105360031128,
      "learning_rate": 1.5181324261213513e-05,
      "loss": 2.3945,
      "step": 25422
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.228545904159546,
      "learning_rate": 1.5180972098202536e-05,
      "loss": 2.3381,
      "step": 25423
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.272627830505371,
      "learning_rate": 1.5180619926408402e-05,
      "loss": 2.4173,
      "step": 25424
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9910051226615906,
      "learning_rate": 1.5180267745831713e-05,
      "loss": 2.3625,
      "step": 25425
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1227872371673584,
      "learning_rate": 1.5179915556473063e-05,
      "loss": 2.4198,
      "step": 25426
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0000780820846558,
      "learning_rate": 1.5179563358333046e-05,
      "loss": 2.3604,
      "step": 25427
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0306000709533691,
      "learning_rate": 1.5179211151412268e-05,
      "loss": 2.5115,
      "step": 25428
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.039268136024475,
      "learning_rate": 1.5178858935711318e-05,
      "loss": 2.3681,
      "step": 25429
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0237722396850586,
      "learning_rate": 1.5178506711230796e-05,
      "loss": 2.1903,
      "step": 25430
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.060593605041504,
      "learning_rate": 1.5178154477971296e-05,
      "loss": 2.3411,
      "step": 25431
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.973588228225708,
      "learning_rate": 1.5177802235933419e-05,
      "loss": 2.44,
      "step": 25432
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1009507179260254,
      "learning_rate": 1.517744998511776e-05,
      "loss": 2.491,
      "step": 25433
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2786942720413208,
      "learning_rate": 1.5177097725524923e-05,
      "loss": 2.1372,
      "step": 25434
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9301329851150513,
      "learning_rate": 1.5176745457155492e-05,
      "loss": 2.4175,
      "step": 25435
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1670693159103394,
      "learning_rate": 1.5176393180010077e-05,
      "loss": 2.6754,
      "step": 25436
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0077584981918335,
      "learning_rate": 1.5176040894089266e-05,
      "loss": 2.4019,
      "step": 25437
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1427634954452515,
      "learning_rate": 1.517568859939366e-05,
      "loss": 2.4627,
      "step": 25438
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0997893810272217,
      "learning_rate": 1.5175336295923859e-05,
      "loss": 2.2814,
      "step": 25439
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.036158800125122,
      "learning_rate": 1.5174983983680456e-05,
      "loss": 2.264,
      "step": 25440
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0818464756011963,
      "learning_rate": 1.5174631662664048e-05,
      "loss": 2.4012,
      "step": 25441
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.947228193283081,
      "learning_rate": 1.5174279332875236e-05,
      "loss": 2.3303,
      "step": 25442
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.108892798423767,
      "learning_rate": 1.5173926994314614e-05,
      "loss": 2.3357,
      "step": 25443
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0760878324508667,
      "learning_rate": 1.517357464698278e-05,
      "loss": 2.3476,
      "step": 25444
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9791402816772461,
      "learning_rate": 1.5173222290880332e-05,
      "loss": 2.4163,
      "step": 25445
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1519047021865845,
      "learning_rate": 1.5172869926007869e-05,
      "loss": 2.2113,
      "step": 25446
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.121235966682434,
      "learning_rate": 1.5172517552365984e-05,
      "loss": 2.3354,
      "step": 25447
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0206751823425293,
      "learning_rate": 1.5172165169955282e-05,
      "loss": 2.4642,
      "step": 25448
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0416576862335205,
      "learning_rate": 1.517181277877635e-05,
      "loss": 2.5214,
      "step": 25449
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0702471733093262,
      "learning_rate": 1.5171460378829793e-05,
      "loss": 2.354,
      "step": 25450
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9994778037071228,
      "learning_rate": 1.5171107970116203e-05,
      "loss": 2.337,
      "step": 25451
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2992240190505981,
      "learning_rate": 1.5170755552636183e-05,
      "loss": 2.42,
      "step": 25452
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1426122188568115,
      "learning_rate": 1.5170403126390327e-05,
      "loss": 2.6276,
      "step": 25453
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.051849603652954,
      "learning_rate": 1.5170050691379236e-05,
      "loss": 2.5024,
      "step": 25454
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.03657066822052,
      "learning_rate": 1.5169698247603503e-05,
      "loss": 2.4531,
      "step": 25455
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0224825143814087,
      "learning_rate": 1.5169345795063727e-05,
      "loss": 2.3876,
      "step": 25456
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9834596514701843,
      "learning_rate": 1.5168993333760506e-05,
      "loss": 2.44,
      "step": 25457
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0642802715301514,
      "learning_rate": 1.5168640863694438e-05,
      "loss": 2.0971,
      "step": 25458
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2595897912979126,
      "learning_rate": 1.516828838486612e-05,
      "loss": 2.576,
      "step": 25459
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1966006755828857,
      "learning_rate": 1.516793589727615e-05,
      "loss": 2.5163,
      "step": 25460
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1043626070022583,
      "learning_rate": 1.5167583400925126e-05,
      "loss": 2.6371,
      "step": 25461
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.19379460811615,
      "learning_rate": 1.516723089581364e-05,
      "loss": 2.4612,
      "step": 25462
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.151719331741333,
      "learning_rate": 1.5166878381942298e-05,
      "loss": 2.5162,
      "step": 25463
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.105443000793457,
      "learning_rate": 1.5166525859311694e-05,
      "loss": 2.4891,
      "step": 25464
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.976203203201294,
      "learning_rate": 1.5166173327922424e-05,
      "loss": 2.6479,
      "step": 25465
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1110262870788574,
      "learning_rate": 1.5165820787775087e-05,
      "loss": 2.5091,
      "step": 25466
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.067481517791748,
      "learning_rate": 1.5165468238870284e-05,
      "loss": 2.4284,
      "step": 25467
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0483020544052124,
      "learning_rate": 1.5165115681208607e-05,
      "loss": 2.3169,
      "step": 25468
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.15350341796875,
      "learning_rate": 1.5164763114790655e-05,
      "loss": 2.5983,
      "step": 25469
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1088783740997314,
      "learning_rate": 1.5164410539617029e-05,
      "loss": 2.4395,
      "step": 25470
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0957365036010742,
      "learning_rate": 1.5164057955688323e-05,
      "loss": 2.3012,
      "step": 25471
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1982383728027344,
      "learning_rate": 1.5163705363005137e-05,
      "loss": 2.2683,
      "step": 25472
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0459802150726318,
      "learning_rate": 1.5163352761568066e-05,
      "loss": 2.2198,
      "step": 25473
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9916083216667175,
      "learning_rate": 1.5163000151377714e-05,
      "loss": 2.4327,
      "step": 25474
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0669974088668823,
      "learning_rate": 1.516264753243467e-05,
      "loss": 2.2492,
      "step": 25475
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0523775815963745,
      "learning_rate": 1.516229490473954e-05,
      "loss": 2.5434,
      "step": 25476
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1820926666259766,
      "learning_rate": 1.5161942268292915e-05,
      "loss": 2.4517,
      "step": 25477
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2775927782058716,
      "learning_rate": 1.5161589623095397e-05,
      "loss": 2.3551,
      "step": 25478
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9699194431304932,
      "learning_rate": 1.516123696914758e-05,
      "loss": 2.3225,
      "step": 25479
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.300417423248291,
      "learning_rate": 1.5160884306450068e-05,
      "loss": 2.3801,
      "step": 25480
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.131649136543274,
      "learning_rate": 1.5160531635003456e-05,
      "loss": 2.3674,
      "step": 25481
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9079621434211731,
      "learning_rate": 1.5160178954808338e-05,
      "loss": 2.2686,
      "step": 25482
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0412418842315674,
      "learning_rate": 1.5159826265865319e-05,
      "loss": 2.2606,
      "step": 25483
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9976738691329956,
      "learning_rate": 1.5159473568174989e-05,
      "loss": 2.2625,
      "step": 25484
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0264723300933838,
      "learning_rate": 1.5159120861737952e-05,
      "loss": 2.3277,
      "step": 25485
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3225340843200684,
      "learning_rate": 1.5158768146554805e-05,
      "loss": 2.2267,
      "step": 25486
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.102986216545105,
      "learning_rate": 1.5158415422626145e-05,
      "loss": 2.5511,
      "step": 25487
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0623674392700195,
      "learning_rate": 1.5158062689952568e-05,
      "loss": 2.532,
      "step": 25488
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.159953236579895,
      "learning_rate": 1.5157709948534676e-05,
      "loss": 2.4852,
      "step": 25489
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0295201539993286,
      "learning_rate": 1.5157357198373062e-05,
      "loss": 2.2485,
      "step": 25490
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.109041690826416,
      "learning_rate": 1.5157004439468328e-05,
      "loss": 2.4696,
      "step": 25491
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1939127445220947,
      "learning_rate": 1.5156651671821073e-05,
      "loss": 2.4407,
      "step": 25492
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.062193751335144,
      "learning_rate": 1.5156298895431889e-05,
      "loss": 2.6427,
      "step": 25493
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0188908576965332,
      "learning_rate": 1.5155946110301383e-05,
      "loss": 2.2596,
      "step": 25494
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1501235961914062,
      "learning_rate": 1.5155593316430142e-05,
      "loss": 2.2526,
      "step": 25495
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0444573163986206,
      "learning_rate": 1.5155240513818774e-05,
      "loss": 2.2096,
      "step": 25496
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0985965728759766,
      "learning_rate": 1.5154887702467873e-05,
      "loss": 2.1754,
      "step": 25497
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1124659776687622,
      "learning_rate": 1.5154534882378036e-05,
      "loss": 2.2539,
      "step": 25498
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9742494821548462,
      "learning_rate": 1.5154182053549864e-05,
      "loss": 2.3679,
      "step": 25499
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1683112382888794,
      "learning_rate": 1.5153829215983954e-05,
      "loss": 2.3618,
      "step": 25500
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.117741584777832,
      "learning_rate": 1.5153476369680902e-05,
      "loss": 2.6416,
      "step": 25501
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1353062391281128,
      "learning_rate": 1.5153123514641308e-05,
      "loss": 2.1919,
      "step": 25502
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0427356958389282,
      "learning_rate": 1.515277065086577e-05,
      "loss": 2.3597,
      "step": 25503
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.012373447418213,
      "learning_rate": 1.5152417778354886e-05,
      "loss": 2.4045,
      "step": 25504
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1163750886917114,
      "learning_rate": 1.5152064897109256e-05,
      "loss": 2.6882,
      "step": 25505
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2110275030136108,
      "learning_rate": 1.5151712007129473e-05,
      "loss": 2.3458,
      "step": 25506
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.040664553642273,
      "learning_rate": 1.5151359108416146e-05,
      "loss": 2.1533,
      "step": 25507
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0158158540725708,
      "learning_rate": 1.515100620096986e-05,
      "loss": 2.4274,
      "step": 25508
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1228805780410767,
      "learning_rate": 1.5150653284791222e-05,
      "loss": 2.4852,
      "step": 25509
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1010524034500122,
      "learning_rate": 1.5150300359880828e-05,
      "loss": 2.4459,
      "step": 25510
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4921854734420776,
      "learning_rate": 1.5149947426239274e-05,
      "loss": 2.2374,
      "step": 25511
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.123104453086853,
      "learning_rate": 1.5149594483867161e-05,
      "loss": 2.4103,
      "step": 25512
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0756138563156128,
      "learning_rate": 1.5149241532765088e-05,
      "loss": 2.5678,
      "step": 25513
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.07865571975708,
      "learning_rate": 1.5148888572933653e-05,
      "loss": 2.571,
      "step": 25514
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0999772548675537,
      "learning_rate": 1.514853560437345e-05,
      "loss": 2.2458,
      "step": 25515
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0885366201400757,
      "learning_rate": 1.5148182627085083e-05,
      "loss": 2.3297,
      "step": 25516
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.391414761543274,
      "learning_rate": 1.5147829641069148e-05,
      "loss": 2.3825,
      "step": 25517
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.280019760131836,
      "learning_rate": 1.5147476646326245e-05,
      "loss": 2.6505,
      "step": 25518
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.090399146080017,
      "learning_rate": 1.5147123642856967e-05,
      "loss": 2.2959,
      "step": 25519
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9255099296569824,
      "learning_rate": 1.5146770630661921e-05,
      "loss": 2.4064,
      "step": 25520
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0662084817886353,
      "learning_rate": 1.51464176097417e-05,
      "loss": 2.5015,
      "step": 25521
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0519046783447266,
      "learning_rate": 1.51460645800969e-05,
      "loss": 2.422,
      "step": 25522
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0139505863189697,
      "learning_rate": 1.5145711541728126e-05,
      "loss": 2.4421,
      "step": 25523
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1117466688156128,
      "learning_rate": 1.5145358494635973e-05,
      "loss": 2.6612,
      "step": 25524
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0073169469833374,
      "learning_rate": 1.514500543882104e-05,
      "loss": 2.3276,
      "step": 25525
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.00523841381073,
      "learning_rate": 1.5144652374283927e-05,
      "loss": 2.4344,
      "step": 25526
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9952833652496338,
      "learning_rate": 1.5144299301025229e-05,
      "loss": 2.4343,
      "step": 25527
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0554728507995605,
      "learning_rate": 1.5143946219045544e-05,
      "loss": 2.4163,
      "step": 25528
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.991463840007782,
      "learning_rate": 1.5143593128345479e-05,
      "loss": 2.3753,
      "step": 25529
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9800494313240051,
      "learning_rate": 1.5143240028925622e-05,
      "loss": 2.2972,
      "step": 25530
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0026140213012695,
      "learning_rate": 1.5142886920786576e-05,
      "loss": 2.278,
      "step": 25531
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1961554288864136,
      "learning_rate": 1.5142533803928943e-05,
      "loss": 2.3965,
      "step": 25532
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0533456802368164,
      "learning_rate": 1.5142180678353318e-05,
      "loss": 2.4282,
      "step": 25533
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9584659934043884,
      "learning_rate": 1.5141827544060299e-05,
      "loss": 2.4909,
      "step": 25534
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0713169574737549,
      "learning_rate": 1.5141474401050484e-05,
      "loss": 2.5079,
      "step": 25535
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1213620901107788,
      "learning_rate": 1.5141121249324476e-05,
      "loss": 2.0088,
      "step": 25536
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0255590677261353,
      "learning_rate": 1.5140768088882872e-05,
      "loss": 2.5461,
      "step": 25537
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.051268458366394,
      "learning_rate": 1.5140414919726267e-05,
      "loss": 2.3288,
      "step": 25538
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0295692682266235,
      "learning_rate": 1.5140061741855263e-05,
      "loss": 2.3036,
      "step": 25539
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0470328330993652,
      "learning_rate": 1.5139708555270462e-05,
      "loss": 2.5161,
      "step": 25540
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0074862241744995,
      "learning_rate": 1.5139355359972456e-05,
      "loss": 2.2512,
      "step": 25541
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1584516763687134,
      "learning_rate": 1.5139002155961846e-05,
      "loss": 2.2084,
      "step": 25542
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.038509726524353,
      "learning_rate": 1.5138648943239233e-05,
      "loss": 2.5469,
      "step": 25543
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0825859308242798,
      "learning_rate": 1.5138295721805216e-05,
      "loss": 2.5009,
      "step": 25544
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0322657823562622,
      "learning_rate": 1.5137942491660389e-05,
      "loss": 2.4524,
      "step": 25545
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.06466805934906,
      "learning_rate": 1.5137589252805356e-05,
      "loss": 2.4171,
      "step": 25546
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0753744840621948,
      "learning_rate": 1.5137236005240715e-05,
      "loss": 2.7054,
      "step": 25547
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9949726462364197,
      "learning_rate": 1.513688274896706e-05,
      "loss": 2.3758,
      "step": 25548
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.018440842628479,
      "learning_rate": 1.5136529483984996e-05,
      "loss": 2.4821,
      "step": 25549
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.053701639175415,
      "learning_rate": 1.513617621029512e-05,
      "loss": 2.4193,
      "step": 25550
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1125026941299438,
      "learning_rate": 1.5135822927898028e-05,
      "loss": 2.1606,
      "step": 25551
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.068613886833191,
      "learning_rate": 1.5135469636794323e-05,
      "loss": 2.466,
      "step": 25552
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0602630376815796,
      "learning_rate": 1.5135116336984603e-05,
      "loss": 2.5943,
      "step": 25553
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0309609174728394,
      "learning_rate": 1.5134763028469465e-05,
      "loss": 2.2828,
      "step": 25554
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0501065254211426,
      "learning_rate": 1.5134409711249508e-05,
      "loss": 2.3493,
      "step": 25555
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.920193076133728,
      "learning_rate": 1.5134056385325333e-05,
      "loss": 2.4305,
      "step": 25556
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9597219824790955,
      "learning_rate": 1.5133703050697541e-05,
      "loss": 2.4586,
      "step": 25557
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1108272075653076,
      "learning_rate": 1.5133349707366725e-05,
      "loss": 2.4914,
      "step": 25558
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.529670238494873,
      "learning_rate": 1.5132996355333487e-05,
      "loss": 2.6962,
      "step": 25559
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9727574586868286,
      "learning_rate": 1.5132642994598428e-05,
      "loss": 2.3757,
      "step": 25560
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0082767009735107,
      "learning_rate": 1.5132289625162142e-05,
      "loss": 2.4546,
      "step": 25561
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0051274299621582,
      "learning_rate": 1.5131936247025235e-05,
      "loss": 2.5069,
      "step": 25562
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0774668455123901,
      "learning_rate": 1.5131582860188299e-05,
      "loss": 2.4593,
      "step": 25563
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.983235239982605,
      "learning_rate": 1.5131229464651935e-05,
      "loss": 2.5387,
      "step": 25564
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1311718225479126,
      "learning_rate": 1.5130876060416748e-05,
      "loss": 2.5501,
      "step": 25565
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0195527076721191,
      "learning_rate": 1.5130522647483332e-05,
      "loss": 2.3674,
      "step": 25566
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9895550012588501,
      "learning_rate": 1.5130169225852282e-05,
      "loss": 2.1572,
      "step": 25567
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3032881021499634,
      "learning_rate": 1.5129815795524208e-05,
      "loss": 2.3941,
      "step": 25568
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1920262575149536,
      "learning_rate": 1.5129462356499699e-05,
      "loss": 2.3654,
      "step": 25569
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9935595989227295,
      "learning_rate": 1.5129108908779358e-05,
      "loss": 2.5043,
      "step": 25570
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9858668446540833,
      "learning_rate": 1.5128755452363783e-05,
      "loss": 2.5103,
      "step": 25571
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0669922828674316,
      "learning_rate": 1.5128401987253578e-05,
      "loss": 2.5264,
      "step": 25572
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0288468599319458,
      "learning_rate": 1.5128048513449339e-05,
      "loss": 2.4384,
      "step": 25573
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.125908374786377,
      "learning_rate": 1.5127695030951664e-05,
      "loss": 2.5115,
      "step": 25574
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.096398115158081,
      "learning_rate": 1.5127341539761151e-05,
      "loss": 2.5239,
      "step": 25575
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4862189292907715,
      "learning_rate": 1.5126988039878402e-05,
      "loss": 2.334,
      "step": 25576
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0610073804855347,
      "learning_rate": 1.5126634531304018e-05,
      "loss": 2.3734,
      "step": 25577
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9915971755981445,
      "learning_rate": 1.5126281014038594e-05,
      "loss": 2.3053,
      "step": 25578
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1780915260314941,
      "learning_rate": 1.5125927488082737e-05,
      "loss": 2.3373,
      "step": 25579
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1357505321502686,
      "learning_rate": 1.5125573953437035e-05,
      "loss": 2.6205,
      "step": 25580
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9767590761184692,
      "learning_rate": 1.5125220410102094e-05,
      "loss": 2.5721,
      "step": 25581
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1464290618896484,
      "learning_rate": 1.5124866858078513e-05,
      "loss": 2.45,
      "step": 25582
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.004223108291626,
      "learning_rate": 1.5124513297366892e-05,
      "loss": 2.2665,
      "step": 25583
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2647250890731812,
      "learning_rate": 1.5124159727967826e-05,
      "loss": 2.5133,
      "step": 25584
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.35498046875,
      "learning_rate": 1.5123806149881922e-05,
      "loss": 2.5483,
      "step": 25585
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1361863613128662,
      "learning_rate": 1.5123452563109772e-05,
      "loss": 2.3354,
      "step": 25586
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0482758283615112,
      "learning_rate": 1.512309896765198e-05,
      "loss": 2.4004,
      "step": 25587
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.139020562171936,
      "learning_rate": 1.5122745363509141e-05,
      "loss": 2.4725,
      "step": 25588
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0823866128921509,
      "learning_rate": 1.512239175068186e-05,
      "loss": 2.6094,
      "step": 25589
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2279530763626099,
      "learning_rate": 1.5122038129170733e-05,
      "loss": 2.389,
      "step": 25590
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2156569957733154,
      "learning_rate": 1.512168449897636e-05,
      "loss": 2.4133,
      "step": 25591
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9404047131538391,
      "learning_rate": 1.512133086009934e-05,
      "loss": 2.4494,
      "step": 25592
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1208627223968506,
      "learning_rate": 1.5120977212540275e-05,
      "loss": 2.2576,
      "step": 25593
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1322362422943115,
      "learning_rate": 1.512062355629976e-05,
      "loss": 2.5435,
      "step": 25594
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9977635741233826,
      "learning_rate": 1.5120269891378398e-05,
      "loss": 2.3099,
      "step": 25595
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.036958932876587,
      "learning_rate": 1.5119916217776788e-05,
      "loss": 2.6233,
      "step": 25596
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.076324462890625,
      "learning_rate": 1.5119562535495531e-05,
      "loss": 2.4964,
      "step": 25597
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1540443897247314,
      "learning_rate": 1.5119208844535224e-05,
      "loss": 2.3976,
      "step": 25598
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.869913935661316,
      "learning_rate": 1.5118855144896472e-05,
      "loss": 2.4005,
      "step": 25599
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2257276773452759,
      "learning_rate": 1.5118501436579865e-05,
      "loss": 2.3741,
      "step": 25600
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.949709415435791,
      "learning_rate": 1.5118147719586009e-05,
      "loss": 2.2357,
      "step": 25601
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9354369640350342,
      "learning_rate": 1.5117793993915505e-05,
      "loss": 2.246,
      "step": 25602
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0381969213485718,
      "learning_rate": 1.5117440259568945e-05,
      "loss": 2.3778,
      "step": 25603
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3007441759109497,
      "learning_rate": 1.5117086516546937e-05,
      "loss": 2.4702,
      "step": 25604
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.048940896987915,
      "learning_rate": 1.5116732764850077e-05,
      "loss": 2.2886,
      "step": 25605
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9755516052246094,
      "learning_rate": 1.5116379004478967e-05,
      "loss": 2.5367,
      "step": 25606
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1238813400268555,
      "learning_rate": 1.5116025235434204e-05,
      "loss": 2.3785,
      "step": 25607
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2159911394119263,
      "learning_rate": 1.5115671457716388e-05,
      "loss": 2.3923,
      "step": 25608
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9871715307235718,
      "learning_rate": 1.5115317671326119e-05,
      "loss": 2.4718,
      "step": 25609
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1671327352523804,
      "learning_rate": 1.5114963876263999e-05,
      "loss": 2.1903,
      "step": 25610
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1408846378326416,
      "learning_rate": 1.5114610072530625e-05,
      "loss": 2.4411,
      "step": 25611
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0947868824005127,
      "learning_rate": 1.5114256260126601e-05,
      "loss": 2.755,
      "step": 25612
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1244250535964966,
      "learning_rate": 1.511390243905252e-05,
      "loss": 2.4037,
      "step": 25613
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9515246152877808,
      "learning_rate": 1.5113548609308985e-05,
      "loss": 2.2407,
      "step": 25614
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0973107814788818,
      "learning_rate": 1.5113194770896598e-05,
      "loss": 2.4635,
      "step": 25615
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9978417754173279,
      "learning_rate": 1.5112840923815957e-05,
      "loss": 2.4092,
      "step": 25616
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0374562740325928,
      "learning_rate": 1.5112487068067663e-05,
      "loss": 2.4914,
      "step": 25617
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0357364416122437,
      "learning_rate": 1.511213320365231e-05,
      "loss": 2.4962,
      "step": 25618
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9982017874717712,
      "learning_rate": 1.5111779330570509e-05,
      "loss": 2.208,
      "step": 25619
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.13980233669281,
      "learning_rate": 1.5111425448822853e-05,
      "loss": 2.4727,
      "step": 25620
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.042542815208435,
      "learning_rate": 1.5111071558409937e-05,
      "loss": 2.3209,
      "step": 25621
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0851600170135498,
      "learning_rate": 1.511071765933237e-05,
      "loss": 2.4718,
      "step": 25622
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0917305946350098,
      "learning_rate": 1.511036375159075e-05,
      "loss": 2.5264,
      "step": 25623
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0527055263519287,
      "learning_rate": 1.5110009835185673e-05,
      "loss": 2.366,
      "step": 25624
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0564645528793335,
      "learning_rate": 1.5109655910117743e-05,
      "loss": 2.5329,
      "step": 25625
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0358781814575195,
      "learning_rate": 1.5109301976387559e-05,
      "loss": 2.3365,
      "step": 25626
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0964503288269043,
      "learning_rate": 1.5108948033995718e-05,
      "loss": 2.6143,
      "step": 25627
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0033581256866455,
      "learning_rate": 1.5108594082942825e-05,
      "loss": 2.2736,
      "step": 25628
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2258498668670654,
      "learning_rate": 1.5108240123229477e-05,
      "loss": 2.5022,
      "step": 25629
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.988408088684082,
      "learning_rate": 1.5107886154856273e-05,
      "loss": 2.5324,
      "step": 25630
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0605089664459229,
      "learning_rate": 1.5107532177823813e-05,
      "loss": 2.2979,
      "step": 25631
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1356884241104126,
      "learning_rate": 1.5107178192132705e-05,
      "loss": 2.3324,
      "step": 25632
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.090801477432251,
      "learning_rate": 1.5106824197783541e-05,
      "loss": 2.4327,
      "step": 25633
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0289734601974487,
      "learning_rate": 1.5106470194776922e-05,
      "loss": 2.4193,
      "step": 25634
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.003688931465149,
      "learning_rate": 1.5106116183113446e-05,
      "loss": 2.4077,
      "step": 25635
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3440039157867432,
      "learning_rate": 1.5105762162793718e-05,
      "loss": 2.3314,
      "step": 25636
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1037514209747314,
      "learning_rate": 1.5105408133818337e-05,
      "loss": 2.1499,
      "step": 25637
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1549149751663208,
      "learning_rate": 1.5105054096187904e-05,
      "loss": 2.5399,
      "step": 25638
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1819950342178345,
      "learning_rate": 1.5104700049903018e-05,
      "loss": 2.5767,
      "step": 25639
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1843713521957397,
      "learning_rate": 1.5104345994964279e-05,
      "loss": 2.3529,
      "step": 25640
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0564773082733154,
      "learning_rate": 1.5103991931372284e-05,
      "loss": 2.5115,
      "step": 25641
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0274176597595215,
      "learning_rate": 1.5103637859127638e-05,
      "loss": 2.4797,
      "step": 25642
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0593326091766357,
      "learning_rate": 1.510328377823094e-05,
      "loss": 2.6011,
      "step": 25643
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0313748121261597,
      "learning_rate": 1.5102929688682792e-05,
      "loss": 2.6365,
      "step": 25644
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2175346612930298,
      "learning_rate": 1.510257559048379e-05,
      "loss": 2.5334,
      "step": 25645
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9769259691238403,
      "learning_rate": 1.5102221483634535e-05,
      "loss": 2.2695,
      "step": 25646
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0342806577682495,
      "learning_rate": 1.5101867368135632e-05,
      "loss": 2.5316,
      "step": 25647
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.102393388748169,
      "learning_rate": 1.5101513243987678e-05,
      "loss": 2.4948,
      "step": 25648
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9764338731765747,
      "learning_rate": 1.5101159111191272e-05,
      "loss": 2.4109,
      "step": 25649
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0518542528152466,
      "learning_rate": 1.5100804969747015e-05,
      "loss": 2.4557,
      "step": 25650
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0308167934417725,
      "learning_rate": 1.510045081965551e-05,
      "loss": 2.4975,
      "step": 25651
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0300146341323853,
      "learning_rate": 1.5100096660917356e-05,
      "loss": 2.2054,
      "step": 25652
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.091036319732666,
      "learning_rate": 1.509974249353315e-05,
      "loss": 2.4255,
      "step": 25653
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0235984325408936,
      "learning_rate": 1.50993883175035e-05,
      "loss": 2.216,
      "step": 25654
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9953795671463013,
      "learning_rate": 1.5099034132828998e-05,
      "loss": 2.2478,
      "step": 25655
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.980263888835907,
      "learning_rate": 1.509867993951025e-05,
      "loss": 2.5383,
      "step": 25656
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1081016063690186,
      "learning_rate": 1.5098325737547855e-05,
      "loss": 2.6085,
      "step": 25657
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9898543953895569,
      "learning_rate": 1.5097971526942413e-05,
      "loss": 2.4372,
      "step": 25658
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2714015245437622,
      "learning_rate": 1.5097617307694522e-05,
      "loss": 2.4402,
      "step": 25659
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9676188230514526,
      "learning_rate": 1.5097263079804787e-05,
      "loss": 2.5488,
      "step": 25660
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0577861070632935,
      "learning_rate": 1.5096908843273808e-05,
      "loss": 2.4486,
      "step": 25661
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0106699466705322,
      "learning_rate": 1.5096554598102183e-05,
      "loss": 2.3423,
      "step": 25662
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0981718301773071,
      "learning_rate": 1.5096200344290512e-05,
      "loss": 2.5084,
      "step": 25663
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0135918855667114,
      "learning_rate": 1.50958460818394e-05,
      "loss": 2.3597,
      "step": 25664
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0538840293884277,
      "learning_rate": 1.5095491810749446e-05,
      "loss": 2.4476,
      "step": 25665
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0235852003097534,
      "learning_rate": 1.5095137531021247e-05,
      "loss": 2.1786,
      "step": 25666
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0844435691833496,
      "learning_rate": 1.5094783242655403e-05,
      "loss": 2.5343,
      "step": 25667
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0375019311904907,
      "learning_rate": 1.5094428945652525e-05,
      "loss": 2.4922,
      "step": 25668
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1049178838729858,
      "learning_rate": 1.5094074640013201e-05,
      "loss": 2.2694,
      "step": 25669
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1595723628997803,
      "learning_rate": 1.5093720325738037e-05,
      "loss": 2.5416,
      "step": 25670
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0026973485946655,
      "learning_rate": 1.5093366002827637e-05,
      "loss": 2.6308,
      "step": 25671
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0242999792099,
      "learning_rate": 1.5093011671282595e-05,
      "loss": 2.3802,
      "step": 25672
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0971623659133911,
      "learning_rate": 1.5092657331103517e-05,
      "loss": 2.3885,
      "step": 25673
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9497541189193726,
      "learning_rate": 1.5092302982291004e-05,
      "loss": 2.4851,
      "step": 25674
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.131862998008728,
      "learning_rate": 1.5091948624845651e-05,
      "loss": 2.4178,
      "step": 25675
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0404422283172607,
      "learning_rate": 1.5091594258768064e-05,
      "loss": 2.201,
      "step": 25676
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0281730890274048,
      "learning_rate": 1.5091239884058836e-05,
      "loss": 2.3746,
      "step": 25677
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0315511226654053,
      "learning_rate": 1.5090885500718582e-05,
      "loss": 2.3486,
      "step": 25678
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9972387552261353,
      "learning_rate": 1.509053110874789e-05,
      "loss": 2.2326,
      "step": 25679
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.096407413482666,
      "learning_rate": 1.5090176708147367e-05,
      "loss": 2.3459,
      "step": 25680
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0084991455078125,
      "learning_rate": 1.508982229891761e-05,
      "loss": 2.538,
      "step": 25681
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0396350622177124,
      "learning_rate": 1.5089467881059221e-05,
      "loss": 2.4218,
      "step": 25682
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9962837100028992,
      "learning_rate": 1.5089113454572804e-05,
      "loss": 2.2792,
      "step": 25683
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.165290355682373,
      "learning_rate": 1.5088759019458955e-05,
      "loss": 2.5392,
      "step": 25684
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9766818284988403,
      "learning_rate": 1.5088404575718283e-05,
      "loss": 2.4622,
      "step": 25685
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9979954361915588,
      "learning_rate": 1.5088050123351377e-05,
      "loss": 2.4294,
      "step": 25686
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0673249959945679,
      "learning_rate": 1.5087695662358846e-05,
      "loss": 2.3233,
      "step": 25687
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0234780311584473,
      "learning_rate": 1.508734119274129e-05,
      "loss": 2.2245,
      "step": 25688
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.006216287612915,
      "learning_rate": 1.5086986714499308e-05,
      "loss": 2.6046,
      "step": 25689
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0255300998687744,
      "learning_rate": 1.5086632227633503e-05,
      "loss": 2.3854,
      "step": 25690
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2635035514831543,
      "learning_rate": 1.5086277732144475e-05,
      "loss": 2.349,
      "step": 25691
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1229496002197266,
      "learning_rate": 1.5085923228032823e-05,
      "loss": 2.3909,
      "step": 25692
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9960393905639648,
      "learning_rate": 1.508556871529915e-05,
      "loss": 2.2083,
      "step": 25693
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9967806339263916,
      "learning_rate": 1.508521419394406e-05,
      "loss": 2.5159,
      "step": 25694
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1528966426849365,
      "learning_rate": 1.5084859663968146e-05,
      "loss": 2.3326,
      "step": 25695
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3056674003601074,
      "learning_rate": 1.5084505125372018e-05,
      "loss": 2.4419,
      "step": 25696
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0032635927200317,
      "learning_rate": 1.5084150578156269e-05,
      "loss": 2.2821,
      "step": 25697
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0605608224868774,
      "learning_rate": 1.5083796022321509e-05,
      "loss": 2.3916,
      "step": 25698
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0629478693008423,
      "learning_rate": 1.5083441457868331e-05,
      "loss": 2.492,
      "step": 25699
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9397687911987305,
      "learning_rate": 1.5083086884797338e-05,
      "loss": 2.3702,
      "step": 25700
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1964210271835327,
      "learning_rate": 1.5082732303109136e-05,
      "loss": 2.2771,
      "step": 25701
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1874239444732666,
      "learning_rate": 1.5082377712804319e-05,
      "loss": 2.612,
      "step": 25702
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9990188479423523,
      "learning_rate": 1.5082023113883494e-05,
      "loss": 2.373,
      "step": 25703
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0462665557861328,
      "learning_rate": 1.5081668506347255e-05,
      "loss": 2.4782,
      "step": 25704
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.050065040588379,
      "learning_rate": 1.5081313890196213e-05,
      "loss": 2.1653,
      "step": 25705
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0012905597686768,
      "learning_rate": 1.5080959265430962e-05,
      "loss": 2.5299,
      "step": 25706
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0134148597717285,
      "learning_rate": 1.5080604632052103e-05,
      "loss": 2.2034,
      "step": 25707
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0703160762786865,
      "learning_rate": 1.5080249990060241e-05,
      "loss": 2.1509,
      "step": 25708
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9853302836418152,
      "learning_rate": 1.5079895339455974e-05,
      "loss": 2.5362,
      "step": 25709
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2347382307052612,
      "learning_rate": 1.5079540680239906e-05,
      "loss": 2.6416,
      "step": 25710
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1259394884109497,
      "learning_rate": 1.5079186012412639e-05,
      "loss": 2.5351,
      "step": 25711
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1492236852645874,
      "learning_rate": 1.5078831335974768e-05,
      "loss": 2.3425,
      "step": 25712
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0452641248703003,
      "learning_rate": 1.5078476650926903e-05,
      "loss": 2.378,
      "step": 25713
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0256606340408325,
      "learning_rate": 1.5078121957269636e-05,
      "loss": 2.4764,
      "step": 25714
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.006646752357483,
      "learning_rate": 1.5077767255003575e-05,
      "loss": 2.4039,
      "step": 25715
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1331983804702759,
      "learning_rate": 1.507741254412932e-05,
      "loss": 2.4465,
      "step": 25716
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0168300867080688,
      "learning_rate": 1.5077057824647471e-05,
      "loss": 2.3839,
      "step": 25717
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0252978801727295,
      "learning_rate": 1.5076703096558633e-05,
      "loss": 2.3601,
      "step": 25718
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9615651369094849,
      "learning_rate": 1.5076348359863402e-05,
      "loss": 2.3415,
      "step": 25719
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2982940673828125,
      "learning_rate": 1.5075993614562382e-05,
      "loss": 2.2919,
      "step": 25720
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.066321849822998,
      "learning_rate": 1.5075638860656172e-05,
      "loss": 2.4132,
      "step": 25721
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0557361841201782,
      "learning_rate": 1.5075284098145378e-05,
      "loss": 2.4671,
      "step": 25722
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9953359365463257,
      "learning_rate": 1.5074929327030598e-05,
      "loss": 2.5202,
      "step": 25723
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1211589574813843,
      "learning_rate": 1.507457454731244e-05,
      "loss": 2.5871,
      "step": 25724
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1706609725952148,
      "learning_rate": 1.5074219758991493e-05,
      "loss": 2.4669,
      "step": 25725
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1530095338821411,
      "learning_rate": 1.5073864962068364e-05,
      "loss": 2.5681,
      "step": 25726
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0834710597991943,
      "learning_rate": 1.5073510156543662e-05,
      "loss": 2.5412,
      "step": 25727
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.0401729345321655,
      "learning_rate": 1.5073155342417979e-05,
      "loss": 2.4936,
      "step": 25728
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.08172607421875,
      "learning_rate": 1.5072800519691919e-05,
      "loss": 2.3535,
      "step": 25729
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.064433217048645,
      "learning_rate": 1.5072445688366086e-05,
      "loss": 2.4075,
      "step": 25730
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9433953762054443,
      "learning_rate": 1.5072090848441084e-05,
      "loss": 2.4103,
      "step": 25731
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0613734722137451,
      "learning_rate": 1.5071735999917504e-05,
      "loss": 2.5751,
      "step": 25732
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9944970607757568,
      "learning_rate": 1.5071381142795956e-05,
      "loss": 2.3184,
      "step": 25733
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0879849195480347,
      "learning_rate": 1.5071026277077043e-05,
      "loss": 2.4646,
      "step": 25734
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0502407550811768,
      "learning_rate": 1.507067140276136e-05,
      "loss": 2.4605,
      "step": 25735
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0705561637878418,
      "learning_rate": 1.5070316519849511e-05,
      "loss": 2.4644,
      "step": 25736
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2591384649276733,
      "learning_rate": 1.5069961628342098e-05,
      "loss": 2.3833,
      "step": 25737
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3962955474853516,
      "learning_rate": 1.5069606728239726e-05,
      "loss": 2.5813,
      "step": 25738
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.093875527381897,
      "learning_rate": 1.5069251819542992e-05,
      "loss": 2.1833,
      "step": 25739
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0541648864746094,
      "learning_rate": 1.5068896902252499e-05,
      "loss": 2.3782,
      "step": 25740
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0792242288589478,
      "learning_rate": 1.506854197636885e-05,
      "loss": 2.3888,
      "step": 25741
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9136284589767456,
      "learning_rate": 1.5068187041892644e-05,
      "loss": 2.6597,
      "step": 25742
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0332838296890259,
      "learning_rate": 1.5067832098824483e-05,
      "loss": 2.3846,
      "step": 25743
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0576633214950562,
      "learning_rate": 1.5067477147164974e-05,
      "loss": 2.5834,
      "step": 25744
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0921286344528198,
      "learning_rate": 1.5067122186914715e-05,
      "loss": 2.4139,
      "step": 25745
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0080997943878174,
      "learning_rate": 1.5066767218074307e-05,
      "loss": 2.4561,
      "step": 25746
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9968608617782593,
      "learning_rate": 1.5066412240644352e-05,
      "loss": 2.5039,
      "step": 25747
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0155611038208008,
      "learning_rate": 1.5066057254625453e-05,
      "loss": 2.5324,
      "step": 25748
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0076062679290771,
      "learning_rate": 1.506570226001821e-05,
      "loss": 2.5694,
      "step": 25749
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9509096145629883,
      "learning_rate": 1.5065347256823224e-05,
      "loss": 2.4744,
      "step": 25750
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.028903603553772,
      "learning_rate": 1.5064992245041104e-05,
      "loss": 2.1735,
      "step": 25751
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0454661846160889,
      "learning_rate": 1.5064637224672441e-05,
      "loss": 2.5232,
      "step": 25752
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2156167030334473,
      "learning_rate": 1.5064282195717845e-05,
      "loss": 2.6375,
      "step": 25753
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.014507532119751,
      "learning_rate": 1.5063927158177913e-05,
      "loss": 2.4022,
      "step": 25754
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0535162687301636,
      "learning_rate": 1.5063572112053253e-05,
      "loss": 2.338,
      "step": 25755
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1200389862060547,
      "learning_rate": 1.5063217057344459e-05,
      "loss": 2.2975,
      "step": 25756
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0290441513061523,
      "learning_rate": 1.506286199405214e-05,
      "loss": 2.4495,
      "step": 25757
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9821527600288391,
      "learning_rate": 1.5062506922176896e-05,
      "loss": 2.4417,
      "step": 25758
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1522775888442993,
      "learning_rate": 1.5062151841719324e-05,
      "loss": 2.2986,
      "step": 25759
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1819452047348022,
      "learning_rate": 1.5061796752680029e-05,
      "loss": 2.404,
      "step": 25760
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0984002351760864,
      "learning_rate": 1.5061441655059618e-05,
      "loss": 2.3185,
      "step": 25761
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0272828340530396,
      "learning_rate": 1.5061086548858689e-05,
      "loss": 2.3842,
      "step": 25762
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0635602474212646,
      "learning_rate": 1.506073143407784e-05,
      "loss": 2.4185,
      "step": 25763
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2408289909362793,
      "learning_rate": 1.5060376310717679e-05,
      "loss": 2.5769,
      "step": 25764
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.183203101158142,
      "learning_rate": 1.5060021178778805e-05,
      "loss": 2.1798,
      "step": 25765
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0328192710876465,
      "learning_rate": 1.505966603826182e-05,
      "loss": 2.5153,
      "step": 25766
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.97548508644104,
      "learning_rate": 1.505931088916733e-05,
      "loss": 2.5362,
      "step": 25767
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1484565734863281,
      "learning_rate": 1.505895573149593e-05,
      "loss": 2.5429,
      "step": 25768
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0424551963806152,
      "learning_rate": 1.5058600565248227e-05,
      "loss": 2.1921,
      "step": 25769
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0375714302062988,
      "learning_rate": 1.5058245390424823e-05,
      "loss": 2.4594,
      "step": 25770
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0602940320968628,
      "learning_rate": 1.5057890207026322e-05,
      "loss": 2.535,
      "step": 25771
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.031966209411621,
      "learning_rate": 1.5057535015053319e-05,
      "loss": 2.4462,
      "step": 25772
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.092436671257019,
      "learning_rate": 1.5057179814506423e-05,
      "loss": 2.3537,
      "step": 25773
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0984922647476196,
      "learning_rate": 1.5056824605386232e-05,
      "loss": 2.3938,
      "step": 25774
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.973477840423584,
      "learning_rate": 1.5056469387693349e-05,
      "loss": 2.4325,
      "step": 25775
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9967489838600159,
      "learning_rate": 1.5056114161428379e-05,
      "loss": 2.3544,
      "step": 25776
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1293865442276,
      "learning_rate": 1.5055758926591924e-05,
      "loss": 2.2317,
      "step": 25777
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9465138912200928,
      "learning_rate": 1.5055403683184581e-05,
      "loss": 2.1869,
      "step": 25778
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0919580459594727,
      "learning_rate": 1.5055048431206957e-05,
      "loss": 2.4249,
      "step": 25779
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0473620891571045,
      "learning_rate": 1.5054693170659654e-05,
      "loss": 2.3737,
      "step": 25780
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9505690932273865,
      "learning_rate": 1.5054337901543272e-05,
      "loss": 2.1992,
      "step": 25781
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1363588571548462,
      "learning_rate": 1.5053982623858415e-05,
      "loss": 2.345,
      "step": 25782
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9817830324172974,
      "learning_rate": 1.5053627337605682e-05,
      "loss": 2.2793,
      "step": 25783
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9843472242355347,
      "learning_rate": 1.5053272042785683e-05,
      "loss": 2.4374,
      "step": 25784
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.107646107673645,
      "learning_rate": 1.5052916739399015e-05,
      "loss": 2.3436,
      "step": 25785
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.041460394859314,
      "learning_rate": 1.505256142744628e-05,
      "loss": 2.5271,
      "step": 25786
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1042941808700562,
      "learning_rate": 1.5052206106928078e-05,
      "loss": 2.308,
      "step": 25787
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1597074270248413,
      "learning_rate": 1.5051850777845016e-05,
      "loss": 2.553,
      "step": 25788
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2210322618484497,
      "learning_rate": 1.5051495440197696e-05,
      "loss": 2.5563,
      "step": 25789
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1035566329956055,
      "learning_rate": 1.505114009398672e-05,
      "loss": 2.1992,
      "step": 25790
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0569944381713867,
      "learning_rate": 1.5050784739212686e-05,
      "loss": 2.5238,
      "step": 25791
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0015085935592651,
      "learning_rate": 1.5050429375876205e-05,
      "loss": 2.4305,
      "step": 25792
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2694791555404663,
      "learning_rate": 1.5050074003977874e-05,
      "loss": 2.3352,
      "step": 25793
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0412908792495728,
      "learning_rate": 1.5049718623518295e-05,
      "loss": 2.4273,
      "step": 25794
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9460749626159668,
      "learning_rate": 1.5049363234498069e-05,
      "loss": 2.5616,
      "step": 25795
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1684885025024414,
      "learning_rate": 1.5049007836917802e-05,
      "loss": 2.3828,
      "step": 25796
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0853999853134155,
      "learning_rate": 1.5048652430778096e-05,
      "loss": 2.3974,
      "step": 25797
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0925556421279907,
      "learning_rate": 1.5048297016079553e-05,
      "loss": 2.3671,
      "step": 25798
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.960479199886322,
      "learning_rate": 1.5047941592822777e-05,
      "loss": 2.4328,
      "step": 25799
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.225923776626587,
      "learning_rate": 1.5047586161008366e-05,
      "loss": 2.4063,
      "step": 25800
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0588105916976929,
      "learning_rate": 1.5047230720636927e-05,
      "loss": 2.3694,
      "step": 25801
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.122392177581787,
      "learning_rate": 1.5046875271709059e-05,
      "loss": 2.1795,
      "step": 25802
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1557854413986206,
      "learning_rate": 1.5046519814225372e-05,
      "loss": 2.5425,
      "step": 25803
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1611530780792236,
      "learning_rate": 1.5046164348186459e-05,
      "loss": 2.4278,
      "step": 25804
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9961336851119995,
      "learning_rate": 1.5045808873592927e-05,
      "loss": 2.4073,
      "step": 25805
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1548608541488647,
      "learning_rate": 1.504545339044538e-05,
      "loss": 2.5506,
      "step": 25806
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1595467329025269,
      "learning_rate": 1.5045097898744417e-05,
      "loss": 2.2268,
      "step": 25807
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.017077922821045,
      "learning_rate": 1.5044742398490643e-05,
      "loss": 2.4836,
      "step": 25808
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1691224575042725,
      "learning_rate": 1.5044386889684664e-05,
      "loss": 2.5461,
      "step": 25809
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1360653638839722,
      "learning_rate": 1.504403137232708e-05,
      "loss": 2.5188,
      "step": 25810
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.058896780014038,
      "learning_rate": 1.5043675846418485e-05,
      "loss": 2.35,
      "step": 25811
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0393564701080322,
      "learning_rate": 1.5043320311959497e-05,
      "loss": 2.2435,
      "step": 25812
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1999435424804688,
      "learning_rate": 1.5042964768950707e-05,
      "loss": 2.1593,
      "step": 25813
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1362063884735107,
      "learning_rate": 1.5042609217392726e-05,
      "loss": 2.5159,
      "step": 25814
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0499433279037476,
      "learning_rate": 1.504225365728615e-05,
      "loss": 2.5246,
      "step": 25815
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3594331741333008,
      "learning_rate": 1.5041898088631586e-05,
      "loss": 2.4145,
      "step": 25816
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0550609827041626,
      "learning_rate": 1.5041542511429637e-05,
      "loss": 2.5349,
      "step": 25817
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1476882696151733,
      "learning_rate": 1.50411869256809e-05,
      "loss": 2.2905,
      "step": 25818
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0051610469818115,
      "learning_rate": 1.5040831331385984e-05,
      "loss": 2.3796,
      "step": 25819
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0798547267913818,
      "learning_rate": 1.5040475728545489e-05,
      "loss": 2.3222,
      "step": 25820
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0037025213241577,
      "learning_rate": 1.504012011716002e-05,
      "loss": 2.4005,
      "step": 25821
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0640335083007812,
      "learning_rate": 1.5039764497230178e-05,
      "loss": 2.3154,
      "step": 25822
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9063381552696228,
      "learning_rate": 1.5039408868756566e-05,
      "loss": 2.3518,
      "step": 25823
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0288383960723877,
      "learning_rate": 1.503905323173979e-05,
      "loss": 2.4972,
      "step": 25824
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0534049272537231,
      "learning_rate": 1.5038697586180448e-05,
      "loss": 2.3659,
      "step": 25825
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2054824829101562,
      "learning_rate": 1.5038341932079146e-05,
      "loss": 2.3782,
      "step": 25826
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.153296947479248,
      "learning_rate": 1.5037986269436484e-05,
      "loss": 2.5272,
      "step": 25827
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0636308193206787,
      "learning_rate": 1.5037630598253067e-05,
      "loss": 2.7231,
      "step": 25828
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9363356232643127,
      "learning_rate": 1.5037274918529497e-05,
      "loss": 2.4049,
      "step": 25829
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1573610305786133,
      "learning_rate": 1.5036919230266383e-05,
      "loss": 2.5164,
      "step": 25830
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1486248970031738,
      "learning_rate": 1.503656353346432e-05,
      "loss": 2.4883,
      "step": 25831
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0243498086929321,
      "learning_rate": 1.5036207828123912e-05,
      "loss": 2.3708,
      "step": 25832
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.080939769744873,
      "learning_rate": 1.5035852114245764e-05,
      "loss": 2.289,
      "step": 25833
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.103790044784546,
      "learning_rate": 1.503549639183048e-05,
      "loss": 2.2973,
      "step": 25834
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.262168288230896,
      "learning_rate": 1.5035140660878664e-05,
      "loss": 2.7023,
      "step": 25835
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.253687858581543,
      "learning_rate": 1.5034784921390917e-05,
      "loss": 2.4842,
      "step": 25836
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0676695108413696,
      "learning_rate": 1.503442917336784e-05,
      "loss": 2.4432,
      "step": 25837
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0194528102874756,
      "learning_rate": 1.5034073416810038e-05,
      "loss": 2.4465,
      "step": 25838
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0885393619537354,
      "learning_rate": 1.5033717651718113e-05,
      "loss": 2.3905,
      "step": 25839
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1480401754379272,
      "learning_rate": 1.5033361878092674e-05,
      "loss": 2.2498,
      "step": 25840
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2314502000808716,
      "learning_rate": 1.5033006095934318e-05,
      "loss": 2.4381,
      "step": 25841
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2262133359909058,
      "learning_rate": 1.5032650305243648e-05,
      "loss": 2.5051,
      "step": 25842
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0803332328796387,
      "learning_rate": 1.503229450602127e-05,
      "loss": 2.1605,
      "step": 25843
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0995413064956665,
      "learning_rate": 1.5031938698267787e-05,
      "loss": 2.3595,
      "step": 25844
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0503063201904297,
      "learning_rate": 1.50315828819838e-05,
      "loss": 2.4145,
      "step": 25845
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0026278495788574,
      "learning_rate": 1.5031227057169911e-05,
      "loss": 2.4538,
      "step": 25846
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1308192014694214,
      "learning_rate": 1.5030871223826728e-05,
      "loss": 2.4111,
      "step": 25847
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1222516298294067,
      "learning_rate": 1.5030515381954853e-05,
      "loss": 2.3432,
      "step": 25848
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.262998342514038,
      "learning_rate": 1.5030159531554889e-05,
      "loss": 2.3572,
      "step": 25849
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0660308599472046,
      "learning_rate": 1.5029803672627435e-05,
      "loss": 2.4902,
      "step": 25850
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.020477056503296,
      "learning_rate": 1.5029447805173098e-05,
      "loss": 2.1842,
      "step": 25851
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0614324808120728,
      "learning_rate": 1.5029091929192485e-05,
      "loss": 2.5738,
      "step": 25852
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0853021144866943,
      "learning_rate": 1.5028736044686195e-05,
      "loss": 2.2588,
      "step": 25853
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0895155668258667,
      "learning_rate": 1.5028380151654827e-05,
      "loss": 2.411,
      "step": 25854
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.110783338546753,
      "learning_rate": 1.5028024250098991e-05,
      "loss": 2.4494,
      "step": 25855
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.07685124874115,
      "learning_rate": 1.502766834001929e-05,
      "loss": 2.4393,
      "step": 25856
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1781048774719238,
      "learning_rate": 1.5027312421416325e-05,
      "loss": 2.3381,
      "step": 25857
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1899147033691406,
      "learning_rate": 1.50269564942907e-05,
      "loss": 2.3752,
      "step": 25858
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1576623916625977,
      "learning_rate": 1.5026600558643014e-05,
      "loss": 2.3702,
      "step": 25859
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0590102672576904,
      "learning_rate": 1.5026244614473879e-05,
      "loss": 2.4262,
      "step": 25860
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1297746896743774,
      "learning_rate": 1.5025888661783894e-05,
      "loss": 2.3761,
      "step": 25861
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.080796241760254,
      "learning_rate": 1.5025532700573662e-05,
      "loss": 2.2553,
      "step": 25862
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1224133968353271,
      "learning_rate": 1.5025176730843792e-05,
      "loss": 2.4253,
      "step": 25863
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1129618883132935,
      "learning_rate": 1.5024820752594876e-05,
      "loss": 2.257,
      "step": 25864
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0788699388504028,
      "learning_rate": 1.5024464765827524e-05,
      "loss": 2.4273,
      "step": 25865
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9798166751861572,
      "learning_rate": 1.5024108770542344e-05,
      "loss": 2.4039,
      "step": 25866
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1618075370788574,
      "learning_rate": 1.5023752766739932e-05,
      "loss": 2.6432,
      "step": 25867
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9487730860710144,
      "learning_rate": 1.5023396754420896e-05,
      "loss": 2.5467,
      "step": 25868
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.03610360622406,
      "learning_rate": 1.5023040733585838e-05,
      "loss": 2.3502,
      "step": 25869
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1314396858215332,
      "learning_rate": 1.5022684704235361e-05,
      "loss": 2.4512,
      "step": 25870
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0959621667861938,
      "learning_rate": 1.502232866637007e-05,
      "loss": 2.3852,
      "step": 25871
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7640410661697388,
      "learning_rate": 1.502197261999057e-05,
      "loss": 2.4383,
      "step": 25872
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0203676223754883,
      "learning_rate": 1.5021616565097459e-05,
      "loss": 2.4638,
      "step": 25873
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0222264528274536,
      "learning_rate": 1.5021260501691343e-05,
      "loss": 2.2525,
      "step": 25874
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0097899436950684,
      "learning_rate": 1.502090442977283e-05,
      "loss": 2.4753,
      "step": 25875
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0013196468353271,
      "learning_rate": 1.5020548349342521e-05,
      "loss": 2.2931,
      "step": 25876
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.070007562637329,
      "learning_rate": 1.5020192260401017e-05,
      "loss": 2.4351,
      "step": 25877
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0603985786437988,
      "learning_rate": 1.5019836162948924e-05,
      "loss": 2.7051,
      "step": 25878
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1522949934005737,
      "learning_rate": 1.5019480056986843e-05,
      "loss": 2.5449,
      "step": 25879
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9875374436378479,
      "learning_rate": 1.5019123942515386e-05,
      "loss": 2.2288,
      "step": 25880
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0490378141403198,
      "learning_rate": 1.5018767819535146e-05,
      "loss": 2.3981,
      "step": 25881
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9740129709243774,
      "learning_rate": 1.5018411688046732e-05,
      "loss": 2.3467,
      "step": 25882
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0804579257965088,
      "learning_rate": 1.5018055548050749e-05,
      "loss": 2.3691,
      "step": 25883
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1380821466445923,
      "learning_rate": 1.5017699399547799e-05,
      "loss": 2.3835,
      "step": 25884
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.111809492111206,
      "learning_rate": 1.5017343242538483e-05,
      "loss": 2.4519,
      "step": 25885
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3987334966659546,
      "learning_rate": 1.5016987077023411e-05,
      "loss": 2.5276,
      "step": 25886
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2078237533569336,
      "learning_rate": 1.5016630903003178e-05,
      "loss": 2.512,
      "step": 25887
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1017976999282837,
      "learning_rate": 1.5016274720478396e-05,
      "loss": 2.345,
      "step": 25888
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0589087009429932,
      "learning_rate": 1.5015918529449669e-05,
      "loss": 2.3186,
      "step": 25889
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0282320976257324,
      "learning_rate": 1.5015562329917595e-05,
      "loss": 2.3585,
      "step": 25890
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.072119116783142,
      "learning_rate": 1.5015206121882782e-05,
      "loss": 2.4337,
      "step": 25891
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1770817041397095,
      "learning_rate": 1.5014849905345829e-05,
      "loss": 2.4262,
      "step": 25892
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0747450590133667,
      "learning_rate": 1.5014493680307345e-05,
      "loss": 2.3955,
      "step": 25893
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1721513271331787,
      "learning_rate": 1.5014137446767933e-05,
      "loss": 2.4725,
      "step": 25894
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.130590796470642,
      "learning_rate": 1.5013781204728197e-05,
      "loss": 2.4388,
      "step": 25895
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0612256526947021,
      "learning_rate": 1.501342495418874e-05,
      "loss": 2.0833,
      "step": 25896
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.8971041440963745,
      "learning_rate": 1.5013068695150166e-05,
      "loss": 2.2498,
      "step": 25897
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0487719774246216,
      "learning_rate": 1.5012712427613076e-05,
      "loss": 2.515,
      "step": 25898
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0035256147384644,
      "learning_rate": 1.501235615157808e-05,
      "loss": 2.4558,
      "step": 25899
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1988474130630493,
      "learning_rate": 1.501199986704578e-05,
      "loss": 2.1553,
      "step": 25900
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.177716612815857,
      "learning_rate": 1.5011643574016774e-05,
      "loss": 2.3207,
      "step": 25901
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9733306169509888,
      "learning_rate": 1.5011287272491677e-05,
      "loss": 2.0911,
      "step": 25902
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0735892057418823,
      "learning_rate": 1.5010930962471084e-05,
      "loss": 2.3786,
      "step": 25903
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1447988748550415,
      "learning_rate": 1.5010574643955603e-05,
      "loss": 2.3582,
      "step": 25904
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4821727275848389,
      "learning_rate": 1.5010218316945837e-05,
      "loss": 2.4432,
      "step": 25905
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3882901668548584,
      "learning_rate": 1.5009861981442388e-05,
      "loss": 2.2736,
      "step": 25906
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.062412142753601,
      "learning_rate": 1.5009505637445865e-05,
      "loss": 2.4939,
      "step": 25907
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1893302202224731,
      "learning_rate": 1.500914928495687e-05,
      "loss": 2.4974,
      "step": 25908
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1253336668014526,
      "learning_rate": 1.5008792923976003e-05,
      "loss": 2.3635,
      "step": 25909
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0910247564315796,
      "learning_rate": 1.5008436554503875e-05,
      "loss": 2.4439,
      "step": 25910
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0691636800765991,
      "learning_rate": 1.5008080176541087e-05,
      "loss": 2.1646,
      "step": 25911
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9903420209884644,
      "learning_rate": 1.500772379008824e-05,
      "loss": 2.3207,
      "step": 25912
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.33668053150177,
      "learning_rate": 1.5007367395145941e-05,
      "loss": 2.4811,
      "step": 25913
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9947190880775452,
      "learning_rate": 1.5007010991714797e-05,
      "loss": 2.4995,
      "step": 25914
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.260855793952942,
      "learning_rate": 1.5006654579795408e-05,
      "loss": 2.4737,
      "step": 25915
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0954921245574951,
      "learning_rate": 1.5006298159388379e-05,
      "loss": 2.1208,
      "step": 25916
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1146178245544434,
      "learning_rate": 1.5005941730494318e-05,
      "loss": 2.3413,
      "step": 25917
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1320910453796387,
      "learning_rate": 1.5005585293113822e-05,
      "loss": 2.3254,
      "step": 25918
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.00099515914917,
      "learning_rate": 1.5005228847247502e-05,
      "loss": 2.4066,
      "step": 25919
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1339069604873657,
      "learning_rate": 1.500487239289596e-05,
      "loss": 2.4351,
      "step": 25920
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0182958841323853,
      "learning_rate": 1.5004515930059797e-05,
      "loss": 2.5411,
      "step": 25921
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0054652690887451,
      "learning_rate": 1.5004159458739624e-05,
      "loss": 2.521,
      "step": 25922
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0910193920135498,
      "learning_rate": 1.5003802978936039e-05,
      "loss": 2.4889,
      "step": 25923
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.215338110923767,
      "learning_rate": 1.500344649064965e-05,
      "loss": 2.3998,
      "step": 25924
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0120275020599365,
      "learning_rate": 1.500308999388106e-05,
      "loss": 2.229,
      "step": 25925
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0981754064559937,
      "learning_rate": 1.5002733488630874e-05,
      "loss": 2.284,
      "step": 25926
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.006850242614746,
      "learning_rate": 1.5002376974899696e-05,
      "loss": 2.4519,
      "step": 25927
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0026283264160156,
      "learning_rate": 1.500202045268813e-05,
      "loss": 2.4997,
      "step": 25928
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.009986400604248,
      "learning_rate": 1.5001663921996782e-05,
      "loss": 2.2752,
      "step": 25929
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0818301439285278,
      "learning_rate": 1.5001307382826253e-05,
      "loss": 2.3676,
      "step": 25930
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.996595025062561,
      "learning_rate": 1.500095083517715e-05,
      "loss": 2.4596,
      "step": 25931
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0013353824615479,
      "learning_rate": 1.5000594279050078e-05,
      "loss": 2.5774,
      "step": 25932
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1404412984848022,
      "learning_rate": 1.500023771444564e-05,
      "loss": 2.5583,
      "step": 25933
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9653315544128418,
      "learning_rate": 1.4999881141364442e-05,
      "loss": 2.0727,
      "step": 25934
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0226491689682007,
      "learning_rate": 1.4999524559807089e-05,
      "loss": 2.277,
      "step": 25935
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0659514665603638,
      "learning_rate": 1.4999167969774183e-05,
      "loss": 2.3397,
      "step": 25936
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0106524229049683,
      "learning_rate": 1.4998811371266326e-05,
      "loss": 2.4882,
      "step": 25937
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9861960411071777,
      "learning_rate": 1.499845476428413e-05,
      "loss": 2.3697,
      "step": 25938
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.070567011833191,
      "learning_rate": 1.4998098148828196e-05,
      "loss": 2.3409,
      "step": 25939
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0104361772537231,
      "learning_rate": 1.4997741524899125e-05,
      "loss": 2.5059,
      "step": 25940
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0657380819320679,
      "learning_rate": 1.4997384892497526e-05,
      "loss": 2.3714,
      "step": 25941
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.049067735671997,
      "learning_rate": 1.4997028251624004e-05,
      "loss": 2.2981,
      "step": 25942
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0686333179473877,
      "learning_rate": 1.4996671602279161e-05,
      "loss": 2.5047,
      "step": 25943
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9711740612983704,
      "learning_rate": 1.4996314944463602e-05,
      "loss": 2.4364,
      "step": 25944
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3225680589675903,
      "learning_rate": 1.499595827817793e-05,
      "loss": 2.4036,
      "step": 25945
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9903737902641296,
      "learning_rate": 1.4995601603422755e-05,
      "loss": 2.3849,
      "step": 25946
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0530977249145508,
      "learning_rate": 1.4995244920198678e-05,
      "loss": 2.2803,
      "step": 25947
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9334208965301514,
      "learning_rate": 1.4994888228506304e-05,
      "loss": 2.4926,
      "step": 25948
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0617049932479858,
      "learning_rate": 1.4994531528346238e-05,
      "loss": 2.3357,
      "step": 25949
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9447629451751709,
      "learning_rate": 1.4994174819719086e-05,
      "loss": 2.4962,
      "step": 25950
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1122909784317017,
      "learning_rate": 1.4993818102625448e-05,
      "loss": 2.3481,
      "step": 25951
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.139122724533081,
      "learning_rate": 1.4993461377065932e-05,
      "loss": 2.313,
      "step": 25952
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1940504312515259,
      "learning_rate": 1.4993104643041143e-05,
      "loss": 2.2443,
      "step": 25953
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1064629554748535,
      "learning_rate": 1.4992747900551687e-05,
      "loss": 2.4706,
      "step": 25954
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9471272826194763,
      "learning_rate": 1.499239114959817e-05,
      "loss": 2.2417,
      "step": 25955
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.087364912033081,
      "learning_rate": 1.499203439018119e-05,
      "loss": 2.3444,
      "step": 25956
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0079365968704224,
      "learning_rate": 1.4991677622301357e-05,
      "loss": 2.2922,
      "step": 25957
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9623551368713379,
      "learning_rate": 1.4991320845959273e-05,
      "loss": 2.0415,
      "step": 25958
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0121536254882812,
      "learning_rate": 1.499096406115555e-05,
      "loss": 2.4917,
      "step": 25959
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9782309532165527,
      "learning_rate": 1.4990607267890779e-05,
      "loss": 2.3008,
      "step": 25960
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0563991069793701,
      "learning_rate": 1.499025046616558e-05,
      "loss": 2.154,
      "step": 25961
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0964914560317993,
      "learning_rate": 1.4989893655980549e-05,
      "loss": 2.4658,
      "step": 25962
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0004463195800781,
      "learning_rate": 1.4989536837336293e-05,
      "loss": 2.5692,
      "step": 25963
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0288829803466797,
      "learning_rate": 1.4989180010233417e-05,
      "loss": 2.443,
      "step": 25964
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.000108242034912,
      "learning_rate": 1.4988823174672526e-05,
      "loss": 2.11,
      "step": 25965
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.119347095489502,
      "learning_rate": 1.4988466330654223e-05,
      "loss": 2.4319,
      "step": 25966
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.073519229888916,
      "learning_rate": 1.4988109478179117e-05,
      "loss": 2.6658,
      "step": 25967
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0950459241867065,
      "learning_rate": 1.4987752617247812e-05,
      "loss": 2.4055,
      "step": 25968
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.101692795753479,
      "learning_rate": 1.498739574786091e-05,
      "loss": 2.2288,
      "step": 25969
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0016093254089355,
      "learning_rate": 1.4987038870019017e-05,
      "loss": 2.4683,
      "step": 25970
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1628737449645996,
      "learning_rate": 1.498668198372274e-05,
      "loss": 2.4872,
      "step": 25971
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0467387437820435,
      "learning_rate": 1.4986325088972682e-05,
      "loss": 2.2381,
      "step": 25972
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0608279705047607,
      "learning_rate": 1.4985968185769446e-05,
      "loss": 2.3364,
      "step": 25973
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0391045808792114,
      "learning_rate": 1.4985611274113642e-05,
      "loss": 2.4247,
      "step": 25974
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.045667290687561,
      "learning_rate": 1.4985254354005876e-05,
      "loss": 2.376,
      "step": 25975
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0244067907333374,
      "learning_rate": 1.4984897425446744e-05,
      "loss": 2.3201,
      "step": 25976
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9759059548377991,
      "learning_rate": 1.4984540488436861e-05,
      "loss": 2.4922,
      "step": 25977
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.093753457069397,
      "learning_rate": 1.4984183542976827e-05,
      "loss": 2.1074,
      "step": 25978
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3113377094268799,
      "learning_rate": 1.4983826589067247e-05,
      "loss": 2.4926,
      "step": 25979
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1539332866668701,
      "learning_rate": 1.4983469626708729e-05,
      "loss": 2.1929,
      "step": 25980
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2612584829330444,
      "learning_rate": 1.4983112655901877e-05,
      "loss": 2.4041,
      "step": 25981
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9767376780509949,
      "learning_rate": 1.4982755676647293e-05,
      "loss": 2.411,
      "step": 25982
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1556507349014282,
      "learning_rate": 1.4982398688945588e-05,
      "loss": 2.5297,
      "step": 25983
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0828911066055298,
      "learning_rate": 1.498204169279736e-05,
      "loss": 2.4411,
      "step": 25984
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1054195165634155,
      "learning_rate": 1.4981684688203221e-05,
      "loss": 2.3347,
      "step": 25985
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.11935293674469,
      "learning_rate": 1.4981327675163773e-05,
      "loss": 2.5798,
      "step": 25986
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0524752140045166,
      "learning_rate": 1.4980970653679622e-05,
      "loss": 2.379,
      "step": 25987
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0571359395980835,
      "learning_rate": 1.4980613623751375e-05,
      "loss": 2.513,
      "step": 25988
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.082071304321289,
      "learning_rate": 1.498025658537963e-05,
      "loss": 2.3795,
      "step": 25989
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1306897401809692,
      "learning_rate": 1.4979899538565e-05,
      "loss": 2.3277,
      "step": 25990
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.012760043144226,
      "learning_rate": 1.497954248330809e-05,
      "loss": 2.2607,
      "step": 25991
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.201570749282837,
      "learning_rate": 1.4979185419609501e-05,
      "loss": 2.3803,
      "step": 25992
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9396119117736816,
      "learning_rate": 1.497882834746984e-05,
      "loss": 2.2768,
      "step": 25993
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1131993532180786,
      "learning_rate": 1.4978471266889712e-05,
      "loss": 2.5898,
      "step": 25994
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.123403787612915,
      "learning_rate": 1.4978114177869727e-05,
      "loss": 2.6482,
      "step": 25995
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0938647985458374,
      "learning_rate": 1.4977757080410484e-05,
      "loss": 2.4331,
      "step": 25996
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0205165147781372,
      "learning_rate": 1.497739997451259e-05,
      "loss": 2.5552,
      "step": 25997
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.262888789176941,
      "learning_rate": 1.4977042860176653e-05,
      "loss": 2.361,
      "step": 25998
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4904719591140747,
      "learning_rate": 1.4976685737403274e-05,
      "loss": 2.4715,
      "step": 25999
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0557283163070679,
      "learning_rate": 1.4976328606193062e-05,
      "loss": 2.1633,
      "step": 26000
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0529792308807373,
      "learning_rate": 1.4975971466546624e-05,
      "loss": 2.45,
      "step": 26001
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9392009973526001,
      "learning_rate": 1.4975614318464562e-05,
      "loss": 2.3307,
      "step": 26002
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0990766286849976,
      "learning_rate": 1.4975257161947481e-05,
      "loss": 2.2529,
      "step": 26003
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2021225690841675,
      "learning_rate": 1.4974899996995987e-05,
      "loss": 2.3129,
      "step": 26004
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1948912143707275,
      "learning_rate": 1.4974542823610687e-05,
      "loss": 2.2926,
      "step": 26005
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0605158805847168,
      "learning_rate": 1.4974185641792185e-05,
      "loss": 2.2708,
      "step": 26006
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9260448217391968,
      "learning_rate": 1.4973828451541088e-05,
      "loss": 2.6361,
      "step": 26007
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0997018814086914,
      "learning_rate": 1.4973471252858006e-05,
      "loss": 2.4029,
      "step": 26008
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9905152916908264,
      "learning_rate": 1.4973114045743534e-05,
      "loss": 2.5991,
      "step": 26009
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0015480518341064,
      "learning_rate": 1.497275683019828e-05,
      "loss": 2.48,
      "step": 26010
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9521570801734924,
      "learning_rate": 1.4972399606222859e-05,
      "loss": 2.3032,
      "step": 26011
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.203901767730713,
      "learning_rate": 1.4972042373817867e-05,
      "loss": 2.2889,
      "step": 26012
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.151987910270691,
      "learning_rate": 1.4971685132983913e-05,
      "loss": 2.3813,
      "step": 26013
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0469043254852295,
      "learning_rate": 1.4971327883721604e-05,
      "loss": 2.3747,
      "step": 26014
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0752403736114502,
      "learning_rate": 1.4970970626031542e-05,
      "loss": 2.3567,
      "step": 26015
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0536167621612549,
      "learning_rate": 1.4970613359914336e-05,
      "loss": 2.4211,
      "step": 26016
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0915303230285645,
      "learning_rate": 1.497025608537059e-05,
      "loss": 2.4594,
      "step": 26017
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0157545804977417,
      "learning_rate": 1.4969898802400908e-05,
      "loss": 2.2637,
      "step": 26018
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2324873208999634,
      "learning_rate": 1.49695415110059e-05,
      "loss": 2.3818,
      "step": 26019
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0771663188934326,
      "learning_rate": 1.496918421118617e-05,
      "loss": 2.5005,
      "step": 26020
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9999122023582458,
      "learning_rate": 1.4968826902942323e-05,
      "loss": 2.4213,
      "step": 26021
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.990500807762146,
      "learning_rate": 1.4968469586274965e-05,
      "loss": 2.3891,
      "step": 26022
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0549728870391846,
      "learning_rate": 1.4968112261184699e-05,
      "loss": 2.2765,
      "step": 26023
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0474289655685425,
      "learning_rate": 1.4967754927672136e-05,
      "loss": 2.4094,
      "step": 26024
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9338061809539795,
      "learning_rate": 1.4967397585737878e-05,
      "loss": 2.4116,
      "step": 26025
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0107543468475342,
      "learning_rate": 1.4967040235382535e-05,
      "loss": 2.2364,
      "step": 26026
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9673171043395996,
      "learning_rate": 1.4966682876606705e-05,
      "loss": 2.3698,
      "step": 26027
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9849082827568054,
      "learning_rate": 1.4966325509411001e-05,
      "loss": 2.2808,
      "step": 26028
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0963159799575806,
      "learning_rate": 1.4965968133796028e-05,
      "loss": 2.4223,
      "step": 26029
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9940831065177917,
      "learning_rate": 1.496561074976239e-05,
      "loss": 2.5423,
      "step": 26030
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0516283512115479,
      "learning_rate": 1.496525335731069e-05,
      "loss": 2.5357,
      "step": 26031
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0136219263076782,
      "learning_rate": 1.4964895956441538e-05,
      "loss": 2.6763,
      "step": 26032
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2395737171173096,
      "learning_rate": 1.4964538547155538e-05,
      "loss": 2.4675,
      "step": 26033
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9869886040687561,
      "learning_rate": 1.4964181129453302e-05,
      "loss": 2.328,
      "step": 26034
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0521231889724731,
      "learning_rate": 1.4963823703335426e-05,
      "loss": 2.3582,
      "step": 26035
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9547354578971863,
      "learning_rate": 1.4963466268802523e-05,
      "loss": 2.2609,
      "step": 26036
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9987626075744629,
      "learning_rate": 1.4963108825855193e-05,
      "loss": 2.4767,
      "step": 26037
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0395435094833374,
      "learning_rate": 1.4962751374494046e-05,
      "loss": 2.3214,
      "step": 26038
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.148765206336975,
      "learning_rate": 1.496239391471969e-05,
      "loss": 2.1905,
      "step": 26039
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9591922760009766,
      "learning_rate": 1.4962036446532729e-05,
      "loss": 2.5719,
      "step": 26040
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0215415954589844,
      "learning_rate": 1.4961678969933768e-05,
      "loss": 2.1942,
      "step": 26041
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9228202104568481,
      "learning_rate": 1.4961321484923414e-05,
      "loss": 2.3564,
      "step": 26042
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6265039443969727,
      "learning_rate": 1.496096399150227e-05,
      "loss": 2.4397,
      "step": 26043
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0340749025344849,
      "learning_rate": 1.4960606489670948e-05,
      "loss": 2.1789,
      "step": 26044
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1410951614379883,
      "learning_rate": 1.4960248979430047e-05,
      "loss": 2.5173,
      "step": 26045
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0388946533203125,
      "learning_rate": 1.495989146078018e-05,
      "loss": 2.337,
      "step": 26046
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.003226637840271,
      "learning_rate": 1.495953393372195e-05,
      "loss": 2.3877,
      "step": 26047
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0412771701812744,
      "learning_rate": 1.495917639825596e-05,
      "loss": 2.3784,
      "step": 26048
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1593639850616455,
      "learning_rate": 1.4958818854382822e-05,
      "loss": 2.4545,
      "step": 26049
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0031181573867798,
      "learning_rate": 1.4958461302103138e-05,
      "loss": 2.4433,
      "step": 26050
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1095753908157349,
      "learning_rate": 1.4958103741417515e-05,
      "loss": 2.3904,
      "step": 26051
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.19961678981781,
      "learning_rate": 1.495774617232656e-05,
      "loss": 2.5051,
      "step": 26052
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0368077754974365,
      "learning_rate": 1.4957388594830877e-05,
      "loss": 2.4877,
      "step": 26053
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2630846500396729,
      "learning_rate": 1.4957031008931077e-05,
      "loss": 2.1781,
      "step": 26054
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.035016655921936,
      "learning_rate": 1.4956673414627762e-05,
      "loss": 2.3405,
      "step": 26055
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0652973651885986,
      "learning_rate": 1.495631581192154e-05,
      "loss": 2.3409,
      "step": 26056
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9955055713653564,
      "learning_rate": 1.4955958200813017e-05,
      "loss": 2.3817,
      "step": 26057
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3287973403930664,
      "learning_rate": 1.4955600581302799e-05,
      "loss": 2.4086,
      "step": 26058
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8407212495803833,
      "learning_rate": 1.4955242953391488e-05,
      "loss": 2.3314,
      "step": 26059
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0480703115463257,
      "learning_rate": 1.49548853170797e-05,
      "loss": 2.2495,
      "step": 26060
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.170314073562622,
      "learning_rate": 1.4954527672368033e-05,
      "loss": 2.273,
      "step": 26061
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.122607707977295,
      "learning_rate": 1.4954170019257095e-05,
      "loss": 2.467,
      "step": 26062
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0760339498519897,
      "learning_rate": 1.4953812357747493e-05,
      "loss": 2.4759,
      "step": 26063
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.075761318206787,
      "learning_rate": 1.4953454687839835e-05,
      "loss": 2.4205,
      "step": 26064
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0699464082717896,
      "learning_rate": 1.4953097009534726e-05,
      "loss": 2.6421,
      "step": 26065
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0004870891571045,
      "learning_rate": 1.495273932283277e-05,
      "loss": 2.3481,
      "step": 26066
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0775123834609985,
      "learning_rate": 1.495238162773458e-05,
      "loss": 2.383,
      "step": 26067
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.075593113899231,
      "learning_rate": 1.4952023924240756e-05,
      "loss": 2.4146,
      "step": 26068
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1912920475006104,
      "learning_rate": 1.4951666212351905e-05,
      "loss": 2.2291,
      "step": 26069
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0085556507110596,
      "learning_rate": 1.4951308492068636e-05,
      "loss": 2.1698,
      "step": 26070
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.156029462814331,
      "learning_rate": 1.4950950763391554e-05,
      "loss": 2.392,
      "step": 26071
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0120092630386353,
      "learning_rate": 1.4950593026321266e-05,
      "loss": 2.3513,
      "step": 26072
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.051479697227478,
      "learning_rate": 1.4950235280858379e-05,
      "loss": 2.3782,
      "step": 26073
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0957189798355103,
      "learning_rate": 1.4949877527003498e-05,
      "loss": 2.2896,
      "step": 26074
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0571123361587524,
      "learning_rate": 1.494951976475723e-05,
      "loss": 2.5755,
      "step": 26075
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1318490505218506,
      "learning_rate": 1.4949161994120182e-05,
      "loss": 2.3395,
      "step": 26076
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0922884941101074,
      "learning_rate": 1.494880421509296e-05,
      "loss": 2.3054,
      "step": 26077
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0988837480545044,
      "learning_rate": 1.4948446427676171e-05,
      "loss": 2.6408,
      "step": 26078
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9944416284561157,
      "learning_rate": 1.4948088631870423e-05,
      "loss": 2.5692,
      "step": 26079
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0842231512069702,
      "learning_rate": 1.494773082767632e-05,
      "loss": 2.1483,
      "step": 26080
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.083209753036499,
      "learning_rate": 1.4947373015094469e-05,
      "loss": 2.4359,
      "step": 26081
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.220740795135498,
      "learning_rate": 1.4947015194125476e-05,
      "loss": 2.5466,
      "step": 26082
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0788615942001343,
      "learning_rate": 1.4946657364769949e-05,
      "loss": 2.452,
      "step": 26083
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1962568759918213,
      "learning_rate": 1.4946299527028494e-05,
      "loss": 2.3704,
      "step": 26084
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0748790502548218,
      "learning_rate": 1.4945941680901722e-05,
      "loss": 2.3388,
      "step": 26085
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0996639728546143,
      "learning_rate": 1.494558382639023e-05,
      "loss": 2.5153,
      "step": 26086
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1101866960525513,
      "learning_rate": 1.4945225963494633e-05,
      "loss": 2.3615,
      "step": 26087
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1425734758377075,
      "learning_rate": 1.4944868092215536e-05,
      "loss": 2.3374,
      "step": 26088
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9443948268890381,
      "learning_rate": 1.4944510212553543e-05,
      "loss": 2.6002,
      "step": 26089
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0281835794448853,
      "learning_rate": 1.4944152324509263e-05,
      "loss": 2.3663,
      "step": 26090
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9640169739723206,
      "learning_rate": 1.49437944280833e-05,
      "loss": 2.5639,
      "step": 26091
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1938165426254272,
      "learning_rate": 1.4943436523276265e-05,
      "loss": 2.2787,
      "step": 26092
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2855108976364136,
      "learning_rate": 1.494307861008876e-05,
      "loss": 2.5546,
      "step": 26093
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0633529424667358,
      "learning_rate": 1.4942720688521397e-05,
      "loss": 2.5082,
      "step": 26094
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1499541997909546,
      "learning_rate": 1.4942362758574778e-05,
      "loss": 2.2731,
      "step": 26095
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0372602939605713,
      "learning_rate": 1.4942004820249514e-05,
      "loss": 2.6647,
      "step": 26096
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2362083196640015,
      "learning_rate": 1.4941646873546207e-05,
      "loss": 2.6024,
      "step": 26097
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9936025142669678,
      "learning_rate": 1.494128891846547e-05,
      "loss": 2.0398,
      "step": 26098
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1160883903503418,
      "learning_rate": 1.49409309550079e-05,
      "loss": 2.3806,
      "step": 26099
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.078611135482788,
      "learning_rate": 1.4940572983174116e-05,
      "loss": 2.3276,
      "step": 26100
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1338611841201782,
      "learning_rate": 1.4940215002964718e-05,
      "loss": 2.5055,
      "step": 26101
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1719485521316528,
      "learning_rate": 1.4939857014380312e-05,
      "loss": 2.5372,
      "step": 26102
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0097426176071167,
      "learning_rate": 1.4939499017421506e-05,
      "loss": 2.1834,
      "step": 26103
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.251239538192749,
      "learning_rate": 1.493914101208891e-05,
      "loss": 2.3015,
      "step": 26104
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0977394580841064,
      "learning_rate": 1.4938782998383126e-05,
      "loss": 2.4611,
      "step": 26105
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9960721731185913,
      "learning_rate": 1.4938424976304764e-05,
      "loss": 2.581,
      "step": 26106
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2051700353622437,
      "learning_rate": 1.4938066945854432e-05,
      "loss": 2.5103,
      "step": 26107
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0564930438995361,
      "learning_rate": 1.4937708907032736e-05,
      "loss": 2.3362,
      "step": 26108
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2607405185699463,
      "learning_rate": 1.493735085984028e-05,
      "loss": 2.6616,
      "step": 26109
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.013100266456604,
      "learning_rate": 1.4936992804277674e-05,
      "loss": 2.5257,
      "step": 26110
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2855751514434814,
      "learning_rate": 1.4936634740345526e-05,
      "loss": 2.4135,
      "step": 26111
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0978384017944336,
      "learning_rate": 1.4936276668044439e-05,
      "loss": 2.5779,
      "step": 26112
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0037928819656372,
      "learning_rate": 1.4935918587375024e-05,
      "loss": 2.4229,
      "step": 26113
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9409034252166748,
      "learning_rate": 1.4935560498337886e-05,
      "loss": 2.3603,
      "step": 26114
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.299492597579956,
      "learning_rate": 1.4935202400933633e-05,
      "loss": 2.4673,
      "step": 26115
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0772866010665894,
      "learning_rate": 1.4934844295162871e-05,
      "loss": 2.4406,
      "step": 26116
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.040889859199524,
      "learning_rate": 1.4934486181026209e-05,
      "loss": 2.4617,
      "step": 26117
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0361238718032837,
      "learning_rate": 1.493412805852425e-05,
      "loss": 2.4249,
      "step": 26118
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0090867280960083,
      "learning_rate": 1.4933769927657605e-05,
      "loss": 2.4989,
      "step": 26119
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1571491956710815,
      "learning_rate": 1.4933411788426882e-05,
      "loss": 2.2713,
      "step": 26120
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1690796613693237,
      "learning_rate": 1.4933053640832684e-05,
      "loss": 2.3703,
      "step": 26121
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9801549315452576,
      "learning_rate": 1.4932695484875618e-05,
      "loss": 2.3321,
      "step": 26122
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0668448209762573,
      "learning_rate": 1.4932337320556299e-05,
      "loss": 2.3266,
      "step": 26123
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1070928573608398,
      "learning_rate": 1.4931979147875323e-05,
      "loss": 2.4591,
      "step": 26124
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9889038801193237,
      "learning_rate": 1.4931620966833307e-05,
      "loss": 2.5378,
      "step": 26125
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0034961700439453,
      "learning_rate": 1.4931262777430852e-05,
      "loss": 2.2434,
      "step": 26126
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6193392276763916,
      "learning_rate": 1.4930904579668568e-05,
      "loss": 2.3054,
      "step": 26127
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1034895181655884,
      "learning_rate": 1.493054637354706e-05,
      "loss": 2.3083,
      "step": 26128
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2474420070648193,
      "learning_rate": 1.4930188159066938e-05,
      "loss": 2.393,
      "step": 26129
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0238947868347168,
      "learning_rate": 1.492982993622881e-05,
      "loss": 2.5542,
      "step": 26130
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.18552565574646,
      "learning_rate": 1.4929471705033278e-05,
      "loss": 2.3572,
      "step": 26131
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0261653661727905,
      "learning_rate": 1.4929113465480954e-05,
      "loss": 2.3026,
      "step": 26132
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1061453819274902,
      "learning_rate": 1.4928755217572446e-05,
      "loss": 2.3187,
      "step": 26133
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9573785066604614,
      "learning_rate": 1.4928396961308356e-05,
      "loss": 2.4567,
      "step": 26134
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1040416955947876,
      "learning_rate": 1.4928038696689296e-05,
      "loss": 2.3271,
      "step": 26135
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0263694524765015,
      "learning_rate": 1.4927680423715872e-05,
      "loss": 2.3882,
      "step": 26136
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1106572151184082,
      "learning_rate": 1.4927322142388692e-05,
      "loss": 2.27,
      "step": 26137
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.189725399017334,
      "learning_rate": 1.4926963852708361e-05,
      "loss": 2.4858,
      "step": 26138
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.064532995223999,
      "learning_rate": 1.492660555467549e-05,
      "loss": 2.2748,
      "step": 26139
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1917319297790527,
      "learning_rate": 1.4926247248290685e-05,
      "loss": 2.4345,
      "step": 26140
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0155084133148193,
      "learning_rate": 1.4925888933554548e-05,
      "loss": 2.3924,
      "step": 26141
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9624667763710022,
      "learning_rate": 1.4925530610467696e-05,
      "loss": 2.3721,
      "step": 26142
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0537617206573486,
      "learning_rate": 1.4925172279030731e-05,
      "loss": 2.2802,
      "step": 26143
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0608254671096802,
      "learning_rate": 1.4924813939244263e-05,
      "loss": 2.3397,
      "step": 26144
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.128807783126831,
      "learning_rate": 1.4924455591108896e-05,
      "loss": 2.358,
      "step": 26145
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1260019540786743,
      "learning_rate": 1.4924097234625236e-05,
      "loss": 2.4236,
      "step": 26146
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0922261476516724,
      "learning_rate": 1.49237388697939e-05,
      "loss": 2.2562,
      "step": 26147
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1094719171524048,
      "learning_rate": 1.4923380496615487e-05,
      "loss": 2.274,
      "step": 26148
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0429058074951172,
      "learning_rate": 1.4923022115090606e-05,
      "loss": 2.1961,
      "step": 26149
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0968761444091797,
      "learning_rate": 1.4922663725219866e-05,
      "loss": 2.3292,
      "step": 26150
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.011659860610962,
      "learning_rate": 1.4922305327003872e-05,
      "loss": 2.2794,
      "step": 26151
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9722889065742493,
      "learning_rate": 1.4921946920443236e-05,
      "loss": 2.4081,
      "step": 26152
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1348106861114502,
      "learning_rate": 1.4921588505538565e-05,
      "loss": 2.3853,
      "step": 26153
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0311474800109863,
      "learning_rate": 1.4921230082290461e-05,
      "loss": 2.3806,
      "step": 26154
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0958203077316284,
      "learning_rate": 1.4920871650699535e-05,
      "loss": 2.3115,
      "step": 26155
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.028356909751892,
      "learning_rate": 1.4920513210766398e-05,
      "loss": 2.4814,
      "step": 26156
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0725769996643066,
      "learning_rate": 1.4920154762491655e-05,
      "loss": 2.5358,
      "step": 26157
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.014154076576233,
      "learning_rate": 1.491979630587591e-05,
      "loss": 2.5087,
      "step": 26158
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1786298751831055,
      "learning_rate": 1.4919437840919778e-05,
      "loss": 2.677,
      "step": 26159
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0154492855072021,
      "learning_rate": 1.491907936762386e-05,
      "loss": 2.3869,
      "step": 26160
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1029125452041626,
      "learning_rate": 1.4918720885988766e-05,
      "loss": 2.299,
      "step": 26161
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1332416534423828,
      "learning_rate": 1.4918362396015105e-05,
      "loss": 2.4737,
      "step": 26162
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0847023725509644,
      "learning_rate": 1.4918003897703486e-05,
      "loss": 2.2085,
      "step": 26163
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0279617309570312,
      "learning_rate": 1.4917645391054511e-05,
      "loss": 2.2486,
      "step": 26164
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0213017463684082,
      "learning_rate": 1.4917286876068793e-05,
      "loss": 2.4722,
      "step": 26165
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.14658784866333,
      "learning_rate": 1.491692835274694e-05,
      "loss": 2.5366,
      "step": 26166
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9439085721969604,
      "learning_rate": 1.4916569821089557e-05,
      "loss": 2.1934,
      "step": 26167
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0146124362945557,
      "learning_rate": 1.4916211281097253e-05,
      "loss": 2.2581,
      "step": 26168
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.008448600769043,
      "learning_rate": 1.4915852732770635e-05,
      "loss": 2.4461,
      "step": 26169
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0606446266174316,
      "learning_rate": 1.4915494176110311e-05,
      "loss": 2.2116,
      "step": 26170
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0582283735275269,
      "learning_rate": 1.4915135611116892e-05,
      "loss": 2.4107,
      "step": 26171
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0745315551757812,
      "learning_rate": 1.491477703779098e-05,
      "loss": 2.3673,
      "step": 26172
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0890352725982666,
      "learning_rate": 1.4914418456133188e-05,
      "loss": 2.3361,
      "step": 26173
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1548187732696533,
      "learning_rate": 1.4914059866144121e-05,
      "loss": 2.4087,
      "step": 26174
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1585564613342285,
      "learning_rate": 1.491370126782439e-05,
      "loss": 2.6603,
      "step": 26175
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1892107725143433,
      "learning_rate": 1.49133426611746e-05,
      "loss": 2.3737,
      "step": 26176
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2069288492202759,
      "learning_rate": 1.4912984046195358e-05,
      "loss": 2.4377,
      "step": 26177
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0290160179138184,
      "learning_rate": 1.4912625422887272e-05,
      "loss": 2.4312,
      "step": 26178
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0257482528686523,
      "learning_rate": 1.4912266791250955e-05,
      "loss": 2.4346,
      "step": 26179
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0200316905975342,
      "learning_rate": 1.4911908151287012e-05,
      "loss": 2.5019,
      "step": 26180
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0289660692214966,
      "learning_rate": 1.491154950299605e-05,
      "loss": 2.5381,
      "step": 26181
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1425931453704834,
      "learning_rate": 1.4911190846378675e-05,
      "loss": 2.1337,
      "step": 26182
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0675244331359863,
      "learning_rate": 1.49108321814355e-05,
      "loss": 2.3911,
      "step": 26183
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.008194923400879,
      "learning_rate": 1.4910473508167132e-05,
      "loss": 2.4225,
      "step": 26184
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0962650775909424,
      "learning_rate": 1.4910114826574174e-05,
      "loss": 2.2959,
      "step": 26185
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0812318325042725,
      "learning_rate": 1.490975613665724e-05,
      "loss": 2.0187,
      "step": 26186
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9859844446182251,
      "learning_rate": 1.4909397438416937e-05,
      "loss": 2.4847,
      "step": 26187
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9972944259643555,
      "learning_rate": 1.4909038731853871e-05,
      "loss": 2.4467,
      "step": 26188
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.022109866142273,
      "learning_rate": 1.4908680016968649e-05,
      "loss": 2.3798,
      "step": 26189
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1681269407272339,
      "learning_rate": 1.4908321293761883e-05,
      "loss": 2.3958,
      "step": 26190
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9525561928749084,
      "learning_rate": 1.490796256223418e-05,
      "loss": 2.2937,
      "step": 26191
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9988518953323364,
      "learning_rate": 1.4907603822386148e-05,
      "loss": 2.6869,
      "step": 26192
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0598719120025635,
      "learning_rate": 1.4907245074218392e-05,
      "loss": 2.5046,
      "step": 26193
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.109209656715393,
      "learning_rate": 1.4906886317731526e-05,
      "loss": 2.3635,
      "step": 26194
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.055675983428955,
      "learning_rate": 1.4906527552926151e-05,
      "loss": 2.1996,
      "step": 26195
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0082181692123413,
      "learning_rate": 1.4906168779802884e-05,
      "loss": 2.3293,
      "step": 26196
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0286165475845337,
      "learning_rate": 1.4905809998362324e-05,
      "loss": 2.338,
      "step": 26197
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.007975459098816,
      "learning_rate": 1.4905451208605085e-05,
      "loss": 2.4246,
      "step": 26198
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0155837535858154,
      "learning_rate": 1.4905092410531776e-05,
      "loss": 2.267,
      "step": 26199
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.040892481803894,
      "learning_rate": 1.4904733604143e-05,
      "loss": 2.5311,
      "step": 26200
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1969949007034302,
      "learning_rate": 1.490437478943937e-05,
      "loss": 2.4709,
      "step": 26201
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0631026029586792,
      "learning_rate": 1.4904015966421495e-05,
      "loss": 2.5001,
      "step": 26202
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0983554124832153,
      "learning_rate": 1.490365713508998e-05,
      "loss": 2.6446,
      "step": 26203
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9723300933837891,
      "learning_rate": 1.4903298295445432e-05,
      "loss": 2.2037,
      "step": 26204
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0286705493927002,
      "learning_rate": 1.4902939447488462e-05,
      "loss": 2.2477,
      "step": 26205
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0184015035629272,
      "learning_rate": 1.490258059121968e-05,
      "loss": 2.3255,
      "step": 26206
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1095105409622192,
      "learning_rate": 1.4902221726639692e-05,
      "loss": 2.4796,
      "step": 26207
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0525174140930176,
      "learning_rate": 1.4901862853749105e-05,
      "loss": 2.335,
      "step": 26208
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1150188446044922,
      "learning_rate": 1.490150397254853e-05,
      "loss": 2.3599,
      "step": 26209
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1329916715621948,
      "learning_rate": 1.4901145083038574e-05,
      "loss": 2.4419,
      "step": 26210
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0128618478775024,
      "learning_rate": 1.4900786185219847e-05,
      "loss": 2.4856,
      "step": 26211
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0026618242263794,
      "learning_rate": 1.4900427279092954e-05,
      "loss": 2.3889,
      "step": 26212
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9638236165046692,
      "learning_rate": 1.4900068364658511e-05,
      "loss": 2.214,
      "step": 26213
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1368482112884521,
      "learning_rate": 1.4899709441917116e-05,
      "loss": 2.4332,
      "step": 26214
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1463366746902466,
      "learning_rate": 1.4899350510869384e-05,
      "loss": 2.5186,
      "step": 26215
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1909205913543701,
      "learning_rate": 1.4898991571515924e-05,
      "loss": 2.311,
      "step": 26216
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2016769647598267,
      "learning_rate": 1.489863262385734e-05,
      "loss": 2.6349,
      "step": 26217
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9625743627548218,
      "learning_rate": 1.4898273667894244e-05,
      "loss": 2.6641,
      "step": 26218
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.021185040473938,
      "learning_rate": 1.4897914703627247e-05,
      "loss": 2.3795,
      "step": 26219
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0093286037445068,
      "learning_rate": 1.4897555731056952e-05,
      "loss": 2.4286,
      "step": 26220
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9825337529182434,
      "learning_rate": 1.489719675018397e-05,
      "loss": 2.4548,
      "step": 26221
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0598042011260986,
      "learning_rate": 1.4896837761008909e-05,
      "loss": 2.4363,
      "step": 26222
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0767806768417358,
      "learning_rate": 1.4896478763532379e-05,
      "loss": 2.5656,
      "step": 26223
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2350047826766968,
      "learning_rate": 1.4896119757754988e-05,
      "loss": 2.324,
      "step": 26224
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1983164548873901,
      "learning_rate": 1.4895760743677343e-05,
      "loss": 2.3652,
      "step": 26225
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0825165510177612,
      "learning_rate": 1.4895401721300053e-05,
      "loss": 2.7347,
      "step": 26226
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9497080445289612,
      "learning_rate": 1.4895042690623728e-05,
      "loss": 2.3732,
      "step": 26227
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.007867455482483,
      "learning_rate": 1.4894683651648977e-05,
      "loss": 2.3862,
      "step": 26228
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.158316731452942,
      "learning_rate": 1.4894324604376408e-05,
      "loss": 2.4809,
      "step": 26229
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.975378155708313,
      "learning_rate": 1.489396554880663e-05,
      "loss": 2.2672,
      "step": 26230
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2491967678070068,
      "learning_rate": 1.4893606484940253e-05,
      "loss": 2.2614,
      "step": 26231
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0145055055618286,
      "learning_rate": 1.489324741277788e-05,
      "loss": 2.3821,
      "step": 26232
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9827635288238525,
      "learning_rate": 1.4892888332320124e-05,
      "loss": 2.4361,
      "step": 26233
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.130379319190979,
      "learning_rate": 1.4892529243567596e-05,
      "loss": 2.4818,
      "step": 26234
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9615885019302368,
      "learning_rate": 1.4892170146520902e-05,
      "loss": 2.4717,
      "step": 26235
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.004064917564392,
      "learning_rate": 1.4891811041180648e-05,
      "loss": 2.4838,
      "step": 26236
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9993740320205688,
      "learning_rate": 1.4891451927547446e-05,
      "loss": 2.472,
      "step": 26237
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0371402502059937,
      "learning_rate": 1.4891092805621906e-05,
      "loss": 2.5425,
      "step": 26238
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.104657769203186,
      "learning_rate": 1.4890733675404636e-05,
      "loss": 2.3994,
      "step": 26239
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0958220958709717,
      "learning_rate": 1.4890374536896244e-05,
      "loss": 2.3999,
      "step": 26240
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.076306939125061,
      "learning_rate": 1.4890015390097337e-05,
      "loss": 2.4419,
      "step": 26241
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.989251971244812,
      "learning_rate": 1.4889656235008528e-05,
      "loss": 2.29,
      "step": 26242
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.119821310043335,
      "learning_rate": 1.488929707163042e-05,
      "loss": 2.6011,
      "step": 26243
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0592536926269531,
      "learning_rate": 1.4888937899963626e-05,
      "loss": 2.3574,
      "step": 26244
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0811506509780884,
      "learning_rate": 1.4888578720008759e-05,
      "loss": 2.2818,
      "step": 26245
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1649832725524902,
      "learning_rate": 1.4888219531766421e-05,
      "loss": 2.3302,
      "step": 26246
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1310455799102783,
      "learning_rate": 1.4887860335237223e-05,
      "loss": 2.5322,
      "step": 26247
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1519955396652222,
      "learning_rate": 1.4887501130421773e-05,
      "loss": 2.594,
      "step": 26248
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9706224203109741,
      "learning_rate": 1.4887141917320682e-05,
      "loss": 2.2339,
      "step": 26249
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0257242918014526,
      "learning_rate": 1.4886782695934558e-05,
      "loss": 2.4576,
      "step": 26250
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1385895013809204,
      "learning_rate": 1.488642346626401e-05,
      "loss": 2.4864,
      "step": 26251
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9977381229400635,
      "learning_rate": 1.4886064228309651e-05,
      "loss": 2.4464,
      "step": 26252
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0573575496673584,
      "learning_rate": 1.4885704982072083e-05,
      "loss": 2.3341,
      "step": 26253
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1124217510223389,
      "learning_rate": 1.4885345727551916e-05,
      "loss": 2.4668,
      "step": 26254
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0452940464019775,
      "learning_rate": 1.4884986464749762e-05,
      "loss": 2.5253,
      "step": 26255
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0250020027160645,
      "learning_rate": 1.488462719366623e-05,
      "loss": 2.3511,
      "step": 26256
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.973159670829773,
      "learning_rate": 1.4884267914301931e-05,
      "loss": 2.4103,
      "step": 26257
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.054060935974121,
      "learning_rate": 1.4883908626657468e-05,
      "loss": 2.3943,
      "step": 26258
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0246495008468628,
      "learning_rate": 1.4883549330733455e-05,
      "loss": 2.4347,
      "step": 26259
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9587917923927307,
      "learning_rate": 1.4883190026530498e-05,
      "loss": 2.5571,
      "step": 26260
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1485470533370972,
      "learning_rate": 1.4882830714049208e-05,
      "loss": 2.3871,
      "step": 26261
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9774271249771118,
      "learning_rate": 1.4882471393290197e-05,
      "loss": 2.2589,
      "step": 26262
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2642099857330322,
      "learning_rate": 1.4882112064254068e-05,
      "loss": 2.2235,
      "step": 26263
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9955037236213684,
      "learning_rate": 1.4881752726941432e-05,
      "loss": 2.2548,
      "step": 26264
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1032531261444092,
      "learning_rate": 1.4881393381352902e-05,
      "loss": 2.6129,
      "step": 26265
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0315064191818237,
      "learning_rate": 1.4881034027489084e-05,
      "loss": 2.5815,
      "step": 26266
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9730962514877319,
      "learning_rate": 1.4880674665350586e-05,
      "loss": 2.4036,
      "step": 26267
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1520382165908813,
      "learning_rate": 1.4880315294938019e-05,
      "loss": 2.4757,
      "step": 26268
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0155670642852783,
      "learning_rate": 1.4879955916251992e-05,
      "loss": 2.6858,
      "step": 26269
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.135547161102295,
      "learning_rate": 1.4879596529293114e-05,
      "loss": 2.5667,
      "step": 26270
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1444716453552246,
      "learning_rate": 1.4879237134062e-05,
      "loss": 2.405,
      "step": 26271
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0390866994857788,
      "learning_rate": 1.4878877730559247e-05,
      "loss": 2.352,
      "step": 26272
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0384621620178223,
      "learning_rate": 1.4878518318785474e-05,
      "loss": 2.2322,
      "step": 26273
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2428507804870605,
      "learning_rate": 1.4878158898741286e-05,
      "loss": 2.5276,
      "step": 26274
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1035258769989014,
      "learning_rate": 1.4877799470427295e-05,
      "loss": 2.3793,
      "step": 26275
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0892753601074219,
      "learning_rate": 1.4877440033844108e-05,
      "loss": 2.27,
      "step": 26276
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1382780075073242,
      "learning_rate": 1.4877080588992336e-05,
      "loss": 2.3855,
      "step": 26277
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6226648092269897,
      "learning_rate": 1.4876721135872589e-05,
      "loss": 2.0792,
      "step": 26278
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9701646566390991,
      "learning_rate": 1.4876361674485475e-05,
      "loss": 2.7258,
      "step": 26279
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2039015293121338,
      "learning_rate": 1.4876002204831603e-05,
      "loss": 2.4717,
      "step": 26280
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1263443231582642,
      "learning_rate": 1.4875642726911582e-05,
      "loss": 2.4047,
      "step": 26281
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2091476917266846,
      "learning_rate": 1.4875283240726023e-05,
      "loss": 2.4332,
      "step": 26282
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0454446077346802,
      "learning_rate": 1.4874923746275536e-05,
      "loss": 2.289,
      "step": 26283
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.971778154373169,
      "learning_rate": 1.4874564243560727e-05,
      "loss": 2.5022,
      "step": 26284
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0209890604019165,
      "learning_rate": 1.487420473258221e-05,
      "loss": 2.4858,
      "step": 26285
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.062827706336975,
      "learning_rate": 1.4873845213340592e-05,
      "loss": 2.3213,
      "step": 26286
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.065701961517334,
      "learning_rate": 1.4873485685836482e-05,
      "loss": 2.3657,
      "step": 26287
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.031457781791687,
      "learning_rate": 1.4873126150070487e-05,
      "loss": 2.3874,
      "step": 26288
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.972713053226471,
      "learning_rate": 1.4872766606043224e-05,
      "loss": 2.0459,
      "step": 26289
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0238301753997803,
      "learning_rate": 1.4872407053755299e-05,
      "loss": 2.3992,
      "step": 26290
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0237663984298706,
      "learning_rate": 1.4872047493207318e-05,
      "loss": 2.2514,
      "step": 26291
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.12544846534729,
      "learning_rate": 1.4871687924399893e-05,
      "loss": 2.5474,
      "step": 26292
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1116513013839722,
      "learning_rate": 1.4871328347333635e-05,
      "loss": 2.2823,
      "step": 26293
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.107763648033142,
      "learning_rate": 1.4870968762009152e-05,
      "loss": 2.7176,
      "step": 26294
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0334783792495728,
      "learning_rate": 1.4870609168427054e-05,
      "loss": 2.5879,
      "step": 26295
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0462439060211182,
      "learning_rate": 1.4870249566587951e-05,
      "loss": 2.2839,
      "step": 26296
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1225569248199463,
      "learning_rate": 1.4869889956492452e-05,
      "loss": 2.4998,
      "step": 26297
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0056209564208984,
      "learning_rate": 1.4869530338141168e-05,
      "loss": 2.4585,
      "step": 26298
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1133822202682495,
      "learning_rate": 1.4869170711534704e-05,
      "loss": 2.3875,
      "step": 26299
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1247326135635376,
      "learning_rate": 1.4868811076673677e-05,
      "loss": 2.7226,
      "step": 26300
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0375880002975464,
      "learning_rate": 1.486845143355869e-05,
      "loss": 2.5966,
      "step": 26301
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0410306453704834,
      "learning_rate": 1.4868091782190357e-05,
      "loss": 2.4529,
      "step": 26302
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.980776846408844,
      "learning_rate": 1.4867732122569285e-05,
      "loss": 2.3488,
      "step": 26303
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0604246854782104,
      "learning_rate": 1.486737245469609e-05,
      "loss": 2.3005,
      "step": 26304
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0359892845153809,
      "learning_rate": 1.486701277857137e-05,
      "loss": 2.3612,
      "step": 26305
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0251896381378174,
      "learning_rate": 1.4866653094195745e-05,
      "loss": 2.5898,
      "step": 26306
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0274852514266968,
      "learning_rate": 1.486629340156982e-05,
      "loss": 2.28,
      "step": 26307
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0283652544021606,
      "learning_rate": 1.4865933700694205e-05,
      "loss": 2.4646,
      "step": 26308
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.029836893081665,
      "learning_rate": 1.4865573991569516e-05,
      "loss": 2.5269,
      "step": 26309
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.187616229057312,
      "learning_rate": 1.4865214274196352e-05,
      "loss": 2.3059,
      "step": 26310
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0369856357574463,
      "learning_rate": 1.4864854548575332e-05,
      "loss": 2.651,
      "step": 26311
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.00856351852417,
      "learning_rate": 1.486449481470706e-05,
      "loss": 2.5119,
      "step": 26312
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0101531744003296,
      "learning_rate": 1.486413507259215e-05,
      "loss": 2.5929,
      "step": 26313
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2829762697219849,
      "learning_rate": 1.486377532223121e-05,
      "loss": 2.4748,
      "step": 26314
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0264146327972412,
      "learning_rate": 1.4863415563624846e-05,
      "loss": 2.5804,
      "step": 26315
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9966632127761841,
      "learning_rate": 1.4863055796773673e-05,
      "loss": 2.3853,
      "step": 26316
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1611971855163574,
      "learning_rate": 1.4862696021678304e-05,
      "loss": 2.145,
      "step": 26317
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0812523365020752,
      "learning_rate": 1.486233623833934e-05,
      "loss": 2.5028,
      "step": 26318
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2602704763412476,
      "learning_rate": 1.4861976446757398e-05,
      "loss": 2.4665,
      "step": 26319
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0626531839370728,
      "learning_rate": 1.4861616646933083e-05,
      "loss": 2.6127,
      "step": 26320
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.046836495399475,
      "learning_rate": 1.486125683886701e-05,
      "loss": 2.4319,
      "step": 26321
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0316332578659058,
      "learning_rate": 1.4860897022559786e-05,
      "loss": 2.3153,
      "step": 26322
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9794617295265198,
      "learning_rate": 1.486053719801202e-05,
      "loss": 2.477,
      "step": 26323
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2059861421585083,
      "learning_rate": 1.4860177365224326e-05,
      "loss": 2.5384,
      "step": 26324
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.015596866607666,
      "learning_rate": 1.4859817524197308e-05,
      "loss": 2.5304,
      "step": 26325
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0391608476638794,
      "learning_rate": 1.4859457674931581e-05,
      "loss": 2.4323,
      "step": 26326
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.056455135345459,
      "learning_rate": 1.4859097817427755e-05,
      "loss": 2.4841,
      "step": 26327
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9940375089645386,
      "learning_rate": 1.4858737951686435e-05,
      "loss": 2.3044,
      "step": 26328
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0985596179962158,
      "learning_rate": 1.4858378077708235e-05,
      "loss": 2.2274,
      "step": 26329
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0058332681655884,
      "learning_rate": 1.4858018195493766e-05,
      "loss": 2.5484,
      "step": 26330
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0062013864517212,
      "learning_rate": 1.4857658305043639e-05,
      "loss": 2.3378,
      "step": 26331
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0492056608200073,
      "learning_rate": 1.4857298406358458e-05,
      "loss": 2.301,
      "step": 26332
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0677728652954102,
      "learning_rate": 1.4856938499438838e-05,
      "loss": 2.4984,
      "step": 26333
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9118176698684692,
      "learning_rate": 1.4856578584285388e-05,
      "loss": 2.3971,
      "step": 26334
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9706757068634033,
      "learning_rate": 1.4856218660898719e-05,
      "loss": 2.3404,
      "step": 26335
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0358978509902954,
      "learning_rate": 1.485585872927944e-05,
      "loss": 2.4848,
      "step": 26336
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1618382930755615,
      "learning_rate": 1.4855498789428163e-05,
      "loss": 2.4293,
      "step": 26337
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3138370513916016,
      "learning_rate": 1.4855138841345495e-05,
      "loss": 2.5039,
      "step": 26338
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1461341381072998,
      "learning_rate": 1.4854778885032048e-05,
      "loss": 2.3746,
      "step": 26339
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1320749521255493,
      "learning_rate": 1.4854418920488434e-05,
      "loss": 2.2749,
      "step": 26340
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1948778629302979,
      "learning_rate": 1.485405894771526e-05,
      "loss": 2.1758,
      "step": 26341
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1031867265701294,
      "learning_rate": 1.4853698966713137e-05,
      "loss": 2.2703,
      "step": 26342
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0622588396072388,
      "learning_rate": 1.4853338977482677e-05,
      "loss": 2.4485,
      "step": 26343
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0830212831497192,
      "learning_rate": 1.485297898002449e-05,
      "loss": 2.3247,
      "step": 26344
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1325061321258545,
      "learning_rate": 1.4852618974339185e-05,
      "loss": 2.3685,
      "step": 26345
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1795594692230225,
      "learning_rate": 1.4852258960427372e-05,
      "loss": 2.4979,
      "step": 26346
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2775120735168457,
      "learning_rate": 1.4851898938289663e-05,
      "loss": 2.4413,
      "step": 26347
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1266413927078247,
      "learning_rate": 1.4851538907926668e-05,
      "loss": 2.4835,
      "step": 26348
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0795637369155884,
      "learning_rate": 1.4851178869338998e-05,
      "loss": 2.4639,
      "step": 26349
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0500351190567017,
      "learning_rate": 1.4850818822527262e-05,
      "loss": 2.5773,
      "step": 26350
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1575490236282349,
      "learning_rate": 1.4850458767492069e-05,
      "loss": 2.5538,
      "step": 26351
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.007929801940918,
      "learning_rate": 1.4850098704234033e-05,
      "loss": 2.5466,
      "step": 26352
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.050817608833313,
      "learning_rate": 1.4849738632753761e-05,
      "loss": 2.5738,
      "step": 26353
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1435199975967407,
      "learning_rate": 1.4849378553051862e-05,
      "loss": 2.3858,
      "step": 26354
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9988429546356201,
      "learning_rate": 1.4849018465128953e-05,
      "loss": 2.4419,
      "step": 26355
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9478567242622375,
      "learning_rate": 1.484865836898564e-05,
      "loss": 2.4549,
      "step": 26356
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0284318923950195,
      "learning_rate": 1.4848298264622533e-05,
      "loss": 2.4942,
      "step": 26357
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.155501365661621,
      "learning_rate": 1.4847938152040247e-05,
      "loss": 2.5078,
      "step": 26358
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.051378607749939,
      "learning_rate": 1.4847578031239386e-05,
      "loss": 2.4133,
      "step": 26359
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0699472427368164,
      "learning_rate": 1.4847217902220563e-05,
      "loss": 2.4752,
      "step": 26360
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0748752355575562,
      "learning_rate": 1.484685776498439e-05,
      "loss": 2.471,
      "step": 26361
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.015236496925354,
      "learning_rate": 1.484649761953148e-05,
      "loss": 2.3292,
      "step": 26362
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0780967473983765,
      "learning_rate": 1.4846137465862435e-05,
      "loss": 2.4173,
      "step": 26363
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2137819528579712,
      "learning_rate": 1.4845777303977875e-05,
      "loss": 2.3294,
      "step": 26364
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1271084547042847,
      "learning_rate": 1.4845417133878404e-05,
      "loss": 2.3876,
      "step": 26365
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9947320222854614,
      "learning_rate": 1.4845056955564637e-05,
      "loss": 2.2681,
      "step": 26366
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.040670394897461,
      "learning_rate": 1.4844696769037179e-05,
      "loss": 2.4743,
      "step": 26367
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2680583000183105,
      "learning_rate": 1.4844336574296647e-05,
      "loss": 2.3481,
      "step": 26368
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0579050779342651,
      "learning_rate": 1.4843976371343647e-05,
      "loss": 2.3683,
      "step": 26369
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0449140071868896,
      "learning_rate": 1.4843616160178796e-05,
      "loss": 2.1904,
      "step": 26370
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0144377946853638,
      "learning_rate": 1.4843255940802694e-05,
      "loss": 2.4817,
      "step": 26371
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1868681907653809,
      "learning_rate": 1.4842895713215961e-05,
      "loss": 2.4198,
      "step": 26372
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0668827295303345,
      "learning_rate": 1.4842535477419204e-05,
      "loss": 2.5153,
      "step": 26373
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1225827932357788,
      "learning_rate": 1.4842175233413034e-05,
      "loss": 2.2761,
      "step": 26374
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2154428958892822,
      "learning_rate": 1.484181498119806e-05,
      "loss": 2.3869,
      "step": 26375
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2388842105865479,
      "learning_rate": 1.4841454720774896e-05,
      "loss": 2.35,
      "step": 26376
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.002982258796692,
      "learning_rate": 1.4841094452144154e-05,
      "loss": 2.2399,
      "step": 26377
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9968406558036804,
      "learning_rate": 1.484073417530644e-05,
      "loss": 2.3071,
      "step": 26378
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1153393983840942,
      "learning_rate": 1.4840373890262367e-05,
      "loss": 2.4854,
      "step": 26379
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9819048047065735,
      "learning_rate": 1.4840013597012547e-05,
      "loss": 2.3113,
      "step": 26380
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9809114933013916,
      "learning_rate": 1.4839653295557587e-05,
      "loss": 2.2155,
      "step": 26381
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0224671363830566,
      "learning_rate": 1.4839292985898102e-05,
      "loss": 2.3083,
      "step": 26382
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0145951509475708,
      "learning_rate": 1.48389326680347e-05,
      "loss": 2.4407,
      "step": 26383
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9308640360832214,
      "learning_rate": 1.4838572341967994e-05,
      "loss": 2.3263,
      "step": 26384
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0464531183242798,
      "learning_rate": 1.4838212007698592e-05,
      "loss": 2.3565,
      "step": 26385
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.142720103263855,
      "learning_rate": 1.4837851665227108e-05,
      "loss": 2.3784,
      "step": 26386
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.047501802444458,
      "learning_rate": 1.483749131455415e-05,
      "loss": 2.4935,
      "step": 26387
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9763609766960144,
      "learning_rate": 1.483713095568033e-05,
      "loss": 2.4461,
      "step": 26388
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0483378171920776,
      "learning_rate": 1.483677058860626e-05,
      "loss": 2.242,
      "step": 26389
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2023301124572754,
      "learning_rate": 1.4836410213332552e-05,
      "loss": 2.645,
      "step": 26390
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1412450075149536,
      "learning_rate": 1.4836049829859814e-05,
      "loss": 2.2068,
      "step": 26391
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0501551628112793,
      "learning_rate": 1.4835689438188659e-05,
      "loss": 2.5197,
      "step": 26392
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.433305025100708,
      "learning_rate": 1.4835329038319695e-05,
      "loss": 2.1335,
      "step": 26393
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9939533472061157,
      "learning_rate": 1.4834968630253537e-05,
      "loss": 2.5634,
      "step": 26394
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.994694709777832,
      "learning_rate": 1.483460821399079e-05,
      "loss": 2.2854,
      "step": 26395
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2018024921417236,
      "learning_rate": 1.4834247789532073e-05,
      "loss": 2.5134,
      "step": 26396
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.019869327545166,
      "learning_rate": 1.4833887356877994e-05,
      "loss": 2.3232,
      "step": 26397
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0665620565414429,
      "learning_rate": 1.483352691602916e-05,
      "loss": 2.2582,
      "step": 26398
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1314204931259155,
      "learning_rate": 1.4833166466986187e-05,
      "loss": 1.9757,
      "step": 26399
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0380260944366455,
      "learning_rate": 1.4832806009749682e-05,
      "loss": 2.254,
      "step": 26400
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0860671997070312,
      "learning_rate": 1.483244554432026e-05,
      "loss": 2.3844,
      "step": 26401
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1852588653564453,
      "learning_rate": 1.483208507069853e-05,
      "loss": 2.3874,
      "step": 26402
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9582232236862183,
      "learning_rate": 1.4831724588885106e-05,
      "loss": 2.4724,
      "step": 26403
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0057928562164307,
      "learning_rate": 1.4831364098880592e-05,
      "loss": 2.5656,
      "step": 26404
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.026315689086914,
      "learning_rate": 1.4831003600685606e-05,
      "loss": 2.4296,
      "step": 26405
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0227751731872559,
      "learning_rate": 1.4830643094300757e-05,
      "loss": 2.4765,
      "step": 26406
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0245782136917114,
      "learning_rate": 1.4830282579726654e-05,
      "loss": 2.3638,
      "step": 26407
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4000341892242432,
      "learning_rate": 1.4829922056963913e-05,
      "loss": 2.4596,
      "step": 26408
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1122967004776,
      "learning_rate": 1.4829561526013142e-05,
      "loss": 2.467,
      "step": 26409
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4946120977401733,
      "learning_rate": 1.482920098687495e-05,
      "loss": 2.2521,
      "step": 26410
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.507543683052063,
      "learning_rate": 1.4828840439549952e-05,
      "loss": 2.5492,
      "step": 26411
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.084946870803833,
      "learning_rate": 1.4828479884038758e-05,
      "loss": 2.5796,
      "step": 26412
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9578818082809448,
      "learning_rate": 1.482811932034198e-05,
      "loss": 2.2804,
      "step": 26413
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1191201210021973,
      "learning_rate": 1.4827758748460227e-05,
      "loss": 2.5087,
      "step": 26414
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1429245471954346,
      "learning_rate": 1.4827398168394113e-05,
      "loss": 2.3867,
      "step": 26415
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.965537428855896,
      "learning_rate": 1.4827037580144248e-05,
      "loss": 2.1787,
      "step": 26416
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1149777173995972,
      "learning_rate": 1.4826676983711243e-05,
      "loss": 2.5113,
      "step": 26417
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9352356195449829,
      "learning_rate": 1.4826316379095709e-05,
      "loss": 2.5002,
      "step": 26418
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.006880521774292,
      "learning_rate": 1.4825955766298256e-05,
      "loss": 2.3293,
      "step": 26419
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1775585412979126,
      "learning_rate": 1.4825595145319502e-05,
      "loss": 2.2165,
      "step": 26420
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1463457345962524,
      "learning_rate": 1.482523451616005e-05,
      "loss": 2.5666,
      "step": 26421
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0981334447860718,
      "learning_rate": 1.4824873878820515e-05,
      "loss": 2.377,
      "step": 26422
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0598292350769043,
      "learning_rate": 1.4824513233301513e-05,
      "loss": 2.348,
      "step": 26423
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0222067832946777,
      "learning_rate": 1.4824152579603646e-05,
      "loss": 2.2785,
      "step": 26424
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.169947862625122,
      "learning_rate": 1.482379191772753e-05,
      "loss": 2.3141,
      "step": 26425
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0685745477676392,
      "learning_rate": 1.4823431247673778e-05,
      "loss": 2.4264,
      "step": 26426
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0071288347244263,
      "learning_rate": 1.4823070569443001e-05,
      "loss": 2.3088,
      "step": 26427
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9804954528808594,
      "learning_rate": 1.4822709883035806e-05,
      "loss": 2.2692,
      "step": 26428
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0846524238586426,
      "learning_rate": 1.482234918845281e-05,
      "loss": 2.56,
      "step": 26429
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0078874826431274,
      "learning_rate": 1.4821988485694626e-05,
      "loss": 2.3585,
      "step": 26430
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.082700490951538,
      "learning_rate": 1.4821627774761858e-05,
      "loss": 2.2829,
      "step": 26431
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1144061088562012,
      "learning_rate": 1.4821267055655123e-05,
      "loss": 2.4533,
      "step": 26432
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0282530784606934,
      "learning_rate": 1.4820906328375028e-05,
      "loss": 2.4079,
      "step": 26433
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9952662587165833,
      "learning_rate": 1.482054559292219e-05,
      "loss": 2.3489,
      "step": 26434
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1364319324493408,
      "learning_rate": 1.4820184849297218e-05,
      "loss": 2.3263,
      "step": 26435
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0386602878570557,
      "learning_rate": 1.4819824097500723e-05,
      "loss": 2.0704,
      "step": 26436
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.090284824371338,
      "learning_rate": 1.4819463337533317e-05,
      "loss": 2.4457,
      "step": 26437
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0417732000350952,
      "learning_rate": 1.4819102569395612e-05,
      "loss": 2.542,
      "step": 26438
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5849131345748901,
      "learning_rate": 1.481874179308822e-05,
      "loss": 2.4959,
      "step": 26439
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1188687086105347,
      "learning_rate": 1.481838100861175e-05,
      "loss": 2.3913,
      "step": 26440
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0628591775894165,
      "learning_rate": 1.4818020215966816e-05,
      "loss": 2.5698,
      "step": 26441
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1542106866836548,
      "learning_rate": 1.4817659415154029e-05,
      "loss": 2.1893,
      "step": 26442
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9570728540420532,
      "learning_rate": 1.4817298606174005e-05,
      "loss": 2.3537,
      "step": 26443
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.005286693572998,
      "learning_rate": 1.4816937789027347e-05,
      "loss": 2.5355,
      "step": 26444
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1906156539916992,
      "learning_rate": 1.481657696371467e-05,
      "loss": 2.342,
      "step": 26445
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1384035348892212,
      "learning_rate": 1.4816216130236589e-05,
      "loss": 2.4437,
      "step": 26446
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4148919582366943,
      "learning_rate": 1.4815855288593715e-05,
      "loss": 2.3987,
      "step": 26447
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.287196397781372,
      "learning_rate": 1.4815494438786656e-05,
      "loss": 2.2691,
      "step": 26448
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0452851057052612,
      "learning_rate": 1.4815133580816028e-05,
      "loss": 2.4468,
      "step": 26449
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0291547775268555,
      "learning_rate": 1.4814772714682439e-05,
      "loss": 2.7128,
      "step": 26450
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3920708894729614,
      "learning_rate": 1.4814411840386505e-05,
      "loss": 2.1371,
      "step": 26451
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2762202024459839,
      "learning_rate": 1.4814050957928831e-05,
      "loss": 2.4009,
      "step": 26452
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.225661039352417,
      "learning_rate": 1.4813690067310037e-05,
      "loss": 2.3854,
      "step": 26453
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.120361328125,
      "learning_rate": 1.4813329168530727e-05,
      "loss": 2.3656,
      "step": 26454
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7466877698898315,
      "learning_rate": 1.4812968261591522e-05,
      "loss": 2.4647,
      "step": 26455
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0501750707626343,
      "learning_rate": 1.4812607346493027e-05,
      "loss": 2.3379,
      "step": 26456
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9862456321716309,
      "learning_rate": 1.4812246423235854e-05,
      "loss": 2.389,
      "step": 26457
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.166602373123169,
      "learning_rate": 1.4811885491820616e-05,
      "loss": 2.1986,
      "step": 26458
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0135226249694824,
      "learning_rate": 1.4811524552247927e-05,
      "loss": 2.5812,
      "step": 26459
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.036731243133545,
      "learning_rate": 1.4811163604518397e-05,
      "loss": 2.4734,
      "step": 26460
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.077173113822937,
      "learning_rate": 1.4810802648632636e-05,
      "loss": 2.3063,
      "step": 26461
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0151976346969604,
      "learning_rate": 1.4810441684591261e-05,
      "loss": 2.5238,
      "step": 26462
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1160695552825928,
      "learning_rate": 1.481008071239488e-05,
      "loss": 2.2579,
      "step": 26463
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0250624418258667,
      "learning_rate": 1.4809719732044103e-05,
      "loss": 2.2987,
      "step": 26464
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0713105201721191,
      "learning_rate": 1.4809358743539546e-05,
      "loss": 2.5614,
      "step": 26465
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0794503688812256,
      "learning_rate": 1.4808997746881822e-05,
      "loss": 2.3389,
      "step": 26466
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0546245574951172,
      "learning_rate": 1.4808636742071537e-05,
      "loss": 2.4327,
      "step": 26467
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1319187879562378,
      "learning_rate": 1.480827572910931e-05,
      "loss": 2.4561,
      "step": 26468
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.001097559928894,
      "learning_rate": 1.4807914707995748e-05,
      "loss": 2.4568,
      "step": 26469
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1639091968536377,
      "learning_rate": 1.4807553678731468e-05,
      "loss": 2.4422,
      "step": 26470
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.148741602897644,
      "learning_rate": 1.4807192641317073e-05,
      "loss": 2.128,
      "step": 26471
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2754367589950562,
      "learning_rate": 1.4806831595753187e-05,
      "loss": 2.305,
      "step": 26472
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2256453037261963,
      "learning_rate": 1.4806470542040412e-05,
      "loss": 2.154,
      "step": 26473
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.201879620552063,
      "learning_rate": 1.4806109480179363e-05,
      "loss": 2.2877,
      "step": 26474
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0667297840118408,
      "learning_rate": 1.4805748410170656e-05,
      "loss": 2.3574,
      "step": 26475
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0140482187271118,
      "learning_rate": 1.48053873320149e-05,
      "loss": 2.2581,
      "step": 26476
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.078444480895996,
      "learning_rate": 1.4805026245712705e-05,
      "loss": 2.3835,
      "step": 26477
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0051124095916748,
      "learning_rate": 1.4804665151264687e-05,
      "loss": 2.2331,
      "step": 26478
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1407486200332642,
      "learning_rate": 1.4804304048671456e-05,
      "loss": 2.7647,
      "step": 26479
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0065577030181885,
      "learning_rate": 1.4803942937933627e-05,
      "loss": 2.4482,
      "step": 26480
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0944952964782715,
      "learning_rate": 1.4803581819051806e-05,
      "loss": 2.3731,
      "step": 26481
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1417384147644043,
      "learning_rate": 1.4803220692026615e-05,
      "loss": 2.4336,
      "step": 26482
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1405056715011597,
      "learning_rate": 1.4802859556858655e-05,
      "loss": 2.3478,
      "step": 26483
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1182972192764282,
      "learning_rate": 1.4802498413548545e-05,
      "loss": 2.3166,
      "step": 26484
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.9291942715644836,
      "learning_rate": 1.4802137262096898e-05,
      "loss": 2.3366,
      "step": 26485
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0726879835128784,
      "learning_rate": 1.4801776102504321e-05,
      "loss": 2.465,
      "step": 26486
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1256732940673828,
      "learning_rate": 1.4801414934771429e-05,
      "loss": 2.5025,
      "step": 26487
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0079761743545532,
      "learning_rate": 1.4801053758898838e-05,
      "loss": 2.2415,
      "step": 26488
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1104744672775269,
      "learning_rate": 1.4800692574887157e-05,
      "loss": 2.4012,
      "step": 26489
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.080482006072998,
      "learning_rate": 1.4800331382736997e-05,
      "loss": 2.2964,
      "step": 26490
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1042741537094116,
      "learning_rate": 1.4799970182448971e-05,
      "loss": 2.3622,
      "step": 26491
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.057769775390625,
      "learning_rate": 1.4799608974023691e-05,
      "loss": 2.4871,
      "step": 26492
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.037104606628418,
      "learning_rate": 1.4799247757461772e-05,
      "loss": 2.3808,
      "step": 26493
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.0305269956588745,
      "learning_rate": 1.4798886532763824e-05,
      "loss": 2.238,
      "step": 26494
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1781587600708008,
      "learning_rate": 1.4798525299930463e-05,
      "loss": 2.4741,
      "step": 26495
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.146759271621704,
      "learning_rate": 1.4798164058962292e-05,
      "loss": 2.3307,
      "step": 26496
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0965633392333984,
      "learning_rate": 1.4797802809859934e-05,
      "loss": 2.3759,
      "step": 26497
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0873312950134277,
      "learning_rate": 1.4797441552623997e-05,
      "loss": 2.4986,
      "step": 26498
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1219263076782227,
      "learning_rate": 1.4797080287255093e-05,
      "loss": 2.2723,
      "step": 26499
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9702228307723999,
      "learning_rate": 1.4796719013753835e-05,
      "loss": 2.6073,
      "step": 26500
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9446494579315186,
      "learning_rate": 1.4796357732120834e-05,
      "loss": 2.4399,
      "step": 26501
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0791213512420654,
      "learning_rate": 1.4795996442356707e-05,
      "loss": 2.3972,
      "step": 26502
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.033760905265808,
      "learning_rate": 1.4795635144462062e-05,
      "loss": 2.4683,
      "step": 26503
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.002577543258667,
      "learning_rate": 1.4795273838437511e-05,
      "loss": 2.6768,
      "step": 26504
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1045869588851929,
      "learning_rate": 1.4794912524283671e-05,
      "loss": 2.6085,
      "step": 26505
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0609153509140015,
      "learning_rate": 1.479455120200115e-05,
      "loss": 2.3291,
      "step": 26506
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0266950130462646,
      "learning_rate": 1.4794189871590563e-05,
      "loss": 2.373,
      "step": 26507
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0529574155807495,
      "learning_rate": 1.4793828533052523e-05,
      "loss": 2.3626,
      "step": 26508
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9880542159080505,
      "learning_rate": 1.4793467186387645e-05,
      "loss": 2.3084,
      "step": 26509
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2554324865341187,
      "learning_rate": 1.4793105831596532e-05,
      "loss": 2.3201,
      "step": 26510
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2480378150939941,
      "learning_rate": 1.4792744468679806e-05,
      "loss": 2.4007,
      "step": 26511
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.237614631652832,
      "learning_rate": 1.4792383097638077e-05,
      "loss": 2.3103,
      "step": 26512
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9728965163230896,
      "learning_rate": 1.4792021718471954e-05,
      "loss": 2.3484,
      "step": 26513
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0345124006271362,
      "learning_rate": 1.4791660331182054e-05,
      "loss": 2.339,
      "step": 26514
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.053053379058838,
      "learning_rate": 1.4791298935768993e-05,
      "loss": 2.4507,
      "step": 26515
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0541573762893677,
      "learning_rate": 1.4790937532233373e-05,
      "loss": 2.3335,
      "step": 26516
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0668903589248657,
      "learning_rate": 1.4790576120575814e-05,
      "loss": 2.3136,
      "step": 26517
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0069607496261597,
      "learning_rate": 1.479021470079693e-05,
      "loss": 2.3236,
      "step": 26518
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2176852226257324,
      "learning_rate": 1.4789853272897328e-05,
      "loss": 2.5408,
      "step": 26519
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9861999750137329,
      "learning_rate": 1.4789491836877625e-05,
      "loss": 2.1222,
      "step": 26520
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0079132318496704,
      "learning_rate": 1.4789130392738431e-05,
      "loss": 2.3298,
      "step": 26521
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1509782075881958,
      "learning_rate": 1.4788768940480362e-05,
      "loss": 2.4559,
      "step": 26522
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0884959697723389,
      "learning_rate": 1.478840748010403e-05,
      "loss": 2.4719,
      "step": 26523
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.8776741623878479,
      "learning_rate": 1.4788046011610044e-05,
      "loss": 2.3482,
      "step": 26524
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0129796266555786,
      "learning_rate": 1.4787684534999024e-05,
      "loss": 2.3319,
      "step": 26525
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2674819231033325,
      "learning_rate": 1.4787323050271576e-05,
      "loss": 2.4931,
      "step": 26526
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9956185817718506,
      "learning_rate": 1.4786961557428314e-05,
      "loss": 2.2719,
      "step": 26527
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1576080322265625,
      "learning_rate": 1.4786600056469854e-05,
      "loss": 2.4529,
      "step": 26528
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.295257806777954,
      "learning_rate": 1.4786238547396807e-05,
      "loss": 2.5162,
      "step": 26529
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.137297511100769,
      "learning_rate": 1.4785877030209783e-05,
      "loss": 2.5532,
      "step": 26530
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0230293273925781,
      "learning_rate": 1.47855155049094e-05,
      "loss": 2.4529,
      "step": 26531
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0299320220947266,
      "learning_rate": 1.4785153971496267e-05,
      "loss": 2.3839,
      "step": 26532
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1728707551956177,
      "learning_rate": 1.4784792429970998e-05,
      "loss": 2.4808,
      "step": 26533
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1045055389404297,
      "learning_rate": 1.4784430880334206e-05,
      "loss": 2.2399,
      "step": 26534
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.020814299583435,
      "learning_rate": 1.4784069322586508e-05,
      "loss": 2.2429,
      "step": 26535
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1137150526046753,
      "learning_rate": 1.478370775672851e-05,
      "loss": 2.3855,
      "step": 26536
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0726388692855835,
      "learning_rate": 1.478334618276083e-05,
      "loss": 2.3967,
      "step": 26537
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0511634349822998,
      "learning_rate": 1.4782984600684075e-05,
      "loss": 2.5036,
      "step": 26538
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0602960586547852,
      "learning_rate": 1.4782623010498864e-05,
      "loss": 2.4973,
      "step": 26539
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0021412372589111,
      "learning_rate": 1.478226141220581e-05,
      "loss": 2.2601,
      "step": 26540
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.095969319343567,
      "learning_rate": 1.4781899805805525e-05,
      "loss": 2.6105,
      "step": 26541
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0049670934677124,
      "learning_rate": 1.4781538191298618e-05,
      "loss": 2.341,
      "step": 26542
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.110459804534912,
      "learning_rate": 1.4781176568685706e-05,
      "loss": 2.503,
      "step": 26543
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1849972009658813,
      "learning_rate": 1.47808149379674e-05,
      "loss": 2.2133,
      "step": 26544
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9954401850700378,
      "learning_rate": 1.4780453299144317e-05,
      "loss": 2.3842,
      "step": 26545
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9765964150428772,
      "learning_rate": 1.4780091652217065e-05,
      "loss": 2.3599,
      "step": 26546
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0501153469085693,
      "learning_rate": 1.477972999718626e-05,
      "loss": 2.5132,
      "step": 26547
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0389095544815063,
      "learning_rate": 1.4779368334052517e-05,
      "loss": 2.4314,
      "step": 26548
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0148848295211792,
      "learning_rate": 1.4779006662816444e-05,
      "loss": 2.4258,
      "step": 26549
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0190423727035522,
      "learning_rate": 1.4778644983478655e-05,
      "loss": 2.4813,
      "step": 26550
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9932206869125366,
      "learning_rate": 1.4778283296039767e-05,
      "loss": 2.1967,
      "step": 26551
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0905263423919678,
      "learning_rate": 1.4777921600500393e-05,
      "loss": 2.5073,
      "step": 26552
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2496203184127808,
      "learning_rate": 1.4777559896861143e-05,
      "loss": 2.3774,
      "step": 26553
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9823121428489685,
      "learning_rate": 1.477719818512263e-05,
      "loss": 2.3551,
      "step": 26554
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1688439846038818,
      "learning_rate": 1.4776836465285471e-05,
      "loss": 2.3341,
      "step": 26555
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1150537729263306,
      "learning_rate": 1.4776474737350275e-05,
      "loss": 2.2151,
      "step": 26556
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.020251750946045,
      "learning_rate": 1.4776113001317658e-05,
      "loss": 2.3095,
      "step": 26557
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.99367755651474,
      "learning_rate": 1.4775751257188232e-05,
      "loss": 2.4408,
      "step": 26558
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4068642854690552,
      "learning_rate": 1.4775389504962609e-05,
      "loss": 2.5127,
      "step": 26559
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.099892020225525,
      "learning_rate": 1.4775027744641403e-05,
      "loss": 2.5006,
      "step": 26560
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.103843331336975,
      "learning_rate": 1.4774665976225233e-05,
      "loss": 2.6177,
      "step": 26561
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0841823816299438,
      "learning_rate": 1.4774304199714703e-05,
      "loss": 2.3401,
      "step": 26562
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1176331043243408,
      "learning_rate": 1.4773942415110431e-05,
      "loss": 2.3002,
      "step": 26563
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.066167950630188,
      "learning_rate": 1.4773580622413032e-05,
      "loss": 2.2184,
      "step": 26564
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.998052179813385,
      "learning_rate": 1.4773218821623116e-05,
      "loss": 2.3834,
      "step": 26565
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0749512910842896,
      "learning_rate": 1.4772857012741298e-05,
      "loss": 2.3254,
      "step": 26566
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1427932977676392,
      "learning_rate": 1.477249519576819e-05,
      "loss": 2.6283,
      "step": 26567
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1468052864074707,
      "learning_rate": 1.4772133370704409e-05,
      "loss": 2.2432,
      "step": 26568
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9961124658584595,
      "learning_rate": 1.4771771537550565e-05,
      "loss": 2.2757,
      "step": 26569
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9618116617202759,
      "learning_rate": 1.4771409696307273e-05,
      "loss": 2.6147,
      "step": 26570
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2098782062530518,
      "learning_rate": 1.4771047846975144e-05,
      "loss": 2.1522,
      "step": 26571
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1265944242477417,
      "learning_rate": 1.4770685989554792e-05,
      "loss": 2.3844,
      "step": 26572
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1794726848602295,
      "learning_rate": 1.4770324124046834e-05,
      "loss": 2.2512,
      "step": 26573
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9643994569778442,
      "learning_rate": 1.476996225045188e-05,
      "loss": 2.5235,
      "step": 26574
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9691073894500732,
      "learning_rate": 1.4769600368770546e-05,
      "loss": 2.4517,
      "step": 26575
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0815694332122803,
      "learning_rate": 1.476923847900344e-05,
      "loss": 2.5623,
      "step": 26576
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0421127080917358,
      "learning_rate": 1.4768876581151182e-05,
      "loss": 2.2498,
      "step": 26577
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0830458402633667,
      "learning_rate": 1.4768514675214383e-05,
      "loss": 2.4232,
      "step": 26578
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0693860054016113,
      "learning_rate": 1.4768152761193657e-05,
      "loss": 2.2817,
      "step": 26579
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0803226232528687,
      "learning_rate": 1.4767790839089615e-05,
      "loss": 2.4295,
      "step": 26580
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0781556367874146,
      "learning_rate": 1.4767428908902877e-05,
      "loss": 2.3642,
      "step": 26581
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9869386553764343,
      "learning_rate": 1.476706697063405e-05,
      "loss": 2.3609,
      "step": 26582
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.120316505432129,
      "learning_rate": 1.4766705024283748e-05,
      "loss": 2.1172,
      "step": 26583
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4013516902923584,
      "learning_rate": 1.4766343069852587e-05,
      "loss": 2.4352,
      "step": 26584
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0700393915176392,
      "learning_rate": 1.476598110734118e-05,
      "loss": 2.4644,
      "step": 26585
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0226867198944092,
      "learning_rate": 1.4765619136750142e-05,
      "loss": 2.5005,
      "step": 26586
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0686949491500854,
      "learning_rate": 1.4765257158080083e-05,
      "loss": 2.4303,
      "step": 26587
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0922642946243286,
      "learning_rate": 1.4764895171331624e-05,
      "loss": 2.1509,
      "step": 26588
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9746964573860168,
      "learning_rate": 1.4764533176505369e-05,
      "loss": 2.4469,
      "step": 26589
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1532788276672363,
      "learning_rate": 1.4764171173601936e-05,
      "loss": 2.1399,
      "step": 26590
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0312409400939941,
      "learning_rate": 1.476380916262194e-05,
      "loss": 2.3844,
      "step": 26591
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0979002714157104,
      "learning_rate": 1.4763447143565993e-05,
      "loss": 2.4268,
      "step": 26592
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0149025917053223,
      "learning_rate": 1.476308511643471e-05,
      "loss": 2.5853,
      "step": 26593
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0963901281356812,
      "learning_rate": 1.4762723081228704e-05,
      "loss": 2.4349,
      "step": 26594
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0531156063079834,
      "learning_rate": 1.476236103794859e-05,
      "loss": 2.4334,
      "step": 26595
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0592678785324097,
      "learning_rate": 1.476199898659498e-05,
      "loss": 2.2735,
      "step": 26596
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2340168952941895,
      "learning_rate": 1.4761636927168486e-05,
      "loss": 2.2456,
      "step": 26597
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0859479904174805,
      "learning_rate": 1.4761274859669726e-05,
      "loss": 2.4445,
      "step": 26598
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.048554539680481,
      "learning_rate": 1.476091278409931e-05,
      "loss": 2.2512,
      "step": 26599
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1623095273971558,
      "learning_rate": 1.4760550700457857e-05,
      "loss": 2.3423,
      "step": 26600
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1510288715362549,
      "learning_rate": 1.4760188608745978e-05,
      "loss": 2.2986,
      "step": 26601
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1157996654510498,
      "learning_rate": 1.4759826508964283e-05,
      "loss": 2.4849,
      "step": 26602
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6454901695251465,
      "learning_rate": 1.475946440111339e-05,
      "loss": 2.356,
      "step": 26603
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0753130912780762,
      "learning_rate": 1.4759102285193914e-05,
      "loss": 2.3969,
      "step": 26604
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0340982675552368,
      "learning_rate": 1.4758740161206466e-05,
      "loss": 2.2794,
      "step": 26605
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2283765077590942,
      "learning_rate": 1.4758378029151661e-05,
      "loss": 2.2688,
      "step": 26606
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0142600536346436,
      "learning_rate": 1.4758015889030116e-05,
      "loss": 2.4442,
      "step": 26607
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.023002028465271,
      "learning_rate": 1.4757653740842437e-05,
      "loss": 2.4854,
      "step": 26608
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0159810781478882,
      "learning_rate": 1.4757291584589245e-05,
      "loss": 2.2571,
      "step": 26609
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0279853343963623,
      "learning_rate": 1.4756929420271152e-05,
      "loss": 2.214,
      "step": 26610
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0270322561264038,
      "learning_rate": 1.4756567247888771e-05,
      "loss": 2.2573,
      "step": 26611
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.034956693649292,
      "learning_rate": 1.4756205067442716e-05,
      "loss": 2.3313,
      "step": 26612
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9606703519821167,
      "learning_rate": 1.4755842878933604e-05,
      "loss": 2.4048,
      "step": 26613
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1233755350112915,
      "learning_rate": 1.4755480682362044e-05,
      "loss": 2.5573,
      "step": 26614
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4436314105987549,
      "learning_rate": 1.4755118477728654e-05,
      "loss": 2.3952,
      "step": 26615
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1167091131210327,
      "learning_rate": 1.4754756265034049e-05,
      "loss": 2.5007,
      "step": 26616
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.014162540435791,
      "learning_rate": 1.4754394044278836e-05,
      "loss": 2.3646,
      "step": 26617
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1193825006484985,
      "learning_rate": 1.4754031815463636e-05,
      "loss": 2.4358,
      "step": 26618
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0219953060150146,
      "learning_rate": 1.4753669578589063e-05,
      "loss": 2.3815,
      "step": 26619
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0472580194473267,
      "learning_rate": 1.4753307333655725e-05,
      "loss": 2.3088,
      "step": 26620
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.047315239906311,
      "learning_rate": 1.4752945080664246e-05,
      "loss": 2.4945,
      "step": 26621
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.045764446258545,
      "learning_rate": 1.4752582819615228e-05,
      "loss": 2.3753,
      "step": 26622
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0440572500228882,
      "learning_rate": 1.4752220550509293e-05,
      "loss": 2.3222,
      "step": 26623
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0452936887741089,
      "learning_rate": 1.4751858273347055e-05,
      "loss": 2.5142,
      "step": 26624
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1188600063323975,
      "learning_rate": 1.4751495988129123e-05,
      "loss": 2.3615,
      "step": 26625
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0555447340011597,
      "learning_rate": 1.475113369485612e-05,
      "loss": 2.6936,
      "step": 26626
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9798672199249268,
      "learning_rate": 1.4750771393528651e-05,
      "loss": 2.3584,
      "step": 26627
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2638243436813354,
      "learning_rate": 1.4750409084147336e-05,
      "loss": 2.4516,
      "step": 26628
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9960411190986633,
      "learning_rate": 1.4750046766712787e-05,
      "loss": 2.4824,
      "step": 26629
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0220248699188232,
      "learning_rate": 1.474968444122562e-05,
      "loss": 2.3179,
      "step": 26630
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2626326084136963,
      "learning_rate": 1.4749322107686446e-05,
      "loss": 2.2419,
      "step": 26631
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.143040657043457,
      "learning_rate": 1.4748959766095881e-05,
      "loss": 2.4213,
      "step": 26632
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.731630802154541,
      "learning_rate": 1.474859741645454e-05,
      "loss": 2.3384,
      "step": 26633
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0259244441986084,
      "learning_rate": 1.4748235058763039e-05,
      "loss": 2.5106,
      "step": 26634
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0898855924606323,
      "learning_rate": 1.4747872693021985e-05,
      "loss": 2.213,
      "step": 26635
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9420880079269409,
      "learning_rate": 1.4747510319232001e-05,
      "loss": 2.2389,
      "step": 26636
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1089388132095337,
      "learning_rate": 1.4747147937393695e-05,
      "loss": 2.366,
      "step": 26637
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0910327434539795,
      "learning_rate": 1.4746785547507687e-05,
      "loss": 2.422,
      "step": 26638
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1557942628860474,
      "learning_rate": 1.4746423149574586e-05,
      "loss": 2.7625,
      "step": 26639
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.047130823135376,
      "learning_rate": 1.4746060743595012e-05,
      "loss": 2.5192,
      "step": 26640
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1549392938613892,
      "learning_rate": 1.4745698329569574e-05,
      "loss": 2.28,
      "step": 26641
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0676084756851196,
      "learning_rate": 1.4745335907498885e-05,
      "loss": 2.2264,
      "step": 26642
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0883420705795288,
      "learning_rate": 1.4744973477383566e-05,
      "loss": 2.2374,
      "step": 26643
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0549001693725586,
      "learning_rate": 1.474461103922423e-05,
      "loss": 2.5221,
      "step": 26644
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1503318548202515,
      "learning_rate": 1.4744248593021486e-05,
      "loss": 2.3826,
      "step": 26645
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0474965572357178,
      "learning_rate": 1.4743886138775955e-05,
      "loss": 2.3333,
      "step": 26646
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1094104051589966,
      "learning_rate": 1.4743523676488249e-05,
      "loss": 2.1711,
      "step": 26647
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.986330509185791,
      "learning_rate": 1.474316120615898e-05,
      "loss": 2.4059,
      "step": 26648
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0496422052383423,
      "learning_rate": 1.4742798727788765e-05,
      "loss": 2.4506,
      "step": 26649
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3269093036651611,
      "learning_rate": 1.4742436241378218e-05,
      "loss": 2.4725,
      "step": 26650
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0503432750701904,
      "learning_rate": 1.4742073746927954e-05,
      "loss": 2.4036,
      "step": 26651
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0637511014938354,
      "learning_rate": 1.4741711244438586e-05,
      "loss": 2.5798,
      "step": 26652
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9975067973136902,
      "learning_rate": 1.474134873391073e-05,
      "loss": 2.2871,
      "step": 26653
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9988012909889221,
      "learning_rate": 1.4740986215345001e-05,
      "loss": 2.529,
      "step": 26654
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1412473917007446,
      "learning_rate": 1.474062368874201e-05,
      "loss": 2.2318,
      "step": 26655
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0519911050796509,
      "learning_rate": 1.4740261154102378e-05,
      "loss": 2.4407,
      "step": 26656
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2091602087020874,
      "learning_rate": 1.4739898611426715e-05,
      "loss": 2.3729,
      "step": 26657
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0770915746688843,
      "learning_rate": 1.4739536060715635e-05,
      "loss": 2.6031,
      "step": 26658
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1108728647232056,
      "learning_rate": 1.4739173501969755e-05,
      "loss": 2.5157,
      "step": 26659
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.167518973350525,
      "learning_rate": 1.473881093518969e-05,
      "loss": 2.4253,
      "step": 26660
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1624337434768677,
      "learning_rate": 1.4738448360376052e-05,
      "loss": 2.2603,
      "step": 26661
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3044511079788208,
      "learning_rate": 1.4738085777529457e-05,
      "loss": 2.4991,
      "step": 26662
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0219991207122803,
      "learning_rate": 1.473772318665052e-05,
      "loss": 2.3168,
      "step": 26663
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0390989780426025,
      "learning_rate": 1.4737360587739854e-05,
      "loss": 2.2745,
      "step": 26664
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2058953046798706,
      "learning_rate": 1.4736997980798076e-05,
      "loss": 2.3165,
      "step": 26665
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.142701506614685,
      "learning_rate": 1.47366353658258e-05,
      "loss": 2.3243,
      "step": 26666
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.036817193031311,
      "learning_rate": 1.4736272742823642e-05,
      "loss": 2.4418,
      "step": 26667
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.060526967048645,
      "learning_rate": 1.4735910111792216e-05,
      "loss": 2.3415,
      "step": 26668
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0658955574035645,
      "learning_rate": 1.4735547472732132e-05,
      "loss": 2.4138,
      "step": 26669
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1016981601715088,
      "learning_rate": 1.4735184825644014e-05,
      "loss": 2.4576,
      "step": 26670
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0506383180618286,
      "learning_rate": 1.4734822170528466e-05,
      "loss": 2.3303,
      "step": 26671
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0291035175323486,
      "learning_rate": 1.4734459507386114e-05,
      "loss": 2.406,
      "step": 26672
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.974445641040802,
      "learning_rate": 1.4734096836217564e-05,
      "loss": 2.5749,
      "step": 26673
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.025882363319397,
      "learning_rate": 1.4733734157023436e-05,
      "loss": 2.528,
      "step": 26674
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0494825839996338,
      "learning_rate": 1.4733371469804343e-05,
      "loss": 2.5059,
      "step": 26675
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.041733741760254,
      "learning_rate": 1.4733008774560898e-05,
      "loss": 2.5949,
      "step": 26676
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.987377405166626,
      "learning_rate": 1.4732646071293719e-05,
      "loss": 2.4849,
      "step": 26677
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0215661525726318,
      "learning_rate": 1.4732283360003419e-05,
      "loss": 2.3352,
      "step": 26678
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0344433784484863,
      "learning_rate": 1.4731920640690612e-05,
      "loss": 2.274,
      "step": 26679
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9302814602851868,
      "learning_rate": 1.4731557913355915e-05,
      "loss": 2.6332,
      "step": 26680
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0301849842071533,
      "learning_rate": 1.4731195177999944e-05,
      "loss": 2.5057,
      "step": 26681
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.003843903541565,
      "learning_rate": 1.4730832434623312e-05,
      "loss": 2.5257,
      "step": 26682
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.006131887435913,
      "learning_rate": 1.4730469683226632e-05,
      "loss": 2.4067,
      "step": 26683
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2518913745880127,
      "learning_rate": 1.4730106923810524e-05,
      "loss": 2.5335,
      "step": 26684
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.041092038154602,
      "learning_rate": 1.4729744156375597e-05,
      "loss": 2.547,
      "step": 26685
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9876626133918762,
      "learning_rate": 1.472938138092247e-05,
      "loss": 2.4085,
      "step": 26686
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1289358139038086,
      "learning_rate": 1.4729018597451759e-05,
      "loss": 2.3789,
      "step": 26687
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0322260856628418,
      "learning_rate": 1.4728655805964076e-05,
      "loss": 2.5844,
      "step": 26688
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0290188789367676,
      "learning_rate": 1.4728293006460037e-05,
      "loss": 2.4311,
      "step": 26689
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0812653303146362,
      "learning_rate": 1.4727930198940255e-05,
      "loss": 2.6904,
      "step": 26690
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0169345140457153,
      "learning_rate": 1.472756738340535e-05,
      "loss": 2.2721,
      "step": 26691
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.959493100643158,
      "learning_rate": 1.4727204559855931e-05,
      "loss": 2.5293,
      "step": 26692
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9713940024375916,
      "learning_rate": 1.4726841728292623e-05,
      "loss": 2.5418,
      "step": 26693
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0463075637817383,
      "learning_rate": 1.472647888871603e-05,
      "loss": 2.5151,
      "step": 26694
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0121139287948608,
      "learning_rate": 1.472611604112677e-05,
      "loss": 2.4467,
      "step": 26695
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.340667963027954,
      "learning_rate": 1.4725753185525462e-05,
      "loss": 2.3462,
      "step": 26696
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0291944742202759,
      "learning_rate": 1.4725390321912717e-05,
      "loss": 2.4512,
      "step": 26697
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1235971450805664,
      "learning_rate": 1.4725027450289155e-05,
      "loss": 2.3583,
      "step": 26698
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0854780673980713,
      "learning_rate": 1.4724664570655386e-05,
      "loss": 2.5067,
      "step": 26699
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.046787142753601,
      "learning_rate": 1.472430168301203e-05,
      "loss": 2.6194,
      "step": 26700
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5190602540969849,
      "learning_rate": 1.4723938787359697e-05,
      "loss": 2.2971,
      "step": 26701
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.060354232788086,
      "learning_rate": 1.4723575883699004e-05,
      "loss": 2.3967,
      "step": 26702
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0553332567214966,
      "learning_rate": 1.472321297203057e-05,
      "loss": 2.4314,
      "step": 26703
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0619349479675293,
      "learning_rate": 1.4722850052355004e-05,
      "loss": 2.3776,
      "step": 26704
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1506410837173462,
      "learning_rate": 1.4722487124672929e-05,
      "loss": 2.2743,
      "step": 26705
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5877281427383423,
      "learning_rate": 1.4722124188984951e-05,
      "loss": 2.4883,
      "step": 26706
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0522319078445435,
      "learning_rate": 1.4721761245291692e-05,
      "loss": 2.3896,
      "step": 26707
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0326945781707764,
      "learning_rate": 1.4721398293593765e-05,
      "loss": 2.4787,
      "step": 26708
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0003808736801147,
      "learning_rate": 1.4721035333891785e-05,
      "loss": 2.5369,
      "step": 26709
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0883959531784058,
      "learning_rate": 1.4720672366186368e-05,
      "loss": 2.3648,
      "step": 26710
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0465589761734009,
      "learning_rate": 1.472030939047813e-05,
      "loss": 2.5147,
      "step": 26711
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0521265268325806,
      "learning_rate": 1.4719946406767685e-05,
      "loss": 2.5536,
      "step": 26712
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0081210136413574,
      "learning_rate": 1.4719583415055652e-05,
      "loss": 2.3957,
      "step": 26713
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.131333589553833,
      "learning_rate": 1.4719220415342639e-05,
      "loss": 2.471,
      "step": 26714
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1117326021194458,
      "learning_rate": 1.4718857407629266e-05,
      "loss": 2.443,
      "step": 26715
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0645136833190918,
      "learning_rate": 1.471849439191615e-05,
      "loss": 2.2357,
      "step": 26716
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0730375051498413,
      "learning_rate": 1.4718131368203903e-05,
      "loss": 2.4023,
      "step": 26717
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.052757978439331,
      "learning_rate": 1.4717768336493141e-05,
      "loss": 2.4621,
      "step": 26718
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0355074405670166,
      "learning_rate": 1.4717405296784484e-05,
      "loss": 2.48,
      "step": 26719
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.054100513458252,
      "learning_rate": 1.4717042249078542e-05,
      "loss": 2.2415,
      "step": 26720
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.15826416015625,
      "learning_rate": 1.4716679193375931e-05,
      "loss": 2.2827,
      "step": 26721
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0925158262252808,
      "learning_rate": 1.4716316129677268e-05,
      "loss": 2.5967,
      "step": 26722
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0993130207061768,
      "learning_rate": 1.4715953057983169e-05,
      "loss": 2.436,
      "step": 26723
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1079583168029785,
      "learning_rate": 1.4715589978294248e-05,
      "loss": 2.2592,
      "step": 26724
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9999187588691711,
      "learning_rate": 1.471522689061112e-05,
      "loss": 2.5056,
      "step": 26725
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1189556121826172,
      "learning_rate": 1.4714863794934406e-05,
      "loss": 2.5315,
      "step": 26726
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0926730632781982,
      "learning_rate": 1.4714500691264714e-05,
      "loss": 2.6221,
      "step": 26727
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9874449372291565,
      "learning_rate": 1.4714137579602665e-05,
      "loss": 2.274,
      "step": 26728
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2028089761734009,
      "learning_rate": 1.471377445994887e-05,
      "loss": 2.6446,
      "step": 26729
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0551339387893677,
      "learning_rate": 1.4713411332303948e-05,
      "loss": 2.1965,
      "step": 26730
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0557724237442017,
      "learning_rate": 1.4713048196668519e-05,
      "loss": 2.3061,
      "step": 26731
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0633282661437988,
      "learning_rate": 1.4712685053043185e-05,
      "loss": 2.6528,
      "step": 26732
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0719027519226074,
      "learning_rate": 1.4712321901428572e-05,
      "loss": 2.4064,
      "step": 26733
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9601997137069702,
      "learning_rate": 1.4711958741825298e-05,
      "loss": 2.2682,
      "step": 26734
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0593713521957397,
      "learning_rate": 1.471159557423397e-05,
      "loss": 2.3595,
      "step": 26735
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0192875862121582,
      "learning_rate": 1.4711232398655208e-05,
      "loss": 2.4535,
      "step": 26736
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.120356559753418,
      "learning_rate": 1.471086921508963e-05,
      "loss": 2.4412,
      "step": 26737
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0292344093322754,
      "learning_rate": 1.4710506023537847e-05,
      "loss": 2.2862,
      "step": 26738
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2229516506195068,
      "learning_rate": 1.4710142824000478e-05,
      "loss": 2.3451,
      "step": 26739
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0304560661315918,
      "learning_rate": 1.4709779616478136e-05,
      "loss": 2.3027,
      "step": 26740
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0082099437713623,
      "learning_rate": 1.4709416400971439e-05,
      "loss": 2.3933,
      "step": 26741
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9708773493766785,
      "learning_rate": 1.4709053177481003e-05,
      "loss": 2.1849,
      "step": 26742
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0284626483917236,
      "learning_rate": 1.4708689946007441e-05,
      "loss": 2.3337,
      "step": 26743
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0784004926681519,
      "learning_rate": 1.4708326706551373e-05,
      "loss": 2.279,
      "step": 26744
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.014652967453003,
      "learning_rate": 1.470796345911341e-05,
      "loss": 2.0898,
      "step": 26745
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0900760889053345,
      "learning_rate": 1.4707600203694174e-05,
      "loss": 2.4659,
      "step": 26746
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0985071659088135,
      "learning_rate": 1.4707236940294275e-05,
      "loss": 2.2323,
      "step": 26747
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1009243726730347,
      "learning_rate": 1.4706873668914332e-05,
      "loss": 2.52,
      "step": 26748
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.143093466758728,
      "learning_rate": 1.4706510389554958e-05,
      "loss": 2.3162,
      "step": 26749
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.052248477935791,
      "learning_rate": 1.470614710221677e-05,
      "loss": 2.1783,
      "step": 26750
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.062180995941162,
      "learning_rate": 1.4705783806900386e-05,
      "loss": 2.2769,
      "step": 26751
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1395134925842285,
      "learning_rate": 1.4705420503606422e-05,
      "loss": 2.269,
      "step": 26752
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0634686946868896,
      "learning_rate": 1.470505719233549e-05,
      "loss": 2.3397,
      "step": 26753
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0286957025527954,
      "learning_rate": 1.4704693873088207e-05,
      "loss": 2.4851,
      "step": 26754
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.956881582736969,
      "learning_rate": 1.4704330545865193e-05,
      "loss": 2.4378,
      "step": 26755
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0141547918319702,
      "learning_rate": 1.4703967210667058e-05,
      "loss": 2.3599,
      "step": 26756
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0564124584197998,
      "learning_rate": 1.4703603867494423e-05,
      "loss": 2.1714,
      "step": 26757
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.041847586631775,
      "learning_rate": 1.4703240516347903e-05,
      "loss": 2.4529,
      "step": 26758
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1699187755584717,
      "learning_rate": 1.4702877157228112e-05,
      "loss": 2.3922,
      "step": 26759
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2285314798355103,
      "learning_rate": 1.4702513790135666e-05,
      "loss": 2.4043,
      "step": 26760
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0307501554489136,
      "learning_rate": 1.4702150415071184e-05,
      "loss": 2.4966,
      "step": 26761
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0761311054229736,
      "learning_rate": 1.4701787032035277e-05,
      "loss": 2.3952,
      "step": 26762
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1465779542922974,
      "learning_rate": 1.4701423641028567e-05,
      "loss": 2.4163,
      "step": 26763
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.03399658203125,
      "learning_rate": 1.4701060242051665e-05,
      "loss": 2.2823,
      "step": 26764
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0170100927352905,
      "learning_rate": 1.470069683510519e-05,
      "loss": 2.4666,
      "step": 26765
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9710218906402588,
      "learning_rate": 1.470033342018976e-05,
      "loss": 2.4115,
      "step": 26766
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.008285403251648,
      "learning_rate": 1.4699969997305982e-05,
      "loss": 2.2115,
      "step": 26767
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3169324398040771,
      "learning_rate": 1.4699606566454483e-05,
      "loss": 2.4547,
      "step": 26768
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.175152063369751,
      "learning_rate": 1.4699243127635872e-05,
      "loss": 2.6499,
      "step": 26769
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9167025089263916,
      "learning_rate": 1.4698879680850767e-05,
      "loss": 2.326,
      "step": 26770
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0929837226867676,
      "learning_rate": 1.4698516226099786e-05,
      "loss": 2.5307,
      "step": 26771
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2278293371200562,
      "learning_rate": 1.4698152763383546e-05,
      "loss": 2.3175,
      "step": 26772
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0240687131881714,
      "learning_rate": 1.469778929270266e-05,
      "loss": 2.322,
      "step": 26773
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0860148668289185,
      "learning_rate": 1.4697425814057744e-05,
      "loss": 2.5753,
      "step": 26774
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.111429214477539,
      "learning_rate": 1.4697062327449413e-05,
      "loss": 2.3405,
      "step": 26775
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9608001708984375,
      "learning_rate": 1.4696698832878289e-05,
      "loss": 2.2634,
      "step": 26776
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.261499047279358,
      "learning_rate": 1.4696335330344983e-05,
      "loss": 2.4704,
      "step": 26777
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.031635046005249,
      "learning_rate": 1.4695971819850117e-05,
      "loss": 2.236,
      "step": 26778
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.173951268196106,
      "learning_rate": 1.4695608301394301e-05,
      "loss": 2.261,
      "step": 26779
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0765509605407715,
      "learning_rate": 1.4695244774978155e-05,
      "loss": 2.3637,
      "step": 26780
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.225300669670105,
      "learning_rate": 1.469488124060229e-05,
      "loss": 2.4904,
      "step": 26781
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9721283316612244,
      "learning_rate": 1.469451769826733e-05,
      "loss": 2.2635,
      "step": 26782
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0340049266815186,
      "learning_rate": 1.4694154147973884e-05,
      "loss": 2.3005,
      "step": 26783
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.090367317199707,
      "learning_rate": 1.4693790589722574e-05,
      "loss": 2.2425,
      "step": 26784
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.061393141746521,
      "learning_rate": 1.4693427023514017e-05,
      "loss": 2.3429,
      "step": 26785
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9553743600845337,
      "learning_rate": 1.469306344934882e-05,
      "loss": 2.3527,
      "step": 26786
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2582552433013916,
      "learning_rate": 1.4692699867227612e-05,
      "loss": 2.5197,
      "step": 26787
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9442771673202515,
      "learning_rate": 1.4692336277151e-05,
      "loss": 2.2909,
      "step": 26788
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.161949872970581,
      "learning_rate": 1.4691972679119603e-05,
      "loss": 2.9612,
      "step": 26789
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.001181960105896,
      "learning_rate": 1.469160907313404e-05,
      "loss": 2.3465,
      "step": 26790
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9450068473815918,
      "learning_rate": 1.4691245459194925e-05,
      "loss": 2.2931,
      "step": 26791
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2411576509475708,
      "learning_rate": 1.4690881837302875e-05,
      "loss": 2.592,
      "step": 26792
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0745850801467896,
      "learning_rate": 1.4690518207458506e-05,
      "loss": 2.4773,
      "step": 26793
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1210824251174927,
      "learning_rate": 1.4690154569662433e-05,
      "loss": 2.4898,
      "step": 26794
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0568056106567383,
      "learning_rate": 1.4689790923915277e-05,
      "loss": 2.1604,
      "step": 26795
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9284347295761108,
      "learning_rate": 1.4689427270217648e-05,
      "loss": 2.3811,
      "step": 26796
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9859712719917297,
      "learning_rate": 1.4689063608570169e-05,
      "loss": 2.435,
      "step": 26797
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9536950588226318,
      "learning_rate": 1.4688699938973452e-05,
      "loss": 2.2463,
      "step": 26798
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.088553547859192,
      "learning_rate": 1.468833626142812e-05,
      "loss": 2.3649,
      "step": 26799
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.114249348640442,
      "learning_rate": 1.468797257593478e-05,
      "loss": 2.6948,
      "step": 26800
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1035584211349487,
      "learning_rate": 1.4687608882494053e-05,
      "loss": 2.434,
      "step": 26801
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0258469581604004,
      "learning_rate": 1.4687245181106558e-05,
      "loss": 2.2295,
      "step": 26802
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.990046501159668,
      "learning_rate": 1.4686881471772908e-05,
      "loss": 2.578,
      "step": 26803
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9936022758483887,
      "learning_rate": 1.4686517754493719e-05,
      "loss": 2.1927,
      "step": 26804
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0149157047271729,
      "learning_rate": 1.4686154029269614e-05,
      "loss": 2.3673,
      "step": 26805
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.032460331916809,
      "learning_rate": 1.4685790296101204e-05,
      "loss": 2.5356,
      "step": 26806
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0850563049316406,
      "learning_rate": 1.4685426554989104e-05,
      "loss": 2.4727,
      "step": 26807
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9366020560264587,
      "learning_rate": 1.4685062805933933e-05,
      "loss": 2.5519,
      "step": 26808
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0200694799423218,
      "learning_rate": 1.4684699048936311e-05,
      "loss": 2.3441,
      "step": 26809
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0551681518554688,
      "learning_rate": 1.468433528399685e-05,
      "loss": 2.3564,
      "step": 26810
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.055385947227478,
      "learning_rate": 1.4683971511116171e-05,
      "loss": 2.5431,
      "step": 26811
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1829898357391357,
      "learning_rate": 1.4683607730294887e-05,
      "loss": 2.2711,
      "step": 26812
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0372488498687744,
      "learning_rate": 1.4683243941533616e-05,
      "loss": 2.2866,
      "step": 26813
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0638813972473145,
      "learning_rate": 1.4682880144832974e-05,
      "loss": 2.4533,
      "step": 26814
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0781973600387573,
      "learning_rate": 1.4682516340193578e-05,
      "loss": 2.4422,
      "step": 26815
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.194440484046936,
      "learning_rate": 1.4682152527616045e-05,
      "loss": 2.252,
      "step": 26816
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9930405616760254,
      "learning_rate": 1.4681788707100993e-05,
      "loss": 2.4603,
      "step": 26817
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0712376832962036,
      "learning_rate": 1.4681424878649035e-05,
      "loss": 2.642,
      "step": 26818
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0398540496826172,
      "learning_rate": 1.4681061042260794e-05,
      "loss": 2.6089,
      "step": 26819
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1214607954025269,
      "learning_rate": 1.4680697197936882e-05,
      "loss": 2.2411,
      "step": 26820
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0969390869140625,
      "learning_rate": 1.4680333345677914e-05,
      "loss": 2.6771,
      "step": 26821
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.107282042503357,
      "learning_rate": 1.4679969485484513e-05,
      "loss": 2.4977,
      "step": 26822
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1275326013565063,
      "learning_rate": 1.4679605617357293e-05,
      "loss": 2.4967,
      "step": 26823
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0530182123184204,
      "learning_rate": 1.4679241741296868e-05,
      "loss": 2.6644,
      "step": 26824
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.007204532623291,
      "learning_rate": 1.4678877857303862e-05,
      "loss": 2.4123,
      "step": 26825
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9508751034736633,
      "learning_rate": 1.4678513965378882e-05,
      "loss": 2.4253,
      "step": 26826
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.22113835811615,
      "learning_rate": 1.4678150065522552e-05,
      "loss": 2.3165,
      "step": 26827
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1002446413040161,
      "learning_rate": 1.4677786157735487e-05,
      "loss": 2.6028,
      "step": 26828
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1011115312576294,
      "learning_rate": 1.4677422242018304e-05,
      "loss": 2.2629,
      "step": 26829
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.026294469833374,
      "learning_rate": 1.467705831837162e-05,
      "loss": 2.4278,
      "step": 26830
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1385681629180908,
      "learning_rate": 1.467669438679605e-05,
      "loss": 2.3235,
      "step": 26831
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.036892056465149,
      "learning_rate": 1.4676330447292217e-05,
      "loss": 2.4604,
      "step": 26832
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0586930513381958,
      "learning_rate": 1.4675966499860732e-05,
      "loss": 2.3123,
      "step": 26833
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9864040613174438,
      "learning_rate": 1.4675602544502214e-05,
      "loss": 2.4637,
      "step": 26834
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9209405183792114,
      "learning_rate": 1.467523858121728e-05,
      "loss": 2.7366,
      "step": 26835
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0725796222686768,
      "learning_rate": 1.4674874610006545e-05,
      "loss": 2.2832,
      "step": 26836
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.019677996635437,
      "learning_rate": 1.467451063087063e-05,
      "loss": 2.2158,
      "step": 26837
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0792878866195679,
      "learning_rate": 1.467414664381015e-05,
      "loss": 2.6134,
      "step": 26838
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3796436786651611,
      "learning_rate": 1.4673782648825719e-05,
      "loss": 2.4397,
      "step": 26839
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0520190000534058,
      "learning_rate": 1.4673418645917959e-05,
      "loss": 2.3844,
      "step": 26840
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.129292368888855,
      "learning_rate": 1.4673054635087485e-05,
      "loss": 2.5006,
      "step": 26841
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.990496039390564,
      "learning_rate": 1.4672690616334912e-05,
      "loss": 2.4189,
      "step": 26842
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2468968629837036,
      "learning_rate": 1.4672326589660864e-05,
      "loss": 2.5249,
      "step": 26843
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.026265263557434,
      "learning_rate": 1.4671962555065948e-05,
      "loss": 2.2696,
      "step": 26844
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0380175113677979,
      "learning_rate": 1.4671598512550792e-05,
      "loss": 2.5002,
      "step": 26845
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9478508830070496,
      "learning_rate": 1.4671234462116005e-05,
      "loss": 2.3567,
      "step": 26846
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9854527115821838,
      "learning_rate": 1.4670870403762207e-05,
      "loss": 2.3172,
      "step": 26847
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.940188467502594,
      "learning_rate": 1.4670506337490013e-05,
      "loss": 2.466,
      "step": 26848
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9912367463111877,
      "learning_rate": 1.467014226330005e-05,
      "loss": 2.6347,
      "step": 26849
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0133111476898193,
      "learning_rate": 1.466977818119292e-05,
      "loss": 2.2504,
      "step": 26850
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.574115514755249,
      "learning_rate": 1.4669414091169247e-05,
      "loss": 2.5639,
      "step": 26851
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0988062620162964,
      "learning_rate": 1.4669049993229654e-05,
      "loss": 2.3824,
      "step": 26852
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3176844120025635,
      "learning_rate": 1.4668685887374751e-05,
      "loss": 2.5332,
      "step": 26853
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9518610239028931,
      "learning_rate": 1.4668321773605156e-05,
      "loss": 2.4018,
      "step": 26854
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.03233003616333,
      "learning_rate": 1.466795765192149e-05,
      "loss": 2.1245,
      "step": 26855
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2357043027877808,
      "learning_rate": 1.4667593522324365e-05,
      "loss": 2.305,
      "step": 26856
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.157267451286316,
      "learning_rate": 1.4667229384814403e-05,
      "loss": 2.4113,
      "step": 26857
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9975437521934509,
      "learning_rate": 1.4666865239392224e-05,
      "loss": 2.3927,
      "step": 26858
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0313663482666016,
      "learning_rate": 1.4666501086058436e-05,
      "loss": 2.1161,
      "step": 26859
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1031962633132935,
      "learning_rate": 1.4666136924813659e-05,
      "loss": 2.4678,
      "step": 26860
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9602052569389343,
      "learning_rate": 1.4665772755658515e-05,
      "loss": 2.509,
      "step": 26861
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0675112009048462,
      "learning_rate": 1.466540857859362e-05,
      "loss": 2.409,
      "step": 26862
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0765224695205688,
      "learning_rate": 1.466504439361959e-05,
      "loss": 2.4875,
      "step": 26863
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.091566562652588,
      "learning_rate": 1.4664680200737045e-05,
      "loss": 2.2311,
      "step": 26864
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0942697525024414,
      "learning_rate": 1.4664315999946596e-05,
      "loss": 2.4093,
      "step": 26865
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1918210983276367,
      "learning_rate": 1.4663951791248865e-05,
      "loss": 2.5542,
      "step": 26866
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9981934428215027,
      "learning_rate": 1.466358757464447e-05,
      "loss": 2.6153,
      "step": 26867
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0915236473083496,
      "learning_rate": 1.4663223350134028e-05,
      "loss": 2.3992,
      "step": 26868
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2581440210342407,
      "learning_rate": 1.4662859117718155e-05,
      "loss": 2.4974,
      "step": 26869
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0330288410186768,
      "learning_rate": 1.466249487739747e-05,
      "loss": 2.3201,
      "step": 26870
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9261777400970459,
      "learning_rate": 1.4662130629172592e-05,
      "loss": 2.41,
      "step": 26871
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1882895231246948,
      "learning_rate": 1.4661766373044133e-05,
      "loss": 2.2523,
      "step": 26872
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0461064577102661,
      "learning_rate": 1.4661402109012714e-05,
      "loss": 2.5271,
      "step": 26873
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1719614267349243,
      "learning_rate": 1.4661037837078955e-05,
      "loss": 2.4594,
      "step": 26874
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0453089475631714,
      "learning_rate": 1.4660673557243468e-05,
      "loss": 2.4951,
      "step": 26875
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2212859392166138,
      "learning_rate": 1.4660309269506879e-05,
      "loss": 2.3114,
      "step": 26876
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0178320407867432,
      "learning_rate": 1.4659944973869796e-05,
      "loss": 2.3182,
      "step": 26877
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9904822707176208,
      "learning_rate": 1.4659580670332838e-05,
      "loss": 2.5953,
      "step": 26878
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.476124882698059,
      "learning_rate": 1.4659216358896629e-05,
      "loss": 2.263,
      "step": 26879
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.027411699295044,
      "learning_rate": 1.4658852039561783e-05,
      "loss": 2.4954,
      "step": 26880
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1161439418792725,
      "learning_rate": 1.4658487712328915e-05,
      "loss": 2.5165,
      "step": 26881
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0165926218032837,
      "learning_rate": 1.4658123377198648e-05,
      "loss": 2.1504,
      "step": 26882
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0555659532546997,
      "learning_rate": 1.4657759034171593e-05,
      "loss": 2.1783,
      "step": 26883
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1444052457809448,
      "learning_rate": 1.4657394683248376e-05,
      "loss": 2.4271,
      "step": 26884
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0696396827697754,
      "learning_rate": 1.4657030324429607e-05,
      "loss": 2.3618,
      "step": 26885
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9628774523735046,
      "learning_rate": 1.4656665957715905e-05,
      "loss": 2.5801,
      "step": 26886
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.210364580154419,
      "learning_rate": 1.4656301583107893e-05,
      "loss": 2.2949,
      "step": 26887
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9927051663398743,
      "learning_rate": 1.4655937200606183e-05,
      "loss": 2.4996,
      "step": 26888
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0006999969482422,
      "learning_rate": 1.4655572810211396e-05,
      "loss": 2.4503,
      "step": 26889
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.065548062324524,
      "learning_rate": 1.4655208411924149e-05,
      "loss": 2.5402,
      "step": 26890
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1289787292480469,
      "learning_rate": 1.465484400574506e-05,
      "loss": 2.1306,
      "step": 26891
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0907771587371826,
      "learning_rate": 1.4654479591674744e-05,
      "loss": 2.2782,
      "step": 26892
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9854830503463745,
      "learning_rate": 1.4654115169713822e-05,
      "loss": 2.5994,
      "step": 26893
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0380481481552124,
      "learning_rate": 1.465375073986291e-05,
      "loss": 2.5278,
      "step": 26894
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2043849229812622,
      "learning_rate": 1.4653386302122628e-05,
      "loss": 2.2021,
      "step": 26895
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.012751579284668,
      "learning_rate": 1.465302185649359e-05,
      "loss": 2.4454,
      "step": 26896
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0521399974822998,
      "learning_rate": 1.465265740297642e-05,
      "loss": 2.4001,
      "step": 26897
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1828269958496094,
      "learning_rate": 1.4652292941571731e-05,
      "loss": 2.6345,
      "step": 26898
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1269917488098145,
      "learning_rate": 1.4651928472280141e-05,
      "loss": 2.598,
      "step": 26899
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1946158409118652,
      "learning_rate": 1.4651563995102268e-05,
      "loss": 2.5586,
      "step": 26900
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2956479787826538,
      "learning_rate": 1.4651199510038731e-05,
      "loss": 2.3978,
      "step": 26901
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.124064564704895,
      "learning_rate": 1.4650835017090148e-05,
      "loss": 2.667,
      "step": 26902
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0256582498550415,
      "learning_rate": 1.4650470516257136e-05,
      "loss": 2.2756,
      "step": 26903
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0895106792449951,
      "learning_rate": 1.4650106007540315e-05,
      "loss": 2.5154,
      "step": 26904
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0019230842590332,
      "learning_rate": 1.46497414909403e-05,
      "loss": 2.5314,
      "step": 26905
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2474145889282227,
      "learning_rate": 1.4649376966457708e-05,
      "loss": 2.418,
      "step": 26906
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1409331560134888,
      "learning_rate": 1.4649012434093163e-05,
      "loss": 2.502,
      "step": 26907
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1924209594726562,
      "learning_rate": 1.464864789384728e-05,
      "loss": 2.3933,
      "step": 26908
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9861869215965271,
      "learning_rate": 1.4648283345720675e-05,
      "loss": 2.4009,
      "step": 26909
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2119914293289185,
      "learning_rate": 1.4647918789713967e-05,
      "loss": 2.4807,
      "step": 26910
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0290577411651611,
      "learning_rate": 1.4647554225827775e-05,
      "loss": 2.3615,
      "step": 26911
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9969766139984131,
      "learning_rate": 1.4647189654062715e-05,
      "loss": 2.1778,
      "step": 26912
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.03745436668396,
      "learning_rate": 1.4646825074419407e-05,
      "loss": 2.5089,
      "step": 26913
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0491878986358643,
      "learning_rate": 1.4646460486898467e-05,
      "loss": 2.04,
      "step": 26914
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.049379587173462,
      "learning_rate": 1.4646095891500513e-05,
      "loss": 2.3172,
      "step": 26915
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.087516188621521,
      "learning_rate": 1.4645731288226168e-05,
      "loss": 2.4113,
      "step": 26916
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.145591378211975,
      "learning_rate": 1.4645366677076048e-05,
      "loss": 2.279,
      "step": 26917
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1024972200393677,
      "learning_rate": 1.4645002058050768e-05,
      "loss": 2.445,
      "step": 26918
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0086455345153809,
      "learning_rate": 1.4644637431150947e-05,
      "loss": 2.4374,
      "step": 26919
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0869745016098022,
      "learning_rate": 1.4644272796377204e-05,
      "loss": 2.4404,
      "step": 26920
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.076232671737671,
      "learning_rate": 1.4643908153730157e-05,
      "loss": 2.6289,
      "step": 26921
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4892045259475708,
      "learning_rate": 1.4643543503210426e-05,
      "loss": 2.5394,
      "step": 26922
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1638537645339966,
      "learning_rate": 1.4643178844818629e-05,
      "loss": 2.4859,
      "step": 26923
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0682651996612549,
      "learning_rate": 1.464281417855538e-05,
      "loss": 2.578,
      "step": 26924
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0327222347259521,
      "learning_rate": 1.4642449504421301e-05,
      "loss": 2.3274,
      "step": 26925
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0920031070709229,
      "learning_rate": 1.4642084822417009e-05,
      "loss": 2.4279,
      "step": 26926
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9163335561752319,
      "learning_rate": 1.4641720132543123e-05,
      "loss": 2.5423,
      "step": 26927
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1254212856292725,
      "learning_rate": 1.4641355434800259e-05,
      "loss": 2.2802,
      "step": 26928
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1597706079483032,
      "learning_rate": 1.4640990729189038e-05,
      "loss": 2.47,
      "step": 26929
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0682621002197266,
      "learning_rate": 1.464062601571008e-05,
      "loss": 2.1431,
      "step": 26930
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0051288604736328,
      "learning_rate": 1.4640261294363996e-05,
      "loss": 2.3448,
      "step": 26931
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0525294542312622,
      "learning_rate": 1.463989656515141e-05,
      "loss": 2.2226,
      "step": 26932
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.356523871421814,
      "learning_rate": 1.4639531828072939e-05,
      "loss": 2.3085,
      "step": 26933
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0029873847961426,
      "learning_rate": 1.4639167083129203e-05,
      "loss": 2.1799,
      "step": 26934
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0688951015472412,
      "learning_rate": 1.463880233032082e-05,
      "loss": 2.3103,
      "step": 26935
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0248416662216187,
      "learning_rate": 1.4638437569648405e-05,
      "loss": 2.2053,
      "step": 26936
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0310635566711426,
      "learning_rate": 1.4638072801112579e-05,
      "loss": 2.4734,
      "step": 26937
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0316927433013916,
      "learning_rate": 1.4637708024713959e-05,
      "loss": 2.3777,
      "step": 26938
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0762732028961182,
      "learning_rate": 1.4637343240453166e-05,
      "loss": 2.2695,
      "step": 26939
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0538567304611206,
      "learning_rate": 1.4636978448330816e-05,
      "loss": 2.4976,
      "step": 26940
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0237321853637695,
      "learning_rate": 1.463661364834753e-05,
      "loss": 2.2753,
      "step": 26941
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.054895281791687,
      "learning_rate": 1.463624884050392e-05,
      "loss": 2.4638,
      "step": 26942
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.031657338142395,
      "learning_rate": 1.4635884024800611e-05,
      "loss": 2.5321,
      "step": 26943
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0940778255462646,
      "learning_rate": 1.4635519201238222e-05,
      "loss": 2.4114,
      "step": 26944
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9846380352973938,
      "learning_rate": 1.4635154369817367e-05,
      "loss": 2.3605,
      "step": 26945
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1142759323120117,
      "learning_rate": 1.4634789530538667e-05,
      "loss": 2.5275,
      "step": 26946
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3229966163635254,
      "learning_rate": 1.4634424683402739e-05,
      "loss": 2.3247,
      "step": 26947
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9398072957992554,
      "learning_rate": 1.4634059828410202e-05,
      "loss": 2.4322,
      "step": 26948
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0179979801177979,
      "learning_rate": 1.4633694965561675e-05,
      "loss": 2.353,
      "step": 26949
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1817868947982788,
      "learning_rate": 1.4633330094857781e-05,
      "loss": 2.3851,
      "step": 26950
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1942658424377441,
      "learning_rate": 1.463296521629913e-05,
      "loss": 2.3778,
      "step": 26951
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9570205211639404,
      "learning_rate": 1.4632600329886344e-05,
      "loss": 2.36,
      "step": 26952
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0132259130477905,
      "learning_rate": 1.4632235435620042e-05,
      "loss": 2.3568,
      "step": 26953
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1870485544204712,
      "learning_rate": 1.4631870533500845e-05,
      "loss": 2.4261,
      "step": 26954
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0896074771881104,
      "learning_rate": 1.4631505623529368e-05,
      "loss": 2.4495,
      "step": 26955
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.365919828414917,
      "learning_rate": 1.4631140705706233e-05,
      "loss": 2.302,
      "step": 26956
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.091216802597046,
      "learning_rate": 1.4630775780032056e-05,
      "loss": 2.549,
      "step": 26957
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9936428666114807,
      "learning_rate": 1.4630410846507455e-05,
      "loss": 2.2471,
      "step": 26958
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1014716625213623,
      "learning_rate": 1.463004590513305e-05,
      "loss": 2.4426,
      "step": 26959
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.086292028427124,
      "learning_rate": 1.462968095590946e-05,
      "loss": 2.4394,
      "step": 26960
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6658251285552979,
      "learning_rate": 1.4629315998837306e-05,
      "loss": 2.365,
      "step": 26961
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1350224018096924,
      "learning_rate": 1.4628951033917201e-05,
      "loss": 2.1638,
      "step": 26962
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0817663669586182,
      "learning_rate": 1.4628586061149767e-05,
      "loss": 2.2548,
      "step": 26963
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1708295345306396,
      "learning_rate": 1.4628221080535626e-05,
      "loss": 2.4845,
      "step": 26964
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0473195314407349,
      "learning_rate": 1.4627856092075389e-05,
      "loss": 2.3456,
      "step": 26965
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9535555243492126,
      "learning_rate": 1.462749109576968e-05,
      "loss": 2.3063,
      "step": 26966
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.060202717781067,
      "learning_rate": 1.462712609161912e-05,
      "loss": 2.3827,
      "step": 26967
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1272143125534058,
      "learning_rate": 1.4626761079624321e-05,
      "loss": 2.5017,
      "step": 26968
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0387495756149292,
      "learning_rate": 1.4626396059785906e-05,
      "loss": 2.1742,
      "step": 26969
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.075493574142456,
      "learning_rate": 1.4626031032104495e-05,
      "loss": 2.4477,
      "step": 26970
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0033738613128662,
      "learning_rate": 1.4625665996580702e-05,
      "loss": 2.5959,
      "step": 26971
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0295709371566772,
      "learning_rate": 1.4625300953215151e-05,
      "loss": 2.44,
      "step": 26972
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9516961574554443,
      "learning_rate": 1.4624935902008457e-05,
      "loss": 2.4765,
      "step": 26973
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0507935285568237,
      "learning_rate": 1.4624570842961241e-05,
      "loss": 2.3848,
      "step": 26974
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9359740614891052,
      "learning_rate": 1.462420577607412e-05,
      "loss": 2.4909,
      "step": 26975
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.204239010810852,
      "learning_rate": 1.4623840701347719e-05,
      "loss": 2.376,
      "step": 26976
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.247796654701233,
      "learning_rate": 1.462347561878265e-05,
      "loss": 2.3533,
      "step": 26977
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1130473613739014,
      "learning_rate": 1.4623110528379533e-05,
      "loss": 2.3101,
      "step": 26978
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0264124870300293,
      "learning_rate": 1.4622745430138987e-05,
      "loss": 2.5401,
      "step": 26979
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.105249285697937,
      "learning_rate": 1.4622380324061634e-05,
      "loss": 2.4023,
      "step": 26980
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0627388954162598,
      "learning_rate": 1.462201521014809e-05,
      "loss": 2.5528,
      "step": 26981
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.086366057395935,
      "learning_rate": 1.4621650088398975e-05,
      "loss": 2.5765,
      "step": 26982
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0716137886047363,
      "learning_rate": 1.462128495881491e-05,
      "loss": 2.3798,
      "step": 26983
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2173410654067993,
      "learning_rate": 1.462091982139651e-05,
      "loss": 2.2916,
      "step": 26984
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9940149784088135,
      "learning_rate": 1.4620554676144397e-05,
      "loss": 2.2704,
      "step": 26985
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9696975350379944,
      "learning_rate": 1.4620189523059187e-05,
      "loss": 2.4649,
      "step": 26986
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.987762987613678,
      "learning_rate": 1.4619824362141502e-05,
      "loss": 2.4094,
      "step": 26987
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1247243881225586,
      "learning_rate": 1.4619459193391958e-05,
      "loss": 2.3631,
      "step": 26988
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0153300762176514,
      "learning_rate": 1.4619094016811177e-05,
      "loss": 2.2486,
      "step": 26989
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0538954734802246,
      "learning_rate": 1.461872883239978e-05,
      "loss": 2.352,
      "step": 26990
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0205529928207397,
      "learning_rate": 1.461836364015838e-05,
      "loss": 2.5587,
      "step": 26991
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0648930072784424,
      "learning_rate": 1.46179984400876e-05,
      "loss": 2.3537,
      "step": 26992
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0655587911605835,
      "learning_rate": 1.461763323218806e-05,
      "loss": 2.2805,
      "step": 26993
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.162876009941101,
      "learning_rate": 1.4617268016460378e-05,
      "loss": 2.1995,
      "step": 26994
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0151580572128296,
      "learning_rate": 1.461690279290517e-05,
      "loss": 2.4955,
      "step": 26995
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0692740678787231,
      "learning_rate": 1.4616537561523058e-05,
      "loss": 2.1911,
      "step": 26996
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9712006449699402,
      "learning_rate": 1.4616172322314662e-05,
      "loss": 2.5904,
      "step": 26997
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9820172190666199,
      "learning_rate": 1.4615807075280601e-05,
      "loss": 2.2353,
      "step": 26998
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.04073166847229,
      "learning_rate": 1.4615441820421492e-05,
      "loss": 2.3461,
      "step": 26999
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0715763568878174,
      "learning_rate": 1.4615076557737958e-05,
      "loss": 2.7748,
      "step": 27000
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.063658595085144,
      "learning_rate": 1.4614711287230611e-05,
      "loss": 2.5217,
      "step": 27001
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0830936431884766,
      "learning_rate": 1.4614346008900078e-05,
      "loss": 2.3942,
      "step": 27002
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1306389570236206,
      "learning_rate": 1.4613980722746976e-05,
      "loss": 2.4007,
      "step": 27003
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.154873013496399,
      "learning_rate": 1.4613615428771923e-05,
      "loss": 2.0815,
      "step": 27004
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0274648666381836,
      "learning_rate": 1.4613250126975537e-05,
      "loss": 2.134,
      "step": 27005
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.048607587814331,
      "learning_rate": 1.4612884817358442e-05,
      "loss": 2.4544,
      "step": 27006
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1851000785827637,
      "learning_rate": 1.461251949992125e-05,
      "loss": 2.4409,
      "step": 27007
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.035990834236145,
      "learning_rate": 1.461215417466459e-05,
      "loss": 2.3405,
      "step": 27008
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.06605064868927,
      "learning_rate": 1.4611788841589073e-05,
      "loss": 2.3427,
      "step": 27009
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9717721939086914,
      "learning_rate": 1.4611423500695324e-05,
      "loss": 2.2986,
      "step": 27010
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0823999643325806,
      "learning_rate": 1.4611058151983957e-05,
      "loss": 2.3858,
      "step": 27011
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0029658079147339,
      "learning_rate": 1.4610692795455594e-05,
      "loss": 2.4123,
      "step": 27012
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0311049222946167,
      "learning_rate": 1.4610327431110856e-05,
      "loss": 2.4105,
      "step": 27013
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0366379022598267,
      "learning_rate": 1.460996205895036e-05,
      "loss": 2.4995,
      "step": 27014
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9712225794792175,
      "learning_rate": 1.4609596678974725e-05,
      "loss": 2.4037,
      "step": 27015
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2900383472442627,
      "learning_rate": 1.4609231291184576e-05,
      "loss": 2.5158,
      "step": 27016
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0517288446426392,
      "learning_rate": 1.4608865895580525e-05,
      "loss": 2.3591,
      "step": 27017
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.018972635269165,
      "learning_rate": 1.4608500492163194e-05,
      "loss": 2.384,
      "step": 27018
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0333969593048096,
      "learning_rate": 1.4608135080933203e-05,
      "loss": 2.4488,
      "step": 27019
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3169440031051636,
      "learning_rate": 1.4607769661891172e-05,
      "loss": 2.4151,
      "step": 27020
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1028797626495361,
      "learning_rate": 1.4607404235037719e-05,
      "loss": 2.4678,
      "step": 27021
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1155056953430176,
      "learning_rate": 1.4607038800373468e-05,
      "loss": 2.4676,
      "step": 27022
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1133544445037842,
      "learning_rate": 1.4606673357899033e-05,
      "loss": 2.3289,
      "step": 27023
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0923810005187988,
      "learning_rate": 1.4606307907615034e-05,
      "loss": 2.6024,
      "step": 27024
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0683480501174927,
      "learning_rate": 1.4605942449522093e-05,
      "loss": 2.0802,
      "step": 27025
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.062301516532898,
      "learning_rate": 1.460557698362083e-05,
      "loss": 2.6425,
      "step": 27026
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.037798523902893,
      "learning_rate": 1.4605211509911863e-05,
      "loss": 2.5402,
      "step": 27027
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9755656719207764,
      "learning_rate": 1.460484602839581e-05,
      "loss": 2.3086,
      "step": 27028
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0761758089065552,
      "learning_rate": 1.4604480539073293e-05,
      "loss": 2.3734,
      "step": 27029
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9894077777862549,
      "learning_rate": 1.460411504194493e-05,
      "loss": 2.3113,
      "step": 27030
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.033983588218689,
      "learning_rate": 1.4603749537011344e-05,
      "loss": 2.3988,
      "step": 27031
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1273562908172607,
      "learning_rate": 1.460338402427315e-05,
      "loss": 2.4623,
      "step": 27032
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0042163133621216,
      "learning_rate": 1.460301850373097e-05,
      "loss": 2.3372,
      "step": 27033
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0287766456604004,
      "learning_rate": 1.4602652975385422e-05,
      "loss": 2.4639,
      "step": 27034
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.045432209968567,
      "learning_rate": 1.4602287439237131e-05,
      "loss": 2.2762,
      "step": 27035
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2283720970153809,
      "learning_rate": 1.4601921895286712e-05,
      "loss": 2.4572,
      "step": 27036
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.014452576637268,
      "learning_rate": 1.4601556343534783e-05,
      "loss": 2.6297,
      "step": 27037
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1192231178283691,
      "learning_rate": 1.4601190783981969e-05,
      "loss": 2.4448,
      "step": 27038
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0490672588348389,
      "learning_rate": 1.4600825216628884e-05,
      "loss": 2.3691,
      "step": 27039
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0954779386520386,
      "learning_rate": 1.4600459641476152e-05,
      "loss": 2.2791,
      "step": 27040
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0571781396865845,
      "learning_rate": 1.4600094058524393e-05,
      "loss": 2.3797,
      "step": 27041
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2057528495788574,
      "learning_rate": 1.4599728467774226e-05,
      "loss": 2.4395,
      "step": 27042
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0352096557617188,
      "learning_rate": 1.4599362869226266e-05,
      "loss": 2.5974,
      "step": 27043
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1353967189788818,
      "learning_rate": 1.4598997262881138e-05,
      "loss": 2.2782,
      "step": 27044
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1473731994628906,
      "learning_rate": 1.459863164873946e-05,
      "loss": 2.2947,
      "step": 27045
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0276325941085815,
      "learning_rate": 1.4598266026801855e-05,
      "loss": 2.3944,
      "step": 27046
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9758619070053101,
      "learning_rate": 1.4597900397068938e-05,
      "loss": 2.4897,
      "step": 27047
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9854134917259216,
      "learning_rate": 1.4597534759541332e-05,
      "loss": 2.1843,
      "step": 27048
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2102895975112915,
      "learning_rate": 1.4597169114219659e-05,
      "loss": 2.1525,
      "step": 27049
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1321487426757812,
      "learning_rate": 1.459680346110453e-05,
      "loss": 2.3701,
      "step": 27050
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1167259216308594,
      "learning_rate": 1.4596437800196575e-05,
      "loss": 2.3874,
      "step": 27051
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1055073738098145,
      "learning_rate": 1.4596072131496406e-05,
      "loss": 2.5005,
      "step": 27052
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0635271072387695,
      "learning_rate": 1.4595706455004649e-05,
      "loss": 2.5131,
      "step": 27053
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1136113405227661,
      "learning_rate": 1.4595340770721921e-05,
      "loss": 2.3441,
      "step": 27054
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.094333291053772,
      "learning_rate": 1.4594975078648844e-05,
      "loss": 2.218,
      "step": 27055
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1318615674972534,
      "learning_rate": 1.4594609378786033e-05,
      "loss": 2.3889,
      "step": 27056
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.123333215713501,
      "learning_rate": 1.4594243671134112e-05,
      "loss": 2.4818,
      "step": 27057
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0323177576065063,
      "learning_rate": 1.4593877955693701e-05,
      "loss": 2.3354,
      "step": 27058
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0054856538772583,
      "learning_rate": 1.4593512232465419e-05,
      "loss": 2.3803,
      "step": 27059
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.111870288848877,
      "learning_rate": 1.4593146501449886e-05,
      "loss": 2.4627,
      "step": 27060
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3922024965286255,
      "learning_rate": 1.4592780762647722e-05,
      "loss": 2.2811,
      "step": 27061
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0652916431427002,
      "learning_rate": 1.4592415016059549e-05,
      "loss": 2.5248,
      "step": 27062
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0912331342697144,
      "learning_rate": 1.4592049261685983e-05,
      "loss": 2.679,
      "step": 27063
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0178600549697876,
      "learning_rate": 1.4591683499527646e-05,
      "loss": 2.4362,
      "step": 27064
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0167046785354614,
      "learning_rate": 1.459131772958516e-05,
      "loss": 2.27,
      "step": 27065
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0947599411010742,
      "learning_rate": 1.4590951951859142e-05,
      "loss": 2.3151,
      "step": 27066
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0354325771331787,
      "learning_rate": 1.4590586166350216e-05,
      "loss": 2.4196,
      "step": 27067
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0985335111618042,
      "learning_rate": 1.4590220373058996e-05,
      "loss": 2.5323,
      "step": 27068
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9863749742507935,
      "learning_rate": 1.4589854571986108e-05,
      "loss": 2.3178,
      "step": 27069
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.146815538406372,
      "learning_rate": 1.458948876313217e-05,
      "loss": 2.183,
      "step": 27070
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2768545150756836,
      "learning_rate": 1.4589122946497803e-05,
      "loss": 2.3645,
      "step": 27071
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1658011674880981,
      "learning_rate": 1.4588757122083625e-05,
      "loss": 2.2325,
      "step": 27072
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1010323762893677,
      "learning_rate": 1.4588391289890256e-05,
      "loss": 2.2392,
      "step": 27073
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.115114450454712,
      "learning_rate": 1.458802544991832e-05,
      "loss": 2.4356,
      "step": 27074
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9773055911064148,
      "learning_rate": 1.4587659602168437e-05,
      "loss": 2.2507,
      "step": 27075
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3320013284683228,
      "learning_rate": 1.4587293746641221e-05,
      "loss": 2.3861,
      "step": 27076
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2072985172271729,
      "learning_rate": 1.4586927883337294e-05,
      "loss": 2.4361,
      "step": 27077
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0836513042449951,
      "learning_rate": 1.4586562012257281e-05,
      "loss": 2.69,
      "step": 27078
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2755824327468872,
      "learning_rate": 1.4586196133401802e-05,
      "loss": 2.5844,
      "step": 27079
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0212689638137817,
      "learning_rate": 1.458583024677147e-05,
      "loss": 2.3888,
      "step": 27080
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9881927967071533,
      "learning_rate": 1.4585464352366917e-05,
      "loss": 2.2186,
      "step": 27081
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.056571125984192,
      "learning_rate": 1.4585098450188753e-05,
      "loss": 2.4124,
      "step": 27082
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.047083854675293,
      "learning_rate": 1.45847325402376e-05,
      "loss": 2.4486,
      "step": 27083
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.229447841644287,
      "learning_rate": 1.4584366622514082e-05,
      "loss": 2.3925,
      "step": 27084
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3127062320709229,
      "learning_rate": 1.458400069701882e-05,
      "loss": 2.495,
      "step": 27085
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3482273817062378,
      "learning_rate": 1.4583634763752427e-05,
      "loss": 2.1732,
      "step": 27086
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0323864221572876,
      "learning_rate": 1.458326882271553e-05,
      "loss": 2.4248,
      "step": 27087
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0338083505630493,
      "learning_rate": 1.4582902873908748e-05,
      "loss": 2.5276,
      "step": 27088
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0202386379241943,
      "learning_rate": 1.4582536917332704e-05,
      "loss": 2.4182,
      "step": 27089
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1398203372955322,
      "learning_rate": 1.4582170952988011e-05,
      "loss": 2.5213,
      "step": 27090
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9816780090332031,
      "learning_rate": 1.4581804980875294e-05,
      "loss": 2.3083,
      "step": 27091
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1622447967529297,
      "learning_rate": 1.4581439000995175e-05,
      "loss": 2.3423,
      "step": 27092
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.332897663116455,
      "learning_rate": 1.458107301334827e-05,
      "loss": 2.1621,
      "step": 27093
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.06686270236969,
      "learning_rate": 1.4580707017935203e-05,
      "loss": 2.4203,
      "step": 27094
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0140286684036255,
      "learning_rate": 1.4580341014756597e-05,
      "loss": 2.5182,
      "step": 27095
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0052905082702637,
      "learning_rate": 1.4579975003813065e-05,
      "loss": 2.3311,
      "step": 27096
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0843150615692139,
      "learning_rate": 1.4579608985105232e-05,
      "loss": 2.2097,
      "step": 27097
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.006874918937683,
      "learning_rate": 1.457924295863372e-05,
      "loss": 2.2876,
      "step": 27098
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1301534175872803,
      "learning_rate": 1.4578876924399144e-05,
      "loss": 2.4409,
      "step": 27099
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1299551725387573,
      "learning_rate": 1.4578510882402128e-05,
      "loss": 2.4889,
      "step": 27100
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1252191066741943,
      "learning_rate": 1.4578144832643295e-05,
      "loss": 2.6498,
      "step": 27101
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0357545614242554,
      "learning_rate": 1.4577778775123266e-05,
      "loss": 2.3068,
      "step": 27102
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9732896089553833,
      "learning_rate": 1.4577412709842653e-05,
      "loss": 2.5275,
      "step": 27103
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0059447288513184,
      "learning_rate": 1.4577046636802086e-05,
      "loss": 2.4488,
      "step": 27104
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.046971082687378,
      "learning_rate": 1.4576680556002178e-05,
      "loss": 2.2842,
      "step": 27105
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0330251455307007,
      "learning_rate": 1.4576314467443558e-05,
      "loss": 2.6711,
      "step": 27106
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0237475633621216,
      "learning_rate": 1.4575948371126837e-05,
      "loss": 2.3893,
      "step": 27107
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.072949767112732,
      "learning_rate": 1.4575582267052646e-05,
      "loss": 2.4553,
      "step": 27108
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1017014980316162,
      "learning_rate": 1.4575216155221599e-05,
      "loss": 2.3069,
      "step": 27109
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1193987131118774,
      "learning_rate": 1.4574850035634315e-05,
      "loss": 2.4671,
      "step": 27110
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9476757049560547,
      "learning_rate": 1.4574483908291421e-05,
      "loss": 2.3502,
      "step": 27111
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0119659900665283,
      "learning_rate": 1.4574117773193533e-05,
      "loss": 2.365,
      "step": 27112
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0955004692077637,
      "learning_rate": 1.4573751630341276e-05,
      "loss": 2.4879,
      "step": 27113
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0311254262924194,
      "learning_rate": 1.4573385479735262e-05,
      "loss": 2.373,
      "step": 27114
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0192550420761108,
      "learning_rate": 1.4573019321376121e-05,
      "loss": 2.2598,
      "step": 27115
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1287503242492676,
      "learning_rate": 1.4572653155264471e-05,
      "loss": 2.3382,
      "step": 27116
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0597132444381714,
      "learning_rate": 1.4572286981400929e-05,
      "loss": 2.242,
      "step": 27117
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1442241668701172,
      "learning_rate": 1.4571920799786122e-05,
      "loss": 2.5674,
      "step": 27118
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1065970659255981,
      "learning_rate": 1.4571554610420664e-05,
      "loss": 2.505,
      "step": 27119
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2654787302017212,
      "learning_rate": 1.4571188413305182e-05,
      "loss": 2.5762,
      "step": 27120
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.094312310218811,
      "learning_rate": 1.4570822208440293e-05,
      "loss": 2.5221,
      "step": 27121
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0894094705581665,
      "learning_rate": 1.457045599582662e-05,
      "loss": 2.4171,
      "step": 27122
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0799188613891602,
      "learning_rate": 1.4570089775464782e-05,
      "loss": 2.3062,
      "step": 27123
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1071888208389282,
      "learning_rate": 1.4569723547355401e-05,
      "loss": 2.5171,
      "step": 27124
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1064151525497437,
      "learning_rate": 1.4569357311499098e-05,
      "loss": 2.5184,
      "step": 27125
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1170562505722046,
      "learning_rate": 1.456899106789649e-05,
      "loss": 2.415,
      "step": 27126
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1888257265090942,
      "learning_rate": 1.4568624816548203e-05,
      "loss": 2.473,
      "step": 27127
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.12997567653656,
      "learning_rate": 1.456825855745486e-05,
      "loss": 2.4187,
      "step": 27128
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1810890436172485,
      "learning_rate": 1.4567892290617073e-05,
      "loss": 2.2701,
      "step": 27129
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0171669721603394,
      "learning_rate": 1.4567526016035469e-05,
      "loss": 2.4749,
      "step": 27130
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1449178457260132,
      "learning_rate": 1.456715973371067e-05,
      "loss": 2.4447,
      "step": 27131
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0900187492370605,
      "learning_rate": 1.4566793443643293e-05,
      "loss": 2.3968,
      "step": 27132
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.988503098487854,
      "learning_rate": 1.4566427145833961e-05,
      "loss": 2.1209,
      "step": 27133
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3089321851730347,
      "learning_rate": 1.4566060840283294e-05,
      "loss": 2.4993,
      "step": 27134
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.084640622138977,
      "learning_rate": 1.4565694526991916e-05,
      "loss": 2.3735,
      "step": 27135
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.086500883102417,
      "learning_rate": 1.4565328205960445e-05,
      "loss": 2.3692,
      "step": 27136
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0914039611816406,
      "learning_rate": 1.45649618771895e-05,
      "loss": 2.3316,
      "step": 27137
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1757925748825073,
      "learning_rate": 1.456459554067971e-05,
      "loss": 2.278,
      "step": 27138
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.028666377067566,
      "learning_rate": 1.4564229196431685e-05,
      "loss": 2.4654,
      "step": 27139
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0868390798568726,
      "learning_rate": 1.4563862844446057e-05,
      "loss": 2.4294,
      "step": 27140
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.126858115196228,
      "learning_rate": 1.4563496484723438e-05,
      "loss": 2.535,
      "step": 27141
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0029523372650146,
      "learning_rate": 1.4563130117264454e-05,
      "loss": 2.4684,
      "step": 27142
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1715855598449707,
      "learning_rate": 1.4562763742069728e-05,
      "loss": 2.3904,
      "step": 27143
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0400406122207642,
      "learning_rate": 1.4562397359139877e-05,
      "loss": 2.3639,
      "step": 27144
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9738083481788635,
      "learning_rate": 1.4562030968475523e-05,
      "loss": 2.2505,
      "step": 27145
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1329565048217773,
      "learning_rate": 1.4561664570077285e-05,
      "loss": 2.5066,
      "step": 27146
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.027005910873413,
      "learning_rate": 1.456129816394579e-05,
      "loss": 2.2995,
      "step": 27147
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1784182786941528,
      "learning_rate": 1.4560931750081656e-05,
      "loss": 2.552,
      "step": 27148
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1208051443099976,
      "learning_rate": 1.4560565328485501e-05,
      "loss": 2.4899,
      "step": 27149
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2277610301971436,
      "learning_rate": 1.4560198899157948e-05,
      "loss": 2.4942,
      "step": 27150
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0332300662994385,
      "learning_rate": 1.4559832462099624e-05,
      "loss": 2.4404,
      "step": 27151
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.083889365196228,
      "learning_rate": 1.4559466017311143e-05,
      "loss": 2.3697,
      "step": 27152
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1436232328414917,
      "learning_rate": 1.455909956479313e-05,
      "loss": 2.5238,
      "step": 27153
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5035018920898438,
      "learning_rate": 1.4558733104546204e-05,
      "loss": 2.5336,
      "step": 27154
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0736345052719116,
      "learning_rate": 1.4558366636570986e-05,
      "loss": 2.468,
      "step": 27155
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.143441081047058,
      "learning_rate": 1.45580001608681e-05,
      "loss": 2.3676,
      "step": 27156
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.043704867362976,
      "learning_rate": 1.4557633677438165e-05,
      "loss": 2.5997,
      "step": 27157
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0720077753067017,
      "learning_rate": 1.4557267186281805e-05,
      "loss": 2.5314,
      "step": 27158
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1568635702133179,
      "learning_rate": 1.4556900687399637e-05,
      "loss": 2.4794,
      "step": 27159
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9352864027023315,
      "learning_rate": 1.4556534180792286e-05,
      "loss": 2.6519,
      "step": 27160
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0479034185409546,
      "learning_rate": 1.4556167666460372e-05,
      "loss": 2.3104,
      "step": 27161
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1230874061584473,
      "learning_rate": 1.4555801144404516e-05,
      "loss": 2.2505,
      "step": 27162
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.996258556842804,
      "learning_rate": 1.455543461462534e-05,
      "loss": 2.0822,
      "step": 27163
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0502349138259888,
      "learning_rate": 1.4555068077123464e-05,
      "loss": 2.4746,
      "step": 27164
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.068419098854065,
      "learning_rate": 1.455470153189951e-05,
      "loss": 2.4252,
      "step": 27165
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0283657312393188,
      "learning_rate": 1.4554334978954104e-05,
      "loss": 2.376,
      "step": 27166
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0464578866958618,
      "learning_rate": 1.4553968418287859e-05,
      "loss": 2.3023,
      "step": 27167
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9377939105033875,
      "learning_rate": 1.4553601849901404e-05,
      "loss": 2.383,
      "step": 27168
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1374938488006592,
      "learning_rate": 1.4553235273795354e-05,
      "loss": 2.5187,
      "step": 27169
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0628200769424438,
      "learning_rate": 1.4552868689970337e-05,
      "loss": 2.2416,
      "step": 27170
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0412009954452515,
      "learning_rate": 1.4552502098426965e-05,
      "loss": 2.3355,
      "step": 27171
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0535895824432373,
      "learning_rate": 1.4552135499165874e-05,
      "loss": 2.3324,
      "step": 27172
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3019102811813354,
      "learning_rate": 1.4551768892187672e-05,
      "loss": 2.3248,
      "step": 27173
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9650084376335144,
      "learning_rate": 1.4551402277492986e-05,
      "loss": 2.2189,
      "step": 27174
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0011316537857056,
      "learning_rate": 1.4551035655082438e-05,
      "loss": 2.4519,
      "step": 27175
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1136910915374756,
      "learning_rate": 1.4550669024956648e-05,
      "loss": 2.5356,
      "step": 27176
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.059242606163025,
      "learning_rate": 1.4550302387116235e-05,
      "loss": 2.4189,
      "step": 27177
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0947892665863037,
      "learning_rate": 1.4549935741561828e-05,
      "loss": 2.3224,
      "step": 27178
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.07207190990448,
      "learning_rate": 1.4549569088294039e-05,
      "loss": 2.2685,
      "step": 27179
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1503984928131104,
      "learning_rate": 1.4549202427313499e-05,
      "loss": 2.2048,
      "step": 27180
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9637649655342102,
      "learning_rate": 1.4548835758620827e-05,
      "loss": 2.5698,
      "step": 27181
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0937392711639404,
      "learning_rate": 1.4548469082216641e-05,
      "loss": 2.2458,
      "step": 27182
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0296300649642944,
      "learning_rate": 1.4548102398101563e-05,
      "loss": 2.4778,
      "step": 27183
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1152832508087158,
      "learning_rate": 1.4547735706276216e-05,
      "loss": 2.236,
      "step": 27184
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0009926557540894,
      "learning_rate": 1.4547369006741221e-05,
      "loss": 2.2395,
      "step": 27185
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0871995687484741,
      "learning_rate": 1.4547002299497203e-05,
      "loss": 2.4341,
      "step": 27186
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0819810628890991,
      "learning_rate": 1.4546635584544784e-05,
      "loss": 2.3098,
      "step": 27187
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0616106986999512,
      "learning_rate": 1.4546268861884578e-05,
      "loss": 2.4093,
      "step": 27188
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0292792320251465,
      "learning_rate": 1.4545902131517213e-05,
      "loss": 2.3014,
      "step": 27189
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9882727265357971,
      "learning_rate": 1.4545535393443308e-05,
      "loss": 2.4923,
      "step": 27190
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9961086511611938,
      "learning_rate": 1.4545168647663487e-05,
      "loss": 2.3959,
      "step": 27191
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9916508793830872,
      "learning_rate": 1.454480189417837e-05,
      "loss": 2.4266,
      "step": 27192
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0034537315368652,
      "learning_rate": 1.454443513298858e-05,
      "loss": 2.4054,
      "step": 27193
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0853315591812134,
      "learning_rate": 1.4544068364094742e-05,
      "loss": 2.4362,
      "step": 27194
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0915583372116089,
      "learning_rate": 1.4543701587497467e-05,
      "loss": 2.3004,
      "step": 27195
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2173144817352295,
      "learning_rate": 1.454333480319739e-05,
      "loss": 2.4479,
      "step": 27196
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.330404281616211,
      "learning_rate": 1.4542968011195123e-05,
      "loss": 2.2061,
      "step": 27197
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1217302083969116,
      "learning_rate": 1.4542601211491289e-05,
      "loss": 2.4305,
      "step": 27198
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9934119582176208,
      "learning_rate": 1.4542234404086518e-05,
      "loss": 2.4749,
      "step": 27199
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0166196823120117,
      "learning_rate": 1.4541867588981423e-05,
      "loss": 2.4141,
      "step": 27200
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0845446586608887,
      "learning_rate": 1.4541500766176628e-05,
      "loss": 2.5208,
      "step": 27201
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0991777181625366,
      "learning_rate": 1.4541133935672757e-05,
      "loss": 2.505,
      "step": 27202
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1291759014129639,
      "learning_rate": 1.4540767097470432e-05,
      "loss": 2.4507,
      "step": 27203
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2957680225372314,
      "learning_rate": 1.4540400251570273e-05,
      "loss": 2.4808,
      "step": 27204
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1890723705291748,
      "learning_rate": 1.45400333979729e-05,
      "loss": 2.4863,
      "step": 27205
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0545521974563599,
      "learning_rate": 1.453966653667894e-05,
      "loss": 2.3571,
      "step": 27206
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9651991128921509,
      "learning_rate": 1.453929966768901e-05,
      "loss": 2.3894,
      "step": 27207
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1308218240737915,
      "learning_rate": 1.4538932791003737e-05,
      "loss": 2.5454,
      "step": 27208
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.998606264591217,
      "learning_rate": 1.4538565906623739e-05,
      "loss": 2.5465,
      "step": 27209
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.165845274925232,
      "learning_rate": 1.4538199014549639e-05,
      "loss": 2.6621,
      "step": 27210
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9798609018325806,
      "learning_rate": 1.4537832114782057e-05,
      "loss": 2.4578,
      "step": 27211
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3271151781082153,
      "learning_rate": 1.4537465207321617e-05,
      "loss": 2.3221,
      "step": 27212
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2136934995651245,
      "learning_rate": 1.4537098292168943e-05,
      "loss": 2.2081,
      "step": 27213
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0864074230194092,
      "learning_rate": 1.4536731369324657e-05,
      "loss": 2.4295,
      "step": 27214
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.018516182899475,
      "learning_rate": 1.4536364438789377e-05,
      "loss": 2.3843,
      "step": 27215
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9611961245536804,
      "learning_rate": 1.4535997500563728e-05,
      "loss": 2.4512,
      "step": 27216
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.059731364250183,
      "learning_rate": 1.453563055464833e-05,
      "loss": 2.4963,
      "step": 27217
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0078811645507812,
      "learning_rate": 1.4535263601043807e-05,
      "loss": 2.3706,
      "step": 27218
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0810843706130981,
      "learning_rate": 1.453489663975078e-05,
      "loss": 2.6093,
      "step": 27219
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0992971658706665,
      "learning_rate": 1.4534529670769874e-05,
      "loss": 2.3372,
      "step": 27220
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2732603549957275,
      "learning_rate": 1.4534162694101706e-05,
      "loss": 2.3535,
      "step": 27221
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1668287515640259,
      "learning_rate": 1.45337957097469e-05,
      "loss": 2.4605,
      "step": 27222
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0116641521453857,
      "learning_rate": 1.4533428717706082e-05,
      "loss": 2.3746,
      "step": 27223
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1667031049728394,
      "learning_rate": 1.4533061717979868e-05,
      "loss": 2.6105,
      "step": 27224
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.101739764213562,
      "learning_rate": 1.4532694710568885e-05,
      "loss": 2.3753,
      "step": 27225
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.058969259262085,
      "learning_rate": 1.453232769547375e-05,
      "loss": 2.3388,
      "step": 27226
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1997061967849731,
      "learning_rate": 1.4531960672695096e-05,
      "loss": 2.7578,
      "step": 27227
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0397282838821411,
      "learning_rate": 1.4531593642233532e-05,
      "loss": 2.4151,
      "step": 27228
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1826107501983643,
      "learning_rate": 1.4531226604089688e-05,
      "loss": 2.3316,
      "step": 27229
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0882790088653564,
      "learning_rate": 1.4530859558264184e-05,
      "loss": 2.2927,
      "step": 27230
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.10942804813385,
      "learning_rate": 1.4530492504757642e-05,
      "loss": 2.4077,
      "step": 27231
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0687360763549805,
      "learning_rate": 1.4530125443570687e-05,
      "loss": 2.5123,
      "step": 27232
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.991854727268219,
      "learning_rate": 1.4529758374703935e-05,
      "loss": 2.2964,
      "step": 27233
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0015591382980347,
      "learning_rate": 1.4529391298158015e-05,
      "loss": 2.542,
      "step": 27234
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0200453996658325,
      "learning_rate": 1.4529024213933546e-05,
      "loss": 2.5837,
      "step": 27235
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1452279090881348,
      "learning_rate": 1.4528657122031152e-05,
      "loss": 2.2957,
      "step": 27236
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9699115753173828,
      "learning_rate": 1.4528290022451452e-05,
      "loss": 2.5736,
      "step": 27237
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0734754800796509,
      "learning_rate": 1.452792291519507e-05,
      "loss": 2.2536,
      "step": 27238
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0575737953186035,
      "learning_rate": 1.4527555800262632e-05,
      "loss": 2.4021,
      "step": 27239
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.026870608329773,
      "learning_rate": 1.4527188677654755e-05,
      "loss": 2.3681,
      "step": 27240
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0305966138839722,
      "learning_rate": 1.4526821547372066e-05,
      "loss": 2.4336,
      "step": 27241
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0271515846252441,
      "learning_rate": 1.452645440941518e-05,
      "loss": 2.4613,
      "step": 27242
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0927244424819946,
      "learning_rate": 1.452608726378473e-05,
      "loss": 2.4682,
      "step": 27243
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0707066059112549,
      "learning_rate": 1.452572011048133e-05,
      "loss": 2.3683,
      "step": 27244
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.018754005432129,
      "learning_rate": 1.4525352949505604e-05,
      "loss": 2.2813,
      "step": 27245
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.323868751525879,
      "learning_rate": 1.4524985780858178e-05,
      "loss": 2.5098,
      "step": 27246
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.134169578552246,
      "learning_rate": 1.4524618604539673e-05,
      "loss": 2.5317,
      "step": 27247
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9807707071304321,
      "learning_rate": 1.4524251420550708e-05,
      "loss": 2.4765,
      "step": 27248
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0770567655563354,
      "learning_rate": 1.4523884228891909e-05,
      "loss": 2.3355,
      "step": 27249
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1999187469482422,
      "learning_rate": 1.4523517029563898e-05,
      "loss": 2.4577,
      "step": 27250
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0531777143478394,
      "learning_rate": 1.4523149822567297e-05,
      "loss": 2.4826,
      "step": 27251
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2102651596069336,
      "learning_rate": 1.4522782607902727e-05,
      "loss": 2.3885,
      "step": 27252
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9451062083244324,
      "learning_rate": 1.4522415385570817e-05,
      "loss": 2.2589,
      "step": 27253
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0306203365325928,
      "learning_rate": 1.4522048155572179e-05,
      "loss": 2.4121,
      "step": 27254
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0231525897979736,
      "learning_rate": 1.4521680917907444e-05,
      "loss": 2.2856,
      "step": 27255
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1204677820205688,
      "learning_rate": 1.4521313672577232e-05,
      "loss": 2.4907,
      "step": 27256
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0743227005004883,
      "learning_rate": 1.4520946419582162e-05,
      "loss": 2.1332,
      "step": 27257
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.065974473953247,
      "learning_rate": 1.4520579158922865e-05,
      "loss": 2.3858,
      "step": 27258
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1198071241378784,
      "learning_rate": 1.4520211890599955e-05,
      "loss": 2.0928,
      "step": 27259
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0864593982696533,
      "learning_rate": 1.4519844614614059e-05,
      "loss": 2.2948,
      "step": 27260
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.9670999050140381,
      "learning_rate": 1.4519477330965799e-05,
      "loss": 2.3549,
      "step": 27261
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0254228115081787,
      "learning_rate": 1.45191100396558e-05,
      "loss": 2.4529,
      "step": 27262
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.0704126358032227,
      "learning_rate": 1.4518742740684676e-05,
      "loss": 2.4295,
      "step": 27263
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.157976508140564,
      "learning_rate": 1.451837543405306e-05,
      "loss": 2.4165,
      "step": 27264
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0579547882080078,
      "learning_rate": 1.451800811976157e-05,
      "loss": 2.6542,
      "step": 27265
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9431766271591187,
      "learning_rate": 1.451764079781083e-05,
      "loss": 2.554,
      "step": 27266
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1029976606369019,
      "learning_rate": 1.451727346820146e-05,
      "loss": 2.299,
      "step": 27267
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0322831869125366,
      "learning_rate": 1.4516906130934088e-05,
      "loss": 2.2852,
      "step": 27268
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0059666633605957,
      "learning_rate": 1.4516538786009329e-05,
      "loss": 2.1656,
      "step": 27269
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0641931295394897,
      "learning_rate": 1.451617143342781e-05,
      "loss": 2.3074,
      "step": 27270
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0644928216934204,
      "learning_rate": 1.4515804073190155e-05,
      "loss": 2.5022,
      "step": 27271
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0195307731628418,
      "learning_rate": 1.4515436705296988e-05,
      "loss": 2.3385,
      "step": 27272
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0165774822235107,
      "learning_rate": 1.4515069329748928e-05,
      "loss": 2.4243,
      "step": 27273
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.072849154472351,
      "learning_rate": 1.4514701946546599e-05,
      "loss": 2.3739,
      "step": 27274
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1597278118133545,
      "learning_rate": 1.4514334555690624e-05,
      "loss": 2.505,
      "step": 27275
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2484757900238037,
      "learning_rate": 1.4513967157181626e-05,
      "loss": 2.394,
      "step": 27276
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2769325971603394,
      "learning_rate": 1.4513599751020226e-05,
      "loss": 2.4705,
      "step": 27277
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1206923723220825,
      "learning_rate": 1.451323233720705e-05,
      "loss": 2.5902,
      "step": 27278
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.151208758354187,
      "learning_rate": 1.451286491574272e-05,
      "loss": 2.4778,
      "step": 27279
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0576345920562744,
      "learning_rate": 1.4512497486627859e-05,
      "loss": 2.4165,
      "step": 27280
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.075183391571045,
      "learning_rate": 1.4512130049863086e-05,
      "loss": 2.2807,
      "step": 27281
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.979627251625061,
      "learning_rate": 1.4511762605449029e-05,
      "loss": 2.5617,
      "step": 27282
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0242828130722046,
      "learning_rate": 1.4511395153386309e-05,
      "loss": 2.3417,
      "step": 27283
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9862294793128967,
      "learning_rate": 1.4511027693675548e-05,
      "loss": 2.2968,
      "step": 27284
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.107767105102539,
      "learning_rate": 1.451066022631737e-05,
      "loss": 2.4582,
      "step": 27285
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2980161905288696,
      "learning_rate": 1.4510292751312402e-05,
      "loss": 2.4516,
      "step": 27286
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1138728857040405,
      "learning_rate": 1.4509925268661258e-05,
      "loss": 2.4103,
      "step": 27287
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0698935985565186,
      "learning_rate": 1.4509557778364567e-05,
      "loss": 2.4842,
      "step": 27288
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.154024362564087,
      "learning_rate": 1.4509190280422952e-05,
      "loss": 2.7926,
      "step": 27289
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.064951777458191,
      "learning_rate": 1.4508822774837035e-05,
      "loss": 2.4514,
      "step": 27290
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9682998061180115,
      "learning_rate": 1.4508455261607437e-05,
      "loss": 2.4496,
      "step": 27291
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0778813362121582,
      "learning_rate": 1.4508087740734782e-05,
      "loss": 2.1115,
      "step": 27292
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.029010534286499,
      "learning_rate": 1.4507720212219698e-05,
      "loss": 2.4359,
      "step": 27293
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0372639894485474,
      "learning_rate": 1.4507352676062802e-05,
      "loss": 2.5221,
      "step": 27294
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.039512038230896,
      "learning_rate": 1.4506985132264717e-05,
      "loss": 2.5321,
      "step": 27295
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.8676610589027405,
      "learning_rate": 1.450661758082607e-05,
      "loss": 2.1936,
      "step": 27296
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0417137145996094,
      "learning_rate": 1.4506250021747481e-05,
      "loss": 2.5798,
      "step": 27297
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0748178958892822,
      "learning_rate": 1.4505882455029574e-05,
      "loss": 2.4411,
      "step": 27298
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0488438606262207,
      "learning_rate": 1.4505514880672975e-05,
      "loss": 2.4435,
      "step": 27299
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9379796981811523,
      "learning_rate": 1.4505147298678305e-05,
      "loss": 2.4595,
      "step": 27300
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9980201125144958,
      "learning_rate": 1.4504779709046184e-05,
      "loss": 2.5399,
      "step": 27301
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0974032878875732,
      "learning_rate": 1.4504412111777237e-05,
      "loss": 2.4139,
      "step": 27302
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.970562756061554,
      "learning_rate": 1.450404450687209e-05,
      "loss": 2.6086,
      "step": 27303
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9583064913749695,
      "learning_rate": 1.4503676894331364e-05,
      "loss": 2.3289,
      "step": 27304
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0268410444259644,
      "learning_rate": 1.4503309274155681e-05,
      "loss": 2.3529,
      "step": 27305
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2226766347885132,
      "learning_rate": 1.450294164634567e-05,
      "loss": 2.3793,
      "step": 27306
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1878453493118286,
      "learning_rate": 1.4502574010901947e-05,
      "loss": 2.4944,
      "step": 27307
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9451116323471069,
      "learning_rate": 1.4502206367825138e-05,
      "loss": 2.3519,
      "step": 27308
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0482454299926758,
      "learning_rate": 1.4501838717115865e-05,
      "loss": 2.525,
      "step": 27309
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1327626705169678,
      "learning_rate": 1.4501471058774756e-05,
      "loss": 2.4403,
      "step": 27310
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0445897579193115,
      "learning_rate": 1.450110339280243e-05,
      "loss": 2.2438,
      "step": 27311
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0076277256011963,
      "learning_rate": 1.4500735719199508e-05,
      "loss": 2.461,
      "step": 27312
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0121468305587769,
      "learning_rate": 1.450036803796662e-05,
      "loss": 2.3742,
      "step": 27313
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0229918956756592,
      "learning_rate": 1.4500000349104389e-05,
      "loss": 2.4811,
      "step": 27314
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1128298044204712,
      "learning_rate": 1.449963265261343e-05,
      "loss": 2.3957,
      "step": 27315
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0635632276535034,
      "learning_rate": 1.4499264948494372e-05,
      "loss": 2.427,
      "step": 27316
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.046622395515442,
      "learning_rate": 1.449889723674784e-05,
      "loss": 2.3584,
      "step": 27317
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.18500816822052,
      "learning_rate": 1.4498529517374453e-05,
      "loss": 2.3277,
      "step": 27318
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1229215860366821,
      "learning_rate": 1.4498161790374838e-05,
      "loss": 2.4952,
      "step": 27319
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1986894607543945,
      "learning_rate": 1.4497794055749617e-05,
      "loss": 2.3872,
      "step": 27320
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0806005001068115,
      "learning_rate": 1.4497426313499417e-05,
      "loss": 2.4137,
      "step": 27321
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0822508335113525,
      "learning_rate": 1.4497058563624853e-05,
      "loss": 2.3484,
      "step": 27322
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0920979976654053,
      "learning_rate": 1.4496690806126554e-05,
      "loss": 2.5881,
      "step": 27323
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0678272247314453,
      "learning_rate": 1.4496323041005145e-05,
      "loss": 2.3173,
      "step": 27324
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9901857972145081,
      "learning_rate": 1.4495955268261243e-05,
      "loss": 2.239,
      "step": 27325
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0180647373199463,
      "learning_rate": 1.4495587487895482e-05,
      "loss": 2.3395,
      "step": 27326
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0220471620559692,
      "learning_rate": 1.4495219699908476e-05,
      "loss": 2.2931,
      "step": 27327
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9867969751358032,
      "learning_rate": 1.449485190430085e-05,
      "loss": 2.6327,
      "step": 27328
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0800169706344604,
      "learning_rate": 1.4494484101073232e-05,
      "loss": 2.3871,
      "step": 27329
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0383446216583252,
      "learning_rate": 1.4494116290226239e-05,
      "loss": 2.5342,
      "step": 27330
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.025532841682434,
      "learning_rate": 1.44937484717605e-05,
      "loss": 2.379,
      "step": 27331
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0907727479934692,
      "learning_rate": 1.449338064567664e-05,
      "loss": 2.2303,
      "step": 27332
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3207404613494873,
      "learning_rate": 1.4493012811975278e-05,
      "loss": 2.348,
      "step": 27333
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9894331693649292,
      "learning_rate": 1.4492644970657038e-05,
      "loss": 2.5092,
      "step": 27334
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0249760150909424,
      "learning_rate": 1.4492277121722542e-05,
      "loss": 2.3299,
      "step": 27335
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1643028259277344,
      "learning_rate": 1.449190926517242e-05,
      "loss": 2.2657,
      "step": 27336
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3430569171905518,
      "learning_rate": 1.449154140100729e-05,
      "loss": 2.399,
      "step": 27337
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0572348833084106,
      "learning_rate": 1.4491173529227778e-05,
      "loss": 2.2796,
      "step": 27338
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0793476104736328,
      "learning_rate": 1.449080564983451e-05,
      "loss": 2.5795,
      "step": 27339
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0301008224487305,
      "learning_rate": 1.4490437762828102e-05,
      "loss": 2.5668,
      "step": 27340
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0079680681228638,
      "learning_rate": 1.4490069868209185e-05,
      "loss": 2.3502,
      "step": 27341
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0200713872909546,
      "learning_rate": 1.4489701965978378e-05,
      "loss": 2.3143,
      "step": 27342
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2289906740188599,
      "learning_rate": 1.4489334056136309e-05,
      "loss": 2.4515,
      "step": 27343
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0696390867233276,
      "learning_rate": 1.4488966138683598e-05,
      "loss": 2.4079,
      "step": 27344
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9596546292304993,
      "learning_rate": 1.4488598213620872e-05,
      "loss": 2.2014,
      "step": 27345
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0838193893432617,
      "learning_rate": 1.4488230280948752e-05,
      "loss": 2.4635,
      "step": 27346
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1260603666305542,
      "learning_rate": 1.4487862340667863e-05,
      "loss": 2.3157,
      "step": 27347
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0555353164672852,
      "learning_rate": 1.4487494392778827e-05,
      "loss": 2.481,
      "step": 27348
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.024167776107788,
      "learning_rate": 1.4487126437282274e-05,
      "loss": 2.3205,
      "step": 27349
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0295851230621338,
      "learning_rate": 1.4486758474178817e-05,
      "loss": 2.4073,
      "step": 27350
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0023161172866821,
      "learning_rate": 1.4486390503469088e-05,
      "loss": 2.2351,
      "step": 27351
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9875010251998901,
      "learning_rate": 1.4486022525153709e-05,
      "loss": 2.4381,
      "step": 27352
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.049298644065857,
      "learning_rate": 1.4485654539233302e-05,
      "loss": 2.4136,
      "step": 27353
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9753239750862122,
      "learning_rate": 1.4485286545708495e-05,
      "loss": 2.2234,
      "step": 27354
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0964797735214233,
      "learning_rate": 1.4484918544579906e-05,
      "loss": 2.1531,
      "step": 27355
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1147147417068481,
      "learning_rate": 1.4484550535848164e-05,
      "loss": 2.2688,
      "step": 27356
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.132973074913025,
      "learning_rate": 1.4484182519513889e-05,
      "loss": 2.3825,
      "step": 27357
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.006052017211914,
      "learning_rate": 1.4483814495577709e-05,
      "loss": 2.4766,
      "step": 27358
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.013788104057312,
      "learning_rate": 1.4483446464040244e-05,
      "loss": 2.252,
      "step": 27359
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0057761669158936,
      "learning_rate": 1.4483078424902121e-05,
      "loss": 2.4895,
      "step": 27360
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0923192501068115,
      "learning_rate": 1.448271037816396e-05,
      "loss": 2.1921,
      "step": 27361
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0111286640167236,
      "learning_rate": 1.4482342323826391e-05,
      "loss": 2.4646,
      "step": 27362
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0944262742996216,
      "learning_rate": 1.4481974261890031e-05,
      "loss": 2.4708,
      "step": 27363
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0002381801605225,
      "learning_rate": 1.4481606192355509e-05,
      "loss": 2.193,
      "step": 27364
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1112053394317627,
      "learning_rate": 1.4481238115223449e-05,
      "loss": 2.5173,
      "step": 27365
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.140610694885254,
      "learning_rate": 1.4480870030494472e-05,
      "loss": 2.3944,
      "step": 27366
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.368112564086914,
      "learning_rate": 1.44805019381692e-05,
      "loss": 2.3041,
      "step": 27367
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.139883041381836,
      "learning_rate": 1.4480133838248263e-05,
      "loss": 2.399,
      "step": 27368
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1378803253173828,
      "learning_rate": 1.4479765730732282e-05,
      "loss": 2.3254,
      "step": 27369
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0612741708755493,
      "learning_rate": 1.4479397615621882e-05,
      "loss": 2.1344,
      "step": 27370
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9936883449554443,
      "learning_rate": 1.4479029492917687e-05,
      "loss": 2.2651,
      "step": 27371
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0315442085266113,
      "learning_rate": 1.4478661362620321e-05,
      "loss": 2.2708,
      "step": 27372
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1386408805847168,
      "learning_rate": 1.4478293224730407e-05,
      "loss": 2.2463,
      "step": 27373
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0458061695098877,
      "learning_rate": 1.4477925079248568e-05,
      "loss": 2.2991,
      "step": 27374
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0724478960037231,
      "learning_rate": 1.447755692617543e-05,
      "loss": 2.4963,
      "step": 27375
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2657275199890137,
      "learning_rate": 1.4477188765511621e-05,
      "loss": 2.2888,
      "step": 27376
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0294197797775269,
      "learning_rate": 1.4476820597257757e-05,
      "loss": 2.3697,
      "step": 27377
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0101337432861328,
      "learning_rate": 1.4476452421414466e-05,
      "loss": 2.4765,
      "step": 27378
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0305997133255005,
      "learning_rate": 1.4476084237982373e-05,
      "loss": 2.184,
      "step": 27379
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0229544639587402,
      "learning_rate": 1.4475716046962105e-05,
      "loss": 2.2481,
      "step": 27380
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1347894668579102,
      "learning_rate": 1.4475347848354278e-05,
      "loss": 2.4687,
      "step": 27381
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1415201425552368,
      "learning_rate": 1.4474979642159523e-05,
      "loss": 2.5418,
      "step": 27382
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0579833984375,
      "learning_rate": 1.447461142837846e-05,
      "loss": 2.4631,
      "step": 27383
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.016070008277893,
      "learning_rate": 1.4474243207011717e-05,
      "loss": 2.2279,
      "step": 27384
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1399770975112915,
      "learning_rate": 1.447387497805992e-05,
      "loss": 2.3515,
      "step": 27385
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.001786470413208,
      "learning_rate": 1.4473506741523683e-05,
      "loss": 2.5461,
      "step": 27386
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0192830562591553,
      "learning_rate": 1.4473138497403643e-05,
      "loss": 2.3442,
      "step": 27387
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1008204221725464,
      "learning_rate": 1.4472770245700413e-05,
      "loss": 2.4499,
      "step": 27388
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.962324321269989,
      "learning_rate": 1.4472401986414628e-05,
      "loss": 2.3903,
      "step": 27389
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0034078359603882,
      "learning_rate": 1.4472033719546903e-05,
      "loss": 2.4544,
      "step": 27390
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9870617985725403,
      "learning_rate": 1.4471665445097867e-05,
      "loss": 2.3199,
      "step": 27391
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2451199293136597,
      "learning_rate": 1.4471297163068147e-05,
      "loss": 2.2393,
      "step": 27392
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0233526229858398,
      "learning_rate": 1.4470928873458362e-05,
      "loss": 2.0917,
      "step": 27393
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0670220851898193,
      "learning_rate": 1.4470560576269136e-05,
      "loss": 2.3614,
      "step": 27394
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.175820231437683,
      "learning_rate": 1.4470192271501096e-05,
      "loss": 2.3046,
      "step": 27395
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1122627258300781,
      "learning_rate": 1.4469823959154867e-05,
      "loss": 2.4592,
      "step": 27396
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0286060571670532,
      "learning_rate": 1.4469455639231074e-05,
      "loss": 2.1931,
      "step": 27397
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1270672082901,
      "learning_rate": 1.4469087311730338e-05,
      "loss": 2.4023,
      "step": 27398
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.970491886138916,
      "learning_rate": 1.4468718976653286e-05,
      "loss": 2.3611,
      "step": 27399
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1206135749816895,
      "learning_rate": 1.4468350634000541e-05,
      "loss": 2.4252,
      "step": 27400
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.029049038887024,
      "learning_rate": 1.4467982283772727e-05,
      "loss": 2.4371,
      "step": 27401
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0820640325546265,
      "learning_rate": 1.4467613925970473e-05,
      "loss": 2.346,
      "step": 27402
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1229203939437866,
      "learning_rate": 1.4467245560594396e-05,
      "loss": 2.4062,
      "step": 27403
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1686034202575684,
      "learning_rate": 1.4466877187645127e-05,
      "loss": 2.3079,
      "step": 27404
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9929823279380798,
      "learning_rate": 1.4466508807123287e-05,
      "loss": 2.5137,
      "step": 27405
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.095099687576294,
      "learning_rate": 1.4466140419029503e-05,
      "loss": 2.461,
      "step": 27406
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1036369800567627,
      "learning_rate": 1.4465772023364396e-05,
      "loss": 2.4842,
      "step": 27407
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1852115392684937,
      "learning_rate": 1.4465403620128594e-05,
      "loss": 2.5043,
      "step": 27408
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0809646844863892,
      "learning_rate": 1.446503520932272e-05,
      "loss": 2.5973,
      "step": 27409
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1344164609909058,
      "learning_rate": 1.4464666790947397e-05,
      "loss": 2.363,
      "step": 27410
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0551068782806396,
      "learning_rate": 1.4464298365003254e-05,
      "loss": 2.4952,
      "step": 27411
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0304334163665771,
      "learning_rate": 1.446392993149091e-05,
      "loss": 2.3284,
      "step": 27412
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.071021318435669,
      "learning_rate": 1.4463561490410994e-05,
      "loss": 2.298,
      "step": 27413
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0830190181732178,
      "learning_rate": 1.4463193041764128e-05,
      "loss": 2.5455,
      "step": 27414
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9937038421630859,
      "learning_rate": 1.446282458555094e-05,
      "loss": 2.3488,
      "step": 27415
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0337570905685425,
      "learning_rate": 1.4462456121772049e-05,
      "loss": 2.1952,
      "step": 27416
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1160537004470825,
      "learning_rate": 1.4462087650428084e-05,
      "loss": 2.2567,
      "step": 27417
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9674677848815918,
      "learning_rate": 1.446171917151967e-05,
      "loss": 2.54,
      "step": 27418
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0390641689300537,
      "learning_rate": 1.4461350685047429e-05,
      "loss": 2.4524,
      "step": 27419
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.05540132522583,
      "learning_rate": 1.4460982191011986e-05,
      "loss": 2.5269,
      "step": 27420
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1669799089431763,
      "learning_rate": 1.4460613689413967e-05,
      "loss": 2.6132,
      "step": 27421
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0247479677200317,
      "learning_rate": 1.4460245180253996e-05,
      "loss": 2.6148,
      "step": 27422
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1421903371810913,
      "learning_rate": 1.4459876663532696e-05,
      "loss": 2.2095,
      "step": 27423
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0649086236953735,
      "learning_rate": 1.44595081392507e-05,
      "loss": 2.5161,
      "step": 27424
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0952370166778564,
      "learning_rate": 1.4459139607408623e-05,
      "loss": 2.58,
      "step": 27425
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0152313709259033,
      "learning_rate": 1.4458771068007093e-05,
      "loss": 2.3663,
      "step": 27426
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.051721215248108,
      "learning_rate": 1.4458402521046735e-05,
      "loss": 2.2558,
      "step": 27427
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.03188157081604,
      "learning_rate": 1.4458033966528173e-05,
      "loss": 2.3235,
      "step": 27428
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9703969955444336,
      "learning_rate": 1.4457665404452035e-05,
      "loss": 2.7351,
      "step": 27429
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.040410041809082,
      "learning_rate": 1.4457296834818941e-05,
      "loss": 2.5119,
      "step": 27430
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0388922691345215,
      "learning_rate": 1.4456928257629522e-05,
      "loss": 2.425,
      "step": 27431
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0575953722000122,
      "learning_rate": 1.4456559672884396e-05,
      "loss": 2.3409,
      "step": 27432
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0503833293914795,
      "learning_rate": 1.4456191080584191e-05,
      "loss": 2.4653,
      "step": 27433
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1063328981399536,
      "learning_rate": 1.4455822480729532e-05,
      "loss": 2.3438,
      "step": 27434
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0400619506835938,
      "learning_rate": 1.4455453873321046e-05,
      "loss": 2.3606,
      "step": 27435
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9642828106880188,
      "learning_rate": 1.4455085258359354e-05,
      "loss": 2.1956,
      "step": 27436
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.041688084602356,
      "learning_rate": 1.4454716635845083e-05,
      "loss": 2.35,
      "step": 27437
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.060289978981018,
      "learning_rate": 1.4454348005778855e-05,
      "loss": 2.2998,
      "step": 27438
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0665308237075806,
      "learning_rate": 1.4453979368161302e-05,
      "loss": 2.32,
      "step": 27439
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9876610040664673,
      "learning_rate": 1.4453610722993042e-05,
      "loss": 2.5296,
      "step": 27440
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0064716339111328,
      "learning_rate": 1.44532420702747e-05,
      "loss": 2.4023,
      "step": 27441
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0716307163238525,
      "learning_rate": 1.4452873410006904e-05,
      "loss": 2.3613,
      "step": 27442
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.159690022468567,
      "learning_rate": 1.4452504742190279e-05,
      "loss": 2.3199,
      "step": 27443
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0466598272323608,
      "learning_rate": 1.445213606682545e-05,
      "loss": 2.0695,
      "step": 27444
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.052088737487793,
      "learning_rate": 1.445176738391304e-05,
      "loss": 2.419,
      "step": 27445
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9925333857536316,
      "learning_rate": 1.4451398693453675e-05,
      "loss": 2.2464,
      "step": 27446
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0489894151687622,
      "learning_rate": 1.445102999544798e-05,
      "loss": 2.2976,
      "step": 27447
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0704470872879028,
      "learning_rate": 1.4450661289896584e-05,
      "loss": 2.394,
      "step": 27448
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1199543476104736,
      "learning_rate": 1.4450292576800102e-05,
      "loss": 2.3673,
      "step": 27449
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.035309910774231,
      "learning_rate": 1.444992385615917e-05,
      "loss": 2.3029,
      "step": 27450
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0671703815460205,
      "learning_rate": 1.4449555127974408e-05,
      "loss": 2.4236,
      "step": 27451
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0760297775268555,
      "learning_rate": 1.4449186392246442e-05,
      "loss": 2.3774,
      "step": 27452
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9683323502540588,
      "learning_rate": 1.4448817648975895e-05,
      "loss": 2.3644,
      "step": 27453
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0976881980895996,
      "learning_rate": 1.4448448898163395e-05,
      "loss": 2.4289,
      "step": 27454
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1461503505706787,
      "learning_rate": 1.4448080139809564e-05,
      "loss": 2.1233,
      "step": 27455
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2210328578948975,
      "learning_rate": 1.444771137391503e-05,
      "loss": 2.392,
      "step": 27456
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.291366457939148,
      "learning_rate": 1.4447342600480416e-05,
      "loss": 2.4179,
      "step": 27457
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1425188779830933,
      "learning_rate": 1.4446973819506354e-05,
      "loss": 2.4632,
      "step": 27458
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1207025051116943,
      "learning_rate": 1.4446605030993458e-05,
      "loss": 2.5743,
      "step": 27459
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1807957887649536,
      "learning_rate": 1.444623623494236e-05,
      "loss": 2.3351,
      "step": 27460
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.151383876800537,
      "learning_rate": 1.4445867431353685e-05,
      "loss": 2.3984,
      "step": 27461
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.8856905102729797,
      "learning_rate": 1.4445498620228055e-05,
      "loss": 2.3533,
      "step": 27462
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2611544132232666,
      "learning_rate": 1.4445129801566102e-05,
      "loss": 2.3167,
      "step": 27463
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0181411504745483,
      "learning_rate": 1.4444760975368444e-05,
      "loss": 2.4975,
      "step": 27464
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9813094139099121,
      "learning_rate": 1.4444392141635706e-05,
      "loss": 2.298,
      "step": 27465
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0973244905471802,
      "learning_rate": 1.4444023300368519e-05,
      "loss": 2.2203,
      "step": 27466
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9976509809494019,
      "learning_rate": 1.4443654451567507e-05,
      "loss": 2.4029,
      "step": 27467
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0571521520614624,
      "learning_rate": 1.4443285595233291e-05,
      "loss": 2.4082,
      "step": 27468
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9640374183654785,
      "learning_rate": 1.4442916731366503e-05,
      "loss": 2.313,
      "step": 27469
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0996638536453247,
      "learning_rate": 1.4442547859967762e-05,
      "loss": 2.4813,
      "step": 27470
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.047042727470398,
      "learning_rate": 1.4442178981037695e-05,
      "loss": 2.1684,
      "step": 27471
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1026850938796997,
      "learning_rate": 1.444181009457693e-05,
      "loss": 2.32,
      "step": 27472
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.078131079673767,
      "learning_rate": 1.4441441200586088e-05,
      "loss": 2.3681,
      "step": 27473
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0541043281555176,
      "learning_rate": 1.4441072299065798e-05,
      "loss": 2.2544,
      "step": 27474
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0630075931549072,
      "learning_rate": 1.4440703390016685e-05,
      "loss": 2.4608,
      "step": 27475
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9958738684654236,
      "learning_rate": 1.4440334473439372e-05,
      "loss": 2.4408,
      "step": 27476
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0933935642242432,
      "learning_rate": 1.443996554933449e-05,
      "loss": 2.2518,
      "step": 27477
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9947675466537476,
      "learning_rate": 1.4439596617702658e-05,
      "loss": 2.3302,
      "step": 27478
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1382534503936768,
      "learning_rate": 1.4439227678544504e-05,
      "loss": 2.4903,
      "step": 27479
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1230133771896362,
      "learning_rate": 1.4438858731860653e-05,
      "loss": 2.4443,
      "step": 27480
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1034753322601318,
      "learning_rate": 1.443848977765173e-05,
      "loss": 2.3244,
      "step": 27481
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9379943609237671,
      "learning_rate": 1.4438120815918362e-05,
      "loss": 2.4694,
      "step": 27482
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0915261507034302,
      "learning_rate": 1.4437751846661173e-05,
      "loss": 2.2138,
      "step": 27483
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0095131397247314,
      "learning_rate": 1.4437382869880794e-05,
      "loss": 2.3775,
      "step": 27484
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.994617223739624,
      "learning_rate": 1.4437013885577842e-05,
      "loss": 2.4087,
      "step": 27485
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2049940824508667,
      "learning_rate": 1.4436644893752945e-05,
      "loss": 2.4503,
      "step": 27486
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.066763997077942,
      "learning_rate": 1.443627589440673e-05,
      "loss": 2.3375,
      "step": 27487
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2225064039230347,
      "learning_rate": 1.4435906887539826e-05,
      "loss": 2.4785,
      "step": 27488
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0570067167282104,
      "learning_rate": 1.4435537873152852e-05,
      "loss": 2.4581,
      "step": 27489
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0769768953323364,
      "learning_rate": 1.4435168851246438e-05,
      "loss": 2.3271,
      "step": 27490
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9878968596458435,
      "learning_rate": 1.443479982182121e-05,
      "loss": 2.5074,
      "step": 27491
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0829787254333496,
      "learning_rate": 1.4434430784877789e-05,
      "loss": 2.4546,
      "step": 27492
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9855023622512817,
      "learning_rate": 1.4434061740416805e-05,
      "loss": 2.4062,
      "step": 27493
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9775480628013611,
      "learning_rate": 1.4433692688438883e-05,
      "loss": 2.5851,
      "step": 27494
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0996912717819214,
      "learning_rate": 1.4433323628944646e-05,
      "loss": 2.3914,
      "step": 27495
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1049975156784058,
      "learning_rate": 1.443295456193472e-05,
      "loss": 2.4552,
      "step": 27496
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1093144416809082,
      "learning_rate": 1.4432585487409734e-05,
      "loss": 2.2527,
      "step": 27497
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0771002769470215,
      "learning_rate": 1.4432216405370313e-05,
      "loss": 2.5305,
      "step": 27498
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0540533065795898,
      "learning_rate": 1.443184731581708e-05,
      "loss": 2.1905,
      "step": 27499
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1077402830123901,
      "learning_rate": 1.443147821875066e-05,
      "loss": 2.3623,
      "step": 27500
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.254651427268982,
      "learning_rate": 1.4431109114171684e-05,
      "loss": 2.403,
      "step": 27501
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9794162511825562,
      "learning_rate": 1.4430740002080774e-05,
      "loss": 2.4191,
      "step": 27502
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9990755915641785,
      "learning_rate": 1.4430370882478555e-05,
      "loss": 2.2649,
      "step": 27503
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0775325298309326,
      "learning_rate": 1.4430001755365654e-05,
      "loss": 2.3648,
      "step": 27504
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.068577527999878,
      "learning_rate": 1.4429632620742699e-05,
      "loss": 2.2443,
      "step": 27505
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0544512271881104,
      "learning_rate": 1.4429263478610312e-05,
      "loss": 2.2886,
      "step": 27506
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0228877067565918,
      "learning_rate": 1.4428894328969122e-05,
      "loss": 2.1992,
      "step": 27507
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9937472343444824,
      "learning_rate": 1.442852517181975e-05,
      "loss": 2.2545,
      "step": 27508
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9678653478622437,
      "learning_rate": 1.4428156007162826e-05,
      "loss": 2.361,
      "step": 27509
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.206464409828186,
      "learning_rate": 1.4427786834998978e-05,
      "loss": 2.6208,
      "step": 27510
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1061042547225952,
      "learning_rate": 1.4427417655328825e-05,
      "loss": 2.4605,
      "step": 27511
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0387005805969238,
      "learning_rate": 1.4427048468153e-05,
      "loss": 2.4923,
      "step": 27512
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.017615556716919,
      "learning_rate": 1.4426679273472122e-05,
      "loss": 2.2516,
      "step": 27513
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0913914442062378,
      "learning_rate": 1.4426310071286822e-05,
      "loss": 2.3117,
      "step": 27514
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2685558795928955,
      "learning_rate": 1.4425940861597724e-05,
      "loss": 2.193,
      "step": 27515
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0212910175323486,
      "learning_rate": 1.4425571644405453e-05,
      "loss": 2.4016,
      "step": 27516
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9469174742698669,
      "learning_rate": 1.4425202419710642e-05,
      "loss": 2.1499,
      "step": 27517
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0415728092193604,
      "learning_rate": 1.4424833187513904e-05,
      "loss": 2.5838,
      "step": 27518
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2380733489990234,
      "learning_rate": 1.4424463947815874e-05,
      "loss": 2.5829,
      "step": 27519
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0926709175109863,
      "learning_rate": 1.4424094700617177e-05,
      "loss": 2.185,
      "step": 27520
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2819277048110962,
      "learning_rate": 1.4423725445918437e-05,
      "loss": 2.5154,
      "step": 27521
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0509520769119263,
      "learning_rate": 1.4423356183720284e-05,
      "loss": 2.379,
      "step": 27522
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9978776574134827,
      "learning_rate": 1.4422986914023339e-05,
      "loss": 2.1684,
      "step": 27523
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.04495108127594,
      "learning_rate": 1.4422617636828226e-05,
      "loss": 2.3697,
      "step": 27524
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1219266653060913,
      "learning_rate": 1.442224835213558e-05,
      "loss": 2.3689,
      "step": 27525
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1137986183166504,
      "learning_rate": 1.442187905994602e-05,
      "loss": 2.3449,
      "step": 27526
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1106417179107666,
      "learning_rate": 1.4421509760260175e-05,
      "loss": 2.3234,
      "step": 27527
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1389573812484741,
      "learning_rate": 1.4421140453078668e-05,
      "loss": 2.2975,
      "step": 27528
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0272406339645386,
      "learning_rate": 1.4420771138402127e-05,
      "loss": 2.3105,
      "step": 27529
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0914379358291626,
      "learning_rate": 1.4420401816231181e-05,
      "loss": 2.4704,
      "step": 27530
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.298571228981018,
      "learning_rate": 1.4420032486566454e-05,
      "loss": 2.3347,
      "step": 27531
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1497430801391602,
      "learning_rate": 1.4419663149408569e-05,
      "loss": 2.1949,
      "step": 27532
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3453130722045898,
      "learning_rate": 1.4419293804758155e-05,
      "loss": 2.4941,
      "step": 27533
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0358322858810425,
      "learning_rate": 1.4418924452615837e-05,
      "loss": 2.2908,
      "step": 27534
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.018548846244812,
      "learning_rate": 1.4418555092982242e-05,
      "loss": 2.5419,
      "step": 27535
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.146466612815857,
      "learning_rate": 1.4418185725857995e-05,
      "loss": 2.6511,
      "step": 27536
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.133007526397705,
      "learning_rate": 1.4417816351243728e-05,
      "loss": 2.2113,
      "step": 27537
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1156113147735596,
      "learning_rate": 1.441744696914006e-05,
      "loss": 2.4493,
      "step": 27538
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0167001485824585,
      "learning_rate": 1.4417077579547618e-05,
      "loss": 2.4291,
      "step": 27539
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5006757974624634,
      "learning_rate": 1.441670818246703e-05,
      "loss": 2.2832,
      "step": 27540
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0819242000579834,
      "learning_rate": 1.4416338777898925e-05,
      "loss": 2.2076,
      "step": 27541
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9750305414199829,
      "learning_rate": 1.4415969365843922e-05,
      "loss": 2.4969,
      "step": 27542
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0638692378997803,
      "learning_rate": 1.4415599946302656e-05,
      "loss": 2.5258,
      "step": 27543
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0012967586517334,
      "learning_rate": 1.4415230519275745e-05,
      "loss": 2.4252,
      "step": 27544
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0481740236282349,
      "learning_rate": 1.441486108476382e-05,
      "loss": 2.373,
      "step": 27545
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2463339567184448,
      "learning_rate": 1.4414491642767507e-05,
      "loss": 2.4156,
      "step": 27546
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.066467523574829,
      "learning_rate": 1.4414122193287431e-05,
      "loss": 2.3761,
      "step": 27547
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0762407779693604,
      "learning_rate": 1.4413752736324221e-05,
      "loss": 2.6033,
      "step": 27548
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0636297464370728,
      "learning_rate": 1.4413383271878498e-05,
      "loss": 2.4916,
      "step": 27549
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0046902894973755,
      "learning_rate": 1.4413013799950897e-05,
      "loss": 2.0823,
      "step": 27550
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9656060338020325,
      "learning_rate": 1.4412644320542035e-05,
      "loss": 2.5776,
      "step": 27551
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2422206401824951,
      "learning_rate": 1.4412274833652544e-05,
      "loss": 2.5333,
      "step": 27552
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0256420373916626,
      "learning_rate": 1.441190533928305e-05,
      "loss": 2.4196,
      "step": 27553
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1247808933258057,
      "learning_rate": 1.4411535837434177e-05,
      "loss": 2.3346,
      "step": 27554
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.063804268836975,
      "learning_rate": 1.4411166328106553e-05,
      "loss": 2.5202,
      "step": 27555
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9653075337409973,
      "learning_rate": 1.44107968113008e-05,
      "loss": 2.3096,
      "step": 27556
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2070599794387817,
      "learning_rate": 1.4410427287017554e-05,
      "loss": 2.548,
      "step": 27557
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1826881170272827,
      "learning_rate": 1.4410057755257432e-05,
      "loss": 2.3391,
      "step": 27558
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0235315561294556,
      "learning_rate": 1.4409688216021065e-05,
      "loss": 2.4621,
      "step": 27559
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1737221479415894,
      "learning_rate": 1.440931866930908e-05,
      "loss": 2.3345,
      "step": 27560
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0665911436080933,
      "learning_rate": 1.4408949115122102e-05,
      "loss": 2.3055,
      "step": 27561
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1646684408187866,
      "learning_rate": 1.4408579553460756e-05,
      "loss": 2.3678,
      "step": 27562
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1650326251983643,
      "learning_rate": 1.4408209984325673e-05,
      "loss": 2.462,
      "step": 27563
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0641610622406006,
      "learning_rate": 1.4407840407717477e-05,
      "loss": 2.504,
      "step": 27564
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.057429313659668,
      "learning_rate": 1.4407470823636791e-05,
      "loss": 2.5719,
      "step": 27565
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9643968343734741,
      "learning_rate": 1.4407101232084247e-05,
      "loss": 2.3597,
      "step": 27566
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0104819536209106,
      "learning_rate": 1.4406731633060468e-05,
      "loss": 2.3306,
      "step": 27567
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1404271125793457,
      "learning_rate": 1.4406362026566085e-05,
      "loss": 2.4929,
      "step": 27568
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0615384578704834,
      "learning_rate": 1.440599241260172e-05,
      "loss": 2.7837,
      "step": 27569
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0499179363250732,
      "learning_rate": 1.4405622791168004e-05,
      "loss": 2.5087,
      "step": 27570
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0054465532302856,
      "learning_rate": 1.4405253162265555e-05,
      "loss": 2.4171,
      "step": 27571
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0759369134902954,
      "learning_rate": 1.440488352589501e-05,
      "loss": 2.4981,
      "step": 27572
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.157326102256775,
      "learning_rate": 1.440451388205699e-05,
      "loss": 2.3572,
      "step": 27573
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0921869277954102,
      "learning_rate": 1.4404144230752119e-05,
      "loss": 2.6161,
      "step": 27574
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.117587685585022,
      "learning_rate": 1.4403774571981032e-05,
      "loss": 2.368,
      "step": 27575
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0376219749450684,
      "learning_rate": 1.4403404905744351e-05,
      "loss": 2.6042,
      "step": 27576
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.049401044845581,
      "learning_rate": 1.4403035232042702e-05,
      "loss": 2.3127,
      "step": 27577
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0122942924499512,
      "learning_rate": 1.4402665550876711e-05,
      "loss": 2.4418,
      "step": 27578
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1712408065795898,
      "learning_rate": 1.4402295862247006e-05,
      "loss": 2.6355,
      "step": 27579
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3237584829330444,
      "learning_rate": 1.4401926166154216e-05,
      "loss": 2.4192,
      "step": 27580
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.990828275680542,
      "learning_rate": 1.4401556462598966e-05,
      "loss": 2.5548,
      "step": 27581
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0175431966781616,
      "learning_rate": 1.4401186751581882e-05,
      "loss": 2.4625,
      "step": 27582
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.156480073928833,
      "learning_rate": 1.4400817033103588e-05,
      "loss": 2.6197,
      "step": 27583
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.066826581954956,
      "learning_rate": 1.4400447307164715e-05,
      "loss": 2.3711,
      "step": 27584
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2015619277954102,
      "learning_rate": 1.4400077573765892e-05,
      "loss": 2.3236,
      "step": 27585
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1014671325683594,
      "learning_rate": 1.4399707832907739e-05,
      "loss": 2.3571,
      "step": 27586
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1349644660949707,
      "learning_rate": 1.4399338084590889e-05,
      "loss": 2.4316,
      "step": 27587
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0174458026885986,
      "learning_rate": 1.4398968328815964e-05,
      "loss": 2.3148,
      "step": 27588
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1939107179641724,
      "learning_rate": 1.4398598565583593e-05,
      "loss": 2.4502,
      "step": 27589
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1032947301864624,
      "learning_rate": 1.4398228794894404e-05,
      "loss": 2.4216,
      "step": 27590
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1808143854141235,
      "learning_rate": 1.439785901674902e-05,
      "loss": 2.4336,
      "step": 27591
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0103275775909424,
      "learning_rate": 1.4397489231148072e-05,
      "loss": 2.3595,
      "step": 27592
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.00314199924469,
      "learning_rate": 1.4397119438092186e-05,
      "loss": 2.3912,
      "step": 27593
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1222625970840454,
      "learning_rate": 1.4396749637581989e-05,
      "loss": 2.3978,
      "step": 27594
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0428392887115479,
      "learning_rate": 1.4396379829618105e-05,
      "loss": 2.5619,
      "step": 27595
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9919726252555847,
      "learning_rate": 1.4396010014201168e-05,
      "loss": 2.3158,
      "step": 27596
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0650136470794678,
      "learning_rate": 1.4395640191331794e-05,
      "loss": 2.5816,
      "step": 27597
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9609850645065308,
      "learning_rate": 1.4395270361010618e-05,
      "loss": 2.5714,
      "step": 27598
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0022506713867188,
      "learning_rate": 1.4394900523238264e-05,
      "loss": 2.4305,
      "step": 27599
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1016621589660645,
      "learning_rate": 1.4394530678015364e-05,
      "loss": 2.2927,
      "step": 27600
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1115806102752686,
      "learning_rate": 1.4394160825342538e-05,
      "loss": 2.5067,
      "step": 27601
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1585298776626587,
      "learning_rate": 1.4393790965220415e-05,
      "loss": 2.4179,
      "step": 27602
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0571430921554565,
      "learning_rate": 1.4393421097649625e-05,
      "loss": 2.3379,
      "step": 27603
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0050454139709473,
      "learning_rate": 1.4393051222630795e-05,
      "loss": 2.4635,
      "step": 27604
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.944657027721405,
      "learning_rate": 1.4392681340164545e-05,
      "loss": 2.4296,
      "step": 27605
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0894163846969604,
      "learning_rate": 1.4392311450251511e-05,
      "loss": 2.4392,
      "step": 27606
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1555336713790894,
      "learning_rate": 1.4391941552892312e-05,
      "loss": 2.5022,
      "step": 27607
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1282726526260376,
      "learning_rate": 1.4391571648087582e-05,
      "loss": 2.4636,
      "step": 27608
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9285855889320374,
      "learning_rate": 1.4391201735837948e-05,
      "loss": 2.3839,
      "step": 27609
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0207648277282715,
      "learning_rate": 1.439083181614403e-05,
      "loss": 2.584,
      "step": 27610
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1349884271621704,
      "learning_rate": 1.4390461889006461e-05,
      "loss": 2.267,
      "step": 27611
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1367847919464111,
      "learning_rate": 1.439009195442587e-05,
      "loss": 2.4519,
      "step": 27612
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1354360580444336,
      "learning_rate": 1.4389722012402875e-05,
      "loss": 2.4034,
      "step": 27613
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0966455936431885,
      "learning_rate": 1.4389352062938114e-05,
      "loss": 2.4155,
      "step": 27614
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1484498977661133,
      "learning_rate": 1.4388982106032206e-05,
      "loss": 2.4081,
      "step": 27615
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9775844216346741,
      "learning_rate": 1.4388612141685782e-05,
      "loss": 2.1228,
      "step": 27616
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9703006744384766,
      "learning_rate": 1.4388242169899468e-05,
      "loss": 2.2857,
      "step": 27617
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.558555841445923,
      "learning_rate": 1.4387872190673893e-05,
      "loss": 2.2874,
      "step": 27618
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1112720966339111,
      "learning_rate": 1.4387502204009682e-05,
      "loss": 2.2931,
      "step": 27619
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0140438079833984,
      "learning_rate": 1.4387132209907462e-05,
      "loss": 2.3036,
      "step": 27620
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9653734564781189,
      "learning_rate": 1.4386762208367864e-05,
      "loss": 2.2974,
      "step": 27621
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1913347244262695,
      "learning_rate": 1.4386392199391512e-05,
      "loss": 2.4357,
      "step": 27622
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0066391229629517,
      "learning_rate": 1.4386022182979033e-05,
      "loss": 2.5599,
      "step": 27623
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3765207529067993,
      "learning_rate": 1.4385652159131054e-05,
      "loss": 2.312,
      "step": 27624
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.083201289176941,
      "learning_rate": 1.4385282127848205e-05,
      "loss": 2.3037,
      "step": 27625
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0935438871383667,
      "learning_rate": 1.4384912089131112e-05,
      "loss": 2.3152,
      "step": 27626
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1785968542099,
      "learning_rate": 1.43845420429804e-05,
      "loss": 2.2545,
      "step": 27627
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0651034116744995,
      "learning_rate": 1.43841719893967e-05,
      "loss": 2.2213,
      "step": 27628
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1190085411071777,
      "learning_rate": 1.438380192838064e-05,
      "loss": 2.3032,
      "step": 27629
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.974201500415802,
      "learning_rate": 1.4383431859932842e-05,
      "loss": 2.4831,
      "step": 27630
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0213221311569214,
      "learning_rate": 1.4383061784053938e-05,
      "loss": 2.3648,
      "step": 27631
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0575886964797974,
      "learning_rate": 1.4382691700744552e-05,
      "loss": 2.3154,
      "step": 27632
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0609978437423706,
      "learning_rate": 1.4382321610005314e-05,
      "loss": 2.5625,
      "step": 27633
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1997884511947632,
      "learning_rate": 1.4381951511836851e-05,
      "loss": 2.3929,
      "step": 27634
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1171715259552002,
      "learning_rate": 1.4381581406239791e-05,
      "loss": 2.2933,
      "step": 27635
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1737682819366455,
      "learning_rate": 1.438121129321476e-05,
      "loss": 2.2903,
      "step": 27636
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0771960020065308,
      "learning_rate": 1.4380841172762385e-05,
      "loss": 2.4531,
      "step": 27637
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0507563352584839,
      "learning_rate": 1.4380471044883295e-05,
      "loss": 2.4312,
      "step": 27638
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1039166450500488,
      "learning_rate": 1.4380100909578119e-05,
      "loss": 2.4193,
      "step": 27639
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0943076610565186,
      "learning_rate": 1.437973076684748e-05,
      "loss": 2.263,
      "step": 27640
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0989749431610107,
      "learning_rate": 1.437936061669201e-05,
      "loss": 2.3074,
      "step": 27641
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0811983346939087,
      "learning_rate": 1.437899045911233e-05,
      "loss": 2.3735,
      "step": 27642
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.140820860862732,
      "learning_rate": 1.4378620294109073e-05,
      "loss": 2.4536,
      "step": 27643
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0004016160964966,
      "learning_rate": 1.437825012168287e-05,
      "loss": 2.4725,
      "step": 27644
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0156989097595215,
      "learning_rate": 1.4377879941834341e-05,
      "loss": 2.4131,
      "step": 27645
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1462435722351074,
      "learning_rate": 1.4377509754564114e-05,
      "loss": 2.386,
      "step": 27646
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.044326901435852,
      "learning_rate": 1.4377139559872822e-05,
      "loss": 2.3635,
      "step": 27647
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1166832447052002,
      "learning_rate": 1.4376769357761087e-05,
      "loss": 2.3008,
      "step": 27648
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2229794263839722,
      "learning_rate": 1.4376399148229543e-05,
      "loss": 2.3386,
      "step": 27649
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9946218132972717,
      "learning_rate": 1.4376028931278811e-05,
      "loss": 2.4835,
      "step": 27650
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0924265384674072,
      "learning_rate": 1.4375658706909524e-05,
      "loss": 2.3476,
      "step": 27651
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1527818441390991,
      "learning_rate": 1.4375288475122301e-05,
      "loss": 2.4785,
      "step": 27652
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1385434865951538,
      "learning_rate": 1.4374918235917781e-05,
      "loss": 2.4823,
      "step": 27653
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1191141605377197,
      "learning_rate": 1.4374547989296586e-05,
      "loss": 2.5482,
      "step": 27654
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1093051433563232,
      "learning_rate": 1.4374177735259344e-05,
      "loss": 2.5625,
      "step": 27655
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1334797143936157,
      "learning_rate": 1.4373807473806682e-05,
      "loss": 2.4211,
      "step": 27656
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3318742513656616,
      "learning_rate": 1.4373437204939228e-05,
      "loss": 2.2843,
      "step": 27657
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.216372013092041,
      "learning_rate": 1.4373066928657613e-05,
      "loss": 2.3306,
      "step": 27658
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0422779321670532,
      "learning_rate": 1.4372696644962458e-05,
      "loss": 2.2087,
      "step": 27659
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.027632713317871,
      "learning_rate": 1.4372326353854396e-05,
      "loss": 2.3811,
      "step": 27660
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0491840839385986,
      "learning_rate": 1.4371956055334054e-05,
      "loss": 2.237,
      "step": 27661
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.128162145614624,
      "learning_rate": 1.437158574940206e-05,
      "loss": 2.3994,
      "step": 27662
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0890698432922363,
      "learning_rate": 1.4371215436059038e-05,
      "loss": 2.3438,
      "step": 27663
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2092890739440918,
      "learning_rate": 1.437084511530562e-05,
      "loss": 2.4788,
      "step": 27664
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2889325618743896,
      "learning_rate": 1.4370474787142432e-05,
      "loss": 2.5156,
      "step": 27665
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0276248455047607,
      "learning_rate": 1.4370104451570104e-05,
      "loss": 2.3518,
      "step": 27666
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0485154390335083,
      "learning_rate": 1.436973410858926e-05,
      "loss": 2.4214,
      "step": 27667
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.155950903892517,
      "learning_rate": 1.4369363758200532e-05,
      "loss": 2.4557,
      "step": 27668
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.047241449356079,
      "learning_rate": 1.4368993400404543e-05,
      "loss": 2.5266,
      "step": 27669
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0281212329864502,
      "learning_rate": 1.4368623035201925e-05,
      "loss": 2.4837,
      "step": 27670
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.176326870918274,
      "learning_rate": 1.4368252662593305e-05,
      "loss": 2.2302,
      "step": 27671
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1079142093658447,
      "learning_rate": 1.4367882282579311e-05,
      "loss": 2.3534,
      "step": 27672
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.513823390007019,
      "learning_rate": 1.436751189516057e-05,
      "loss": 2.3555,
      "step": 27673
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.317142128944397,
      "learning_rate": 1.4367141500337707e-05,
      "loss": 2.3576,
      "step": 27674
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1222922801971436,
      "learning_rate": 1.4366771098111355e-05,
      "loss": 2.2561,
      "step": 27675
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0077056884765625,
      "learning_rate": 1.436640068848214e-05,
      "loss": 2.57,
      "step": 27676
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.080261468887329,
      "learning_rate": 1.436603027145069e-05,
      "loss": 2.3832,
      "step": 27677
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0433810949325562,
      "learning_rate": 1.4365659847017634e-05,
      "loss": 2.4344,
      "step": 27678
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.085142970085144,
      "learning_rate": 1.4365289415183598e-05,
      "loss": 2.127,
      "step": 27679
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2508208751678467,
      "learning_rate": 1.436491897594921e-05,
      "loss": 2.5649,
      "step": 27680
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0888566970825195,
      "learning_rate": 1.43645485293151e-05,
      "loss": 2.3284,
      "step": 27681
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0677073001861572,
      "learning_rate": 1.4364178075281893e-05,
      "loss": 2.4619,
      "step": 27682
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1491597890853882,
      "learning_rate": 1.4363807613850222e-05,
      "loss": 2.5115,
      "step": 27683
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0331203937530518,
      "learning_rate": 1.4363437145020709e-05,
      "loss": 2.2897,
      "step": 27684
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9460068941116333,
      "learning_rate": 1.4363066668793986e-05,
      "loss": 2.4604,
      "step": 27685
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1678987741470337,
      "learning_rate": 1.4362696185170682e-05,
      "loss": 2.4553,
      "step": 27686
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2757984399795532,
      "learning_rate": 1.4362325694151418e-05,
      "loss": 2.5072,
      "step": 27687
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0088480710983276,
      "learning_rate": 1.4361955195736835e-05,
      "loss": 2.2335,
      "step": 27688
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0086981058120728,
      "learning_rate": 1.4361584689927546e-05,
      "loss": 2.4856,
      "step": 27689
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2072045803070068,
      "learning_rate": 1.4361214176724189e-05,
      "loss": 2.3527,
      "step": 27690
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1669431924819946,
      "learning_rate": 1.436084365612739e-05,
      "loss": 2.4166,
      "step": 27691
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9484556913375854,
      "learning_rate": 1.4360473128137776e-05,
      "loss": 2.2355,
      "step": 27692
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1721107959747314,
      "learning_rate": 1.4360102592755973e-05,
      "loss": 2.1649,
      "step": 27693
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0398555994033813,
      "learning_rate": 1.4359732049982615e-05,
      "loss": 2.5686,
      "step": 27694
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.108858346939087,
      "learning_rate": 1.435936149981833e-05,
      "loss": 2.4861,
      "step": 27695
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0773581266403198,
      "learning_rate": 1.435899094226374e-05,
      "loss": 2.2048,
      "step": 27696
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1463464498519897,
      "learning_rate": 1.4358620377319474e-05,
      "loss": 2.5842,
      "step": 27697
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9636586308479309,
      "learning_rate": 1.4358249804986166e-05,
      "loss": 2.4112,
      "step": 27698
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0904868841171265,
      "learning_rate": 1.4357879225264441e-05,
      "loss": 2.4052,
      "step": 27699
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1569799184799194,
      "learning_rate": 1.4357508638154927e-05,
      "loss": 2.5383,
      "step": 27700
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0495542287826538,
      "learning_rate": 1.4357138043658249e-05,
      "loss": 2.4065,
      "step": 27701
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9893547296524048,
      "learning_rate": 1.435676744177504e-05,
      "loss": 2.7375,
      "step": 27702
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0007450580596924,
      "learning_rate": 1.4356396832505932e-05,
      "loss": 2.264,
      "step": 27703
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0543968677520752,
      "learning_rate": 1.4356026215851544e-05,
      "loss": 2.4496,
      "step": 27704
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1266424655914307,
      "learning_rate": 1.4355655591812509e-05,
      "loss": 2.3537,
      "step": 27705
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.937724769115448,
      "learning_rate": 1.4355284960389453e-05,
      "loss": 2.4855,
      "step": 27706
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2620402574539185,
      "learning_rate": 1.435491432158301e-05,
      "loss": 2.4433,
      "step": 27707
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1253855228424072,
      "learning_rate": 1.4354543675393801e-05,
      "loss": 2.2361,
      "step": 27708
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0453654527664185,
      "learning_rate": 1.4354173021822459e-05,
      "loss": 2.2944,
      "step": 27709
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0232747793197632,
      "learning_rate": 1.435380236086961e-05,
      "loss": 2.4704,
      "step": 27710
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0818253755569458,
      "learning_rate": 1.4353431692535886e-05,
      "loss": 2.2102,
      "step": 27711
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4521914720535278,
      "learning_rate": 1.4353061016821912e-05,
      "loss": 2.5015,
      "step": 27712
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3569252490997314,
      "learning_rate": 1.4352690333728316e-05,
      "loss": 2.2379,
      "step": 27713
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.125306248664856,
      "learning_rate": 1.4352319643255728e-05,
      "loss": 2.303,
      "step": 27714
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3693586587905884,
      "learning_rate": 1.4351948945404779e-05,
      "loss": 2.3315,
      "step": 27715
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6493024826049805,
      "learning_rate": 1.4351578240176092e-05,
      "loss": 2.2987,
      "step": 27716
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0258721113204956,
      "learning_rate": 1.4351207527570297e-05,
      "loss": 2.3017,
      "step": 27717
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2078137397766113,
      "learning_rate": 1.4350836807588024e-05,
      "loss": 2.337,
      "step": 27718
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0884897708892822,
      "learning_rate": 1.4350466080229902e-05,
      "loss": 2.4251,
      "step": 27719
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.98102867603302,
      "learning_rate": 1.4350095345496558e-05,
      "loss": 2.4665,
      "step": 27720
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0680540800094604,
      "learning_rate": 1.4349724603388623e-05,
      "loss": 2.3842,
      "step": 27721
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9788375496864319,
      "learning_rate": 1.434935385390672e-05,
      "loss": 2.3188,
      "step": 27722
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0223770141601562,
      "learning_rate": 1.4348983097051483e-05,
      "loss": 2.4774,
      "step": 27723
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3544563055038452,
      "learning_rate": 1.4348612332823538e-05,
      "loss": 2.397,
      "step": 27724
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0222375392913818,
      "learning_rate": 1.4348241561223512e-05,
      "loss": 2.3169,
      "step": 27725
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0438978672027588,
      "learning_rate": 1.4347870782252038e-05,
      "loss": 2.2089,
      "step": 27726
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2378884553909302,
      "learning_rate": 1.4347499995909742e-05,
      "loss": 2.3375,
      "step": 27727
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.102639079093933,
      "learning_rate": 1.4347129202197252e-05,
      "loss": 2.211,
      "step": 27728
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7916393280029297,
      "learning_rate": 1.43467584011152e-05,
      "loss": 2.4679,
      "step": 27729
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.975410521030426,
      "learning_rate": 1.434638759266421e-05,
      "loss": 2.2039,
      "step": 27730
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3637839555740356,
      "learning_rate": 1.434601677684491e-05,
      "loss": 2.3668,
      "step": 27731
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.096178650856018,
      "learning_rate": 1.4345645953657934e-05,
      "loss": 2.4545,
      "step": 27732
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.246091604232788,
      "learning_rate": 1.4345275123103907e-05,
      "loss": 2.4562,
      "step": 27733
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0821025371551514,
      "learning_rate": 1.434490428518346e-05,
      "loss": 2.3352,
      "step": 27734
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9951163530349731,
      "learning_rate": 1.434453343989722e-05,
      "loss": 2.3606,
      "step": 27735
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1182790994644165,
      "learning_rate": 1.4344162587245813e-05,
      "loss": 2.303,
      "step": 27736
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0123564004898071,
      "learning_rate": 1.4343791727229875e-05,
      "loss": 2.455,
      "step": 27737
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1005796194076538,
      "learning_rate": 1.4343420859850027e-05,
      "loss": 2.4612,
      "step": 27738
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9982873797416687,
      "learning_rate": 1.43430499851069e-05,
      "loss": 2.2244,
      "step": 27739
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2091976404190063,
      "learning_rate": 1.4342679103001124e-05,
      "loss": 2.3972,
      "step": 27740
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1441051959991455,
      "learning_rate": 1.4342308213533332e-05,
      "loss": 2.2282,
      "step": 27741
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.198081612586975,
      "learning_rate": 1.4341937316704144e-05,
      "loss": 2.1431,
      "step": 27742
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2061668634414673,
      "learning_rate": 1.4341566412514194e-05,
      "loss": 2.4536,
      "step": 27743
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.077397346496582,
      "learning_rate": 1.4341195500964108e-05,
      "loss": 2.35,
      "step": 27744
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3232676982879639,
      "learning_rate": 1.434082458205452e-05,
      "loss": 2.4413,
      "step": 27745
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.078619360923767,
      "learning_rate": 1.4340453655786053e-05,
      "loss": 2.3871,
      "step": 27746
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0048710107803345,
      "learning_rate": 1.4340082722159338e-05,
      "loss": 2.4426,
      "step": 27747
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2662222385406494,
      "learning_rate": 1.4339711781175007e-05,
      "loss": 2.1997,
      "step": 27748
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1470881700515747,
      "learning_rate": 1.4339340832833683e-05,
      "loss": 2.4709,
      "step": 27749
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0087758302688599,
      "learning_rate": 1.4338969877135998e-05,
      "loss": 2.5426,
      "step": 27750
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.223044753074646,
      "learning_rate": 1.4338598914082583e-05,
      "loss": 2.5311,
      "step": 27751
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.103157877922058,
      "learning_rate": 1.4338227943674062e-05,
      "loss": 2.3777,
      "step": 27752
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.161304235458374,
      "learning_rate": 1.4337856965911065e-05,
      "loss": 2.3726,
      "step": 27753
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.176419734954834,
      "learning_rate": 1.4337485980794225e-05,
      "loss": 2.488,
      "step": 27754
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2633540630340576,
      "learning_rate": 1.433711498832417e-05,
      "loss": 2.5332,
      "step": 27755
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1243541240692139,
      "learning_rate": 1.4336743988501524e-05,
      "loss": 2.2114,
      "step": 27756
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0355424880981445,
      "learning_rate": 1.433637298132692e-05,
      "loss": 2.645,
      "step": 27757
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0321961641311646,
      "learning_rate": 1.4336001966800988e-05,
      "loss": 2.433,
      "step": 27758
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0534565448760986,
      "learning_rate": 1.4335630944924353e-05,
      "loss": 2.4358,
      "step": 27759
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.656229019165039,
      "learning_rate": 1.4335259915697644e-05,
      "loss": 2.4482,
      "step": 27760
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9984132647514343,
      "learning_rate": 1.4334888879121494e-05,
      "loss": 2.3902,
      "step": 27761
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1499953269958496,
      "learning_rate": 1.433451783519653e-05,
      "loss": 2.2145,
      "step": 27762
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0180660486221313,
      "learning_rate": 1.4334146783923382e-05,
      "loss": 2.4793,
      "step": 27763
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.233640432357788,
      "learning_rate": 1.4333775725302676e-05,
      "loss": 2.3902,
      "step": 27764
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2842289209365845,
      "learning_rate": 1.4333404659335044e-05,
      "loss": 2.4851,
      "step": 27765
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0639456510543823,
      "learning_rate": 1.4333033586021115e-05,
      "loss": 2.4304,
      "step": 27766
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.072412371635437,
      "learning_rate": 1.4332662505361517e-05,
      "loss": 2.2364,
      "step": 27767
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0800021886825562,
      "learning_rate": 1.4332291417356881e-05,
      "loss": 2.2427,
      "step": 27768
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.010055422782898,
      "learning_rate": 1.433192032200783e-05,
      "loss": 2.5245,
      "step": 27769
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.126446008682251,
      "learning_rate": 1.4331549219315e-05,
      "loss": 2.4764,
      "step": 27770
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2505242824554443,
      "learning_rate": 1.4331178109279016e-05,
      "loss": 2.2628,
      "step": 27771
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0942901372909546,
      "learning_rate": 1.433080699190051e-05,
      "loss": 2.3447,
      "step": 27772
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9580111503601074,
      "learning_rate": 1.433043586718011e-05,
      "loss": 2.4784,
      "step": 27773
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9769638776779175,
      "learning_rate": 1.4330064735118448e-05,
      "loss": 2.392,
      "step": 27774
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0083974599838257,
      "learning_rate": 1.4329693595716148e-05,
      "loss": 2.2736,
      "step": 27775
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1539216041564941,
      "learning_rate": 1.432932244897384e-05,
      "loss": 2.5411,
      "step": 27776
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1790958642959595,
      "learning_rate": 1.4328951294892154e-05,
      "loss": 2.4765,
      "step": 27777
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0420194864273071,
      "learning_rate": 1.432858013347172e-05,
      "loss": 2.2428,
      "step": 27778
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0302786827087402,
      "learning_rate": 1.4328208964713169e-05,
      "loss": 2.4502,
      "step": 27779
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1619173288345337,
      "learning_rate": 1.4327837788617127e-05,
      "loss": 2.5867,
      "step": 27780
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0995351076126099,
      "learning_rate": 1.4327466605184227e-05,
      "loss": 2.337,
      "step": 27781
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0767539739608765,
      "learning_rate": 1.4327095414415095e-05,
      "loss": 2.3217,
      "step": 27782
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.135883092880249,
      "learning_rate": 1.432672421631036e-05,
      "loss": 2.3764,
      "step": 27783
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9769971370697021,
      "learning_rate": 1.432635301087065e-05,
      "loss": 2.451,
      "step": 27784
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9965095520019531,
      "learning_rate": 1.4325981798096599e-05,
      "loss": 2.295,
      "step": 27785
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.101986289024353,
      "learning_rate": 1.4325610577988835e-05,
      "loss": 2.2954,
      "step": 27786
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9916412234306335,
      "learning_rate": 1.4325239350547987e-05,
      "loss": 2.4746,
      "step": 27787
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0235987901687622,
      "learning_rate": 1.4324868115774679e-05,
      "loss": 2.3344,
      "step": 27788
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1636953353881836,
      "learning_rate": 1.432449687366955e-05,
      "loss": 2.4913,
      "step": 27789
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1033847332000732,
      "learning_rate": 1.4324125624233223e-05,
      "loss": 2.3441,
      "step": 27790
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0980149507522583,
      "learning_rate": 1.4323754367466327e-05,
      "loss": 2.4603,
      "step": 27791
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.941527783870697,
      "learning_rate": 1.4323383103369493e-05,
      "loss": 2.635,
      "step": 27792
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.990856945514679,
      "learning_rate": 1.4323011831943352e-05,
      "loss": 2.3963,
      "step": 27793
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1257755756378174,
      "learning_rate": 1.432264055318853e-05,
      "loss": 2.3917,
      "step": 27794
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0637996196746826,
      "learning_rate": 1.432226926710566e-05,
      "loss": 2.188,
      "step": 27795
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0524932146072388,
      "learning_rate": 1.4321897973695369e-05,
      "loss": 2.386,
      "step": 27796
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.132891058921814,
      "learning_rate": 1.4321526672958286e-05,
      "loss": 2.3577,
      "step": 27797
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1422706842422485,
      "learning_rate": 1.4321155364895042e-05,
      "loss": 2.4306,
      "step": 27798
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9979203939437866,
      "learning_rate": 1.4320784049506267e-05,
      "loss": 2.1919,
      "step": 27799
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9903660416603088,
      "learning_rate": 1.4320412726792589e-05,
      "loss": 2.3279,
      "step": 27800
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9847023487091064,
      "learning_rate": 1.4320041396754638e-05,
      "loss": 2.1431,
      "step": 27801
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0136475563049316,
      "learning_rate": 1.4319670059393045e-05,
      "loss": 2.4264,
      "step": 27802
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0613560676574707,
      "learning_rate": 1.4319298714708437e-05,
      "loss": 2.532,
      "step": 27803
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0515919923782349,
      "learning_rate": 1.4318927362701443e-05,
      "loss": 2.3859,
      "step": 27804
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.045433521270752,
      "learning_rate": 1.4318556003372694e-05,
      "loss": 2.5678,
      "step": 27805
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1361775398254395,
      "learning_rate": 1.4318184636722822e-05,
      "loss": 2.2344,
      "step": 27806
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2536762952804565,
      "learning_rate": 1.4317813262752455e-05,
      "loss": 2.3811,
      "step": 27807
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0889415740966797,
      "learning_rate": 1.431744188146222e-05,
      "loss": 2.4727,
      "step": 27808
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.996760368347168,
      "learning_rate": 1.4317070492852748e-05,
      "loss": 2.1922,
      "step": 27809
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1462175846099854,
      "learning_rate": 1.431669909692467e-05,
      "loss": 2.4246,
      "step": 27810
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.072821855545044,
      "learning_rate": 1.431632769367861e-05,
      "loss": 2.2629,
      "step": 27811
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2152509689331055,
      "learning_rate": 1.4315956283115209e-05,
      "loss": 2.3856,
      "step": 27812
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0302237272262573,
      "learning_rate": 1.4315584865235087e-05,
      "loss": 2.4679,
      "step": 27813
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0955506563186646,
      "learning_rate": 1.4315213440038878e-05,
      "loss": 2.1514,
      "step": 27814
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0714781284332275,
      "learning_rate": 1.431484200752721e-05,
      "loss": 2.4515,
      "step": 27815
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9906691312789917,
      "learning_rate": 1.4314470567700712e-05,
      "loss": 2.3659,
      "step": 27816
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.305211067199707,
      "learning_rate": 1.4314099120560015e-05,
      "loss": 2.2361,
      "step": 27817
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2200987339019775,
      "learning_rate": 1.4313727666105748e-05,
      "loss": 2.4631,
      "step": 27818
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.092995524406433,
      "learning_rate": 1.431335620433854e-05,
      "loss": 2.4745,
      "step": 27819
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0805145502090454,
      "learning_rate": 1.4312984735259021e-05,
      "loss": 2.3134,
      "step": 27820
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1933059692382812,
      "learning_rate": 1.4312613258867827e-05,
      "loss": 2.4239,
      "step": 27821
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9838456511497498,
      "learning_rate": 1.4312241775165577e-05,
      "loss": 2.4081,
      "step": 27822
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0187219381332397,
      "learning_rate": 1.4311870284152907e-05,
      "loss": 2.5411,
      "step": 27823
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0062650442123413,
      "learning_rate": 1.4311498785830447e-05,
      "loss": 2.3198,
      "step": 27824
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0503127574920654,
      "learning_rate": 1.4311127280198824e-05,
      "loss": 2.3926,
      "step": 27825
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0211336612701416,
      "learning_rate": 1.4310755767258668e-05,
      "loss": 2.2897,
      "step": 27826
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.049238681793213,
      "learning_rate": 1.4310384247010615e-05,
      "loss": 2.4735,
      "step": 27827
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0917845964431763,
      "learning_rate": 1.4310012719455286e-05,
      "loss": 2.446,
      "step": 27828
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.084378957748413,
      "learning_rate": 1.4309641184593317e-05,
      "loss": 2.3324,
      "step": 27829
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0566227436065674,
      "learning_rate": 1.4309269642425334e-05,
      "loss": 2.5946,
      "step": 27830
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0350338220596313,
      "learning_rate": 1.4308898092951968e-05,
      "loss": 2.3539,
      "step": 27831
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0982654094696045,
      "learning_rate": 1.4308526536173853e-05,
      "loss": 2.5591,
      "step": 27832
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.058939814567566,
      "learning_rate": 1.4308154972091614e-05,
      "loss": 2.3187,
      "step": 27833
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.089503526687622,
      "learning_rate": 1.4307783400705882e-05,
      "loss": 2.3601,
      "step": 27834
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.118795394897461,
      "learning_rate": 1.4307411822017286e-05,
      "loss": 2.5475,
      "step": 27835
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1000250577926636,
      "learning_rate": 1.4307040236026457e-05,
      "loss": 2.3846,
      "step": 27836
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0729596614837646,
      "learning_rate": 1.4306668642734025e-05,
      "loss": 2.5395,
      "step": 27837
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0052410364151,
      "learning_rate": 1.430629704214062e-05,
      "loss": 2.5164,
      "step": 27838
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0363078117370605,
      "learning_rate": 1.4305925434246873e-05,
      "loss": 2.6876,
      "step": 27839
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9519100785255432,
      "learning_rate": 1.4305553819053413e-05,
      "loss": 2.2832,
      "step": 27840
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0453954935073853,
      "learning_rate": 1.4305182196560869e-05,
      "loss": 2.47,
      "step": 27841
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9375134706497192,
      "learning_rate": 1.4304810566769874e-05,
      "loss": 2.2426,
      "step": 27842
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1204967498779297,
      "learning_rate": 1.4304438929681052e-05,
      "loss": 2.4197,
      "step": 27843
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0332350730895996,
      "learning_rate": 1.430406728529504e-05,
      "loss": 2.6271,
      "step": 27844
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0007479190826416,
      "learning_rate": 1.4303695633612464e-05,
      "loss": 2.3863,
      "step": 27845
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0037800073623657,
      "learning_rate": 1.4303323974633957e-05,
      "loss": 2.4298,
      "step": 27846
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1263805627822876,
      "learning_rate": 1.4302952308360144e-05,
      "loss": 2.3481,
      "step": 27847
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0512521266937256,
      "learning_rate": 1.4302580634791662e-05,
      "loss": 2.7124,
      "step": 27848
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0030714273452759,
      "learning_rate": 1.4302208953929136e-05,
      "loss": 2.4741,
      "step": 27849
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1122370958328247,
      "learning_rate": 1.4301837265773193e-05,
      "loss": 2.5248,
      "step": 27850
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1206741333007812,
      "learning_rate": 1.430146557032447e-05,
      "loss": 2.3173,
      "step": 27851
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9738208651542664,
      "learning_rate": 1.4301093867583595e-05,
      "loss": 2.2551,
      "step": 27852
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0670957565307617,
      "learning_rate": 1.4300722157551202e-05,
      "loss": 2.4835,
      "step": 27853
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.954613447189331,
      "learning_rate": 1.430035044022791e-05,
      "loss": 2.5133,
      "step": 27854
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.073051929473877,
      "learning_rate": 1.429997871561436e-05,
      "loss": 2.4787,
      "step": 27855
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1045719385147095,
      "learning_rate": 1.4299606983711176e-05,
      "loss": 2.3688,
      "step": 27856
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1392310857772827,
      "learning_rate": 1.4299235244518992e-05,
      "loss": 2.4001,
      "step": 27857
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1601426601409912,
      "learning_rate": 1.4298863498038436e-05,
      "loss": 2.4335,
      "step": 27858
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0498450994491577,
      "learning_rate": 1.4298491744270138e-05,
      "loss": 2.6193,
      "step": 27859
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1435160636901855,
      "learning_rate": 1.4298119983214732e-05,
      "loss": 2.2211,
      "step": 27860
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1101182699203491,
      "learning_rate": 1.4297748214872841e-05,
      "loss": 2.4252,
      "step": 27861
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0673885345458984,
      "learning_rate": 1.4297376439245104e-05,
      "loss": 2.2409,
      "step": 27862
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.069488525390625,
      "learning_rate": 1.4297004656332142e-05,
      "loss": 2.492,
      "step": 27863
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0929408073425293,
      "learning_rate": 1.4296632866134594e-05,
      "loss": 2.3658,
      "step": 27864
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1348127126693726,
      "learning_rate": 1.4296261068653083e-05,
      "loss": 2.469,
      "step": 27865
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.076577067375183,
      "learning_rate": 1.4295889263888247e-05,
      "loss": 2.2739,
      "step": 27866
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0310964584350586,
      "learning_rate": 1.429551745184071e-05,
      "loss": 2.3279,
      "step": 27867
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.125459909439087,
      "learning_rate": 1.4295145632511102e-05,
      "loss": 2.4956,
      "step": 27868
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.234339952468872,
      "learning_rate": 1.4294773805900057e-05,
      "loss": 2.4885,
      "step": 27869
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1446322202682495,
      "learning_rate": 1.4294401972008204e-05,
      "loss": 2.3778,
      "step": 27870
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.089430570602417,
      "learning_rate": 1.4294030130836175e-05,
      "loss": 2.3937,
      "step": 27871
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0197197198867798,
      "learning_rate": 1.4293658282384596e-05,
      "loss": 2.3297,
      "step": 27872
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1213889122009277,
      "learning_rate": 1.4293286426654103e-05,
      "loss": 2.5043,
      "step": 27873
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1082147359848022,
      "learning_rate": 1.4292914563645322e-05,
      "loss": 2.5426,
      "step": 27874
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0086978673934937,
      "learning_rate": 1.4292542693358884e-05,
      "loss": 2.2497,
      "step": 27875
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0802725553512573,
      "learning_rate": 1.4292170815795424e-05,
      "loss": 2.1939,
      "step": 27876
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1868796348571777,
      "learning_rate": 1.4291798930955564e-05,
      "loss": 2.572,
      "step": 27877
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.090529441833496,
      "learning_rate": 1.4291427038839941e-05,
      "loss": 2.3253,
      "step": 27878
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9825701713562012,
      "learning_rate": 1.4291055139449184e-05,
      "loss": 2.2494,
      "step": 27879
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0800127983093262,
      "learning_rate": 1.4290683232783924e-05,
      "loss": 2.3273,
      "step": 27880
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0141117572784424,
      "learning_rate": 1.429031131884479e-05,
      "loss": 2.4476,
      "step": 27881
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0600719451904297,
      "learning_rate": 1.428993939763241e-05,
      "loss": 2.4596,
      "step": 27882
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0383930206298828,
      "learning_rate": 1.428956746914742e-05,
      "loss": 2.36,
      "step": 27883
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.044411063194275,
      "learning_rate": 1.428919553339045e-05,
      "loss": 2.3612,
      "step": 27884
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1613919734954834,
      "learning_rate": 1.4288823590362124e-05,
      "loss": 2.5041,
      "step": 27885
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9798471927642822,
      "learning_rate": 1.428845164006308e-05,
      "loss": 2.2376,
      "step": 27886
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0567028522491455,
      "learning_rate": 1.4288079682493947e-05,
      "loss": 2.55,
      "step": 27887
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9610081911087036,
      "learning_rate": 1.4287707717655352e-05,
      "loss": 2.5005,
      "step": 27888
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.03166925907135,
      "learning_rate": 1.428733574554793e-05,
      "loss": 2.7373,
      "step": 27889
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.15934419631958,
      "learning_rate": 1.4286963766172308e-05,
      "loss": 2.4271,
      "step": 27890
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.103288173675537,
      "learning_rate": 1.4286591779529117e-05,
      "loss": 2.4092,
      "step": 27891
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0505683422088623,
      "learning_rate": 1.428621978561899e-05,
      "loss": 2.5309,
      "step": 27892
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1285465955734253,
      "learning_rate": 1.4285847784442559e-05,
      "loss": 2.1257,
      "step": 27893
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.990798830986023,
      "learning_rate": 1.4285475776000448e-05,
      "loss": 2.4278,
      "step": 27894
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.182531714439392,
      "learning_rate": 1.4285103760293294e-05,
      "loss": 2.2746,
      "step": 27895
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9752938747406006,
      "learning_rate": 1.4284731737321723e-05,
      "loss": 2.4924,
      "step": 27896
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0347952842712402,
      "learning_rate": 1.428435970708637e-05,
      "loss": 2.2761,
      "step": 27897
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0843216180801392,
      "learning_rate": 1.4283987669587863e-05,
      "loss": 2.2572,
      "step": 27898
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2025152444839478,
      "learning_rate": 1.4283615624826834e-05,
      "loss": 2.4665,
      "step": 27899
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0293998718261719,
      "learning_rate": 1.4283243572803913e-05,
      "loss": 2.4653,
      "step": 27900
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0041258335113525,
      "learning_rate": 1.4282871513519729e-05,
      "loss": 2.3835,
      "step": 27901
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0861235857009888,
      "learning_rate": 1.4282499446974915e-05,
      "loss": 2.3426,
      "step": 27902
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0095804929733276,
      "learning_rate": 1.4282127373170105e-05,
      "loss": 2.4491,
      "step": 27903
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0916918516159058,
      "learning_rate": 1.4281755292105923e-05,
      "loss": 2.1525,
      "step": 27904
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0126398801803589,
      "learning_rate": 1.4281383203783004e-05,
      "loss": 2.446,
      "step": 27905
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.214217185974121,
      "learning_rate": 1.4281011108201977e-05,
      "loss": 2.6569,
      "step": 27906
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1670584678649902,
      "learning_rate": 1.4280639005363475e-05,
      "loss": 2.4553,
      "step": 27907
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4244440793991089,
      "learning_rate": 1.4280266895268124e-05,
      "loss": 2.3282,
      "step": 27908
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0790098905563354,
      "learning_rate": 1.4279894777916559e-05,
      "loss": 2.5277,
      "step": 27909
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9813265800476074,
      "learning_rate": 1.4279522653309412e-05,
      "loss": 2.2888,
      "step": 27910
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0792748928070068,
      "learning_rate": 1.427915052144731e-05,
      "loss": 2.4257,
      "step": 27911
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.024742841720581,
      "learning_rate": 1.4278778382330887e-05,
      "loss": 2.2831,
      "step": 27912
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.098714828491211,
      "learning_rate": 1.4278406235960772e-05,
      "loss": 2.4999,
      "step": 27913
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1936593055725098,
      "learning_rate": 1.4278034082337595e-05,
      "loss": 2.4619,
      "step": 27914
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1611831188201904,
      "learning_rate": 1.4277661921461989e-05,
      "loss": 2.628,
      "step": 27915
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1547185182571411,
      "learning_rate": 1.4277289753334584e-05,
      "loss": 2.537,
      "step": 27916
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1288886070251465,
      "learning_rate": 1.4276917577956013e-05,
      "loss": 2.1548,
      "step": 27917
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0859962701797485,
      "learning_rate": 1.4276545395326905e-05,
      "loss": 2.6437,
      "step": 27918
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3117351531982422,
      "learning_rate": 1.4276173205447889e-05,
      "loss": 2.3354,
      "step": 27919
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0958611965179443,
      "learning_rate": 1.42758010083196e-05,
      "loss": 2.4784,
      "step": 27920
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0384559631347656,
      "learning_rate": 1.4275428803942666e-05,
      "loss": 2.4952,
      "step": 27921
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.029222011566162,
      "learning_rate": 1.427505659231772e-05,
      "loss": 2.5663,
      "step": 27922
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3930622339248657,
      "learning_rate": 1.4274684373445388e-05,
      "loss": 2.2329,
      "step": 27923
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2071821689605713,
      "learning_rate": 1.4274312147326308e-05,
      "loss": 2.3702,
      "step": 27924
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2089886665344238,
      "learning_rate": 1.427393991396111e-05,
      "loss": 2.2076,
      "step": 27925
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9233846664428711,
      "learning_rate": 1.4273567673350422e-05,
      "loss": 2.2045,
      "step": 27926
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0379823446273804,
      "learning_rate": 1.4273195425494875e-05,
      "loss": 2.3837,
      "step": 27927
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9890062808990479,
      "learning_rate": 1.42728231703951e-05,
      "loss": 2.5742,
      "step": 27928
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0755783319473267,
      "learning_rate": 1.4272450908051731e-05,
      "loss": 2.3803,
      "step": 27929
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2243671417236328,
      "learning_rate": 1.4272078638465397e-05,
      "loss": 2.692,
      "step": 27930
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.238844871520996,
      "learning_rate": 1.4271706361636728e-05,
      "loss": 2.4353,
      "step": 27931
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1933314800262451,
      "learning_rate": 1.4271334077566361e-05,
      "loss": 2.3858,
      "step": 27932
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0140162706375122,
      "learning_rate": 1.4270961786254919e-05,
      "loss": 2.5583,
      "step": 27933
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.134680986404419,
      "learning_rate": 1.4270589487703036e-05,
      "loss": 2.5017,
      "step": 27934
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.10580313205719,
      "learning_rate": 1.4270217181911348e-05,
      "loss": 2.0481,
      "step": 27935
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0077892541885376,
      "learning_rate": 1.4269844868880479e-05,
      "loss": 2.6026,
      "step": 27936
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2726854085922241,
      "learning_rate": 1.4269472548611063e-05,
      "loss": 2.2452,
      "step": 27937
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0717618465423584,
      "learning_rate": 1.4269100221103731e-05,
      "loss": 2.4328,
      "step": 27938
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.112436294555664,
      "learning_rate": 1.4268727886359117e-05,
      "loss": 2.1747,
      "step": 27939
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1350593566894531,
      "learning_rate": 1.426835554437785e-05,
      "loss": 2.4258,
      "step": 27940
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9370448589324951,
      "learning_rate": 1.426798319516056e-05,
      "loss": 2.179,
      "step": 27941
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1796643733978271,
      "learning_rate": 1.4267610838707877e-05,
      "loss": 2.3936,
      "step": 27942
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0906331539154053,
      "learning_rate": 1.4267238475020438e-05,
      "loss": 2.6134,
      "step": 27943
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9827271699905396,
      "learning_rate": 1.426686610409887e-05,
      "loss": 2.667,
      "step": 27944
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2749121189117432,
      "learning_rate": 1.4266493725943806e-05,
      "loss": 2.5213,
      "step": 27945
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1251238584518433,
      "learning_rate": 1.4266121340555874e-05,
      "loss": 2.6401,
      "step": 27946
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1738266944885254,
      "learning_rate": 1.4265748947935709e-05,
      "loss": 2.4038,
      "step": 27947
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0725407600402832,
      "learning_rate": 1.426537654808394e-05,
      "loss": 2.3737,
      "step": 27948
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0469837188720703,
      "learning_rate": 1.42650041410012e-05,
      "loss": 2.3234,
      "step": 27949
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0093907117843628,
      "learning_rate": 1.426463172668812e-05,
      "loss": 2.5399,
      "step": 27950
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0357730388641357,
      "learning_rate": 1.4264259305145326e-05,
      "loss": 2.2735,
      "step": 27951
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1774253845214844,
      "learning_rate": 1.4263886876373463e-05,
      "loss": 2.1265,
      "step": 27952
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.054063081741333,
      "learning_rate": 1.4263514440373149e-05,
      "loss": 2.3721,
      "step": 27953
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.030768632888794,
      "learning_rate": 1.426314199714502e-05,
      "loss": 2.4626,
      "step": 27954
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0433682203292847,
      "learning_rate": 1.4262769546689708e-05,
      "loss": 2.2145,
      "step": 27955
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0850870609283447,
      "learning_rate": 1.4262397089007843e-05,
      "loss": 2.5281,
      "step": 27956
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1719740629196167,
      "learning_rate": 1.4262024624100058e-05,
      "loss": 2.2934,
      "step": 27957
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0555542707443237,
      "learning_rate": 1.4261652151966985e-05,
      "loss": 2.3745,
      "step": 27958
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0799814462661743,
      "learning_rate": 1.4261279672609254e-05,
      "loss": 2.5184,
      "step": 27959
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0807955265045166,
      "learning_rate": 1.4260907186027495e-05,
      "loss": 2.4301,
      "step": 27960
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1156439781188965,
      "learning_rate": 1.4260534692222341e-05,
      "loss": 2.3115,
      "step": 27961
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9868156909942627,
      "learning_rate": 1.4260162191194424e-05,
      "loss": 2.0434,
      "step": 27962
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0228867530822754,
      "learning_rate": 1.4259789682944376e-05,
      "loss": 2.3045,
      "step": 27963
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.069808006286621,
      "learning_rate": 1.4259417167472826e-05,
      "loss": 2.427,
      "step": 27964
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0521094799041748,
      "learning_rate": 1.425904464478041e-05,
      "loss": 2.5143,
      "step": 27965
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1973694562911987,
      "learning_rate": 1.4258672114867754e-05,
      "loss": 2.4373,
      "step": 27966
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1270391941070557,
      "learning_rate": 1.4258299577735492e-05,
      "loss": 2.4976,
      "step": 27967
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0366626977920532,
      "learning_rate": 1.4257927033384259e-05,
      "loss": 2.3231,
      "step": 27968
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1193981170654297,
      "learning_rate": 1.4257554481814676e-05,
      "loss": 2.421,
      "step": 27969
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0945276021957397,
      "learning_rate": 1.4257181923027387e-05,
      "loss": 2.3588,
      "step": 27970
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9870714545249939,
      "learning_rate": 1.4256809357023019e-05,
      "loss": 2.4885,
      "step": 27971
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1008262634277344,
      "learning_rate": 1.4256436783802203e-05,
      "loss": 2.4468,
      "step": 27972
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0700058937072754,
      "learning_rate": 1.4256064203365569e-05,
      "loss": 2.2815,
      "step": 27973
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1132824420928955,
      "learning_rate": 1.425569161571375e-05,
      "loss": 2.4217,
      "step": 27974
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9723073244094849,
      "learning_rate": 1.4255319020847377e-05,
      "loss": 2.3117,
      "step": 27975
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0894775390625,
      "learning_rate": 1.4254946418767084e-05,
      "loss": 2.3418,
      "step": 27976
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0018799304962158,
      "learning_rate": 1.4254573809473502e-05,
      "loss": 2.5525,
      "step": 27977
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1135727167129517,
      "learning_rate": 1.4254201192967262e-05,
      "loss": 2.2819,
      "step": 27978
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0234347581863403,
      "learning_rate": 1.4253828569248996e-05,
      "loss": 2.4712,
      "step": 27979
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9632648229598999,
      "learning_rate": 1.4253455938319331e-05,
      "loss": 2.2536,
      "step": 27980
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.063560128211975,
      "learning_rate": 1.4253083300178908e-05,
      "loss": 2.5171,
      "step": 27981
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.168967843055725,
      "learning_rate": 1.425271065482835e-05,
      "loss": 2.5813,
      "step": 27982
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1965101957321167,
      "learning_rate": 1.4252338002268293e-05,
      "loss": 2.1379,
      "step": 27983
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0349918603897095,
      "learning_rate": 1.4251965342499369e-05,
      "loss": 2.4527,
      "step": 27984
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0274072885513306,
      "learning_rate": 1.4251592675522212e-05,
      "loss": 2.3037,
      "step": 27985
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0278953313827515,
      "learning_rate": 1.4251220001337446e-05,
      "loss": 2.3656,
      "step": 27986
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3167579174041748,
      "learning_rate": 1.4250847319945709e-05,
      "loss": 2.2954,
      "step": 27987
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0309810638427734,
      "learning_rate": 1.4250474631347633e-05,
      "loss": 2.3014,
      "step": 27988
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0758378505706787,
      "learning_rate": 1.4250101935543845e-05,
      "loss": 2.5669,
      "step": 27989
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2318034172058105,
      "learning_rate": 1.424972923253498e-05,
      "loss": 2.401,
      "step": 27990
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9903720021247864,
      "learning_rate": 1.4249356522321673e-05,
      "loss": 2.3334,
      "step": 27991
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0357086658477783,
      "learning_rate": 1.4248983804904551e-05,
      "loss": 2.3046,
      "step": 27992
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9681465029716492,
      "learning_rate": 1.4248611080284247e-05,
      "loss": 2.4175,
      "step": 27993
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.24038565158844,
      "learning_rate": 1.4248238348461397e-05,
      "loss": 2.6115,
      "step": 27994
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0318101644515991,
      "learning_rate": 1.4247865609436625e-05,
      "loss": 2.3901,
      "step": 27995
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0850962400436401,
      "learning_rate": 1.4247492863210565e-05,
      "loss": 2.3145,
      "step": 27996
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1268235445022583,
      "learning_rate": 1.4247120109783855e-05,
      "loss": 2.4,
      "step": 27997
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0245261192321777,
      "learning_rate": 1.4246747349157124e-05,
      "loss": 2.3341,
      "step": 27998
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0942410230636597,
      "learning_rate": 1.4246374581330999e-05,
      "loss": 2.6886,
      "step": 27999
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.037367343902588,
      "learning_rate": 1.4246001806306117e-05,
      "loss": 2.2382,
      "step": 28000
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2061063051223755,
      "learning_rate": 1.4245629024083109e-05,
      "loss": 2.357,
      "step": 28001
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0179107189178467,
      "learning_rate": 1.4245256234662608e-05,
      "loss": 2.4384,
      "step": 28002
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9603818655014038,
      "learning_rate": 1.4244883438045243e-05,
      "loss": 2.3309,
      "step": 28003
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1650362014770508,
      "learning_rate": 1.4244510634231648e-05,
      "loss": 2.5845,
      "step": 28004
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1538265943527222,
      "learning_rate": 1.4244137823222457e-05,
      "loss": 2.6209,
      "step": 28005
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9756206274032593,
      "learning_rate": 1.4243765005018296e-05,
      "loss": 2.3266,
      "step": 28006
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.141144037246704,
      "learning_rate": 1.4243392179619802e-05,
      "loss": 2.1959,
      "step": 28007
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0007646083831787,
      "learning_rate": 1.4243019347027607e-05,
      "loss": 2.6316,
      "step": 28008
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1208728551864624,
      "learning_rate": 1.4242646507242339e-05,
      "loss": 2.4316,
      "step": 28009
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0708060264587402,
      "learning_rate": 1.4242273660264636e-05,
      "loss": 2.3757,
      "step": 28010
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.148197889328003,
      "learning_rate": 1.4241900806095127e-05,
      "loss": 2.5541,
      "step": 28011
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1934661865234375,
      "learning_rate": 1.4241527944734442e-05,
      "loss": 2.3242,
      "step": 28012
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9772096872329712,
      "learning_rate": 1.4241155076183214e-05,
      "loss": 2.5784,
      "step": 28013
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9956226944923401,
      "learning_rate": 1.424078220044208e-05,
      "loss": 2.4814,
      "step": 28014
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2630596160888672,
      "learning_rate": 1.4240409317511665e-05,
      "loss": 2.5341,
      "step": 28015
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.995461642742157,
      "learning_rate": 1.4240036427392606e-05,
      "loss": 2.5598,
      "step": 28016
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0667927265167236,
      "learning_rate": 1.4239663530085535e-05,
      "loss": 2.504,
      "step": 28017
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2262908220291138,
      "learning_rate": 1.4239290625591083e-05,
      "loss": 2.4397,
      "step": 28018
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1182900667190552,
      "learning_rate": 1.4238917713909878e-05,
      "loss": 2.1941,
      "step": 28019
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0941076278686523,
      "learning_rate": 1.423854479504256e-05,
      "loss": 2.4334,
      "step": 28020
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0494565963745117,
      "learning_rate": 1.4238171868989757e-05,
      "loss": 2.526,
      "step": 28021
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0207992792129517,
      "learning_rate": 1.42377989357521e-05,
      "loss": 2.5525,
      "step": 28022
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1679490804672241,
      "learning_rate": 1.4237425995330226e-05,
      "loss": 2.2534,
      "step": 28023
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2337764501571655,
      "learning_rate": 1.423705304772476e-05,
      "loss": 2.152,
      "step": 28024
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0335054397583008,
      "learning_rate": 1.4236680092936339e-05,
      "loss": 2.3889,
      "step": 28025
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.151489496231079,
      "learning_rate": 1.4236307130965599e-05,
      "loss": 2.4642,
      "step": 28026
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.145133376121521,
      "learning_rate": 1.4235934161813163e-05,
      "loss": 2.1786,
      "step": 28027
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1363364458084106,
      "learning_rate": 1.423556118547967e-05,
      "loss": 2.4222,
      "step": 28028
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.991172194480896,
      "learning_rate": 1.4235188201965748e-05,
      "loss": 2.2925,
      "step": 28029
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0481518507003784,
      "learning_rate": 1.4234815211272036e-05,
      "loss": 2.3029,
      "step": 28030
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9663757681846619,
      "learning_rate": 1.423444221339916e-05,
      "loss": 2.3642,
      "step": 28031
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.0210717916488647,
      "learning_rate": 1.4234069208347754e-05,
      "loss": 2.2652,
      "step": 28032
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1259379386901855,
      "learning_rate": 1.4233696196118452e-05,
      "loss": 2.5912,
      "step": 28033
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0761264562606812,
      "learning_rate": 1.4233323176711882e-05,
      "loss": 2.4356,
      "step": 28034
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.341691493988037,
      "learning_rate": 1.4232950150128681e-05,
      "loss": 2.4202,
      "step": 28035
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.102061152458191,
      "learning_rate": 1.4232577116369481e-05,
      "loss": 2.5222,
      "step": 28036
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0838240385055542,
      "learning_rate": 1.4232204075434913e-05,
      "loss": 2.4287,
      "step": 28037
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.144988775253296,
      "learning_rate": 1.4231831027325611e-05,
      "loss": 2.4916,
      "step": 28038
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.199107050895691,
      "learning_rate": 1.4231457972042205e-05,
      "loss": 2.2839,
      "step": 28039
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2441946268081665,
      "learning_rate": 1.4231084909585327e-05,
      "loss": 2.5022,
      "step": 28040
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0439152717590332,
      "learning_rate": 1.4230711839955608e-05,
      "loss": 2.5247,
      "step": 28041
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0158907175064087,
      "learning_rate": 1.4230338763153687e-05,
      "loss": 2.5845,
      "step": 28042
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1142728328704834,
      "learning_rate": 1.4229965679180193e-05,
      "loss": 2.5726,
      "step": 28043
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1237263679504395,
      "learning_rate": 1.422959258803576e-05,
      "loss": 2.1897,
      "step": 28044
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1053518056869507,
      "learning_rate": 1.4229219489721017e-05,
      "loss": 2.4839,
      "step": 28045
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.010398268699646,
      "learning_rate": 1.4228846384236599e-05,
      "loss": 2.437,
      "step": 28046
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0084527730941772,
      "learning_rate": 1.4228473271583136e-05,
      "loss": 2.3179,
      "step": 28047
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.063279151916504,
      "learning_rate": 1.4228100151761264e-05,
      "loss": 2.385,
      "step": 28048
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.038987398147583,
      "learning_rate": 1.4227727024771614e-05,
      "loss": 2.5639,
      "step": 28049
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9899570345878601,
      "learning_rate": 1.4227353890614819e-05,
      "loss": 2.3715,
      "step": 28050
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0346460342407227,
      "learning_rate": 1.422698074929151e-05,
      "loss": 2.5337,
      "step": 28051
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.063302755355835,
      "learning_rate": 1.422660760080232e-05,
      "loss": 2.3725,
      "step": 28052
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9891655445098877,
      "learning_rate": 1.4226234445147883e-05,
      "loss": 2.5353,
      "step": 28053
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.250562310218811,
      "learning_rate": 1.4225861282328833e-05,
      "loss": 2.2338,
      "step": 28054
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1012327671051025,
      "learning_rate": 1.4225488112345795e-05,
      "loss": 2.5127,
      "step": 28055
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0189266204833984,
      "learning_rate": 1.4225114935199411e-05,
      "loss": 2.5608,
      "step": 28056
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0071630477905273,
      "learning_rate": 1.4224741750890312e-05,
      "loss": 2.3892,
      "step": 28057
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.021830439567566,
      "learning_rate": 1.4224368559419123e-05,
      "loss": 2.605,
      "step": 28058
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0742833614349365,
      "learning_rate": 1.4223995360786485e-05,
      "loss": 2.4775,
      "step": 28059
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1396772861480713,
      "learning_rate": 1.4223622154993027e-05,
      "loss": 2.4164,
      "step": 28060
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0255236625671387,
      "learning_rate": 1.422324894203938e-05,
      "loss": 2.2627,
      "step": 28061
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1595505475997925,
      "learning_rate": 1.4222875721926182e-05,
      "loss": 2.287,
      "step": 28062
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0344021320343018,
      "learning_rate": 1.422250249465406e-05,
      "loss": 2.3832,
      "step": 28063
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.13124418258667,
      "learning_rate": 1.4222129260223651e-05,
      "loss": 2.3819,
      "step": 28064
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0556963682174683,
      "learning_rate": 1.4221756018635588e-05,
      "loss": 1.9847,
      "step": 28065
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1455546617507935,
      "learning_rate": 1.4221382769890499e-05,
      "loss": 2.3232,
      "step": 28066
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0258523225784302,
      "learning_rate": 1.4221009513989021e-05,
      "loss": 2.2416,
      "step": 28067
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.187485694885254,
      "learning_rate": 1.4220636250931785e-05,
      "loss": 2.5199,
      "step": 28068
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1038898229599,
      "learning_rate": 1.4220262980719423e-05,
      "loss": 2.24,
      "step": 28069
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1342597007751465,
      "learning_rate": 1.4219889703352574e-05,
      "loss": 2.3768,
      "step": 28070
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.031689167022705,
      "learning_rate": 1.4219516418831862e-05,
      "loss": 2.3275,
      "step": 28071
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.091506004333496,
      "learning_rate": 1.4219143127157924e-05,
      "loss": 2.3123,
      "step": 28072
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0628511905670166,
      "learning_rate": 1.4218769828331392e-05,
      "loss": 2.3598,
      "step": 28073
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0830132961273193,
      "learning_rate": 1.42183965223529e-05,
      "loss": 2.3933,
      "step": 28074
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0482980012893677,
      "learning_rate": 1.421802320922308e-05,
      "loss": 2.525,
      "step": 28075
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.971836507320404,
      "learning_rate": 1.4217649888942564e-05,
      "loss": 2.2527,
      "step": 28076
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.022061824798584,
      "learning_rate": 1.421727656151199e-05,
      "loss": 2.2916,
      "step": 28077
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0392682552337646,
      "learning_rate": 1.4216903226931984e-05,
      "loss": 2.4617,
      "step": 28078
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0583900213241577,
      "learning_rate": 1.421652988520318e-05,
      "loss": 2.4462,
      "step": 28079
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.180870532989502,
      "learning_rate": 1.4216156536326216e-05,
      "loss": 2.365,
      "step": 28080
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1510539054870605,
      "learning_rate": 1.421578318030172e-05,
      "loss": 2.4126,
      "step": 28081
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1614314317703247,
      "learning_rate": 1.4215409817130329e-05,
      "loss": 2.4733,
      "step": 28082
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.193418264389038,
      "learning_rate": 1.4215036446812669e-05,
      "loss": 2.4039,
      "step": 28083
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0726782083511353,
      "learning_rate": 1.421466306934938e-05,
      "loss": 2.3858,
      "step": 28084
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1486358642578125,
      "learning_rate": 1.4214289684741091e-05,
      "loss": 2.5966,
      "step": 28085
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.037293553352356,
      "learning_rate": 1.4213916292988439e-05,
      "loss": 2.4665,
      "step": 28086
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9621834754943848,
      "learning_rate": 1.4213542894092052e-05,
      "loss": 2.3805,
      "step": 28087
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0068649053573608,
      "learning_rate": 1.4213169488052564e-05,
      "loss": 2.3465,
      "step": 28088
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1543660163879395,
      "learning_rate": 1.4212796074870614e-05,
      "loss": 2.5446,
      "step": 28089
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1724416017532349,
      "learning_rate": 1.4212422654546827e-05,
      "loss": 2.3844,
      "step": 28090
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9879787564277649,
      "learning_rate": 1.4212049227081842e-05,
      "loss": 2.6144,
      "step": 28091
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.166145920753479,
      "learning_rate": 1.4211675792476287e-05,
      "loss": 2.4321,
      "step": 28092
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2143447399139404,
      "learning_rate": 1.4211302350730798e-05,
      "loss": 2.6076,
      "step": 28093
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.036448359489441,
      "learning_rate": 1.421092890184601e-05,
      "loss": 2.3882,
      "step": 28094
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.037916660308838,
      "learning_rate": 1.4210555445822552e-05,
      "loss": 2.3112,
      "step": 28095
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.177520990371704,
      "learning_rate": 1.4210181982661058e-05,
      "loss": 2.2575,
      "step": 28096
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.237097144126892,
      "learning_rate": 1.4209808512362164e-05,
      "loss": 2.4554,
      "step": 28097
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0936371088027954,
      "learning_rate": 1.4209435034926501e-05,
      "loss": 2.4983,
      "step": 28098
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0047812461853027,
      "learning_rate": 1.4209061550354702e-05,
      "loss": 2.3024,
      "step": 28099
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.102964162826538,
      "learning_rate": 1.42086880586474e-05,
      "loss": 2.5365,
      "step": 28100
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0194483995437622,
      "learning_rate": 1.420831455980523e-05,
      "loss": 2.4791,
      "step": 28101
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.203303337097168,
      "learning_rate": 1.420794105382882e-05,
      "loss": 2.2712,
      "step": 28102
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0077382326126099,
      "learning_rate": 1.4207567540718812e-05,
      "loss": 2.3564,
      "step": 28103
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.02106511592865,
      "learning_rate": 1.4207194020475833e-05,
      "loss": 2.5348,
      "step": 28104
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0513283014297485,
      "learning_rate": 1.4206820493100515e-05,
      "loss": 2.634,
      "step": 28105
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1009721755981445,
      "learning_rate": 1.4206446958593496e-05,
      "loss": 2.2627,
      "step": 28106
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.180286169052124,
      "learning_rate": 1.4206073416955406e-05,
      "loss": 2.5013,
      "step": 28107
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0748776197433472,
      "learning_rate": 1.420569986818688e-05,
      "loss": 2.5074,
      "step": 28108
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2239055633544922,
      "learning_rate": 1.4205326312288553e-05,
      "loss": 2.5327,
      "step": 28109
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0563732385635376,
      "learning_rate": 1.420495274926105e-05,
      "loss": 2.5609,
      "step": 28110
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9672766327857971,
      "learning_rate": 1.4204579179105014e-05,
      "loss": 2.4146,
      "step": 28111
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1350501775741577,
      "learning_rate": 1.4204205601821075e-05,
      "loss": 2.4948,
      "step": 28112
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0651079416275024,
      "learning_rate": 1.4203832017409861e-05,
      "loss": 2.4017,
      "step": 28113
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0096583366394043,
      "learning_rate": 1.4203458425872014e-05,
      "loss": 2.4663,
      "step": 28114
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0982980728149414,
      "learning_rate": 1.4203084827208162e-05,
      "loss": 2.2117,
      "step": 28115
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0004677772521973,
      "learning_rate": 1.420271122141894e-05,
      "loss": 2.3762,
      "step": 28116
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1564701795578003,
      "learning_rate": 1.4202337608504982e-05,
      "loss": 2.3808,
      "step": 28117
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1737264394760132,
      "learning_rate": 1.4201963988466918e-05,
      "loss": 2.4219,
      "step": 28118
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1942706108093262,
      "learning_rate": 1.4201590361305386e-05,
      "loss": 2.185,
      "step": 28119
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0908905267715454,
      "learning_rate": 1.4201216727021017e-05,
      "loss": 2.3046,
      "step": 28120
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.083598256111145,
      "learning_rate": 1.4200843085614442e-05,
      "loss": 2.3557,
      "step": 28121
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2748943567276,
      "learning_rate": 1.42004694370863e-05,
      "loss": 2.4123,
      "step": 28122
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9956881999969482,
      "learning_rate": 1.4200095781437223e-05,
      "loss": 2.4345,
      "step": 28123
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.225061297416687,
      "learning_rate": 1.4199722118667838e-05,
      "loss": 2.313,
      "step": 28124
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1571786403656006,
      "learning_rate": 1.4199348448778785e-05,
      "loss": 2.2905,
      "step": 28125
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0819129943847656,
      "learning_rate": 1.4198974771770697e-05,
      "loss": 2.2608,
      "step": 28126
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3059481382369995,
      "learning_rate": 1.4198601087644206e-05,
      "loss": 2.3353,
      "step": 28127
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.142909288406372,
      "learning_rate": 1.4198227396399943e-05,
      "loss": 2.5558,
      "step": 28128
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0548361539840698,
      "learning_rate": 1.4197853698038549e-05,
      "loss": 2.3671,
      "step": 28129
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.953014612197876,
      "learning_rate": 1.4197479992560651e-05,
      "loss": 2.2823,
      "step": 28130
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0886467695236206,
      "learning_rate": 1.4197106279966884e-05,
      "loss": 2.4438,
      "step": 28131
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0266374349594116,
      "learning_rate": 1.4196732560257881e-05,
      "loss": 2.4098,
      "step": 28132
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.054451584815979,
      "learning_rate": 1.4196358833434279e-05,
      "loss": 2.2523,
      "step": 28133
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1353086233139038,
      "learning_rate": 1.4195985099496708e-05,
      "loss": 2.3607,
      "step": 28134
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1224350929260254,
      "learning_rate": 1.4195611358445802e-05,
      "loss": 2.3535,
      "step": 28135
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0381271839141846,
      "learning_rate": 1.4195237610282197e-05,
      "loss": 2.3574,
      "step": 28136
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9583436846733093,
      "learning_rate": 1.4194863855006525e-05,
      "loss": 2.3215,
      "step": 28137
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0141403675079346,
      "learning_rate": 1.4194490092619414e-05,
      "loss": 2.573,
      "step": 28138
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0857949256896973,
      "learning_rate": 1.419411632312151e-05,
      "loss": 2.3696,
      "step": 28139
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.01668119430542,
      "learning_rate": 1.4193742546513436e-05,
      "loss": 2.6214,
      "step": 28140
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.060800552368164,
      "learning_rate": 1.4193368762795832e-05,
      "loss": 2.2935,
      "step": 28141
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.000946044921875,
      "learning_rate": 1.4192994971969328e-05,
      "loss": 2.4236,
      "step": 28142
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.995985746383667,
      "learning_rate": 1.4192621174034555e-05,
      "loss": 2.4625,
      "step": 28143
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0540716648101807,
      "learning_rate": 1.4192247368992156e-05,
      "loss": 2.4389,
      "step": 28144
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9999485611915588,
      "learning_rate": 1.4191873556842754e-05,
      "loss": 2.425,
      "step": 28145
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0419596433639526,
      "learning_rate": 1.4191499737586992e-05,
      "loss": 2.3647,
      "step": 28146
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0934313535690308,
      "learning_rate": 1.4191125911225499e-05,
      "loss": 2.6247,
      "step": 28147
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0674948692321777,
      "learning_rate": 1.4190752077758907e-05,
      "loss": 2.4094,
      "step": 28148
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.090867280960083,
      "learning_rate": 1.4190378237187852e-05,
      "loss": 2.2201,
      "step": 28149
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0678540468215942,
      "learning_rate": 1.419000438951297e-05,
      "loss": 2.4611,
      "step": 28150
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0310287475585938,
      "learning_rate": 1.418963053473489e-05,
      "loss": 2.2592,
      "step": 28151
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0927997827529907,
      "learning_rate": 1.418925667285425e-05,
      "loss": 2.4336,
      "step": 28152
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.976127028465271,
      "learning_rate": 1.4188882803871682e-05,
      "loss": 2.4125,
      "step": 28153
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0320682525634766,
      "learning_rate": 1.418850892778782e-05,
      "loss": 2.4577,
      "step": 28154
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0859647989273071,
      "learning_rate": 1.4188135044603298e-05,
      "loss": 2.3715,
      "step": 28155
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0503886938095093,
      "learning_rate": 1.418776115431875e-05,
      "loss": 2.4262,
      "step": 28156
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.072336196899414,
      "learning_rate": 1.4187387256934809e-05,
      "loss": 2.3907,
      "step": 28157
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1192222833633423,
      "learning_rate": 1.4187013352452109e-05,
      "loss": 2.2157,
      "step": 28158
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0537117719650269,
      "learning_rate": 1.4186639440871283e-05,
      "loss": 2.3593,
      "step": 28159
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1072912216186523,
      "learning_rate": 1.4186265522192969e-05,
      "loss": 2.3645,
      "step": 28160
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.112912893295288,
      "learning_rate": 1.4185891596417795e-05,
      "loss": 2.1967,
      "step": 28161
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0236637592315674,
      "learning_rate": 1.41855176635464e-05,
      "loss": 2.5744,
      "step": 28162
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0742236375808716,
      "learning_rate": 1.4185143723579415e-05,
      "loss": 2.4627,
      "step": 28163
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0149927139282227,
      "learning_rate": 1.4184769776517475e-05,
      "loss": 2.1879,
      "step": 28164
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0303809642791748,
      "learning_rate": 1.4184395822361214e-05,
      "loss": 2.5279,
      "step": 28165
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.103582501411438,
      "learning_rate": 1.4184021861111266e-05,
      "loss": 2.4069,
      "step": 28166
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1234681606292725,
      "learning_rate": 1.4183647892768262e-05,
      "loss": 2.4509,
      "step": 28167
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.073601245880127,
      "learning_rate": 1.4183273917332843e-05,
      "loss": 2.4883,
      "step": 28168
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9985374212265015,
      "learning_rate": 1.4182899934805634e-05,
      "loss": 2.4735,
      "step": 28169
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9919396042823792,
      "learning_rate": 1.4182525945187276e-05,
      "loss": 2.4062,
      "step": 28170
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0871903896331787,
      "learning_rate": 1.41821519484784e-05,
      "loss": 2.3851,
      "step": 28171
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0781325101852417,
      "learning_rate": 1.418177794467964e-05,
      "loss": 2.3408,
      "step": 28172
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1085128784179688,
      "learning_rate": 1.4181403933791631e-05,
      "loss": 2.3794,
      "step": 28173
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0095032453536987,
      "learning_rate": 1.4181029915815007e-05,
      "loss": 2.4346,
      "step": 28174
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0756874084472656,
      "learning_rate": 1.4180655890750401e-05,
      "loss": 2.2855,
      "step": 28175
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9336546659469604,
      "learning_rate": 1.418028185859845e-05,
      "loss": 2.2345,
      "step": 28176
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9579910635948181,
      "learning_rate": 1.4179907819359783e-05,
      "loss": 2.4853,
      "step": 28177
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0271961688995361,
      "learning_rate": 1.4179533773035038e-05,
      "loss": 2.2556,
      "step": 28178
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0103803873062134,
      "learning_rate": 1.417915971962485e-05,
      "loss": 2.4243,
      "step": 28179
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9299754500389099,
      "learning_rate": 1.4178785659129848e-05,
      "loss": 2.335,
      "step": 28180
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9865383505821228,
      "learning_rate": 1.4178411591550671e-05,
      "loss": 2.4275,
      "step": 28181
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.04843270778656,
      "learning_rate": 1.417803751688795e-05,
      "loss": 2.5685,
      "step": 28182
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0840295553207397,
      "learning_rate": 1.4177663435142323e-05,
      "loss": 2.3734,
      "step": 28183
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1571807861328125,
      "learning_rate": 1.4177289346314422e-05,
      "loss": 2.2796,
      "step": 28184
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.079464316368103,
      "learning_rate": 1.417691525040488e-05,
      "loss": 2.3234,
      "step": 28185
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0231733322143555,
      "learning_rate": 1.417654114741433e-05,
      "loss": 2.3961,
      "step": 28186
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.175857663154602,
      "learning_rate": 1.417616703734341e-05,
      "loss": 2.439,
      "step": 28187
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.117719292640686,
      "learning_rate": 1.4175792920192755e-05,
      "loss": 2.5564,
      "step": 28188
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.031046748161316,
      "learning_rate": 1.4175418795962996e-05,
      "loss": 2.3099,
      "step": 28189
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1053885221481323,
      "learning_rate": 1.4175044664654768e-05,
      "loss": 2.4926,
      "step": 28190
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9126776456832886,
      "learning_rate": 1.4174670526268703e-05,
      "loss": 2.3317,
      "step": 28191
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.002048373222351,
      "learning_rate": 1.4174296380805439e-05,
      "loss": 2.4081,
      "step": 28192
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1338374614715576,
      "learning_rate": 1.4173922228265608e-05,
      "loss": 2.6229,
      "step": 28193
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0647013187408447,
      "learning_rate": 1.4173548068649847e-05,
      "loss": 2.2514,
      "step": 28194
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0559250116348267,
      "learning_rate": 1.4173173901958788e-05,
      "loss": 2.4523,
      "step": 28195
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0605617761611938,
      "learning_rate": 1.4172799728193068e-05,
      "loss": 2.4786,
      "step": 28196
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.010908842086792,
      "learning_rate": 1.4172425547353314e-05,
      "loss": 2.4555,
      "step": 28197
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1200757026672363,
      "learning_rate": 1.4172051359440168e-05,
      "loss": 2.7256,
      "step": 28198
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9840627908706665,
      "learning_rate": 1.4171677164454265e-05,
      "loss": 2.2663,
      "step": 28199
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0971777439117432,
      "learning_rate": 1.4171302962396234e-05,
      "loss": 2.5705,
      "step": 28200
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1781108379364014,
      "learning_rate": 1.417092875326671e-05,
      "loss": 2.5972,
      "step": 28201
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0190943479537964,
      "learning_rate": 1.417055453706633e-05,
      "loss": 2.5906,
      "step": 28202
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0431687831878662,
      "learning_rate": 1.417018031379573e-05,
      "loss": 2.2806,
      "step": 28203
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.894975483417511,
      "learning_rate": 1.4169806083455538e-05,
      "loss": 2.7408,
      "step": 28204
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0213853120803833,
      "learning_rate": 1.4169431846046391e-05,
      "loss": 2.4877,
      "step": 28205
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0839959383010864,
      "learning_rate": 1.4169057601568927e-05,
      "loss": 2.4952,
      "step": 28206
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5238116979599,
      "learning_rate": 1.4168683350023777e-05,
      "loss": 2.4613,
      "step": 28207
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2931336164474487,
      "learning_rate": 1.4168309091411576e-05,
      "loss": 2.3565,
      "step": 28208
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0026518106460571,
      "learning_rate": 1.4167934825732964e-05,
      "loss": 2.4205,
      "step": 28209
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1079109907150269,
      "learning_rate": 1.4167560552988567e-05,
      "loss": 2.3468,
      "step": 28210
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0905040502548218,
      "learning_rate": 1.416718627317902e-05,
      "loss": 2.3763,
      "step": 28211
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.045613169670105,
      "learning_rate": 1.4166811986304965e-05,
      "loss": 2.3955,
      "step": 28212
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2715976238250732,
      "learning_rate": 1.416643769236703e-05,
      "loss": 2.5386,
      "step": 28213
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2577900886535645,
      "learning_rate": 1.416606339136585e-05,
      "loss": 2.418,
      "step": 28214
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1763914823532104,
      "learning_rate": 1.4165689083302066e-05,
      "loss": 2.1279,
      "step": 28215
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1457829475402832,
      "learning_rate": 1.4165314768176304e-05,
      "loss": 2.6759,
      "step": 28216
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1420387029647827,
      "learning_rate": 1.4164940445989201e-05,
      "loss": 2.3222,
      "step": 28217
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1233854293823242,
      "learning_rate": 1.4164566116741395e-05,
      "loss": 2.1503,
      "step": 28218
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0510509014129639,
      "learning_rate": 1.4164191780433516e-05,
      "loss": 2.435,
      "step": 28219
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2281432151794434,
      "learning_rate": 1.4163817437066204e-05,
      "loss": 2.4056,
      "step": 28220
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.104406714439392,
      "learning_rate": 1.4163443086640088e-05,
      "loss": 2.562,
      "step": 28221
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4121053218841553,
      "learning_rate": 1.416306872915581e-05,
      "loss": 2.4689,
      "step": 28222
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3405872583389282,
      "learning_rate": 1.4162694364613993e-05,
      "loss": 2.5718,
      "step": 28223
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.259682059288025,
      "learning_rate": 1.4162319993015283e-05,
      "loss": 2.4631,
      "step": 28224
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0461405515670776,
      "learning_rate": 1.4161945614360309e-05,
      "loss": 2.2198,
      "step": 28225
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0500706434249878,
      "learning_rate": 1.4161571228649706e-05,
      "loss": 2.3719,
      "step": 28226
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9986976385116577,
      "learning_rate": 1.4161196835884111e-05,
      "loss": 2.4032,
      "step": 28227
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9435450434684753,
      "learning_rate": 1.4160822436064157e-05,
      "loss": 2.3828,
      "step": 28228
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1036412715911865,
      "learning_rate": 1.4160448029190477e-05,
      "loss": 2.2914,
      "step": 28229
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1726807355880737,
      "learning_rate": 1.4160073615263713e-05,
      "loss": 2.3506,
      "step": 28230
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2354756593704224,
      "learning_rate": 1.4159699194284489e-05,
      "loss": 2.3922,
      "step": 28231
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0485748052597046,
      "learning_rate": 1.4159324766253445e-05,
      "loss": 2.3143,
      "step": 28232
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1398218870162964,
      "learning_rate": 1.4158950331171217e-05,
      "loss": 2.2629,
      "step": 28233
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0910851955413818,
      "learning_rate": 1.415857588903844e-05,
      "loss": 2.5106,
      "step": 28234
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.094376802444458,
      "learning_rate": 1.4158201439855748e-05,
      "loss": 2.18,
      "step": 28235
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0479556322097778,
      "learning_rate": 1.4157826983623772e-05,
      "loss": 2.3545,
      "step": 28236
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0662949085235596,
      "learning_rate": 1.4157452520343151e-05,
      "loss": 2.2365,
      "step": 28237
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9789827466011047,
      "learning_rate": 1.4157078050014519e-05,
      "loss": 2.5634,
      "step": 28238
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1625161170959473,
      "learning_rate": 1.415670357263851e-05,
      "loss": 2.3847,
      "step": 28239
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.119733452796936,
      "learning_rate": 1.415632908821576e-05,
      "loss": 2.5256,
      "step": 28240
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0183351039886475,
      "learning_rate": 1.4155954596746906e-05,
      "loss": 2.5338,
      "step": 28241
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1485599279403687,
      "learning_rate": 1.4155580098232577e-05,
      "loss": 2.3115,
      "step": 28242
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1009987592697144,
      "learning_rate": 1.4155205592673414e-05,
      "loss": 2.2976,
      "step": 28243
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1891549825668335,
      "learning_rate": 1.4154831080070047e-05,
      "loss": 2.1566,
      "step": 28244
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0428152084350586,
      "learning_rate": 1.4154456560423113e-05,
      "loss": 2.266,
      "step": 28245
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.047385811805725,
      "learning_rate": 1.4154082033733246e-05,
      "loss": 2.334,
      "step": 28246
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2358758449554443,
      "learning_rate": 1.4153707500001081e-05,
      "loss": 2.2589,
      "step": 28247
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.133018970489502,
      "learning_rate": 1.4153332959227257e-05,
      "loss": 2.2508,
      "step": 28248
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.060410737991333,
      "learning_rate": 1.4152958411412403e-05,
      "loss": 2.2277,
      "step": 28249
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2092509269714355,
      "learning_rate": 1.4152583856557158e-05,
      "loss": 2.4532,
      "step": 28250
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.149153470993042,
      "learning_rate": 1.4152209294662153e-05,
      "loss": 2.4972,
      "step": 28251
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2467788457870483,
      "learning_rate": 1.415183472572803e-05,
      "loss": 2.5419,
      "step": 28252
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1311496496200562,
      "learning_rate": 1.4151460149755415e-05,
      "loss": 2.3853,
      "step": 28253
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0495355129241943,
      "learning_rate": 1.415108556674495e-05,
      "loss": 2.5425,
      "step": 28254
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0272767543792725,
      "learning_rate": 1.4150710976697268e-05,
      "loss": 2.2313,
      "step": 28255
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0469155311584473,
      "learning_rate": 1.4150336379613003e-05,
      "loss": 2.3704,
      "step": 28256
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9755498766899109,
      "learning_rate": 1.4149961775492789e-05,
      "loss": 2.3487,
      "step": 28257
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0688682794570923,
      "learning_rate": 1.4149587164337267e-05,
      "loss": 2.5188,
      "step": 28258
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0413987636566162,
      "learning_rate": 1.4149212546147064e-05,
      "loss": 2.2317,
      "step": 28259
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9523784518241882,
      "learning_rate": 1.4148837920922818e-05,
      "loss": 2.4509,
      "step": 28260
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9704635143280029,
      "learning_rate": 1.4148463288665166e-05,
      "loss": 2.3819,
      "step": 28261
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9526636600494385,
      "learning_rate": 1.4148088649374744e-05,
      "loss": 2.4092,
      "step": 28262
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0284165143966675,
      "learning_rate": 1.4147714003052185e-05,
      "loss": 2.3178,
      "step": 28263
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9974873661994934,
      "learning_rate": 1.414733934969812e-05,
      "loss": 2.3129,
      "step": 28264
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0642648935317993,
      "learning_rate": 1.4146964689313192e-05,
      "loss": 2.37,
      "step": 28265
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9928953647613525,
      "learning_rate": 1.4146590021898033e-05,
      "loss": 2.5151,
      "step": 28266
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9872884154319763,
      "learning_rate": 1.4146215347453275e-05,
      "loss": 2.4491,
      "step": 28267
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0665744543075562,
      "learning_rate": 1.4145840665979557e-05,
      "loss": 2.4518,
      "step": 28268
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0584852695465088,
      "learning_rate": 1.4145465977477515e-05,
      "loss": 2.4899,
      "step": 28269
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9756444692611694,
      "learning_rate": 1.4145091281947781e-05,
      "loss": 2.3631,
      "step": 28270
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9946614503860474,
      "learning_rate": 1.414471657939099e-05,
      "loss": 2.3248,
      "step": 28271
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0469286441802979,
      "learning_rate": 1.4144341869807781e-05,
      "loss": 2.4961,
      "step": 28272
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1210161447525024,
      "learning_rate": 1.4143967153198785e-05,
      "loss": 2.2598,
      "step": 28273
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.04093337059021,
      "learning_rate": 1.4143592429564639e-05,
      "loss": 2.337,
      "step": 28274
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1037198305130005,
      "learning_rate": 1.4143217698905982e-05,
      "loss": 2.3938,
      "step": 28275
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0465372800827026,
      "learning_rate": 1.4142842961223442e-05,
      "loss": 2.5724,
      "step": 28276
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0518244504928589,
      "learning_rate": 1.414246821651766e-05,
      "loss": 2.4631,
      "step": 28277
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0011733770370483,
      "learning_rate": 1.4142093464789266e-05,
      "loss": 2.4091,
      "step": 28278
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4051250219345093,
      "learning_rate": 1.4141718706038902e-05,
      "loss": 2.2217,
      "step": 28279
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1742459535598755,
      "learning_rate": 1.4141343940267198e-05,
      "loss": 2.364,
      "step": 28280
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0892082452774048,
      "learning_rate": 1.4140969167474794e-05,
      "loss": 2.5037,
      "step": 28281
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1722867488861084,
      "learning_rate": 1.4140594387662321e-05,
      "loss": 2.5141,
      "step": 28282
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0687432289123535,
      "learning_rate": 1.4140219600830416e-05,
      "loss": 2.2481,
      "step": 28283
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0532493591308594,
      "learning_rate": 1.4139844806979714e-05,
      "loss": 2.1626,
      "step": 28284
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.097597599029541,
      "learning_rate": 1.4139470006110848e-05,
      "loss": 2.3829,
      "step": 28285
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.008008599281311,
      "learning_rate": 1.4139095198224462e-05,
      "loss": 2.3025,
      "step": 28286
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1537398099899292,
      "learning_rate": 1.4138720383321183e-05,
      "loss": 2.4827,
      "step": 28287
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.969637930393219,
      "learning_rate": 1.4138345561401648e-05,
      "loss": 2.3622,
      "step": 28288
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1695921421051025,
      "learning_rate": 1.4137970732466495e-05,
      "loss": 2.533,
      "step": 28289
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0187917947769165,
      "learning_rate": 1.4137595896516356e-05,
      "loss": 2.5398,
      "step": 28290
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9949042797088623,
      "learning_rate": 1.4137221053551869e-05,
      "loss": 2.5091,
      "step": 28291
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0784947872161865,
      "learning_rate": 1.4136846203573667e-05,
      "loss": 2.4048,
      "step": 28292
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0272976160049438,
      "learning_rate": 1.4136471346582387e-05,
      "loss": 2.4099,
      "step": 28293
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.087626576423645,
      "learning_rate": 1.4136096482578667e-05,
      "loss": 2.3037,
      "step": 28294
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9788106679916382,
      "learning_rate": 1.413572161156314e-05,
      "loss": 2.3372,
      "step": 28295
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1307222843170166,
      "learning_rate": 1.4135346733536439e-05,
      "loss": 2.3902,
      "step": 28296
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1226170063018799,
      "learning_rate": 1.4134971848499204e-05,
      "loss": 2.2852,
      "step": 28297
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0660580396652222,
      "learning_rate": 1.413459695645207e-05,
      "loss": 2.2577,
      "step": 28298
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0617631673812866,
      "learning_rate": 1.4134222057395669e-05,
      "loss": 2.2187,
      "step": 28299
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.114298939704895,
      "learning_rate": 1.4133847151330637e-05,
      "loss": 2.326,
      "step": 28300
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.076941728591919,
      "learning_rate": 1.4133472238257618e-05,
      "loss": 2.3826,
      "step": 28301
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1133936643600464,
      "learning_rate": 1.4133097318177236e-05,
      "loss": 2.1722,
      "step": 28302
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1285239458084106,
      "learning_rate": 1.4132722391090131e-05,
      "loss": 2.5075,
      "step": 28303
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0283923149108887,
      "learning_rate": 1.413234745699694e-05,
      "loss": 2.3512,
      "step": 28304
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9896523356437683,
      "learning_rate": 1.4131972515898298e-05,
      "loss": 2.486,
      "step": 28305
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0026607513427734,
      "learning_rate": 1.4131597567794838e-05,
      "loss": 2.5251,
      "step": 28306
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1883797645568848,
      "learning_rate": 1.4131222612687201e-05,
      "loss": 2.4658,
      "step": 28307
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0621941089630127,
      "learning_rate": 1.4130847650576022e-05,
      "loss": 2.372,
      "step": 28308
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0379369258880615,
      "learning_rate": 1.4130472681461931e-05,
      "loss": 2.5075,
      "step": 28309
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3064223527908325,
      "learning_rate": 1.4130097705345567e-05,
      "loss": 2.5323,
      "step": 28310
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4045097827911377,
      "learning_rate": 1.4129722722227565e-05,
      "loss": 2.1932,
      "step": 28311
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1077804565429688,
      "learning_rate": 1.4129347732108563e-05,
      "loss": 2.3388,
      "step": 28312
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1674818992614746,
      "learning_rate": 1.4128972734989192e-05,
      "loss": 2.4946,
      "step": 28313
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.981423020362854,
      "learning_rate": 1.4128597730870097e-05,
      "loss": 2.454,
      "step": 28314
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.141363501548767,
      "learning_rate": 1.4128222719751903e-05,
      "loss": 2.5395,
      "step": 28315
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2518279552459717,
      "learning_rate": 1.4127847701635252e-05,
      "loss": 2.3272,
      "step": 28316
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.190682053565979,
      "learning_rate": 1.4127472676520776e-05,
      "loss": 2.3883,
      "step": 28317
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9921021461486816,
      "learning_rate": 1.4127097644409116e-05,
      "loss": 2.2687,
      "step": 28318
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.09224271774292,
      "learning_rate": 1.4126722605300903e-05,
      "loss": 2.2719,
      "step": 28319
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.026016116142273,
      "learning_rate": 1.4126347559196772e-05,
      "loss": 2.4094,
      "step": 28320
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.045996069908142,
      "learning_rate": 1.4125972506097365e-05,
      "loss": 2.4066,
      "step": 28321
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2022395133972168,
      "learning_rate": 1.4125597446003312e-05,
      "loss": 2.5783,
      "step": 28322
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1540441513061523,
      "learning_rate": 1.4125222378915249e-05,
      "loss": 2.4308,
      "step": 28323
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.037200927734375,
      "learning_rate": 1.4124847304833814e-05,
      "loss": 2.557,
      "step": 28324
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0924543142318726,
      "learning_rate": 1.4124472223759647e-05,
      "loss": 2.5449,
      "step": 28325
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0566720962524414,
      "learning_rate": 1.4124097135693375e-05,
      "loss": 2.4799,
      "step": 28326
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0668976306915283,
      "learning_rate": 1.4123722040635642e-05,
      "loss": 2.4743,
      "step": 28327
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.006908893585205,
      "learning_rate": 1.4123346938587077e-05,
      "loss": 2.3905,
      "step": 28328
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0110167264938354,
      "learning_rate": 1.412297182954832e-05,
      "loss": 2.4513,
      "step": 28329
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9608944654464722,
      "learning_rate": 1.4122596713520004e-05,
      "loss": 2.4402,
      "step": 28330
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1232500076293945,
      "learning_rate": 1.4122221590502769e-05,
      "loss": 2.3526,
      "step": 28331
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0813584327697754,
      "learning_rate": 1.4121846460497248e-05,
      "loss": 2.5972,
      "step": 28332
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2462303638458252,
      "learning_rate": 1.4121471323504079e-05,
      "loss": 2.2538,
      "step": 28333
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1927454471588135,
      "learning_rate": 1.4121096179523898e-05,
      "loss": 2.3673,
      "step": 28334
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1721951961517334,
      "learning_rate": 1.4120721028557339e-05,
      "loss": 2.2459,
      "step": 28335
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0237196683883667,
      "learning_rate": 1.4120345870605036e-05,
      "loss": 2.4148,
      "step": 28336
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6264050006866455,
      "learning_rate": 1.4119970705667629e-05,
      "loss": 2.4584,
      "step": 28337
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0856983661651611,
      "learning_rate": 1.411959553374575e-05,
      "loss": 2.5535,
      "step": 28338
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0148489475250244,
      "learning_rate": 1.4119220354840042e-05,
      "loss": 2.4123,
      "step": 28339
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.177010178565979,
      "learning_rate": 1.4118845168951136e-05,
      "loss": 2.4259,
      "step": 28340
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.211843490600586,
      "learning_rate": 1.411846997607967e-05,
      "loss": 2.3814,
      "step": 28341
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.181719183921814,
      "learning_rate": 1.4118094776226276e-05,
      "loss": 2.5469,
      "step": 28342
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1612880229949951,
      "learning_rate": 1.4117719569391594e-05,
      "loss": 2.438,
      "step": 28343
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9049915075302124,
      "learning_rate": 1.4117344355576257e-05,
      "loss": 2.3142,
      "step": 28344
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0269050598144531,
      "learning_rate": 1.4116969134780907e-05,
      "loss": 2.4344,
      "step": 28345
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9947853684425354,
      "learning_rate": 1.4116593907006175e-05,
      "loss": 2.294,
      "step": 28346
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0776636600494385,
      "learning_rate": 1.4116218672252697e-05,
      "loss": 2.3478,
      "step": 28347
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1217883825302124,
      "learning_rate": 1.4115843430521113e-05,
      "loss": 2.3929,
      "step": 28348
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0015451908111572,
      "learning_rate": 1.4115468181812052e-05,
      "loss": 2.1022,
      "step": 28349
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2084300518035889,
      "learning_rate": 1.4115092926126158e-05,
      "loss": 2.4086,
      "step": 28350
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9705108404159546,
      "learning_rate": 1.4114717663464062e-05,
      "loss": 2.4958,
      "step": 28351
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4104204177856445,
      "learning_rate": 1.4114342393826403e-05,
      "loss": 2.2901,
      "step": 28352
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.74042546749115,
      "learning_rate": 1.4113967117213814e-05,
      "loss": 2.4379,
      "step": 28353
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.067812442779541,
      "learning_rate": 1.4113591833626938e-05,
      "loss": 2.4452,
      "step": 28354
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0814710855484009,
      "learning_rate": 1.4113216543066403e-05,
      "loss": 2.6536,
      "step": 28355
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0185407400131226,
      "learning_rate": 1.411284124553285e-05,
      "loss": 2.3276,
      "step": 28356
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1433770656585693,
      "learning_rate": 1.4112465941026914e-05,
      "loss": 2.3224,
      "step": 28357
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1348986625671387,
      "learning_rate": 1.4112090629549231e-05,
      "loss": 2.2696,
      "step": 28358
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1380209922790527,
      "learning_rate": 1.4111715311100436e-05,
      "loss": 2.4173,
      "step": 28359
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1340082883834839,
      "learning_rate": 1.4111339985681172e-05,
      "loss": 2.1934,
      "step": 28360
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.062033772468567,
      "learning_rate": 1.4110964653292066e-05,
      "loss": 2.1927,
      "step": 28361
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2678940296173096,
      "learning_rate": 1.4110589313933758e-05,
      "loss": 2.4314,
      "step": 28362
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9582452178001404,
      "learning_rate": 1.4110213967606884e-05,
      "loss": 2.4027,
      "step": 28363
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1064130067825317,
      "learning_rate": 1.4109838614312084e-05,
      "loss": 2.4528,
      "step": 28364
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9688554406166077,
      "learning_rate": 1.410946325404999e-05,
      "loss": 2.2952,
      "step": 28365
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1511236429214478,
      "learning_rate": 1.410908788682124e-05,
      "loss": 2.5607,
      "step": 28366
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0780950784683228,
      "learning_rate": 1.410871251262647e-05,
      "loss": 2.54,
      "step": 28367
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1664563417434692,
      "learning_rate": 1.4108337131466316e-05,
      "loss": 2.2532,
      "step": 28368
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.126912236213684,
      "learning_rate": 1.4107961743341415e-05,
      "loss": 2.2946,
      "step": 28369
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2383979558944702,
      "learning_rate": 1.4107586348252403e-05,
      "loss": 2.2167,
      "step": 28370
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1629594564437866,
      "learning_rate": 1.4107210946199916e-05,
      "loss": 2.5523,
      "step": 28371
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1861991882324219,
      "learning_rate": 1.4106835537184591e-05,
      "loss": 2.3481,
      "step": 28372
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0302119255065918,
      "learning_rate": 1.4106460121207066e-05,
      "loss": 2.4717,
      "step": 28373
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0925734043121338,
      "learning_rate": 1.4106084698267975e-05,
      "loss": 2.4996,
      "step": 28374
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.159075140953064,
      "learning_rate": 1.4105709268367955e-05,
      "loss": 2.3334,
      "step": 28375
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9981071352958679,
      "learning_rate": 1.4105333831507644e-05,
      "loss": 2.3395,
      "step": 28376
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1149168014526367,
      "learning_rate": 1.4104958387687674e-05,
      "loss": 2.3415,
      "step": 28377
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0077645778656006,
      "learning_rate": 1.4104582936908689e-05,
      "loss": 2.4121,
      "step": 28378
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9715444445610046,
      "learning_rate": 1.4104207479171318e-05,
      "loss": 2.4451,
      "step": 28379
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0425167083740234,
      "learning_rate": 1.4103832014476203e-05,
      "loss": 2.4736,
      "step": 28380
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0305604934692383,
      "learning_rate": 1.4103456542823974e-05,
      "loss": 2.4306,
      "step": 28381
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0695765018463135,
      "learning_rate": 1.4103081064215274e-05,
      "loss": 2.331,
      "step": 28382
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1197116374969482,
      "learning_rate": 1.4102705578650738e-05,
      "loss": 2.4829,
      "step": 28383
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.020643949508667,
      "learning_rate": 1.4102330086131e-05,
      "loss": 2.3083,
      "step": 28384
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1441761255264282,
      "learning_rate": 1.4101954586656699e-05,
      "loss": 2.5734,
      "step": 28385
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.116530179977417,
      "learning_rate": 1.4101579080228472e-05,
      "loss": 2.3648,
      "step": 28386
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0487030744552612,
      "learning_rate": 1.4101203566846955e-05,
      "loss": 2.4335,
      "step": 28387
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9428937435150146,
      "learning_rate": 1.4100828046512782e-05,
      "loss": 2.1579,
      "step": 28388
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1285494565963745,
      "learning_rate": 1.410045251922659e-05,
      "loss": 2.501,
      "step": 28389
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0402261018753052,
      "learning_rate": 1.4100076984989022e-05,
      "loss": 2.5793,
      "step": 28390
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3891208171844482,
      "learning_rate": 1.4099701443800706e-05,
      "loss": 2.2385,
      "step": 28391
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9893936514854431,
      "learning_rate": 1.4099325895662283e-05,
      "loss": 2.3691,
      "step": 28392
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0681140422821045,
      "learning_rate": 1.4098950340574394e-05,
      "loss": 2.268,
      "step": 28393
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0425201654434204,
      "learning_rate": 1.4098574778537667e-05,
      "loss": 2.3679,
      "step": 28394
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9679477214813232,
      "learning_rate": 1.4098199209552743e-05,
      "loss": 2.1997,
      "step": 28395
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9838434457778931,
      "learning_rate": 1.4097823633620257e-05,
      "loss": 2.3823,
      "step": 28396
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0489537715911865,
      "learning_rate": 1.409744805074085e-05,
      "loss": 2.4338,
      "step": 28397
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0779839754104614,
      "learning_rate": 1.4097072460915153e-05,
      "loss": 2.4616,
      "step": 28398
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0937576293945312,
      "learning_rate": 1.4096696864143806e-05,
      "loss": 2.6684,
      "step": 28399
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2356094121932983,
      "learning_rate": 1.4096321260427446e-05,
      "loss": 2.5689,
      "step": 28400
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0658687353134155,
      "learning_rate": 1.4095945649766709e-05,
      "loss": 2.3871,
      "step": 28401
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0238292217254639,
      "learning_rate": 1.4095570032162231e-05,
      "loss": 2.4793,
      "step": 28402
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1192599534988403,
      "learning_rate": 1.409519440761465e-05,
      "loss": 2.3545,
      "step": 28403
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2327218055725098,
      "learning_rate": 1.4094818776124604e-05,
      "loss": 2.4881,
      "step": 28404
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0881215333938599,
      "learning_rate": 1.4094443137692727e-05,
      "loss": 2.2473,
      "step": 28405
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2355437278747559,
      "learning_rate": 1.4094067492319651e-05,
      "loss": 2.5466,
      "step": 28406
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0092391967773438,
      "learning_rate": 1.4093691840006027e-05,
      "loss": 2.2304,
      "step": 28407
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0243722200393677,
      "learning_rate": 1.4093316180752479e-05,
      "loss": 2.5029,
      "step": 28408
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0511537790298462,
      "learning_rate": 1.409294051455965e-05,
      "loss": 2.6199,
      "step": 28409
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0870983600616455,
      "learning_rate": 1.4092564841428174e-05,
      "loss": 2.4186,
      "step": 28410
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.8949235677719116,
      "learning_rate": 1.4092189161358689e-05,
      "loss": 2.3598,
      "step": 28411
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.031313419342041,
      "learning_rate": 1.4091813474351832e-05,
      "loss": 2.3417,
      "step": 28412
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0263162851333618,
      "learning_rate": 1.4091437780408244e-05,
      "loss": 2.4219,
      "step": 28413
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0172260999679565,
      "learning_rate": 1.4091062079528553e-05,
      "loss": 2.4925,
      "step": 28414
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.025786280632019,
      "learning_rate": 1.4090686371713403e-05,
      "loss": 2.2362,
      "step": 28415
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.051712989807129,
      "learning_rate": 1.4090310656963425e-05,
      "loss": 2.5491,
      "step": 28416
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0575332641601562,
      "learning_rate": 1.408993493527926e-05,
      "loss": 2.1345,
      "step": 28417
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9955909252166748,
      "learning_rate": 1.4089559206661547e-05,
      "loss": 2.5658,
      "step": 28418
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0786561965942383,
      "learning_rate": 1.4089183471110919e-05,
      "loss": 2.4054,
      "step": 28419
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0443403720855713,
      "learning_rate": 1.4088807728628018e-05,
      "loss": 2.3177,
      "step": 28420
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0281692743301392,
      "learning_rate": 1.4088431979213476e-05,
      "loss": 2.4805,
      "step": 28421
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.090227484703064,
      "learning_rate": 1.4088056222867927e-05,
      "loss": 2.3847,
      "step": 28422
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.02260422706604,
      "learning_rate": 1.4087680459592016e-05,
      "loss": 2.524,
      "step": 28423
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1210731267929077,
      "learning_rate": 1.4087304689386376e-05,
      "loss": 2.4954,
      "step": 28424
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.045455813407898,
      "learning_rate": 1.4086928912251642e-05,
      "loss": 2.3851,
      "step": 28425
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0273447036743164,
      "learning_rate": 1.4086553128188458e-05,
      "loss": 2.2092,
      "step": 28426
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1225584745407104,
      "learning_rate": 1.4086177337197455e-05,
      "loss": 2.3527,
      "step": 28427
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.002488613128662,
      "learning_rate": 1.4085801539279271e-05,
      "loss": 2.5229,
      "step": 28428
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.090257167816162,
      "learning_rate": 1.4085425734434544e-05,
      "loss": 2.2538,
      "step": 28429
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9799830913543701,
      "learning_rate": 1.408504992266391e-05,
      "loss": 2.409,
      "step": 28430
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0055395364761353,
      "learning_rate": 1.4084674103968005e-05,
      "loss": 2.5198,
      "step": 28431
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0117731094360352,
      "learning_rate": 1.4084298278347472e-05,
      "loss": 2.4265,
      "step": 28432
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.285703182220459,
      "learning_rate": 1.4083922445802945e-05,
      "loss": 2.3549,
      "step": 28433
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0616263151168823,
      "learning_rate": 1.4083546606335055e-05,
      "loss": 2.4046,
      "step": 28434
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0123403072357178,
      "learning_rate": 1.4083170759944449e-05,
      "loss": 2.2999,
      "step": 28435
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0345079898834229,
      "learning_rate": 1.4082794906631758e-05,
      "loss": 2.396,
      "step": 28436
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9726755619049072,
      "learning_rate": 1.408241904639762e-05,
      "loss": 2.4309,
      "step": 28437
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0465301275253296,
      "learning_rate": 1.4082043179242672e-05,
      "loss": 2.3758,
      "step": 28438
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0636345148086548,
      "learning_rate": 1.4081667305167554e-05,
      "loss": 2.3912,
      "step": 28439
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.020691156387329,
      "learning_rate": 1.4081291424172903e-05,
      "loss": 2.1947,
      "step": 28440
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9167407155036926,
      "learning_rate": 1.4080915536259351e-05,
      "loss": 2.2205,
      "step": 28441
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1797930002212524,
      "learning_rate": 1.4080539641427543e-05,
      "loss": 2.4196,
      "step": 28442
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0428733825683594,
      "learning_rate": 1.4080163739678108e-05,
      "loss": 2.3847,
      "step": 28443
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1936534643173218,
      "learning_rate": 1.4079787831011688e-05,
      "loss": 2.3133,
      "step": 28444
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.102431297302246,
      "learning_rate": 1.407941191542892e-05,
      "loss": 2.7239,
      "step": 28445
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1216962337493896,
      "learning_rate": 1.4079035992930443e-05,
      "loss": 2.4134,
      "step": 28446
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1095869541168213,
      "learning_rate": 1.4078660063516888e-05,
      "loss": 2.244,
      "step": 28447
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1059372425079346,
      "learning_rate": 1.4078284127188899e-05,
      "loss": 2.2949,
      "step": 28448
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2410619258880615,
      "learning_rate": 1.407790818394711e-05,
      "loss": 2.3547,
      "step": 28449
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9664702415466309,
      "learning_rate": 1.4077532233792162e-05,
      "loss": 2.29,
      "step": 28450
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0096850395202637,
      "learning_rate": 1.4077156276724684e-05,
      "loss": 2.3584,
      "step": 28451
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1023985147476196,
      "learning_rate": 1.4076780312745322e-05,
      "loss": 2.4056,
      "step": 28452
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9979610443115234,
      "learning_rate": 1.4076404341854712e-05,
      "loss": 2.2999,
      "step": 28453
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.026400089263916,
      "learning_rate": 1.4076028364053487e-05,
      "loss": 2.528,
      "step": 28454
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0581341981887817,
      "learning_rate": 1.4075652379342288e-05,
      "loss": 2.4748,
      "step": 28455
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1412031650543213,
      "learning_rate": 1.4075276387721747e-05,
      "loss": 2.409,
      "step": 28456
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.306901216506958,
      "learning_rate": 1.4074900389192511e-05,
      "loss": 2.3521,
      "step": 28457
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.069832682609558,
      "learning_rate": 1.407452438375521e-05,
      "loss": 2.4374,
      "step": 28458
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.046722412109375,
      "learning_rate": 1.4074148371410486e-05,
      "loss": 2.1284,
      "step": 28459
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.060551404953003,
      "learning_rate": 1.4073772352158971e-05,
      "loss": 2.2591,
      "step": 28460
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0344440937042236,
      "learning_rate": 1.4073396326001305e-05,
      "loss": 2.2705,
      "step": 28461
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1414949893951416,
      "learning_rate": 1.4073020292938129e-05,
      "loss": 2.2098,
      "step": 28462
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1092902421951294,
      "learning_rate": 1.4072644252970076e-05,
      "loss": 2.4938,
      "step": 28463
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0205106735229492,
      "learning_rate": 1.4072268206097785e-05,
      "loss": 2.3418,
      "step": 28464
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1016091108322144,
      "learning_rate": 1.4071892152321894e-05,
      "loss": 2.4487,
      "step": 28465
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1143031120300293,
      "learning_rate": 1.4071516091643039e-05,
      "loss": 2.3056,
      "step": 28466
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.974602997303009,
      "learning_rate": 1.4071140024061857e-05,
      "loss": 2.4419,
      "step": 28467
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0762859582901,
      "learning_rate": 1.4070763949578989e-05,
      "loss": 2.3696,
      "step": 28468
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1814230680465698,
      "learning_rate": 1.407038786819507e-05,
      "loss": 2.3162,
      "step": 28469
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0929290056228638,
      "learning_rate": 1.4070011779910737e-05,
      "loss": 2.338,
      "step": 28470
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.053801417350769,
      "learning_rate": 1.4069635684726627e-05,
      "loss": 2.2622,
      "step": 28471
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2732782363891602,
      "learning_rate": 1.4069259582643384e-05,
      "loss": 2.3328,
      "step": 28472
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0059500932693481,
      "learning_rate": 1.406888347366164e-05,
      "loss": 2.5142,
      "step": 28473
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1180115938186646,
      "learning_rate": 1.4068507357782028e-05,
      "loss": 2.609,
      "step": 28474
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.137131690979004,
      "learning_rate": 1.4068131235005196e-05,
      "loss": 2.5447,
      "step": 28475
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1006873846054077,
      "learning_rate": 1.4067755105331775e-05,
      "loss": 2.6379,
      "step": 28476
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.012412190437317,
      "learning_rate": 1.4067378968762407e-05,
      "loss": 2.5462,
      "step": 28477
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2084275484085083,
      "learning_rate": 1.4067002825297722e-05,
      "loss": 2.3678,
      "step": 28478
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.095363736152649,
      "learning_rate": 1.4066626674938366e-05,
      "loss": 2.4664,
      "step": 28479
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.069766879081726,
      "learning_rate": 1.4066250517684975e-05,
      "loss": 2.293,
      "step": 28480
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9927578568458557,
      "learning_rate": 1.406587435353818e-05,
      "loss": 2.4972,
      "step": 28481
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.021047830581665,
      "learning_rate": 1.4065498182498628e-05,
      "loss": 2.4643,
      "step": 28482
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.013598918914795,
      "learning_rate": 1.406512200456695e-05,
      "loss": 2.5818,
      "step": 28483
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9892606735229492,
      "learning_rate": 1.4064745819743786e-05,
      "loss": 2.3059,
      "step": 28484
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2133923768997192,
      "learning_rate": 1.4064369628029774e-05,
      "loss": 2.3412,
      "step": 28485
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0423659086227417,
      "learning_rate": 1.4063993429425555e-05,
      "loss": 2.3623,
      "step": 28486
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.139442801475525,
      "learning_rate": 1.406361722393176e-05,
      "loss": 2.3962,
      "step": 28487
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.184678077697754,
      "learning_rate": 1.4063241011549032e-05,
      "loss": 2.4033,
      "step": 28488
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9089497327804565,
      "learning_rate": 1.4062864792278004e-05,
      "loss": 2.2289,
      "step": 28489
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0541189908981323,
      "learning_rate": 1.4062488566119319e-05,
      "loss": 2.4548,
      "step": 28490
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0534840822219849,
      "learning_rate": 1.4062112333073614e-05,
      "loss": 2.365,
      "step": 28491
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9701035022735596,
      "learning_rate": 1.4061736093141523e-05,
      "loss": 2.4421,
      "step": 28492
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0185991525650024,
      "learning_rate": 1.4061359846323686e-05,
      "loss": 2.1957,
      "step": 28493
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0364034175872803,
      "learning_rate": 1.4060983592620743e-05,
      "loss": 2.1657,
      "step": 28494
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0111769437789917,
      "learning_rate": 1.4060607332033329e-05,
      "loss": 2.6547,
      "step": 28495
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1057875156402588,
      "learning_rate": 1.4060231064562082e-05,
      "loss": 2.2556,
      "step": 28496
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0025049448013306,
      "learning_rate": 1.405985479020764e-05,
      "loss": 2.3473,
      "step": 28497
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0350685119628906,
      "learning_rate": 1.405947850897064e-05,
      "loss": 2.3226,
      "step": 28498
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0794737339019775,
      "learning_rate": 1.4059102220851726e-05,
      "loss": 2.3328,
      "step": 28499
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9885159730911255,
      "learning_rate": 1.4058725925851528e-05,
      "loss": 2.5405,
      "step": 28500
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0135301351547241,
      "learning_rate": 1.4058349623970688e-05,
      "loss": 2.1939,
      "step": 28501
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0073766708374023,
      "learning_rate": 1.4057973315209843e-05,
      "loss": 2.2872,
      "step": 28502
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1466008424758911,
      "learning_rate": 1.4057596999569632e-05,
      "loss": 2.465,
      "step": 28503
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1771109104156494,
      "learning_rate": 1.4057220677050692e-05,
      "loss": 2.5204,
      "step": 28504
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0211118459701538,
      "learning_rate": 1.4056844347653663e-05,
      "loss": 2.4284,
      "step": 28505
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0801331996917725,
      "learning_rate": 1.4056468011379177e-05,
      "loss": 2.5075,
      "step": 28506
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1449469327926636,
      "learning_rate": 1.4056091668227878e-05,
      "loss": 2.3692,
      "step": 28507
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0382723808288574,
      "learning_rate": 1.4055715318200401e-05,
      "loss": 2.7641,
      "step": 28508
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0089493989944458,
      "learning_rate": 1.4055338961297385e-05,
      "loss": 2.2088,
      "step": 28509
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.071994662284851,
      "learning_rate": 1.4054962597519468e-05,
      "loss": 2.3375,
      "step": 28510
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1564826965332031,
      "learning_rate": 1.4054586226867288e-05,
      "loss": 2.5124,
      "step": 28511
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0679577589035034,
      "learning_rate": 1.4054209849341484e-05,
      "loss": 2.4779,
      "step": 28512
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.096754550933838,
      "learning_rate": 1.4053833464942695e-05,
      "loss": 2.4991,
      "step": 28513
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.170108437538147,
      "learning_rate": 1.4053457073671555e-05,
      "loss": 2.4428,
      "step": 28514
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9569265246391296,
      "learning_rate": 1.4053080675528702e-05,
      "loss": 2.2899,
      "step": 28515
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1947979927062988,
      "learning_rate": 1.405270427051478e-05,
      "loss": 2.6275,
      "step": 28516
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0006479024887085,
      "learning_rate": 1.405232785863042e-05,
      "loss": 2.2367,
      "step": 28517
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9441274404525757,
      "learning_rate": 1.4051951439876267e-05,
      "loss": 2.3554,
      "step": 28518
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0956352949142456,
      "learning_rate": 1.4051575014252958e-05,
      "loss": 2.1834,
      "step": 28519
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.238173484802246,
      "learning_rate": 1.4051198581761125e-05,
      "loss": 2.1481,
      "step": 28520
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9810798764228821,
      "learning_rate": 1.4050822142401412e-05,
      "loss": 2.4336,
      "step": 28521
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9987525343894958,
      "learning_rate": 1.4050445696174455e-05,
      "loss": 2.5865,
      "step": 28522
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0387041568756104,
      "learning_rate": 1.4050069243080893e-05,
      "loss": 2.3586,
      "step": 28523
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.043769359588623,
      "learning_rate": 1.4049692783121362e-05,
      "loss": 2.3593,
      "step": 28524
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0876494646072388,
      "learning_rate": 1.4049316316296504e-05,
      "loss": 2.2869,
      "step": 28525
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2340481281280518,
      "learning_rate": 1.4048939842606953e-05,
      "loss": 2.3358,
      "step": 28526
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0489912033081055,
      "learning_rate": 1.4048563362053349e-05,
      "loss": 2.4528,
      "step": 28527
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4332704544067383,
      "learning_rate": 1.4048186874636332e-05,
      "loss": 2.377,
      "step": 28528
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1561716794967651,
      "learning_rate": 1.4047810380356537e-05,
      "loss": 2.5223,
      "step": 28529
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0591075420379639,
      "learning_rate": 1.4047433879214604e-05,
      "loss": 2.4488,
      "step": 28530
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1145824193954468,
      "learning_rate": 1.4047057371211175e-05,
      "loss": 2.6846,
      "step": 28531
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9037435054779053,
      "learning_rate": 1.4046680856346883e-05,
      "loss": 2.537,
      "step": 28532
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.048460602760315,
      "learning_rate": 1.4046304334622365e-05,
      "loss": 2.7027,
      "step": 28533
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0292165279388428,
      "learning_rate": 1.4045927806038265e-05,
      "loss": 2.4447,
      "step": 28534
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2494995594024658,
      "learning_rate": 1.4045551270595216e-05,
      "loss": 2.1279,
      "step": 28535
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2553874254226685,
      "learning_rate": 1.4045174728293862e-05,
      "loss": 2.416,
      "step": 28536
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0073248147964478,
      "learning_rate": 1.4044798179134835e-05,
      "loss": 2.4778,
      "step": 28537
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.913433313369751,
      "learning_rate": 1.4044421623118781e-05,
      "loss": 2.2586,
      "step": 28538
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.143872618675232,
      "learning_rate": 1.4044045060246329e-05,
      "loss": 2.3531,
      "step": 28539
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9835883975028992,
      "learning_rate": 1.4043668490518125e-05,
      "loss": 2.4787,
      "step": 28540
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.182894229888916,
      "learning_rate": 1.4043291913934805e-05,
      "loss": 2.6841,
      "step": 28541
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0698214769363403,
      "learning_rate": 1.4042915330497004e-05,
      "loss": 2.5293,
      "step": 28542
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1682255268096924,
      "learning_rate": 1.4042538740205368e-05,
      "loss": 2.2164,
      "step": 28543
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1068224906921387,
      "learning_rate": 1.4042162143060524e-05,
      "loss": 2.4726,
      "step": 28544
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0681471824645996,
      "learning_rate": 1.4041785539063124e-05,
      "loss": 2.383,
      "step": 28545
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.006966233253479,
      "learning_rate": 1.40414089282138e-05,
      "loss": 2.4537,
      "step": 28546
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1112782955169678,
      "learning_rate": 1.4041032310513187e-05,
      "loss": 2.4676,
      "step": 28547
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0534653663635254,
      "learning_rate": 1.4040655685961926e-05,
      "loss": 2.2541,
      "step": 28548
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2490954399108887,
      "learning_rate": 1.4040279054560658e-05,
      "loss": 2.504,
      "step": 28549
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.07296621799469,
      "learning_rate": 1.403990241631002e-05,
      "loss": 2.3813,
      "step": 28550
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1180702447891235,
      "learning_rate": 1.4039525771210649e-05,
      "loss": 2.4607,
      "step": 28551
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.293533205986023,
      "learning_rate": 1.4039149119263184e-05,
      "loss": 2.1968,
      "step": 28552
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.938603401184082,
      "learning_rate": 1.403877246046827e-05,
      "loss": 2.2773,
      "step": 28553
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1503260135650635,
      "learning_rate": 1.4038395794826533e-05,
      "loss": 2.2392,
      "step": 28554
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7010067701339722,
      "learning_rate": 1.4038019122338619e-05,
      "loss": 2.2796,
      "step": 28555
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0840723514556885,
      "learning_rate": 1.4037642443005171e-05,
      "loss": 2.4501,
      "step": 28556
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0576610565185547,
      "learning_rate": 1.4037265756826816e-05,
      "loss": 2.3609,
      "step": 28557
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.079654335975647,
      "learning_rate": 1.4036889063804204e-05,
      "loss": 2.5427,
      "step": 28558
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0169605016708374,
      "learning_rate": 1.4036512363937966e-05,
      "loss": 2.2518,
      "step": 28559
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9458981156349182,
      "learning_rate": 1.4036135657228744e-05,
      "loss": 2.6605,
      "step": 28560
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9564598798751831,
      "learning_rate": 1.4035758943677176e-05,
      "loss": 2.1904,
      "step": 28561
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9942647218704224,
      "learning_rate": 1.4035382223283899e-05,
      "loss": 2.2345,
      "step": 28562
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0941311120986938,
      "learning_rate": 1.4035005496049554e-05,
      "loss": 2.5129,
      "step": 28563
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9421646595001221,
      "learning_rate": 1.403462876197478e-05,
      "loss": 2.3359,
      "step": 28564
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.08619225025177,
      "learning_rate": 1.4034252021060213e-05,
      "loss": 2.3582,
      "step": 28565
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.984085202217102,
      "learning_rate": 1.4033875273306495e-05,
      "loss": 2.4446,
      "step": 28566
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4255069494247437,
      "learning_rate": 1.403349851871426e-05,
      "loss": 2.3859,
      "step": 28567
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3651525974273682,
      "learning_rate": 1.4033121757284152e-05,
      "loss": 2.4481,
      "step": 28568
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1501917839050293,
      "learning_rate": 1.4032744989016805e-05,
      "loss": 2.1623,
      "step": 28569
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9485304355621338,
      "learning_rate": 1.4032368213912862e-05,
      "loss": 2.5104,
      "step": 28570
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.062928557395935,
      "learning_rate": 1.403199143197296e-05,
      "loss": 2.3154,
      "step": 28571
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1065198183059692,
      "learning_rate": 1.4031614643197736e-05,
      "loss": 2.4178,
      "step": 28572
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0895146131515503,
      "learning_rate": 1.4031237847587828e-05,
      "loss": 2.4656,
      "step": 28573
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0532640218734741,
      "learning_rate": 1.403086104514388e-05,
      "loss": 2.3053,
      "step": 28574
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0185956954956055,
      "learning_rate": 1.4030484235866528e-05,
      "loss": 2.2124,
      "step": 28575
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0897941589355469,
      "learning_rate": 1.4030107419756409e-05,
      "loss": 2.3929,
      "step": 28576
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0410356521606445,
      "learning_rate": 1.4029730596814164e-05,
      "loss": 2.5121,
      "step": 28577
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.212269902229309,
      "learning_rate": 1.4029353767040434e-05,
      "loss": 2.3498,
      "step": 28578
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0316764116287231,
      "learning_rate": 1.4028976930435852e-05,
      "loss": 2.2721,
      "step": 28579
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1034451723098755,
      "learning_rate": 1.4028600087001061e-05,
      "loss": 2.3985,
      "step": 28580
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0442222356796265,
      "learning_rate": 1.4028223236736698e-05,
      "loss": 2.5014,
      "step": 28581
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.081267237663269,
      "learning_rate": 1.4027846379643402e-05,
      "loss": 2.4722,
      "step": 28582
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0794402360916138,
      "learning_rate": 1.4027469515721811e-05,
      "loss": 2.1824,
      "step": 28583
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0084996223449707,
      "learning_rate": 1.4027092644972568e-05,
      "loss": 2.4641,
      "step": 28584
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0606051683425903,
      "learning_rate": 1.402671576739631e-05,
      "loss": 2.4584,
      "step": 28585
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.990231454372406,
      "learning_rate": 1.4026338882993676e-05,
      "loss": 2.4255,
      "step": 28586
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0225176811218262,
      "learning_rate": 1.40259619917653e-05,
      "loss": 2.3919,
      "step": 28587
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0078239440917969,
      "learning_rate": 1.4025585093711826e-05,
      "loss": 2.2321,
      "step": 28588
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0282176733016968,
      "learning_rate": 1.4025208188833893e-05,
      "loss": 2.4907,
      "step": 28589
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0381181240081787,
      "learning_rate": 1.4024831277132138e-05,
      "loss": 2.1768,
      "step": 28590
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0189481973648071,
      "learning_rate": 1.4024454358607202e-05,
      "loss": 2.4975,
      "step": 28591
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9977965354919434,
      "learning_rate": 1.4024077433259721e-05,
      "loss": 2.4111,
      "step": 28592
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9392869472503662,
      "learning_rate": 1.4023700501090339e-05,
      "loss": 2.4163,
      "step": 28593
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1115394830703735,
      "learning_rate": 1.402332356209969e-05,
      "loss": 2.4448,
      "step": 28594
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.127760410308838,
      "learning_rate": 1.4022946616288413e-05,
      "loss": 2.2999,
      "step": 28595
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0215450525283813,
      "learning_rate": 1.402256966365715e-05,
      "loss": 2.1372,
      "step": 28596
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0527135133743286,
      "learning_rate": 1.402219270420654e-05,
      "loss": 2.5916,
      "step": 28597
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.321516990661621,
      "learning_rate": 1.402181573793722e-05,
      "loss": 2.493,
      "step": 28598
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.02267324924469,
      "learning_rate": 1.4021438764849831e-05,
      "loss": 2.2109,
      "step": 28599
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1219996213912964,
      "learning_rate": 1.402106178494501e-05,
      "loss": 2.5545,
      "step": 28600
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0624065399169922,
      "learning_rate": 1.4020684798223398e-05,
      "loss": 2.6293,
      "step": 28601
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0779087543487549,
      "learning_rate": 1.4020307804685633e-05,
      "loss": 2.3355,
      "step": 28602
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.033014178276062,
      "learning_rate": 1.4019930804332353e-05,
      "loss": 2.3302,
      "step": 28603
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9900583028793335,
      "learning_rate": 1.4019553797164203e-05,
      "loss": 2.4737,
      "step": 28604
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0857529640197754,
      "learning_rate": 1.4019176783181814e-05,
      "loss": 2.5831,
      "step": 28605
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0620886087417603,
      "learning_rate": 1.4018799762385829e-05,
      "loss": 2.4636,
      "step": 28606
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0325119495391846,
      "learning_rate": 1.4018422734776887e-05,
      "loss": 2.3525,
      "step": 28607
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.890071451663971,
      "learning_rate": 1.4018045700355626e-05,
      "loss": 2.1404,
      "step": 28608
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.130666971206665,
      "learning_rate": 1.401766865912269e-05,
      "loss": 2.4792,
      "step": 28609
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0621273517608643,
      "learning_rate": 1.4017291611078712e-05,
      "loss": 2.4229,
      "step": 28610
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9402647614479065,
      "learning_rate": 1.4016914556224332e-05,
      "loss": 2.5323,
      "step": 28611
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0281782150268555,
      "learning_rate": 1.4016537494560195e-05,
      "loss": 2.3659,
      "step": 28612
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0268189907073975,
      "learning_rate": 1.4016160426086933e-05,
      "loss": 2.2304,
      "step": 28613
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.002665400505066,
      "learning_rate": 1.4015783350805187e-05,
      "loss": 2.474,
      "step": 28614
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9408740401268005,
      "learning_rate": 1.40154062687156e-05,
      "loss": 2.3313,
      "step": 28615
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9643930196762085,
      "learning_rate": 1.4015029179818806e-05,
      "loss": 2.3994,
      "step": 28616
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1729705333709717,
      "learning_rate": 1.4014652084115452e-05,
      "loss": 2.3752,
      "step": 28617
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1677418947219849,
      "learning_rate": 1.4014274981606169e-05,
      "loss": 2.4456,
      "step": 28618
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.100175380706787,
      "learning_rate": 1.40138978722916e-05,
      "loss": 2.2993,
      "step": 28619
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1039365530014038,
      "learning_rate": 1.4013520756172386e-05,
      "loss": 2.4277,
      "step": 28620
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.062679648399353,
      "learning_rate": 1.401314363324916e-05,
      "loss": 2.3485,
      "step": 28621
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9833046793937683,
      "learning_rate": 1.4012766503522569e-05,
      "loss": 2.698,
      "step": 28622
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9929099082946777,
      "learning_rate": 1.4012389366993247e-05,
      "loss": 2.4022,
      "step": 28623
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0519613027572632,
      "learning_rate": 1.4012012223661837e-05,
      "loss": 2.4755,
      "step": 28624
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0622354745864868,
      "learning_rate": 1.4011635073528976e-05,
      "loss": 2.3804,
      "step": 28625
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.223482370376587,
      "learning_rate": 1.4011257916595303e-05,
      "loss": 2.2524,
      "step": 28626
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1547915935516357,
      "learning_rate": 1.401088075286146e-05,
      "loss": 2.4373,
      "step": 28627
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.09833562374115,
      "learning_rate": 1.4010503582328081e-05,
      "loss": 2.2353,
      "step": 28628
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.984881579875946,
      "learning_rate": 1.4010126404995811e-05,
      "loss": 2.4442,
      "step": 28629
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9892162084579468,
      "learning_rate": 1.4009749220865293e-05,
      "loss": 2.385,
      "step": 28630
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0458109378814697,
      "learning_rate": 1.4009372029937157e-05,
      "loss": 2.5304,
      "step": 28631
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0988619327545166,
      "learning_rate": 1.4008994832212044e-05,
      "loss": 2.48,
      "step": 28632
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0107563734054565,
      "learning_rate": 1.40086176276906e-05,
      "loss": 2.2114,
      "step": 28633
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0910063982009888,
      "learning_rate": 1.4008240416373457e-05,
      "loss": 2.3356,
      "step": 28634
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.111253261566162,
      "learning_rate": 1.400786319826126e-05,
      "loss": 2.3334,
      "step": 28635
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1234463453292847,
      "learning_rate": 1.4007485973354645e-05,
      "loss": 2.3357,
      "step": 28636
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2139873504638672,
      "learning_rate": 1.4007108741654256e-05,
      "loss": 2.3842,
      "step": 28637
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0690628290176392,
      "learning_rate": 1.4006731503160726e-05,
      "loss": 2.4051,
      "step": 28638
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1729882955551147,
      "learning_rate": 1.40063542578747e-05,
      "loss": 2.5679,
      "step": 28639
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1114351749420166,
      "learning_rate": 1.4005977005796814e-05,
      "loss": 2.4739,
      "step": 28640
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9839876294136047,
      "learning_rate": 1.400559974692771e-05,
      "loss": 2.1757,
      "step": 28641
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9372967481613159,
      "learning_rate": 1.4005222481268023e-05,
      "loss": 2.4116,
      "step": 28642
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1509370803833008,
      "learning_rate": 1.40048452088184e-05,
      "loss": 2.3269,
      "step": 28643
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1234455108642578,
      "learning_rate": 1.4004467929579477e-05,
      "loss": 2.4314,
      "step": 28644
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.092592716217041,
      "learning_rate": 1.4004090643551892e-05,
      "loss": 2.3881,
      "step": 28645
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.106737732887268,
      "learning_rate": 1.4003713350736286e-05,
      "loss": 2.46,
      "step": 28646
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0988860130310059,
      "learning_rate": 1.4003336051133299e-05,
      "loss": 2.2629,
      "step": 28647
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.06280517578125,
      "learning_rate": 1.400295874474357e-05,
      "loss": 2.3752,
      "step": 28648
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.072475790977478,
      "learning_rate": 1.4002581431567737e-05,
      "loss": 2.347,
      "step": 28649
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.162465214729309,
      "learning_rate": 1.4002204111606444e-05,
      "loss": 2.4718,
      "step": 28650
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1115094423294067,
      "learning_rate": 1.4001826784860324e-05,
      "loss": 2.2456,
      "step": 28651
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.065453290939331,
      "learning_rate": 1.4001449451330022e-05,
      "loss": 2.1061,
      "step": 28652
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4816292524337769,
      "learning_rate": 1.4001072111016179e-05,
      "loss": 2.3061,
      "step": 28653
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1713553667068481,
      "learning_rate": 1.4000694763919432e-05,
      "loss": 2.3021,
      "step": 28654
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1581755876541138,
      "learning_rate": 1.4000317410040415e-05,
      "loss": 2.3261,
      "step": 28655
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.126725673675537,
      "learning_rate": 1.399994004937978e-05,
      "loss": 2.3816,
      "step": 28656
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.064551830291748,
      "learning_rate": 1.3999562681938162e-05,
      "loss": 2.6424,
      "step": 28657
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0512974262237549,
      "learning_rate": 1.3999185307716193e-05,
      "loss": 2.2001,
      "step": 28658
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0715206861495972,
      "learning_rate": 1.3998807926714522e-05,
      "loss": 2.5595,
      "step": 28659
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0285801887512207,
      "learning_rate": 1.3998430538933783e-05,
      "loss": 2.4445,
      "step": 28660
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9583326578140259,
      "learning_rate": 1.399805314437462e-05,
      "loss": 2.2482,
      "step": 28661
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.988016664981842,
      "learning_rate": 1.399767574303767e-05,
      "loss": 2.2878,
      "step": 28662
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0915923118591309,
      "learning_rate": 1.3997298334923575e-05,
      "loss": 2.2241,
      "step": 28663
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2239669561386108,
      "learning_rate": 1.3996920920032975e-05,
      "loss": 2.3127,
      "step": 28664
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0902280807495117,
      "learning_rate": 1.3996543498366507e-05,
      "loss": 2.5636,
      "step": 28665
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0154443979263306,
      "learning_rate": 1.399616606992481e-05,
      "loss": 2.5489,
      "step": 28666
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0570309162139893,
      "learning_rate": 1.399578863470853e-05,
      "loss": 2.2761,
      "step": 28667
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0209856033325195,
      "learning_rate": 1.3995411192718302e-05,
      "loss": 2.213,
      "step": 28668
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0473909378051758,
      "learning_rate": 1.3995033743954766e-05,
      "loss": 2.3695,
      "step": 28669
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.079936146736145,
      "learning_rate": 1.3994656288418562e-05,
      "loss": 2.5294,
      "step": 28670
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0287929773330688,
      "learning_rate": 1.3994278826110335e-05,
      "loss": 2.2809,
      "step": 28671
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0745530128479004,
      "learning_rate": 1.3993901357030714e-05,
      "loss": 2.2144,
      "step": 28672
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0381391048431396,
      "learning_rate": 1.399352388118035e-05,
      "loss": 2.4196,
      "step": 28673
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9672221541404724,
      "learning_rate": 1.3993146398559875e-05,
      "loss": 2.3536,
      "step": 28674
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.001768708229065,
      "learning_rate": 1.3992768909169936e-05,
      "loss": 2.4757,
      "step": 28675
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1946535110473633,
      "learning_rate": 1.3992391413011168e-05,
      "loss": 2.0761,
      "step": 28676
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0179206132888794,
      "learning_rate": 1.3992013910084213e-05,
      "loss": 2.4396,
      "step": 28677
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0159143209457397,
      "learning_rate": 1.399163640038971e-05,
      "loss": 2.277,
      "step": 28678
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1869277954101562,
      "learning_rate": 1.3991258883928296e-05,
      "loss": 2.2328,
      "step": 28679
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0771832466125488,
      "learning_rate": 1.3990881360700617e-05,
      "loss": 2.2551,
      "step": 28680
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0535194873809814,
      "learning_rate": 1.399050383070731e-05,
      "loss": 2.4218,
      "step": 28681
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0168066024780273,
      "learning_rate": 1.3990126293949013e-05,
      "loss": 2.3412,
      "step": 28682
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0181947946548462,
      "learning_rate": 1.3989748750426374e-05,
      "loss": 2.5909,
      "step": 28683
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2984687089920044,
      "learning_rate": 1.398937120014002e-05,
      "loss": 2.4444,
      "step": 28684
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0728005170822144,
      "learning_rate": 1.3988993643090602e-05,
      "loss": 2.3523,
      "step": 28685
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0544096231460571,
      "learning_rate": 1.3988616079278756e-05,
      "loss": 2.5256,
      "step": 28686
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0674877166748047,
      "learning_rate": 1.3988238508705123e-05,
      "loss": 2.3027,
      "step": 28687
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.183712124824524,
      "learning_rate": 1.398786093137034e-05,
      "loss": 2.7361,
      "step": 28688
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1082545518875122,
      "learning_rate": 1.3987483347275053e-05,
      "loss": 2.5397,
      "step": 28689
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.011712908744812,
      "learning_rate": 1.3987105756419901e-05,
      "loss": 2.2098,
      "step": 28690
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0138401985168457,
      "learning_rate": 1.3986728158805516e-05,
      "loss": 2.1646,
      "step": 28691
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.037908911705017,
      "learning_rate": 1.3986350554432546e-05,
      "loss": 2.3808,
      "step": 28692
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0324382781982422,
      "learning_rate": 1.398597294330163e-05,
      "loss": 2.5016,
      "step": 28693
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9937072396278381,
      "learning_rate": 1.3985595325413406e-05,
      "loss": 2.3341,
      "step": 28694
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1210415363311768,
      "learning_rate": 1.3985217700768517e-05,
      "loss": 2.3776,
      "step": 28695
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9961350560188293,
      "learning_rate": 1.39848400693676e-05,
      "loss": 2.411,
      "step": 28696
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0491554737091064,
      "learning_rate": 1.39844624312113e-05,
      "loss": 2.2871,
      "step": 28697
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1020950078964233,
      "learning_rate": 1.398408478630025e-05,
      "loss": 2.3974,
      "step": 28698
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0071529150009155,
      "learning_rate": 1.3983707134635098e-05,
      "loss": 2.304,
      "step": 28699
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1131150722503662,
      "learning_rate": 1.3983329476216478e-05,
      "loss": 2.1864,
      "step": 28700
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0015753507614136,
      "learning_rate": 1.3982951811045032e-05,
      "loss": 2.4256,
      "step": 28701
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0651108026504517,
      "learning_rate": 1.3982574139121404e-05,
      "loss": 2.3488,
      "step": 28702
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.301146149635315,
      "learning_rate": 1.398219646044623e-05,
      "loss": 2.3529,
      "step": 28703
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0715211629867554,
      "learning_rate": 1.3981818775020149e-05,
      "loss": 2.3865,
      "step": 28704
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.115631341934204,
      "learning_rate": 1.3981441082843805e-05,
      "loss": 2.3773,
      "step": 28705
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9776172041893005,
      "learning_rate": 1.3981063383917837e-05,
      "loss": 2.3065,
      "step": 28706
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.099515438079834,
      "learning_rate": 1.3980685678242885e-05,
      "loss": 2.2987,
      "step": 28707
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0050983428955078,
      "learning_rate": 1.398030796581959e-05,
      "loss": 2.4025,
      "step": 28708
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1691681146621704,
      "learning_rate": 1.3979930246648591e-05,
      "loss": 2.1922,
      "step": 28709
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9830480813980103,
      "learning_rate": 1.3979552520730533e-05,
      "loss": 2.2082,
      "step": 28710
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2236437797546387,
      "learning_rate": 1.397917478806605e-05,
      "loss": 2.3303,
      "step": 28711
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1172395944595337,
      "learning_rate": 1.3978797048655786e-05,
      "loss": 2.5892,
      "step": 28712
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4203466176986694,
      "learning_rate": 1.3978419302500377e-05,
      "loss": 2.553,
      "step": 28713
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.255910038948059,
      "learning_rate": 1.397804154960047e-05,
      "loss": 2.5484,
      "step": 28714
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2342529296875,
      "learning_rate": 1.3977663789956702e-05,
      "loss": 2.3734,
      "step": 28715
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9311280846595764,
      "learning_rate": 1.3977286023569714e-05,
      "loss": 2.1643,
      "step": 28716
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0732535123825073,
      "learning_rate": 1.3976908250440145e-05,
      "loss": 2.6309,
      "step": 28717
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9306190609931946,
      "learning_rate": 1.3976530470568635e-05,
      "loss": 2.1894,
      "step": 28718
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0866787433624268,
      "learning_rate": 1.3976152683955829e-05,
      "loss": 2.3751,
      "step": 28719
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1240381002426147,
      "learning_rate": 1.397577489060236e-05,
      "loss": 2.4224,
      "step": 28720
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1572743654251099,
      "learning_rate": 1.3975397090508876e-05,
      "loss": 2.4183,
      "step": 28721
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0662975311279297,
      "learning_rate": 1.3975019283676013e-05,
      "loss": 2.358,
      "step": 28722
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3340554237365723,
      "learning_rate": 1.3974641470104414e-05,
      "loss": 2.235,
      "step": 28723
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1265249252319336,
      "learning_rate": 1.3974263649794717e-05,
      "loss": 2.2268,
      "step": 28724
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0138576030731201,
      "learning_rate": 1.3973885822747563e-05,
      "loss": 2.2708,
      "step": 28725
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1307079792022705,
      "learning_rate": 1.3973507988963596e-05,
      "loss": 2.4322,
      "step": 28726
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0730955600738525,
      "learning_rate": 1.3973130148443455e-05,
      "loss": 2.3229,
      "step": 28727
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.387195348739624,
      "learning_rate": 1.3972752301187775e-05,
      "loss": 2.4488,
      "step": 28728
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1727056503295898,
      "learning_rate": 1.3972374447197202e-05,
      "loss": 2.581,
      "step": 28729
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0548063516616821,
      "learning_rate": 1.3971996586472375e-05,
      "loss": 2.3836,
      "step": 28730
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0843161344528198,
      "learning_rate": 1.3971618719013937e-05,
      "loss": 2.3912,
      "step": 28731
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0969945192337036,
      "learning_rate": 1.3971240844822527e-05,
      "loss": 2.488,
      "step": 28732
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2546238899230957,
      "learning_rate": 1.397086296389878e-05,
      "loss": 2.5344,
      "step": 28733
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.190150260925293,
      "learning_rate": 1.3970485076243345e-05,
      "loss": 2.5194,
      "step": 28734
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.213114857673645,
      "learning_rate": 1.397010718185686e-05,
      "loss": 2.5128,
      "step": 28735
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9843619465827942,
      "learning_rate": 1.3969729280739967e-05,
      "loss": 2.2683,
      "step": 28736
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0025149583816528,
      "learning_rate": 1.3969351372893304e-05,
      "loss": 2.5895,
      "step": 28737
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1152116060256958,
      "learning_rate": 1.3968973458317509e-05,
      "loss": 2.3872,
      "step": 28738
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0000238418579102,
      "learning_rate": 1.3968595537013227e-05,
      "loss": 2.5067,
      "step": 28739
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0903786420822144,
      "learning_rate": 1.3968217608981101e-05,
      "loss": 2.4233,
      "step": 28740
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0592113733291626,
      "learning_rate": 1.3967839674221763e-05,
      "loss": 2.422,
      "step": 28741
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0643212795257568,
      "learning_rate": 1.3967461732735863e-05,
      "loss": 2.3871,
      "step": 28742
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0939388275146484,
      "learning_rate": 1.3967083784524037e-05,
      "loss": 2.2556,
      "step": 28743
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9943174123764038,
      "learning_rate": 1.3966705829586928e-05,
      "loss": 2.2689,
      "step": 28744
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.120245337486267,
      "learning_rate": 1.3966327867925173e-05,
      "loss": 2.4232,
      "step": 28745
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0836808681488037,
      "learning_rate": 1.3965949899539415e-05,
      "loss": 2.3431,
      "step": 28746
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1821975708007812,
      "learning_rate": 1.3965571924430296e-05,
      "loss": 2.2554,
      "step": 28747
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9679889678955078,
      "learning_rate": 1.3965193942598454e-05,
      "loss": 2.39,
      "step": 28748
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0209436416625977,
      "learning_rate": 1.3964815954044535e-05,
      "loss": 2.5013,
      "step": 28749
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.044377326965332,
      "learning_rate": 1.3964437958769172e-05,
      "loss": 2.482,
      "step": 28750
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0534182786941528,
      "learning_rate": 1.3964059956773013e-05,
      "loss": 2.4592,
      "step": 28751
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0123008489608765,
      "learning_rate": 1.3963681948056693e-05,
      "loss": 2.2667,
      "step": 28752
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1593204736709595,
      "learning_rate": 1.3963303932620858e-05,
      "loss": 2.3822,
      "step": 28753
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0546796321868896,
      "learning_rate": 1.3962925910466143e-05,
      "loss": 2.3155,
      "step": 28754
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0734710693359375,
      "learning_rate": 1.3962547881593195e-05,
      "loss": 2.4165,
      "step": 28755
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1011896133422852,
      "learning_rate": 1.3962169846002654e-05,
      "loss": 2.3229,
      "step": 28756
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1064554452896118,
      "learning_rate": 1.3961791803695155e-05,
      "loss": 2.555,
      "step": 28757
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.113219141960144,
      "learning_rate": 1.3961413754671347e-05,
      "loss": 2.4591,
      "step": 28758
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1057618856430054,
      "learning_rate": 1.3961035698931864e-05,
      "loss": 2.4593,
      "step": 28759
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1677254438400269,
      "learning_rate": 1.396065763647735e-05,
      "loss": 2.6081,
      "step": 28760
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0428025722503662,
      "learning_rate": 1.3960279567308446e-05,
      "loss": 2.3834,
      "step": 28761
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0262516736984253,
      "learning_rate": 1.3959901491425793e-05,
      "loss": 2.5855,
      "step": 28762
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9812832474708557,
      "learning_rate": 1.395952340883003e-05,
      "loss": 2.2983,
      "step": 28763
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.129599690437317,
      "learning_rate": 1.3959145319521802e-05,
      "loss": 2.2974,
      "step": 28764
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9940418601036072,
      "learning_rate": 1.3958767223501743e-05,
      "loss": 2.2748,
      "step": 28765
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1221644878387451,
      "learning_rate": 1.39583891207705e-05,
      "loss": 2.3907,
      "step": 28766
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0310182571411133,
      "learning_rate": 1.3958011011328714e-05,
      "loss": 2.2807,
      "step": 28767
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1189651489257812,
      "learning_rate": 1.3957632895177022e-05,
      "loss": 2.2522,
      "step": 28768
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1155959367752075,
      "learning_rate": 1.3957254772316071e-05,
      "loss": 2.4071,
      "step": 28769
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9471438527107239,
      "learning_rate": 1.3956876642746498e-05,
      "loss": 2.2762,
      "step": 28770
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9873204231262207,
      "learning_rate": 1.395649850646894e-05,
      "loss": 2.1617,
      "step": 28771
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9759014844894409,
      "learning_rate": 1.3956120363484045e-05,
      "loss": 2.2225,
      "step": 28772
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2974976301193237,
      "learning_rate": 1.3955742213792452e-05,
      "loss": 2.4455,
      "step": 28773
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9529072642326355,
      "learning_rate": 1.39553640573948e-05,
      "loss": 2.4253,
      "step": 28774
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0435346364974976,
      "learning_rate": 1.3954985894291733e-05,
      "loss": 2.3249,
      "step": 28775
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0611982345581055,
      "learning_rate": 1.3954607724483893e-05,
      "loss": 2.4802,
      "step": 28776
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1983578205108643,
      "learning_rate": 1.3954229547971916e-05,
      "loss": 2.4763,
      "step": 28777
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1057735681533813,
      "learning_rate": 1.3953851364756446e-05,
      "loss": 2.4381,
      "step": 28778
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1627737283706665,
      "learning_rate": 1.3953473174838123e-05,
      "loss": 2.4959,
      "step": 28779
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1337062120437622,
      "learning_rate": 1.3953094978217592e-05,
      "loss": 2.798,
      "step": 28780
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1744345426559448,
      "learning_rate": 1.395271677489549e-05,
      "loss": 2.5135,
      "step": 28781
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0485416650772095,
      "learning_rate": 1.395233856487246e-05,
      "loss": 2.5787,
      "step": 28782
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0477466583251953,
      "learning_rate": 1.3951960348149142e-05,
      "loss": 2.4243,
      "step": 28783
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1692942380905151,
      "learning_rate": 1.395158212472618e-05,
      "loss": 2.3663,
      "step": 28784
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.105069637298584,
      "learning_rate": 1.3951203894604211e-05,
      "loss": 2.2096,
      "step": 28785
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1266684532165527,
      "learning_rate": 1.395082565778388e-05,
      "loss": 2.4757,
      "step": 28786
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1479610204696655,
      "learning_rate": 1.3950447414265825e-05,
      "loss": 2.5176,
      "step": 28787
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.976459264755249,
      "learning_rate": 1.3950069164050687e-05,
      "loss": 2.3317,
      "step": 28788
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0673518180847168,
      "learning_rate": 1.3949690907139112e-05,
      "loss": 2.308,
      "step": 28789
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0336655378341675,
      "learning_rate": 1.3949312643531738e-05,
      "loss": 2.4328,
      "step": 28790
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1407279968261719,
      "learning_rate": 1.3948934373229204e-05,
      "loss": 2.3582,
      "step": 28791
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3726400136947632,
      "learning_rate": 1.3948556096232155e-05,
      "loss": 2.3271,
      "step": 28792
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0318750143051147,
      "learning_rate": 1.394817781254123e-05,
      "loss": 2.3575,
      "step": 28793
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.9673616290092468,
      "learning_rate": 1.3947799522157073e-05,
      "loss": 2.3648,
      "step": 28794
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.126442790031433,
      "learning_rate": 1.3947421225080324e-05,
      "loss": 2.3602,
      "step": 28795
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.012764573097229,
      "learning_rate": 1.3947042921311625e-05,
      "loss": 2.4951,
      "step": 28796
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.114668846130371,
      "learning_rate": 1.3946664610851613e-05,
      "loss": 2.5768,
      "step": 28797
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0620185136795044,
      "learning_rate": 1.3946286293700933e-05,
      "loss": 2.5925,
      "step": 28798
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2856632471084595,
      "learning_rate": 1.3945907969860227e-05,
      "loss": 2.6609,
      "step": 28799
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.0524513721466064,
      "learning_rate": 1.3945529639330133e-05,
      "loss": 2.3736,
      "step": 28800
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0837945938110352,
      "learning_rate": 1.3945151302111296e-05,
      "loss": 2.3808,
      "step": 28801
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9968463778495789,
      "learning_rate": 1.394477295820436e-05,
      "loss": 2.2713,
      "step": 28802
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9723106026649475,
      "learning_rate": 1.3944394607609959e-05,
      "loss": 2.4192,
      "step": 28803
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0152723789215088,
      "learning_rate": 1.3944016250328735e-05,
      "loss": 2.346,
      "step": 28804
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0696998834609985,
      "learning_rate": 1.3943637886361337e-05,
      "loss": 2.4748,
      "step": 28805
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0556950569152832,
      "learning_rate": 1.3943259515708399e-05,
      "loss": 2.4183,
      "step": 28806
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0741078853607178,
      "learning_rate": 1.3942881138370565e-05,
      "loss": 2.3749,
      "step": 28807
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.105244755744934,
      "learning_rate": 1.3942502754348479e-05,
      "loss": 2.4192,
      "step": 28808
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0343364477157593,
      "learning_rate": 1.394212436364278e-05,
      "loss": 2.2313,
      "step": 28809
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.947308361530304,
      "learning_rate": 1.3941745966254107e-05,
      "loss": 2.5506,
      "step": 28810
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0412565469741821,
      "learning_rate": 1.3941367562183106e-05,
      "loss": 2.2811,
      "step": 28811
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.095687747001648,
      "learning_rate": 1.3940989151430415e-05,
      "loss": 2.3127,
      "step": 28812
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0493965148925781,
      "learning_rate": 1.3940610733996677e-05,
      "loss": 2.5536,
      "step": 28813
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0678138732910156,
      "learning_rate": 1.3940232309882538e-05,
      "loss": 2.3874,
      "step": 28814
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0761280059814453,
      "learning_rate": 1.393985387908863e-05,
      "loss": 2.3002,
      "step": 28815
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.029954195022583,
      "learning_rate": 1.3939475441615602e-05,
      "loss": 2.485,
      "step": 28816
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9293516874313354,
      "learning_rate": 1.3939096997464092e-05,
      "loss": 2.4941,
      "step": 28817
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9901270270347595,
      "learning_rate": 1.3938718546634744e-05,
      "loss": 2.4628,
      "step": 28818
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.03733229637146,
      "learning_rate": 1.3938340089128198e-05,
      "loss": 2.4316,
      "step": 28819
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9358008503913879,
      "learning_rate": 1.3937961624945094e-05,
      "loss": 2.3868,
      "step": 28820
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0009280443191528,
      "learning_rate": 1.3937583154086076e-05,
      "loss": 2.4662,
      "step": 28821
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1101365089416504,
      "learning_rate": 1.3937204676551788e-05,
      "loss": 2.6513,
      "step": 28822
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.055818796157837,
      "learning_rate": 1.3936826192342867e-05,
      "loss": 2.4116,
      "step": 28823
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0088074207305908,
      "learning_rate": 1.3936447701459954e-05,
      "loss": 2.3744,
      "step": 28824
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0139952898025513,
      "learning_rate": 1.3936069203903696e-05,
      "loss": 2.3816,
      "step": 28825
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9934870004653931,
      "learning_rate": 1.3935690699674731e-05,
      "loss": 2.302,
      "step": 28826
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1436405181884766,
      "learning_rate": 1.39353121887737e-05,
      "loss": 2.3744,
      "step": 28827
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1111586093902588,
      "learning_rate": 1.393493367120125e-05,
      "loss": 2.3504,
      "step": 28828
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9789217710494995,
      "learning_rate": 1.3934555146958015e-05,
      "loss": 2.5042,
      "step": 28829
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1767719984054565,
      "learning_rate": 1.393417661604464e-05,
      "loss": 2.3723,
      "step": 28830
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0663673877716064,
      "learning_rate": 1.3933798078461768e-05,
      "loss": 2.5227,
      "step": 28831
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0596619844436646,
      "learning_rate": 1.3933419534210039e-05,
      "loss": 2.639,
      "step": 28832
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.201910376548767,
      "learning_rate": 1.3933040983290097e-05,
      "loss": 2.2292,
      "step": 28833
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0516413450241089,
      "learning_rate": 1.3932662425702583e-05,
      "loss": 2.3814,
      "step": 28834
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0966284275054932,
      "learning_rate": 1.3932283861448137e-05,
      "loss": 2.5531,
      "step": 28835
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1670594215393066,
      "learning_rate": 1.3931905290527403e-05,
      "loss": 2.2735,
      "step": 28836
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0197944641113281,
      "learning_rate": 1.393152671294102e-05,
      "loss": 2.4594,
      "step": 28837
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9962984919548035,
      "learning_rate": 1.3931148128689632e-05,
      "loss": 2.3724,
      "step": 28838
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1622081995010376,
      "learning_rate": 1.393076953777388e-05,
      "loss": 2.286,
      "step": 28839
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.997429370880127,
      "learning_rate": 1.3930390940194407e-05,
      "loss": 2.3196,
      "step": 28840
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1003679037094116,
      "learning_rate": 1.3930012335951852e-05,
      "loss": 2.6168,
      "step": 28841
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0924072265625,
      "learning_rate": 1.3929633725046862e-05,
      "loss": 2.495,
      "step": 28842
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1060376167297363,
      "learning_rate": 1.3929255107480073e-05,
      "loss": 2.3134,
      "step": 28843
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0135629177093506,
      "learning_rate": 1.392887648325213e-05,
      "loss": 2.3017,
      "step": 28844
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0339750051498413,
      "learning_rate": 1.3928497852363675e-05,
      "loss": 2.2782,
      "step": 28845
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.090756893157959,
      "learning_rate": 1.392811921481535e-05,
      "loss": 2.4076,
      "step": 28846
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1181998252868652,
      "learning_rate": 1.3927740570607795e-05,
      "loss": 2.3817,
      "step": 28847
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0187450647354126,
      "learning_rate": 1.3927361919741654e-05,
      "loss": 2.3722,
      "step": 28848
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6470187902450562,
      "learning_rate": 1.3926983262217566e-05,
      "loss": 2.3029,
      "step": 28849
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0562423467636108,
      "learning_rate": 1.3926604598036173e-05,
      "loss": 2.3288,
      "step": 28850
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.005700945854187,
      "learning_rate": 1.3926225927198123e-05,
      "loss": 2.3735,
      "step": 28851
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.076188087463379,
      "learning_rate": 1.3925847249704049e-05,
      "loss": 2.5299,
      "step": 28852
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.041378378868103,
      "learning_rate": 1.39254685655546e-05,
      "loss": 2.3754,
      "step": 28853
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0761964321136475,
      "learning_rate": 1.3925089874750416e-05,
      "loss": 2.2533,
      "step": 28854
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.024877667427063,
      "learning_rate": 1.392471117729214e-05,
      "loss": 2.1092,
      "step": 28855
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0987032651901245,
      "learning_rate": 1.3924332473180413e-05,
      "loss": 2.4893,
      "step": 28856
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1113171577453613,
      "learning_rate": 1.3923953762415874e-05,
      "loss": 2.7747,
      "step": 28857
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9411512017250061,
      "learning_rate": 1.3923575044999167e-05,
      "loss": 2.3732,
      "step": 28858
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1317384243011475,
      "learning_rate": 1.3923196320930936e-05,
      "loss": 2.2608,
      "step": 28859
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1780246496200562,
      "learning_rate": 1.392281759021182e-05,
      "loss": 2.5188,
      "step": 28860
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0576149225234985,
      "learning_rate": 1.3922438852842468e-05,
      "loss": 2.3123,
      "step": 28861
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0808836221694946,
      "learning_rate": 1.3922060108823514e-05,
      "loss": 2.5636,
      "step": 28862
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0331364870071411,
      "learning_rate": 1.39216813581556e-05,
      "loss": 2.4181,
      "step": 28863
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1094027757644653,
      "learning_rate": 1.3921302600839374e-05,
      "loss": 2.2469,
      "step": 28864
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1467339992523193,
      "learning_rate": 1.3920923836875476e-05,
      "loss": 2.3818,
      "step": 28865
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.959907591342926,
      "learning_rate": 1.3920545066264544e-05,
      "loss": 2.4759,
      "step": 28866
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0957846641540527,
      "learning_rate": 1.3920166289007224e-05,
      "loss": 2.3919,
      "step": 28867
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2257572412490845,
      "learning_rate": 1.391978750510416e-05,
      "loss": 2.5224,
      "step": 28868
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.103402018547058,
      "learning_rate": 1.3919408714555991e-05,
      "loss": 2.4121,
      "step": 28869
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.006361722946167,
      "learning_rate": 1.3919029917363358e-05,
      "loss": 2.3256,
      "step": 28870
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2425323724746704,
      "learning_rate": 1.3918651113526905e-05,
      "loss": 2.3071,
      "step": 28871
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0012778043746948,
      "learning_rate": 1.3918272303047276e-05,
      "loss": 2.3853,
      "step": 28872
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0225024223327637,
      "learning_rate": 1.3917893485925111e-05,
      "loss": 2.1993,
      "step": 28873
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9983459115028381,
      "learning_rate": 1.391751466216105e-05,
      "loss": 2.3336,
      "step": 28874
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.091538667678833,
      "learning_rate": 1.3917135831755739e-05,
      "loss": 2.4064,
      "step": 28875
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9821422696113586,
      "learning_rate": 1.3916756994709821e-05,
      "loss": 2.4716,
      "step": 28876
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9426149725914001,
      "learning_rate": 1.3916378151023932e-05,
      "loss": 2.4137,
      "step": 28877
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2687616348266602,
      "learning_rate": 1.3915999300698722e-05,
      "loss": 2.2648,
      "step": 28878
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0258983373641968,
      "learning_rate": 1.3915620443734827e-05,
      "loss": 2.1813,
      "step": 28879
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0139062404632568,
      "learning_rate": 1.3915241580132892e-05,
      "loss": 2.4044,
      "step": 28880
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0294467210769653,
      "learning_rate": 1.3914862709893562e-05,
      "loss": 2.5919,
      "step": 28881
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.024208903312683,
      "learning_rate": 1.3914483833017473e-05,
      "loss": 2.3094,
      "step": 28882
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0325459241867065,
      "learning_rate": 1.3914104949505274e-05,
      "loss": 2.3795,
      "step": 28883
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2394047975540161,
      "learning_rate": 1.3913726059357602e-05,
      "loss": 2.4484,
      "step": 28884
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9885163307189941,
      "learning_rate": 1.3913347162575102e-05,
      "loss": 2.4896,
      "step": 28885
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0266451835632324,
      "learning_rate": 1.3912968259158412e-05,
      "loss": 2.2675,
      "step": 28886
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0058393478393555,
      "learning_rate": 1.3912589349108183e-05,
      "loss": 2.2877,
      "step": 28887
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9887447357177734,
      "learning_rate": 1.3912210432425052e-05,
      "loss": 2.2181,
      "step": 28888
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.039123296737671,
      "learning_rate": 1.391183150910966e-05,
      "loss": 2.3978,
      "step": 28889
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.007829189300537,
      "learning_rate": 1.3911452579162651e-05,
      "loss": 2.3772,
      "step": 28890
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.011120080947876,
      "learning_rate": 1.391107364258467e-05,
      "loss": 2.3898,
      "step": 28891
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1547037363052368,
      "learning_rate": 1.3910694699376355e-05,
      "loss": 2.2665,
      "step": 28892
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0854243040084839,
      "learning_rate": 1.391031574953835e-05,
      "loss": 2.3865,
      "step": 28893
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0549768209457397,
      "learning_rate": 1.39099367930713e-05,
      "loss": 2.4621,
      "step": 28894
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0215624570846558,
      "learning_rate": 1.3909557829975842e-05,
      "loss": 2.3124,
      "step": 28895
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.047469139099121,
      "learning_rate": 1.3909178860252623e-05,
      "loss": 2.4096,
      "step": 28896
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.133561134338379,
      "learning_rate": 1.3908799883902284e-05,
      "loss": 2.609,
      "step": 28897
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1480400562286377,
      "learning_rate": 1.3908420900925468e-05,
      "loss": 2.3702,
      "step": 28898
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.034153699874878,
      "learning_rate": 1.3908041911322818e-05,
      "loss": 2.4429,
      "step": 28899
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1303961277008057,
      "learning_rate": 1.3907662915094972e-05,
      "loss": 2.4783,
      "step": 28900
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0066643953323364,
      "learning_rate": 1.3907283912242582e-05,
      "loss": 2.4068,
      "step": 28901
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0502740144729614,
      "learning_rate": 1.390690490276628e-05,
      "loss": 2.1775,
      "step": 28902
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1208581924438477,
      "learning_rate": 1.3906525886666715e-05,
      "loss": 2.2266,
      "step": 28903
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0751783847808838,
      "learning_rate": 1.3906146863944527e-05,
      "loss": 2.6795,
      "step": 28904
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0085386037826538,
      "learning_rate": 1.3905767834600358e-05,
      "loss": 2.3505,
      "step": 28905
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0263195037841797,
      "learning_rate": 1.3905388798634853e-05,
      "loss": 2.1729,
      "step": 28906
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1467227935791016,
      "learning_rate": 1.3905009756048655e-05,
      "loss": 2.2531,
      "step": 28907
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0124900341033936,
      "learning_rate": 1.3904630706842404e-05,
      "loss": 2.3997,
      "step": 28908
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.118643879890442,
      "learning_rate": 1.3904251651016743e-05,
      "loss": 2.2336,
      "step": 28909
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9588543772697449,
      "learning_rate": 1.3903872588572313e-05,
      "loss": 2.2403,
      "step": 28910
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1810182332992554,
      "learning_rate": 1.3903493519509761e-05,
      "loss": 2.3925,
      "step": 28911
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0773074626922607,
      "learning_rate": 1.3903114443829724e-05,
      "loss": 2.5048,
      "step": 28912
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0841665267944336,
      "learning_rate": 1.3902735361532851e-05,
      "loss": 2.4355,
      "step": 28913
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0752055644989014,
      "learning_rate": 1.3902356272619784e-05,
      "loss": 2.396,
      "step": 28914
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0681397914886475,
      "learning_rate": 1.3901977177091159e-05,
      "loss": 2.3213,
      "step": 28915
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9906940460205078,
      "learning_rate": 1.3901598074947624e-05,
      "loss": 2.4083,
      "step": 28916
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0542528629302979,
      "learning_rate": 1.390121896618982e-05,
      "loss": 2.4201,
      "step": 28917
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.076661229133606,
      "learning_rate": 1.390083985081839e-05,
      "loss": 2.3322,
      "step": 28918
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1097065210342407,
      "learning_rate": 1.390046072883398e-05,
      "loss": 2.3496,
      "step": 28919
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.06540048122406,
      "learning_rate": 1.3900081600237227e-05,
      "loss": 2.3964,
      "step": 28920
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1315371990203857,
      "learning_rate": 1.3899702465028776e-05,
      "loss": 2.5195,
      "step": 28921
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0577893257141113,
      "learning_rate": 1.3899323323209272e-05,
      "loss": 2.5017,
      "step": 28922
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1640281677246094,
      "learning_rate": 1.3898944174779357e-05,
      "loss": 2.4049,
      "step": 28923
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.01004159450531,
      "learning_rate": 1.389856501973967e-05,
      "loss": 2.3097,
      "step": 28924
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0664366483688354,
      "learning_rate": 1.3898185858090858e-05,
      "loss": 2.5264,
      "step": 28925
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1213349103927612,
      "learning_rate": 1.389780668983356e-05,
      "loss": 2.2165,
      "step": 28926
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0594143867492676,
      "learning_rate": 1.3897427514968424e-05,
      "loss": 2.3332,
      "step": 28927
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.206154465675354,
      "learning_rate": 1.3897048333496091e-05,
      "loss": 2.3032,
      "step": 28928
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1929258108139038,
      "learning_rate": 1.3896669145417199e-05,
      "loss": 2.2239,
      "step": 28929
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1490522623062134,
      "learning_rate": 1.3896289950732395e-05,
      "loss": 2.6478,
      "step": 28930
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.252149224281311,
      "learning_rate": 1.389591074944232e-05,
      "loss": 2.4822,
      "step": 28931
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1564092636108398,
      "learning_rate": 1.3895531541547622e-05,
      "loss": 2.0411,
      "step": 28932
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0866036415100098,
      "learning_rate": 1.3895152327048939e-05,
      "loss": 2.3946,
      "step": 28933
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.046951413154602,
      "learning_rate": 1.3894773105946916e-05,
      "loss": 2.4795,
      "step": 28934
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1680821180343628,
      "learning_rate": 1.3894393878242194e-05,
      "loss": 2.2746,
      "step": 28935
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9121675491333008,
      "learning_rate": 1.3894014643935413e-05,
      "loss": 2.2408,
      "step": 28936
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.369345784187317,
      "learning_rate": 1.3893635403027224e-05,
      "loss": 2.3318,
      "step": 28937
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0292216539382935,
      "learning_rate": 1.3893256155518262e-05,
      "loss": 2.2938,
      "step": 28938
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0650317668914795,
      "learning_rate": 1.3892876901409174e-05,
      "loss": 2.5084,
      "step": 28939
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0471384525299072,
      "learning_rate": 1.3892497640700607e-05,
      "loss": 2.2323,
      "step": 28940
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6153678894042969,
      "learning_rate": 1.3892118373393195e-05,
      "loss": 2.5259,
      "step": 28941
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1619938611984253,
      "learning_rate": 1.3891739099487585e-05,
      "loss": 2.5548,
      "step": 28942
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1824226379394531,
      "learning_rate": 1.3891359818984422e-05,
      "loss": 2.5846,
      "step": 28943
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1320774555206299,
      "learning_rate": 1.3890980531884346e-05,
      "loss": 2.4257,
      "step": 28944
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.083587646484375,
      "learning_rate": 1.3890601238187999e-05,
      "loss": 2.3851,
      "step": 28945
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0099048614501953,
      "learning_rate": 1.3890221937896029e-05,
      "loss": 2.4597,
      "step": 28946
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9560198187828064,
      "learning_rate": 1.388984263100908e-05,
      "loss": 2.4576,
      "step": 28947
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0673556327819824,
      "learning_rate": 1.3889463317527787e-05,
      "loss": 2.344,
      "step": 28948
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9926426410675049,
      "learning_rate": 1.3889083997452798e-05,
      "loss": 2.5193,
      "step": 28949
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1207592487335205,
      "learning_rate": 1.3888704670784755e-05,
      "loss": 2.587,
      "step": 28950
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0686712265014648,
      "learning_rate": 1.38883253375243e-05,
      "loss": 2.4159,
      "step": 28951
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1162703037261963,
      "learning_rate": 1.388794599767208e-05,
      "loss": 2.4202,
      "step": 28952
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0518584251403809,
      "learning_rate": 1.3887566651228733e-05,
      "loss": 2.2887,
      "step": 28953
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9871877431869507,
      "learning_rate": 1.3887187298194909e-05,
      "loss": 2.4241,
      "step": 28954
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.111973524093628,
      "learning_rate": 1.3886807938571244e-05,
      "loss": 2.4162,
      "step": 28955
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1829674243927002,
      "learning_rate": 1.3886428572358383e-05,
      "loss": 2.4829,
      "step": 28956
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.179214358329773,
      "learning_rate": 1.388604919955697e-05,
      "loss": 2.2829,
      "step": 28957
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.047035813331604,
      "learning_rate": 1.3885669820167649e-05,
      "loss": 2.2258,
      "step": 28958
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0508875846862793,
      "learning_rate": 1.3885290434191064e-05,
      "loss": 2.2035,
      "step": 28959
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0930112600326538,
      "learning_rate": 1.3884911041627856e-05,
      "loss": 2.2372,
      "step": 28960
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1749383211135864,
      "learning_rate": 1.3884531642478667e-05,
      "loss": 2.3178,
      "step": 28961
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0370384454727173,
      "learning_rate": 1.3884152236744142e-05,
      "loss": 2.5023,
      "step": 28962
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1761060953140259,
      "learning_rate": 1.3883772824424926e-05,
      "loss": 2.5131,
      "step": 28963
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.094261884689331,
      "learning_rate": 1.388339340552166e-05,
      "loss": 2.4189,
      "step": 28964
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1059176921844482,
      "learning_rate": 1.3883013980034985e-05,
      "loss": 2.4715,
      "step": 28965
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1364891529083252,
      "learning_rate": 1.3882634547965547e-05,
      "loss": 2.2866,
      "step": 28966
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0539379119873047,
      "learning_rate": 1.388225510931399e-05,
      "loss": 2.3433,
      "step": 28967
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.151196837425232,
      "learning_rate": 1.3881875664080958e-05,
      "loss": 2.6535,
      "step": 28968
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0164225101470947,
      "learning_rate": 1.388149621226709e-05,
      "loss": 2.3629,
      "step": 28969
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.140586256980896,
      "learning_rate": 1.3881116753873032e-05,
      "loss": 2.4389,
      "step": 28970
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9959695339202881,
      "learning_rate": 1.3880737288899427e-05,
      "loss": 2.4328,
      "step": 28971
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.023789644241333,
      "learning_rate": 1.3880357817346917e-05,
      "loss": 2.5063,
      "step": 28972
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0820575952529907,
      "learning_rate": 1.3879978339216149e-05,
      "loss": 2.4503,
      "step": 28973
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9949623942375183,
      "learning_rate": 1.3879598854507764e-05,
      "loss": 2.4507,
      "step": 28974
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0413023233413696,
      "learning_rate": 1.3879219363222405e-05,
      "loss": 2.3471,
      "step": 28975
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0361675024032593,
      "learning_rate": 1.3878839865360714e-05,
      "loss": 2.5468,
      "step": 28976
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.124574065208435,
      "learning_rate": 1.3878460360923336e-05,
      "loss": 2.5367,
      "step": 28977
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.060141682624817,
      "learning_rate": 1.3878080849910915e-05,
      "loss": 2.4609,
      "step": 28978
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9889354705810547,
      "learning_rate": 1.3877701332324094e-05,
      "loss": 2.3287,
      "step": 28979
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0907920598983765,
      "learning_rate": 1.3877321808163518e-05,
      "loss": 2.3776,
      "step": 28980
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9658677577972412,
      "learning_rate": 1.3876942277429825e-05,
      "loss": 2.5246,
      "step": 28981
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0328783988952637,
      "learning_rate": 1.3876562740123665e-05,
      "loss": 2.2682,
      "step": 28982
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.045325517654419,
      "learning_rate": 1.3876183196245677e-05,
      "loss": 2.3131,
      "step": 28983
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1807259321212769,
      "learning_rate": 1.3875803645796505e-05,
      "loss": 2.469,
      "step": 28984
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9750960469245911,
      "learning_rate": 1.3875424088776791e-05,
      "loss": 2.4868,
      "step": 28985
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9985323548316956,
      "learning_rate": 1.3875044525187182e-05,
      "loss": 2.2274,
      "step": 28986
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.034542202949524,
      "learning_rate": 1.3874664955028323e-05,
      "loss": 2.5376,
      "step": 28987
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1314501762390137,
      "learning_rate": 1.3874285378300853e-05,
      "loss": 2.4775,
      "step": 28988
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1605736017227173,
      "learning_rate": 1.3873905795005415e-05,
      "loss": 2.6807,
      "step": 28989
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0534740686416626,
      "learning_rate": 1.3873526205142658e-05,
      "loss": 2.4336,
      "step": 28990
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0364189147949219,
      "learning_rate": 1.3873146608713221e-05,
      "loss": 2.4148,
      "step": 28991
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9545799493789673,
      "learning_rate": 1.3872767005717749e-05,
      "loss": 2.6346,
      "step": 28992
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6173632144927979,
      "learning_rate": 1.3872387396156883e-05,
      "loss": 2.4372,
      "step": 28993
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9828702211380005,
      "learning_rate": 1.3872007780031272e-05,
      "loss": 2.4747,
      "step": 28994
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.075980544090271,
      "learning_rate": 1.3871628157341551e-05,
      "loss": 2.4118,
      "step": 28995
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0207908153533936,
      "learning_rate": 1.3871248528088373e-05,
      "loss": 2.3285,
      "step": 28996
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0569604635238647,
      "learning_rate": 1.3870868892272376e-05,
      "loss": 2.4748,
      "step": 28997
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0195057392120361,
      "learning_rate": 1.3870489249894203e-05,
      "loss": 2.3676,
      "step": 28998
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0219285488128662,
      "learning_rate": 1.3870109600954502e-05,
      "loss": 2.4125,
      "step": 28999
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.115269422531128,
      "learning_rate": 1.3869729945453917e-05,
      "loss": 2.27,
      "step": 29000
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.096280813217163,
      "learning_rate": 1.3869350283393083e-05,
      "loss": 2.4475,
      "step": 29001
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.118736982345581,
      "learning_rate": 1.3868970614772652e-05,
      "loss": 2.4074,
      "step": 29002
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0615146160125732,
      "learning_rate": 1.3868590939593264e-05,
      "loss": 2.317,
      "step": 29003
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.051744818687439,
      "learning_rate": 1.3868211257855564e-05,
      "loss": 2.4073,
      "step": 29004
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0329028367996216,
      "learning_rate": 1.3867831569560195e-05,
      "loss": 2.2406,
      "step": 29005
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.182353138923645,
      "learning_rate": 1.3867451874707802e-05,
      "loss": 2.2487,
      "step": 29006
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0338191986083984,
      "learning_rate": 1.3867072173299028e-05,
      "loss": 2.5692,
      "step": 29007
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.098173975944519,
      "learning_rate": 1.3866692465334514e-05,
      "loss": 2.3207,
      "step": 29008
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1659209728240967,
      "learning_rate": 1.386631275081491e-05,
      "loss": 2.3335,
      "step": 29009
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0934377908706665,
      "learning_rate": 1.3865933029740852e-05,
      "loss": 2.3812,
      "step": 29010
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.060214638710022,
      "learning_rate": 1.386555330211299e-05,
      "loss": 2.4555,
      "step": 29011
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1918405294418335,
      "learning_rate": 1.3865173567931964e-05,
      "loss": 2.5184,
      "step": 29012
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9776821136474609,
      "learning_rate": 1.3864793827198421e-05,
      "loss": 2.4025,
      "step": 29013
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.17796790599823,
      "learning_rate": 1.3864414079913e-05,
      "loss": 2.5518,
      "step": 29014
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1195427179336548,
      "learning_rate": 1.3864034326076347e-05,
      "loss": 2.3769,
      "step": 29015
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0479124784469604,
      "learning_rate": 1.3863654565689109e-05,
      "loss": 2.4204,
      "step": 29016
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0488667488098145,
      "learning_rate": 1.3863274798751927e-05,
      "loss": 2.3742,
      "step": 29017
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1215481758117676,
      "learning_rate": 1.3862895025265443e-05,
      "loss": 2.5312,
      "step": 29018
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1764919757843018,
      "learning_rate": 1.3862515245230307e-05,
      "loss": 2.2786,
      "step": 29019
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0400010347366333,
      "learning_rate": 1.3862135458647155e-05,
      "loss": 2.2848,
      "step": 29020
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0593386888504028,
      "learning_rate": 1.3861755665516635e-05,
      "loss": 2.4942,
      "step": 29021
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1099746227264404,
      "learning_rate": 1.386137586583939e-05,
      "loss": 2.2917,
      "step": 29022
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0950483083724976,
      "learning_rate": 1.3860996059616065e-05,
      "loss": 2.1999,
      "step": 29023
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0294641256332397,
      "learning_rate": 1.3860616246847303e-05,
      "loss": 2.4164,
      "step": 29024
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1372833251953125,
      "learning_rate": 1.386023642753375e-05,
      "loss": 2.4141,
      "step": 29025
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1017943620681763,
      "learning_rate": 1.3859856601676045e-05,
      "loss": 2.3784,
      "step": 29026
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1922078132629395,
      "learning_rate": 1.3859476769274832e-05,
      "loss": 2.3047,
      "step": 29027
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1307718753814697,
      "learning_rate": 1.3859096930330762e-05,
      "loss": 2.6129,
      "step": 29028
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0298124551773071,
      "learning_rate": 1.3858717084844475e-05,
      "loss": 2.4244,
      "step": 29029
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1145694255828857,
      "learning_rate": 1.3858337232816613e-05,
      "loss": 2.3749,
      "step": 29030
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9770273566246033,
      "learning_rate": 1.3857957374247819e-05,
      "loss": 2.1824,
      "step": 29031
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0996692180633545,
      "learning_rate": 1.3857577509138743e-05,
      "loss": 2.1273,
      "step": 29032
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1472020149230957,
      "learning_rate": 1.3857197637490025e-05,
      "loss": 2.5961,
      "step": 29033
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0287729501724243,
      "learning_rate": 1.3856817759302308e-05,
      "loss": 2.2351,
      "step": 29034
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0160802602767944,
      "learning_rate": 1.3856437874576239e-05,
      "loss": 2.4298,
      "step": 29035
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9572611451148987,
      "learning_rate": 1.385605798331246e-05,
      "loss": 2.4054,
      "step": 29036
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1159197092056274,
      "learning_rate": 1.3855678085511613e-05,
      "loss": 2.3568,
      "step": 29037
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0675612688064575,
      "learning_rate": 1.3855298181174346e-05,
      "loss": 2.2144,
      "step": 29038
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9031867980957031,
      "learning_rate": 1.3854918270301303e-05,
      "loss": 2.3398,
      "step": 29039
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0027410984039307,
      "learning_rate": 1.3854538352893126e-05,
      "loss": 2.308,
      "step": 29040
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1106020212173462,
      "learning_rate": 1.385415842895046e-05,
      "loss": 2.4523,
      "step": 29041
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9237452745437622,
      "learning_rate": 1.3853778498473947e-05,
      "loss": 2.4426,
      "step": 29042
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1120376586914062,
      "learning_rate": 1.3853398561464234e-05,
      "loss": 2.3127,
      "step": 29043
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9774570465087891,
      "learning_rate": 1.385301861792196e-05,
      "loss": 2.3068,
      "step": 29044
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9966728091239929,
      "learning_rate": 1.3852638667847778e-05,
      "loss": 2.4312,
      "step": 29045
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0729666948318481,
      "learning_rate": 1.3852258711242325e-05,
      "loss": 2.3709,
      "step": 29046
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0111677646636963,
      "learning_rate": 1.3851878748106248e-05,
      "loss": 2.3713,
      "step": 29047
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.092708945274353,
      "learning_rate": 1.3851498778440191e-05,
      "loss": 2.5554,
      "step": 29048
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0754806995391846,
      "learning_rate": 1.3851118802244796e-05,
      "loss": 2.3996,
      "step": 29049
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0974699258804321,
      "learning_rate": 1.385073881952071e-05,
      "loss": 2.3909,
      "step": 29050
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9470226764678955,
      "learning_rate": 1.3850358830268575e-05,
      "loss": 2.3229,
      "step": 29051
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0298608541488647,
      "learning_rate": 1.3849978834489036e-05,
      "loss": 2.2825,
      "step": 29052
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0477573871612549,
      "learning_rate": 1.384959883218274e-05,
      "loss": 2.5499,
      "step": 29053
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.035050392150879,
      "learning_rate": 1.3849218823350325e-05,
      "loss": 2.4297,
      "step": 29054
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0949064493179321,
      "learning_rate": 1.3848838807992437e-05,
      "loss": 2.3897,
      "step": 29055
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0275659561157227,
      "learning_rate": 1.3848458786109726e-05,
      "loss": 2.4194,
      "step": 29056
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1566132307052612,
      "learning_rate": 1.384807875770283e-05,
      "loss": 2.6176,
      "step": 29057
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.026151180267334,
      "learning_rate": 1.3847698722772396e-05,
      "loss": 2.3146,
      "step": 29058
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0344382524490356,
      "learning_rate": 1.3847318681319069e-05,
      "loss": 2.3744,
      "step": 29059
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1070042848587036,
      "learning_rate": 1.3846938633343489e-05,
      "loss": 2.4245,
      "step": 29060
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0722576379776,
      "learning_rate": 1.3846558578846305e-05,
      "loss": 2.3558,
      "step": 29061
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.159421443939209,
      "learning_rate": 1.3846178517828159e-05,
      "loss": 2.489,
      "step": 29062
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.150038242340088,
      "learning_rate": 1.3845798450289696e-05,
      "loss": 2.3228,
      "step": 29063
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1185493469238281,
      "learning_rate": 1.3845418376231558e-05,
      "loss": 2.2956,
      "step": 29064
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9963648915290833,
      "learning_rate": 1.3845038295654393e-05,
      "loss": 2.2403,
      "step": 29065
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.078945517539978,
      "learning_rate": 1.3844658208558848e-05,
      "loss": 2.414,
      "step": 29066
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0705801248550415,
      "learning_rate": 1.3844278114945558e-05,
      "loss": 2.4831,
      "step": 29067
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.122897982597351,
      "learning_rate": 1.3843898014815173e-05,
      "loss": 2.3624,
      "step": 29068
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3810105323791504,
      "learning_rate": 1.3843517908168338e-05,
      "loss": 2.4472,
      "step": 29069
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9583219289779663,
      "learning_rate": 1.3843137795005696e-05,
      "loss": 2.2969,
      "step": 29070
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2019168138504028,
      "learning_rate": 1.3842757675327891e-05,
      "loss": 2.4331,
      "step": 29071
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0909981727600098,
      "learning_rate": 1.384237754913557e-05,
      "loss": 2.5245,
      "step": 29072
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.088568925857544,
      "learning_rate": 1.3841997416429375e-05,
      "loss": 2.4138,
      "step": 29073
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0379670858383179,
      "learning_rate": 1.384161727720995e-05,
      "loss": 2.6473,
      "step": 29074
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.050771713256836,
      "learning_rate": 1.384123713147794e-05,
      "loss": 2.3768,
      "step": 29075
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1352659463882446,
      "learning_rate": 1.384085697923399e-05,
      "loss": 2.6188,
      "step": 29076
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1766326427459717,
      "learning_rate": 1.3840476820478746e-05,
      "loss": 2.5257,
      "step": 29077
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0747251510620117,
      "learning_rate": 1.3840096655212848e-05,
      "loss": 2.4175,
      "step": 29078
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0941710472106934,
      "learning_rate": 1.3839716483436944e-05,
      "loss": 2.2803,
      "step": 29079
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0853570699691772,
      "learning_rate": 1.3839336305151681e-05,
      "loss": 2.4336,
      "step": 29080
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9960455894470215,
      "learning_rate": 1.3838956120357699e-05,
      "loss": 2.3795,
      "step": 29081
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.034105658531189,
      "learning_rate": 1.383857592905564e-05,
      "loss": 2.3373,
      "step": 29082
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1064934730529785,
      "learning_rate": 1.3838195731246153e-05,
      "loss": 2.4892,
      "step": 29083
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1378790140151978,
      "learning_rate": 1.3837815526929883e-05,
      "loss": 2.3266,
      "step": 29084
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0871044397354126,
      "learning_rate": 1.3837435316107476e-05,
      "loss": 2.4495,
      "step": 29085
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0036357641220093,
      "learning_rate": 1.3837055098779573e-05,
      "loss": 2.2618,
      "step": 29086
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1645398139953613,
      "learning_rate": 1.3836674874946816e-05,
      "loss": 2.3159,
      "step": 29087
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1064467430114746,
      "learning_rate": 1.3836294644609856e-05,
      "loss": 2.4468,
      "step": 29088
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.084773302078247,
      "learning_rate": 1.3835914407769332e-05,
      "loss": 2.3108,
      "step": 29089
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.004950761795044,
      "learning_rate": 1.3835534164425893e-05,
      "loss": 2.2841,
      "step": 29090
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0312942266464233,
      "learning_rate": 1.3835153914580182e-05,
      "loss": 2.2114,
      "step": 29091
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0326769351959229,
      "learning_rate": 1.3834773658232845e-05,
      "loss": 2.4123,
      "step": 29092
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2300742864608765,
      "learning_rate": 1.3834393395384522e-05,
      "loss": 2.2213,
      "step": 29093
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0161139965057373,
      "learning_rate": 1.3834013126035864e-05,
      "loss": 2.6691,
      "step": 29094
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0177383422851562,
      "learning_rate": 1.3833632850187508e-05,
      "loss": 2.3571,
      "step": 29095
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0382165908813477,
      "learning_rate": 1.3833252567840107e-05,
      "loss": 2.168,
      "step": 29096
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0250602960586548,
      "learning_rate": 1.3832872278994299e-05,
      "loss": 2.6108,
      "step": 29097
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.067857027053833,
      "learning_rate": 1.3832491983650733e-05,
      "loss": 2.3049,
      "step": 29098
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1193749904632568,
      "learning_rate": 1.3832111681810054e-05,
      "loss": 2.3883,
      "step": 29099
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0477294921875,
      "learning_rate": 1.3831731373472903e-05,
      "loss": 2.6393,
      "step": 29100
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1561511754989624,
      "learning_rate": 1.3831351058639925e-05,
      "loss": 2.3929,
      "step": 29101
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0591927766799927,
      "learning_rate": 1.383097073731177e-05,
      "loss": 2.3879,
      "step": 29102
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.028991937637329,
      "learning_rate": 1.3830590409489078e-05,
      "loss": 2.4613,
      "step": 29103
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0337200164794922,
      "learning_rate": 1.3830210075172493e-05,
      "loss": 2.4261,
      "step": 29104
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0664904117584229,
      "learning_rate": 1.3829829734362665e-05,
      "loss": 2.435,
      "step": 29105
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.258237361907959,
      "learning_rate": 1.3829449387060232e-05,
      "loss": 2.305,
      "step": 29106
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.14765202999115,
      "learning_rate": 1.3829069033265843e-05,
      "loss": 2.4418,
      "step": 29107
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1823400259017944,
      "learning_rate": 1.3828688672980142e-05,
      "loss": 2.6302,
      "step": 29108
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.012976884841919,
      "learning_rate": 1.3828308306203777e-05,
      "loss": 2.3919,
      "step": 29109
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9601961970329285,
      "learning_rate": 1.3827927932937386e-05,
      "loss": 2.2354,
      "step": 29110
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1792314052581787,
      "learning_rate": 1.3827547553181617e-05,
      "loss": 2.4085,
      "step": 29111
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.202466368675232,
      "learning_rate": 1.382716716693712e-05,
      "loss": 2.3823,
      "step": 29112
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.20566987991333,
      "learning_rate": 1.3826786774204532e-05,
      "loss": 2.5087,
      "step": 29113
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9784151911735535,
      "learning_rate": 1.38264063749845e-05,
      "loss": 2.3057,
      "step": 29114
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0147775411605835,
      "learning_rate": 1.3826025969277672e-05,
      "loss": 2.6104,
      "step": 29115
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0203099250793457,
      "learning_rate": 1.382564555708469e-05,
      "loss": 2.568,
      "step": 29116
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1899014711380005,
      "learning_rate": 1.3825265138406199e-05,
      "loss": 2.3752,
      "step": 29117
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1857693195343018,
      "learning_rate": 1.3824884713242845e-05,
      "loss": 2.5157,
      "step": 29118
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0437697172164917,
      "learning_rate": 1.3824504281595275e-05,
      "loss": 2.5307,
      "step": 29119
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0814573764801025,
      "learning_rate": 1.3824123843464128e-05,
      "loss": 2.305,
      "step": 29120
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0467091798782349,
      "learning_rate": 1.3823743398850053e-05,
      "loss": 2.2753,
      "step": 29121
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1130802631378174,
      "learning_rate": 1.3823362947753696e-05,
      "loss": 2.339,
      "step": 29122
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1007341146469116,
      "learning_rate": 1.38229824901757e-05,
      "loss": 2.4013,
      "step": 29123
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.31400465965271,
      "learning_rate": 1.382260202611671e-05,
      "loss": 2.5412,
      "step": 29124
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0547128915786743,
      "learning_rate": 1.3822221555577372e-05,
      "loss": 2.2996,
      "step": 29125
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2084394693374634,
      "learning_rate": 1.3821841078558329e-05,
      "loss": 2.4151,
      "step": 29126
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1476662158966064,
      "learning_rate": 1.382146059506023e-05,
      "loss": 2.4482,
      "step": 29127
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0267421007156372,
      "learning_rate": 1.3821080105083715e-05,
      "loss": 2.3422,
      "step": 29128
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9712404012680054,
      "learning_rate": 1.3820699608629431e-05,
      "loss": 2.4747,
      "step": 29129
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1026519536972046,
      "learning_rate": 1.3820319105698024e-05,
      "loss": 2.3234,
      "step": 29130
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1472065448760986,
      "learning_rate": 1.3819938596290141e-05,
      "loss": 2.3701,
      "step": 29131
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0996853113174438,
      "learning_rate": 1.3819558080406423e-05,
      "loss": 2.364,
      "step": 29132
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.139459252357483,
      "learning_rate": 1.3819177558047517e-05,
      "loss": 2.4944,
      "step": 29133
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.062911033630371,
      "learning_rate": 1.3818797029214065e-05,
      "loss": 2.4149,
      "step": 29134
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0286695957183838,
      "learning_rate": 1.3818416493906716e-05,
      "loss": 2.2988,
      "step": 29135
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0727049112319946,
      "learning_rate": 1.3818035952126117e-05,
      "loss": 2.1809,
      "step": 29136
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0928421020507812,
      "learning_rate": 1.3817655403872909e-05,
      "loss": 2.2489,
      "step": 29137
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.13727867603302,
      "learning_rate": 1.3817274849147737e-05,
      "loss": 2.2909,
      "step": 29138
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0246559381484985,
      "learning_rate": 1.3816894287951246e-05,
      "loss": 2.4459,
      "step": 29139
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.155173659324646,
      "learning_rate": 1.3816513720284088e-05,
      "loss": 2.5112,
      "step": 29140
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.038967251777649,
      "learning_rate": 1.3816133146146897e-05,
      "loss": 2.3914,
      "step": 29141
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1142401695251465,
      "learning_rate": 1.3815752565540325e-05,
      "loss": 2.4743,
      "step": 29142
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0635099411010742,
      "learning_rate": 1.3815371978465017e-05,
      "loss": 2.4415,
      "step": 29143
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0915629863739014,
      "learning_rate": 1.3814991384921617e-05,
      "loss": 2.421,
      "step": 29144
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0663666725158691,
      "learning_rate": 1.3814610784910772e-05,
      "loss": 2.4896,
      "step": 29145
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1089627742767334,
      "learning_rate": 1.3814230178433126e-05,
      "loss": 2.5689,
      "step": 29146
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1302133798599243,
      "learning_rate": 1.381384956548932e-05,
      "loss": 2.4234,
      "step": 29147
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.206072211265564,
      "learning_rate": 1.3813468946080005e-05,
      "loss": 2.3387,
      "step": 29148
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9631229043006897,
      "learning_rate": 1.3813088320205826e-05,
      "loss": 2.3119,
      "step": 29149
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0488306283950806,
      "learning_rate": 1.3812707687867423e-05,
      "loss": 2.4705,
      "step": 29150
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.020358920097351,
      "learning_rate": 1.3812327049065451e-05,
      "loss": 2.4479,
      "step": 29151
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.196266770362854,
      "learning_rate": 1.3811946403800545e-05,
      "loss": 2.6988,
      "step": 29152
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.064674973487854,
      "learning_rate": 1.3811565752073355e-05,
      "loss": 2.5071,
      "step": 29153
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1600359678268433,
      "learning_rate": 1.3811185093884525e-05,
      "loss": 2.1805,
      "step": 29154
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9551249742507935,
      "learning_rate": 1.38108044292347e-05,
      "loss": 2.2371,
      "step": 29155
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9276349544525146,
      "learning_rate": 1.3810423758124529e-05,
      "loss": 2.3207,
      "step": 29156
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9687106609344482,
      "learning_rate": 1.3810043080554655e-05,
      "loss": 2.4959,
      "step": 29157
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1515768766403198,
      "learning_rate": 1.3809662396525725e-05,
      "loss": 2.3752,
      "step": 29158
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.101111650466919,
      "learning_rate": 1.3809281706038377e-05,
      "loss": 2.3505,
      "step": 29159
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.021857738494873,
      "learning_rate": 1.3808901009093267e-05,
      "loss": 2.489,
      "step": 29160
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0871816873550415,
      "learning_rate": 1.3808520305691033e-05,
      "loss": 2.3328,
      "step": 29161
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1455247402191162,
      "learning_rate": 1.3808139595832322e-05,
      "loss": 2.5664,
      "step": 29162
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1126967668533325,
      "learning_rate": 1.380775887951778e-05,
      "loss": 2.4173,
      "step": 29163
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2172596454620361,
      "learning_rate": 1.3807378156748057e-05,
      "loss": 2.3089,
      "step": 29164
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0877211093902588,
      "learning_rate": 1.380699742752379e-05,
      "loss": 2.2232,
      "step": 29165
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0793852806091309,
      "learning_rate": 1.3806616691845628e-05,
      "loss": 2.3553,
      "step": 29166
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.072522521018982,
      "learning_rate": 1.3806235949714219e-05,
      "loss": 2.3612,
      "step": 29167
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.066595435142517,
      "learning_rate": 1.3805855201130207e-05,
      "loss": 2.5326,
      "step": 29168
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9680148959159851,
      "learning_rate": 1.3805474446094234e-05,
      "loss": 2.329,
      "step": 29169
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2705477476119995,
      "learning_rate": 1.380509368460695e-05,
      "loss": 2.3772,
      "step": 29170
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.143379807472229,
      "learning_rate": 1.3804712916669e-05,
      "loss": 2.3297,
      "step": 29171
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9498757123947144,
      "learning_rate": 1.3804332142281025e-05,
      "loss": 2.5437,
      "step": 29172
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0409901142120361,
      "learning_rate": 1.3803951361443675e-05,
      "loss": 2.5028,
      "step": 29173
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0964860916137695,
      "learning_rate": 1.3803570574157594e-05,
      "loss": 2.3263,
      "step": 29174
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1270602941513062,
      "learning_rate": 1.3803189780423427e-05,
      "loss": 2.3615,
      "step": 29175
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0620859861373901,
      "learning_rate": 1.3802808980241822e-05,
      "loss": 2.3301,
      "step": 29176
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0083988904953003,
      "learning_rate": 1.3802428173613423e-05,
      "loss": 2.5808,
      "step": 29177
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1623626947402954,
      "learning_rate": 1.3802047360538876e-05,
      "loss": 2.3047,
      "step": 29178
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0384997129440308,
      "learning_rate": 1.3801666541018826e-05,
      "loss": 2.4167,
      "step": 29179
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0053287744522095,
      "learning_rate": 1.3801285715053916e-05,
      "loss": 2.2362,
      "step": 29180
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9618905782699585,
      "learning_rate": 1.3800904882644797e-05,
      "loss": 2.2999,
      "step": 29181
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1311205625534058,
      "learning_rate": 1.380052404379211e-05,
      "loss": 2.5449,
      "step": 29182
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1721779108047485,
      "learning_rate": 1.3800143198496502e-05,
      "loss": 2.4685,
      "step": 29183
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.088879108428955,
      "learning_rate": 1.3799762346758623e-05,
      "loss": 2.4885,
      "step": 29184
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1332125663757324,
      "learning_rate": 1.3799381488579114e-05,
      "loss": 2.5815,
      "step": 29185
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1274666786193848,
      "learning_rate": 1.3799000623958617e-05,
      "loss": 2.6151,
      "step": 29186
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1110734939575195,
      "learning_rate": 1.3798619752897785e-05,
      "loss": 2.3724,
      "step": 29187
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9958120584487915,
      "learning_rate": 1.3798238875397264e-05,
      "loss": 2.3674,
      "step": 29188
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4051735401153564,
      "learning_rate": 1.3797857991457692e-05,
      "loss": 2.1934,
      "step": 29189
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1804448366165161,
      "learning_rate": 1.379747710107972e-05,
      "loss": 2.2684,
      "step": 29190
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.038613200187683,
      "learning_rate": 1.3797096204263995e-05,
      "loss": 2.4051,
      "step": 29191
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2804633378982544,
      "learning_rate": 1.379671530101116e-05,
      "loss": 2.3571,
      "step": 29192
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.049027681350708,
      "learning_rate": 1.379633439132186e-05,
      "loss": 2.5301,
      "step": 29193
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2071818113327026,
      "learning_rate": 1.3795953475196742e-05,
      "loss": 2.3952,
      "step": 29194
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.210113763809204,
      "learning_rate": 1.3795572552636454e-05,
      "loss": 2.2453,
      "step": 29195
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1025896072387695,
      "learning_rate": 1.379519162364164e-05,
      "loss": 2.4749,
      "step": 29196
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1807111501693726,
      "learning_rate": 1.3794810688212943e-05,
      "loss": 2.4088,
      "step": 29197
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9869558215141296,
      "learning_rate": 1.3794429746351012e-05,
      "loss": 2.3168,
      "step": 29198
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1648725271224976,
      "learning_rate": 1.3794048798056494e-05,
      "loss": 2.4449,
      "step": 29199
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3712483644485474,
      "learning_rate": 1.379366784333003e-05,
      "loss": 2.3205,
      "step": 29200
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0655360221862793,
      "learning_rate": 1.379328688217227e-05,
      "loss": 2.2927,
      "step": 29201
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1273884773254395,
      "learning_rate": 1.3792905914583857e-05,
      "loss": 2.2434,
      "step": 29202
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.103044033050537,
      "learning_rate": 1.379252494056544e-05,
      "loss": 2.3248,
      "step": 29203
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0642478466033936,
      "learning_rate": 1.3792143960117664e-05,
      "loss": 2.4692,
      "step": 29204
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.054069995880127,
      "learning_rate": 1.3791762973241172e-05,
      "loss": 2.2999,
      "step": 29205
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0678281784057617,
      "learning_rate": 1.3791381979936614e-05,
      "loss": 2.4989,
      "step": 29206
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0864953994750977,
      "learning_rate": 1.3791000980204632e-05,
      "loss": 2.3136,
      "step": 29207
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1388747692108154,
      "learning_rate": 1.3790619974045873e-05,
      "loss": 2.7755,
      "step": 29208
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2653782367706299,
      "learning_rate": 1.3790238961460984e-05,
      "loss": 2.4798,
      "step": 29209
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0381673574447632,
      "learning_rate": 1.3789857942450611e-05,
      "loss": 2.5229,
      "step": 29210
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0926185846328735,
      "learning_rate": 1.3789476917015402e-05,
      "loss": 2.0671,
      "step": 29211
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9979848861694336,
      "learning_rate": 1.3789095885155996e-05,
      "loss": 2.0858,
      "step": 29212
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0093727111816406,
      "learning_rate": 1.3788714846873048e-05,
      "loss": 2.3111,
      "step": 29213
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1016364097595215,
      "learning_rate": 1.3788333802167196e-05,
      "loss": 2.2853,
      "step": 29214
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.001997947692871,
      "learning_rate": 1.3787952751039089e-05,
      "loss": 2.4651,
      "step": 29215
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1847351789474487,
      "learning_rate": 1.3787571693489375e-05,
      "loss": 2.5052,
      "step": 29216
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.065574288368225,
      "learning_rate": 1.3787190629518698e-05,
      "loss": 2.3133,
      "step": 29217
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1207966804504395,
      "learning_rate": 1.3786809559127705e-05,
      "loss": 2.4247,
      "step": 29218
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1397230625152588,
      "learning_rate": 1.378642848231704e-05,
      "loss": 2.4466,
      "step": 29219
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1369348764419556,
      "learning_rate": 1.378604739908735e-05,
      "loss": 2.5439,
      "step": 29220
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9670126438140869,
      "learning_rate": 1.3785666309439284e-05,
      "loss": 2.3499,
      "step": 29221
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9693414568901062,
      "learning_rate": 1.3785285213373484e-05,
      "loss": 2.4977,
      "step": 29222
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0393245220184326,
      "learning_rate": 1.3784904110890596e-05,
      "loss": 2.3053,
      "step": 29223
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0284805297851562,
      "learning_rate": 1.378452300199127e-05,
      "loss": 2.4873,
      "step": 29224
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0435551404953003,
      "learning_rate": 1.3784141886676147e-05,
      "loss": 2.4382,
      "step": 29225
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1853890419006348,
      "learning_rate": 1.3783760764945877e-05,
      "loss": 2.4062,
      "step": 29226
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.01944899559021,
      "learning_rate": 1.3783379636801106e-05,
      "loss": 2.3219,
      "step": 29227
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0066114664077759,
      "learning_rate": 1.3782998502242477e-05,
      "loss": 2.4757,
      "step": 29228
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.990348219871521,
      "learning_rate": 1.378261736127064e-05,
      "loss": 2.4068,
      "step": 29229
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1225343942642212,
      "learning_rate": 1.378223621388624e-05,
      "loss": 2.2519,
      "step": 29230
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9784425497055054,
      "learning_rate": 1.378185506008992e-05,
      "loss": 2.4209,
      "step": 29231
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2300702333450317,
      "learning_rate": 1.378147389988233e-05,
      "loss": 2.3788,
      "step": 29232
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0579923391342163,
      "learning_rate": 1.3781092733264112e-05,
      "loss": 2.4493,
      "step": 29233
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0074493885040283,
      "learning_rate": 1.3780711560235918e-05,
      "loss": 2.2865,
      "step": 29234
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.950381875038147,
      "learning_rate": 1.3780330380798388e-05,
      "loss": 2.4685,
      "step": 29235
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.036930799484253,
      "learning_rate": 1.3779949194952174e-05,
      "loss": 2.4142,
      "step": 29236
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9942006468772888,
      "learning_rate": 1.377956800269792e-05,
      "loss": 2.4896,
      "step": 29237
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9980437159538269,
      "learning_rate": 1.377918680403627e-05,
      "loss": 2.3017,
      "step": 29238
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9493561387062073,
      "learning_rate": 1.3778805598967871e-05,
      "loss": 2.3164,
      "step": 29239
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0957050323486328,
      "learning_rate": 1.3778424387493373e-05,
      "loss": 2.3929,
      "step": 29240
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0869098901748657,
      "learning_rate": 1.3778043169613415e-05,
      "loss": 2.2277,
      "step": 29241
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.096228003501892,
      "learning_rate": 1.3777661945328651e-05,
      "loss": 2.4346,
      "step": 29242
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1353486776351929,
      "learning_rate": 1.3777280714639725e-05,
      "loss": 2.2774,
      "step": 29243
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0731669664382935,
      "learning_rate": 1.3776899477547281e-05,
      "loss": 2.3773,
      "step": 29244
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9637303948402405,
      "learning_rate": 1.3776518234051965e-05,
      "loss": 2.4539,
      "step": 29245
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0116100311279297,
      "learning_rate": 1.3776136984154427e-05,
      "loss": 2.493,
      "step": 29246
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.107909917831421,
      "learning_rate": 1.3775755727855311e-05,
      "loss": 2.3949,
      "step": 29247
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2013919353485107,
      "learning_rate": 1.3775374465155262e-05,
      "loss": 2.1978,
      "step": 29248
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0832772254943848,
      "learning_rate": 1.3774993196054929e-05,
      "loss": 2.2123,
      "step": 29249
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.026559829711914,
      "learning_rate": 1.3774611920554958e-05,
      "loss": 2.4787,
      "step": 29250
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0453981161117554,
      "learning_rate": 1.3774230638655995e-05,
      "loss": 2.3788,
      "step": 29251
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1118649244308472,
      "learning_rate": 1.3773849350358686e-05,
      "loss": 2.1332,
      "step": 29252
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.068699836730957,
      "learning_rate": 1.3773468055663675e-05,
      "loss": 2.4498,
      "step": 29253
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2040390968322754,
      "learning_rate": 1.3773086754571612e-05,
      "loss": 2.6612,
      "step": 29254
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9995293617248535,
      "learning_rate": 1.3772705447083144e-05,
      "loss": 2.2444,
      "step": 29255
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.896008849143982,
      "learning_rate": 1.3772324133198913e-05,
      "loss": 2.3759,
      "step": 29256
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1149007081985474,
      "learning_rate": 1.377194281291957e-05,
      "loss": 2.2857,
      "step": 29257
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0877113342285156,
      "learning_rate": 1.3771561486245759e-05,
      "loss": 2.3052,
      "step": 29258
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1200031042099,
      "learning_rate": 1.3771180153178128e-05,
      "loss": 2.565,
      "step": 29259
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1088420152664185,
      "learning_rate": 1.3770798813717319e-05,
      "loss": 2.5289,
      "step": 29260
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0486047267913818,
      "learning_rate": 1.3770417467863984e-05,
      "loss": 2.2046,
      "step": 29261
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9968266487121582,
      "learning_rate": 1.3770036115618766e-05,
      "loss": 2.2546,
      "step": 29262
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0395290851593018,
      "learning_rate": 1.3769654756982315e-05,
      "loss": 2.5213,
      "step": 29263
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2028778791427612,
      "learning_rate": 1.3769273391955275e-05,
      "loss": 2.2629,
      "step": 29264
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0310755968093872,
      "learning_rate": 1.3768892020538292e-05,
      "loss": 2.326,
      "step": 29265
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1128305196762085,
      "learning_rate": 1.3768510642732013e-05,
      "loss": 2.303,
      "step": 29266
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0442968606948853,
      "learning_rate": 1.3768129258537087e-05,
      "loss": 2.364,
      "step": 29267
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2161870002746582,
      "learning_rate": 1.3767747867954155e-05,
      "loss": 2.3472,
      "step": 29268
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0042803287506104,
      "learning_rate": 1.3767366470983868e-05,
      "loss": 2.3946,
      "step": 29269
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.117101788520813,
      "learning_rate": 1.3766985067626874e-05,
      "loss": 2.3879,
      "step": 29270
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1535837650299072,
      "learning_rate": 1.3766603657883817e-05,
      "loss": 2.3832,
      "step": 29271
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0302904844284058,
      "learning_rate": 1.3766222241755343e-05,
      "loss": 2.4208,
      "step": 29272
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1956801414489746,
      "learning_rate": 1.3765840819242098e-05,
      "loss": 2.434,
      "step": 29273
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1103376150131226,
      "learning_rate": 1.376545939034473e-05,
      "loss": 2.24,
      "step": 29274
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9940273761749268,
      "learning_rate": 1.3765077955063885e-05,
      "loss": 2.2629,
      "step": 29275
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0634280443191528,
      "learning_rate": 1.3764696513400215e-05,
      "loss": 2.6581,
      "step": 29276
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0044834613800049,
      "learning_rate": 1.376431506535436e-05,
      "loss": 2.5424,
      "step": 29277
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.225766658782959,
      "learning_rate": 1.3763933610926968e-05,
      "loss": 2.5975,
      "step": 29278
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0323154926300049,
      "learning_rate": 1.3763552150118684e-05,
      "loss": 2.425,
      "step": 29279
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.184396743774414,
      "learning_rate": 1.3763170682930158e-05,
      "loss": 2.2416,
      "step": 29280
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0749531984329224,
      "learning_rate": 1.3762789209362038e-05,
      "loss": 2.3895,
      "step": 29281
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1143150329589844,
      "learning_rate": 1.3762407729414967e-05,
      "loss": 2.4598,
      "step": 29282
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1453771591186523,
      "learning_rate": 1.3762026243089592e-05,
      "loss": 2.5244,
      "step": 29283
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1015779972076416,
      "learning_rate": 1.3761644750386562e-05,
      "loss": 2.2724,
      "step": 29284
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0291032791137695,
      "learning_rate": 1.3761263251306522e-05,
      "loss": 2.392,
      "step": 29285
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1189314126968384,
      "learning_rate": 1.376088174585012e-05,
      "loss": 2.5793,
      "step": 29286
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1285839080810547,
      "learning_rate": 1.3760500234018002e-05,
      "loss": 2.4245,
      "step": 29287
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1330578327178955,
      "learning_rate": 1.3760118715810813e-05,
      "loss": 2.4847,
      "step": 29288
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1184271574020386,
      "learning_rate": 1.3759737191229203e-05,
      "loss": 2.4158,
      "step": 29289
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.040209412574768,
      "learning_rate": 1.3759355660273818e-05,
      "loss": 2.7225,
      "step": 29290
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.054782509803772,
      "learning_rate": 1.3758974122945303e-05,
      "loss": 2.4546,
      "step": 29291
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0647166967391968,
      "learning_rate": 1.3758592579244305e-05,
      "loss": 2.3003,
      "step": 29292
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0288962125778198,
      "learning_rate": 1.3758211029171472e-05,
      "loss": 2.4885,
      "step": 29293
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2462793588638306,
      "learning_rate": 1.3757829472727453e-05,
      "loss": 2.39,
      "step": 29294
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.056406855583191,
      "learning_rate": 1.375744790991289e-05,
      "loss": 2.5219,
      "step": 29295
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1059352159500122,
      "learning_rate": 1.3757066340728435e-05,
      "loss": 2.4132,
      "step": 29296
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1232900619506836,
      "learning_rate": 1.375668476517473e-05,
      "loss": 2.4812,
      "step": 29297
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.075814962387085,
      "learning_rate": 1.3756303183252424e-05,
      "loss": 2.4083,
      "step": 29298
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9840633273124695,
      "learning_rate": 1.3755921594962165e-05,
      "loss": 2.3926,
      "step": 29299
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1916508674621582,
      "learning_rate": 1.3755540000304597e-05,
      "loss": 2.2023,
      "step": 29300
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1616454124450684,
      "learning_rate": 1.375515839928037e-05,
      "loss": 2.3493,
      "step": 29301
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0378029346466064,
      "learning_rate": 1.3754776791890127e-05,
      "loss": 2.4613,
      "step": 29302
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1152631044387817,
      "learning_rate": 1.3754395178134523e-05,
      "loss": 2.4754,
      "step": 29303
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.085579514503479,
      "learning_rate": 1.3754013558014196e-05,
      "loss": 2.474,
      "step": 29304
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0514973402023315,
      "learning_rate": 1.3753631931529796e-05,
      "loss": 2.303,
      "step": 29305
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2570987939834595,
      "learning_rate": 1.375325029868197e-05,
      "loss": 2.5987,
      "step": 29306
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2754008769989014,
      "learning_rate": 1.3752868659471368e-05,
      "loss": 2.4016,
      "step": 29307
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0934807062149048,
      "learning_rate": 1.3752487013898632e-05,
      "loss": 2.3012,
      "step": 29308
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2920690774917603,
      "learning_rate": 1.3752105361964414e-05,
      "loss": 2.2029,
      "step": 29309
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0987299680709839,
      "learning_rate": 1.3751723703669355e-05,
      "loss": 2.4164,
      "step": 29310
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1845699548721313,
      "learning_rate": 1.3751342039014105e-05,
      "loss": 2.4657,
      "step": 29311
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9783725738525391,
      "learning_rate": 1.3750960367999315e-05,
      "loss": 2.3396,
      "step": 29312
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.005150556564331,
      "learning_rate": 1.3750578690625627e-05,
      "loss": 2.5251,
      "step": 29313
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.033935785293579,
      "learning_rate": 1.375019700689369e-05,
      "loss": 2.4146,
      "step": 29314
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9967314004898071,
      "learning_rate": 1.3749815316804149e-05,
      "loss": 2.4168,
      "step": 29315
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1052978038787842,
      "learning_rate": 1.3749433620357655e-05,
      "loss": 2.6146,
      "step": 29316
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0891879796981812,
      "learning_rate": 1.3749051917554852e-05,
      "loss": 2.3248,
      "step": 29317
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1100910902023315,
      "learning_rate": 1.3748670208396386e-05,
      "loss": 2.5866,
      "step": 29318
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.020902395248413,
      "learning_rate": 1.374828849288291e-05,
      "loss": 2.5012,
      "step": 29319
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0426760911941528,
      "learning_rate": 1.374790677101506e-05,
      "loss": 2.4518,
      "step": 29320
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0396373271942139,
      "learning_rate": 1.3747525042793495e-05,
      "loss": 2.3684,
      "step": 29321
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1139510869979858,
      "learning_rate": 1.3747143308218856e-05,
      "loss": 2.4235,
      "step": 29322
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.016542673110962,
      "learning_rate": 1.374676156729179e-05,
      "loss": 2.4435,
      "step": 29323
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.002729058265686,
      "learning_rate": 1.3746379820012949e-05,
      "loss": 2.4504,
      "step": 29324
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0753535032272339,
      "learning_rate": 1.3745998066382975e-05,
      "loss": 2.4227,
      "step": 29325
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0145190954208374,
      "learning_rate": 1.3745616306402517e-05,
      "loss": 2.3944,
      "step": 29326
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0692275762557983,
      "learning_rate": 1.374523454007222e-05,
      "loss": 2.3112,
      "step": 29327
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9685477614402771,
      "learning_rate": 1.3744852767392734e-05,
      "loss": 2.2468,
      "step": 29328
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0958439111709595,
      "learning_rate": 1.374447098836471e-05,
      "loss": 2.3412,
      "step": 29329
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3145155906677246,
      "learning_rate": 1.3744089202988784e-05,
      "loss": 2.2757,
      "step": 29330
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0521039962768555,
      "learning_rate": 1.3743707411265614e-05,
      "loss": 2.3847,
      "step": 29331
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.067854404449463,
      "learning_rate": 1.374332561319584e-05,
      "loss": 2.2956,
      "step": 29332
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.105776309967041,
      "learning_rate": 1.3742943808780115e-05,
      "loss": 2.4417,
      "step": 29333
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0734350681304932,
      "learning_rate": 1.3742561998019084e-05,
      "loss": 2.6486,
      "step": 29334
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9928901791572571,
      "learning_rate": 1.3742180180913392e-05,
      "loss": 2.3304,
      "step": 29335
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0626935958862305,
      "learning_rate": 1.3741798357463691e-05,
      "loss": 2.3853,
      "step": 29336
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0107791423797607,
      "learning_rate": 1.3741416527670625e-05,
      "loss": 2.5244,
      "step": 29337
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1199047565460205,
      "learning_rate": 1.374103469153484e-05,
      "loss": 2.5453,
      "step": 29338
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2491930723190308,
      "learning_rate": 1.3740652849056986e-05,
      "loss": 2.4423,
      "step": 29339
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0670528411865234,
      "learning_rate": 1.3740271000237708e-05,
      "loss": 2.3129,
      "step": 29340
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1875227689743042,
      "learning_rate": 1.3739889145077655e-05,
      "loss": 2.2475,
      "step": 29341
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.039771318435669,
      "learning_rate": 1.3739507283577478e-05,
      "loss": 2.4143,
      "step": 29342
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9689587354660034,
      "learning_rate": 1.3739125415737818e-05,
      "loss": 2.5671,
      "step": 29343
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1147069931030273,
      "learning_rate": 1.3738743541559324e-05,
      "loss": 2.3584,
      "step": 29344
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0718615055084229,
      "learning_rate": 1.3738361661042647e-05,
      "loss": 2.7082,
      "step": 29345
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0666441917419434,
      "learning_rate": 1.373797977418843e-05,
      "loss": 2.3274,
      "step": 29346
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9883396625518799,
      "learning_rate": 1.373759788099732e-05,
      "loss": 2.1572,
      "step": 29347
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0302358865737915,
      "learning_rate": 1.3737215981469968e-05,
      "loss": 2.2816,
      "step": 29348
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1294801235198975,
      "learning_rate": 1.3736834075607023e-05,
      "loss": 2.3305,
      "step": 29349
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0690430402755737,
      "learning_rate": 1.3736452163409127e-05,
      "loss": 2.2908,
      "step": 29350
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0416115522384644,
      "learning_rate": 1.3736070244876927e-05,
      "loss": 2.5483,
      "step": 29351
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.097153902053833,
      "learning_rate": 1.3735688320011075e-05,
      "loss": 2.4616,
      "step": 29352
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0485209226608276,
      "learning_rate": 1.3735306388812218e-05,
      "loss": 2.3289,
      "step": 29353
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9853555560112,
      "learning_rate": 1.3734924451281e-05,
      "loss": 2.3027,
      "step": 29354
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0513640642166138,
      "learning_rate": 1.3734542507418072e-05,
      "loss": 2.2342,
      "step": 29355
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0816868543624878,
      "learning_rate": 1.3734160557224084e-05,
      "loss": 2.2397,
      "step": 29356
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1656794548034668,
      "learning_rate": 1.3733778600699675e-05,
      "loss": 2.3777,
      "step": 29357
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0349574089050293,
      "learning_rate": 1.3733396637845498e-05,
      "loss": 2.2546,
      "step": 29358
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9746579527854919,
      "learning_rate": 1.3733014668662197e-05,
      "loss": 2.3765,
      "step": 29359
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9893198013305664,
      "learning_rate": 1.3732632693150426e-05,
      "loss": 2.395,
      "step": 29360
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0313458442687988,
      "learning_rate": 1.3732250711310826e-05,
      "loss": 2.4302,
      "step": 29361
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1175719499588013,
      "learning_rate": 1.3731868723144052e-05,
      "loss": 2.4121,
      "step": 29362
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0156019926071167,
      "learning_rate": 1.3731486728650744e-05,
      "loss": 2.3923,
      "step": 29363
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0720669031143188,
      "learning_rate": 1.3731104727831553e-05,
      "loss": 2.5545,
      "step": 29364
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.15771484375,
      "learning_rate": 1.3730722720687125e-05,
      "loss": 2.6757,
      "step": 29365
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0020016431808472,
      "learning_rate": 1.3730340707218108e-05,
      "loss": 2.3691,
      "step": 29366
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9763222932815552,
      "learning_rate": 1.3729958687425153e-05,
      "loss": 2.5218,
      "step": 29367
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.169814109802246,
      "learning_rate": 1.3729576661308902e-05,
      "loss": 2.5653,
      "step": 29368
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0327214002609253,
      "learning_rate": 1.372919462887001e-05,
      "loss": 2.3493,
      "step": 29369
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9733937382698059,
      "learning_rate": 1.3728812590109117e-05,
      "loss": 2.5159,
      "step": 29370
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.180519461631775,
      "learning_rate": 1.3728430545026876e-05,
      "loss": 2.2584,
      "step": 29371
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.838423490524292,
      "learning_rate": 1.3728048493623931e-05,
      "loss": 2.4781,
      "step": 29372
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9942593574523926,
      "learning_rate": 1.3727666435900933e-05,
      "loss": 2.2824,
      "step": 29373
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0121370553970337,
      "learning_rate": 1.3727284371858525e-05,
      "loss": 2.5333,
      "step": 29374
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9709362387657166,
      "learning_rate": 1.372690230149736e-05,
      "loss": 2.2312,
      "step": 29375
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9476384520530701,
      "learning_rate": 1.3726520224818085e-05,
      "loss": 2.566,
      "step": 29376
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9772353768348694,
      "learning_rate": 1.3726138141821342e-05,
      "loss": 2.4224,
      "step": 29377
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.097196102142334,
      "learning_rate": 1.3725756052507784e-05,
      "loss": 2.4042,
      "step": 29378
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0846195220947266,
      "learning_rate": 1.3725373956878059e-05,
      "loss": 2.6066,
      "step": 29379
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0225090980529785,
      "learning_rate": 1.3724991854932812e-05,
      "loss": 2.3113,
      "step": 29380
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0312066078186035,
      "learning_rate": 1.3724609746672693e-05,
      "loss": 2.3563,
      "step": 29381
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9519336223602295,
      "learning_rate": 1.372422763209835e-05,
      "loss": 2.4708,
      "step": 29382
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1002345085144043,
      "learning_rate": 1.3723845511210427e-05,
      "loss": 2.2404,
      "step": 29383
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9069981575012207,
      "learning_rate": 1.3723463384009575e-05,
      "loss": 2.5209,
      "step": 29384
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1568458080291748,
      "learning_rate": 1.3723081250496442e-05,
      "loss": 2.4892,
      "step": 29385
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.022586703300476,
      "learning_rate": 1.3722699110671675e-05,
      "loss": 2.3403,
      "step": 29386
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.060981273651123,
      "learning_rate": 1.3722316964535922e-05,
      "loss": 2.3013,
      "step": 29387
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0803776979446411,
      "learning_rate": 1.372193481208983e-05,
      "loss": 2.3524,
      "step": 29388
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0503634214401245,
      "learning_rate": 1.3721552653334049e-05,
      "loss": 2.3804,
      "step": 29389
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1040654182434082,
      "learning_rate": 1.3721170488269227e-05,
      "loss": 2.3185,
      "step": 29390
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2082703113555908,
      "learning_rate": 1.3720788316896008e-05,
      "loss": 2.3074,
      "step": 29391
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.070799469947815,
      "learning_rate": 1.3720406139215042e-05,
      "loss": 2.2655,
      "step": 29392
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7646976709365845,
      "learning_rate": 1.3720023955226975e-05,
      "loss": 2.4303,
      "step": 29393
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0552364587783813,
      "learning_rate": 1.3719641764932462e-05,
      "loss": 2.427,
      "step": 29394
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0777573585510254,
      "learning_rate": 1.3719259568332144e-05,
      "loss": 2.5038,
      "step": 29395
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0095607042312622,
      "learning_rate": 1.371887736542667e-05,
      "loss": 2.2888,
      "step": 29396
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9393212795257568,
      "learning_rate": 1.371849515621669e-05,
      "loss": 2.3835,
      "step": 29397
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0899146795272827,
      "learning_rate": 1.3718112940702848e-05,
      "loss": 2.3302,
      "step": 29398
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0692437887191772,
      "learning_rate": 1.3717730718885798e-05,
      "loss": 2.3873,
      "step": 29399
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9455182552337646,
      "learning_rate": 1.3717348490766183e-05,
      "loss": 2.3093,
      "step": 29400
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9928444027900696,
      "learning_rate": 1.3716966256344655e-05,
      "loss": 2.388,
      "step": 29401
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0460492372512817,
      "learning_rate": 1.3716584015621859e-05,
      "loss": 2.6671,
      "step": 29402
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1165691614151,
      "learning_rate": 1.371620176859844e-05,
      "loss": 2.3854,
      "step": 29403
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0565091371536255,
      "learning_rate": 1.3715819515275055e-05,
      "loss": 2.4464,
      "step": 29404
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9570465683937073,
      "learning_rate": 1.3715437255652343e-05,
      "loss": 2.3327,
      "step": 29405
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9956026673316956,
      "learning_rate": 1.3715054989730958e-05,
      "loss": 2.517,
      "step": 29406
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1373026371002197,
      "learning_rate": 1.3714672717511543e-05,
      "loss": 2.3918,
      "step": 29407
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2719577550888062,
      "learning_rate": 1.3714290438994753e-05,
      "loss": 2.1405,
      "step": 29408
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0380394458770752,
      "learning_rate": 1.3713908154181228e-05,
      "loss": 2.4821,
      "step": 29409
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.169470191001892,
      "learning_rate": 1.3713525863071621e-05,
      "loss": 2.5623,
      "step": 29410
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1625046730041504,
      "learning_rate": 1.371314356566658e-05,
      "loss": 2.2906,
      "step": 29411
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.084416151046753,
      "learning_rate": 1.3712761261966752e-05,
      "loss": 2.3381,
      "step": 29412
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1663775444030762,
      "learning_rate": 1.3712378951972785e-05,
      "loss": 2.3937,
      "step": 29413
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9697396755218506,
      "learning_rate": 1.3711996635685328e-05,
      "loss": 2.4437,
      "step": 29414
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0734868049621582,
      "learning_rate": 1.3711614313105029e-05,
      "loss": 2.2259,
      "step": 29415
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1518584489822388,
      "learning_rate": 1.3711231984232534e-05,
      "loss": 2.3259,
      "step": 29416
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.968687891960144,
      "learning_rate": 1.3710849649068495e-05,
      "loss": 2.443,
      "step": 29417
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1755818128585815,
      "learning_rate": 1.3710467307613554e-05,
      "loss": 2.3072,
      "step": 29418
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9352905750274658,
      "learning_rate": 1.3710084959868363e-05,
      "loss": 2.4287,
      "step": 29419
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0347607135772705,
      "learning_rate": 1.3709702605833576e-05,
      "loss": 2.3475,
      "step": 29420
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1203937530517578,
      "learning_rate": 1.3709320245509834e-05,
      "loss": 2.3638,
      "step": 29421
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0648332834243774,
      "learning_rate": 1.3708937878897783e-05,
      "loss": 2.5844,
      "step": 29422
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9769183397293091,
      "learning_rate": 1.3708555505998079e-05,
      "loss": 2.3841,
      "step": 29423
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1099028587341309,
      "learning_rate": 1.3708173126811364e-05,
      "loss": 2.3774,
      "step": 29424
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1506065130233765,
      "learning_rate": 1.3707790741338288e-05,
      "loss": 2.3323,
      "step": 29425
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0799009799957275,
      "learning_rate": 1.3707408349579498e-05,
      "loss": 2.4948,
      "step": 29426
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1013520956039429,
      "learning_rate": 1.3707025951535648e-05,
      "loss": 2.2342,
      "step": 29427
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9861881732940674,
      "learning_rate": 1.3706643547207381e-05,
      "loss": 2.6148,
      "step": 29428
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1304857730865479,
      "learning_rate": 1.3706261136595346e-05,
      "loss": 2.3269,
      "step": 29429
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0743364095687866,
      "learning_rate": 1.3705878719700192e-05,
      "loss": 2.387,
      "step": 29430
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1552956104278564,
      "learning_rate": 1.3705496296522567e-05,
      "loss": 2.231,
      "step": 29431
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0606342554092407,
      "learning_rate": 1.370511386706312e-05,
      "loss": 2.1865,
      "step": 29432
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.016489863395691,
      "learning_rate": 1.3704731431322498e-05,
      "loss": 2.3475,
      "step": 29433
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1966015100479126,
      "learning_rate": 1.370434898930135e-05,
      "loss": 2.4856,
      "step": 29434
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.034287691116333,
      "learning_rate": 1.3703966541000326e-05,
      "loss": 2.335,
      "step": 29435
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0650910139083862,
      "learning_rate": 1.370358408642007e-05,
      "loss": 2.1976,
      "step": 29436
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9703786969184875,
      "learning_rate": 1.3703201625561234e-05,
      "loss": 2.254,
      "step": 29437
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2036720514297485,
      "learning_rate": 1.3702819158424466e-05,
      "loss": 2.6684,
      "step": 29438
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1187822818756104,
      "learning_rate": 1.3702436685010411e-05,
      "loss": 2.3773,
      "step": 29439
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9940219521522522,
      "learning_rate": 1.3702054205319724e-05,
      "loss": 2.3422,
      "step": 29440
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0229147672653198,
      "learning_rate": 1.3701671719353051e-05,
      "loss": 2.4,
      "step": 29441
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.005100965499878,
      "learning_rate": 1.3701289227111036e-05,
      "loss": 2.5398,
      "step": 29442
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1722606420516968,
      "learning_rate": 1.370090672859433e-05,
      "loss": 2.487,
      "step": 29443
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4691823720932007,
      "learning_rate": 1.3700524223803582e-05,
      "loss": 2.4402,
      "step": 29444
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.956784188747406,
      "learning_rate": 1.3700141712739443e-05,
      "loss": 2.3947,
      "step": 29445
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.016648530960083,
      "learning_rate": 1.3699759195402556e-05,
      "loss": 2.1311,
      "step": 29446
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0351269245147705,
      "learning_rate": 1.3699376671793574e-05,
      "loss": 2.4834,
      "step": 29447
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.082351803779602,
      "learning_rate": 1.3698994141913144e-05,
      "loss": 2.2492,
      "step": 29448
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9449290037155151,
      "learning_rate": 1.3698611605761914e-05,
      "loss": 2.3787,
      "step": 29449
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9706251621246338,
      "learning_rate": 1.369822906334053e-05,
      "loss": 2.4982,
      "step": 29450
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1337623596191406,
      "learning_rate": 1.3697846514649647e-05,
      "loss": 2.4845,
      "step": 29451
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.03691828250885,
      "learning_rate": 1.3697463959689907e-05,
      "loss": 2.2373,
      "step": 29452
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1543152332305908,
      "learning_rate": 1.3697081398461965e-05,
      "loss": 2.6168,
      "step": 29453
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0112075805664062,
      "learning_rate": 1.3696698830966464e-05,
      "loss": 2.4313,
      "step": 29454
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1956238746643066,
      "learning_rate": 1.3696316257204055e-05,
      "loss": 2.3979,
      "step": 29455
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.022916555404663,
      "learning_rate": 1.3695933677175383e-05,
      "loss": 2.4143,
      "step": 29456
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9717732071876526,
      "learning_rate": 1.3695551090881103e-05,
      "loss": 2.4762,
      "step": 29457
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0520154237747192,
      "learning_rate": 1.369516849832186e-05,
      "loss": 2.5172,
      "step": 29458
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.048093557357788,
      "learning_rate": 1.3694785899498299e-05,
      "loss": 2.4337,
      "step": 29459
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.090631127357483,
      "learning_rate": 1.3694403294411079e-05,
      "loss": 2.3327,
      "step": 29460
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0261420011520386,
      "learning_rate": 1.3694020683060838e-05,
      "loss": 2.6358,
      "step": 29461
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2198683023452759,
      "learning_rate": 1.369363806544823e-05,
      "loss": 2.1459,
      "step": 29462
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0896958112716675,
      "learning_rate": 1.3693255441573903e-05,
      "loss": 2.2153,
      "step": 29463
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0394374132156372,
      "learning_rate": 1.3692872811438503e-05,
      "loss": 2.5496,
      "step": 29464
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1974058151245117,
      "learning_rate": 1.3692490175042678e-05,
      "loss": 2.3402,
      "step": 29465
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1228961944580078,
      "learning_rate": 1.3692107532387082e-05,
      "loss": 2.5273,
      "step": 29466
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0997148752212524,
      "learning_rate": 1.3691724883472361e-05,
      "loss": 2.6568,
      "step": 29467
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.979711651802063,
      "learning_rate": 1.3691342228299168e-05,
      "loss": 2.2881,
      "step": 29468
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9409821629524231,
      "learning_rate": 1.3690959566868141e-05,
      "loss": 2.304,
      "step": 29469
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0908130407333374,
      "learning_rate": 1.3690576899179938e-05,
      "loss": 2.3732,
      "step": 29470
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0776728391647339,
      "learning_rate": 1.3690194225235205e-05,
      "loss": 2.4341,
      "step": 29471
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0525850057601929,
      "learning_rate": 1.3689811545034587e-05,
      "loss": 2.2656,
      "step": 29472
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9873967170715332,
      "learning_rate": 1.368942885857874e-05,
      "loss": 2.4236,
      "step": 29473
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1469404697418213,
      "learning_rate": 1.3689046165868309e-05,
      "loss": 2.2987,
      "step": 29474
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9891238808631897,
      "learning_rate": 1.3688663466903943e-05,
      "loss": 2.3928,
      "step": 29475
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0775842666625977,
      "learning_rate": 1.3688280761686288e-05,
      "loss": 2.325,
      "step": 29476
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2154868841171265,
      "learning_rate": 1.3687898050215997e-05,
      "loss": 2.6181,
      "step": 29477
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1220768690109253,
      "learning_rate": 1.3687515332493715e-05,
      "loss": 2.524,
      "step": 29478
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0043247938156128,
      "learning_rate": 1.3687132608520095e-05,
      "loss": 2.353,
      "step": 29479
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0312694311141968,
      "learning_rate": 1.3686749878295784e-05,
      "loss": 2.5834,
      "step": 29480
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9984686374664307,
      "learning_rate": 1.3686367141821433e-05,
      "loss": 2.4328,
      "step": 29481
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1150333881378174,
      "learning_rate": 1.3685984399097685e-05,
      "loss": 2.3548,
      "step": 29482
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9735502004623413,
      "learning_rate": 1.3685601650125193e-05,
      "loss": 2.2419,
      "step": 29483
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9749685525894165,
      "learning_rate": 1.3685218894904603e-05,
      "loss": 2.2649,
      "step": 29484
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1318118572235107,
      "learning_rate": 1.368483613343657e-05,
      "loss": 2.2544,
      "step": 29485
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1400755643844604,
      "learning_rate": 1.3684453365721737e-05,
      "loss": 2.5726,
      "step": 29486
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.129660964012146,
      "learning_rate": 1.3684070591760759e-05,
      "loss": 2.413,
      "step": 29487
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0570474863052368,
      "learning_rate": 1.3683687811554276e-05,
      "loss": 2.1838,
      "step": 29488
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9794824719429016,
      "learning_rate": 1.3683305025102941e-05,
      "loss": 2.2467,
      "step": 29489
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3308155536651611,
      "learning_rate": 1.3682922232407405e-05,
      "loss": 2.4859,
      "step": 29490
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9985870718955994,
      "learning_rate": 1.3682539433468321e-05,
      "loss": 2.3386,
      "step": 29491
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2003445625305176,
      "learning_rate": 1.3682156628286325e-05,
      "loss": 2.5267,
      "step": 29492
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0647521018981934,
      "learning_rate": 1.3681773816862077e-05,
      "loss": 2.3265,
      "step": 29493
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9979245662689209,
      "learning_rate": 1.3681390999196223e-05,
      "loss": 2.6387,
      "step": 29494
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.976516842842102,
      "learning_rate": 1.3681008175289408e-05,
      "loss": 2.1987,
      "step": 29495
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0698802471160889,
      "learning_rate": 1.3680625345142286e-05,
      "loss": 2.4657,
      "step": 29496
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9945709705352783,
      "learning_rate": 1.3680242508755503e-05,
      "loss": 2.1229,
      "step": 29497
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0514042377471924,
      "learning_rate": 1.367985966612971e-05,
      "loss": 2.5022,
      "step": 29498
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.150513768196106,
      "learning_rate": 1.3679476817265558e-05,
      "loss": 2.4288,
      "step": 29499
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.160093069076538,
      "learning_rate": 1.3679093962163692e-05,
      "loss": 2.6127,
      "step": 29500
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.145243763923645,
      "learning_rate": 1.3678711100824764e-05,
      "loss": 2.4686,
      "step": 29501
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0974996089935303,
      "learning_rate": 1.367832823324942e-05,
      "loss": 2.6175,
      "step": 29502
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1221809387207031,
      "learning_rate": 1.3677945359438309e-05,
      "loss": 2.4027,
      "step": 29503
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1576602458953857,
      "learning_rate": 1.3677562479392083e-05,
      "loss": 2.4788,
      "step": 29504
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9892082810401917,
      "learning_rate": 1.3677179593111392e-05,
      "loss": 2.4941,
      "step": 29505
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0645790100097656,
      "learning_rate": 1.367679670059688e-05,
      "loss": 2.3601,
      "step": 29506
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0784385204315186,
      "learning_rate": 1.36764138018492e-05,
      "loss": 2.4513,
      "step": 29507
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0336542129516602,
      "learning_rate": 1.3676030896869001e-05,
      "loss": 2.5091,
      "step": 29508
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.992672324180603,
      "learning_rate": 1.367564798565693e-05,
      "loss": 2.4387,
      "step": 29509
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0304129123687744,
      "learning_rate": 1.3675265068213637e-05,
      "loss": 2.4965,
      "step": 29510
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0650465488433838,
      "learning_rate": 1.3674882144539771e-05,
      "loss": 2.4582,
      "step": 29511
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0870174169540405,
      "learning_rate": 1.3674499214635984e-05,
      "loss": 2.5969,
      "step": 29512
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0643885135650635,
      "learning_rate": 1.367411627850292e-05,
      "loss": 2.4892,
      "step": 29513
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1843249797821045,
      "learning_rate": 1.3673733336141235e-05,
      "loss": 2.2574,
      "step": 29514
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0699306726455688,
      "learning_rate": 1.3673350387551572e-05,
      "loss": 2.6388,
      "step": 29515
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9036979675292969,
      "learning_rate": 1.3672967432734582e-05,
      "loss": 2.3,
      "step": 29516
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0624229907989502,
      "learning_rate": 1.3672584471690915e-05,
      "loss": 2.1455,
      "step": 29517
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9768807291984558,
      "learning_rate": 1.3672201504421219e-05,
      "loss": 2.1312,
      "step": 29518
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0096794366836548,
      "learning_rate": 1.3671818530926146e-05,
      "loss": 2.4778,
      "step": 29519
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.078816294670105,
      "learning_rate": 1.3671435551206343e-05,
      "loss": 2.4987,
      "step": 29520
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.052149772644043,
      "learning_rate": 1.3671052565262459e-05,
      "loss": 2.4304,
      "step": 29521
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3221627473831177,
      "learning_rate": 1.3670669573095146e-05,
      "loss": 2.4764,
      "step": 29522
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0587728023529053,
      "learning_rate": 1.3670286574705047e-05,
      "loss": 2.4807,
      "step": 29523
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0742067098617554,
      "learning_rate": 1.3669903570092817e-05,
      "loss": 2.2469,
      "step": 29524
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0412945747375488,
      "learning_rate": 1.3669520559259104e-05,
      "loss": 2.4503,
      "step": 29525
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9892836809158325,
      "learning_rate": 1.3669137542204559e-05,
      "loss": 2.4786,
      "step": 29526
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0535390377044678,
      "learning_rate": 1.3668754518929827e-05,
      "loss": 2.3582,
      "step": 29527
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.139012336730957,
      "learning_rate": 1.366837148943556e-05,
      "loss": 2.3479,
      "step": 29528
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2104889154434204,
      "learning_rate": 1.3667988453722408e-05,
      "loss": 2.3797,
      "step": 29529
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0780982971191406,
      "learning_rate": 1.3667605411791017e-05,
      "loss": 2.3248,
      "step": 29530
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0907107591629028,
      "learning_rate": 1.366722236364204e-05,
      "loss": 2.5588,
      "step": 29531
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.058728814125061,
      "learning_rate": 1.3666839309276125e-05,
      "loss": 2.428,
      "step": 29532
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1077423095703125,
      "learning_rate": 1.3666456248693922e-05,
      "loss": 2.2952,
      "step": 29533
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.252077341079712,
      "learning_rate": 1.366607318189608e-05,
      "loss": 2.4458,
      "step": 29534
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0701920986175537,
      "learning_rate": 1.3665690108883247e-05,
      "loss": 2.4416,
      "step": 29535
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1048543453216553,
      "learning_rate": 1.3665307029656074e-05,
      "loss": 2.4244,
      "step": 29536
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0307393074035645,
      "learning_rate": 1.3664923944215212e-05,
      "loss": 2.3116,
      "step": 29537
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.123862862586975,
      "learning_rate": 1.3664540852561305e-05,
      "loss": 2.4754,
      "step": 29538
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.006791353225708,
      "learning_rate": 1.3664157754695009e-05,
      "loss": 2.5182,
      "step": 29539
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1488761901855469,
      "learning_rate": 1.3663774650616973e-05,
      "loss": 2.6066,
      "step": 29540
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1674034595489502,
      "learning_rate": 1.3663391540327839e-05,
      "loss": 2.4716,
      "step": 29541
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1261566877365112,
      "learning_rate": 1.3663008423828261e-05,
      "loss": 2.2866,
      "step": 29542
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0368151664733887,
      "learning_rate": 1.366262530111889e-05,
      "loss": 2.3229,
      "step": 29543
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9831522703170776,
      "learning_rate": 1.3662242172200374e-05,
      "loss": 2.6888,
      "step": 29544
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.131963849067688,
      "learning_rate": 1.3661859037073363e-05,
      "loss": 2.4865,
      "step": 29545
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9693456292152405,
      "learning_rate": 1.366147589573851e-05,
      "loss": 2.5788,
      "step": 29546
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0263422727584839,
      "learning_rate": 1.366109274819646e-05,
      "loss": 2.4065,
      "step": 29547
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1329262256622314,
      "learning_rate": 1.366070959444786e-05,
      "loss": 2.4583,
      "step": 29548
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0638346672058105,
      "learning_rate": 1.3660326434493365e-05,
      "loss": 2.4384,
      "step": 29549
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2047549486160278,
      "learning_rate": 1.3659943268333624e-05,
      "loss": 2.3732,
      "step": 29550
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.034114122390747,
      "learning_rate": 1.3659560095969285e-05,
      "loss": 2.5848,
      "step": 29551
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9389419555664062,
      "learning_rate": 1.3659176917400994e-05,
      "loss": 2.4514,
      "step": 29552
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0298923254013062,
      "learning_rate": 1.3658793732629407e-05,
      "loss": 2.2879,
      "step": 29553
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1797668933868408,
      "learning_rate": 1.3658410541655171e-05,
      "loss": 2.6114,
      "step": 29554
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0326175689697266,
      "learning_rate": 1.3658027344478934e-05,
      "loss": 2.3574,
      "step": 29555
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0171234607696533,
      "learning_rate": 1.3657644141101347e-05,
      "loss": 2.2129,
      "step": 29556
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0759779214859009,
      "learning_rate": 1.3657260931523062e-05,
      "loss": 2.3358,
      "step": 29557
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2029142379760742,
      "learning_rate": 1.3656877715744723e-05,
      "loss": 2.4595,
      "step": 29558
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0930171012878418,
      "learning_rate": 1.3656494493766985e-05,
      "loss": 2.4112,
      "step": 29559
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.115478515625,
      "learning_rate": 1.3656111265590499e-05,
      "loss": 2.3532,
      "step": 29560
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6563397645950317,
      "learning_rate": 1.3655728031215909e-05,
      "loss": 2.2431,
      "step": 29561
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1466666460037231,
      "learning_rate": 1.3655344790643865e-05,
      "loss": 2.2705,
      "step": 29562
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.9993913769721985,
      "learning_rate": 1.3654961543875021e-05,
      "loss": 2.2671,
      "step": 29563
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.067444086074829,
      "learning_rate": 1.3654578290910023e-05,
      "loss": 2.4131,
      "step": 29564
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2201000452041626,
      "learning_rate": 1.3654195031749522e-05,
      "loss": 2.4873,
      "step": 29565
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2096571922302246,
      "learning_rate": 1.3653811766394172e-05,
      "loss": 2.4431,
      "step": 29566
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1260775327682495,
      "learning_rate": 1.3653428494844614e-05,
      "loss": 2.1923,
      "step": 29567
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.0918315649032593,
      "learning_rate": 1.3653045217101505e-05,
      "loss": 2.345,
      "step": 29568
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1064136028289795,
      "learning_rate": 1.365266193316549e-05,
      "loss": 2.2942,
      "step": 29569
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9952602386474609,
      "learning_rate": 1.3652278643037221e-05,
      "loss": 2.307,
      "step": 29570
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0201295614242554,
      "learning_rate": 1.365189534671735e-05,
      "loss": 2.3697,
      "step": 29571
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0665533542633057,
      "learning_rate": 1.3651512044206523e-05,
      "loss": 2.2916,
      "step": 29572
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1587849855422974,
      "learning_rate": 1.3651128735505395e-05,
      "loss": 2.3469,
      "step": 29573
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2539210319519043,
      "learning_rate": 1.3650745420614608e-05,
      "loss": 2.303,
      "step": 29574
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0031503438949585,
      "learning_rate": 1.3650362099534818e-05,
      "loss": 2.4769,
      "step": 29575
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1191071271896362,
      "learning_rate": 1.364997877226667e-05,
      "loss": 2.4512,
      "step": 29576
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0832586288452148,
      "learning_rate": 1.364959543881082e-05,
      "loss": 2.4015,
      "step": 29577
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0135490894317627,
      "learning_rate": 1.3649212099167915e-05,
      "loss": 2.4086,
      "step": 29578
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.028398871421814,
      "learning_rate": 1.3648828753338602e-05,
      "loss": 2.3475,
      "step": 29579
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2462811470031738,
      "learning_rate": 1.364844540132353e-05,
      "loss": 2.2765,
      "step": 29580
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.061763882637024,
      "learning_rate": 1.364806204312336e-05,
      "loss": 2.4194,
      "step": 29581
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0443416833877563,
      "learning_rate": 1.364767867873873e-05,
      "loss": 2.2578,
      "step": 29582
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2336370944976807,
      "learning_rate": 1.3647295308170292e-05,
      "loss": 2.4571,
      "step": 29583
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.17472243309021,
      "learning_rate": 1.3646911931418701e-05,
      "loss": 2.2911,
      "step": 29584
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1763375997543335,
      "learning_rate": 1.3646528548484601e-05,
      "loss": 2.4676,
      "step": 29585
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1675026416778564,
      "learning_rate": 1.3646145159368649e-05,
      "loss": 2.5985,
      "step": 29586
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1317232847213745,
      "learning_rate": 1.3645761764071485e-05,
      "loss": 2.3054,
      "step": 29587
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0504084825515747,
      "learning_rate": 1.3645378362593769e-05,
      "loss": 2.5771,
      "step": 29588
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0941979885101318,
      "learning_rate": 1.3644994954936147e-05,
      "loss": 2.26,
      "step": 29589
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0636013746261597,
      "learning_rate": 1.3644611541099265e-05,
      "loss": 2.6896,
      "step": 29590
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1668729782104492,
      "learning_rate": 1.3644228121083779e-05,
      "loss": 2.3219,
      "step": 29591
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3962713479995728,
      "learning_rate": 1.3643844694890334e-05,
      "loss": 2.3264,
      "step": 29592
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1870923042297363,
      "learning_rate": 1.3643461262519585e-05,
      "loss": 2.3842,
      "step": 29593
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0173099040985107,
      "learning_rate": 1.3643077823972179e-05,
      "loss": 2.4287,
      "step": 29594
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0470409393310547,
      "learning_rate": 1.3642694379248766e-05,
      "loss": 2.3031,
      "step": 29595
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1284816265106201,
      "learning_rate": 1.3642310928349998e-05,
      "loss": 2.5001,
      "step": 29596
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.059752345085144,
      "learning_rate": 1.3641927471276524e-05,
      "loss": 2.3533,
      "step": 29597
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.094780445098877,
      "learning_rate": 1.3641544008028992e-05,
      "loss": 2.4741,
      "step": 29598
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1410311460494995,
      "learning_rate": 1.3641160538608056e-05,
      "loss": 2.3162,
      "step": 29599
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1511096954345703,
      "learning_rate": 1.3640777063014363e-05,
      "loss": 2.3789,
      "step": 29600
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0968843698501587,
      "learning_rate": 1.3640393581248564e-05,
      "loss": 2.4155,
      "step": 29601
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0957849025726318,
      "learning_rate": 1.3640010093311307e-05,
      "loss": 2.3459,
      "step": 29602
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1803464889526367,
      "learning_rate": 1.3639626599203247e-05,
      "loss": 2.1543,
      "step": 29603
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9476506114006042,
      "learning_rate": 1.363924309892503e-05,
      "loss": 2.5333,
      "step": 29604
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0479159355163574,
      "learning_rate": 1.3638859592477307e-05,
      "loss": 2.5329,
      "step": 29605
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0314478874206543,
      "learning_rate": 1.3638476079860732e-05,
      "loss": 2.5971,
      "step": 29606
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0621461868286133,
      "learning_rate": 1.3638092561075951e-05,
      "loss": 2.2615,
      "step": 29607
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0567007064819336,
      "learning_rate": 1.3637709036123612e-05,
      "loss": 2.3272,
      "step": 29608
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2226463556289673,
      "learning_rate": 1.3637325505004373e-05,
      "loss": 2.3694,
      "step": 29609
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0134986639022827,
      "learning_rate": 1.3636941967718874e-05,
      "loss": 2.4137,
      "step": 29610
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.113420009613037,
      "learning_rate": 1.3636558424267774e-05,
      "loss": 2.1454,
      "step": 29611
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.088352918624878,
      "learning_rate": 1.3636174874651717e-05,
      "loss": 2.5026,
      "step": 29612
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.060132622718811,
      "learning_rate": 1.3635791318871361e-05,
      "loss": 2.454,
      "step": 29613
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0617773532867432,
      "learning_rate": 1.3635407756927348e-05,
      "loss": 2.3989,
      "step": 29614
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0665690898895264,
      "learning_rate": 1.3635024188820332e-05,
      "loss": 2.3445,
      "step": 29615
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0201882123947144,
      "learning_rate": 1.363464061455096e-05,
      "loss": 2.3032,
      "step": 29616
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.970453679561615,
      "learning_rate": 1.3634257034119886e-05,
      "loss": 2.3794,
      "step": 29617
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0089194774627686,
      "learning_rate": 1.3633873447527762e-05,
      "loss": 2.4716,
      "step": 29618
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1124310493469238,
      "learning_rate": 1.3633489854775234e-05,
      "loss": 2.2158,
      "step": 29619
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9998841881752014,
      "learning_rate": 1.3633106255862956e-05,
      "loss": 2.2582,
      "step": 29620
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0960501432418823,
      "learning_rate": 1.3632722650791572e-05,
      "loss": 2.3636,
      "step": 29621
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1676865816116333,
      "learning_rate": 1.363233903956174e-05,
      "loss": 2.3846,
      "step": 29622
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0092333555221558,
      "learning_rate": 1.3631955422174104e-05,
      "loss": 2.3599,
      "step": 29623
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2920259237289429,
      "learning_rate": 1.363157179862932e-05,
      "loss": 2.2575,
      "step": 29624
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.065256953239441,
      "learning_rate": 1.363118816892803e-05,
      "loss": 2.3144,
      "step": 29625
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.22010338306427,
      "learning_rate": 1.3630804533070898e-05,
      "loss": 2.713,
      "step": 29626
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0495065450668335,
      "learning_rate": 1.3630420891058563e-05,
      "loss": 2.2359,
      "step": 29627
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.089687705039978,
      "learning_rate": 1.3630037242891676e-05,
      "loss": 2.3775,
      "step": 29628
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0311092138290405,
      "learning_rate": 1.362965358857089e-05,
      "loss": 2.3252,
      "step": 29629
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1835507154464722,
      "learning_rate": 1.3629269928096859e-05,
      "loss": 2.4407,
      "step": 29630
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0610814094543457,
      "learning_rate": 1.3628886261470227e-05,
      "loss": 2.2729,
      "step": 29631
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0445510149002075,
      "learning_rate": 1.3628502588691648e-05,
      "loss": 2.4563,
      "step": 29632
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.05061674118042,
      "learning_rate": 1.3628118909761771e-05,
      "loss": 2.5421,
      "step": 29633
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0515645742416382,
      "learning_rate": 1.3627735224681246e-05,
      "loss": 2.1999,
      "step": 29634
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.096073031425476,
      "learning_rate": 1.3627351533450727e-05,
      "loss": 2.31,
      "step": 29635
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0762585401535034,
      "learning_rate": 1.3626967836070862e-05,
      "loss": 2.288,
      "step": 29636
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.991352379322052,
      "learning_rate": 1.3626584132542302e-05,
      "loss": 2.2738,
      "step": 29637
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.04151451587677,
      "learning_rate": 1.3626200422865695e-05,
      "loss": 2.3649,
      "step": 29638
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9783952236175537,
      "learning_rate": 1.3625816707041692e-05,
      "loss": 2.4691,
      "step": 29639
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0053353309631348,
      "learning_rate": 1.362543298507095e-05,
      "loss": 2.2626,
      "step": 29640
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9677678346633911,
      "learning_rate": 1.362504925695411e-05,
      "loss": 2.5674,
      "step": 29641
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0485423803329468,
      "learning_rate": 1.3624665522691829e-05,
      "loss": 2.4042,
      "step": 29642
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.126222848892212,
      "learning_rate": 1.3624281782284753e-05,
      "loss": 2.3729,
      "step": 29643
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0426641702651978,
      "learning_rate": 1.3623898035733536e-05,
      "loss": 2.39,
      "step": 29644
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1078826189041138,
      "learning_rate": 1.362351428303883e-05,
      "loss": 2.5126,
      "step": 29645
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.035241961479187,
      "learning_rate": 1.3623130524201282e-05,
      "loss": 2.2861,
      "step": 29646
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0855987071990967,
      "learning_rate": 1.3622746759221543e-05,
      "loss": 2.3425,
      "step": 29647
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.043033242225647,
      "learning_rate": 1.3622362988100264e-05,
      "loss": 2.3083,
      "step": 29648
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1245861053466797,
      "learning_rate": 1.3621979210838097e-05,
      "loss": 2.4036,
      "step": 29649
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.030568242073059,
      "learning_rate": 1.362159542743569e-05,
      "loss": 2.6189,
      "step": 29650
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1008610725402832,
      "learning_rate": 1.3621211637893697e-05,
      "loss": 2.5107,
      "step": 29651
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.035408854484558,
      "learning_rate": 1.3620827842212766e-05,
      "loss": 2.3324,
      "step": 29652
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.179307460784912,
      "learning_rate": 1.3620444040393547e-05,
      "loss": 2.4662,
      "step": 29653
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1338255405426025,
      "learning_rate": 1.3620060232436694e-05,
      "loss": 2.3491,
      "step": 29654
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.162139654159546,
      "learning_rate": 1.3619676418342856e-05,
      "loss": 2.3875,
      "step": 29655
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.101203441619873,
      "learning_rate": 1.3619292598112681e-05,
      "loss": 2.4223,
      "step": 29656
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9691340327262878,
      "learning_rate": 1.3618908771746824e-05,
      "loss": 2.187,
      "step": 29657
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0989487171173096,
      "learning_rate": 1.3618524939245932e-05,
      "loss": 2.363,
      "step": 29658
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0596929788589478,
      "learning_rate": 1.361814110061066e-05,
      "loss": 2.4715,
      "step": 29659
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0448966026306152,
      "learning_rate": 1.3617757255841654e-05,
      "loss": 2.4114,
      "step": 29660
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.083297848701477,
      "learning_rate": 1.361737340493957e-05,
      "loss": 2.4914,
      "step": 29661
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2548143863677979,
      "learning_rate": 1.3616989547905051e-05,
      "loss": 2.4735,
      "step": 29662
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.177262306213379,
      "learning_rate": 1.3616605684738754e-05,
      "loss": 2.4985,
      "step": 29663
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.92280113697052,
      "learning_rate": 1.3616221815441328e-05,
      "loss": 2.338,
      "step": 29664
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9626956582069397,
      "learning_rate": 1.3615837940013428e-05,
      "loss": 2.4438,
      "step": 29665
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0493080615997314,
      "learning_rate": 1.3615454058455696e-05,
      "loss": 2.5485,
      "step": 29666
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.141326904296875,
      "learning_rate": 1.3615070170768789e-05,
      "loss": 2.5408,
      "step": 29667
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0722049474716187,
      "learning_rate": 1.3614686276953357e-05,
      "loss": 2.5441,
      "step": 29668
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1561912298202515,
      "learning_rate": 1.361430237701005e-05,
      "loss": 2.27,
      "step": 29669
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1509251594543457,
      "learning_rate": 1.3613918470939518e-05,
      "loss": 2.2915,
      "step": 29670
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9729735851287842,
      "learning_rate": 1.361353455874241e-05,
      "loss": 2.4842,
      "step": 29671
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0893374681472778,
      "learning_rate": 1.3613150640419384e-05,
      "loss": 2.4617,
      "step": 29672
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.145339846611023,
      "learning_rate": 1.3612766715971083e-05,
      "loss": 2.3046,
      "step": 29673
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9824026823043823,
      "learning_rate": 1.3612382785398164e-05,
      "loss": 2.4341,
      "step": 29674
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9432182908058167,
      "learning_rate": 1.3611998848701272e-05,
      "loss": 2.5144,
      "step": 29675
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1479679346084595,
      "learning_rate": 1.3611614905881063e-05,
      "loss": 2.3764,
      "step": 29676
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1520642042160034,
      "learning_rate": 1.3611230956938184e-05,
      "loss": 2.3691,
      "step": 29677
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.003928303718567,
      "learning_rate": 1.361084700187329e-05,
      "loss": 2.7241,
      "step": 29678
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9978674054145813,
      "learning_rate": 1.361046304068703e-05,
      "loss": 2.2699,
      "step": 29679
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3934860229492188,
      "learning_rate": 1.3610079073380053e-05,
      "loss": 2.2722,
      "step": 29680
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9840326309204102,
      "learning_rate": 1.360969509995301e-05,
      "loss": 2.4967,
      "step": 29681
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9859689474105835,
      "learning_rate": 1.3609311120406557e-05,
      "loss": 2.5463,
      "step": 29682
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.202865481376648,
      "learning_rate": 1.3608927134741339e-05,
      "loss": 2.3117,
      "step": 29683
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.074402093887329,
      "learning_rate": 1.3608543142958009e-05,
      "loss": 2.515,
      "step": 29684
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1192800998687744,
      "learning_rate": 1.360815914505722e-05,
      "loss": 2.385,
      "step": 29685
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.999113142490387,
      "learning_rate": 1.3607775141039622e-05,
      "loss": 2.5878,
      "step": 29686
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9951119422912598,
      "learning_rate": 1.3607391130905862e-05,
      "loss": 2.2394,
      "step": 29687
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1080940961837769,
      "learning_rate": 1.3607007114656597e-05,
      "loss": 2.3534,
      "step": 29688
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2073092460632324,
      "learning_rate": 1.3606623092292475e-05,
      "loss": 2.3202,
      "step": 29689
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0003000497817993,
      "learning_rate": 1.3606239063814146e-05,
      "loss": 2.4393,
      "step": 29690
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0443812608718872,
      "learning_rate": 1.3605855029222262e-05,
      "loss": 2.3912,
      "step": 29691
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.015769362449646,
      "learning_rate": 1.3605470988517477e-05,
      "loss": 2.2697,
      "step": 29692
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1505604982376099,
      "learning_rate": 1.3605086941700439e-05,
      "loss": 2.2983,
      "step": 29693
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1047087907791138,
      "learning_rate": 1.3604702888771798e-05,
      "loss": 2.4831,
      "step": 29694
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0986918210983276,
      "learning_rate": 1.3604318829732207e-05,
      "loss": 2.4669,
      "step": 29695
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1429780721664429,
      "learning_rate": 1.3603934764582316e-05,
      "loss": 2.2842,
      "step": 29696
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1982951164245605,
      "learning_rate": 1.3603550693322777e-05,
      "loss": 2.3164,
      "step": 29697
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1700807809829712,
      "learning_rate": 1.3603166615954242e-05,
      "loss": 2.2158,
      "step": 29698
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2988981008529663,
      "learning_rate": 1.360278253247736e-05,
      "loss": 2.442,
      "step": 29699
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9991050958633423,
      "learning_rate": 1.3602398442892786e-05,
      "loss": 2.3706,
      "step": 29700
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2064670324325562,
      "learning_rate": 1.3602014347201164e-05,
      "loss": 2.3533,
      "step": 29701
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0840747356414795,
      "learning_rate": 1.360163024540315e-05,
      "loss": 2.5055,
      "step": 29702
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9729994535446167,
      "learning_rate": 1.3601246137499396e-05,
      "loss": 2.2547,
      "step": 29703
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0677785873413086,
      "learning_rate": 1.3600862023490551e-05,
      "loss": 2.4343,
      "step": 29704
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0236687660217285,
      "learning_rate": 1.3600477903377267e-05,
      "loss": 2.2158,
      "step": 29705
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1821993589401245,
      "learning_rate": 1.3600093777160198e-05,
      "loss": 2.1831,
      "step": 29706
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2201273441314697,
      "learning_rate": 1.3599709644839987e-05,
      "loss": 2.2618,
      "step": 29707
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1097084283828735,
      "learning_rate": 1.3599325506417293e-05,
      "loss": 2.2836,
      "step": 29708
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0785688161849976,
      "learning_rate": 1.3598941361892763e-05,
      "loss": 2.1712,
      "step": 29709
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9760375618934631,
      "learning_rate": 1.3598557211267052e-05,
      "loss": 2.3615,
      "step": 29710
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0880491733551025,
      "learning_rate": 1.3598173054540811e-05,
      "loss": 2.3957,
      "step": 29711
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9966511726379395,
      "learning_rate": 1.3597788891714687e-05,
      "loss": 2.3291,
      "step": 29712
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9980723857879639,
      "learning_rate": 1.3597404722789333e-05,
      "loss": 2.2324,
      "step": 29713
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4055348634719849,
      "learning_rate": 1.3597020547765402e-05,
      "loss": 2.6254,
      "step": 29714
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0653053522109985,
      "learning_rate": 1.3596636366643544e-05,
      "loss": 2.3232,
      "step": 29715
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1671570539474487,
      "learning_rate": 1.359625217942441e-05,
      "loss": 2.3594,
      "step": 29716
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1223591566085815,
      "learning_rate": 1.359586798610865e-05,
      "loss": 2.2697,
      "step": 29717
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9939529895782471,
      "learning_rate": 1.3595483786696924e-05,
      "loss": 2.3857,
      "step": 29718
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2612698078155518,
      "learning_rate": 1.359509958118987e-05,
      "loss": 2.419,
      "step": 29719
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0105026960372925,
      "learning_rate": 1.3594715369588147e-05,
      "loss": 2.4622,
      "step": 29720
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2129855155944824,
      "learning_rate": 1.3594331151892406e-05,
      "loss": 2.3875,
      "step": 29721
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0159465074539185,
      "learning_rate": 1.3593946928103297e-05,
      "loss": 2.29,
      "step": 29722
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9802182912826538,
      "learning_rate": 1.3593562698221472e-05,
      "loss": 2.569,
      "step": 29723
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0841445922851562,
      "learning_rate": 1.3593178462247584e-05,
      "loss": 2.2939,
      "step": 29724
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1750770807266235,
      "learning_rate": 1.3592794220182281e-05,
      "loss": 2.3045,
      "step": 29725
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.130274772644043,
      "learning_rate": 1.3592409972026216e-05,
      "loss": 2.297,
      "step": 29726
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.144948124885559,
      "learning_rate": 1.3592025717780042e-05,
      "loss": 2.2751,
      "step": 29727
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9613586664199829,
      "learning_rate": 1.3591641457444406e-05,
      "loss": 2.3005,
      "step": 29728
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0470277070999146,
      "learning_rate": 1.3591257191019964e-05,
      "loss": 2.2024,
      "step": 29729
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1586171388626099,
      "learning_rate": 1.3590872918507365e-05,
      "loss": 2.3431,
      "step": 29730
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1672402620315552,
      "learning_rate": 1.3590488639907264e-05,
      "loss": 2.3773,
      "step": 29731
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2158066034317017,
      "learning_rate": 1.3590104355220307e-05,
      "loss": 2.3758,
      "step": 29732
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.037307858467102,
      "learning_rate": 1.3589720064447147e-05,
      "loss": 2.476,
      "step": 29733
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9943093061447144,
      "learning_rate": 1.358933576758844e-05,
      "loss": 2.2294,
      "step": 29734
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.020634651184082,
      "learning_rate": 1.3588951464644828e-05,
      "loss": 2.3161,
      "step": 29735
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0297869443893433,
      "learning_rate": 1.3588567155616971e-05,
      "loss": 2.4451,
      "step": 29736
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9957147836685181,
      "learning_rate": 1.3588182840505519e-05,
      "loss": 2.4517,
      "step": 29737
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.018054485321045,
      "learning_rate": 1.3587798519311126e-05,
      "loss": 2.4721,
      "step": 29738
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.155070424079895,
      "learning_rate": 1.3587414192034436e-05,
      "loss": 2.4584,
      "step": 29739
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.052852988243103,
      "learning_rate": 1.3587029858676104e-05,
      "loss": 2.4214,
      "step": 29740
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9990193843841553,
      "learning_rate": 1.3586645519236785e-05,
      "loss": 2.43,
      "step": 29741
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9907782673835754,
      "learning_rate": 1.3586261173717126e-05,
      "loss": 2.4932,
      "step": 29742
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1594226360321045,
      "learning_rate": 1.358587682211778e-05,
      "loss": 2.352,
      "step": 29743
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0299121141433716,
      "learning_rate": 1.35854924644394e-05,
      "loss": 2.3601,
      "step": 29744
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0740495920181274,
      "learning_rate": 1.3585108100682636e-05,
      "loss": 2.5191,
      "step": 29745
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0970274209976196,
      "learning_rate": 1.3584723730848142e-05,
      "loss": 2.4123,
      "step": 29746
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1671327352523804,
      "learning_rate": 1.3584339354936564e-05,
      "loss": 2.3999,
      "step": 29747
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9903366565704346,
      "learning_rate": 1.3583954972948559e-05,
      "loss": 2.4025,
      "step": 29748
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.303807020187378,
      "learning_rate": 1.3583570584884776e-05,
      "loss": 2.2853,
      "step": 29749
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.63309645652771,
      "learning_rate": 1.3583186190745867e-05,
      "loss": 2.2455,
      "step": 29750
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0783159732818604,
      "learning_rate": 1.358280179053249e-05,
      "loss": 2.2929,
      "step": 29751
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0588475465774536,
      "learning_rate": 1.3582417384245286e-05,
      "loss": 2.7243,
      "step": 29752
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.002840518951416,
      "learning_rate": 1.3582032971884911e-05,
      "loss": 2.436,
      "step": 29753
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0921820402145386,
      "learning_rate": 1.3581648553452017e-05,
      "loss": 2.2894,
      "step": 29754
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9830005168914795,
      "learning_rate": 1.3581264128947261e-05,
      "loss": 2.2454,
      "step": 29755
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.977150022983551,
      "learning_rate": 1.3580879698371284e-05,
      "loss": 2.1678,
      "step": 29756
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.054265022277832,
      "learning_rate": 1.3580495261724744e-05,
      "loss": 2.1246,
      "step": 29757
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9375614523887634,
      "learning_rate": 1.3580110819008296e-05,
      "loss": 2.3361,
      "step": 29758
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1294746398925781,
      "learning_rate": 1.3579726370222586e-05,
      "loss": 2.3819,
      "step": 29759
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0398465394973755,
      "learning_rate": 1.3579341915368266e-05,
      "loss": 2.2831,
      "step": 29760
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2145590782165527,
      "learning_rate": 1.357895745444599e-05,
      "loss": 2.5097,
      "step": 29761
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1518239974975586,
      "learning_rate": 1.357857298745641e-05,
      "loss": 2.4776,
      "step": 29762
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.018785834312439,
      "learning_rate": 1.3578188514400175e-05,
      "loss": 2.5365,
      "step": 29763
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0665615797042847,
      "learning_rate": 1.3577804035277941e-05,
      "loss": 2.4917,
      "step": 29764
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.04734468460083,
      "learning_rate": 1.3577419550090355e-05,
      "loss": 2.4992,
      "step": 29765
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0937753915786743,
      "learning_rate": 1.3577035058838073e-05,
      "loss": 2.4489,
      "step": 29766
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0444024801254272,
      "learning_rate": 1.3576650561521744e-05,
      "loss": 2.3559,
      "step": 29767
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0061767101287842,
      "learning_rate": 1.3576266058142023e-05,
      "loss": 2.3712,
      "step": 29768
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0428639650344849,
      "learning_rate": 1.3575881548699557e-05,
      "loss": 2.0769,
      "step": 29769
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1271944046020508,
      "learning_rate": 1.3575497033195e-05,
      "loss": 2.3328,
      "step": 29770
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9890061616897583,
      "learning_rate": 1.357511251162901e-05,
      "loss": 2.4049,
      "step": 29771
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1224173307418823,
      "learning_rate": 1.3574727984002227e-05,
      "loss": 2.4262,
      "step": 29772
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0238066911697388,
      "learning_rate": 1.3574343450315311e-05,
      "loss": 2.1884,
      "step": 29773
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0050872564315796,
      "learning_rate": 1.3573958910568913e-05,
      "loss": 2.3876,
      "step": 29774
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9540329575538635,
      "learning_rate": 1.3573574364763684e-05,
      "loss": 2.5254,
      "step": 29775
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.165332555770874,
      "learning_rate": 1.3573189812900273e-05,
      "loss": 2.422,
      "step": 29776
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.065676212310791,
      "learning_rate": 1.357280525497934e-05,
      "loss": 2.5532,
      "step": 29777
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.234795093536377,
      "learning_rate": 1.3572420691001529e-05,
      "loss": 2.3115,
      "step": 29778
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1645398139953613,
      "learning_rate": 1.3572036120967496e-05,
      "loss": 2.2523,
      "step": 29779
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1069258451461792,
      "learning_rate": 1.3571651544877889e-05,
      "loss": 2.3287,
      "step": 29780
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0437780618667603,
      "learning_rate": 1.3571266962733362e-05,
      "loss": 2.4091,
      "step": 29781
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9388931393623352,
      "learning_rate": 1.357088237453457e-05,
      "loss": 2.4157,
      "step": 29782
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0881150960922241,
      "learning_rate": 1.3570497780282164e-05,
      "loss": 2.6166,
      "step": 29783
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1626667976379395,
      "learning_rate": 1.3570113179976795e-05,
      "loss": 2.3417,
      "step": 29784
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.162597894668579,
      "learning_rate": 1.3569728573619109e-05,
      "loss": 2.2662,
      "step": 29785
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.082854986190796,
      "learning_rate": 1.3569343961209768e-05,
      "loss": 2.4444,
      "step": 29786
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.003332495689392,
      "learning_rate": 1.3568959342749418e-05,
      "loss": 2.4487,
      "step": 29787
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1241382360458374,
      "learning_rate": 1.3568574718238714e-05,
      "loss": 2.3457,
      "step": 29788
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0045431852340698,
      "learning_rate": 1.3568190087678304e-05,
      "loss": 2.7006,
      "step": 29789
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0796310901641846,
      "learning_rate": 1.3567805451068842e-05,
      "loss": 2.6521,
      "step": 29790
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0748798847198486,
      "learning_rate": 1.3567420808410987e-05,
      "loss": 2.486,
      "step": 29791
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.031059980392456,
      "learning_rate": 1.356703615970538e-05,
      "loss": 2.462,
      "step": 29792
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0498871803283691,
      "learning_rate": 1.3566651504952679e-05,
      "loss": 2.3809,
      "step": 29793
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0195221900939941,
      "learning_rate": 1.3566266844153533e-05,
      "loss": 2.4831,
      "step": 29794
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0321969985961914,
      "learning_rate": 1.3565882177308598e-05,
      "loss": 2.204,
      "step": 29795
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1466493606567383,
      "learning_rate": 1.3565497504418523e-05,
      "loss": 2.3236,
      "step": 29796
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.029473066329956,
      "learning_rate": 1.3565112825483966e-05,
      "loss": 2.0439,
      "step": 29797
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0695961713790894,
      "learning_rate": 1.356472814050557e-05,
      "loss": 2.311,
      "step": 29798
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9905434846878052,
      "learning_rate": 1.3564343449483992e-05,
      "loss": 2.3508,
      "step": 29799
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1135668754577637,
      "learning_rate": 1.3563958752419883e-05,
      "loss": 2.3725,
      "step": 29800
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0655142068862915,
      "learning_rate": 1.3563574049313896e-05,
      "loss": 2.1252,
      "step": 29801
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0613086223602295,
      "learning_rate": 1.3563189340166684e-05,
      "loss": 2.4164,
      "step": 29802
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0605965852737427,
      "learning_rate": 1.35628046249789e-05,
      "loss": 2.4115,
      "step": 29803
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0825458765029907,
      "learning_rate": 1.3562419903751194e-05,
      "loss": 2.275,
      "step": 29804
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0089391469955444,
      "learning_rate": 1.3562035176484218e-05,
      "loss": 2.5871,
      "step": 29805
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0090157985687256,
      "learning_rate": 1.3561650443178625e-05,
      "loss": 2.3554,
      "step": 29806
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1354793310165405,
      "learning_rate": 1.3561265703835068e-05,
      "loss": 2.4741,
      "step": 29807
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0671305656433105,
      "learning_rate": 1.3560880958454195e-05,
      "loss": 2.3851,
      "step": 29808
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0823289155960083,
      "learning_rate": 1.3560496207036664e-05,
      "loss": 2.3518,
      "step": 29809
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0914937257766724,
      "learning_rate": 1.3560111449583126e-05,
      "loss": 2.368,
      "step": 29810
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9981400966644287,
      "learning_rate": 1.3559726686094232e-05,
      "loss": 2.3856,
      "step": 29811
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0595163106918335,
      "learning_rate": 1.3559341916570632e-05,
      "loss": 2.2062,
      "step": 29812
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0060824155807495,
      "learning_rate": 1.3558957141012982e-05,
      "loss": 2.5252,
      "step": 29813
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0342389345169067,
      "learning_rate": 1.3558572359421937e-05,
      "loss": 2.513,
      "step": 29814
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0875394344329834,
      "learning_rate": 1.355818757179814e-05,
      "loss": 2.3618,
      "step": 29815
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0205976963043213,
      "learning_rate": 1.3557802778142251e-05,
      "loss": 2.3827,
      "step": 29816
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.102262020111084,
      "learning_rate": 1.3557417978454922e-05,
      "loss": 2.4486,
      "step": 29817
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0372471809387207,
      "learning_rate": 1.35570331727368e-05,
      "loss": 2.4938,
      "step": 29818
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0519989728927612,
      "learning_rate": 1.355664836098854e-05,
      "loss": 2.4644,
      "step": 29819
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.119290828704834,
      "learning_rate": 1.3556263543210797e-05,
      "loss": 2.3449,
      "step": 29820
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.103750228881836,
      "learning_rate": 1.3555878719404222e-05,
      "loss": 2.5469,
      "step": 29821
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0750858783721924,
      "learning_rate": 1.3555493889569465e-05,
      "loss": 2.2331,
      "step": 29822
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2077100276947021,
      "learning_rate": 1.3555109053707182e-05,
      "loss": 2.4028,
      "step": 29823
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1036372184753418,
      "learning_rate": 1.3554724211818022e-05,
      "loss": 2.2963,
      "step": 29824
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9920743107795715,
      "learning_rate": 1.355433936390264e-05,
      "loss": 2.4155,
      "step": 29825
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0150219202041626,
      "learning_rate": 1.3553954509961687e-05,
      "loss": 2.4264,
      "step": 29826
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0121338367462158,
      "learning_rate": 1.3553569649995815e-05,
      "loss": 2.3391,
      "step": 29827
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0631933212280273,
      "learning_rate": 1.3553184784005677e-05,
      "loss": 2.4722,
      "step": 29828
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.150410771369934,
      "learning_rate": 1.3552799911991927e-05,
      "loss": 2.4329,
      "step": 29829
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9621747136116028,
      "learning_rate": 1.3552415033955217e-05,
      "loss": 2.4249,
      "step": 29830
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0031663179397583,
      "learning_rate": 1.3552030149896199e-05,
      "loss": 2.4135,
      "step": 29831
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.095359444618225,
      "learning_rate": 1.3551645259815522e-05,
      "loss": 2.3305,
      "step": 29832
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0624092817306519,
      "learning_rate": 1.3551260363713844e-05,
      "loss": 2.5172,
      "step": 29833
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.093855381011963,
      "learning_rate": 1.3550875461591815e-05,
      "loss": 2.2084,
      "step": 29834
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0413858890533447,
      "learning_rate": 1.3550490553450089e-05,
      "loss": 2.6013,
      "step": 29835
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1035712957382202,
      "learning_rate": 1.3550105639289312e-05,
      "loss": 2.4756,
      "step": 29836
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0772061347961426,
      "learning_rate": 1.3549720719110149e-05,
      "loss": 2.5591,
      "step": 29837
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2243603467941284,
      "learning_rate": 1.3549335792913242e-05,
      "loss": 2.3994,
      "step": 29838
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.064971685409546,
      "learning_rate": 1.3548950860699246e-05,
      "loss": 2.2988,
      "step": 29839
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0052838325500488,
      "learning_rate": 1.3548565922468814e-05,
      "loss": 2.315,
      "step": 29840
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9780650734901428,
      "learning_rate": 1.35481809782226e-05,
      "loss": 2.4728,
      "step": 29841
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.020317554473877,
      "learning_rate": 1.3547796027961257e-05,
      "loss": 2.3683,
      "step": 29842
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.158373475074768,
      "learning_rate": 1.3547411071685435e-05,
      "loss": 2.1101,
      "step": 29843
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.060901403427124,
      "learning_rate": 1.3547026109395786e-05,
      "loss": 2.5175,
      "step": 29844
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1377140283584595,
      "learning_rate": 1.3546641141092969e-05,
      "loss": 2.3544,
      "step": 29845
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.288160800933838,
      "learning_rate": 1.3546256166777629e-05,
      "loss": 2.4087,
      "step": 29846
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1161764860153198,
      "learning_rate": 1.3545871186450421e-05,
      "loss": 2.5955,
      "step": 29847
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.053079605102539,
      "learning_rate": 1.3545486200111999e-05,
      "loss": 2.3913,
      "step": 29848
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0366281270980835,
      "learning_rate": 1.3545101207763014e-05,
      "loss": 2.3568,
      "step": 29849
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0882065296173096,
      "learning_rate": 1.3544716209404124e-05,
      "loss": 2.2619,
      "step": 29850
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.010668396949768,
      "learning_rate": 1.3544331205035971e-05,
      "loss": 2.524,
      "step": 29851
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0614386796951294,
      "learning_rate": 1.3543946194659218e-05,
      "loss": 2.3112,
      "step": 29852
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.085415005683899,
      "learning_rate": 1.3543561178274511e-05,
      "loss": 2.3625,
      "step": 29853
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0841373205184937,
      "learning_rate": 1.3543176155882508e-05,
      "loss": 2.5278,
      "step": 29854
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0295994281768799,
      "learning_rate": 1.3542791127483856e-05,
      "loss": 2.2688,
      "step": 29855
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.035620927810669,
      "learning_rate": 1.3542406093079215e-05,
      "loss": 2.4518,
      "step": 29856
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1359399557113647,
      "learning_rate": 1.3542021052669231e-05,
      "loss": 2.2598,
      "step": 29857
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2528692483901978,
      "learning_rate": 1.354163600625456e-05,
      "loss": 2.1829,
      "step": 29858
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0237996578216553,
      "learning_rate": 1.3541250953835854e-05,
      "loss": 2.4032,
      "step": 29859
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1758638620376587,
      "learning_rate": 1.3540865895413764e-05,
      "loss": 2.2311,
      "step": 29860
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0462490320205688,
      "learning_rate": 1.3540480830988946e-05,
      "loss": 2.443,
      "step": 29861
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0411252975463867,
      "learning_rate": 1.3540095760562051e-05,
      "loss": 2.4832,
      "step": 29862
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0220783948898315,
      "learning_rate": 1.3539710684133733e-05,
      "loss": 2.4145,
      "step": 29863
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.187743067741394,
      "learning_rate": 1.3539325601704644e-05,
      "loss": 2.4278,
      "step": 29864
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1684651374816895,
      "learning_rate": 1.3538940513275438e-05,
      "loss": 2.3142,
      "step": 29865
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0258231163024902,
      "learning_rate": 1.3538555418846764e-05,
      "loss": 2.34,
      "step": 29866
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1199665069580078,
      "learning_rate": 1.353817031841928e-05,
      "loss": 2.3207,
      "step": 29867
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.078465223312378,
      "learning_rate": 1.3537785211993634e-05,
      "loss": 2.4398,
      "step": 29868
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4678466320037842,
      "learning_rate": 1.3537400099570482e-05,
      "loss": 2.3061,
      "step": 29869
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9847422242164612,
      "learning_rate": 1.3537014981150478e-05,
      "loss": 2.1566,
      "step": 29870
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3248558044433594,
      "learning_rate": 1.3536629856734271e-05,
      "loss": 2.4834,
      "step": 29871
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0511106252670288,
      "learning_rate": 1.3536244726322515e-05,
      "loss": 2.2096,
      "step": 29872
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3195233345031738,
      "learning_rate": 1.3535859589915867e-05,
      "loss": 2.5308,
      "step": 29873
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0720634460449219,
      "learning_rate": 1.3535474447514975e-05,
      "loss": 2.2274,
      "step": 29874
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9504157304763794,
      "learning_rate": 1.3535089299120493e-05,
      "loss": 2.4173,
      "step": 29875
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.059152364730835,
      "learning_rate": 1.3534704144733077e-05,
      "loss": 2.6363,
      "step": 29876
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1530721187591553,
      "learning_rate": 1.3534318984353374e-05,
      "loss": 2.5247,
      "step": 29877
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0477374792099,
      "learning_rate": 1.3533933817982043e-05,
      "loss": 2.4884,
      "step": 29878
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.068434715270996,
      "learning_rate": 1.3533548645619732e-05,
      "loss": 2.059,
      "step": 29879
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1404460668563843,
      "learning_rate": 1.3533163467267096e-05,
      "loss": 2.4073,
      "step": 29880
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0650243759155273,
      "learning_rate": 1.353277828292479e-05,
      "loss": 2.2006,
      "step": 29881
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.064245581626892,
      "learning_rate": 1.3532393092593466e-05,
      "loss": 2.3541,
      "step": 29882
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1159179210662842,
      "learning_rate": 1.3532007896273775e-05,
      "loss": 2.5356,
      "step": 29883
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0570234060287476,
      "learning_rate": 1.3531622693966373e-05,
      "loss": 2.3093,
      "step": 29884
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2194491624832153,
      "learning_rate": 1.3531237485671909e-05,
      "loss": 2.3898,
      "step": 29885
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.153795599937439,
      "learning_rate": 1.353085227139104e-05,
      "loss": 2.3047,
      "step": 29886
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0325281620025635,
      "learning_rate": 1.3530467051124417e-05,
      "loss": 2.1476,
      "step": 29887
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1070661544799805,
      "learning_rate": 1.3530081824872693e-05,
      "loss": 2.5329,
      "step": 29888
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0104126930236816,
      "learning_rate": 1.3529696592636523e-05,
      "loss": 2.4307,
      "step": 29889
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1026233434677124,
      "learning_rate": 1.3529311354416557e-05,
      "loss": 2.4032,
      "step": 29890
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1237901449203491,
      "learning_rate": 1.352892611021345e-05,
      "loss": 2.4753,
      "step": 29891
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.052223801612854,
      "learning_rate": 1.3528540860027854e-05,
      "loss": 2.3477,
      "step": 29892
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1467697620391846,
      "learning_rate": 1.3528155603860424e-05,
      "loss": 2.4177,
      "step": 29893
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2698235511779785,
      "learning_rate": 1.3527770341711813e-05,
      "loss": 2.346,
      "step": 29894
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.102048635482788,
      "learning_rate": 1.3527385073582671e-05,
      "loss": 2.2642,
      "step": 29895
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.159743070602417,
      "learning_rate": 1.3526999799473657e-05,
      "loss": 2.3032,
      "step": 29896
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.056715965270996,
      "learning_rate": 1.3526614519385418e-05,
      "loss": 2.4939,
      "step": 29897
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0616062879562378,
      "learning_rate": 1.3526229233318609e-05,
      "loss": 2.2417,
      "step": 29898
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0250898599624634,
      "learning_rate": 1.3525843941273883e-05,
      "loss": 2.4776,
      "step": 29899
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0647575855255127,
      "learning_rate": 1.3525458643251894e-05,
      "loss": 2.5976,
      "step": 29900
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0415467023849487,
      "learning_rate": 1.3525073339253299e-05,
      "loss": 2.2803,
      "step": 29901
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0775178670883179,
      "learning_rate": 1.3524688029278742e-05,
      "loss": 2.2966,
      "step": 29902
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9876747727394104,
      "learning_rate": 1.3524302713328885e-05,
      "loss": 2.1538,
      "step": 29903
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2458239793777466,
      "learning_rate": 1.3523917391404378e-05,
      "loss": 2.3918,
      "step": 29904
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9448537230491638,
      "learning_rate": 1.3523532063505873e-05,
      "loss": 2.4493,
      "step": 29905
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9832664728164673,
      "learning_rate": 1.3523146729634022e-05,
      "loss": 2.4667,
      "step": 29906
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9887376427650452,
      "learning_rate": 1.3522761389789482e-05,
      "loss": 2.3478,
      "step": 29907
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1442841291427612,
      "learning_rate": 1.3522376043972906e-05,
      "loss": 2.6743,
      "step": 29908
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.057348608970642,
      "learning_rate": 1.3521990692184948e-05,
      "loss": 2.432,
      "step": 29909
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0978585481643677,
      "learning_rate": 1.3521605334426253e-05,
      "loss": 2.4017,
      "step": 29910
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9991734027862549,
      "learning_rate": 1.3521219970697483e-05,
      "loss": 2.5054,
      "step": 29911
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9833701252937317,
      "learning_rate": 1.3520834600999291e-05,
      "loss": 2.2975,
      "step": 29912
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.019482135772705,
      "learning_rate": 1.3520449225332325e-05,
      "loss": 2.3255,
      "step": 29913
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1138873100280762,
      "learning_rate": 1.3520063843697243e-05,
      "loss": 2.5786,
      "step": 29914
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1168744564056396,
      "learning_rate": 1.3519678456094696e-05,
      "loss": 2.2402,
      "step": 29915
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.973487913608551,
      "learning_rate": 1.351929306252534e-05,
      "loss": 2.5708,
      "step": 29916
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1955077648162842,
      "learning_rate": 1.3518907662989826e-05,
      "loss": 2.3163,
      "step": 29917
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0636155605316162,
      "learning_rate": 1.3518522257488807e-05,
      "loss": 2.2636,
      "step": 29918
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0064562559127808,
      "learning_rate": 1.3518136846022935e-05,
      "loss": 2.5287,
      "step": 29919
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1308716535568237,
      "learning_rate": 1.3517751428592866e-05,
      "loss": 2.5557,
      "step": 29920
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1698033809661865,
      "learning_rate": 1.3517366005199257e-05,
      "loss": 2.3004,
      "step": 29921
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0072077512741089,
      "learning_rate": 1.3516980575842755e-05,
      "loss": 2.275,
      "step": 29922
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9625997543334961,
      "learning_rate": 1.3516595140524016e-05,
      "loss": 2.4104,
      "step": 29923
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0220054388046265,
      "learning_rate": 1.3516209699243691e-05,
      "loss": 2.5496,
      "step": 29924
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0727248191833496,
      "learning_rate": 1.3515824252002438e-05,
      "loss": 2.4363,
      "step": 29925
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2182916402816772,
      "learning_rate": 1.3515438798800906e-05,
      "loss": 2.3876,
      "step": 29926
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1215413808822632,
      "learning_rate": 1.3515053339639753e-05,
      "loss": 2.2369,
      "step": 29927
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.066508412361145,
      "learning_rate": 1.3514667874519626e-05,
      "loss": 2.3534,
      "step": 29928
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0045520067214966,
      "learning_rate": 1.3514282403441187e-05,
      "loss": 2.2811,
      "step": 29929
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0671323537826538,
      "learning_rate": 1.3513896926405083e-05,
      "loss": 2.3441,
      "step": 29930
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0179541110992432,
      "learning_rate": 1.351351144341197e-05,
      "loss": 2.5962,
      "step": 29931
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0051733255386353,
      "learning_rate": 1.3513125954462503e-05,
      "loss": 2.2186,
      "step": 29932
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.170361876487732,
      "learning_rate": 1.351274045955733e-05,
      "loss": 2.4393,
      "step": 29933
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1239181756973267,
      "learning_rate": 1.3512354958697106e-05,
      "loss": 2.303,
      "step": 29934
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.024689793586731,
      "learning_rate": 1.3511969451882491e-05,
      "loss": 2.4167,
      "step": 29935
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.113639235496521,
      "learning_rate": 1.3511583939114132e-05,
      "loss": 2.4647,
      "step": 29936
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0756484270095825,
      "learning_rate": 1.3511198420392684e-05,
      "loss": 2.4008,
      "step": 29937
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0883105993270874,
      "learning_rate": 1.35108128957188e-05,
      "loss": 2.4631,
      "step": 29938
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9803740978240967,
      "learning_rate": 1.3510427365093138e-05,
      "loss": 2.4405,
      "step": 29939
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0872441530227661,
      "learning_rate": 1.3510041828516344e-05,
      "loss": 2.4981,
      "step": 29940
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1098469495773315,
      "learning_rate": 1.3509656285989077e-05,
      "loss": 2.412,
      "step": 29941
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.167004108428955,
      "learning_rate": 1.3509270737511993e-05,
      "loss": 2.5808,
      "step": 29942
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0182582139968872,
      "learning_rate": 1.3508885183085739e-05,
      "loss": 2.37,
      "step": 29943
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.17987859249115,
      "learning_rate": 1.350849962271097e-05,
      "loss": 2.4299,
      "step": 29944
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2590214014053345,
      "learning_rate": 1.3508114056388344e-05,
      "loss": 2.3422,
      "step": 29945
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1174260377883911,
      "learning_rate": 1.3507728484118509e-05,
      "loss": 2.1392,
      "step": 29946
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2564820051193237,
      "learning_rate": 1.3507342905902125e-05,
      "loss": 2.6263,
      "step": 29947
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.024525761604309,
      "learning_rate": 1.350695732173984e-05,
      "loss": 2.3657,
      "step": 29948
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.08169686794281,
      "learning_rate": 1.3506571731632312e-05,
      "loss": 2.3562,
      "step": 29949
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0172113180160522,
      "learning_rate": 1.3506186135580191e-05,
      "loss": 2.3187,
      "step": 29950
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1003574132919312,
      "learning_rate": 1.3505800533584133e-05,
      "loss": 2.2615,
      "step": 29951
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0208683013916016,
      "learning_rate": 1.3505414925644789e-05,
      "loss": 2.4295,
      "step": 29952
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2035006284713745,
      "learning_rate": 1.3505029311762815e-05,
      "loss": 2.5677,
      "step": 29953
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.023673176765442,
      "learning_rate": 1.3504643691938865e-05,
      "loss": 2.3221,
      "step": 29954
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.015647530555725,
      "learning_rate": 1.3504258066173594e-05,
      "loss": 2.5449,
      "step": 29955
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9674081206321716,
      "learning_rate": 1.350387243446765e-05,
      "loss": 2.2346,
      "step": 29956
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1038929224014282,
      "learning_rate": 1.3503486796821693e-05,
      "loss": 2.4898,
      "step": 29957
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1104573011398315,
      "learning_rate": 1.3503101153236375e-05,
      "loss": 2.4341,
      "step": 29958
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9955593347549438,
      "learning_rate": 1.3502715503712347e-05,
      "loss": 2.2995,
      "step": 29959
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.108319640159607,
      "learning_rate": 1.3502329848250268e-05,
      "loss": 2.2923,
      "step": 29960
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0047307014465332,
      "learning_rate": 1.3501944186850788e-05,
      "loss": 2.4198,
      "step": 29961
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0834424495697021,
      "learning_rate": 1.350155851951456e-05,
      "loss": 2.4238,
      "step": 29962
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0924110412597656,
      "learning_rate": 1.350117284624224e-05,
      "loss": 2.536,
      "step": 29963
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0596592426300049,
      "learning_rate": 1.3500787167034481e-05,
      "loss": 2.4833,
      "step": 29964
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.062259554862976,
      "learning_rate": 1.3500401481891936e-05,
      "loss": 2.3644,
      "step": 29965
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.045691967010498,
      "learning_rate": 1.3500015790815261e-05,
      "loss": 2.3969,
      "step": 29966
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0914098024368286,
      "learning_rate": 1.3499630093805107e-05,
      "loss": 2.6204,
      "step": 29967
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.257370114326477,
      "learning_rate": 1.3499244390862132e-05,
      "loss": 2.4262,
      "step": 29968
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0152852535247803,
      "learning_rate": 1.3498858681986986e-05,
      "loss": 2.4518,
      "step": 29969
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0533840656280518,
      "learning_rate": 1.3498472967180325e-05,
      "loss": 2.1134,
      "step": 29970
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0171449184417725,
      "learning_rate": 1.3498087246442801e-05,
      "loss": 2.3941,
      "step": 29971
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0894516706466675,
      "learning_rate": 1.3497701519775069e-05,
      "loss": 2.6508,
      "step": 29972
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1110970973968506,
      "learning_rate": 1.3497315787177782e-05,
      "loss": 2.2786,
      "step": 29973
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0825947523117065,
      "learning_rate": 1.3496930048651597e-05,
      "loss": 2.3884,
      "step": 29974
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0430313348770142,
      "learning_rate": 1.3496544304197166e-05,
      "loss": 2.6123,
      "step": 29975
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9938409924507141,
      "learning_rate": 1.3496158553815142e-05,
      "loss": 2.418,
      "step": 29976
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0681806802749634,
      "learning_rate": 1.349577279750618e-05,
      "loss": 2.412,
      "step": 29977
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3307815790176392,
      "learning_rate": 1.349538703527093e-05,
      "loss": 2.4103,
      "step": 29978
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2208176851272583,
      "learning_rate": 1.3495001267110054e-05,
      "loss": 2.5895,
      "step": 29979
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1587625741958618,
      "learning_rate": 1.34946154930242e-05,
      "loss": 2.2997,
      "step": 29980
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.079067587852478,
      "learning_rate": 1.3494229713014023e-05,
      "loss": 2.3467,
      "step": 29981
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.163053035736084,
      "learning_rate": 1.3493843927080182e-05,
      "loss": 2.5059,
      "step": 29982
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0416324138641357,
      "learning_rate": 1.3493458135223324e-05,
      "loss": 2.364,
      "step": 29983
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1879911422729492,
      "learning_rate": 1.3493072337444102e-05,
      "loss": 2.2359,
      "step": 29984
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.049952507019043,
      "learning_rate": 1.3492686533743177e-05,
      "loss": 2.473,
      "step": 29985
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9731563925743103,
      "learning_rate": 1.3492300724121199e-05,
      "loss": 2.3797,
      "step": 29986
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9496220946311951,
      "learning_rate": 1.3491914908578822e-05,
      "loss": 2.5183,
      "step": 29987
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2211376428604126,
      "learning_rate": 1.3491529087116706e-05,
      "loss": 2.4896,
      "step": 29988
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0601727962493896,
      "learning_rate": 1.3491143259735495e-05,
      "loss": 2.5345,
      "step": 29989
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.301674485206604,
      "learning_rate": 1.349075742643585e-05,
      "loss": 2.5641,
      "step": 29990
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0978413820266724,
      "learning_rate": 1.3490371587218423e-05,
      "loss": 2.3416,
      "step": 29991
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.046766757965088,
      "learning_rate": 1.3489985742083866e-05,
      "loss": 2.2574,
      "step": 29992
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0529991388320923,
      "learning_rate": 1.3489599891032837e-05,
      "loss": 2.4526,
      "step": 29993
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0441399812698364,
      "learning_rate": 1.3489214034065988e-05,
      "loss": 2.2667,
      "step": 29994
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0703741312026978,
      "learning_rate": 1.3488828171183975e-05,
      "loss": 2.3964,
      "step": 29995
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.132285475730896,
      "learning_rate": 1.3488442302387448e-05,
      "loss": 2.3589,
      "step": 29996
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9739112854003906,
      "learning_rate": 1.3488056427677066e-05,
      "loss": 2.2749,
      "step": 29997
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0414576530456543,
      "learning_rate": 1.348767054705348e-05,
      "loss": 2.4133,
      "step": 29998
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1726917028427124,
      "learning_rate": 1.3487284660517344e-05,
      "loss": 2.3697,
      "step": 29999
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9629577994346619,
      "learning_rate": 1.3486898768069315e-05,
      "loss": 2.3702,
      "step": 30000
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1328566074371338,
      "learning_rate": 1.3486512869710047e-05,
      "loss": 2.3741,
      "step": 30001
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.072094440460205,
      "learning_rate": 1.348612696544019e-05,
      "loss": 2.6007,
      "step": 30002
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0164825916290283,
      "learning_rate": 1.3485741055260402e-05,
      "loss": 2.4645,
      "step": 30003
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0802079439163208,
      "learning_rate": 1.3485355139171335e-05,
      "loss": 2.3243,
      "step": 30004
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0189883708953857,
      "learning_rate": 1.3484969217173644e-05,
      "loss": 2.3981,
      "step": 30005
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9994475841522217,
      "learning_rate": 1.3484583289267985e-05,
      "loss": 2.4502,
      "step": 30006
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.010480284690857,
      "learning_rate": 1.348419735545501e-05,
      "loss": 2.3375,
      "step": 30007
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1611253023147583,
      "learning_rate": 1.3483811415735378e-05,
      "loss": 2.2289,
      "step": 30008
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0121411085128784,
      "learning_rate": 1.3483425470109737e-05,
      "loss": 2.4097,
      "step": 30009
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1161788702011108,
      "learning_rate": 1.3483039518578742e-05,
      "loss": 2.5609,
      "step": 30010
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1229430437088013,
      "learning_rate": 1.3482653561143051e-05,
      "loss": 2.3203,
      "step": 30011
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1141245365142822,
      "learning_rate": 1.3482267597803316e-05,
      "loss": 2.3257,
      "step": 30012
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0663460493087769,
      "learning_rate": 1.3481881628560189e-05,
      "loss": 2.3092,
      "step": 30013
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2199738025665283,
      "learning_rate": 1.3481495653414332e-05,
      "loss": 2.5822,
      "step": 30014
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9747195839881897,
      "learning_rate": 1.3481109672366393e-05,
      "loss": 2.189,
      "step": 30015
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0650383234024048,
      "learning_rate": 1.3480723685417025e-05,
      "loss": 2.3052,
      "step": 30016
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1994364261627197,
      "learning_rate": 1.3480337692566887e-05,
      "loss": 2.3754,
      "step": 30017
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2035788297653198,
      "learning_rate": 1.3479951693816631e-05,
      "loss": 2.2345,
      "step": 30018
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2201091051101685,
      "learning_rate": 1.3479565689166912e-05,
      "loss": 2.4126,
      "step": 30019
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2501994371414185,
      "learning_rate": 1.3479179678618383e-05,
      "loss": 2.5718,
      "step": 30020
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0482914447784424,
      "learning_rate": 1.3478793662171701e-05,
      "loss": 2.6696,
      "step": 30021
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0847678184509277,
      "learning_rate": 1.347840763982752e-05,
      "loss": 2.5439,
      "step": 30022
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0965261459350586,
      "learning_rate": 1.347802161158649e-05,
      "loss": 2.4382,
      "step": 30023
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0642064809799194,
      "learning_rate": 1.347763557744927e-05,
      "loss": 2.4965,
      "step": 30024
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.30931556224823,
      "learning_rate": 1.3477249537416512e-05,
      "loss": 2.2536,
      "step": 30025
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.070814847946167,
      "learning_rate": 1.3476863491488874e-05,
      "loss": 2.3698,
      "step": 30026
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1975553035736084,
      "learning_rate": 1.3476477439667006e-05,
      "loss": 2.5946,
      "step": 30027
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1409235000610352,
      "learning_rate": 1.3476091381951567e-05,
      "loss": 2.4759,
      "step": 30028
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9667251110076904,
      "learning_rate": 1.3475705318343209e-05,
      "loss": 2.3285,
      "step": 30029
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.002262830734253,
      "learning_rate": 1.3475319248842584e-05,
      "loss": 2.2807,
      "step": 30030
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3117254972457886,
      "learning_rate": 1.3474933173450349e-05,
      "loss": 2.3874,
      "step": 30031
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1563657522201538,
      "learning_rate": 1.347454709216716e-05,
      "loss": 2.5945,
      "step": 30032
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0702095031738281,
      "learning_rate": 1.347416100499367e-05,
      "loss": 2.3369,
      "step": 30033
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2097861766815186,
      "learning_rate": 1.3473774911930533e-05,
      "loss": 2.4708,
      "step": 30034
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0529876947402954,
      "learning_rate": 1.3473388812978405e-05,
      "loss": 2.6188,
      "step": 30035
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0522632598876953,
      "learning_rate": 1.3473002708137938e-05,
      "loss": 2.2469,
      "step": 30036
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1765981912612915,
      "learning_rate": 1.3472616597409789e-05,
      "loss": 2.5409,
      "step": 30037
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0517468452453613,
      "learning_rate": 1.347223048079461e-05,
      "loss": 2.715,
      "step": 30038
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9563713073730469,
      "learning_rate": 1.347184435829306e-05,
      "loss": 2.2796,
      "step": 30039
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0690637826919556,
      "learning_rate": 1.3471458229905789e-05,
      "loss": 2.5305,
      "step": 30040
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.109609842300415,
      "learning_rate": 1.3471072095633458e-05,
      "loss": 2.4318,
      "step": 30041
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1166024208068848,
      "learning_rate": 1.347068595547671e-05,
      "loss": 2.4705,
      "step": 30042
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.104192852973938,
      "learning_rate": 1.3470299809436212e-05,
      "loss": 2.2746,
      "step": 30043
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1726378202438354,
      "learning_rate": 1.346991365751261e-05,
      "loss": 2.2662,
      "step": 30044
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.330682396888733,
      "learning_rate": 1.3469527499706563e-05,
      "loss": 2.2597,
      "step": 30045
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.046265721321106,
      "learning_rate": 1.3469141336018726e-05,
      "loss": 2.4917,
      "step": 30046
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.041622281074524,
      "learning_rate": 1.3468755166449752e-05,
      "loss": 2.2191,
      "step": 30047
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1592905521392822,
      "learning_rate": 1.3468368991000293e-05,
      "loss": 2.4085,
      "step": 30048
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.016559362411499,
      "learning_rate": 1.346798280967101e-05,
      "loss": 2.1857,
      "step": 30049
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2489104270935059,
      "learning_rate": 1.3467596622462553e-05,
      "loss": 2.2971,
      "step": 30050
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.204727292060852,
      "learning_rate": 1.3467210429375576e-05,
      "loss": 2.4382,
      "step": 30051
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9917922019958496,
      "learning_rate": 1.3466824230410738e-05,
      "loss": 2.3858,
      "step": 30052
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0009944438934326,
      "learning_rate": 1.3466438025568692e-05,
      "loss": 2.2405,
      "step": 30053
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1131365299224854,
      "learning_rate": 1.3466051814850092e-05,
      "loss": 2.6422,
      "step": 30054
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.112781047821045,
      "learning_rate": 1.3465665598255591e-05,
      "loss": 2.5364,
      "step": 30055
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0698888301849365,
      "learning_rate": 1.3465279375785847e-05,
      "loss": 2.2665,
      "step": 30056
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0148183107376099,
      "learning_rate": 1.3464893147441512e-05,
      "loss": 2.4167,
      "step": 30057
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1011667251586914,
      "learning_rate": 1.346450691322324e-05,
      "loss": 2.494,
      "step": 30058
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9546380043029785,
      "learning_rate": 1.3464120673131691e-05,
      "loss": 2.2168,
      "step": 30059
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0454338788986206,
      "learning_rate": 1.3463734427167517e-05,
      "loss": 2.4039,
      "step": 30060
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1588770151138306,
      "learning_rate": 1.3463348175331371e-05,
      "loss": 2.2133,
      "step": 30061
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.046563744544983,
      "learning_rate": 1.346296191762391e-05,
      "loss": 2.5406,
      "step": 30062
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.038842797279358,
      "learning_rate": 1.346257565404579e-05,
      "loss": 2.2478,
      "step": 30063
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9951362609863281,
      "learning_rate": 1.3462189384597659e-05,
      "loss": 2.4679,
      "step": 30064
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1092392206192017,
      "learning_rate": 1.3461803109280178e-05,
      "loss": 2.2119,
      "step": 30065
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0100668668746948,
      "learning_rate": 1.3461416828094e-05,
      "loss": 2.5197,
      "step": 30066
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9982636570930481,
      "learning_rate": 1.3461030541039785e-05,
      "loss": 2.1959,
      "step": 30067
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0567293167114258,
      "learning_rate": 1.346064424811818e-05,
      "loss": 2.6094,
      "step": 30068
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1008405685424805,
      "learning_rate": 1.3460257949329844e-05,
      "loss": 2.1443,
      "step": 30069
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1545803546905518,
      "learning_rate": 1.3459871644675428e-05,
      "loss": 2.5722,
      "step": 30070
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0575419664382935,
      "learning_rate": 1.3459485334155593e-05,
      "loss": 2.1862,
      "step": 30071
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4269349575042725,
      "learning_rate": 1.3459099017770989e-05,
      "loss": 2.5404,
      "step": 30072
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0424957275390625,
      "learning_rate": 1.3458712695522273e-05,
      "loss": 2.5272,
      "step": 30073
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0413743257522583,
      "learning_rate": 1.3458326367410105e-05,
      "loss": 2.4976,
      "step": 30074
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9856486320495605,
      "learning_rate": 1.3457940033435128e-05,
      "loss": 2.3032,
      "step": 30075
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0099501609802246,
      "learning_rate": 1.3457553693598004e-05,
      "loss": 2.3785,
      "step": 30076
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9378125667572021,
      "learning_rate": 1.345716734789939e-05,
      "loss": 2.3642,
      "step": 30077
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0496448278427124,
      "learning_rate": 1.3456780996339938e-05,
      "loss": 2.2777,
      "step": 30078
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0639424324035645,
      "learning_rate": 1.3456394638920302e-05,
      "loss": 2.3931,
      "step": 30079
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9510096907615662,
      "learning_rate": 1.345600827564114e-05,
      "loss": 2.5215,
      "step": 30080
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0519769191741943,
      "learning_rate": 1.3455621906503105e-05,
      "loss": 2.4299,
      "step": 30081
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0400763750076294,
      "learning_rate": 1.3455235531506849e-05,
      "loss": 2.5255,
      "step": 30082
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0774046182632446,
      "learning_rate": 1.3454849150653034e-05,
      "loss": 2.3678,
      "step": 30083
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.095987319946289,
      "learning_rate": 1.345446276394231e-05,
      "loss": 2.1679,
      "step": 30084
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0374435186386108,
      "learning_rate": 1.3454076371375333e-05,
      "loss": 2.3849,
      "step": 30085
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.110640048980713,
      "learning_rate": 1.345368997295276e-05,
      "loss": 2.4556,
      "step": 30086
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0745961666107178,
      "learning_rate": 1.3453303568675244e-05,
      "loss": 2.35,
      "step": 30087
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0150041580200195,
      "learning_rate": 1.3452917158543439e-05,
      "loss": 2.5284,
      "step": 30088
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1258997917175293,
      "learning_rate": 1.3452530742558004e-05,
      "loss": 2.222,
      "step": 30089
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0172615051269531,
      "learning_rate": 1.345214432071959e-05,
      "loss": 2.3147,
      "step": 30090
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1093112230300903,
      "learning_rate": 1.3451757893028852e-05,
      "loss": 2.4592,
      "step": 30091
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.110298991203308,
      "learning_rate": 1.3451371459486448e-05,
      "loss": 2.6516,
      "step": 30092
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0992451906204224,
      "learning_rate": 1.3450985020093035e-05,
      "loss": 2.3799,
      "step": 30093
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0625221729278564,
      "learning_rate": 1.3450598574849263e-05,
      "loss": 2.4786,
      "step": 30094
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.164339542388916,
      "learning_rate": 1.3450212123755791e-05,
      "loss": 2.4427,
      "step": 30095
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9401519298553467,
      "learning_rate": 1.344982566681327e-05,
      "loss": 2.3827,
      "step": 30096
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0115342140197754,
      "learning_rate": 1.3449439204022356e-05,
      "loss": 2.3739,
      "step": 30097
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1384280920028687,
      "learning_rate": 1.344905273538371e-05,
      "loss": 2.5223,
      "step": 30098
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0593384504318237,
      "learning_rate": 1.344866626089798e-05,
      "loss": 2.3672,
      "step": 30099
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0229878425598145,
      "learning_rate": 1.3448279780565825e-05,
      "loss": 2.41,
      "step": 30100
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.008312463760376,
      "learning_rate": 1.3447893294387901e-05,
      "loss": 2.3886,
      "step": 30101
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.104425072669983,
      "learning_rate": 1.3447506802364857e-05,
      "loss": 2.2705,
      "step": 30102
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2562823295593262,
      "learning_rate": 1.3447120304497357e-05,
      "loss": 2.3978,
      "step": 30103
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0956701040267944,
      "learning_rate": 1.344673380078605e-05,
      "loss": 2.2975,
      "step": 30104
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0713908672332764,
      "learning_rate": 1.3446347291231592e-05,
      "loss": 2.3758,
      "step": 30105
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1419397592544556,
      "learning_rate": 1.3445960775834643e-05,
      "loss": 2.2843,
      "step": 30106
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3147118091583252,
      "learning_rate": 1.344557425459585e-05,
      "loss": 2.3867,
      "step": 30107
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.110880970954895,
      "learning_rate": 1.3445187727515876e-05,
      "loss": 2.3466,
      "step": 30108
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0619242191314697,
      "learning_rate": 1.3444801194595374e-05,
      "loss": 2.3723,
      "step": 30109
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0170761346817017,
      "learning_rate": 1.3444414655834996e-05,
      "loss": 2.3704,
      "step": 30110
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0152453184127808,
      "learning_rate": 1.34440281112354e-05,
      "loss": 2.3602,
      "step": 30111
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0856212377548218,
      "learning_rate": 1.344364156079724e-05,
      "loss": 2.53,
      "step": 30112
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0544334650039673,
      "learning_rate": 1.3443255004521175e-05,
      "loss": 2.7008,
      "step": 30113
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0479789972305298,
      "learning_rate": 1.3442868442407855e-05,
      "loss": 2.4931,
      "step": 30114
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0604827404022217,
      "learning_rate": 1.3442481874457938e-05,
      "loss": 2.2475,
      "step": 30115
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0116453170776367,
      "learning_rate": 1.344209530067208e-05,
      "loss": 2.2775,
      "step": 30116
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1369246244430542,
      "learning_rate": 1.3441708721050937e-05,
      "loss": 2.512,
      "step": 30117
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9095317721366882,
      "learning_rate": 1.344132213559516e-05,
      "loss": 2.4203,
      "step": 30118
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0934886932373047,
      "learning_rate": 1.344093554430541e-05,
      "loss": 2.3921,
      "step": 30119
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9653845429420471,
      "learning_rate": 1.344054894718234e-05,
      "loss": 2.3724,
      "step": 30120
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9459600448608398,
      "learning_rate": 1.3440162344226602e-05,
      "loss": 2.2994,
      "step": 30121
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9283085465431213,
      "learning_rate": 1.3439775735438856e-05,
      "loss": 2.2279,
      "step": 30122
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.013543963432312,
      "learning_rate": 1.3439389120819755e-05,
      "loss": 2.5265,
      "step": 30123
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.066663384437561,
      "learning_rate": 1.3439002500369958e-05,
      "loss": 2.3364,
      "step": 30124
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.349625587463379,
      "learning_rate": 1.3438615874090115e-05,
      "loss": 2.3628,
      "step": 30125
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0490214824676514,
      "learning_rate": 1.3438229241980885e-05,
      "loss": 2.3329,
      "step": 30126
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0135267972946167,
      "learning_rate": 1.3437842604042925e-05,
      "loss": 2.4196,
      "step": 30127
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0143359899520874,
      "learning_rate": 1.3437455960276887e-05,
      "loss": 2.327,
      "step": 30128
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0581177473068237,
      "learning_rate": 1.3437069310683425e-05,
      "loss": 2.3986,
      "step": 30129
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0369995832443237,
      "learning_rate": 1.3436682655263198e-05,
      "loss": 2.4923,
      "step": 30130
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9904788136482239,
      "learning_rate": 1.3436295994016862e-05,
      "loss": 2.2984,
      "step": 30131
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.024755835533142,
      "learning_rate": 1.3435909326945069e-05,
      "loss": 2.252,
      "step": 30132
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0232563018798828,
      "learning_rate": 1.3435522654048478e-05,
      "loss": 2.2989,
      "step": 30133
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.088573932647705,
      "learning_rate": 1.3435135975327743e-05,
      "loss": 2.3935,
      "step": 30134
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1788365840911865,
      "learning_rate": 1.343474929078352e-05,
      "loss": 2.3419,
      "step": 30135
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0529273748397827,
      "learning_rate": 1.3434362600416461e-05,
      "loss": 2.6228,
      "step": 30136
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.068907380104065,
      "learning_rate": 1.343397590422723e-05,
      "loss": 2.371,
      "step": 30137
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0149625539779663,
      "learning_rate": 1.3433589202216473e-05,
      "loss": 2.2936,
      "step": 30138
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9877691268920898,
      "learning_rate": 1.3433202494384851e-05,
      "loss": 2.3844,
      "step": 30139
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.108465552330017,
      "learning_rate": 1.343281578073302e-05,
      "loss": 2.3268,
      "step": 30140
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0719529390335083,
      "learning_rate": 1.3432429061261632e-05,
      "loss": 2.3075,
      "step": 30141
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0793020725250244,
      "learning_rate": 1.3432042335971345e-05,
      "loss": 2.6132,
      "step": 30142
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0176922082901,
      "learning_rate": 1.3431655604862815e-05,
      "loss": 2.5043,
      "step": 30143
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1701847314834595,
      "learning_rate": 1.3431268867936693e-05,
      "loss": 2.448,
      "step": 30144
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.227405309677124,
      "learning_rate": 1.3430882125193641e-05,
      "loss": 2.433,
      "step": 30145
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0265544652938843,
      "learning_rate": 1.3430495376634314e-05,
      "loss": 2.3694,
      "step": 30146
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9818238615989685,
      "learning_rate": 1.3430108622259362e-05,
      "loss": 2.5747,
      "step": 30147
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1366997957229614,
      "learning_rate": 1.3429721862069448e-05,
      "loss": 2.4553,
      "step": 30148
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.32663893699646,
      "learning_rate": 1.342933509606522e-05,
      "loss": 2.5374,
      "step": 30149
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9634657502174377,
      "learning_rate": 1.3428948324247337e-05,
      "loss": 2.5976,
      "step": 30150
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.197134256362915,
      "learning_rate": 1.3428561546616458e-05,
      "loss": 2.261,
      "step": 30151
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.040888786315918,
      "learning_rate": 1.3428174763173237e-05,
      "loss": 2.486,
      "step": 30152
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1767992973327637,
      "learning_rate": 1.3427787973918327e-05,
      "loss": 2.2517,
      "step": 30153
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0348790884017944,
      "learning_rate": 1.3427401178852386e-05,
      "loss": 2.4053,
      "step": 30154
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0764626264572144,
      "learning_rate": 1.342701437797607e-05,
      "loss": 2.5113,
      "step": 30155
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9929742813110352,
      "learning_rate": 1.342662757129003e-05,
      "loss": 2.4786,
      "step": 30156
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0189541578292847,
      "learning_rate": 1.3426240758794928e-05,
      "loss": 2.5457,
      "step": 30157
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0091946125030518,
      "learning_rate": 1.3425853940491418e-05,
      "loss": 2.3613,
      "step": 30158
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0876102447509766,
      "learning_rate": 1.3425467116380155e-05,
      "loss": 2.578,
      "step": 30159
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1379904747009277,
      "learning_rate": 1.3425080286461796e-05,
      "loss": 2.4951,
      "step": 30160
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2000608444213867,
      "learning_rate": 1.3424693450736993e-05,
      "loss": 2.548,
      "step": 30161
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0526978969573975,
      "learning_rate": 1.3424306609206405e-05,
      "loss": 2.4994,
      "step": 30162
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0061017274856567,
      "learning_rate": 1.342391976187069e-05,
      "loss": 2.5116,
      "step": 30163
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1135209798812866,
      "learning_rate": 1.3423532908730499e-05,
      "loss": 2.4763,
      "step": 30164
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0931977033615112,
      "learning_rate": 1.342314604978649e-05,
      "loss": 2.3322,
      "step": 30165
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2093218564987183,
      "learning_rate": 1.342275918503932e-05,
      "loss": 2.3739,
      "step": 30166
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1486775875091553,
      "learning_rate": 1.342237231448964e-05,
      "loss": 2.0026,
      "step": 30167
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.140801191329956,
      "learning_rate": 1.3421985438138114e-05,
      "loss": 2.3149,
      "step": 30168
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2561434507369995,
      "learning_rate": 1.3421598555985391e-05,
      "loss": 2.4803,
      "step": 30169
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0116982460021973,
      "learning_rate": 1.342121166803213e-05,
      "loss": 2.3782,
      "step": 30170
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0423306226730347,
      "learning_rate": 1.3420824774278984e-05,
      "loss": 2.467,
      "step": 30171
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.015618920326233,
      "learning_rate": 1.3420437874726612e-05,
      "loss": 2.6641,
      "step": 30172
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0158066749572754,
      "learning_rate": 1.3420050969375671e-05,
      "loss": 2.3197,
      "step": 30173
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0292348861694336,
      "learning_rate": 1.3419664058226811e-05,
      "loss": 2.2318,
      "step": 30174
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0487828254699707,
      "learning_rate": 1.3419277141280694e-05,
      "loss": 2.2841,
      "step": 30175
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0543477535247803,
      "learning_rate": 1.3418890218537972e-05,
      "loss": 2.4266,
      "step": 30176
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0228432416915894,
      "learning_rate": 1.3418503289999306e-05,
      "loss": 2.3147,
      "step": 30177
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1763144731521606,
      "learning_rate": 1.3418116355665345e-05,
      "loss": 2.3801,
      "step": 30178
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0856406688690186,
      "learning_rate": 1.3417729415536749e-05,
      "loss": 2.5767,
      "step": 30179
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0043355226516724,
      "learning_rate": 1.3417342469614174e-05,
      "loss": 2.2688,
      "step": 30180
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.024998426437378,
      "learning_rate": 1.3416955517898276e-05,
      "loss": 2.352,
      "step": 30181
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8444557189941406,
      "learning_rate": 1.3416568560389709e-05,
      "loss": 2.2686,
      "step": 30182
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1171741485595703,
      "learning_rate": 1.341618159708913e-05,
      "loss": 2.6145,
      "step": 30183
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0505282878875732,
      "learning_rate": 1.3415794627997196e-05,
      "loss": 2.5013,
      "step": 30184
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9958783984184265,
      "learning_rate": 1.3415407653114562e-05,
      "loss": 2.3393,
      "step": 30185
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1398261785507202,
      "learning_rate": 1.3415020672441887e-05,
      "loss": 2.3578,
      "step": 30186
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0163158178329468,
      "learning_rate": 1.3414633685979823e-05,
      "loss": 2.3819,
      "step": 30187
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9730129241943359,
      "learning_rate": 1.3414246693729025e-05,
      "loss": 2.0198,
      "step": 30188
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.062070369720459,
      "learning_rate": 1.3413859695690155e-05,
      "loss": 2.294,
      "step": 30189
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2080137729644775,
      "learning_rate": 1.3413472691863863e-05,
      "loss": 2.474,
      "step": 30190
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0331960916519165,
      "learning_rate": 1.3413085682250808e-05,
      "loss": 2.4213,
      "step": 30191
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1038062572479248,
      "learning_rate": 1.3412698666851647e-05,
      "loss": 2.288,
      "step": 30192
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1766338348388672,
      "learning_rate": 1.3412311645667036e-05,
      "loss": 2.5751,
      "step": 30193
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2527025938034058,
      "learning_rate": 1.3411924618697629e-05,
      "loss": 2.3912,
      "step": 30194
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.97208172082901,
      "learning_rate": 1.341153758594408e-05,
      "loss": 2.3052,
      "step": 30195
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4059128761291504,
      "learning_rate": 1.3411150547407054e-05,
      "loss": 2.2138,
      "step": 30196
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9304572939872742,
      "learning_rate": 1.3410763503087198e-05,
      "loss": 2.5559,
      "step": 30197
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0704822540283203,
      "learning_rate": 1.3410376452985172e-05,
      "loss": 2.5128,
      "step": 30198
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0794113874435425,
      "learning_rate": 1.3409989397101633e-05,
      "loss": 2.2388,
      "step": 30199
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0190478563308716,
      "learning_rate": 1.3409602335437236e-05,
      "loss": 2.3047,
      "step": 30200
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.260799765586853,
      "learning_rate": 1.3409215267992632e-05,
      "loss": 2.603,
      "step": 30201
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9594661593437195,
      "learning_rate": 1.3408828194768487e-05,
      "loss": 2.1524,
      "step": 30202
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1266160011291504,
      "learning_rate": 1.3408441115765449e-05,
      "loss": 2.4713,
      "step": 30203
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9897514581680298,
      "learning_rate": 1.340805403098418e-05,
      "loss": 2.403,
      "step": 30204
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.339214563369751,
      "learning_rate": 1.3407666940425332e-05,
      "loss": 2.1937,
      "step": 30205
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0572227239608765,
      "learning_rate": 1.3407279844089567e-05,
      "loss": 2.3688,
      "step": 30206
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9886097311973572,
      "learning_rate": 1.3406892741977533e-05,
      "loss": 2.2447,
      "step": 30207
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0114539861679077,
      "learning_rate": 1.3406505634089892e-05,
      "loss": 2.3834,
      "step": 30208
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0823553800582886,
      "learning_rate": 1.3406118520427298e-05,
      "loss": 2.5654,
      "step": 30209
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1183708906173706,
      "learning_rate": 1.340573140099041e-05,
      "loss": 2.363,
      "step": 30210
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2464691400527954,
      "learning_rate": 1.3405344275779881e-05,
      "loss": 2.3883,
      "step": 30211
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.099900245666504,
      "learning_rate": 1.3404957144796371e-05,
      "loss": 2.5424,
      "step": 30212
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.951198399066925,
      "learning_rate": 1.340457000804053e-05,
      "loss": 2.439,
      "step": 30213
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.149139165878296,
      "learning_rate": 1.340418286551302e-05,
      "loss": 2.2781,
      "step": 30214
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1958094835281372,
      "learning_rate": 1.3403795717214494e-05,
      "loss": 2.372,
      "step": 30215
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.918948769569397,
      "learning_rate": 1.340340856314561e-05,
      "loss": 2.2745,
      "step": 30216
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1426138877868652,
      "learning_rate": 1.3403021403307026e-05,
      "loss": 2.2846,
      "step": 30217
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.057747483253479,
      "learning_rate": 1.3402634237699394e-05,
      "loss": 2.4038,
      "step": 30218
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0862680673599243,
      "learning_rate": 1.3402247066323376e-05,
      "loss": 2.3709,
      "step": 30219
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1004705429077148,
      "learning_rate": 1.3401859889179625e-05,
      "loss": 2.1606,
      "step": 30220
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2247835397720337,
      "learning_rate": 1.3401472706268796e-05,
      "loss": 2.4285,
      "step": 30221
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.172141432762146,
      "learning_rate": 1.3401085517591546e-05,
      "loss": 2.5299,
      "step": 30222
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.312312126159668,
      "learning_rate": 1.3400698323148533e-05,
      "loss": 2.3529,
      "step": 30223
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1141057014465332,
      "learning_rate": 1.3400311122940415e-05,
      "loss": 2.3473,
      "step": 30224
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1765356063842773,
      "learning_rate": 1.3399923916967845e-05,
      "loss": 2.6825,
      "step": 30225
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2023835182189941,
      "learning_rate": 1.3399536705231478e-05,
      "loss": 2.6523,
      "step": 30226
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9943537712097168,
      "learning_rate": 1.3399149487731977e-05,
      "loss": 2.5606,
      "step": 30227
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0198663473129272,
      "learning_rate": 1.3398762264469994e-05,
      "loss": 2.3703,
      "step": 30228
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0630435943603516,
      "learning_rate": 1.3398375035446183e-05,
      "loss": 2.3496,
      "step": 30229
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9856513142585754,
      "learning_rate": 1.3397987800661202e-05,
      "loss": 2.2686,
      "step": 30230
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0788716077804565,
      "learning_rate": 1.339760056011571e-05,
      "loss": 2.1989,
      "step": 30231
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0851470232009888,
      "learning_rate": 1.3397213313810367e-05,
      "loss": 2.3645,
      "step": 30232
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0650238990783691,
      "learning_rate": 1.339682606174582e-05,
      "loss": 2.2847,
      "step": 30233
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1564048528671265,
      "learning_rate": 1.3396438803922731e-05,
      "loss": 2.3212,
      "step": 30234
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9963393211364746,
      "learning_rate": 1.3396051540341756e-05,
      "loss": 2.3925,
      "step": 30235
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5325733423233032,
      "learning_rate": 1.3395664271003552e-05,
      "loss": 2.4722,
      "step": 30236
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0845271348953247,
      "learning_rate": 1.3395276995908774e-05,
      "loss": 2.2796,
      "step": 30237
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0194792747497559,
      "learning_rate": 1.339488971505808e-05,
      "loss": 2.6206,
      "step": 30238
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0843653678894043,
      "learning_rate": 1.3394502428452127e-05,
      "loss": 2.3105,
      "step": 30239
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.074749231338501,
      "learning_rate": 1.3394115136091568e-05,
      "loss": 2.3944,
      "step": 30240
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0979284048080444,
      "learning_rate": 1.3393727837977065e-05,
      "loss": 2.293,
      "step": 30241
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9979179501533508,
      "learning_rate": 1.339334053410927e-05,
      "loss": 2.5266,
      "step": 30242
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.054282784461975,
      "learning_rate": 1.3392953224488839e-05,
      "loss": 2.2735,
      "step": 30243
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.082380771636963,
      "learning_rate": 1.3392565909116433e-05,
      "loss": 2.5219,
      "step": 30244
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0656462907791138,
      "learning_rate": 1.3392178587992706e-05,
      "loss": 2.442,
      "step": 30245
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0585365295410156,
      "learning_rate": 1.3391791261118314e-05,
      "loss": 2.4585,
      "step": 30246
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0299683809280396,
      "learning_rate": 1.3391403928493919e-05,
      "loss": 2.2431,
      "step": 30247
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9893502593040466,
      "learning_rate": 1.3391016590120166e-05,
      "loss": 2.2945,
      "step": 30248
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9740555882453918,
      "learning_rate": 1.3390629245997724e-05,
      "loss": 2.1915,
      "step": 30249
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1173194646835327,
      "learning_rate": 1.3390241896127243e-05,
      "loss": 2.31,
      "step": 30250
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0971978902816772,
      "learning_rate": 1.3389854540509382e-05,
      "loss": 2.371,
      "step": 30251
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0926458835601807,
      "learning_rate": 1.3389467179144798e-05,
      "loss": 2.4819,
      "step": 30252
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0860472917556763,
      "learning_rate": 1.3389079812034145e-05,
      "loss": 2.4155,
      "step": 30253
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.042812466621399,
      "learning_rate": 1.338869243917808e-05,
      "loss": 2.6728,
      "step": 30254
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1428767442703247,
      "learning_rate": 1.3388305060577263e-05,
      "loss": 2.3685,
      "step": 30255
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9044856429100037,
      "learning_rate": 1.3387917676232349e-05,
      "loss": 2.3168,
      "step": 30256
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0593008995056152,
      "learning_rate": 1.3387530286143993e-05,
      "loss": 2.4621,
      "step": 30257
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9851667881011963,
      "learning_rate": 1.3387142890312854e-05,
      "loss": 2.3596,
      "step": 30258
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.991875171661377,
      "learning_rate": 1.3386755488739587e-05,
      "loss": 2.1728,
      "step": 30259
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2785489559173584,
      "learning_rate": 1.338636808142485e-05,
      "loss": 2.2569,
      "step": 30260
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9627405405044556,
      "learning_rate": 1.3385980668369298e-05,
      "loss": 2.4052,
      "step": 30261
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1726906299591064,
      "learning_rate": 1.338559324957359e-05,
      "loss": 2.5654,
      "step": 30262
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0123121738433838,
      "learning_rate": 1.3385205825038384e-05,
      "loss": 2.3217,
      "step": 30263
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0968385934829712,
      "learning_rate": 1.3384818394764332e-05,
      "loss": 2.3673,
      "step": 30264
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0831620693206787,
      "learning_rate": 1.3384430958752094e-05,
      "loss": 2.4321,
      "step": 30265
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1216821670532227,
      "learning_rate": 1.3384043517002328e-05,
      "loss": 2.3694,
      "step": 30266
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1267708539962769,
      "learning_rate": 1.3383656069515687e-05,
      "loss": 2.3305,
      "step": 30267
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0841803550720215,
      "learning_rate": 1.3383268616292831e-05,
      "loss": 2.4817,
      "step": 30268
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0896775722503662,
      "learning_rate": 1.3382881157334415e-05,
      "loss": 2.4377,
      "step": 30269
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1167999505996704,
      "learning_rate": 1.3382493692641098e-05,
      "loss": 2.3454,
      "step": 30270
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0360044240951538,
      "learning_rate": 1.3382106222213534e-05,
      "loss": 2.2809,
      "step": 30271
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.155669093132019,
      "learning_rate": 1.3381718746052385e-05,
      "loss": 2.3511,
      "step": 30272
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.057470440864563,
      "learning_rate": 1.3381331264158302e-05,
      "loss": 2.4846,
      "step": 30273
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1233316659927368,
      "learning_rate": 1.3380943776531942e-05,
      "loss": 2.4827,
      "step": 30274
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0519495010375977,
      "learning_rate": 1.3380556283173966e-05,
      "loss": 2.4965,
      "step": 30275
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1315511465072632,
      "learning_rate": 1.3380168784085028e-05,
      "loss": 2.1263,
      "step": 30276
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0299513339996338,
      "learning_rate": 1.3379781279265787e-05,
      "loss": 2.3034,
      "step": 30277
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9989569187164307,
      "learning_rate": 1.33793937687169e-05,
      "loss": 2.2163,
      "step": 30278
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.114719271659851,
      "learning_rate": 1.337900625243902e-05,
      "loss": 2.5834,
      "step": 30279
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0437235832214355,
      "learning_rate": 1.337861873043281e-05,
      "loss": 2.3782,
      "step": 30280
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9527410268783569,
      "learning_rate": 1.337823120269892e-05,
      "loss": 2.3381,
      "step": 30281
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1252137422561646,
      "learning_rate": 1.3377843669238011e-05,
      "loss": 2.2812,
      "step": 30282
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.069021463394165,
      "learning_rate": 1.3377456130050743e-05,
      "loss": 2.3513,
      "step": 30283
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0043553113937378,
      "learning_rate": 1.3377068585137767e-05,
      "loss": 2.4146,
      "step": 30284
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3642174005508423,
      "learning_rate": 1.3376681034499742e-05,
      "loss": 2.4134,
      "step": 30285
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9641445279121399,
      "learning_rate": 1.3376293478137327e-05,
      "loss": 2.4358,
      "step": 30286
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2232213020324707,
      "learning_rate": 1.3375905916051178e-05,
      "loss": 2.4631,
      "step": 30287
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0479929447174072,
      "learning_rate": 1.337551834824195e-05,
      "loss": 2.2669,
      "step": 30288
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2113449573516846,
      "learning_rate": 1.3375130774710302e-05,
      "loss": 2.2448,
      "step": 30289
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.050086259841919,
      "learning_rate": 1.337474319545689e-05,
      "loss": 2.215,
      "step": 30290
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0026625394821167,
      "learning_rate": 1.3374355610482375e-05,
      "loss": 2.4117,
      "step": 30291
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9652759432792664,
      "learning_rate": 1.3373968019787406e-05,
      "loss": 2.2946,
      "step": 30292
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.002958059310913,
      "learning_rate": 1.3373580423372648e-05,
      "loss": 2.5205,
      "step": 30293
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0382930040359497,
      "learning_rate": 1.3373192821238753e-05,
      "loss": 2.3688,
      "step": 30294
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1053144931793213,
      "learning_rate": 1.337280521338638e-05,
      "loss": 2.5341,
      "step": 30295
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.188448429107666,
      "learning_rate": 1.3372417599816187e-05,
      "loss": 2.349,
      "step": 30296
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0161752700805664,
      "learning_rate": 1.337202998052883e-05,
      "loss": 2.7726,
      "step": 30297
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1272435188293457,
      "learning_rate": 1.3371642355524967e-05,
      "loss": 2.2064,
      "step": 30298
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0434836149215698,
      "learning_rate": 1.3371254724805255e-05,
      "loss": 2.4047,
      "step": 30299
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1028679609298706,
      "learning_rate": 1.337086708837035e-05,
      "loss": 2.3589,
      "step": 30300
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1332765817642212,
      "learning_rate": 1.337047944622091e-05,
      "loss": 2.445,
      "step": 30301
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1126751899719238,
      "learning_rate": 1.3370091798357589e-05,
      "loss": 2.4642,
      "step": 30302
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0797147750854492,
      "learning_rate": 1.336970414478105e-05,
      "loss": 2.4069,
      "step": 30303
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.063637137413025,
      "learning_rate": 1.3369316485491948e-05,
      "loss": 2.3717,
      "step": 30304
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0426236391067505,
      "learning_rate": 1.3368928820490941e-05,
      "loss": 2.3121,
      "step": 30305
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1866694688796997,
      "learning_rate": 1.336854114977868e-05,
      "loss": 2.3227,
      "step": 30306
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1084754467010498,
      "learning_rate": 1.336815347335583e-05,
      "loss": 2.4697,
      "step": 30307
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9750834703445435,
      "learning_rate": 1.3367765791223043e-05,
      "loss": 2.4672,
      "step": 30308
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1722471714019775,
      "learning_rate": 1.336737810338098e-05,
      "loss": 2.3612,
      "step": 30309
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0555115938186646,
      "learning_rate": 1.3366990409830293e-05,
      "loss": 2.3349,
      "step": 30310
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0144456624984741,
      "learning_rate": 1.3366602710571647e-05,
      "loss": 2.2503,
      "step": 30311
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0441031455993652,
      "learning_rate": 1.3366215005605696e-05,
      "loss": 2.4232,
      "step": 30312
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.006753921508789,
      "learning_rate": 1.3365827294933093e-05,
      "loss": 2.2701,
      "step": 30313
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0670239925384521,
      "learning_rate": 1.33654395785545e-05,
      "loss": 2.2832,
      "step": 30314
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0244300365447998,
      "learning_rate": 1.3365051856470573e-05,
      "loss": 2.2688,
      "step": 30315
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.025290846824646,
      "learning_rate": 1.3364664128681968e-05,
      "loss": 2.5076,
      "step": 30316
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2185834646224976,
      "learning_rate": 1.3364276395189343e-05,
      "loss": 2.3127,
      "step": 30317
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0369356870651245,
      "learning_rate": 1.336388865599336e-05,
      "loss": 2.4695,
      "step": 30318
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.038246750831604,
      "learning_rate": 1.3363500911094669e-05,
      "loss": 2.4919,
      "step": 30319
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1438981294631958,
      "learning_rate": 1.336311316049393e-05,
      "loss": 2.3515,
      "step": 30320
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.161785364151001,
      "learning_rate": 1.3362725404191802e-05,
      "loss": 2.1471,
      "step": 30321
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.095726490020752,
      "learning_rate": 1.3362337642188939e-05,
      "loss": 2.1698,
      "step": 30322
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0582443475723267,
      "learning_rate": 1.3361949874486002e-05,
      "loss": 2.4486,
      "step": 30323
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0606212615966797,
      "learning_rate": 1.3361562101083647e-05,
      "loss": 2.3316,
      "step": 30324
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9835438132286072,
      "learning_rate": 1.3361174321982532e-05,
      "loss": 2.2263,
      "step": 30325
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0121532678604126,
      "learning_rate": 1.3360786537183312e-05,
      "loss": 2.522,
      "step": 30326
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1159586906433105,
      "learning_rate": 1.3360398746686648e-05,
      "loss": 2.6084,
      "step": 30327
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0527238845825195,
      "learning_rate": 1.3360010950493192e-05,
      "loss": 2.3357,
      "step": 30328
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1170026063919067,
      "learning_rate": 1.3359623148603607e-05,
      "loss": 2.4601,
      "step": 30329
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9691396355628967,
      "learning_rate": 1.3359235341018546e-05,
      "loss": 2.3972,
      "step": 30330
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.055561900138855,
      "learning_rate": 1.3358847527738674e-05,
      "loss": 2.4349,
      "step": 30331
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.746729850769043,
      "learning_rate": 1.335845970876464e-05,
      "loss": 2.4002,
      "step": 30332
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0537068843841553,
      "learning_rate": 1.3358071884097104e-05,
      "loss": 2.2649,
      "step": 30333
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.9875593781471252,
      "learning_rate": 1.3357684053736724e-05,
      "loss": 2.3689,
      "step": 30334
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0850145816802979,
      "learning_rate": 1.335729621768416e-05,
      "loss": 2.3166,
      "step": 30335
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.0731947422027588,
      "learning_rate": 1.3356908375940064e-05,
      "loss": 2.3975,
      "step": 30336
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0651723146438599,
      "learning_rate": 1.33565205285051e-05,
      "loss": 2.3298,
      "step": 30337
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0784004926681519,
      "learning_rate": 1.335613267537992e-05,
      "loss": 2.2593,
      "step": 30338
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0018543004989624,
      "learning_rate": 1.3355744816565184e-05,
      "loss": 2.4894,
      "step": 30339
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0898762941360474,
      "learning_rate": 1.3355356952061549e-05,
      "loss": 2.4475,
      "step": 30340
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.964475154876709,
      "learning_rate": 1.3354969081869672e-05,
      "loss": 2.3763,
      "step": 30341
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1033759117126465,
      "learning_rate": 1.3354581205990214e-05,
      "loss": 2.4081,
      "step": 30342
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1880947351455688,
      "learning_rate": 1.3354193324423828e-05,
      "loss": 2.5084,
      "step": 30343
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9752625823020935,
      "learning_rate": 1.3353805437171173e-05,
      "loss": 1.9964,
      "step": 30344
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0710526704788208,
      "learning_rate": 1.3353417544232907e-05,
      "loss": 2.4122,
      "step": 30345
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1306235790252686,
      "learning_rate": 1.3353029645609688e-05,
      "loss": 2.3698,
      "step": 30346
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0586268901824951,
      "learning_rate": 1.335264174130217e-05,
      "loss": 2.3902,
      "step": 30347
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1347554922103882,
      "learning_rate": 1.3352253831311017e-05,
      "loss": 2.3295,
      "step": 30348
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0768859386444092,
      "learning_rate": 1.335186591563688e-05,
      "loss": 2.3593,
      "step": 30349
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9929445385932922,
      "learning_rate": 1.3351477994280424e-05,
      "loss": 2.4405,
      "step": 30350
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3495625257492065,
      "learning_rate": 1.3351090067242303e-05,
      "loss": 2.4107,
      "step": 30351
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0700918436050415,
      "learning_rate": 1.3350702134523172e-05,
      "loss": 2.4975,
      "step": 30352
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9820019602775574,
      "learning_rate": 1.335031419612369e-05,
      "loss": 2.1728,
      "step": 30353
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.293244481086731,
      "learning_rate": 1.3349926252044516e-05,
      "loss": 2.4566,
      "step": 30354
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9348441958427429,
      "learning_rate": 1.3349538302286308e-05,
      "loss": 2.3226,
      "step": 30355
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0436997413635254,
      "learning_rate": 1.3349150346849722e-05,
      "loss": 2.3091,
      "step": 30356
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9796085953712463,
      "learning_rate": 1.3348762385735419e-05,
      "loss": 2.2686,
      "step": 30357
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9692603945732117,
      "learning_rate": 1.3348374418944052e-05,
      "loss": 2.3839,
      "step": 30358
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1003546714782715,
      "learning_rate": 1.3347986446476279e-05,
      "loss": 2.3554,
      "step": 30359
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9748545289039612,
      "learning_rate": 1.3347598468332764e-05,
      "loss": 2.3289,
      "step": 30360
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1114917993545532,
      "learning_rate": 1.3347210484514158e-05,
      "loss": 2.4726,
      "step": 30361
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1502537727355957,
      "learning_rate": 1.334682249502112e-05,
      "loss": 2.1341,
      "step": 30362
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9624378681182861,
      "learning_rate": 1.3346434499854311e-05,
      "loss": 2.3535,
      "step": 30363
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.089389681816101,
      "learning_rate": 1.3346046499014387e-05,
      "loss": 2.4079,
      "step": 30364
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1201658248901367,
      "learning_rate": 1.3345658492502005e-05,
      "loss": 2.6455,
      "step": 30365
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.073728084564209,
      "learning_rate": 1.3345270480317822e-05,
      "loss": 2.5824,
      "step": 30366
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9491145014762878,
      "learning_rate": 1.3344882462462497e-05,
      "loss": 2.2496,
      "step": 30367
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1591477394104004,
      "learning_rate": 1.3344494438936687e-05,
      "loss": 2.462,
      "step": 30368
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0431686639785767,
      "learning_rate": 1.3344106409741053e-05,
      "loss": 2.4119,
      "step": 30369
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1304482221603394,
      "learning_rate": 1.334371837487625e-05,
      "loss": 2.3732,
      "step": 30370
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0300333499908447,
      "learning_rate": 1.3343330334342937e-05,
      "loss": 2.5095,
      "step": 30371
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9984959363937378,
      "learning_rate": 1.334294228814177e-05,
      "loss": 2.4346,
      "step": 30372
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.106614589691162,
      "learning_rate": 1.3342554236273406e-05,
      "loss": 2.2508,
      "step": 30373
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9398135542869568,
      "learning_rate": 1.3342166178738507e-05,
      "loss": 2.3225,
      "step": 30374
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0289981365203857,
      "learning_rate": 1.3341778115537726e-05,
      "loss": 2.5589,
      "step": 30375
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3052427768707275,
      "learning_rate": 1.3341390046671727e-05,
      "loss": 2.4882,
      "step": 30376
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5031408071517944,
      "learning_rate": 1.3341001972141163e-05,
      "loss": 2.4787,
      "step": 30377
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9619337320327759,
      "learning_rate": 1.3340613891946693e-05,
      "loss": 2.3795,
      "step": 30378
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0441378355026245,
      "learning_rate": 1.3340225806088975e-05,
      "loss": 2.557,
      "step": 30379
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0145909786224365,
      "learning_rate": 1.3339837714568667e-05,
      "loss": 2.4642,
      "step": 30380
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0393964052200317,
      "learning_rate": 1.3339449617386427e-05,
      "loss": 2.4427,
      "step": 30381
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0319533348083496,
      "learning_rate": 1.3339061514542912e-05,
      "loss": 2.3466,
      "step": 30382
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0392459630966187,
      "learning_rate": 1.3338673406038782e-05,
      "loss": 2.3591,
      "step": 30383
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0170809030532837,
      "learning_rate": 1.3338285291874695e-05,
      "loss": 2.1093,
      "step": 30384
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0215938091278076,
      "learning_rate": 1.3337897172051306e-05,
      "loss": 2.5242,
      "step": 30385
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.077696442604065,
      "learning_rate": 1.3337509046569275e-05,
      "loss": 2.3964,
      "step": 30386
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1426913738250732,
      "learning_rate": 1.333712091542926e-05,
      "loss": 2.4643,
      "step": 30387
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1599531173706055,
      "learning_rate": 1.3336732778631918e-05,
      "loss": 2.4931,
      "step": 30388
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1635315418243408,
      "learning_rate": 1.3336344636177909e-05,
      "loss": 2.3892,
      "step": 30389
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.02298903465271,
      "learning_rate": 1.333595648806789e-05,
      "loss": 2.3865,
      "step": 30390
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0672640800476074,
      "learning_rate": 1.3335568334302517e-05,
      "loss": 2.3786,
      "step": 30391
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9978084564208984,
      "learning_rate": 1.333518017488245e-05,
      "loss": 2.6067,
      "step": 30392
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0980428457260132,
      "learning_rate": 1.3334792009808348e-05,
      "loss": 2.4535,
      "step": 30393
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0391449928283691,
      "learning_rate": 1.3334403839080865e-05,
      "loss": 2.1361,
      "step": 30394
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0896400213241577,
      "learning_rate": 1.3334015662700664e-05,
      "loss": 2.3081,
      "step": 30395
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.090470314025879,
      "learning_rate": 1.33336274806684e-05,
      "loss": 2.4178,
      "step": 30396
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1187810897827148,
      "learning_rate": 1.3333239292984734e-05,
      "loss": 2.428,
      "step": 30397
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.004499912261963,
      "learning_rate": 1.3332851099650323e-05,
      "loss": 2.4334,
      "step": 30398
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1062901020050049,
      "learning_rate": 1.333246290066582e-05,
      "loss": 2.3522,
      "step": 30399
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0427677631378174,
      "learning_rate": 1.3332074696031892e-05,
      "loss": 2.5719,
      "step": 30400
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3032829761505127,
      "learning_rate": 1.3331686485749189e-05,
      "loss": 2.4341,
      "step": 30401
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2353756427764893,
      "learning_rate": 1.3331298269818373e-05,
      "loss": 2.4587,
      "step": 30402
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0488895177841187,
      "learning_rate": 1.3330910048240102e-05,
      "loss": 2.3856,
      "step": 30403
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.966830849647522,
      "learning_rate": 1.3330521821015037e-05,
      "loss": 2.2987,
      "step": 30404
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0039221048355103,
      "learning_rate": 1.333013358814383e-05,
      "loss": 2.2096,
      "step": 30405
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1854945421218872,
      "learning_rate": 1.332974534962714e-05,
      "loss": 2.3463,
      "step": 30406
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.108835220336914,
      "learning_rate": 1.332935710546563e-05,
      "loss": 2.313,
      "step": 30407
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2628724575042725,
      "learning_rate": 1.3328968855659954e-05,
      "loss": 2.1787,
      "step": 30408
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.938811719417572,
      "learning_rate": 1.3328580600210772e-05,
      "loss": 2.1898,
      "step": 30409
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2098206281661987,
      "learning_rate": 1.3328192339118745e-05,
      "loss": 2.4796,
      "step": 30410
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1826372146606445,
      "learning_rate": 1.3327804072384524e-05,
      "loss": 2.2679,
      "step": 30411
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1695301532745361,
      "learning_rate": 1.3327415800008771e-05,
      "loss": 2.3928,
      "step": 30412
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2148351669311523,
      "learning_rate": 1.3327027521992146e-05,
      "loss": 2.4703,
      "step": 30413
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2609118223190308,
      "learning_rate": 1.3326639238335305e-05,
      "loss": 2.236,
      "step": 30414
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1460051536560059,
      "learning_rate": 1.3326250949038907e-05,
      "loss": 2.4383,
      "step": 30415
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.949722170829773,
      "learning_rate": 1.3325862654103612e-05,
      "loss": 2.1134,
      "step": 30416
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0300841331481934,
      "learning_rate": 1.3325474353530078e-05,
      "loss": 2.4705,
      "step": 30417
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9504969716072083,
      "learning_rate": 1.3325086047318957e-05,
      "loss": 2.2466,
      "step": 30418
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9413229823112488,
      "learning_rate": 1.3324697735470915e-05,
      "loss": 2.4624,
      "step": 30419
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9898964166641235,
      "learning_rate": 1.3324309417986607e-05,
      "loss": 2.3651,
      "step": 30420
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.040116310119629,
      "learning_rate": 1.3323921094866693e-05,
      "loss": 2.4165,
      "step": 30421
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3843159675598145,
      "learning_rate": 1.3323532766111827e-05,
      "loss": 2.1529,
      "step": 30422
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.113306999206543,
      "learning_rate": 1.3323144431722673e-05,
      "loss": 2.3895,
      "step": 30423
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0956790447235107,
      "learning_rate": 1.3322756091699884e-05,
      "loss": 2.4579,
      "step": 30424
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2661972045898438,
      "learning_rate": 1.3322367746044122e-05,
      "loss": 2.416,
      "step": 30425
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9421018362045288,
      "learning_rate": 1.3321979394756046e-05,
      "loss": 2.3198,
      "step": 30426
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9767982363700867,
      "learning_rate": 1.332159103783631e-05,
      "loss": 2.2191,
      "step": 30427
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1771306991577148,
      "learning_rate": 1.3321202675285576e-05,
      "loss": 2.3677,
      "step": 30428
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2626770734786987,
      "learning_rate": 1.3320814307104503e-05,
      "loss": 2.3808,
      "step": 30429
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.074596881866455,
      "learning_rate": 1.3320425933293748e-05,
      "loss": 2.2928,
      "step": 30430
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0014539957046509,
      "learning_rate": 1.3320037553853969e-05,
      "loss": 2.1637,
      "step": 30431
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1180351972579956,
      "learning_rate": 1.3319649168785822e-05,
      "loss": 2.3256,
      "step": 30432
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1209278106689453,
      "learning_rate": 1.3319260778089971e-05,
      "loss": 2.6661,
      "step": 30433
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9839913249015808,
      "learning_rate": 1.3318872381767069e-05,
      "loss": 2.337,
      "step": 30434
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.353276252746582,
      "learning_rate": 1.3318483979817777e-05,
      "loss": 2.1473,
      "step": 30435
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0209444761276245,
      "learning_rate": 1.3318095572242757e-05,
      "loss": 2.3912,
      "step": 30436
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0162277221679688,
      "learning_rate": 1.331770715904266e-05,
      "loss": 2.3574,
      "step": 30437
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0629435777664185,
      "learning_rate": 1.331731874021815e-05,
      "loss": 2.5137,
      "step": 30438
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0833590030670166,
      "learning_rate": 1.3316930315769883e-05,
      "loss": 2.4171,
      "step": 30439
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1389788389205933,
      "learning_rate": 1.3316541885698518e-05,
      "loss": 2.4639,
      "step": 30440
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9936037063598633,
      "learning_rate": 1.3316153450004715e-05,
      "loss": 2.478,
      "step": 30441
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.123407244682312,
      "learning_rate": 1.331576500868913e-05,
      "loss": 2.5762,
      "step": 30442
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0122051239013672,
      "learning_rate": 1.3315376561752424e-05,
      "loss": 2.3742,
      "step": 30443
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.028281569480896,
      "learning_rate": 1.3314988109195253e-05,
      "loss": 2.464,
      "step": 30444
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0739343166351318,
      "learning_rate": 1.3314599651018277e-05,
      "loss": 2.5229,
      "step": 30445
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.008928894996643,
      "learning_rate": 1.3314211187222154e-05,
      "loss": 2.2963,
      "step": 30446
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2084909677505493,
      "learning_rate": 1.3313822717807546e-05,
      "loss": 2.3906,
      "step": 30447
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1079516410827637,
      "learning_rate": 1.3313434242775104e-05,
      "loss": 2.3277,
      "step": 30448
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0336955785751343,
      "learning_rate": 1.3313045762125491e-05,
      "loss": 2.277,
      "step": 30449
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1306692361831665,
      "learning_rate": 1.331265727585937e-05,
      "loss": 2.439,
      "step": 30450
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2047455310821533,
      "learning_rate": 1.3312268783977394e-05,
      "loss": 2.4921,
      "step": 30451
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1355053186416626,
      "learning_rate": 1.3311880286480221e-05,
      "loss": 2.5502,
      "step": 30452
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9959485530853271,
      "learning_rate": 1.3311491783368511e-05,
      "loss": 2.166,
      "step": 30453
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.016789436340332,
      "learning_rate": 1.3311103274642925e-05,
      "loss": 2.3913,
      "step": 30454
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.062894582748413,
      "learning_rate": 1.3310714760304118e-05,
      "loss": 2.5072,
      "step": 30455
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.209646463394165,
      "learning_rate": 1.3310326240352753e-05,
      "loss": 2.353,
      "step": 30456
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0386208295822144,
      "learning_rate": 1.3309937714789485e-05,
      "loss": 2.3892,
      "step": 30457
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0267199277877808,
      "learning_rate": 1.3309549183614972e-05,
      "loss": 2.4578,
      "step": 30458
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.042077898979187,
      "learning_rate": 1.3309160646829873e-05,
      "loss": 2.7202,
      "step": 30459
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0579293966293335,
      "learning_rate": 1.3308772104434853e-05,
      "loss": 2.2931,
      "step": 30460
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0384443998336792,
      "learning_rate": 1.3308383556430563e-05,
      "loss": 2.2707,
      "step": 30461
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.953178882598877,
      "learning_rate": 1.3307995002817662e-05,
      "loss": 2.583,
      "step": 30462
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0203107595443726,
      "learning_rate": 1.3307606443596815e-05,
      "loss": 2.3238,
      "step": 30463
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1510154008865356,
      "learning_rate": 1.3307217878768675e-05,
      "loss": 2.3034,
      "step": 30464
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0688509941101074,
      "learning_rate": 1.3306829308333901e-05,
      "loss": 2.3957,
      "step": 30465
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9972437620162964,
      "learning_rate": 1.3306440732293156e-05,
      "loss": 2.4963,
      "step": 30466
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1823370456695557,
      "learning_rate": 1.3306052150647093e-05,
      "loss": 2.3889,
      "step": 30467
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0264534950256348,
      "learning_rate": 1.3305663563396375e-05,
      "loss": 2.516,
      "step": 30468
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9297645092010498,
      "learning_rate": 1.3305274970541659e-05,
      "loss": 2.2533,
      "step": 30469
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.074947714805603,
      "learning_rate": 1.3304886372083605e-05,
      "loss": 2.3468,
      "step": 30470
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1355537176132202,
      "learning_rate": 1.3304497768022872e-05,
      "loss": 2.3936,
      "step": 30471
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9586137533187866,
      "learning_rate": 1.3304109158360118e-05,
      "loss": 2.2306,
      "step": 30472
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1188297271728516,
      "learning_rate": 1.3303720543095997e-05,
      "loss": 2.2892,
      "step": 30473
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0116528272628784,
      "learning_rate": 1.3303331922231175e-05,
      "loss": 2.4499,
      "step": 30474
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0566933155059814,
      "learning_rate": 1.330294329576631e-05,
      "loss": 2.4605,
      "step": 30475
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0029395818710327,
      "learning_rate": 1.3302554663702059e-05,
      "loss": 2.48,
      "step": 30476
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.087359070777893,
      "learning_rate": 1.3302166026039078e-05,
      "loss": 2.2346,
      "step": 30477
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1231122016906738,
      "learning_rate": 1.330177738277803e-05,
      "loss": 2.442,
      "step": 30478
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9851601123809814,
      "learning_rate": 1.3301388733919573e-05,
      "loss": 2.3091,
      "step": 30479
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1568845510482788,
      "learning_rate": 1.3301000079464366e-05,
      "loss": 2.2662,
      "step": 30480
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.065850019454956,
      "learning_rate": 1.3300611419413066e-05,
      "loss": 2.4145,
      "step": 30481
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1540964841842651,
      "learning_rate": 1.3300222753766336e-05,
      "loss": 2.3951,
      "step": 30482
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0021815299987793,
      "learning_rate": 1.329983408252483e-05,
      "loss": 2.445,
      "step": 30483
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0678414106369019,
      "learning_rate": 1.3299445405689207e-05,
      "loss": 2.5212,
      "step": 30484
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1013108491897583,
      "learning_rate": 1.3299056723260132e-05,
      "loss": 2.6052,
      "step": 30485
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0622514486312866,
      "learning_rate": 1.3298668035238256e-05,
      "loss": 2.2795,
      "step": 30486
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.225303053855896,
      "learning_rate": 1.3298279341624243e-05,
      "loss": 2.4357,
      "step": 30487
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0485824346542358,
      "learning_rate": 1.3297890642418754e-05,
      "loss": 2.1976,
      "step": 30488
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9565262794494629,
      "learning_rate": 1.3297501937622442e-05,
      "loss": 2.281,
      "step": 30489
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0345786809921265,
      "learning_rate": 1.3297113227235967e-05,
      "loss": 2.2812,
      "step": 30490
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0494476556777954,
      "learning_rate": 1.3296724511259993e-05,
      "loss": 2.4858,
      "step": 30491
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9661981463432312,
      "learning_rate": 1.3296335789695174e-05,
      "loss": 2.1687,
      "step": 30492
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9885982275009155,
      "learning_rate": 1.329594706254217e-05,
      "loss": 2.3828,
      "step": 30493
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.093998908996582,
      "learning_rate": 1.3295558329801641e-05,
      "loss": 2.3964,
      "step": 30494
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.096825122833252,
      "learning_rate": 1.3295169591474243e-05,
      "loss": 2.3408,
      "step": 30495
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1188123226165771,
      "learning_rate": 1.3294780847560642e-05,
      "loss": 2.4295,
      "step": 30496
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0488642454147339,
      "learning_rate": 1.3294392098061491e-05,
      "loss": 2.5969,
      "step": 30497
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.116945505142212,
      "learning_rate": 1.329400334297745e-05,
      "loss": 2.4635,
      "step": 30498
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.08757483959198,
      "learning_rate": 1.3293614582309177e-05,
      "loss": 2.4572,
      "step": 30499
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0244697332382202,
      "learning_rate": 1.3293225816057335e-05,
      "loss": 2.5758,
      "step": 30500
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1373950242996216,
      "learning_rate": 1.3292837044222583e-05,
      "loss": 2.5125,
      "step": 30501
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0428314208984375,
      "learning_rate": 1.3292448266805576e-05,
      "loss": 2.3365,
      "step": 30502
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0147054195404053,
      "learning_rate": 1.3292059483806973e-05,
      "loss": 2.7136,
      "step": 30503
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9915233850479126,
      "learning_rate": 1.3291670695227436e-05,
      "loss": 2.2308,
      "step": 30504
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0351433753967285,
      "learning_rate": 1.3291281901067622e-05,
      "loss": 2.2038,
      "step": 30505
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2358777523040771,
      "learning_rate": 1.3290893101328194e-05,
      "loss": 2.499,
      "step": 30506
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0961494445800781,
      "learning_rate": 1.3290504296009807e-05,
      "loss": 2.3752,
      "step": 30507
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2753403186798096,
      "learning_rate": 1.3290115485113122e-05,
      "loss": 2.465,
      "step": 30508
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3488619327545166,
      "learning_rate": 1.32897266686388e-05,
      "loss": 2.3093,
      "step": 30509
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0541428327560425,
      "learning_rate": 1.3289337846587494e-05,
      "loss": 2.3806,
      "step": 30510
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0333532094955444,
      "learning_rate": 1.3288949018959868e-05,
      "loss": 2.224,
      "step": 30511
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.8735504150390625,
      "learning_rate": 1.3288560185756581e-05,
      "loss": 2.4458,
      "step": 30512
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9858601093292236,
      "learning_rate": 1.3288171346978288e-05,
      "loss": 2.4164,
      "step": 30513
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9788105487823486,
      "learning_rate": 1.3287782502625657e-05,
      "loss": 2.4464,
      "step": 30514
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2195225954055786,
      "learning_rate": 1.328739365269934e-05,
      "loss": 2.383,
      "step": 30515
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0818055868148804,
      "learning_rate": 1.3287004797199996e-05,
      "loss": 2.2902,
      "step": 30516
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1183791160583496,
      "learning_rate": 1.3286615936128286e-05,
      "loss": 2.2217,
      "step": 30517
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1847045421600342,
      "learning_rate": 1.3286227069484871e-05,
      "loss": 2.2887,
      "step": 30518
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0225145816802979,
      "learning_rate": 1.328583819727041e-05,
      "loss": 2.4766,
      "step": 30519
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0089353322982788,
      "learning_rate": 1.3285449319485558e-05,
      "loss": 2.4624,
      "step": 30520
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0804529190063477,
      "learning_rate": 1.328506043613098e-05,
      "loss": 2.3933,
      "step": 30521
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5734823942184448,
      "learning_rate": 1.328467154720733e-05,
      "loss": 2.3012,
      "step": 30522
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2042591571807861,
      "learning_rate": 1.328428265271527e-05,
      "loss": 2.2475,
      "step": 30523
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0155041217803955,
      "learning_rate": 1.328389375265546e-05,
      "loss": 2.3407,
      "step": 30524
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.322288990020752,
      "learning_rate": 1.3283504847028555e-05,
      "loss": 2.4412,
      "step": 30525
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.89666748046875,
      "learning_rate": 1.3283115935835218e-05,
      "loss": 2.4401,
      "step": 30526
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.029922604560852,
      "learning_rate": 1.328272701907611e-05,
      "loss": 2.3423,
      "step": 30527
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9725658893585205,
      "learning_rate": 1.3282338096751888e-05,
      "loss": 2.2875,
      "step": 30528
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0320003032684326,
      "learning_rate": 1.328194916886321e-05,
      "loss": 2.685,
      "step": 30529
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0050992965698242,
      "learning_rate": 1.328156023541074e-05,
      "loss": 2.2347,
      "step": 30530
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0985898971557617,
      "learning_rate": 1.328117129639513e-05,
      "loss": 2.278,
      "step": 30531
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0529706478118896,
      "learning_rate": 1.3280782351817045e-05,
      "loss": 2.2759,
      "step": 30532
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0164060592651367,
      "learning_rate": 1.3280393401677144e-05,
      "loss": 2.3356,
      "step": 30533
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0080156326293945,
      "learning_rate": 1.3280004445976082e-05,
      "loss": 2.3884,
      "step": 30534
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1754266023635864,
      "learning_rate": 1.3279615484714528e-05,
      "loss": 2.2998,
      "step": 30535
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0237722396850586,
      "learning_rate": 1.327922651789313e-05,
      "loss": 2.3971,
      "step": 30536
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0644313097000122,
      "learning_rate": 1.3278837545512552e-05,
      "loss": 2.499,
      "step": 30537
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0252509117126465,
      "learning_rate": 1.3278448567573457e-05,
      "loss": 2.43,
      "step": 30538
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.014738917350769,
      "learning_rate": 1.3278059584076499e-05,
      "loss": 2.4787,
      "step": 30539
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1685279607772827,
      "learning_rate": 1.3277670595022339e-05,
      "loss": 2.5968,
      "step": 30540
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1690874099731445,
      "learning_rate": 1.3277281600411638e-05,
      "loss": 2.273,
      "step": 30541
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1564652919769287,
      "learning_rate": 1.3276892600245058e-05,
      "loss": 2.2457,
      "step": 30542
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0494420528411865,
      "learning_rate": 1.3276503594523251e-05,
      "loss": 2.4784,
      "step": 30543
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0909247398376465,
      "learning_rate": 1.3276114583246882e-05,
      "loss": 2.2716,
      "step": 30544
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9457273483276367,
      "learning_rate": 1.3275725566416608e-05,
      "loss": 2.4671,
      "step": 30545
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0567632913589478,
      "learning_rate": 1.3275336544033089e-05,
      "loss": 2.2492,
      "step": 30546
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1284241676330566,
      "learning_rate": 1.3274947516096988e-05,
      "loss": 2.2501,
      "step": 30547
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.216895580291748,
      "learning_rate": 1.327455848260896e-05,
      "loss": 2.5026,
      "step": 30548
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0354280471801758,
      "learning_rate": 1.3274169443569665e-05,
      "loss": 2.4549,
      "step": 30549
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.109311580657959,
      "learning_rate": 1.3273780398979766e-05,
      "loss": 2.3586,
      "step": 30550
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0713696479797363,
      "learning_rate": 1.3273391348839917e-05,
      "loss": 2.3136,
      "step": 30551
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0414625406265259,
      "learning_rate": 1.3273002293150783e-05,
      "loss": 2.4786,
      "step": 30552
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9933451414108276,
      "learning_rate": 1.327261323191302e-05,
      "loss": 2.3678,
      "step": 30553
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1911466121673584,
      "learning_rate": 1.3272224165127288e-05,
      "loss": 2.6431,
      "step": 30554
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0053284168243408,
      "learning_rate": 1.3271835092794248e-05,
      "loss": 2.2917,
      "step": 30555
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0702717304229736,
      "learning_rate": 1.327144601491456e-05,
      "loss": 2.319,
      "step": 30556
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0143218040466309,
      "learning_rate": 1.3271056931488882e-05,
      "loss": 2.5192,
      "step": 30557
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0121147632598877,
      "learning_rate": 1.327066784251787e-05,
      "loss": 2.2674,
      "step": 30558
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9830950498580933,
      "learning_rate": 1.3270278748002192e-05,
      "loss": 2.4054,
      "step": 30559
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.037640929222107,
      "learning_rate": 1.3269889647942503e-05,
      "loss": 2.5172,
      "step": 30560
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1265621185302734,
      "learning_rate": 1.3269500542339462e-05,
      "loss": 2.4794,
      "step": 30561
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2376680374145508,
      "learning_rate": 1.3269111431193731e-05,
      "loss": 2.2835,
      "step": 30562
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0533164739608765,
      "learning_rate": 1.3268722314505968e-05,
      "loss": 2.6211,
      "step": 30563
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0940189361572266,
      "learning_rate": 1.3268333192276831e-05,
      "loss": 2.4387,
      "step": 30564
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2929017543792725,
      "learning_rate": 1.3267944064506984e-05,
      "loss": 2.3155,
      "step": 30565
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9623095393180847,
      "learning_rate": 1.3267554931197082e-05,
      "loss": 2.3292,
      "step": 30566
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9940621256828308,
      "learning_rate": 1.3267165792347787e-05,
      "loss": 2.4502,
      "step": 30567
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0598409175872803,
      "learning_rate": 1.3266776647959763e-05,
      "loss": 2.5962,
      "step": 30568
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2159761190414429,
      "learning_rate": 1.3266387498033662e-05,
      "loss": 2.6437,
      "step": 30569
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1537854671478271,
      "learning_rate": 1.3265998342570145e-05,
      "loss": 2.53,
      "step": 30570
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0164791345596313,
      "learning_rate": 1.3265609181569876e-05,
      "loss": 2.3193,
      "step": 30571
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0533849000930786,
      "learning_rate": 1.3265220015033511e-05,
      "loss": 2.4196,
      "step": 30572
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.015209674835205,
      "learning_rate": 1.3264830842961713e-05,
      "loss": 2.4191,
      "step": 30573
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1061161756515503,
      "learning_rate": 1.326444166535514e-05,
      "loss": 2.5938,
      "step": 30574
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0547270774841309,
      "learning_rate": 1.3264052482214452e-05,
      "loss": 2.4781,
      "step": 30575
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0473911762237549,
      "learning_rate": 1.3263663293540308e-05,
      "loss": 2.3993,
      "step": 30576
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1602977514266968,
      "learning_rate": 1.3263274099333367e-05,
      "loss": 2.5837,
      "step": 30577
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.03542160987854,
      "learning_rate": 1.3262884899594295e-05,
      "loss": 1.9869,
      "step": 30578
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0532134771347046,
      "learning_rate": 1.3262495694323742e-05,
      "loss": 2.3211,
      "step": 30579
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9922279119491577,
      "learning_rate": 1.3262106483522374e-05,
      "loss": 2.3833,
      "step": 30580
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2050386667251587,
      "learning_rate": 1.326171726719085e-05,
      "loss": 2.4571,
      "step": 30581
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.999793291091919,
      "learning_rate": 1.3261328045329828e-05,
      "loss": 2.289,
      "step": 30582
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1209675073623657,
      "learning_rate": 1.3260938817939972e-05,
      "loss": 2.6219,
      "step": 30583
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0885212421417236,
      "learning_rate": 1.3260549585021935e-05,
      "loss": 2.3947,
      "step": 30584
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1040838956832886,
      "learning_rate": 1.3260160346576383e-05,
      "loss": 2.6634,
      "step": 30585
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1300287246704102,
      "learning_rate": 1.3259771102603973e-05,
      "loss": 2.3228,
      "step": 30586
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1123576164245605,
      "learning_rate": 1.3259381853105366e-05,
      "loss": 2.3462,
      "step": 30587
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9957687854766846,
      "learning_rate": 1.3258992598081224e-05,
      "loss": 2.4542,
      "step": 30588
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0759912729263306,
      "learning_rate": 1.3258603337532203e-05,
      "loss": 2.5894,
      "step": 30589
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0044502019882202,
      "learning_rate": 1.3258214071458963e-05,
      "loss": 2.5521,
      "step": 30590
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9821599125862122,
      "learning_rate": 1.3257824799862165e-05,
      "loss": 2.3342,
      "step": 30591
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9796676635742188,
      "learning_rate": 1.325743552274247e-05,
      "loss": 1.9711,
      "step": 30592
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2265774011611938,
      "learning_rate": 1.3257046240100538e-05,
      "loss": 2.2655,
      "step": 30593
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0220363140106201,
      "learning_rate": 1.3256656951937027e-05,
      "loss": 2.5863,
      "step": 30594
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2561414241790771,
      "learning_rate": 1.32562676582526e-05,
      "loss": 2.4963,
      "step": 30595
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.191078543663025,
      "learning_rate": 1.3255878359047911e-05,
      "loss": 2.3801,
      "step": 30596
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0716333389282227,
      "learning_rate": 1.3255489054323625e-05,
      "loss": 2.4546,
      "step": 30597
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0109648704528809,
      "learning_rate": 1.3255099744080402e-05,
      "loss": 2.2497,
      "step": 30598
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.018922209739685,
      "learning_rate": 1.3254710428318899e-05,
      "loss": 2.4661,
      "step": 30599
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.022214651107788,
      "learning_rate": 1.325432110703978e-05,
      "loss": 2.1695,
      "step": 30600
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0627901554107666,
      "learning_rate": 1.3253931780243705e-05,
      "loss": 2.494,
      "step": 30601
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0245088338851929,
      "learning_rate": 1.3253542447931329e-05,
      "loss": 2.4189,
      "step": 30602
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9531190395355225,
      "learning_rate": 1.3253153110103316e-05,
      "loss": 2.4852,
      "step": 30603
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4639909267425537,
      "learning_rate": 1.3252763766760324e-05,
      "loss": 2.1775,
      "step": 30604
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9667482376098633,
      "learning_rate": 1.3252374417903013e-05,
      "loss": 2.2888,
      "step": 30605
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0027563571929932,
      "learning_rate": 1.3251985063532047e-05,
      "loss": 2.3627,
      "step": 30606
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.03842031955719,
      "learning_rate": 1.325159570364808e-05,
      "loss": 2.3129,
      "step": 30607
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0446358919143677,
      "learning_rate": 1.3251206338251777e-05,
      "loss": 2.4499,
      "step": 30608
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.617671489715576,
      "learning_rate": 1.3250816967343795e-05,
      "loss": 2.4759,
      "step": 30609
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0045292377471924,
      "learning_rate": 1.3250427590924797e-05,
      "loss": 2.5368,
      "step": 30610
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0415382385253906,
      "learning_rate": 1.3250038208995441e-05,
      "loss": 2.5986,
      "step": 30611
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9897733330726624,
      "learning_rate": 1.3249648821556384e-05,
      "loss": 2.2657,
      "step": 30612
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0499931573867798,
      "learning_rate": 1.3249259428608293e-05,
      "loss": 2.4414,
      "step": 30613
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9929153919219971,
      "learning_rate": 1.3248870030151828e-05,
      "loss": 2.3449,
      "step": 30614
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9990679621696472,
      "learning_rate": 1.3248480626187642e-05,
      "loss": 2.5034,
      "step": 30615
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0322473049163818,
      "learning_rate": 1.3248091216716396e-05,
      "loss": 2.4313,
      "step": 30616
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1498444080352783,
      "learning_rate": 1.324770180173876e-05,
      "loss": 2.2154,
      "step": 30617
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0460022687911987,
      "learning_rate": 1.3247312381255381e-05,
      "loss": 2.4119,
      "step": 30618
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.013583779335022,
      "learning_rate": 1.3246922955266928e-05,
      "loss": 2.3692,
      "step": 30619
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9879340529441833,
      "learning_rate": 1.3246533523774061e-05,
      "loss": 2.4776,
      "step": 30620
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.079097032546997,
      "learning_rate": 1.3246144086777437e-05,
      "loss": 2.3304,
      "step": 30621
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9800634384155273,
      "learning_rate": 1.3245754644277716e-05,
      "loss": 2.4464,
      "step": 30622
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9922298789024353,
      "learning_rate": 1.324536519627556e-05,
      "loss": 2.4025,
      "step": 30623
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0551526546478271,
      "learning_rate": 1.3244975742771626e-05,
      "loss": 2.4238,
      "step": 30624
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0279929637908936,
      "learning_rate": 1.3244586283766578e-05,
      "loss": 2.4525,
      "step": 30625
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0848252773284912,
      "learning_rate": 1.3244196819261076e-05,
      "loss": 2.5702,
      "step": 30626
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0330545902252197,
      "learning_rate": 1.3243807349255782e-05,
      "loss": 2.2144,
      "step": 30627
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.023484706878662,
      "learning_rate": 1.324341787375135e-05,
      "loss": 2.4665,
      "step": 30628
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1226837635040283,
      "learning_rate": 1.3243028392748446e-05,
      "loss": 2.3543,
      "step": 30629
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0927584171295166,
      "learning_rate": 1.3242638906247724e-05,
      "loss": 2.3674,
      "step": 30630
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.056942343711853,
      "learning_rate": 1.3242249414249853e-05,
      "loss": 2.4624,
      "step": 30631
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.063273549079895,
      "learning_rate": 1.3241859916755484e-05,
      "loss": 2.443,
      "step": 30632
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0398998260498047,
      "learning_rate": 1.3241470413765284e-05,
      "loss": 2.3975,
      "step": 30633
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.120786428451538,
      "learning_rate": 1.3241080905279915e-05,
      "loss": 2.5359,
      "step": 30634
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9805760383605957,
      "learning_rate": 1.3240691391300032e-05,
      "loss": 2.151,
      "step": 30635
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0315542221069336,
      "learning_rate": 1.3240301871826293e-05,
      "loss": 2.66,
      "step": 30636
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9804006814956665,
      "learning_rate": 1.3239912346859366e-05,
      "loss": 2.308,
      "step": 30637
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.240935206413269,
      "learning_rate": 1.3239522816399909e-05,
      "loss": 2.3977,
      "step": 30638
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0319007635116577,
      "learning_rate": 1.3239133280448577e-05,
      "loss": 2.3046,
      "step": 30639
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0474274158477783,
      "learning_rate": 1.3238743739006035e-05,
      "loss": 2.2922,
      "step": 30640
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0988166332244873,
      "learning_rate": 1.3238354192072946e-05,
      "loss": 2.3435,
      "step": 30641
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9892976880073547,
      "learning_rate": 1.3237964639649963e-05,
      "loss": 2.4232,
      "step": 30642
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0526846647262573,
      "learning_rate": 1.3237575081737753e-05,
      "loss": 2.3586,
      "step": 30643
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0184824466705322,
      "learning_rate": 1.3237185518336972e-05,
      "loss": 2.4221,
      "step": 30644
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0890370607376099,
      "learning_rate": 1.3236795949448282e-05,
      "loss": 2.3828,
      "step": 30645
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9757750034332275,
      "learning_rate": 1.3236406375072346e-05,
      "loss": 2.4746,
      "step": 30646
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0482953786849976,
      "learning_rate": 1.3236016795209824e-05,
      "loss": 2.424,
      "step": 30647
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.172858715057373,
      "learning_rate": 1.323562720986137e-05,
      "loss": 2.3541,
      "step": 30648
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0673577785491943,
      "learning_rate": 1.3235237619027652e-05,
      "loss": 2.4035,
      "step": 30649
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0284284353256226,
      "learning_rate": 1.3234848022709326e-05,
      "loss": 2.3144,
      "step": 30650
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7075821161270142,
      "learning_rate": 1.3234458420907053e-05,
      "loss": 2.5581,
      "step": 30651
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.135259985923767,
      "learning_rate": 1.3234068813621497e-05,
      "loss": 2.5011,
      "step": 30652
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.039837121963501,
      "learning_rate": 1.3233679200853314e-05,
      "loss": 2.409,
      "step": 30653
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1234849691390991,
      "learning_rate": 1.323328958260317e-05,
      "loss": 2.1612,
      "step": 30654
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0318347215652466,
      "learning_rate": 1.3232899958871719e-05,
      "loss": 2.3662,
      "step": 30655
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0485306978225708,
      "learning_rate": 1.3232510329659624e-05,
      "loss": 2.4719,
      "step": 30656
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0537986755371094,
      "learning_rate": 1.3232120694967548e-05,
      "loss": 2.1882,
      "step": 30657
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1491225957870483,
      "learning_rate": 1.323173105479615e-05,
      "loss": 2.3534,
      "step": 30658
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0903536081314087,
      "learning_rate": 1.3231341409146086e-05,
      "loss": 2.5352,
      "step": 30659
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0547348260879517,
      "learning_rate": 1.3230951758018026e-05,
      "loss": 2.4137,
      "step": 30660
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0252875089645386,
      "learning_rate": 1.3230562101412623e-05,
      "loss": 2.383,
      "step": 30661
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.041225790977478,
      "learning_rate": 1.3230172439330539e-05,
      "loss": 2.3973,
      "step": 30662
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0807347297668457,
      "learning_rate": 1.3229782771772434e-05,
      "loss": 2.5165,
      "step": 30663
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0046718120574951,
      "learning_rate": 1.3229393098738974e-05,
      "loss": 2.2616,
      "step": 30664
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.105810523033142,
      "learning_rate": 1.3229003420230816e-05,
      "loss": 2.3188,
      "step": 30665
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.051488995552063,
      "learning_rate": 1.3228613736248617e-05,
      "loss": 2.1465,
      "step": 30666
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3034747838974,
      "learning_rate": 1.3228224046793041e-05,
      "loss": 2.3,
      "step": 30667
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1004096269607544,
      "learning_rate": 1.3227834351864752e-05,
      "loss": 2.6293,
      "step": 30668
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9653860330581665,
      "learning_rate": 1.3227444651464404e-05,
      "loss": 2.2375,
      "step": 30669
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1526589393615723,
      "learning_rate": 1.322705494559266e-05,
      "loss": 2.3623,
      "step": 30670
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0645747184753418,
      "learning_rate": 1.3226665234250183e-05,
      "loss": 2.2676,
      "step": 30671
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1497126817703247,
      "learning_rate": 1.322627551743763e-05,
      "loss": 2.2259,
      "step": 30672
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9771604537963867,
      "learning_rate": 1.3225885795155666e-05,
      "loss": 2.316,
      "step": 30673
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.091831922531128,
      "learning_rate": 1.322549606740495e-05,
      "loss": 2.332,
      "step": 30674
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.190616488456726,
      "learning_rate": 1.3225106334186142e-05,
      "loss": 2.2245,
      "step": 30675
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0027614831924438,
      "learning_rate": 1.3224716595499901e-05,
      "loss": 2.3508,
      "step": 30676
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1017341613769531,
      "learning_rate": 1.3224326851346889e-05,
      "loss": 2.4264,
      "step": 30677
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.09529447555542,
      "learning_rate": 1.3223937101727768e-05,
      "loss": 2.4593,
      "step": 30678
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0318983793258667,
      "learning_rate": 1.3223547346643199e-05,
      "loss": 2.4445,
      "step": 30679
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0405505895614624,
      "learning_rate": 1.3223157586093844e-05,
      "loss": 2.5228,
      "step": 30680
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1159495115280151,
      "learning_rate": 1.3222767820080358e-05,
      "loss": 2.588,
      "step": 30681
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2249372005462646,
      "learning_rate": 1.3222378048603407e-05,
      "loss": 2.4149,
      "step": 30682
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.187132716178894,
      "learning_rate": 1.3221988271663648e-05,
      "loss": 2.4351,
      "step": 30683
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0196237564086914,
      "learning_rate": 1.3221598489261745e-05,
      "loss": 2.2083,
      "step": 30684
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.187789797782898,
      "learning_rate": 1.3221208701398356e-05,
      "loss": 2.4944,
      "step": 30685
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2178338766098022,
      "learning_rate": 1.3220818908074144e-05,
      "loss": 2.1912,
      "step": 30686
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9517617225646973,
      "learning_rate": 1.3220429109289773e-05,
      "loss": 2.3582,
      "step": 30687
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.095707893371582,
      "learning_rate": 1.3220039305045896e-05,
      "loss": 2.4132,
      "step": 30688
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3491504192352295,
      "learning_rate": 1.3219649495343179e-05,
      "loss": 2.3419,
      "step": 30689
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0145398378372192,
      "learning_rate": 1.3219259680182282e-05,
      "loss": 2.5868,
      "step": 30690
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0235260725021362,
      "learning_rate": 1.3218869859563865e-05,
      "loss": 2.2834,
      "step": 30691
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1080329418182373,
      "learning_rate": 1.3218480033488587e-05,
      "loss": 2.4655,
      "step": 30692
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9690984487533569,
      "learning_rate": 1.3218090201957116e-05,
      "loss": 2.2146,
      "step": 30693
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0885109901428223,
      "learning_rate": 1.3217700364970106e-05,
      "loss": 2.2496,
      "step": 30694
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0212939977645874,
      "learning_rate": 1.321731052252822e-05,
      "loss": 2.4587,
      "step": 30695
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2684727907180786,
      "learning_rate": 1.321692067463212e-05,
      "loss": 2.459,
      "step": 30696
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9813371300697327,
      "learning_rate": 1.3216530821282465e-05,
      "loss": 2.2143,
      "step": 30697
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0430161952972412,
      "learning_rate": 1.3216140962479912e-05,
      "loss": 2.2551,
      "step": 30698
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.107008934020996,
      "learning_rate": 1.3215751098225131e-05,
      "loss": 2.3472,
      "step": 30699
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1089438199996948,
      "learning_rate": 1.3215361228518782e-05,
      "loss": 2.4306,
      "step": 30700
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.095662236213684,
      "learning_rate": 1.3214971353361517e-05,
      "loss": 2.3097,
      "step": 30701
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.113460659980774,
      "learning_rate": 1.3214581472754004e-05,
      "loss": 2.5374,
      "step": 30702
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3007535934448242,
      "learning_rate": 1.32141915866969e-05,
      "loss": 2.583,
      "step": 30703
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1559603214263916,
      "learning_rate": 1.3213801695190873e-05,
      "loss": 2.227,
      "step": 30704
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0309579372406006,
      "learning_rate": 1.3213411798236575e-05,
      "loss": 2.3687,
      "step": 30705
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1311376094818115,
      "learning_rate": 1.3213021895834674e-05,
      "loss": 2.396,
      "step": 30706
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0111665725708008,
      "learning_rate": 1.3212631987985829e-05,
      "loss": 2.1465,
      "step": 30707
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.951533317565918,
      "learning_rate": 1.3212242074690698e-05,
      "loss": 2.499,
      "step": 30708
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.077684760093689,
      "learning_rate": 1.3211852155949945e-05,
      "loss": 2.3444,
      "step": 30709
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.246546745300293,
      "learning_rate": 1.321146223176423e-05,
      "loss": 2.2562,
      "step": 30710
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1286660432815552,
      "learning_rate": 1.3211072302134214e-05,
      "loss": 2.4356,
      "step": 30711
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2345603704452515,
      "learning_rate": 1.3210682367060557e-05,
      "loss": 2.2944,
      "step": 30712
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0106487274169922,
      "learning_rate": 1.3210292426543925e-05,
      "loss": 2.5503,
      "step": 30713
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1789947748184204,
      "learning_rate": 1.3209902480584974e-05,
      "loss": 2.5331,
      "step": 30714
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9995407462120056,
      "learning_rate": 1.3209512529184365e-05,
      "loss": 2.3066,
      "step": 30715
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0783041715621948,
      "learning_rate": 1.320912257234276e-05,
      "loss": 2.1925,
      "step": 30716
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0584492683410645,
      "learning_rate": 1.3208732610060824e-05,
      "loss": 2.2108,
      "step": 30717
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.232721209526062,
      "learning_rate": 1.3208342642339211e-05,
      "loss": 2.4691,
      "step": 30718
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0515495538711548,
      "learning_rate": 1.3207952669178588e-05,
      "loss": 2.3407,
      "step": 30719
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9570008516311646,
      "learning_rate": 1.3207562690579616e-05,
      "loss": 2.5675,
      "step": 30720
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0320048332214355,
      "learning_rate": 1.320717270654295e-05,
      "loss": 2.3475,
      "step": 30721
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1109744310379028,
      "learning_rate": 1.3206782717069258e-05,
      "loss": 2.4738,
      "step": 30722
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3111908435821533,
      "learning_rate": 1.3206392722159196e-05,
      "loss": 2.3061,
      "step": 30723
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1521177291870117,
      "learning_rate": 1.3206002721813431e-05,
      "loss": 2.3754,
      "step": 30724
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.108386754989624,
      "learning_rate": 1.320561271603262e-05,
      "loss": 2.3409,
      "step": 30725
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3214126825332642,
      "learning_rate": 1.320522270481742e-05,
      "loss": 2.3849,
      "step": 30726
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1544581651687622,
      "learning_rate": 1.3204832688168503e-05,
      "loss": 2.5508,
      "step": 30727
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0417673587799072,
      "learning_rate": 1.3204442666086522e-05,
      "loss": 2.4064,
      "step": 30728
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0391310453414917,
      "learning_rate": 1.320405263857214e-05,
      "loss": 2.385,
      "step": 30729
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0660593509674072,
      "learning_rate": 1.3203662605626018e-05,
      "loss": 2.2098,
      "step": 30730
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9914316534996033,
      "learning_rate": 1.3203272567248816e-05,
      "loss": 2.2673,
      "step": 30731
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0597617626190186,
      "learning_rate": 1.32028825234412e-05,
      "loss": 2.2961,
      "step": 30732
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2015715837478638,
      "learning_rate": 1.3202492474203829e-05,
      "loss": 2.2622,
      "step": 30733
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0685878992080688,
      "learning_rate": 1.3202102419537361e-05,
      "loss": 2.5814,
      "step": 30734
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0466041564941406,
      "learning_rate": 1.320171235944246e-05,
      "loss": 2.6833,
      "step": 30735
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.049952507019043,
      "learning_rate": 1.3201322293919788e-05,
      "loss": 2.1862,
      "step": 30736
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0659153461456299,
      "learning_rate": 1.3200932222970004e-05,
      "loss": 2.3982,
      "step": 30737
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0309888124465942,
      "learning_rate": 1.3200542146593771e-05,
      "loss": 2.5204,
      "step": 30738
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0762860774993896,
      "learning_rate": 1.3200152064791754e-05,
      "loss": 2.451,
      "step": 30739
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0864953994750977,
      "learning_rate": 1.3199761977564604e-05,
      "loss": 2.2938,
      "step": 30740
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1590667963027954,
      "learning_rate": 1.319937188491299e-05,
      "loss": 2.3636,
      "step": 30741
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0487289428710938,
      "learning_rate": 1.3198981786837573e-05,
      "loss": 2.0261,
      "step": 30742
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3494197130203247,
      "learning_rate": 1.3198591683339012e-05,
      "loss": 2.452,
      "step": 30743
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1759482622146606,
      "learning_rate": 1.3198201574417971e-05,
      "loss": 2.4336,
      "step": 30744
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.053929328918457,
      "learning_rate": 1.3197811460075108e-05,
      "loss": 2.2409,
      "step": 30745
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0399123430252075,
      "learning_rate": 1.3197421340311088e-05,
      "loss": 2.5797,
      "step": 30746
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.133057951927185,
      "learning_rate": 1.3197031215126567e-05,
      "loss": 2.4138,
      "step": 30747
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4326565265655518,
      "learning_rate": 1.3196641084522213e-05,
      "loss": 2.5988,
      "step": 30748
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1430141925811768,
      "learning_rate": 1.3196250948498681e-05,
      "loss": 2.3133,
      "step": 30749
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.109565019607544,
      "learning_rate": 1.3195860807056637e-05,
      "loss": 2.4404,
      "step": 30750
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0265060663223267,
      "learning_rate": 1.3195470660196743e-05,
      "loss": 2.3848,
      "step": 30751
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1308187246322632,
      "learning_rate": 1.3195080507919658e-05,
      "loss": 2.7037,
      "step": 30752
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0580517053604126,
      "learning_rate": 1.3194690350226043e-05,
      "loss": 2.5873,
      "step": 30753
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0343433618545532,
      "learning_rate": 1.3194300187116558e-05,
      "loss": 2.0374,
      "step": 30754
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2423499822616577,
      "learning_rate": 1.3193910018591872e-05,
      "loss": 2.6368,
      "step": 30755
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2345186471939087,
      "learning_rate": 1.3193519844652636e-05,
      "loss": 2.3313,
      "step": 30756
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1268699169158936,
      "learning_rate": 1.3193129665299518e-05,
      "loss": 2.5847,
      "step": 30757
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2398048639297485,
      "learning_rate": 1.3192739480533177e-05,
      "loss": 2.4532,
      "step": 30758
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2001748085021973,
      "learning_rate": 1.3192349290354278e-05,
      "loss": 2.4349,
      "step": 30759
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1382205486297607,
      "learning_rate": 1.3191959094763478e-05,
      "loss": 2.3187,
      "step": 30760
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2090785503387451,
      "learning_rate": 1.319156889376144e-05,
      "loss": 2.3657,
      "step": 30761
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0598690509796143,
      "learning_rate": 1.3191178687348827e-05,
      "loss": 2.4592,
      "step": 30762
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1971973180770874,
      "learning_rate": 1.3190788475526297e-05,
      "loss": 2.2602,
      "step": 30763
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1307417154312134,
      "learning_rate": 1.3190398258294516e-05,
      "loss": 2.4122,
      "step": 30764
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1213687658309937,
      "learning_rate": 1.3190008035654142e-05,
      "loss": 2.1096,
      "step": 30765
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0850658416748047,
      "learning_rate": 1.3189617807605843e-05,
      "loss": 2.2385,
      "step": 30766
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9998976588249207,
      "learning_rate": 1.3189227574150271e-05,
      "loss": 2.2406,
      "step": 30767
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1044070720672607,
      "learning_rate": 1.3188837335288093e-05,
      "loss": 2.2585,
      "step": 30768
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1847963333129883,
      "learning_rate": 1.318844709101997e-05,
      "loss": 2.6489,
      "step": 30769
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0118260383605957,
      "learning_rate": 1.3188056841346562e-05,
      "loss": 2.427,
      "step": 30770
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1679680347442627,
      "learning_rate": 1.3187666586268533e-05,
      "loss": 2.3476,
      "step": 30771
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9746150374412537,
      "learning_rate": 1.3187276325786545e-05,
      "loss": 2.3936,
      "step": 30772
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3453096151351929,
      "learning_rate": 1.3186886059901256e-05,
      "loss": 2.517,
      "step": 30773
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1206060647964478,
      "learning_rate": 1.318649578861333e-05,
      "loss": 2.4873,
      "step": 30774
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.026926040649414,
      "learning_rate": 1.3186105511923426e-05,
      "loss": 2.3164,
      "step": 30775
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6785603761672974,
      "learning_rate": 1.3185715229832208e-05,
      "loss": 2.3169,
      "step": 30776
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0306867361068726,
      "learning_rate": 1.318532494234034e-05,
      "loss": 2.2808,
      "step": 30777
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.013066053390503,
      "learning_rate": 1.318493464944848e-05,
      "loss": 2.2944,
      "step": 30778
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0329350233078003,
      "learning_rate": 1.318454435115729e-05,
      "loss": 2.503,
      "step": 30779
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.050734043121338,
      "learning_rate": 1.3184154047467434e-05,
      "loss": 2.1395,
      "step": 30780
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.200274109840393,
      "learning_rate": 1.3183763738379572e-05,
      "loss": 2.2573,
      "step": 30781
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2381558418273926,
      "learning_rate": 1.3183373423894365e-05,
      "loss": 2.4939,
      "step": 30782
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.161510705947876,
      "learning_rate": 1.3182983104012477e-05,
      "loss": 2.5401,
      "step": 30783
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0198798179626465,
      "learning_rate": 1.3182592778734566e-05,
      "loss": 2.2917,
      "step": 30784
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1327100992202759,
      "learning_rate": 1.3182202448061296e-05,
      "loss": 2.422,
      "step": 30785
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9745381474494934,
      "learning_rate": 1.3181812111993333e-05,
      "loss": 2.4145,
      "step": 30786
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0914734601974487,
      "learning_rate": 1.3181421770531328e-05,
      "loss": 2.3369,
      "step": 30787
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9826573729515076,
      "learning_rate": 1.3181031423675952e-05,
      "loss": 2.6581,
      "step": 30788
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.379846453666687,
      "learning_rate": 1.3180641071427864e-05,
      "loss": 2.3759,
      "step": 30789
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0748728513717651,
      "learning_rate": 1.3180250713787724e-05,
      "loss": 2.3011,
      "step": 30790
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9926534295082092,
      "learning_rate": 1.3179860350756196e-05,
      "loss": 2.3419,
      "step": 30791
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0238186120986938,
      "learning_rate": 1.3179469982333943e-05,
      "loss": 2.4485,
      "step": 30792
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1738084554672241,
      "learning_rate": 1.3179079608521624e-05,
      "loss": 2.3252,
      "step": 30793
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.230660080909729,
      "learning_rate": 1.31786892293199e-05,
      "loss": 2.177,
      "step": 30794
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.109230875968933,
      "learning_rate": 1.3178298844729433e-05,
      "loss": 2.5297,
      "step": 30795
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0799119472503662,
      "learning_rate": 1.317790845475089e-05,
      "loss": 2.4915,
      "step": 30796
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1055272817611694,
      "learning_rate": 1.3177518059384926e-05,
      "loss": 2.2396,
      "step": 30797
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.026528239250183,
      "learning_rate": 1.3177127658632207e-05,
      "loss": 2.3444,
      "step": 30798
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2110732793807983,
      "learning_rate": 1.3176737252493395e-05,
      "loss": 2.4333,
      "step": 30799
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.191616415977478,
      "learning_rate": 1.317634684096915e-05,
      "loss": 2.5157,
      "step": 30800
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9603477120399475,
      "learning_rate": 1.3175956424060135e-05,
      "loss": 2.6104,
      "step": 30801
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0155887603759766,
      "learning_rate": 1.3175566001767009e-05,
      "loss": 2.79,
      "step": 30802
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9461056590080261,
      "learning_rate": 1.3175175574090438e-05,
      "loss": 2.5073,
      "step": 30803
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9662023186683655,
      "learning_rate": 1.3174785141031081e-05,
      "loss": 2.0202,
      "step": 30804
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.176310658454895,
      "learning_rate": 1.3174394702589602e-05,
      "loss": 2.2305,
      "step": 30805
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0796010494232178,
      "learning_rate": 1.3174004258766662e-05,
      "loss": 2.4608,
      "step": 30806
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0616858005523682,
      "learning_rate": 1.3173613809562921e-05,
      "loss": 2.2365,
      "step": 30807
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.093867540359497,
      "learning_rate": 1.3173223354979044e-05,
      "loss": 2.5914,
      "step": 30808
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0819758176803589,
      "learning_rate": 1.317283289501569e-05,
      "loss": 2.5599,
      "step": 30809
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.129429578781128,
      "learning_rate": 1.3172442429673525e-05,
      "loss": 2.3228,
      "step": 30810
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.989740788936615,
      "learning_rate": 1.3172051958953208e-05,
      "loss": 2.3435,
      "step": 30811
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.984508216381073,
      "learning_rate": 1.31716614828554e-05,
      "loss": 2.5178,
      "step": 30812
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0859904289245605,
      "learning_rate": 1.3171271001380765e-05,
      "loss": 2.3313,
      "step": 30813
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.057498574256897,
      "learning_rate": 1.3170880514529967e-05,
      "loss": 2.1886,
      "step": 30814
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0218125581741333,
      "learning_rate": 1.3170490022303661e-05,
      "loss": 2.2991,
      "step": 30815
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0471478700637817,
      "learning_rate": 1.3170099524702515e-05,
      "loss": 2.3514,
      "step": 30816
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9694737792015076,
      "learning_rate": 1.3169709021727188e-05,
      "loss": 2.3254,
      "step": 30817
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0153368711471558,
      "learning_rate": 1.3169318513378343e-05,
      "loss": 2.4005,
      "step": 30818
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.15132737159729,
      "learning_rate": 1.3168927999656647e-05,
      "loss": 2.5537,
      "step": 30819
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.974204421043396,
      "learning_rate": 1.3168537480562756e-05,
      "loss": 2.2885,
      "step": 30820
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1641007661819458,
      "learning_rate": 1.3168146956097329e-05,
      "loss": 2.1204,
      "step": 30821
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1641374826431274,
      "learning_rate": 1.3167756426261036e-05,
      "loss": 2.4753,
      "step": 30822
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0924128293991089,
      "learning_rate": 1.3167365891054535e-05,
      "loss": 2.3377,
      "step": 30823
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9617136120796204,
      "learning_rate": 1.3166975350478487e-05,
      "loss": 2.184,
      "step": 30824
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0451319217681885,
      "learning_rate": 1.316658480453356e-05,
      "loss": 2.3012,
      "step": 30825
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9861837029457092,
      "learning_rate": 1.3166194253220408e-05,
      "loss": 2.3453,
      "step": 30826
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.19960355758667,
      "learning_rate": 1.3165803696539696e-05,
      "loss": 2.3745,
      "step": 30827
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0870909690856934,
      "learning_rate": 1.316541313449209e-05,
      "loss": 2.2967,
      "step": 30828
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2333009243011475,
      "learning_rate": 1.3165022567078247e-05,
      "loss": 2.6085,
      "step": 30829
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0325168371200562,
      "learning_rate": 1.3164631994298831e-05,
      "loss": 2.4506,
      "step": 30830
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.048619270324707,
      "learning_rate": 1.3164241416154504e-05,
      "loss": 2.317,
      "step": 30831
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.121438980102539,
      "learning_rate": 1.3163850832645931e-05,
      "loss": 2.3451,
      "step": 30832
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.142137885093689,
      "learning_rate": 1.3163460243773769e-05,
      "loss": 2.6282,
      "step": 30833
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.021737813949585,
      "learning_rate": 1.3163069649538684e-05,
      "loss": 2.4194,
      "step": 30834
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9990231394767761,
      "learning_rate": 1.3162679049941336e-05,
      "loss": 2.5315,
      "step": 30835
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.297873854637146,
      "learning_rate": 1.3162288444982387e-05,
      "loss": 2.3483,
      "step": 30836
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3101496696472168,
      "learning_rate": 1.3161897834662502e-05,
      "loss": 2.1422,
      "step": 30837
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.10984468460083,
      "learning_rate": 1.316150721898234e-05,
      "loss": 2.4375,
      "step": 30838
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1699689626693726,
      "learning_rate": 1.3161116597942567e-05,
      "loss": 2.4499,
      "step": 30839
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.151691198348999,
      "learning_rate": 1.3160725971543842e-05,
      "loss": 2.3542,
      "step": 30840
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.223520278930664,
      "learning_rate": 1.3160335339786827e-05,
      "loss": 2.2875,
      "step": 30841
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0866127014160156,
      "learning_rate": 1.3159944702672186e-05,
      "loss": 2.4287,
      "step": 30842
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1321113109588623,
      "learning_rate": 1.315955406020058e-05,
      "loss": 2.3007,
      "step": 30843
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9623576402664185,
      "learning_rate": 1.3159163412372672e-05,
      "loss": 2.2774,
      "step": 30844
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.122981071472168,
      "learning_rate": 1.3158772759189127e-05,
      "loss": 2.3861,
      "step": 30845
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.104049801826477,
      "learning_rate": 1.31583821006506e-05,
      "loss": 2.2441,
      "step": 30846
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1949878931045532,
      "learning_rate": 1.315799143675776e-05,
      "loss": 2.4313,
      "step": 30847
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0491305589675903,
      "learning_rate": 1.3157600767511266e-05,
      "loss": 2.333,
      "step": 30848
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0473523139953613,
      "learning_rate": 1.3157210092911778e-05,
      "loss": 2.443,
      "step": 30849
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9579423069953918,
      "learning_rate": 1.3156819412959964e-05,
      "loss": 2.5197,
      "step": 30850
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1160786151885986,
      "learning_rate": 1.3156428727656482e-05,
      "loss": 2.3082,
      "step": 30851
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.065316081047058,
      "learning_rate": 1.3156038037002002e-05,
      "loss": 2.3201,
      "step": 30852
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1012427806854248,
      "learning_rate": 1.3155647340997176e-05,
      "loss": 2.5183,
      "step": 30853
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3354051113128662,
      "learning_rate": 1.3155256639642668e-05,
      "loss": 2.3733,
      "step": 30854
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0606969594955444,
      "learning_rate": 1.3154865932939147e-05,
      "loss": 2.1583,
      "step": 30855
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1544016599655151,
      "learning_rate": 1.315447522088727e-05,
      "loss": 2.4277,
      "step": 30856
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1103572845458984,
      "learning_rate": 1.3154084503487701e-05,
      "loss": 2.5466,
      "step": 30857
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2346302270889282,
      "learning_rate": 1.3153693780741103e-05,
      "loss": 2.2688,
      "step": 30858
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1712555885314941,
      "learning_rate": 1.3153303052648139e-05,
      "loss": 2.3368,
      "step": 30859
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0149257183074951,
      "learning_rate": 1.3152912319209466e-05,
      "loss": 2.2201,
      "step": 30860
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0241737365722656,
      "learning_rate": 1.3152521580425752e-05,
      "loss": 2.1358,
      "step": 30861
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1765635013580322,
      "learning_rate": 1.3152130836297656e-05,
      "loss": 2.3586,
      "step": 30862
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4971184730529785,
      "learning_rate": 1.3151740086825843e-05,
      "loss": 2.5333,
      "step": 30863
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9321167469024658,
      "learning_rate": 1.3151349332010977e-05,
      "loss": 2.3458,
      "step": 30864
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0941383838653564,
      "learning_rate": 1.3150958571853718e-05,
      "loss": 2.2966,
      "step": 30865
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1089633703231812,
      "learning_rate": 1.3150567806354726e-05,
      "loss": 2.4693,
      "step": 30866
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0339698791503906,
      "learning_rate": 1.3150177035514667e-05,
      "loss": 2.535,
      "step": 30867
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1935158967971802,
      "learning_rate": 1.3149786259334201e-05,
      "loss": 2.2144,
      "step": 30868
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.123163104057312,
      "learning_rate": 1.3149395477813994e-05,
      "loss": 2.4023,
      "step": 30869
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1434205770492554,
      "learning_rate": 1.3149004690954706e-05,
      "loss": 2.4248,
      "step": 30870
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9172873497009277,
      "learning_rate": 1.3148613898756996e-05,
      "loss": 2.3511,
      "step": 30871
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.037597417831421,
      "learning_rate": 1.3148223101221533e-05,
      "loss": 2.3476,
      "step": 30872
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.025100588798523,
      "learning_rate": 1.3147832298348979e-05,
      "loss": 2.2404,
      "step": 30873
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9244888424873352,
      "learning_rate": 1.3147441490139992e-05,
      "loss": 2.3425,
      "step": 30874
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.225486397743225,
      "learning_rate": 1.3147050676595239e-05,
      "loss": 2.209,
      "step": 30875
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1689389944076538,
      "learning_rate": 1.3146659857715376e-05,
      "loss": 2.644,
      "step": 30876
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.072428584098816,
      "learning_rate": 1.3146269033501073e-05,
      "loss": 2.3197,
      "step": 30877
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0987509489059448,
      "learning_rate": 1.3145878203952991e-05,
      "loss": 2.468,
      "step": 30878
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2080713510513306,
      "learning_rate": 1.3145487369071787e-05,
      "loss": 2.4275,
      "step": 30879
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.394631266593933,
      "learning_rate": 1.314509652885813e-05,
      "loss": 2.1938,
      "step": 30880
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2030452489852905,
      "learning_rate": 1.314470568331268e-05,
      "loss": 2.373,
      "step": 30881
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0748815536499023,
      "learning_rate": 1.31443148324361e-05,
      "loss": 2.5253,
      "step": 30882
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.950480043888092,
      "learning_rate": 1.3143923976229052e-05,
      "loss": 2.3186,
      "step": 30883
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0863580703735352,
      "learning_rate": 1.3143533114692199e-05,
      "loss": 2.1764,
      "step": 30884
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.094557523727417,
      "learning_rate": 1.3143142247826204e-05,
      "loss": 2.3292,
      "step": 30885
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9929307699203491,
      "learning_rate": 1.3142751375631727e-05,
      "loss": 2.3832,
      "step": 30886
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0427687168121338,
      "learning_rate": 1.3142360498109434e-05,
      "loss": 2.6181,
      "step": 30887
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.102766990661621,
      "learning_rate": 1.3141969615259988e-05,
      "loss": 2.6232,
      "step": 30888
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0448247194290161,
      "learning_rate": 1.3141578727084047e-05,
      "loss": 2.1958,
      "step": 30889
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0351670980453491,
      "learning_rate": 1.314118783358228e-05,
      "loss": 2.178,
      "step": 30890
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.068985939025879,
      "learning_rate": 1.3140796934755347e-05,
      "loss": 2.2899,
      "step": 30891
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0774942636489868,
      "learning_rate": 1.3140406030603909e-05,
      "loss": 2.6032,
      "step": 30892
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9576667547225952,
      "learning_rate": 1.314001512112863e-05,
      "loss": 2.48,
      "step": 30893
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9437291026115417,
      "learning_rate": 1.3139624206330171e-05,
      "loss": 2.3597,
      "step": 30894
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3998510837554932,
      "learning_rate": 1.3139233286209197e-05,
      "loss": 2.6441,
      "step": 30895
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.055263876914978,
      "learning_rate": 1.3138842360766369e-05,
      "loss": 2.2745,
      "step": 30896
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1859179735183716,
      "learning_rate": 1.3138451430002353e-05,
      "loss": 2.4233,
      "step": 30897
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.06223464012146,
      "learning_rate": 1.313806049391781e-05,
      "loss": 2.2995,
      "step": 30898
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0766758918762207,
      "learning_rate": 1.31376695525134e-05,
      "loss": 2.1714,
      "step": 30899
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.062269687652588,
      "learning_rate": 1.3137278605789787e-05,
      "loss": 2.3648,
      "step": 30900
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1018520593643188,
      "learning_rate": 1.3136887653747639e-05,
      "loss": 2.31,
      "step": 30901
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0842351913452148,
      "learning_rate": 1.313649669638761e-05,
      "loss": 2.2395,
      "step": 30902
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.047365665435791,
      "learning_rate": 1.3136105733710369e-05,
      "loss": 2.5841,
      "step": 30903
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1043701171875,
      "learning_rate": 1.3135714765716578e-05,
      "loss": 2.1889,
      "step": 30904
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.000550627708435,
      "learning_rate": 1.31353237924069e-05,
      "loss": 2.2046,
      "step": 30905
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.140055537223816,
      "learning_rate": 1.3134932813781992e-05,
      "loss": 2.2877,
      "step": 30906
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0255683660507202,
      "learning_rate": 1.3134541829842524e-05,
      "loss": 2.4283,
      "step": 30907
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2422120571136475,
      "learning_rate": 1.3134150840589158e-05,
      "loss": 2.8238,
      "step": 30908
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0810009241104126,
      "learning_rate": 1.3133759846022551e-05,
      "loss": 2.3752,
      "step": 30909
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0548094511032104,
      "learning_rate": 1.3133368846143372e-05,
      "loss": 2.4246,
      "step": 30910
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0434290170669556,
      "learning_rate": 1.3132977840952284e-05,
      "loss": 2.3699,
      "step": 30911
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0119282007217407,
      "learning_rate": 1.3132586830449945e-05,
      "loss": 2.3188,
      "step": 30912
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0615159273147583,
      "learning_rate": 1.3132195814637021e-05,
      "loss": 2.6493,
      "step": 30913
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0889571905136108,
      "learning_rate": 1.3131804793514173e-05,
      "loss": 2.3888,
      "step": 30914
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0799839496612549,
      "learning_rate": 1.3131413767082068e-05,
      "loss": 2.0521,
      "step": 30915
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0754613876342773,
      "learning_rate": 1.3131022735341365e-05,
      "loss": 2.1775,
      "step": 30916
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0662821531295776,
      "learning_rate": 1.3130631698292731e-05,
      "loss": 2.5469,
      "step": 30917
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1305259466171265,
      "learning_rate": 1.3130240655936822e-05,
      "loss": 2.4749,
      "step": 30918
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0226411819458008,
      "learning_rate": 1.3129849608274305e-05,
      "loss": 2.2316,
      "step": 30919
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1098018884658813,
      "learning_rate": 1.3129458555305842e-05,
      "loss": 2.1976,
      "step": 30920
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0604952573776245,
      "learning_rate": 1.31290674970321e-05,
      "loss": 2.2948,
      "step": 30921
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0141764879226685,
      "learning_rate": 1.3128676433453738e-05,
      "loss": 2.4291,
      "step": 30922
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.138174295425415,
      "learning_rate": 1.3128285364571419e-05,
      "loss": 2.5702,
      "step": 30923
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1558029651641846,
      "learning_rate": 1.3127894290385807e-05,
      "loss": 2.3358,
      "step": 30924
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.074718952178955,
      "learning_rate": 1.3127503210897566e-05,
      "loss": 2.2434,
      "step": 30925
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0347301959991455,
      "learning_rate": 1.3127112126107356e-05,
      "loss": 2.5425,
      "step": 30926
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9870879054069519,
      "learning_rate": 1.312672103601584e-05,
      "loss": 2.2955,
      "step": 30927
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1811704635620117,
      "learning_rate": 1.3126329940623684e-05,
      "loss": 2.5161,
      "step": 30928
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0114303827285767,
      "learning_rate": 1.3125938839931552e-05,
      "loss": 2.4416,
      "step": 30929
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.051151156425476,
      "learning_rate": 1.3125547733940103e-05,
      "loss": 2.5671,
      "step": 30930
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0417762994766235,
      "learning_rate": 1.312515662265e-05,
      "loss": 2.413,
      "step": 30931
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0513174533843994,
      "learning_rate": 1.3124765506061912e-05,
      "loss": 2.5012,
      "step": 30932
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.927852213382721,
      "learning_rate": 1.3124374384176494e-05,
      "loss": 2.555,
      "step": 30933
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0397748947143555,
      "learning_rate": 1.3123983256994413e-05,
      "loss": 2.3561,
      "step": 30934
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2242543697357178,
      "learning_rate": 1.312359212451633e-05,
      "loss": 2.3221,
      "step": 30935
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1688644886016846,
      "learning_rate": 1.3123200986742914e-05,
      "loss": 2.6088,
      "step": 30936
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0884450674057007,
      "learning_rate": 1.3122809843674826e-05,
      "loss": 2.3101,
      "step": 30937
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1022182703018188,
      "learning_rate": 1.3122418695312722e-05,
      "loss": 2.3585,
      "step": 30938
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1439005136489868,
      "learning_rate": 1.3122027541657274e-05,
      "loss": 2.4536,
      "step": 30939
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.099345088005066,
      "learning_rate": 1.3121636382709137e-05,
      "loss": 2.3586,
      "step": 30940
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9598014950752258,
      "learning_rate": 1.3121245218468981e-05,
      "loss": 2.3973,
      "step": 30941
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0779775381088257,
      "learning_rate": 1.3120854048937468e-05,
      "loss": 2.4307,
      "step": 30942
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.097381353378296,
      "learning_rate": 1.3120462874115259e-05,
      "loss": 2.5577,
      "step": 30943
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0969656705856323,
      "learning_rate": 1.3120071694003019e-05,
      "loss": 2.6848,
      "step": 30944
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0149482488632202,
      "learning_rate": 1.3119680508601407e-05,
      "loss": 2.6078,
      "step": 30945
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9878109097480774,
      "learning_rate": 1.311928931791109e-05,
      "loss": 2.3538,
      "step": 30946
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.06040358543396,
      "learning_rate": 1.311889812193273e-05,
      "loss": 2.435,
      "step": 30947
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.021435260772705,
      "learning_rate": 1.3118506920666993e-05,
      "loss": 2.5321,
      "step": 30948
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0785143375396729,
      "learning_rate": 1.3118115714114538e-05,
      "loss": 2.5363,
      "step": 30949
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0116382837295532,
      "learning_rate": 1.3117724502276032e-05,
      "loss": 2.2571,
      "step": 30950
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0089536905288696,
      "learning_rate": 1.3117333285152137e-05,
      "loss": 2.5354,
      "step": 30951
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1007102727890015,
      "learning_rate": 1.3116942062743511e-05,
      "loss": 2.3334,
      "step": 30952
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.069082498550415,
      "learning_rate": 1.3116550835050823e-05,
      "loss": 2.41,
      "step": 30953
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.030237078666687,
      "learning_rate": 1.3116159602074737e-05,
      "loss": 2.4001,
      "step": 30954
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1950339078903198,
      "learning_rate": 1.3115768363815913e-05,
      "loss": 2.2673,
      "step": 30955
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0103520154953003,
      "learning_rate": 1.3115377120275014e-05,
      "loss": 2.2117,
      "step": 30956
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.01472806930542,
      "learning_rate": 1.3114985871452709e-05,
      "loss": 2.2131,
      "step": 30957
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1248632669448853,
      "learning_rate": 1.3114594617349652e-05,
      "loss": 2.4393,
      "step": 30958
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0805391073226929,
      "learning_rate": 1.3114203357966514e-05,
      "loss": 2.3176,
      "step": 30959
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.125018835067749,
      "learning_rate": 1.3113812093303955e-05,
      "loss": 2.3345,
      "step": 30960
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0094348192214966,
      "learning_rate": 1.3113420823362639e-05,
      "loss": 2.2319,
      "step": 30961
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.123340129852295,
      "learning_rate": 1.3113029548143229e-05,
      "loss": 2.4103,
      "step": 30962
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0720460414886475,
      "learning_rate": 1.3112638267646385e-05,
      "loss": 2.1222,
      "step": 30963
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0110419988632202,
      "learning_rate": 1.311224698187278e-05,
      "loss": 2.3888,
      "step": 30964
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.133623719215393,
      "learning_rate": 1.3111855690823068e-05,
      "loss": 2.3985,
      "step": 30965
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1145888566970825,
      "learning_rate": 1.3111464394497912e-05,
      "loss": 2.3277,
      "step": 30966
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0161579847335815,
      "learning_rate": 1.3111073092897983e-05,
      "loss": 2.4245,
      "step": 30967
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.048333764076233,
      "learning_rate": 1.311068178602394e-05,
      "loss": 2.2689,
      "step": 30968
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0563040971755981,
      "learning_rate": 1.3110290473876446e-05,
      "loss": 2.2523,
      "step": 30969
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9567862749099731,
      "learning_rate": 1.3109899156456166e-05,
      "loss": 2.687,
      "step": 30970
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9793945550918579,
      "learning_rate": 1.3109507833763759e-05,
      "loss": 2.4156,
      "step": 30971
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0324586629867554,
      "learning_rate": 1.3109116505799895e-05,
      "loss": 2.5007,
      "step": 30972
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.122145652770996,
      "learning_rate": 1.3108725172565232e-05,
      "loss": 2.4312,
      "step": 30973
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1347358226776123,
      "learning_rate": 1.3108333834060436e-05,
      "loss": 2.5234,
      "step": 30974
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1741169691085815,
      "learning_rate": 1.310794249028617e-05,
      "loss": 2.4309,
      "step": 30975
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4819636344909668,
      "learning_rate": 1.3107551141243095e-05,
      "loss": 2.438,
      "step": 30976
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7224411964416504,
      "learning_rate": 1.3107159786931882e-05,
      "loss": 2.3852,
      "step": 30977
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1131211519241333,
      "learning_rate": 1.3106768427353188e-05,
      "loss": 2.1832,
      "step": 30978
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0835705995559692,
      "learning_rate": 1.3106377062507673e-05,
      "loss": 2.6562,
      "step": 30979
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.033595323562622,
      "learning_rate": 1.310598569239601e-05,
      "loss": 2.4352,
      "step": 30980
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9478339552879333,
      "learning_rate": 1.3105594317018854e-05,
      "loss": 2.3437,
      "step": 30981
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2745819091796875,
      "learning_rate": 1.3105202936376875e-05,
      "loss": 2.5666,
      "step": 30982
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9587855339050293,
      "learning_rate": 1.3104811550470734e-05,
      "loss": 2.1745,
      "step": 30983
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9952520728111267,
      "learning_rate": 1.3104420159301094e-05,
      "loss": 2.4422,
      "step": 30984
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1534851789474487,
      "learning_rate": 1.3104028762868616e-05,
      "loss": 2.4813,
      "step": 30985
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0918338298797607,
      "learning_rate": 1.3103637361173968e-05,
      "loss": 2.4582,
      "step": 30986
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0794897079467773,
      "learning_rate": 1.3103245954217814e-05,
      "loss": 2.3392,
      "step": 30987
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0611497163772583,
      "learning_rate": 1.3102854542000813e-05,
      "loss": 2.1243,
      "step": 30988
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0377626419067383,
      "learning_rate": 1.310246312452363e-05,
      "loss": 2.5172,
      "step": 30989
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.793876051902771,
      "learning_rate": 1.3102071701786931e-05,
      "loss": 2.3817,
      "step": 30990
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1084442138671875,
      "learning_rate": 1.310168027379138e-05,
      "loss": 2.4626,
      "step": 30991
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0753411054611206,
      "learning_rate": 1.3101288840537634e-05,
      "loss": 2.2637,
      "step": 30992
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1153874397277832,
      "learning_rate": 1.3100897402026362e-05,
      "loss": 2.5378,
      "step": 30993
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6696594953536987,
      "learning_rate": 1.3100505958258229e-05,
      "loss": 2.3183,
      "step": 30994
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9876852035522461,
      "learning_rate": 1.3100114509233894e-05,
      "loss": 2.5745,
      "step": 30995
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.017971158027649,
      "learning_rate": 1.3099723054954023e-05,
      "loss": 2.2455,
      "step": 30996
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1652017831802368,
      "learning_rate": 1.3099331595419283e-05,
      "loss": 2.3747,
      "step": 30997
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.006088376045227,
      "learning_rate": 1.3098940130630331e-05,
      "loss": 2.3134,
      "step": 30998
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.955545961856842,
      "learning_rate": 1.3098548660587832e-05,
      "loss": 2.3268,
      "step": 30999
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9736412167549133,
      "learning_rate": 1.3098157185292455e-05,
      "loss": 2.4341,
      "step": 31000
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.009482502937317,
      "learning_rate": 1.3097765704744857e-05,
      "loss": 2.1179,
      "step": 31001
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0425761938095093,
      "learning_rate": 1.309737421894571e-05,
      "loss": 2.4735,
      "step": 31002
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0761233568191528,
      "learning_rate": 1.309698272789567e-05,
      "loss": 2.5648,
      "step": 31003
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0543195009231567,
      "learning_rate": 1.30965912315954e-05,
      "loss": 2.4932,
      "step": 31004
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.216673731803894,
      "learning_rate": 1.3096199730045572e-05,
      "loss": 2.2842,
      "step": 31005
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2893656492233276,
      "learning_rate": 1.309580822324684e-05,
      "loss": 1.991,
      "step": 31006
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0578980445861816,
      "learning_rate": 1.3095416711199873e-05,
      "loss": 2.3854,
      "step": 31007
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.069743037223816,
      "learning_rate": 1.3095025193905334e-05,
      "loss": 2.6333,
      "step": 31008
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.028072476387024,
      "learning_rate": 1.309463367136389e-05,
      "loss": 2.3842,
      "step": 31009
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1525812149047852,
      "learning_rate": 1.3094242143576198e-05,
      "loss": 2.2306,
      "step": 31010
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.182889461517334,
      "learning_rate": 1.3093850610542926e-05,
      "loss": 2.368,
      "step": 31011
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0665370225906372,
      "learning_rate": 1.3093459072264738e-05,
      "loss": 2.0397,
      "step": 31012
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0402610301971436,
      "learning_rate": 1.3093067528742296e-05,
      "loss": 2.5956,
      "step": 31013
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0832217931747437,
      "learning_rate": 1.3092675979976263e-05,
      "loss": 2.3478,
      "step": 31014
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1499223709106445,
      "learning_rate": 1.3092284425967305e-05,
      "loss": 2.3888,
      "step": 31015
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4329910278320312,
      "learning_rate": 1.309189286671609e-05,
      "loss": 2.3721,
      "step": 31016
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0659703016281128,
      "learning_rate": 1.3091501302223271e-05,
      "loss": 2.3562,
      "step": 31017
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0454381704330444,
      "learning_rate": 1.3091109732489519e-05,
      "loss": 2.2798,
      "step": 31018
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1097335815429688,
      "learning_rate": 1.3090718157515498e-05,
      "loss": 2.2466,
      "step": 31019
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0991320610046387,
      "learning_rate": 1.3090326577301868e-05,
      "loss": 2.3845,
      "step": 31020
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2245122194290161,
      "learning_rate": 1.3089934991849296e-05,
      "loss": 2.5163,
      "step": 31021
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0886495113372803,
      "learning_rate": 1.3089543401158444e-05,
      "loss": 2.4974,
      "step": 31022
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0417084693908691,
      "learning_rate": 1.3089151805229983e-05,
      "loss": 2.1685,
      "step": 31023
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1170949935913086,
      "learning_rate": 1.3088760204064563e-05,
      "loss": 2.4245,
      "step": 31024
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1117316484451294,
      "learning_rate": 1.3088368597662857e-05,
      "loss": 2.442,
      "step": 31025
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0781219005584717,
      "learning_rate": 1.308797698602553e-05,
      "loss": 2.3088,
      "step": 31026
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0979987382888794,
      "learning_rate": 1.3087585369153242e-05,
      "loss": 2.2773,
      "step": 31027
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.000762701034546,
      "learning_rate": 1.3087193747046656e-05,
      "loss": 2.3056,
      "step": 31028
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0622789859771729,
      "learning_rate": 1.3086802119706444e-05,
      "loss": 2.1076,
      "step": 31029
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0387815237045288,
      "learning_rate": 1.3086410487133259e-05,
      "loss": 2.539,
      "step": 31030
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1485068798065186,
      "learning_rate": 1.308601884932777e-05,
      "loss": 2.4081,
      "step": 31031
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1237863302230835,
      "learning_rate": 1.3085627206290644e-05,
      "loss": 2.2611,
      "step": 31032
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0115422010421753,
      "learning_rate": 1.3085235558022538e-05,
      "loss": 2.5429,
      "step": 31033
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9964805245399475,
      "learning_rate": 1.3084843904524122e-05,
      "loss": 2.3664,
      "step": 31034
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2053797245025635,
      "learning_rate": 1.3084452245796058e-05,
      "loss": 2.6021,
      "step": 31035
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.118910789489746,
      "learning_rate": 1.3084060581839011e-05,
      "loss": 2.5137,
      "step": 31036
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0512809753417969,
      "learning_rate": 1.3083668912653642e-05,
      "loss": 2.4893,
      "step": 31037
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1707390546798706,
      "learning_rate": 1.3083277238240615e-05,
      "loss": 2.3007,
      "step": 31038
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1441380977630615,
      "learning_rate": 1.3082885558600597e-05,
      "loss": 2.3838,
      "step": 31039
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1607661247253418,
      "learning_rate": 1.308249387373425e-05,
      "loss": 2.3735,
      "step": 31040
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9896090626716614,
      "learning_rate": 1.308210218364224e-05,
      "loss": 2.2819,
      "step": 31041
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.103411078453064,
      "learning_rate": 1.308171048832523e-05,
      "loss": 2.274,
      "step": 31042
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0585720539093018,
      "learning_rate": 1.3081318787783882e-05,
      "loss": 2.4438,
      "step": 31043
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9998274445533752,
      "learning_rate": 1.3080927082018864e-05,
      "loss": 2.3097,
      "step": 31044
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0526796579360962,
      "learning_rate": 1.3080535371030836e-05,
      "loss": 2.2321,
      "step": 31045
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0803890228271484,
      "learning_rate": 1.3080143654820465e-05,
      "loss": 2.4879,
      "step": 31046
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.037024736404419,
      "learning_rate": 1.3079751933388416e-05,
      "loss": 2.343,
      "step": 31047
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2010599374771118,
      "learning_rate": 1.3079360206735348e-05,
      "loss": 2.6394,
      "step": 31048
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0856951475143433,
      "learning_rate": 1.3078968474861929e-05,
      "loss": 2.3798,
      "step": 31049
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1050163507461548,
      "learning_rate": 1.3078576737768822e-05,
      "loss": 2.2133,
      "step": 31050
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0293387174606323,
      "learning_rate": 1.3078184995456692e-05,
      "loss": 2.2709,
      "step": 31051
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0032435655593872,
      "learning_rate": 1.3077793247926201e-05,
      "loss": 2.4265,
      "step": 31052
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0262199640274048,
      "learning_rate": 1.3077401495178015e-05,
      "loss": 2.3419,
      "step": 31053
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1495517492294312,
      "learning_rate": 1.3077009737212798e-05,
      "loss": 2.4631,
      "step": 31054
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.983694851398468,
      "learning_rate": 1.3076617974031214e-05,
      "loss": 2.3631,
      "step": 31055
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9766689538955688,
      "learning_rate": 1.3076226205633926e-05,
      "loss": 2.4488,
      "step": 31056
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1107360124588013,
      "learning_rate": 1.3075834432021602e-05,
      "loss": 2.3089,
      "step": 31057
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0597856044769287,
      "learning_rate": 1.3075442653194899e-05,
      "loss": 2.498,
      "step": 31058
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.055027723312378,
      "learning_rate": 1.3075050869154486e-05,
      "loss": 2.5607,
      "step": 31059
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9638487100601196,
      "learning_rate": 1.307465907990103e-05,
      "loss": 2.3336,
      "step": 31060
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9264606237411499,
      "learning_rate": 1.3074267285435188e-05,
      "loss": 2.4252,
      "step": 31061
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2441459894180298,
      "learning_rate": 1.307387548575763e-05,
      "loss": 2.5057,
      "step": 31062
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1565654277801514,
      "learning_rate": 1.307348368086902e-05,
      "loss": 2.4738,
      "step": 31063
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0905470848083496,
      "learning_rate": 1.3073091870770016e-05,
      "loss": 2.4738,
      "step": 31064
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0543841123580933,
      "learning_rate": 1.307270005546129e-05,
      "loss": 2.678,
      "step": 31065
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9607625603675842,
      "learning_rate": 1.3072308234943501e-05,
      "loss": 2.3257,
      "step": 31066
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1042689085006714,
      "learning_rate": 1.3071916409217315e-05,
      "loss": 2.6149,
      "step": 31067
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.00127112865448,
      "learning_rate": 1.3071524578283397e-05,
      "loss": 2.3854,
      "step": 31068
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0582438707351685,
      "learning_rate": 1.3071132742142415e-05,
      "loss": 2.4469,
      "step": 31069
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.011969804763794,
      "learning_rate": 1.3070740900795022e-05,
      "loss": 2.2727,
      "step": 31070
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0295943021774292,
      "learning_rate": 1.3070349054241893e-05,
      "loss": 2.3477,
      "step": 31071
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0037351846694946,
      "learning_rate": 1.3069957202483688e-05,
      "loss": 2.2271,
      "step": 31072
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0229370594024658,
      "learning_rate": 1.3069565345521069e-05,
      "loss": 2.3675,
      "step": 31073
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0287489891052246,
      "learning_rate": 1.3069173483354704e-05,
      "loss": 2.3102,
      "step": 31074
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1276105642318726,
      "learning_rate": 1.306878161598526e-05,
      "loss": 2.2371,
      "step": 31075
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0607274770736694,
      "learning_rate": 1.3068389743413396e-05,
      "loss": 2.4951,
      "step": 31076
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1733332872390747,
      "learning_rate": 1.3067997865639777e-05,
      "loss": 2.4664,
      "step": 31077
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9830305576324463,
      "learning_rate": 1.306760598266507e-05,
      "loss": 2.4693,
      "step": 31078
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0823140144348145,
      "learning_rate": 1.3067214094489935e-05,
      "loss": 2.3682,
      "step": 31079
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0238217115402222,
      "learning_rate": 1.3066822201115041e-05,
      "loss": 2.3102,
      "step": 31080
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.09946870803833,
      "learning_rate": 1.3066430302541049e-05,
      "loss": 2.4912,
      "step": 31081
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9212365746498108,
      "learning_rate": 1.3066038398768628e-05,
      "loss": 2.5659,
      "step": 31082
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0033823251724243,
      "learning_rate": 1.3065646489798436e-05,
      "loss": 2.1805,
      "step": 31083
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1255722045898438,
      "learning_rate": 1.306525457563114e-05,
      "loss": 2.4261,
      "step": 31084
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0585092306137085,
      "learning_rate": 1.3064862656267408e-05,
      "loss": 2.3484,
      "step": 31085
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1151174306869507,
      "learning_rate": 1.3064470731707898e-05,
      "loss": 2.4064,
      "step": 31086
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0101324319839478,
      "learning_rate": 1.3064078801953282e-05,
      "loss": 2.3931,
      "step": 31087
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0494723320007324,
      "learning_rate": 1.3063686867004216e-05,
      "loss": 2.3389,
      "step": 31088
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.003496527671814,
      "learning_rate": 1.3063294926861372e-05,
      "loss": 2.4709,
      "step": 31089
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9966623187065125,
      "learning_rate": 1.306290298152541e-05,
      "loss": 2.3661,
      "step": 31090
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0329867601394653,
      "learning_rate": 1.3062511030996995e-05,
      "loss": 2.6684,
      "step": 31091
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1515815258026123,
      "learning_rate": 1.306211907527679e-05,
      "loss": 2.2663,
      "step": 31092
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0753364562988281,
      "learning_rate": 1.3061727114365465e-05,
      "loss": 2.3112,
      "step": 31093
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1936516761779785,
      "learning_rate": 1.3061335148263679e-05,
      "loss": 2.4331,
      "step": 31094
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0705264806747437,
      "learning_rate": 1.3060943176972103e-05,
      "loss": 2.4126,
      "step": 31095
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0113962888717651,
      "learning_rate": 1.3060551200491392e-05,
      "loss": 2.5029,
      "step": 31096
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0446877479553223,
      "learning_rate": 1.3060159218822217e-05,
      "loss": 2.4127,
      "step": 31097
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0076549053192139,
      "learning_rate": 1.3059767231965239e-05,
      "loss": 2.4827,
      "step": 31098
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.118619680404663,
      "learning_rate": 1.3059375239921126e-05,
      "loss": 2.3975,
      "step": 31099
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0614681243896484,
      "learning_rate": 1.3058983242690542e-05,
      "loss": 2.4749,
      "step": 31100
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1955641508102417,
      "learning_rate": 1.305859124027415e-05,
      "loss": 2.4147,
      "step": 31101
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.9476941227912903,
      "learning_rate": 1.3058199232672614e-05,
      "loss": 2.2984,
      "step": 31102
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.0849988460540771,
      "learning_rate": 1.3057807219886602e-05,
      "loss": 2.3848,
      "step": 31103
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8458536863327026,
      "learning_rate": 1.3057415201916775e-05,
      "loss": 2.226,
      "step": 31104
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1739990711212158,
      "learning_rate": 1.30570231787638e-05,
      "loss": 2.3227,
      "step": 31105
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0161757469177246,
      "learning_rate": 1.305663115042834e-05,
      "loss": 2.3801,
      "step": 31106
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9931632876396179,
      "learning_rate": 1.3056239116911057e-05,
      "loss": 2.5625,
      "step": 31107
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.071954369544983,
      "learning_rate": 1.3055847078212622e-05,
      "loss": 2.7053,
      "step": 31108
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0333045721054077,
      "learning_rate": 1.3055455034333697e-05,
      "loss": 2.5288,
      "step": 31109
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.033395767211914,
      "learning_rate": 1.3055062985274944e-05,
      "loss": 2.3889,
      "step": 31110
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9924702048301697,
      "learning_rate": 1.3054670931037028e-05,
      "loss": 2.2607,
      "step": 31111
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1175154447555542,
      "learning_rate": 1.3054278871620617e-05,
      "loss": 2.2952,
      "step": 31112
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.079402208328247,
      "learning_rate": 1.3053886807026372e-05,
      "loss": 2.6995,
      "step": 31113
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0840675830841064,
      "learning_rate": 1.3053494737254961e-05,
      "loss": 2.4906,
      "step": 31114
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.003931999206543,
      "learning_rate": 1.3053102662307051e-05,
      "loss": 2.4048,
      "step": 31115
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1010329723358154,
      "learning_rate": 1.3052710582183297e-05,
      "loss": 2.4429,
      "step": 31116
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.008000135421753,
      "learning_rate": 1.3052318496884372e-05,
      "loss": 2.4163,
      "step": 31117
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0491374731063843,
      "learning_rate": 1.3051926406410935e-05,
      "loss": 2.4483,
      "step": 31118
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0415964126586914,
      "learning_rate": 1.3051534310763656e-05,
      "loss": 2.2618,
      "step": 31119
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0629310607910156,
      "learning_rate": 1.3051142209943199e-05,
      "loss": 2.2377,
      "step": 31120
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0678842067718506,
      "learning_rate": 1.3050750103950225e-05,
      "loss": 2.3227,
      "step": 31121
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1945159435272217,
      "learning_rate": 1.3050357992785405e-05,
      "loss": 2.218,
      "step": 31122
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0457789897918701,
      "learning_rate": 1.3049965876449395e-05,
      "loss": 2.3434,
      "step": 31123
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1285699605941772,
      "learning_rate": 1.3049573754942869e-05,
      "loss": 2.3617,
      "step": 31124
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2833787202835083,
      "learning_rate": 1.304918162826648e-05,
      "loss": 2.5848,
      "step": 31125
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0535914897918701,
      "learning_rate": 1.3048789496420905e-05,
      "loss": 2.2665,
      "step": 31126
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.142207384109497,
      "learning_rate": 1.3048397359406803e-05,
      "loss": 2.3523,
      "step": 31127
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0625739097595215,
      "learning_rate": 1.3048005217224842e-05,
      "loss": 2.2538,
      "step": 31128
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0552427768707275,
      "learning_rate": 1.3047613069875682e-05,
      "loss": 2.4303,
      "step": 31129
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0022023916244507,
      "learning_rate": 1.3047220917359991e-05,
      "loss": 2.6056,
      "step": 31130
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0648130178451538,
      "learning_rate": 1.3046828759678431e-05,
      "loss": 2.3097,
      "step": 31131
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.113665223121643,
      "learning_rate": 1.304643659683167e-05,
      "loss": 2.6072,
      "step": 31132
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0653982162475586,
      "learning_rate": 1.3046044428820373e-05,
      "loss": 2.4211,
      "step": 31133
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.231963038444519,
      "learning_rate": 1.3045652255645202e-05,
      "loss": 2.5722,
      "step": 31134
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0547176599502563,
      "learning_rate": 1.3045260077306823e-05,
      "loss": 2.213,
      "step": 31135
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.221161127090454,
      "learning_rate": 1.30448678938059e-05,
      "loss": 2.5453,
      "step": 31136
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0193589925765991,
      "learning_rate": 1.3044475705143102e-05,
      "loss": 2.4449,
      "step": 31137
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1685868501663208,
      "learning_rate": 1.304408351131909e-05,
      "loss": 2.5061,
      "step": 31138
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9995371699333191,
      "learning_rate": 1.3043691312334528e-05,
      "loss": 2.3447,
      "step": 31139
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2012054920196533,
      "learning_rate": 1.3043299108190083e-05,
      "loss": 2.3478,
      "step": 31140
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1218271255493164,
      "learning_rate": 1.3042906898886419e-05,
      "loss": 2.2884,
      "step": 31141
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1190098524093628,
      "learning_rate": 1.3042514684424203e-05,
      "loss": 2.187,
      "step": 31142
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1956963539123535,
      "learning_rate": 1.3042122464804099e-05,
      "loss": 2.2959,
      "step": 31143
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0351413488388062,
      "learning_rate": 1.3041730240026768e-05,
      "loss": 2.398,
      "step": 31144
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9926115274429321,
      "learning_rate": 1.3041338010092881e-05,
      "loss": 2.4664,
      "step": 31145
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0545152425765991,
      "learning_rate": 1.3040945775003097e-05,
      "loss": 2.4137,
      "step": 31146
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0703192949295044,
      "learning_rate": 1.3040553534758086e-05,
      "loss": 2.1875,
      "step": 31147
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1142185926437378,
      "learning_rate": 1.3040161289358512e-05,
      "loss": 2.4732,
      "step": 31148
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0738945007324219,
      "learning_rate": 1.3039769038805038e-05,
      "loss": 2.1034,
      "step": 31149
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1330347061157227,
      "learning_rate": 1.3039376783098328e-05,
      "loss": 2.4116,
      "step": 31150
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1882981061935425,
      "learning_rate": 1.3038984522239051e-05,
      "loss": 2.5896,
      "step": 31151
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0915765762329102,
      "learning_rate": 1.3038592256227869e-05,
      "loss": 2.5835,
      "step": 31152
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.103614091873169,
      "learning_rate": 1.3038199985065447e-05,
      "loss": 2.5017,
      "step": 31153
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.178037405014038,
      "learning_rate": 1.3037807708752452e-05,
      "loss": 2.3193,
      "step": 31154
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9712868928909302,
      "learning_rate": 1.3037415427289549e-05,
      "loss": 2.3589,
      "step": 31155
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0625872611999512,
      "learning_rate": 1.30370231406774e-05,
      "loss": 2.2914,
      "step": 31156
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0380381345748901,
      "learning_rate": 1.3036630848916674e-05,
      "loss": 2.5723,
      "step": 31157
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.060706615447998,
      "learning_rate": 1.3036238552008032e-05,
      "loss": 2.5414,
      "step": 31158
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1221083402633667,
      "learning_rate": 1.3035846249952142e-05,
      "loss": 2.2956,
      "step": 31159
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4304916858673096,
      "learning_rate": 1.3035453942749668e-05,
      "loss": 2.4902,
      "step": 31160
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0562834739685059,
      "learning_rate": 1.3035061630401276e-05,
      "loss": 2.1758,
      "step": 31161
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9699026942253113,
      "learning_rate": 1.303466931290763e-05,
      "loss": 2.3063,
      "step": 31162
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1577836275100708,
      "learning_rate": 1.3034276990269393e-05,
      "loss": 2.222,
      "step": 31163
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.105299711227417,
      "learning_rate": 1.3033884662487233e-05,
      "loss": 2.4715,
      "step": 31164
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0041658878326416,
      "learning_rate": 1.3033492329561817e-05,
      "loss": 2.4731,
      "step": 31165
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1761257648468018,
      "learning_rate": 1.3033099991493804e-05,
      "loss": 2.2628,
      "step": 31166
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1022251844406128,
      "learning_rate": 1.3032707648283865e-05,
      "loss": 2.5768,
      "step": 31167
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1132869720458984,
      "learning_rate": 1.3032315299932666e-05,
      "loss": 2.383,
      "step": 31168
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9761260151863098,
      "learning_rate": 1.3031922946440863e-05,
      "loss": 2.3262,
      "step": 31169
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0093591213226318,
      "learning_rate": 1.3031530587809131e-05,
      "loss": 2.1516,
      "step": 31170
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9648526310920715,
      "learning_rate": 1.303113822403813e-05,
      "loss": 2.6735,
      "step": 31171
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0226645469665527,
      "learning_rate": 1.3030745855128526e-05,
      "loss": 2.1999,
      "step": 31172
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.147215723991394,
      "learning_rate": 1.3030353481080985e-05,
      "loss": 2.3721,
      "step": 31173
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2367150783538818,
      "learning_rate": 1.3029961101896174e-05,
      "loss": 2.2443,
      "step": 31174
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0673032999038696,
      "learning_rate": 1.3029568717574751e-05,
      "loss": 2.6146,
      "step": 31175
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9294235110282898,
      "learning_rate": 1.302917632811739e-05,
      "loss": 2.4568,
      "step": 31176
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0368399620056152,
      "learning_rate": 1.3028783933524752e-05,
      "loss": 2.3005,
      "step": 31177
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0038893222808838,
      "learning_rate": 1.30283915337975e-05,
      "loss": 2.4496,
      "step": 31178
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0620391368865967,
      "learning_rate": 1.3027999128936304e-05,
      "loss": 2.2636,
      "step": 31179
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1617823839187622,
      "learning_rate": 1.3027606718941828e-05,
      "loss": 2.4623,
      "step": 31180
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1249912977218628,
      "learning_rate": 1.3027214303814738e-05,
      "loss": 2.467,
      "step": 31181
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2500519752502441,
      "learning_rate": 1.3026821883555695e-05,
      "loss": 2.2391,
      "step": 31182
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0820121765136719,
      "learning_rate": 1.3026429458165363e-05,
      "loss": 2.5051,
      "step": 31183
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2343432903289795,
      "learning_rate": 1.3026037027644416e-05,
      "loss": 2.4937,
      "step": 31184
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0716736316680908,
      "learning_rate": 1.3025644591993514e-05,
      "loss": 2.302,
      "step": 31185
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1257855892181396,
      "learning_rate": 1.302525215121332e-05,
      "loss": 2.4704,
      "step": 31186
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1454713344573975,
      "learning_rate": 1.3024859705304505e-05,
      "loss": 2.1855,
      "step": 31187
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9622888565063477,
      "learning_rate": 1.3024467254267732e-05,
      "loss": 2.4354,
      "step": 31188
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1089304685592651,
      "learning_rate": 1.3024074798103662e-05,
      "loss": 2.4357,
      "step": 31189
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9845081567764282,
      "learning_rate": 1.3023682336812966e-05,
      "loss": 2.4254,
      "step": 31190
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0886317491531372,
      "learning_rate": 1.3023289870396307e-05,
      "loss": 2.2898,
      "step": 31191
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0366966724395752,
      "learning_rate": 1.302289739885435e-05,
      "loss": 2.479,
      "step": 31192
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0330393314361572,
      "learning_rate": 1.3022504922187764e-05,
      "loss": 2.2386,
      "step": 31193
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9781923890113831,
      "learning_rate": 1.3022112440397208e-05,
      "loss": 2.3089,
      "step": 31194
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0555909872055054,
      "learning_rate": 1.302171995348335e-05,
      "loss": 2.4248,
      "step": 31195
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0224883556365967,
      "learning_rate": 1.302132746144686e-05,
      "loss": 2.4686,
      "step": 31196
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2651077508926392,
      "learning_rate": 1.3020934964288396e-05,
      "loss": 2.5229,
      "step": 31197
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.118729829788208,
      "learning_rate": 1.3020542462008626e-05,
      "loss": 2.5661,
      "step": 31198
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0589735507965088,
      "learning_rate": 1.3020149954608217e-05,
      "loss": 2.5078,
      "step": 31199
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0439034700393677,
      "learning_rate": 1.3019757442087835e-05,
      "loss": 2.3034,
      "step": 31200
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9941860437393188,
      "learning_rate": 1.3019364924448143e-05,
      "loss": 2.2666,
      "step": 31201
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1035072803497314,
      "learning_rate": 1.3018972401689807e-05,
      "loss": 2.3229,
      "step": 31202
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0234371423721313,
      "learning_rate": 1.3018579873813493e-05,
      "loss": 2.4468,
      "step": 31203
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0995442867279053,
      "learning_rate": 1.3018187340819867e-05,
      "loss": 2.5772,
      "step": 31204
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1285103559494019,
      "learning_rate": 1.3017794802709593e-05,
      "loss": 2.3336,
      "step": 31205
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9607925415039062,
      "learning_rate": 1.3017402259483336e-05,
      "loss": 2.3759,
      "step": 31206
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0483088493347168,
      "learning_rate": 1.3017009711141766e-05,
      "loss": 2.1315,
      "step": 31207
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.228293776512146,
      "learning_rate": 1.3016617157685546e-05,
      "loss": 2.2399,
      "step": 31208
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9944928884506226,
      "learning_rate": 1.3016224599115335e-05,
      "loss": 2.4538,
      "step": 31209
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.057104229927063,
      "learning_rate": 1.3015832035431806e-05,
      "loss": 2.4608,
      "step": 31210
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0472136735916138,
      "learning_rate": 1.3015439466635625e-05,
      "loss": 2.3364,
      "step": 31211
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.029422640800476,
      "learning_rate": 1.3015046892727452e-05,
      "loss": 2.3699,
      "step": 31212
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0731741189956665,
      "learning_rate": 1.3014654313707958e-05,
      "loss": 2.4515,
      "step": 31213
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0712956190109253,
      "learning_rate": 1.3014261729577806e-05,
      "loss": 2.4453,
      "step": 31214
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1633611917495728,
      "learning_rate": 1.301386914033766e-05,
      "loss": 2.4622,
      "step": 31215
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1235767602920532,
      "learning_rate": 1.3013476545988191e-05,
      "loss": 2.4472,
      "step": 31216
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0343999862670898,
      "learning_rate": 1.3013083946530056e-05,
      "loss": 2.2878,
      "step": 31217
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1157479286193848,
      "learning_rate": 1.3012691341963928e-05,
      "loss": 2.1866,
      "step": 31218
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1787347793579102,
      "learning_rate": 1.301229873229047e-05,
      "loss": 2.4099,
      "step": 31219
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1302047967910767,
      "learning_rate": 1.3011906117510349e-05,
      "loss": 2.5174,
      "step": 31220
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.411953330039978,
      "learning_rate": 1.3011513497624226e-05,
      "loss": 2.3156,
      "step": 31221
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3018028736114502,
      "learning_rate": 1.301112087263277e-05,
      "loss": 2.3957,
      "step": 31222
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1850590705871582,
      "learning_rate": 1.3010728242536648e-05,
      "loss": 2.3972,
      "step": 31223
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0554115772247314,
      "learning_rate": 1.3010335607336524e-05,
      "loss": 2.3319,
      "step": 31224
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9878089427947998,
      "learning_rate": 1.3009942967033063e-05,
      "loss": 2.374,
      "step": 31225
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0398112535476685,
      "learning_rate": 1.3009550321626931e-05,
      "loss": 2.3365,
      "step": 31226
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1168255805969238,
      "learning_rate": 1.3009157671118795e-05,
      "loss": 2.2261,
      "step": 31227
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0957896709442139,
      "learning_rate": 1.3008765015509316e-05,
      "loss": 2.367,
      "step": 31228
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0424456596374512,
      "learning_rate": 1.3008372354799166e-05,
      "loss": 2.4797,
      "step": 31229
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0022001266479492,
      "learning_rate": 1.3007979688989007e-05,
      "loss": 2.4003,
      "step": 31230
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1716861724853516,
      "learning_rate": 1.3007587018079507e-05,
      "loss": 2.6812,
      "step": 31231
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.223095417022705,
      "learning_rate": 1.3007194342071328e-05,
      "loss": 2.2616,
      "step": 31232
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.104501724243164,
      "learning_rate": 1.3006801660965138e-05,
      "loss": 2.1401,
      "step": 31233
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0508949756622314,
      "learning_rate": 1.3006408974761604e-05,
      "loss": 2.4129,
      "step": 31234
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0987669229507446,
      "learning_rate": 1.300601628346139e-05,
      "loss": 2.5639,
      "step": 31235
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9838979840278625,
      "learning_rate": 1.3005623587065159e-05,
      "loss": 2.3046,
      "step": 31236
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1054818630218506,
      "learning_rate": 1.3005230885573582e-05,
      "loss": 2.2167,
      "step": 31237
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1365383863449097,
      "learning_rate": 1.3004838178987322e-05,
      "loss": 2.3021,
      "step": 31238
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1130515336990356,
      "learning_rate": 1.3004445467307047e-05,
      "loss": 2.4404,
      "step": 31239
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1804473400115967,
      "learning_rate": 1.300405275053342e-05,
      "loss": 2.149,
      "step": 31240
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1348496675491333,
      "learning_rate": 1.300366002866711e-05,
      "loss": 2.2087,
      "step": 31241
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9672574996948242,
      "learning_rate": 1.3003267301708775e-05,
      "loss": 2.1638,
      "step": 31242
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0927674770355225,
      "learning_rate": 1.3002874569659089e-05,
      "loss": 2.2802,
      "step": 31243
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.096624493598938,
      "learning_rate": 1.3002481832518713e-05,
      "loss": 2.4519,
      "step": 31244
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0950329303741455,
      "learning_rate": 1.3002089090288319e-05,
      "loss": 2.2715,
      "step": 31245
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2299295663833618,
      "learning_rate": 1.3001696342968565e-05,
      "loss": 2.4531,
      "step": 31246
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9757322072982788,
      "learning_rate": 1.3001303590560124e-05,
      "loss": 2.4312,
      "step": 31247
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0054148435592651,
      "learning_rate": 1.3000910833063656e-05,
      "loss": 2.5222,
      "step": 31248
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0081467628479004,
      "learning_rate": 1.300051807047983e-05,
      "loss": 2.2211,
      "step": 31249
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0199953317642212,
      "learning_rate": 1.300012530280931e-05,
      "loss": 2.4104,
      "step": 31250
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1228662729263306,
      "learning_rate": 1.2999732530052762e-05,
      "loss": 2.3032,
      "step": 31251
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1350200176239014,
      "learning_rate": 1.2999339752210856e-05,
      "loss": 2.4632,
      "step": 31252
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1495040655136108,
      "learning_rate": 1.2998946969284251e-05,
      "loss": 2.5016,
      "step": 31253
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0597505569458008,
      "learning_rate": 1.2998554181273616e-05,
      "loss": 2.3257,
      "step": 31254
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.383029580116272,
      "learning_rate": 1.2998161388179621e-05,
      "loss": 2.3163,
      "step": 31255
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0892890691757202,
      "learning_rate": 1.2997768590002927e-05,
      "loss": 2.6003,
      "step": 31256
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9718949794769287,
      "learning_rate": 1.29973757867442e-05,
      "loss": 2.3519,
      "step": 31257
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.023888111114502,
      "learning_rate": 1.2996982978404107e-05,
      "loss": 2.3967,
      "step": 31258
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9795814156532288,
      "learning_rate": 1.2996590164983313e-05,
      "loss": 2.3484,
      "step": 31259
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0894100666046143,
      "learning_rate": 1.2996197346482489e-05,
      "loss": 2.453,
      "step": 31260
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0516252517700195,
      "learning_rate": 1.2995804522902291e-05,
      "loss": 2.243,
      "step": 31261
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.207269549369812,
      "learning_rate": 1.2995411694243394e-05,
      "loss": 2.372,
      "step": 31262
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.041428804397583,
      "learning_rate": 1.2995018860506459e-05,
      "loss": 2.3855,
      "step": 31263
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1143369674682617,
      "learning_rate": 1.2994626021692153e-05,
      "loss": 2.3277,
      "step": 31264
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9871328473091125,
      "learning_rate": 1.2994233177801145e-05,
      "loss": 2.3231,
      "step": 31265
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0963478088378906,
      "learning_rate": 1.2993840328834098e-05,
      "loss": 2.636,
      "step": 31266
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9972784519195557,
      "learning_rate": 1.2993447474791681e-05,
      "loss": 2.1608,
      "step": 31267
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1116586923599243,
      "learning_rate": 1.2993054615674551e-05,
      "loss": 2.3303,
      "step": 31268
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0354753732681274,
      "learning_rate": 1.2992661751483384e-05,
      "loss": 2.4163,
      "step": 31269
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0841195583343506,
      "learning_rate": 1.2992268882218843e-05,
      "loss": 2.2619,
      "step": 31270
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1655526161193848,
      "learning_rate": 1.2991876007881592e-05,
      "loss": 2.3769,
      "step": 31271
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0348783731460571,
      "learning_rate": 1.2991483128472302e-05,
      "loss": 2.3399,
      "step": 31272
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0636364221572876,
      "learning_rate": 1.2991090243991633e-05,
      "loss": 2.1899,
      "step": 31273
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.144410490989685,
      "learning_rate": 1.2990697354440257e-05,
      "loss": 2.3909,
      "step": 31274
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1733338832855225,
      "learning_rate": 1.2990304459818833e-05,
      "loss": 2.5432,
      "step": 31275
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0899674892425537,
      "learning_rate": 1.298991156012803e-05,
      "loss": 2.4196,
      "step": 31276
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.106465458869934,
      "learning_rate": 1.2989518655368516e-05,
      "loss": 2.4067,
      "step": 31277
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3334652185440063,
      "learning_rate": 1.2989125745540955e-05,
      "loss": 2.336,
      "step": 31278
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2990909814834595,
      "learning_rate": 1.2988732830646018e-05,
      "loss": 2.4957,
      "step": 31279
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4514861106872559,
      "learning_rate": 1.2988339910684364e-05,
      "loss": 2.5902,
      "step": 31280
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1105360984802246,
      "learning_rate": 1.2987946985656663e-05,
      "loss": 2.4239,
      "step": 31281
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.010294795036316,
      "learning_rate": 1.2987554055563577e-05,
      "loss": 2.3959,
      "step": 31282
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.082849144935608,
      "learning_rate": 1.2987161120405782e-05,
      "loss": 2.3,
      "step": 31283
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2726240158081055,
      "learning_rate": 1.2986768180183935e-05,
      "loss": 2.2762,
      "step": 31284
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2487554550170898,
      "learning_rate": 1.2986375234898702e-05,
      "loss": 2.2507,
      "step": 31285
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0303630828857422,
      "learning_rate": 1.2985982284550756e-05,
      "loss": 2.3312,
      "step": 31286
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0833364725112915,
      "learning_rate": 1.2985589329140757e-05,
      "loss": 2.2854,
      "step": 31287
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.094696044921875,
      "learning_rate": 1.2985196368669372e-05,
      "loss": 2.2457,
      "step": 31288
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.16316819190979,
      "learning_rate": 1.2984803403137269e-05,
      "loss": 2.3887,
      "step": 31289
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0794079303741455,
      "learning_rate": 1.2984410432545114e-05,
      "loss": 2.2468,
      "step": 31290
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9923095107078552,
      "learning_rate": 1.2984017456893571e-05,
      "loss": 2.3376,
      "step": 31291
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1544886827468872,
      "learning_rate": 1.298362447618331e-05,
      "loss": 2.5263,
      "step": 31292
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1515642404556274,
      "learning_rate": 1.2983231490414998e-05,
      "loss": 2.4878,
      "step": 31293
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.00148606300354,
      "learning_rate": 1.2982838499589295e-05,
      "loss": 2.2508,
      "step": 31294
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1136934757232666,
      "learning_rate": 1.2982445503706871e-05,
      "loss": 2.3124,
      "step": 31295
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0924112796783447,
      "learning_rate": 1.2982052502768389e-05,
      "loss": 2.4908,
      "step": 31296
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2010704278945923,
      "learning_rate": 1.2981659496774521e-05,
      "loss": 2.5116,
      "step": 31297
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0481867790222168,
      "learning_rate": 1.298126648572593e-05,
      "loss": 2.6362,
      "step": 31298
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.129413366317749,
      "learning_rate": 1.2980873469623283e-05,
      "loss": 2.4947,
      "step": 31299
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.144191026687622,
      "learning_rate": 1.2980480448467248e-05,
      "loss": 2.2875,
      "step": 31300
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0149579048156738,
      "learning_rate": 1.2980087422258486e-05,
      "loss": 2.4916,
      "step": 31301
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.050338625907898,
      "learning_rate": 1.2979694390997667e-05,
      "loss": 2.2853,
      "step": 31302
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0444213151931763,
      "learning_rate": 1.2979301354685455e-05,
      "loss": 2.253,
      "step": 31303
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2385334968566895,
      "learning_rate": 1.2978908313322523e-05,
      "loss": 2.4738,
      "step": 31304
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0568186044692993,
      "learning_rate": 1.2978515266909528e-05,
      "loss": 2.6202,
      "step": 31305
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1772878170013428,
      "learning_rate": 1.2978122215447142e-05,
      "loss": 2.528,
      "step": 31306
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0024361610412598,
      "learning_rate": 1.297772915893603e-05,
      "loss": 2.3898,
      "step": 31307
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.076743721961975,
      "learning_rate": 1.2977336097376858e-05,
      "loss": 2.4649,
      "step": 31308
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.058516025543213,
      "learning_rate": 1.2976943030770293e-05,
      "loss": 2.3676,
      "step": 31309
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2207727432250977,
      "learning_rate": 1.2976549959117001e-05,
      "loss": 2.4279,
      "step": 31310
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.011263370513916,
      "learning_rate": 1.297615688241765e-05,
      "loss": 2.4241,
      "step": 31311
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0238713026046753,
      "learning_rate": 1.2975763800672905e-05,
      "loss": 2.5062,
      "step": 31312
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0879733562469482,
      "learning_rate": 1.2975370713883428e-05,
      "loss": 2.4593,
      "step": 31313
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2174358367919922,
      "learning_rate": 1.2974977622049894e-05,
      "loss": 2.4418,
      "step": 31314
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.228711485862732,
      "learning_rate": 1.2974584525172964e-05,
      "loss": 2.3027,
      "step": 31315
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9939629435539246,
      "learning_rate": 1.2974191423253304e-05,
      "loss": 2.3358,
      "step": 31316
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2035189867019653,
      "learning_rate": 1.297379831629158e-05,
      "loss": 2.3557,
      "step": 31317
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0616116523742676,
      "learning_rate": 1.2973405204288462e-05,
      "loss": 2.6256,
      "step": 31318
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2087384462356567,
      "learning_rate": 1.2973012087244616e-05,
      "loss": 2.4892,
      "step": 31319
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1728079319000244,
      "learning_rate": 1.2972618965160705e-05,
      "loss": 2.4647,
      "step": 31320
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.007277488708496,
      "learning_rate": 1.29722258380374e-05,
      "loss": 2.3433,
      "step": 31321
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0497729778289795,
      "learning_rate": 1.2971832705875362e-05,
      "loss": 2.5646,
      "step": 31322
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0238568782806396,
      "learning_rate": 1.2971439568675262e-05,
      "loss": 2.426,
      "step": 31323
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0871124267578125,
      "learning_rate": 1.2971046426437766e-05,
      "loss": 2.3482,
      "step": 31324
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0764970779418945,
      "learning_rate": 1.2970653279163537e-05,
      "loss": 2.5094,
      "step": 31325
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9591026902198792,
      "learning_rate": 1.2970260126853247e-05,
      "loss": 2.2833,
      "step": 31326
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1438539028167725,
      "learning_rate": 1.2969866969507555e-05,
      "loss": 2.3744,
      "step": 31327
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9469382762908936,
      "learning_rate": 1.2969473807127132e-05,
      "loss": 2.1225,
      "step": 31328
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9972501993179321,
      "learning_rate": 1.2969080639712648e-05,
      "loss": 2.2556,
      "step": 31329
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1088591814041138,
      "learning_rate": 1.2968687467264764e-05,
      "loss": 2.3543,
      "step": 31330
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0405759811401367,
      "learning_rate": 1.2968294289784148e-05,
      "loss": 2.3383,
      "step": 31331
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.055082082748413,
      "learning_rate": 1.2967901107271467e-05,
      "loss": 2.1813,
      "step": 31332
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0582332611083984,
      "learning_rate": 1.2967507919727392e-05,
      "loss": 2.4559,
      "step": 31333
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0242761373519897,
      "learning_rate": 1.296711472715258e-05,
      "loss": 2.7428,
      "step": 31334
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9961724281311035,
      "learning_rate": 1.2966721529547705e-05,
      "loss": 2.3474,
      "step": 31335
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0792933702468872,
      "learning_rate": 1.2966328326913431e-05,
      "loss": 2.2072,
      "step": 31336
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0624934434890747,
      "learning_rate": 1.2965935119250424e-05,
      "loss": 2.503,
      "step": 31337
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0772536993026733,
      "learning_rate": 1.2965541906559353e-05,
      "loss": 2.345,
      "step": 31338
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1251994371414185,
      "learning_rate": 1.2965148688840881e-05,
      "loss": 2.3683,
      "step": 31339
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9188581705093384,
      "learning_rate": 1.2964755466095678e-05,
      "loss": 2.3648,
      "step": 31340
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1424037218093872,
      "learning_rate": 1.2964362238324409e-05,
      "loss": 2.2317,
      "step": 31341
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0593808889389038,
      "learning_rate": 1.296396900552774e-05,
      "loss": 2.4364,
      "step": 31342
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1507240533828735,
      "learning_rate": 1.296357576770634e-05,
      "loss": 2.3307,
      "step": 31343
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1752140522003174,
      "learning_rate": 1.2963182524860873e-05,
      "loss": 2.309,
      "step": 31344
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1259373426437378,
      "learning_rate": 1.2962789276992007e-05,
      "loss": 2.2355,
      "step": 31345
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.108811855316162,
      "learning_rate": 1.296239602410041e-05,
      "loss": 2.4883,
      "step": 31346
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0777440071105957,
      "learning_rate": 1.2962002766186745e-05,
      "loss": 2.4699,
      "step": 31347
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.048945426940918,
      "learning_rate": 1.2961609503251682e-05,
      "loss": 2.2891,
      "step": 31348
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0520387887954712,
      "learning_rate": 1.2961216235295886e-05,
      "loss": 2.3929,
      "step": 31349
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0292340517044067,
      "learning_rate": 1.2960822962320023e-05,
      "loss": 2.419,
      "step": 31350
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9801260828971863,
      "learning_rate": 1.2960429684324765e-05,
      "loss": 2.2887,
      "step": 31351
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.197460412979126,
      "learning_rate": 1.2960036401310771e-05,
      "loss": 2.6177,
      "step": 31352
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1091036796569824,
      "learning_rate": 1.2959643113278713e-05,
      "loss": 2.5116,
      "step": 31353
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1056772470474243,
      "learning_rate": 1.2959249820229256e-05,
      "loss": 2.4706,
      "step": 31354
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.991578996181488,
      "learning_rate": 1.2958856522163066e-05,
      "loss": 2.2962,
      "step": 31355
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0170878171920776,
      "learning_rate": 1.295846321908081e-05,
      "loss": 2.2867,
      "step": 31356
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.921093761920929,
      "learning_rate": 1.2958069910983156e-05,
      "loss": 2.4436,
      "step": 31357
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.110400915145874,
      "learning_rate": 1.295767659787077e-05,
      "loss": 2.2123,
      "step": 31358
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9888135194778442,
      "learning_rate": 1.2957283279744322e-05,
      "loss": 2.2613,
      "step": 31359
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9333577752113342,
      "learning_rate": 1.2956889956604473e-05,
      "loss": 2.1783,
      "step": 31360
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1652886867523193,
      "learning_rate": 1.2956496628451892e-05,
      "loss": 2.2784,
      "step": 31361
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0153063535690308,
      "learning_rate": 1.2956103295287243e-05,
      "loss": 2.5273,
      "step": 31362
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0933549404144287,
      "learning_rate": 1.29557099571112e-05,
      "loss": 2.477,
      "step": 31363
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1562057733535767,
      "learning_rate": 1.2955316613924427e-05,
      "loss": 2.51,
      "step": 31364
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.074900507926941,
      "learning_rate": 1.2954923265727586e-05,
      "loss": 2.2593,
      "step": 31365
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1123600006103516,
      "learning_rate": 1.2954529912521352e-05,
      "loss": 2.3184,
      "step": 31366
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.084264874458313,
      "learning_rate": 1.2954136554306384e-05,
      "loss": 2.3126,
      "step": 31367
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0145301818847656,
      "learning_rate": 1.2953743191083352e-05,
      "loss": 2.3797,
      "step": 31368
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.18720281124115,
      "learning_rate": 1.2953349822852926e-05,
      "loss": 2.4558,
      "step": 31369
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0382933616638184,
      "learning_rate": 1.295295644961577e-05,
      "loss": 2.4107,
      "step": 31370
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0155634880065918,
      "learning_rate": 1.2952563071372549e-05,
      "loss": 2.5287,
      "step": 31371
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2146440744400024,
      "learning_rate": 1.295216968812393e-05,
      "loss": 2.547,
      "step": 31372
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0475120544433594,
      "learning_rate": 1.2951776299870585e-05,
      "loss": 2.3809,
      "step": 31373
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0963425636291504,
      "learning_rate": 1.2951382906613177e-05,
      "loss": 2.1821,
      "step": 31374
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0647152662277222,
      "learning_rate": 1.2950989508352371e-05,
      "loss": 2.2357,
      "step": 31375
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0471677780151367,
      "learning_rate": 1.2950596105088837e-05,
      "loss": 2.5168,
      "step": 31376
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1469413042068481,
      "learning_rate": 1.2950202696823242e-05,
      "loss": 2.4447,
      "step": 31377
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0062092542648315,
      "learning_rate": 1.2949809283556252e-05,
      "loss": 2.3577,
      "step": 31378
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8855596780776978,
      "learning_rate": 1.2949415865288534e-05,
      "loss": 2.5395,
      "step": 31379
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.206337332725525,
      "learning_rate": 1.2949022442020755e-05,
      "loss": 2.4197,
      "step": 31380
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.076413631439209,
      "learning_rate": 1.2948629013753582e-05,
      "loss": 2.4441,
      "step": 31381
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9410355091094971,
      "learning_rate": 1.2948235580487682e-05,
      "loss": 2.3114,
      "step": 31382
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.229129672050476,
      "learning_rate": 1.2947842142223721e-05,
      "loss": 2.5493,
      "step": 31383
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0672310590744019,
      "learning_rate": 1.2947448698962368e-05,
      "loss": 2.2161,
      "step": 31384
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1160399913787842,
      "learning_rate": 1.294705525070429e-05,
      "loss": 2.4029,
      "step": 31385
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0922690629959106,
      "learning_rate": 1.2946661797450153e-05,
      "loss": 2.6253,
      "step": 31386
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0440938472747803,
      "learning_rate": 1.2946268339200621e-05,
      "loss": 2.5051,
      "step": 31387
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0324443578720093,
      "learning_rate": 1.2945874875956366e-05,
      "loss": 2.373,
      "step": 31388
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0936487913131714,
      "learning_rate": 1.2945481407718052e-05,
      "loss": 2.1743,
      "step": 31389
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.26443612575531,
      "learning_rate": 1.2945087934486348e-05,
      "loss": 2.5845,
      "step": 31390
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.178378701210022,
      "learning_rate": 1.2944694456261919e-05,
      "loss": 2.5525,
      "step": 31391
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0769942998886108,
      "learning_rate": 1.2944300973045436e-05,
      "loss": 2.4902,
      "step": 31392
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0277982950210571,
      "learning_rate": 1.2943907484837562e-05,
      "loss": 2.4618,
      "step": 31393
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.076818823814392,
      "learning_rate": 1.2943513991638962e-05,
      "loss": 2.2695,
      "step": 31394
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0516626834869385,
      "learning_rate": 1.294312049345031e-05,
      "loss": 2.3096,
      "step": 31395
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1846083402633667,
      "learning_rate": 1.2942726990272268e-05,
      "loss": 2.2757,
      "step": 31396
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1067638397216797,
      "learning_rate": 1.2942333482105505e-05,
      "loss": 2.3996,
      "step": 31397
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0334084033966064,
      "learning_rate": 1.2941939968950688e-05,
      "loss": 2.3871,
      "step": 31398
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0517882108688354,
      "learning_rate": 1.2941546450808482e-05,
      "loss": 2.3862,
      "step": 31399
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0304888486862183,
      "learning_rate": 1.2941152927679557e-05,
      "loss": 2.474,
      "step": 31400
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0100239515304565,
      "learning_rate": 1.294075939956458e-05,
      "loss": 2.1527,
      "step": 31401
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0516172647476196,
      "learning_rate": 1.2940365866464215e-05,
      "loss": 2.4988,
      "step": 31402
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.101798415184021,
      "learning_rate": 1.2939972328379134e-05,
      "loss": 2.3953,
      "step": 31403
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1085724830627441,
      "learning_rate": 1.2939578785309999e-05,
      "loss": 2.3491,
      "step": 31404
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0836679935455322,
      "learning_rate": 1.2939185237257481e-05,
      "loss": 2.4756,
      "step": 31405
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0902652740478516,
      "learning_rate": 1.2938791684222245e-05,
      "loss": 2.3682,
      "step": 31406
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1074937582015991,
      "learning_rate": 1.2938398126204959e-05,
      "loss": 2.5086,
      "step": 31407
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1987688541412354,
      "learning_rate": 1.293800456320629e-05,
      "loss": 2.4386,
      "step": 31408
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1248071193695068,
      "learning_rate": 1.2937610995226906e-05,
      "loss": 2.3353,
      "step": 31409
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2525273561477661,
      "learning_rate": 1.2937217422267471e-05,
      "loss": 2.3107,
      "step": 31410
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0818935632705688,
      "learning_rate": 1.2936823844328656e-05,
      "loss": 2.5029,
      "step": 31411
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1674699783325195,
      "learning_rate": 1.2936430261411131e-05,
      "loss": 2.3581,
      "step": 31412
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0420252084732056,
      "learning_rate": 1.2936036673515556e-05,
      "loss": 2.4823,
      "step": 31413
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.039768934249878,
      "learning_rate": 1.29356430806426e-05,
      "loss": 2.406,
      "step": 31414
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0134823322296143,
      "learning_rate": 1.293524948279293e-05,
      "loss": 2.2902,
      "step": 31415
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1186474561691284,
      "learning_rate": 1.2934855879967219e-05,
      "loss": 2.3798,
      "step": 31416
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0910935401916504,
      "learning_rate": 1.2934462272166129e-05,
      "loss": 2.379,
      "step": 31417
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1229616403579712,
      "learning_rate": 1.2934068659390331e-05,
      "loss": 2.3886,
      "step": 31418
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0561096668243408,
      "learning_rate": 1.2933675041640485e-05,
      "loss": 2.4091,
      "step": 31419
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0462665557861328,
      "learning_rate": 1.2933281418917264e-05,
      "loss": 2.2723,
      "step": 31420
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2625789642333984,
      "learning_rate": 1.2932887791221335e-05,
      "loss": 2.3644,
      "step": 31421
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2337918281555176,
      "learning_rate": 1.2932494158553365e-05,
      "loss": 2.3865,
      "step": 31422
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9806516766548157,
      "learning_rate": 1.2932100520914022e-05,
      "loss": 2.4673,
      "step": 31423
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9723815321922302,
      "learning_rate": 1.293170687830397e-05,
      "loss": 2.3445,
      "step": 31424
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0176730155944824,
      "learning_rate": 1.2931313230723884e-05,
      "loss": 2.3881,
      "step": 31425
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9632011651992798,
      "learning_rate": 1.293091957817442e-05,
      "loss": 2.4135,
      "step": 31426
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0566892623901367,
      "learning_rate": 1.2930525920656254e-05,
      "loss": 2.4592,
      "step": 31427
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0024863481521606,
      "learning_rate": 1.293013225817005e-05,
      "loss": 2.4169,
      "step": 31428
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1200652122497559,
      "learning_rate": 1.2929738590716476e-05,
      "loss": 2.3778,
      "step": 31429
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0488007068634033,
      "learning_rate": 1.2929344918296201e-05,
      "loss": 2.2855,
      "step": 31430
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.005948543548584,
      "learning_rate": 1.2928951240909889e-05,
      "loss": 2.3497,
      "step": 31431
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0059624910354614,
      "learning_rate": 1.292855755855821e-05,
      "loss": 2.3041,
      "step": 31432
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1163190603256226,
      "learning_rate": 1.2928163871241828e-05,
      "loss": 2.4461,
      "step": 31433
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0736337900161743,
      "learning_rate": 1.2927770178961414e-05,
      "loss": 2.2847,
      "step": 31434
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1189959049224854,
      "learning_rate": 1.2927376481717636e-05,
      "loss": 2.3098,
      "step": 31435
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0557829141616821,
      "learning_rate": 1.2926982779511158e-05,
      "loss": 2.2964,
      "step": 31436
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0295661687850952,
      "learning_rate": 1.292658907234265e-05,
      "loss": 2.316,
      "step": 31437
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0790386199951172,
      "learning_rate": 1.2926195360212778e-05,
      "loss": 2.5222,
      "step": 31438
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1401859521865845,
      "learning_rate": 1.2925801643122213e-05,
      "loss": 2.402,
      "step": 31439
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.235986351966858,
      "learning_rate": 1.2925407921071615e-05,
      "loss": 2.4639,
      "step": 31440
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9386443495750427,
      "learning_rate": 1.2925014194061661e-05,
      "loss": 2.2976,
      "step": 31441
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0060802698135376,
      "learning_rate": 1.292462046209301e-05,
      "loss": 2.1972,
      "step": 31442
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0510649681091309,
      "learning_rate": 1.2924226725166333e-05,
      "loss": 2.5155,
      "step": 31443
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9838858246803284,
      "learning_rate": 1.2923832983282299e-05,
      "loss": 2.3658,
      "step": 31444
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9811605215072632,
      "learning_rate": 1.2923439236441575e-05,
      "loss": 2.3819,
      "step": 31445
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9831762313842773,
      "learning_rate": 1.2923045484644825e-05,
      "loss": 2.3917,
      "step": 31446
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1478145122528076,
      "learning_rate": 1.2922651727892718e-05,
      "loss": 2.568,
      "step": 31447
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0007035732269287,
      "learning_rate": 1.2922257966185926e-05,
      "loss": 2.3945,
      "step": 31448
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0821892023086548,
      "learning_rate": 1.2921864199525111e-05,
      "loss": 2.4017,
      "step": 31449
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1307755708694458,
      "learning_rate": 1.2921470427910943e-05,
      "loss": 2.3544,
      "step": 31450
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7861566543579102,
      "learning_rate": 1.2921076651344091e-05,
      "loss": 2.4065,
      "step": 31451
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1636898517608643,
      "learning_rate": 1.2920682869825219e-05,
      "loss": 2.5134,
      "step": 31452
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0538480281829834,
      "learning_rate": 1.2920289083354995e-05,
      "loss": 2.3182,
      "step": 31453
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1066051721572876,
      "learning_rate": 1.291989529193409e-05,
      "loss": 2.437,
      "step": 31454
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0205882787704468,
      "learning_rate": 1.2919501495563167e-05,
      "loss": 2.3477,
      "step": 31455
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0697356462478638,
      "learning_rate": 1.29191076942429e-05,
      "loss": 2.5144,
      "step": 31456
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.062074899673462,
      "learning_rate": 1.291871388797395e-05,
      "loss": 2.6519,
      "step": 31457
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0385074615478516,
      "learning_rate": 1.2918320076756987e-05,
      "loss": 2.292,
      "step": 31458
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9630582332611084,
      "learning_rate": 1.2917926260592681e-05,
      "loss": 2.6194,
      "step": 31459
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0529723167419434,
      "learning_rate": 1.2917532439481697e-05,
      "loss": 2.419,
      "step": 31460
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1069576740264893,
      "learning_rate": 1.2917138613424702e-05,
      "loss": 2.5991,
      "step": 31461
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.048208475112915,
      "learning_rate": 1.2916744782422365e-05,
      "loss": 2.2755,
      "step": 31462
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.103851318359375,
      "learning_rate": 1.2916350946475351e-05,
      "loss": 2.6965,
      "step": 31463
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9683347940444946,
      "learning_rate": 1.2915957105584336e-05,
      "loss": 2.4785,
      "step": 31464
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0468589067459106,
      "learning_rate": 1.2915563259749978e-05,
      "loss": 2.2942,
      "step": 31465
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9778085350990295,
      "learning_rate": 1.2915169408972947e-05,
      "loss": 2.3906,
      "step": 31466
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.091447114944458,
      "learning_rate": 1.2914775553253914e-05,
      "loss": 2.303,
      "step": 31467
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.089388132095337,
      "learning_rate": 1.2914381692593544e-05,
      "loss": 2.3775,
      "step": 31468
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0484530925750732,
      "learning_rate": 1.2913987826992505e-05,
      "loss": 2.2427,
      "step": 31469
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.122879147529602,
      "learning_rate": 1.2913593956451466e-05,
      "loss": 2.2882,
      "step": 31470
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9955636262893677,
      "learning_rate": 1.2913200080971095e-05,
      "loss": 2.448,
      "step": 31471
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1431776285171509,
      "learning_rate": 1.291280620055206e-05,
      "loss": 2.3677,
      "step": 31472
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1056967973709106,
      "learning_rate": 1.2912412315195022e-05,
      "loss": 2.427,
      "step": 31473
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0542848110198975,
      "learning_rate": 1.2912018424900658e-05,
      "loss": 2.4845,
      "step": 31474
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.080995798110962,
      "learning_rate": 1.291162452966963e-05,
      "loss": 2.368,
      "step": 31475
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0918182134628296,
      "learning_rate": 1.291123062950261e-05,
      "loss": 2.4778,
      "step": 31476
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.082801103591919,
      "learning_rate": 1.291083672440026e-05,
      "loss": 2.4225,
      "step": 31477
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9280235767364502,
      "learning_rate": 1.2910442814363255e-05,
      "loss": 2.2224,
      "step": 31478
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9977660775184631,
      "learning_rate": 1.2910048899392258e-05,
      "loss": 2.3989,
      "step": 31479
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0494877099990845,
      "learning_rate": 1.2909654979487938e-05,
      "loss": 2.0907,
      "step": 31480
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9331192970275879,
      "learning_rate": 1.290926105465096e-05,
      "loss": 2.3968,
      "step": 31481
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9696558117866516,
      "learning_rate": 1.2908867124881997e-05,
      "loss": 2.2962,
      "step": 31482
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2253425121307373,
      "learning_rate": 1.2908473190181712e-05,
      "loss": 2.3595,
      "step": 31483
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1642202138900757,
      "learning_rate": 1.2908079250550777e-05,
      "loss": 2.2016,
      "step": 31484
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1706621646881104,
      "learning_rate": 1.2907685305989857e-05,
      "loss": 2.453,
      "step": 31485
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1004221439361572,
      "learning_rate": 1.2907291356499621e-05,
      "loss": 2.3094,
      "step": 31486
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.050180435180664,
      "learning_rate": 1.2906897402080736e-05,
      "loss": 2.6845,
      "step": 31487
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.031169056892395,
      "learning_rate": 1.2906503442733871e-05,
      "loss": 2.2919,
      "step": 31488
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9804922342300415,
      "learning_rate": 1.2906109478459694e-05,
      "loss": 2.2089,
      "step": 31489
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1433120965957642,
      "learning_rate": 1.290571550925887e-05,
      "loss": 2.3528,
      "step": 31490
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9978733658790588,
      "learning_rate": 1.2905321535132071e-05,
      "loss": 2.658,
      "step": 31491
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0563751459121704,
      "learning_rate": 1.2904927556079962e-05,
      "loss": 2.3905,
      "step": 31492
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0992594957351685,
      "learning_rate": 1.2904533572103214e-05,
      "loss": 2.4111,
      "step": 31493
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.02020263671875,
      "learning_rate": 1.2904139583202488e-05,
      "loss": 2.4893,
      "step": 31494
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.094876766204834,
      "learning_rate": 1.2903745589378458e-05,
      "loss": 2.2981,
      "step": 31495
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0560379028320312,
      "learning_rate": 1.2903351590631792e-05,
      "loss": 2.2373,
      "step": 31496
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9781134724617004,
      "learning_rate": 1.2902957586963159e-05,
      "loss": 2.3231,
      "step": 31497
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.143120527267456,
      "learning_rate": 1.2902563578373222e-05,
      "loss": 2.4043,
      "step": 31498
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.091394066810608,
      "learning_rate": 1.290216956486265e-05,
      "loss": 2.2706,
      "step": 31499
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0515174865722656,
      "learning_rate": 1.2901775546432114e-05,
      "loss": 2.4892,
      "step": 31500
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.101793646812439,
      "learning_rate": 1.2901381523082279e-05,
      "loss": 2.3175,
      "step": 31501
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.986578106880188,
      "learning_rate": 1.2900987494813815e-05,
      "loss": 2.3007,
      "step": 31502
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0386910438537598,
      "learning_rate": 1.2900593461627389e-05,
      "loss": 2.3469,
      "step": 31503
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0004593133926392,
      "learning_rate": 1.2900199423523671e-05,
      "loss": 2.4262,
      "step": 31504
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0762096643447876,
      "learning_rate": 1.2899805380503326e-05,
      "loss": 2.3368,
      "step": 31505
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9784696102142334,
      "learning_rate": 1.2899411332567023e-05,
      "loss": 2.2759,
      "step": 31506
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0677294731140137,
      "learning_rate": 1.289901727971543e-05,
      "loss": 2.2904,
      "step": 31507
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2322386503219604,
      "learning_rate": 1.2898623221949216e-05,
      "loss": 2.488,
      "step": 31508
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0478194952011108,
      "learning_rate": 1.2898229159269047e-05,
      "loss": 2.2145,
      "step": 31509
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1541125774383545,
      "learning_rate": 1.2897835091675595e-05,
      "loss": 2.7065,
      "step": 31510
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2089217901229858,
      "learning_rate": 1.2897441019169526e-05,
      "loss": 2.4465,
      "step": 31511
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.032880187034607,
      "learning_rate": 1.2897046941751505e-05,
      "loss": 2.4538,
      "step": 31512
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0463554859161377,
      "learning_rate": 1.2896652859422203e-05,
      "loss": 2.5469,
      "step": 31513
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9794448614120483,
      "learning_rate": 1.2896258772182288e-05,
      "loss": 2.4273,
      "step": 31514
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1514735221862793,
      "learning_rate": 1.289586468003243e-05,
      "loss": 2.4824,
      "step": 31515
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.190914273262024,
      "learning_rate": 1.2895470582973292e-05,
      "loss": 2.3601,
      "step": 31516
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3637408018112183,
      "learning_rate": 1.2895076481005546e-05,
      "loss": 2.4494,
      "step": 31517
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.058573603630066,
      "learning_rate": 1.2894682374129858e-05,
      "loss": 2.5056,
      "step": 31518
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1775978803634644,
      "learning_rate": 1.2894288262346902e-05,
      "loss": 2.3755,
      "step": 31519
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0525119304656982,
      "learning_rate": 1.2893894145657335e-05,
      "loss": 2.645,
      "step": 31520
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.076067566871643,
      "learning_rate": 1.2893500024061834e-05,
      "loss": 2.348,
      "step": 31521
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.398566722869873,
      "learning_rate": 1.2893105897561062e-05,
      "loss": 2.4146,
      "step": 31522
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.028126835823059,
      "learning_rate": 1.2892711766155691e-05,
      "loss": 2.4905,
      "step": 31523
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0565937757492065,
      "learning_rate": 1.2892317629846393e-05,
      "loss": 2.7498,
      "step": 31524
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1117584705352783,
      "learning_rate": 1.2891923488633826e-05,
      "loss": 2.5747,
      "step": 31525
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9874650835990906,
      "learning_rate": 1.2891529342518665e-05,
      "loss": 2.3045,
      "step": 31526
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0870065689086914,
      "learning_rate": 1.2891135191501576e-05,
      "loss": 2.5085,
      "step": 31527
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2658556699752808,
      "learning_rate": 1.2890741035583228e-05,
      "loss": 2.3072,
      "step": 31528
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0567481517791748,
      "learning_rate": 1.2890346874764286e-05,
      "loss": 2.3011,
      "step": 31529
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1339741945266724,
      "learning_rate": 1.2889952709045426e-05,
      "loss": 2.3183,
      "step": 31530
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0393062829971313,
      "learning_rate": 1.2889558538427307e-05,
      "loss": 2.2725,
      "step": 31531
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0433491468429565,
      "learning_rate": 1.2889164362910602e-05,
      "loss": 2.1808,
      "step": 31532
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3702166080474854,
      "learning_rate": 1.2888770182495982e-05,
      "loss": 2.483,
      "step": 31533
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.04202401638031,
      "learning_rate": 1.2888375997184108e-05,
      "loss": 2.1897,
      "step": 31534
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1240824460983276,
      "learning_rate": 1.2887981806975654e-05,
      "loss": 2.3299,
      "step": 31535
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2357048988342285,
      "learning_rate": 1.2887587611871286e-05,
      "loss": 2.6219,
      "step": 31536
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.129944086074829,
      "learning_rate": 1.2887193411871675e-05,
      "loss": 2.2525,
      "step": 31537
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.074684500694275,
      "learning_rate": 1.2886799206977485e-05,
      "loss": 2.2884,
      "step": 31538
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1027311086654663,
      "learning_rate": 1.2886404997189385e-05,
      "loss": 2.226,
      "step": 31539
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0785934925079346,
      "learning_rate": 1.2886010782508046e-05,
      "loss": 2.5064,
      "step": 31540
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0981069803237915,
      "learning_rate": 1.2885616562934136e-05,
      "loss": 2.5006,
      "step": 31541
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.035890817642212,
      "learning_rate": 1.288522233846832e-05,
      "loss": 2.3444,
      "step": 31542
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0927114486694336,
      "learning_rate": 1.2884828109111271e-05,
      "loss": 2.2923,
      "step": 31543
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1945334672927856,
      "learning_rate": 1.2884433874863653e-05,
      "loss": 2.3754,
      "step": 31544
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0792491436004639,
      "learning_rate": 1.2884039635726136e-05,
      "loss": 2.3831,
      "step": 31545
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0652952194213867,
      "learning_rate": 1.2883645391699392e-05,
      "loss": 2.2875,
      "step": 31546
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2508398294448853,
      "learning_rate": 1.2883251142784081e-05,
      "loss": 2.4149,
      "step": 31547
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2233431339263916,
      "learning_rate": 1.288285688898088e-05,
      "loss": 2.3304,
      "step": 31548
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1281564235687256,
      "learning_rate": 1.288246263029045e-05,
      "loss": 2.3211,
      "step": 31549
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0773910284042358,
      "learning_rate": 1.2882068366713467e-05,
      "loss": 2.2316,
      "step": 31550
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0504056215286255,
      "learning_rate": 1.2881674098250594e-05,
      "loss": 2.4005,
      "step": 31551
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2655830383300781,
      "learning_rate": 1.28812798249025e-05,
      "loss": 2.4041,
      "step": 31552
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2663291692733765,
      "learning_rate": 1.2880885546669854e-05,
      "loss": 2.2378,
      "step": 31553
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2448842525482178,
      "learning_rate": 1.2880491263553323e-05,
      "loss": 2.361,
      "step": 31554
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0752359628677368,
      "learning_rate": 1.2880096975553578e-05,
      "loss": 2.1506,
      "step": 31555
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0372337102890015,
      "learning_rate": 1.2879702682671286e-05,
      "loss": 2.3729,
      "step": 31556
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0691006183624268,
      "learning_rate": 1.287930838490712e-05,
      "loss": 2.4123,
      "step": 31557
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0120738744735718,
      "learning_rate": 1.2878914082261742e-05,
      "loss": 2.2891,
      "step": 31558
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2549034357070923,
      "learning_rate": 1.287851977473582e-05,
      "loss": 2.3954,
      "step": 31559
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1082478761672974,
      "learning_rate": 1.2878125462330027e-05,
      "loss": 2.2643,
      "step": 31560
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9900864958763123,
      "learning_rate": 1.287773114504503e-05,
      "loss": 2.3016,
      "step": 31561
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0507159233093262,
      "learning_rate": 1.2877336822881496e-05,
      "loss": 2.2266,
      "step": 31562
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.127031922340393,
      "learning_rate": 1.2876942495840097e-05,
      "loss": 2.1304,
      "step": 31563
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1239181756973267,
      "learning_rate": 1.2876548163921498e-05,
      "loss": 2.1426,
      "step": 31564
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.243920922279358,
      "learning_rate": 1.2876153827126367e-05,
      "loss": 2.5663,
      "step": 31565
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0088675022125244,
      "learning_rate": 1.2875759485455378e-05,
      "loss": 2.513,
      "step": 31566
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9474419355392456,
      "learning_rate": 1.287536513890919e-05,
      "loss": 2.3481,
      "step": 31567
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0907984972000122,
      "learning_rate": 1.2874970787488482e-05,
      "loss": 2.5543,
      "step": 31568
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0609056949615479,
      "learning_rate": 1.2874576431193916e-05,
      "loss": 2.4336,
      "step": 31569
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.108843445777893,
      "learning_rate": 1.2874182070026162e-05,
      "loss": 2.355,
      "step": 31570
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0686264038085938,
      "learning_rate": 1.2873787703985889e-05,
      "loss": 2.5749,
      "step": 31571
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1322933435440063,
      "learning_rate": 1.2873393333073765e-05,
      "loss": 2.3412,
      "step": 31572
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1107466220855713,
      "learning_rate": 1.2872998957290457e-05,
      "loss": 2.4671,
      "step": 31573
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.165907382965088,
      "learning_rate": 1.2872604576636641e-05,
      "loss": 2.3878,
      "step": 31574
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0083826780319214,
      "learning_rate": 1.2872210191112975e-05,
      "loss": 2.2319,
      "step": 31575
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.008837342262268,
      "learning_rate": 1.2871815800720135e-05,
      "loss": 2.3832,
      "step": 31576
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0749624967575073,
      "learning_rate": 1.2871421405458786e-05,
      "loss": 2.2731,
      "step": 31577
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0787540674209595,
      "learning_rate": 1.2871027005329599e-05,
      "loss": 2.2254,
      "step": 31578
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0300363302230835,
      "learning_rate": 1.2870632600333242e-05,
      "loss": 2.4101,
      "step": 31579
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1182072162628174,
      "learning_rate": 1.2870238190470381e-05,
      "loss": 2.1667,
      "step": 31580
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0342551469802856,
      "learning_rate": 1.2869843775741687e-05,
      "loss": 2.399,
      "step": 31581
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.096704125404358,
      "learning_rate": 1.2869449356147827e-05,
      "loss": 2.3287,
      "step": 31582
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.017356514930725,
      "learning_rate": 1.2869054931689475e-05,
      "loss": 2.3479,
      "step": 31583
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0405049324035645,
      "learning_rate": 1.2868660502367294e-05,
      "loss": 2.1837,
      "step": 31584
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0853586196899414,
      "learning_rate": 1.2868266068181953e-05,
      "loss": 2.3755,
      "step": 31585
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.10176420211792,
      "learning_rate": 1.2867871629134123e-05,
      "loss": 2.4454,
      "step": 31586
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0264009237289429,
      "learning_rate": 1.2867477185224471e-05,
      "loss": 2.496,
      "step": 31587
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0199060440063477,
      "learning_rate": 1.2867082736453667e-05,
      "loss": 2.2506,
      "step": 31588
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1008762121200562,
      "learning_rate": 1.2866688282822379e-05,
      "loss": 2.4667,
      "step": 31589
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1329282522201538,
      "learning_rate": 1.2866293824331277e-05,
      "loss": 2.627,
      "step": 31590
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0485881567001343,
      "learning_rate": 1.2865899360981026e-05,
      "loss": 2.4846,
      "step": 31591
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9811524748802185,
      "learning_rate": 1.28655048927723e-05,
      "loss": 2.6083,
      "step": 31592
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9426707029342651,
      "learning_rate": 1.2865110419705764e-05,
      "loss": 2.379,
      "step": 31593
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0141217708587646,
      "learning_rate": 1.2864715941782087e-05,
      "loss": 2.4907,
      "step": 31594
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0691490173339844,
      "learning_rate": 1.2864321459001939e-05,
      "loss": 2.398,
      "step": 31595
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0902355909347534,
      "learning_rate": 1.2863926971365988e-05,
      "loss": 2.2297,
      "step": 31596
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1473785638809204,
      "learning_rate": 1.2863532478874902e-05,
      "loss": 2.6102,
      "step": 31597
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9410765767097473,
      "learning_rate": 1.2863137981529352e-05,
      "loss": 2.1807,
      "step": 31598
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0368338823318481,
      "learning_rate": 1.2862743479330004e-05,
      "loss": 2.2524,
      "step": 31599
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.55287766456604,
      "learning_rate": 1.286234897227753e-05,
      "loss": 2.6029,
      "step": 31600
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0951532125473022,
      "learning_rate": 1.2861954460372594e-05,
      "loss": 2.5735,
      "step": 31601
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1198155879974365,
      "learning_rate": 1.2861559943615874e-05,
      "loss": 2.4902,
      "step": 31602
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1760082244873047,
      "learning_rate": 1.2861165422008029e-05,
      "loss": 2.3983,
      "step": 31603
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.14037024974823,
      "learning_rate": 1.2860770895549731e-05,
      "loss": 2.1775,
      "step": 31604
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0948988199234009,
      "learning_rate": 1.2860376364241649e-05,
      "loss": 2.3252,
      "step": 31605
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1930798292160034,
      "learning_rate": 1.2859981828084453e-05,
      "loss": 2.4548,
      "step": 31606
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9977846741676331,
      "learning_rate": 1.2859587287078811e-05,
      "loss": 2.2332,
      "step": 31607
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0660935640335083,
      "learning_rate": 1.2859192741225393e-05,
      "loss": 2.3964,
      "step": 31608
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0515670776367188,
      "learning_rate": 1.2858798190524868e-05,
      "loss": 2.4587,
      "step": 31609
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0399806499481201,
      "learning_rate": 1.28584036349779e-05,
      "loss": 2.4109,
      "step": 31610
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.154584288597107,
      "learning_rate": 1.2858009074585163e-05,
      "loss": 2.4122,
      "step": 31611
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1649419069290161,
      "learning_rate": 1.2857614509347322e-05,
      "loss": 2.5838,
      "step": 31612
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.037459135055542,
      "learning_rate": 1.2857219939265052e-05,
      "loss": 2.4226,
      "step": 31613
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0283714532852173,
      "learning_rate": 1.2856825364339017e-05,
      "loss": 2.1334,
      "step": 31614
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1190214157104492,
      "learning_rate": 1.2856430784569885e-05,
      "loss": 2.2781,
      "step": 31615
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0382591485977173,
      "learning_rate": 1.2856036199958331e-05,
      "loss": 2.4119,
      "step": 31616
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0048446655273438,
      "learning_rate": 1.2855641610505019e-05,
      "loss": 2.3737,
      "step": 31617
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0774787664413452,
      "learning_rate": 1.2855247016210617e-05,
      "loss": 2.3316,
      "step": 31618
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1100043058395386,
      "learning_rate": 1.2854852417075796e-05,
      "loss": 2.5288,
      "step": 31619
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1183730363845825,
      "learning_rate": 1.2854457813101223e-05,
      "loss": 2.4836,
      "step": 31620
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1245759725570679,
      "learning_rate": 1.2854063204287572e-05,
      "loss": 2.496,
      "step": 31621
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0331766605377197,
      "learning_rate": 1.2853668590635508e-05,
      "loss": 2.2563,
      "step": 31622
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.163872241973877,
      "learning_rate": 1.2853273972145701e-05,
      "loss": 2.2626,
      "step": 31623
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0094515085220337,
      "learning_rate": 1.2852879348818821e-05,
      "loss": 2.5414,
      "step": 31624
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2769426107406616,
      "learning_rate": 1.2852484720655532e-05,
      "loss": 2.4922,
      "step": 31625
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.090814471244812,
      "learning_rate": 1.2852090087656508e-05,
      "loss": 2.4206,
      "step": 31626
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1936675310134888,
      "learning_rate": 1.2851695449822417e-05,
      "loss": 2.1585,
      "step": 31627
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1625932455062866,
      "learning_rate": 1.2851300807153928e-05,
      "loss": 2.4105,
      "step": 31628
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1166980266571045,
      "learning_rate": 1.285090615965171e-05,
      "loss": 2.6028,
      "step": 31629
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0964787006378174,
      "learning_rate": 1.2850511507316432e-05,
      "loss": 2.2955,
      "step": 31630
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.014041543006897,
      "learning_rate": 1.2850116850148761e-05,
      "loss": 2.3193,
      "step": 31631
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0940873622894287,
      "learning_rate": 1.2849722188149369e-05,
      "loss": 2.2752,
      "step": 31632
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0465853214263916,
      "learning_rate": 1.2849327521318926e-05,
      "loss": 2.2673,
      "step": 31633
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0949865579605103,
      "learning_rate": 1.2848932849658098e-05,
      "loss": 2.3105,
      "step": 31634
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0759801864624023,
      "learning_rate": 1.2848538173167553e-05,
      "loss": 2.343,
      "step": 31635
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0536694526672363,
      "learning_rate": 1.2848143491847962e-05,
      "loss": 2.5004,
      "step": 31636
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9491336345672607,
      "learning_rate": 1.2847748805699998e-05,
      "loss": 2.3517,
      "step": 31637
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.197986125946045,
      "learning_rate": 1.2847354114724322e-05,
      "loss": 2.573,
      "step": 31638
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.107599139213562,
      "learning_rate": 1.284695941892161e-05,
      "loss": 2.4808,
      "step": 31639
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0362484455108643,
      "learning_rate": 1.2846564718292526e-05,
      "loss": 2.3942,
      "step": 31640
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0920144319534302,
      "learning_rate": 1.2846170012837744e-05,
      "loss": 2.5646,
      "step": 31641
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0169126987457275,
      "learning_rate": 1.2845775302557932e-05,
      "loss": 2.3191,
      "step": 31642
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1108245849609375,
      "learning_rate": 1.2845380587453757e-05,
      "loss": 2.3941,
      "step": 31643
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0309181213378906,
      "learning_rate": 1.2844985867525888e-05,
      "loss": 2.4102,
      "step": 31644
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.115653395652771,
      "learning_rate": 1.2844591142774995e-05,
      "loss": 2.4157,
      "step": 31645
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1244887113571167,
      "learning_rate": 1.2844196413201749e-05,
      "loss": 2.2171,
      "step": 31646
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0908191204071045,
      "learning_rate": 1.2843801678806815e-05,
      "loss": 2.335,
      "step": 31647
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.053652048110962,
      "learning_rate": 1.2843406939590866e-05,
      "loss": 2.2959,
      "step": 31648
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1689977645874023,
      "learning_rate": 1.2843012195554571e-05,
      "loss": 2.2792,
      "step": 31649
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1399070024490356,
      "learning_rate": 1.2842617446698598e-05,
      "loss": 2.6275,
      "step": 31650
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0602023601531982,
      "learning_rate": 1.2842222693023617e-05,
      "loss": 2.3127,
      "step": 31651
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0253489017486572,
      "learning_rate": 1.2841827934530295e-05,
      "loss": 2.3707,
      "step": 31652
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1215031147003174,
      "learning_rate": 1.2841433171219302e-05,
      "loss": 2.4707,
      "step": 31653
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0419903993606567,
      "learning_rate": 1.2841038403091308e-05,
      "loss": 2.3463,
      "step": 31654
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1347535848617554,
      "learning_rate": 1.2840643630146984e-05,
      "loss": 2.3713,
      "step": 31655
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1996302604675293,
      "learning_rate": 1.2840248852386998e-05,
      "loss": 2.4972,
      "step": 31656
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1615980863571167,
      "learning_rate": 1.2839854069812018e-05,
      "loss": 2.3334,
      "step": 31657
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1921448707580566,
      "learning_rate": 1.2839459282422715e-05,
      "loss": 2.317,
      "step": 31658
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9949748516082764,
      "learning_rate": 1.2839064490219758e-05,
      "loss": 2.4812,
      "step": 31659
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.092769742012024,
      "learning_rate": 1.2838669693203812e-05,
      "loss": 2.2324,
      "step": 31660
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0936076641082764,
      "learning_rate": 1.2838274891375553e-05,
      "loss": 2.6095,
      "step": 31661
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.059255838394165,
      "learning_rate": 1.2837880084735646e-05,
      "loss": 2.1875,
      "step": 31662
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.987145185470581,
      "learning_rate": 1.2837485273284763e-05,
      "loss": 2.2858,
      "step": 31663
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.100958228111267,
      "learning_rate": 1.283709045702357e-05,
      "loss": 2.2883,
      "step": 31664
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0713728666305542,
      "learning_rate": 1.2836695635952738e-05,
      "loss": 2.2384,
      "step": 31665
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0914355516433716,
      "learning_rate": 1.2836300810072938e-05,
      "loss": 2.234,
      "step": 31666
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9919341802597046,
      "learning_rate": 1.2835905979384836e-05,
      "loss": 2.2715,
      "step": 31667
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0765398740768433,
      "learning_rate": 1.2835511143889103e-05,
      "loss": 2.4292,
      "step": 31668
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.204710841178894,
      "learning_rate": 1.2835116303586412e-05,
      "loss": 2.2071,
      "step": 31669
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1265358924865723,
      "learning_rate": 1.2834721458477427e-05,
      "loss": 2.2234,
      "step": 31670
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2404836416244507,
      "learning_rate": 1.2834326608562818e-05,
      "loss": 2.4217,
      "step": 31671
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1393629312515259,
      "learning_rate": 1.2833931753843256e-05,
      "loss": 2.1644,
      "step": 31672
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0645227432250977,
      "learning_rate": 1.2833536894319409e-05,
      "loss": 2.4807,
      "step": 31673
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0227651596069336,
      "learning_rate": 1.2833142029991947e-05,
      "loss": 2.3874,
      "step": 31674
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1371793746948242,
      "learning_rate": 1.2832747160861545e-05,
      "loss": 2.4381,
      "step": 31675
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1892145872116089,
      "learning_rate": 1.2832352286928861e-05,
      "loss": 2.4431,
      "step": 31676
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1074438095092773,
      "learning_rate": 1.2831957408194575e-05,
      "loss": 2.2497,
      "step": 31677
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.148284912109375,
      "learning_rate": 1.2831562524659348e-05,
      "loss": 2.6691,
      "step": 31678
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2010153532028198,
      "learning_rate": 1.2831167636323855e-05,
      "loss": 2.4668,
      "step": 31679
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1444880962371826,
      "learning_rate": 1.2830772743188765e-05,
      "loss": 2.4717,
      "step": 31680
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1210315227508545,
      "learning_rate": 1.2830377845254746e-05,
      "loss": 2.48,
      "step": 31681
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.003239631652832,
      "learning_rate": 1.282998294252247e-05,
      "loss": 2.3895,
      "step": 31682
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.031813621520996,
      "learning_rate": 1.2829588034992602e-05,
      "loss": 2.4619,
      "step": 31683
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9520313143730164,
      "learning_rate": 1.2829193122665812e-05,
      "loss": 2.4076,
      "step": 31684
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9849922060966492,
      "learning_rate": 1.2828798205542775e-05,
      "loss": 2.3012,
      "step": 31685
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.018862009048462,
      "learning_rate": 1.2828403283624153e-05,
      "loss": 2.399,
      "step": 31686
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.565516471862793,
      "learning_rate": 1.2828008356910622e-05,
      "loss": 2.1998,
      "step": 31687
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0455999374389648,
      "learning_rate": 1.282761342540285e-05,
      "loss": 2.4857,
      "step": 31688
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2106059789657593,
      "learning_rate": 1.2827218489101504e-05,
      "loss": 2.4155,
      "step": 31689
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0476619005203247,
      "learning_rate": 1.2826823548007252e-05,
      "loss": 2.3539,
      "step": 31690
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1100798845291138,
      "learning_rate": 1.2826428602120768e-05,
      "loss": 2.3484,
      "step": 31691
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1258373260498047,
      "learning_rate": 1.282603365144272e-05,
      "loss": 2.3018,
      "step": 31692
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1513442993164062,
      "learning_rate": 1.282563869597378e-05,
      "loss": 2.2748,
      "step": 31693
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0566543340682983,
      "learning_rate": 1.282524373571461e-05,
      "loss": 2.4861,
      "step": 31694
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0066472291946411,
      "learning_rate": 1.2824848770665887e-05,
      "loss": 2.2433,
      "step": 31695
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1045160293579102,
      "learning_rate": 1.2824453800828281e-05,
      "loss": 2.3051,
      "step": 31696
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0734888315200806,
      "learning_rate": 1.2824058826202457e-05,
      "loss": 2.3831,
      "step": 31697
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9716285467147827,
      "learning_rate": 1.2823663846789082e-05,
      "loss": 2.2069,
      "step": 31698
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9971434473991394,
      "learning_rate": 1.2823268862588833e-05,
      "loss": 2.2272,
      "step": 31699
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0985822677612305,
      "learning_rate": 1.2822873873602377e-05,
      "loss": 2.2933,
      "step": 31700
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0156251192092896,
      "learning_rate": 1.2822478879830383e-05,
      "loss": 2.3385,
      "step": 31701
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0213580131530762,
      "learning_rate": 1.2822083881273521e-05,
      "loss": 2.4474,
      "step": 31702
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0483275651931763,
      "learning_rate": 1.2821688877932462e-05,
      "loss": 2.4178,
      "step": 31703
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1791744232177734,
      "learning_rate": 1.2821293869807869e-05,
      "loss": 2.3907,
      "step": 31704
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0846940279006958,
      "learning_rate": 1.282089885690042e-05,
      "loss": 2.2244,
      "step": 31705
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0197659730911255,
      "learning_rate": 1.2820503839210781e-05,
      "loss": 2.416,
      "step": 31706
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.043015718460083,
      "learning_rate": 1.282010881673962e-05,
      "loss": 2.4956,
      "step": 31707
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0178439617156982,
      "learning_rate": 1.2819713789487613e-05,
      "loss": 2.2343,
      "step": 31708
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0206784009933472,
      "learning_rate": 1.281931875745542e-05,
      "loss": 2.2652,
      "step": 31709
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3160367012023926,
      "learning_rate": 1.2818923720643719e-05,
      "loss": 2.4011,
      "step": 31710
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1132659912109375,
      "learning_rate": 1.2818528679053177e-05,
      "loss": 2.2526,
      "step": 31711
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0755525827407837,
      "learning_rate": 1.2818133632684462e-05,
      "loss": 2.4585,
      "step": 31712
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0165278911590576,
      "learning_rate": 1.2817738581538246e-05,
      "loss": 2.2465,
      "step": 31713
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2290376424789429,
      "learning_rate": 1.2817343525615199e-05,
      "loss": 2.1951,
      "step": 31714
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2867937088012695,
      "learning_rate": 1.2816948464915989e-05,
      "loss": 2.2991,
      "step": 31715
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9731106162071228,
      "learning_rate": 1.2816553399441283e-05,
      "loss": 2.4777,
      "step": 31716
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0077664852142334,
      "learning_rate": 1.2816158329191757e-05,
      "loss": 2.4973,
      "step": 31717
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0538378953933716,
      "learning_rate": 1.2815763254168076e-05,
      "loss": 2.4264,
      "step": 31718
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1569390296936035,
      "learning_rate": 1.2815368174370912e-05,
      "loss": 2.4475,
      "step": 31719
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1844632625579834,
      "learning_rate": 1.2814973089800937e-05,
      "loss": 2.2115,
      "step": 31720
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0076801776885986,
      "learning_rate": 1.2814578000458816e-05,
      "loss": 2.3178,
      "step": 31721
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.091471791267395,
      "learning_rate": 1.281418290634522e-05,
      "loss": 2.484,
      "step": 31722
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9877938628196716,
      "learning_rate": 1.2813787807460822e-05,
      "loss": 2.2638,
      "step": 31723
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0853289365768433,
      "learning_rate": 1.281339270380629e-05,
      "loss": 2.3248,
      "step": 31724
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0687109231948853,
      "learning_rate": 1.281299759538229e-05,
      "loss": 2.3803,
      "step": 31725
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.195309042930603,
      "learning_rate": 1.2812602482189496e-05,
      "loss": 2.1527,
      "step": 31726
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2993297576904297,
      "learning_rate": 1.2812207364228577e-05,
      "loss": 2.1481,
      "step": 31727
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.193497657775879,
      "learning_rate": 1.2811812241500206e-05,
      "loss": 2.4174,
      "step": 31728
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1751577854156494,
      "learning_rate": 1.2811417114005045e-05,
      "loss": 2.3158,
      "step": 31729
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0660741329193115,
      "learning_rate": 1.281102198174377e-05,
      "loss": 2.3998,
      "step": 31730
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1008052825927734,
      "learning_rate": 1.2810626844717049e-05,
      "loss": 2.3887,
      "step": 31731
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0546609163284302,
      "learning_rate": 1.2810231702925551e-05,
      "loss": 2.2041,
      "step": 31732
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1303715705871582,
      "learning_rate": 1.2809836556369948e-05,
      "loss": 2.3763,
      "step": 31733
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0617254972457886,
      "learning_rate": 1.2809441405050912e-05,
      "loss": 2.4193,
      "step": 31734
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1310580968856812,
      "learning_rate": 1.2809046248969105e-05,
      "loss": 2.1644,
      "step": 31735
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.138161540031433,
      "learning_rate": 1.2808651088125204e-05,
      "loss": 2.3376,
      "step": 31736
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0543800592422485,
      "learning_rate": 1.2808255922519877e-05,
      "loss": 2.5196,
      "step": 31737
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0538725852966309,
      "learning_rate": 1.2807860752153794e-05,
      "loss": 2.3715,
      "step": 31738
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.248129963874817,
      "learning_rate": 1.280746557702762e-05,
      "loss": 2.4473,
      "step": 31739
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.084358811378479,
      "learning_rate": 1.2807070397142034e-05,
      "loss": 2.1571,
      "step": 31740
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0971888303756714,
      "learning_rate": 1.28066752124977e-05,
      "loss": 2.3329,
      "step": 31741
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.128955364227295,
      "learning_rate": 1.280628002309529e-05,
      "loss": 2.4972,
      "step": 31742
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1026015281677246,
      "learning_rate": 1.2805884828935473e-05,
      "loss": 2.2711,
      "step": 31743
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0734225511550903,
      "learning_rate": 1.2805489630018916e-05,
      "loss": 2.4213,
      "step": 31744
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0153281688690186,
      "learning_rate": 1.2805094426346296e-05,
      "loss": 2.5789,
      "step": 31745
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0212442874908447,
      "learning_rate": 1.2804699217918277e-05,
      "loss": 2.2454,
      "step": 31746
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9698053002357483,
      "learning_rate": 1.280430400473553e-05,
      "loss": 2.4124,
      "step": 31747
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3012416362762451,
      "learning_rate": 1.2803908786798732e-05,
      "loss": 2.3531,
      "step": 31748
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0915429592132568,
      "learning_rate": 1.2803513564108543e-05,
      "loss": 2.4991,
      "step": 31749
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1459919214248657,
      "learning_rate": 1.2803118336665636e-05,
      "loss": 2.361,
      "step": 31750
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0789984464645386,
      "learning_rate": 1.2802723104470682e-05,
      "loss": 2.4834,
      "step": 31751
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0397416353225708,
      "learning_rate": 1.2802327867524356e-05,
      "loss": 2.4773,
      "step": 31752
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0044654607772827,
      "learning_rate": 1.2801932625827318e-05,
      "loss": 2.3298,
      "step": 31753
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9868649840354919,
      "learning_rate": 1.2801537379380244e-05,
      "loss": 2.5764,
      "step": 31754
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2924329042434692,
      "learning_rate": 1.2801142128183805e-05,
      "loss": 2.3846,
      "step": 31755
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.057537317276001,
      "learning_rate": 1.2800746872238668e-05,
      "loss": 2.3551,
      "step": 31756
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2545301914215088,
      "learning_rate": 1.2800351611545506e-05,
      "loss": 2.3033,
      "step": 31757
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0475172996520996,
      "learning_rate": 1.2799956346104985e-05,
      "loss": 2.547,
      "step": 31758
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9895330667495728,
      "learning_rate": 1.2799561075917776e-05,
      "loss": 2.3664,
      "step": 31759
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0179487466812134,
      "learning_rate": 1.2799165800984554e-05,
      "loss": 2.2251,
      "step": 31760
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.101820945739746,
      "learning_rate": 1.2798770521305985e-05,
      "loss": 2.5613,
      "step": 31761
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.044242024421692,
      "learning_rate": 1.2798375236882741e-05,
      "loss": 2.1393,
      "step": 31762
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0810978412628174,
      "learning_rate": 1.279797994771549e-05,
      "loss": 2.4048,
      "step": 31763
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.050274133682251,
      "learning_rate": 1.2797584653804902e-05,
      "loss": 2.4194,
      "step": 31764
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0716089010238647,
      "learning_rate": 1.279718935515165e-05,
      "loss": 2.488,
      "step": 31765
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0272027254104614,
      "learning_rate": 1.2796794051756401e-05,
      "loss": 2.3232,
      "step": 31766
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0290583372116089,
      "learning_rate": 1.2796398743619824e-05,
      "loss": 2.4008,
      "step": 31767
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1096292734146118,
      "learning_rate": 1.2796003430742595e-05,
      "loss": 2.4222,
      "step": 31768
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0613586902618408,
      "learning_rate": 1.2795608113125382e-05,
      "loss": 2.2855,
      "step": 31769
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0417864322662354,
      "learning_rate": 1.2795212790768852e-05,
      "loss": 2.382,
      "step": 31770
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9657596945762634,
      "learning_rate": 1.2794817463673677e-05,
      "loss": 2.4926,
      "step": 31771
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0609582662582397,
      "learning_rate": 1.2794422131840527e-05,
      "loss": 2.5962,
      "step": 31772
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2511500120162964,
      "learning_rate": 1.2794026795270074e-05,
      "loss": 2.2222,
      "step": 31773
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9457343816757202,
      "learning_rate": 1.2793631453962987e-05,
      "loss": 2.5536,
      "step": 31774
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1165574789047241,
      "learning_rate": 1.2793236107919932e-05,
      "loss": 2.0318,
      "step": 31775
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0984243154525757,
      "learning_rate": 1.2792840757141588e-05,
      "loss": 2.2668,
      "step": 31776
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0282119512557983,
      "learning_rate": 1.279244540162862e-05,
      "loss": 2.3242,
      "step": 31777
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9683694243431091,
      "learning_rate": 1.2792050041381694e-05,
      "loss": 2.3914,
      "step": 31778
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0072605609893799,
      "learning_rate": 1.279165467640149e-05,
      "loss": 2.3137,
      "step": 31779
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.051289677619934,
      "learning_rate": 1.279125930668867e-05,
      "loss": 2.3761,
      "step": 31780
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0799468755722046,
      "learning_rate": 1.2790863932243908e-05,
      "loss": 2.5508,
      "step": 31781
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.08099365234375,
      "learning_rate": 1.2790468553067874e-05,
      "loss": 2.6143,
      "step": 31782
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.258817434310913,
      "learning_rate": 1.279007316916124e-05,
      "loss": 2.3619,
      "step": 31783
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.980292797088623,
      "learning_rate": 1.2789677780524672e-05,
      "loss": 2.3109,
      "step": 31784
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1707528829574585,
      "learning_rate": 1.2789282387158844e-05,
      "loss": 2.524,
      "step": 31785
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0453323125839233,
      "learning_rate": 1.2788886989064422e-05,
      "loss": 2.4611,
      "step": 31786
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9800356030464172,
      "learning_rate": 1.2788491586242082e-05,
      "loss": 2.2452,
      "step": 31787
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.050466775894165,
      "learning_rate": 1.2788096178692491e-05,
      "loss": 2.4479,
      "step": 31788
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.014301061630249,
      "learning_rate": 1.2787700766416318e-05,
      "loss": 2.4382,
      "step": 31789
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1972640752792358,
      "learning_rate": 1.2787305349414236e-05,
      "loss": 2.2779,
      "step": 31790
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9938864707946777,
      "learning_rate": 1.2786909927686912e-05,
      "loss": 2.362,
      "step": 31791
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4741692543029785,
      "learning_rate": 1.2786514501235021e-05,
      "loss": 2.1969,
      "step": 31792
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.095018982887268,
      "learning_rate": 1.2786119070059232e-05,
      "loss": 2.3217,
      "step": 31793
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.029380202293396,
      "learning_rate": 1.2785723634160215e-05,
      "loss": 2.5283,
      "step": 31794
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0549970865249634,
      "learning_rate": 1.2785328193538637e-05,
      "loss": 2.5823,
      "step": 31795
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.035000205039978,
      "learning_rate": 1.2784932748195174e-05,
      "loss": 2.4153,
      "step": 31796
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.110658884048462,
      "learning_rate": 1.278453729813049e-05,
      "loss": 2.3416,
      "step": 31797
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0364165306091309,
      "learning_rate": 1.2784141843345261e-05,
      "loss": 2.5905,
      "step": 31798
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.026427984237671,
      "learning_rate": 1.2783746383840156e-05,
      "loss": 2.4214,
      "step": 31799
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.046119213104248,
      "learning_rate": 1.2783350919615844e-05,
      "loss": 2.5076,
      "step": 31800
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.296065330505371,
      "learning_rate": 1.2782955450672998e-05,
      "loss": 2.3141,
      "step": 31801
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0247427225112915,
      "learning_rate": 1.2782559977012286e-05,
      "loss": 2.5246,
      "step": 31802
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9863447546958923,
      "learning_rate": 1.2782164498634377e-05,
      "loss": 2.5095,
      "step": 31803
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.020989179611206,
      "learning_rate": 1.2781769015539946e-05,
      "loss": 2.3189,
      "step": 31804
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.051417350769043,
      "learning_rate": 1.278137352772966e-05,
      "loss": 2.5046,
      "step": 31805
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0513323545455933,
      "learning_rate": 1.278097803520419e-05,
      "loss": 2.542,
      "step": 31806
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9491667747497559,
      "learning_rate": 1.278058253796421e-05,
      "loss": 2.606,
      "step": 31807
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0016672611236572,
      "learning_rate": 1.2780187036010384e-05,
      "loss": 2.2706,
      "step": 31808
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0431480407714844,
      "learning_rate": 1.2779791529343386e-05,
      "loss": 2.4574,
      "step": 31809
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.227695345878601,
      "learning_rate": 1.2779396017963887e-05,
      "loss": 2.3582,
      "step": 31810
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0221658945083618,
      "learning_rate": 1.2779000501872558e-05,
      "loss": 2.2909,
      "step": 31811
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9814035892486572,
      "learning_rate": 1.277860498107007e-05,
      "loss": 2.2798,
      "step": 31812
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1296669244766235,
      "learning_rate": 1.2778209455557088e-05,
      "loss": 2.3726,
      "step": 31813
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.048169732093811,
      "learning_rate": 1.2777813925334288e-05,
      "loss": 2.3903,
      "step": 31814
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1165931224822998,
      "learning_rate": 1.2777418390402339e-05,
      "loss": 2.4566,
      "step": 31815
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1035462617874146,
      "learning_rate": 1.2777022850761912e-05,
      "loss": 2.4253,
      "step": 31816
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1193344593048096,
      "learning_rate": 1.2776627306413673e-05,
      "loss": 2.1722,
      "step": 31817
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1313204765319824,
      "learning_rate": 1.2776231757358302e-05,
      "loss": 2.3125,
      "step": 31818
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1929072141647339,
      "learning_rate": 1.2775836203596462e-05,
      "loss": 2.3045,
      "step": 31819
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0833762884140015,
      "learning_rate": 1.2775440645128825e-05,
      "loss": 2.3416,
      "step": 31820
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0403848886489868,
      "learning_rate": 1.2775045081956064e-05,
      "loss": 2.2743,
      "step": 31821
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.574759006500244,
      "learning_rate": 1.2774649514078848e-05,
      "loss": 2.2791,
      "step": 31822
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.104360580444336,
      "learning_rate": 1.2774253941497845e-05,
      "loss": 2.2142,
      "step": 31823
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.078353762626648,
      "learning_rate": 1.277385836421373e-05,
      "loss": 2.3948,
      "step": 31824
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1061657667160034,
      "learning_rate": 1.2773462782227168e-05,
      "loss": 2.3215,
      "step": 31825
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.082710862159729,
      "learning_rate": 1.2773067195538838e-05,
      "loss": 2.3159,
      "step": 31826
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0273634195327759,
      "learning_rate": 1.2772671604149405e-05,
      "loss": 2.5449,
      "step": 31827
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0221762657165527,
      "learning_rate": 1.2772276008059541e-05,
      "loss": 2.5857,
      "step": 31828
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3804268836975098,
      "learning_rate": 1.2771880407269915e-05,
      "loss": 2.6613,
      "step": 31829
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0386890172958374,
      "learning_rate": 1.2771484801781197e-05,
      "loss": 2.5194,
      "step": 31830
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0012946128845215,
      "learning_rate": 1.2771089191594062e-05,
      "loss": 2.5115,
      "step": 31831
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1259715557098389,
      "learning_rate": 1.2770693576709179e-05,
      "loss": 2.3355,
      "step": 31832
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.213794231414795,
      "learning_rate": 1.2770297957127217e-05,
      "loss": 2.6093,
      "step": 31833
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0238633155822754,
      "learning_rate": 1.2769902332848847e-05,
      "loss": 2.5504,
      "step": 31834
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0132023096084595,
      "learning_rate": 1.2769506703874742e-05,
      "loss": 2.1315,
      "step": 31835
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2307813167572021,
      "learning_rate": 1.2769111070205566e-05,
      "loss": 2.3827,
      "step": 31836
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.00619375705719,
      "learning_rate": 1.2768715431841997e-05,
      "loss": 2.4484,
      "step": 31837
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.062631607055664,
      "learning_rate": 1.2768319788784709e-05,
      "loss": 2.3539,
      "step": 31838
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1741429567337036,
      "learning_rate": 1.2767924141034363e-05,
      "loss": 2.4401,
      "step": 31839
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1013447046279907,
      "learning_rate": 1.2767528488591632e-05,
      "loss": 2.3518,
      "step": 31840
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.105305552482605,
      "learning_rate": 1.276713283145719e-05,
      "loss": 2.4431,
      "step": 31841
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0342367887496948,
      "learning_rate": 1.276673716963171e-05,
      "loss": 2.3083,
      "step": 31842
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9861257672309875,
      "learning_rate": 1.2766341503115853e-05,
      "loss": 2.3125,
      "step": 31843
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0383388996124268,
      "learning_rate": 1.2765945831910298e-05,
      "loss": 2.4128,
      "step": 31844
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2551981210708618,
      "learning_rate": 1.2765550156015714e-05,
      "loss": 2.4161,
      "step": 31845
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0755765438079834,
      "learning_rate": 1.276515447543277e-05,
      "loss": 2.3198,
      "step": 31846
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0306522846221924,
      "learning_rate": 1.2764758790162142e-05,
      "loss": 2.4282,
      "step": 31847
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1272228956222534,
      "learning_rate": 1.2764363100204494e-05,
      "loss": 2.291,
      "step": 31848
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0139544010162354,
      "learning_rate": 1.2763967405560498e-05,
      "loss": 2.2669,
      "step": 31849
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1383122205734253,
      "learning_rate": 1.2763571706230828e-05,
      "loss": 2.3331,
      "step": 31850
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1249287128448486,
      "learning_rate": 1.2763176002216153e-05,
      "loss": 2.2086,
      "step": 31851
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2086477279663086,
      "learning_rate": 1.2762780293517145e-05,
      "loss": 2.3544,
      "step": 31852
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1125627756118774,
      "learning_rate": 1.2762384580134476e-05,
      "loss": 2.5128,
      "step": 31853
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0439413785934448,
      "learning_rate": 1.2761988862068815e-05,
      "loss": 2.3014,
      "step": 31854
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0330524444580078,
      "learning_rate": 1.2761593139320829e-05,
      "loss": 2.1765,
      "step": 31855
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0271064043045044,
      "learning_rate": 1.2761197411891195e-05,
      "loss": 2.2139,
      "step": 31856
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0582009553909302,
      "learning_rate": 1.276080167978058e-05,
      "loss": 2.3249,
      "step": 31857
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0507720708847046,
      "learning_rate": 1.2760405942989657e-05,
      "loss": 2.3765,
      "step": 31858
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0431879758834839,
      "learning_rate": 1.2760010201519095e-05,
      "loss": 2.2257,
      "step": 31859
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.003000020980835,
      "learning_rate": 1.2759614455369572e-05,
      "loss": 2.365,
      "step": 31860
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9784834980964661,
      "learning_rate": 1.2759218704541747e-05,
      "loss": 2.5444,
      "step": 31861
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0995516777038574,
      "learning_rate": 1.2758822949036298e-05,
      "loss": 2.4678,
      "step": 31862
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0866973400115967,
      "learning_rate": 1.2758427188853895e-05,
      "loss": 2.3675,
      "step": 31863
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9606524705886841,
      "learning_rate": 1.2758031423995207e-05,
      "loss": 2.3406,
      "step": 31864
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1277637481689453,
      "learning_rate": 1.275763565446091e-05,
      "loss": 2.2606,
      "step": 31865
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0393497943878174,
      "learning_rate": 1.2757239880251672e-05,
      "loss": 2.3572,
      "step": 31866
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3333253860473633,
      "learning_rate": 1.2756844101368161e-05,
      "loss": 2.529,
      "step": 31867
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.062792420387268,
      "learning_rate": 1.2756448317811053e-05,
      "loss": 2.4036,
      "step": 31868
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0930812358856201,
      "learning_rate": 1.2756052529581013e-05,
      "loss": 2.3,
      "step": 31869
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.9816736578941345,
      "learning_rate": 1.2755656736678718e-05,
      "loss": 2.4919,
      "step": 31870
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.0747077465057373,
      "learning_rate": 1.2755260939104836e-05,
      "loss": 2.4179,
      "step": 31871
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.084687352180481,
      "learning_rate": 1.2754865136860038e-05,
      "loss": 2.4619,
      "step": 31872
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.038082480430603,
      "learning_rate": 1.2754469329944996e-05,
      "loss": 2.5418,
      "step": 31873
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0473419427871704,
      "learning_rate": 1.2754073518360379e-05,
      "loss": 2.415,
      "step": 31874
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1732583045959473,
      "learning_rate": 1.2753677702106862e-05,
      "loss": 2.4834,
      "step": 31875
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.267785906791687,
      "learning_rate": 1.2753281881185109e-05,
      "loss": 2.5789,
      "step": 31876
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0186585187911987,
      "learning_rate": 1.2752886055595798e-05,
      "loss": 2.4796,
      "step": 31877
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.138444185256958,
      "learning_rate": 1.2752490225339598e-05,
      "loss": 2.4039,
      "step": 31878
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1352752447128296,
      "learning_rate": 1.2752094390417179e-05,
      "loss": 2.4537,
      "step": 31879
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0766185522079468,
      "learning_rate": 1.2751698550829215e-05,
      "loss": 2.6208,
      "step": 31880
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0478005409240723,
      "learning_rate": 1.275130270657637e-05,
      "loss": 2.3379,
      "step": 31881
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1669141054153442,
      "learning_rate": 1.2750906857659323e-05,
      "loss": 2.4661,
      "step": 31882
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1865378618240356,
      "learning_rate": 1.2750511004078737e-05,
      "loss": 2.4436,
      "step": 31883
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.100653052330017,
      "learning_rate": 1.2750115145835293e-05,
      "loss": 2.4621,
      "step": 31884
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0067421197891235,
      "learning_rate": 1.2749719282929656e-05,
      "loss": 2.1652,
      "step": 31885
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1077665090560913,
      "learning_rate": 1.2749323415362497e-05,
      "loss": 2.4887,
      "step": 31886
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.246358036994934,
      "learning_rate": 1.2748927543134488e-05,
      "loss": 2.4535,
      "step": 31887
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.21505606174469,
      "learning_rate": 1.27485316662463e-05,
      "loss": 2.5229,
      "step": 31888
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0838631391525269,
      "learning_rate": 1.2748135784698605e-05,
      "loss": 2.4902,
      "step": 31889
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2173678874969482,
      "learning_rate": 1.2747739898492072e-05,
      "loss": 2.3355,
      "step": 31890
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0632262229919434,
      "learning_rate": 1.2747344007627375e-05,
      "loss": 2.5277,
      "step": 31891
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.113383173942566,
      "learning_rate": 1.2746948112105184e-05,
      "loss": 2.3076,
      "step": 31892
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1144310235977173,
      "learning_rate": 1.2746552211926171e-05,
      "loss": 2.5442,
      "step": 31893
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1158636808395386,
      "learning_rate": 1.2746156307091004e-05,
      "loss": 2.4407,
      "step": 31894
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1133968830108643,
      "learning_rate": 1.2745760397600357e-05,
      "loss": 2.6312,
      "step": 31895
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.047821283340454,
      "learning_rate": 1.27453644834549e-05,
      "loss": 2.3133,
      "step": 31896
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0732795000076294,
      "learning_rate": 1.2744968564655307e-05,
      "loss": 2.3925,
      "step": 31897
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0879788398742676,
      "learning_rate": 1.2744572641202244e-05,
      "loss": 2.3692,
      "step": 31898
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.170997977256775,
      "learning_rate": 1.2744176713096386e-05,
      "loss": 2.3217,
      "step": 31899
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0898483991622925,
      "learning_rate": 1.2743780780338402e-05,
      "loss": 2.4293,
      "step": 31900
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.10120689868927,
      "learning_rate": 1.2743384842928964e-05,
      "loss": 2.4398,
      "step": 31901
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.16785728931427,
      "learning_rate": 1.2742988900868747e-05,
      "loss": 2.2828,
      "step": 31902
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9796920418739319,
      "learning_rate": 1.2742592954158414e-05,
      "loss": 2.5905,
      "step": 31903
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0217769145965576,
      "learning_rate": 1.2742197002798644e-05,
      "loss": 2.1532,
      "step": 31904
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0017756223678589,
      "learning_rate": 1.2741801046790105e-05,
      "loss": 2.6195,
      "step": 31905
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0876541137695312,
      "learning_rate": 1.2741405086133471e-05,
      "loss": 2.5608,
      "step": 31906
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1239776611328125,
      "learning_rate": 1.2741009120829406e-05,
      "loss": 2.342,
      "step": 31907
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1695688962936401,
      "learning_rate": 1.274061315087859e-05,
      "loss": 2.5102,
      "step": 31908
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0323642492294312,
      "learning_rate": 1.2740217176281688e-05,
      "loss": 2.2904,
      "step": 31909
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0207469463348389,
      "learning_rate": 1.2739821197039373e-05,
      "loss": 2.2428,
      "step": 31910
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0451120138168335,
      "learning_rate": 1.273942521315232e-05,
      "loss": 2.3632,
      "step": 31911
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0506013631820679,
      "learning_rate": 1.2739029224621193e-05,
      "loss": 2.2597,
      "step": 31912
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0754024982452393,
      "learning_rate": 1.2738633231446671e-05,
      "loss": 2.316,
      "step": 31913
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9773107171058655,
      "learning_rate": 1.2738237233629423e-05,
      "loss": 2.3035,
      "step": 31914
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.057289958000183,
      "learning_rate": 1.2737841231170117e-05,
      "loss": 2.5345,
      "step": 31915
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.011631727218628,
      "learning_rate": 1.2737445224069424e-05,
      "loss": 2.2874,
      "step": 31916
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1430747509002686,
      "learning_rate": 1.2737049212328022e-05,
      "loss": 2.4069,
      "step": 31917
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.027671217918396,
      "learning_rate": 1.2736653195946576e-05,
      "loss": 2.4812,
      "step": 31918
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.676833152770996,
      "learning_rate": 1.273625717492576e-05,
      "loss": 2.4369,
      "step": 31919
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2826839685440063,
      "learning_rate": 1.2735861149266246e-05,
      "loss": 2.3228,
      "step": 31920
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0275768041610718,
      "learning_rate": 1.2735465118968704e-05,
      "loss": 2.5015,
      "step": 31921
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1293338537216187,
      "learning_rate": 1.2735069084033805e-05,
      "loss": 2.3707,
      "step": 31922
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9710538983345032,
      "learning_rate": 1.273467304446222e-05,
      "loss": 2.3047,
      "step": 31923
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0919729471206665,
      "learning_rate": 1.2734277000254623e-05,
      "loss": 2.3834,
      "step": 31924
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.156988501548767,
      "learning_rate": 1.2733880951411686e-05,
      "loss": 2.3474,
      "step": 31925
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.047888994216919,
      "learning_rate": 1.2733484897934076e-05,
      "loss": 2.2665,
      "step": 31926
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0452320575714111,
      "learning_rate": 1.2733088839822465e-05,
      "loss": 2.1552,
      "step": 31927
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1025819778442383,
      "learning_rate": 1.2732692777077529e-05,
      "loss": 2.2995,
      "step": 31928
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9727495908737183,
      "learning_rate": 1.2732296709699935e-05,
      "loss": 2.3229,
      "step": 31929
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9763084650039673,
      "learning_rate": 1.2731900637690356e-05,
      "loss": 2.4641,
      "step": 31930
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0102498531341553,
      "learning_rate": 1.2731504561049463e-05,
      "loss": 2.4317,
      "step": 31931
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.030619740486145,
      "learning_rate": 1.2731108479777931e-05,
      "loss": 2.3938,
      "step": 31932
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0786985158920288,
      "learning_rate": 1.2730712393876425e-05,
      "loss": 2.2783,
      "step": 31933
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9783939123153687,
      "learning_rate": 1.2730316303345622e-05,
      "loss": 2.1194,
      "step": 31934
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0025304555892944,
      "learning_rate": 1.2729920208186188e-05,
      "loss": 2.4658,
      "step": 31935
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0277553796768188,
      "learning_rate": 1.27295241083988e-05,
      "loss": 2.4917,
      "step": 31936
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0626333951950073,
      "learning_rate": 1.2729128003984128e-05,
      "loss": 2.2529,
      "step": 31937
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1438194513320923,
      "learning_rate": 1.272873189494284e-05,
      "loss": 2.5374,
      "step": 31938
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1197049617767334,
      "learning_rate": 1.2728335781275615e-05,
      "loss": 2.2838,
      "step": 31939
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.141076683998108,
      "learning_rate": 1.2727939662983117e-05,
      "loss": 2.3617,
      "step": 31940
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.148090124130249,
      "learning_rate": 1.272754354006602e-05,
      "loss": 2.3382,
      "step": 31941
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0697368383407593,
      "learning_rate": 1.2727147412524995e-05,
      "loss": 2.5546,
      "step": 31942
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0357205867767334,
      "learning_rate": 1.2726751280360718e-05,
      "loss": 2.2485,
      "step": 31943
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1108646392822266,
      "learning_rate": 1.2726355143573854e-05,
      "loss": 2.3445,
      "step": 31944
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0200399160385132,
      "learning_rate": 1.2725959002165078e-05,
      "loss": 2.2906,
      "step": 31945
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0076055526733398,
      "learning_rate": 1.2725562856135065e-05,
      "loss": 2.4828,
      "step": 31946
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.018688440322876,
      "learning_rate": 1.2725166705484477e-05,
      "loss": 2.3036,
      "step": 31947
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9773655533790588,
      "learning_rate": 1.2724770550213993e-05,
      "loss": 2.3771,
      "step": 31948
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0025885105133057,
      "learning_rate": 1.2724374390324283e-05,
      "loss": 2.1972,
      "step": 31949
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0198389291763306,
      "learning_rate": 1.2723978225816021e-05,
      "loss": 2.3242,
      "step": 31950
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0431920289993286,
      "learning_rate": 1.2723582056689874e-05,
      "loss": 2.4909,
      "step": 31951
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2386606931686401,
      "learning_rate": 1.2723185882946517e-05,
      "loss": 2.5771,
      "step": 31952
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.997424304485321,
      "learning_rate": 1.2722789704586618e-05,
      "loss": 2.3459,
      "step": 31953
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.127254843711853,
      "learning_rate": 1.2722393521610852e-05,
      "loss": 2.4072,
      "step": 31954
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1715623140335083,
      "learning_rate": 1.2721997334019889e-05,
      "loss": 2.3005,
      "step": 31955
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1290273666381836,
      "learning_rate": 1.2721601141814403e-05,
      "loss": 2.4031,
      "step": 31956
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.094620943069458,
      "learning_rate": 1.2721204944995064e-05,
      "loss": 2.4542,
      "step": 31957
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2081702947616577,
      "learning_rate": 1.272080874356254e-05,
      "loss": 2.3145,
      "step": 31958
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1656463146209717,
      "learning_rate": 1.272041253751751e-05,
      "loss": 2.2485,
      "step": 31959
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9393755793571472,
      "learning_rate": 1.2720016326860642e-05,
      "loss": 2.4708,
      "step": 31960
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1627979278564453,
      "learning_rate": 1.2719620111592605e-05,
      "loss": 2.5974,
      "step": 31961
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2499831914901733,
      "learning_rate": 1.2719223891714074e-05,
      "loss": 2.5934,
      "step": 31962
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1292568445205688,
      "learning_rate": 1.271882766722572e-05,
      "loss": 2.5209,
      "step": 31963
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9848510026931763,
      "learning_rate": 1.2718431438128215e-05,
      "loss": 2.4641,
      "step": 31964
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1143648624420166,
      "learning_rate": 1.2718035204422232e-05,
      "loss": 2.3156,
      "step": 31965
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2207914590835571,
      "learning_rate": 1.2717638966108438e-05,
      "loss": 2.3849,
      "step": 31966
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0933791399002075,
      "learning_rate": 1.271724272318751e-05,
      "loss": 2.3771,
      "step": 31967
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.163543939590454,
      "learning_rate": 1.2716846475660116e-05,
      "loss": 2.2217,
      "step": 31968
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.981691300868988,
      "learning_rate": 1.271645022352693e-05,
      "loss": 2.2531,
      "step": 31969
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9791170358657837,
      "learning_rate": 1.2716053966788623e-05,
      "loss": 2.0911,
      "step": 31970
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1571701765060425,
      "learning_rate": 1.2715657705445867e-05,
      "loss": 2.177,
      "step": 31971
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1131234169006348,
      "learning_rate": 1.2715261439499335e-05,
      "loss": 2.3933,
      "step": 31972
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0719246864318848,
      "learning_rate": 1.2714865168949697e-05,
      "loss": 2.2384,
      "step": 31973
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0355637073516846,
      "learning_rate": 1.2714468893797625e-05,
      "loss": 2.3029,
      "step": 31974
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.085255742073059,
      "learning_rate": 1.2714072614043788e-05,
      "loss": 2.4722,
      "step": 31975
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.025535225868225,
      "learning_rate": 1.2713676329688865e-05,
      "loss": 2.3551,
      "step": 31976
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0846147537231445,
      "learning_rate": 1.271328004073352e-05,
      "loss": 2.5187,
      "step": 31977
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9694767594337463,
      "learning_rate": 1.2712883747178432e-05,
      "loss": 2.2865,
      "step": 31978
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0254989862442017,
      "learning_rate": 1.271248744902427e-05,
      "loss": 2.5508,
      "step": 31979
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1101734638214111,
      "learning_rate": 1.2712091146271699e-05,
      "loss": 2.403,
      "step": 31980
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1459566354751587,
      "learning_rate": 1.2711694838921401e-05,
      "loss": 2.4381,
      "step": 31981
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0608760118484497,
      "learning_rate": 1.2711298526974043e-05,
      "loss": 2.1593,
      "step": 31982
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9996775984764099,
      "learning_rate": 1.2710902210430299e-05,
      "loss": 2.4127,
      "step": 31983
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0912938117980957,
      "learning_rate": 1.2710505889290838e-05,
      "loss": 2.359,
      "step": 31984
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0075035095214844,
      "learning_rate": 1.2710109563556334e-05,
      "loss": 2.2251,
      "step": 31985
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0567595958709717,
      "learning_rate": 1.270971323322746e-05,
      "loss": 2.3806,
      "step": 31986
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1440328359603882,
      "learning_rate": 1.2709316898304879e-05,
      "loss": 2.3695,
      "step": 31987
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9691359400749207,
      "learning_rate": 1.270892055878928e-05,
      "loss": 2.4622,
      "step": 31988
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0302129983901978,
      "learning_rate": 1.270852421468132e-05,
      "loss": 2.4998,
      "step": 31989
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9771643280982971,
      "learning_rate": 1.2708127865981674e-05,
      "loss": 2.1668,
      "step": 31990
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0210795402526855,
      "learning_rate": 1.2707731512691017e-05,
      "loss": 2.4104,
      "step": 31991
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.001718282699585,
      "learning_rate": 1.2707335154810023e-05,
      "loss": 2.4939,
      "step": 31992
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1952790021896362,
      "learning_rate": 1.2706938792339356e-05,
      "loss": 2.3607,
      "step": 31993
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1106115579605103,
      "learning_rate": 1.2706542425279696e-05,
      "loss": 2.3308,
      "step": 31994
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0140419006347656,
      "learning_rate": 1.270614605363171e-05,
      "loss": 2.3359,
      "step": 31995
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0678194761276245,
      "learning_rate": 1.2705749677396068e-05,
      "loss": 2.4598,
      "step": 31996
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0687081813812256,
      "learning_rate": 1.2705353296573448e-05,
      "loss": 2.3207,
      "step": 31997
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0667184591293335,
      "learning_rate": 1.2704956911164523e-05,
      "loss": 2.6019,
      "step": 31998
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0185455083847046,
      "learning_rate": 1.270456052116996e-05,
      "loss": 2.4636,
      "step": 31999
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1122641563415527,
      "learning_rate": 1.270416412659043e-05,
      "loss": 2.5009,
      "step": 32000
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1500675678253174,
      "learning_rate": 1.2703767727426608e-05,
      "loss": 2.5001,
      "step": 32001
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0098648071289062,
      "learning_rate": 1.2703371323679163e-05,
      "loss": 2.2879,
      "step": 32002
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0982012748718262,
      "learning_rate": 1.2702974915348774e-05,
      "loss": 2.273,
      "step": 32003
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1164941787719727,
      "learning_rate": 1.2702578502436105e-05,
      "loss": 2.4664,
      "step": 32004
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9875032901763916,
      "learning_rate": 1.2702182084941834e-05,
      "loss": 2.4515,
      "step": 32005
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.001783847808838,
      "learning_rate": 1.270178566286663e-05,
      "loss": 2.166,
      "step": 32006
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.121203064918518,
      "learning_rate": 1.2701389236211163e-05,
      "loss": 2.3934,
      "step": 32007
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.176138162612915,
      "learning_rate": 1.270099280497611e-05,
      "loss": 2.3786,
      "step": 32008
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1405730247497559,
      "learning_rate": 1.270059636916214e-05,
      "loss": 2.1917,
      "step": 32009
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1711033582687378,
      "learning_rate": 1.2700199928769926e-05,
      "loss": 2.5401,
      "step": 32010
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.032057762145996,
      "learning_rate": 1.2699803483800142e-05,
      "loss": 2.2516,
      "step": 32011
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9485176801681519,
      "learning_rate": 1.2699407034253453e-05,
      "loss": 2.3765,
      "step": 32012
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9895279407501221,
      "learning_rate": 1.2699010580130537e-05,
      "loss": 2.3606,
      "step": 32013
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1678569316864014,
      "learning_rate": 1.2698614121432067e-05,
      "loss": 2.2585,
      "step": 32014
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.091640830039978,
      "learning_rate": 1.2698217658158713e-05,
      "loss": 2.4101,
      "step": 32015
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.088740348815918,
      "learning_rate": 1.2697821190311149e-05,
      "loss": 2.5,
      "step": 32016
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9946261644363403,
      "learning_rate": 1.269742471789004e-05,
      "loss": 2.32,
      "step": 32017
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.930112361907959,
      "learning_rate": 1.2697028240896067e-05,
      "loss": 2.3481,
      "step": 32018
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1224726438522339,
      "learning_rate": 1.26966317593299e-05,
      "loss": 2.462,
      "step": 32019
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0312116146087646,
      "learning_rate": 1.2696235273192208e-05,
      "loss": 2.4978,
      "step": 32020
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.128554344177246,
      "learning_rate": 1.2695838782483665e-05,
      "loss": 2.5334,
      "step": 32021
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9699029922485352,
      "learning_rate": 1.2695442287204942e-05,
      "loss": 2.4471,
      "step": 32022
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0938154458999634,
      "learning_rate": 1.2695045787356712e-05,
      "loss": 2.2984,
      "step": 32023
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0202528238296509,
      "learning_rate": 1.2694649282939648e-05,
      "loss": 2.4908,
      "step": 32024
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1000217199325562,
      "learning_rate": 1.2694252773954423e-05,
      "loss": 2.5543,
      "step": 32025
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.047121286392212,
      "learning_rate": 1.2693856260401706e-05,
      "loss": 2.77,
      "step": 32026
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9884478449821472,
      "learning_rate": 1.2693459742282172e-05,
      "loss": 2.4152,
      "step": 32027
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0674071311950684,
      "learning_rate": 1.2693063219596493e-05,
      "loss": 2.2818,
      "step": 32028
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0745638608932495,
      "learning_rate": 1.2692666692345339e-05,
      "loss": 2.6193,
      "step": 32029
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0453202724456787,
      "learning_rate": 1.2692270160529386e-05,
      "loss": 2.4404,
      "step": 32030
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1474467515945435,
      "learning_rate": 1.2691873624149303e-05,
      "loss": 2.4738,
      "step": 32031
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3036891222000122,
      "learning_rate": 1.2691477083205762e-05,
      "loss": 2.4089,
      "step": 32032
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0643609762191772,
      "learning_rate": 1.2691080537699434e-05,
      "loss": 2.2991,
      "step": 32033
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0870403051376343,
      "learning_rate": 1.2690683987630996e-05,
      "loss": 2.3022,
      "step": 32034
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0589174032211304,
      "learning_rate": 1.2690287433001119e-05,
      "loss": 2.4848,
      "step": 32035
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0874888896942139,
      "learning_rate": 1.2689890873810474e-05,
      "loss": 2.5127,
      "step": 32036
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9557760953903198,
      "learning_rate": 1.2689494310059729e-05,
      "loss": 2.5564,
      "step": 32037
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0731045007705688,
      "learning_rate": 1.2689097741749568e-05,
      "loss": 2.3886,
      "step": 32038
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.074091911315918,
      "learning_rate": 1.268870116888065e-05,
      "loss": 2.5011,
      "step": 32039
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.02193284034729,
      "learning_rate": 1.2688304591453657e-05,
      "loss": 2.1728,
      "step": 32040
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0620396137237549,
      "learning_rate": 1.2687908009469254e-05,
      "loss": 2.3943,
      "step": 32041
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1450849771499634,
      "learning_rate": 1.2687511422928118e-05,
      "loss": 2.4999,
      "step": 32042
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0330263376235962,
      "learning_rate": 1.2687114831830924e-05,
      "loss": 2.366,
      "step": 32043
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0635676383972168,
      "learning_rate": 1.268671823617834e-05,
      "loss": 2.2111,
      "step": 32044
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1708698272705078,
      "learning_rate": 1.2686321635971032e-05,
      "loss": 2.4246,
      "step": 32045
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0623143911361694,
      "learning_rate": 1.2685925031209685e-05,
      "loss": 2.4628,
      "step": 32046
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0646294355392456,
      "learning_rate": 1.2685528421894965e-05,
      "loss": 2.2471,
      "step": 32047
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9359006881713867,
      "learning_rate": 1.2685131808027547e-05,
      "loss": 2.2926,
      "step": 32048
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0185805559158325,
      "learning_rate": 1.2684735189608098e-05,
      "loss": 2.3329,
      "step": 32049
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0381643772125244,
      "learning_rate": 1.2684338566637293e-05,
      "loss": 2.6195,
      "step": 32050
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0726298093795776,
      "learning_rate": 1.268394193911581e-05,
      "loss": 2.3684,
      "step": 32051
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0430331230163574,
      "learning_rate": 1.2683545307044312e-05,
      "loss": 2.3447,
      "step": 32052
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9402742385864258,
      "learning_rate": 1.2683148670423474e-05,
      "loss": 2.2506,
      "step": 32053
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0693506002426147,
      "learning_rate": 1.2682752029253976e-05,
      "loss": 2.5142,
      "step": 32054
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0303010940551758,
      "learning_rate": 1.268235538353648e-05,
      "loss": 2.1553,
      "step": 32055
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1036391258239746,
      "learning_rate": 1.2681958733271664e-05,
      "loss": 2.45,
      "step": 32056
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0701559782028198,
      "learning_rate": 1.26815620784602e-05,
      "loss": 2.3858,
      "step": 32057
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0885324478149414,
      "learning_rate": 1.2681165419102762e-05,
      "loss": 2.4637,
      "step": 32058
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0097119808197021,
      "learning_rate": 1.2680768755200018e-05,
      "loss": 2.3327,
      "step": 32059
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9814662933349609,
      "learning_rate": 1.2680372086752643e-05,
      "loss": 2.196,
      "step": 32060
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1679385900497437,
      "learning_rate": 1.267997541376131e-05,
      "loss": 2.3876,
      "step": 32061
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.044158697128296,
      "learning_rate": 1.267957873622669e-05,
      "loss": 2.3738,
      "step": 32062
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1638952493667603,
      "learning_rate": 1.2679182054149457e-05,
      "loss": 2.3643,
      "step": 32063
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0691540241241455,
      "learning_rate": 1.2678785367530285e-05,
      "loss": 2.3382,
      "step": 32064
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1293275356292725,
      "learning_rate": 1.2678388676369841e-05,
      "loss": 2.5667,
      "step": 32065
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0491238832473755,
      "learning_rate": 1.2677991980668802e-05,
      "loss": 2.3712,
      "step": 32066
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9632073640823364,
      "learning_rate": 1.2677595280427838e-05,
      "loss": 2.297,
      "step": 32067
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0775198936462402,
      "learning_rate": 1.2677198575647625e-05,
      "loss": 2.3689,
      "step": 32068
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0504732131958008,
      "learning_rate": 1.2676801866328832e-05,
      "loss": 2.3909,
      "step": 32069
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2589391469955444,
      "learning_rate": 1.267640515247213e-05,
      "loss": 2.3031,
      "step": 32070
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.278923749923706,
      "learning_rate": 1.2676008434078199e-05,
      "loss": 2.3519,
      "step": 32071
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1101080179214478,
      "learning_rate": 1.2675611711147707e-05,
      "loss": 2.4127,
      "step": 32072
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.991369903087616,
      "learning_rate": 1.2675214983681325e-05,
      "loss": 2.4197,
      "step": 32073
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0081490278244019,
      "learning_rate": 1.2674818251679728e-05,
      "loss": 2.2758,
      "step": 32074
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1526724100112915,
      "learning_rate": 1.2674421515143584e-05,
      "loss": 2.1525,
      "step": 32075
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9451174736022949,
      "learning_rate": 1.2674024774073573e-05,
      "loss": 2.2493,
      "step": 32076
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0399974584579468,
      "learning_rate": 1.2673628028470361e-05,
      "loss": 2.2307,
      "step": 32077
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1506632566452026,
      "learning_rate": 1.2673231278334627e-05,
      "loss": 2.2899,
      "step": 32078
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9647251963615417,
      "learning_rate": 1.2672834523667037e-05,
      "loss": 2.0485,
      "step": 32079
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1196645498275757,
      "learning_rate": 1.2672437764468265e-05,
      "loss": 2.4798,
      "step": 32080
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.054582953453064,
      "learning_rate": 1.2672041000738988e-05,
      "loss": 2.2969,
      "step": 32081
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.076304316520691,
      "learning_rate": 1.2671644232479876e-05,
      "loss": 2.4339,
      "step": 32082
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1134566068649292,
      "learning_rate": 1.2671247459691601e-05,
      "loss": 2.3355,
      "step": 32083
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2550630569458008,
      "learning_rate": 1.2670850682374836e-05,
      "loss": 2.27,
      "step": 32084
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9769336581230164,
      "learning_rate": 1.2670453900530254e-05,
      "loss": 2.5572,
      "step": 32085
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0786997079849243,
      "learning_rate": 1.2670057114158525e-05,
      "loss": 2.2933,
      "step": 32086
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0697689056396484,
      "learning_rate": 1.2669660323260326e-05,
      "loss": 2.2197,
      "step": 32087
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1256049871444702,
      "learning_rate": 1.2669263527836326e-05,
      "loss": 2.3166,
      "step": 32088
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1533839702606201,
      "learning_rate": 1.26688667278872e-05,
      "loss": 2.5609,
      "step": 32089
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1703996658325195,
      "learning_rate": 1.2668469923413622e-05,
      "loss": 2.2811,
      "step": 32090
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.065557599067688,
      "learning_rate": 1.2668073114416261e-05,
      "loss": 2.2389,
      "step": 32091
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.081125259399414,
      "learning_rate": 1.2667676300895793e-05,
      "loss": 2.3607,
      "step": 32092
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2312558889389038,
      "learning_rate": 1.2667279482852886e-05,
      "loss": 2.1992,
      "step": 32093
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0265623331069946,
      "learning_rate": 1.2666882660288218e-05,
      "loss": 2.3025,
      "step": 32094
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0465322732925415,
      "learning_rate": 1.266648583320246e-05,
      "loss": 2.3678,
      "step": 32095
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2390644550323486,
      "learning_rate": 1.2666089001596282e-05,
      "loss": 2.3475,
      "step": 32096
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.117904782295227,
      "learning_rate": 1.2665692165470364e-05,
      "loss": 2.4397,
      "step": 32097
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0707762241363525,
      "learning_rate": 1.266529532482537e-05,
      "loss": 2.397,
      "step": 32098
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0431280136108398,
      "learning_rate": 1.2664898479661975e-05,
      "loss": 2.4798,
      "step": 32099
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.093904972076416,
      "learning_rate": 1.2664501629980856e-05,
      "loss": 2.4206,
      "step": 32100
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0372352600097656,
      "learning_rate": 1.2664104775782683e-05,
      "loss": 2.2404,
      "step": 32101
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0869650840759277,
      "learning_rate": 1.2663707917068128e-05,
      "loss": 2.4016,
      "step": 32102
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1686995029449463,
      "learning_rate": 1.2663311053837866e-05,
      "loss": 2.5325,
      "step": 32103
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9919195771217346,
      "learning_rate": 1.2662914186092568e-05,
      "loss": 2.3722,
      "step": 32104
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.284035563468933,
      "learning_rate": 1.2662517313832904e-05,
      "loss": 2.3688,
      "step": 32105
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0288310050964355,
      "learning_rate": 1.2662120437059555e-05,
      "loss": 2.2983,
      "step": 32106
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.014034390449524,
      "learning_rate": 1.2661723555773186e-05,
      "loss": 2.2478,
      "step": 32107
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0055692195892334,
      "learning_rate": 1.2661326669974474e-05,
      "loss": 2.5387,
      "step": 32108
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3272675275802612,
      "learning_rate": 1.2660929779664089e-05,
      "loss": 2.2663,
      "step": 32109
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1878182888031006,
      "learning_rate": 1.2660532884842707e-05,
      "loss": 2.3473,
      "step": 32110
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0705841779708862,
      "learning_rate": 1.2660135985510998e-05,
      "loss": 2.3444,
      "step": 32111
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.202901005744934,
      "learning_rate": 1.2659739081669634e-05,
      "loss": 2.5853,
      "step": 32112
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9984095096588135,
      "learning_rate": 1.2659342173319293e-05,
      "loss": 2.3635,
      "step": 32113
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1751772165298462,
      "learning_rate": 1.2658945260460644e-05,
      "loss": 2.1727,
      "step": 32114
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1877915859222412,
      "learning_rate": 1.2658548343094358e-05,
      "loss": 2.4835,
      "step": 32115
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0111637115478516,
      "learning_rate": 1.2658151421221114e-05,
      "loss": 2.3127,
      "step": 32116
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9654130935668945,
      "learning_rate": 1.2657754494841581e-05,
      "loss": 2.5307,
      "step": 32117
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9324354529380798,
      "learning_rate": 1.2657357563956431e-05,
      "loss": 2.3244,
      "step": 32118
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.048385739326477,
      "learning_rate": 1.265696062856634e-05,
      "loss": 2.6888,
      "step": 32119
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.029085397720337,
      "learning_rate": 1.2656563688671976e-05,
      "loss": 2.445,
      "step": 32120
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0347810983657837,
      "learning_rate": 1.2656166744274017e-05,
      "loss": 2.3833,
      "step": 32121
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.057892918586731,
      "learning_rate": 1.2655769795373135e-05,
      "loss": 2.4334,
      "step": 32122
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0861785411834717,
      "learning_rate": 1.2655372841970002e-05,
      "loss": 2.4786,
      "step": 32123
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0706470012664795,
      "learning_rate": 1.2654975884065289e-05,
      "loss": 2.386,
      "step": 32124
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.202967882156372,
      "learning_rate": 1.2654578921659671e-05,
      "loss": 2.2256,
      "step": 32125
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.068750262260437,
      "learning_rate": 1.2654181954753822e-05,
      "loss": 2.3789,
      "step": 32126
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1805016994476318,
      "learning_rate": 1.2653784983348411e-05,
      "loss": 2.3542,
      "step": 32127
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0441406965255737,
      "learning_rate": 1.2653388007444116e-05,
      "loss": 2.3201,
      "step": 32128
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0570040941238403,
      "learning_rate": 1.2652991027041606e-05,
      "loss": 2.2585,
      "step": 32129
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0892553329467773,
      "learning_rate": 1.265259404214156e-05,
      "loss": 2.5475,
      "step": 32130
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0643527507781982,
      "learning_rate": 1.265219705274464e-05,
      "loss": 2.447,
      "step": 32131
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1189227104187012,
      "learning_rate": 1.265180005885153e-05,
      "loss": 2.4587,
      "step": 32132
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1047718524932861,
      "learning_rate": 1.26514030604629e-05,
      "loss": 2.1666,
      "step": 32133
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0418791770935059,
      "learning_rate": 1.265100605757942e-05,
      "loss": 2.3018,
      "step": 32134
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2201083898544312,
      "learning_rate": 1.2650609050201762e-05,
      "loss": 2.3948,
      "step": 32135
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0456664562225342,
      "learning_rate": 1.2650212038330605e-05,
      "loss": 2.5132,
      "step": 32136
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1006630659103394,
      "learning_rate": 1.264981502196662e-05,
      "loss": 2.3874,
      "step": 32137
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1566359996795654,
      "learning_rate": 1.2649418001110476e-05,
      "loss": 2.4344,
      "step": 32138
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1946525573730469,
      "learning_rate": 1.264902097576285e-05,
      "loss": 2.5051,
      "step": 32139
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.103743314743042,
      "learning_rate": 1.2648623945924414e-05,
      "loss": 2.1609,
      "step": 32140
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0988786220550537,
      "learning_rate": 1.264822691159584e-05,
      "loss": 2.3084,
      "step": 32141
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0501552820205688,
      "learning_rate": 1.2647829872777801e-05,
      "loss": 2.1512,
      "step": 32142
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1617817878723145,
      "learning_rate": 1.2647432829470976e-05,
      "loss": 2.4668,
      "step": 32143
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6823737621307373,
      "learning_rate": 1.264703578167603e-05,
      "loss": 2.3719,
      "step": 32144
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2378662824630737,
      "learning_rate": 1.264663872939364e-05,
      "loss": 2.5566,
      "step": 32145
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9885187149047852,
      "learning_rate": 1.2646241672624477e-05,
      "loss": 2.2728,
      "step": 32146
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0520952939987183,
      "learning_rate": 1.2645844611369219e-05,
      "loss": 2.3577,
      "step": 32147
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0795893669128418,
      "learning_rate": 1.264544754562853e-05,
      "loss": 2.4931,
      "step": 32148
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.102967381477356,
      "learning_rate": 1.2645050475403093e-05,
      "loss": 2.4189,
      "step": 32149
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0513473749160767,
      "learning_rate": 1.2644653400693578e-05,
      "loss": 2.3004,
      "step": 32150
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0483287572860718,
      "learning_rate": 1.2644256321500657e-05,
      "loss": 2.3016,
      "step": 32151
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2068893909454346,
      "learning_rate": 1.2643859237825002e-05,
      "loss": 2.4122,
      "step": 32152
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1324474811553955,
      "learning_rate": 1.2643462149667287e-05,
      "loss": 2.4372,
      "step": 32153
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0777361392974854,
      "learning_rate": 1.2643065057028187e-05,
      "loss": 2.2616,
      "step": 32154
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9413474202156067,
      "learning_rate": 1.2642667959908373e-05,
      "loss": 2.2604,
      "step": 32155
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0473847389221191,
      "learning_rate": 1.264227085830852e-05,
      "loss": 2.3977,
      "step": 32156
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1221811771392822,
      "learning_rate": 1.2641873752229299e-05,
      "loss": 2.1898,
      "step": 32157
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.17063570022583,
      "learning_rate": 1.2641476641671387e-05,
      "loss": 2.4647,
      "step": 32158
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0015537738800049,
      "learning_rate": 1.2641079526635451e-05,
      "loss": 2.5808,
      "step": 32159
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0120978355407715,
      "learning_rate": 1.264068240712217e-05,
      "loss": 2.5317,
      "step": 32160
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0696066617965698,
      "learning_rate": 1.2640285283132217e-05,
      "loss": 2.3946,
      "step": 32161
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.094826102256775,
      "learning_rate": 1.2639888154666262e-05,
      "loss": 2.3201,
      "step": 32162
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2735552787780762,
      "learning_rate": 1.2639491021724977e-05,
      "loss": 2.2352,
      "step": 32163
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0038310289382935,
      "learning_rate": 1.263909388430904e-05,
      "loss": 2.1969,
      "step": 32164
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9765909910202026,
      "learning_rate": 1.2638696742419124e-05,
      "loss": 2.3245,
      "step": 32165
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0720444917678833,
      "learning_rate": 1.2638299596055897e-05,
      "loss": 2.6157,
      "step": 32166
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0774084329605103,
      "learning_rate": 1.2637902445220036e-05,
      "loss": 2.5421,
      "step": 32167
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1156491041183472,
      "learning_rate": 1.2637505289912216e-05,
      "loss": 2.2319,
      "step": 32168
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1693452596664429,
      "learning_rate": 1.2637108130133107e-05,
      "loss": 2.3954,
      "step": 32169
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0995999574661255,
      "learning_rate": 1.2636710965883384e-05,
      "loss": 2.3494,
      "step": 32170
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0041754245758057,
      "learning_rate": 1.263631379716372e-05,
      "loss": 2.513,
      "step": 32171
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9723935723304749,
      "learning_rate": 1.2635916623974787e-05,
      "loss": 2.2292,
      "step": 32172
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0918668508529663,
      "learning_rate": 1.2635519446317259e-05,
      "loss": 2.5307,
      "step": 32173
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0982338190078735,
      "learning_rate": 1.2635122264191808e-05,
      "loss": 2.5145,
      "step": 32174
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0469926595687866,
      "learning_rate": 1.2634725077599112e-05,
      "loss": 2.3251,
      "step": 32175
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9537873268127441,
      "learning_rate": 1.2634327886539844e-05,
      "loss": 2.3152,
      "step": 32176
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.281372308731079,
      "learning_rate": 1.263393069101467e-05,
      "loss": 2.5938,
      "step": 32177
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0971651077270508,
      "learning_rate": 1.263353349102427e-05,
      "loss": 2.4415,
      "step": 32178
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1797151565551758,
      "learning_rate": 1.2633136286569314e-05,
      "loss": 2.3947,
      "step": 32179
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3609440326690674,
      "learning_rate": 1.263273907765048e-05,
      "loss": 2.1419,
      "step": 32180
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1029876470565796,
      "learning_rate": 1.2632341864268436e-05,
      "loss": 2.3665,
      "step": 32181
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0916523933410645,
      "learning_rate": 1.2631944646423855e-05,
      "loss": 2.1905,
      "step": 32182
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.008505940437317,
      "learning_rate": 1.263154742411742e-05,
      "loss": 2.4285,
      "step": 32183
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0549559593200684,
      "learning_rate": 1.2631150197349792e-05,
      "loss": 2.4497,
      "step": 32184
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2934982776641846,
      "learning_rate": 1.2630752966121651e-05,
      "loss": 2.3908,
      "step": 32185
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0756348371505737,
      "learning_rate": 1.263035573043367e-05,
      "loss": 2.5923,
      "step": 32186
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0258761644363403,
      "learning_rate": 1.262995849028652e-05,
      "loss": 2.291,
      "step": 32187
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0420130491256714,
      "learning_rate": 1.2629561245680877e-05,
      "loss": 2.5419,
      "step": 32188
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.283482313156128,
      "learning_rate": 1.2629163996617415e-05,
      "loss": 2.2927,
      "step": 32189
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2499704360961914,
      "learning_rate": 1.2628766743096806e-05,
      "loss": 2.2439,
      "step": 32190
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.138260841369629,
      "learning_rate": 1.262836948511972e-05,
      "loss": 2.4899,
      "step": 32191
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0689496994018555,
      "learning_rate": 1.2627972222686837e-05,
      "loss": 2.3653,
      "step": 32192
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.062098741531372,
      "learning_rate": 1.2627574955798827e-05,
      "loss": 2.5272,
      "step": 32193
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9980341792106628,
      "learning_rate": 1.2627177684456363e-05,
      "loss": 2.414,
      "step": 32194
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.147729516029358,
      "learning_rate": 1.2626780408660118e-05,
      "loss": 2.4439,
      "step": 32195
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0260549783706665,
      "learning_rate": 1.2626383128410771e-05,
      "loss": 2.3004,
      "step": 32196
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2071863412857056,
      "learning_rate": 1.2625985843708985e-05,
      "loss": 2.3054,
      "step": 32197
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1010075807571411,
      "learning_rate": 1.2625588554555444e-05,
      "loss": 2.2579,
      "step": 32198
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0965962409973145,
      "learning_rate": 1.2625191260950818e-05,
      "loss": 2.4412,
      "step": 32199
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.170001745223999,
      "learning_rate": 1.2624793962895776e-05,
      "loss": 2.4565,
      "step": 32200
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0145056247711182,
      "learning_rate": 1.2624396660390995e-05,
      "loss": 2.3914,
      "step": 32201
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0042734146118164,
      "learning_rate": 1.2623999353437152e-05,
      "loss": 2.5902,
      "step": 32202
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0315266847610474,
      "learning_rate": 1.262360204203492e-05,
      "loss": 2.344,
      "step": 32203
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0999785661697388,
      "learning_rate": 1.2623204726184968e-05,
      "loss": 2.0921,
      "step": 32204
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1627968549728394,
      "learning_rate": 1.262280740588797e-05,
      "loss": 2.5051,
      "step": 32205
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1009572744369507,
      "learning_rate": 1.26224100811446e-05,
      "loss": 2.3174,
      "step": 32206
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0546396970748901,
      "learning_rate": 1.2622012751955535e-05,
      "loss": 2.4319,
      "step": 32207
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0872746706008911,
      "learning_rate": 1.2621615418321445e-05,
      "loss": 2.4651,
      "step": 32208
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0294958353042603,
      "learning_rate": 1.2621218080243005e-05,
      "loss": 2.342,
      "step": 32209
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9843378663063049,
      "learning_rate": 1.262082073772089e-05,
      "loss": 2.5423,
      "step": 32210
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.157222032546997,
      "learning_rate": 1.262042339075577e-05,
      "loss": 2.4143,
      "step": 32211
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1077721118927002,
      "learning_rate": 1.2620026039348322e-05,
      "loss": 2.2709,
      "step": 32212
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1639580726623535,
      "learning_rate": 1.2619628683499217e-05,
      "loss": 2.5896,
      "step": 32213
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9795194268226624,
      "learning_rate": 1.2619231323209129e-05,
      "loss": 2.4502,
      "step": 32214
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0943701267242432,
      "learning_rate": 1.2618833958478733e-05,
      "loss": 2.5318,
      "step": 32215
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.089248776435852,
      "learning_rate": 1.2618436589308707e-05,
      "loss": 2.2906,
      "step": 32216
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9981902241706848,
      "learning_rate": 1.2618039215699715e-05,
      "loss": 2.5096,
      "step": 32217
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.072803258895874,
      "learning_rate": 1.2617641837652437e-05,
      "loss": 2.3082,
      "step": 32218
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0520373582839966,
      "learning_rate": 1.2617244455167544e-05,
      "loss": 2.3196,
      "step": 32219
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1041984558105469,
      "learning_rate": 1.2616847068245712e-05,
      "loss": 2.3403,
      "step": 32220
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2087839841842651,
      "learning_rate": 1.2616449676887614e-05,
      "loss": 2.3872,
      "step": 32221
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0719023942947388,
      "learning_rate": 1.2616052281093923e-05,
      "loss": 2.5333,
      "step": 32222
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0534477233886719,
      "learning_rate": 1.2615654880865311e-05,
      "loss": 2.5254,
      "step": 32223
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0549864768981934,
      "learning_rate": 1.2615257476202457e-05,
      "loss": 2.385,
      "step": 32224
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1762484312057495,
      "learning_rate": 1.2614860067106027e-05,
      "loss": 2.382,
      "step": 32225
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1367448568344116,
      "learning_rate": 1.2614462653576702e-05,
      "loss": 2.3602,
      "step": 32226
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0199639797210693,
      "learning_rate": 1.2614065235615151e-05,
      "loss": 2.1276,
      "step": 32227
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0549105405807495,
      "learning_rate": 1.261366781322205e-05,
      "loss": 2.4328,
      "step": 32228
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0854142904281616,
      "learning_rate": 1.2613270386398073e-05,
      "loss": 2.1011,
      "step": 32229
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2598001956939697,
      "learning_rate": 1.2612872955143893e-05,
      "loss": 2.383,
      "step": 32230
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.120370864868164,
      "learning_rate": 1.2612475519460183e-05,
      "loss": 2.1216,
      "step": 32231
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4376095533370972,
      "learning_rate": 1.2612078079347616e-05,
      "loss": 2.6687,
      "step": 32232
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9844440817832947,
      "learning_rate": 1.2611680634806869e-05,
      "loss": 2.4441,
      "step": 32233
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.10624361038208,
      "learning_rate": 1.2611283185838614e-05,
      "loss": 2.5253,
      "step": 32234
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9870736002922058,
      "learning_rate": 1.2610885732443525e-05,
      "loss": 2.2493,
      "step": 32235
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0478745698928833,
      "learning_rate": 1.2610488274622274e-05,
      "loss": 2.5147,
      "step": 32236
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1483359336853027,
      "learning_rate": 1.2610090812375538e-05,
      "loss": 2.4269,
      "step": 32237
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1157952547073364,
      "learning_rate": 1.2609693345703988e-05,
      "loss": 2.2515,
      "step": 32238
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0267715454101562,
      "learning_rate": 1.26092958746083e-05,
      "loss": 2.258,
      "step": 32239
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0891860723495483,
      "learning_rate": 1.2608898399089146e-05,
      "loss": 2.2879,
      "step": 32240
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9895901083946228,
      "learning_rate": 1.26085009191472e-05,
      "loss": 2.3133,
      "step": 32241
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.026680827140808,
      "learning_rate": 1.260810343478314e-05,
      "loss": 2.4621,
      "step": 32242
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.095698356628418,
      "learning_rate": 1.2607705945997632e-05,
      "loss": 2.358,
      "step": 32243
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9860458374023438,
      "learning_rate": 1.2607308452791358e-05,
      "loss": 2.2568,
      "step": 32244
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0369489192962646,
      "learning_rate": 1.2606910955164984e-05,
      "loss": 2.2966,
      "step": 32245
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0209184885025024,
      "learning_rate": 1.2606513453119192e-05,
      "loss": 2.2576,
      "step": 32246
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9970581531524658,
      "learning_rate": 1.260611594665465e-05,
      "loss": 2.3682,
      "step": 32247
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.993803858757019,
      "learning_rate": 1.2605718435772034e-05,
      "loss": 2.4148,
      "step": 32248
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2292141914367676,
      "learning_rate": 1.2605320920472016e-05,
      "loss": 2.2388,
      "step": 32249
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1478215456008911,
      "learning_rate": 1.2604923400755272e-05,
      "loss": 2.4976,
      "step": 32250
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9854578375816345,
      "learning_rate": 1.2604525876622477e-05,
      "loss": 2.4808,
      "step": 32251
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9774047136306763,
      "learning_rate": 1.2604128348074302e-05,
      "loss": 2.4471,
      "step": 32252
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1148104667663574,
      "learning_rate": 1.2603730815111421e-05,
      "loss": 2.4097,
      "step": 32253
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1772708892822266,
      "learning_rate": 1.2603333277734511e-05,
      "loss": 2.3746,
      "step": 32254
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0170657634735107,
      "learning_rate": 1.2602935735944244e-05,
      "loss": 2.413,
      "step": 32255
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1201677322387695,
      "learning_rate": 1.2602538189741293e-05,
      "loss": 2.7248,
      "step": 32256
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0403271913528442,
      "learning_rate": 1.2602140639126334e-05,
      "loss": 2.4773,
      "step": 32257
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.124729871749878,
      "learning_rate": 1.260174308410004e-05,
      "loss": 2.5365,
      "step": 32258
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1445094347000122,
      "learning_rate": 1.2601345524663082e-05,
      "loss": 2.3034,
      "step": 32259
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1472699642181396,
      "learning_rate": 1.2600947960816139e-05,
      "loss": 2.5188,
      "step": 32260
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0796488523483276,
      "learning_rate": 1.2600550392559883e-05,
      "loss": 2.4204,
      "step": 32261
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.13149094581604,
      "learning_rate": 1.2600152819894988e-05,
      "loss": 2.2951,
      "step": 32262
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1837422847747803,
      "learning_rate": 1.2599755242822128e-05,
      "loss": 2.2105,
      "step": 32263
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.028340220451355,
      "learning_rate": 1.2599357661341976e-05,
      "loss": 2.3239,
      "step": 32264
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0529651641845703,
      "learning_rate": 1.2598960075455204e-05,
      "loss": 2.504,
      "step": 32265
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.188534140586853,
      "learning_rate": 1.259856248516249e-05,
      "loss": 2.4293,
      "step": 32266
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0699867010116577,
      "learning_rate": 1.259816489046451e-05,
      "loss": 2.5208,
      "step": 32267
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0582797527313232,
      "learning_rate": 1.2597767291361935e-05,
      "loss": 2.2622,
      "step": 32268
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.251145362854004,
      "learning_rate": 1.2597369687855437e-05,
      "loss": 2.4186,
      "step": 32269
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.051176905632019,
      "learning_rate": 1.2596972079945694e-05,
      "loss": 2.2199,
      "step": 32270
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0122063159942627,
      "learning_rate": 1.2596574467633373e-05,
      "loss": 2.2203,
      "step": 32271
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1766643524169922,
      "learning_rate": 1.2596176850919157e-05,
      "loss": 2.3084,
      "step": 32272
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0181258916854858,
      "learning_rate": 1.2595779229803715e-05,
      "loss": 2.5149,
      "step": 32273
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1204488277435303,
      "learning_rate": 1.2595381604287722e-05,
      "loss": 2.4265,
      "step": 32274
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.820716142654419,
      "learning_rate": 1.2594983974371853e-05,
      "loss": 2.3757,
      "step": 32275
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1168988943099976,
      "learning_rate": 1.2594586340056782e-05,
      "loss": 2.1657,
      "step": 32276
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0470741987228394,
      "learning_rate": 1.2594188701343181e-05,
      "loss": 2.1448,
      "step": 32277
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0603599548339844,
      "learning_rate": 1.2593791058231726e-05,
      "loss": 2.1631,
      "step": 32278
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.117915391921997,
      "learning_rate": 1.2593393410723091e-05,
      "loss": 2.4188,
      "step": 32279
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1088670492172241,
      "learning_rate": 1.2592995758817951e-05,
      "loss": 2.342,
      "step": 32280
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1662955284118652,
      "learning_rate": 1.2592598102516977e-05,
      "loss": 2.3894,
      "step": 32281
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1526601314544678,
      "learning_rate": 1.2592200441820845e-05,
      "loss": 2.3034,
      "step": 32282
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9950539469718933,
      "learning_rate": 1.2591802776730232e-05,
      "loss": 2.4763,
      "step": 32283
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.98947674036026,
      "learning_rate": 1.2591405107245806e-05,
      "loss": 2.2474,
      "step": 32284
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0140775442123413,
      "learning_rate": 1.2591007433368247e-05,
      "loss": 2.3048,
      "step": 32285
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0636427402496338,
      "learning_rate": 1.2590609755098225e-05,
      "loss": 2.3181,
      "step": 32286
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.001496434211731,
      "learning_rate": 1.2590212072436416e-05,
      "loss": 2.2919,
      "step": 32287
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0854979753494263,
      "learning_rate": 1.2589814385383497e-05,
      "loss": 2.3649,
      "step": 32288
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0839444398880005,
      "learning_rate": 1.2589416693940135e-05,
      "loss": 2.3438,
      "step": 32289
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0594537258148193,
      "learning_rate": 1.258901899810701e-05,
      "loss": 2.3543,
      "step": 32290
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1182514429092407,
      "learning_rate": 1.2588621297884793e-05,
      "loss": 2.3693,
      "step": 32291
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0604079961776733,
      "learning_rate": 1.2588223593274162e-05,
      "loss": 2.3548,
      "step": 32292
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1164987087249756,
      "learning_rate": 1.2587825884275788e-05,
      "loss": 2.557,
      "step": 32293
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.229274272918701,
      "learning_rate": 1.2587428170890346e-05,
      "loss": 2.3267,
      "step": 32294
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9336063861846924,
      "learning_rate": 1.2587030453118514e-05,
      "loss": 2.2794,
      "step": 32295
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.330190658569336,
      "learning_rate": 1.258663273096096e-05,
      "loss": 2.2116,
      "step": 32296
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0853171348571777,
      "learning_rate": 1.2586235004418362e-05,
      "loss": 2.394,
      "step": 32297
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9471808671951294,
      "learning_rate": 1.258583727349139e-05,
      "loss": 2.3385,
      "step": 32298
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9738388061523438,
      "learning_rate": 1.2585439538180725e-05,
      "loss": 2.3214,
      "step": 32299
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.136000633239746,
      "learning_rate": 1.2585041798487034e-05,
      "loss": 2.5293,
      "step": 32300
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.038395643234253,
      "learning_rate": 1.2584644054411001e-05,
      "loss": 2.2888,
      "step": 32301
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.066693663597107,
      "learning_rate": 1.2584246305953291e-05,
      "loss": 2.3936,
      "step": 32302
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0202497243881226,
      "learning_rate": 1.2583848553114582e-05,
      "loss": 2.4152,
      "step": 32303
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.999748945236206,
      "learning_rate": 1.2583450795895546e-05,
      "loss": 2.4272,
      "step": 32304
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1143593788146973,
      "learning_rate": 1.2583053034296862e-05,
      "loss": 2.5249,
      "step": 32305
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0484144687652588,
      "learning_rate": 1.25826552683192e-05,
      "loss": 2.3093,
      "step": 32306
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0010836124420166,
      "learning_rate": 1.2582257497963237e-05,
      "loss": 2.2287,
      "step": 32307
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0083330869674683,
      "learning_rate": 1.2581859723229646e-05,
      "loss": 2.3608,
      "step": 32308
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0679330825805664,
      "learning_rate": 1.25814619441191e-05,
      "loss": 2.4608,
      "step": 32309
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0804072618484497,
      "learning_rate": 1.2581064160632277e-05,
      "loss": 2.4213,
      "step": 32310
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0421472787857056,
      "learning_rate": 1.258066637276985e-05,
      "loss": 2.438,
      "step": 32311
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.095003366470337,
      "learning_rate": 1.258026858053249e-05,
      "loss": 2.2552,
      "step": 32312
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1192657947540283,
      "learning_rate": 1.2579870783920876e-05,
      "loss": 2.3205,
      "step": 32313
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3187921047210693,
      "learning_rate": 1.2579472982935676e-05,
      "loss": 2.2317,
      "step": 32314
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0049625635147095,
      "learning_rate": 1.2579075177577575e-05,
      "loss": 2.2551,
      "step": 32315
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0400820970535278,
      "learning_rate": 1.2578677367847236e-05,
      "loss": 2.2607,
      "step": 32316
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.077130675315857,
      "learning_rate": 1.257827955374534e-05,
      "loss": 2.2281,
      "step": 32317
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.022713303565979,
      "learning_rate": 1.257788173527256e-05,
      "loss": 2.2394,
      "step": 32318
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9401015639305115,
      "learning_rate": 1.257748391242957e-05,
      "loss": 2.4178,
      "step": 32319
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0674349069595337,
      "learning_rate": 1.2577086085217048e-05,
      "loss": 2.2247,
      "step": 32320
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.008368968963623,
      "learning_rate": 1.2576688253635663e-05,
      "loss": 2.4571,
      "step": 32321
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1019923686981201,
      "learning_rate": 1.2576290417686093e-05,
      "loss": 2.6204,
      "step": 32322
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2367576360702515,
      "learning_rate": 1.2575892577369006e-05,
      "loss": 2.5294,
      "step": 32323
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0694262981414795,
      "learning_rate": 1.2575494732685085e-05,
      "loss": 2.3958,
      "step": 32324
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1703554391860962,
      "learning_rate": 1.2575096883635001e-05,
      "loss": 2.5053,
      "step": 32325
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1174070835113525,
      "learning_rate": 1.2574699030219426e-05,
      "loss": 2.3851,
      "step": 32326
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0907974243164062,
      "learning_rate": 1.257430117243904e-05,
      "loss": 2.3456,
      "step": 32327
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0434569120407104,
      "learning_rate": 1.2573903310294515e-05,
      "loss": 2.3986,
      "step": 32328
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1377360820770264,
      "learning_rate": 1.2573505443786522e-05,
      "loss": 2.4942,
      "step": 32329
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0370124578475952,
      "learning_rate": 1.2573107572915739e-05,
      "loss": 2.2644,
      "step": 32330
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0204132795333862,
      "learning_rate": 1.2572709697682841e-05,
      "loss": 2.2931,
      "step": 32331
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0810410976409912,
      "learning_rate": 1.2572311818088498e-05,
      "loss": 2.3381,
      "step": 32332
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1413804292678833,
      "learning_rate": 1.257191393413339e-05,
      "loss": 2.3949,
      "step": 32333
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0987447500228882,
      "learning_rate": 1.2571516045818193e-05,
      "loss": 2.5681,
      "step": 32334
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.16769540309906,
      "learning_rate": 1.2571118153143575e-05,
      "loss": 2.2779,
      "step": 32335
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2560609579086304,
      "learning_rate": 1.2570720256110211e-05,
      "loss": 2.4521,
      "step": 32336
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9963934421539307,
      "learning_rate": 1.257032235471878e-05,
      "loss": 2.4083,
      "step": 32337
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1224738359451294,
      "learning_rate": 1.2569924448969956e-05,
      "loss": 2.5718,
      "step": 32338
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.143418312072754,
      "learning_rate": 1.2569526538864412e-05,
      "loss": 2.2126,
      "step": 32339
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1627492904663086,
      "learning_rate": 1.256912862440282e-05,
      "loss": 2.2987,
      "step": 32340
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0880588293075562,
      "learning_rate": 1.256873070558586e-05,
      "loss": 2.072,
      "step": 32341
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1881870031356812,
      "learning_rate": 1.2568332782414203e-05,
      "loss": 2.2679,
      "step": 32342
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0175131559371948,
      "learning_rate": 1.2567934854888524e-05,
      "loss": 2.2988,
      "step": 32343
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1689542531967163,
      "learning_rate": 1.2567536923009495e-05,
      "loss": 2.4966,
      "step": 32344
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9794272184371948,
      "learning_rate": 1.2567138986777796e-05,
      "loss": 2.2931,
      "step": 32345
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1011781692504883,
      "learning_rate": 1.25667410461941e-05,
      "loss": 2.4909,
      "step": 32346
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.033892035484314,
      "learning_rate": 1.256634310125908e-05,
      "loss": 2.2827,
      "step": 32347
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.048333764076233,
      "learning_rate": 1.2565945151973414e-05,
      "loss": 2.4566,
      "step": 32348
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.236741304397583,
      "learning_rate": 1.2565547198337771e-05,
      "loss": 2.1716,
      "step": 32349
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0266615152359009,
      "learning_rate": 1.2565149240352827e-05,
      "loss": 2.118,
      "step": 32350
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9770930409431458,
      "learning_rate": 1.2564751278019262e-05,
      "loss": 2.3793,
      "step": 32351
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0278860330581665,
      "learning_rate": 1.2564353311337745e-05,
      "loss": 2.5513,
      "step": 32352
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0652493238449097,
      "learning_rate": 1.2563955340308953e-05,
      "loss": 2.3575,
      "step": 32353
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.127719521522522,
      "learning_rate": 1.2563557364933563e-05,
      "loss": 2.2932,
      "step": 32354
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9341185688972473,
      "learning_rate": 1.2563159385212244e-05,
      "loss": 2.4299,
      "step": 32355
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.073748230934143,
      "learning_rate": 1.2562761401145674e-05,
      "loss": 2.195,
      "step": 32356
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9538836479187012,
      "learning_rate": 1.256236341273453e-05,
      "loss": 2.3714,
      "step": 32357
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1398019790649414,
      "learning_rate": 1.2561965419979481e-05,
      "loss": 2.428,
      "step": 32358
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0701394081115723,
      "learning_rate": 1.2561567422881204e-05,
      "loss": 2.5944,
      "step": 32359
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0509997606277466,
      "learning_rate": 1.2561169421440375e-05,
      "loss": 2.2309,
      "step": 32360
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2398639917373657,
      "learning_rate": 1.256077141565767e-05,
      "loss": 2.2767,
      "step": 32361
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1782691478729248,
      "learning_rate": 1.2560373405533763e-05,
      "loss": 2.3098,
      "step": 32362
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1178356409072876,
      "learning_rate": 1.2559975391069325e-05,
      "loss": 2.2918,
      "step": 32363
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1559656858444214,
      "learning_rate": 1.2559577372265035e-05,
      "loss": 2.4796,
      "step": 32364
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0357459783554077,
      "learning_rate": 1.2559179349121566e-05,
      "loss": 2.2467,
      "step": 32365
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.265561819076538,
      "learning_rate": 1.2558781321639595e-05,
      "loss": 2.242,
      "step": 32366
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1355700492858887,
      "learning_rate": 1.255838328981979e-05,
      "loss": 2.5437,
      "step": 32367
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0015146732330322,
      "learning_rate": 1.2557985253662835e-05,
      "loss": 2.4156,
      "step": 32368
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1302410364151,
      "learning_rate": 1.2557587213169398e-05,
      "loss": 2.2268,
      "step": 32369
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0317033529281616,
      "learning_rate": 1.2557189168340155e-05,
      "loss": 2.5323,
      "step": 32370
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0156086683273315,
      "learning_rate": 1.2556791119175784e-05,
      "loss": 2.4286,
      "step": 32371
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1637194156646729,
      "learning_rate": 1.2556393065676958e-05,
      "loss": 2.5628,
      "step": 32372
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9699468612670898,
      "learning_rate": 1.2555995007844348e-05,
      "loss": 2.4797,
      "step": 32373
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0327658653259277,
      "learning_rate": 1.2555596945678636e-05,
      "loss": 2.1549,
      "step": 32374
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.8903996348381042,
      "learning_rate": 1.255519887918049e-05,
      "loss": 2.6905,
      "step": 32375
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.019018530845642,
      "learning_rate": 1.2554800808350591e-05,
      "loss": 2.3184,
      "step": 32376
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9856223464012146,
      "learning_rate": 1.2554402733189608e-05,
      "loss": 2.2022,
      "step": 32377
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0200904607772827,
      "learning_rate": 1.2554004653698219e-05,
      "loss": 2.5728,
      "step": 32378
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1118593215942383,
      "learning_rate": 1.25536065698771e-05,
      "loss": 2.3112,
      "step": 32379
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.139981746673584,
      "learning_rate": 1.2553208481726925e-05,
      "loss": 2.1288,
      "step": 32380
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2181037664413452,
      "learning_rate": 1.2552810389248365e-05,
      "loss": 2.1444,
      "step": 32381
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1866916418075562,
      "learning_rate": 1.2552412292442099e-05,
      "loss": 2.366,
      "step": 32382
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.109750747680664,
      "learning_rate": 1.2552014191308803e-05,
      "loss": 2.369,
      "step": 32383
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0030354261398315,
      "learning_rate": 1.2551616085849148e-05,
      "loss": 2.1033,
      "step": 32384
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0083768367767334,
      "learning_rate": 1.255121797606381e-05,
      "loss": 2.3865,
      "step": 32385
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0617542266845703,
      "learning_rate": 1.2550819861953465e-05,
      "loss": 2.389,
      "step": 32386
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2347640991210938,
      "learning_rate": 1.255042174351879e-05,
      "loss": 2.5237,
      "step": 32387
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0119699239730835,
      "learning_rate": 1.2550023620760457e-05,
      "loss": 2.352,
      "step": 32388
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9786412119865417,
      "learning_rate": 1.2549625493679138e-05,
      "loss": 2.3436,
      "step": 32389
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0577768087387085,
      "learning_rate": 1.2549227362275516e-05,
      "loss": 2.2556,
      "step": 32390
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1064845323562622,
      "learning_rate": 1.2548829226550256e-05,
      "loss": 2.6184,
      "step": 32391
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0778640508651733,
      "learning_rate": 1.254843108650404e-05,
      "loss": 2.4388,
      "step": 32392
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1426136493682861,
      "learning_rate": 1.2548032942137542e-05,
      "loss": 2.2427,
      "step": 32393
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9896936416625977,
      "learning_rate": 1.254763479345144e-05,
      "loss": 2.3289,
      "step": 32394
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0111773014068604,
      "learning_rate": 1.2547236640446401e-05,
      "loss": 2.3402,
      "step": 32395
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1499552726745605,
      "learning_rate": 1.2546838483123103e-05,
      "loss": 2.4297,
      "step": 32396
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5327445268630981,
      "learning_rate": 1.2546440321482228e-05,
      "loss": 2.6936,
      "step": 32397
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0750458240509033,
      "learning_rate": 1.2546042155524437e-05,
      "loss": 2.3581,
      "step": 32398
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2372719049453735,
      "learning_rate": 1.2545643985250418e-05,
      "loss": 2.5044,
      "step": 32399
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2284489870071411,
      "learning_rate": 1.2545245810660837e-05,
      "loss": 2.4021,
      "step": 32400
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0802134275436401,
      "learning_rate": 1.254484763175638e-05,
      "loss": 2.3801,
      "step": 32401
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.161602258682251,
      "learning_rate": 1.254444944853771e-05,
      "loss": 2.2727,
      "step": 32402
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0647528171539307,
      "learning_rate": 1.2544051261005508e-05,
      "loss": 2.2829,
      "step": 32403
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.026011347770691,
      "learning_rate": 1.2543653069160446e-05,
      "loss": 2.5184,
      "step": 32404
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1640630960464478,
      "learning_rate": 1.2543254873003203e-05,
      "loss": 2.5375,
      "step": 32405
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0244687795639038,
      "learning_rate": 1.2542856672534454e-05,
      "loss": 2.6203,
      "step": 32406
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0820106267929077,
      "learning_rate": 1.2542458467754872e-05,
      "loss": 2.3393,
      "step": 32407
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0576844215393066,
      "learning_rate": 1.254206025866513e-05,
      "loss": 2.4328,
      "step": 32408
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1528843641281128,
      "learning_rate": 1.2541662045265908e-05,
      "loss": 2.3045,
      "step": 32409
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0575153827667236,
      "learning_rate": 1.2541263827557876e-05,
      "loss": 2.1545,
      "step": 32410
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9936330318450928,
      "learning_rate": 1.2540865605541712e-05,
      "loss": 2.436,
      "step": 32411
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.01131010055542,
      "learning_rate": 1.2540467379218093e-05,
      "loss": 2.3342,
      "step": 32412
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0676850080490112,
      "learning_rate": 1.2540069148587693e-05,
      "loss": 2.4575,
      "step": 32413
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0844860076904297,
      "learning_rate": 1.2539670913651181e-05,
      "loss": 2.4409,
      "step": 32414
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2591005563735962,
      "learning_rate": 1.2539272674409239e-05,
      "loss": 2.1962,
      "step": 32415
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0444272756576538,
      "learning_rate": 1.253887443086254e-05,
      "loss": 2.3378,
      "step": 32416
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.019852876663208,
      "learning_rate": 1.2538476183011761e-05,
      "loss": 2.1728,
      "step": 32417
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0148271322250366,
      "learning_rate": 1.2538077930857572e-05,
      "loss": 2.1515,
      "step": 32418
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.018082857131958,
      "learning_rate": 1.2537679674400654e-05,
      "loss": 2.3737,
      "step": 32419
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2401816844940186,
      "learning_rate": 1.2537281413641681e-05,
      "loss": 2.4177,
      "step": 32420
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1183964014053345,
      "learning_rate": 1.2536883148581324e-05,
      "loss": 2.375,
      "step": 32421
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1035295724868774,
      "learning_rate": 1.2536484879220263e-05,
      "loss": 2.5583,
      "step": 32422
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9433981776237488,
      "learning_rate": 1.253608660555917e-05,
      "loss": 2.2538,
      "step": 32423
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0485436916351318,
      "learning_rate": 1.2535688327598723e-05,
      "loss": 2.3561,
      "step": 32424
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0081843137741089,
      "learning_rate": 1.2535290045339596e-05,
      "loss": 2.1601,
      "step": 32425
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0667635202407837,
      "learning_rate": 1.2534891758782461e-05,
      "loss": 2.3454,
      "step": 32426
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2706260681152344,
      "learning_rate": 1.2534493467927998e-05,
      "loss": 2.5706,
      "step": 32427
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0685203075408936,
      "learning_rate": 1.2534095172776882e-05,
      "loss": 2.1266,
      "step": 32428
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1481082439422607,
      "learning_rate": 1.2533696873329784e-05,
      "loss": 2.1766,
      "step": 32429
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.227765679359436,
      "learning_rate": 1.2533298569587382e-05,
      "loss": 2.2567,
      "step": 32430
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7927403450012207,
      "learning_rate": 1.2532900261550349e-05,
      "loss": 2.3876,
      "step": 32431
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.164894461631775,
      "learning_rate": 1.2532501949219365e-05,
      "loss": 2.4291,
      "step": 32432
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.029497742652893,
      "learning_rate": 1.2532103632595103e-05,
      "loss": 2.421,
      "step": 32433
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0200670957565308,
      "learning_rate": 1.2531705311678235e-05,
      "loss": 2.3437,
      "step": 32434
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3104798793792725,
      "learning_rate": 1.2531306986469439e-05,
      "loss": 2.3456,
      "step": 32435
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0343949794769287,
      "learning_rate": 1.253090865696939e-05,
      "loss": 2.24,
      "step": 32436
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9974491000175476,
      "learning_rate": 1.2530510323178764e-05,
      "loss": 2.2818,
      "step": 32437
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0709848403930664,
      "learning_rate": 1.2530111985098237e-05,
      "loss": 2.1304,
      "step": 32438
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0280512571334839,
      "learning_rate": 1.252971364272848e-05,
      "loss": 2.4148,
      "step": 32439
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0056341886520386,
      "learning_rate": 1.2529315296070175e-05,
      "loss": 2.3332,
      "step": 32440
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9807705879211426,
      "learning_rate": 1.2528916945123992e-05,
      "loss": 2.3989,
      "step": 32441
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1108697652816772,
      "learning_rate": 1.2528518589890609e-05,
      "loss": 2.3074,
      "step": 32442
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2807024717330933,
      "learning_rate": 1.2528120230370697e-05,
      "loss": 2.4364,
      "step": 32443
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3204703330993652,
      "learning_rate": 1.2527721866564937e-05,
      "loss": 2.5216,
      "step": 32444
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0383543968200684,
      "learning_rate": 1.2527323498474e-05,
      "loss": 2.5163,
      "step": 32445
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0007573366165161,
      "learning_rate": 1.2526925126098568e-05,
      "loss": 2.3516,
      "step": 32446
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0088492631912231,
      "learning_rate": 1.252652674943931e-05,
      "loss": 2.3507,
      "step": 32447
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0205007791519165,
      "learning_rate": 1.25261283684969e-05,
      "loss": 2.3051,
      "step": 32448
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9659740924835205,
      "learning_rate": 1.2525729983272017e-05,
      "loss": 2.2575,
      "step": 32449
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0763576030731201,
      "learning_rate": 1.2525331593765336e-05,
      "loss": 2.3425,
      "step": 32450
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0258209705352783,
      "learning_rate": 1.2524933199977531e-05,
      "loss": 2.5227,
      "step": 32451
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1031745672225952,
      "learning_rate": 1.2524534801909281e-05,
      "loss": 2.1785,
      "step": 32452
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9432259798049927,
      "learning_rate": 1.252413639956126e-05,
      "loss": 2.1361,
      "step": 32453
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0289740562438965,
      "learning_rate": 1.252373799293414e-05,
      "loss": 2.5079,
      "step": 32454
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1177693605422974,
      "learning_rate": 1.2523339582028597e-05,
      "loss": 2.2375,
      "step": 32455
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.015582799911499,
      "learning_rate": 1.2522941166845313e-05,
      "loss": 2.3787,
      "step": 32456
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1097139120101929,
      "learning_rate": 1.2522542747384956e-05,
      "loss": 2.5261,
      "step": 32457
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.027124285697937,
      "learning_rate": 1.2522144323648203e-05,
      "loss": 2.2234,
      "step": 32458
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2533982992172241,
      "learning_rate": 1.2521745895635729e-05,
      "loss": 2.3177,
      "step": 32459
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.077171802520752,
      "learning_rate": 1.2521347463348215e-05,
      "loss": 2.2564,
      "step": 32460
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.035461187362671,
      "learning_rate": 1.2520949026786329e-05,
      "loss": 2.334,
      "step": 32461
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1650387048721313,
      "learning_rate": 1.252055058595075e-05,
      "loss": 2.4912,
      "step": 32462
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.134329915046692,
      "learning_rate": 1.2520152140842154e-05,
      "loss": 2.412,
      "step": 32463
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9884939789772034,
      "learning_rate": 1.2519753691461216e-05,
      "loss": 2.4076,
      "step": 32464
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.026166319847107,
      "learning_rate": 1.251935523780861e-05,
      "loss": 2.4929,
      "step": 32465
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0284321308135986,
      "learning_rate": 1.2518956779885015e-05,
      "loss": 2.3938,
      "step": 32466
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1466668844223022,
      "learning_rate": 1.25185583176911e-05,
      "loss": 2.4063,
      "step": 32467
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2270523309707642,
      "learning_rate": 1.2518159851227547e-05,
      "loss": 2.4379,
      "step": 32468
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.063486933708191,
      "learning_rate": 1.2517761380495029e-05,
      "loss": 2.5429,
      "step": 32469
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0254919528961182,
      "learning_rate": 1.2517362905494222e-05,
      "loss": 2.2786,
      "step": 32470
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0384294986724854,
      "learning_rate": 1.2516964426225799e-05,
      "loss": 2.335,
      "step": 32471
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0625237226486206,
      "learning_rate": 1.251656594269044e-05,
      "loss": 2.3601,
      "step": 32472
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0095375776290894,
      "learning_rate": 1.251616745488882e-05,
      "loss": 2.3102,
      "step": 32473
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1322474479675293,
      "learning_rate": 1.2515768962821611e-05,
      "loss": 2.1677,
      "step": 32474
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0954415798187256,
      "learning_rate": 1.2515370466489488e-05,
      "loss": 2.1796,
      "step": 32475
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2437362670898438,
      "learning_rate": 1.251497196589313e-05,
      "loss": 2.3573,
      "step": 32476
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1685612201690674,
      "learning_rate": 1.2514573461033213e-05,
      "loss": 2.4008,
      "step": 32477
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1392796039581299,
      "learning_rate": 1.2514174951910409e-05,
      "loss": 2.5946,
      "step": 32478
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0584598779678345,
      "learning_rate": 1.25137764385254e-05,
      "loss": 2.1797,
      "step": 32479
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1361156702041626,
      "learning_rate": 1.2513377920878853e-05,
      "loss": 2.4272,
      "step": 32480
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9811152815818787,
      "learning_rate": 1.2512979398971447e-05,
      "loss": 2.4989,
      "step": 32481
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1105901002883911,
      "learning_rate": 1.2512580872803862e-05,
      "loss": 2.5313,
      "step": 32482
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9898402690887451,
      "learning_rate": 1.2512182342376766e-05,
      "loss": 2.5295,
      "step": 32483
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.06064772605896,
      "learning_rate": 1.2511783807690843e-05,
      "loss": 2.4198,
      "step": 32484
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4056943655014038,
      "learning_rate": 1.2511385268746763e-05,
      "loss": 2.2276,
      "step": 32485
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0279760360717773,
      "learning_rate": 1.25109867255452e-05,
      "loss": 2.282,
      "step": 32486
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0707263946533203,
      "learning_rate": 1.2510588178086838e-05,
      "loss": 2.2496,
      "step": 32487
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1614505052566528,
      "learning_rate": 1.2510189626372342e-05,
      "loss": 2.4183,
      "step": 32488
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9984487891197205,
      "learning_rate": 1.2509791070402396e-05,
      "loss": 2.3071,
      "step": 32489
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.15635347366333,
      "learning_rate": 1.2509392510177669e-05,
      "loss": 2.276,
      "step": 32490
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.240262508392334,
      "learning_rate": 1.2508993945698844e-05,
      "loss": 2.4668,
      "step": 32491
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0024445056915283,
      "learning_rate": 1.2508595376966593e-05,
      "loss": 2.5341,
      "step": 32492
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0621684789657593,
      "learning_rate": 1.2508196803981587e-05,
      "loss": 2.2828,
      "step": 32493
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0417587757110596,
      "learning_rate": 1.250779822674451e-05,
      "loss": 2.2015,
      "step": 32494
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9028211236000061,
      "learning_rate": 1.2507399645256034e-05,
      "loss": 2.288,
      "step": 32495
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1707439422607422,
      "learning_rate": 1.250700105951683e-05,
      "loss": 2.4196,
      "step": 32496
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1566039323806763,
      "learning_rate": 1.2506602469527581e-05,
      "loss": 2.1568,
      "step": 32497
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.092577338218689,
      "learning_rate": 1.250620387528896e-05,
      "loss": 2.305,
      "step": 32498
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0701491832733154,
      "learning_rate": 1.2505805276801646e-05,
      "loss": 2.2873,
      "step": 32499
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.151056170463562,
      "learning_rate": 1.2505406674066309e-05,
      "loss": 2.2502,
      "step": 32500
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0139877796173096,
      "learning_rate": 1.2505008067083625e-05,
      "loss": 2.3209,
      "step": 32501
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.12002694606781,
      "learning_rate": 1.2504609455854276e-05,
      "loss": 2.5305,
      "step": 32502
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0435853004455566,
      "learning_rate": 1.250421084037893e-05,
      "loss": 2.4097,
      "step": 32503
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.929060161113739,
      "learning_rate": 1.250381222065827e-05,
      "loss": 2.3437,
      "step": 32504
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.971618115901947,
      "learning_rate": 1.2503413596692963e-05,
      "loss": 2.7343,
      "step": 32505
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0051054954528809,
      "learning_rate": 1.2503014968483698e-05,
      "loss": 2.3028,
      "step": 32506
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1082561016082764,
      "learning_rate": 1.2502616336031136e-05,
      "loss": 2.1509,
      "step": 32507
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9995272755622864,
      "learning_rate": 1.2502217699335963e-05,
      "loss": 2.3758,
      "step": 32508
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0602458715438843,
      "learning_rate": 1.2501819058398849e-05,
      "loss": 2.2767,
      "step": 32509
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1889569759368896,
      "learning_rate": 1.2501420413220475e-05,
      "loss": 2.4573,
      "step": 32510
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0645289421081543,
      "learning_rate": 1.2501021763801511e-05,
      "loss": 2.2534,
      "step": 32511
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1507587432861328,
      "learning_rate": 1.250062311014264e-05,
      "loss": 2.3844,
      "step": 32512
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1676387786865234,
      "learning_rate": 1.250022445224453e-05,
      "loss": 1.9992,
      "step": 32513
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1471275091171265,
      "learning_rate": 1.2499825790107863e-05,
      "loss": 2.3982,
      "step": 32514
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0538748502731323,
      "learning_rate": 1.2499427123733312e-05,
      "loss": 2.4989,
      "step": 32515
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2334243059158325,
      "learning_rate": 1.249902845312155e-05,
      "loss": 2.3848,
      "step": 32516
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0329351425170898,
      "learning_rate": 1.2498629778273259e-05,
      "loss": 2.3067,
      "step": 32517
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1285260915756226,
      "learning_rate": 1.2498231099189111e-05,
      "loss": 2.1663,
      "step": 32518
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2601022720336914,
      "learning_rate": 1.2497832415869784e-05,
      "loss": 2.3092,
      "step": 32519
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0987807512283325,
      "learning_rate": 1.249743372831595e-05,
      "loss": 2.1914,
      "step": 32520
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1788737773895264,
      "learning_rate": 1.249703503652829e-05,
      "loss": 2.2376,
      "step": 32521
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9382199645042419,
      "learning_rate": 1.2496636340507477e-05,
      "loss": 2.2781,
      "step": 32522
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1706297397613525,
      "learning_rate": 1.2496237640254187e-05,
      "loss": 2.4548,
      "step": 32523
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1286985874176025,
      "learning_rate": 1.2495838935769095e-05,
      "loss": 2.2435,
      "step": 32524
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0043396949768066,
      "learning_rate": 1.2495440227052882e-05,
      "loss": 2.2609,
      "step": 32525
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0308183431625366,
      "learning_rate": 1.2495041514106216e-05,
      "loss": 2.3665,
      "step": 32526
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1830569505691528,
      "learning_rate": 1.2494642796929778e-05,
      "loss": 2.456,
      "step": 32527
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1215354204177856,
      "learning_rate": 1.2494244075524243e-05,
      "loss": 2.512,
      "step": 32528
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0974984169006348,
      "learning_rate": 1.2493845349890286e-05,
      "loss": 2.2035,
      "step": 32529
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0793434381484985,
      "learning_rate": 1.2493446620028584e-05,
      "loss": 2.4924,
      "step": 32530
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.155055046081543,
      "learning_rate": 1.2493047885939816e-05,
      "loss": 2.3212,
      "step": 32531
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.021411657333374,
      "learning_rate": 1.2492649147624651e-05,
      "loss": 2.3941,
      "step": 32532
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9767234921455383,
      "learning_rate": 1.2492250405083772e-05,
      "loss": 2.2237,
      "step": 32533
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0539052486419678,
      "learning_rate": 1.2491851658317848e-05,
      "loss": 2.5314,
      "step": 32534
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.051007866859436,
      "learning_rate": 1.2491452907327562e-05,
      "loss": 2.1604,
      "step": 32535
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.026304006576538,
      "learning_rate": 1.2491054152113584e-05,
      "loss": 2.3015,
      "step": 32536
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0628935098648071,
      "learning_rate": 1.2490655392676594e-05,
      "loss": 2.5399,
      "step": 32537
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0571304559707642,
      "learning_rate": 1.2490256629017266e-05,
      "loss": 2.4312,
      "step": 32538
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0065380334854126,
      "learning_rate": 1.2489857861136279e-05,
      "loss": 2.2845,
      "step": 32539
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0365482568740845,
      "learning_rate": 1.2489459089034304e-05,
      "loss": 2.3323,
      "step": 32540
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1153459548950195,
      "learning_rate": 1.248906031271202e-05,
      "loss": 2.4278,
      "step": 32541
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1144272089004517,
      "learning_rate": 1.24886615321701e-05,
      "loss": 2.5372,
      "step": 32542
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0790568590164185,
      "learning_rate": 1.248826274740923e-05,
      "loss": 2.3647,
      "step": 32543
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0565587282180786,
      "learning_rate": 1.2487863958430074e-05,
      "loss": 2.4214,
      "step": 32544
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0667039155960083,
      "learning_rate": 1.2487465165233315e-05,
      "loss": 2.3041,
      "step": 32545
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0991380214691162,
      "learning_rate": 1.2487066367819627e-05,
      "loss": 2.2871,
      "step": 32546
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1627346277236938,
      "learning_rate": 1.2486667566189683e-05,
      "loss": 2.3012,
      "step": 32547
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0939619541168213,
      "learning_rate": 1.2486268760344165e-05,
      "loss": 2.5371,
      "step": 32548
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0673809051513672,
      "learning_rate": 1.2485869950283745e-05,
      "loss": 2.574,
      "step": 32549
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9830179214477539,
      "learning_rate": 1.24854711360091e-05,
      "loss": 2.3589,
      "step": 32550
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0792185068130493,
      "learning_rate": 1.2485072317520905e-05,
      "loss": 2.2378,
      "step": 32551
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1027911901474,
      "learning_rate": 1.2484673494819841e-05,
      "loss": 2.3858,
      "step": 32552
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2650796175003052,
      "learning_rate": 1.2484274667906577e-05,
      "loss": 2.4052,
      "step": 32553
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9620087146759033,
      "learning_rate": 1.2483875836781796e-05,
      "loss": 2.3179,
      "step": 32554
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.164120078086853,
      "learning_rate": 1.2483477001446169e-05,
      "loss": 2.3828,
      "step": 32555
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1545305252075195,
      "learning_rate": 1.2483078161900372e-05,
      "loss": 2.1303,
      "step": 32556
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0129419565200806,
      "learning_rate": 1.2482679318145086e-05,
      "loss": 2.4102,
      "step": 32557
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.002525806427002,
      "learning_rate": 1.2482280470180985e-05,
      "loss": 2.4684,
      "step": 32558
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.032575249671936,
      "learning_rate": 1.2481881618008745e-05,
      "loss": 2.293,
      "step": 32559
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0719022750854492,
      "learning_rate": 1.2481482761629037e-05,
      "loss": 2.4287,
      "step": 32560
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1012790203094482,
      "learning_rate": 1.2481083901042546e-05,
      "loss": 2.5069,
      "step": 32561
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0259915590286255,
      "learning_rate": 1.2480685036249942e-05,
      "loss": 2.3757,
      "step": 32562
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0794596672058105,
      "learning_rate": 1.2480286167251903e-05,
      "loss": 2.3822,
      "step": 32563
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0145455598831177,
      "learning_rate": 1.2479887294049104e-05,
      "loss": 2.3091,
      "step": 32564
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.150202989578247,
      "learning_rate": 1.2479488416642227e-05,
      "loss": 2.4679,
      "step": 32565
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.034363031387329,
      "learning_rate": 1.2479089535031941e-05,
      "loss": 2.3278,
      "step": 32566
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1867623329162598,
      "learning_rate": 1.2478690649218925e-05,
      "loss": 2.3727,
      "step": 32567
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0454046726226807,
      "learning_rate": 1.2478291759203853e-05,
      "loss": 2.5021,
      "step": 32568
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1382900476455688,
      "learning_rate": 1.2477892864987406e-05,
      "loss": 2.4121,
      "step": 32569
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0522550344467163,
      "learning_rate": 1.2477493966570258e-05,
      "loss": 2.2413,
      "step": 32570
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3063448667526245,
      "learning_rate": 1.2477095063953084e-05,
      "loss": 2.3679,
      "step": 32571
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2064284086227417,
      "learning_rate": 1.2476696157136561e-05,
      "loss": 2.2535,
      "step": 32572
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2234444618225098,
      "learning_rate": 1.2476297246121365e-05,
      "loss": 2.4241,
      "step": 32573
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0565416812896729,
      "learning_rate": 1.2475898330908174e-05,
      "loss": 2.5844,
      "step": 32574
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0245788097381592,
      "learning_rate": 1.2475499411497662e-05,
      "loss": 2.5324,
      "step": 32575
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0260077714920044,
      "learning_rate": 1.2475100487890505e-05,
      "loss": 2.2696,
      "step": 32576
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1022205352783203,
      "learning_rate": 1.2474701560087384e-05,
      "loss": 2.3627,
      "step": 32577
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0698025226593018,
      "learning_rate": 1.247430262808897e-05,
      "loss": 2.3644,
      "step": 32578
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9914786219596863,
      "learning_rate": 1.2473903691895938e-05,
      "loss": 2.325,
      "step": 32579
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.089160442352295,
      "learning_rate": 1.2473504751508972e-05,
      "loss": 2.2658,
      "step": 32580
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.020310640335083,
      "learning_rate": 1.247310580692874e-05,
      "loss": 2.3266,
      "step": 32581
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9972086548805237,
      "learning_rate": 1.2472706858155923e-05,
      "loss": 2.3518,
      "step": 32582
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1511304378509521,
      "learning_rate": 1.2472307905191197e-05,
      "loss": 2.3568,
      "step": 32583
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0464438199996948,
      "learning_rate": 1.2471908948035236e-05,
      "loss": 2.4108,
      "step": 32584
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.203292727470398,
      "learning_rate": 1.2471509986688721e-05,
      "loss": 2.4095,
      "step": 32585
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2034980058670044,
      "learning_rate": 1.2471111021152322e-05,
      "loss": 2.5838,
      "step": 32586
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1100373268127441,
      "learning_rate": 1.2470712051426721e-05,
      "loss": 2.1841,
      "step": 32587
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.177484154701233,
      "learning_rate": 1.247031307751259e-05,
      "loss": 2.3617,
      "step": 32588
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.052964448928833,
      "learning_rate": 1.2469914099410608e-05,
      "loss": 2.4512,
      "step": 32589
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0804674625396729,
      "learning_rate": 1.246951511712145e-05,
      "loss": 2.244,
      "step": 32590
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1300129890441895,
      "learning_rate": 1.2469116130645797e-05,
      "loss": 2.3325,
      "step": 32591
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0639607906341553,
      "learning_rate": 1.2468717139984319e-05,
      "loss": 2.2948,
      "step": 32592
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0628868341445923,
      "learning_rate": 1.2468318145137694e-05,
      "loss": 2.2018,
      "step": 32593
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9739844799041748,
      "learning_rate": 1.2467919146106602e-05,
      "loss": 2.536,
      "step": 32594
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1042605638504028,
      "learning_rate": 1.2467520142891714e-05,
      "loss": 2.4912,
      "step": 32595
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1269471645355225,
      "learning_rate": 1.246712113549371e-05,
      "loss": 2.3764,
      "step": 32596
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0471594333648682,
      "learning_rate": 1.2466722123913267e-05,
      "loss": 2.2762,
      "step": 32597
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.063301920890808,
      "learning_rate": 1.246632310815106e-05,
      "loss": 2.2694,
      "step": 32598
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0232752561569214,
      "learning_rate": 1.2465924088207764e-05,
      "loss": 2.4495,
      "step": 32599
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1652175188064575,
      "learning_rate": 1.2465525064084056e-05,
      "loss": 2.6114,
      "step": 32600
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1382932662963867,
      "learning_rate": 1.2465126035780615e-05,
      "loss": 2.3758,
      "step": 32601
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0298560857772827,
      "learning_rate": 1.2464727003298118e-05,
      "loss": 2.24,
      "step": 32602
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0448801517486572,
      "learning_rate": 1.2464327966637236e-05,
      "loss": 2.6391,
      "step": 32603
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1849687099456787,
      "learning_rate": 1.2463928925798649e-05,
      "loss": 2.6316,
      "step": 32604
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1037628650665283,
      "learning_rate": 1.2463529880783036e-05,
      "loss": 2.3372,
      "step": 32605
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0334088802337646,
      "learning_rate": 1.2463130831591069e-05,
      "loss": 2.3678,
      "step": 32606
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0686737298965454,
      "learning_rate": 1.2462731778223424e-05,
      "loss": 2.3671,
      "step": 32607
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.04106867313385,
      "learning_rate": 1.2462332720680783e-05,
      "loss": 2.3084,
      "step": 32608
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9937437176704407,
      "learning_rate": 1.2461933658963818e-05,
      "loss": 2.516,
      "step": 32609
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1567984819412231,
      "learning_rate": 1.2461534593073205e-05,
      "loss": 2.3035,
      "step": 32610
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0984140634536743,
      "learning_rate": 1.2461135523009627e-05,
      "loss": 2.5159,
      "step": 32611
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.053594708442688,
      "learning_rate": 1.2460736448773754e-05,
      "loss": 2.3257,
      "step": 32612
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1184650659561157,
      "learning_rate": 1.2460337370366262e-05,
      "loss": 2.1452,
      "step": 32613
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1151211261749268,
      "learning_rate": 1.245993828778783e-05,
      "loss": 2.4079,
      "step": 32614
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.020653247833252,
      "learning_rate": 1.2459539201039136e-05,
      "loss": 2.5384,
      "step": 32615
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7985765933990479,
      "learning_rate": 1.2459140110120855e-05,
      "loss": 2.387,
      "step": 32616
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9681865572929382,
      "learning_rate": 1.2458741015033665e-05,
      "loss": 2.3498,
      "step": 32617
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.053655982017517,
      "learning_rate": 1.245834191577824e-05,
      "loss": 2.5103,
      "step": 32618
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1193264722824097,
      "learning_rate": 1.2457942812355257e-05,
      "loss": 2.4982,
      "step": 32619
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1392327547073364,
      "learning_rate": 1.2457543704765393e-05,
      "loss": 2.3406,
      "step": 32620
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.058668851852417,
      "learning_rate": 1.2457144593009327e-05,
      "loss": 2.4144,
      "step": 32621
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0720046758651733,
      "learning_rate": 1.2456745477087733e-05,
      "loss": 2.3558,
      "step": 32622
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.014388084411621,
      "learning_rate": 1.2456346357001286e-05,
      "loss": 2.2477,
      "step": 32623
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1395882368087769,
      "learning_rate": 1.2455947232750667e-05,
      "loss": 2.2884,
      "step": 32624
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0842857360839844,
      "learning_rate": 1.245554810433655e-05,
      "loss": 2.4561,
      "step": 32625
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.167060375213623,
      "learning_rate": 1.2455148971759613e-05,
      "loss": 2.4685,
      "step": 32626
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0026755332946777,
      "learning_rate": 1.245474983502053e-05,
      "loss": 2.4692,
      "step": 32627
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.12083899974823,
      "learning_rate": 1.2454350694119979e-05,
      "loss": 2.483,
      "step": 32628
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1420046091079712,
      "learning_rate": 1.2453951549058638e-05,
      "loss": 2.267,
      "step": 32629
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0273855924606323,
      "learning_rate": 1.2453552399837184e-05,
      "loss": 2.2848,
      "step": 32630
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9763569235801697,
      "learning_rate": 1.245315324645629e-05,
      "loss": 2.2577,
      "step": 32631
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0590603351593018,
      "learning_rate": 1.2452754088916636e-05,
      "loss": 2.4602,
      "step": 32632
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1459307670593262,
      "learning_rate": 1.2452354927218899e-05,
      "loss": 2.4222,
      "step": 32633
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3278141021728516,
      "learning_rate": 1.2451955761363753e-05,
      "loss": 2.4622,
      "step": 32634
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.081364631652832,
      "learning_rate": 1.2451556591351874e-05,
      "loss": 2.4036,
      "step": 32635
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9792295694351196,
      "learning_rate": 1.2451157417183943e-05,
      "loss": 2.4203,
      "step": 32636
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8593437671661377,
      "learning_rate": 1.2450758238860636e-05,
      "loss": 2.2721,
      "step": 32637
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.0675736665725708,
      "learning_rate": 1.2450359056382626e-05,
      "loss": 2.148,
      "step": 32638
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9558603167533875,
      "learning_rate": 1.2449959869750592e-05,
      "loss": 2.5635,
      "step": 32639
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1558340787887573,
      "learning_rate": 1.244956067896521e-05,
      "loss": 2.5406,
      "step": 32640
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9689761996269226,
      "learning_rate": 1.2449161484027157e-05,
      "loss": 2.423,
      "step": 32641
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1426329612731934,
      "learning_rate": 1.244876228493711e-05,
      "loss": 2.5113,
      "step": 32642
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.194526195526123,
      "learning_rate": 1.2448363081695746e-05,
      "loss": 2.37,
      "step": 32643
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.197601556777954,
      "learning_rate": 1.2447963874303746e-05,
      "loss": 2.3331,
      "step": 32644
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9939107894897461,
      "learning_rate": 1.2447564662761779e-05,
      "loss": 2.1377,
      "step": 32645
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.120906114578247,
      "learning_rate": 1.2447165447070525e-05,
      "loss": 2.5588,
      "step": 32646
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.996852695941925,
      "learning_rate": 1.244676622723066e-05,
      "loss": 2.3674,
      "step": 32647
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9379388689994812,
      "learning_rate": 1.2446367003242864e-05,
      "loss": 2.2629,
      "step": 32648
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1243222951889038,
      "learning_rate": 1.2445967775107807e-05,
      "loss": 2.4408,
      "step": 32649
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.054855227470398,
      "learning_rate": 1.2445568542826174e-05,
      "loss": 2.4168,
      "step": 32650
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0485079288482666,
      "learning_rate": 1.2445169306398642e-05,
      "loss": 2.1962,
      "step": 32651
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9709064960479736,
      "learning_rate": 1.2444770065825877e-05,
      "loss": 2.403,
      "step": 32652
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0244837999343872,
      "learning_rate": 1.2444370821108567e-05,
      "loss": 2.584,
      "step": 32653
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0488011837005615,
      "learning_rate": 1.2443971572247382e-05,
      "loss": 2.193,
      "step": 32654
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.392090916633606,
      "learning_rate": 1.2443572319243002e-05,
      "loss": 2.4507,
      "step": 32655
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1459623575210571,
      "learning_rate": 1.2443173062096103e-05,
      "loss": 2.3539,
      "step": 32656
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.049505352973938,
      "learning_rate": 1.2442773800807364e-05,
      "loss": 2.3916,
      "step": 32657
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0349055528640747,
      "learning_rate": 1.244237453537746e-05,
      "loss": 2.3369,
      "step": 32658
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0515859127044678,
      "learning_rate": 1.2441975265807064e-05,
      "loss": 2.4166,
      "step": 32659
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0967332124710083,
      "learning_rate": 1.244157599209686e-05,
      "loss": 2.37,
      "step": 32660
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0544670820236206,
      "learning_rate": 1.2441176714247522e-05,
      "loss": 2.4581,
      "step": 32661
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0690972805023193,
      "learning_rate": 1.2440777432259726e-05,
      "loss": 2.5746,
      "step": 32662
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0297802686691284,
      "learning_rate": 1.2440378146134146e-05,
      "loss": 2.2798,
      "step": 32663
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1081335544586182,
      "learning_rate": 1.2439978855871466e-05,
      "loss": 2.2515,
      "step": 32664
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1652384996414185,
      "learning_rate": 1.243957956147236e-05,
      "loss": 2.3971,
      "step": 32665
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1819039583206177,
      "learning_rate": 1.2439180262937502e-05,
      "loss": 2.183,
      "step": 32666
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1467441320419312,
      "learning_rate": 1.2438780960267572e-05,
      "loss": 2.2579,
      "step": 32667
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0933359861373901,
      "learning_rate": 1.2438381653463243e-05,
      "loss": 2.2851,
      "step": 32668
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9511996507644653,
      "learning_rate": 1.2437982342525197e-05,
      "loss": 2.288,
      "step": 32669
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0457680225372314,
      "learning_rate": 1.243758302745411e-05,
      "loss": 2.3179,
      "step": 32670
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1661449670791626,
      "learning_rate": 1.2437183708250655e-05,
      "loss": 2.3517,
      "step": 32671
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2421294450759888,
      "learning_rate": 1.2436784384915512e-05,
      "loss": 2.3622,
      "step": 32672
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0164686441421509,
      "learning_rate": 1.243638505744936e-05,
      "loss": 2.4679,
      "step": 32673
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.060981035232544,
      "learning_rate": 1.2435985725852873e-05,
      "loss": 2.3443,
      "step": 32674
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1240227222442627,
      "learning_rate": 1.2435586390126726e-05,
      "loss": 2.1371,
      "step": 32675
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1447908878326416,
      "learning_rate": 1.24351870502716e-05,
      "loss": 2.4416,
      "step": 32676
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.048832654953003,
      "learning_rate": 1.2434787706288173e-05,
      "loss": 2.4924,
      "step": 32677
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1308039426803589,
      "learning_rate": 1.2434388358177116e-05,
      "loss": 2.1771,
      "step": 32678
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9870849251747131,
      "learning_rate": 1.2433989005939112e-05,
      "loss": 2.5762,
      "step": 32679
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.048414945602417,
      "learning_rate": 1.2433589649574833e-05,
      "loss": 2.4484,
      "step": 32680
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1408507823944092,
      "learning_rate": 1.2433190289084961e-05,
      "loss": 2.1482,
      "step": 32681
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.088787317276001,
      "learning_rate": 1.2432790924470171e-05,
      "loss": 2.3692,
      "step": 32682
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.089982271194458,
      "learning_rate": 1.2432391555731136e-05,
      "loss": 2.3164,
      "step": 32683
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9625735878944397,
      "learning_rate": 1.2431992182868542e-05,
      "loss": 2.4112,
      "step": 32684
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9898481965065002,
      "learning_rate": 1.2431592805883058e-05,
      "loss": 2.4202,
      "step": 32685
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9568923711776733,
      "learning_rate": 1.2431193424775362e-05,
      "loss": 2.2262,
      "step": 32686
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0858076810836792,
      "learning_rate": 1.2430794039546135e-05,
      "loss": 2.5173,
      "step": 32687
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1325671672821045,
      "learning_rate": 1.2430394650196052e-05,
      "loss": 2.3063,
      "step": 32688
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9657750725746155,
      "learning_rate": 1.242999525672579e-05,
      "loss": 2.3503,
      "step": 32689
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9781829714775085,
      "learning_rate": 1.2429595859136025e-05,
      "loss": 2.1448,
      "step": 32690
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0349621772766113,
      "learning_rate": 1.2429196457427435e-05,
      "loss": 2.5265,
      "step": 32691
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.096815586090088,
      "learning_rate": 1.24287970516007e-05,
      "loss": 2.3301,
      "step": 32692
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0976814031600952,
      "learning_rate": 1.2428397641656492e-05,
      "loss": 2.4584,
      "step": 32693
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.058777928352356,
      "learning_rate": 1.242799822759549e-05,
      "loss": 2.5564,
      "step": 32694
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1359096765518188,
      "learning_rate": 1.2427598809418373e-05,
      "loss": 2.6406,
      "step": 32695
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1510891914367676,
      "learning_rate": 1.2427199387125815e-05,
      "loss": 2.3798,
      "step": 32696
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1166521310806274,
      "learning_rate": 1.2426799960718496e-05,
      "loss": 2.5805,
      "step": 32697
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.089885950088501,
      "learning_rate": 1.2426400530197094e-05,
      "loss": 2.1378,
      "step": 32698
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0020811557769775,
      "learning_rate": 1.2426001095562281e-05,
      "loss": 2.2846,
      "step": 32699
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0278352499008179,
      "learning_rate": 1.2425601656814736e-05,
      "loss": 2.4236,
      "step": 32700
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1460082530975342,
      "learning_rate": 1.242520221395514e-05,
      "loss": 2.5227,
      "step": 32701
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.062201738357544,
      "learning_rate": 1.2424802766984167e-05,
      "loss": 2.328,
      "step": 32702
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0881668329238892,
      "learning_rate": 1.2424403315902495e-05,
      "loss": 2.4006,
      "step": 32703
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.071331262588501,
      "learning_rate": 1.2424003860710801e-05,
      "loss": 2.4209,
      "step": 32704
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0922828912734985,
      "learning_rate": 1.242360440140976e-05,
      "loss": 2.2274,
      "step": 32705
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.128463864326477,
      "learning_rate": 1.2423204938000055e-05,
      "loss": 2.5377,
      "step": 32706
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0420879125595093,
      "learning_rate": 1.2422805470482358e-05,
      "loss": 2.3596,
      "step": 32707
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0191165208816528,
      "learning_rate": 1.2422405998857345e-05,
      "loss": 2.1476,
      "step": 32708
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9755752086639404,
      "learning_rate": 1.2422006523125699e-05,
      "loss": 2.2185,
      "step": 32709
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0492057800292969,
      "learning_rate": 1.2421607043288095e-05,
      "loss": 2.331,
      "step": 32710
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0041261911392212,
      "learning_rate": 1.2421207559345208e-05,
      "loss": 2.2971,
      "step": 32711
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9787488579750061,
      "learning_rate": 1.2420808071297716e-05,
      "loss": 2.1819,
      "step": 32712
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1264361143112183,
      "learning_rate": 1.2420408579146297e-05,
      "loss": 2.282,
      "step": 32713
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2101367712020874,
      "learning_rate": 1.2420009082891626e-05,
      "loss": 2.3533,
      "step": 32714
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0256915092468262,
      "learning_rate": 1.2419609582534387e-05,
      "loss": 2.5203,
      "step": 32715
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9828494191169739,
      "learning_rate": 1.2419210078075247e-05,
      "loss": 2.2528,
      "step": 32716
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1641020774841309,
      "learning_rate": 1.2418810569514894e-05,
      "loss": 2.2647,
      "step": 32717
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9976619482040405,
      "learning_rate": 1.2418411056853998e-05,
      "loss": 2.3522,
      "step": 32718
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9987268447875977,
      "learning_rate": 1.2418011540093239e-05,
      "loss": 2.352,
      "step": 32719
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0450408458709717,
      "learning_rate": 1.2417612019233297e-05,
      "loss": 2.1604,
      "step": 32720
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9565159678459167,
      "learning_rate": 1.241721249427484e-05,
      "loss": 2.2689,
      "step": 32721
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.920096755027771,
      "learning_rate": 1.2416812965218554e-05,
      "loss": 2.2543,
      "step": 32722
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.024278163909912,
      "learning_rate": 1.2416413432065114e-05,
      "loss": 2.397,
      "step": 32723
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9844933152198792,
      "learning_rate": 1.2416013894815197e-05,
      "loss": 2.3165,
      "step": 32724
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9921123385429382,
      "learning_rate": 1.2415614353469478e-05,
      "loss": 2.2565,
      "step": 32725
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0530420541763306,
      "learning_rate": 1.241521480802864e-05,
      "loss": 2.0225,
      "step": 32726
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.024248719215393,
      "learning_rate": 1.2414815258493354e-05,
      "loss": 2.5039,
      "step": 32727
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9811806678771973,
      "learning_rate": 1.24144157048643e-05,
      "loss": 2.0519,
      "step": 32728
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0911099910736084,
      "learning_rate": 1.2414016147142156e-05,
      "loss": 2.2286,
      "step": 32729
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.004112958908081,
      "learning_rate": 1.2413616585327603e-05,
      "loss": 2.4272,
      "step": 32730
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1778051853179932,
      "learning_rate": 1.2413217019421312e-05,
      "loss": 2.4197,
      "step": 32731
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0805552005767822,
      "learning_rate": 1.241281744942396e-05,
      "loss": 2.3918,
      "step": 32732
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.022804617881775,
      "learning_rate": 1.241241787533623e-05,
      "loss": 2.3525,
      "step": 32733
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0548285245895386,
      "learning_rate": 1.2412018297158794e-05,
      "loss": 2.3275,
      "step": 32734
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2399834394454956,
      "learning_rate": 1.2411618714892332e-05,
      "loss": 2.5163,
      "step": 32735
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0649288892745972,
      "learning_rate": 1.2411219128537527e-05,
      "loss": 2.2702,
      "step": 32736
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2325915098190308,
      "learning_rate": 1.2410819538095044e-05,
      "loss": 2.4493,
      "step": 32737
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9683665633201599,
      "learning_rate": 1.241041994356557e-05,
      "loss": 2.3103,
      "step": 32738
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1392159461975098,
      "learning_rate": 1.241002034494978e-05,
      "loss": 2.4833,
      "step": 32739
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1076252460479736,
      "learning_rate": 1.240962074224835e-05,
      "loss": 2.2229,
      "step": 32740
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0971623659133911,
      "learning_rate": 1.2409221135461958e-05,
      "loss": 2.3103,
      "step": 32741
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1546728610992432,
      "learning_rate": 1.2408821524591282e-05,
      "loss": 2.4608,
      "step": 32742
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0522791147232056,
      "learning_rate": 1.2408421909637e-05,
      "loss": 2.3649,
      "step": 32743
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0142847299575806,
      "learning_rate": 1.2408022290599788e-05,
      "loss": 2.4574,
      "step": 32744
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0182770490646362,
      "learning_rate": 1.2407622667480326e-05,
      "loss": 2.3657,
      "step": 32745
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0419929027557373,
      "learning_rate": 1.2407223040279288e-05,
      "loss": 2.3577,
      "step": 32746
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0488555431365967,
      "learning_rate": 1.2406823408997352e-05,
      "loss": 2.292,
      "step": 32747
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0612454414367676,
      "learning_rate": 1.2406423773635201e-05,
      "loss": 2.4963,
      "step": 32748
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.05898916721344,
      "learning_rate": 1.2406024134193504e-05,
      "loss": 2.5031,
      "step": 32749
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0316028594970703,
      "learning_rate": 1.2405624490672943e-05,
      "loss": 2.3117,
      "step": 32750
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0100458860397339,
      "learning_rate": 1.24052248430742e-05,
      "loss": 2.417,
      "step": 32751
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.998643696308136,
      "learning_rate": 1.2404825191397942e-05,
      "loss": 2.2772,
      "step": 32752
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1858409643173218,
      "learning_rate": 1.2404425535644853e-05,
      "loss": 2.2976,
      "step": 32753
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0742433071136475,
      "learning_rate": 1.2404025875815612e-05,
      "loss": 2.3628,
      "step": 32754
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.193828821182251,
      "learning_rate": 1.2403626211910891e-05,
      "loss": 2.372,
      "step": 32755
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3431541919708252,
      "learning_rate": 1.2403226543931375e-05,
      "loss": 2.0715,
      "step": 32756
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1030174493789673,
      "learning_rate": 1.2402826871877733e-05,
      "loss": 2.367,
      "step": 32757
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1303614377975464,
      "learning_rate": 1.2402427195750648e-05,
      "loss": 2.2371,
      "step": 32758
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1646475791931152,
      "learning_rate": 1.2402027515550796e-05,
      "loss": 2.3061,
      "step": 32759
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1452052593231201,
      "learning_rate": 1.2401627831278857e-05,
      "loss": 2.2929,
      "step": 32760
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1968212127685547,
      "learning_rate": 1.2401228142935504e-05,
      "loss": 2.4376,
      "step": 32761
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2779639959335327,
      "learning_rate": 1.2400828450521419e-05,
      "loss": 2.3777,
      "step": 32762
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1381585597991943,
      "learning_rate": 1.2400428754037279e-05,
      "loss": 2.4689,
      "step": 32763
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0192632675170898,
      "learning_rate": 1.240002905348376e-05,
      "loss": 2.5441,
      "step": 32764
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2496583461761475,
      "learning_rate": 1.2399629348861538e-05,
      "loss": 2.3296,
      "step": 32765
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0886272192001343,
      "learning_rate": 1.239922964017129e-05,
      "loss": 2.4091,
      "step": 32766
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1165308952331543,
      "learning_rate": 1.23988299274137e-05,
      "loss": 2.474,
      "step": 32767
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1128658056259155,
      "learning_rate": 1.239843021058944e-05,
      "loss": 2.4909,
      "step": 32768
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.064038634300232,
      "learning_rate": 1.2398030489699194e-05,
      "loss": 2.3696,
      "step": 32769
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2526497840881348,
      "learning_rate": 1.239763076474363e-05,
      "loss": 2.3685,
      "step": 32770
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0491529703140259,
      "learning_rate": 1.2397231035723432e-05,
      "loss": 2.2201,
      "step": 32771
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1069223880767822,
      "learning_rate": 1.2396831302639276e-05,
      "loss": 2.3949,
      "step": 32772
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0659809112548828,
      "learning_rate": 1.239643156549184e-05,
      "loss": 2.4939,
      "step": 32773
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0578497648239136,
      "learning_rate": 1.2396031824281803e-05,
      "loss": 2.2949,
      "step": 32774
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3374160528182983,
      "learning_rate": 1.2395632079009839e-05,
      "loss": 2.6327,
      "step": 32775
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.030550241470337,
      "learning_rate": 1.2395232329676631e-05,
      "loss": 2.1629,
      "step": 32776
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1779565811157227,
      "learning_rate": 1.2394832576282851e-05,
      "loss": 2.35,
      "step": 32777
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0933715105056763,
      "learning_rate": 1.2394432818829181e-05,
      "loss": 2.1893,
      "step": 32778
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.209718108177185,
      "learning_rate": 1.2394033057316298e-05,
      "loss": 2.1461,
      "step": 32779
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0696797370910645,
      "learning_rate": 1.2393633291744877e-05,
      "loss": 2.4502,
      "step": 32780
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1518280506134033,
      "learning_rate": 1.2393233522115597e-05,
      "loss": 2.5005,
      "step": 32781
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0373270511627197,
      "learning_rate": 1.2392833748429137e-05,
      "loss": 2.5292,
      "step": 32782
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1150174140930176,
      "learning_rate": 1.2392433970686175e-05,
      "loss": 2.5805,
      "step": 32783
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0857899188995361,
      "learning_rate": 1.2392034188887386e-05,
      "loss": 2.3078,
      "step": 32784
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0790647268295288,
      "learning_rate": 1.239163440303345e-05,
      "loss": 2.2335,
      "step": 32785
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0362125635147095,
      "learning_rate": 1.2391234613125043e-05,
      "loss": 2.1383,
      "step": 32786
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2352607250213623,
      "learning_rate": 1.2390834819162844e-05,
      "loss": 2.4064,
      "step": 32787
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0146483182907104,
      "learning_rate": 1.2390435021147531e-05,
      "loss": 2.4359,
      "step": 32788
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0358620882034302,
      "learning_rate": 1.2390035219079784e-05,
      "loss": 2.2961,
      "step": 32789
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2061583995819092,
      "learning_rate": 1.2389635412960275e-05,
      "loss": 2.5289,
      "step": 32790
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0257648229599,
      "learning_rate": 1.2389235602789686e-05,
      "loss": 2.2925,
      "step": 32791
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0107473134994507,
      "learning_rate": 1.2388835788568693e-05,
      "loss": 2.3622,
      "step": 32792
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9835994243621826,
      "learning_rate": 1.2388435970297977e-05,
      "loss": 2.3856,
      "step": 32793
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9367058873176575,
      "learning_rate": 1.238803614797821e-05,
      "loss": 2.2256,
      "step": 32794
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1613528728485107,
      "learning_rate": 1.2387636321610075e-05,
      "loss": 2.4465,
      "step": 32795
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0739455223083496,
      "learning_rate": 1.2387236491194248e-05,
      "loss": 2.2003,
      "step": 32796
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0207006931304932,
      "learning_rate": 1.2386836656731407e-05,
      "loss": 2.3806,
      "step": 32797
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.107379674911499,
      "learning_rate": 1.2386436818222227e-05,
      "loss": 2.1319,
      "step": 32798
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1590545177459717,
      "learning_rate": 1.238603697566739e-05,
      "loss": 2.3281,
      "step": 32799
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0210175514221191,
      "learning_rate": 1.2385637129067574e-05,
      "loss": 2.2453,
      "step": 32800
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0786666870117188,
      "learning_rate": 1.2385237278423454e-05,
      "loss": 2.3201,
      "step": 32801
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0051097869873047,
      "learning_rate": 1.238483742373571e-05,
      "loss": 2.2007,
      "step": 32802
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0308572053909302,
      "learning_rate": 1.2384437565005017e-05,
      "loss": 2.4737,
      "step": 32803
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9889636635780334,
      "learning_rate": 1.2384037702232057e-05,
      "loss": 2.5718,
      "step": 32804
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2434113025665283,
      "learning_rate": 1.2383637835417504e-05,
      "loss": 2.2517,
      "step": 32805
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0588531494140625,
      "learning_rate": 1.2383237964562036e-05,
      "loss": 2.1615,
      "step": 32806
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.237627625465393,
      "learning_rate": 1.2382838089666335e-05,
      "loss": 2.4581,
      "step": 32807
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.086835503578186,
      "learning_rate": 1.2382438210731076e-05,
      "loss": 2.2641,
      "step": 32808
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0926634073257446,
      "learning_rate": 1.2382038327756936e-05,
      "loss": 2.2706,
      "step": 32809
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0113450288772583,
      "learning_rate": 1.2381638440744594e-05,
      "loss": 2.2128,
      "step": 32810
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0525736808776855,
      "learning_rate": 1.238123854969473e-05,
      "loss": 2.3272,
      "step": 32811
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2732362747192383,
      "learning_rate": 1.2380838654608018e-05,
      "loss": 2.5157,
      "step": 32812
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1407891511917114,
      "learning_rate": 1.2380438755485138e-05,
      "loss": 2.5088,
      "step": 32813
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.007266640663147,
      "learning_rate": 1.2380038852326768e-05,
      "loss": 2.1558,
      "step": 32814
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0145868062973022,
      "learning_rate": 1.2379638945133587e-05,
      "loss": 2.3738,
      "step": 32815
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1917994022369385,
      "learning_rate": 1.237923903390627e-05,
      "loss": 2.3099,
      "step": 32816
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1011991500854492,
      "learning_rate": 1.2378839118645496e-05,
      "loss": 2.2842,
      "step": 32817
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1375608444213867,
      "learning_rate": 1.2378439199351947e-05,
      "loss": 2.5664,
      "step": 32818
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.007157802581787,
      "learning_rate": 1.2378039276026293e-05,
      "loss": 2.5757,
      "step": 32819
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0244433879852295,
      "learning_rate": 1.2377639348669219e-05,
      "loss": 2.373,
      "step": 32820
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.113362193107605,
      "learning_rate": 1.2377239417281399e-05,
      "loss": 2.6143,
      "step": 32821
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0558615922927856,
      "learning_rate": 1.2376839481863516e-05,
      "loss": 2.4315,
      "step": 32822
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0717134475708008,
      "learning_rate": 1.2376439542416242e-05,
      "loss": 2.2693,
      "step": 32823
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1022180318832397,
      "learning_rate": 1.2376039598940258e-05,
      "loss": 2.4863,
      "step": 32824
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1200884580612183,
      "learning_rate": 1.2375639651436242e-05,
      "loss": 2.2018,
      "step": 32825
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.054591178894043,
      "learning_rate": 1.237523969990487e-05,
      "loss": 2.3713,
      "step": 32826
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0814158916473389,
      "learning_rate": 1.2374839744346823e-05,
      "loss": 2.4013,
      "step": 32827
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.056796908378601,
      "learning_rate": 1.2374439784762778e-05,
      "loss": 2.2802,
      "step": 32828
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.052272915840149,
      "learning_rate": 1.2374039821153411e-05,
      "loss": 2.2412,
      "step": 32829
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9726176261901855,
      "learning_rate": 1.2373639853519403e-05,
      "loss": 2.4859,
      "step": 32830
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9979048371315002,
      "learning_rate": 1.2373239881861431e-05,
      "loss": 2.6549,
      "step": 32831
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0269025564193726,
      "learning_rate": 1.2372839906180172e-05,
      "loss": 2.2552,
      "step": 32832
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.262628436088562,
      "learning_rate": 1.2372439926476305e-05,
      "loss": 2.3574,
      "step": 32833
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.098699927330017,
      "learning_rate": 1.2372039942750509e-05,
      "loss": 2.4893,
      "step": 32834
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0037373304367065,
      "learning_rate": 1.237163995500346e-05,
      "loss": 2.3571,
      "step": 32835
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0501936674118042,
      "learning_rate": 1.2371239963235836e-05,
      "loss": 2.4158,
      "step": 32836
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2060141563415527,
      "learning_rate": 1.237083996744832e-05,
      "loss": 2.5223,
      "step": 32837
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1349353790283203,
      "learning_rate": 1.2370439967641585e-05,
      "loss": 2.1543,
      "step": 32838
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0364810228347778,
      "learning_rate": 1.2370039963816309e-05,
      "loss": 2.399,
      "step": 32839
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0811731815338135,
      "learning_rate": 1.2369639955973171e-05,
      "loss": 2.3381,
      "step": 32840
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0093553066253662,
      "learning_rate": 1.2369239944112852e-05,
      "loss": 2.3569,
      "step": 32841
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0474793910980225,
      "learning_rate": 1.2368839928236027e-05,
      "loss": 2.4642,
      "step": 32842
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0735946893692017,
      "learning_rate": 1.2368439908343374e-05,
      "loss": 2.4225,
      "step": 32843
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0207366943359375,
      "learning_rate": 1.2368039884435573e-05,
      "loss": 2.5464,
      "step": 32844
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9903748035430908,
      "learning_rate": 1.23676398565133e-05,
      "loss": 2.5107,
      "step": 32845
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9545467495918274,
      "learning_rate": 1.2367239824577236e-05,
      "loss": 2.2254,
      "step": 32846
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0483471155166626,
      "learning_rate": 1.2366839788628057e-05,
      "loss": 2.6386,
      "step": 32847
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0811924934387207,
      "learning_rate": 1.2366439748666443e-05,
      "loss": 2.4925,
      "step": 32848
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1742619276046753,
      "learning_rate": 1.2366039704693068e-05,
      "loss": 2.3293,
      "step": 32849
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0924168825149536,
      "learning_rate": 1.2365639656708614e-05,
      "loss": 2.304,
      "step": 32850
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.04755699634552,
      "learning_rate": 1.236523960471376e-05,
      "loss": 2.2934,
      "step": 32851
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0490177869796753,
      "learning_rate": 1.2364839548709182e-05,
      "loss": 2.4216,
      "step": 32852
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9716585874557495,
      "learning_rate": 1.2364439488695557e-05,
      "loss": 2.4354,
      "step": 32853
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1453419923782349,
      "learning_rate": 1.2364039424673564e-05,
      "loss": 2.5622,
      "step": 32854
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9699852466583252,
      "learning_rate": 1.2363639356643886e-05,
      "loss": 2.3066,
      "step": 32855
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0233699083328247,
      "learning_rate": 1.2363239284607196e-05,
      "loss": 2.251,
      "step": 32856
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9878947138786316,
      "learning_rate": 1.2362839208564172e-05,
      "loss": 2.341,
      "step": 32857
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.111983060836792,
      "learning_rate": 1.2362439128515495e-05,
      "loss": 2.3717,
      "step": 32858
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1139575242996216,
      "learning_rate": 1.2362039044461841e-05,
      "loss": 2.4609,
      "step": 32859
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0352202653884888,
      "learning_rate": 1.2361638956403889e-05,
      "loss": 2.2751,
      "step": 32860
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1392302513122559,
      "learning_rate": 1.236123886434232e-05,
      "loss": 2.3304,
      "step": 32861
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.974396288394928,
      "learning_rate": 1.236083876827781e-05,
      "loss": 2.4283,
      "step": 32862
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0538756847381592,
      "learning_rate": 1.2360438668211034e-05,
      "loss": 2.3733,
      "step": 32863
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0213159322738647,
      "learning_rate": 1.2360038564142675e-05,
      "loss": 2.3914,
      "step": 32864
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0698699951171875,
      "learning_rate": 1.2359638456073411e-05,
      "loss": 2.3226,
      "step": 32865
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.220597505569458,
      "learning_rate": 1.235923834400392e-05,
      "loss": 2.2234,
      "step": 32866
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9801617860794067,
      "learning_rate": 1.2358838227934875e-05,
      "loss": 2.2218,
      "step": 32867
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.109760046005249,
      "learning_rate": 1.235843810786696e-05,
      "loss": 2.4621,
      "step": 32868
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0255779027938843,
      "learning_rate": 1.2358037983800854e-05,
      "loss": 2.2209,
      "step": 32869
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.04659903049469,
      "learning_rate": 1.235763785573723e-05,
      "loss": 2.4714,
      "step": 32870
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9954514503479004,
      "learning_rate": 1.2357237723676771e-05,
      "loss": 2.4389,
      "step": 32871
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.008677363395691,
      "learning_rate": 1.2356837587620154e-05,
      "loss": 2.1058,
      "step": 32872
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0860408544540405,
      "learning_rate": 1.2356437447568058e-05,
      "loss": 2.3433,
      "step": 32873
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0640579462051392,
      "learning_rate": 1.2356037303521158e-05,
      "loss": 2.1783,
      "step": 32874
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0815107822418213,
      "learning_rate": 1.2355637155480138e-05,
      "loss": 2.3851,
      "step": 32875
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1767076253890991,
      "learning_rate": 1.235523700344567e-05,
      "loss": 2.3462,
      "step": 32876
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.552649736404419,
      "learning_rate": 1.2354836847418436e-05,
      "loss": 2.4524,
      "step": 32877
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0040342807769775,
      "learning_rate": 1.2354436687399116e-05,
      "loss": 2.2766,
      "step": 32878
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.073002576828003,
      "learning_rate": 1.2354036523388384e-05,
      "loss": 2.4221,
      "step": 32879
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0371923446655273,
      "learning_rate": 1.2353636355386925e-05,
      "loss": 2.0593,
      "step": 32880
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2046383619308472,
      "learning_rate": 1.2353236183395409e-05,
      "loss": 2.42,
      "step": 32881
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9823405742645264,
      "learning_rate": 1.2352836007414521e-05,
      "loss": 2.4636,
      "step": 32882
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2019649744033813,
      "learning_rate": 1.2352435827444936e-05,
      "loss": 2.2867,
      "step": 32883
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.070297122001648,
      "learning_rate": 1.2352035643487332e-05,
      "loss": 2.5654,
      "step": 32884
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1150637865066528,
      "learning_rate": 1.235163545554239e-05,
      "loss": 2.2527,
      "step": 32885
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.148179054260254,
      "learning_rate": 1.2351235263610785e-05,
      "loss": 2.3337,
      "step": 32886
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0019787549972534,
      "learning_rate": 1.23508350676932e-05,
      "loss": 2.2593,
      "step": 32887
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0245778560638428,
      "learning_rate": 1.2350434867790316e-05,
      "loss": 2.3522,
      "step": 32888
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1141726970672607,
      "learning_rate": 1.23500346639028e-05,
      "loss": 2.4513,
      "step": 32889
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9809519052505493,
      "learning_rate": 1.2349634456031337e-05,
      "loss": 2.2707,
      "step": 32890
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1018327474594116,
      "learning_rate": 1.2349234244176607e-05,
      "loss": 2.3271,
      "step": 32891
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1279218196868896,
      "learning_rate": 1.2348834028339287e-05,
      "loss": 2.5606,
      "step": 32892
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4197980165481567,
      "learning_rate": 1.2348433808520055e-05,
      "loss": 2.4261,
      "step": 32893
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.054146647453308,
      "learning_rate": 1.2348033584719593e-05,
      "loss": 2.3989,
      "step": 32894
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.031045913696289,
      "learning_rate": 1.2347633356938571e-05,
      "loss": 2.5022,
      "step": 32895
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0222951173782349,
      "learning_rate": 1.2347233125177676e-05,
      "loss": 2.2991,
      "step": 32896
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0696332454681396,
      "learning_rate": 1.2346832889437583e-05,
      "loss": 2.1689,
      "step": 32897
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4577789306640625,
      "learning_rate": 1.2346432649718972e-05,
      "loss": 2.5106,
      "step": 32898
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9803917407989502,
      "learning_rate": 1.2346032406022517e-05,
      "loss": 2.2234,
      "step": 32899
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1020034551620483,
      "learning_rate": 1.2345632158348901e-05,
      "loss": 2.338,
      "step": 32900
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1006360054016113,
      "learning_rate": 1.2345231906698805e-05,
      "loss": 2.4057,
      "step": 32901
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0607702732086182,
      "learning_rate": 1.2344831651072902e-05,
      "loss": 2.5075,
      "step": 32902
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1108298301696777,
      "learning_rate": 1.234443139147187e-05,
      "loss": 2.3821,
      "step": 32903
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1667871475219727,
      "learning_rate": 1.2344031127896392e-05,
      "loss": 2.3771,
      "step": 32904
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1032902002334595,
      "learning_rate": 1.2343630860347142e-05,
      "loss": 2.2389,
      "step": 32905
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1410467624664307,
      "learning_rate": 1.2343230588824804e-05,
      "loss": 2.2566,
      "step": 32906
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.018988847732544,
      "learning_rate": 1.2342830313330052e-05,
      "loss": 2.3764,
      "step": 32907
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1526387929916382,
      "learning_rate": 1.234243003386357e-05,
      "loss": 2.2921,
      "step": 32908
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0031479597091675,
      "learning_rate": 1.234202975042603e-05,
      "loss": 2.4739,
      "step": 32909
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0005630254745483,
      "learning_rate": 1.2341629463018114e-05,
      "loss": 2.2706,
      "step": 32910
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.13791024684906,
      "learning_rate": 1.2341229171640499e-05,
      "loss": 2.4793,
      "step": 32911
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0188082456588745,
      "learning_rate": 1.2340828876293867e-05,
      "loss": 2.3838,
      "step": 32912
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0826144218444824,
      "learning_rate": 1.234042857697889e-05,
      "loss": 2.4451,
      "step": 32913
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0193562507629395,
      "learning_rate": 1.2340028273696258e-05,
      "loss": 2.3525,
      "step": 32914
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0957958698272705,
      "learning_rate": 1.2339627966446638e-05,
      "loss": 2.736,
      "step": 32915
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1182698011398315,
      "learning_rate": 1.2339227655230713e-05,
      "loss": 2.2612,
      "step": 32916
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1690059900283813,
      "learning_rate": 1.2338827340049163e-05,
      "loss": 2.0669,
      "step": 32917
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0557529926300049,
      "learning_rate": 1.2338427020902663e-05,
      "loss": 2.3617,
      "step": 32918
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2158926725387573,
      "learning_rate": 1.2338026697791895e-05,
      "loss": 2.3296,
      "step": 32919
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0580898523330688,
      "learning_rate": 1.2337626370717537e-05,
      "loss": 2.535,
      "step": 32920
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.012536883354187,
      "learning_rate": 1.233722603968027e-05,
      "loss": 2.2524,
      "step": 32921
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0862443447113037,
      "learning_rate": 1.2336825704680768e-05,
      "loss": 2.4381,
      "step": 32922
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1403288841247559,
      "learning_rate": 1.2336425365719713e-05,
      "loss": 2.4821,
      "step": 32923
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.048250436782837,
      "learning_rate": 1.233602502279778e-05,
      "loss": 2.3704,
      "step": 32924
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0385737419128418,
      "learning_rate": 1.2335624675915654e-05,
      "loss": 2.4827,
      "step": 32925
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0621094703674316,
      "learning_rate": 1.2335224325074007e-05,
      "loss": 2.3433,
      "step": 32926
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0572973489761353,
      "learning_rate": 1.233482397027352e-05,
      "loss": 2.4598,
      "step": 32927
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.94655442237854,
      "learning_rate": 1.2334423611514877e-05,
      "loss": 2.4319,
      "step": 32928
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1509640216827393,
      "learning_rate": 1.2334023248798746e-05,
      "loss": 2.3221,
      "step": 32929
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.068932056427002,
      "learning_rate": 1.2333622882125814e-05,
      "loss": 2.401,
      "step": 32930
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1724506616592407,
      "learning_rate": 1.2333222511496757e-05,
      "loss": 2.5059,
      "step": 32931
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0448625087738037,
      "learning_rate": 1.2332822136912253e-05,
      "loss": 2.4882,
      "step": 32932
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0404727458953857,
      "learning_rate": 1.2332421758372984e-05,
      "loss": 2.4222,
      "step": 32933
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0869754552841187,
      "learning_rate": 1.2332021375879626e-05,
      "loss": 2.3923,
      "step": 32934
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0469173192977905,
      "learning_rate": 1.2331620989432857e-05,
      "loss": 2.3526,
      "step": 32935
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2825288772583008,
      "learning_rate": 1.233122059903336e-05,
      "loss": 2.5293,
      "step": 32936
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0133185386657715,
      "learning_rate": 1.2330820204681809e-05,
      "loss": 2.2886,
      "step": 32937
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.18614661693573,
      "learning_rate": 1.2330419806378884e-05,
      "loss": 2.2828,
      "step": 32938
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0197691917419434,
      "learning_rate": 1.2330019404125265e-05,
      "loss": 2.3037,
      "step": 32939
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0195393562316895,
      "learning_rate": 1.232961899792163e-05,
      "loss": 2.4279,
      "step": 32940
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0075136423110962,
      "learning_rate": 1.232921858776866e-05,
      "loss": 2.2889,
      "step": 32941
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1350222826004028,
      "learning_rate": 1.2328818173667032e-05,
      "loss": 2.4678,
      "step": 32942
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9893385767936707,
      "learning_rate": 1.2328417755617423e-05,
      "loss": 2.2069,
      "step": 32943
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0341061353683472,
      "learning_rate": 1.2328017333620514e-05,
      "loss": 2.0569,
      "step": 32944
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.152348518371582,
      "learning_rate": 1.2327616907676982e-05,
      "loss": 2.1802,
      "step": 32945
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0212715864181519,
      "learning_rate": 1.232721647778751e-05,
      "loss": 2.3529,
      "step": 32946
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1344897747039795,
      "learning_rate": 1.2326816043952774e-05,
      "loss": 2.2591,
      "step": 32947
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1354141235351562,
      "learning_rate": 1.2326415606173449e-05,
      "loss": 2.4664,
      "step": 32948
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9836336970329285,
      "learning_rate": 1.232601516445022e-05,
      "loss": 2.4439,
      "step": 32949
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1184254884719849,
      "learning_rate": 1.2325614718783764e-05,
      "loss": 2.4768,
      "step": 32950
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9717051386833191,
      "learning_rate": 1.2325214269174758e-05,
      "loss": 2.1151,
      "step": 32951
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0936592817306519,
      "learning_rate": 1.2324813815623885e-05,
      "loss": 2.3032,
      "step": 32952
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2661739587783813,
      "learning_rate": 1.232441335813182e-05,
      "loss": 2.3255,
      "step": 32953
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0661357641220093,
      "learning_rate": 1.2324012896699243e-05,
      "loss": 2.2198,
      "step": 32954
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.127240777015686,
      "learning_rate": 1.232361243132683e-05,
      "loss": 2.3691,
      "step": 32955
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0347521305084229,
      "learning_rate": 1.2323211962015269e-05,
      "loss": 2.0152,
      "step": 32956
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1283986568450928,
      "learning_rate": 1.2322811488765228e-05,
      "loss": 2.4613,
      "step": 32957
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0597015619277954,
      "learning_rate": 1.2322411011577391e-05,
      "loss": 2.0852,
      "step": 32958
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.976433277130127,
      "learning_rate": 1.2322010530452437e-05,
      "loss": 2.496,
      "step": 32959
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1444857120513916,
      "learning_rate": 1.2321610045391049e-05,
      "loss": 2.3562,
      "step": 32960
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1421153545379639,
      "learning_rate": 1.2321209556393896e-05,
      "loss": 2.2551,
      "step": 32961
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9964028596878052,
      "learning_rate": 1.2320809063461664e-05,
      "loss": 2.2143,
      "step": 32962
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1041508913040161,
      "learning_rate": 1.2320408566595028e-05,
      "loss": 2.3089,
      "step": 32963
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9947593212127686,
      "learning_rate": 1.232000806579467e-05,
      "loss": 2.243,
      "step": 32964
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1595487594604492,
      "learning_rate": 1.231960756106127e-05,
      "loss": 2.4807,
      "step": 32965
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0145362615585327,
      "learning_rate": 1.2319207052395505e-05,
      "loss": 2.3218,
      "step": 32966
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0417178869247437,
      "learning_rate": 1.2318806539798054e-05,
      "loss": 2.1637,
      "step": 32967
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.342834711074829,
      "learning_rate": 1.2318406023269596e-05,
      "loss": 2.2683,
      "step": 32968
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1277486085891724,
      "learning_rate": 1.231800550281081e-05,
      "loss": 2.4023,
      "step": 32969
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0564360618591309,
      "learning_rate": 1.2317604978422375e-05,
      "loss": 2.263,
      "step": 32970
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0475752353668213,
      "learning_rate": 1.2317204450104969e-05,
      "loss": 2.5347,
      "step": 32971
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2022852897644043,
      "learning_rate": 1.2316803917859274e-05,
      "loss": 2.3107,
      "step": 32972
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0809727907180786,
      "learning_rate": 1.2316403381685966e-05,
      "loss": 2.4782,
      "step": 32973
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.046395182609558,
      "learning_rate": 1.2316002841585728e-05,
      "loss": 2.2505,
      "step": 32974
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5744755268096924,
      "learning_rate": 1.2315602297559232e-05,
      "loss": 2.7069,
      "step": 32975
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0977799892425537,
      "learning_rate": 1.2315201749607165e-05,
      "loss": 2.4919,
      "step": 32976
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0976566076278687,
      "learning_rate": 1.23148011977302e-05,
      "loss": 2.3746,
      "step": 32977
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.099696397781372,
      "learning_rate": 1.2314400641929017e-05,
      "loss": 2.1279,
      "step": 32978
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1350446939468384,
      "learning_rate": 1.2314000082204298e-05,
      "loss": 2.3277,
      "step": 32979
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.000977873802185,
      "learning_rate": 1.231359951855672e-05,
      "loss": 2.5535,
      "step": 32980
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0916024446487427,
      "learning_rate": 1.2313198950986964e-05,
      "loss": 2.6228,
      "step": 32981
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.040000319480896,
      "learning_rate": 1.2312798379495705e-05,
      "loss": 2.3063,
      "step": 32982
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0556284189224243,
      "learning_rate": 1.2312397804083626e-05,
      "loss": 2.3942,
      "step": 32983
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0817630290985107,
      "learning_rate": 1.2311997224751406e-05,
      "loss": 2.3904,
      "step": 32984
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0859631299972534,
      "learning_rate": 1.2311596641499721e-05,
      "loss": 2.4317,
      "step": 32985
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1260039806365967,
      "learning_rate": 1.2311196054329253e-05,
      "loss": 2.2778,
      "step": 32986
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.115195631980896,
      "learning_rate": 1.231079546324068e-05,
      "loss": 2.3731,
      "step": 32987
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1317002773284912,
      "learning_rate": 1.2310394868234678e-05,
      "loss": 2.5867,
      "step": 32988
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0409164428710938,
      "learning_rate": 1.2309994269311933e-05,
      "loss": 2.461,
      "step": 32989
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2569000720977783,
      "learning_rate": 1.2309593666473118e-05,
      "loss": 2.2037,
      "step": 32990
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0461939573287964,
      "learning_rate": 1.2309193059718916e-05,
      "loss": 2.5328,
      "step": 32991
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0594676733016968,
      "learning_rate": 1.2308792449050002e-05,
      "loss": 2.4698,
      "step": 32992
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1929049491882324,
      "learning_rate": 1.2308391834467062e-05,
      "loss": 2.4204,
      "step": 32993
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.083718180656433,
      "learning_rate": 1.2307991215970767e-05,
      "loss": 2.5478,
      "step": 32994
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.061705470085144,
      "learning_rate": 1.2307590593561802e-05,
      "loss": 2.3863,
      "step": 32995
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.082622766494751,
      "learning_rate": 1.2307189967240842e-05,
      "loss": 2.2795,
      "step": 32996
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1084638833999634,
      "learning_rate": 1.230678933700857e-05,
      "loss": 2.5629,
      "step": 32997
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.106101393699646,
      "learning_rate": 1.2306388702865663e-05,
      "loss": 2.5458,
      "step": 32998
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2314012050628662,
      "learning_rate": 1.2305988064812802e-05,
      "loss": 2.2569,
      "step": 32999
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0450434684753418,
      "learning_rate": 1.2305587422850667e-05,
      "loss": 2.3365,
      "step": 33000
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0610601902008057,
      "learning_rate": 1.2305186776979931e-05,
      "loss": 2.4375,
      "step": 33001
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1453574895858765,
      "learning_rate": 1.2304786127201278e-05,
      "loss": 2.4217,
      "step": 33002
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2197257280349731,
      "learning_rate": 1.2304385473515388e-05,
      "loss": 2.4748,
      "step": 33003
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0982574224472046,
      "learning_rate": 1.2303984815922937e-05,
      "loss": 2.3527,
      "step": 33004
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0336706638336182,
      "learning_rate": 1.2303584154424608e-05,
      "loss": 2.3838,
      "step": 33005
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.033653736114502,
      "learning_rate": 1.2303183489021075e-05,
      "loss": 2.4919,
      "step": 33006
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1766042709350586,
      "learning_rate": 1.2302782819713025e-05,
      "loss": 2.5156,
      "step": 33007
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.029541015625,
      "learning_rate": 1.2302382146501131e-05,
      "loss": 2.343,
      "step": 33008
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0793788433074951,
      "learning_rate": 1.2301981469386072e-05,
      "loss": 2.4674,
      "step": 33009
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1149455308914185,
      "learning_rate": 1.230158078836853e-05,
      "loss": 2.251,
      "step": 33010
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.084829568862915,
      "learning_rate": 1.2301180103449185e-05,
      "loss": 2.319,
      "step": 33011
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0375452041625977,
      "learning_rate": 1.2300779414628715e-05,
      "loss": 2.4535,
      "step": 33012
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9595598578453064,
      "learning_rate": 1.2300378721907797e-05,
      "loss": 2.186,
      "step": 33013
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9651429057121277,
      "learning_rate": 1.2299978025287114e-05,
      "loss": 2.252,
      "step": 33014
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.982616126537323,
      "learning_rate": 1.2299577324767342e-05,
      "loss": 2.4725,
      "step": 33015
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2300279140472412,
      "learning_rate": 1.2299176620349163e-05,
      "loss": 2.2908,
      "step": 33016
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2864421606063843,
      "learning_rate": 1.2298775912033254e-05,
      "loss": 2.3891,
      "step": 33017
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9898128509521484,
      "learning_rate": 1.2298375199820296e-05,
      "loss": 2.0954,
      "step": 33018
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.01237952709198,
      "learning_rate": 1.2297974483710969e-05,
      "loss": 2.306,
      "step": 33019
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0609523057937622,
      "learning_rate": 1.229757376370595e-05,
      "loss": 2.5945,
      "step": 33020
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1901278495788574,
      "learning_rate": 1.2297173039805921e-05,
      "loss": 2.1232,
      "step": 33021
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1548258066177368,
      "learning_rate": 1.2296772312011558e-05,
      "loss": 2.567,
      "step": 33022
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1387461423873901,
      "learning_rate": 1.229637158032354e-05,
      "loss": 2.3066,
      "step": 33023
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1164729595184326,
      "learning_rate": 1.2295970844742553e-05,
      "loss": 2.4521,
      "step": 33024
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0004773139953613,
      "learning_rate": 1.2295570105269267e-05,
      "loss": 2.4884,
      "step": 33025
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0939064025878906,
      "learning_rate": 1.2295169361904372e-05,
      "loss": 2.3834,
      "step": 33026
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9789632558822632,
      "learning_rate": 1.2294768614648539e-05,
      "loss": 2.327,
      "step": 33027
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1438586711883545,
      "learning_rate": 1.2294367863502448e-05,
      "loss": 2.2647,
      "step": 33028
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1536586284637451,
      "learning_rate": 1.2293967108466783e-05,
      "loss": 2.2989,
      "step": 33029
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0865256786346436,
      "learning_rate": 1.2293566349542219e-05,
      "loss": 2.4529,
      "step": 33030
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.041634440422058,
      "learning_rate": 1.2293165586729437e-05,
      "loss": 2.2905,
      "step": 33031
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.106394648551941,
      "learning_rate": 1.2292764820029117e-05,
      "loss": 2.5116,
      "step": 33032
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9794439077377319,
      "learning_rate": 1.2292364049441941e-05,
      "loss": 2.4361,
      "step": 33033
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1952975988388062,
      "learning_rate": 1.2291963274968581e-05,
      "loss": 2.241,
      "step": 33034
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.073965311050415,
      "learning_rate": 1.2291562496609722e-05,
      "loss": 2.4671,
      "step": 33035
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0748530626296997,
      "learning_rate": 1.2291161714366043e-05,
      "loss": 2.3758,
      "step": 33036
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1270999908447266,
      "learning_rate": 1.2290760928238221e-05,
      "loss": 2.376,
      "step": 33037
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0953142642974854,
      "learning_rate": 1.2290360138226938e-05,
      "loss": 2.0135,
      "step": 33038
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1233559846878052,
      "learning_rate": 1.2289959344332873e-05,
      "loss": 2.5148,
      "step": 33039
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9835236668586731,
      "learning_rate": 1.2289558546556703e-05,
      "loss": 2.3612,
      "step": 33040
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0626955032348633,
      "learning_rate": 1.2289157744899112e-05,
      "loss": 2.3763,
      "step": 33041
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0480115413665771,
      "learning_rate": 1.2288756939360775e-05,
      "loss": 2.321,
      "step": 33042
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1366783380508423,
      "learning_rate": 1.2288356129942378e-05,
      "loss": 2.4272,
      "step": 33043
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2720507383346558,
      "learning_rate": 1.228795531664459e-05,
      "loss": 2.5179,
      "step": 33044
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0646227598190308,
      "learning_rate": 1.22875544994681e-05,
      "loss": 2.3934,
      "step": 33045
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0571659803390503,
      "learning_rate": 1.2287153678413583e-05,
      "loss": 2.3213,
      "step": 33046
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.015993356704712,
      "learning_rate": 1.2286752853481718e-05,
      "loss": 2.3125,
      "step": 33047
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0973353385925293,
      "learning_rate": 1.2286352024673187e-05,
      "loss": 2.371,
      "step": 33048
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.110811471939087,
      "learning_rate": 1.2285951191988667e-05,
      "loss": 2.3938,
      "step": 33049
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0011998414993286,
      "learning_rate": 1.228555035542884e-05,
      "loss": 2.3947,
      "step": 33050
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9883822202682495,
      "learning_rate": 1.2285149514994383e-05,
      "loss": 2.4548,
      "step": 33051
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0304034948349,
      "learning_rate": 1.2284748670685977e-05,
      "loss": 2.2874,
      "step": 33052
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0289037227630615,
      "learning_rate": 1.2284347822504305e-05,
      "loss": 2.3496,
      "step": 33053
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0844709873199463,
      "learning_rate": 1.228394697045004e-05,
      "loss": 2.4402,
      "step": 33054
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0995216369628906,
      "learning_rate": 1.2283546114523865e-05,
      "loss": 2.5052,
      "step": 33055
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0826526880264282,
      "learning_rate": 1.2283145254726459e-05,
      "loss": 2.4011,
      "step": 33056
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.00977623462677,
      "learning_rate": 1.22827443910585e-05,
      "loss": 2.4367,
      "step": 33057
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1146773099899292,
      "learning_rate": 1.2282343523520672e-05,
      "loss": 2.4172,
      "step": 33058
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.170920491218567,
      "learning_rate": 1.2281942652113653e-05,
      "loss": 2.4168,
      "step": 33059
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2064144611358643,
      "learning_rate": 1.2281541776838119e-05,
      "loss": 2.4099,
      "step": 33060
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9816118478775024,
      "learning_rate": 1.2281140897694753e-05,
      "loss": 2.7382,
      "step": 33061
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1174936294555664,
      "learning_rate": 1.2280740014684231e-05,
      "loss": 2.211,
      "step": 33062
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.950232982635498,
      "learning_rate": 1.228033912780724e-05,
      "loss": 2.157,
      "step": 33063
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0601309537887573,
      "learning_rate": 1.227993823706445e-05,
      "loss": 2.3914,
      "step": 33064
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0893771648406982,
      "learning_rate": 1.2279537342456547e-05,
      "loss": 2.2885,
      "step": 33065
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.047278881072998,
      "learning_rate": 1.2279136443984213e-05,
      "loss": 2.3003,
      "step": 33066
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9382340908050537,
      "learning_rate": 1.2278735541648119e-05,
      "loss": 2.3452,
      "step": 33067
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0368438959121704,
      "learning_rate": 1.2278334635448949e-05,
      "loss": 2.3142,
      "step": 33068
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0849125385284424,
      "learning_rate": 1.2277933725387385e-05,
      "loss": 2.483,
      "step": 33069
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9919978976249695,
      "learning_rate": 1.2277532811464106e-05,
      "loss": 2.213,
      "step": 33070
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1938313245773315,
      "learning_rate": 1.227713189367979e-05,
      "loss": 2.3833,
      "step": 33071
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0648120641708374,
      "learning_rate": 1.2276730972035116e-05,
      "loss": 2.282,
      "step": 33072
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0839571952819824,
      "learning_rate": 1.2276330046530764e-05,
      "loss": 2.4464,
      "step": 33073
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2804158926010132,
      "learning_rate": 1.2275929117167416e-05,
      "loss": 2.5335,
      "step": 33074
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.12649405002594,
      "learning_rate": 1.2275528183945749e-05,
      "loss": 2.4115,
      "step": 33075
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0531350374221802,
      "learning_rate": 1.2275127246866442e-05,
      "loss": 2.3871,
      "step": 33076
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1655491590499878,
      "learning_rate": 1.2274726305930178e-05,
      "loss": 2.2123,
      "step": 33077
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.083282232284546,
      "learning_rate": 1.2274325361137636e-05,
      "loss": 2.466,
      "step": 33078
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0506521463394165,
      "learning_rate": 1.2273924412489496e-05,
      "loss": 2.2593,
      "step": 33079
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9833868145942688,
      "learning_rate": 1.2273523459986434e-05,
      "loss": 2.3516,
      "step": 33080
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.088751196861267,
      "learning_rate": 1.227312250362913e-05,
      "loss": 2.3134,
      "step": 33081
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0457918643951416,
      "learning_rate": 1.227272154341827e-05,
      "loss": 2.322,
      "step": 33082
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1802852153778076,
      "learning_rate": 1.2272320579354528e-05,
      "loss": 2.3999,
      "step": 33083
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.036260962486267,
      "learning_rate": 1.2271919611438585e-05,
      "loss": 2.1764,
      "step": 33084
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.105160117149353,
      "learning_rate": 1.227151863967112e-05,
      "loss": 2.2453,
      "step": 33085
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0427128076553345,
      "learning_rate": 1.2271117664052819e-05,
      "loss": 2.3023,
      "step": 33086
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0405664443969727,
      "learning_rate": 1.2270716684584354e-05,
      "loss": 2.3297,
      "step": 33087
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1099939346313477,
      "learning_rate": 1.2270315701266406e-05,
      "loss": 2.4392,
      "step": 33088
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.925127387046814,
      "learning_rate": 1.2269914714099658e-05,
      "loss": 2.228,
      "step": 33089
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0522563457489014,
      "learning_rate": 1.2269513723084787e-05,
      "loss": 2.5079,
      "step": 33090
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1300803422927856,
      "learning_rate": 1.2269112728222474e-05,
      "loss": 2.61,
      "step": 33091
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0386446714401245,
      "learning_rate": 1.22687117295134e-05,
      "loss": 2.5441,
      "step": 33092
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.001988410949707,
      "learning_rate": 1.2268310726958244e-05,
      "loss": 2.4807,
      "step": 33093
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.371881127357483,
      "learning_rate": 1.2267909720557682e-05,
      "loss": 2.4907,
      "step": 33094
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0869126319885254,
      "learning_rate": 1.22675087103124e-05,
      "loss": 2.1463,
      "step": 33095
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1124207973480225,
      "learning_rate": 1.2267107696223074e-05,
      "loss": 2.4236,
      "step": 33096
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0869317054748535,
      "learning_rate": 1.2266706678290383e-05,
      "loss": 2.2417,
      "step": 33097
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2961609363555908,
      "learning_rate": 1.226630565651501e-05,
      "loss": 2.3909,
      "step": 33098
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3292882442474365,
      "learning_rate": 1.2265904630897634e-05,
      "loss": 2.397,
      "step": 33099
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.016562581062317,
      "learning_rate": 1.2265503601438932e-05,
      "loss": 2.2816,
      "step": 33100
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4912159442901611,
      "learning_rate": 1.2265102568139588e-05,
      "loss": 2.2406,
      "step": 33101
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0256091356277466,
      "learning_rate": 1.2264701531000283e-05,
      "loss": 2.2879,
      "step": 33102
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1452810764312744,
      "learning_rate": 1.226430049002169e-05,
      "loss": 2.3473,
      "step": 33103
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0767629146575928,
      "learning_rate": 1.2263899445204492e-05,
      "loss": 2.4033,
      "step": 33104
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0940154790878296,
      "learning_rate": 1.2263498396549372e-05,
      "loss": 2.2452,
      "step": 33105
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2147430181503296,
      "learning_rate": 1.2263097344057007e-05,
      "loss": 2.4893,
      "step": 33106
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0934836864471436,
      "learning_rate": 1.2262696287728076e-05,
      "loss": 2.1949,
      "step": 33107
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4363163709640503,
      "learning_rate": 1.226229522756326e-05,
      "loss": 2.2947,
      "step": 33108
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9867699146270752,
      "learning_rate": 1.226189416356324e-05,
      "loss": 2.4218,
      "step": 33109
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2877880334854126,
      "learning_rate": 1.2261493095728694e-05,
      "loss": 2.1076,
      "step": 33110
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.053645372390747,
      "learning_rate": 1.2261092024060303e-05,
      "loss": 2.1219,
      "step": 33111
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0423712730407715,
      "learning_rate": 1.226069094855875e-05,
      "loss": 2.261,
      "step": 33112
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0914677381515503,
      "learning_rate": 1.226028986922471e-05,
      "loss": 2.5061,
      "step": 33113
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0164098739624023,
      "learning_rate": 1.2259888786058864e-05,
      "loss": 2.327,
      "step": 33114
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0894697904586792,
      "learning_rate": 1.2259487699061893e-05,
      "loss": 2.3652,
      "step": 33115
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0992112159729004,
      "learning_rate": 1.2259086608234475e-05,
      "loss": 2.1263,
      "step": 33116
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0505306720733643,
      "learning_rate": 1.2258685513577295e-05,
      "loss": 2.4229,
      "step": 33117
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0489692687988281,
      "learning_rate": 1.2258284415091029e-05,
      "loss": 2.4319,
      "step": 33118
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0216742753982544,
      "learning_rate": 1.2257883312776357e-05,
      "loss": 2.2826,
      "step": 33119
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1428864002227783,
      "learning_rate": 1.225748220663396e-05,
      "loss": 2.366,
      "step": 33120
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1405538320541382,
      "learning_rate": 1.2257081096664516e-05,
      "loss": 2.4553,
      "step": 33121
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1437236070632935,
      "learning_rate": 1.2256679982868707e-05,
      "loss": 2.4222,
      "step": 33122
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0349161624908447,
      "learning_rate": 1.2256278865247213e-05,
      "loss": 2.3294,
      "step": 33123
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1754671335220337,
      "learning_rate": 1.2255877743800713e-05,
      "loss": 2.4952,
      "step": 33124
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1932251453399658,
      "learning_rate": 1.225547661852989e-05,
      "loss": 2.1803,
      "step": 33125
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.033395767211914,
      "learning_rate": 1.2255075489435419e-05,
      "loss": 2.4328,
      "step": 33126
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9421275854110718,
      "learning_rate": 1.2254674356517982e-05,
      "loss": 2.4996,
      "step": 33127
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.057940125465393,
      "learning_rate": 1.2254273219778262e-05,
      "loss": 2.2088,
      "step": 33128
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9571771621704102,
      "learning_rate": 1.2253872079216933e-05,
      "loss": 2.4386,
      "step": 33129
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1782351732254028,
      "learning_rate": 1.2253470934834684e-05,
      "loss": 2.432,
      "step": 33130
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.069322943687439,
      "learning_rate": 1.2253069786632186e-05,
      "loss": 2.4696,
      "step": 33131
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0841881036758423,
      "learning_rate": 1.2252668634610123e-05,
      "loss": 2.2229,
      "step": 33132
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0908880233764648,
      "learning_rate": 1.2252267478769177e-05,
      "loss": 2.1682,
      "step": 33133
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0686163902282715,
      "learning_rate": 1.2251866319110024e-05,
      "loss": 2.4376,
      "step": 33134
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.197037696838379,
      "learning_rate": 1.2251465155633346e-05,
      "loss": 2.1159,
      "step": 33135
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0305891036987305,
      "learning_rate": 1.2251063988339824e-05,
      "loss": 2.618,
      "step": 33136
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.186873435974121,
      "learning_rate": 1.2250662817230134e-05,
      "loss": 2.4136,
      "step": 33137
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1921378374099731,
      "learning_rate": 1.2250261642304966e-05,
      "loss": 2.3205,
      "step": 33138
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2135297060012817,
      "learning_rate": 1.224986046356499e-05,
      "loss": 2.4616,
      "step": 33139
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0444105863571167,
      "learning_rate": 1.2249459281010886e-05,
      "loss": 2.288,
      "step": 33140
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1497936248779297,
      "learning_rate": 1.2249058094643341e-05,
      "loss": 2.4174,
      "step": 33141
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0090314149856567,
      "learning_rate": 1.224865690446303e-05,
      "loss": 2.1565,
      "step": 33142
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1136598587036133,
      "learning_rate": 1.2248255710470636e-05,
      "loss": 2.3082,
      "step": 33143
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9614338278770447,
      "learning_rate": 1.2247854512666836e-05,
      "loss": 2.525,
      "step": 33144
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6830416917800903,
      "learning_rate": 1.2247453311052315e-05,
      "loss": 2.451,
      "step": 33145
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0083402395248413,
      "learning_rate": 1.224705210562775e-05,
      "loss": 2.5449,
      "step": 33146
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.419226884841919,
      "learning_rate": 1.2246650896393818e-05,
      "loss": 2.3449,
      "step": 33147
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.150208830833435,
      "learning_rate": 1.2246249683351205e-05,
      "loss": 2.3344,
      "step": 33148
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1640795469284058,
      "learning_rate": 1.2245848466500589e-05,
      "loss": 2.333,
      "step": 33149
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0677621364593506,
      "learning_rate": 1.224544724584265e-05,
      "loss": 2.3307,
      "step": 33150
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2648143768310547,
      "learning_rate": 1.2245046021378066e-05,
      "loss": 2.4107,
      "step": 33151
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.065239429473877,
      "learning_rate": 1.2244644793107523e-05,
      "loss": 2.4998,
      "step": 33152
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9140837788581848,
      "learning_rate": 1.2244243561031695e-05,
      "loss": 2.2207,
      "step": 33153
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9862635731697083,
      "learning_rate": 1.2243842325151265e-05,
      "loss": 2.3857,
      "step": 33154
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0032148361206055,
      "learning_rate": 1.2243441085466911e-05,
      "loss": 2.5404,
      "step": 33155
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.028513789176941,
      "learning_rate": 1.2243039841979318e-05,
      "loss": 2.3751,
      "step": 33156
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0557225942611694,
      "learning_rate": 1.2242638594689162e-05,
      "loss": 2.3477,
      "step": 33157
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9964024424552917,
      "learning_rate": 1.2242237343597125e-05,
      "loss": 2.5171,
      "step": 33158
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0830293893814087,
      "learning_rate": 1.2241836088703886e-05,
      "loss": 2.3517,
      "step": 33159
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0283671617507935,
      "learning_rate": 1.2241434830010125e-05,
      "loss": 2.333,
      "step": 33160
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.081532597541809,
      "learning_rate": 1.2241033567516527e-05,
      "loss": 2.5365,
      "step": 33161
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0015769004821777,
      "learning_rate": 1.2240632301223768e-05,
      "loss": 2.4223,
      "step": 33162
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3189489841461182,
      "learning_rate": 1.2240231031132525e-05,
      "loss": 2.5654,
      "step": 33163
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0112427473068237,
      "learning_rate": 1.2239829757243483e-05,
      "loss": 2.288,
      "step": 33164
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0310708284378052,
      "learning_rate": 1.2239428479557322e-05,
      "loss": 2.5928,
      "step": 33165
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0049315690994263,
      "learning_rate": 1.2239027198074723e-05,
      "loss": 2.3034,
      "step": 33166
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2009950876235962,
      "learning_rate": 1.2238625912796362e-05,
      "loss": 2.3112,
      "step": 33167
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9893039464950562,
      "learning_rate": 1.2238224623722922e-05,
      "loss": 2.2089,
      "step": 33168
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1477642059326172,
      "learning_rate": 1.2237823330855085e-05,
      "loss": 2.4307,
      "step": 33169
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1116795539855957,
      "learning_rate": 1.2237422034193528e-05,
      "loss": 2.5829,
      "step": 33170
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1611065864562988,
      "learning_rate": 1.2237020733738936e-05,
      "loss": 2.5397,
      "step": 33171
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.017042875289917,
      "learning_rate": 1.2236619429491985e-05,
      "loss": 2.4834,
      "step": 33172
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2377296686172485,
      "learning_rate": 1.2236218121453356e-05,
      "loss": 2.199,
      "step": 33173
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9961740374565125,
      "learning_rate": 1.2235816809623728e-05,
      "loss": 2.4288,
      "step": 33174
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0223585367202759,
      "learning_rate": 1.2235415494003787e-05,
      "loss": 2.4152,
      "step": 33175
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3043485879898071,
      "learning_rate": 1.2235014174594206e-05,
      "loss": 2.202,
      "step": 33176
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2385400533676147,
      "learning_rate": 1.2234612851395671e-05,
      "loss": 2.461,
      "step": 33177
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.116234540939331,
      "learning_rate": 1.2234211524408864e-05,
      "loss": 2.271,
      "step": 33178
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0997982025146484,
      "learning_rate": 1.2233810193634454e-05,
      "loss": 2.3155,
      "step": 33179
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0850365161895752,
      "learning_rate": 1.2233408859073135e-05,
      "loss": 2.5082,
      "step": 33180
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.103576898574829,
      "learning_rate": 1.2233007520725577e-05,
      "loss": 2.2968,
      "step": 33181
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9875653982162476,
      "learning_rate": 1.2232606178592467e-05,
      "loss": 2.2439,
      "step": 33182
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0420352220535278,
      "learning_rate": 1.2232204832674484e-05,
      "loss": 2.2479,
      "step": 33183
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.141615867614746,
      "learning_rate": 1.2231803482972308e-05,
      "loss": 2.321,
      "step": 33184
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.094132900238037,
      "learning_rate": 1.2231402129486618e-05,
      "loss": 2.7219,
      "step": 33185
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.113086223602295,
      "learning_rate": 1.2231000772218093e-05,
      "loss": 2.2928,
      "step": 33186
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.04622483253479,
      "learning_rate": 1.223059941116742e-05,
      "loss": 2.3727,
      "step": 33187
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0555288791656494,
      "learning_rate": 1.223019804633527e-05,
      "loss": 2.362,
      "step": 33188
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9929409623146057,
      "learning_rate": 1.2229796677722334e-05,
      "loss": 2.2111,
      "step": 33189
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.074993371963501,
      "learning_rate": 1.2229395305329284e-05,
      "loss": 2.4045,
      "step": 33190
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0290826559066772,
      "learning_rate": 1.2228993929156805e-05,
      "loss": 2.2736,
      "step": 33191
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.041480541229248,
      "learning_rate": 1.2228592549205575e-05,
      "loss": 2.4146,
      "step": 33192
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1250131130218506,
      "learning_rate": 1.2228191165476275e-05,
      "loss": 2.4171,
      "step": 33193
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.019396185874939,
      "learning_rate": 1.2227789777969586e-05,
      "loss": 2.528,
      "step": 33194
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0492268800735474,
      "learning_rate": 1.2227388386686188e-05,
      "loss": 2.2542,
      "step": 33195
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.138839602470398,
      "learning_rate": 1.2226986991626762e-05,
      "loss": 2.3278,
      "step": 33196
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0084034204483032,
      "learning_rate": 1.2226585592791988e-05,
      "loss": 2.2647,
      "step": 33197
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0747872591018677,
      "learning_rate": 1.2226184190182549e-05,
      "loss": 2.4513,
      "step": 33198
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.105460286140442,
      "learning_rate": 1.222578278379912e-05,
      "loss": 2.4759,
      "step": 33199
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0408543348312378,
      "learning_rate": 1.2225381373642387e-05,
      "loss": 2.385,
      "step": 33200
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2827626466751099,
      "learning_rate": 1.2224979959713025e-05,
      "loss": 2.3754,
      "step": 33201
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.06155526638031,
      "learning_rate": 1.222457854201172e-05,
      "loss": 2.4172,
      "step": 33202
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0268172025680542,
      "learning_rate": 1.2224177120539149e-05,
      "loss": 2.3333,
      "step": 33203
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0948532819747925,
      "learning_rate": 1.2223775695295997e-05,
      "loss": 2.4631,
      "step": 33204
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1605318784713745,
      "learning_rate": 1.2223374266282936e-05,
      "loss": 2.5736,
      "step": 33205
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0370893478393555,
      "learning_rate": 1.2222972833500657e-05,
      "loss": 2.3713,
      "step": 33206
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1466470956802368,
      "learning_rate": 1.2222571396949832e-05,
      "loss": 2.4662,
      "step": 33207
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1305955648422241,
      "learning_rate": 1.2222169956631145e-05,
      "loss": 2.2703,
      "step": 33208
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9176200032234192,
      "learning_rate": 1.2221768512545275e-05,
      "loss": 2.318,
      "step": 33209
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.021719217300415,
      "learning_rate": 1.2221367064692906e-05,
      "loss": 2.6972,
      "step": 33210
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0744115114212036,
      "learning_rate": 1.222096561307472e-05,
      "loss": 2.4704,
      "step": 33211
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9835397005081177,
      "learning_rate": 1.222056415769139e-05,
      "loss": 2.3617,
      "step": 33212
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.115812063217163,
      "learning_rate": 1.22201626985436e-05,
      "loss": 2.3755,
      "step": 33213
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.020742416381836,
      "learning_rate": 1.2219761235632033e-05,
      "loss": 2.2192,
      "step": 33214
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0083284378051758,
      "learning_rate": 1.2219359768957367e-05,
      "loss": 2.6731,
      "step": 33215
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0610675811767578,
      "learning_rate": 1.2218958298520282e-05,
      "loss": 2.5807,
      "step": 33216
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0060267448425293,
      "learning_rate": 1.2218556824321466e-05,
      "loss": 2.3708,
      "step": 33217
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.059741735458374,
      "learning_rate": 1.2218155346361589e-05,
      "loss": 2.539,
      "step": 33218
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.02364182472229,
      "learning_rate": 1.2217753864641336e-05,
      "loss": 2.3741,
      "step": 33219
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0241496562957764,
      "learning_rate": 1.221735237916139e-05,
      "loss": 2.3068,
      "step": 33220
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9658510684967041,
      "learning_rate": 1.2216950889922428e-05,
      "loss": 2.1721,
      "step": 33221
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0468870401382446,
      "learning_rate": 1.2216549396925131e-05,
      "loss": 2.5293,
      "step": 33222
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1976933479309082,
      "learning_rate": 1.221614790017018e-05,
      "loss": 2.2898,
      "step": 33223
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1129891872406006,
      "learning_rate": 1.221574639965826e-05,
      "loss": 2.4388,
      "step": 33224
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0814794301986694,
      "learning_rate": 1.2215344895390046e-05,
      "loss": 2.4984,
      "step": 33225
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0445760488510132,
      "learning_rate": 1.2214943387366222e-05,
      "loss": 2.5138,
      "step": 33226
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0336954593658447,
      "learning_rate": 1.2214541875587467e-05,
      "loss": 2.5069,
      "step": 33227
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1050924062728882,
      "learning_rate": 1.2214140360054461e-05,
      "loss": 2.5559,
      "step": 33228
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0662826299667358,
      "learning_rate": 1.2213738840767885e-05,
      "loss": 2.3691,
      "step": 33229
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9369959235191345,
      "learning_rate": 1.2213337317728421e-05,
      "loss": 2.1455,
      "step": 33230
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.012433409690857,
      "learning_rate": 1.2212935790936752e-05,
      "loss": 2.3976,
      "step": 33231
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8125675916671753,
      "learning_rate": 1.2212534260393554e-05,
      "loss": 2.3559,
      "step": 33232
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.019808053970337,
      "learning_rate": 1.2212132726099508e-05,
      "loss": 2.1379,
      "step": 33233
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1051034927368164,
      "learning_rate": 1.2211731188055297e-05,
      "loss": 2.2297,
      "step": 33234
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0364230871200562,
      "learning_rate": 1.22113296462616e-05,
      "loss": 2.3259,
      "step": 33235
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1277844905853271,
      "learning_rate": 1.2210928100719102e-05,
      "loss": 2.674,
      "step": 33236
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1411824226379395,
      "learning_rate": 1.2210526551428478e-05,
      "loss": 2.3917,
      "step": 33237
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0432547330856323,
      "learning_rate": 1.221012499839041e-05,
      "loss": 2.4151,
      "step": 33238
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.312589406967163,
      "learning_rate": 1.2209723441605583e-05,
      "loss": 2.3779,
      "step": 33239
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3695251941680908,
      "learning_rate": 1.2209321881074672e-05,
      "loss": 2.1917,
      "step": 33240
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0800577402114868,
      "learning_rate": 1.2208920316798361e-05,
      "loss": 2.435,
      "step": 33241
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2043076753616333,
      "learning_rate": 1.2208518748777331e-05,
      "loss": 2.5614,
      "step": 33242
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0142993927001953,
      "learning_rate": 1.220811717701226e-05,
      "loss": 2.1686,
      "step": 33243
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0312319993972778,
      "learning_rate": 1.2207715601503833e-05,
      "loss": 2.4542,
      "step": 33244
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.971088707447052,
      "learning_rate": 1.2207314022252727e-05,
      "loss": 2.5554,
      "step": 33245
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.049125075340271,
      "learning_rate": 1.2206912439259626e-05,
      "loss": 2.2485,
      "step": 33246
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0594645738601685,
      "learning_rate": 1.2206510852525207e-05,
      "loss": 2.2571,
      "step": 33247
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0683890581130981,
      "learning_rate": 1.2206109262050157e-05,
      "loss": 2.3763,
      "step": 33248
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9904698133468628,
      "learning_rate": 1.2205707667835146e-05,
      "loss": 2.7419,
      "step": 33249
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0176535844802856,
      "learning_rate": 1.2205306069880865e-05,
      "loss": 2.6639,
      "step": 33250
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0888595581054688,
      "learning_rate": 1.2204904468187992e-05,
      "loss": 2.5177,
      "step": 33251
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0936411619186401,
      "learning_rate": 1.2204502862757206e-05,
      "loss": 2.2222,
      "step": 33252
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.086260437965393,
      "learning_rate": 1.220410125358919e-05,
      "loss": 2.3731,
      "step": 33253
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2336724996566772,
      "learning_rate": 1.220369964068462e-05,
      "loss": 2.1453,
      "step": 33254
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1611661911010742,
      "learning_rate": 1.2203298024044184e-05,
      "loss": 2.3619,
      "step": 33255
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0393491983413696,
      "learning_rate": 1.2202896403668557e-05,
      "loss": 2.3171,
      "step": 33256
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.136210560798645,
      "learning_rate": 1.2202494779558427e-05,
      "loss": 2.5388,
      "step": 33257
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.156914472579956,
      "learning_rate": 1.2202093151714467e-05,
      "loss": 2.396,
      "step": 33258
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9763582944869995,
      "learning_rate": 1.220169152013736e-05,
      "loss": 2.3695,
      "step": 33259
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1062839031219482,
      "learning_rate": 1.2201289884827789e-05,
      "loss": 2.3167,
      "step": 33260
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.020119071006775,
      "learning_rate": 1.2200888245786433e-05,
      "loss": 2.3418,
      "step": 33261
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.293410062789917,
      "learning_rate": 1.2200486603013974e-05,
      "loss": 2.4299,
      "step": 33262
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2992812395095825,
      "learning_rate": 1.2200084956511093e-05,
      "loss": 2.155,
      "step": 33263
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.066009759902954,
      "learning_rate": 1.2199683306278474e-05,
      "loss": 2.4037,
      "step": 33264
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1769640445709229,
      "learning_rate": 1.219928165231679e-05,
      "loss": 2.5966,
      "step": 33265
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9594935774803162,
      "learning_rate": 1.2198879994626726e-05,
      "loss": 2.4472,
      "step": 33266
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1232936382293701,
      "learning_rate": 1.2198478333208965e-05,
      "loss": 2.5895,
      "step": 33267
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.088714361190796,
      "learning_rate": 1.2198076668064185e-05,
      "loss": 2.2629,
      "step": 33268
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.062276005744934,
      "learning_rate": 1.2197674999193067e-05,
      "loss": 2.3246,
      "step": 33269
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.103459119796753,
      "learning_rate": 1.2197273326596298e-05,
      "loss": 2.2526,
      "step": 33270
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0284613370895386,
      "learning_rate": 1.2196871650274548e-05,
      "loss": 2.3408,
      "step": 33271
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.151318907737732,
      "learning_rate": 1.2196469970228507e-05,
      "loss": 2.3738,
      "step": 33272
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9946495294570923,
      "learning_rate": 1.219606828645885e-05,
      "loss": 2.3872,
      "step": 33273
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9981970191001892,
      "learning_rate": 1.2195666598966265e-05,
      "loss": 2.3703,
      "step": 33274
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1697347164154053,
      "learning_rate": 1.2195264907751425e-05,
      "loss": 2.3838,
      "step": 33275
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1442776918411255,
      "learning_rate": 1.2194863212815018e-05,
      "loss": 2.4187,
      "step": 33276
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.12289559841156,
      "learning_rate": 1.219446151415772e-05,
      "loss": 2.3257,
      "step": 33277
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0004732608795166,
      "learning_rate": 1.2194059811780214e-05,
      "loss": 2.1929,
      "step": 33278
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1202868223190308,
      "learning_rate": 1.2193658105683184e-05,
      "loss": 2.5387,
      "step": 33279
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0266739130020142,
      "learning_rate": 1.2193256395867303e-05,
      "loss": 2.2838,
      "step": 33280
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1616774797439575,
      "learning_rate": 1.2192854682333258e-05,
      "loss": 2.25,
      "step": 33281
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0002497434616089,
      "learning_rate": 1.219245296508173e-05,
      "loss": 2.4282,
      "step": 33282
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.233601689338684,
      "learning_rate": 1.2192051244113399e-05,
      "loss": 2.4818,
      "step": 33283
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1696302890777588,
      "learning_rate": 1.2191649519428943e-05,
      "loss": 2.4026,
      "step": 33284
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0746359825134277,
      "learning_rate": 1.2191247791029048e-05,
      "loss": 2.4178,
      "step": 33285
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9891084432601929,
      "learning_rate": 1.2190846058914392e-05,
      "loss": 2.4097,
      "step": 33286
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0542497634887695,
      "learning_rate": 1.2190444323085656e-05,
      "loss": 2.4866,
      "step": 33287
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0651085376739502,
      "learning_rate": 1.2190042583543522e-05,
      "loss": 2.308,
      "step": 33288
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0421884059906006,
      "learning_rate": 1.2189640840288673e-05,
      "loss": 2.2711,
      "step": 33289
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2936389446258545,
      "learning_rate": 1.2189239093321787e-05,
      "loss": 2.4145,
      "step": 33290
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.213989019393921,
      "learning_rate": 1.2188837342643547e-05,
      "loss": 2.26,
      "step": 33291
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3138659000396729,
      "learning_rate": 1.2188435588254632e-05,
      "loss": 2.129,
      "step": 33292
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9420149326324463,
      "learning_rate": 1.2188033830155725e-05,
      "loss": 2.3124,
      "step": 33293
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.947179913520813,
      "learning_rate": 1.2187632068347506e-05,
      "loss": 2.4496,
      "step": 33294
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9803597331047058,
      "learning_rate": 1.2187230302830655e-05,
      "loss": 2.3522,
      "step": 33295
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.067976951599121,
      "learning_rate": 1.2186828533605859e-05,
      "loss": 2.3303,
      "step": 33296
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0930730104446411,
      "learning_rate": 1.2186426760673791e-05,
      "loss": 2.3738,
      "step": 33297
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.99909907579422,
      "learning_rate": 1.2186024984035136e-05,
      "loss": 2.2934,
      "step": 33298
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9456990361213684,
      "learning_rate": 1.2185623203690575e-05,
      "loss": 2.2971,
      "step": 33299
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2073580026626587,
      "learning_rate": 1.2185221419640792e-05,
      "loss": 2.4014,
      "step": 33300
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1075462102890015,
      "learning_rate": 1.2184819631886463e-05,
      "loss": 2.072,
      "step": 33301
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1040689945220947,
      "learning_rate": 1.2184417840428268e-05,
      "loss": 2.6184,
      "step": 33302
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0382636785507202,
      "learning_rate": 1.2184016045266897e-05,
      "loss": 2.6579,
      "step": 33303
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0372799634933472,
      "learning_rate": 1.2183614246403024e-05,
      "loss": 2.2875,
      "step": 33304
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0804487466812134,
      "learning_rate": 1.2183212443837331e-05,
      "loss": 2.4579,
      "step": 33305
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1389175653457642,
      "learning_rate": 1.2182810637570499e-05,
      "loss": 2.3656,
      "step": 33306
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9634851813316345,
      "learning_rate": 1.2182408827603213e-05,
      "loss": 2.3215,
      "step": 33307
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.024423360824585,
      "learning_rate": 1.218200701393615e-05,
      "loss": 2.3214,
      "step": 33308
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1701699495315552,
      "learning_rate": 1.2181605196569992e-05,
      "loss": 2.2334,
      "step": 33309
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0397160053253174,
      "learning_rate": 1.218120337550542e-05,
      "loss": 2.3393,
      "step": 33310
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0831900835037231,
      "learning_rate": 1.2180801550743117e-05,
      "loss": 2.5029,
      "step": 33311
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0396881103515625,
      "learning_rate": 1.2180399722283764e-05,
      "loss": 2.282,
      "step": 33312
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0157231092453003,
      "learning_rate": 1.217999789012804e-05,
      "loss": 2.5564,
      "step": 33313
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9865086078643799,
      "learning_rate": 1.2179596054276628e-05,
      "loss": 2.3863,
      "step": 33314
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0508111715316772,
      "learning_rate": 1.2179194214730208e-05,
      "loss": 2.3246,
      "step": 33315
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.045027256011963,
      "learning_rate": 1.2178792371489463e-05,
      "loss": 2.2478,
      "step": 33316
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1017037630081177,
      "learning_rate": 1.2178390524555073e-05,
      "loss": 2.2119,
      "step": 33317
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3263829946517944,
      "learning_rate": 1.2177988673927717e-05,
      "loss": 2.1105,
      "step": 33318
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0417149066925049,
      "learning_rate": 1.217758681960808e-05,
      "loss": 2.3944,
      "step": 33319
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.101876974105835,
      "learning_rate": 1.2177184961596842e-05,
      "loss": 2.2887,
      "step": 33320
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9986953139305115,
      "learning_rate": 1.2176783099894685e-05,
      "loss": 2.2281,
      "step": 33321
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0154297351837158,
      "learning_rate": 1.2176381234502288e-05,
      "loss": 2.3002,
      "step": 33322
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0549397468566895,
      "learning_rate": 1.2175979365420336e-05,
      "loss": 2.2529,
      "step": 33323
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0683577060699463,
      "learning_rate": 1.2175577492649507e-05,
      "loss": 2.3617,
      "step": 33324
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.284794807434082,
      "learning_rate": 1.2175175616190482e-05,
      "loss": 2.4534,
      "step": 33325
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0391509532928467,
      "learning_rate": 1.2174773736043944e-05,
      "loss": 2.5019,
      "step": 33326
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0962618589401245,
      "learning_rate": 1.2174371852210574e-05,
      "loss": 2.2476,
      "step": 33327
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1172453165054321,
      "learning_rate": 1.2173969964691052e-05,
      "loss": 2.4976,
      "step": 33328
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0812478065490723,
      "learning_rate": 1.2173568073486063e-05,
      "loss": 2.3658,
      "step": 33329
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0423544645309448,
      "learning_rate": 1.2173166178596288e-05,
      "loss": 2.1663,
      "step": 33330
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1764943599700928,
      "learning_rate": 1.21727642800224e-05,
      "loss": 2.3738,
      "step": 33331
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1330510377883911,
      "learning_rate": 1.217236237776509e-05,
      "loss": 2.576,
      "step": 33332
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0541223287582397,
      "learning_rate": 1.2171960471825035e-05,
      "loss": 2.2526,
      "step": 33333
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0711466073989868,
      "learning_rate": 1.2171558562202917e-05,
      "loss": 2.401,
      "step": 33334
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1304993629455566,
      "learning_rate": 1.2171156648899418e-05,
      "loss": 2.3644,
      "step": 33335
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.052066683769226,
      "learning_rate": 1.217075473191522e-05,
      "loss": 2.3434,
      "step": 33336
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.247023105621338,
      "learning_rate": 1.2170352811251001e-05,
      "loss": 2.2415,
      "step": 33337
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1306582689285278,
      "learning_rate": 1.2169950886907447e-05,
      "loss": 2.2757,
      "step": 33338
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.171963095664978,
      "learning_rate": 1.2169548958885234e-05,
      "loss": 2.3957,
      "step": 33339
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2893600463867188,
      "learning_rate": 1.2169147027185047e-05,
      "loss": 2.3262,
      "step": 33340
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1016391515731812,
      "learning_rate": 1.2168745091807566e-05,
      "loss": 2.4339,
      "step": 33341
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2662603855133057,
      "learning_rate": 1.2168343152753472e-05,
      "loss": 2.3874,
      "step": 33342
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9898585677146912,
      "learning_rate": 1.2167941210023454e-05,
      "loss": 2.3289,
      "step": 33343
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3427314758300781,
      "learning_rate": 1.216753926361818e-05,
      "loss": 2.3479,
      "step": 33344
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9014262557029724,
      "learning_rate": 1.216713731353834e-05,
      "loss": 2.4017,
      "step": 33345
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.027034044265747,
      "learning_rate": 1.2166735359784614e-05,
      "loss": 2.3801,
      "step": 33346
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.061912178993225,
      "learning_rate": 1.2166333402357685e-05,
      "loss": 2.3305,
      "step": 33347
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1388026475906372,
      "learning_rate": 1.2165931441258229e-05,
      "loss": 2.3456,
      "step": 33348
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1450051069259644,
      "learning_rate": 1.2165529476486934e-05,
      "loss": 2.3384,
      "step": 33349
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0973421335220337,
      "learning_rate": 1.2165127508044477e-05,
      "loss": 2.4821,
      "step": 33350
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9988972544670105,
      "learning_rate": 1.2164725535931541e-05,
      "loss": 2.3776,
      "step": 33351
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1243340969085693,
      "learning_rate": 1.2164323560148808e-05,
      "loss": 2.4145,
      "step": 33352
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0033059120178223,
      "learning_rate": 1.2163921580696955e-05,
      "loss": 2.3496,
      "step": 33353
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1435117721557617,
      "learning_rate": 1.2163519597576672e-05,
      "loss": 2.2946,
      "step": 33354
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1529357433319092,
      "learning_rate": 1.2163117610788633e-05,
      "loss": 2.4933,
      "step": 33355
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9938245415687561,
      "learning_rate": 1.2162715620333524e-05,
      "loss": 2.3571,
      "step": 33356
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9963070750236511,
      "learning_rate": 1.2162313626212023e-05,
      "loss": 2.4587,
      "step": 33357
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9636539220809937,
      "learning_rate": 1.2161911628424814e-05,
      "loss": 2.2432,
      "step": 33358
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0930815935134888,
      "learning_rate": 1.2161509626972576e-05,
      "loss": 2.6981,
      "step": 33359
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1475616693496704,
      "learning_rate": 1.2161107621855994e-05,
      "loss": 2.4604,
      "step": 33360
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1521860361099243,
      "learning_rate": 1.2160705613075748e-05,
      "loss": 2.4306,
      "step": 33361
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0403432846069336,
      "learning_rate": 1.2160303600632519e-05,
      "loss": 2.2677,
      "step": 33362
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0062276124954224,
      "learning_rate": 1.2159901584526988e-05,
      "loss": 2.4673,
      "step": 33363
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2438112497329712,
      "learning_rate": 1.2159499564759836e-05,
      "loss": 2.4823,
      "step": 33364
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.141538143157959,
      "learning_rate": 1.2159097541331745e-05,
      "loss": 2.3934,
      "step": 33365
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.078600287437439,
      "learning_rate": 1.2158695514243403e-05,
      "loss": 2.1674,
      "step": 33366
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1116836071014404,
      "learning_rate": 1.2158293483495482e-05,
      "loss": 2.633,
      "step": 33367
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.13326895236969,
      "learning_rate": 1.2157891449088666e-05,
      "loss": 2.3297,
      "step": 33368
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1113320589065552,
      "learning_rate": 1.2157489411023642e-05,
      "loss": 2.4214,
      "step": 33369
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0162090063095093,
      "learning_rate": 1.2157087369301083e-05,
      "loss": 2.2784,
      "step": 33370
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0774939060211182,
      "learning_rate": 1.2156685323921678e-05,
      "loss": 2.2878,
      "step": 33371
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0762701034545898,
      "learning_rate": 1.2156283274886102e-05,
      "loss": 2.3505,
      "step": 33372
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0322632789611816,
      "learning_rate": 1.2155881222195043e-05,
      "loss": 2.2178,
      "step": 33373
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1309596300125122,
      "learning_rate": 1.215547916584918e-05,
      "loss": 2.4571,
      "step": 33374
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.062053918838501,
      "learning_rate": 1.2155077105849193e-05,
      "loss": 2.3479,
      "step": 33375
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1887673139572144,
      "learning_rate": 1.2154675042195767e-05,
      "loss": 2.3026,
      "step": 33376
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.209246277809143,
      "learning_rate": 1.215427297488958e-05,
      "loss": 2.384,
      "step": 33377
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0553251504898071,
      "learning_rate": 1.2153870903931315e-05,
      "loss": 2.2676,
      "step": 33378
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.207692265510559,
      "learning_rate": 1.2153468829321655e-05,
      "loss": 2.4456,
      "step": 33379
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0188789367675781,
      "learning_rate": 1.215306675106128e-05,
      "loss": 2.6959,
      "step": 33380
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0396769046783447,
      "learning_rate": 1.2152664669150871e-05,
      "loss": 2.3619,
      "step": 33381
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0370451211929321,
      "learning_rate": 1.2152262583591115e-05,
      "loss": 2.3954,
      "step": 33382
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0587221384048462,
      "learning_rate": 1.2151860494382684e-05,
      "loss": 2.5038,
      "step": 33383
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.212728500366211,
      "learning_rate": 1.2151458401526269e-05,
      "loss": 2.26,
      "step": 33384
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.046566367149353,
      "learning_rate": 1.2151056305022544e-05,
      "loss": 2.5724,
      "step": 33385
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0842221975326538,
      "learning_rate": 1.2150654204872194e-05,
      "loss": 2.3562,
      "step": 33386
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1685960292816162,
      "learning_rate": 1.2150252101075903e-05,
      "loss": 2.4338,
      "step": 33387
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.7134149074554443,
      "learning_rate": 1.2149849993634351e-05,
      "loss": 2.406,
      "step": 33388
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1218029260635376,
      "learning_rate": 1.2149447882548222e-05,
      "loss": 2.196,
      "step": 33389
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0532491207122803,
      "learning_rate": 1.214904576781819e-05,
      "loss": 2.3536,
      "step": 33390
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0842677354812622,
      "learning_rate": 1.2148643649444945e-05,
      "loss": 2.2262,
      "step": 33391
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1179783344268799,
      "learning_rate": 1.2148241527429163e-05,
      "loss": 2.1719,
      "step": 33392
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.170255422592163,
      "learning_rate": 1.2147839401771531e-05,
      "loss": 2.3739,
      "step": 33393
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.022621512413025,
      "learning_rate": 1.2147437272472728e-05,
      "loss": 2.1764,
      "step": 33394
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.186767339706421,
      "learning_rate": 1.2147035139533435e-05,
      "loss": 2.572,
      "step": 33395
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.095190167427063,
      "learning_rate": 1.214663300295433e-05,
      "loss": 2.3837,
      "step": 33396
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9950138330459595,
      "learning_rate": 1.2146230862736106e-05,
      "loss": 2.3123,
      "step": 33397
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3513044118881226,
      "learning_rate": 1.2145828718879434e-05,
      "loss": 2.2622,
      "step": 33398
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.267935037612915,
      "learning_rate": 1.2145426571384999e-05,
      "loss": 2.3447,
      "step": 33399
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1014541387557983,
      "learning_rate": 1.2145024420253485e-05,
      "loss": 2.3524,
      "step": 33400
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1620200872421265,
      "learning_rate": 1.2144622265485571e-05,
      "loss": 2.7465,
      "step": 33401
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0061362981796265,
      "learning_rate": 1.214422010708194e-05,
      "loss": 2.5578,
      "step": 33402
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0284419059753418,
      "learning_rate": 1.2143817945043272e-05,
      "loss": 2.2426,
      "step": 33403
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0658488273620605,
      "learning_rate": 1.2143415779370254e-05,
      "loss": 2.4378,
      "step": 33404
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0556073188781738,
      "learning_rate": 1.214301361006356e-05,
      "loss": 2.5585,
      "step": 33405
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.9953346848487854,
      "learning_rate": 1.2142611437123876e-05,
      "loss": 2.7301,
      "step": 33406
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.0493738651275635,
      "learning_rate": 1.2142209260551887e-05,
      "loss": 2.587,
      "step": 33407
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.097224473953247,
      "learning_rate": 1.2141807080348268e-05,
      "loss": 2.3545,
      "step": 33408
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0224196910858154,
      "learning_rate": 1.2141404896513708e-05,
      "loss": 2.3407,
      "step": 33409
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.008284330368042,
      "learning_rate": 1.2141002709048883e-05,
      "loss": 2.2512,
      "step": 33410
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9841108322143555,
      "learning_rate": 1.2140600517954478e-05,
      "loss": 2.4403,
      "step": 33411
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.050747036933899,
      "learning_rate": 1.2140198323231172e-05,
      "loss": 2.4733,
      "step": 33412
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1408395767211914,
      "learning_rate": 1.213979612487965e-05,
      "loss": 2.3543,
      "step": 33413
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0628547668457031,
      "learning_rate": 1.213939392290059e-05,
      "loss": 2.3875,
      "step": 33414
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0271825790405273,
      "learning_rate": 1.2138991717294682e-05,
      "loss": 2.1626,
      "step": 33415
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1010111570358276,
      "learning_rate": 1.2138589508062597e-05,
      "loss": 2.4725,
      "step": 33416
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9608837366104126,
      "learning_rate": 1.2138187295205024e-05,
      "loss": 2.4416,
      "step": 33417
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.146304726600647,
      "learning_rate": 1.2137785078722642e-05,
      "loss": 2.3344,
      "step": 33418
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0812543630599976,
      "learning_rate": 1.2137382858616132e-05,
      "loss": 2.3596,
      "step": 33419
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0731291770935059,
      "learning_rate": 1.213698063488618e-05,
      "loss": 2.3117,
      "step": 33420
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0248465538024902,
      "learning_rate": 1.2136578407533463e-05,
      "loss": 2.3225,
      "step": 33421
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1040939092636108,
      "learning_rate": 1.213617617655867e-05,
      "loss": 2.3499,
      "step": 33422
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.631347417831421,
      "learning_rate": 1.2135773941962474e-05,
      "loss": 2.4628,
      "step": 33423
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.050112009048462,
      "learning_rate": 1.2135371703745562e-05,
      "loss": 2.4816,
      "step": 33424
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.207370638847351,
      "learning_rate": 1.2134969461908616e-05,
      "loss": 2.2719,
      "step": 33425
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2357884645462036,
      "learning_rate": 1.2134567216452318e-05,
      "loss": 2.3516,
      "step": 33426
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1720186471939087,
      "learning_rate": 1.2134164967377347e-05,
      "loss": 2.5576,
      "step": 33427
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2375584840774536,
      "learning_rate": 1.2133762714684388e-05,
      "loss": 2.2561,
      "step": 33428
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0677663087844849,
      "learning_rate": 1.213336045837412e-05,
      "loss": 2.1214,
      "step": 33429
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0991617441177368,
      "learning_rate": 1.2132958198447227e-05,
      "loss": 2.1394,
      "step": 33430
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0329198837280273,
      "learning_rate": 1.2132555934904389e-05,
      "loss": 2.3597,
      "step": 33431
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0096395015716553,
      "learning_rate": 1.2132153667746291e-05,
      "loss": 2.308,
      "step": 33432
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1852600574493408,
      "learning_rate": 1.2131751396973614e-05,
      "loss": 2.3677,
      "step": 33433
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3805311918258667,
      "learning_rate": 1.2131349122587037e-05,
      "loss": 2.4261,
      "step": 33434
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.407425045967102,
      "learning_rate": 1.2130946844587248e-05,
      "loss": 2.3089,
      "step": 33435
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9831238389015198,
      "learning_rate": 1.2130544562974925e-05,
      "loss": 2.3733,
      "step": 33436
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0240317583084106,
      "learning_rate": 1.2130142277750747e-05,
      "loss": 2.3998,
      "step": 33437
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.002849817276001,
      "learning_rate": 1.2129739988915401e-05,
      "loss": 2.368,
      "step": 33438
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1969777345657349,
      "learning_rate": 1.2129337696469567e-05,
      "loss": 2.2356,
      "step": 33439
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1139681339263916,
      "learning_rate": 1.2128935400413929e-05,
      "loss": 2.2974,
      "step": 33440
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1341097354888916,
      "learning_rate": 1.212853310074917e-05,
      "loss": 2.4045,
      "step": 33441
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0464485883712769,
      "learning_rate": 1.2128130797475964e-05,
      "loss": 2.1086,
      "step": 33442
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.02267587184906,
      "learning_rate": 1.2127728490594999e-05,
      "loss": 2.5892,
      "step": 33443
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.185289978981018,
      "learning_rate": 1.2127326180106954e-05,
      "loss": 2.5681,
      "step": 33444
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0119740962982178,
      "learning_rate": 1.2126923866012518e-05,
      "loss": 2.2883,
      "step": 33445
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0533324480056763,
      "learning_rate": 1.2126521548312366e-05,
      "loss": 2.2864,
      "step": 33446
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0362639427185059,
      "learning_rate": 1.2126119227007186e-05,
      "loss": 2.2958,
      "step": 33447
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0547491312026978,
      "learning_rate": 1.2125716902097653e-05,
      "loss": 2.4116,
      "step": 33448
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2006322145462036,
      "learning_rate": 1.2125314573584453e-05,
      "loss": 2.4498,
      "step": 33449
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0535964965820312,
      "learning_rate": 1.2124912241468267e-05,
      "loss": 2.3267,
      "step": 33450
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.154739260673523,
      "learning_rate": 1.212450990574978e-05,
      "loss": 2.5409,
      "step": 33451
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1238404512405396,
      "learning_rate": 1.2124107566429669e-05,
      "loss": 2.4415,
      "step": 33452
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.020249366760254,
      "learning_rate": 1.2123705223508621e-05,
      "loss": 2.3586,
      "step": 33453
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.139281153678894,
      "learning_rate": 1.2123302876987315e-05,
      "loss": 2.3398,
      "step": 33454
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9815453290939331,
      "learning_rate": 1.2122900526866432e-05,
      "loss": 2.3617,
      "step": 33455
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0390939712524414,
      "learning_rate": 1.2122498173146659e-05,
      "loss": 2.3295,
      "step": 33456
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1978203058242798,
      "learning_rate": 1.2122095815828674e-05,
      "loss": 2.3674,
      "step": 33457
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9882477521896362,
      "learning_rate": 1.2121693454913161e-05,
      "loss": 2.2416,
      "step": 33458
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0866384506225586,
      "learning_rate": 1.21212910904008e-05,
      "loss": 2.3922,
      "step": 33459
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0758310556411743,
      "learning_rate": 1.2120888722292274e-05,
      "loss": 2.3899,
      "step": 33460
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0563007593154907,
      "learning_rate": 1.212048635058827e-05,
      "loss": 2.223,
      "step": 33461
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.347546935081482,
      "learning_rate": 1.2120083975289461e-05,
      "loss": 2.2616,
      "step": 33462
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2450954914093018,
      "learning_rate": 1.2119681596396535e-05,
      "loss": 2.3467,
      "step": 33463
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0111907720565796,
      "learning_rate": 1.2119279213910172e-05,
      "loss": 2.2461,
      "step": 33464
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0592501163482666,
      "learning_rate": 1.2118876827831057e-05,
      "loss": 2.4329,
      "step": 33465
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1312555074691772,
      "learning_rate": 1.2118474438159869e-05,
      "loss": 2.474,
      "step": 33466
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0698301792144775,
      "learning_rate": 1.2118072044897292e-05,
      "loss": 2.5027,
      "step": 33467
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2107980251312256,
      "learning_rate": 1.2117669648044008e-05,
      "loss": 2.4006,
      "step": 33468
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.04047429561615,
      "learning_rate": 1.21172672476007e-05,
      "loss": 2.364,
      "step": 33469
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0735151767730713,
      "learning_rate": 1.2116864843568046e-05,
      "loss": 2.4232,
      "step": 33470
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2655448913574219,
      "learning_rate": 1.2116462435946732e-05,
      "loss": 2.2838,
      "step": 33471
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9223051071166992,
      "learning_rate": 1.211606002473744e-05,
      "loss": 2.3303,
      "step": 33472
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1372112035751343,
      "learning_rate": 1.211565760994085e-05,
      "loss": 2.3353,
      "step": 33473
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0428358316421509,
      "learning_rate": 1.2115255191557649e-05,
      "loss": 2.3014,
      "step": 33474
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0158169269561768,
      "learning_rate": 1.2114852769588515e-05,
      "loss": 2.2015,
      "step": 33475
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0566486120224,
      "learning_rate": 1.211445034403413e-05,
      "loss": 2.1614,
      "step": 33476
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9787248373031616,
      "learning_rate": 1.2114047914895175e-05,
      "loss": 2.2636,
      "step": 33477
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0336264371871948,
      "learning_rate": 1.2113645482172338e-05,
      "loss": 2.3082,
      "step": 33478
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1718679666519165,
      "learning_rate": 1.2113243045866296e-05,
      "loss": 2.353,
      "step": 33479
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1175920963287354,
      "learning_rate": 1.2112840605977736e-05,
      "loss": 2.3502,
      "step": 33480
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.144616961479187,
      "learning_rate": 1.2112438162507337e-05,
      "loss": 2.4845,
      "step": 33481
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.061221718788147,
      "learning_rate": 1.2112035715455779e-05,
      "loss": 2.6679,
      "step": 33482
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1288124322891235,
      "learning_rate": 1.2111633264823748e-05,
      "loss": 2.3798,
      "step": 33483
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.121694803237915,
      "learning_rate": 1.2111230810611927e-05,
      "loss": 2.4477,
      "step": 33484
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0617371797561646,
      "learning_rate": 1.2110828352820994e-05,
      "loss": 2.3324,
      "step": 33485
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2304006814956665,
      "learning_rate": 1.2110425891451633e-05,
      "loss": 2.2859,
      "step": 33486
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.127516746520996,
      "learning_rate": 1.2110023426504529e-05,
      "loss": 2.4906,
      "step": 33487
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0945388078689575,
      "learning_rate": 1.2109620957980362e-05,
      "loss": 2.2515,
      "step": 33488
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0282185077667236,
      "learning_rate": 1.2109218485879815e-05,
      "loss": 2.3735,
      "step": 33489
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.11298668384552,
      "learning_rate": 1.2108816010203568e-05,
      "loss": 2.462,
      "step": 33490
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0394179821014404,
      "learning_rate": 1.2108413530952303e-05,
      "loss": 2.3323,
      "step": 33491
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0003043413162231,
      "learning_rate": 1.210801104812671e-05,
      "loss": 2.2103,
      "step": 33492
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.18790864944458,
      "learning_rate": 1.2107608561727462e-05,
      "loss": 2.3918,
      "step": 33493
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1525529623031616,
      "learning_rate": 1.2107206071755247e-05,
      "loss": 2.3432,
      "step": 33494
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0283986330032349,
      "learning_rate": 1.2106803578210743e-05,
      "loss": 2.4278,
      "step": 33495
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2001304626464844,
      "learning_rate": 1.2106401081094637e-05,
      "loss": 2.3102,
      "step": 33496
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1782028675079346,
      "learning_rate": 1.2105998580407608e-05,
      "loss": 2.3222,
      "step": 33497
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2539235353469849,
      "learning_rate": 1.210559607615034e-05,
      "loss": 2.197,
      "step": 33498
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0865113735198975,
      "learning_rate": 1.2105193568323512e-05,
      "loss": 2.5908,
      "step": 33499
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1521486043930054,
      "learning_rate": 1.2104791056927812e-05,
      "loss": 2.3237,
      "step": 33500
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0682119131088257,
      "learning_rate": 1.210438854196392e-05,
      "loss": 2.281,
      "step": 33501
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0178309679031372,
      "learning_rate": 1.2103986023432517e-05,
      "loss": 2.1941,
      "step": 33502
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1170971393585205,
      "learning_rate": 1.2103583501334286e-05,
      "loss": 2.3398,
      "step": 33503
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0504887104034424,
      "learning_rate": 1.210318097566991e-05,
      "loss": 2.2899,
      "step": 33504
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1654106378555298,
      "learning_rate": 1.2102778446440069e-05,
      "loss": 2.3732,
      "step": 33505
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.098554253578186,
      "learning_rate": 1.210237591364545e-05,
      "loss": 2.2944,
      "step": 33506
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.962016224861145,
      "learning_rate": 1.2101973377286733e-05,
      "loss": 2.3352,
      "step": 33507
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.044298529624939,
      "learning_rate": 1.21015708373646e-05,
      "loss": 2.4159,
      "step": 33508
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1339406967163086,
      "learning_rate": 1.2101168293879732e-05,
      "loss": 2.421,
      "step": 33509
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0081806182861328,
      "learning_rate": 1.2100765746832814e-05,
      "loss": 2.3714,
      "step": 33510
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.093031406402588,
      "learning_rate": 1.2100363196224527e-05,
      "loss": 2.2936,
      "step": 33511
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.283185601234436,
      "learning_rate": 1.2099960642055555e-05,
      "loss": 2.2081,
      "step": 33512
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9935119152069092,
      "learning_rate": 1.2099558084326579e-05,
      "loss": 2.4767,
      "step": 33513
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0386472940444946,
      "learning_rate": 1.2099155523038282e-05,
      "loss": 2.5153,
      "step": 33514
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.02781081199646,
      "learning_rate": 1.2098752958191348e-05,
      "loss": 2.1938,
      "step": 33515
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0537176132202148,
      "learning_rate": 1.2098350389786455e-05,
      "loss": 2.4646,
      "step": 33516
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0487667322158813,
      "learning_rate": 1.209794781782429e-05,
      "loss": 2.425,
      "step": 33517
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.117140293121338,
      "learning_rate": 1.209754524230553e-05,
      "loss": 2.4403,
      "step": 33518
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9788135886192322,
      "learning_rate": 1.2097142663230863e-05,
      "loss": 2.2773,
      "step": 33519
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0025115013122559,
      "learning_rate": 1.209674008060097e-05,
      "loss": 2.3435,
      "step": 33520
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.103621006011963,
      "learning_rate": 1.2096337494416534e-05,
      "loss": 2.2387,
      "step": 33521
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.994193971157074,
      "learning_rate": 1.2095934904678237e-05,
      "loss": 2.4536,
      "step": 33522
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.131516456604004,
      "learning_rate": 1.2095532311386759e-05,
      "loss": 2.3734,
      "step": 33523
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0220979452133179,
      "learning_rate": 1.2095129714542786e-05,
      "loss": 2.2216,
      "step": 33524
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1178107261657715,
      "learning_rate": 1.2094727114146999e-05,
      "loss": 2.2936,
      "step": 33525
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1073111295700073,
      "learning_rate": 1.209432451020008e-05,
      "loss": 2.5252,
      "step": 33526
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0741171836853027,
      "learning_rate": 1.2093921902702714e-05,
      "loss": 2.2009,
      "step": 33527
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0529069900512695,
      "learning_rate": 1.209351929165558e-05,
      "loss": 2.341,
      "step": 33528
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1055822372436523,
      "learning_rate": 1.2093116677059362e-05,
      "loss": 2.4213,
      "step": 33529
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9887660145759583,
      "learning_rate": 1.2092714058914743e-05,
      "loss": 2.2822,
      "step": 33530
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1225755214691162,
      "learning_rate": 1.2092311437222403e-05,
      "loss": 2.3623,
      "step": 33531
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0657455921173096,
      "learning_rate": 1.2091908811983031e-05,
      "loss": 2.1779,
      "step": 33532
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0663657188415527,
      "learning_rate": 1.2091506183197303e-05,
      "loss": 2.2608,
      "step": 33533
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0515650510787964,
      "learning_rate": 1.2091103550865905e-05,
      "loss": 2.5736,
      "step": 33534
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9740630984306335,
      "learning_rate": 1.2090700914989519e-05,
      "loss": 2.3876,
      "step": 33535
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1393474340438843,
      "learning_rate": 1.2090298275568827e-05,
      "loss": 2.352,
      "step": 33536
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0390833616256714,
      "learning_rate": 1.2089895632604509e-05,
      "loss": 2.4799,
      "step": 33537
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1001307964324951,
      "learning_rate": 1.2089492986097251e-05,
      "loss": 2.4826,
      "step": 33538
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4805752038955688,
      "learning_rate": 1.2089090336047737e-05,
      "loss": 2.0324,
      "step": 33539
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1568108797073364,
      "learning_rate": 1.2088687682456647e-05,
      "loss": 2.5384,
      "step": 33540
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1656051874160767,
      "learning_rate": 1.2088285025324664e-05,
      "loss": 2.4386,
      "step": 33541
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1163078546524048,
      "learning_rate": 1.208788236465247e-05,
      "loss": 2.3053,
      "step": 33542
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.41110098361969,
      "learning_rate": 1.2087479700440751e-05,
      "loss": 2.4758,
      "step": 33543
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.085824728012085,
      "learning_rate": 1.2087077032690185e-05,
      "loss": 2.4268,
      "step": 33544
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0505759716033936,
      "learning_rate": 1.2086674361401457e-05,
      "loss": 2.4103,
      "step": 33545
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2197495698928833,
      "learning_rate": 1.2086271686575248e-05,
      "loss": 2.4576,
      "step": 33546
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2489441633224487,
      "learning_rate": 1.2085869008212243e-05,
      "loss": 2.3868,
      "step": 33547
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0918657779693604,
      "learning_rate": 1.2085466326313122e-05,
      "loss": 2.3442,
      "step": 33548
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.034095048904419,
      "learning_rate": 1.2085063640878571e-05,
      "loss": 2.4678,
      "step": 33549
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9596139192581177,
      "learning_rate": 1.2084660951909269e-05,
      "loss": 2.4569,
      "step": 33550
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0623222589492798,
      "learning_rate": 1.20842582594059e-05,
      "loss": 2.5614,
      "step": 33551
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4096535444259644,
      "learning_rate": 1.2083855563369148e-05,
      "loss": 2.286,
      "step": 33552
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1677879095077515,
      "learning_rate": 1.2083452863799696e-05,
      "loss": 2.2609,
      "step": 33553
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.014116883277893,
      "learning_rate": 1.2083050160698228e-05,
      "loss": 2.1551,
      "step": 33554
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0103727579116821,
      "learning_rate": 1.2082647454065422e-05,
      "loss": 2.3893,
      "step": 33555
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0991098880767822,
      "learning_rate": 1.208224474390196e-05,
      "loss": 2.2765,
      "step": 33556
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.983863115310669,
      "learning_rate": 1.208184203020853e-05,
      "loss": 2.3962,
      "step": 33557
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0216401815414429,
      "learning_rate": 1.2081439312985812e-05,
      "loss": 2.3851,
      "step": 33558
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0734610557556152,
      "learning_rate": 1.2081036592234488e-05,
      "loss": 2.2534,
      "step": 33559
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.001597285270691,
      "learning_rate": 1.2080633867955245e-05,
      "loss": 2.2991,
      "step": 33560
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0235562324523926,
      "learning_rate": 1.2080231140148762e-05,
      "loss": 2.2383,
      "step": 33561
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.107562780380249,
      "learning_rate": 1.207982840881572e-05,
      "loss": 2.4622,
      "step": 33562
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0610204935073853,
      "learning_rate": 1.2079425673956804e-05,
      "loss": 2.4318,
      "step": 33563
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0686763525009155,
      "learning_rate": 1.2079022935572696e-05,
      "loss": 2.4743,
      "step": 33564
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0489158630371094,
      "learning_rate": 1.2078620193664081e-05,
      "loss": 2.312,
      "step": 33565
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0911219120025635,
      "learning_rate": 1.207821744823164e-05,
      "loss": 2.4783,
      "step": 33566
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2591437101364136,
      "learning_rate": 1.2077814699276059e-05,
      "loss": 2.3328,
      "step": 33567
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0421112775802612,
      "learning_rate": 1.2077411946798011e-05,
      "loss": 2.3321,
      "step": 33568
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9077491164207458,
      "learning_rate": 1.207700919079819e-05,
      "loss": 2.2244,
      "step": 33569
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0945554971694946,
      "learning_rate": 1.2076606431277273e-05,
      "loss": 2.3051,
      "step": 33570
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.126741647720337,
      "learning_rate": 1.2076203668235949e-05,
      "loss": 2.3626,
      "step": 33571
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0677040815353394,
      "learning_rate": 1.207580090167489e-05,
      "loss": 2.5827,
      "step": 33572
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1698389053344727,
      "learning_rate": 1.2075398131594787e-05,
      "loss": 2.2138,
      "step": 33573
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0528382062911987,
      "learning_rate": 1.2074995357996322e-05,
      "loss": 2.5838,
      "step": 33574
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1527522802352905,
      "learning_rate": 1.2074592580880173e-05,
      "loss": 2.2441,
      "step": 33575
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9804710745811462,
      "learning_rate": 1.2074189800247027e-05,
      "loss": 2.3735,
      "step": 33576
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.05475652217865,
      "learning_rate": 1.2073787016097565e-05,
      "loss": 2.4749,
      "step": 33577
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.049694299697876,
      "learning_rate": 1.2073384228432472e-05,
      "loss": 2.2381,
      "step": 33578
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0369385480880737,
      "learning_rate": 1.2072981437252428e-05,
      "loss": 2.3092,
      "step": 33579
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.053073763847351,
      "learning_rate": 1.2072578642558122e-05,
      "loss": 2.3382,
      "step": 33580
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9867405295372009,
      "learning_rate": 1.2072175844350229e-05,
      "loss": 2.417,
      "step": 33581
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1159210205078125,
      "learning_rate": 1.2071773042629435e-05,
      "loss": 2.4723,
      "step": 33582
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1228816509246826,
      "learning_rate": 1.2071370237396425e-05,
      "loss": 2.4147,
      "step": 33583
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2201297283172607,
      "learning_rate": 1.2070967428651877e-05,
      "loss": 2.3421,
      "step": 33584
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.292975664138794,
      "learning_rate": 1.2070564616396478e-05,
      "loss": 2.3396,
      "step": 33585
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0882160663604736,
      "learning_rate": 1.207016180063091e-05,
      "loss": 2.2474,
      "step": 33586
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0437077283859253,
      "learning_rate": 1.2069758981355857e-05,
      "loss": 2.5144,
      "step": 33587
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0016601085662842,
      "learning_rate": 1.2069356158571996e-05,
      "loss": 2.4053,
      "step": 33588
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0614566802978516,
      "learning_rate": 1.2068953332280018e-05,
      "loss": 2.3348,
      "step": 33589
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1836458444595337,
      "learning_rate": 1.20685505024806e-05,
      "loss": 2.4612,
      "step": 33590
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0735477209091187,
      "learning_rate": 1.2068147669174428e-05,
      "loss": 2.3372,
      "step": 33591
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5976266860961914,
      "learning_rate": 1.2067744832362184e-05,
      "loss": 2.504,
      "step": 33592
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2100762128829956,
      "learning_rate": 1.2067341992044555e-05,
      "loss": 2.2669,
      "step": 33593
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1282225847244263,
      "learning_rate": 1.2066939148222214e-05,
      "loss": 2.4285,
      "step": 33594
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0713529586791992,
      "learning_rate": 1.2066536300895853e-05,
      "loss": 2.4505,
      "step": 33595
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0800256729125977,
      "learning_rate": 1.2066133450066149e-05,
      "loss": 2.4161,
      "step": 33596
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.088098406791687,
      "learning_rate": 1.2065730595733789e-05,
      "loss": 2.3736,
      "step": 33597
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1880314350128174,
      "learning_rate": 1.2065327737899452e-05,
      "loss": 2.4887,
      "step": 33598
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1428059339523315,
      "learning_rate": 1.206492487656383e-05,
      "loss": 2.5568,
      "step": 33599
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.10273015499115,
      "learning_rate": 1.2064522011727596e-05,
      "loss": 2.3909,
      "step": 33600
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1965610980987549,
      "learning_rate": 1.2064119143391435e-05,
      "loss": 2.2754,
      "step": 33601
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0732320547103882,
      "learning_rate": 1.2063716271556034e-05,
      "loss": 2.3324,
      "step": 33602
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0408693552017212,
      "learning_rate": 1.2063313396222071e-05,
      "loss": 2.2149,
      "step": 33603
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0042731761932373,
      "learning_rate": 1.2062910517390233e-05,
      "loss": 2.4409,
      "step": 33604
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0568263530731201,
      "learning_rate": 1.20625076350612e-05,
      "loss": 2.3924,
      "step": 33605
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2147213220596313,
      "learning_rate": 1.2062104749235658e-05,
      "loss": 2.4207,
      "step": 33606
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1006444692611694,
      "learning_rate": 1.206170185991429e-05,
      "loss": 2.3325,
      "step": 33607
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2939993143081665,
      "learning_rate": 1.2061298967097774e-05,
      "loss": 2.2767,
      "step": 33608
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.020634651184082,
      "learning_rate": 1.2060896070786797e-05,
      "loss": 2.4155,
      "step": 33609
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0921398401260376,
      "learning_rate": 1.2060493170982042e-05,
      "loss": 2.2629,
      "step": 33610
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0558031797409058,
      "learning_rate": 1.206009026768419e-05,
      "loss": 2.3969,
      "step": 33611
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1183149814605713,
      "learning_rate": 1.2059687360893929e-05,
      "loss": 2.3089,
      "step": 33612
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9876852631568909,
      "learning_rate": 1.2059284450611937e-05,
      "loss": 2.3998,
      "step": 33613
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3762502670288086,
      "learning_rate": 1.2058881536838896e-05,
      "loss": 2.654,
      "step": 33614
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0947000980377197,
      "learning_rate": 1.2058478619575493e-05,
      "loss": 2.4752,
      "step": 33615
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.135947585105896,
      "learning_rate": 1.2058075698822411e-05,
      "loss": 2.3413,
      "step": 33616
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1153857707977295,
      "learning_rate": 1.205767277458033e-05,
      "loss": 2.5501,
      "step": 33617
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1191294193267822,
      "learning_rate": 1.2057269846849935e-05,
      "loss": 2.384,
      "step": 33618
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2264858484268188,
      "learning_rate": 1.2056866915631913e-05,
      "loss": 2.24,
      "step": 33619
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9928768277168274,
      "learning_rate": 1.2056463980926938e-05,
      "loss": 2.3539,
      "step": 33620
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9418440461158752,
      "learning_rate": 1.2056061042735699e-05,
      "loss": 2.1746,
      "step": 33621
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1350775957107544,
      "learning_rate": 1.2055658101058878e-05,
      "loss": 2.4389,
      "step": 33622
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0832487344741821,
      "learning_rate": 1.205525515589716e-05,
      "loss": 2.569,
      "step": 33623
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2296653985977173,
      "learning_rate": 1.2054852207251224e-05,
      "loss": 2.3757,
      "step": 33624
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1443591117858887,
      "learning_rate": 1.2054449255121757e-05,
      "loss": 2.3331,
      "step": 33625
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0152560472488403,
      "learning_rate": 1.2054046299509442e-05,
      "loss": 2.4145,
      "step": 33626
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0704363584518433,
      "learning_rate": 1.2053643340414957e-05,
      "loss": 2.3569,
      "step": 33627
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1247502565383911,
      "learning_rate": 1.205324037783899e-05,
      "loss": 2.6831,
      "step": 33628
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0699174404144287,
      "learning_rate": 1.2052837411782223e-05,
      "loss": 2.41,
      "step": 33629
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2244515419006348,
      "learning_rate": 1.2052434442245342e-05,
      "loss": 2.4264,
      "step": 33630
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9239156246185303,
      "learning_rate": 1.2052031469229025e-05,
      "loss": 2.2965,
      "step": 33631
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0158655643463135,
      "learning_rate": 1.2051628492733956e-05,
      "loss": 2.3896,
      "step": 33632
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.028573989868164,
      "learning_rate": 1.2051225512760823e-05,
      "loss": 2.3202,
      "step": 33633
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0497353076934814,
      "learning_rate": 1.2050822529310303e-05,
      "loss": 2.528,
      "step": 33634
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1555100679397583,
      "learning_rate": 1.205041954238308e-05,
      "loss": 2.4385,
      "step": 33635
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0387051105499268,
      "learning_rate": 1.2050016551979842e-05,
      "loss": 2.4401,
      "step": 33636
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0720961093902588,
      "learning_rate": 1.2049613558101267e-05,
      "loss": 2.3066,
      "step": 33637
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9728649258613586,
      "learning_rate": 1.2049210560748041e-05,
      "loss": 2.2312,
      "step": 33638
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1636345386505127,
      "learning_rate": 1.204880755992085e-05,
      "loss": 2.4242,
      "step": 33639
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1537619829177856,
      "learning_rate": 1.204840455562037e-05,
      "loss": 2.4689,
      "step": 33640
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9927242994308472,
      "learning_rate": 1.2048001547847288e-05,
      "loss": 2.1447,
      "step": 33641
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0846080780029297,
      "learning_rate": 1.2047598536602288e-05,
      "loss": 2.5386,
      "step": 33642
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9995962381362915,
      "learning_rate": 1.204719552188605e-05,
      "loss": 2.4057,
      "step": 33643
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1056684255599976,
      "learning_rate": 1.2046792503699263e-05,
      "loss": 2.396,
      "step": 33644
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.072312593460083,
      "learning_rate": 1.2046389482042603e-05,
      "loss": 2.3916,
      "step": 33645
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1039451360702515,
      "learning_rate": 1.2045986456916763e-05,
      "loss": 2.3631,
      "step": 33646
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0650426149368286,
      "learning_rate": 1.2045583428322415e-05,
      "loss": 2.4524,
      "step": 33647
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0911856889724731,
      "learning_rate": 1.2045180396260249e-05,
      "loss": 2.5764,
      "step": 33648
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1297627687454224,
      "learning_rate": 1.2044777360730948e-05,
      "loss": 2.3063,
      "step": 33649
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.08339262008667,
      "learning_rate": 1.2044374321735192e-05,
      "loss": 2.4767,
      "step": 33650
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0806037187576294,
      "learning_rate": 1.204397127927367e-05,
      "loss": 2.2578,
      "step": 33651
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.053672432899475,
      "learning_rate": 1.204356823334706e-05,
      "loss": 2.3081,
      "step": 33652
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0217801332473755,
      "learning_rate": 1.2043165183956045e-05,
      "loss": 2.5087,
      "step": 33653
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0412209033966064,
      "learning_rate": 1.204276213110131e-05,
      "loss": 2.4724,
      "step": 33654
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9641330242156982,
      "learning_rate": 1.204235907478354e-05,
      "loss": 2.3909,
      "step": 33655
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9450984597206116,
      "learning_rate": 1.2041956015003416e-05,
      "loss": 2.2041,
      "step": 33656
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1020984649658203,
      "learning_rate": 1.204155295176162e-05,
      "loss": 2.3418,
      "step": 33657
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0547518730163574,
      "learning_rate": 1.2041149885058843e-05,
      "loss": 2.3605,
      "step": 33658
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1619852781295776,
      "learning_rate": 1.2040746814895759e-05,
      "loss": 2.4252,
      "step": 33659
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2269902229309082,
      "learning_rate": 1.2040343741273054e-05,
      "loss": 2.2765,
      "step": 33660
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0148797035217285,
      "learning_rate": 1.2039940664191415e-05,
      "loss": 2.2266,
      "step": 33661
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2540810108184814,
      "learning_rate": 1.203953758365152e-05,
      "loss": 2.3388,
      "step": 33662
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2034183740615845,
      "learning_rate": 1.2039134499654052e-05,
      "loss": 2.3674,
      "step": 33663
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2388514280319214,
      "learning_rate": 1.2038731412199702e-05,
      "loss": 2.264,
      "step": 33664
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2833462953567505,
      "learning_rate": 1.2038328321289146e-05,
      "loss": 2.0085,
      "step": 33665
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0123000144958496,
      "learning_rate": 1.2037925226923073e-05,
      "loss": 2.51,
      "step": 33666
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.088594675064087,
      "learning_rate": 1.2037522129102161e-05,
      "loss": 2.4037,
      "step": 33667
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0653102397918701,
      "learning_rate": 1.2037119027827096e-05,
      "loss": 2.4658,
      "step": 33668
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9815487265586853,
      "learning_rate": 1.2036715923098558e-05,
      "loss": 2.2533,
      "step": 33669
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1903733015060425,
      "learning_rate": 1.2036312814917237e-05,
      "loss": 2.441,
      "step": 33670
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0106059312820435,
      "learning_rate": 1.2035909703283813e-05,
      "loss": 2.1519,
      "step": 33671
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9262506365776062,
      "learning_rate": 1.2035506588198969e-05,
      "loss": 2.2312,
      "step": 33672
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.136640191078186,
      "learning_rate": 1.2035103469663387e-05,
      "loss": 2.2781,
      "step": 33673
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.074737787246704,
      "learning_rate": 1.2034700347677753e-05,
      "loss": 2.3714,
      "step": 33674
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0104198455810547,
      "learning_rate": 1.2034297222242747e-05,
      "loss": 2.2319,
      "step": 33675
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1318796873092651,
      "learning_rate": 1.2033894093359055e-05,
      "loss": 2.4128,
      "step": 33676
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1421904563903809,
      "learning_rate": 1.2033490961027363e-05,
      "loss": 2.2344,
      "step": 33677
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9322051405906677,
      "learning_rate": 1.2033087825248349e-05,
      "loss": 2.4512,
      "step": 33678
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0247251987457275,
      "learning_rate": 1.2032684686022703e-05,
      "loss": 2.2499,
      "step": 33679
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.992150068283081,
      "learning_rate": 1.2032281543351098e-05,
      "loss": 2.3009,
      "step": 33680
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0799274444580078,
      "learning_rate": 1.203187839723423e-05,
      "loss": 2.3887,
      "step": 33681
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0799694061279297,
      "learning_rate": 1.2031475247672772e-05,
      "loss": 2.3626,
      "step": 33682
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1244386434555054,
      "learning_rate": 1.2031072094667413e-05,
      "loss": 2.4001,
      "step": 33683
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9945262670516968,
      "learning_rate": 1.2030668938218835e-05,
      "loss": 2.1823,
      "step": 33684
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0691338777542114,
      "learning_rate": 1.2030265778327723e-05,
      "loss": 2.2348,
      "step": 33685
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1675242185592651,
      "learning_rate": 1.2029862614994759e-05,
      "loss": 2.1326,
      "step": 33686
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1033707857131958,
      "learning_rate": 1.2029459448220623e-05,
      "loss": 2.365,
      "step": 33687
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.147472858428955,
      "learning_rate": 1.2029056278006004e-05,
      "loss": 2.3781,
      "step": 33688
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1628961563110352,
      "learning_rate": 1.2028653104351586e-05,
      "loss": 2.5188,
      "step": 33689
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9955759048461914,
      "learning_rate": 1.2028249927258048e-05,
      "loss": 2.2164,
      "step": 33690
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0428874492645264,
      "learning_rate": 1.2027846746726075e-05,
      "loss": 2.2149,
      "step": 33691
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0070523023605347,
      "learning_rate": 1.2027443562756354e-05,
      "loss": 2.4596,
      "step": 33692
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0697609186172485,
      "learning_rate": 1.2027040375349564e-05,
      "loss": 2.3241,
      "step": 33693
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1693990230560303,
      "learning_rate": 1.2026637184506389e-05,
      "loss": 2.4992,
      "step": 33694
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.080431342124939,
      "learning_rate": 1.2026233990227513e-05,
      "loss": 2.3466,
      "step": 33695
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.045339584350586,
      "learning_rate": 1.202583079251362e-05,
      "loss": 2.2727,
      "step": 33696
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0968077182769775,
      "learning_rate": 1.2025427591365396e-05,
      "loss": 2.5247,
      "step": 33697
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9802754521369934,
      "learning_rate": 1.2025024386783521e-05,
      "loss": 2.3901,
      "step": 33698
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0498685836791992,
      "learning_rate": 1.2024621178768678e-05,
      "loss": 2.4518,
      "step": 33699
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0124589204788208,
      "learning_rate": 1.2024217967321554e-05,
      "loss": 2.2509,
      "step": 33700
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2539647817611694,
      "learning_rate": 1.202381475244283e-05,
      "loss": 1.943,
      "step": 33701
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0496236085891724,
      "learning_rate": 1.2023411534133189e-05,
      "loss": 2.4036,
      "step": 33702
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0882920026779175,
      "learning_rate": 1.2023008312393318e-05,
      "loss": 2.1983,
      "step": 33703
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2182198762893677,
      "learning_rate": 1.2022605087223898e-05,
      "loss": 2.4962,
      "step": 33704
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1148990392684937,
      "learning_rate": 1.2022201858625616e-05,
      "loss": 2.2825,
      "step": 33705
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0789624452590942,
      "learning_rate": 1.2021798626599148e-05,
      "loss": 2.3368,
      "step": 33706
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.183579444885254,
      "learning_rate": 1.2021395391145183e-05,
      "loss": 2.3319,
      "step": 33707
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0283026695251465,
      "learning_rate": 1.2020992152264403e-05,
      "loss": 2.5669,
      "step": 33708
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0777326822280884,
      "learning_rate": 1.2020588909957495e-05,
      "loss": 2.576,
      "step": 33709
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.072684645652771,
      "learning_rate": 1.2020185664225139e-05,
      "loss": 2.3172,
      "step": 33710
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.072603464126587,
      "learning_rate": 1.201978241506802e-05,
      "loss": 2.3881,
      "step": 33711
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9972795844078064,
      "learning_rate": 1.2019379162486821e-05,
      "loss": 2.2875,
      "step": 33712
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.00489342212677,
      "learning_rate": 1.2018975906482227e-05,
      "loss": 2.4062,
      "step": 33713
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0474509000778198,
      "learning_rate": 1.2018572647054918e-05,
      "loss": 2.4166,
      "step": 33714
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9877877235412598,
      "learning_rate": 1.201816938420558e-05,
      "loss": 2.5004,
      "step": 33715
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9926819801330566,
      "learning_rate": 1.2017766117934898e-05,
      "loss": 2.229,
      "step": 33716
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0750906467437744,
      "learning_rate": 1.2017362848243556e-05,
      "loss": 2.4747,
      "step": 33717
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1843534708023071,
      "learning_rate": 1.2016959575132234e-05,
      "loss": 2.3692,
      "step": 33718
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2883374691009521,
      "learning_rate": 1.201655629860162e-05,
      "loss": 2.4133,
      "step": 33719
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.209298849105835,
      "learning_rate": 1.2016153018652394e-05,
      "loss": 2.2891,
      "step": 33720
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.135682463645935,
      "learning_rate": 1.201574973528524e-05,
      "loss": 2.4114,
      "step": 33721
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.149232268333435,
      "learning_rate": 1.2015346448500845e-05,
      "loss": 2.1876,
      "step": 33722
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.320223331451416,
      "learning_rate": 1.2014943158299887e-05,
      "loss": 2.3894,
      "step": 33723
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9556058645248413,
      "learning_rate": 1.2014539864683057e-05,
      "loss": 2.2908,
      "step": 33724
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.139384388923645,
      "learning_rate": 1.2014136567651033e-05,
      "loss": 2.2945,
      "step": 33725
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0452706813812256,
      "learning_rate": 1.2013733267204502e-05,
      "loss": 2.3819,
      "step": 33726
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0746315717697144,
      "learning_rate": 1.2013329963344142e-05,
      "loss": 2.3595,
      "step": 33727
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.204290747642517,
      "learning_rate": 1.2012926656070642e-05,
      "loss": 2.4996,
      "step": 33728
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5221378803253174,
      "learning_rate": 1.2012523345384688e-05,
      "loss": 2.3547,
      "step": 33729
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0522637367248535,
      "learning_rate": 1.2012120031286959e-05,
      "loss": 2.242,
      "step": 33730
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9830602407455444,
      "learning_rate": 1.2011716713778138e-05,
      "loss": 2.4636,
      "step": 33731
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9810318350791931,
      "learning_rate": 1.2011313392858914e-05,
      "loss": 2.1575,
      "step": 33732
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0826473236083984,
      "learning_rate": 1.2010910068529964e-05,
      "loss": 2.4404,
      "step": 33733
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2238863706588745,
      "learning_rate": 1.2010506740791979e-05,
      "loss": 2.2227,
      "step": 33734
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9951692819595337,
      "learning_rate": 1.2010103409645635e-05,
      "loss": 2.4109,
      "step": 33735
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0872280597686768,
      "learning_rate": 1.2009700075091622e-05,
      "loss": 2.3801,
      "step": 33736
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9798367619514465,
      "learning_rate": 1.200929673713062e-05,
      "loss": 2.4399,
      "step": 33737
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.08696448802948,
      "learning_rate": 1.200889339576332e-05,
      "loss": 2.2111,
      "step": 33738
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0620484352111816,
      "learning_rate": 1.2008490050990394e-05,
      "loss": 2.4514,
      "step": 33739
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2466908693313599,
      "learning_rate": 1.2008086702812533e-05,
      "loss": 2.3335,
      "step": 33740
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0222513675689697,
      "learning_rate": 1.200768335123042e-05,
      "loss": 2.4436,
      "step": 33741
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.08052396774292,
      "learning_rate": 1.2007279996244738e-05,
      "loss": 2.4083,
      "step": 33742
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0057508945465088,
      "learning_rate": 1.200687663785617e-05,
      "loss": 2.3408,
      "step": 33743
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.144540786743164,
      "learning_rate": 1.2006473276065404e-05,
      "loss": 2.1969,
      "step": 33744
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1526910066604614,
      "learning_rate": 1.2006069910873121e-05,
      "loss": 2.1487,
      "step": 33745
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2090699672698975,
      "learning_rate": 1.2005666542280003e-05,
      "loss": 2.4305,
      "step": 33746
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0596539974212646,
      "learning_rate": 1.2005263170286737e-05,
      "loss": 2.5837,
      "step": 33747
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5387517213821411,
      "learning_rate": 1.2004859794894006e-05,
      "loss": 2.3107,
      "step": 33748
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1082011461257935,
      "learning_rate": 1.2004456416102492e-05,
      "loss": 2.3676,
      "step": 33749
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.260719895362854,
      "learning_rate": 1.2004053033912879e-05,
      "loss": 2.2233,
      "step": 33750
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1391226053237915,
      "learning_rate": 1.2003649648325852e-05,
      "loss": 2.3534,
      "step": 33751
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0274196863174438,
      "learning_rate": 1.2003246259342096e-05,
      "loss": 2.2213,
      "step": 33752
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9604017734527588,
      "learning_rate": 1.2002842866962292e-05,
      "loss": 2.3667,
      "step": 33753
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9833151698112488,
      "learning_rate": 1.2002439471187126e-05,
      "loss": 2.4337,
      "step": 33754
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2548249959945679,
      "learning_rate": 1.200203607201728e-05,
      "loss": 2.3793,
      "step": 33755
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0754603147506714,
      "learning_rate": 1.200163266945344e-05,
      "loss": 2.4907,
      "step": 33756
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.019476056098938,
      "learning_rate": 1.2001229263496287e-05,
      "loss": 2.5746,
      "step": 33757
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0737301111221313,
      "learning_rate": 1.2000825854146512e-05,
      "loss": 2.4241,
      "step": 33758
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2297931909561157,
      "learning_rate": 1.2000422441404791e-05,
      "loss": 2.2546,
      "step": 33759
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1407535076141357,
      "learning_rate": 1.200001902527181e-05,
      "loss": 2.504,
      "step": 33760
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.045629858970642,
      "learning_rate": 1.1999615605748254e-05,
      "loss": 2.5348,
      "step": 33761
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.15392005443573,
      "learning_rate": 1.1999212182834806e-05,
      "loss": 2.3799,
      "step": 33762
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.069261908531189,
      "learning_rate": 1.199880875653215e-05,
      "loss": 2.4504,
      "step": 33763
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0562399625778198,
      "learning_rate": 1.1998405326840972e-05,
      "loss": 2.0072,
      "step": 33764
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1308931112289429,
      "learning_rate": 1.1998001893761955e-05,
      "loss": 2.1532,
      "step": 33765
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1922955513000488,
      "learning_rate": 1.1997598457295778e-05,
      "loss": 2.3852,
      "step": 33766
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.108853816986084,
      "learning_rate": 1.1997195017443133e-05,
      "loss": 2.4639,
      "step": 33767
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1094088554382324,
      "learning_rate": 1.1996791574204698e-05,
      "loss": 2.513,
      "step": 33768
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0765953063964844,
      "learning_rate": 1.1996388127581158e-05,
      "loss": 2.2168,
      "step": 33769
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1382358074188232,
      "learning_rate": 1.1995984677573201e-05,
      "loss": 2.3592,
      "step": 33770
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2475463151931763,
      "learning_rate": 1.1995581224181505e-05,
      "loss": 2.3978,
      "step": 33771
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.069270133972168,
      "learning_rate": 1.1995177767406758e-05,
      "loss": 2.3847,
      "step": 33772
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0624456405639648,
      "learning_rate": 1.1994774307249644e-05,
      "loss": 2.2693,
      "step": 33773
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0270897150039673,
      "learning_rate": 1.1994370843710843e-05,
      "loss": 2.0598,
      "step": 33774
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5454365015029907,
      "learning_rate": 1.1993967376791045e-05,
      "loss": 2.291,
      "step": 33775
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.131145715713501,
      "learning_rate": 1.199356390649093e-05,
      "loss": 2.2779,
      "step": 33776
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0123428106307983,
      "learning_rate": 1.1993160432811183e-05,
      "loss": 2.1844,
      "step": 33777
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0027612447738647,
      "learning_rate": 1.1992756955752485e-05,
      "loss": 2.2458,
      "step": 33778
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0495854616165161,
      "learning_rate": 1.1992353475315525e-05,
      "loss": 2.2627,
      "step": 33779
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.069509506225586,
      "learning_rate": 1.1991949991500985e-05,
      "loss": 2.4512,
      "step": 33780
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1790651082992554,
      "learning_rate": 1.1991546504309545e-05,
      "loss": 2.2924,
      "step": 33781
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1326143741607666,
      "learning_rate": 1.1991143013741897e-05,
      "loss": 2.3368,
      "step": 33782
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0327606201171875,
      "learning_rate": 1.199073951979872e-05,
      "loss": 2.2539,
      "step": 33783
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1536179780960083,
      "learning_rate": 1.19903360224807e-05,
      "loss": 2.4735,
      "step": 33784
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0119891166687012,
      "learning_rate": 1.1989932521788517e-05,
      "loss": 2.2997,
      "step": 33785
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0682613849639893,
      "learning_rate": 1.1989529017722859e-05,
      "loss": 2.3424,
      "step": 33786
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1642943620681763,
      "learning_rate": 1.198912551028441e-05,
      "loss": 2.3125,
      "step": 33787
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1017240285873413,
      "learning_rate": 1.1988721999473851e-05,
      "loss": 2.4176,
      "step": 33788
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.029012680053711,
      "learning_rate": 1.1988318485291868e-05,
      "loss": 2.3654,
      "step": 33789
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1678078174591064,
      "learning_rate": 1.1987914967739148e-05,
      "loss": 2.309,
      "step": 33790
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.269956111907959,
      "learning_rate": 1.1987511446816371e-05,
      "loss": 2.5361,
      "step": 33791
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2559418678283691,
      "learning_rate": 1.1987107922524222e-05,
      "loss": 2.252,
      "step": 33792
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3694583177566528,
      "learning_rate": 1.1986704394863384e-05,
      "loss": 2.534,
      "step": 33793
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.112143874168396,
      "learning_rate": 1.1986300863834543e-05,
      "loss": 2.2445,
      "step": 33794
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.432496428489685,
      "learning_rate": 1.1985897329438383e-05,
      "loss": 2.2681,
      "step": 33795
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.09746515750885,
      "learning_rate": 1.1985493791675587e-05,
      "loss": 2.5092,
      "step": 33796
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0390335321426392,
      "learning_rate": 1.1985090250546843e-05,
      "loss": 2.669,
      "step": 33797
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0967401266098022,
      "learning_rate": 1.1984686706052828e-05,
      "loss": 2.5022,
      "step": 33798
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1077200174331665,
      "learning_rate": 1.1984283158194231e-05,
      "loss": 2.4673,
      "step": 33799
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1402145624160767,
      "learning_rate": 1.1983879606971738e-05,
      "loss": 2.4496,
      "step": 33800
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4426349401474,
      "learning_rate": 1.1983476052386026e-05,
      "loss": 2.2813,
      "step": 33801
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0827176570892334,
      "learning_rate": 1.1983072494437784e-05,
      "loss": 2.508,
      "step": 33802
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0122324228286743,
      "learning_rate": 1.1982668933127697e-05,
      "loss": 2.4217,
      "step": 33803
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0659443140029907,
      "learning_rate": 1.198226536845645e-05,
      "loss": 2.3983,
      "step": 33804
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.04108726978302,
      "learning_rate": 1.1981861800424722e-05,
      "loss": 2.3436,
      "step": 33805
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9588260054588318,
      "learning_rate": 1.19814582290332e-05,
      "loss": 2.3573,
      "step": 33806
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9769517779350281,
      "learning_rate": 1.1981054654282569e-05,
      "loss": 2.2842,
      "step": 33807
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0250605344772339,
      "learning_rate": 1.1980651076173513e-05,
      "loss": 2.5304,
      "step": 33808
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0535944700241089,
      "learning_rate": 1.1980247494706714e-05,
      "loss": 2.4692,
      "step": 33809
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0227034091949463,
      "learning_rate": 1.1979843909882857e-05,
      "loss": 2.1835,
      "step": 33810
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.09077787399292,
      "learning_rate": 1.1979440321702629e-05,
      "loss": 2.0996,
      "step": 33811
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1218020915985107,
      "learning_rate": 1.197903673016671e-05,
      "loss": 2.5263,
      "step": 33812
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1129088401794434,
      "learning_rate": 1.1978633135275788e-05,
      "loss": 2.4894,
      "step": 33813
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0345585346221924,
      "learning_rate": 1.1978229537030543e-05,
      "loss": 2.2871,
      "step": 33814
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0445122718811035,
      "learning_rate": 1.1977825935431665e-05,
      "loss": 2.4215,
      "step": 33815
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1596006155014038,
      "learning_rate": 1.1977422330479832e-05,
      "loss": 2.265,
      "step": 33816
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.033289909362793,
      "learning_rate": 1.1977018722175732e-05,
      "loss": 2.5621,
      "step": 33817
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1715248823165894,
      "learning_rate": 1.1976615110520048e-05,
      "loss": 2.0659,
      "step": 33818
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0632665157318115,
      "learning_rate": 1.1976211495513465e-05,
      "loss": 2.2688,
      "step": 33819
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0448006391525269,
      "learning_rate": 1.1975807877156667e-05,
      "loss": 2.2131,
      "step": 33820
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0738009214401245,
      "learning_rate": 1.1975404255450337e-05,
      "loss": 2.5677,
      "step": 33821
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9746202826499939,
      "learning_rate": 1.1975000630395162e-05,
      "loss": 2.4543,
      "step": 33822
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.162937879562378,
      "learning_rate": 1.1974597001991824e-05,
      "loss": 2.4417,
      "step": 33823
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0581902265548706,
      "learning_rate": 1.1974193370241007e-05,
      "loss": 2.2547,
      "step": 33824
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6417220830917358,
      "learning_rate": 1.1973789735143397e-05,
      "loss": 2.3179,
      "step": 33825
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0026220083236694,
      "learning_rate": 1.1973386096699678e-05,
      "loss": 2.3643,
      "step": 33826
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.025944471359253,
      "learning_rate": 1.1972982454910531e-05,
      "loss": 2.271,
      "step": 33827
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.031158447265625,
      "learning_rate": 1.1972578809776643e-05,
      "loss": 2.4789,
      "step": 33828
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1734254360198975,
      "learning_rate": 1.19721751612987e-05,
      "loss": 2.2643,
      "step": 33829
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.083528757095337,
      "learning_rate": 1.1971771509477385e-05,
      "loss": 2.3231,
      "step": 33830
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2158846855163574,
      "learning_rate": 1.197136785431338e-05,
      "loss": 2.2466,
      "step": 33831
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0338057279586792,
      "learning_rate": 1.197096419580737e-05,
      "loss": 2.2332,
      "step": 33832
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0553882122039795,
      "learning_rate": 1.1970560533960043e-05,
      "loss": 2.2645,
      "step": 33833
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0494112968444824,
      "learning_rate": 1.197015686877208e-05,
      "loss": 2.4724,
      "step": 33834
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2711431980133057,
      "learning_rate": 1.1969753200244167e-05,
      "loss": 2.4172,
      "step": 33835
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0398808717727661,
      "learning_rate": 1.1969349528376987e-05,
      "loss": 2.6047,
      "step": 33836
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9875132441520691,
      "learning_rate": 1.1968945853171224e-05,
      "loss": 2.4166,
      "step": 33837
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.014085054397583,
      "learning_rate": 1.1968542174627564e-05,
      "loss": 2.213,
      "step": 33838
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1280611753463745,
      "learning_rate": 1.1968138492746688e-05,
      "loss": 2.5124,
      "step": 33839
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1128038167953491,
      "learning_rate": 1.1967734807529286e-05,
      "loss": 2.3754,
      "step": 33840
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.279247522354126,
      "learning_rate": 1.1967331118976036e-05,
      "loss": 2.2427,
      "step": 33841
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1540335416793823,
      "learning_rate": 1.1966927427087625e-05,
      "loss": 2.2413,
      "step": 33842
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0181552171707153,
      "learning_rate": 1.1966523731864741e-05,
      "loss": 2.2549,
      "step": 33843
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0673136711120605,
      "learning_rate": 1.1966120033308065e-05,
      "loss": 2.1646,
      "step": 33844
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0444769859313965,
      "learning_rate": 1.1965716331418278e-05,
      "loss": 2.2442,
      "step": 33845
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.28599214553833,
      "learning_rate": 1.1965312626196069e-05,
      "loss": 2.1718,
      "step": 33846
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0148273706436157,
      "learning_rate": 1.1964908917642122e-05,
      "loss": 2.5354,
      "step": 33847
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9877902269363403,
      "learning_rate": 1.196450520575712e-05,
      "loss": 2.3239,
      "step": 33848
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1884781122207642,
      "learning_rate": 1.1964101490541751e-05,
      "loss": 2.3975,
      "step": 33849
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1731213331222534,
      "learning_rate": 1.1963697771996694e-05,
      "loss": 2.5619,
      "step": 33850
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0920360088348389,
      "learning_rate": 1.1963294050122637e-05,
      "loss": 2.489,
      "step": 33851
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1476622819900513,
      "learning_rate": 1.1962890324920263e-05,
      "loss": 2.3626,
      "step": 33852
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1152063608169556,
      "learning_rate": 1.1962486596390256e-05,
      "loss": 2.5508,
      "step": 33853
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1411619186401367,
      "learning_rate": 1.1962082864533303e-05,
      "loss": 2.3562,
      "step": 33854
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0456140041351318,
      "learning_rate": 1.1961679129350083e-05,
      "loss": 2.2495,
      "step": 33855
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.270534873008728,
      "learning_rate": 1.1961275390841287e-05,
      "loss": 2.3792,
      "step": 33856
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0254945755004883,
      "learning_rate": 1.19608716490076e-05,
      "loss": 2.1893,
      "step": 33857
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.137638807296753,
      "learning_rate": 1.1960467903849698e-05,
      "loss": 2.5659,
      "step": 33858
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2549588680267334,
      "learning_rate": 1.196006415536827e-05,
      "loss": 2.5014,
      "step": 33859
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.060965895652771,
      "learning_rate": 1.1959660403564002e-05,
      "loss": 2.6181,
      "step": 33860
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1778942346572876,
      "learning_rate": 1.1959256648437578e-05,
      "loss": 2.1796,
      "step": 33861
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9272847771644592,
      "learning_rate": 1.1958852889989683e-05,
      "loss": 2.4715,
      "step": 33862
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9743599891662598,
      "learning_rate": 1.1958449128221e-05,
      "loss": 2.5014,
      "step": 33863
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9571967124938965,
      "learning_rate": 1.1958045363132214e-05,
      "loss": 2.5021,
      "step": 33864
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.065063714981079,
      "learning_rate": 1.1957641594724005e-05,
      "loss": 2.5148,
      "step": 33865
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0656870603561401,
      "learning_rate": 1.1957237822997067e-05,
      "loss": 2.2396,
      "step": 33866
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.008413553237915,
      "learning_rate": 1.1956834047952077e-05,
      "loss": 2.1464,
      "step": 33867
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0635311603546143,
      "learning_rate": 1.1956430269589723e-05,
      "loss": 2.3175,
      "step": 33868
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3128399848937988,
      "learning_rate": 1.1956026487910688e-05,
      "loss": 2.2915,
      "step": 33869
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1159508228302002,
      "learning_rate": 1.1955622702915656e-05,
      "loss": 2.5044,
      "step": 33870
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.051390528678894,
      "learning_rate": 1.1955218914605314e-05,
      "loss": 2.2516,
      "step": 33871
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0665339231491089,
      "learning_rate": 1.1954815122980343e-05,
      "loss": 2.5817,
      "step": 33872
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1870625019073486,
      "learning_rate": 1.195441132804143e-05,
      "loss": 2.4934,
      "step": 33873
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0906777381896973,
      "learning_rate": 1.1954007529789258e-05,
      "loss": 2.2361,
      "step": 33874
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1031416654586792,
      "learning_rate": 1.1953603728224515e-05,
      "loss": 2.2515,
      "step": 33875
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3295718431472778,
      "learning_rate": 1.1953199923347883e-05,
      "loss": 2.4226,
      "step": 33876
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1693576574325562,
      "learning_rate": 1.1952796115160044e-05,
      "loss": 2.3376,
      "step": 33877
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.074857234954834,
      "learning_rate": 1.1952392303661687e-05,
      "loss": 2.2949,
      "step": 33878
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1277896165847778,
      "learning_rate": 1.1951988488853494e-05,
      "loss": 2.3979,
      "step": 33879
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9899374842643738,
      "learning_rate": 1.195158467073615e-05,
      "loss": 2.4022,
      "step": 33880
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1415225267410278,
      "learning_rate": 1.195118084931034e-05,
      "loss": 2.433,
      "step": 33881
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0520918369293213,
      "learning_rate": 1.195077702457675e-05,
      "loss": 2.5127,
      "step": 33882
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0545443296432495,
      "learning_rate": 1.1950373196536063e-05,
      "loss": 2.3526,
      "step": 33883
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.135528802871704,
      "learning_rate": 1.1949969365188963e-05,
      "loss": 2.1325,
      "step": 33884
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4187127351760864,
      "learning_rate": 1.1949565530536136e-05,
      "loss": 2.2237,
      "step": 33885
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0412460565567017,
      "learning_rate": 1.1949161692578264e-05,
      "loss": 2.4575,
      "step": 33886
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1869721412658691,
      "learning_rate": 1.1948757851316036e-05,
      "loss": 2.2997,
      "step": 33887
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0335161685943604,
      "learning_rate": 1.194835400675013e-05,
      "loss": 2.3048,
      "step": 33888
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0070511102676392,
      "learning_rate": 1.1947950158881239e-05,
      "loss": 2.5059,
      "step": 33889
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0447386503219604,
      "learning_rate": 1.1947546307710042e-05,
      "loss": 2.233,
      "step": 33890
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0064164400100708,
      "learning_rate": 1.1947142453237226e-05,
      "loss": 2.2934,
      "step": 33891
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0845757722854614,
      "learning_rate": 1.1946738595463473e-05,
      "loss": 2.5011,
      "step": 33892
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9259108304977417,
      "learning_rate": 1.194633473438947e-05,
      "loss": 2.354,
      "step": 33893
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1204224824905396,
      "learning_rate": 1.1945930870015905e-05,
      "loss": 2.4688,
      "step": 33894
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1398597955703735,
      "learning_rate": 1.1945527002343454e-05,
      "loss": 2.4716,
      "step": 33895
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.137478232383728,
      "learning_rate": 1.1945123131372806e-05,
      "loss": 2.3553,
      "step": 33896
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.174529790878296,
      "learning_rate": 1.1944719257104652e-05,
      "loss": 2.2599,
      "step": 33897
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4190616607666016,
      "learning_rate": 1.1944315379539664e-05,
      "loss": 2.2811,
      "step": 33898
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0618524551391602,
      "learning_rate": 1.1943911498678536e-05,
      "loss": 2.4424,
      "step": 33899
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.075728416442871,
      "learning_rate": 1.1943507614521947e-05,
      "loss": 2.3117,
      "step": 33900
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9603826999664307,
      "learning_rate": 1.1943103727070589e-05,
      "loss": 2.3689,
      "step": 33901
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1091841459274292,
      "learning_rate": 1.194269983632514e-05,
      "loss": 2.3966,
      "step": 33902
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.073276400566101,
      "learning_rate": 1.1942295942286289e-05,
      "loss": 2.4948,
      "step": 33903
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0390609502792358,
      "learning_rate": 1.1941892044954717e-05,
      "loss": 2.5364,
      "step": 33904
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1474055051803589,
      "learning_rate": 1.194148814433111e-05,
      "loss": 2.19,
      "step": 33905
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2562204599380493,
      "learning_rate": 1.1941084240416154e-05,
      "loss": 2.3209,
      "step": 33906
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9847552180290222,
      "learning_rate": 1.1940680333210535e-05,
      "loss": 2.3516,
      "step": 33907
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9830526113510132,
      "learning_rate": 1.1940276422714934e-05,
      "loss": 2.4419,
      "step": 33908
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7631819248199463,
      "learning_rate": 1.193987250893004e-05,
      "loss": 2.3983,
      "step": 33909
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1156100034713745,
      "learning_rate": 1.193946859185653e-05,
      "loss": 2.3872,
      "step": 33910
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.033332109451294,
      "learning_rate": 1.1939064671495097e-05,
      "loss": 2.3993,
      "step": 33911
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1553415060043335,
      "learning_rate": 1.1938660747846425e-05,
      "loss": 2.4888,
      "step": 33912
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2164863348007202,
      "learning_rate": 1.1938256820911192e-05,
      "loss": 2.4207,
      "step": 33913
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.092542052268982,
      "learning_rate": 1.193785289069009e-05,
      "loss": 2.3621,
      "step": 33914
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.06222403049469,
      "learning_rate": 1.19374489571838e-05,
      "loss": 2.3401,
      "step": 33915
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0537996292114258,
      "learning_rate": 1.193704502039301e-05,
      "loss": 2.2817,
      "step": 33916
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.117319107055664,
      "learning_rate": 1.19366410803184e-05,
      "loss": 2.5395,
      "step": 33917
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0173659324645996,
      "learning_rate": 1.1936237136960658e-05,
      "loss": 2.5669,
      "step": 33918
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.003061294555664,
      "learning_rate": 1.1935833190320469e-05,
      "loss": 2.1309,
      "step": 33919
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1135776042938232,
      "learning_rate": 1.1935429240398515e-05,
      "loss": 2.5832,
      "step": 33920
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0103501081466675,
      "learning_rate": 1.1935025287195486e-05,
      "loss": 2.486,
      "step": 33921
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0314583778381348,
      "learning_rate": 1.1934621330712061e-05,
      "loss": 2.3709,
      "step": 33922
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.065922737121582,
      "learning_rate": 1.1934217370948928e-05,
      "loss": 2.437,
      "step": 33923
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1134709119796753,
      "learning_rate": 1.193381340790677e-05,
      "loss": 2.5693,
      "step": 33924
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1744107007980347,
      "learning_rate": 1.193340944158628e-05,
      "loss": 2.6092,
      "step": 33925
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1004607677459717,
      "learning_rate": 1.1933005471988128e-05,
      "loss": 2.1278,
      "step": 33926
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9683863520622253,
      "learning_rate": 1.193260149911301e-05,
      "loss": 2.5421,
      "step": 33927
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9601937532424927,
      "learning_rate": 1.1932197522961607e-05,
      "loss": 2.2239,
      "step": 33928
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0449943542480469,
      "learning_rate": 1.1931793543534604e-05,
      "loss": 2.3583,
      "step": 33929
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.019243597984314,
      "learning_rate": 1.1931389560832687e-05,
      "loss": 2.4653,
      "step": 33930
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0607365369796753,
      "learning_rate": 1.193098557485654e-05,
      "loss": 2.4399,
      "step": 33931
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.003440022468567,
      "learning_rate": 1.1930581585606847e-05,
      "loss": 2.3596,
      "step": 33932
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0490868091583252,
      "learning_rate": 1.1930177593084293e-05,
      "loss": 2.6034,
      "step": 33933
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1757233142852783,
      "learning_rate": 1.1929773597289565e-05,
      "loss": 2.3906,
      "step": 33934
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0611516237258911,
      "learning_rate": 1.1929369598223346e-05,
      "loss": 2.2122,
      "step": 33935
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1069245338439941,
      "learning_rate": 1.1928965595886326e-05,
      "loss": 2.4278,
      "step": 33936
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.023103952407837,
      "learning_rate": 1.192856159027918e-05,
      "loss": 2.5102,
      "step": 33937
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0363305807113647,
      "learning_rate": 1.19281575814026e-05,
      "loss": 2.3224,
      "step": 33938
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.105225682258606,
      "learning_rate": 1.192775356925727e-05,
      "loss": 2.2702,
      "step": 33939
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9910565614700317,
      "learning_rate": 1.1927349553843872e-05,
      "loss": 2.3345,
      "step": 33940
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.09854257106781,
      "learning_rate": 1.1926945535163094e-05,
      "loss": 2.2466,
      "step": 33941
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0296947956085205,
      "learning_rate": 1.1926541513215622e-05,
      "loss": 2.3461,
      "step": 33942
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1486692428588867,
      "learning_rate": 1.1926137488002136e-05,
      "loss": 2.304,
      "step": 33943
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0627659559249878,
      "learning_rate": 1.1925733459523326e-05,
      "loss": 2.2964,
      "step": 33944
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9482988715171814,
      "learning_rate": 1.1925329427779875e-05,
      "loss": 2.3553,
      "step": 33945
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1300582885742188,
      "learning_rate": 1.1924925392772466e-05,
      "loss": 2.3366,
      "step": 33946
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1081736087799072,
      "learning_rate": 1.1924521354501786e-05,
      "loss": 2.2457,
      "step": 33947
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.01386559009552,
      "learning_rate": 1.192411731296852e-05,
      "loss": 2.5055,
      "step": 33948
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2011566162109375,
      "learning_rate": 1.1923713268173353e-05,
      "loss": 2.3045,
      "step": 33949
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2901238203048706,
      "learning_rate": 1.1923309220116969e-05,
      "loss": 2.4032,
      "step": 33950
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2292267084121704,
      "learning_rate": 1.1922905168800052e-05,
      "loss": 2.1546,
      "step": 33951
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9051681756973267,
      "learning_rate": 1.192250111422329e-05,
      "loss": 2.3352,
      "step": 33952
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0629026889801025,
      "learning_rate": 1.192209705638737e-05,
      "loss": 2.6191,
      "step": 33953
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.023821234703064,
      "learning_rate": 1.1921692995292969e-05,
      "loss": 2.1071,
      "step": 33954
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0526213645935059,
      "learning_rate": 1.1921288930940776e-05,
      "loss": 2.3756,
      "step": 33955
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.230609655380249,
      "learning_rate": 1.1920884863331482e-05,
      "loss": 2.5092,
      "step": 33956
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1819500923156738,
      "learning_rate": 1.192048079246576e-05,
      "loss": 2.5423,
      "step": 33957
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2110750675201416,
      "learning_rate": 1.1920076718344303e-05,
      "loss": 2.3472,
      "step": 33958
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0877153873443604,
      "learning_rate": 1.1919672640967794e-05,
      "loss": 2.389,
      "step": 33959
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1036213636398315,
      "learning_rate": 1.191926856033692e-05,
      "loss": 2.3611,
      "step": 33960
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1570634841918945,
      "learning_rate": 1.1918864476452364e-05,
      "loss": 2.3696,
      "step": 33961
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0593169927597046,
      "learning_rate": 1.1918460389314815e-05,
      "loss": 2.2888,
      "step": 33962
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0869864225387573,
      "learning_rate": 1.1918056298924949e-05,
      "loss": 2.3555,
      "step": 33963
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1055887937545776,
      "learning_rate": 1.1917652205283459e-05,
      "loss": 2.5478,
      "step": 33964
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.01589834690094,
      "learning_rate": 1.1917248108391028e-05,
      "loss": 2.2424,
      "step": 33965
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.026799201965332,
      "learning_rate": 1.191684400824834e-05,
      "loss": 2.3387,
      "step": 33966
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0034387111663818,
      "learning_rate": 1.1916439904856082e-05,
      "loss": 2.1137,
      "step": 33967
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.172398567199707,
      "learning_rate": 1.1916035798214935e-05,
      "loss": 2.2429,
      "step": 33968
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1251639127731323,
      "learning_rate": 1.1915631688325591e-05,
      "loss": 2.3277,
      "step": 33969
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2388395071029663,
      "learning_rate": 1.191522757518873e-05,
      "loss": 2.2201,
      "step": 33970
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0608240365982056,
      "learning_rate": 1.1914823458805036e-05,
      "loss": 2.5281,
      "step": 33971
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0796552896499634,
      "learning_rate": 1.1914419339175197e-05,
      "loss": 2.3507,
      "step": 33972
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0148195028305054,
      "learning_rate": 1.1914015216299896e-05,
      "loss": 2.0635,
      "step": 33973
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0259811878204346,
      "learning_rate": 1.1913611090179823e-05,
      "loss": 2.33,
      "step": 33974
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0301785469055176,
      "learning_rate": 1.1913206960815657e-05,
      "loss": 2.3645,
      "step": 33975
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3033450841903687,
      "learning_rate": 1.1912802828208088e-05,
      "loss": 2.5616,
      "step": 33976
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.006521224975586,
      "learning_rate": 1.1912398692357793e-05,
      "loss": 2.4496,
      "step": 33977
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0451728105545044,
      "learning_rate": 1.1911994553265469e-05,
      "loss": 2.4425,
      "step": 33978
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.126288890838623,
      "learning_rate": 1.191159041093179e-05,
      "loss": 2.4317,
      "step": 33979
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0372387170791626,
      "learning_rate": 1.191118626535745e-05,
      "loss": 2.2525,
      "step": 33980
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0612071752548218,
      "learning_rate": 1.191078211654313e-05,
      "loss": 2.439,
      "step": 33981
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0473450422286987,
      "learning_rate": 1.1910377964489514e-05,
      "loss": 2.5283,
      "step": 33982
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1103367805480957,
      "learning_rate": 1.1909973809197289e-05,
      "loss": 2.3434,
      "step": 33983
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0626819133758545,
      "learning_rate": 1.1909569650667141e-05,
      "loss": 2.3372,
      "step": 33984
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1194380521774292,
      "learning_rate": 1.190916548889975e-05,
      "loss": 2.2368,
      "step": 33985
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9953709244728088,
      "learning_rate": 1.1908761323895807e-05,
      "loss": 2.4334,
      "step": 33986
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0492128133773804,
      "learning_rate": 1.1908357155655996e-05,
      "loss": 2.2916,
      "step": 33987
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0715512037277222,
      "learning_rate": 1.1907952984181002e-05,
      "loss": 2.4015,
      "step": 33988
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0487792491912842,
      "learning_rate": 1.1907548809471507e-05,
      "loss": 2.3061,
      "step": 33989
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.080772042274475,
      "learning_rate": 1.1907144631528201e-05,
      "loss": 2.4077,
      "step": 33990
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.097332239151001,
      "learning_rate": 1.1906740450351763e-05,
      "loss": 2.2434,
      "step": 33991
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0668914318084717,
      "learning_rate": 1.1906336265942885e-05,
      "loss": 2.2694,
      "step": 33992
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1219587326049805,
      "learning_rate": 1.190593207830225e-05,
      "loss": 2.5395,
      "step": 33993
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0872536897659302,
      "learning_rate": 1.1905527887430539e-05,
      "loss": 2.5619,
      "step": 33994
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0571728944778442,
      "learning_rate": 1.1905123693328446e-05,
      "loss": 2.3499,
      "step": 33995
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9753775000572205,
      "learning_rate": 1.1904719495996648e-05,
      "loss": 2.351,
      "step": 33996
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9618228673934937,
      "learning_rate": 1.190431529543583e-05,
      "loss": 2.3261,
      "step": 33997
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.981885552406311,
      "learning_rate": 1.1903911091646684e-05,
      "loss": 2.5815,
      "step": 33998
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.002017855644226,
      "learning_rate": 1.1903506884629892e-05,
      "loss": 2.3396,
      "step": 33999
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0446574687957764,
      "learning_rate": 1.1903102674386138e-05,
      "loss": 2.4067,
      "step": 34000
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0064209699630737,
      "learning_rate": 1.1902698460916107e-05,
      "loss": 2.2691,
      "step": 34001
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.075266718864441,
      "learning_rate": 1.190229424422049e-05,
      "loss": 2.348,
      "step": 34002
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1194769144058228,
      "learning_rate": 1.1901890024299962e-05,
      "loss": 2.3382,
      "step": 34003
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0981560945510864,
      "learning_rate": 1.1901485801155215e-05,
      "loss": 2.4522,
      "step": 34004
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1092997789382935,
      "learning_rate": 1.1901081574786934e-05,
      "loss": 2.3599,
      "step": 34005
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0125205516815186,
      "learning_rate": 1.1900677345195804e-05,
      "loss": 2.5465,
      "step": 34006
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1231958866119385,
      "learning_rate": 1.1900273112382508e-05,
      "loss": 2.4865,
      "step": 34007
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0953550338745117,
      "learning_rate": 1.1899868876347736e-05,
      "loss": 2.359,
      "step": 34008
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.123394250869751,
      "learning_rate": 1.1899464637092169e-05,
      "loss": 2.382,
      "step": 34009
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0225083827972412,
      "learning_rate": 1.189906039461649e-05,
      "loss": 2.365,
      "step": 34010
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0524237155914307,
      "learning_rate": 1.1898656148921392e-05,
      "loss": 2.4751,
      "step": 34011
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0203313827514648,
      "learning_rate": 1.1898251900007555e-05,
      "loss": 2.379,
      "step": 34012
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.136082410812378,
      "learning_rate": 1.1897847647875667e-05,
      "loss": 2.3062,
      "step": 34013
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0663025379180908,
      "learning_rate": 1.189744339252641e-05,
      "loss": 2.5171,
      "step": 34014
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1040229797363281,
      "learning_rate": 1.1897039133960471e-05,
      "loss": 2.4089,
      "step": 34015
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2161695957183838,
      "learning_rate": 1.1896634872178537e-05,
      "loss": 2.6314,
      "step": 34016
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9926769137382507,
      "learning_rate": 1.1896230607181289e-05,
      "loss": 2.1532,
      "step": 34017
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0611432790756226,
      "learning_rate": 1.1895826338969416e-05,
      "loss": 2.4072,
      "step": 34018
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.057975172996521,
      "learning_rate": 1.1895422067543603e-05,
      "loss": 2.5405,
      "step": 34019
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0380010604858398,
      "learning_rate": 1.1895017792904535e-05,
      "loss": 2.4136,
      "step": 34020
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0024163722991943,
      "learning_rate": 1.18946135150529e-05,
      "loss": 2.4374,
      "step": 34021
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.981118381023407,
      "learning_rate": 1.1894209233989375e-05,
      "loss": 2.3966,
      "step": 34022
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0838277339935303,
      "learning_rate": 1.1893804949714654e-05,
      "loss": 2.4562,
      "step": 34023
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.031681776046753,
      "learning_rate": 1.1893400662229417e-05,
      "loss": 2.2718,
      "step": 34024
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0558743476867676,
      "learning_rate": 1.1892996371534353e-05,
      "loss": 2.4035,
      "step": 34025
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6156123876571655,
      "learning_rate": 1.1892592077630148e-05,
      "loss": 2.3864,
      "step": 34026
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9783244729042053,
      "learning_rate": 1.1892187780517482e-05,
      "loss": 2.3271,
      "step": 34027
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1390448808670044,
      "learning_rate": 1.1891783480197047e-05,
      "loss": 2.2157,
      "step": 34028
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.051567554473877,
      "learning_rate": 1.1891379176669525e-05,
      "loss": 2.2882,
      "step": 34029
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9764256477355957,
      "learning_rate": 1.1890974869935599e-05,
      "loss": 2.5098,
      "step": 34030
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0524311065673828,
      "learning_rate": 1.1890570559995957e-05,
      "loss": 2.294,
      "step": 34031
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.006443738937378,
      "learning_rate": 1.1890166246851287e-05,
      "loss": 2.3605,
      "step": 34032
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0546451807022095,
      "learning_rate": 1.1889761930502268e-05,
      "loss": 2.3466,
      "step": 34033
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0332797765731812,
      "learning_rate": 1.1889357610949592e-05,
      "loss": 2.5983,
      "step": 34034
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0751042366027832,
      "learning_rate": 1.1888953288193945e-05,
      "loss": 2.5615,
      "step": 34035
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0745412111282349,
      "learning_rate": 1.1888548962236004e-05,
      "loss": 2.3733,
      "step": 34036
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1993151903152466,
      "learning_rate": 1.1888144633076461e-05,
      "loss": 2.3972,
      "step": 34037
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0717191696166992,
      "learning_rate": 1.1887740300716e-05,
      "loss": 2.3491,
      "step": 34038
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9670485258102417,
      "learning_rate": 1.1887335965155306e-05,
      "loss": 2.2526,
      "step": 34039
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1414870023727417,
      "learning_rate": 1.1886931626395069e-05,
      "loss": 2.1883,
      "step": 34040
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0889476537704468,
      "learning_rate": 1.1886527284435967e-05,
      "loss": 2.455,
      "step": 34041
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.009647011756897,
      "learning_rate": 1.1886122939278687e-05,
      "loss": 2.4005,
      "step": 34042
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.046614646911621,
      "learning_rate": 1.1885718590923921e-05,
      "loss": 2.0946,
      "step": 34043
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1333224773406982,
      "learning_rate": 1.1885314239372347e-05,
      "loss": 2.2504,
      "step": 34044
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.071977972984314,
      "learning_rate": 1.1884909884624654e-05,
      "loss": 2.1014,
      "step": 34045
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0688796043395996,
      "learning_rate": 1.1884505526681527e-05,
      "loss": 2.2721,
      "step": 34046
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1564091444015503,
      "learning_rate": 1.188410116554365e-05,
      "loss": 2.4679,
      "step": 34047
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0594618320465088,
      "learning_rate": 1.188369680121171e-05,
      "loss": 2.2849,
      "step": 34048
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9482966065406799,
      "learning_rate": 1.1883292433686393e-05,
      "loss": 2.439,
      "step": 34049
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0264216661453247,
      "learning_rate": 1.1882888062968383e-05,
      "loss": 2.1643,
      "step": 34050
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0042651891708374,
      "learning_rate": 1.1882483689058369e-05,
      "loss": 2.5346,
      "step": 34051
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.044356346130371,
      "learning_rate": 1.188207931195703e-05,
      "loss": 2.5344,
      "step": 34052
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.029600977897644,
      "learning_rate": 1.1881674931665056e-05,
      "loss": 2.2192,
      "step": 34053
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0249961614608765,
      "learning_rate": 1.1881270548183137e-05,
      "loss": 2.34,
      "step": 34054
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9156482219696045,
      "learning_rate": 1.1880866161511948e-05,
      "loss": 2.2858,
      "step": 34055
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0510778427124023,
      "learning_rate": 1.1880461771652181e-05,
      "loss": 2.646,
      "step": 34056
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0697448253631592,
      "learning_rate": 1.188005737860452e-05,
      "loss": 2.5555,
      "step": 34057
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1134860515594482,
      "learning_rate": 1.1879652982369654e-05,
      "loss": 2.4859,
      "step": 34058
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.114288091659546,
      "learning_rate": 1.1879248582948264e-05,
      "loss": 2.4564,
      "step": 34059
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0223588943481445,
      "learning_rate": 1.1878844180341035e-05,
      "loss": 2.3068,
      "step": 34060
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0765328407287598,
      "learning_rate": 1.187843977454866e-05,
      "loss": 2.2609,
      "step": 34061
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0453100204467773,
      "learning_rate": 1.1878035365571817e-05,
      "loss": 2.3021,
      "step": 34062
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0491549968719482,
      "learning_rate": 1.1877630953411192e-05,
      "loss": 2.2881,
      "step": 34063
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0393246412277222,
      "learning_rate": 1.1877226538067475e-05,
      "loss": 2.3338,
      "step": 34064
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1866285800933838,
      "learning_rate": 1.1876822119541346e-05,
      "loss": 2.406,
      "step": 34065
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.068671703338623,
      "learning_rate": 1.1876417697833498e-05,
      "loss": 2.4099,
      "step": 34066
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9524410963058472,
      "learning_rate": 1.1876013272944609e-05,
      "loss": 2.3254,
      "step": 34067
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1634079217910767,
      "learning_rate": 1.1875608844875372e-05,
      "loss": 2.3567,
      "step": 34068
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9745323061943054,
      "learning_rate": 1.1875204413626466e-05,
      "loss": 2.4565,
      "step": 34069
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.007389783859253,
      "learning_rate": 1.1874799979198579e-05,
      "loss": 2.5214,
      "step": 34070
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1368705034255981,
      "learning_rate": 1.1874395541592398e-05,
      "loss": 2.5277,
      "step": 34071
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0386606454849243,
      "learning_rate": 1.1873991100808607e-05,
      "loss": 2.3438,
      "step": 34072
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0694749355316162,
      "learning_rate": 1.187358665684789e-05,
      "loss": 2.5771,
      "step": 34073
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0868275165557861,
      "learning_rate": 1.1873182209710939e-05,
      "loss": 2.6084,
      "step": 34074
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.063735008239746,
      "learning_rate": 1.1872777759398432e-05,
      "loss": 2.4765,
      "step": 34075
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1167479753494263,
      "learning_rate": 1.1872373305911058e-05,
      "loss": 2.2225,
      "step": 34076
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1170307397842407,
      "learning_rate": 1.1871968849249504e-05,
      "loss": 2.4244,
      "step": 34077
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0868628025054932,
      "learning_rate": 1.1871564389414454e-05,
      "loss": 2.3252,
      "step": 34078
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0326530933380127,
      "learning_rate": 1.1871159926406594e-05,
      "loss": 2.48,
      "step": 34079
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.009885311126709,
      "learning_rate": 1.1870755460226609e-05,
      "loss": 2.6353,
      "step": 34080
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0797399282455444,
      "learning_rate": 1.1870350990875189e-05,
      "loss": 2.2365,
      "step": 34081
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.208067774772644,
      "learning_rate": 1.1869946518353012e-05,
      "loss": 2.5009,
      "step": 34082
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1615691184997559,
      "learning_rate": 1.1869542042660769e-05,
      "loss": 2.3835,
      "step": 34083
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.120410442352295,
      "learning_rate": 1.1869137563799144e-05,
      "loss": 2.3979,
      "step": 34084
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.116189956665039,
      "learning_rate": 1.1868733081768824e-05,
      "loss": 2.2904,
      "step": 34085
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0586482286453247,
      "learning_rate": 1.1868328596570493e-05,
      "loss": 2.4231,
      "step": 34086
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9354944229125977,
      "learning_rate": 1.186792410820484e-05,
      "loss": 2.4164,
      "step": 34087
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0874438285827637,
      "learning_rate": 1.1867519616672545e-05,
      "loss": 2.3116,
      "step": 34088
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0325208902359009,
      "learning_rate": 1.18671151219743e-05,
      "loss": 2.585,
      "step": 34089
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1591548919677734,
      "learning_rate": 1.1866710624110784e-05,
      "loss": 2.3573,
      "step": 34090
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0068684816360474,
      "learning_rate": 1.186630612308269e-05,
      "loss": 2.2755,
      "step": 34091
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0761014223098755,
      "learning_rate": 1.1865901618890698e-05,
      "loss": 2.2754,
      "step": 34092
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0810656547546387,
      "learning_rate": 1.1865497111535498e-05,
      "loss": 2.2735,
      "step": 34093
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1126433610916138,
      "learning_rate": 1.1865092601017773e-05,
      "loss": 2.3368,
      "step": 34094
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4101308584213257,
      "learning_rate": 1.186468808733821e-05,
      "loss": 2.3907,
      "step": 34095
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3411951065063477,
      "learning_rate": 1.1864283570497492e-05,
      "loss": 2.1908,
      "step": 34096
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.309848666191101,
      "learning_rate": 1.186387905049631e-05,
      "loss": 2.3275,
      "step": 34097
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2556521892547607,
      "learning_rate": 1.1863474527335343e-05,
      "loss": 2.3604,
      "step": 34098
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4375776052474976,
      "learning_rate": 1.1863070001015284e-05,
      "loss": 2.4168,
      "step": 34099
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0358318090438843,
      "learning_rate": 1.1862665471536816e-05,
      "loss": 2.5453,
      "step": 34100
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9760684370994568,
      "learning_rate": 1.186226093890062e-05,
      "loss": 2.3064,
      "step": 34101
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0731236934661865,
      "learning_rate": 1.1861856403107393e-05,
      "loss": 2.4807,
      "step": 34102
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4746818542480469,
      "learning_rate": 1.1861451864157806e-05,
      "loss": 2.3322,
      "step": 34103
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9790118336677551,
      "learning_rate": 1.1861047322052557e-05,
      "loss": 2.6321,
      "step": 34104
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1279327869415283,
      "learning_rate": 1.1860642776792326e-05,
      "loss": 2.2228,
      "step": 34105
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.19244384765625,
      "learning_rate": 1.1860238228377798e-05,
      "loss": 2.4177,
      "step": 34106
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.250096321105957,
      "learning_rate": 1.1859833676809664e-05,
      "loss": 2.4488,
      "step": 34107
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1029633283615112,
      "learning_rate": 1.1859429122088606e-05,
      "loss": 2.3702,
      "step": 34108
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.116586685180664,
      "learning_rate": 1.185902456421531e-05,
      "loss": 2.4737,
      "step": 34109
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1293050050735474,
      "learning_rate": 1.1858620003190465e-05,
      "loss": 2.2972,
      "step": 34110
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9824316501617432,
      "learning_rate": 1.1858215439014751e-05,
      "loss": 2.3951,
      "step": 34111
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.058811068534851,
      "learning_rate": 1.1857810871688858e-05,
      "loss": 2.4286,
      "step": 34112
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0149720907211304,
      "learning_rate": 1.185740630121347e-05,
      "loss": 2.1999,
      "step": 34113
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9928925633430481,
      "learning_rate": 1.1857001727589277e-05,
      "loss": 2.3455,
      "step": 34114
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1243699789047241,
      "learning_rate": 1.185659715081696e-05,
      "loss": 2.4274,
      "step": 34115
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0227115154266357,
      "learning_rate": 1.1856192570897204e-05,
      "loss": 2.356,
      "step": 34116
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0480397939682007,
      "learning_rate": 1.1855787987830701e-05,
      "loss": 2.1903,
      "step": 34117
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1412345170974731,
      "learning_rate": 1.1855383401618133e-05,
      "loss": 2.3204,
      "step": 34118
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0411901473999023,
      "learning_rate": 1.1854978812260185e-05,
      "loss": 2.3336,
      "step": 34119
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1449533700942993,
      "learning_rate": 1.1854574219757547e-05,
      "loss": 2.4088,
      "step": 34120
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1206488609313965,
      "learning_rate": 1.1854169624110898e-05,
      "loss": 2.6112,
      "step": 34121
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3611335754394531,
      "learning_rate": 1.185376502532093e-05,
      "loss": 2.1978,
      "step": 34122
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1185741424560547,
      "learning_rate": 1.1853360423388326e-05,
      "loss": 2.2546,
      "step": 34123
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.988008439540863,
      "learning_rate": 1.1852955818313772e-05,
      "loss": 2.2156,
      "step": 34124
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.095902681350708,
      "learning_rate": 1.1852551210097956e-05,
      "loss": 2.1887,
      "step": 34125
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.058063268661499,
      "learning_rate": 1.1852146598741562e-05,
      "loss": 2.439,
      "step": 34126
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0877257585525513,
      "learning_rate": 1.1851741984245277e-05,
      "loss": 2.1878,
      "step": 34127
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0686599016189575,
      "learning_rate": 1.1851337366609786e-05,
      "loss": 2.3413,
      "step": 34128
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1942287683486938,
      "learning_rate": 1.1850932745835774e-05,
      "loss": 2.6209,
      "step": 34129
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0761185884475708,
      "learning_rate": 1.1850528121923933e-05,
      "loss": 2.2291,
      "step": 34130
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0578399896621704,
      "learning_rate": 1.185012349487494e-05,
      "loss": 2.0761,
      "step": 34131
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0354634523391724,
      "learning_rate": 1.1849718864689485e-05,
      "loss": 2.4296,
      "step": 34132
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.152030348777771,
      "learning_rate": 1.184931423136826e-05,
      "loss": 2.3418,
      "step": 34133
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.133938193321228,
      "learning_rate": 1.184890959491194e-05,
      "loss": 2.2658,
      "step": 34134
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.117005467414856,
      "learning_rate": 1.1848504955321213e-05,
      "loss": 2.3006,
      "step": 34135
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1814689636230469,
      "learning_rate": 1.1848100312596773e-05,
      "loss": 2.2307,
      "step": 34136
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.98912513256073,
      "learning_rate": 1.1847695666739299e-05,
      "loss": 2.2379,
      "step": 34137
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1957391500473022,
      "learning_rate": 1.1847291017749478e-05,
      "loss": 2.3036,
      "step": 34138
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0702409744262695,
      "learning_rate": 1.1846886365627999e-05,
      "loss": 2.378,
      "step": 34139
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.216342568397522,
      "learning_rate": 1.1846481710375547e-05,
      "loss": 2.7222,
      "step": 34140
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.034712791442871,
      "learning_rate": 1.1846077051992805e-05,
      "loss": 2.4095,
      "step": 34141
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2028846740722656,
      "learning_rate": 1.1845672390480463e-05,
      "loss": 2.33,
      "step": 34142
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.10944402217865,
      "learning_rate": 1.1845267725839202e-05,
      "loss": 2.2687,
      "step": 34143
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0107098817825317,
      "learning_rate": 1.1844863058069714e-05,
      "loss": 2.2447,
      "step": 34144
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.111020565032959,
      "learning_rate": 1.1844458387172678e-05,
      "loss": 2.2681,
      "step": 34145
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1261016130447388,
      "learning_rate": 1.1844053713148789e-05,
      "loss": 2.5438,
      "step": 34146
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0663737058639526,
      "learning_rate": 1.1843649035998726e-05,
      "loss": 2.1782,
      "step": 34147
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1114798784255981,
      "learning_rate": 1.1843244355723178e-05,
      "loss": 2.3148,
      "step": 34148
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1322102546691895,
      "learning_rate": 1.1842839672322828e-05,
      "loss": 2.4261,
      "step": 34149
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.097914457321167,
      "learning_rate": 1.1842434985798365e-05,
      "loss": 2.4577,
      "step": 34150
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0841593742370605,
      "learning_rate": 1.1842030296150475e-05,
      "loss": 2.2106,
      "step": 34151
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9610097408294678,
      "learning_rate": 1.1841625603379843e-05,
      "loss": 2.2651,
      "step": 34152
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1131185293197632,
      "learning_rate": 1.1841220907487157e-05,
      "loss": 2.3016,
      "step": 34153
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0891461372375488,
      "learning_rate": 1.18408162084731e-05,
      "loss": 2.3399,
      "step": 34154
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1612482070922852,
      "learning_rate": 1.1840411506338359e-05,
      "loss": 2.5871,
      "step": 34155
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0962681770324707,
      "learning_rate": 1.184000680108362e-05,
      "loss": 2.2952,
      "step": 34156
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0385708808898926,
      "learning_rate": 1.1839602092709572e-05,
      "loss": 2.2943,
      "step": 34157
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.055917501449585,
      "learning_rate": 1.18391973812169e-05,
      "loss": 2.3015,
      "step": 34158
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9221929907798767,
      "learning_rate": 1.1838792666606285e-05,
      "loss": 2.158,
      "step": 34159
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3234003782272339,
      "learning_rate": 1.183838794887842e-05,
      "loss": 2.2941,
      "step": 34160
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1648026704788208,
      "learning_rate": 1.183798322803399e-05,
      "loss": 2.2529,
      "step": 34161
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0318301916122437,
      "learning_rate": 1.1837578504073674e-05,
      "loss": 2.2546,
      "step": 34162
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2217611074447632,
      "learning_rate": 1.1837173776998166e-05,
      "loss": 2.2087,
      "step": 34163
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9607114791870117,
      "learning_rate": 1.1836769046808147e-05,
      "loss": 2.253,
      "step": 34164
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1726338863372803,
      "learning_rate": 1.183636431350431e-05,
      "loss": 2.5535,
      "step": 34165
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.029605507850647,
      "learning_rate": 1.1835959577087334e-05,
      "loss": 2.2879,
      "step": 34166
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1022917032241821,
      "learning_rate": 1.1835554837557908e-05,
      "loss": 2.2176,
      "step": 34167
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9940068125724792,
      "learning_rate": 1.183515009491672e-05,
      "loss": 2.2768,
      "step": 34168
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.493645429611206,
      "learning_rate": 1.1834745349164449e-05,
      "loss": 2.457,
      "step": 34169
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.129367709159851,
      "learning_rate": 1.183434060030179e-05,
      "loss": 2.4602,
      "step": 34170
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9251074194908142,
      "learning_rate": 1.1833935848329425e-05,
      "loss": 2.2686,
      "step": 34171
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0900077819824219,
      "learning_rate": 1.183353109324804e-05,
      "loss": 2.4727,
      "step": 34172
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0512714385986328,
      "learning_rate": 1.1833126335058326e-05,
      "loss": 2.4247,
      "step": 34173
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9508091807365417,
      "learning_rate": 1.1832721573760962e-05,
      "loss": 2.4478,
      "step": 34174
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0408387184143066,
      "learning_rate": 1.1832316809356636e-05,
      "loss": 2.6362,
      "step": 34175
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.0095429420471191,
      "learning_rate": 1.1831912041846034e-05,
      "loss": 2.3598,
      "step": 34176
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.985227108001709,
      "learning_rate": 1.1831507271229847e-05,
      "loss": 2.1938,
      "step": 34177
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0821911096572876,
      "learning_rate": 1.1831102497508755e-05,
      "loss": 2.5812,
      "step": 34178
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5902376174926758,
      "learning_rate": 1.1830697720683448e-05,
      "loss": 2.3152,
      "step": 34179
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1102982759475708,
      "learning_rate": 1.1830292940754616e-05,
      "loss": 2.2706,
      "step": 34180
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7598274946212769,
      "learning_rate": 1.1829888157722933e-05,
      "loss": 2.0912,
      "step": 34181
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0992743968963623,
      "learning_rate": 1.1829483371589096e-05,
      "loss": 2.6536,
      "step": 34182
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2016751766204834,
      "learning_rate": 1.1829078582353787e-05,
      "loss": 2.6045,
      "step": 34183
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2370736598968506,
      "learning_rate": 1.1828673790017693e-05,
      "loss": 2.0112,
      "step": 34184
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9582053422927856,
      "learning_rate": 1.18282689945815e-05,
      "loss": 2.5376,
      "step": 34185
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0921378135681152,
      "learning_rate": 1.1827864196045896e-05,
      "loss": 2.4133,
      "step": 34186
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1847790479660034,
      "learning_rate": 1.1827459394411567e-05,
      "loss": 2.3558,
      "step": 34187
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0640978813171387,
      "learning_rate": 1.1827054589679194e-05,
      "loss": 2.5571,
      "step": 34188
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0227530002593994,
      "learning_rate": 1.1826649781849473e-05,
      "loss": 2.4574,
      "step": 34189
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9432851076126099,
      "learning_rate": 1.182624497092308e-05,
      "loss": 2.443,
      "step": 34190
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0086592435836792,
      "learning_rate": 1.1825840156900706e-05,
      "loss": 2.3216,
      "step": 34191
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0742688179016113,
      "learning_rate": 1.1825435339783036e-05,
      "loss": 2.5116,
      "step": 34192
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9161570072174072,
      "learning_rate": 1.1825030519570762e-05,
      "loss": 2.4217,
      "step": 34193
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1204482316970825,
      "learning_rate": 1.1824625696264561e-05,
      "loss": 2.2794,
      "step": 34194
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.162488341331482,
      "learning_rate": 1.1824220869865124e-05,
      "loss": 2.1386,
      "step": 34195
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2236071825027466,
      "learning_rate": 1.182381604037314e-05,
      "loss": 2.2196,
      "step": 34196
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1079398393630981,
      "learning_rate": 1.182341120778929e-05,
      "loss": 2.5691,
      "step": 34197
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0433897972106934,
      "learning_rate": 1.1823006372114262e-05,
      "loss": 2.5459,
      "step": 34198
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1472841501235962,
      "learning_rate": 1.1822601533348748e-05,
      "loss": 2.2407,
      "step": 34199
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.100797176361084,
      "learning_rate": 1.1822196691493426e-05,
      "loss": 2.468,
      "step": 34200
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0557745695114136,
      "learning_rate": 1.1821791846548984e-05,
      "loss": 2.439,
      "step": 34201
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2556897401809692,
      "learning_rate": 1.1821386998516113e-05,
      "loss": 2.1987,
      "step": 34202
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0396445989608765,
      "learning_rate": 1.1820982147395495e-05,
      "loss": 2.3247,
      "step": 34203
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9907307028770447,
      "learning_rate": 1.1820577293187816e-05,
      "loss": 2.5237,
      "step": 34204
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9585602283477783,
      "learning_rate": 1.1820172435893767e-05,
      "loss": 2.4322,
      "step": 34205
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9842365384101868,
      "learning_rate": 1.1819767575514032e-05,
      "loss": 2.3155,
      "step": 34206
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.121522068977356,
      "learning_rate": 1.1819362712049294e-05,
      "loss": 2.3881,
      "step": 34207
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2050516605377197,
      "learning_rate": 1.1818957845500243e-05,
      "loss": 2.6049,
      "step": 34208
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1581634283065796,
      "learning_rate": 1.1818552975867562e-05,
      "loss": 2.4092,
      "step": 34209
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0641816854476929,
      "learning_rate": 1.1818148103151946e-05,
      "loss": 2.3494,
      "step": 34210
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9762142896652222,
      "learning_rate": 1.181774322735407e-05,
      "loss": 2.4745,
      "step": 34211
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0423054695129395,
      "learning_rate": 1.1817338348474627e-05,
      "loss": 2.2456,
      "step": 34212
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.946102499961853,
      "learning_rate": 1.1816933466514303e-05,
      "loss": 2.3326,
      "step": 34213
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0437071323394775,
      "learning_rate": 1.1816528581473782e-05,
      "loss": 2.5872,
      "step": 34214
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.092725396156311,
      "learning_rate": 1.1816123693353752e-05,
      "loss": 2.5216,
      "step": 34215
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1577365398406982,
      "learning_rate": 1.1815718802154899e-05,
      "loss": 2.5228,
      "step": 34216
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9921717047691345,
      "learning_rate": 1.181531390787791e-05,
      "loss": 2.2452,
      "step": 34217
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0152477025985718,
      "learning_rate": 1.181490901052347e-05,
      "loss": 2.2959,
      "step": 34218
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0673965215682983,
      "learning_rate": 1.1814504110092266e-05,
      "loss": 2.2811,
      "step": 34219
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1703892946243286,
      "learning_rate": 1.1814099206584988e-05,
      "loss": 2.6002,
      "step": 34220
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.685730457305908,
      "learning_rate": 1.1813694300002316e-05,
      "loss": 2.2118,
      "step": 34221
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.28544282913208,
      "learning_rate": 1.181328939034494e-05,
      "loss": 2.515,
      "step": 34222
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1657406091690063,
      "learning_rate": 1.1812884477613547e-05,
      "loss": 2.5353,
      "step": 34223
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9549759030342102,
      "learning_rate": 1.181247956180882e-05,
      "loss": 2.2788,
      "step": 34224
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0574171543121338,
      "learning_rate": 1.181207464293145e-05,
      "loss": 2.3267,
      "step": 34225
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1669970750808716,
      "learning_rate": 1.1811669720982121e-05,
      "loss": 2.5137,
      "step": 34226
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3612195253372192,
      "learning_rate": 1.181126479596152e-05,
      "loss": 2.5495,
      "step": 34227
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.162766933441162,
      "learning_rate": 1.1810859867870332e-05,
      "loss": 2.5392,
      "step": 34228
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.173405408859253,
      "learning_rate": 1.1810454936709246e-05,
      "loss": 2.1645,
      "step": 34229
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.064764380455017,
      "learning_rate": 1.1810050002478946e-05,
      "loss": 2.4599,
      "step": 34230
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0371217727661133,
      "learning_rate": 1.180964506518012e-05,
      "loss": 2.4624,
      "step": 34231
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.017726182937622,
      "learning_rate": 1.1809240124813457e-05,
      "loss": 2.3351,
      "step": 34232
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0221552848815918,
      "learning_rate": 1.1808835181379636e-05,
      "loss": 2.4993,
      "step": 34233
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4619522094726562,
      "learning_rate": 1.180843023487935e-05,
      "loss": 2.2775,
      "step": 34234
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1617416143417358,
      "learning_rate": 1.1808025285313285e-05,
      "loss": 2.4928,
      "step": 34235
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0685832500457764,
      "learning_rate": 1.1807620332682124e-05,
      "loss": 2.4031,
      "step": 34236
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1800211668014526,
      "learning_rate": 1.1807215376986555e-05,
      "loss": 2.1973,
      "step": 34237
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1486831903457642,
      "learning_rate": 1.1806810418227269e-05,
      "loss": 2.3907,
      "step": 34238
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.188804268836975,
      "learning_rate": 1.1806405456404945e-05,
      "loss": 2.2834,
      "step": 34239
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1018450260162354,
      "learning_rate": 1.1806000491520275e-05,
      "loss": 2.4045,
      "step": 34240
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3520681858062744,
      "learning_rate": 1.1805595523573942e-05,
      "loss": 2.2596,
      "step": 34241
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.116326928138733,
      "learning_rate": 1.1805190552566637e-05,
      "loss": 2.3248,
      "step": 34242
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.02881920337677,
      "learning_rate": 1.180478557849904e-05,
      "loss": 2.2402,
      "step": 34243
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0052638053894043,
      "learning_rate": 1.1804380601371843e-05,
      "loss": 2.2989,
      "step": 34244
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.185268521308899,
      "learning_rate": 1.1803975621185733e-05,
      "loss": 2.4922,
      "step": 34245
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0703065395355225,
      "learning_rate": 1.1803570637941393e-05,
      "loss": 2.1434,
      "step": 34246
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1502143144607544,
      "learning_rate": 1.1803165651639507e-05,
      "loss": 2.4448,
      "step": 34247
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2884762287139893,
      "learning_rate": 1.1802760662280772e-05,
      "loss": 2.345,
      "step": 34248
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1228750944137573,
      "learning_rate": 1.1802355669865863e-05,
      "loss": 2.2107,
      "step": 34249
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.040813684463501,
      "learning_rate": 1.1801950674395474e-05,
      "loss": 2.4624,
      "step": 34250
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9634605050086975,
      "learning_rate": 1.180154567587029e-05,
      "loss": 2.3202,
      "step": 34251
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4103529453277588,
      "learning_rate": 1.1801140674290997e-05,
      "loss": 2.3382,
      "step": 34252
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0351593494415283,
      "learning_rate": 1.180073566965828e-05,
      "loss": 2.4514,
      "step": 34253
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1651138067245483,
      "learning_rate": 1.1800330661972825e-05,
      "loss": 2.3202,
      "step": 34254
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.100954294204712,
      "learning_rate": 1.1799925651235322e-05,
      "loss": 2.3902,
      "step": 34255
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.103651762008667,
      "learning_rate": 1.1799520637446456e-05,
      "loss": 2.4458,
      "step": 34256
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1513651609420776,
      "learning_rate": 1.1799115620606913e-05,
      "loss": 2.4301,
      "step": 34257
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0336767435073853,
      "learning_rate": 1.1798710600717382e-05,
      "loss": 2.2628,
      "step": 34258
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1210864782333374,
      "learning_rate": 1.179830557777855e-05,
      "loss": 2.4317,
      "step": 34259
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1144577264785767,
      "learning_rate": 1.17979005517911e-05,
      "loss": 2.199,
      "step": 34260
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9746612310409546,
      "learning_rate": 1.1797495522755717e-05,
      "loss": 2.292,
      "step": 34261
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.258349895477295,
      "learning_rate": 1.1797090490673093e-05,
      "loss": 2.1452,
      "step": 34262
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0357707738876343,
      "learning_rate": 1.1796685455543915e-05,
      "loss": 2.3944,
      "step": 34263
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0605137348175049,
      "learning_rate": 1.1796280417368864e-05,
      "loss": 2.5331,
      "step": 34264
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9746150374412537,
      "learning_rate": 1.1795875376148633e-05,
      "loss": 2.2207,
      "step": 34265
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.047208547592163,
      "learning_rate": 1.1795470331883903e-05,
      "loss": 2.2318,
      "step": 34266
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3653653860092163,
      "learning_rate": 1.1795065284575361e-05,
      "loss": 2.8405,
      "step": 34267
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.212872862815857,
      "learning_rate": 1.17946602342237e-05,
      "loss": 2.344,
      "step": 34268
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.089548945426941,
      "learning_rate": 1.1794255180829601e-05,
      "loss": 2.2242,
      "step": 34269
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5307722091674805,
      "learning_rate": 1.1793850124393752e-05,
      "loss": 2.3197,
      "step": 34270
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0194361209869385,
      "learning_rate": 1.179344506491684e-05,
      "loss": 2.4344,
      "step": 34271
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.097768783569336,
      "learning_rate": 1.1793040002399551e-05,
      "loss": 2.3273,
      "step": 34272
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1019457578659058,
      "learning_rate": 1.1792634936842573e-05,
      "loss": 2.3625,
      "step": 34273
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0184396505355835,
      "learning_rate": 1.1792229868246592e-05,
      "loss": 2.5905,
      "step": 34274
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0271350145339966,
      "learning_rate": 1.1791824796612295e-05,
      "loss": 2.2682,
      "step": 34275
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2015269994735718,
      "learning_rate": 1.1791419721940368e-05,
      "loss": 2.3865,
      "step": 34276
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1796836853027344,
      "learning_rate": 1.1791014644231497e-05,
      "loss": 2.3309,
      "step": 34277
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0135999917984009,
      "learning_rate": 1.1790609563486371e-05,
      "loss": 2.4285,
      "step": 34278
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.226760983467102,
      "learning_rate": 1.1790204479705675e-05,
      "loss": 2.4597,
      "step": 34279
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0450031757354736,
      "learning_rate": 1.1789799392890096e-05,
      "loss": 2.1177,
      "step": 34280
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.060212254524231,
      "learning_rate": 1.178939430304032e-05,
      "loss": 2.3637,
      "step": 34281
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0445839166641235,
      "learning_rate": 1.1788989210157036e-05,
      "loss": 2.2544,
      "step": 34282
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1818246841430664,
      "learning_rate": 1.1788584114240928e-05,
      "loss": 2.51,
      "step": 34283
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1130658388137817,
      "learning_rate": 1.1788179015292684e-05,
      "loss": 2.3606,
      "step": 34284
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.066979169845581,
      "learning_rate": 1.1787773913312995e-05,
      "loss": 2.4433,
      "step": 34285
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0889673233032227,
      "learning_rate": 1.178736880830254e-05,
      "loss": 2.5712,
      "step": 34286
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0512946844100952,
      "learning_rate": 1.1786963700262008e-05,
      "loss": 2.3041,
      "step": 34287
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0864636898040771,
      "learning_rate": 1.178655858919209e-05,
      "loss": 2.4763,
      "step": 34288
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3341069221496582,
      "learning_rate": 1.1786153475093469e-05,
      "loss": 2.3684,
      "step": 34289
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.174148678779602,
      "learning_rate": 1.1785748357966833e-05,
      "loss": 2.3851,
      "step": 34290
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5564556121826172,
      "learning_rate": 1.1785343237812867e-05,
      "loss": 2.3862,
      "step": 34291
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1498371362686157,
      "learning_rate": 1.1784938114632266e-05,
      "loss": 2.5182,
      "step": 34292
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.140332579612732,
      "learning_rate": 1.1784532988425704e-05,
      "loss": 2.1574,
      "step": 34293
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0204274654388428,
      "learning_rate": 1.1784127859193872e-05,
      "loss": 2.0683,
      "step": 34294
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0363342761993408,
      "learning_rate": 1.1783722726937464e-05,
      "loss": 2.4801,
      "step": 34295
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0208014249801636,
      "learning_rate": 1.1783317591657158e-05,
      "loss": 2.4888,
      "step": 34296
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9917169809341431,
      "learning_rate": 1.1782912453353643e-05,
      "loss": 2.338,
      "step": 34297
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.151736855506897,
      "learning_rate": 1.1782507312027614e-05,
      "loss": 2.2419,
      "step": 34298
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0756982564926147,
      "learning_rate": 1.1782102167679746e-05,
      "loss": 2.6073,
      "step": 34299
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3694448471069336,
      "learning_rate": 1.178169702031073e-05,
      "loss": 2.4182,
      "step": 34300
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1649508476257324,
      "learning_rate": 1.1781291869921254e-05,
      "loss": 2.2066,
      "step": 34301
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.034698724746704,
      "learning_rate": 1.1780886716512005e-05,
      "loss": 2.4712,
      "step": 34302
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0412927865982056,
      "learning_rate": 1.178048156008367e-05,
      "loss": 2.3149,
      "step": 34303
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9562123417854309,
      "learning_rate": 1.1780076400636937e-05,
      "loss": 2.29,
      "step": 34304
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2785730361938477,
      "learning_rate": 1.1779671238172487e-05,
      "loss": 2.4302,
      "step": 34305
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1428064107894897,
      "learning_rate": 1.1779266072691013e-05,
      "loss": 2.4416,
      "step": 34306
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0079410076141357,
      "learning_rate": 1.1778860904193203e-05,
      "loss": 2.2746,
      "step": 34307
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.068442940711975,
      "learning_rate": 1.1778455732679736e-05,
      "loss": 2.3697,
      "step": 34308
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0325593948364258,
      "learning_rate": 1.1778050558151307e-05,
      "loss": 2.4379,
      "step": 34309
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.8980663418769836,
      "learning_rate": 1.1777645380608595e-05,
      "loss": 2.2959,
      "step": 34310
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0038321018218994,
      "learning_rate": 1.1777240200052296e-05,
      "loss": 2.2848,
      "step": 34311
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.143565058708191,
      "learning_rate": 1.1776835016483088e-05,
      "loss": 2.312,
      "step": 34312
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0956625938415527,
      "learning_rate": 1.1776429829901666e-05,
      "loss": 2.3293,
      "step": 34313
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1818647384643555,
      "learning_rate": 1.177602464030871e-05,
      "loss": 2.2791,
      "step": 34314
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1243407726287842,
      "learning_rate": 1.1775619447704909e-05,
      "loss": 2.3628,
      "step": 34315
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.130351185798645,
      "learning_rate": 1.1775214252090951e-05,
      "loss": 2.3898,
      "step": 34316
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.017988920211792,
      "learning_rate": 1.1774809053467524e-05,
      "loss": 2.3869,
      "step": 34317
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2687417268753052,
      "learning_rate": 1.1774403851835316e-05,
      "loss": 2.3614,
      "step": 34318
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.203434944152832,
      "learning_rate": 1.1773998647195009e-05,
      "loss": 2.2737,
      "step": 34319
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.289913296699524,
      "learning_rate": 1.1773593439547293e-05,
      "loss": 2.214,
      "step": 34320
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0794610977172852,
      "learning_rate": 1.1773188228892853e-05,
      "loss": 2.2245,
      "step": 34321
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1022883653640747,
      "learning_rate": 1.1772783015232378e-05,
      "loss": 2.2531,
      "step": 34322
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9312551617622375,
      "learning_rate": 1.1772377798566557e-05,
      "loss": 2.3242,
      "step": 34323
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0641930103302002,
      "learning_rate": 1.1771972578896072e-05,
      "loss": 2.4463,
      "step": 34324
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1197863817214966,
      "learning_rate": 1.177156735622161e-05,
      "loss": 2.4401,
      "step": 34325
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0334343910217285,
      "learning_rate": 1.1771162130543865e-05,
      "loss": 2.5659,
      "step": 34326
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0325618982315063,
      "learning_rate": 1.1770756901863516e-05,
      "loss": 2.2186,
      "step": 34327
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.178202748298645,
      "learning_rate": 1.1770351670181253e-05,
      "loss": 2.3312,
      "step": 34328
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0680075883865356,
      "learning_rate": 1.1769946435497763e-05,
      "loss": 2.3939,
      "step": 34329
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0813285112380981,
      "learning_rate": 1.1769541197813734e-05,
      "loss": 2.4227,
      "step": 34330
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0444343090057373,
      "learning_rate": 1.1769135957129854e-05,
      "loss": 2.1241,
      "step": 34331
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0360723733901978,
      "learning_rate": 1.1768730713446805e-05,
      "loss": 2.1723,
      "step": 34332
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9960413575172424,
      "learning_rate": 1.1768325466765278e-05,
      "loss": 2.4687,
      "step": 34333
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1146776676177979,
      "learning_rate": 1.176792021708596e-05,
      "loss": 2.4087,
      "step": 34334
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2123205661773682,
      "learning_rate": 1.1767514964409539e-05,
      "loss": 2.3552,
      "step": 34335
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.089335560798645,
      "learning_rate": 1.1767109708736695e-05,
      "loss": 2.2872,
      "step": 34336
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0375314950942993,
      "learning_rate": 1.1766704450068122e-05,
      "loss": 2.3938,
      "step": 34337
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0058543682098389,
      "learning_rate": 1.1766299188404509e-05,
      "loss": 2.7098,
      "step": 34338
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1834137439727783,
      "learning_rate": 1.1765893923746534e-05,
      "loss": 2.5587,
      "step": 34339
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1446048021316528,
      "learning_rate": 1.1765488656094892e-05,
      "loss": 2.5957,
      "step": 34340
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2314972877502441,
      "learning_rate": 1.1765083385450264e-05,
      "loss": 2.4596,
      "step": 34341
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0578311681747437,
      "learning_rate": 1.1764678111813344e-05,
      "loss": 2.3259,
      "step": 34342
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9923766255378723,
      "learning_rate": 1.1764272835184813e-05,
      "loss": 2.2924,
      "step": 34343
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0222831964492798,
      "learning_rate": 1.1763867555565362e-05,
      "loss": 2.2604,
      "step": 34344
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.167519450187683,
      "learning_rate": 1.1763462272955677e-05,
      "loss": 2.304,
      "step": 34345
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0206949710845947,
      "learning_rate": 1.1763056987356442e-05,
      "loss": 2.2137,
      "step": 34346
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0804193019866943,
      "learning_rate": 1.1762651698768349e-05,
      "loss": 2.3857,
      "step": 34347
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1690014600753784,
      "learning_rate": 1.176224640719208e-05,
      "loss": 2.1113,
      "step": 34348
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0633450746536255,
      "learning_rate": 1.1761841112628325e-05,
      "loss": 2.373,
      "step": 34349
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.175215482711792,
      "learning_rate": 1.176143581507777e-05,
      "loss": 2.3477,
      "step": 34350
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1209851503372192,
      "learning_rate": 1.1761030514541108e-05,
      "loss": 2.5287,
      "step": 34351
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0760560035705566,
      "learning_rate": 1.1760625211019016e-05,
      "loss": 2.3638,
      "step": 34352
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0786842107772827,
      "learning_rate": 1.176021990451219e-05,
      "loss": 2.4377,
      "step": 34353
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1632803678512573,
      "learning_rate": 1.1759814595021308e-05,
      "loss": 2.5833,
      "step": 34354
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0441179275512695,
      "learning_rate": 1.1759409282547065e-05,
      "loss": 2.4253,
      "step": 34355
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0874956846237183,
      "learning_rate": 1.1759003967090146e-05,
      "loss": 2.1429,
      "step": 34356
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0220606327056885,
      "learning_rate": 1.1758598648651237e-05,
      "loss": 2.3753,
      "step": 34357
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9558011889457703,
      "learning_rate": 1.1758193327231026e-05,
      "loss": 2.3998,
      "step": 34358
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1120696067810059,
      "learning_rate": 1.1757788002830201e-05,
      "loss": 2.2801,
      "step": 34359
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1176748275756836,
      "learning_rate": 1.1757382675449447e-05,
      "loss": 2.1952,
      "step": 34360
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9660422205924988,
      "learning_rate": 1.175697734508945e-05,
      "loss": 2.4197,
      "step": 34361
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0621528625488281,
      "learning_rate": 1.1756572011750902e-05,
      "loss": 2.3591,
      "step": 34362
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.975489616394043,
      "learning_rate": 1.1756166675434489e-05,
      "loss": 2.2758,
      "step": 34363
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0237340927124023,
      "learning_rate": 1.1755761336140892e-05,
      "loss": 2.408,
      "step": 34364
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0369651317596436,
      "learning_rate": 1.1755355993870806e-05,
      "loss": 2.4122,
      "step": 34365
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1989437341690063,
      "learning_rate": 1.1754950648624912e-05,
      "loss": 2.2136,
      "step": 34366
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3577461242675781,
      "learning_rate": 1.1754545300403901e-05,
      "loss": 2.4824,
      "step": 34367
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0206536054611206,
      "learning_rate": 1.1754139949208461e-05,
      "loss": 2.569,
      "step": 34368
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9926642179489136,
      "learning_rate": 1.1753734595039275e-05,
      "loss": 2.1967,
      "step": 34369
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1604313850402832,
      "learning_rate": 1.1753329237897034e-05,
      "loss": 2.2614,
      "step": 34370
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0682426691055298,
      "learning_rate": 1.1752923877782424e-05,
      "loss": 2.3579,
      "step": 34371
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.081987738609314,
      "learning_rate": 1.175251851469613e-05,
      "loss": 2.3549,
      "step": 34372
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.287243127822876,
      "learning_rate": 1.1752113148638842e-05,
      "loss": 2.4858,
      "step": 34373
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1532405614852905,
      "learning_rate": 1.1751707779611246e-05,
      "loss": 2.3461,
      "step": 34374
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1674085855484009,
      "learning_rate": 1.1751302407614029e-05,
      "loss": 2.5412,
      "step": 34375
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.005544662475586,
      "learning_rate": 1.1750897032647882e-05,
      "loss": 2.5943,
      "step": 34376
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1037181615829468,
      "learning_rate": 1.1750491654713487e-05,
      "loss": 2.4323,
      "step": 34377
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0549861192703247,
      "learning_rate": 1.1750086273811535e-05,
      "loss": 2.3732,
      "step": 34378
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.06850004196167,
      "learning_rate": 1.1749680889942707e-05,
      "loss": 2.5308,
      "step": 34379
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1416047811508179,
      "learning_rate": 1.1749275503107697e-05,
      "loss": 2.2684,
      "step": 34380
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9994296431541443,
      "learning_rate": 1.1748870113307189e-05,
      "loss": 2.2974,
      "step": 34381
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.089623212814331,
      "learning_rate": 1.1748464720541873e-05,
      "loss": 2.2281,
      "step": 34382
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0870983600616455,
      "learning_rate": 1.1748059324812434e-05,
      "loss": 2.414,
      "step": 34383
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.131907343864441,
      "learning_rate": 1.174765392611956e-05,
      "loss": 2.4893,
      "step": 34384
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0484817028045654,
      "learning_rate": 1.1747248524463939e-05,
      "loss": 2.4459,
      "step": 34385
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0490976572036743,
      "learning_rate": 1.1746843119846255e-05,
      "loss": 2.1837,
      "step": 34386
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0986294746398926,
      "learning_rate": 1.1746437712267196e-05,
      "loss": 2.4954,
      "step": 34387
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2091858386993408,
      "learning_rate": 1.1746032301727452e-05,
      "loss": 2.4632,
      "step": 34388
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2997177839279175,
      "learning_rate": 1.174562688822771e-05,
      "loss": 2.3,
      "step": 34389
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1207869052886963,
      "learning_rate": 1.1745221471768659e-05,
      "loss": 2.4687,
      "step": 34390
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9997990727424622,
      "learning_rate": 1.174481605235098e-05,
      "loss": 2.269,
      "step": 34391
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0458511114120483,
      "learning_rate": 1.1744410629975365e-05,
      "loss": 2.4759,
      "step": 34392
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0743064880371094,
      "learning_rate": 1.17440052046425e-05,
      "loss": 2.1636,
      "step": 34393
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0936530828475952,
      "learning_rate": 1.1743599776353074e-05,
      "loss": 2.2549,
      "step": 34394
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0244618654251099,
      "learning_rate": 1.1743194345107769e-05,
      "loss": 2.4401,
      "step": 34395
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9464240074157715,
      "learning_rate": 1.174278891090728e-05,
      "loss": 2.29,
      "step": 34396
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0227351188659668,
      "learning_rate": 1.174238347375229e-05,
      "loss": 2.4164,
      "step": 34397
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9644157290458679,
      "learning_rate": 1.1741978033643485e-05,
      "loss": 2.2621,
      "step": 34398
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2092598676681519,
      "learning_rate": 1.1741572590581554e-05,
      "loss": 2.2252,
      "step": 34399
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.195737361907959,
      "learning_rate": 1.1741167144567186e-05,
      "loss": 2.2444,
      "step": 34400
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9597112536430359,
      "learning_rate": 1.1740761695601064e-05,
      "loss": 2.5053,
      "step": 34401
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0105563402175903,
      "learning_rate": 1.1740356243683881e-05,
      "loss": 2.3329,
      "step": 34402
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.062301516532898,
      "learning_rate": 1.173995078881632e-05,
      "loss": 2.7686,
      "step": 34403
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1467303037643433,
      "learning_rate": 1.1739545330999071e-05,
      "loss": 2.4975,
      "step": 34404
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.092270851135254,
      "learning_rate": 1.1739139870232821e-05,
      "loss": 2.7523,
      "step": 34405
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0587997436523438,
      "learning_rate": 1.1738734406518255e-05,
      "loss": 2.1714,
      "step": 34406
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.155820369720459,
      "learning_rate": 1.1738328939856061e-05,
      "loss": 2.2816,
      "step": 34407
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7850881814956665,
      "learning_rate": 1.1737923470246928e-05,
      "loss": 2.1733,
      "step": 34408
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1534695625305176,
      "learning_rate": 1.1737517997691544e-05,
      "loss": 2.2771,
      "step": 34409
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.112654447555542,
      "learning_rate": 1.1737112522190595e-05,
      "loss": 2.3985,
      "step": 34410
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.136705756187439,
      "learning_rate": 1.1736707043744769e-05,
      "loss": 2.4726,
      "step": 34411
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0617105960845947,
      "learning_rate": 1.1736301562354751e-05,
      "loss": 2.4961,
      "step": 34412
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0021785497665405,
      "learning_rate": 1.173589607802123e-05,
      "loss": 2.5124,
      "step": 34413
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2026891708374023,
      "learning_rate": 1.1735490590744893e-05,
      "loss": 2.33,
      "step": 34414
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9830374121665955,
      "learning_rate": 1.173508510052643e-05,
      "loss": 2.2154,
      "step": 34415
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.043371319770813,
      "learning_rate": 1.1734679607366525e-05,
      "loss": 2.3746,
      "step": 34416
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0470372438430786,
      "learning_rate": 1.1734274111265871e-05,
      "loss": 2.4282,
      "step": 34417
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0710289478302002,
      "learning_rate": 1.1733868612225147e-05,
      "loss": 2.4404,
      "step": 34418
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.046790361404419,
      "learning_rate": 1.1733463110245047e-05,
      "loss": 2.5307,
      "step": 34419
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1446466445922852,
      "learning_rate": 1.1733057605326255e-05,
      "loss": 2.3188,
      "step": 34420
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.033401608467102,
      "learning_rate": 1.173265209746946e-05,
      "loss": 2.508,
      "step": 34421
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0464264154434204,
      "learning_rate": 1.1732246586675352e-05,
      "loss": 2.5759,
      "step": 34422
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2227909564971924,
      "learning_rate": 1.1731841072944611e-05,
      "loss": 2.4031,
      "step": 34423
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.056776523590088,
      "learning_rate": 1.1731435556277933e-05,
      "loss": 2.5297,
      "step": 34424
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0805367231369019,
      "learning_rate": 1.1731030036675998e-05,
      "loss": 2.2673,
      "step": 34425
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.014911413192749,
      "learning_rate": 1.1730624514139498e-05,
      "loss": 2.3284,
      "step": 34426
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1890733242034912,
      "learning_rate": 1.1730218988669122e-05,
      "loss": 2.4623,
      "step": 34427
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0400588512420654,
      "learning_rate": 1.1729813460265552e-05,
      "loss": 2.4169,
      "step": 34428
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.102537989616394,
      "learning_rate": 1.1729407928929479e-05,
      "loss": 2.3922,
      "step": 34429
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.06725013256073,
      "learning_rate": 1.1729002394661591e-05,
      "loss": 2.5198,
      "step": 34430
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9724226593971252,
      "learning_rate": 1.1728596857462573e-05,
      "loss": 2.2717,
      "step": 34431
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1766672134399414,
      "learning_rate": 1.1728191317333112e-05,
      "loss": 2.3136,
      "step": 34432
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1477853059768677,
      "learning_rate": 1.1727785774273902e-05,
      "loss": 2.371,
      "step": 34433
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1154528856277466,
      "learning_rate": 1.172738022828562e-05,
      "loss": 2.2098,
      "step": 34434
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.139596939086914,
      "learning_rate": 1.1726974679368962e-05,
      "loss": 2.3074,
      "step": 34435
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0529835224151611,
      "learning_rate": 1.1726569127524614e-05,
      "loss": 2.2857,
      "step": 34436
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9923197031021118,
      "learning_rate": 1.1726163572753264e-05,
      "loss": 2.368,
      "step": 34437
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1145371198654175,
      "learning_rate": 1.1725758015055594e-05,
      "loss": 2.497,
      "step": 34438
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1906933784484863,
      "learning_rate": 1.1725352454432299e-05,
      "loss": 2.5159,
      "step": 34439
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0181454420089722,
      "learning_rate": 1.172494689088406e-05,
      "loss": 2.3033,
      "step": 34440
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0686026811599731,
      "learning_rate": 1.1724541324411569e-05,
      "loss": 2.5131,
      "step": 34441
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1724416017532349,
      "learning_rate": 1.172413575501551e-05,
      "loss": 2.3589,
      "step": 34442
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.071046233177185,
      "learning_rate": 1.1723730182696578e-05,
      "loss": 2.3962,
      "step": 34443
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.026191234588623,
      "learning_rate": 1.1723324607455452e-05,
      "loss": 2.3281,
      "step": 34444
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0236490964889526,
      "learning_rate": 1.1722919029292821e-05,
      "loss": 2.1729,
      "step": 34445
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0535005331039429,
      "learning_rate": 1.1722513448209377e-05,
      "loss": 2.3294,
      "step": 34446
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9978026747703552,
      "learning_rate": 1.1722107864205803e-05,
      "loss": 2.3113,
      "step": 34447
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0818836688995361,
      "learning_rate": 1.172170227728279e-05,
      "loss": 2.4437,
      "step": 34448
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9965366721153259,
      "learning_rate": 1.1721296687441022e-05,
      "loss": 2.2521,
      "step": 34449
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0629451274871826,
      "learning_rate": 1.172089109468119e-05,
      "loss": 2.4605,
      "step": 34450
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1221705675125122,
      "learning_rate": 1.172048549900398e-05,
      "loss": 2.2438,
      "step": 34451
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2041786909103394,
      "learning_rate": 1.172007990041008e-05,
      "loss": 2.4761,
      "step": 34452
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9520442485809326,
      "learning_rate": 1.171967429890018e-05,
      "loss": 2.3154,
      "step": 34453
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.736380696296692,
      "learning_rate": 1.1719268694474962e-05,
      "loss": 2.5223,
      "step": 34454
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2232950925827026,
      "learning_rate": 1.1718863087135115e-05,
      "loss": 2.4397,
      "step": 34455
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0420386791229248,
      "learning_rate": 1.171845747688133e-05,
      "loss": 2.4449,
      "step": 34456
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.106127142906189,
      "learning_rate": 1.1718051863714295e-05,
      "loss": 2.5821,
      "step": 34457
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0559816360473633,
      "learning_rate": 1.171764624763469e-05,
      "loss": 2.3617,
      "step": 34458
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0649549961090088,
      "learning_rate": 1.1717240628643215e-05,
      "loss": 2.4715,
      "step": 34459
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0437390804290771,
      "learning_rate": 1.1716835006740546e-05,
      "loss": 2.5845,
      "step": 34460
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0849186182022095,
      "learning_rate": 1.1716429381927374e-05,
      "loss": 2.5158,
      "step": 34461
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.108735203742981,
      "learning_rate": 1.1716023754204391e-05,
      "loss": 2.2362,
      "step": 34462
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9995266199111938,
      "learning_rate": 1.1715618123572282e-05,
      "loss": 2.4198,
      "step": 34463
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0968419313430786,
      "learning_rate": 1.1715212490031735e-05,
      "loss": 2.1771,
      "step": 34464
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0605828762054443,
      "learning_rate": 1.1714806853583434e-05,
      "loss": 2.4653,
      "step": 34465
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1126538515090942,
      "learning_rate": 1.171440121422807e-05,
      "loss": 2.3703,
      "step": 34466
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0495458841323853,
      "learning_rate": 1.1713995571966333e-05,
      "loss": 2.4146,
      "step": 34467
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.04495108127594,
      "learning_rate": 1.1713589926798905e-05,
      "loss": 2.5642,
      "step": 34468
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1187196969985962,
      "learning_rate": 1.1713184278726479e-05,
      "loss": 2.4653,
      "step": 34469
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1595046520233154,
      "learning_rate": 1.1712778627749741e-05,
      "loss": 2.4748,
      "step": 34470
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9801645278930664,
      "learning_rate": 1.1712372973869377e-05,
      "loss": 2.3535,
      "step": 34471
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9935697317123413,
      "learning_rate": 1.1711967317086074e-05,
      "loss": 2.1667,
      "step": 34472
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.08502197265625,
      "learning_rate": 1.1711561657400524e-05,
      "loss": 2.2802,
      "step": 34473
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0846493244171143,
      "learning_rate": 1.1711155994813411e-05,
      "loss": 2.2153,
      "step": 34474
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1208345890045166,
      "learning_rate": 1.1710750329325422e-05,
      "loss": 2.3194,
      "step": 34475
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4170966148376465,
      "learning_rate": 1.171034466093725e-05,
      "loss": 2.4607,
      "step": 34476
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1586521863937378,
      "learning_rate": 1.1709938989649577e-05,
      "loss": 2.6057,
      "step": 34477
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0884531736373901,
      "learning_rate": 1.1709533315463093e-05,
      "loss": 2.4619,
      "step": 34478
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1530159711837769,
      "learning_rate": 1.1709127638378486e-05,
      "loss": 2.3912,
      "step": 34479
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0800386667251587,
      "learning_rate": 1.1708721958396442e-05,
      "loss": 2.281,
      "step": 34480
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1092824935913086,
      "learning_rate": 1.1708316275517656e-05,
      "loss": 2.491,
      "step": 34481
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0917104482650757,
      "learning_rate": 1.1707910589742804e-05,
      "loss": 2.2555,
      "step": 34482
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9870944023132324,
      "learning_rate": 1.1707504901072582e-05,
      "loss": 2.2053,
      "step": 34483
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1111769676208496,
      "learning_rate": 1.1707099209507678e-05,
      "loss": 2.4253,
      "step": 34484
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9505630135536194,
      "learning_rate": 1.1706693515048773e-05,
      "loss": 2.6138,
      "step": 34485
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9762005805969238,
      "learning_rate": 1.170628781769656e-05,
      "loss": 2.3271,
      "step": 34486
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0381803512573242,
      "learning_rate": 1.1705882117451726e-05,
      "loss": 2.5397,
      "step": 34487
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0486801862716675,
      "learning_rate": 1.1705476414314956e-05,
      "loss": 2.2397,
      "step": 34488
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0274343490600586,
      "learning_rate": 1.1705070708286945e-05,
      "loss": 2.4703,
      "step": 34489
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1055612564086914,
      "learning_rate": 1.1704664999368373e-05,
      "loss": 2.474,
      "step": 34490
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0964518785476685,
      "learning_rate": 1.1704259287559932e-05,
      "loss": 2.2765,
      "step": 34491
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0354816913604736,
      "learning_rate": 1.1703853572862307e-05,
      "loss": 2.2986,
      "step": 34492
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9902992248535156,
      "learning_rate": 1.170344785527619e-05,
      "loss": 2.4732,
      "step": 34493
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9723275303840637,
      "learning_rate": 1.1703042134802262e-05,
      "loss": 2.3493,
      "step": 34494
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2461512088775635,
      "learning_rate": 1.170263641144122e-05,
      "loss": 2.198,
      "step": 34495
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1164487600326538,
      "learning_rate": 1.1702230685193745e-05,
      "loss": 2.4837,
      "step": 34496
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9854599237442017,
      "learning_rate": 1.1701824956060525e-05,
      "loss": 2.2904,
      "step": 34497
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1604169607162476,
      "learning_rate": 1.1701419224042252e-05,
      "loss": 2.4695,
      "step": 34498
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.992052435874939,
      "learning_rate": 1.170101348913961e-05,
      "loss": 2.3485,
      "step": 34499
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0461671352386475,
      "learning_rate": 1.1700607751353288e-05,
      "loss": 2.3512,
      "step": 34500
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.143320918083191,
      "learning_rate": 1.1700202010683975e-05,
      "loss": 2.2315,
      "step": 34501
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0374363660812378,
      "learning_rate": 1.1699796267132357e-05,
      "loss": 2.1925,
      "step": 34502
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0569007396697998,
      "learning_rate": 1.1699390520699123e-05,
      "loss": 2.0148,
      "step": 34503
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0336750745773315,
      "learning_rate": 1.169898477138496e-05,
      "loss": 2.3319,
      "step": 34504
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0125446319580078,
      "learning_rate": 1.1698579019190557e-05,
      "loss": 2.4597,
      "step": 34505
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0292212963104248,
      "learning_rate": 1.1698173264116601e-05,
      "loss": 2.2378,
      "step": 34506
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0056649446487427,
      "learning_rate": 1.169776750616378e-05,
      "loss": 2.4114,
      "step": 34507
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1001880168914795,
      "learning_rate": 1.169736174533278e-05,
      "loss": 2.4365,
      "step": 34508
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9867892265319824,
      "learning_rate": 1.1696955981624296e-05,
      "loss": 2.329,
      "step": 34509
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2963273525238037,
      "learning_rate": 1.1696550215039006e-05,
      "loss": 2.3351,
      "step": 34510
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0673683881759644,
      "learning_rate": 1.1696144445577604e-05,
      "loss": 2.3156,
      "step": 34511
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0365791320800781,
      "learning_rate": 1.169573867324078e-05,
      "loss": 2.2296,
      "step": 34512
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1227186918258667,
      "learning_rate": 1.1695332898029215e-05,
      "loss": 2.4126,
      "step": 34513
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.052066683769226,
      "learning_rate": 1.16949271199436e-05,
      "loss": 2.43,
      "step": 34514
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.031982183456421,
      "learning_rate": 1.1694521338984624e-05,
      "loss": 2.393,
      "step": 34515
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0739071369171143,
      "learning_rate": 1.1694115555152974e-05,
      "loss": 2.4776,
      "step": 34516
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.132703423500061,
      "learning_rate": 1.1693709768449338e-05,
      "loss": 2.3176,
      "step": 34517
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2127975225448608,
      "learning_rate": 1.1693303978874403e-05,
      "loss": 2.4491,
      "step": 34518
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.031281590461731,
      "learning_rate": 1.1692898186428859e-05,
      "loss": 2.5023,
      "step": 34519
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.053604006767273,
      "learning_rate": 1.1692492391113391e-05,
      "loss": 2.3682,
      "step": 34520
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0095628499984741,
      "learning_rate": 1.169208659292869e-05,
      "loss": 2.3646,
      "step": 34521
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1170024871826172,
      "learning_rate": 1.1691680791875443e-05,
      "loss": 2.4081,
      "step": 34522
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.010831356048584,
      "learning_rate": 1.1691274987954337e-05,
      "loss": 2.5651,
      "step": 34523
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9967325925827026,
      "learning_rate": 1.1690869181166062e-05,
      "loss": 2.3578,
      "step": 34524
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1077027320861816,
      "learning_rate": 1.1690463371511303e-05,
      "loss": 2.4067,
      "step": 34525
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2286100387573242,
      "learning_rate": 1.1690057558990748e-05,
      "loss": 2.4295,
      "step": 34526
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0503690242767334,
      "learning_rate": 1.1689651743605088e-05,
      "loss": 2.3529,
      "step": 34527
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.054314136505127,
      "learning_rate": 1.168924592535501e-05,
      "loss": 2.283,
      "step": 34528
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0668939352035522,
      "learning_rate": 1.1688840104241204e-05,
      "loss": 2.2393,
      "step": 34529
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0552860498428345,
      "learning_rate": 1.1688434280264352e-05,
      "loss": 2.3353,
      "step": 34530
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.032582402229309,
      "learning_rate": 1.1688028453425145e-05,
      "loss": 2.3432,
      "step": 34531
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1326377391815186,
      "learning_rate": 1.1687622623724271e-05,
      "loss": 2.344,
      "step": 34532
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0361212491989136,
      "learning_rate": 1.168721679116242e-05,
      "loss": 2.5334,
      "step": 34533
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9513944387435913,
      "learning_rate": 1.1686810955740275e-05,
      "loss": 2.4095,
      "step": 34534
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0489894151687622,
      "learning_rate": 1.1686405117458532e-05,
      "loss": 2.4022,
      "step": 34535
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1600373983383179,
      "learning_rate": 1.1685999276317872e-05,
      "loss": 2.246,
      "step": 34536
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.089035987854004,
      "learning_rate": 1.1685593432318987e-05,
      "loss": 2.1368,
      "step": 34537
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1860038042068481,
      "learning_rate": 1.168518758546256e-05,
      "loss": 2.3285,
      "step": 34538
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0632786750793457,
      "learning_rate": 1.1684781735749284e-05,
      "loss": 2.2441,
      "step": 34539
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.019286036491394,
      "learning_rate": 1.1684375883179849e-05,
      "loss": 2.5254,
      "step": 34540
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0447887182235718,
      "learning_rate": 1.1683970027754933e-05,
      "loss": 2.3587,
      "step": 34541
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9989596009254456,
      "learning_rate": 1.1683564169475234e-05,
      "loss": 2.2056,
      "step": 34542
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1809585094451904,
      "learning_rate": 1.1683158308341438e-05,
      "loss": 2.4452,
      "step": 34543
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0183861255645752,
      "learning_rate": 1.168275244435423e-05,
      "loss": 2.4381,
      "step": 34544
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.016819715499878,
      "learning_rate": 1.1682346577514298e-05,
      "loss": 2.5525,
      "step": 34545
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.083382487297058,
      "learning_rate": 1.1681940707822332e-05,
      "loss": 2.2426,
      "step": 34546
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0381886959075928,
      "learning_rate": 1.168153483527902e-05,
      "loss": 2.4229,
      "step": 34547
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2325844764709473,
      "learning_rate": 1.1681128959885052e-05,
      "loss": 2.1441,
      "step": 34548
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0150917768478394,
      "learning_rate": 1.1680723081641112e-05,
      "loss": 2.4957,
      "step": 34549
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.133945345878601,
      "learning_rate": 1.1680317200547888e-05,
      "loss": 2.2648,
      "step": 34550
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1805627346038818,
      "learning_rate": 1.1679911316606072e-05,
      "loss": 2.3966,
      "step": 34551
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.159612774848938,
      "learning_rate": 1.167950542981635e-05,
      "loss": 2.5813,
      "step": 34552
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1221493482589722,
      "learning_rate": 1.1679099540179411e-05,
      "loss": 2.3494,
      "step": 34553
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3205419778823853,
      "learning_rate": 1.167869364769594e-05,
      "loss": 2.5802,
      "step": 34554
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.8054637908935547,
      "learning_rate": 1.167828775236663e-05,
      "loss": 2.5383,
      "step": 34555
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1350924968719482,
      "learning_rate": 1.1677881854192163e-05,
      "loss": 2.2411,
      "step": 34556
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5833220481872559,
      "learning_rate": 1.1677475953173234e-05,
      "loss": 2.4313,
      "step": 34557
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1193450689315796,
      "learning_rate": 1.1677070049310526e-05,
      "loss": 2.3999,
      "step": 34558
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.039061427116394,
      "learning_rate": 1.1676664142604728e-05,
      "loss": 2.7107,
      "step": 34559
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0346124172210693,
      "learning_rate": 1.167625823305653e-05,
      "loss": 2.336,
      "step": 34560
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2327415943145752,
      "learning_rate": 1.167585232066662e-05,
      "loss": 2.4157,
      "step": 34561
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2060105800628662,
      "learning_rate": 1.1675446405435684e-05,
      "loss": 2.5967,
      "step": 34562
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.105309247970581,
      "learning_rate": 1.1675040487364411e-05,
      "loss": 2.2186,
      "step": 34563
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1407232284545898,
      "learning_rate": 1.1674634566453488e-05,
      "loss": 2.3713,
      "step": 34564
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.219811201095581,
      "learning_rate": 1.1674228642703607e-05,
      "loss": 2.3917,
      "step": 34565
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2056046724319458,
      "learning_rate": 1.1673822716115452e-05,
      "loss": 2.301,
      "step": 34566
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0059151649475098,
      "learning_rate": 1.1673416786689715e-05,
      "loss": 2.3235,
      "step": 34567
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0541534423828125,
      "learning_rate": 1.1673010854427083e-05,
      "loss": 2.4764,
      "step": 34568
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0804777145385742,
      "learning_rate": 1.1672604919328241e-05,
      "loss": 2.5685,
      "step": 34569
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1033531427383423,
      "learning_rate": 1.1672198981393878e-05,
      "loss": 2.2868,
      "step": 34570
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9466454386711121,
      "learning_rate": 1.1671793040624688e-05,
      "loss": 2.5355,
      "step": 34571
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0549688339233398,
      "learning_rate": 1.1671387097021352e-05,
      "loss": 2.2937,
      "step": 34572
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9924399256706238,
      "learning_rate": 1.1670981150584562e-05,
      "loss": 2.2803,
      "step": 34573
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.091973900794983,
      "learning_rate": 1.1670575201315001e-05,
      "loss": 2.4714,
      "step": 34574
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0729539394378662,
      "learning_rate": 1.1670169249213367e-05,
      "loss": 2.2167,
      "step": 34575
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.09395170211792,
      "learning_rate": 1.166976329428034e-05,
      "loss": 2.3516,
      "step": 34576
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0978368520736694,
      "learning_rate": 1.1669357336516611e-05,
      "loss": 2.3406,
      "step": 34577
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0170214176177979,
      "learning_rate": 1.1668951375922865e-05,
      "loss": 2.6261,
      "step": 34578
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.261146068572998,
      "learning_rate": 1.1668545412499797e-05,
      "loss": 2.3671,
      "step": 34579
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0492624044418335,
      "learning_rate": 1.166813944624809e-05,
      "loss": 2.3732,
      "step": 34580
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.079614520072937,
      "learning_rate": 1.1667733477168432e-05,
      "loss": 2.2623,
      "step": 34581
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1113508939743042,
      "learning_rate": 1.1667327505261516e-05,
      "loss": 2.453,
      "step": 34582
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0814533233642578,
      "learning_rate": 1.1666921530528025e-05,
      "loss": 2.2785,
      "step": 34583
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0432225465774536,
      "learning_rate": 1.166651555296865e-05,
      "loss": 2.2552,
      "step": 34584
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9968113899230957,
      "learning_rate": 1.1666109572584079e-05,
      "loss": 2.3549,
      "step": 34585
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1087273359298706,
      "learning_rate": 1.1665703589374997e-05,
      "loss": 2.4464,
      "step": 34586
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.09748375415802,
      "learning_rate": 1.1665297603342099e-05,
      "loss": 2.3825,
      "step": 34587
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9775063395500183,
      "learning_rate": 1.1664891614486067e-05,
      "loss": 2.496,
      "step": 34588
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0219471454620361,
      "learning_rate": 1.1664485622807594e-05,
      "loss": 2.3238,
      "step": 34589
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9763819575309753,
      "learning_rate": 1.1664079628307361e-05,
      "loss": 2.3325,
      "step": 34590
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1364346742630005,
      "learning_rate": 1.1663673630986063e-05,
      "loss": 2.4857,
      "step": 34591
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0529773235321045,
      "learning_rate": 1.1663267630844388e-05,
      "loss": 2.379,
      "step": 34592
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0007493495941162,
      "learning_rate": 1.166286162788302e-05,
      "loss": 2.2065,
      "step": 34593
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2434475421905518,
      "learning_rate": 1.1662455622102651e-05,
      "loss": 2.349,
      "step": 34594
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.14596426486969,
      "learning_rate": 1.1662049613503972e-05,
      "loss": 2.3932,
      "step": 34595
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0630179643630981,
      "learning_rate": 1.1661643602087663e-05,
      "loss": 2.6504,
      "step": 34596
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1091432571411133,
      "learning_rate": 1.1661237587854419e-05,
      "loss": 2.2954,
      "step": 34597
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0367281436920166,
      "learning_rate": 1.1660831570804925e-05,
      "loss": 2.1659,
      "step": 34598
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0658392906188965,
      "learning_rate": 1.1660425550939871e-05,
      "loss": 2.3538,
      "step": 34599
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0515539646148682,
      "learning_rate": 1.1660019528259945e-05,
      "loss": 2.3554,
      "step": 34600
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9711765646934509,
      "learning_rate": 1.1659613502765835e-05,
      "loss": 2.1813,
      "step": 34601
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9904178977012634,
      "learning_rate": 1.165920747445823e-05,
      "loss": 2.4217,
      "step": 34602
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9549176692962646,
      "learning_rate": 1.1658801443337816e-05,
      "loss": 2.4108,
      "step": 34603
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9477810263633728,
      "learning_rate": 1.165839540940528e-05,
      "loss": 2.3514,
      "step": 34604
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.051857590675354,
      "learning_rate": 1.1657989372661318e-05,
      "loss": 2.4345,
      "step": 34605
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.992929995059967,
      "learning_rate": 1.1657583333106613e-05,
      "loss": 2.2201,
      "step": 34606
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0997562408447266,
      "learning_rate": 1.1657177290741853e-05,
      "loss": 2.3378,
      "step": 34607
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.968944787979126,
      "learning_rate": 1.165677124556773e-05,
      "loss": 2.3572,
      "step": 34608
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0490065813064575,
      "learning_rate": 1.1656365197584926e-05,
      "loss": 2.2462,
      "step": 34609
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0231975317001343,
      "learning_rate": 1.1655959146794136e-05,
      "loss": 2.4596,
      "step": 34610
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1331591606140137,
      "learning_rate": 1.1655553093196044e-05,
      "loss": 2.4336,
      "step": 34611
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9868212342262268,
      "learning_rate": 1.1655147036791338e-05,
      "loss": 2.4195,
      "step": 34612
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0350548028945923,
      "learning_rate": 1.1654740977580713e-05,
      "loss": 2.617,
      "step": 34613
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0603187084197998,
      "learning_rate": 1.1654334915564849e-05,
      "loss": 2.3103,
      "step": 34614
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2004121541976929,
      "learning_rate": 1.1653928850744441e-05,
      "loss": 2.1258,
      "step": 34615
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.104824185371399,
      "learning_rate": 1.1653522783120172e-05,
      "loss": 2.2468,
      "step": 34616
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1469268798828125,
      "learning_rate": 1.1653116712692733e-05,
      "loss": 2.3928,
      "step": 34617
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.139214277267456,
      "learning_rate": 1.1652710639462812e-05,
      "loss": 2.2034,
      "step": 34618
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0442088842391968,
      "learning_rate": 1.1652304563431098e-05,
      "loss": 2.3542,
      "step": 34619
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0694769620895386,
      "learning_rate": 1.1651898484598278e-05,
      "loss": 2.3031,
      "step": 34620
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.214887261390686,
      "learning_rate": 1.1651492402965045e-05,
      "loss": 2.3678,
      "step": 34621
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9938302040100098,
      "learning_rate": 1.165108631853208e-05,
      "loss": 2.2648,
      "step": 34622
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9640126824378967,
      "learning_rate": 1.1650680231300078e-05,
      "loss": 2.4608,
      "step": 34623
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0466046333312988,
      "learning_rate": 1.1650274141269722e-05,
      "loss": 2.3723,
      "step": 34624
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1181566715240479,
      "learning_rate": 1.1649868048441703e-05,
      "loss": 2.3725,
      "step": 34625
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9982880353927612,
      "learning_rate": 1.1649461952816713e-05,
      "loss": 2.3389,
      "step": 34626
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0074472427368164,
      "learning_rate": 1.1649055854395434e-05,
      "loss": 2.5,
      "step": 34627
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0044037103652954,
      "learning_rate": 1.164864975317856e-05,
      "loss": 2.4691,
      "step": 34628
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0404973030090332,
      "learning_rate": 1.1648243649166775e-05,
      "loss": 2.6578,
      "step": 34629
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.251749873161316,
      "learning_rate": 1.1647837542360771e-05,
      "loss": 2.3241,
      "step": 34630
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1872706413269043,
      "learning_rate": 1.1647431432761233e-05,
      "loss": 2.5746,
      "step": 34631
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0459126234054565,
      "learning_rate": 1.1647025320368852e-05,
      "loss": 2.4674,
      "step": 34632
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3597214221954346,
      "learning_rate": 1.1646619205184316e-05,
      "loss": 2.5382,
      "step": 34633
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0503355264663696,
      "learning_rate": 1.1646213087208313e-05,
      "loss": 2.4032,
      "step": 34634
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.073038935661316,
      "learning_rate": 1.164580696644153e-05,
      "loss": 2.4199,
      "step": 34635
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0800566673278809,
      "learning_rate": 1.164540084288466e-05,
      "loss": 2.1864,
      "step": 34636
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1169675588607788,
      "learning_rate": 1.1644994716538387e-05,
      "loss": 2.2029,
      "step": 34637
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0921729803085327,
      "learning_rate": 1.16445885874034e-05,
      "loss": 2.3943,
      "step": 34638
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1486129760742188,
      "learning_rate": 1.164418245548039e-05,
      "loss": 2.4198,
      "step": 34639
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0941826105117798,
      "learning_rate": 1.1643776320770047e-05,
      "loss": 2.5349,
      "step": 34640
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1748520135879517,
      "learning_rate": 1.1643370183273055e-05,
      "loss": 2.2957,
      "step": 34641
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.02207350730896,
      "learning_rate": 1.1642964042990102e-05,
      "loss": 2.37,
      "step": 34642
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0048226118087769,
      "learning_rate": 1.1642557899921881e-05,
      "loss": 2.3988,
      "step": 34643
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4000884294509888,
      "learning_rate": 1.1642151754069075e-05,
      "loss": 2.5237,
      "step": 34644
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0308769941329956,
      "learning_rate": 1.1641745605432378e-05,
      "loss": 2.3156,
      "step": 34645
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0442806482315063,
      "learning_rate": 1.1641339454012477e-05,
      "loss": 2.1978,
      "step": 34646
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.607279658317566,
      "learning_rate": 1.164093329981006e-05,
      "loss": 2.1837,
      "step": 34647
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0749777555465698,
      "learning_rate": 1.1640527142825815e-05,
      "loss": 2.3341,
      "step": 34648
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.111770749092102,
      "learning_rate": 1.164012098306043e-05,
      "loss": 2.5835,
      "step": 34649
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0616391897201538,
      "learning_rate": 1.1639714820514593e-05,
      "loss": 2.3499,
      "step": 34650
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0863841772079468,
      "learning_rate": 1.1639308655188996e-05,
      "loss": 2.3296,
      "step": 34651
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9610170722007751,
      "learning_rate": 1.1638902487084326e-05,
      "loss": 2.253,
      "step": 34652
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1406484842300415,
      "learning_rate": 1.1638496316201271e-05,
      "loss": 2.1059,
      "step": 34653
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9504307508468628,
      "learning_rate": 1.1638090142540521e-05,
      "loss": 2.4699,
      "step": 34654
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0819425582885742,
      "learning_rate": 1.1637683966102761e-05,
      "loss": 2.2955,
      "step": 34655
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.048776388168335,
      "learning_rate": 1.1637277786888683e-05,
      "loss": 2.5725,
      "step": 34656
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.204225778579712,
      "learning_rate": 1.1636871604898973e-05,
      "loss": 2.4106,
      "step": 34657
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0389025211334229,
      "learning_rate": 1.1636465420134326e-05,
      "loss": 2.2114,
      "step": 34658
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0555927753448486,
      "learning_rate": 1.1636059232595423e-05,
      "loss": 2.1584,
      "step": 34659
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9987322688102722,
      "learning_rate": 1.1635653042282952e-05,
      "loss": 2.4324,
      "step": 34660
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1028631925582886,
      "learning_rate": 1.1635246849197609e-05,
      "loss": 2.4014,
      "step": 34661
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0713967084884644,
      "learning_rate": 1.1634840653340075e-05,
      "loss": 2.4763,
      "step": 34662
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0767940282821655,
      "learning_rate": 1.1634434454711045e-05,
      "loss": 2.3582,
      "step": 34663
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2258782386779785,
      "learning_rate": 1.1634028253311202e-05,
      "loss": 2.3656,
      "step": 34664
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1501847505569458,
      "learning_rate": 1.1633622049141238e-05,
      "loss": 2.301,
      "step": 34665
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0827597379684448,
      "learning_rate": 1.163321584220184e-05,
      "loss": 2.4885,
      "step": 34666
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.103742241859436,
      "learning_rate": 1.1632809632493703e-05,
      "loss": 2.2741,
      "step": 34667
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.157784104347229,
      "learning_rate": 1.1632403420017506e-05,
      "loss": 2.2903,
      "step": 34668
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.085176706314087,
      "learning_rate": 1.1631997204773942e-05,
      "loss": 2.2434,
      "step": 34669
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1417292356491089,
      "learning_rate": 1.1631590986763698e-05,
      "loss": 2.3193,
      "step": 34670
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0554323196411133,
      "learning_rate": 1.1631184765987467e-05,
      "loss": 2.3513,
      "step": 34671
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.136889934539795,
      "learning_rate": 1.1630778542445932e-05,
      "loss": 2.2163,
      "step": 34672
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2176036834716797,
      "learning_rate": 1.1630372316139787e-05,
      "loss": 2.2294,
      "step": 34673
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.020632028579712,
      "learning_rate": 1.1629966087069718e-05,
      "loss": 2.1607,
      "step": 34674
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9313035607337952,
      "learning_rate": 1.1629559855236411e-05,
      "loss": 2.2682,
      "step": 34675
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0391212701797485,
      "learning_rate": 1.1629153620640561e-05,
      "loss": 2.3024,
      "step": 34676
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2133433818817139,
      "learning_rate": 1.1628747383282852e-05,
      "loss": 2.2785,
      "step": 34677
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3480576276779175,
      "learning_rate": 1.1628341143163973e-05,
      "loss": 2.2121,
      "step": 34678
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0335054397583008,
      "learning_rate": 1.1627934900284612e-05,
      "loss": 2.507,
      "step": 34679
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1944547891616821,
      "learning_rate": 1.1627528654645463e-05,
      "loss": 2.4405,
      "step": 34680
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1358497142791748,
      "learning_rate": 1.162712240624721e-05,
      "loss": 2.444,
      "step": 34681
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.068307876586914,
      "learning_rate": 1.162671615509054e-05,
      "loss": 2.4092,
      "step": 34682
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1271781921386719,
      "learning_rate": 1.1626309901176147e-05,
      "loss": 2.487,
      "step": 34683
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1281169652938843,
      "learning_rate": 1.1625903644504715e-05,
      "loss": 2.2313,
      "step": 34684
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0312047004699707,
      "learning_rate": 1.1625497385076935e-05,
      "loss": 2.3745,
      "step": 34685
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0710291862487793,
      "learning_rate": 1.16250911228935e-05,
      "loss": 2.3259,
      "step": 34686
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9973928928375244,
      "learning_rate": 1.1624684857955088e-05,
      "loss": 2.4453,
      "step": 34687
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4690864086151123,
      "learning_rate": 1.1624278590262397e-05,
      "loss": 2.295,
      "step": 34688
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0680127143859863,
      "learning_rate": 1.1623872319816111e-05,
      "loss": 2.5734,
      "step": 34689
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9916588068008423,
      "learning_rate": 1.1623466046616923e-05,
      "loss": 2.6944,
      "step": 34690
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0602021217346191,
      "learning_rate": 1.1623059770665517e-05,
      "loss": 2.252,
      "step": 34691
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1501418352127075,
      "learning_rate": 1.1622653491962583e-05,
      "loss": 2.5215,
      "step": 34692
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0736654996871948,
      "learning_rate": 1.1622247210508813e-05,
      "loss": 2.599,
      "step": 34693
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0245012044906616,
      "learning_rate": 1.1621840926304893e-05,
      "loss": 2.4173,
      "step": 34694
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0170867443084717,
      "learning_rate": 1.1621434639351512e-05,
      "loss": 2.364,
      "step": 34695
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0376925468444824,
      "learning_rate": 1.1621028349649357e-05,
      "loss": 2.2797,
      "step": 34696
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1204729080200195,
      "learning_rate": 1.162062205719912e-05,
      "loss": 2.2967,
      "step": 34697
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.013545036315918,
      "learning_rate": 1.1620215762001487e-05,
      "loss": 2.3464,
      "step": 34698
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0293340682983398,
      "learning_rate": 1.161980946405715e-05,
      "loss": 2.0474,
      "step": 34699
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9713170528411865,
      "learning_rate": 1.1619403163366798e-05,
      "loss": 2.1219,
      "step": 34700
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9689674377441406,
      "learning_rate": 1.1618996859931115e-05,
      "loss": 2.3645,
      "step": 34701
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1756103038787842,
      "learning_rate": 1.1618590553750793e-05,
      "loss": 2.4637,
      "step": 34702
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1678804159164429,
      "learning_rate": 1.161818424482652e-05,
      "loss": 2.5535,
      "step": 34703
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.028036117553711,
      "learning_rate": 1.1617777933158984e-05,
      "loss": 2.3989,
      "step": 34704
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2307149171829224,
      "learning_rate": 1.1617371618748878e-05,
      "loss": 2.2604,
      "step": 34705
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0551159381866455,
      "learning_rate": 1.1616965301596886e-05,
      "loss": 2.5692,
      "step": 34706
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1406720876693726,
      "learning_rate": 1.16165589817037e-05,
      "loss": 2.4272,
      "step": 34707
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.11387300491333,
      "learning_rate": 1.1616152659070008e-05,
      "loss": 2.2211,
      "step": 34708
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2439467906951904,
      "learning_rate": 1.1615746333696496e-05,
      "loss": 2.5071,
      "step": 34709
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.153265118598938,
      "learning_rate": 1.1615340005583854e-05,
      "loss": 2.3741,
      "step": 34710
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0002052783966064,
      "learning_rate": 1.1614933674732776e-05,
      "loss": 2.5821,
      "step": 34711
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1492304801940918,
      "learning_rate": 1.1614527341143943e-05,
      "loss": 2.3207,
      "step": 34712
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.082872986793518,
      "learning_rate": 1.1614121004818052e-05,
      "loss": 2.2267,
      "step": 34713
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1147722005844116,
      "learning_rate": 1.1613714665755786e-05,
      "loss": 2.5482,
      "step": 34714
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2457833290100098,
      "learning_rate": 1.1613308323957833e-05,
      "loss": 2.2576,
      "step": 34715
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9280401468276978,
      "learning_rate": 1.1612901979424886e-05,
      "loss": 2.4325,
      "step": 34716
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0084363222122192,
      "learning_rate": 1.1612495632157632e-05,
      "loss": 2.4264,
      "step": 34717
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2656056880950928,
      "learning_rate": 1.161208928215676e-05,
      "loss": 2.3899,
      "step": 34718
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0014082193374634,
      "learning_rate": 1.1611682929422958e-05,
      "loss": 2.5601,
      "step": 34719
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2206493616104126,
      "learning_rate": 1.1611276573956917e-05,
      "loss": 2.3183,
      "step": 34720
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.216450810432434,
      "learning_rate": 1.1610870215759325e-05,
      "loss": 2.4959,
      "step": 34721
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0199952125549316,
      "learning_rate": 1.1610463854830869e-05,
      "loss": 2.4911,
      "step": 34722
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.131671667098999,
      "learning_rate": 1.161005749117224e-05,
      "loss": 2.1475,
      "step": 34723
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.256235122680664,
      "learning_rate": 1.1609651124784126e-05,
      "loss": 2.4632,
      "step": 34724
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0113179683685303,
      "learning_rate": 1.1609244755667214e-05,
      "loss": 2.4381,
      "step": 34725
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.265655279159546,
      "learning_rate": 1.16088383838222e-05,
      "loss": 2.5419,
      "step": 34726
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0803745985031128,
      "learning_rate": 1.1608432009249766e-05,
      "loss": 2.3098,
      "step": 34727
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0197196006774902,
      "learning_rate": 1.16080256319506e-05,
      "loss": 2.3276,
      "step": 34728
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0082640647888184,
      "learning_rate": 1.1607619251925397e-05,
      "loss": 2.3396,
      "step": 34729
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0467561483383179,
      "learning_rate": 1.160721286917484e-05,
      "loss": 2.3615,
      "step": 34730
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1661427021026611,
      "learning_rate": 1.1606806483699622e-05,
      "loss": 2.7035,
      "step": 34731
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2401777505874634,
      "learning_rate": 1.1606400095500432e-05,
      "loss": 2.4103,
      "step": 34732
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0535556077957153,
      "learning_rate": 1.1605993704577958e-05,
      "loss": 2.4396,
      "step": 34733
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0151467323303223,
      "learning_rate": 1.1605587310932887e-05,
      "loss": 2.4501,
      "step": 34734
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1382604837417603,
      "learning_rate": 1.1605180914565907e-05,
      "loss": 2.5033,
      "step": 34735
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9389094710350037,
      "learning_rate": 1.1604774515477712e-05,
      "loss": 2.341,
      "step": 34736
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1119886636734009,
      "learning_rate": 1.1604368113668988e-05,
      "loss": 2.3663,
      "step": 34737
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1786144971847534,
      "learning_rate": 1.1603961709140426e-05,
      "loss": 2.3962,
      "step": 34738
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1591598987579346,
      "learning_rate": 1.160355530189271e-05,
      "loss": 2.2066,
      "step": 34739
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0766407251358032,
      "learning_rate": 1.1603148891926538e-05,
      "loss": 2.3078,
      "step": 34740
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.12039053440094,
      "learning_rate": 1.1602742479242586e-05,
      "loss": 2.3176,
      "step": 34741
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2115478515625,
      "learning_rate": 1.1602336063841553e-05,
      "loss": 2.3647,
      "step": 34742
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1158578395843506,
      "learning_rate": 1.1601929645724126e-05,
      "loss": 2.4646,
      "step": 34743
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.083546757698059,
      "learning_rate": 1.1601523224890992e-05,
      "loss": 2.3719,
      "step": 34744
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0745149850845337,
      "learning_rate": 1.1601116801342845e-05,
      "loss": 2.2689,
      "step": 34745
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0983566045761108,
      "learning_rate": 1.1600710375080366e-05,
      "loss": 2.3642,
      "step": 34746
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.145769476890564,
      "learning_rate": 1.1600303946104248e-05,
      "loss": 2.4627,
      "step": 34747
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0089539289474487,
      "learning_rate": 1.1599897514415183e-05,
      "loss": 2.3324,
      "step": 34748
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1092230081558228,
      "learning_rate": 1.1599491080013856e-05,
      "loss": 2.2886,
      "step": 34749
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.153060793876648,
      "learning_rate": 1.1599084642900958e-05,
      "loss": 2.4376,
      "step": 34750
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9546186923980713,
      "learning_rate": 1.1598678203077173e-05,
      "loss": 2.4496,
      "step": 34751
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1531753540039062,
      "learning_rate": 1.1598271760543198e-05,
      "loss": 2.6043,
      "step": 34752
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.068945050239563,
      "learning_rate": 1.159786531529972e-05,
      "loss": 2.4243,
      "step": 34753
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1050134897232056,
      "learning_rate": 1.1597458867347422e-05,
      "loss": 2.3662,
      "step": 34754
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.055538296699524,
      "learning_rate": 1.1597052416687001e-05,
      "loss": 2.3283,
      "step": 34755
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0470478534698486,
      "learning_rate": 1.159664596331914e-05,
      "loss": 2.4269,
      "step": 34756
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0306811332702637,
      "learning_rate": 1.1596239507244531e-05,
      "loss": 2.6089,
      "step": 34757
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0564285516738892,
      "learning_rate": 1.1595833048463861e-05,
      "loss": 2.4854,
      "step": 34758
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.993130087852478,
      "learning_rate": 1.1595426586977822e-05,
      "loss": 2.5259,
      "step": 34759
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0276967287063599,
      "learning_rate": 1.1595020122787104e-05,
      "loss": 2.4483,
      "step": 34760
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1883362531661987,
      "learning_rate": 1.159461365589239e-05,
      "loss": 2.6298,
      "step": 34761
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9768151044845581,
      "learning_rate": 1.1594207186294376e-05,
      "loss": 2.3228,
      "step": 34762
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0869296789169312,
      "learning_rate": 1.1593800713993744e-05,
      "loss": 2.4104,
      "step": 34763
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9938499331474304,
      "learning_rate": 1.159339423899119e-05,
      "loss": 2.4702,
      "step": 34764
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.191887378692627,
      "learning_rate": 1.1592987761287397e-05,
      "loss": 2.3161,
      "step": 34765
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0702853202819824,
      "learning_rate": 1.1592581280883062e-05,
      "loss": 2.3056,
      "step": 34766
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0986642837524414,
      "learning_rate": 1.1592174797778865e-05,
      "loss": 2.4123,
      "step": 34767
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.029258370399475,
      "learning_rate": 1.15917683119755e-05,
      "loss": 2.3211,
      "step": 34768
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1649572849273682,
      "learning_rate": 1.1591361823473656e-05,
      "loss": 2.3625,
      "step": 34769
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0640170574188232,
      "learning_rate": 1.1590955332274022e-05,
      "loss": 2.3019,
      "step": 34770
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0683006048202515,
      "learning_rate": 1.1590548838377285e-05,
      "loss": 2.4539,
      "step": 34771
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0331751108169556,
      "learning_rate": 1.1590142341784136e-05,
      "loss": 2.16,
      "step": 34772
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3754054307937622,
      "learning_rate": 1.1589735842495267e-05,
      "loss": 2.3684,
      "step": 34773
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0114630460739136,
      "learning_rate": 1.1589329340511362e-05,
      "loss": 2.396,
      "step": 34774
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2818704843521118,
      "learning_rate": 1.158892283583311e-05,
      "loss": 2.4866,
      "step": 34775
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.107651710510254,
      "learning_rate": 1.1588516328461208e-05,
      "loss": 2.4146,
      "step": 34776
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3288378715515137,
      "learning_rate": 1.1588109818396333e-05,
      "loss": 2.3211,
      "step": 34777
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2253050804138184,
      "learning_rate": 1.1587703305639183e-05,
      "loss": 2.5342,
      "step": 34778
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0583956241607666,
      "learning_rate": 1.1587296790190447e-05,
      "loss": 2.3372,
      "step": 34779
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.070528507232666,
      "learning_rate": 1.1586890272050808e-05,
      "loss": 2.4435,
      "step": 34780
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1263079643249512,
      "learning_rate": 1.1586483751220961e-05,
      "loss": 2.3675,
      "step": 34781
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.142970085144043,
      "learning_rate": 1.1586077227701594e-05,
      "loss": 2.5999,
      "step": 34782
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0390925407409668,
      "learning_rate": 1.1585670701493393e-05,
      "loss": 2.3036,
      "step": 34783
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9921481013298035,
      "learning_rate": 1.1585264172597048e-05,
      "loss": 2.3047,
      "step": 34784
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0819268226623535,
      "learning_rate": 1.1584857641013252e-05,
      "loss": 2.2231,
      "step": 34785
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9335017204284668,
      "learning_rate": 1.1584451106742694e-05,
      "loss": 2.2502,
      "step": 34786
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0217890739440918,
      "learning_rate": 1.158404456978606e-05,
      "loss": 2.3334,
      "step": 34787
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0266433954238892,
      "learning_rate": 1.1583638030144038e-05,
      "loss": 2.514,
      "step": 34788
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.026055932044983,
      "learning_rate": 1.1583231487817319e-05,
      "loss": 2.1916,
      "step": 34789
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1220848560333252,
      "learning_rate": 1.1582824942806594e-05,
      "loss": 2.4565,
      "step": 34790
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1388272047042847,
      "learning_rate": 1.158241839511255e-05,
      "loss": 2.3122,
      "step": 34791
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0312646627426147,
      "learning_rate": 1.158201184473588e-05,
      "loss": 2.2637,
      "step": 34792
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9029236435890198,
      "learning_rate": 1.1581605291677269e-05,
      "loss": 2.2595,
      "step": 34793
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1114449501037598,
      "learning_rate": 1.1581198735937407e-05,
      "loss": 2.3584,
      "step": 34794
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.983366847038269,
      "learning_rate": 1.1580792177516983e-05,
      "loss": 2.3217,
      "step": 34795
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0507835149765015,
      "learning_rate": 1.1580385616416686e-05,
      "loss": 2.4762,
      "step": 34796
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0817509889602661,
      "learning_rate": 1.157997905263721e-05,
      "loss": 2.3883,
      "step": 34797
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.112894892692566,
      "learning_rate": 1.1579572486179236e-05,
      "loss": 2.4467,
      "step": 34798
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1053602695465088,
      "learning_rate": 1.1579165917043463e-05,
      "loss": 2.324,
      "step": 34799
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3124802112579346,
      "learning_rate": 1.157875934523057e-05,
      "loss": 2.3425,
      "step": 34800
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0354838371276855,
      "learning_rate": 1.1578352770741252e-05,
      "loss": 2.2074,
      "step": 34801
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1102275848388672,
      "learning_rate": 1.1577946193576199e-05,
      "loss": 2.4364,
      "step": 34802
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.090907096862793,
      "learning_rate": 1.1577539613736098e-05,
      "loss": 2.4308,
      "step": 34803
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1393524408340454,
      "learning_rate": 1.157713303122164e-05,
      "loss": 2.3857,
      "step": 34804
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0925863981246948,
      "learning_rate": 1.1576726446033511e-05,
      "loss": 2.2589,
      "step": 34805
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1473592519760132,
      "learning_rate": 1.1576319858172403e-05,
      "loss": 2.3149,
      "step": 34806
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0865411758422852,
      "learning_rate": 1.1575913267639005e-05,
      "loss": 2.5139,
      "step": 34807
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0438547134399414,
      "learning_rate": 1.1575506674434007e-05,
      "loss": 2.2109,
      "step": 34808
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0886051654815674,
      "learning_rate": 1.1575100078558095e-05,
      "loss": 2.3339,
      "step": 34809
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0591957569122314,
      "learning_rate": 1.1574693480011962e-05,
      "loss": 2.2519,
      "step": 34810
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0442909002304077,
      "learning_rate": 1.1574286878796295e-05,
      "loss": 2.5413,
      "step": 34811
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1477112770080566,
      "learning_rate": 1.1573880274911787e-05,
      "loss": 2.2867,
      "step": 34812
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3527915477752686,
      "learning_rate": 1.157347366835912e-05,
      "loss": 2.5748,
      "step": 34813
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.059146523475647,
      "learning_rate": 1.157306705913899e-05,
      "loss": 2.6215,
      "step": 34814
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.059938669204712,
      "learning_rate": 1.1572660447252084e-05,
      "loss": 2.3596,
      "step": 34815
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0759260654449463,
      "learning_rate": 1.1572253832699092e-05,
      "loss": 2.4168,
      "step": 34816
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0367980003356934,
      "learning_rate": 1.1571847215480703e-05,
      "loss": 2.2342,
      "step": 34817
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0505443811416626,
      "learning_rate": 1.1571440595597605e-05,
      "loss": 2.4243,
      "step": 34818
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.147836446762085,
      "learning_rate": 1.157103397305049e-05,
      "loss": 2.2267,
      "step": 34819
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.054579734802246,
      "learning_rate": 1.1570627347840044e-05,
      "loss": 2.2037,
      "step": 34820
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0089715719223022,
      "learning_rate": 1.1570220719966958e-05,
      "loss": 2.4829,
      "step": 34821
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9882461428642273,
      "learning_rate": 1.1569814089431923e-05,
      "loss": 2.4093,
      "step": 34822
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0718036890029907,
      "learning_rate": 1.1569407456235624e-05,
      "loss": 2.4136,
      "step": 34823
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1196178197860718,
      "learning_rate": 1.1569000820378756e-05,
      "loss": 2.5721,
      "step": 34824
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0719869136810303,
      "learning_rate": 1.1568594181862006e-05,
      "loss": 2.4985,
      "step": 34825
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0224592685699463,
      "learning_rate": 1.156818754068606e-05,
      "loss": 2.2893,
      "step": 34826
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0303107500076294,
      "learning_rate": 1.156778089685161e-05,
      "loss": 2.4134,
      "step": 34827
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9838218688964844,
      "learning_rate": 1.1567374250359347e-05,
      "loss": 2.1384,
      "step": 34828
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0156545639038086,
      "learning_rate": 1.156696760120996e-05,
      "loss": 2.3829,
      "step": 34829
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.165173053741455,
      "learning_rate": 1.1566560949404136e-05,
      "loss": 2.3902,
      "step": 34830
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0227409601211548,
      "learning_rate": 1.1566154294942565e-05,
      "loss": 2.5059,
      "step": 34831
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0734580755233765,
      "learning_rate": 1.1565747637825938e-05,
      "loss": 2.2299,
      "step": 34832
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0744186639785767,
      "learning_rate": 1.1565340978054945e-05,
      "loss": 2.3554,
      "step": 34833
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1314020156860352,
      "learning_rate": 1.1564934315630271e-05,
      "loss": 2.4405,
      "step": 34834
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.166225552558899,
      "learning_rate": 1.1564527650552613e-05,
      "loss": 2.5463,
      "step": 34835
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0340228080749512,
      "learning_rate": 1.156412098282265e-05,
      "loss": 2.3136,
      "step": 34836
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1016188859939575,
      "learning_rate": 1.1563714312441082e-05,
      "loss": 2.3402,
      "step": 34837
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1418455839157104,
      "learning_rate": 1.156330763940859e-05,
      "loss": 2.2597,
      "step": 34838
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0741908550262451,
      "learning_rate": 1.156290096372587e-05,
      "loss": 2.5432,
      "step": 34839
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2198503017425537,
      "learning_rate": 1.1562494285393606e-05,
      "loss": 2.3436,
      "step": 34840
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0791689157485962,
      "learning_rate": 1.156208760441249e-05,
      "loss": 2.0817,
      "step": 34841
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1658780574798584,
      "learning_rate": 1.1561680920783212e-05,
      "loss": 2.2761,
      "step": 34842
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9585366249084473,
      "learning_rate": 1.1561274234506458e-05,
      "loss": 2.4739,
      "step": 34843
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0878642797470093,
      "learning_rate": 1.1560867545582923e-05,
      "loss": 2.2119,
      "step": 34844
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1280758380889893,
      "learning_rate": 1.1560460854013296e-05,
      "loss": 2.4297,
      "step": 34845
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0379152297973633,
      "learning_rate": 1.156005415979826e-05,
      "loss": 2.3819,
      "step": 34846
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1285943984985352,
      "learning_rate": 1.1559647462938507e-05,
      "loss": 2.1717,
      "step": 34847
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9941768646240234,
      "learning_rate": 1.1559240763434731e-05,
      "loss": 2.5055,
      "step": 34848
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9657220840454102,
      "learning_rate": 1.1558834061287618e-05,
      "loss": 2.054,
      "step": 34849
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1605603694915771,
      "learning_rate": 1.155842735649786e-05,
      "loss": 2.4218,
      "step": 34850
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0900248289108276,
      "learning_rate": 1.155802064906614e-05,
      "loss": 2.5317,
      "step": 34851
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0795814990997314,
      "learning_rate": 1.1557613938993156e-05,
      "loss": 2.2935,
      "step": 34852
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9913813471794128,
      "learning_rate": 1.155720722627959e-05,
      "loss": 2.1812,
      "step": 34853
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0929917097091675,
      "learning_rate": 1.1556800510926136e-05,
      "loss": 2.3047,
      "step": 34854
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9663028120994568,
      "learning_rate": 1.1556393792933482e-05,
      "loss": 2.2819,
      "step": 34855
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.997849702835083,
      "learning_rate": 1.1555987072302318e-05,
      "loss": 2.3707,
      "step": 34856
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.108909249305725,
      "learning_rate": 1.1555580349033333e-05,
      "loss": 2.3523,
      "step": 34857
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9476913809776306,
      "learning_rate": 1.155517362312722e-05,
      "loss": 2.3299,
      "step": 34858
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1489747762680054,
      "learning_rate": 1.155476689458466e-05,
      "loss": 2.4462,
      "step": 34859
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2017924785614014,
      "learning_rate": 1.1554360163406353e-05,
      "loss": 2.442,
      "step": 34860
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.018747091293335,
      "learning_rate": 1.155395342959298e-05,
      "loss": 2.2525,
      "step": 34861
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0509220361709595,
      "learning_rate": 1.1553546693145234e-05,
      "loss": 2.3567,
      "step": 34862
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9749879837036133,
      "learning_rate": 1.1553139954063808e-05,
      "loss": 2.251,
      "step": 34863
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9698631763458252,
      "learning_rate": 1.1552733212349385e-05,
      "loss": 2.1576,
      "step": 34864
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3344600200653076,
      "learning_rate": 1.1552326468002657e-05,
      "loss": 2.2953,
      "step": 34865
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3617407083511353,
      "learning_rate": 1.1551919721024317e-05,
      "loss": 2.1888,
      "step": 34866
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0751240253448486,
      "learning_rate": 1.1551512971415049e-05,
      "loss": 2.464,
      "step": 34867
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9887633919715881,
      "learning_rate": 1.1551106219175545e-05,
      "loss": 2.163,
      "step": 34868
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.030078411102295,
      "learning_rate": 1.1550699464306494e-05,
      "loss": 2.3696,
      "step": 34869
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1612226963043213,
      "learning_rate": 1.1550292706808588e-05,
      "loss": 2.4387,
      "step": 34870
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1452338695526123,
      "learning_rate": 1.1549885946682513e-05,
      "loss": 2.4886,
      "step": 34871
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0276869535446167,
      "learning_rate": 1.1549479183928964e-05,
      "loss": 2.5409,
      "step": 34872
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1663203239440918,
      "learning_rate": 1.1549072418548623e-05,
      "loss": 2.2825,
      "step": 34873
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0091651678085327,
      "learning_rate": 1.1548665650542186e-05,
      "loss": 2.1967,
      "step": 34874
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0200293064117432,
      "learning_rate": 1.1548258879910339e-05,
      "loss": 2.3613,
      "step": 34875
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0256285667419434,
      "learning_rate": 1.1547852106653771e-05,
      "loss": 2.4437,
      "step": 34876
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1016433238983154,
      "learning_rate": 1.1547445330773175e-05,
      "loss": 2.4261,
      "step": 34877
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0880343914031982,
      "learning_rate": 1.1547038552269241e-05,
      "loss": 2.296,
      "step": 34878
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3452070951461792,
      "learning_rate": 1.1546631771142653e-05,
      "loss": 2.3649,
      "step": 34879
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0036671161651611,
      "learning_rate": 1.1546224987394106e-05,
      "loss": 2.4864,
      "step": 34880
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0279467105865479,
      "learning_rate": 1.1545818201024288e-05,
      "loss": 2.1205,
      "step": 34881
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.166765570640564,
      "learning_rate": 1.1545411412033886e-05,
      "loss": 2.3825,
      "step": 34882
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0719506740570068,
      "learning_rate": 1.1545004620423595e-05,
      "loss": 2.2168,
      "step": 34883
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0069698095321655,
      "learning_rate": 1.15445978261941e-05,
      "loss": 2.3165,
      "step": 34884
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0238077640533447,
      "learning_rate": 1.1544191029346092e-05,
      "loss": 2.5495,
      "step": 34885
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.018190860748291,
      "learning_rate": 1.1543784229880262e-05,
      "loss": 2.4,
      "step": 34886
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.082303524017334,
      "learning_rate": 1.1543377427797296e-05,
      "loss": 2.1489,
      "step": 34887
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.250867486000061,
      "learning_rate": 1.1542970623097886e-05,
      "loss": 2.5304,
      "step": 34888
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.031180739402771,
      "learning_rate": 1.1542563815782723e-05,
      "loss": 2.2259,
      "step": 34889
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0122411251068115,
      "learning_rate": 1.1542157005852496e-05,
      "loss": 2.4384,
      "step": 34890
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.100040078163147,
      "learning_rate": 1.1541750193307898e-05,
      "loss": 2.4543,
      "step": 34891
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1134459972381592,
      "learning_rate": 1.1541343378149608e-05,
      "loss": 2.4693,
      "step": 34892
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0869413614273071,
      "learning_rate": 1.1540936560378324e-05,
      "loss": 2.4018,
      "step": 34893
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.030164122581482,
      "learning_rate": 1.1540529739994737e-05,
      "loss": 2.4065,
      "step": 34894
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.063592791557312,
      "learning_rate": 1.1540122916999532e-05,
      "loss": 2.3915,
      "step": 34895
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2167545557022095,
      "learning_rate": 1.1539716091393398e-05,
      "loss": 2.2892,
      "step": 34896
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.062413215637207,
      "learning_rate": 1.1539309263177029e-05,
      "loss": 2.3156,
      "step": 34897
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0785225629806519,
      "learning_rate": 1.1538902432351116e-05,
      "loss": 2.2338,
      "step": 34898
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1798135042190552,
      "learning_rate": 1.1538495598916339e-05,
      "loss": 2.1914,
      "step": 34899
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.087492823600769,
      "learning_rate": 1.1538088762873398e-05,
      "loss": 2.2611,
      "step": 34900
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1087905168533325,
      "learning_rate": 1.1537681924222976e-05,
      "loss": 2.4293,
      "step": 34901
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0402204990386963,
      "learning_rate": 1.1537275082965768e-05,
      "loss": 2.1726,
      "step": 34902
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1143379211425781,
      "learning_rate": 1.1536868239102461e-05,
      "loss": 2.5211,
      "step": 34903
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.193084716796875,
      "learning_rate": 1.1536461392633746e-05,
      "loss": 2.4349,
      "step": 34904
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1316144466400146,
      "learning_rate": 1.1536054543560311e-05,
      "loss": 2.3045,
      "step": 34905
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0423222780227661,
      "learning_rate": 1.1535647691882846e-05,
      "loss": 2.2923,
      "step": 34906
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3846732378005981,
      "learning_rate": 1.153524083760204e-05,
      "loss": 2.4737,
      "step": 34907
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0679129362106323,
      "learning_rate": 1.1534833980718582e-05,
      "loss": 2.3236,
      "step": 34908
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1109070777893066,
      "learning_rate": 1.1534427121233167e-05,
      "loss": 2.4434,
      "step": 34909
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0496200323104858,
      "learning_rate": 1.1534020259146479e-05,
      "loss": 2.2442,
      "step": 34910
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0895830392837524,
      "learning_rate": 1.1533613394459213e-05,
      "loss": 2.228,
      "step": 34911
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.197826623916626,
      "learning_rate": 1.1533206527172054e-05,
      "loss": 2.3868,
      "step": 34912
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0228080749511719,
      "learning_rate": 1.1532799657285693e-05,
      "loss": 2.3329,
      "step": 34913
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0798089504241943,
      "learning_rate": 1.1532392784800821e-05,
      "loss": 2.1991,
      "step": 34914
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0193040370941162,
      "learning_rate": 1.1531985909718127e-05,
      "loss": 2.3458,
      "step": 34915
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0847591161727905,
      "learning_rate": 1.15315790320383e-05,
      "loss": 2.4389,
      "step": 34916
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1724035739898682,
      "learning_rate": 1.1531172151762032e-05,
      "loss": 2.219,
      "step": 34917
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0475051403045654,
      "learning_rate": 1.1530765268890012e-05,
      "loss": 2.2767,
      "step": 34918
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0577021837234497,
      "learning_rate": 1.1530358383422929e-05,
      "loss": 2.5899,
      "step": 34919
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0241090059280396,
      "learning_rate": 1.1529951495361468e-05,
      "loss": 2.5141,
      "step": 34920
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0931354761123657,
      "learning_rate": 1.1529544604706329e-05,
      "loss": 2.3748,
      "step": 34921
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1871663331985474,
      "learning_rate": 1.1529137711458196e-05,
      "loss": 2.2258,
      "step": 34922
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1947227716445923,
      "learning_rate": 1.152873081561776e-05,
      "loss": 2.3918,
      "step": 34923
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0676301717758179,
      "learning_rate": 1.1528323917185706e-05,
      "loss": 2.3709,
      "step": 34924
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9992586374282837,
      "learning_rate": 1.1527917016162734e-05,
      "loss": 2.2222,
      "step": 34925
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.162521481513977,
      "learning_rate": 1.152751011254952e-05,
      "loss": 2.6405,
      "step": 34926
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2945313453674316,
      "learning_rate": 1.1527103206346765e-05,
      "loss": 2.3721,
      "step": 34927
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9875143766403198,
      "learning_rate": 1.1526696297555156e-05,
      "loss": 2.1897,
      "step": 34928
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.136244773864746,
      "learning_rate": 1.1526289386175382e-05,
      "loss": 2.2449,
      "step": 34929
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0626143217086792,
      "learning_rate": 1.1525882472208132e-05,
      "loss": 2.3809,
      "step": 34930
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2068848609924316,
      "learning_rate": 1.15254755556541e-05,
      "loss": 2.4357,
      "step": 34931
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9896283745765686,
      "learning_rate": 1.152506863651397e-05,
      "loss": 2.3,
      "step": 34932
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9865592122077942,
      "learning_rate": 1.1524661714788432e-05,
      "loss": 2.3081,
      "step": 34933
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1547417640686035,
      "learning_rate": 1.1524254790478182e-05,
      "loss": 2.4345,
      "step": 34934
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1330467462539673,
      "learning_rate": 1.1523847863583903e-05,
      "loss": 2.3829,
      "step": 34935
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1170414686203003,
      "learning_rate": 1.1523440934106291e-05,
      "loss": 2.2667,
      "step": 34936
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.825499176979065,
      "learning_rate": 1.1523034002046034e-05,
      "loss": 2.3344,
      "step": 34937
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0081995725631714,
      "learning_rate": 1.1522627067403818e-05,
      "loss": 2.2963,
      "step": 34938
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.9576380848884583,
      "learning_rate": 1.1522220130180335e-05,
      "loss": 2.2808,
      "step": 34939
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0313218832015991,
      "learning_rate": 1.1521813190376277e-05,
      "loss": 2.2592,
      "step": 34940
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1518802642822266,
      "learning_rate": 1.152140624799233e-05,
      "loss": 2.2016,
      "step": 34941
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0668535232543945,
      "learning_rate": 1.152099930302919e-05,
      "loss": 2.3127,
      "step": 34942
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.0559284687042236,
      "learning_rate": 1.1520592355487538e-05,
      "loss": 2.3496,
      "step": 34943
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2322826385498047,
      "learning_rate": 1.1520185405368075e-05,
      "loss": 2.5943,
      "step": 34944
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.096429705619812,
      "learning_rate": 1.1519778452671482e-05,
      "loss": 2.2386,
      "step": 34945
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1107300519943237,
      "learning_rate": 1.1519371497398452e-05,
      "loss": 2.3011,
      "step": 34946
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.174039602279663,
      "learning_rate": 1.1518964539549673e-05,
      "loss": 2.2799,
      "step": 34947
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0913538932800293,
      "learning_rate": 1.1518557579125837e-05,
      "loss": 2.2139,
      "step": 34948
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0058518648147583,
      "learning_rate": 1.1518150616127635e-05,
      "loss": 2.2915,
      "step": 34949
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.088681936264038,
      "learning_rate": 1.1517743650555757e-05,
      "loss": 2.522,
      "step": 34950
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0635249614715576,
      "learning_rate": 1.1517336682410889e-05,
      "loss": 2.121,
      "step": 34951
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0115958452224731,
      "learning_rate": 1.1516929711693722e-05,
      "loss": 2.3327,
      "step": 34952
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1864935159683228,
      "learning_rate": 1.1516522738404952e-05,
      "loss": 2.5694,
      "step": 34953
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9376252889633179,
      "learning_rate": 1.1516115762545259e-05,
      "loss": 2.4253,
      "step": 34954
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.038354754447937,
      "learning_rate": 1.151570878411534e-05,
      "loss": 2.4636,
      "step": 34955
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1261390447616577,
      "learning_rate": 1.1515301803115882e-05,
      "loss": 2.5304,
      "step": 34956
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0397545099258423,
      "learning_rate": 1.1514894819547578e-05,
      "loss": 2.5093,
      "step": 34957
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1737245321273804,
      "learning_rate": 1.1514487833411115e-05,
      "loss": 2.5072,
      "step": 34958
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0954893827438354,
      "learning_rate": 1.151408084470718e-05,
      "loss": 2.2852,
      "step": 34959
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0722874402999878,
      "learning_rate": 1.1513673853436471e-05,
      "loss": 2.4016,
      "step": 34960
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2322012186050415,
      "learning_rate": 1.1513266859599673e-05,
      "loss": 2.1429,
      "step": 34961
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9336223006248474,
      "learning_rate": 1.1512859863197474e-05,
      "loss": 2.3785,
      "step": 34962
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.163962960243225,
      "learning_rate": 1.151245286423057e-05,
      "loss": 2.061,
      "step": 34963
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2257723808288574,
      "learning_rate": 1.1512045862699647e-05,
      "loss": 2.6865,
      "step": 34964
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.17232346534729,
      "learning_rate": 1.1511638858605397e-05,
      "loss": 2.3261,
      "step": 34965
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0156190395355225,
      "learning_rate": 1.1511231851948504e-05,
      "loss": 2.3465,
      "step": 34966
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1692724227905273,
      "learning_rate": 1.1510824842729665e-05,
      "loss": 2.2507,
      "step": 34967
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1097702980041504,
      "learning_rate": 1.1510417830949568e-05,
      "loss": 2.3886,
      "step": 34968
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1844429969787598,
      "learning_rate": 1.1510010816608902e-05,
      "loss": 2.6235,
      "step": 34969
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9826382398605347,
      "learning_rate": 1.150960379970836e-05,
      "loss": 2.4616,
      "step": 34970
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.050849199295044,
      "learning_rate": 1.1509196780248627e-05,
      "loss": 2.3417,
      "step": 34971
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0360462665557861,
      "learning_rate": 1.1508789758230395e-05,
      "loss": 2.3948,
      "step": 34972
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0639846324920654,
      "learning_rate": 1.1508382733654356e-05,
      "loss": 2.1381,
      "step": 34973
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0511853694915771,
      "learning_rate": 1.1507975706521197e-05,
      "loss": 2.3625,
      "step": 34974
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.029773235321045,
      "learning_rate": 1.1507568676831611e-05,
      "loss": 2.454,
      "step": 34975
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0523653030395508,
      "learning_rate": 1.1507161644586286e-05,
      "loss": 2.3629,
      "step": 34976
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1248788833618164,
      "learning_rate": 1.1506754609785916e-05,
      "loss": 2.3804,
      "step": 34977
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1243077516555786,
      "learning_rate": 1.1506347572431182e-05,
      "loss": 2.4148,
      "step": 34978
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0253186225891113,
      "learning_rate": 1.1505940532522784e-05,
      "loss": 2.6539,
      "step": 34979
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1383386850357056,
      "learning_rate": 1.1505533490061405e-05,
      "loss": 2.4125,
      "step": 34980
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0474309921264648,
      "learning_rate": 1.150512644504774e-05,
      "loss": 2.3466,
      "step": 34981
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1321889162063599,
      "learning_rate": 1.1504719397482478e-05,
      "loss": 2.3366,
      "step": 34982
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1221373081207275,
      "learning_rate": 1.1504312347366305e-05,
      "loss": 2.5907,
      "step": 34983
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0810306072235107,
      "learning_rate": 1.1503905294699915e-05,
      "loss": 2.3474,
      "step": 34984
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0612493753433228,
      "learning_rate": 1.1503498239483997e-05,
      "loss": 2.5917,
      "step": 34985
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0357937812805176,
      "learning_rate": 1.1503091181719242e-05,
      "loss": 2.4153,
      "step": 34986
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1161073446273804,
      "learning_rate": 1.1502684121406337e-05,
      "loss": 2.544,
      "step": 34987
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9826840162277222,
      "learning_rate": 1.1502277058545976e-05,
      "loss": 2.2538,
      "step": 34988
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0392916202545166,
      "learning_rate": 1.1501869993138847e-05,
      "loss": 2.4073,
      "step": 34989
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0800347328186035,
      "learning_rate": 1.1501462925185643e-05,
      "loss": 2.4791,
      "step": 34990
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0284614562988281,
      "learning_rate": 1.1501055854687048e-05,
      "loss": 2.5227,
      "step": 34991
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1246967315673828,
      "learning_rate": 1.1500648781643755e-05,
      "loss": 2.2948,
      "step": 34992
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.131566047668457,
      "learning_rate": 1.1500241706056457e-05,
      "loss": 2.1153,
      "step": 34993
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.162161111831665,
      "learning_rate": 1.1499834627925842e-05,
      "loss": 2.3322,
      "step": 34994
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1381348371505737,
      "learning_rate": 1.1499427547252598e-05,
      "loss": 2.4575,
      "step": 34995
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9776776432991028,
      "learning_rate": 1.1499020464037418e-05,
      "loss": 2.391,
      "step": 34996
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0540906190872192,
      "learning_rate": 1.1498613378280993e-05,
      "loss": 2.4417,
      "step": 34997
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0754226446151733,
      "learning_rate": 1.149820628998401e-05,
      "loss": 2.3688,
      "step": 34998
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.17015540599823,
      "learning_rate": 1.1497799199147159e-05,
      "loss": 2.4356,
      "step": 34999
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0350219011306763,
      "learning_rate": 1.1497392105771134e-05,
      "loss": 2.1798,
      "step": 35000
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0066465139389038,
      "learning_rate": 1.149698500985662e-05,
      "loss": 2.4125,
      "step": 35001
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1022164821624756,
      "learning_rate": 1.1496577911404312e-05,
      "loss": 2.22,
      "step": 35002
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0627801418304443,
      "learning_rate": 1.14961708104149e-05,
      "loss": 2.294,
      "step": 35003
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2217298746109009,
      "learning_rate": 1.1495763706889067e-05,
      "loss": 2.1448,
      "step": 35004
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1164435148239136,
      "learning_rate": 1.1495356600827512e-05,
      "loss": 2.2501,
      "step": 35005
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0349931716918945,
      "learning_rate": 1.149494949223092e-05,
      "loss": 2.3326,
      "step": 35006
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9623151421546936,
      "learning_rate": 1.1494542381099982e-05,
      "loss": 2.5679,
      "step": 35007
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9991925358772278,
      "learning_rate": 1.149413526743539e-05,
      "loss": 2.2313,
      "step": 35008
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9718481302261353,
      "learning_rate": 1.1493728151237832e-05,
      "loss": 2.4592,
      "step": 35009
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0004258155822754,
      "learning_rate": 1.1493321032508001e-05,
      "loss": 2.2763,
      "step": 35010
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0631709098815918,
      "learning_rate": 1.1492913911246581e-05,
      "loss": 2.4909,
      "step": 35011
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.005907416343689,
      "learning_rate": 1.1492506787454271e-05,
      "loss": 2.4308,
      "step": 35012
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1597884893417358,
      "learning_rate": 1.1492099661131757e-05,
      "loss": 2.2243,
      "step": 35013
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.012953281402588,
      "learning_rate": 1.1491692532279726e-05,
      "loss": 2.2842,
      "step": 35014
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1611181497573853,
      "learning_rate": 1.1491285400898871e-05,
      "loss": 2.3235,
      "step": 35015
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0405924320220947,
      "learning_rate": 1.1490878266989883e-05,
      "loss": 2.3409,
      "step": 35016
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0237500667572021,
      "learning_rate": 1.1490471130553453e-05,
      "loss": 2.5937,
      "step": 35017
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1703559160232544,
      "learning_rate": 1.1490063991590267e-05,
      "loss": 2.4398,
      "step": 35018
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.173869252204895,
      "learning_rate": 1.1489656850101018e-05,
      "loss": 2.4596,
      "step": 35019
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2098156213760376,
      "learning_rate": 1.1489249706086398e-05,
      "loss": 2.2539,
      "step": 35020
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.025664210319519,
      "learning_rate": 1.1488842559547093e-05,
      "loss": 2.6033,
      "step": 35021
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3102363348007202,
      "learning_rate": 1.1488435410483795e-05,
      "loss": 2.4279,
      "step": 35022
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.055184245109558,
      "learning_rate": 1.14880282588972e-05,
      "loss": 2.4377,
      "step": 35023
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1945140361785889,
      "learning_rate": 1.148762110478799e-05,
      "loss": 2.5309,
      "step": 35024
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0580207109451294,
      "learning_rate": 1.1487213948156856e-05,
      "loss": 2.5362,
      "step": 35025
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1682754755020142,
      "learning_rate": 1.1486806789004492e-05,
      "loss": 2.3904,
      "step": 35026
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0314157009124756,
      "learning_rate": 1.1486399627331586e-05,
      "loss": 2.311,
      "step": 35027
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1385489702224731,
      "learning_rate": 1.1485992463138828e-05,
      "loss": 2.1813,
      "step": 35028
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9375346302986145,
      "learning_rate": 1.1485585296426911e-05,
      "loss": 2.4925,
      "step": 35029
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1553421020507812,
      "learning_rate": 1.1485178127196525e-05,
      "loss": 2.3566,
      "step": 35030
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0155022144317627,
      "learning_rate": 1.1484770955448357e-05,
      "loss": 2.1502,
      "step": 35031
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2015267610549927,
      "learning_rate": 1.1484363781183098e-05,
      "loss": 2.2376,
      "step": 35032
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.032286286354065,
      "learning_rate": 1.148395660440144e-05,
      "loss": 2.1359,
      "step": 35033
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.065447211265564,
      "learning_rate": 1.1483549425104073e-05,
      "loss": 2.1764,
      "step": 35034
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0880111455917358,
      "learning_rate": 1.1483142243291685e-05,
      "loss": 2.1895,
      "step": 35035
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1306642293930054,
      "learning_rate": 1.148273505896497e-05,
      "loss": 2.3102,
      "step": 35036
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2867860794067383,
      "learning_rate": 1.1482327872124617e-05,
      "loss": 2.4902,
      "step": 35037
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0420411825180054,
      "learning_rate": 1.1481920682771315e-05,
      "loss": 2.1687,
      "step": 35038
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3031072616577148,
      "learning_rate": 1.1481513490905753e-05,
      "loss": 2.2415,
      "step": 35039
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.078900933265686,
      "learning_rate": 1.1481106296528627e-05,
      "loss": 2.4491,
      "step": 35040
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9574460387229919,
      "learning_rate": 1.148069909964062e-05,
      "loss": 2.3231,
      "step": 35041
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0985068082809448,
      "learning_rate": 1.1480291900242428e-05,
      "loss": 2.3239,
      "step": 35042
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2056723833084106,
      "learning_rate": 1.147988469833474e-05,
      "loss": 2.161,
      "step": 35043
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0536820888519287,
      "learning_rate": 1.1479477493918243e-05,
      "loss": 2.2101,
      "step": 35044
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0396459102630615,
      "learning_rate": 1.147907028699363e-05,
      "loss": 2.3784,
      "step": 35045
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1591932773590088,
      "learning_rate": 1.1478663077561594e-05,
      "loss": 2.4126,
      "step": 35046
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0813417434692383,
      "learning_rate": 1.1478255865622818e-05,
      "loss": 2.4919,
      "step": 35047
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4105921983718872,
      "learning_rate": 1.1477848651177998e-05,
      "loss": 2.6158,
      "step": 35048
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1021639108657837,
      "learning_rate": 1.1477441434227827e-05,
      "loss": 2.4625,
      "step": 35049
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.058322548866272,
      "learning_rate": 1.1477034214772988e-05,
      "loss": 2.4591,
      "step": 35050
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0461955070495605,
      "learning_rate": 1.1476626992814177e-05,
      "loss": 2.3676,
      "step": 35051
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9558652639389038,
      "learning_rate": 1.147621976835208e-05,
      "loss": 2.4071,
      "step": 35052
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0614209175109863,
      "learning_rate": 1.147581254138739e-05,
      "loss": 2.3217,
      "step": 35053
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1139181852340698,
      "learning_rate": 1.1475405311920798e-05,
      "loss": 2.3217,
      "step": 35054
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9842852354049683,
      "learning_rate": 1.147499807995299e-05,
      "loss": 2.4751,
      "step": 35055
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1254384517669678,
      "learning_rate": 1.1474590845484665e-05,
      "loss": 2.4763,
      "step": 35056
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9319136738777161,
      "learning_rate": 1.1474183608516505e-05,
      "loss": 2.3303,
      "step": 35057
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1367863416671753,
      "learning_rate": 1.1473776369049204e-05,
      "loss": 2.4388,
      "step": 35058
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0002483129501343,
      "learning_rate": 1.1473369127083451e-05,
      "loss": 2.3738,
      "step": 35059
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.064220666885376,
      "learning_rate": 1.1472961882619937e-05,
      "loss": 2.4572,
      "step": 35060
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.122679591178894,
      "learning_rate": 1.1472554635659355e-05,
      "loss": 2.5264,
      "step": 35061
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9762720465660095,
      "learning_rate": 1.1472147386202389e-05,
      "loss": 2.2846,
      "step": 35062
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0430656671524048,
      "learning_rate": 1.1471740134249738e-05,
      "loss": 2.2192,
      "step": 35063
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0630419254302979,
      "learning_rate": 1.1471332879802084e-05,
      "loss": 2.304,
      "step": 35064
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.933009922504425,
      "learning_rate": 1.1470925622860124e-05,
      "loss": 2.3473,
      "step": 35065
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.112038254737854,
      "learning_rate": 1.1470518363424542e-05,
      "loss": 2.2006,
      "step": 35066
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9734358787536621,
      "learning_rate": 1.1470111101496034e-05,
      "loss": 2.099,
      "step": 35067
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.086750864982605,
      "learning_rate": 1.146970383707529e-05,
      "loss": 2.3206,
      "step": 35068
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.019575595855713,
      "learning_rate": 1.1469296570162998e-05,
      "loss": 2.6212,
      "step": 35069
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1219748258590698,
      "learning_rate": 1.1468889300759847e-05,
      "loss": 2.3295,
      "step": 35070
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9998944997787476,
      "learning_rate": 1.1468482028866532e-05,
      "loss": 2.4195,
      "step": 35071
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1688481569290161,
      "learning_rate": 1.146807475448374e-05,
      "loss": 2.3384,
      "step": 35072
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.070709466934204,
      "learning_rate": 1.1467667477612163e-05,
      "loss": 2.2607,
      "step": 35073
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0425848960876465,
      "learning_rate": 1.146726019825249e-05,
      "loss": 2.4256,
      "step": 35074
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1281182765960693,
      "learning_rate": 1.1466852916405414e-05,
      "loss": 2.4264,
      "step": 35075
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0373728275299072,
      "learning_rate": 1.1466445632071625e-05,
      "loss": 2.4456,
      "step": 35076
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.011541485786438,
      "learning_rate": 1.146603834525181e-05,
      "loss": 2.0728,
      "step": 35077
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9645648002624512,
      "learning_rate": 1.1465631055946662e-05,
      "loss": 2.4937,
      "step": 35078
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0656534433364868,
      "learning_rate": 1.1465223764156872e-05,
      "loss": 2.2081,
      "step": 35079
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1618318557739258,
      "learning_rate": 1.146481646988313e-05,
      "loss": 2.1704,
      "step": 35080
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0290201902389526,
      "learning_rate": 1.1464409173126126e-05,
      "loss": 2.48,
      "step": 35081
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.134922742843628,
      "learning_rate": 1.146400187388655e-05,
      "loss": 2.3754,
      "step": 35082
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.075648307800293,
      "learning_rate": 1.1463594572165093e-05,
      "loss": 2.3779,
      "step": 35083
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.053861379623413,
      "learning_rate": 1.1463187267962447e-05,
      "loss": 2.3186,
      "step": 35084
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0325596332550049,
      "learning_rate": 1.14627799612793e-05,
      "loss": 2.5131,
      "step": 35085
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.057178258895874,
      "learning_rate": 1.1462372652116345e-05,
      "loss": 2.2634,
      "step": 35086
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1643056869506836,
      "learning_rate": 1.146196534047427e-05,
      "loss": 2.2516,
      "step": 35087
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0633360147476196,
      "learning_rate": 1.1461558026353767e-05,
      "loss": 2.4784,
      "step": 35088
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0632356405258179,
      "learning_rate": 1.1461150709755527e-05,
      "loss": 2.154,
      "step": 35089
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0391271114349365,
      "learning_rate": 1.146074339068024e-05,
      "loss": 2.4349,
      "step": 35090
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0366275310516357,
      "learning_rate": 1.1460336069128594e-05,
      "loss": 2.2874,
      "step": 35091
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0724263191223145,
      "learning_rate": 1.1459928745101284e-05,
      "loss": 2.625,
      "step": 35092
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2420897483825684,
      "learning_rate": 1.1459521418598995e-05,
      "loss": 2.4015,
      "step": 35093
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0620057582855225,
      "learning_rate": 1.1459114089622423e-05,
      "loss": 2.3001,
      "step": 35094
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0162923336029053,
      "learning_rate": 1.1458706758172256e-05,
      "loss": 2.4063,
      "step": 35095
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.002018690109253,
      "learning_rate": 1.1458299424249187e-05,
      "loss": 2.5585,
      "step": 35096
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0916016101837158,
      "learning_rate": 1.1457892087853904e-05,
      "loss": 2.2494,
      "step": 35097
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0246059894561768,
      "learning_rate": 1.1457484748987096e-05,
      "loss": 2.4404,
      "step": 35098
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0825963020324707,
      "learning_rate": 1.1457077407649457e-05,
      "loss": 2.4011,
      "step": 35099
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0758551359176636,
      "learning_rate": 1.1456670063841676e-05,
      "loss": 2.2956,
      "step": 35100
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0298240184783936,
      "learning_rate": 1.145626271756444e-05,
      "loss": 2.2166,
      "step": 35101
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.117568850517273,
      "learning_rate": 1.1455855368818449e-05,
      "loss": 2.1575,
      "step": 35102
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.027086853981018,
      "learning_rate": 1.1455448017604384e-05,
      "loss": 2.384,
      "step": 35103
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.27114999294281,
      "learning_rate": 1.1455040663922941e-05,
      "loss": 2.3313,
      "step": 35104
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9708830714225769,
      "learning_rate": 1.1454633307774808e-05,
      "loss": 2.3737,
      "step": 35105
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.102778434753418,
      "learning_rate": 1.1454225949160675e-05,
      "loss": 2.4621,
      "step": 35106
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9974473118782043,
      "learning_rate": 1.1453818588081234e-05,
      "loss": 2.1886,
      "step": 35107
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1075477600097656,
      "learning_rate": 1.1453411224537177e-05,
      "loss": 2.4618,
      "step": 35108
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.719796061515808,
      "learning_rate": 1.1453003858529195e-05,
      "loss": 2.4923,
      "step": 35109
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9414060711860657,
      "learning_rate": 1.1452596490057974e-05,
      "loss": 2.5648,
      "step": 35110
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9431402087211609,
      "learning_rate": 1.1452189119124208e-05,
      "loss": 2.3176,
      "step": 35111
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0119860172271729,
      "learning_rate": 1.145178174572859e-05,
      "loss": 2.4306,
      "step": 35112
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2140249013900757,
      "learning_rate": 1.1451374369871804e-05,
      "loss": 2.2015,
      "step": 35113
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2252449989318848,
      "learning_rate": 1.1450966991554544e-05,
      "loss": 2.3048,
      "step": 35114
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.07488214969635,
      "learning_rate": 1.1450559610777506e-05,
      "loss": 2.4322,
      "step": 35115
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0708614587783813,
      "learning_rate": 1.145015222754137e-05,
      "loss": 2.2153,
      "step": 35116
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1252928972244263,
      "learning_rate": 1.1449744841846834e-05,
      "loss": 2.4493,
      "step": 35117
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0299164056777954,
      "learning_rate": 1.1449337453694587e-05,
      "loss": 2.3356,
      "step": 35118
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.092514991760254,
      "learning_rate": 1.1448930063085317e-05,
      "loss": 2.3509,
      "step": 35119
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3335951566696167,
      "learning_rate": 1.144852267001972e-05,
      "loss": 2.2417,
      "step": 35120
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0109730958938599,
      "learning_rate": 1.1448115274498481e-05,
      "loss": 2.3751,
      "step": 35121
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0917733907699585,
      "learning_rate": 1.1447707876522299e-05,
      "loss": 2.2294,
      "step": 35122
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1825969219207764,
      "learning_rate": 1.1447300476091854e-05,
      "loss": 2.5343,
      "step": 35123
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1572421789169312,
      "learning_rate": 1.1446893073207843e-05,
      "loss": 2.3893,
      "step": 35124
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0755892992019653,
      "learning_rate": 1.1446485667870954e-05,
      "loss": 2.3829,
      "step": 35125
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0301997661590576,
      "learning_rate": 1.1446078260081878e-05,
      "loss": 2.477,
      "step": 35126
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0206395387649536,
      "learning_rate": 1.1445670849841313e-05,
      "loss": 2.4051,
      "step": 35127
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1087944507598877,
      "learning_rate": 1.1445263437149939e-05,
      "loss": 2.4395,
      "step": 35128
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1032302379608154,
      "learning_rate": 1.1444856022008448e-05,
      "loss": 2.3312,
      "step": 35129
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4152384996414185,
      "learning_rate": 1.1444448604417539e-05,
      "loss": 2.3211,
      "step": 35130
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1289374828338623,
      "learning_rate": 1.1444041184377895e-05,
      "loss": 2.3014,
      "step": 35131
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0443247556686401,
      "learning_rate": 1.1443633761890207e-05,
      "loss": 2.2589,
      "step": 35132
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0780121088027954,
      "learning_rate": 1.1443226336955168e-05,
      "loss": 2.2788,
      "step": 35133
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0139710903167725,
      "learning_rate": 1.144281890957347e-05,
      "loss": 2.5394,
      "step": 35134
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9758664965629578,
      "learning_rate": 1.1442411479745803e-05,
      "loss": 2.2478,
      "step": 35135
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1062524318695068,
      "learning_rate": 1.1442004047472857e-05,
      "loss": 2.2837,
      "step": 35136
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.115478277206421,
      "learning_rate": 1.144159661275532e-05,
      "loss": 2.2745,
      "step": 35137
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.122933268547058,
      "learning_rate": 1.1441189175593885e-05,
      "loss": 2.5686,
      "step": 35138
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1058415174484253,
      "learning_rate": 1.1440781735989246e-05,
      "loss": 2.3158,
      "step": 35139
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0940958261489868,
      "learning_rate": 1.1440374293942087e-05,
      "loss": 2.2168,
      "step": 35140
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0270780324935913,
      "learning_rate": 1.1439966849453104e-05,
      "loss": 2.323,
      "step": 35141
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1209485530853271,
      "learning_rate": 1.1439559402522988e-05,
      "loss": 2.2598,
      "step": 35142
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1476351022720337,
      "learning_rate": 1.1439151953152425e-05,
      "loss": 2.1217,
      "step": 35143
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0519434213638306,
      "learning_rate": 1.1438744501342111e-05,
      "loss": 2.6157,
      "step": 35144
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9260676503181458,
      "learning_rate": 1.1438337047092731e-05,
      "loss": 2.2721,
      "step": 35145
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9427043199539185,
      "learning_rate": 1.1437929590404982e-05,
      "loss": 2.3524,
      "step": 35146
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1499375104904175,
      "learning_rate": 1.143752213127955e-05,
      "loss": 2.3293,
      "step": 35147
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6063791513442993,
      "learning_rate": 1.1437114669717131e-05,
      "loss": 2.1148,
      "step": 35148
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.103546142578125,
      "learning_rate": 1.143670720571841e-05,
      "loss": 2.2489,
      "step": 35149
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9094188213348389,
      "learning_rate": 1.1436299739284078e-05,
      "loss": 2.312,
      "step": 35150
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0795295238494873,
      "learning_rate": 1.143589227041483e-05,
      "loss": 2.4027,
      "step": 35151
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0913151502609253,
      "learning_rate": 1.1435484799111354e-05,
      "loss": 2.4185,
      "step": 35152
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1454763412475586,
      "learning_rate": 1.1435077325374343e-05,
      "loss": 2.2675,
      "step": 35153
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.077210783958435,
      "learning_rate": 1.1434669849204483e-05,
      "loss": 2.3856,
      "step": 35154
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0598552227020264,
      "learning_rate": 1.1434262370602472e-05,
      "loss": 2.556,
      "step": 35155
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0782465934753418,
      "learning_rate": 1.1433854889568994e-05,
      "loss": 2.473,
      "step": 35156
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.105908989906311,
      "learning_rate": 1.1433447406104742e-05,
      "loss": 2.4069,
      "step": 35157
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.357222080230713,
      "learning_rate": 1.1433039920210411e-05,
      "loss": 2.623,
      "step": 35158
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0763133764266968,
      "learning_rate": 1.1432632431886686e-05,
      "loss": 2.2418,
      "step": 35159
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9962920546531677,
      "learning_rate": 1.1432224941134258e-05,
      "loss": 2.279,
      "step": 35160
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0978195667266846,
      "learning_rate": 1.143181744795382e-05,
      "loss": 2.6377,
      "step": 35161
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0102779865264893,
      "learning_rate": 1.1431409952346066e-05,
      "loss": 2.3832,
      "step": 35162
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0996330976486206,
      "learning_rate": 1.1431002454311681e-05,
      "loss": 2.4754,
      "step": 35163
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0132027864456177,
      "learning_rate": 1.143059495385136e-05,
      "loss": 2.3416,
      "step": 35164
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2157135009765625,
      "learning_rate": 1.1430187450965788e-05,
      "loss": 2.3667,
      "step": 35165
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.037145733833313,
      "learning_rate": 1.1429779945655661e-05,
      "loss": 2.2611,
      "step": 35166
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1348384618759155,
      "learning_rate": 1.1429372437921671e-05,
      "loss": 2.1259,
      "step": 35167
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9839007258415222,
      "learning_rate": 1.1428964927764508e-05,
      "loss": 2.314,
      "step": 35168
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0949914455413818,
      "learning_rate": 1.1428557415184858e-05,
      "loss": 2.2539,
      "step": 35169
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0400804281234741,
      "learning_rate": 1.1428149900183417e-05,
      "loss": 2.5002,
      "step": 35170
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2122820615768433,
      "learning_rate": 1.142774238276087e-05,
      "loss": 2.4027,
      "step": 35171
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.715959072113037,
      "learning_rate": 1.1427334862917916e-05,
      "loss": 2.2588,
      "step": 35172
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0664677619934082,
      "learning_rate": 1.1426927340655238e-05,
      "loss": 2.3411,
      "step": 35173
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1314396858215332,
      "learning_rate": 1.1426519815973533e-05,
      "loss": 2.6177,
      "step": 35174
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.133341908454895,
      "learning_rate": 1.1426112288873492e-05,
      "loss": 2.3973,
      "step": 35175
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2226063013076782,
      "learning_rate": 1.14257047593558e-05,
      "loss": 2.4773,
      "step": 35176
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0756105184555054,
      "learning_rate": 1.1425297227421152e-05,
      "loss": 2.2451,
      "step": 35177
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.09919011592865,
      "learning_rate": 1.1424889693070238e-05,
      "loss": 2.4189,
      "step": 35178
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0960439443588257,
      "learning_rate": 1.1424482156303748e-05,
      "loss": 2.5493,
      "step": 35179
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1022778749465942,
      "learning_rate": 1.1424074617122376e-05,
      "loss": 2.4887,
      "step": 35180
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.998387336730957,
      "learning_rate": 1.1423667075526813e-05,
      "loss": 2.3184,
      "step": 35181
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.113429069519043,
      "learning_rate": 1.1423259531517742e-05,
      "loss": 2.4747,
      "step": 35182
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2055046558380127,
      "learning_rate": 1.1422851985095865e-05,
      "loss": 2.6368,
      "step": 35183
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2804429531097412,
      "learning_rate": 1.1422444436261864e-05,
      "loss": 2.4597,
      "step": 35184
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3251577615737915,
      "learning_rate": 1.1422036885016432e-05,
      "loss": 2.3813,
      "step": 35185
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.960060715675354,
      "learning_rate": 1.1421629331360265e-05,
      "loss": 2.3935,
      "step": 35186
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0503897666931152,
      "learning_rate": 1.1421221775294048e-05,
      "loss": 2.3915,
      "step": 35187
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0507992506027222,
      "learning_rate": 1.1420814216818475e-05,
      "loss": 2.4004,
      "step": 35188
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1115115880966187,
      "learning_rate": 1.1420406655934237e-05,
      "loss": 2.3164,
      "step": 35189
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.120665192604065,
      "learning_rate": 1.1419999092642023e-05,
      "loss": 2.0698,
      "step": 35190
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0488399267196655,
      "learning_rate": 1.1419591526942522e-05,
      "loss": 2.4506,
      "step": 35191
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1140780448913574,
      "learning_rate": 1.141918395883643e-05,
      "loss": 2.1709,
      "step": 35192
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0579739809036255,
      "learning_rate": 1.1418776388324436e-05,
      "loss": 2.4556,
      "step": 35193
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1238430738449097,
      "learning_rate": 1.1418368815407232e-05,
      "loss": 2.1703,
      "step": 35194
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0763754844665527,
      "learning_rate": 1.1417961240085505e-05,
      "loss": 2.3706,
      "step": 35195
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9904897212982178,
      "learning_rate": 1.141755366235995e-05,
      "loss": 2.4729,
      "step": 35196
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0548481941223145,
      "learning_rate": 1.1417146082231257e-05,
      "loss": 2.1516,
      "step": 35197
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1870273351669312,
      "learning_rate": 1.1416738499700115e-05,
      "loss": 2.2845,
      "step": 35198
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6941311359405518,
      "learning_rate": 1.1416330914767216e-05,
      "loss": 2.5132,
      "step": 35199
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0732351541519165,
      "learning_rate": 1.1415923327433252e-05,
      "loss": 2.2636,
      "step": 35200
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0450847148895264,
      "learning_rate": 1.1415515737698917e-05,
      "loss": 2.6046,
      "step": 35201
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1835671663284302,
      "learning_rate": 1.1415108145564894e-05,
      "loss": 2.2318,
      "step": 35202
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0467350482940674,
      "learning_rate": 1.1414700551031879e-05,
      "loss": 2.3622,
      "step": 35203
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1924155950546265,
      "learning_rate": 1.1414292954100563e-05,
      "loss": 2.3949,
      "step": 35204
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1507400274276733,
      "learning_rate": 1.1413885354771637e-05,
      "loss": 2.3127,
      "step": 35205
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0292890071868896,
      "learning_rate": 1.1413477753045789e-05,
      "loss": 2.3456,
      "step": 35206
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0950607061386108,
      "learning_rate": 1.1413070148923715e-05,
      "loss": 2.3932,
      "step": 35207
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0399580001831055,
      "learning_rate": 1.1412662542406104e-05,
      "loss": 2.4962,
      "step": 35208
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1457836627960205,
      "learning_rate": 1.1412254933493646e-05,
      "loss": 2.44,
      "step": 35209
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0755113363265991,
      "learning_rate": 1.1411847322187028e-05,
      "loss": 2.4411,
      "step": 35210
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0558445453643799,
      "learning_rate": 1.1411439708486948e-05,
      "loss": 2.2456,
      "step": 35211
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1422265768051147,
      "learning_rate": 1.1411032092394094e-05,
      "loss": 2.4161,
      "step": 35212
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0998406410217285,
      "learning_rate": 1.1410624473909158e-05,
      "loss": 2.3909,
      "step": 35213
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1356297731399536,
      "learning_rate": 1.1410216853032834e-05,
      "loss": 2.4629,
      "step": 35214
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1127547025680542,
      "learning_rate": 1.1409809229765805e-05,
      "loss": 2.3565,
      "step": 35215
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0489774942398071,
      "learning_rate": 1.1409401604108767e-05,
      "loss": 2.1547,
      "step": 35216
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9791337251663208,
      "learning_rate": 1.1408993976062412e-05,
      "loss": 2.3813,
      "step": 35217
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3071315288543701,
      "learning_rate": 1.1408586345627428e-05,
      "loss": 2.2348,
      "step": 35218
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0567420721054077,
      "learning_rate": 1.1408178712804507e-05,
      "loss": 2.5713,
      "step": 35219
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0725653171539307,
      "learning_rate": 1.1407771077594342e-05,
      "loss": 2.189,
      "step": 35220
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1000417470932007,
      "learning_rate": 1.1407363439997625e-05,
      "loss": 2.4593,
      "step": 35221
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0806292295455933,
      "learning_rate": 1.140695580001504e-05,
      "loss": 2.4252,
      "step": 35222
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.235977053642273,
      "learning_rate": 1.1406548157647287e-05,
      "loss": 2.2595,
      "step": 35223
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.13713538646698,
      "learning_rate": 1.140614051289505e-05,
      "loss": 2.278,
      "step": 35224
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9585316181182861,
      "learning_rate": 1.140573286575902e-05,
      "loss": 2.2563,
      "step": 35225
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0365558862686157,
      "learning_rate": 1.1405325216239896e-05,
      "loss": 2.4226,
      "step": 35226
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0579880475997925,
      "learning_rate": 1.1404917564338366e-05,
      "loss": 2.4822,
      "step": 35227
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1074632406234741,
      "learning_rate": 1.1404509910055114e-05,
      "loss": 2.0588,
      "step": 35228
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.019262433052063,
      "learning_rate": 1.1404102253390838e-05,
      "loss": 2.1906,
      "step": 35229
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0620951652526855,
      "learning_rate": 1.1403694594346227e-05,
      "loss": 2.4156,
      "step": 35230
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1186846494674683,
      "learning_rate": 1.1403286932921973e-05,
      "loss": 2.3554,
      "step": 35231
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.035537838935852,
      "learning_rate": 1.1402879269118766e-05,
      "loss": 2.1156,
      "step": 35232
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0936086177825928,
      "learning_rate": 1.1402471602937297e-05,
      "loss": 2.4262,
      "step": 35233
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9935060739517212,
      "learning_rate": 1.140206393437826e-05,
      "loss": 2.229,
      "step": 35234
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0198805332183838,
      "learning_rate": 1.1401656263442342e-05,
      "loss": 2.4278,
      "step": 35235
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1373603343963623,
      "learning_rate": 1.1401248590130236e-05,
      "loss": 2.184,
      "step": 35236
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1706509590148926,
      "learning_rate": 1.1400840914442632e-05,
      "loss": 2.1371,
      "step": 35237
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9913342595100403,
      "learning_rate": 1.1400433236380226e-05,
      "loss": 2.296,
      "step": 35238
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9987629652023315,
      "learning_rate": 1.1400025555943701e-05,
      "loss": 2.2068,
      "step": 35239
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1134330034255981,
      "learning_rate": 1.1399617873133754e-05,
      "loss": 2.5434,
      "step": 35240
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0230189561843872,
      "learning_rate": 1.1399210187951078e-05,
      "loss": 2.2677,
      "step": 35241
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.923782467842102,
      "learning_rate": 1.1398802500396357e-05,
      "loss": 2.4513,
      "step": 35242
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9950152635574341,
      "learning_rate": 1.1398394810470285e-05,
      "loss": 2.3828,
      "step": 35243
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2079497575759888,
      "learning_rate": 1.1397987118173557e-05,
      "loss": 2.4131,
      "step": 35244
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0296199321746826,
      "learning_rate": 1.1397579423506861e-05,
      "loss": 2.1588,
      "step": 35245
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9638429880142212,
      "learning_rate": 1.1397171726470886e-05,
      "loss": 2.3253,
      "step": 35246
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0974974632263184,
      "learning_rate": 1.1396764027066327e-05,
      "loss": 2.2607,
      "step": 35247
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0427889823913574,
      "learning_rate": 1.1396356325293874e-05,
      "loss": 2.2948,
      "step": 35248
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0173003673553467,
      "learning_rate": 1.1395948621154217e-05,
      "loss": 2.3344,
      "step": 35249
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1739329099655151,
      "learning_rate": 1.1395540914648048e-05,
      "loss": 2.424,
      "step": 35250
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2207913398742676,
      "learning_rate": 1.1395133205776057e-05,
      "loss": 2.2881,
      "step": 35251
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.162994146347046,
      "learning_rate": 1.1394725494538936e-05,
      "loss": 2.2561,
      "step": 35252
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1129467487335205,
      "learning_rate": 1.139431778093738e-05,
      "loss": 2.2284,
      "step": 35253
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0300267934799194,
      "learning_rate": 1.1393910064972076e-05,
      "loss": 2.2501,
      "step": 35254
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0884771347045898,
      "learning_rate": 1.1393502346643713e-05,
      "loss": 2.123,
      "step": 35255
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1098121404647827,
      "learning_rate": 1.1393094625952985e-05,
      "loss": 2.3479,
      "step": 35256
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0705816745758057,
      "learning_rate": 1.1392686902900584e-05,
      "loss": 2.2332,
      "step": 35257
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.052075982093811,
      "learning_rate": 1.1392279177487203e-05,
      "loss": 2.4433,
      "step": 35258
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1676990985870361,
      "learning_rate": 1.1391871449713528e-05,
      "loss": 2.4093,
      "step": 35259
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0204782485961914,
      "learning_rate": 1.1391463719580255e-05,
      "loss": 2.3851,
      "step": 35260
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2657784223556519,
      "learning_rate": 1.1391055987088072e-05,
      "loss": 2.5197,
      "step": 35261
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0094965696334839,
      "learning_rate": 1.139064825223767e-05,
      "loss": 2.4386,
      "step": 35262
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1541955471038818,
      "learning_rate": 1.1390240515029743e-05,
      "loss": 2.4413,
      "step": 35263
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9791756272315979,
      "learning_rate": 1.138983277546498e-05,
      "loss": 2.3044,
      "step": 35264
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1442339420318604,
      "learning_rate": 1.1389425033544073e-05,
      "loss": 2.3904,
      "step": 35265
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0359917879104614,
      "learning_rate": 1.1389017289267712e-05,
      "loss": 2.4754,
      "step": 35266
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0135008096694946,
      "learning_rate": 1.1388609542636593e-05,
      "loss": 2.4036,
      "step": 35267
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0607117414474487,
      "learning_rate": 1.1388201793651401e-05,
      "loss": 2.3676,
      "step": 35268
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2698854207992554,
      "learning_rate": 1.138779404231283e-05,
      "loss": 2.3381,
      "step": 35269
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.008711814880371,
      "learning_rate": 1.1387386288621571e-05,
      "loss": 2.296,
      "step": 35270
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0962672233581543,
      "learning_rate": 1.1386978532578314e-05,
      "loss": 2.4874,
      "step": 35271
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0164852142333984,
      "learning_rate": 1.1386570774183753e-05,
      "loss": 2.427,
      "step": 35272
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2301852703094482,
      "learning_rate": 1.1386163013438583e-05,
      "loss": 2.275,
      "step": 35273
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.073171615600586,
      "learning_rate": 1.1385755250343485e-05,
      "loss": 2.1469,
      "step": 35274
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0658038854599,
      "learning_rate": 1.1385347484899157e-05,
      "loss": 2.4467,
      "step": 35275
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0499809980392456,
      "learning_rate": 1.1384939717106289e-05,
      "loss": 2.4627,
      "step": 35276
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9846980571746826,
      "learning_rate": 1.1384531946965571e-05,
      "loss": 2.1708,
      "step": 35277
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0224753618240356,
      "learning_rate": 1.1384124174477695e-05,
      "loss": 2.5932,
      "step": 35278
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.160192608833313,
      "learning_rate": 1.1383716399643353e-05,
      "loss": 2.2922,
      "step": 35279
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.19855797290802,
      "learning_rate": 1.1383308622463239e-05,
      "loss": 2.4549,
      "step": 35280
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0938174724578857,
      "learning_rate": 1.1382900842938037e-05,
      "loss": 2.3842,
      "step": 35281
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0754667520523071,
      "learning_rate": 1.1382493061068442e-05,
      "loss": 2.2179,
      "step": 35282
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1003377437591553,
      "learning_rate": 1.1382085276855148e-05,
      "loss": 2.2041,
      "step": 35283
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0601624250411987,
      "learning_rate": 1.1381677490298843e-05,
      "loss": 2.2393,
      "step": 35284
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.07147216796875,
      "learning_rate": 1.1381269701400219e-05,
      "loss": 2.2835,
      "step": 35285
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.106166124343872,
      "learning_rate": 1.1380861910159968e-05,
      "loss": 2.3843,
      "step": 35286
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0809122323989868,
      "learning_rate": 1.1380454116578786e-05,
      "loss": 2.2048,
      "step": 35287
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1271119117736816,
      "learning_rate": 1.1380046320657352e-05,
      "loss": 2.5507,
      "step": 35288
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0296567678451538,
      "learning_rate": 1.1379638522396368e-05,
      "loss": 2.2886,
      "step": 35289
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0694509744644165,
      "learning_rate": 1.1379230721796522e-05,
      "loss": 2.5987,
      "step": 35290
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0407099723815918,
      "learning_rate": 1.1378822918858502e-05,
      "loss": 2.4207,
      "step": 35291
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1583847999572754,
      "learning_rate": 1.1378415113583007e-05,
      "loss": 2.1846,
      "step": 35292
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0441436767578125,
      "learning_rate": 1.1378007305970724e-05,
      "loss": 2.3695,
      "step": 35293
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2115074396133423,
      "learning_rate": 1.1377599496022343e-05,
      "loss": 2.4286,
      "step": 35294
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0279299020767212,
      "learning_rate": 1.1377191683738558e-05,
      "loss": 2.0729,
      "step": 35295
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0790224075317383,
      "learning_rate": 1.1376783869120056e-05,
      "loss": 2.3636,
      "step": 35296
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1227576732635498,
      "learning_rate": 1.1376376052167532e-05,
      "loss": 2.4727,
      "step": 35297
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1596564054489136,
      "learning_rate": 1.1375968232881678e-05,
      "loss": 2.2536,
      "step": 35298
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1142364740371704,
      "learning_rate": 1.1375560411263184e-05,
      "loss": 2.6807,
      "step": 35299
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0348438024520874,
      "learning_rate": 1.1375152587312743e-05,
      "loss": 2.2448,
      "step": 35300
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.967348039150238,
      "learning_rate": 1.1374744761031044e-05,
      "loss": 2.2122,
      "step": 35301
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2636293172836304,
      "learning_rate": 1.1374336932418778e-05,
      "loss": 2.2236,
      "step": 35302
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1199127435684204,
      "learning_rate": 1.1373929101476637e-05,
      "loss": 2.3864,
      "step": 35303
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0237613916397095,
      "learning_rate": 1.1373521268205318e-05,
      "loss": 2.3676,
      "step": 35304
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.056903600692749,
      "learning_rate": 1.1373113432605503e-05,
      "loss": 2.2106,
      "step": 35305
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3015774488449097,
      "learning_rate": 1.1372705594677889e-05,
      "loss": 2.1374,
      "step": 35306
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0600630044937134,
      "learning_rate": 1.1372297754423167e-05,
      "loss": 2.2865,
      "step": 35307
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9891830086708069,
      "learning_rate": 1.1371889911842027e-05,
      "loss": 2.248,
      "step": 35308
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1439716815948486,
      "learning_rate": 1.137148206693516e-05,
      "loss": 2.2013,
      "step": 35309
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.071857213973999,
      "learning_rate": 1.1371074219703258e-05,
      "loss": 2.401,
      "step": 35310
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1676884889602661,
      "learning_rate": 1.1370666370147013e-05,
      "loss": 2.2772,
      "step": 35311
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1158782243728638,
      "learning_rate": 1.1370258518267116e-05,
      "loss": 2.4101,
      "step": 35312
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.03104829788208,
      "learning_rate": 1.1369850664064262e-05,
      "loss": 2.6926,
      "step": 35313
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2941386699676514,
      "learning_rate": 1.1369442807539136e-05,
      "loss": 2.3439,
      "step": 35314
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9782087206840515,
      "learning_rate": 1.1369034948692433e-05,
      "loss": 2.316,
      "step": 35315
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0703668594360352,
      "learning_rate": 1.1368627087524843e-05,
      "loss": 2.3459,
      "step": 35316
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.046679973602295,
      "learning_rate": 1.136821922403706e-05,
      "loss": 2.4559,
      "step": 35317
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1149811744689941,
      "learning_rate": 1.1367811358229773e-05,
      "loss": 2.2873,
      "step": 35318
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.076650857925415,
      "learning_rate": 1.1367403490103674e-05,
      "loss": 2.2539,
      "step": 35319
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.980915904045105,
      "learning_rate": 1.1366995619659456e-05,
      "loss": 2.1286,
      "step": 35320
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1422533988952637,
      "learning_rate": 1.1366587746897808e-05,
      "loss": 2.2769,
      "step": 35321
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0500985383987427,
      "learning_rate": 1.1366179871819422e-05,
      "loss": 2.5008,
      "step": 35322
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2248319387435913,
      "learning_rate": 1.1365771994424992e-05,
      "loss": 2.3437,
      "step": 35323
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0703601837158203,
      "learning_rate": 1.1365364114715205e-05,
      "loss": 2.4086,
      "step": 35324
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1872966289520264,
      "learning_rate": 1.1364956232690759e-05,
      "loss": 2.4977,
      "step": 35325
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0827116966247559,
      "learning_rate": 1.136454834835234e-05,
      "loss": 2.3997,
      "step": 35326
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1443196535110474,
      "learning_rate": 1.1364140461700639e-05,
      "loss": 2.2297,
      "step": 35327
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0278960466384888,
      "learning_rate": 1.136373257273635e-05,
      "loss": 2.4737,
      "step": 35328
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1306365728378296,
      "learning_rate": 1.1363324681460164e-05,
      "loss": 2.3449,
      "step": 35329
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.016767978668213,
      "learning_rate": 1.1362916787872771e-05,
      "loss": 2.3229,
      "step": 35330
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8565119504928589,
      "learning_rate": 1.1362508891974866e-05,
      "loss": 2.4455,
      "step": 35331
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6326053142547607,
      "learning_rate": 1.136210099376714e-05,
      "loss": 2.4549,
      "step": 35332
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0092601776123047,
      "learning_rate": 1.1361693093250282e-05,
      "loss": 2.3363,
      "step": 35333
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.042265772819519,
      "learning_rate": 1.1361285190424982e-05,
      "loss": 2.3382,
      "step": 35334
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3779135942459106,
      "learning_rate": 1.1360877285291935e-05,
      "loss": 2.3989,
      "step": 35335
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.174310564994812,
      "learning_rate": 1.1360469377851832e-05,
      "loss": 2.3589,
      "step": 35336
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0007332563400269,
      "learning_rate": 1.1360061468105364e-05,
      "loss": 2.395,
      "step": 35337
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1432316303253174,
      "learning_rate": 1.135965355605322e-05,
      "loss": 2.1324,
      "step": 35338
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0978196859359741,
      "learning_rate": 1.1359245641696097e-05,
      "loss": 2.3693,
      "step": 35339
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0931652784347534,
      "learning_rate": 1.1358837725034683e-05,
      "loss": 2.2234,
      "step": 35340
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0610522031784058,
      "learning_rate": 1.1358429806069669e-05,
      "loss": 2.2782,
      "step": 35341
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0872653722763062,
      "learning_rate": 1.1358021884801746e-05,
      "loss": 2.267,
      "step": 35342
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1331195831298828,
      "learning_rate": 1.135761396123161e-05,
      "loss": 2.3696,
      "step": 35343
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0396411418914795,
      "learning_rate": 1.1357206035359946e-05,
      "loss": 2.1962,
      "step": 35344
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0317434072494507,
      "learning_rate": 1.1356798107187453e-05,
      "loss": 2.2086,
      "step": 35345
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0315566062927246,
      "learning_rate": 1.135639017671482e-05,
      "loss": 2.4232,
      "step": 35346
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0820127725601196,
      "learning_rate": 1.1355982243942734e-05,
      "loss": 2.4163,
      "step": 35347
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9846935272216797,
      "learning_rate": 1.1355574308871888e-05,
      "loss": 2.3916,
      "step": 35348
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0796420574188232,
      "learning_rate": 1.1355166371502979e-05,
      "loss": 2.4212,
      "step": 35349
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1777600049972534,
      "learning_rate": 1.1354758431836692e-05,
      "loss": 2.2611,
      "step": 35350
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.097668170928955,
      "learning_rate": 1.1354350489873725e-05,
      "loss": 2.3244,
      "step": 35351
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0143319368362427,
      "learning_rate": 1.1353942545614762e-05,
      "loss": 2.2728,
      "step": 35352
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0520955324172974,
      "learning_rate": 1.1353534599060503e-05,
      "loss": 2.2952,
      "step": 35353
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9883099794387817,
      "learning_rate": 1.1353126650211634e-05,
      "loss": 2.2133,
      "step": 35354
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0374746322631836,
      "learning_rate": 1.1352718699068846e-05,
      "loss": 2.6299,
      "step": 35355
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0624971389770508,
      "learning_rate": 1.1352310745632833e-05,
      "loss": 2.3194,
      "step": 35356
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9777982234954834,
      "learning_rate": 1.1351902789904286e-05,
      "loss": 2.3803,
      "step": 35357
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1146687269210815,
      "learning_rate": 1.1351494831883895e-05,
      "loss": 2.5159,
      "step": 35358
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9987768530845642,
      "learning_rate": 1.135108687157236e-05,
      "loss": 2.3633,
      "step": 35359
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0188335180282593,
      "learning_rate": 1.135067890897036e-05,
      "loss": 2.2525,
      "step": 35360
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9864283800125122,
      "learning_rate": 1.1350270944078591e-05,
      "loss": 2.5094,
      "step": 35361
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0694007873535156,
      "learning_rate": 1.1349862976897749e-05,
      "loss": 2.4346,
      "step": 35362
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.138289213180542,
      "learning_rate": 1.1349455007428525e-05,
      "loss": 2.5975,
      "step": 35363
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1291847229003906,
      "learning_rate": 1.1349047035671606e-05,
      "loss": 2.4015,
      "step": 35364
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9970235228538513,
      "learning_rate": 1.1348639061627685e-05,
      "loss": 2.2939,
      "step": 35365
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.996609091758728,
      "learning_rate": 1.1348231085297453e-05,
      "loss": 2.4291,
      "step": 35366
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.07417893409729,
      "learning_rate": 1.1347823106681605e-05,
      "loss": 2.372,
      "step": 35367
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1723968982696533,
      "learning_rate": 1.1347415125780832e-05,
      "loss": 2.3771,
      "step": 35368
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0813647508621216,
      "learning_rate": 1.1347007142595822e-05,
      "loss": 2.6029,
      "step": 35369
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3482447862625122,
      "learning_rate": 1.134659915712727e-05,
      "loss": 2.2961,
      "step": 35370
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1011186838150024,
      "learning_rate": 1.1346191169375866e-05,
      "loss": 2.3713,
      "step": 35371
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1158740520477295,
      "learning_rate": 1.1345783179342304e-05,
      "loss": 2.592,
      "step": 35372
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.990526020526886,
      "learning_rate": 1.1345375187027275e-05,
      "loss": 2.2889,
      "step": 35373
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3821675777435303,
      "learning_rate": 1.1344967192431466e-05,
      "loss": 2.2789,
      "step": 35374
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0330935716629028,
      "learning_rate": 1.1344559195555575e-05,
      "loss": 2.3355,
      "step": 35375
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0384162664413452,
      "learning_rate": 1.1344151196400288e-05,
      "loss": 2.487,
      "step": 35376
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.164931297302246,
      "learning_rate": 1.1343743194966304e-05,
      "loss": 2.2238,
      "step": 35377
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2085598707199097,
      "learning_rate": 1.1343335191254306e-05,
      "loss": 2.3964,
      "step": 35378
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9772317409515381,
      "learning_rate": 1.1342927185264994e-05,
      "loss": 2.2957,
      "step": 35379
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0183342695236206,
      "learning_rate": 1.1342519176999055e-05,
      "loss": 2.3589,
      "step": 35380
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0615756511688232,
      "learning_rate": 1.134211116645718e-05,
      "loss": 2.5148,
      "step": 35381
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.950034499168396,
      "learning_rate": 1.134170315364006e-05,
      "loss": 2.4597,
      "step": 35382
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0215203762054443,
      "learning_rate": 1.1341295138548392e-05,
      "loss": 2.2976,
      "step": 35383
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1135042905807495,
      "learning_rate": 1.1340887121182862e-05,
      "loss": 2.7474,
      "step": 35384
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2063593864440918,
      "learning_rate": 1.1340479101544167e-05,
      "loss": 2.4541,
      "step": 35385
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9940555095672607,
      "learning_rate": 1.1340071079632995e-05,
      "loss": 2.4734,
      "step": 35386
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.129359483718872,
      "learning_rate": 1.1339663055450036e-05,
      "loss": 2.4197,
      "step": 35387
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2227164506912231,
      "learning_rate": 1.1339255028995986e-05,
      "loss": 2.1891,
      "step": 35388
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0425502061843872,
      "learning_rate": 1.1338847000271536e-05,
      "loss": 2.2889,
      "step": 35389
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.937895655632019,
      "learning_rate": 1.1338438969277376e-05,
      "loss": 2.2107,
      "step": 35390
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1274486780166626,
      "learning_rate": 1.13380309360142e-05,
      "loss": 2.3671,
      "step": 35391
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0417507886886597,
      "learning_rate": 1.1337622900482696e-05,
      "loss": 2.2182,
      "step": 35392
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0751723051071167,
      "learning_rate": 1.1337214862683559e-05,
      "loss": 2.3053,
      "step": 35393
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9437381029129028,
      "learning_rate": 1.1336806822617481e-05,
      "loss": 2.2743,
      "step": 35394
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.086371660232544,
      "learning_rate": 1.133639878028515e-05,
      "loss": 2.4455,
      "step": 35395
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0292257070541382,
      "learning_rate": 1.1335990735687258e-05,
      "loss": 2.1971,
      "step": 35396
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.125989317893982,
      "learning_rate": 1.1335582688824502e-05,
      "loss": 2.1609,
      "step": 35397
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0499564409255981,
      "learning_rate": 1.1335174639697571e-05,
      "loss": 2.2733,
      "step": 35398
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2209861278533936,
      "learning_rate": 1.1334766588307156e-05,
      "loss": 2.4035,
      "step": 35399
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1928246021270752,
      "learning_rate": 1.1334358534653948e-05,
      "loss": 2.4216,
      "step": 35400
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.015510082244873,
      "learning_rate": 1.133395047873864e-05,
      "loss": 2.2801,
      "step": 35401
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0374294519424438,
      "learning_rate": 1.1333542420561923e-05,
      "loss": 2.2421,
      "step": 35402
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.027875542640686,
      "learning_rate": 1.1333134360124492e-05,
      "loss": 2.534,
      "step": 35403
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.051637053489685,
      "learning_rate": 1.1332726297427032e-05,
      "loss": 2.5818,
      "step": 35404
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0667206048965454,
      "learning_rate": 1.1332318232470243e-05,
      "loss": 2.2436,
      "step": 35405
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0767990350723267,
      "learning_rate": 1.1331910165254811e-05,
      "loss": 2.4196,
      "step": 35406
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3670099973678589,
      "learning_rate": 1.133150209578143e-05,
      "loss": 2.2602,
      "step": 35407
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0709034204483032,
      "learning_rate": 1.133109402405079e-05,
      "loss": 2.3745,
      "step": 35408
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9917925000190735,
      "learning_rate": 1.1330685950063587e-05,
      "loss": 2.3027,
      "step": 35409
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1515847444534302,
      "learning_rate": 1.1330277873820508e-05,
      "loss": 2.4343,
      "step": 35410
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1031655073165894,
      "learning_rate": 1.1329869795322246e-05,
      "loss": 2.4035,
      "step": 35411
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.074230432510376,
      "learning_rate": 1.1329461714569497e-05,
      "loss": 2.3653,
      "step": 35412
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.116489291191101,
      "learning_rate": 1.1329053631562945e-05,
      "loss": 2.4181,
      "step": 35413
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0324385166168213,
      "learning_rate": 1.1328645546303288e-05,
      "loss": 2.466,
      "step": 35414
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1019612550735474,
      "learning_rate": 1.1328237458791214e-05,
      "loss": 2.1718,
      "step": 35415
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.087104320526123,
      "learning_rate": 1.132782936902742e-05,
      "loss": 2.3684,
      "step": 35416
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0082489252090454,
      "learning_rate": 1.1327421277012591e-05,
      "loss": 2.417,
      "step": 35417
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9404365420341492,
      "learning_rate": 1.1327013182747423e-05,
      "loss": 2.0719,
      "step": 35418
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1544322967529297,
      "learning_rate": 1.1326605086232612e-05,
      "loss": 2.372,
      "step": 35419
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0184650421142578,
      "learning_rate": 1.132619698746884e-05,
      "loss": 2.4817,
      "step": 35420
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2509511709213257,
      "learning_rate": 1.1325788886456808e-05,
      "loss": 2.3561,
      "step": 35421
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.5726420879364014,
      "learning_rate": 1.1325380783197202e-05,
      "loss": 2.5339,
      "step": 35422
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0383869409561157,
      "learning_rate": 1.1324972677690715e-05,
      "loss": 2.356,
      "step": 35423
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9583260416984558,
      "learning_rate": 1.132456456993804e-05,
      "loss": 2.1422,
      "step": 35424
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9668848514556885,
      "learning_rate": 1.1324156459939868e-05,
      "loss": 2.5111,
      "step": 35425
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.256956696510315,
      "learning_rate": 1.1323748347696892e-05,
      "loss": 2.1994,
      "step": 35426
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0352296829223633,
      "learning_rate": 1.13233402332098e-05,
      "loss": 2.244,
      "step": 35427
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0584162473678589,
      "learning_rate": 1.132293211647929e-05,
      "loss": 2.4164,
      "step": 35428
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0094109773635864,
      "learning_rate": 1.132252399750605e-05,
      "loss": 2.3549,
      "step": 35429
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0237420797348022,
      "learning_rate": 1.1322115876290772e-05,
      "loss": 2.1186,
      "step": 35430
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9725824594497681,
      "learning_rate": 1.1321707752834148e-05,
      "loss": 2.2513,
      "step": 35431
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0120162963867188,
      "learning_rate": 1.1321299627136872e-05,
      "loss": 2.2602,
      "step": 35432
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0278879404067993,
      "learning_rate": 1.1320891499199634e-05,
      "loss": 2.3769,
      "step": 35433
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9768653512001038,
      "learning_rate": 1.1320483369023125e-05,
      "loss": 2.6222,
      "step": 35434
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.126595377922058,
      "learning_rate": 1.1320075236608039e-05,
      "loss": 2.1245,
      "step": 35435
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0955554246902466,
      "learning_rate": 1.1319667101955065e-05,
      "loss": 2.5144,
      "step": 35436
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.167322039604187,
      "learning_rate": 1.1319258965064898e-05,
      "loss": 2.1861,
      "step": 35437
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1071337461471558,
      "learning_rate": 1.1318850825938233e-05,
      "loss": 2.482,
      "step": 35438
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1323306560516357,
      "learning_rate": 1.1318442684575753e-05,
      "loss": 2.5019,
      "step": 35439
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0713895559310913,
      "learning_rate": 1.1318034540978155e-05,
      "loss": 2.4684,
      "step": 35440
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9954500198364258,
      "learning_rate": 1.1317626395146131e-05,
      "loss": 2.3629,
      "step": 35441
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0213524103164673,
      "learning_rate": 1.1317218247080371e-05,
      "loss": 2.535,
      "step": 35442
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.180406928062439,
      "learning_rate": 1.131681009678157e-05,
      "loss": 2.3778,
      "step": 35443
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1806516647338867,
      "learning_rate": 1.1316401944250417e-05,
      "loss": 2.3549,
      "step": 35444
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0405632257461548,
      "learning_rate": 1.131599378948761e-05,
      "loss": 2.4131,
      "step": 35445
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1727837324142456,
      "learning_rate": 1.1315585632493831e-05,
      "loss": 2.2386,
      "step": 35446
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0046944618225098,
      "learning_rate": 1.1315177473269776e-05,
      "loss": 2.2612,
      "step": 35447
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1154656410217285,
      "learning_rate": 1.131476931181614e-05,
      "loss": 2.2748,
      "step": 35448
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0070186853408813,
      "learning_rate": 1.1314361148133614e-05,
      "loss": 2.2876,
      "step": 35449
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1469491720199585,
      "learning_rate": 1.131395298222289e-05,
      "loss": 2.5173,
      "step": 35450
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0412955284118652,
      "learning_rate": 1.1313544814084656e-05,
      "loss": 2.4142,
      "step": 35451
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2588560581207275,
      "learning_rate": 1.1313136643719607e-05,
      "loss": 2.5724,
      "step": 35452
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3177772760391235,
      "learning_rate": 1.1312728471128439e-05,
      "loss": 2.3817,
      "step": 35453
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0436824560165405,
      "learning_rate": 1.1312320296311834e-05,
      "loss": 2.3636,
      "step": 35454
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0855495929718018,
      "learning_rate": 1.1311912119270491e-05,
      "loss": 2.3101,
      "step": 35455
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9418045878410339,
      "learning_rate": 1.1311503940005101e-05,
      "loss": 2.468,
      "step": 35456
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0817537307739258,
      "learning_rate": 1.1311095758516356e-05,
      "loss": 2.2824,
      "step": 35457
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0114864110946655,
      "learning_rate": 1.131068757480495e-05,
      "loss": 2.4366,
      "step": 35458
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.109787106513977,
      "learning_rate": 1.131027938887157e-05,
      "loss": 2.4371,
      "step": 35459
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0706971883773804,
      "learning_rate": 1.130987120071691e-05,
      "loss": 2.3767,
      "step": 35460
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1786799430847168,
      "learning_rate": 1.1309463010341664e-05,
      "loss": 2.4704,
      "step": 35461
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0134305953979492,
      "learning_rate": 1.1309054817746521e-05,
      "loss": 2.2521,
      "step": 35462
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0704847574234009,
      "learning_rate": 1.1308646622932176e-05,
      "loss": 2.3778,
      "step": 35463
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0604780912399292,
      "learning_rate": 1.1308238425899318e-05,
      "loss": 2.4411,
      "step": 35464
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9710717797279358,
      "learning_rate": 1.1307830226648643e-05,
      "loss": 2.4212,
      "step": 35465
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1523065567016602,
      "learning_rate": 1.1307422025180839e-05,
      "loss": 2.4471,
      "step": 35466
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1267144680023193,
      "learning_rate": 1.1307013821496599e-05,
      "loss": 2.5801,
      "step": 35467
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0219368934631348,
      "learning_rate": 1.1306605615596615e-05,
      "loss": 2.5494,
      "step": 35468
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.985020101070404,
      "learning_rate": 1.1306197407481581e-05,
      "loss": 2.2241,
      "step": 35469
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9983494877815247,
      "learning_rate": 1.1305789197152188e-05,
      "loss": 2.3694,
      "step": 35470
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9362096786499023,
      "learning_rate": 1.1305380984609128e-05,
      "loss": 2.5318,
      "step": 35471
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1616103649139404,
      "learning_rate": 1.1304972769853093e-05,
      "loss": 2.3367,
      "step": 35472
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1654667854309082,
      "learning_rate": 1.1304564552884773e-05,
      "loss": 2.3652,
      "step": 35473
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.006866455078125,
      "learning_rate": 1.1304156333704861e-05,
      "loss": 2.3574,
      "step": 35474
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9584826827049255,
      "learning_rate": 1.1303748112314051e-05,
      "loss": 2.1833,
      "step": 35475
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0289034843444824,
      "learning_rate": 1.1303339888713034e-05,
      "loss": 2.3526,
      "step": 35476
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0454902648925781,
      "learning_rate": 1.13029316629025e-05,
      "loss": 2.2817,
      "step": 35477
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0929168462753296,
      "learning_rate": 1.1302523434883148e-05,
      "loss": 2.2347,
      "step": 35478
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1356691122055054,
      "learning_rate": 1.1302115204655661e-05,
      "loss": 2.6257,
      "step": 35479
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0665735006332397,
      "learning_rate": 1.1301706972220736e-05,
      "loss": 2.1993,
      "step": 35480
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1121418476104736,
      "learning_rate": 1.1301298737579068e-05,
      "loss": 2.5098,
      "step": 35481
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.15945303440094,
      "learning_rate": 1.1300890500731341e-05,
      "loss": 2.2554,
      "step": 35482
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1594566106796265,
      "learning_rate": 1.130048226167825e-05,
      "loss": 2.3533,
      "step": 35483
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1385765075683594,
      "learning_rate": 1.1300074020420493e-05,
      "loss": 2.4027,
      "step": 35484
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.011806845664978,
      "learning_rate": 1.1299665776958752e-05,
      "loss": 2.4652,
      "step": 35485
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9847522974014282,
      "learning_rate": 1.1299257531293728e-05,
      "loss": 2.5238,
      "step": 35486
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1079601049423218,
      "learning_rate": 1.1298849283426108e-05,
      "loss": 2.4231,
      "step": 35487
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1100457906723022,
      "learning_rate": 1.1298441033356587e-05,
      "loss": 2.5053,
      "step": 35488
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1290384531021118,
      "learning_rate": 1.1298032781085856e-05,
      "loss": 2.2916,
      "step": 35489
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0141472816467285,
      "learning_rate": 1.1297624526614604e-05,
      "loss": 2.3604,
      "step": 35490
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1014516353607178,
      "learning_rate": 1.129721626994353e-05,
      "loss": 2.1046,
      "step": 35491
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0822449922561646,
      "learning_rate": 1.129680801107332e-05,
      "loss": 2.2044,
      "step": 35492
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9795567989349365,
      "learning_rate": 1.1296399750004667e-05,
      "loss": 2.5067,
      "step": 35493
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1675063371658325,
      "learning_rate": 1.1295991486738264e-05,
      "loss": 2.4192,
      "step": 35494
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1078143119812012,
      "learning_rate": 1.1295583221274807e-05,
      "loss": 2.4729,
      "step": 35495
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1652089357376099,
      "learning_rate": 1.129517495361498e-05,
      "loss": 2.4262,
      "step": 35496
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0621954202651978,
      "learning_rate": 1.1294766683759484e-05,
      "loss": 2.2969,
      "step": 35497
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4348129034042358,
      "learning_rate": 1.1294358411709005e-05,
      "loss": 2.4076,
      "step": 35498
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1860601902008057,
      "learning_rate": 1.1293950137464237e-05,
      "loss": 2.3265,
      "step": 35499
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3573448657989502,
      "learning_rate": 1.1293541861025872e-05,
      "loss": 2.6037,
      "step": 35500
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0164265632629395,
      "learning_rate": 1.1293133582394602e-05,
      "loss": 2.5308,
      "step": 35501
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9238817095756531,
      "learning_rate": 1.1292725301571118e-05,
      "loss": 2.1247,
      "step": 35502
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0697696208953857,
      "learning_rate": 1.1292317018556119e-05,
      "loss": 2.5887,
      "step": 35503
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0472395420074463,
      "learning_rate": 1.1291908733350287e-05,
      "loss": 2.3468,
      "step": 35504
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0391356945037842,
      "learning_rate": 1.1291500445954319e-05,
      "loss": 2.2921,
      "step": 35505
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1606101989746094,
      "learning_rate": 1.1291092156368908e-05,
      "loss": 2.1777,
      "step": 35506
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0079530477523804,
      "learning_rate": 1.1290683864594744e-05,
      "loss": 2.2529,
      "step": 35507
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0804667472839355,
      "learning_rate": 1.1290275570632523e-05,
      "loss": 2.3674,
      "step": 35508
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0591694116592407,
      "learning_rate": 1.1289867274482932e-05,
      "loss": 2.3615,
      "step": 35509
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0653830766677856,
      "learning_rate": 1.1289458976146668e-05,
      "loss": 2.3707,
      "step": 35510
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1765975952148438,
      "learning_rate": 1.1289050675624417e-05,
      "loss": 2.32,
      "step": 35511
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.02522873878479,
      "learning_rate": 1.128864237291688e-05,
      "loss": 2.1892,
      "step": 35512
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.078410267829895,
      "learning_rate": 1.1288234068024741e-05,
      "loss": 2.3723,
      "step": 35513
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1882742643356323,
      "learning_rate": 1.1287825760948696e-05,
      "loss": 2.5389,
      "step": 35514
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0673179626464844,
      "learning_rate": 1.1287417451689434e-05,
      "loss": 2.433,
      "step": 35515
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9553585648536682,
      "learning_rate": 1.1287009140247653e-05,
      "loss": 2.6041,
      "step": 35516
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1844477653503418,
      "learning_rate": 1.1286600826624044e-05,
      "loss": 2.3747,
      "step": 35517
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0773921012878418,
      "learning_rate": 1.1286192510819294e-05,
      "loss": 2.4449,
      "step": 35518
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0683722496032715,
      "learning_rate": 1.1285784192834097e-05,
      "loss": 2.2845,
      "step": 35519
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.135811448097229,
      "learning_rate": 1.1285375872669149e-05,
      "loss": 2.3875,
      "step": 35520
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1691325902938843,
      "learning_rate": 1.1284967550325137e-05,
      "loss": 2.2722,
      "step": 35521
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0719103813171387,
      "learning_rate": 1.1284559225802758e-05,
      "loss": 2.2474,
      "step": 35522
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.155585765838623,
      "learning_rate": 1.1284150899102701e-05,
      "loss": 2.36,
      "step": 35523
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1485668420791626,
      "learning_rate": 1.1283742570225665e-05,
      "loss": 2.3689,
      "step": 35524
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2391339540481567,
      "learning_rate": 1.128333423917233e-05,
      "loss": 2.2197,
      "step": 35525
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1891292333602905,
      "learning_rate": 1.1282925905943396e-05,
      "loss": 2.5467,
      "step": 35526
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9901558756828308,
      "learning_rate": 1.1282517570539556e-05,
      "loss": 2.4599,
      "step": 35527
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1296442747116089,
      "learning_rate": 1.12821092329615e-05,
      "loss": 2.3122,
      "step": 35528
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1289408206939697,
      "learning_rate": 1.128170089320992e-05,
      "loss": 2.3149,
      "step": 35529
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1423338651657104,
      "learning_rate": 1.1281292551285508e-05,
      "loss": 2.4156,
      "step": 35530
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.066918134689331,
      "learning_rate": 1.128088420718896e-05,
      "loss": 2.4873,
      "step": 35531
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.038581371307373,
      "learning_rate": 1.1280475860920964e-05,
      "loss": 2.2148,
      "step": 35532
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.053879737854004,
      "learning_rate": 1.1280067512482213e-05,
      "loss": 2.3859,
      "step": 35533
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7181199789047241,
      "learning_rate": 1.12796591618734e-05,
      "loss": 2.3505,
      "step": 35534
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0720105171203613,
      "learning_rate": 1.1279250809095216e-05,
      "loss": 2.3561,
      "step": 35535
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0017824172973633,
      "learning_rate": 1.1278842454148356e-05,
      "loss": 2.436,
      "step": 35536
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9530391097068787,
      "learning_rate": 1.1278434097033513e-05,
      "loss": 2.3118,
      "step": 35537
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1410611867904663,
      "learning_rate": 1.1278025737751375e-05,
      "loss": 2.4468,
      "step": 35538
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.228479027748108,
      "learning_rate": 1.1277617376302634e-05,
      "loss": 2.372,
      "step": 35539
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.091523289680481,
      "learning_rate": 1.1277209012687988e-05,
      "loss": 2.5374,
      "step": 35540
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1568729877471924,
      "learning_rate": 1.1276800646908125e-05,
      "loss": 2.1777,
      "step": 35541
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9580810070037842,
      "learning_rate": 1.1276392278963737e-05,
      "loss": 2.3915,
      "step": 35542
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.03566575050354,
      "learning_rate": 1.1275983908855517e-05,
      "loss": 2.5856,
      "step": 35543
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0003474950790405,
      "learning_rate": 1.127557553658416e-05,
      "loss": 2.3116,
      "step": 35544
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.026153326034546,
      "learning_rate": 1.1275167162150356e-05,
      "loss": 2.4519,
      "step": 35545
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3948174715042114,
      "learning_rate": 1.1274758785554795e-05,
      "loss": 2.6661,
      "step": 35546
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.005033254623413,
      "learning_rate": 1.1274350406798171e-05,
      "loss": 2.3692,
      "step": 35547
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0286974906921387,
      "learning_rate": 1.1273942025881179e-05,
      "loss": 2.3761,
      "step": 35548
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0699647665023804,
      "learning_rate": 1.1273533642804508e-05,
      "loss": 2.4103,
      "step": 35549
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1157066822052002,
      "learning_rate": 1.1273125257568856e-05,
      "loss": 2.4063,
      "step": 35550
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0413411855697632,
      "learning_rate": 1.1272716870174904e-05,
      "loss": 2.4769,
      "step": 35551
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0435471534729004,
      "learning_rate": 1.1272308480623354e-05,
      "loss": 2.3604,
      "step": 35552
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1536184549331665,
      "learning_rate": 1.1271900088914898e-05,
      "loss": 2.3933,
      "step": 35553
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9011036157608032,
      "learning_rate": 1.1271491695050223e-05,
      "loss": 2.3877,
      "step": 35554
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0685405731201172,
      "learning_rate": 1.1271083299030025e-05,
      "loss": 2.3151,
      "step": 35555
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0524744987487793,
      "learning_rate": 1.1270674900854997e-05,
      "loss": 2.1902,
      "step": 35556
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9772060513496399,
      "learning_rate": 1.1270266500525829e-05,
      "loss": 2.3665,
      "step": 35557
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0761001110076904,
      "learning_rate": 1.1269858098043214e-05,
      "loss": 2.2639,
      "step": 35558
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.152845025062561,
      "learning_rate": 1.1269449693407844e-05,
      "loss": 2.2417,
      "step": 35559
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1615444421768188,
      "learning_rate": 1.1269041286620412e-05,
      "loss": 2.2521,
      "step": 35560
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2159956693649292,
      "learning_rate": 1.1268632877681611e-05,
      "loss": 2.4208,
      "step": 35561
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0644160509109497,
      "learning_rate": 1.1268224466592133e-05,
      "loss": 2.3938,
      "step": 35562
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1557815074920654,
      "learning_rate": 1.1267816053352669e-05,
      "loss": 2.2045,
      "step": 35563
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0196861028671265,
      "learning_rate": 1.1267407637963914e-05,
      "loss": 2.7272,
      "step": 35564
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0930922031402588,
      "learning_rate": 1.1266999220426558e-05,
      "loss": 2.1435,
      "step": 35565
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1089959144592285,
      "learning_rate": 1.1266590800741295e-05,
      "loss": 2.291,
      "step": 35566
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0156288146972656,
      "learning_rate": 1.1266182378908815e-05,
      "loss": 2.2216,
      "step": 35567
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2383390665054321,
      "learning_rate": 1.1265773954929814e-05,
      "loss": 2.2879,
      "step": 35568
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0529793500900269,
      "learning_rate": 1.1265365528804981e-05,
      "loss": 2.2684,
      "step": 35569
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2605382204055786,
      "learning_rate": 1.126495710053501e-05,
      "loss": 2.4054,
      "step": 35570
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0499060153961182,
      "learning_rate": 1.1264548670120594e-05,
      "loss": 2.2983,
      "step": 35571
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1517826318740845,
      "learning_rate": 1.1264140237562424e-05,
      "loss": 2.2843,
      "step": 35572
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.079406499862671,
      "learning_rate": 1.1263731802861194e-05,
      "loss": 2.6442,
      "step": 35573
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1053833961486816,
      "learning_rate": 1.1263323366017593e-05,
      "loss": 2.4077,
      "step": 35574
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1734601259231567,
      "learning_rate": 1.1262914927032316e-05,
      "loss": 2.4371,
      "step": 35575
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0086134672164917,
      "learning_rate": 1.1262506485906055e-05,
      "loss": 2.2333,
      "step": 35576
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.114135503768921,
      "learning_rate": 1.1262098042639506e-05,
      "loss": 2.258,
      "step": 35577
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1356689929962158,
      "learning_rate": 1.1261689597233355e-05,
      "loss": 2.3211,
      "step": 35578
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.098941445350647,
      "learning_rate": 1.12612811496883e-05,
      "loss": 2.5255,
      "step": 35579
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9826149344444275,
      "learning_rate": 1.1260872700005029e-05,
      "loss": 2.4359,
      "step": 35580
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.064166784286499,
      "learning_rate": 1.1260464248184233e-05,
      "loss": 2.3,
      "step": 35581
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0804191827774048,
      "learning_rate": 1.1260055794226612e-05,
      "loss": 2.4957,
      "step": 35582
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0452978610992432,
      "learning_rate": 1.1259647338132856e-05,
      "loss": 2.4413,
      "step": 35583
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0057306289672852,
      "learning_rate": 1.1259238879903651e-05,
      "loss": 2.5208,
      "step": 35584
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0164177417755127,
      "learning_rate": 1.1258830419539696e-05,
      "loss": 2.3684,
      "step": 35585
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2008062601089478,
      "learning_rate": 1.1258421957041681e-05,
      "loss": 2.3176,
      "step": 35586
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9049593210220337,
      "learning_rate": 1.12580134924103e-05,
      "loss": 2.3029,
      "step": 35587
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0145994424819946,
      "learning_rate": 1.1257605025646242e-05,
      "loss": 2.4133,
      "step": 35588
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.05655038356781,
      "learning_rate": 1.1257196556750205e-05,
      "loss": 2.508,
      "step": 35589
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.335693120956421,
      "learning_rate": 1.1256788085722878e-05,
      "loss": 2.4988,
      "step": 35590
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.017785668373108,
      "learning_rate": 1.1256379612564955e-05,
      "loss": 2.3121,
      "step": 35591
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1181987524032593,
      "learning_rate": 1.1255971137277125e-05,
      "loss": 2.4649,
      "step": 35592
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0796914100646973,
      "learning_rate": 1.1255562659860083e-05,
      "loss": 2.2755,
      "step": 35593
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0323290824890137,
      "learning_rate": 1.125515418031452e-05,
      "loss": 2.2462,
      "step": 35594
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0723680257797241,
      "learning_rate": 1.1254745698641132e-05,
      "loss": 2.4195,
      "step": 35595
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1361771821975708,
      "learning_rate": 1.125433721484061e-05,
      "loss": 2.2908,
      "step": 35596
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.071757197380066,
      "learning_rate": 1.1253928728913643e-05,
      "loss": 2.6465,
      "step": 35597
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0844273567199707,
      "learning_rate": 1.1253520240860928e-05,
      "loss": 2.3836,
      "step": 35598
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9977434873580933,
      "learning_rate": 1.1253111750683157e-05,
      "loss": 2.3132,
      "step": 35599
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2206677198410034,
      "learning_rate": 1.1252703258381019e-05,
      "loss": 2.489,
      "step": 35600
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.328343391418457,
      "learning_rate": 1.125229476395521e-05,
      "loss": 2.1443,
      "step": 35601
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1207284927368164,
      "learning_rate": 1.125188626740642e-05,
      "loss": 2.3765,
      "step": 35602
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9611669182777405,
      "learning_rate": 1.1251477768735345e-05,
      "loss": 2.23,
      "step": 35603
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9752109050750732,
      "learning_rate": 1.1251069267942674e-05,
      "loss": 2.3103,
      "step": 35604
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.105995535850525,
      "learning_rate": 1.12506607650291e-05,
      "loss": 2.0323,
      "step": 35605
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0800201892852783,
      "learning_rate": 1.1250252259995316e-05,
      "loss": 2.3406,
      "step": 35606
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.118804693222046,
      "learning_rate": 1.1249843752842016e-05,
      "loss": 2.4853,
      "step": 35607
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.052117109298706,
      "learning_rate": 1.124943524356989e-05,
      "loss": 2.2672,
      "step": 35608
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1267956495285034,
      "learning_rate": 1.1249026732179633e-05,
      "loss": 2.446,
      "step": 35609
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1215184926986694,
      "learning_rate": 1.1248618218671939e-05,
      "loss": 2.1672,
      "step": 35610
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2368576526641846,
      "learning_rate": 1.1248209703047495e-05,
      "loss": 2.3966,
      "step": 35611
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9781003594398499,
      "learning_rate": 1.1247801185306997e-05,
      "loss": 2.3704,
      "step": 35612
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0449292659759521,
      "learning_rate": 1.1247392665451135e-05,
      "loss": 2.3329,
      "step": 35613
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.059954285621643,
      "learning_rate": 1.1246984143480605e-05,
      "loss": 2.5358,
      "step": 35614
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.04401695728302,
      "learning_rate": 1.12465756193961e-05,
      "loss": 2.427,
      "step": 35615
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0052541494369507,
      "learning_rate": 1.124616709319831e-05,
      "loss": 2.3087,
      "step": 35616
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.063352108001709,
      "learning_rate": 1.124575856488793e-05,
      "loss": 2.1181,
      "step": 35617
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1983470916748047,
      "learning_rate": 1.1245350034465648e-05,
      "loss": 2.4682,
      "step": 35618
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1942222118377686,
      "learning_rate": 1.124494150193216e-05,
      "loss": 2.5957,
      "step": 35619
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0306835174560547,
      "learning_rate": 1.1244532967288158e-05,
      "loss": 2.2781,
      "step": 35620
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.062447428703308,
      "learning_rate": 1.1244124430534333e-05,
      "loss": 2.4258,
      "step": 35621
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1104806661605835,
      "learning_rate": 1.1243715891671383e-05,
      "loss": 2.4786,
      "step": 35622
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1332229375839233,
      "learning_rate": 1.1243307350699998e-05,
      "loss": 2.3744,
      "step": 35623
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9984214901924133,
      "learning_rate": 1.1242898807620863e-05,
      "loss": 2.4515,
      "step": 35624
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1686152219772339,
      "learning_rate": 1.124249026243468e-05,
      "loss": 2.4246,
      "step": 35625
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0061836242675781,
      "learning_rate": 1.124208171514214e-05,
      "loss": 2.5248,
      "step": 35626
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0795128345489502,
      "learning_rate": 1.1241673165743933e-05,
      "loss": 2.4666,
      "step": 35627
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.037707805633545,
      "learning_rate": 1.1241264614240755e-05,
      "loss": 2.288,
      "step": 35628
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9915208220481873,
      "learning_rate": 1.1240856060633292e-05,
      "loss": 2.2394,
      "step": 35629
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0891188383102417,
      "learning_rate": 1.1240447504922246e-05,
      "loss": 2.4642,
      "step": 35630
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0439708232879639,
      "learning_rate": 1.12400389471083e-05,
      "loss": 2.5081,
      "step": 35631
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0766361951828003,
      "learning_rate": 1.1239630387192154e-05,
      "loss": 2.3078,
      "step": 35632
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2749607563018799,
      "learning_rate": 1.1239221825174496e-05,
      "loss": 2.3199,
      "step": 35633
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1105064153671265,
      "learning_rate": 1.123881326105602e-05,
      "loss": 2.2092,
      "step": 35634
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9955495595932007,
      "learning_rate": 1.1238404694837418e-05,
      "loss": 2.3396,
      "step": 35635
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0609936714172363,
      "learning_rate": 1.1237996126519387e-05,
      "loss": 2.1849,
      "step": 35636
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1083836555480957,
      "learning_rate": 1.1237587556102615e-05,
      "loss": 2.3057,
      "step": 35637
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0195544958114624,
      "learning_rate": 1.1237178983587797e-05,
      "loss": 2.3818,
      "step": 35638
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0000115633010864,
      "learning_rate": 1.1236770408975624e-05,
      "loss": 2.1714,
      "step": 35639
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.231616497039795,
      "learning_rate": 1.123636183226679e-05,
      "loss": 2.3738,
      "step": 35640
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.070342779159546,
      "learning_rate": 1.1235953253461984e-05,
      "loss": 2.4548,
      "step": 35641
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1227513551712036,
      "learning_rate": 1.1235544672561905e-05,
      "loss": 2.5107,
      "step": 35642
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1001911163330078,
      "learning_rate": 1.123513608956724e-05,
      "loss": 2.443,
      "step": 35643
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9795680046081543,
      "learning_rate": 1.1234727504478685e-05,
      "loss": 2.5556,
      "step": 35644
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3064779043197632,
      "learning_rate": 1.1234318917296932e-05,
      "loss": 2.3101,
      "step": 35645
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1344746351242065,
      "learning_rate": 1.123391032802267e-05,
      "loss": 2.5642,
      "step": 35646
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0186952352523804,
      "learning_rate": 1.1233501736656598e-05,
      "loss": 2.2276,
      "step": 35647
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0133599042892456,
      "learning_rate": 1.1233093143199402e-05,
      "loss": 2.4181,
      "step": 35648
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0699901580810547,
      "learning_rate": 1.1232684547651784e-05,
      "loss": 2.4695,
      "step": 35649
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0277130603790283,
      "learning_rate": 1.1232275950014428e-05,
      "loss": 2.4317,
      "step": 35650
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9829949140548706,
      "learning_rate": 1.1231867350288029e-05,
      "loss": 2.5162,
      "step": 35651
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0417430400848389,
      "learning_rate": 1.123145874847328e-05,
      "loss": 2.511,
      "step": 35652
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.194310188293457,
      "learning_rate": 1.1231050144570873e-05,
      "loss": 2.3303,
      "step": 35653
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0752918720245361,
      "learning_rate": 1.1230641538581504e-05,
      "loss": 2.2963,
      "step": 35654
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.300350546836853,
      "learning_rate": 1.1230232930505864e-05,
      "loss": 2.4025,
      "step": 35655
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.092598795890808,
      "learning_rate": 1.1229824320344645e-05,
      "loss": 2.5256,
      "step": 35656
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1777656078338623,
      "learning_rate": 1.1229415708098536e-05,
      "loss": 2.4048,
      "step": 35657
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0062531232833862,
      "learning_rate": 1.1229007093768237e-05,
      "loss": 2.3838,
      "step": 35658
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0864920616149902,
      "learning_rate": 1.1228598477354437e-05,
      "loss": 2.2568,
      "step": 35659
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.122853398323059,
      "learning_rate": 1.1228189858857829e-05,
      "loss": 2.1999,
      "step": 35660
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0163519382476807,
      "learning_rate": 1.1227781238279104e-05,
      "loss": 2.3941,
      "step": 35661
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0717360973358154,
      "learning_rate": 1.1227372615618959e-05,
      "loss": 2.6528,
      "step": 35662
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0900863409042358,
      "learning_rate": 1.1226963990878081e-05,
      "loss": 2.3029,
      "step": 35663
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3158578872680664,
      "learning_rate": 1.1226555364057168e-05,
      "loss": 2.4873,
      "step": 35664
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.12134850025177,
      "learning_rate": 1.1226146735156908e-05,
      "loss": 2.3835,
      "step": 35665
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.143612265586853,
      "learning_rate": 1.1225738104177997e-05,
      "loss": 2.2081,
      "step": 35666
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1240999698638916,
      "learning_rate": 1.1225329471121126e-05,
      "loss": 2.4396,
      "step": 35667
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0640387535095215,
      "learning_rate": 1.1224920835986992e-05,
      "loss": 2.4723,
      "step": 35668
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0242407321929932,
      "learning_rate": 1.1224512198776285e-05,
      "loss": 2.5282,
      "step": 35669
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9669496417045593,
      "learning_rate": 1.1224103559489695e-05,
      "loss": 2.3075,
      "step": 35670
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.066892385482788,
      "learning_rate": 1.1223694918127917e-05,
      "loss": 2.4164,
      "step": 35671
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0648211240768433,
      "learning_rate": 1.1223286274691643e-05,
      "loss": 2.4952,
      "step": 35672
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.033360242843628,
      "learning_rate": 1.1222877629181565e-05,
      "loss": 2.2973,
      "step": 35673
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0842572450637817,
      "learning_rate": 1.122246898159838e-05,
      "loss": 2.1804,
      "step": 35674
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0155165195465088,
      "learning_rate": 1.1222060331942779e-05,
      "loss": 2.4247,
      "step": 35675
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1045900583267212,
      "learning_rate": 1.1221651680215454e-05,
      "loss": 2.1342,
      "step": 35676
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1743357181549072,
      "learning_rate": 1.1221243026417095e-05,
      "loss": 2.4117,
      "step": 35677
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0094103813171387,
      "learning_rate": 1.1220834370548399e-05,
      "loss": 2.4101,
      "step": 35678
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0612573623657227,
      "learning_rate": 1.1220425712610057e-05,
      "loss": 2.5802,
      "step": 35679
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1001352071762085,
      "learning_rate": 1.1220017052602761e-05,
      "loss": 2.3379,
      "step": 35680
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.105833649635315,
      "learning_rate": 1.1219608390527204e-05,
      "loss": 2.0714,
      "step": 35681
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.085188388824463,
      "learning_rate": 1.1219199726384083e-05,
      "loss": 2.4122,
      "step": 35682
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0284699201583862,
      "learning_rate": 1.1218791060174085e-05,
      "loss": 2.4921,
      "step": 35683
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0007247924804688,
      "learning_rate": 1.1218382391897906e-05,
      "loss": 2.3166,
      "step": 35684
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.175727367401123,
      "learning_rate": 1.1217973721556237e-05,
      "loss": 2.2002,
      "step": 35685
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0530273914337158,
      "learning_rate": 1.1217565049149773e-05,
      "loss": 2.5376,
      "step": 35686
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0262845754623413,
      "learning_rate": 1.1217156374679203e-05,
      "loss": 2.421,
      "step": 35687
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0519349575042725,
      "learning_rate": 1.1216747698145225e-05,
      "loss": 2.3179,
      "step": 35688
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1364476680755615,
      "learning_rate": 1.121633901954853e-05,
      "loss": 2.5124,
      "step": 35689
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1809875965118408,
      "learning_rate": 1.121593033888981e-05,
      "loss": 2.4115,
      "step": 35690
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.077437400817871,
      "learning_rate": 1.1215521656169755e-05,
      "loss": 2.2292,
      "step": 35691
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0224225521087646,
      "learning_rate": 1.121511297138906e-05,
      "loss": 2.296,
      "step": 35692
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.024254322052002,
      "learning_rate": 1.1214704284548422e-05,
      "loss": 2.2741,
      "step": 35693
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0766414403915405,
      "learning_rate": 1.1214295595648525e-05,
      "loss": 2.2389,
      "step": 35694
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1934754848480225,
      "learning_rate": 1.1213886904690075e-05,
      "loss": 2.3084,
      "step": 35695
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0737955570220947,
      "learning_rate": 1.1213478211673752e-05,
      "loss": 2.181,
      "step": 35696
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9620527625083923,
      "learning_rate": 1.1213069516600254e-05,
      "loss": 2.2058,
      "step": 35697
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0196216106414795,
      "learning_rate": 1.1212660819470274e-05,
      "loss": 2.3756,
      "step": 35698
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0400919914245605,
      "learning_rate": 1.1212252120284504e-05,
      "loss": 2.3548,
      "step": 35699
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0952049493789673,
      "learning_rate": 1.1211843419043637e-05,
      "loss": 2.2295,
      "step": 35700
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1454423666000366,
      "learning_rate": 1.1211434715748369e-05,
      "loss": 2.3215,
      "step": 35701
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.100619912147522,
      "learning_rate": 1.121102601039939e-05,
      "loss": 2.1001,
      "step": 35702
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.9762477874755859,
      "learning_rate": 1.121061730299739e-05,
      "loss": 2.1886,
      "step": 35703
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0263404846191406,
      "learning_rate": 1.1210208593543068e-05,
      "loss": 2.3498,
      "step": 35704
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1453602313995361,
      "learning_rate": 1.1209799882037112e-05,
      "loss": 2.2185,
      "step": 35705
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1968375444412231,
      "learning_rate": 1.1209391168480215e-05,
      "loss": 2.2966,
      "step": 35706
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1502805948257446,
      "learning_rate": 1.1208982452873073e-05,
      "loss": 2.3121,
      "step": 35707
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0786436796188354,
      "learning_rate": 1.1208573735216381e-05,
      "loss": 2.4922,
      "step": 35708
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.121463656425476,
      "learning_rate": 1.1208165015510823e-05,
      "loss": 2.3925,
      "step": 35709
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.0136845111846924,
      "learning_rate": 1.1207756293757099e-05,
      "loss": 2.3736,
      "step": 35710
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.053934931755066,
      "learning_rate": 1.1207347569955902e-05,
      "loss": 2.3985,
      "step": 35711
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.021545648574829,
      "learning_rate": 1.1206938844107919e-05,
      "loss": 2.188,
      "step": 35712
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1694890260696411,
      "learning_rate": 1.120653011621385e-05,
      "loss": 2.3973,
      "step": 35713
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1799612045288086,
      "learning_rate": 1.1206121386274385e-05,
      "loss": 2.1235,
      "step": 35714
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1385687589645386,
      "learning_rate": 1.1205712654290215e-05,
      "loss": 2.5191,
      "step": 35715
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0351192951202393,
      "learning_rate": 1.1205303920262036e-05,
      "loss": 2.3214,
      "step": 35716
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2248930931091309,
      "learning_rate": 1.1204895184190541e-05,
      "loss": 2.3461,
      "step": 35717
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0738615989685059,
      "learning_rate": 1.1204486446076416e-05,
      "loss": 2.2425,
      "step": 35718
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1839559078216553,
      "learning_rate": 1.1204077705920364e-05,
      "loss": 2.1468,
      "step": 35719
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9900708198547363,
      "learning_rate": 1.120366896372307e-05,
      "loss": 2.1785,
      "step": 35720
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0394177436828613,
      "learning_rate": 1.1203260219485233e-05,
      "loss": 2.1726,
      "step": 35721
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0781571865081787,
      "learning_rate": 1.1202851473207543e-05,
      "loss": 2.594,
      "step": 35722
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0183861255645752,
      "learning_rate": 1.1202442724890692e-05,
      "loss": 2.4596,
      "step": 35723
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1387007236480713,
      "learning_rate": 1.1202033974535374e-05,
      "loss": 2.2829,
      "step": 35724
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1940950155258179,
      "learning_rate": 1.1201625222142281e-05,
      "loss": 2.171,
      "step": 35725
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.19442880153656,
      "learning_rate": 1.1201216467712108e-05,
      "loss": 2.1543,
      "step": 35726
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0866804122924805,
      "learning_rate": 1.1200807711245546e-05,
      "loss": 2.4125,
      "step": 35727
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0111140012741089,
      "learning_rate": 1.1200398952743293e-05,
      "loss": 2.1523,
      "step": 35728
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1060453653335571,
      "learning_rate": 1.1199990192206033e-05,
      "loss": 2.0849,
      "step": 35729
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0383150577545166,
      "learning_rate": 1.1199581429634466e-05,
      "loss": 2.3708,
      "step": 35730
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1125781536102295,
      "learning_rate": 1.1199172665029278e-05,
      "loss": 2.4416,
      "step": 35731
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0776983499526978,
      "learning_rate": 1.1198763898391171e-05,
      "loss": 2.1991,
      "step": 35732
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.054469108581543,
      "learning_rate": 1.1198355129720832e-05,
      "loss": 2.2676,
      "step": 35733
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2053554058074951,
      "learning_rate": 1.119794635901896e-05,
      "loss": 2.293,
      "step": 35734
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0299086570739746,
      "learning_rate": 1.119753758628624e-05,
      "loss": 2.1494,
      "step": 35735
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.095481276512146,
      "learning_rate": 1.1197128811523368e-05,
      "loss": 2.3737,
      "step": 35736
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1243133544921875,
      "learning_rate": 1.1196720034731036e-05,
      "loss": 2.5332,
      "step": 35737
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0589091777801514,
      "learning_rate": 1.1196311255909943e-05,
      "loss": 2.2669,
      "step": 35738
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.167640209197998,
      "learning_rate": 1.1195902475060774e-05,
      "loss": 2.4851,
      "step": 35739
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.074352741241455,
      "learning_rate": 1.1195493692184226e-05,
      "loss": 2.3839,
      "step": 35740
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0931122303009033,
      "learning_rate": 1.1195084907280993e-05,
      "loss": 2.3944,
      "step": 35741
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1362838745117188,
      "learning_rate": 1.1194676120351766e-05,
      "loss": 2.4336,
      "step": 35742
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0395342111587524,
      "learning_rate": 1.1194267331397238e-05,
      "loss": 2.4296,
      "step": 35743
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0247777700424194,
      "learning_rate": 1.1193858540418103e-05,
      "loss": 2.4052,
      "step": 35744
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1981711387634277,
      "learning_rate": 1.1193449747415055e-05,
      "loss": 2.4758,
      "step": 35745
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0466703176498413,
      "learning_rate": 1.1193040952388783e-05,
      "loss": 2.3123,
      "step": 35746
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0456340312957764,
      "learning_rate": 1.1192632155339982e-05,
      "loss": 2.2087,
      "step": 35747
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9634523987770081,
      "learning_rate": 1.119222335626935e-05,
      "loss": 2.4389,
      "step": 35748
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.078285813331604,
      "learning_rate": 1.119181455517757e-05,
      "loss": 2.1887,
      "step": 35749
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0723530054092407,
      "learning_rate": 1.1191405752065346e-05,
      "loss": 2.5392,
      "step": 35750
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0566297769546509,
      "learning_rate": 1.119099694693336e-05,
      "loss": 2.3871,
      "step": 35751
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.143311858177185,
      "learning_rate": 1.1190588139782315e-05,
      "loss": 2.1542,
      "step": 35752
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0336133241653442,
      "learning_rate": 1.1190179330612898e-05,
      "loss": 2.431,
      "step": 35753
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0475817918777466,
      "learning_rate": 1.1189770519425803e-05,
      "loss": 2.2671,
      "step": 35754
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0857847929000854,
      "learning_rate": 1.1189361706221726e-05,
      "loss": 2.3508,
      "step": 35755
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.040252447128296,
      "learning_rate": 1.1188952891001356e-05,
      "loss": 2.4493,
      "step": 35756
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.390350103378296,
      "learning_rate": 1.118854407376539e-05,
      "loss": 2.2696,
      "step": 35757
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0465483665466309,
      "learning_rate": 1.1188135254514516e-05,
      "loss": 2.4486,
      "step": 35758
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0325937271118164,
      "learning_rate": 1.1187726433249434e-05,
      "loss": 2.7373,
      "step": 35759
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1448516845703125,
      "learning_rate": 1.118731760997083e-05,
      "loss": 2.3753,
      "step": 35760
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0212827920913696,
      "learning_rate": 1.1186908784679403e-05,
      "loss": 2.3897,
      "step": 35761
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1426750421524048,
      "learning_rate": 1.118649995737584e-05,
      "loss": 2.6595,
      "step": 35762
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.118998408317566,
      "learning_rate": 1.118609112806084e-05,
      "loss": 2.5069,
      "step": 35763
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0526646375656128,
      "learning_rate": 1.118568229673509e-05,
      "loss": 2.4679,
      "step": 35764
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1170337200164795,
      "learning_rate": 1.1185273463399287e-05,
      "loss": 2.4121,
      "step": 35765
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.085502028465271,
      "learning_rate": 1.1184864628054126e-05,
      "loss": 2.5155,
      "step": 35766
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.031551718711853,
      "learning_rate": 1.1184455790700296e-05,
      "loss": 2.3982,
      "step": 35767
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9750426411628723,
      "learning_rate": 1.1184046951338494e-05,
      "loss": 2.4071,
      "step": 35768
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1125514507293701,
      "learning_rate": 1.1183638109969408e-05,
      "loss": 2.0824,
      "step": 35769
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9540807604789734,
      "learning_rate": 1.1183229266593736e-05,
      "loss": 2.4229,
      "step": 35770
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.09810209274292,
      "learning_rate": 1.1182820421212168e-05,
      "loss": 2.3279,
      "step": 35771
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1781433820724487,
      "learning_rate": 1.1182411573825397e-05,
      "loss": 2.4325,
      "step": 35772
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.01962149143219,
      "learning_rate": 1.118200272443412e-05,
      "loss": 2.4209,
      "step": 35773
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9724220037460327,
      "learning_rate": 1.1181593873039028e-05,
      "loss": 2.2233,
      "step": 35774
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0578749179840088,
      "learning_rate": 1.118118501964081e-05,
      "loss": 2.4473,
      "step": 35775
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1205060482025146,
      "learning_rate": 1.1180776164240165e-05,
      "loss": 2.4479,
      "step": 35776
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0295909643173218,
      "learning_rate": 1.1180367306837782e-05,
      "loss": 2.465,
      "step": 35777
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.034409761428833,
      "learning_rate": 1.1179958447434358e-05,
      "loss": 2.5784,
      "step": 35778
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2054129838943481,
      "learning_rate": 1.117954958603058e-05,
      "loss": 2.5223,
      "step": 35779
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2976861000061035,
      "learning_rate": 1.1179140722627149e-05,
      "loss": 2.3699,
      "step": 35780
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.974325954914093,
      "learning_rate": 1.1178731857224754e-05,
      "loss": 2.3427,
      "step": 35781
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0197256803512573,
      "learning_rate": 1.1178322989824086e-05,
      "loss": 2.3245,
      "step": 35782
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0192896127700806,
      "learning_rate": 1.1177914120425844e-05,
      "loss": 2.2461,
      "step": 35783
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1529157161712646,
      "learning_rate": 1.1177505249030713e-05,
      "loss": 2.3831,
      "step": 35784
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9999333620071411,
      "learning_rate": 1.1177096375639394e-05,
      "loss": 2.2082,
      "step": 35785
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0509631633758545,
      "learning_rate": 1.1176687500252575e-05,
      "loss": 2.3657,
      "step": 35786
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.075762152671814,
      "learning_rate": 1.1176278622870953e-05,
      "loss": 2.5061,
      "step": 35787
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0869711637496948,
      "learning_rate": 1.117586974349522e-05,
      "loss": 2.4629,
      "step": 35788
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9723890423774719,
      "learning_rate": 1.1175460862126067e-05,
      "loss": 2.6638,
      "step": 35789
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1112446784973145,
      "learning_rate": 1.1175051978764187e-05,
      "loss": 2.2582,
      "step": 35790
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.02313232421875,
      "learning_rate": 1.1174643093410276e-05,
      "loss": 2.4164,
      "step": 35791
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0247105360031128,
      "learning_rate": 1.1174234206065027e-05,
      "loss": 2.2383,
      "step": 35792
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0137306451797485,
      "learning_rate": 1.117382531672913e-05,
      "loss": 2.2934,
      "step": 35793
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.156176209449768,
      "learning_rate": 1.1173416425403283e-05,
      "loss": 2.3672,
      "step": 35794
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2613874673843384,
      "learning_rate": 1.1173007532088173e-05,
      "loss": 2.5266,
      "step": 35795
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0238837003707886,
      "learning_rate": 1.11725986367845e-05,
      "loss": 2.3682,
      "step": 35796
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9958357214927673,
      "learning_rate": 1.117218973949295e-05,
      "loss": 2.4538,
      "step": 35797
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1356656551361084,
      "learning_rate": 1.1171780840214224e-05,
      "loss": 2.3338,
      "step": 35798
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.088984727859497,
      "learning_rate": 1.1171371938949009e-05,
      "loss": 2.3006,
      "step": 35799
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0650713443756104,
      "learning_rate": 1.1170963035698004e-05,
      "loss": 2.475,
      "step": 35800
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0059130191802979,
      "learning_rate": 1.1170554130461895e-05,
      "loss": 2.4576,
      "step": 35801
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.01377534866333,
      "learning_rate": 1.117014522324138e-05,
      "loss": 2.2567,
      "step": 35802
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.004959225654602,
      "learning_rate": 1.116973631403715e-05,
      "loss": 2.2589,
      "step": 35803
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0516068935394287,
      "learning_rate": 1.11693274028499e-05,
      "loss": 2.4619,
      "step": 35804
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0423349142074585,
      "learning_rate": 1.1168918489680323e-05,
      "loss": 2.3421,
      "step": 35805
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9581885933876038,
      "learning_rate": 1.1168509574529112e-05,
      "loss": 2.3191,
      "step": 35806
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9769306778907776,
      "learning_rate": 1.116810065739696e-05,
      "loss": 2.4218,
      "step": 35807
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.971832275390625,
      "learning_rate": 1.116769173828456e-05,
      "loss": 2.2222,
      "step": 35808
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.015777587890625,
      "learning_rate": 1.1167282817192605e-05,
      "loss": 2.3395,
      "step": 35809
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0346261262893677,
      "learning_rate": 1.1166873894121787e-05,
      "loss": 2.2369,
      "step": 35810
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.090277910232544,
      "learning_rate": 1.11664649690728e-05,
      "loss": 2.3596,
      "step": 35811
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0493342876434326,
      "learning_rate": 1.116605604204634e-05,
      "loss": 2.4727,
      "step": 35812
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0448602437973022,
      "learning_rate": 1.1165647113043098e-05,
      "loss": 2.6062,
      "step": 35813
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0030053853988647,
      "learning_rate": 1.1165238182063769e-05,
      "loss": 2.3327,
      "step": 35814
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0088318586349487,
      "learning_rate": 1.1164829249109044e-05,
      "loss": 2.2604,
      "step": 35815
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0342448949813843,
      "learning_rate": 1.1164420314179617e-05,
      "loss": 2.4055,
      "step": 35816
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1065361499786377,
      "learning_rate": 1.1164011377276181e-05,
      "loss": 2.3407,
      "step": 35817
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0596485137939453,
      "learning_rate": 1.1163602438399428e-05,
      "loss": 2.3287,
      "step": 35818
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.018868088722229,
      "learning_rate": 1.1163193497550053e-05,
      "loss": 2.4098,
      "step": 35819
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0099921226501465,
      "learning_rate": 1.1162784554728752e-05,
      "loss": 2.5268,
      "step": 35820
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9906497597694397,
      "learning_rate": 1.1162375609936216e-05,
      "loss": 2.2331,
      "step": 35821
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1578940153121948,
      "learning_rate": 1.1161966663173134e-05,
      "loss": 2.4187,
      "step": 35822
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0891653299331665,
      "learning_rate": 1.1161557714440206e-05,
      "loss": 2.4079,
      "step": 35823
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.104651689529419,
      "learning_rate": 1.116114876373812e-05,
      "loss": 2.5279,
      "step": 35824
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1660706996917725,
      "learning_rate": 1.116073981106757e-05,
      "loss": 2.2883,
      "step": 35825
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1694704294204712,
      "learning_rate": 1.1160330856429254e-05,
      "loss": 2.5038,
      "step": 35826
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0634692907333374,
      "learning_rate": 1.1159921899823863e-05,
      "loss": 2.5021,
      "step": 35827
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1059919595718384,
      "learning_rate": 1.1159512941252086e-05,
      "loss": 2.1621,
      "step": 35828
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1067308187484741,
      "learning_rate": 1.115910398071462e-05,
      "loss": 2.133,
      "step": 35829
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7730200290679932,
      "learning_rate": 1.115869501821216e-05,
      "loss": 2.525,
      "step": 35830
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0884112119674683,
      "learning_rate": 1.1158286053745395e-05,
      "loss": 2.5549,
      "step": 35831
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0305105447769165,
      "learning_rate": 1.1157877087315023e-05,
      "loss": 2.5425,
      "step": 35832
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0763188600540161,
      "learning_rate": 1.1157468118921735e-05,
      "loss": 2.6285,
      "step": 35833
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0618265867233276,
      "learning_rate": 1.1157059148566221e-05,
      "loss": 2.4777,
      "step": 35834
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0428415536880493,
      "learning_rate": 1.1156650176249182e-05,
      "loss": 2.2,
      "step": 35835
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.132338047027588,
      "learning_rate": 1.1156241201971304e-05,
      "loss": 2.4387,
      "step": 35836
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0135481357574463,
      "learning_rate": 1.1155832225733283e-05,
      "loss": 2.4015,
      "step": 35837
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1421083211898804,
      "learning_rate": 1.115542324753581e-05,
      "loss": 2.5677,
      "step": 35838
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.024942398071289,
      "learning_rate": 1.1155014267379584e-05,
      "loss": 2.2673,
      "step": 35839
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4233109951019287,
      "learning_rate": 1.1154605285265296e-05,
      "loss": 2.2673,
      "step": 35840
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.060494303703308,
      "learning_rate": 1.1154196301193638e-05,
      "loss": 2.2303,
      "step": 35841
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1767146587371826,
      "learning_rate": 1.11537873151653e-05,
      "loss": 2.446,
      "step": 35842
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0229519605636597,
      "learning_rate": 1.1153378327180984e-05,
      "loss": 2.2773,
      "step": 35843
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9819995164871216,
      "learning_rate": 1.1152969337241375e-05,
      "loss": 2.4164,
      "step": 35844
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0974946022033691,
      "learning_rate": 1.1152560345347172e-05,
      "loss": 2.1847,
      "step": 35845
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0609056949615479,
      "learning_rate": 1.1152151351499065e-05,
      "loss": 2.4935,
      "step": 35846
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9653290510177612,
      "learning_rate": 1.115174235569775e-05,
      "loss": 2.3933,
      "step": 35847
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0731682777404785,
      "learning_rate": 1.115133335794392e-05,
      "loss": 2.518,
      "step": 35848
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.11613929271698,
      "learning_rate": 1.1150924358238263e-05,
      "loss": 2.2948,
      "step": 35849
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9304162263870239,
      "learning_rate": 1.1150515356581478e-05,
      "loss": 2.3222,
      "step": 35850
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6000428199768066,
      "learning_rate": 1.1150106352974257e-05,
      "loss": 2.1388,
      "step": 35851
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1803913116455078,
      "learning_rate": 1.1149697347417295e-05,
      "loss": 2.2328,
      "step": 35852
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1821563243865967,
      "learning_rate": 1.1149288339911283e-05,
      "loss": 2.325,
      "step": 35853
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2492696046829224,
      "learning_rate": 1.1148879330456916e-05,
      "loss": 2.2669,
      "step": 35854
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0017149448394775,
      "learning_rate": 1.1148470319054885e-05,
      "loss": 2.2916,
      "step": 35855
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1157406568527222,
      "learning_rate": 1.1148061305705884e-05,
      "loss": 2.5031,
      "step": 35856
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1707156896591187,
      "learning_rate": 1.1147652290410608e-05,
      "loss": 2.4225,
      "step": 35857
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9543875455856323,
      "learning_rate": 1.114724327316975e-05,
      "loss": 2.2342,
      "step": 35858
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0707998275756836,
      "learning_rate": 1.1146834253984008e-05,
      "loss": 2.1507,
      "step": 35859
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1415237188339233,
      "learning_rate": 1.1146425232854064e-05,
      "loss": 2.417,
      "step": 35860
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0573171377182007,
      "learning_rate": 1.1146016209780618e-05,
      "loss": 2.0291,
      "step": 35861
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0427476167678833,
      "learning_rate": 1.1145607184764367e-05,
      "loss": 2.3011,
      "step": 35862
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2152477502822876,
      "learning_rate": 1.1145198157805998e-05,
      "loss": 2.5541,
      "step": 35863
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0518370866775513,
      "learning_rate": 1.1144789128906207e-05,
      "loss": 2.4466,
      "step": 35864
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.412129521369934,
      "learning_rate": 1.114438009806569e-05,
      "loss": 2.3014,
      "step": 35865
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.10960853099823,
      "learning_rate": 1.1143971065285135e-05,
      "loss": 2.3305,
      "step": 35866
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2709074020385742,
      "learning_rate": 1.1143562030565242e-05,
      "loss": 2.477,
      "step": 35867
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0427792072296143,
      "learning_rate": 1.11431529939067e-05,
      "loss": 2.3773,
      "step": 35868
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0267858505249023,
      "learning_rate": 1.1142743955310197e-05,
      "loss": 2.3125,
      "step": 35869
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1626265048980713,
      "learning_rate": 1.1142334914776438e-05,
      "loss": 2.3604,
      "step": 35870
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9779875874519348,
      "learning_rate": 1.1141925872306109e-05,
      "loss": 2.3081,
      "step": 35871
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0736124515533447,
      "learning_rate": 1.1141516827899908e-05,
      "loss": 2.4803,
      "step": 35872
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1389633417129517,
      "learning_rate": 1.1141107781558527e-05,
      "loss": 2.389,
      "step": 35873
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.252029299736023,
      "learning_rate": 1.1140698733282656e-05,
      "loss": 2.4382,
      "step": 35874
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0398669242858887,
      "learning_rate": 1.1140289683072989e-05,
      "loss": 2.4091,
      "step": 35875
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0776351690292358,
      "learning_rate": 1.1139880630930224e-05,
      "loss": 2.4209,
      "step": 35876
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.120229959487915,
      "learning_rate": 1.113947157685505e-05,
      "loss": 2.2534,
      "step": 35877
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1590708494186401,
      "learning_rate": 1.1139062520848163e-05,
      "loss": 2.4647,
      "step": 35878
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1510483026504517,
      "learning_rate": 1.1138653462910257e-05,
      "loss": 2.3048,
      "step": 35879
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0106197595596313,
      "learning_rate": 1.1138244403042022e-05,
      "loss": 2.4909,
      "step": 35880
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9964442253112793,
      "learning_rate": 1.1137835341244154e-05,
      "loss": 2.2396,
      "step": 35881
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.043707013130188,
      "learning_rate": 1.1137426277517347e-05,
      "loss": 2.1321,
      "step": 35882
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0947916507720947,
      "learning_rate": 1.1137017211862293e-05,
      "loss": 2.4472,
      "step": 35883
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.116929292678833,
      "learning_rate": 1.1136608144279685e-05,
      "loss": 2.5668,
      "step": 35884
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.186715841293335,
      "learning_rate": 1.113619907477022e-05,
      "loss": 2.4945,
      "step": 35885
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0749881267547607,
      "learning_rate": 1.1135790003334588e-05,
      "loss": 2.5212,
      "step": 35886
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0578830242156982,
      "learning_rate": 1.1135380929973482e-05,
      "loss": 2.6408,
      "step": 35887
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0615835189819336,
      "learning_rate": 1.11349718546876e-05,
      "loss": 2.3811,
      "step": 35888
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9867961406707764,
      "learning_rate": 1.1134562777477628e-05,
      "loss": 2.2185,
      "step": 35889
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.145041584968567,
      "learning_rate": 1.1134153698344268e-05,
      "loss": 2.5345,
      "step": 35890
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3356589078903198,
      "learning_rate": 1.113374461728821e-05,
      "loss": 2.3459,
      "step": 35891
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2025617361068726,
      "learning_rate": 1.1133335534310143e-05,
      "loss": 2.4662,
      "step": 35892
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.932953953742981,
      "learning_rate": 1.1132926449410766e-05,
      "loss": 2.2093,
      "step": 35893
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1551049947738647,
      "learning_rate": 1.1132517362590773e-05,
      "loss": 2.2024,
      "step": 35894
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1021846532821655,
      "learning_rate": 1.1132108273850854e-05,
      "loss": 2.401,
      "step": 35895
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.221787691116333,
      "learning_rate": 1.1131699183191703e-05,
      "loss": 2.2078,
      "step": 35896
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0661427974700928,
      "learning_rate": 1.1131290090614015e-05,
      "loss": 2.386,
      "step": 35897
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1665672063827515,
      "learning_rate": 1.1130880996118484e-05,
      "loss": 2.7322,
      "step": 35898
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9436246156692505,
      "learning_rate": 1.1130471899705801e-05,
      "loss": 2.2843,
      "step": 35899
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1792333126068115,
      "learning_rate": 1.1130062801376662e-05,
      "loss": 2.2569,
      "step": 35900
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1175076961517334,
      "learning_rate": 1.112965370113176e-05,
      "loss": 2.3285,
      "step": 35901
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0008959770202637,
      "learning_rate": 1.1129244598971787e-05,
      "loss": 2.3197,
      "step": 35902
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0964858531951904,
      "learning_rate": 1.112883549489744e-05,
      "loss": 2.2562,
      "step": 35903
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0979056358337402,
      "learning_rate": 1.1128426388909407e-05,
      "loss": 2.5291,
      "step": 35904
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1827199459075928,
      "learning_rate": 1.1128017281008385e-05,
      "loss": 2.3879,
      "step": 35905
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.100161075592041,
      "learning_rate": 1.1127608171195073e-05,
      "loss": 2.1697,
      "step": 35906
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0628656148910522,
      "learning_rate": 1.1127199059470152e-05,
      "loss": 2.3059,
      "step": 35907
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9920337200164795,
      "learning_rate": 1.1126789945834326e-05,
      "loss": 2.2572,
      "step": 35908
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.203102707862854,
      "learning_rate": 1.1126380830288284e-05,
      "loss": 2.3378,
      "step": 35909
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.039605736732483,
      "learning_rate": 1.1125971712832722e-05,
      "loss": 2.3327,
      "step": 35910
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0483816862106323,
      "learning_rate": 1.1125562593468329e-05,
      "loss": 2.1774,
      "step": 35911
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.033623456954956,
      "learning_rate": 1.1125153472195804e-05,
      "loss": 2.1736,
      "step": 35912
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1730246543884277,
      "learning_rate": 1.1124744349015839e-05,
      "loss": 2.5407,
      "step": 35913
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1810321807861328,
      "learning_rate": 1.1124335223929126e-05,
      "loss": 2.5716,
      "step": 35914
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0662306547164917,
      "learning_rate": 1.112392609693636e-05,
      "loss": 2.0838,
      "step": 35915
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1325641870498657,
      "learning_rate": 1.1123516968038231e-05,
      "loss": 2.4062,
      "step": 35916
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0419723987579346,
      "learning_rate": 1.1123107837235439e-05,
      "loss": 2.5397,
      "step": 35917
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9933198690414429,
      "learning_rate": 1.1122698704528674e-05,
      "loss": 2.4075,
      "step": 35918
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.002301573753357,
      "learning_rate": 1.1122289569918628e-05,
      "loss": 2.2564,
      "step": 35919
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0621694326400757,
      "learning_rate": 1.1121880433405996e-05,
      "loss": 2.2627,
      "step": 35920
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.04866361618042,
      "learning_rate": 1.1121471294991473e-05,
      "loss": 2.6502,
      "step": 35921
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.234368085861206,
      "learning_rate": 1.1121062154675755e-05,
      "loss": 2.4097,
      "step": 35922
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0645067691802979,
      "learning_rate": 1.1120653012459528e-05,
      "loss": 2.4555,
      "step": 35923
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0032716989517212,
      "learning_rate": 1.112024386834349e-05,
      "loss": 2.2893,
      "step": 35924
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1173803806304932,
      "learning_rate": 1.1119834722328333e-05,
      "loss": 2.4212,
      "step": 35925
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0541409254074097,
      "learning_rate": 1.1119425574414757e-05,
      "loss": 2.2652,
      "step": 35926
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9802040457725525,
      "learning_rate": 1.1119016424603447e-05,
      "loss": 2.3109,
      "step": 35927
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9604999423027039,
      "learning_rate": 1.1118607272895101e-05,
      "loss": 2.5076,
      "step": 35928
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0371276140213013,
      "learning_rate": 1.111819811929041e-05,
      "loss": 2.2901,
      "step": 35929
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1075019836425781,
      "learning_rate": 1.1117788963790072e-05,
      "loss": 2.3456,
      "step": 35930
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0645830631256104,
      "learning_rate": 1.1117379806394777e-05,
      "loss": 2.4828,
      "step": 35931
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1059650182724,
      "learning_rate": 1.1116970647105218e-05,
      "loss": 2.3035,
      "step": 35932
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.155593752861023,
      "learning_rate": 1.1116561485922097e-05,
      "loss": 2.3998,
      "step": 35933
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.091281771659851,
      "learning_rate": 1.1116152322846094e-05,
      "loss": 2.5752,
      "step": 35934
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1658151149749756,
      "learning_rate": 1.1115743157877912e-05,
      "loss": 2.2945,
      "step": 35935
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0454093217849731,
      "learning_rate": 1.1115333991018242e-05,
      "loss": 2.5006,
      "step": 35936
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1277436017990112,
      "learning_rate": 1.1114924822267779e-05,
      "loss": 2.406,
      "step": 35937
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0107078552246094,
      "learning_rate": 1.1114515651627216e-05,
      "loss": 2.1794,
      "step": 35938
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0575358867645264,
      "learning_rate": 1.1114106479097247e-05,
      "loss": 2.3346,
      "step": 35939
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.015105128288269,
      "learning_rate": 1.1113697304678561e-05,
      "loss": 2.3936,
      "step": 35940
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.06134033203125,
      "learning_rate": 1.1113288128371858e-05,
      "loss": 2.3669,
      "step": 35941
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0093063116073608,
      "learning_rate": 1.111287895017783e-05,
      "loss": 2.1342,
      "step": 35942
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0765067338943481,
      "learning_rate": 1.1112469770097168e-05,
      "loss": 2.5171,
      "step": 35943
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.104841709136963,
      "learning_rate": 1.1112060588130568e-05,
      "loss": 2.3283,
      "step": 35944
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0359504222869873,
      "learning_rate": 1.1111651404278722e-05,
      "loss": 2.6261,
      "step": 35945
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1013998985290527,
      "learning_rate": 1.1111242218542332e-05,
      "loss": 2.3675,
      "step": 35946
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.048135757446289,
      "learning_rate": 1.1110833030922077e-05,
      "loss": 2.2505,
      "step": 35947
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3363674879074097,
      "learning_rate": 1.1110423841418661e-05,
      "loss": 2.326,
      "step": 35948
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0420480966567993,
      "learning_rate": 1.1110014650032774e-05,
      "loss": 2.323,
      "step": 35949
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9636382460594177,
      "learning_rate": 1.1109605456765114e-05,
      "loss": 2.3868,
      "step": 35950
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.08016836643219,
      "learning_rate": 1.1109196261616369e-05,
      "loss": 2.2894,
      "step": 35951
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1326287984848022,
      "learning_rate": 1.1108787064587233e-05,
      "loss": 2.4229,
      "step": 35952
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0340747833251953,
      "learning_rate": 1.1108377865678406e-05,
      "loss": 2.3297,
      "step": 35953
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0654569864273071,
      "learning_rate": 1.1107968664890576e-05,
      "loss": 2.3367,
      "step": 35954
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.041054606437683,
      "learning_rate": 1.1107559462224437e-05,
      "loss": 2.437,
      "step": 35955
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1028687953948975,
      "learning_rate": 1.1107150257680683e-05,
      "loss": 2.2611,
      "step": 35956
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0736401081085205,
      "learning_rate": 1.110674105126001e-05,
      "loss": 2.3656,
      "step": 35957
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2242563962936401,
      "learning_rate": 1.110633184296311e-05,
      "loss": 2.5635,
      "step": 35958
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0044912099838257,
      "learning_rate": 1.110592263279068e-05,
      "loss": 2.0353,
      "step": 35959
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1510522365570068,
      "learning_rate": 1.1105513420743406e-05,
      "loss": 2.4549,
      "step": 35960
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0423709154129028,
      "learning_rate": 1.1105104206821989e-05,
      "loss": 2.3929,
      "step": 35961
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0009520053863525,
      "learning_rate": 1.1104694991027118e-05,
      "loss": 2.2866,
      "step": 35962
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1010104417800903,
      "learning_rate": 1.110428577335949e-05,
      "loss": 2.2474,
      "step": 35963
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1648081541061401,
      "learning_rate": 1.11038765538198e-05,
      "loss": 2.5195,
      "step": 35964
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2195229530334473,
      "learning_rate": 1.1103467332408737e-05,
      "loss": 2.4196,
      "step": 35965
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9672714471817017,
      "learning_rate": 1.1103058109127e-05,
      "loss": 2.301,
      "step": 35966
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0684678554534912,
      "learning_rate": 1.1102648883975277e-05,
      "loss": 2.3427,
      "step": 35967
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.169533371925354,
      "learning_rate": 1.1102239656954265e-05,
      "loss": 2.2822,
      "step": 35968
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0736006498336792,
      "learning_rate": 1.1101830428064659e-05,
      "loss": 2.4076,
      "step": 35969
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0413525104522705,
      "learning_rate": 1.110142119730715e-05,
      "loss": 2.3169,
      "step": 35970
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2928217649459839,
      "learning_rate": 1.1101011964682433e-05,
      "loss": 2.3748,
      "step": 35971
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0358195304870605,
      "learning_rate": 1.1100602730191204e-05,
      "loss": 2.3293,
      "step": 35972
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1016093492507935,
      "learning_rate": 1.1100193493834152e-05,
      "loss": 2.3589,
      "step": 35973
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1529688835144043,
      "learning_rate": 1.1099784255611975e-05,
      "loss": 2.2532,
      "step": 35974
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0353810787200928,
      "learning_rate": 1.1099375015525359e-05,
      "loss": 2.3578,
      "step": 35975
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0523279905319214,
      "learning_rate": 1.109896577357501e-05,
      "loss": 2.4618,
      "step": 35976
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0571026802062988,
      "learning_rate": 1.1098556529761616e-05,
      "loss": 2.6139,
      "step": 35977
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9962397217750549,
      "learning_rate": 1.1098147284085867e-05,
      "loss": 2.3907,
      "step": 35978
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0654798746109009,
      "learning_rate": 1.1097738036548461e-05,
      "loss": 2.3643,
      "step": 35979
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2376185655593872,
      "learning_rate": 1.109732878715009e-05,
      "loss": 2.3304,
      "step": 35980
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.058375358581543,
      "learning_rate": 1.1096919535891453e-05,
      "loss": 2.3125,
      "step": 35981
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1053380966186523,
      "learning_rate": 1.1096510282773237e-05,
      "loss": 2.4276,
      "step": 35982
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9943948984146118,
      "learning_rate": 1.1096101027796136e-05,
      "loss": 2.2678,
      "step": 35983
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1708064079284668,
      "learning_rate": 1.1095691770960849e-05,
      "loss": 2.3322,
      "step": 35984
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.366343379020691,
      "learning_rate": 1.1095282512268067e-05,
      "loss": 2.2683,
      "step": 35985
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0826354026794434,
      "learning_rate": 1.109487325171848e-05,
      "loss": 2.2672,
      "step": 35986
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1472734212875366,
      "learning_rate": 1.1094463989312787e-05,
      "loss": 2.2724,
      "step": 35987
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1376131772994995,
      "learning_rate": 1.1094054725051682e-05,
      "loss": 2.2622,
      "step": 35988
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0054655075073242,
      "learning_rate": 1.1093645458935856e-05,
      "loss": 2.4598,
      "step": 35989
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1204546689987183,
      "learning_rate": 1.1093236190966001e-05,
      "loss": 2.3717,
      "step": 35990
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0956130027770996,
      "learning_rate": 1.1092826921142819e-05,
      "loss": 2.1754,
      "step": 35991
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0349507331848145,
      "learning_rate": 1.1092417649466997e-05,
      "loss": 2.5208,
      "step": 35992
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0265101194381714,
      "learning_rate": 1.109200837593923e-05,
      "loss": 2.1183,
      "step": 35993
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1571253538131714,
      "learning_rate": 1.109159910056021e-05,
      "loss": 2.4071,
      "step": 35994
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9570158123970032,
      "learning_rate": 1.1091189823330634e-05,
      "loss": 2.1657,
      "step": 35995
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2788214683532715,
      "learning_rate": 1.1090780544251196e-05,
      "loss": 2.273,
      "step": 35996
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1292502880096436,
      "learning_rate": 1.1090371263322588e-05,
      "loss": 2.5304,
      "step": 35997
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1439634561538696,
      "learning_rate": 1.1089961980545506e-05,
      "loss": 2.0723,
      "step": 35998
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1569011211395264,
      "learning_rate": 1.1089552695920642e-05,
      "loss": 2.4629,
      "step": 35999
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.030383825302124,
      "learning_rate": 1.108914340944869e-05,
      "loss": 2.6172,
      "step": 36000
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1933337450027466,
      "learning_rate": 1.1088734121130342e-05,
      "loss": 2.2336,
      "step": 36001
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0255630016326904,
      "learning_rate": 1.1088324830966297e-05,
      "loss": 2.3131,
      "step": 36002
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1811929941177368,
      "learning_rate": 1.1087915538957244e-05,
      "loss": 2.542,
      "step": 36003
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9615622758865356,
      "learning_rate": 1.1087506245103879e-05,
      "loss": 2.1505,
      "step": 36004
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.935072660446167,
      "learning_rate": 1.1087096949406898e-05,
      "loss": 2.4661,
      "step": 36005
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2424978017807007,
      "learning_rate": 1.108668765186699e-05,
      "loss": 2.4369,
      "step": 36006
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.217331051826477,
      "learning_rate": 1.1086278352484852e-05,
      "loss": 2.3865,
      "step": 36007
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0073719024658203,
      "learning_rate": 1.1085869051261176e-05,
      "loss": 2.4771,
      "step": 36008
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1227582693099976,
      "learning_rate": 1.1085459748196659e-05,
      "loss": 2.3888,
      "step": 36009
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0738948583602905,
      "learning_rate": 1.108505044329199e-05,
      "loss": 2.35,
      "step": 36010
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0555955171585083,
      "learning_rate": 1.1084641136547868e-05,
      "loss": 2.5442,
      "step": 36011
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.112099051475525,
      "learning_rate": 1.1084231827964987e-05,
      "loss": 2.3752,
      "step": 36012
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0421868562698364,
      "learning_rate": 1.1083822517544035e-05,
      "loss": 2.476,
      "step": 36013
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9780793190002441,
      "learning_rate": 1.108341320528571e-05,
      "loss": 2.3517,
      "step": 36014
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1055110692977905,
      "learning_rate": 1.1083003891190705e-05,
      "loss": 2.4395,
      "step": 36015
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1005686521530151,
      "learning_rate": 1.1082594575259715e-05,
      "loss": 2.5711,
      "step": 36016
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0210018157958984,
      "learning_rate": 1.1082185257493431e-05,
      "loss": 2.1015,
      "step": 36017
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2301288843154907,
      "learning_rate": 1.1081775937892553e-05,
      "loss": 2.1779,
      "step": 36018
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1307483911514282,
      "learning_rate": 1.1081366616457769e-05,
      "loss": 2.2866,
      "step": 36019
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0712531805038452,
      "learning_rate": 1.1080957293189775e-05,
      "loss": 2.5279,
      "step": 36020
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0467859506607056,
      "learning_rate": 1.1080547968089265e-05,
      "loss": 2.3697,
      "step": 36021
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0437349081039429,
      "learning_rate": 1.1080138641156932e-05,
      "loss": 2.2954,
      "step": 36022
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9732266068458557,
      "learning_rate": 1.107972931239347e-05,
      "loss": 2.4595,
      "step": 36023
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1372743844985962,
      "learning_rate": 1.1079319981799575e-05,
      "loss": 2.5081,
      "step": 36024
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9769196510314941,
      "learning_rate": 1.107891064937594e-05,
      "loss": 2.6128,
      "step": 36025
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1436924934387207,
      "learning_rate": 1.1078501315123258e-05,
      "loss": 2.5811,
      "step": 36026
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0744572877883911,
      "learning_rate": 1.1078091979042223e-05,
      "loss": 2.3396,
      "step": 36027
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.033115267753601,
      "learning_rate": 1.1077682641133529e-05,
      "loss": 2.3866,
      "step": 36028
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.020470142364502,
      "learning_rate": 1.1077273301397871e-05,
      "loss": 2.4722,
      "step": 36029
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0203408002853394,
      "learning_rate": 1.107686395983594e-05,
      "loss": 2.3222,
      "step": 36030
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1363215446472168,
      "learning_rate": 1.1076454616448435e-05,
      "loss": 2.3346,
      "step": 36031
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1367796659469604,
      "learning_rate": 1.1076045271236047e-05,
      "loss": 2.2928,
      "step": 36032
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9579449892044067,
      "learning_rate": 1.1075635924199468e-05,
      "loss": 2.3766,
      "step": 36033
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9977248311042786,
      "learning_rate": 1.1075226575339396e-05,
      "loss": 2.4346,
      "step": 36034
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0795519351959229,
      "learning_rate": 1.107481722465652e-05,
      "loss": 2.19,
      "step": 36035
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.060160756111145,
      "learning_rate": 1.1074407872151541e-05,
      "loss": 2.4617,
      "step": 36036
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0111355781555176,
      "learning_rate": 1.1073998517825146e-05,
      "loss": 2.3583,
      "step": 36037
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9788632988929749,
      "learning_rate": 1.107358916167803e-05,
      "loss": 2.3605,
      "step": 36038
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9730570316314697,
      "learning_rate": 1.1073179803710892e-05,
      "loss": 2.2981,
      "step": 36039
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0561857223510742,
      "learning_rate": 1.1072770443924423e-05,
      "loss": 2.2643,
      "step": 36040
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0656718015670776,
      "learning_rate": 1.1072361082319316e-05,
      "loss": 2.3842,
      "step": 36041
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2075587511062622,
      "learning_rate": 1.1071951718896265e-05,
      "loss": 2.4046,
      "step": 36042
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9870991110801697,
      "learning_rate": 1.1071542353655965e-05,
      "loss": 2.4071,
      "step": 36043
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0740289688110352,
      "learning_rate": 1.1071132986599109e-05,
      "loss": 2.5015,
      "step": 36044
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.056770920753479,
      "learning_rate": 1.1070723617726395e-05,
      "loss": 2.4818,
      "step": 36045
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1241830587387085,
      "learning_rate": 1.1070314247038509e-05,
      "loss": 2.3339,
      "step": 36046
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0017690658569336,
      "learning_rate": 1.1069904874536153e-05,
      "loss": 2.3036,
      "step": 36047
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0372360944747925,
      "learning_rate": 1.1069495500220016e-05,
      "loss": 2.4222,
      "step": 36048
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0978714227676392,
      "learning_rate": 1.1069086124090793e-05,
      "loss": 2.2207,
      "step": 36049
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9993574023246765,
      "learning_rate": 1.1068676746149181e-05,
      "loss": 2.354,
      "step": 36050
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0233174562454224,
      "learning_rate": 1.106826736639587e-05,
      "loss": 2.3982,
      "step": 36051
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.054188847541809,
      "learning_rate": 1.1067857984831556e-05,
      "loss": 2.3835,
      "step": 36052
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0109496116638184,
      "learning_rate": 1.1067448601456933e-05,
      "loss": 2.4879,
      "step": 36053
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0203925371170044,
      "learning_rate": 1.1067039216272695e-05,
      "loss": 2.2625,
      "step": 36054
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0756279230117798,
      "learning_rate": 1.1066629829279534e-05,
      "loss": 2.134,
      "step": 36055
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1076304912567139,
      "learning_rate": 1.1066220440478146e-05,
      "loss": 2.4439,
      "step": 36056
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9996561408042908,
      "learning_rate": 1.1065811049869226e-05,
      "loss": 2.3295,
      "step": 36057
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9644687175750732,
      "learning_rate": 1.1065401657453467e-05,
      "loss": 2.3693,
      "step": 36058
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0582231283187866,
      "learning_rate": 1.1064992263231561e-05,
      "loss": 2.2523,
      "step": 36059
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9745727777481079,
      "learning_rate": 1.1064582867204205e-05,
      "loss": 2.3,
      "step": 36060
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2875889539718628,
      "learning_rate": 1.1064173469372092e-05,
      "loss": 2.3882,
      "step": 36061
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1281753778457642,
      "learning_rate": 1.1063764069735916e-05,
      "loss": 2.6277,
      "step": 36062
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0046766996383667,
      "learning_rate": 1.1063354668296372e-05,
      "loss": 2.1803,
      "step": 36063
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0167129039764404,
      "learning_rate": 1.1062945265054153e-05,
      "loss": 2.4485,
      "step": 36064
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1038687229156494,
      "learning_rate": 1.1062535860009951e-05,
      "loss": 2.315,
      "step": 36065
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0244946479797363,
      "learning_rate": 1.1062126453164463e-05,
      "loss": 2.4439,
      "step": 36066
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0933481454849243,
      "learning_rate": 1.1061717044518382e-05,
      "loss": 2.3063,
      "step": 36067
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1451869010925293,
      "learning_rate": 1.1061307634072404e-05,
      "loss": 2.4323,
      "step": 36068
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0211681127548218,
      "learning_rate": 1.1060898221827221e-05,
      "loss": 2.3234,
      "step": 36069
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1241015195846558,
      "learning_rate": 1.1060488807783525e-05,
      "loss": 2.3428,
      "step": 36070
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.188895344734192,
      "learning_rate": 1.1060079391942016e-05,
      "loss": 2.3305,
      "step": 36071
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9987683892250061,
      "learning_rate": 1.1059669974303382e-05,
      "loss": 2.1025,
      "step": 36072
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5712287425994873,
      "learning_rate": 1.105926055486832e-05,
      "loss": 2.2458,
      "step": 36073
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2807223796844482,
      "learning_rate": 1.1058851133637523e-05,
      "loss": 2.1647,
      "step": 36074
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0945934057235718,
      "learning_rate": 1.1058441710611687e-05,
      "loss": 2.2778,
      "step": 36075
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2820172309875488,
      "learning_rate": 1.1058032285791502e-05,
      "loss": 2.4948,
      "step": 36076
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1517990827560425,
      "learning_rate": 1.1057622859177668e-05,
      "loss": 2.4284,
      "step": 36077
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0337228775024414,
      "learning_rate": 1.1057213430770877e-05,
      "loss": 2.2762,
      "step": 36078
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0723347663879395,
      "learning_rate": 1.105680400057182e-05,
      "loss": 2.3875,
      "step": 36079
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1764777898788452,
      "learning_rate": 1.1056394568581193e-05,
      "loss": 2.5014,
      "step": 36080
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9957585334777832,
      "learning_rate": 1.1055985134799689e-05,
      "loss": 2.4334,
      "step": 36081
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9951653480529785,
      "learning_rate": 1.1055575699228005e-05,
      "loss": 2.2969,
      "step": 36082
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0980422496795654,
      "learning_rate": 1.1055166261866835e-05,
      "loss": 2.4511,
      "step": 36083
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0328071117401123,
      "learning_rate": 1.105475682271687e-05,
      "loss": 2.3513,
      "step": 36084
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9976332783699036,
      "learning_rate": 1.1054347381778807e-05,
      "loss": 2.3265,
      "step": 36085
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1258738040924072,
      "learning_rate": 1.1053937939053337e-05,
      "loss": 2.6481,
      "step": 36086
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0411063432693481,
      "learning_rate": 1.1053528494541157e-05,
      "loss": 2.1271,
      "step": 36087
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1205660104751587,
      "learning_rate": 1.105311904824296e-05,
      "loss": 2.4796,
      "step": 36088
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.6552622318267822,
      "learning_rate": 1.105270960015944e-05,
      "loss": 2.3506,
      "step": 36089
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1398440599441528,
      "learning_rate": 1.1052300150291288e-05,
      "loss": 2.3039,
      "step": 36090
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0236543416976929,
      "learning_rate": 1.1051890698639207e-05,
      "loss": 2.4113,
      "step": 36091
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0553680658340454,
      "learning_rate": 1.1051481245203884e-05,
      "loss": 2.411,
      "step": 36092
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9875940680503845,
      "learning_rate": 1.1051071789986014e-05,
      "loss": 2.2984,
      "step": 36093
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1829545497894287,
      "learning_rate": 1.105066233298629e-05,
      "loss": 2.3601,
      "step": 36094
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0412726402282715,
      "learning_rate": 1.1050252874205412e-05,
      "loss": 2.3936,
      "step": 36095
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1473067998886108,
      "learning_rate": 1.1049843413644067e-05,
      "loss": 2.4239,
      "step": 36096
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.991677463054657,
      "learning_rate": 1.1049433951302955e-05,
      "loss": 2.3512,
      "step": 36097
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1968014240264893,
      "learning_rate": 1.1049024487182764e-05,
      "loss": 2.6149,
      "step": 36098
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.106414556503296,
      "learning_rate": 1.1048615021284195e-05,
      "loss": 2.3379,
      "step": 36099
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0262657403945923,
      "learning_rate": 1.1048205553607938e-05,
      "loss": 2.4635,
      "step": 36100
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0333248376846313,
      "learning_rate": 1.1047796084154685e-05,
      "loss": 2.3379,
      "step": 36101
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9301925897598267,
      "learning_rate": 1.1047386612925135e-05,
      "loss": 2.1934,
      "step": 36102
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9570853114128113,
      "learning_rate": 1.1046977139919978e-05,
      "loss": 2.5297,
      "step": 36103
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.091194748878479,
      "learning_rate": 1.1046567665139916e-05,
      "loss": 2.3154,
      "step": 36104
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0093036890029907,
      "learning_rate": 1.1046158188585634e-05,
      "loss": 2.3022,
      "step": 36105
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2035504579544067,
      "learning_rate": 1.1045748710257829e-05,
      "loss": 2.272,
      "step": 36106
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0902750492095947,
      "learning_rate": 1.1045339230157198e-05,
      "loss": 2.2798,
      "step": 36107
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0289702415466309,
      "learning_rate": 1.104492974828443e-05,
      "loss": 2.4015,
      "step": 36108
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0550439357757568,
      "learning_rate": 1.1044520264640224e-05,
      "loss": 2.2841,
      "step": 36109
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0080227851867676,
      "learning_rate": 1.1044110779225274e-05,
      "loss": 2.4798,
      "step": 36110
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0371284484863281,
      "learning_rate": 1.1043701292040272e-05,
      "loss": 2.466,
      "step": 36111
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1314105987548828,
      "learning_rate": 1.1043291803085912e-05,
      "loss": 2.3005,
      "step": 36112
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0682027339935303,
      "learning_rate": 1.1042882312362888e-05,
      "loss": 2.3682,
      "step": 36113
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9998109340667725,
      "learning_rate": 1.1042472819871897e-05,
      "loss": 2.56,
      "step": 36114
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0378551483154297,
      "learning_rate": 1.104206332561363e-05,
      "loss": 2.4744,
      "step": 36115
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.092647910118103,
      "learning_rate": 1.1041653829588783e-05,
      "loss": 2.5735,
      "step": 36116
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.029813289642334,
      "learning_rate": 1.1041244331798051e-05,
      "loss": 2.3657,
      "step": 36117
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.072981357574463,
      "learning_rate": 1.1040834832242126e-05,
      "loss": 2.3681,
      "step": 36118
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.164825677871704,
      "learning_rate": 1.1040425330921705e-05,
      "loss": 2.4791,
      "step": 36119
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2014472484588623,
      "learning_rate": 1.1040015827837479e-05,
      "loss": 2.4209,
      "step": 36120
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.070639729499817,
      "learning_rate": 1.1039606322990143e-05,
      "loss": 2.2488,
      "step": 36121
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9632152915000916,
      "learning_rate": 1.1039196816380394e-05,
      "loss": 2.2055,
      "step": 36122
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0481631755828857,
      "learning_rate": 1.1038787308008923e-05,
      "loss": 2.1656,
      "step": 36123
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0457483530044556,
      "learning_rate": 1.1038377797876425e-05,
      "loss": 2.3717,
      "step": 36124
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0270943641662598,
      "learning_rate": 1.1037968285983594e-05,
      "loss": 2.3565,
      "step": 36125
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0471582412719727,
      "learning_rate": 1.1037558772331125e-05,
      "loss": 2.2606,
      "step": 36126
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0533682107925415,
      "learning_rate": 1.1037149256919715e-05,
      "loss": 2.2891,
      "step": 36127
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.13161039352417,
      "learning_rate": 1.1036739739750054e-05,
      "loss": 2.4313,
      "step": 36128
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.053296446800232,
      "learning_rate": 1.1036330220822835e-05,
      "loss": 2.2344,
      "step": 36129
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.040771722793579,
      "learning_rate": 1.103592070013876e-05,
      "loss": 2.2843,
      "step": 36130
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9839245080947876,
      "learning_rate": 1.1035511177698513e-05,
      "loss": 2.7275,
      "step": 36131
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0176920890808105,
      "learning_rate": 1.1035101653502795e-05,
      "loss": 2.3788,
      "step": 36132
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0586395263671875,
      "learning_rate": 1.1034692127552297e-05,
      "loss": 2.3039,
      "step": 36133
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.162532091140747,
      "learning_rate": 1.1034282599847716e-05,
      "loss": 2.0892,
      "step": 36134
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0671621561050415,
      "learning_rate": 1.1033873070389746e-05,
      "loss": 2.2585,
      "step": 36135
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1147308349609375,
      "learning_rate": 1.103346353917908e-05,
      "loss": 2.4615,
      "step": 36136
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1146353483200073,
      "learning_rate": 1.1033054006216414e-05,
      "loss": 2.468,
      "step": 36137
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1433660984039307,
      "learning_rate": 1.1032644471502439e-05,
      "loss": 2.4596,
      "step": 36138
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0922682285308838,
      "learning_rate": 1.1032234935037852e-05,
      "loss": 2.1671,
      "step": 36139
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0730297565460205,
      "learning_rate": 1.1031825396823343e-05,
      "loss": 2.3112,
      "step": 36140
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.114261507987976,
      "learning_rate": 1.1031415856859614e-05,
      "loss": 2.4363,
      "step": 36141
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1397870779037476,
      "learning_rate": 1.1031006315147352e-05,
      "loss": 2.1675,
      "step": 36142
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.494580626487732,
      "learning_rate": 1.1030596771687259e-05,
      "loss": 2.4225,
      "step": 36143
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0426456928253174,
      "learning_rate": 1.1030187226480021e-05,
      "loss": 2.2492,
      "step": 36144
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1460661888122559,
      "learning_rate": 1.1029777679526335e-05,
      "loss": 2.1301,
      "step": 36145
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9996222853660583,
      "learning_rate": 1.1029368130826899e-05,
      "loss": 2.3978,
      "step": 36146
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0682488679885864,
      "learning_rate": 1.10289585803824e-05,
      "loss": 2.3178,
      "step": 36147
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.149219036102295,
      "learning_rate": 1.1028549028193541e-05,
      "loss": 2.265,
      "step": 36148
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.106997013092041,
      "learning_rate": 1.102813947426101e-05,
      "loss": 2.3565,
      "step": 36149
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0373735427856445,
      "learning_rate": 1.1027729918585505e-05,
      "loss": 2.4079,
      "step": 36150
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3436360359191895,
      "learning_rate": 1.1027320361167717e-05,
      "loss": 2.2383,
      "step": 36151
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1357014179229736,
      "learning_rate": 1.1026910802008344e-05,
      "loss": 2.4024,
      "step": 36152
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.037452220916748,
      "learning_rate": 1.1026501241108077e-05,
      "loss": 2.2853,
      "step": 36153
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1156014204025269,
      "learning_rate": 1.1026091678467612e-05,
      "loss": 2.1893,
      "step": 36154
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.039017677307129,
      "learning_rate": 1.1025682114087641e-05,
      "loss": 2.302,
      "step": 36155
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.222045660018921,
      "learning_rate": 1.1025272547968862e-05,
      "loss": 2.3777,
      "step": 36156
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0306297540664673,
      "learning_rate": 1.1024862980111969e-05,
      "loss": 2.4432,
      "step": 36157
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.112741470336914,
      "learning_rate": 1.1024453410517653e-05,
      "loss": 2.2066,
      "step": 36158
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2094507217407227,
      "learning_rate": 1.102404383918661e-05,
      "loss": 2.4184,
      "step": 36159
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.017668604850769,
      "learning_rate": 1.1023634266119534e-05,
      "loss": 2.3425,
      "step": 36160
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.058376431465149,
      "learning_rate": 1.1023224691317121e-05,
      "loss": 2.4838,
      "step": 36161
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0923283100128174,
      "learning_rate": 1.1022815114780064e-05,
      "loss": 2.6054,
      "step": 36162
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0712281465530396,
      "learning_rate": 1.102240553650906e-05,
      "loss": 2.2599,
      "step": 36163
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0011135339736938,
      "learning_rate": 1.1021995956504798e-05,
      "loss": 2.3549,
      "step": 36164
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0651105642318726,
      "learning_rate": 1.1021586374767976e-05,
      "loss": 2.2058,
      "step": 36165
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0892589092254639,
      "learning_rate": 1.1021176791299285e-05,
      "loss": 2.4336,
      "step": 36166
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.111654281616211,
      "learning_rate": 1.1020767206099426e-05,
      "loss": 2.3304,
      "step": 36167
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1051963567733765,
      "learning_rate": 1.1020357619169086e-05,
      "loss": 2.2042,
      "step": 36168
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0346367359161377,
      "learning_rate": 1.1019948030508965e-05,
      "loss": 2.2766,
      "step": 36169
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.187532901763916,
      "learning_rate": 1.1019538440119757e-05,
      "loss": 2.2445,
      "step": 36170
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0835621356964111,
      "learning_rate": 1.1019128848002152e-05,
      "loss": 2.1815,
      "step": 36171
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0562307834625244,
      "learning_rate": 1.1018719254156847e-05,
      "loss": 2.402,
      "step": 36172
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0439364910125732,
      "learning_rate": 1.1018309658584536e-05,
      "loss": 2.4187,
      "step": 36173
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.052998661994934,
      "learning_rate": 1.1017900061285914e-05,
      "loss": 2.3508,
      "step": 36174
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0837405920028687,
      "learning_rate": 1.1017490462261673e-05,
      "loss": 2.3583,
      "step": 36175
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0853142738342285,
      "learning_rate": 1.1017080861512513e-05,
      "loss": 2.409,
      "step": 36176
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0355908870697021,
      "learning_rate": 1.1016671259039123e-05,
      "loss": 2.2585,
      "step": 36177
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2855788469314575,
      "learning_rate": 1.1016261654842198e-05,
      "loss": 2.4468,
      "step": 36178
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0359621047973633,
      "learning_rate": 1.1015852048922434e-05,
      "loss": 2.3033,
      "step": 36179
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1492562294006348,
      "learning_rate": 1.1015442441280524e-05,
      "loss": 2.3236,
      "step": 36180
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0158724784851074,
      "learning_rate": 1.1015032831917163e-05,
      "loss": 2.6219,
      "step": 36181
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0504335165023804,
      "learning_rate": 1.1014623220833049e-05,
      "loss": 2.2753,
      "step": 36182
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1595022678375244,
      "learning_rate": 1.1014213608028872e-05,
      "loss": 2.3834,
      "step": 36183
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0390726327896118,
      "learning_rate": 1.1013803993505326e-05,
      "loss": 2.3666,
      "step": 36184
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.017788290977478,
      "learning_rate": 1.1013394377263107e-05,
      "loss": 2.3856,
      "step": 36185
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1115236282348633,
      "learning_rate": 1.1012984759302911e-05,
      "loss": 2.3236,
      "step": 36186
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1047879457473755,
      "learning_rate": 1.101257513962543e-05,
      "loss": 2.4927,
      "step": 36187
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0323129892349243,
      "learning_rate": 1.1012165518231358e-05,
      "loss": 2.3921,
      "step": 36188
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1168047189712524,
      "learning_rate": 1.101175589512139e-05,
      "loss": 2.4779,
      "step": 36189
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2298637628555298,
      "learning_rate": 1.1011346270296223e-05,
      "loss": 2.3302,
      "step": 36190
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0789169073104858,
      "learning_rate": 1.101093664375655e-05,
      "loss": 2.2831,
      "step": 36191
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9461844563484192,
      "learning_rate": 1.1010527015503062e-05,
      "loss": 2.199,
      "step": 36192
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0509518384933472,
      "learning_rate": 1.1010117385536458e-05,
      "loss": 2.1207,
      "step": 36193
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1011065244674683,
      "learning_rate": 1.100970775385743e-05,
      "loss": 2.0469,
      "step": 36194
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0384736061096191,
      "learning_rate": 1.1009298120466672e-05,
      "loss": 2.4411,
      "step": 36195
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.103843331336975,
      "learning_rate": 1.1008888485364884e-05,
      "loss": 2.3757,
      "step": 36196
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.009150505065918,
      "learning_rate": 1.1008478848552753e-05,
      "loss": 2.4871,
      "step": 36197
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1074621677398682,
      "learning_rate": 1.1008069210030973e-05,
      "loss": 2.35,
      "step": 36198
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.179358959197998,
      "learning_rate": 1.1007659569800247e-05,
      "loss": 2.252,
      "step": 36199
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.14090895652771,
      "learning_rate": 1.1007249927861264e-05,
      "loss": 2.1955,
      "step": 36200
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9637647271156311,
      "learning_rate": 1.1006840284214715e-05,
      "loss": 2.2392,
      "step": 36201
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1605480909347534,
      "learning_rate": 1.1006430638861301e-05,
      "loss": 2.2085,
      "step": 36202
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0941238403320312,
      "learning_rate": 1.1006020991801716e-05,
      "loss": 2.5127,
      "step": 36203
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0480879545211792,
      "learning_rate": 1.1005611343036648e-05,
      "loss": 2.3385,
      "step": 36204
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3473137617111206,
      "learning_rate": 1.1005201692566798e-05,
      "loss": 2.361,
      "step": 36205
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0509274005889893,
      "learning_rate": 1.1004792040392855e-05,
      "loss": 2.5691,
      "step": 36206
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1055495738983154,
      "learning_rate": 1.100438238651552e-05,
      "loss": 2.3625,
      "step": 36207
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1849548816680908,
      "learning_rate": 1.1003972730935484e-05,
      "loss": 2.5047,
      "step": 36208
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0459892749786377,
      "learning_rate": 1.1003563073653441e-05,
      "loss": 2.5133,
      "step": 36209
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0612280368804932,
      "learning_rate": 1.1003153414670085e-05,
      "loss": 2.4713,
      "step": 36210
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.086081624031067,
      "learning_rate": 1.1002743753986112e-05,
      "loss": 2.6172,
      "step": 36211
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9726833701133728,
      "learning_rate": 1.1002334091602216e-05,
      "loss": 2.3767,
      "step": 36212
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1394842863082886,
      "learning_rate": 1.1001924427519093e-05,
      "loss": 2.0816,
      "step": 36213
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0598734617233276,
      "learning_rate": 1.1001514761737435e-05,
      "loss": 2.1906,
      "step": 36214
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1498974561691284,
      "learning_rate": 1.1001105094257936e-05,
      "loss": 2.5464,
      "step": 36215
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0504049062728882,
      "learning_rate": 1.1000695425081293e-05,
      "loss": 2.2831,
      "step": 36216
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.002051591873169,
      "learning_rate": 1.1000285754208202e-05,
      "loss": 2.4822,
      "step": 36217
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.02113938331604,
      "learning_rate": 1.0999876081639352e-05,
      "loss": 2.3758,
      "step": 36218
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0870884656906128,
      "learning_rate": 1.0999466407375442e-05,
      "loss": 2.3592,
      "step": 36219
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0961779356002808,
      "learning_rate": 1.0999056731417162e-05,
      "loss": 2.3246,
      "step": 36220
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0167301893234253,
      "learning_rate": 1.099864705376521e-05,
      "loss": 2.3079,
      "step": 36221
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1086207628250122,
      "learning_rate": 1.0998237374420283e-05,
      "loss": 2.365,
      "step": 36222
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0187699794769287,
      "learning_rate": 1.0997827693383072e-05,
      "loss": 2.1655,
      "step": 36223
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0835702419281006,
      "learning_rate": 1.099741801065427e-05,
      "loss": 2.3404,
      "step": 36224
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1190495491027832,
      "learning_rate": 1.0997008326234575e-05,
      "loss": 2.2157,
      "step": 36225
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2372901439666748,
      "learning_rate": 1.099659864012468e-05,
      "loss": 2.2493,
      "step": 36226
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.188236117362976,
      "learning_rate": 1.0996188952325277e-05,
      "loss": 2.2466,
      "step": 36227
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3648699522018433,
      "learning_rate": 1.0995779262837068e-05,
      "loss": 2.54,
      "step": 36228
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1214265823364258,
      "learning_rate": 1.099536957166074e-05,
      "loss": 2.2374,
      "step": 36229
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0920164585113525,
      "learning_rate": 1.0994959878796991e-05,
      "loss": 2.349,
      "step": 36230
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9815571904182434,
      "learning_rate": 1.0994550184246513e-05,
      "loss": 2.1985,
      "step": 36231
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9651110172271729,
      "learning_rate": 1.0994140488010004e-05,
      "loss": 2.1585,
      "step": 36232
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0902667045593262,
      "learning_rate": 1.0993730790088155e-05,
      "loss": 2.2173,
      "step": 36233
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1180583238601685,
      "learning_rate": 1.0993321090481666e-05,
      "loss": 2.4619,
      "step": 36234
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0881657600402832,
      "learning_rate": 1.0992911389191225e-05,
      "loss": 2.2411,
      "step": 36235
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1229854822158813,
      "learning_rate": 1.0992501686217531e-05,
      "loss": 2.2165,
      "step": 36236
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0676213502883911,
      "learning_rate": 1.0992091981561278e-05,
      "loss": 2.4048,
      "step": 36237
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0465389490127563,
      "learning_rate": 1.0991682275223157e-05,
      "loss": 2.3648,
      "step": 36238
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0030293464660645,
      "learning_rate": 1.0991272567203866e-05,
      "loss": 2.1174,
      "step": 36239
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0888023376464844,
      "learning_rate": 1.0990862857504098e-05,
      "loss": 2.4597,
      "step": 36240
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0853989124298096,
      "learning_rate": 1.0990453146124551e-05,
      "loss": 2.2992,
      "step": 36241
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9643476605415344,
      "learning_rate": 1.0990043433065915e-05,
      "loss": 2.2389,
      "step": 36242
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2452032566070557,
      "learning_rate": 1.0989633718328887e-05,
      "loss": 2.2363,
      "step": 36243
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.004784107208252,
      "learning_rate": 1.0989224001914161e-05,
      "loss": 2.286,
      "step": 36244
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.060685157775879,
      "learning_rate": 1.0988814283822433e-05,
      "loss": 2.6048,
      "step": 36245
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.112377405166626,
      "learning_rate": 1.0988404564054394e-05,
      "loss": 2.2827,
      "step": 36246
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0308228731155396,
      "learning_rate": 1.0987994842610741e-05,
      "loss": 2.5341,
      "step": 36247
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1041163206100464,
      "learning_rate": 1.0987585119492168e-05,
      "loss": 2.3656,
      "step": 36248
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9434403777122498,
      "learning_rate": 1.0987175394699375e-05,
      "loss": 2.3574,
      "step": 36249
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.141512393951416,
      "learning_rate": 1.0986765668233046e-05,
      "loss": 2.5309,
      "step": 36250
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0418493747711182,
      "learning_rate": 1.0986355940093883e-05,
      "loss": 2.3371,
      "step": 36251
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1646013259887695,
      "learning_rate": 1.0985946210282577e-05,
      "loss": 2.1859,
      "step": 36252
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1480993032455444,
      "learning_rate": 1.0985536478799826e-05,
      "loss": 2.2257,
      "step": 36253
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0264674425125122,
      "learning_rate": 1.0985126745646323e-05,
      "loss": 2.5562,
      "step": 36254
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0110334157943726,
      "learning_rate": 1.0984717010822763e-05,
      "loss": 2.3403,
      "step": 36255
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9580737948417664,
      "learning_rate": 1.098430727432984e-05,
      "loss": 2.2985,
      "step": 36256
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0524027347564697,
      "learning_rate": 1.0983897536168249e-05,
      "loss": 2.4485,
      "step": 36257
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2617604732513428,
      "learning_rate": 1.0983487796338683e-05,
      "loss": 2.2794,
      "step": 36258
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0698610544204712,
      "learning_rate": 1.0983078054841837e-05,
      "loss": 2.3762,
      "step": 36259
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0324372053146362,
      "learning_rate": 1.098266831167841e-05,
      "loss": 2.4091,
      "step": 36260
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9647249579429626,
      "learning_rate": 1.0982258566849093e-05,
      "loss": 2.0789,
      "step": 36261
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2010389566421509,
      "learning_rate": 1.098184882035458e-05,
      "loss": 2.5066,
      "step": 36262
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0604652166366577,
      "learning_rate": 1.0981439072195567e-05,
      "loss": 2.4363,
      "step": 36263
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1330716609954834,
      "learning_rate": 1.0981029322372749e-05,
      "loss": 2.1828,
      "step": 36264
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.051252007484436,
      "learning_rate": 1.0980619570886816e-05,
      "loss": 2.508,
      "step": 36265
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.043315052986145,
      "learning_rate": 1.098020981773847e-05,
      "loss": 2.3779,
      "step": 36266
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2317399978637695,
      "learning_rate": 1.0979800062928401e-05,
      "loss": 2.4916,
      "step": 36267
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0437813997268677,
      "learning_rate": 1.0979390306457304e-05,
      "loss": 2.6339,
      "step": 36268
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1897295713424683,
      "learning_rate": 1.0978980548325879e-05,
      "loss": 2.6025,
      "step": 36269
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2040303945541382,
      "learning_rate": 1.0978570788534812e-05,
      "loss": 2.3752,
      "step": 36270
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.067638635635376,
      "learning_rate": 1.0978161027084801e-05,
      "loss": 2.5102,
      "step": 36271
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1405125856399536,
      "learning_rate": 1.0977751263976545e-05,
      "loss": 2.4359,
      "step": 36272
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9230231046676636,
      "learning_rate": 1.0977341499210734e-05,
      "loss": 2.2375,
      "step": 36273
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9544723629951477,
      "learning_rate": 1.097693173278806e-05,
      "loss": 2.1701,
      "step": 36274
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0983182191848755,
      "learning_rate": 1.0976521964709225e-05,
      "loss": 2.2673,
      "step": 36275
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1094542741775513,
      "learning_rate": 1.0976112194974922e-05,
      "loss": 2.2784,
      "step": 36276
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0775843858718872,
      "learning_rate": 1.0975702423585838e-05,
      "loss": 2.4042,
      "step": 36277
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1083645820617676,
      "learning_rate": 1.0975292650542677e-05,
      "loss": 2.5607,
      "step": 36278
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.046569585800171,
      "learning_rate": 1.097488287584613e-05,
      "loss": 2.3192,
      "step": 36279
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0913095474243164,
      "learning_rate": 1.097447309949689e-05,
      "loss": 2.4632,
      "step": 36280
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1214491128921509,
      "learning_rate": 1.0974063321495654e-05,
      "loss": 2.4419,
      "step": 36281
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.099825143814087,
      "learning_rate": 1.0973653541843118e-05,
      "loss": 2.2313,
      "step": 36282
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.138679027557373,
      "learning_rate": 1.0973243760539973e-05,
      "loss": 2.4977,
      "step": 36283
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0724858045578003,
      "learning_rate": 1.0972833977586916e-05,
      "loss": 2.4678,
      "step": 36284
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.154611587524414,
      "learning_rate": 1.097242419298464e-05,
      "loss": 2.3654,
      "step": 36285
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.022021770477295,
      "learning_rate": 1.0972014406733841e-05,
      "loss": 2.6152,
      "step": 36286
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1423338651657104,
      "learning_rate": 1.0971604618835215e-05,
      "loss": 2.1486,
      "step": 36287
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9731311798095703,
      "learning_rate": 1.0971194829289455e-05,
      "loss": 2.2356,
      "step": 36288
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0695185661315918,
      "learning_rate": 1.0970785038097257e-05,
      "loss": 2.3101,
      "step": 36289
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0800325870513916,
      "learning_rate": 1.0970375245259312e-05,
      "loss": 2.2404,
      "step": 36290
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0141282081604004,
      "learning_rate": 1.0969965450776319e-05,
      "loss": 2.2849,
      "step": 36291
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0374912023544312,
      "learning_rate": 1.096955565464897e-05,
      "loss": 2.2546,
      "step": 36292
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1507787704467773,
      "learning_rate": 1.0969145856877963e-05,
      "loss": 2.2349,
      "step": 36293
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0594688653945923,
      "learning_rate": 1.096873605746399e-05,
      "loss": 2.2653,
      "step": 36294
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9775505065917969,
      "learning_rate": 1.0968326256407746e-05,
      "loss": 2.2081,
      "step": 36295
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1984115839004517,
      "learning_rate": 1.0967916453709926e-05,
      "loss": 2.2715,
      "step": 36296
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2568539381027222,
      "learning_rate": 1.0967506649371225e-05,
      "loss": 2.4762,
      "step": 36297
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1567997932434082,
      "learning_rate": 1.0967096843392335e-05,
      "loss": 2.5217,
      "step": 36298
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0559172630310059,
      "learning_rate": 1.0966687035773954e-05,
      "loss": 2.2702,
      "step": 36299
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0591990947723389,
      "learning_rate": 1.0966277226516779e-05,
      "loss": 2.3141,
      "step": 36300
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0661911964416504,
      "learning_rate": 1.09658674156215e-05,
      "loss": 2.6038,
      "step": 36301
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1180059909820557,
      "learning_rate": 1.0965457603088811e-05,
      "loss": 2.3313,
      "step": 36302
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1260406970977783,
      "learning_rate": 1.0965047788919412e-05,
      "loss": 2.2423,
      "step": 36303
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0960575342178345,
      "learning_rate": 1.0964637973113996e-05,
      "loss": 2.2207,
      "step": 36304
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.131306529045105,
      "learning_rate": 1.0964228155673255e-05,
      "loss": 2.3152,
      "step": 36305
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9804295301437378,
      "learning_rate": 1.0963818336597884e-05,
      "loss": 2.5508,
      "step": 36306
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0203176736831665,
      "learning_rate": 1.096340851588858e-05,
      "loss": 2.3081,
      "step": 36307
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1090086698532104,
      "learning_rate": 1.0962998693546038e-05,
      "loss": 2.3508,
      "step": 36308
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1096806526184082,
      "learning_rate": 1.0962588869570952e-05,
      "loss": 2.139,
      "step": 36309
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1754246950149536,
      "learning_rate": 1.0962179043964014e-05,
      "loss": 2.2463,
      "step": 36310
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0912973880767822,
      "learning_rate": 1.0961769216725923e-05,
      "loss": 2.4424,
      "step": 36311
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0442349910736084,
      "learning_rate": 1.0961359387857372e-05,
      "loss": 2.3687,
      "step": 36312
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0022062063217163,
      "learning_rate": 1.0960949557359053e-05,
      "loss": 2.6186,
      "step": 36313
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0223701000213623,
      "learning_rate": 1.0960539725231666e-05,
      "loss": 2.3671,
      "step": 36314
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.081875205039978,
      "learning_rate": 1.0960129891475905e-05,
      "loss": 2.4071,
      "step": 36315
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.047412395477295,
      "learning_rate": 1.095972005609246e-05,
      "loss": 2.3275,
      "step": 36316
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0006803274154663,
      "learning_rate": 1.0959310219082032e-05,
      "loss": 2.2028,
      "step": 36317
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0033773183822632,
      "learning_rate": 1.0958900380445307e-05,
      "loss": 2.1973,
      "step": 36318
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.147605538368225,
      "learning_rate": 1.0958490540182988e-05,
      "loss": 2.5138,
      "step": 36319
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1092804670333862,
      "learning_rate": 1.095808069829577e-05,
      "loss": 2.2437,
      "step": 36320
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.109966516494751,
      "learning_rate": 1.0957670854784344e-05,
      "loss": 2.4027,
      "step": 36321
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.116518497467041,
      "learning_rate": 1.0957261009649405e-05,
      "loss": 2.6637,
      "step": 36322
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1068035364151,
      "learning_rate": 1.0956851162891648e-05,
      "loss": 2.5098,
      "step": 36323
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2909046411514282,
      "learning_rate": 1.0956441314511768e-05,
      "loss": 2.3411,
      "step": 36324
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0793622732162476,
      "learning_rate": 1.0956031464510462e-05,
      "loss": 2.5201,
      "step": 36325
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0102213621139526,
      "learning_rate": 1.095562161288842e-05,
      "loss": 2.3617,
      "step": 36326
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.057754635810852,
      "learning_rate": 1.0955211759646342e-05,
      "loss": 2.4693,
      "step": 36327
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1132193803787231,
      "learning_rate": 1.0954801904784924e-05,
      "loss": 2.3897,
      "step": 36328
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1275382041931152,
      "learning_rate": 1.0954392048304854e-05,
      "loss": 2.383,
      "step": 36329
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1438255310058594,
      "learning_rate": 1.0953982190206828e-05,
      "loss": 2.398,
      "step": 36330
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0283104181289673,
      "learning_rate": 1.0953572330491546e-05,
      "loss": 2.487,
      "step": 36331
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0958698987960815,
      "learning_rate": 1.0953162469159701e-05,
      "loss": 2.4634,
      "step": 36332
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0865720510482788,
      "learning_rate": 1.0952752606211985e-05,
      "loss": 2.3805,
      "step": 36333
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0622367858886719,
      "learning_rate": 1.0952342741649094e-05,
      "loss": 2.3434,
      "step": 36334
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.001424789428711,
      "learning_rate": 1.0951932875471726e-05,
      "loss": 2.2894,
      "step": 36335
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.072052001953125,
      "learning_rate": 1.0951523007680571e-05,
      "loss": 2.2631,
      "step": 36336
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0073364973068237,
      "learning_rate": 1.0951113138276326e-05,
      "loss": 2.5158,
      "step": 36337
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0630533695220947,
      "learning_rate": 1.0950703267259686e-05,
      "loss": 2.3998,
      "step": 36338
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9969105124473572,
      "learning_rate": 1.0950293394631346e-05,
      "loss": 2.3179,
      "step": 36339
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.043515682220459,
      "learning_rate": 1.0949883520392e-05,
      "loss": 2.3524,
      "step": 36340
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.248700737953186,
      "learning_rate": 1.0949473644542346e-05,
      "loss": 2.4546,
      "step": 36341
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.118595004081726,
      "learning_rate": 1.0949063767083075e-05,
      "loss": 2.5409,
      "step": 36342
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0862329006195068,
      "learning_rate": 1.0948653888014882e-05,
      "loss": 2.2954,
      "step": 36343
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0182676315307617,
      "learning_rate": 1.0948244007338461e-05,
      "loss": 2.3545,
      "step": 36344
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1483031511306763,
      "learning_rate": 1.0947834125054513e-05,
      "loss": 2.3372,
      "step": 36345
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9784122705459595,
      "learning_rate": 1.0947424241163726e-05,
      "loss": 2.2045,
      "step": 36346
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1421525478363037,
      "learning_rate": 1.0947014355666799e-05,
      "loss": 2.5072,
      "step": 36347
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0411142110824585,
      "learning_rate": 1.0946604468564427e-05,
      "loss": 2.4812,
      "step": 36348
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9765538573265076,
      "learning_rate": 1.0946194579857301e-05,
      "loss": 2.4318,
      "step": 36349
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4329098463058472,
      "learning_rate": 1.094578468954612e-05,
      "loss": 2.3866,
      "step": 36350
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0172407627105713,
      "learning_rate": 1.0945374797631574e-05,
      "loss": 2.5371,
      "step": 36351
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0205239057540894,
      "learning_rate": 1.0944964904114361e-05,
      "loss": 2.2976,
      "step": 36352
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.139070987701416,
      "learning_rate": 1.0944555008995179e-05,
      "loss": 2.2525,
      "step": 36353
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0409455299377441,
      "learning_rate": 1.094414511227472e-05,
      "loss": 2.3364,
      "step": 36354
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.023319959640503,
      "learning_rate": 1.0943735213953677e-05,
      "loss": 2.4596,
      "step": 36355
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.074100375175476,
      "learning_rate": 1.0943325314032745e-05,
      "loss": 2.1691,
      "step": 36356
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1047067642211914,
      "learning_rate": 1.0942915412512622e-05,
      "loss": 2.3246,
      "step": 36357
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1330218315124512,
      "learning_rate": 1.0942505509394001e-05,
      "loss": 2.3549,
      "step": 36358
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.129713535308838,
      "learning_rate": 1.0942095604677578e-05,
      "loss": 2.2932,
      "step": 36359
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0407025814056396,
      "learning_rate": 1.0941685698364046e-05,
      "loss": 2.1969,
      "step": 36360
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0396960973739624,
      "learning_rate": 1.0941275790454102e-05,
      "loss": 2.4071,
      "step": 36361
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0950191020965576,
      "learning_rate": 1.094086588094844e-05,
      "loss": 2.401,
      "step": 36362
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0440157651901245,
      "learning_rate": 1.0940455969847754e-05,
      "loss": 2.3254,
      "step": 36363
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9544148445129395,
      "learning_rate": 1.094004605715274e-05,
      "loss": 2.3594,
      "step": 36364
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1385554075241089,
      "learning_rate": 1.0939636142864092e-05,
      "loss": 2.383,
      "step": 36365
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0780105590820312,
      "learning_rate": 1.0939226226982505e-05,
      "loss": 2.4271,
      "step": 36366
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0678373575210571,
      "learning_rate": 1.0938816309508675e-05,
      "loss": 2.411,
      "step": 36367
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.084685206413269,
      "learning_rate": 1.09384063904433e-05,
      "loss": 2.3337,
      "step": 36368
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1051632165908813,
      "learning_rate": 1.0937996469787066e-05,
      "loss": 2.3139,
      "step": 36369
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0661388635635376,
      "learning_rate": 1.0937586547540674e-05,
      "loss": 2.4107,
      "step": 36370
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1696211099624634,
      "learning_rate": 1.093717662370482e-05,
      "loss": 2.4153,
      "step": 36371
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0222150087356567,
      "learning_rate": 1.0936766698280196e-05,
      "loss": 2.6088,
      "step": 36372
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6530554294586182,
      "learning_rate": 1.0936356771267497e-05,
      "loss": 2.3933,
      "step": 36373
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.13375985622406,
      "learning_rate": 1.0935946842667425e-05,
      "loss": 2.3271,
      "step": 36374
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1127899885177612,
      "learning_rate": 1.0935536912480665e-05,
      "loss": 2.4473,
      "step": 36375
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.19514000415802,
      "learning_rate": 1.0935126980707913e-05,
      "loss": 2.3269,
      "step": 36376
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9679085612297058,
      "learning_rate": 1.093471704734987e-05,
      "loss": 2.2156,
      "step": 36377
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1093251705169678,
      "learning_rate": 1.0934307112407226e-05,
      "loss": 2.2857,
      "step": 36378
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.031493067741394,
      "learning_rate": 1.0933897175880676e-05,
      "loss": 2.5088,
      "step": 36379
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0118063688278198,
      "learning_rate": 1.093348723777092e-05,
      "loss": 2.3821,
      "step": 36380
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.06919527053833,
      "learning_rate": 1.0933077298078652e-05,
      "loss": 2.2906,
      "step": 36381
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0174643993377686,
      "learning_rate": 1.0932667356804561e-05,
      "loss": 2.1852,
      "step": 36382
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2901691198349,
      "learning_rate": 1.0932257413949343e-05,
      "loss": 2.6082,
      "step": 36383
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0516358613967896,
      "learning_rate": 1.09318474695137e-05,
      "loss": 2.5098,
      "step": 36384
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0392236709594727,
      "learning_rate": 1.0931437523498318e-05,
      "loss": 2.2334,
      "step": 36385
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9439862966537476,
      "learning_rate": 1.09310275759039e-05,
      "loss": 2.3216,
      "step": 36386
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1516581773757935,
      "learning_rate": 1.093061762673114e-05,
      "loss": 2.5928,
      "step": 36387
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1570923328399658,
      "learning_rate": 1.0930207675980724e-05,
      "loss": 2.387,
      "step": 36388
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0810277462005615,
      "learning_rate": 1.0929797723653357e-05,
      "loss": 2.3737,
      "step": 36389
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0386601686477661,
      "learning_rate": 1.0929387769749729e-05,
      "loss": 2.4228,
      "step": 36390
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0500794649124146,
      "learning_rate": 1.0928977814270538e-05,
      "loss": 2.0888,
      "step": 36391
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.26775324344635,
      "learning_rate": 1.0928567857216476e-05,
      "loss": 2.2005,
      "step": 36392
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.146934986114502,
      "learning_rate": 1.0928157898588238e-05,
      "loss": 2.4975,
      "step": 36393
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9574949145317078,
      "learning_rate": 1.0927747938386525e-05,
      "loss": 2.5003,
      "step": 36394
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3288582563400269,
      "learning_rate": 1.0927337976612025e-05,
      "loss": 2.2826,
      "step": 36395
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9176115393638611,
      "learning_rate": 1.0926928013265432e-05,
      "loss": 2.2501,
      "step": 36396
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9410755038261414,
      "learning_rate": 1.0926518048347447e-05,
      "loss": 1.9946,
      "step": 36397
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9968295693397522,
      "learning_rate": 1.0926108081858762e-05,
      "loss": 2.259,
      "step": 36398
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0168367624282837,
      "learning_rate": 1.0925698113800071e-05,
      "loss": 2.2863,
      "step": 36399
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1010862588882446,
      "learning_rate": 1.0925288144172073e-05,
      "loss": 2.3044,
      "step": 36400
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9584828019142151,
      "learning_rate": 1.0924878172975459e-05,
      "loss": 2.4441,
      "step": 36401
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.011091947555542,
      "learning_rate": 1.0924468200210926e-05,
      "loss": 2.4935,
      "step": 36402
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1427521705627441,
      "learning_rate": 1.0924058225879167e-05,
      "loss": 2.4303,
      "step": 36403
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.239540696144104,
      "learning_rate": 1.0923648249980876e-05,
      "loss": 2.4266,
      "step": 36404
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1619715690612793,
      "learning_rate": 1.0923238272516753e-05,
      "loss": 2.5272,
      "step": 36405
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4905720949172974,
      "learning_rate": 1.092282829348749e-05,
      "loss": 2.5959,
      "step": 36406
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.001181721687317,
      "learning_rate": 1.0922418312893786e-05,
      "loss": 2.3074,
      "step": 36407
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0844453573226929,
      "learning_rate": 1.0922008330736328e-05,
      "loss": 2.3813,
      "step": 36408
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0754947662353516,
      "learning_rate": 1.0921598347015816e-05,
      "loss": 2.3861,
      "step": 36409
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1080913543701172,
      "learning_rate": 1.0921188361732944e-05,
      "loss": 2.2429,
      "step": 36410
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.10909104347229,
      "learning_rate": 1.092077837488841e-05,
      "loss": 2.2117,
      "step": 36411
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0710090398788452,
      "learning_rate": 1.0920368386482904e-05,
      "loss": 2.3899,
      "step": 36412
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1320948600769043,
      "learning_rate": 1.0919958396517125e-05,
      "loss": 2.1517,
      "step": 36413
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1050084829330444,
      "learning_rate": 1.0919548404991769e-05,
      "loss": 2.4219,
      "step": 36414
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0088448524475098,
      "learning_rate": 1.0919138411907525e-05,
      "loss": 2.3435,
      "step": 36415
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.178678035736084,
      "learning_rate": 1.0918728417265092e-05,
      "loss": 2.3941,
      "step": 36416
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0681681632995605,
      "learning_rate": 1.0918318421065166e-05,
      "loss": 2.338,
      "step": 36417
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0439797639846802,
      "learning_rate": 1.0917908423308444e-05,
      "loss": 2.2808,
      "step": 36418
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0539631843566895,
      "learning_rate": 1.0917498423995614e-05,
      "loss": 2.2856,
      "step": 36419
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1694797277450562,
      "learning_rate": 1.0917088423127374e-05,
      "loss": 2.4014,
      "step": 36420
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1418465375900269,
      "learning_rate": 1.091667842070442e-05,
      "loss": 2.3412,
      "step": 36421
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.058781623840332,
      "learning_rate": 1.0916268416727452e-05,
      "loss": 2.274,
      "step": 36422
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0211598873138428,
      "learning_rate": 1.0915858411197157e-05,
      "loss": 2.4906,
      "step": 36423
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0029329061508179,
      "learning_rate": 1.0915448404114232e-05,
      "loss": 2.5303,
      "step": 36424
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.126648187637329,
      "learning_rate": 1.0915038395479373e-05,
      "loss": 2.1653,
      "step": 36425
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.08545982837677,
      "learning_rate": 1.0914628385293277e-05,
      "loss": 2.5518,
      "step": 36426
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0214961767196655,
      "learning_rate": 1.091421837355664e-05,
      "loss": 2.6269,
      "step": 36427
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.153939962387085,
      "learning_rate": 1.0913808360270152e-05,
      "loss": 2.6244,
      "step": 36428
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.00651216506958,
      "learning_rate": 1.0913398345434509e-05,
      "loss": 2.3523,
      "step": 36429
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1589422225952148,
      "learning_rate": 1.0912988329050412e-05,
      "loss": 2.3082,
      "step": 36430
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3613436222076416,
      "learning_rate": 1.0912578311118547e-05,
      "loss": 2.3691,
      "step": 36431
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1544064283370972,
      "learning_rate": 1.0912168291639615e-05,
      "loss": 2.4375,
      "step": 36432
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0240470170974731,
      "learning_rate": 1.0911758270614314e-05,
      "loss": 2.3737,
      "step": 36433
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1953030824661255,
      "learning_rate": 1.0911348248043332e-05,
      "loss": 2.3403,
      "step": 36434
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0311520099639893,
      "learning_rate": 1.0910938223927366e-05,
      "loss": 2.2285,
      "step": 36435
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1484559774398804,
      "learning_rate": 1.0910528198267114e-05,
      "loss": 2.0885,
      "step": 36436
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1288708448410034,
      "learning_rate": 1.0910118171063269e-05,
      "loss": 2.1611,
      "step": 36437
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0295997858047485,
      "learning_rate": 1.0909708142316526e-05,
      "loss": 2.3258,
      "step": 36438
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.043524146080017,
      "learning_rate": 1.090929811202758e-05,
      "loss": 2.4595,
      "step": 36439
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0583384037017822,
      "learning_rate": 1.090888808019713e-05,
      "loss": 2.3903,
      "step": 36440
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.066426157951355,
      "learning_rate": 1.0908478046825866e-05,
      "loss": 2.2397,
      "step": 36441
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1406008005142212,
      "learning_rate": 1.0908068011914483e-05,
      "loss": 2.3684,
      "step": 36442
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0576262474060059,
      "learning_rate": 1.090765797546368e-05,
      "loss": 2.4158,
      "step": 36443
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1089739799499512,
      "learning_rate": 1.090724793747415e-05,
      "loss": 2.3865,
      "step": 36444
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2911875247955322,
      "learning_rate": 1.090683789794659e-05,
      "loss": 2.2126,
      "step": 36445
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.166352391242981,
      "learning_rate": 1.0906427856881692e-05,
      "loss": 2.1991,
      "step": 36446
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0422238111495972,
      "learning_rate": 1.0906017814280152e-05,
      "loss": 2.3129,
      "step": 36447
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0088242292404175,
      "learning_rate": 1.0905607770142666e-05,
      "loss": 2.4465,
      "step": 36448
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9577850699424744,
      "learning_rate": 1.090519772446993e-05,
      "loss": 2.1781,
      "step": 36449
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0508729219436646,
      "learning_rate": 1.0904787677262639e-05,
      "loss": 2.2377,
      "step": 36450
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0086036920547485,
      "learning_rate": 1.0904377628521483e-05,
      "loss": 2.2881,
      "step": 36451
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.342518925666809,
      "learning_rate": 1.0903967578247163e-05,
      "loss": 2.3138,
      "step": 36452
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.002411127090454,
      "learning_rate": 1.0903557526440376e-05,
      "loss": 2.3282,
      "step": 36453
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.988925039768219,
      "learning_rate": 1.0903147473101809e-05,
      "loss": 2.3574,
      "step": 36454
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1361567974090576,
      "learning_rate": 1.0902737418232163e-05,
      "loss": 2.2952,
      "step": 36455
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0061994791030884,
      "learning_rate": 1.0902327361832132e-05,
      "loss": 2.4618,
      "step": 36456
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.149317741394043,
      "learning_rate": 1.0901917303902412e-05,
      "loss": 2.3596,
      "step": 36457
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1025487184524536,
      "learning_rate": 1.0901507244443696e-05,
      "loss": 2.4719,
      "step": 36458
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2012536525726318,
      "learning_rate": 1.0901097183456679e-05,
      "loss": 2.3445,
      "step": 36459
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0112535953521729,
      "learning_rate": 1.0900687120942061e-05,
      "loss": 2.4025,
      "step": 36460
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.070406198501587,
      "learning_rate": 1.0900277056900533e-05,
      "loss": 2.477,
      "step": 36461
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0253249406814575,
      "learning_rate": 1.089986699133279e-05,
      "loss": 2.2853,
      "step": 36462
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2085336446762085,
      "learning_rate": 1.0899456924239527e-05,
      "loss": 2.4783,
      "step": 36463
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.036188006401062,
      "learning_rate": 1.0899046855621442e-05,
      "loss": 2.3811,
      "step": 36464
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0423072576522827,
      "learning_rate": 1.0898636785479227e-05,
      "loss": 2.5417,
      "step": 36465
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1224082708358765,
      "learning_rate": 1.089822671381358e-05,
      "loss": 2.3339,
      "step": 36466
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.326403260231018,
      "learning_rate": 1.0897816640625194e-05,
      "loss": 2.2338,
      "step": 36467
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0866100788116455,
      "learning_rate": 1.0897406565914765e-05,
      "loss": 2.1617,
      "step": 36468
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1320024728775024,
      "learning_rate": 1.0896996489682988e-05,
      "loss": 2.4297,
      "step": 36469
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.081701397895813,
      "learning_rate": 1.0896586411930559e-05,
      "loss": 2.5213,
      "step": 36470
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.9926483035087585,
      "learning_rate": 1.089617633265817e-05,
      "loss": 2.0618,
      "step": 36471
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1426738500595093,
      "learning_rate": 1.089576625186652e-05,
      "loss": 2.4017,
      "step": 36472
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6766356229782104,
      "learning_rate": 1.0895356169556306e-05,
      "loss": 2.3245,
      "step": 36473
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0867935419082642,
      "learning_rate": 1.0894946085728217e-05,
      "loss": 2.3293,
      "step": 36474
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.076810598373413,
      "learning_rate": 1.0894536000382951e-05,
      "loss": 2.3415,
      "step": 36475
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0531985759735107,
      "learning_rate": 1.0894125913521205e-05,
      "loss": 2.2121,
      "step": 36476
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0252699851989746,
      "learning_rate": 1.0893715825143674e-05,
      "loss": 2.5754,
      "step": 36477
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.0645171403884888,
      "learning_rate": 1.089330573525105e-05,
      "loss": 2.4335,
      "step": 36478
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.275447964668274,
      "learning_rate": 1.0892895643844031e-05,
      "loss": 2.3358,
      "step": 36479
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1484569311141968,
      "learning_rate": 1.0892485550923309e-05,
      "loss": 2.3627,
      "step": 36480
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.023690938949585,
      "learning_rate": 1.0892075456489586e-05,
      "loss": 2.2957,
      "step": 36481
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.054746150970459,
      "learning_rate": 1.0891665360543549e-05,
      "loss": 2.3191,
      "step": 36482
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0400364398956299,
      "learning_rate": 1.0891255263085898e-05,
      "loss": 2.4617,
      "step": 36483
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9942631125450134,
      "learning_rate": 1.0890845164117327e-05,
      "loss": 2.5315,
      "step": 36484
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.98299241065979,
      "learning_rate": 1.0890435063638531e-05,
      "loss": 2.3899,
      "step": 36485
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1142436265945435,
      "learning_rate": 1.089002496165021e-05,
      "loss": 2.411,
      "step": 36486
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0413860082626343,
      "learning_rate": 1.088961485815305e-05,
      "loss": 2.2628,
      "step": 36487
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.199690580368042,
      "learning_rate": 1.0889204753147753e-05,
      "loss": 2.3361,
      "step": 36488
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1242241859436035,
      "learning_rate": 1.0888794646635013e-05,
      "loss": 2.434,
      "step": 36489
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0291424989700317,
      "learning_rate": 1.0888384538615524e-05,
      "loss": 2.3811,
      "step": 36490
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.058517575263977,
      "learning_rate": 1.088797442908998e-05,
      "loss": 2.0624,
      "step": 36491
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0648139715194702,
      "learning_rate": 1.088756431805908e-05,
      "loss": 2.5432,
      "step": 36492
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.147237777709961,
      "learning_rate": 1.0887154205523521e-05,
      "loss": 2.2632,
      "step": 36493
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0591071844100952,
      "learning_rate": 1.0886744091483989e-05,
      "loss": 2.2976,
      "step": 36494
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0350680351257324,
      "learning_rate": 1.0886333975941189e-05,
      "loss": 2.5081,
      "step": 36495
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9619193077087402,
      "learning_rate": 1.0885923858895808e-05,
      "loss": 2.2123,
      "step": 36496
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0904184579849243,
      "learning_rate": 1.0885513740348548e-05,
      "loss": 2.4349,
      "step": 36497
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.021668791770935,
      "learning_rate": 1.0885103620300101e-05,
      "loss": 2.4399,
      "step": 36498
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0163246393203735,
      "learning_rate": 1.0884693498751165e-05,
      "loss": 2.1712,
      "step": 36499
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.182356595993042,
      "learning_rate": 1.0884283375702431e-05,
      "loss": 2.2695,
      "step": 36500
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.066884994506836,
      "learning_rate": 1.0883873251154597e-05,
      "loss": 2.1615,
      "step": 36501
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1617017984390259,
      "learning_rate": 1.0883463125108358e-05,
      "loss": 2.4286,
      "step": 36502
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0228321552276611,
      "learning_rate": 1.0883052997564407e-05,
      "loss": 2.5016,
      "step": 36503
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1021584272384644,
      "learning_rate": 1.0882642868523443e-05,
      "loss": 2.2913,
      "step": 36504
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0512518882751465,
      "learning_rate": 1.0882232737986163e-05,
      "loss": 2.4652,
      "step": 36505
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1800934076309204,
      "learning_rate": 1.0881822605953255e-05,
      "loss": 2.2851,
      "step": 36506
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1206450462341309,
      "learning_rate": 1.0881412472425418e-05,
      "loss": 2.3663,
      "step": 36507
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.08551025390625,
      "learning_rate": 1.088100233740335e-05,
      "loss": 2.5302,
      "step": 36508
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1192909479141235,
      "learning_rate": 1.0880592200887743e-05,
      "loss": 2.3674,
      "step": 36509
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0637524127960205,
      "learning_rate": 1.0880182062879291e-05,
      "loss": 2.5782,
      "step": 36510
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2331256866455078,
      "learning_rate": 1.0879771923378693e-05,
      "loss": 2.3659,
      "step": 36511
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1249265670776367,
      "learning_rate": 1.0879361782386643e-05,
      "loss": 2.3214,
      "step": 36512
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0471781492233276,
      "learning_rate": 1.0878951639903837e-05,
      "loss": 2.446,
      "step": 36513
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1814059019088745,
      "learning_rate": 1.0878541495930968e-05,
      "loss": 2.4883,
      "step": 36514
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1690144538879395,
      "learning_rate": 1.087813135046873e-05,
      "loss": 2.3796,
      "step": 36515
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0091336965560913,
      "learning_rate": 1.0877721203517824e-05,
      "loss": 2.5422,
      "step": 36516
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9930109977722168,
      "learning_rate": 1.0877311055078941e-05,
      "loss": 2.3433,
      "step": 36517
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0780830383300781,
      "learning_rate": 1.0876900905152776e-05,
      "loss": 2.2827,
      "step": 36518
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0504026412963867,
      "learning_rate": 1.087649075374003e-05,
      "loss": 2.3935,
      "step": 36519
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.07025146484375,
      "learning_rate": 1.087608060084139e-05,
      "loss": 2.5491,
      "step": 36520
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1304717063903809,
      "learning_rate": 1.0875670446457558e-05,
      "loss": 2.2401,
      "step": 36521
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2066174745559692,
      "learning_rate": 1.0875260290589225e-05,
      "loss": 2.589,
      "step": 36522
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0473456382751465,
      "learning_rate": 1.0874850133237086e-05,
      "loss": 2.2691,
      "step": 36523
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1455949544906616,
      "learning_rate": 1.0874439974401843e-05,
      "loss": 2.2832,
      "step": 36524
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1495543718338013,
      "learning_rate": 1.0874029814084184e-05,
      "loss": 2.3201,
      "step": 36525
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0795553922653198,
      "learning_rate": 1.087361965228481e-05,
      "loss": 2.2635,
      "step": 36526
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1300392150878906,
      "learning_rate": 1.087320948900441e-05,
      "loss": 2.4484,
      "step": 36527
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2559772729873657,
      "learning_rate": 1.0872799324243686e-05,
      "loss": 2.6237,
      "step": 36528
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.112248182296753,
      "learning_rate": 1.0872389158003326e-05,
      "loss": 2.2099,
      "step": 36529
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3945207595825195,
      "learning_rate": 1.0871978990284034e-05,
      "loss": 2.3378,
      "step": 36530
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.171165943145752,
      "learning_rate": 1.0871568821086497e-05,
      "loss": 2.0269,
      "step": 36531
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1050676107406616,
      "learning_rate": 1.0871158650411418e-05,
      "loss": 2.3943,
      "step": 36532
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1125575304031372,
      "learning_rate": 1.0870748478259485e-05,
      "loss": 2.4029,
      "step": 36533
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.100968360900879,
      "learning_rate": 1.0870338304631399e-05,
      "loss": 2.4374,
      "step": 36534
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9823750853538513,
      "learning_rate": 1.0869928129527852e-05,
      "loss": 2.2626,
      "step": 36535
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1257433891296387,
      "learning_rate": 1.0869517952949541e-05,
      "loss": 2.2914,
      "step": 36536
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0478012561798096,
      "learning_rate": 1.0869107774897161e-05,
      "loss": 2.2306,
      "step": 36537
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0519758462905884,
      "learning_rate": 1.0868697595371407e-05,
      "loss": 2.4282,
      "step": 36538
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0515819787979126,
      "learning_rate": 1.0868287414372973e-05,
      "loss": 2.6151,
      "step": 36539
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0060454607009888,
      "learning_rate": 1.086787723190256e-05,
      "loss": 2.3466,
      "step": 36540
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.973110556602478,
      "learning_rate": 1.0867467047960856e-05,
      "loss": 2.4223,
      "step": 36541
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9764864444732666,
      "learning_rate": 1.0867056862548562e-05,
      "loss": 2.0403,
      "step": 36542
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.000542163848877,
      "learning_rate": 1.0866646675666369e-05,
      "loss": 2.3875,
      "step": 36543
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.108301043510437,
      "learning_rate": 1.0866236487314974e-05,
      "loss": 2.0633,
      "step": 36544
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9803791046142578,
      "learning_rate": 1.0865826297495077e-05,
      "loss": 2.2626,
      "step": 36545
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0840321779251099,
      "learning_rate": 1.0865416106207365e-05,
      "loss": 2.4686,
      "step": 36546
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2745740413665771,
      "learning_rate": 1.086500591345254e-05,
      "loss": 2.4985,
      "step": 36547
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.076289176940918,
      "learning_rate": 1.0864595719231292e-05,
      "loss": 2.338,
      "step": 36548
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1297612190246582,
      "learning_rate": 1.0864185523544321e-05,
      "loss": 2.5011,
      "step": 36549
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.054445743560791,
      "learning_rate": 1.0863775326392323e-05,
      "loss": 2.3717,
      "step": 36550
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0276641845703125,
      "learning_rate": 1.0863365127775988e-05,
      "loss": 2.2388,
      "step": 36551
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1400283575057983,
      "learning_rate": 1.0862954927696018e-05,
      "loss": 2.4381,
      "step": 36552
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0513075590133667,
      "learning_rate": 1.0862544726153104e-05,
      "loss": 2.4523,
      "step": 36553
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0819134712219238,
      "learning_rate": 1.0862134523147942e-05,
      "loss": 2.3253,
      "step": 36554
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1613482236862183,
      "learning_rate": 1.0861724318681225e-05,
      "loss": 2.4279,
      "step": 36555
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.03502357006073,
      "learning_rate": 1.0861314112753655e-05,
      "loss": 2.1313,
      "step": 36556
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1035178899765015,
      "learning_rate": 1.0860903905365922e-05,
      "loss": 2.2608,
      "step": 36557
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2301386594772339,
      "learning_rate": 1.0860493696518724e-05,
      "loss": 2.4718,
      "step": 36558
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1612061262130737,
      "learning_rate": 1.0860083486212756e-05,
      "loss": 2.2345,
      "step": 36559
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0372340679168701,
      "learning_rate": 1.0859673274448712e-05,
      "loss": 2.3484,
      "step": 36560
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.02773118019104,
      "learning_rate": 1.0859263061227287e-05,
      "loss": 2.2921,
      "step": 36561
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1085830926895142,
      "learning_rate": 1.0858852846549179e-05,
      "loss": 2.3796,
      "step": 36562
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0630109310150146,
      "learning_rate": 1.0858442630415083e-05,
      "loss": 2.3433,
      "step": 36563
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1485365629196167,
      "learning_rate": 1.0858032412825694e-05,
      "loss": 2.1032,
      "step": 36564
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1441071033477783,
      "learning_rate": 1.0857622193781705e-05,
      "loss": 2.3605,
      "step": 36565
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3993351459503174,
      "learning_rate": 1.0857211973283815e-05,
      "loss": 2.2598,
      "step": 36566
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.390784502029419,
      "learning_rate": 1.0856801751332715e-05,
      "loss": 2.3792,
      "step": 36567
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.138535499572754,
      "learning_rate": 1.0856391527929109e-05,
      "loss": 2.4073,
      "step": 36568
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.001327395439148,
      "learning_rate": 1.0855981303073681e-05,
      "loss": 2.2317,
      "step": 36569
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.045137882232666,
      "learning_rate": 1.0855571076767135e-05,
      "loss": 2.4366,
      "step": 36570
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.098320722579956,
      "learning_rate": 1.0855160849010162e-05,
      "loss": 2.0916,
      "step": 36571
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1313356161117554,
      "learning_rate": 1.0854750619803462e-05,
      "loss": 2.2581,
      "step": 36572
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1153124570846558,
      "learning_rate": 1.0854340389147725e-05,
      "loss": 2.4471,
      "step": 36573
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2159944772720337,
      "learning_rate": 1.085393015704365e-05,
      "loss": 2.4075,
      "step": 36574
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0646721124649048,
      "learning_rate": 1.085351992349193e-05,
      "loss": 2.5968,
      "step": 36575
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1138147115707397,
      "learning_rate": 1.0853109688493264e-05,
      "loss": 2.294,
      "step": 36576
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0848419666290283,
      "learning_rate": 1.0852699452048342e-05,
      "loss": 2.3044,
      "step": 36577
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1265689134597778,
      "learning_rate": 1.085228921415787e-05,
      "loss": 2.304,
      "step": 36578
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.00836980342865,
      "learning_rate": 1.085187897482253e-05,
      "loss": 2.3391,
      "step": 36579
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1202878952026367,
      "learning_rate": 1.0851468734043025e-05,
      "loss": 2.4447,
      "step": 36580
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0843002796173096,
      "learning_rate": 1.085105849182005e-05,
      "loss": 2.2758,
      "step": 36581
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.131968379020691,
      "learning_rate": 1.0850648248154298e-05,
      "loss": 2.2091,
      "step": 36582
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1152602434158325,
      "learning_rate": 1.0850238003046466e-05,
      "loss": 2.2173,
      "step": 36583
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1444628238677979,
      "learning_rate": 1.0849827756497253e-05,
      "loss": 2.2663,
      "step": 36584
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0518120527267456,
      "learning_rate": 1.0849417508507351e-05,
      "loss": 2.4094,
      "step": 36585
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1804015636444092,
      "learning_rate": 1.0849007259077453e-05,
      "loss": 2.4982,
      "step": 36586
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1028754711151123,
      "learning_rate": 1.0848597008208259e-05,
      "loss": 2.2059,
      "step": 36587
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0293222665786743,
      "learning_rate": 1.0848186755900461e-05,
      "loss": 2.4588,
      "step": 36588
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0466411113739014,
      "learning_rate": 1.0847776502154756e-05,
      "loss": 2.4377,
      "step": 36589
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.238877296447754,
      "learning_rate": 1.084736624697184e-05,
      "loss": 2.4438,
      "step": 36590
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2968436479568481,
      "learning_rate": 1.0846955990352408e-05,
      "loss": 2.4378,
      "step": 36591
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3304038047790527,
      "learning_rate": 1.0846545732297157e-05,
      "loss": 2.4906,
      "step": 36592
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9828941822052002,
      "learning_rate": 1.084613547280678e-05,
      "loss": 2.2367,
      "step": 36593
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9907087683677673,
      "learning_rate": 1.0845725211881975e-05,
      "loss": 2.3928,
      "step": 36594
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0731356143951416,
      "learning_rate": 1.0845314949523435e-05,
      "loss": 2.3763,
      "step": 36595
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0666550397872925,
      "learning_rate": 1.0844904685731856e-05,
      "loss": 2.2712,
      "step": 36596
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9876519441604614,
      "learning_rate": 1.0844494420507934e-05,
      "loss": 2.457,
      "step": 36597
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.105370283126831,
      "learning_rate": 1.0844084153852366e-05,
      "loss": 2.2425,
      "step": 36598
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.079999566078186,
      "learning_rate": 1.0843673885765847e-05,
      "loss": 2.3603,
      "step": 36599
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0218873023986816,
      "learning_rate": 1.0843263616249067e-05,
      "loss": 2.4383,
      "step": 36600
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.970719575881958,
      "learning_rate": 1.084285334530273e-05,
      "loss": 2.134,
      "step": 36601
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1359691619873047,
      "learning_rate": 1.0842443072927525e-05,
      "loss": 2.2297,
      "step": 36602
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.048853874206543,
      "learning_rate": 1.0842032799124152e-05,
      "loss": 2.2251,
      "step": 36603
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9869097471237183,
      "learning_rate": 1.0841622523893303e-05,
      "loss": 2.2545,
      "step": 36604
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1366921663284302,
      "learning_rate": 1.084121224723568e-05,
      "loss": 2.4052,
      "step": 36605
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.258881688117981,
      "learning_rate": 1.0840801969151968e-05,
      "loss": 2.1066,
      "step": 36606
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0230450630187988,
      "learning_rate": 1.0840391689642871e-05,
      "loss": 2.3574,
      "step": 36607
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1686488389968872,
      "learning_rate": 1.0839981408709082e-05,
      "loss": 2.452,
      "step": 36608
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1127705574035645,
      "learning_rate": 1.0839571126351296e-05,
      "loss": 2.2392,
      "step": 36609
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0610378980636597,
      "learning_rate": 1.0839160842570207e-05,
      "loss": 2.1119,
      "step": 36610
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0062878131866455,
      "learning_rate": 1.0838750557366518e-05,
      "loss": 2.2798,
      "step": 36611
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.076480746269226,
      "learning_rate": 1.0838340270740912e-05,
      "loss": 2.3575,
      "step": 36612
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0081865787506104,
      "learning_rate": 1.0837929982694096e-05,
      "loss": 2.1721,
      "step": 36613
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0493900775909424,
      "learning_rate": 1.083751969322676e-05,
      "loss": 2.5066,
      "step": 36614
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0166213512420654,
      "learning_rate": 1.0837109402339601e-05,
      "loss": 2.1882,
      "step": 36615
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1169811487197876,
      "learning_rate": 1.0836699110033313e-05,
      "loss": 2.4674,
      "step": 36616
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1872875690460205,
      "learning_rate": 1.0836288816308591e-05,
      "loss": 2.3555,
      "step": 36617
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3330329656600952,
      "learning_rate": 1.0835878521166139e-05,
      "loss": 2.3298,
      "step": 36618
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2330865859985352,
      "learning_rate": 1.083546822460664e-05,
      "loss": 2.2109,
      "step": 36619
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0741651058197021,
      "learning_rate": 1.0835057926630796e-05,
      "loss": 2.2589,
      "step": 36620
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9351814389228821,
      "learning_rate": 1.0834647627239303e-05,
      "loss": 2.6599,
      "step": 36621
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0625947713851929,
      "learning_rate": 1.0834237326432857e-05,
      "loss": 2.3396,
      "step": 36622
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1037495136260986,
      "learning_rate": 1.083382702421215e-05,
      "loss": 2.3408,
      "step": 36623
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9501150846481323,
      "learning_rate": 1.0833416720577883e-05,
      "loss": 2.3751,
      "step": 36624
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0885018110275269,
      "learning_rate": 1.0833006415530744e-05,
      "loss": 2.4993,
      "step": 36625
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0829700231552124,
      "learning_rate": 1.0832596109071436e-05,
      "loss": 2.2754,
      "step": 36626
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0103760957717896,
      "learning_rate": 1.0832185801200652e-05,
      "loss": 2.421,
      "step": 36627
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1190658807754517,
      "learning_rate": 1.0831775491919085e-05,
      "loss": 2.292,
      "step": 36628
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0524052381515503,
      "learning_rate": 1.0831365181227434e-05,
      "loss": 2.5111,
      "step": 36629
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0069880485534668,
      "learning_rate": 1.0830954869126392e-05,
      "loss": 2.338,
      "step": 36630
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9681001901626587,
      "learning_rate": 1.0830544555616655e-05,
      "loss": 2.1933,
      "step": 36631
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9855614304542542,
      "learning_rate": 1.0830134240698921e-05,
      "loss": 2.1429,
      "step": 36632
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0463035106658936,
      "learning_rate": 1.0829723924373883e-05,
      "loss": 2.3465,
      "step": 36633
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0487357378005981,
      "learning_rate": 1.0829313606642238e-05,
      "loss": 2.2218,
      "step": 36634
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0458898544311523,
      "learning_rate": 1.0828903287504682e-05,
      "loss": 2.2101,
      "step": 36635
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.065216302871704,
      "learning_rate": 1.0828492966961907e-05,
      "loss": 2.2268,
      "step": 36636
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.295494794845581,
      "learning_rate": 1.0828082645014615e-05,
      "loss": 2.2654,
      "step": 36637
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.067004919052124,
      "learning_rate": 1.0827672321663497e-05,
      "loss": 2.43,
      "step": 36638
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0631303787231445,
      "learning_rate": 1.0827261996909248e-05,
      "loss": 2.2917,
      "step": 36639
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1138957738876343,
      "learning_rate": 1.0826851670752567e-05,
      "loss": 2.3641,
      "step": 36640
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9719521403312683,
      "learning_rate": 1.0826441343194148e-05,
      "loss": 2.3339,
      "step": 36641
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1039037704467773,
      "learning_rate": 1.0826031014234684e-05,
      "loss": 2.4851,
      "step": 36642
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0523608922958374,
      "learning_rate": 1.0825620683874874e-05,
      "loss": 2.3614,
      "step": 36643
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0573720932006836,
      "learning_rate": 1.0825210352115416e-05,
      "loss": 2.1423,
      "step": 36644
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.173810601234436,
      "learning_rate": 1.0824800018957e-05,
      "loss": 2.3365,
      "step": 36645
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9911524653434753,
      "learning_rate": 1.0824389684400323e-05,
      "loss": 2.4016,
      "step": 36646
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1281214952468872,
      "learning_rate": 1.082397934844608e-05,
      "loss": 2.2811,
      "step": 36647
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0763885974884033,
      "learning_rate": 1.0823569011094972e-05,
      "loss": 2.4061,
      "step": 36648
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1767189502716064,
      "learning_rate": 1.082315867234769e-05,
      "loss": 2.3181,
      "step": 36649
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.135888934135437,
      "learning_rate": 1.0822748332204928e-05,
      "loss": 2.1785,
      "step": 36650
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1452280282974243,
      "learning_rate": 1.0822337990667388e-05,
      "loss": 2.3724,
      "step": 36651
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1341766119003296,
      "learning_rate": 1.0821927647735759e-05,
      "loss": 2.4,
      "step": 36652
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.058557391166687,
      "learning_rate": 1.082151730341074e-05,
      "loss": 2.4086,
      "step": 36653
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2070229053497314,
      "learning_rate": 1.0821106957693028e-05,
      "loss": 2.3445,
      "step": 36654
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1513757705688477,
      "learning_rate": 1.0820696610583314e-05,
      "loss": 2.3092,
      "step": 36655
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0096479654312134,
      "learning_rate": 1.0820286262082298e-05,
      "loss": 2.2647,
      "step": 36656
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9838346242904663,
      "learning_rate": 1.0819875912190672e-05,
      "loss": 2.0486,
      "step": 36657
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0488977432250977,
      "learning_rate": 1.0819465560909137e-05,
      "loss": 2.2095,
      "step": 36658
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1282069683074951,
      "learning_rate": 1.0819055208238383e-05,
      "loss": 2.3769,
      "step": 36659
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1341060400009155,
      "learning_rate": 1.0818644854179107e-05,
      "loss": 2.527,
      "step": 36660
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2334423065185547,
      "learning_rate": 1.0818234498732007e-05,
      "loss": 2.4227,
      "step": 36661
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0571035146713257,
      "learning_rate": 1.0817824141897777e-05,
      "loss": 2.2649,
      "step": 36662
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3436530828475952,
      "learning_rate": 1.0817413783677114e-05,
      "loss": 2.2586,
      "step": 36663
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1696518659591675,
      "learning_rate": 1.081700342407071e-05,
      "loss": 2.4153,
      "step": 36664
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0178101062774658,
      "learning_rate": 1.0816593063079265e-05,
      "loss": 2.3521,
      "step": 36665
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0053695440292358,
      "learning_rate": 1.0816182700703475e-05,
      "loss": 2.0858,
      "step": 36666
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0093770027160645,
      "learning_rate": 1.081577233694403e-05,
      "loss": 2.3335,
      "step": 36667
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9802722930908203,
      "learning_rate": 1.081536197180163e-05,
      "loss": 2.4614,
      "step": 36668
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.10678231716156,
      "learning_rate": 1.081495160527697e-05,
      "loss": 2.3529,
      "step": 36669
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1382924318313599,
      "learning_rate": 1.0814541237370748e-05,
      "loss": 2.3772,
      "step": 36670
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0461689233779907,
      "learning_rate": 1.0814130868083658e-05,
      "loss": 2.4682,
      "step": 36671
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0041857957839966,
      "learning_rate": 1.0813720497416394e-05,
      "loss": 2.2977,
      "step": 36672
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.046854853630066,
      "learning_rate": 1.0813310125369649e-05,
      "loss": 2.2257,
      "step": 36673
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1815091371536255,
      "learning_rate": 1.0812899751944126e-05,
      "loss": 2.4277,
      "step": 36674
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0618321895599365,
      "learning_rate": 1.0812489377140515e-05,
      "loss": 2.2214,
      "step": 36675
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9427722096443176,
      "learning_rate": 1.0812079000959517e-05,
      "loss": 2.2614,
      "step": 36676
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0220075845718384,
      "learning_rate": 1.0811668623401824e-05,
      "loss": 2.2464,
      "step": 36677
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.982309103012085,
      "learning_rate": 1.081125824446813e-05,
      "loss": 2.2752,
      "step": 36678
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9697916507720947,
      "learning_rate": 1.0810847864159134e-05,
      "loss": 2.3431,
      "step": 36679
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0089765787124634,
      "learning_rate": 1.081043748247553e-05,
      "loss": 2.4314,
      "step": 36680
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.975456714630127,
      "learning_rate": 1.0810027099418014e-05,
      "loss": 2.2279,
      "step": 36681
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.152161717414856,
      "learning_rate": 1.0809616714987285e-05,
      "loss": 2.1515,
      "step": 36682
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0927594900131226,
      "learning_rate": 1.0809206329184035e-05,
      "loss": 2.4931,
      "step": 36683
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.237899899482727,
      "learning_rate": 1.0808795942008958e-05,
      "loss": 2.1776,
      "step": 36684
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.083531379699707,
      "learning_rate": 1.0808385553462753e-05,
      "loss": 2.5543,
      "step": 36685
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2432115077972412,
      "learning_rate": 1.0807975163546118e-05,
      "loss": 2.5584,
      "step": 36686
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9546948075294495,
      "learning_rate": 1.0807564772259743e-05,
      "loss": 2.1791,
      "step": 36687
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.166365623474121,
      "learning_rate": 1.0807154379604325e-05,
      "loss": 2.4191,
      "step": 36688
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0378526449203491,
      "learning_rate": 1.0806743985580563e-05,
      "loss": 2.3696,
      "step": 36689
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.134871006011963,
      "learning_rate": 1.0806333590189151e-05,
      "loss": 2.416,
      "step": 36690
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0044214725494385,
      "learning_rate": 1.0805923193430786e-05,
      "loss": 2.3425,
      "step": 36691
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1058694124221802,
      "learning_rate": 1.080551279530616e-05,
      "loss": 2.4284,
      "step": 36692
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1779621839523315,
      "learning_rate": 1.080510239581597e-05,
      "loss": 2.7399,
      "step": 36693
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0279266834259033,
      "learning_rate": 1.0804691994960913e-05,
      "loss": 2.4611,
      "step": 36694
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.080447793006897,
      "learning_rate": 1.0804281592741685e-05,
      "loss": 2.4569,
      "step": 36695
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2169855833053589,
      "learning_rate": 1.0803871189158982e-05,
      "loss": 2.3361,
      "step": 36696
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.006115436553955,
      "learning_rate": 1.0803460784213499e-05,
      "loss": 2.0832,
      "step": 36697
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2324544191360474,
      "learning_rate": 1.0803050377905933e-05,
      "loss": 2.456,
      "step": 36698
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2748024463653564,
      "learning_rate": 1.0802639970236977e-05,
      "loss": 2.258,
      "step": 36699
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1715302467346191,
      "learning_rate": 1.0802229561207326e-05,
      "loss": 2.4876,
      "step": 36700
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0915719270706177,
      "learning_rate": 1.0801819150817681e-05,
      "loss": 2.5348,
      "step": 36701
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0534172058105469,
      "learning_rate": 1.0801408739068735e-05,
      "loss": 2.1694,
      "step": 36702
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0773138999938965,
      "learning_rate": 1.0800998325961184e-05,
      "loss": 2.4846,
      "step": 36703
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0714000463485718,
      "learning_rate": 1.0800587911495721e-05,
      "loss": 2.3222,
      "step": 36704
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.130759835243225,
      "learning_rate": 1.0800177495673046e-05,
      "loss": 2.4032,
      "step": 36705
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.033212423324585,
      "learning_rate": 1.079976707849385e-05,
      "loss": 2.3701,
      "step": 36706
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0403772592544556,
      "learning_rate": 1.0799356659958833e-05,
      "loss": 2.4561,
      "step": 36707
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0711983442306519,
      "learning_rate": 1.079894624006869e-05,
      "loss": 2.558,
      "step": 36708
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.018520712852478,
      "learning_rate": 1.0798535818824117e-05,
      "loss": 2.2121,
      "step": 36709
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0918269157409668,
      "learning_rate": 1.079812539622581e-05,
      "loss": 2.3148,
      "step": 36710
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.145963191986084,
      "learning_rate": 1.079771497227446e-05,
      "loss": 2.1575,
      "step": 36711
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9776366353034973,
      "learning_rate": 1.0797304546970768e-05,
      "loss": 2.4095,
      "step": 36712
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0415399074554443,
      "learning_rate": 1.0796894120315428e-05,
      "loss": 2.5119,
      "step": 36713
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2431117296218872,
      "learning_rate": 1.0796483692309136e-05,
      "loss": 2.3801,
      "step": 36714
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0476750135421753,
      "learning_rate": 1.0796073262952589e-05,
      "loss": 2.3624,
      "step": 36715
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.116402506828308,
      "learning_rate": 1.0795662832246481e-05,
      "loss": 2.3405,
      "step": 36716
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.14828622341156,
      "learning_rate": 1.0795252400191509e-05,
      "loss": 2.4971,
      "step": 36717
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0628360509872437,
      "learning_rate": 1.0794841966788367e-05,
      "loss": 2.257,
      "step": 36718
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1051291227340698,
      "learning_rate": 1.0794431532037753e-05,
      "loss": 2.4906,
      "step": 36719
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2027806043624878,
      "learning_rate": 1.0794021095940362e-05,
      "loss": 2.4256,
      "step": 36720
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2395906448364258,
      "learning_rate": 1.0793610658496886e-05,
      "loss": 2.3557,
      "step": 36721
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0270874500274658,
      "learning_rate": 1.0793200219708026e-05,
      "loss": 2.3308,
      "step": 36722
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.034704327583313,
      "learning_rate": 1.079278977957448e-05,
      "loss": 2.2968,
      "step": 36723
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1635397672653198,
      "learning_rate": 1.0792379338096937e-05,
      "loss": 2.3284,
      "step": 36724
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0230884552001953,
      "learning_rate": 1.0791968895276097e-05,
      "loss": 2.2602,
      "step": 36725
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0479685068130493,
      "learning_rate": 1.0791558451112651e-05,
      "loss": 2.2895,
      "step": 36726
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0580382347106934,
      "learning_rate": 1.0791148005607301e-05,
      "loss": 2.3318,
      "step": 36727
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0403234958648682,
      "learning_rate": 1.0790737558760741e-05,
      "loss": 2.4153,
      "step": 36728
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1087391376495361,
      "learning_rate": 1.0790327110573666e-05,
      "loss": 2.3836,
      "step": 36729
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1133382320404053,
      "learning_rate": 1.0789916661046772e-05,
      "loss": 2.3667,
      "step": 36730
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2548843622207642,
      "learning_rate": 1.0789506210180754e-05,
      "loss": 2.4517,
      "step": 36731
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0490223169326782,
      "learning_rate": 1.078909575797631e-05,
      "loss": 2.3145,
      "step": 36732
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1866754293441772,
      "learning_rate": 1.0788685304434132e-05,
      "loss": 2.2712,
      "step": 36733
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1391682624816895,
      "learning_rate": 1.0788274849554919e-05,
      "loss": 2.3818,
      "step": 36734
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0684218406677246,
      "learning_rate": 1.0787864393339365e-05,
      "loss": 2.3411,
      "step": 36735
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1170637607574463,
      "learning_rate": 1.0787453935788167e-05,
      "loss": 2.5081,
      "step": 36736
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.136981725692749,
      "learning_rate": 1.0787043476902024e-05,
      "loss": 2.354,
      "step": 36737
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0845602750778198,
      "learning_rate": 1.0786633016681627e-05,
      "loss": 2.2913,
      "step": 36738
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.045507550239563,
      "learning_rate": 1.0786222555127672e-05,
      "loss": 2.3366,
      "step": 36739
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.6051154136657715,
      "learning_rate": 1.0785812092240857e-05,
      "loss": 2.231,
      "step": 36740
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1906870603561401,
      "learning_rate": 1.0785401628021879e-05,
      "loss": 2.3703,
      "step": 36741
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0855745077133179,
      "learning_rate": 1.078499116247143e-05,
      "loss": 2.4248,
      "step": 36742
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0538955926895142,
      "learning_rate": 1.0784580695590207e-05,
      "loss": 2.15,
      "step": 36743
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.29616379737854,
      "learning_rate": 1.0784170227378907e-05,
      "loss": 2.3692,
      "step": 36744
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2339363098144531,
      "learning_rate": 1.0783759757838228e-05,
      "loss": 2.3479,
      "step": 36745
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0234217643737793,
      "learning_rate": 1.078334928696886e-05,
      "loss": 2.4233,
      "step": 36746
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.378416895866394,
      "learning_rate": 1.0782938814771504e-05,
      "loss": 2.4248,
      "step": 36747
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9943331480026245,
      "learning_rate": 1.0782528341246854e-05,
      "loss": 2.2833,
      "step": 36748
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9836732149124146,
      "learning_rate": 1.0782117866395605e-05,
      "loss": 2.4812,
      "step": 36749
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0017629861831665,
      "learning_rate": 1.0781707390218454e-05,
      "loss": 2.2821,
      "step": 36750
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9683874845504761,
      "learning_rate": 1.0781296912716097e-05,
      "loss": 2.4936,
      "step": 36751
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1934012174606323,
      "learning_rate": 1.0780886433889231e-05,
      "loss": 2.3462,
      "step": 36752
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1612565517425537,
      "learning_rate": 1.0780475953738547e-05,
      "loss": 2.4717,
      "step": 36753
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1692156791687012,
      "learning_rate": 1.0780065472264747e-05,
      "loss": 2.1614,
      "step": 36754
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.010320782661438,
      "learning_rate": 1.0779654989468522e-05,
      "loss": 2.3893,
      "step": 36755
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1066704988479614,
      "learning_rate": 1.0779244505350575e-05,
      "loss": 2.5183,
      "step": 36756
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1424624919891357,
      "learning_rate": 1.077883401991159e-05,
      "loss": 2.2029,
      "step": 36757
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0651700496673584,
      "learning_rate": 1.0778423533152274e-05,
      "loss": 2.1242,
      "step": 36758
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.185303807258606,
      "learning_rate": 1.0778013045073316e-05,
      "loss": 2.4213,
      "step": 36759
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1160892248153687,
      "learning_rate": 1.0777602555675416e-05,
      "loss": 2.3546,
      "step": 36760
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1778233051300049,
      "learning_rate": 1.077719206495927e-05,
      "loss": 2.2899,
      "step": 36761
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.096024751663208,
      "learning_rate": 1.0776781572925572e-05,
      "loss": 2.5005,
      "step": 36762
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1667245626449585,
      "learning_rate": 1.0776371079575018e-05,
      "loss": 2.4492,
      "step": 36763
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0836960077285767,
      "learning_rate": 1.0775960584908302e-05,
      "loss": 2.1768,
      "step": 36764
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.063172698020935,
      "learning_rate": 1.0775550088926126e-05,
      "loss": 2.2435,
      "step": 36765
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9622576236724854,
      "learning_rate": 1.0775139591629177e-05,
      "loss": 2.4122,
      "step": 36766
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9520300030708313,
      "learning_rate": 1.077472909301816e-05,
      "loss": 2.3045,
      "step": 36767
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0590702295303345,
      "learning_rate": 1.0774318593093764e-05,
      "loss": 2.5976,
      "step": 36768
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.12699556350708,
      "learning_rate": 1.077390809185669e-05,
      "loss": 2.2104,
      "step": 36769
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3202520608901978,
      "learning_rate": 1.0773497589307631e-05,
      "loss": 2.3188,
      "step": 36770
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1448255777359009,
      "learning_rate": 1.0773087085447282e-05,
      "loss": 2.517,
      "step": 36771
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1765562295913696,
      "learning_rate": 1.0772676580276345e-05,
      "loss": 2.206,
      "step": 36772
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9736366271972656,
      "learning_rate": 1.0772266073795508e-05,
      "loss": 2.3317,
      "step": 36773
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0517281293869019,
      "learning_rate": 1.0771855566005469e-05,
      "loss": 2.4334,
      "step": 36774
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0001364946365356,
      "learning_rate": 1.0771445056906926e-05,
      "loss": 2.1667,
      "step": 36775
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0931299924850464,
      "learning_rate": 1.0771034546500576e-05,
      "loss": 2.4865,
      "step": 36776
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0287226438522339,
      "learning_rate": 1.0770624034787112e-05,
      "loss": 2.2041,
      "step": 36777
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0360674858093262,
      "learning_rate": 1.077021352176723e-05,
      "loss": 2.5062,
      "step": 36778
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2984391450881958,
      "learning_rate": 1.0769803007441626e-05,
      "loss": 2.4423,
      "step": 36779
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.09343683719635,
      "learning_rate": 1.0769392491810997e-05,
      "loss": 2.2932,
      "step": 36780
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0978426933288574,
      "learning_rate": 1.0768981974876041e-05,
      "loss": 2.5222,
      "step": 36781
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2298617362976074,
      "learning_rate": 1.0768571456637449e-05,
      "loss": 2.389,
      "step": 36782
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2313143014907837,
      "learning_rate": 1.0768160937095925e-05,
      "loss": 2.5126,
      "step": 36783
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0106192827224731,
      "learning_rate": 1.0767750416252153e-05,
      "loss": 2.2303,
      "step": 36784
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1567778587341309,
      "learning_rate": 1.0767339894106838e-05,
      "loss": 2.5802,
      "step": 36785
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9902662634849548,
      "learning_rate": 1.0766929370660675e-05,
      "loss": 2.5709,
      "step": 36786
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1158835887908936,
      "learning_rate": 1.0766518845914356e-05,
      "loss": 2.576,
      "step": 36787
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0252102613449097,
      "learning_rate": 1.076610831986858e-05,
      "loss": 2.3651,
      "step": 36788
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9854553937911987,
      "learning_rate": 1.0765697792524045e-05,
      "loss": 2.3285,
      "step": 36789
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1252644062042236,
      "learning_rate": 1.0765287263881444e-05,
      "loss": 2.5045,
      "step": 36790
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0388002395629883,
      "learning_rate": 1.0764876733941469e-05,
      "loss": 2.3056,
      "step": 36791
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.043940782546997,
      "learning_rate": 1.0764466202704822e-05,
      "loss": 2.5718,
      "step": 36792
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9970881938934326,
      "learning_rate": 1.0764055670172198e-05,
      "loss": 2.2609,
      "step": 36793
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0250329971313477,
      "learning_rate": 1.0763645136344291e-05,
      "loss": 2.3965,
      "step": 36794
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1912596225738525,
      "learning_rate": 1.0763234601221799e-05,
      "loss": 2.5246,
      "step": 36795
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9923447370529175,
      "learning_rate": 1.0762824064805419e-05,
      "loss": 2.1124,
      "step": 36796
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.041330337524414,
      "learning_rate": 1.0762413527095843e-05,
      "loss": 2.4491,
      "step": 36797
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9797162413597107,
      "learning_rate": 1.0762002988093769e-05,
      "loss": 2.3206,
      "step": 36798
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.031970739364624,
      "learning_rate": 1.0761592447799894e-05,
      "loss": 2.3409,
      "step": 36799
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1101516485214233,
      "learning_rate": 1.0761181906214913e-05,
      "loss": 2.2052,
      "step": 36800
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0571035146713257,
      "learning_rate": 1.0760771363339519e-05,
      "loss": 2.4682,
      "step": 36801
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0537009239196777,
      "learning_rate": 1.0760360819174413e-05,
      "loss": 2.3245,
      "step": 36802
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0934113264083862,
      "learning_rate": 1.075995027372029e-05,
      "loss": 2.3858,
      "step": 36803
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.033998727798462,
      "learning_rate": 1.0759539726977844e-05,
      "loss": 2.4824,
      "step": 36804
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0961382389068604,
      "learning_rate": 1.0759129178947771e-05,
      "loss": 2.4919,
      "step": 36805
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.134881854057312,
      "learning_rate": 1.0758718629630769e-05,
      "loss": 2.3322,
      "step": 36806
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1501686573028564,
      "learning_rate": 1.075830807902753e-05,
      "loss": 2.394,
      "step": 36807
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1305885314941406,
      "learning_rate": 1.0757897527138757e-05,
      "loss": 2.3834,
      "step": 36808
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0460940599441528,
      "learning_rate": 1.075748697396514e-05,
      "loss": 2.5097,
      "step": 36809
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1226707696914673,
      "learning_rate": 1.0757076419507377e-05,
      "loss": 2.3281,
      "step": 36810
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1575820446014404,
      "learning_rate": 1.0756665863766165e-05,
      "loss": 2.4867,
      "step": 36811
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.988214910030365,
      "learning_rate": 1.0756255306742197e-05,
      "loss": 2.3556,
      "step": 36812
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.112997055053711,
      "learning_rate": 1.075584474843617e-05,
      "loss": 2.3816,
      "step": 36813
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0790767669677734,
      "learning_rate": 1.075543418884878e-05,
      "loss": 2.4104,
      "step": 36814
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1152911186218262,
      "learning_rate": 1.0755023627980728e-05,
      "loss": 2.4543,
      "step": 36815
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0026166439056396,
      "learning_rate": 1.0754613065832706e-05,
      "loss": 2.3633,
      "step": 36816
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0435417890548706,
      "learning_rate": 1.0754202502405407e-05,
      "loss": 2.2633,
      "step": 36817
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.101837396621704,
      "learning_rate": 1.075379193769953e-05,
      "loss": 2.5191,
      "step": 36818
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0182485580444336,
      "learning_rate": 1.0753381371715771e-05,
      "loss": 2.3546,
      "step": 36819
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.052186369895935,
      "learning_rate": 1.0752970804454827e-05,
      "loss": 2.255,
      "step": 36820
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.031424641609192,
      "learning_rate": 1.0752560235917393e-05,
      "loss": 2.5291,
      "step": 36821
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0597065687179565,
      "learning_rate": 1.0752149666104166e-05,
      "loss": 2.3065,
      "step": 36822
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.092970609664917,
      "learning_rate": 1.075173909501584e-05,
      "loss": 2.3013,
      "step": 36823
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9847490191459656,
      "learning_rate": 1.0751328522653111e-05,
      "loss": 2.0949,
      "step": 36824
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1040802001953125,
      "learning_rate": 1.0750917949016677e-05,
      "loss": 2.1911,
      "step": 36825
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0773123502731323,
      "learning_rate": 1.0750507374107233e-05,
      "loss": 2.5278,
      "step": 36826
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0423622131347656,
      "learning_rate": 1.0750096797925475e-05,
      "loss": 2.3543,
      "step": 36827
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.047244906425476,
      "learning_rate": 1.07496862204721e-05,
      "loss": 2.2924,
      "step": 36828
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9390963912010193,
      "learning_rate": 1.0749275641747803e-05,
      "loss": 2.394,
      "step": 36829
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.080232858657837,
      "learning_rate": 1.074886506175328e-05,
      "loss": 2.4936,
      "step": 36830
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0965520143508911,
      "learning_rate": 1.074845448048923e-05,
      "loss": 2.5259,
      "step": 36831
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.993371307849884,
      "learning_rate": 1.0748043897956341e-05,
      "loss": 2.3178,
      "step": 36832
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.039164423942566,
      "learning_rate": 1.0747633314155318e-05,
      "loss": 2.5583,
      "step": 36833
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2458380460739136,
      "learning_rate": 1.0747222729086853e-05,
      "loss": 2.4888,
      "step": 36834
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0138272047042847,
      "learning_rate": 1.0746812142751643e-05,
      "loss": 2.3996,
      "step": 36835
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0486875772476196,
      "learning_rate": 1.0746401555150383e-05,
      "loss": 2.3202,
      "step": 36836
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0612069368362427,
      "learning_rate": 1.0745990966283768e-05,
      "loss": 2.4156,
      "step": 36837
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.047747015953064,
      "learning_rate": 1.0745580376152498e-05,
      "loss": 2.236,
      "step": 36838
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0428217649459839,
      "learning_rate": 1.0745169784757264e-05,
      "loss": 2.4832,
      "step": 36839
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0818606615066528,
      "learning_rate": 1.0744759192098765e-05,
      "loss": 2.4183,
      "step": 36840
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.024444341659546,
      "learning_rate": 1.07443485981777e-05,
      "loss": 2.4587,
      "step": 36841
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0449776649475098,
      "learning_rate": 1.0743938002994762e-05,
      "loss": 2.259,
      "step": 36842
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.059672474861145,
      "learning_rate": 1.0743527406550644e-05,
      "loss": 2.3625,
      "step": 36843
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1037321090698242,
      "learning_rate": 1.0743116808846048e-05,
      "loss": 2.2986,
      "step": 36844
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.080021858215332,
      "learning_rate": 1.0742706209881665e-05,
      "loss": 2.37,
      "step": 36845
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0995538234710693,
      "learning_rate": 1.0742295609658193e-05,
      "loss": 2.3344,
      "step": 36846
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0205501317977905,
      "learning_rate": 1.074188500817633e-05,
      "loss": 2.3834,
      "step": 36847
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0731687545776367,
      "learning_rate": 1.074147440543677e-05,
      "loss": 2.279,
      "step": 36848
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.139816164970398,
      "learning_rate": 1.074106380144021e-05,
      "loss": 2.2981,
      "step": 36849
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0729515552520752,
      "learning_rate": 1.0740653196187345e-05,
      "loss": 2.5537,
      "step": 36850
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0749160051345825,
      "learning_rate": 1.074024258967887e-05,
      "loss": 2.6206,
      "step": 36851
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2399790287017822,
      "learning_rate": 1.0739831981915485e-05,
      "loss": 2.4059,
      "step": 36852
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0393370389938354,
      "learning_rate": 1.0739421372897883e-05,
      "loss": 2.2929,
      "step": 36853
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.022658109664917,
      "learning_rate": 1.073901076262676e-05,
      "loss": 2.4427,
      "step": 36854
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.107788324356079,
      "learning_rate": 1.0738600151102818e-05,
      "loss": 2.6429,
      "step": 36855
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1148808002471924,
      "learning_rate": 1.0738189538326743e-05,
      "loss": 2.4386,
      "step": 36856
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9944809079170227,
      "learning_rate": 1.0737778924299237e-05,
      "loss": 2.3944,
      "step": 36857
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5672578811645508,
      "learning_rate": 1.0737368309020996e-05,
      "loss": 2.4619,
      "step": 36858
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0087815523147583,
      "learning_rate": 1.0736957692492717e-05,
      "loss": 2.3886,
      "step": 36859
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1152245998382568,
      "learning_rate": 1.0736547074715094e-05,
      "loss": 2.3734,
      "step": 36860
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0949862003326416,
      "learning_rate": 1.0736136455688822e-05,
      "loss": 2.3394,
      "step": 36861
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0826692581176758,
      "learning_rate": 1.0735725835414599e-05,
      "loss": 2.3729,
      "step": 36862
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0471757650375366,
      "learning_rate": 1.0735315213893122e-05,
      "loss": 2.3672,
      "step": 36863
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9741603136062622,
      "learning_rate": 1.0734904591125085e-05,
      "loss": 2.3599,
      "step": 36864
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.033728837966919,
      "learning_rate": 1.0734493967111185e-05,
      "loss": 2.5691,
      "step": 36865
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0861817598342896,
      "learning_rate": 1.0734083341852118e-05,
      "loss": 2.2481,
      "step": 36866
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1212571859359741,
      "learning_rate": 1.0733672715348579e-05,
      "loss": 2.331,
      "step": 36867
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1191320419311523,
      "learning_rate": 1.0733262087601271e-05,
      "loss": 2.4026,
      "step": 36868
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.032137155532837,
      "learning_rate": 1.073285145861088e-05,
      "loss": 2.2211,
      "step": 36869
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0481985807418823,
      "learning_rate": 1.0732440828378107e-05,
      "loss": 2.4454,
      "step": 36870
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0343722105026245,
      "learning_rate": 1.073203019690365e-05,
      "loss": 2.249,
      "step": 36871
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0608712434768677,
      "learning_rate": 1.07316195641882e-05,
      "loss": 2.5238,
      "step": 36872
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0393037796020508,
      "learning_rate": 1.073120893023246e-05,
      "loss": 2.3441,
      "step": 36873
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.171265721321106,
      "learning_rate": 1.0730798295037119e-05,
      "loss": 2.4656,
      "step": 36874
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1776998043060303,
      "learning_rate": 1.073038765860288e-05,
      "loss": 2.4727,
      "step": 36875
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1779131889343262,
      "learning_rate": 1.072997702093043e-05,
      "loss": 2.5206,
      "step": 36876
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0701322555541992,
      "learning_rate": 1.0729566382020475e-05,
      "loss": 2.1677,
      "step": 36877
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1330711841583252,
      "learning_rate": 1.0729155741873704e-05,
      "loss": 2.3938,
      "step": 36878
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1443860530853271,
      "learning_rate": 1.0728745100490817e-05,
      "loss": 2.1194,
      "step": 36879
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.06468665599823,
      "learning_rate": 1.0728334457872511e-05,
      "loss": 2.4898,
      "step": 36880
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0097689628601074,
      "learning_rate": 1.0727923814019478e-05,
      "loss": 2.3694,
      "step": 36881
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0593401193618774,
      "learning_rate": 1.072751316893242e-05,
      "loss": 2.3573,
      "step": 36882
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0918101072311401,
      "learning_rate": 1.0727102522612025e-05,
      "loss": 2.4684,
      "step": 36883
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0771863460540771,
      "learning_rate": 1.0726691875058996e-05,
      "loss": 2.39,
      "step": 36884
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1014891862869263,
      "learning_rate": 1.0726281226274027e-05,
      "loss": 2.4878,
      "step": 36885
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0506525039672852,
      "learning_rate": 1.0725870576257813e-05,
      "loss": 2.3264,
      "step": 36886
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0990229845046997,
      "learning_rate": 1.0725459925011055e-05,
      "loss": 2.4225,
      "step": 36887
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1571019887924194,
      "learning_rate": 1.072504927253444e-05,
      "loss": 2.414,
      "step": 36888
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.170353651046753,
      "learning_rate": 1.0724638618828673e-05,
      "loss": 2.1036,
      "step": 36889
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0876758098602295,
      "learning_rate": 1.0724227963894446e-05,
      "loss": 2.5178,
      "step": 36890
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9887635707855225,
      "learning_rate": 1.0723817307732456e-05,
      "loss": 2.1798,
      "step": 36891
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1376093626022339,
      "learning_rate": 1.0723406650343397e-05,
      "loss": 2.163,
      "step": 36892
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0554651021957397,
      "learning_rate": 1.0722995991727969e-05,
      "loss": 2.3427,
      "step": 36893
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0365194082260132,
      "learning_rate": 1.0722585331886865e-05,
      "loss": 2.0107,
      "step": 36894
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0120108127593994,
      "learning_rate": 1.0722174670820786e-05,
      "loss": 2.3012,
      "step": 36895
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0215970277786255,
      "learning_rate": 1.0721764008530424e-05,
      "loss": 2.3195,
      "step": 36896
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1262131929397583,
      "learning_rate": 1.0721353345016474e-05,
      "loss": 2.3918,
      "step": 36897
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0683292150497437,
      "learning_rate": 1.0720942680279633e-05,
      "loss": 2.4694,
      "step": 36898
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0319344997406006,
      "learning_rate": 1.0720532014320602e-05,
      "loss": 2.4661,
      "step": 36899
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.06244695186615,
      "learning_rate": 1.0720121347140072e-05,
      "loss": 2.4985,
      "step": 36900
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0155632495880127,
      "learning_rate": 1.0719710678738742e-05,
      "loss": 2.4053,
      "step": 36901
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2096036672592163,
      "learning_rate": 1.0719300009117304e-05,
      "loss": 2.3137,
      "step": 36902
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1057409048080444,
      "learning_rate": 1.071888933827646e-05,
      "loss": 2.5951,
      "step": 36903
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9796444773674011,
      "learning_rate": 1.0718478666216901e-05,
      "loss": 2.3229,
      "step": 36904
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.043100357055664,
      "learning_rate": 1.0718067992939326e-05,
      "loss": 2.2067,
      "step": 36905
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0213358402252197,
      "learning_rate": 1.071765731844443e-05,
      "loss": 2.4459,
      "step": 36906
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0577030181884766,
      "learning_rate": 1.071724664273291e-05,
      "loss": 2.515,
      "step": 36907
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0913516283035278,
      "learning_rate": 1.0716835965805465e-05,
      "loss": 2.2033,
      "step": 36908
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1750050783157349,
      "learning_rate": 1.0716425287662787e-05,
      "loss": 2.4098,
      "step": 36909
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0446691513061523,
      "learning_rate": 1.0716014608305571e-05,
      "loss": 2.3753,
      "step": 36910
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0185632705688477,
      "learning_rate": 1.0715603927734519e-05,
      "loss": 2.3717,
      "step": 36911
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0543609857559204,
      "learning_rate": 1.0715193245950322e-05,
      "loss": 2.2951,
      "step": 36912
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1379153728485107,
      "learning_rate": 1.0714782562953679e-05,
      "loss": 2.5874,
      "step": 36913
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2010576725006104,
      "learning_rate": 1.0714371878745286e-05,
      "loss": 2.3578,
      "step": 36914
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0663830041885376,
      "learning_rate": 1.0713961193325839e-05,
      "loss": 2.1047,
      "step": 36915
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2590138912200928,
      "learning_rate": 1.0713550506696032e-05,
      "loss": 2.5128,
      "step": 36916
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1806069612503052,
      "learning_rate": 1.0713139818856563e-05,
      "loss": 2.2891,
      "step": 36917
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0578879117965698,
      "learning_rate": 1.0712729129808132e-05,
      "loss": 2.3492,
      "step": 36918
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0200375318527222,
      "learning_rate": 1.0712318439551427e-05,
      "loss": 2.3926,
      "step": 36919
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.041843056678772,
      "learning_rate": 1.0711907748087152e-05,
      "loss": 2.3669,
      "step": 36920
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0851976871490479,
      "learning_rate": 1.0711497055415998e-05,
      "loss": 2.3394,
      "step": 36921
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.162387490272522,
      "learning_rate": 1.0711086361538668e-05,
      "loss": 2.3187,
      "step": 36922
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9735119342803955,
      "learning_rate": 1.0710675666455847e-05,
      "loss": 2.508,
      "step": 36923
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0099891424179077,
      "learning_rate": 1.071026497016824e-05,
      "loss": 2.2908,
      "step": 36924
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1209521293640137,
      "learning_rate": 1.0709854272676541e-05,
      "loss": 2.1314,
      "step": 36925
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0377564430236816,
      "learning_rate": 1.0709443573981447e-05,
      "loss": 2.3579,
      "step": 36926
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9997205138206482,
      "learning_rate": 1.0709032874083652e-05,
      "loss": 2.4419,
      "step": 36927
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.02640962600708,
      "learning_rate": 1.0708622172983857e-05,
      "loss": 2.4369,
      "step": 36928
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0218788385391235,
      "learning_rate": 1.070821147068275e-05,
      "loss": 2.1855,
      "step": 36929
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0375455617904663,
      "learning_rate": 1.0707800767181036e-05,
      "loss": 2.4815,
      "step": 36930
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0956748723983765,
      "learning_rate": 1.0707390062479406e-05,
      "loss": 2.2505,
      "step": 36931
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0695782899856567,
      "learning_rate": 1.0706979356578557e-05,
      "loss": 2.2925,
      "step": 36932
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0353401899337769,
      "learning_rate": 1.0706568649479186e-05,
      "loss": 2.4878,
      "step": 36933
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0153917074203491,
      "learning_rate": 1.070615794118199e-05,
      "loss": 2.372,
      "step": 36934
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9975523352622986,
      "learning_rate": 1.0705747231687667e-05,
      "loss": 2.2554,
      "step": 36935
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.087581753730774,
      "learning_rate": 1.0705336520996907e-05,
      "loss": 2.4302,
      "step": 36936
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.094758152961731,
      "learning_rate": 1.0704925809110411e-05,
      "loss": 2.4749,
      "step": 36937
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1638025045394897,
      "learning_rate": 1.0704515096028876e-05,
      "loss": 2.4516,
      "step": 36938
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0222316980361938,
      "learning_rate": 1.0704104381752993e-05,
      "loss": 2.274,
      "step": 36939
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1156257390975952,
      "learning_rate": 1.0703693666283465e-05,
      "loss": 2.4092,
      "step": 36940
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1525297164916992,
      "learning_rate": 1.0703282949620986e-05,
      "loss": 2.3985,
      "step": 36941
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0966627597808838,
      "learning_rate": 1.0702872231766248e-05,
      "loss": 2.5137,
      "step": 36942
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8807686567306519,
      "learning_rate": 1.0702461512719952e-05,
      "loss": 2.5258,
      "step": 36943
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1536113023757935,
      "learning_rate": 1.0702050792482794e-05,
      "loss": 2.3445,
      "step": 36944
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0235817432403564,
      "learning_rate": 1.0701640071055468e-05,
      "loss": 2.5747,
      "step": 36945
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1837900876998901,
      "learning_rate": 1.0701229348438673e-05,
      "loss": 2.3198,
      "step": 36946
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1371904611587524,
      "learning_rate": 1.0700818624633102e-05,
      "loss": 2.3045,
      "step": 36947
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0488110780715942,
      "learning_rate": 1.0700407899639456e-05,
      "loss": 2.2827,
      "step": 36948
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0684999227523804,
      "learning_rate": 1.0699997173458427e-05,
      "loss": 2.3357,
      "step": 36949
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7483974695205688,
      "learning_rate": 1.069958644609071e-05,
      "loss": 2.3873,
      "step": 36950
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.265972375869751,
      "learning_rate": 1.0699175717537008e-05,
      "loss": 2.3832,
      "step": 36951
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2181400060653687,
      "learning_rate": 1.0698764987798008e-05,
      "loss": 2.2542,
      "step": 36952
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9687880277633667,
      "learning_rate": 1.0698354256874417e-05,
      "loss": 2.4025,
      "step": 36953
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0612225532531738,
      "learning_rate": 1.0697943524766925e-05,
      "loss": 2.3645,
      "step": 36954
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9372354745864868,
      "learning_rate": 1.0697532791476229e-05,
      "loss": 2.4256,
      "step": 36955
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1064316034317017,
      "learning_rate": 1.0697122057003022e-05,
      "loss": 2.2693,
      "step": 36956
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1127218008041382,
      "learning_rate": 1.0696711321348005e-05,
      "loss": 2.3752,
      "step": 36957
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9869120121002197,
      "learning_rate": 1.0696300584511875e-05,
      "loss": 2.3358,
      "step": 36958
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1253751516342163,
      "learning_rate": 1.0695889846495324e-05,
      "loss": 2.4234,
      "step": 36959
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1027441024780273,
      "learning_rate": 1.0695479107299051e-05,
      "loss": 2.3549,
      "step": 36960
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0994224548339844,
      "learning_rate": 1.0695068366923757e-05,
      "loss": 2.3707,
      "step": 36961
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.186461091041565,
      "learning_rate": 1.0694657625370129e-05,
      "loss": 2.2077,
      "step": 36962
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0439180135726929,
      "learning_rate": 1.0694246882638867e-05,
      "loss": 2.2535,
      "step": 36963
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0699225664138794,
      "learning_rate": 1.0693836138730669e-05,
      "loss": 2.4064,
      "step": 36964
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0772510766983032,
      "learning_rate": 1.0693425393646231e-05,
      "loss": 2.4098,
      "step": 36965
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9530598521232605,
      "learning_rate": 1.0693014647386249e-05,
      "loss": 2.4177,
      "step": 36966
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.007751226425171,
      "learning_rate": 1.0692603899951419e-05,
      "loss": 2.3232,
      "step": 36967
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3105899095535278,
      "learning_rate": 1.0692193151342436e-05,
      "loss": 2.5627,
      "step": 36968
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0200634002685547,
      "learning_rate": 1.0691782401559998e-05,
      "loss": 2.2233,
      "step": 36969
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0790042877197266,
      "learning_rate": 1.06913716506048e-05,
      "loss": 2.1025,
      "step": 36970
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0052616596221924,
      "learning_rate": 1.0690960898477541e-05,
      "loss": 2.2249,
      "step": 36971
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1030457019805908,
      "learning_rate": 1.0690550145178913e-05,
      "loss": 2.2711,
      "step": 36972
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1890270709991455,
      "learning_rate": 1.0690139390709617e-05,
      "loss": 2.425,
      "step": 36973
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2306499481201172,
      "learning_rate": 1.0689728635070349e-05,
      "loss": 2.4701,
      "step": 36974
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2669531106948853,
      "learning_rate": 1.06893178782618e-05,
      "loss": 2.3279,
      "step": 36975
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.15255606174469,
      "learning_rate": 1.0688907120284673e-05,
      "loss": 2.3899,
      "step": 36976
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1027772426605225,
      "learning_rate": 1.0688496361139661e-05,
      "loss": 2.4447,
      "step": 36977
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.065686821937561,
      "learning_rate": 1.0688085600827459e-05,
      "loss": 2.4348,
      "step": 36978
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0317933559417725,
      "learning_rate": 1.0687674839348766e-05,
      "loss": 2.3251,
      "step": 36979
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.036337971687317,
      "learning_rate": 1.0687264076704276e-05,
      "loss": 2.4197,
      "step": 36980
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9962064623832703,
      "learning_rate": 1.068685331289469e-05,
      "loss": 2.5059,
      "step": 36981
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1028350591659546,
      "learning_rate": 1.0686442547920699e-05,
      "loss": 2.4657,
      "step": 36982
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1299796104431152,
      "learning_rate": 1.0686031781783e-05,
      "loss": 2.295,
      "step": 36983
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1129562854766846,
      "learning_rate": 1.0685621014482292e-05,
      "loss": 2.3498,
      "step": 36984
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1169418096542358,
      "learning_rate": 1.068521024601927e-05,
      "loss": 2.2617,
      "step": 36985
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0237250328063965,
      "learning_rate": 1.0684799476394632e-05,
      "loss": 2.4085,
      "step": 36986
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1657862663269043,
      "learning_rate": 1.0684388705609071e-05,
      "loss": 2.4071,
      "step": 36987
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2116057872772217,
      "learning_rate": 1.0683977933663288e-05,
      "loss": 2.3849,
      "step": 36988
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.081705093383789,
      "learning_rate": 1.0683567160557972e-05,
      "loss": 2.2977,
      "step": 36989
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2069544792175293,
      "learning_rate": 1.0683156386293827e-05,
      "loss": 2.3947,
      "step": 36990
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.073335886001587,
      "learning_rate": 1.0682745610871546e-05,
      "loss": 2.3097,
      "step": 36991
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1934798955917358,
      "learning_rate": 1.0682334834291826e-05,
      "loss": 2.265,
      "step": 36992
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0204803943634033,
      "learning_rate": 1.0681924056555362e-05,
      "loss": 2.4589,
      "step": 36993
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0374057292938232,
      "learning_rate": 1.0681513277662853e-05,
      "loss": 2.2831,
      "step": 36994
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9846578240394592,
      "learning_rate": 1.0681102497614994e-05,
      "loss": 2.295,
      "step": 36995
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0634149312973022,
      "learning_rate": 1.068069171641248e-05,
      "loss": 2.5732,
      "step": 36996
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1760029792785645,
      "learning_rate": 1.0680280934056011e-05,
      "loss": 2.4078,
      "step": 36997
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0423325300216675,
      "learning_rate": 1.0679870150546278e-05,
      "loss": 2.429,
      "step": 36998
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9801955819129944,
      "learning_rate": 1.0679459365883982e-05,
      "loss": 2.3083,
      "step": 36999
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1928026676177979,
      "learning_rate": 1.067904858006982e-05,
      "loss": 2.4657,
      "step": 37000
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9353005290031433,
      "learning_rate": 1.0678637793104483e-05,
      "loss": 2.3631,
      "step": 37001
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.237542986869812,
      "learning_rate": 1.0678227004988671e-05,
      "loss": 2.4445,
      "step": 37002
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0056138038635254,
      "learning_rate": 1.067781621572308e-05,
      "loss": 2.3812,
      "step": 37003
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1088677644729614,
      "learning_rate": 1.0677405425308407e-05,
      "loss": 2.6224,
      "step": 37004
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9607679843902588,
      "learning_rate": 1.067699463374535e-05,
      "loss": 2.2692,
      "step": 37005
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0344876050949097,
      "learning_rate": 1.06765838410346e-05,
      "loss": 2.3438,
      "step": 37006
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0327728986740112,
      "learning_rate": 1.0676173047176856e-05,
      "loss": 2.2133,
      "step": 37007
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0011861324310303,
      "learning_rate": 1.067576225217282e-05,
      "loss": 2.3573,
      "step": 37008
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0888296365737915,
      "learning_rate": 1.067535145602318e-05,
      "loss": 2.4063,
      "step": 37009
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1107360124588013,
      "learning_rate": 1.0674940658728634e-05,
      "loss": 2.447,
      "step": 37010
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0911880731582642,
      "learning_rate": 1.0674529860289882e-05,
      "loss": 2.2077,
      "step": 37011
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9759435057640076,
      "learning_rate": 1.0674119060707619e-05,
      "loss": 2.1537,
      "step": 37012
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1164051294326782,
      "learning_rate": 1.0673708259982543e-05,
      "loss": 2.3568,
      "step": 37013
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.609790563583374,
      "learning_rate": 1.0673297458115345e-05,
      "loss": 2.2812,
      "step": 37014
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.040918231010437,
      "learning_rate": 1.0672886655106727e-05,
      "loss": 2.2827,
      "step": 37015
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1966403722763062,
      "learning_rate": 1.0672475850957383e-05,
      "loss": 2.4295,
      "step": 37016
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.058380365371704,
      "learning_rate": 1.067206504566801e-05,
      "loss": 2.4607,
      "step": 37017
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.096441388130188,
      "learning_rate": 1.0671654239239302e-05,
      "loss": 2.2672,
      "step": 37018
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0379430055618286,
      "learning_rate": 1.067124343167196e-05,
      "loss": 2.1626,
      "step": 37019
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9412804841995239,
      "learning_rate": 1.067083262296668e-05,
      "loss": 2.222,
      "step": 37020
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.071131706237793,
      "learning_rate": 1.0670421813124153e-05,
      "loss": 2.4927,
      "step": 37021
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4242573976516724,
      "learning_rate": 1.067001100214508e-05,
      "loss": 2.4508,
      "step": 37022
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0452615022659302,
      "learning_rate": 1.0669600190030156e-05,
      "loss": 2.3867,
      "step": 37023
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.027323842048645,
      "learning_rate": 1.0669189376780078e-05,
      "loss": 2.3938,
      "step": 37024
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0124667882919312,
      "learning_rate": 1.0668778562395543e-05,
      "loss": 2.6267,
      "step": 37025
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0744962692260742,
      "learning_rate": 1.0668367746877245e-05,
      "loss": 2.4043,
      "step": 37026
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9961416125297546,
      "learning_rate": 1.0667956930225884e-05,
      "loss": 2.4116,
      "step": 37027
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2089157104492188,
      "learning_rate": 1.0667546112442155e-05,
      "loss": 2.2539,
      "step": 37028
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0134812593460083,
      "learning_rate": 1.0667135293526751e-05,
      "loss": 2.3216,
      "step": 37029
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1314904689788818,
      "learning_rate": 1.0666724473480374e-05,
      "loss": 2.2981,
      "step": 37030
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2201826572418213,
      "learning_rate": 1.0666313652303717e-05,
      "loss": 2.4133,
      "step": 37031
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0856410264968872,
      "learning_rate": 1.066590282999748e-05,
      "loss": 2.2964,
      "step": 37032
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9766810536384583,
      "learning_rate": 1.0665492006562355e-05,
      "loss": 2.3272,
      "step": 37033
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0976911783218384,
      "learning_rate": 1.0665081181999039e-05,
      "loss": 2.1893,
      "step": 37034
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0954301357269287,
      "learning_rate": 1.066467035630823e-05,
      "loss": 2.3679,
      "step": 37035
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1359672546386719,
      "learning_rate": 1.0664259529490626e-05,
      "loss": 2.4474,
      "step": 37036
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7571113109588623,
      "learning_rate": 1.0663848701546921e-05,
      "loss": 2.2078,
      "step": 37037
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0045875310897827,
      "learning_rate": 1.0663437872477812e-05,
      "loss": 2.3514,
      "step": 37038
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0493332147598267,
      "learning_rate": 1.0663027042283995e-05,
      "loss": 2.3623,
      "step": 37039
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0233259201049805,
      "learning_rate": 1.0662616210966168e-05,
      "loss": 2.2645,
      "step": 37040
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0389375686645508,
      "learning_rate": 1.0662205378525026e-05,
      "loss": 2.5773,
      "step": 37041
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9841597080230713,
      "learning_rate": 1.0661794544961266e-05,
      "loss": 2.1619,
      "step": 37042
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0708528757095337,
      "learning_rate": 1.0661383710275586e-05,
      "loss": 2.5842,
      "step": 37043
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.091477632522583,
      "learning_rate": 1.0660972874468678e-05,
      "loss": 2.4782,
      "step": 37044
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.6416616439819336,
      "learning_rate": 1.0660562037541242e-05,
      "loss": 2.4922,
      "step": 37045
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0742623805999756,
      "learning_rate": 1.0660151199493978e-05,
      "loss": 2.3915,
      "step": 37046
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1178797483444214,
      "learning_rate": 1.0659740360327575e-05,
      "loss": 2.4008,
      "step": 37047
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0299512147903442,
      "learning_rate": 1.0659329520042731e-05,
      "loss": 2.5156,
      "step": 37048
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9577342867851257,
      "learning_rate": 1.0658918678640146e-05,
      "loss": 2.3969,
      "step": 37049
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1131196022033691,
      "learning_rate": 1.0658507836120517e-05,
      "loss": 2.2747,
      "step": 37050
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0977349281311035,
      "learning_rate": 1.0658096992484536e-05,
      "loss": 2.1173,
      "step": 37051
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.069638967514038,
      "learning_rate": 1.0657686147732901e-05,
      "loss": 2.2854,
      "step": 37052
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1869043111801147,
      "learning_rate": 1.0657275301866314e-05,
      "loss": 2.2025,
      "step": 37053
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9712148904800415,
      "learning_rate": 1.0656864454885464e-05,
      "loss": 2.4107,
      "step": 37054
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0718467235565186,
      "learning_rate": 1.0656453606791051e-05,
      "loss": 2.2696,
      "step": 37055
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0603301525115967,
      "learning_rate": 1.0656042757583768e-05,
      "loss": 2.4145,
      "step": 37056
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.046830654144287,
      "learning_rate": 1.0655631907264317e-05,
      "loss": 2.4771,
      "step": 37057
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0325813293457031,
      "learning_rate": 1.0655221055833391e-05,
      "loss": 2.4576,
      "step": 37058
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1646937131881714,
      "learning_rate": 1.065481020329169e-05,
      "loss": 2.3127,
      "step": 37059
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1431854963302612,
      "learning_rate": 1.0654399349639906e-05,
      "loss": 2.4474,
      "step": 37060
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.043986439704895,
      "learning_rate": 1.0653988494878736e-05,
      "loss": 2.4389,
      "step": 37061
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.284985065460205,
      "learning_rate": 1.0653577639008878e-05,
      "loss": 2.2677,
      "step": 37062
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0188159942626953,
      "learning_rate": 1.0653166782031028e-05,
      "loss": 2.3546,
      "step": 37063
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0133544206619263,
      "learning_rate": 1.0652755923945888e-05,
      "loss": 2.2871,
      "step": 37064
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0925722122192383,
      "learning_rate": 1.0652345064754144e-05,
      "loss": 2.3461,
      "step": 37065
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1006983518600464,
      "learning_rate": 1.06519342044565e-05,
      "loss": 2.4213,
      "step": 37066
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.089423656463623,
      "learning_rate": 1.0651523343053652e-05,
      "loss": 2.5686,
      "step": 37067
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0322041511535645,
      "learning_rate": 1.065111248054629e-05,
      "loss": 2.3542,
      "step": 37068
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9849118590354919,
      "learning_rate": 1.065070161693512e-05,
      "loss": 2.4731,
      "step": 37069
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1462738513946533,
      "learning_rate": 1.065029075222083e-05,
      "loss": 2.2697,
      "step": 37070
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.068875789642334,
      "learning_rate": 1.0649879886404125e-05,
      "loss": 2.4184,
      "step": 37071
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.089809536933899,
      "learning_rate": 1.0649469019485694e-05,
      "loss": 2.4177,
      "step": 37072
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0905665159225464,
      "learning_rate": 1.0649058151466241e-05,
      "loss": 2.4093,
      "step": 37073
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0022392272949219,
      "learning_rate": 1.0648647282346454e-05,
      "loss": 2.2112,
      "step": 37074
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1378971338272095,
      "learning_rate": 1.0648236412127032e-05,
      "loss": 2.398,
      "step": 37075
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.145198941230774,
      "learning_rate": 1.0647825540808677e-05,
      "loss": 2.3733,
      "step": 37076
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.097133755683899,
      "learning_rate": 1.064741466839208e-05,
      "loss": 2.3546,
      "step": 37077
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0916223526000977,
      "learning_rate": 1.0647003794877939e-05,
      "loss": 2.4305,
      "step": 37078
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0615582466125488,
      "learning_rate": 1.0646592920266954e-05,
      "loss": 2.113,
      "step": 37079
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9723032116889954,
      "learning_rate": 1.0646182044559817e-05,
      "loss": 2.2258,
      "step": 37080
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1797056198120117,
      "learning_rate": 1.0645771167757224e-05,
      "loss": 2.2367,
      "step": 37081
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1055899858474731,
      "learning_rate": 1.0645360289859874e-05,
      "loss": 2.3733,
      "step": 37082
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0034316778182983,
      "learning_rate": 1.0644949410868464e-05,
      "loss": 2.3106,
      "step": 37083
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0515698194503784,
      "learning_rate": 1.0644538530783688e-05,
      "loss": 2.5761,
      "step": 37084
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0202512741088867,
      "learning_rate": 1.0644127649606244e-05,
      "loss": 2.3795,
      "step": 37085
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.115694522857666,
      "learning_rate": 1.0643716767336833e-05,
      "loss": 2.3049,
      "step": 37086
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9536049962043762,
      "learning_rate": 1.0643305883976142e-05,
      "loss": 2.2343,
      "step": 37087
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0072249174118042,
      "learning_rate": 1.0642894999524876e-05,
      "loss": 2.5738,
      "step": 37088
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9923067092895508,
      "learning_rate": 1.0642484113983726e-05,
      "loss": 2.445,
      "step": 37089
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1057889461517334,
      "learning_rate": 1.0642073227353393e-05,
      "loss": 2.5566,
      "step": 37090
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0348191261291504,
      "learning_rate": 1.0641662339634572e-05,
      "loss": 2.2197,
      "step": 37091
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1191315650939941,
      "learning_rate": 1.0641251450827959e-05,
      "loss": 2.1623,
      "step": 37092
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9924424290657043,
      "learning_rate": 1.064084056093425e-05,
      "loss": 2.4124,
      "step": 37093
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1017411947250366,
      "learning_rate": 1.064042966995414e-05,
      "loss": 2.4066,
      "step": 37094
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0343265533447266,
      "learning_rate": 1.064001877788833e-05,
      "loss": 2.2988,
      "step": 37095
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0662742853164673,
      "learning_rate": 1.0639607884737516e-05,
      "loss": 2.3758,
      "step": 37096
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9678900837898254,
      "learning_rate": 1.0639196990502389e-05,
      "loss": 2.3114,
      "step": 37097
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.170560359954834,
      "learning_rate": 1.0638786095183651e-05,
      "loss": 2.3208,
      "step": 37098
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5642099380493164,
      "learning_rate": 1.0638375198781998e-05,
      "loss": 2.1329,
      "step": 37099
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0340877771377563,
      "learning_rate": 1.0637964301298126e-05,
      "loss": 2.4267,
      "step": 37100
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0959315299987793,
      "learning_rate": 1.063755340273273e-05,
      "loss": 2.3192,
      "step": 37101
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1424728631973267,
      "learning_rate": 1.0637142503086507e-05,
      "loss": 2.3947,
      "step": 37102
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0525445938110352,
      "learning_rate": 1.0636731602360154e-05,
      "loss": 2.1077,
      "step": 37103
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.173856496810913,
      "learning_rate": 1.063632070055437e-05,
      "loss": 2.2918,
      "step": 37104
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.149917721748352,
      "learning_rate": 1.0635909797669848e-05,
      "loss": 2.5246,
      "step": 37105
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.066683292388916,
      "learning_rate": 1.063549889370729e-05,
      "loss": 2.3635,
      "step": 37106
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9671275019645691,
      "learning_rate": 1.0635087988667385e-05,
      "loss": 2.4338,
      "step": 37107
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.048423409461975,
      "learning_rate": 1.0634677082550835e-05,
      "loss": 2.3513,
      "step": 37108
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0836423635482788,
      "learning_rate": 1.0634266175358334e-05,
      "loss": 2.4741,
      "step": 37109
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.216905951499939,
      "learning_rate": 1.0633855267090578e-05,
      "loss": 1.9722,
      "step": 37110
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0065385103225708,
      "learning_rate": 1.0633444357748268e-05,
      "loss": 2.3012,
      "step": 37111
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2622970342636108,
      "learning_rate": 1.0633033447332099e-05,
      "loss": 2.4959,
      "step": 37112
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0789868831634521,
      "learning_rate": 1.0632622535842762e-05,
      "loss": 2.385,
      "step": 37113
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1022337675094604,
      "learning_rate": 1.063221162328096e-05,
      "loss": 2.4934,
      "step": 37114
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0343575477600098,
      "learning_rate": 1.0631800709647388e-05,
      "loss": 2.322,
      "step": 37115
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0894993543624878,
      "learning_rate": 1.0631389794942741e-05,
      "loss": 2.3306,
      "step": 37116
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9515323638916016,
      "learning_rate": 1.0630978879167719e-05,
      "loss": 2.351,
      "step": 37117
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0523130893707275,
      "learning_rate": 1.0630567962323016e-05,
      "loss": 2.2375,
      "step": 37118
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0760424137115479,
      "learning_rate": 1.0630157044409329e-05,
      "loss": 2.5335,
      "step": 37119
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0993397235870361,
      "learning_rate": 1.0629746125427353e-05,
      "loss": 2.334,
      "step": 37120
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0343618392944336,
      "learning_rate": 1.0629335205377786e-05,
      "loss": 2.4991,
      "step": 37121
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0623692274093628,
      "learning_rate": 1.0628924284261326e-05,
      "loss": 2.1027,
      "step": 37122
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1245356798171997,
      "learning_rate": 1.0628513362078672e-05,
      "loss": 2.2714,
      "step": 37123
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0666778087615967,
      "learning_rate": 1.0628102438830512e-05,
      "loss": 2.3084,
      "step": 37124
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1382564306259155,
      "learning_rate": 1.062769151451755e-05,
      "loss": 2.2731,
      "step": 37125
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2792177200317383,
      "learning_rate": 1.0627280589140483e-05,
      "loss": 2.1677,
      "step": 37126
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.05589759349823,
      "learning_rate": 1.0626869662700001e-05,
      "loss": 2.4544,
      "step": 37127
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0689724683761597,
      "learning_rate": 1.0626458735196804e-05,
      "loss": 2.3558,
      "step": 37128
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0547908544540405,
      "learning_rate": 1.0626047806631591e-05,
      "loss": 2.2183,
      "step": 37129
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9958969354629517,
      "learning_rate": 1.0625636877005056e-05,
      "loss": 2.2746,
      "step": 37130
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0814920663833618,
      "learning_rate": 1.0625225946317898e-05,
      "loss": 2.612,
      "step": 37131
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.04880952835083,
      "learning_rate": 1.0624815014570813e-05,
      "loss": 2.3563,
      "step": 37132
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1518114805221558,
      "learning_rate": 1.0624404081764497e-05,
      "loss": 2.2674,
      "step": 37133
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.192009449005127,
      "learning_rate": 1.0623993147899643e-05,
      "loss": 2.2733,
      "step": 37134
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1999297142028809,
      "learning_rate": 1.0623582212976953e-05,
      "loss": 2.3753,
      "step": 37135
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2254812717437744,
      "learning_rate": 1.0623171276997122e-05,
      "loss": 2.3927,
      "step": 37136
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2830517292022705,
      "learning_rate": 1.0622760339960847e-05,
      "loss": 2.33,
      "step": 37137
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1545135974884033,
      "learning_rate": 1.0622349401868821e-05,
      "loss": 2.3357,
      "step": 37138
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9504961967468262,
      "learning_rate": 1.0621938462721747e-05,
      "loss": 2.3232,
      "step": 37139
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0285810232162476,
      "learning_rate": 1.062152752252032e-05,
      "loss": 2.421,
      "step": 37140
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1734181642532349,
      "learning_rate": 1.0621116581265231e-05,
      "loss": 2.4014,
      "step": 37141
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9753625392913818,
      "learning_rate": 1.0620705638957183e-05,
      "loss": 2.2601,
      "step": 37142
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1222889423370361,
      "learning_rate": 1.0620294695596869e-05,
      "loss": 2.0845,
      "step": 37143
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.084816575050354,
      "learning_rate": 1.0619883751184988e-05,
      "loss": 2.3957,
      "step": 37144
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.113459587097168,
      "learning_rate": 1.0619472805722238e-05,
      "loss": 2.2625,
      "step": 37145
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.94054114818573,
      "learning_rate": 1.061906185920931e-05,
      "loss": 2.2835,
      "step": 37146
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9599242210388184,
      "learning_rate": 1.0618650911646904e-05,
      "loss": 2.267,
      "step": 37147
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0476573705673218,
      "learning_rate": 1.0618239963035716e-05,
      "loss": 2.113,
      "step": 37148
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0571035146713257,
      "learning_rate": 1.0617829013376445e-05,
      "loss": 2.2769,
      "step": 37149
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0666320323944092,
      "learning_rate": 1.0617418062669786e-05,
      "loss": 2.1349,
      "step": 37150
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.953620433807373,
      "learning_rate": 1.0617007110916438e-05,
      "loss": 2.4772,
      "step": 37151
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1815009117126465,
      "learning_rate": 1.0616596158117092e-05,
      "loss": 2.2774,
      "step": 37152
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.064071774482727,
      "learning_rate": 1.0616185204272448e-05,
      "loss": 2.3789,
      "step": 37153
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1545459032058716,
      "learning_rate": 1.0615774249383206e-05,
      "loss": 2.5274,
      "step": 37154
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2515265941619873,
      "learning_rate": 1.0615363293450055e-05,
      "loss": 2.32,
      "step": 37155
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1040433645248413,
      "learning_rate": 1.0614952336473699e-05,
      "loss": 2.4707,
      "step": 37156
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.110323429107666,
      "learning_rate": 1.061454137845483e-05,
      "loss": 2.4859,
      "step": 37157
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1575963497161865,
      "learning_rate": 1.0614130419394152e-05,
      "loss": 2.4792,
      "step": 37158
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2700464725494385,
      "learning_rate": 1.0613719459292348e-05,
      "loss": 2.466,
      "step": 37159
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4202497005462646,
      "learning_rate": 1.0613308498150128e-05,
      "loss": 2.498,
      "step": 37160
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2182050943374634,
      "learning_rate": 1.0612897535968181e-05,
      "loss": 2.4428,
      "step": 37161
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.288142442703247,
      "learning_rate": 1.0612486572747206e-05,
      "loss": 2.3965,
      "step": 37162
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1017348766326904,
      "learning_rate": 1.0612075608487901e-05,
      "loss": 2.1564,
      "step": 37163
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0285075902938843,
      "learning_rate": 1.0611664643190962e-05,
      "loss": 2.2646,
      "step": 37164
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0419267416000366,
      "learning_rate": 1.0611253676857087e-05,
      "loss": 2.2132,
      "step": 37165
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3405413627624512,
      "learning_rate": 1.0610842709486968e-05,
      "loss": 2.3311,
      "step": 37166
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1146891117095947,
      "learning_rate": 1.0610431741081307e-05,
      "loss": 2.3695,
      "step": 37167
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0808930397033691,
      "learning_rate": 1.0610020771640794e-05,
      "loss": 2.4561,
      "step": 37168
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0960805416107178,
      "learning_rate": 1.0609609801166134e-05,
      "loss": 2.2717,
      "step": 37169
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4013044834136963,
      "learning_rate": 1.0609198829658019e-05,
      "loss": 2.2864,
      "step": 37170
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2446534633636475,
      "learning_rate": 1.0608787857117146e-05,
      "loss": 2.372,
      "step": 37171
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0355018377304077,
      "learning_rate": 1.0608376883544214e-05,
      "loss": 2.3251,
      "step": 37172
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0877506732940674,
      "learning_rate": 1.0607965908939917e-05,
      "loss": 2.4219,
      "step": 37173
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1225919723510742,
      "learning_rate": 1.0607554933304951e-05,
      "loss": 2.4071,
      "step": 37174
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0603044033050537,
      "learning_rate": 1.0607143956640017e-05,
      "loss": 2.4267,
      "step": 37175
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2243854999542236,
      "learning_rate": 1.0606732978945807e-05,
      "loss": 2.3239,
      "step": 37176
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3727582693099976,
      "learning_rate": 1.060632200022302e-05,
      "loss": 2.247,
      "step": 37177
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5061012506484985,
      "learning_rate": 1.0605911020472354e-05,
      "loss": 2.3101,
      "step": 37178
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1480624675750732,
      "learning_rate": 1.0605500039694504e-05,
      "loss": 2.7038,
      "step": 37179
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1645687818527222,
      "learning_rate": 1.0605089057890166e-05,
      "loss": 2.3017,
      "step": 37180
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.034254789352417,
      "learning_rate": 1.0604678075060038e-05,
      "loss": 2.1842,
      "step": 37181
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1020747423171997,
      "learning_rate": 1.0604267091204819e-05,
      "loss": 2.2764,
      "step": 37182
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2189316749572754,
      "learning_rate": 1.0603856106325198e-05,
      "loss": 2.369,
      "step": 37183
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0390557050704956,
      "learning_rate": 1.060344512042188e-05,
      "loss": 2.2097,
      "step": 37184
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9678075909614563,
      "learning_rate": 1.060303413349556e-05,
      "loss": 2.3391,
      "step": 37185
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2036807537078857,
      "learning_rate": 1.0602623145546934e-05,
      "loss": 2.4661,
      "step": 37186
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.326024055480957,
      "learning_rate": 1.0602212156576694e-05,
      "loss": 2.3187,
      "step": 37187
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0258581638336182,
      "learning_rate": 1.0601801166585541e-05,
      "loss": 2.3907,
      "step": 37188
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0573784112930298,
      "learning_rate": 1.0601390175574173e-05,
      "loss": 2.3162,
      "step": 37189
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1433185338974,
      "learning_rate": 1.0600979183543285e-05,
      "loss": 2.2274,
      "step": 37190
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0110207796096802,
      "learning_rate": 1.0600568190493577e-05,
      "loss": 2.2991,
      "step": 37191
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0939075946807861,
      "learning_rate": 1.060015719642574e-05,
      "loss": 2.2597,
      "step": 37192
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.025482416152954,
      "learning_rate": 1.0599746201340472e-05,
      "loss": 2.5773,
      "step": 37193
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1578174829483032,
      "learning_rate": 1.0599335205238475e-05,
      "loss": 2.1622,
      "step": 37194
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0744781494140625,
      "learning_rate": 1.0598924208120439e-05,
      "loss": 2.3988,
      "step": 37195
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1324392557144165,
      "learning_rate": 1.0598513209987065e-05,
      "loss": 2.3041,
      "step": 37196
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0521533489227295,
      "learning_rate": 1.0598102210839047e-05,
      "loss": 2.1573,
      "step": 37197
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1227394342422485,
      "learning_rate": 1.0597691210677088e-05,
      "loss": 2.4127,
      "step": 37198
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1167845726013184,
      "learning_rate": 1.0597280209501876e-05,
      "loss": 2.3203,
      "step": 37199
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.086681604385376,
      "learning_rate": 1.0596869207314113e-05,
      "loss": 2.4981,
      "step": 37200
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1530771255493164,
      "learning_rate": 1.0596458204114492e-05,
      "loss": 2.4448,
      "step": 37201
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0952814817428589,
      "learning_rate": 1.0596047199903716e-05,
      "loss": 2.481,
      "step": 37202
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0851967334747314,
      "learning_rate": 1.0595636194682477e-05,
      "loss": 2.3352,
      "step": 37203
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.786868691444397,
      "learning_rate": 1.0595225188451475e-05,
      "loss": 2.2966,
      "step": 37204
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0344157218933105,
      "learning_rate": 1.0594814181211402e-05,
      "loss": 2.2297,
      "step": 37205
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1891392469406128,
      "learning_rate": 1.0594403172962957e-05,
      "loss": 2.3498,
      "step": 37206
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0878243446350098,
      "learning_rate": 1.0593992163706837e-05,
      "loss": 2.4573,
      "step": 37207
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2616350650787354,
      "learning_rate": 1.059358115344374e-05,
      "loss": 2.3557,
      "step": 37208
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0026525259017944,
      "learning_rate": 1.059317014217436e-05,
      "loss": 2.253,
      "step": 37209
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0606153011322021,
      "learning_rate": 1.05927591298994e-05,
      "loss": 2.12,
      "step": 37210
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.152827501296997,
      "learning_rate": 1.0592348116619549e-05,
      "loss": 2.4721,
      "step": 37211
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2645423412322998,
      "learning_rate": 1.0591937102335508e-05,
      "loss": 2.3776,
      "step": 37212
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1553840637207031,
      "learning_rate": 1.0591526087047971e-05,
      "loss": 2.2237,
      "step": 37213
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1176893711090088,
      "learning_rate": 1.0591115070757641e-05,
      "loss": 2.4918,
      "step": 37214
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.172206163406372,
      "learning_rate": 1.0590704053465207e-05,
      "loss": 2.2992,
      "step": 37215
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9710507988929749,
      "learning_rate": 1.0590293035171368e-05,
      "loss": 2.3445,
      "step": 37216
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1594314575195312,
      "learning_rate": 1.0589882015876822e-05,
      "loss": 2.2824,
      "step": 37217
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.169629693031311,
      "learning_rate": 1.0589470995582269e-05,
      "loss": 2.3266,
      "step": 37218
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9777478575706482,
      "learning_rate": 1.0589059974288402e-05,
      "loss": 2.2037,
      "step": 37219
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1754752397537231,
      "learning_rate": 1.0588648951995917e-05,
      "loss": 2.397,
      "step": 37220
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.100539207458496,
      "learning_rate": 1.0588237928705512e-05,
      "loss": 2.3812,
      "step": 37221
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1013050079345703,
      "learning_rate": 1.0587826904417884e-05,
      "loss": 2.2725,
      "step": 37222
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.553202748298645,
      "learning_rate": 1.058741587913373e-05,
      "loss": 2.3635,
      "step": 37223
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1202754974365234,
      "learning_rate": 1.0587004852853749e-05,
      "loss": 2.2763,
      "step": 37224
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1155662536621094,
      "learning_rate": 1.058659382557863e-05,
      "loss": 2.4402,
      "step": 37225
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.014208197593689,
      "learning_rate": 1.0586182797309079e-05,
      "loss": 2.1975,
      "step": 37226
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0361385345458984,
      "learning_rate": 1.0585771768045787e-05,
      "loss": 2.447,
      "step": 37227
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1020456552505493,
      "learning_rate": 1.0585360737789454e-05,
      "loss": 2.4639,
      "step": 37228
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0685646533966064,
      "learning_rate": 1.0584949706540774e-05,
      "loss": 2.2665,
      "step": 37229
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1359140872955322,
      "learning_rate": 1.0584538674300448e-05,
      "loss": 2.3123,
      "step": 37230
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1338813304901123,
      "learning_rate": 1.0584127641069171e-05,
      "loss": 2.502,
      "step": 37231
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.027811884880066,
      "learning_rate": 1.0583716606847636e-05,
      "loss": 2.3448,
      "step": 37232
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.184609055519104,
      "learning_rate": 1.0583305571636544e-05,
      "loss": 2.5109,
      "step": 37233
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0385987758636475,
      "learning_rate": 1.058289453543659e-05,
      "loss": 2.3164,
      "step": 37234
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1088671684265137,
      "learning_rate": 1.058248349824847e-05,
      "loss": 2.3006,
      "step": 37235
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.99042147397995,
      "learning_rate": 1.0582072460072886e-05,
      "loss": 2.1972,
      "step": 37236
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0415135622024536,
      "learning_rate": 1.058166142091053e-05,
      "loss": 2.1738,
      "step": 37237
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0071437358856201,
      "learning_rate": 1.0581250380762097e-05,
      "loss": 2.3958,
      "step": 37238
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0441666841506958,
      "learning_rate": 1.0580839339628291e-05,
      "loss": 2.3033,
      "step": 37239
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3050264120101929,
      "learning_rate": 1.0580428297509802e-05,
      "loss": 2.2538,
      "step": 37240
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0709104537963867,
      "learning_rate": 1.058001725440733e-05,
      "loss": 2.2246,
      "step": 37241
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3554775714874268,
      "learning_rate": 1.0579606210321572e-05,
      "loss": 2.2963,
      "step": 37242
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.9851405024528503,
      "learning_rate": 1.0579195165253223e-05,
      "loss": 2.275,
      "step": 37243
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0692615509033203,
      "learning_rate": 1.0578784119202983e-05,
      "loss": 2.2266,
      "step": 37244
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2110786437988281,
      "learning_rate": 1.0578373072171544e-05,
      "loss": 2.5109,
      "step": 37245
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2328006029129028,
      "learning_rate": 1.0577962024159604e-05,
      "loss": 2.4782,
      "step": 37246
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1755117177963257,
      "learning_rate": 1.0577550975167865e-05,
      "loss": 2.3585,
      "step": 37247
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0277717113494873,
      "learning_rate": 1.0577139925197018e-05,
      "loss": 2.4877,
      "step": 37248
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.066489577293396,
      "learning_rate": 1.0576728874247763e-05,
      "loss": 2.6589,
      "step": 37249
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1006475687026978,
      "learning_rate": 1.0576317822320797e-05,
      "loss": 2.3232,
      "step": 37250
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1644952297210693,
      "learning_rate": 1.0575906769416816e-05,
      "loss": 2.405,
      "step": 37251
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0519441366195679,
      "learning_rate": 1.0575495715536515e-05,
      "loss": 2.4408,
      "step": 37252
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.038671612739563,
      "learning_rate": 1.0575084660680592e-05,
      "loss": 2.2508,
      "step": 37253
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0844582319259644,
      "learning_rate": 1.0574673604849743e-05,
      "loss": 2.3707,
      "step": 37254
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0413426160812378,
      "learning_rate": 1.057426254804467e-05,
      "loss": 2.2674,
      "step": 37255
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.206064224243164,
      "learning_rate": 1.0573851490266063e-05,
      "loss": 2.4062,
      "step": 37256
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0332967042922974,
      "learning_rate": 1.0573440431514625e-05,
      "loss": 2.3215,
      "step": 37257
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9165018200874329,
      "learning_rate": 1.0573029371791048e-05,
      "loss": 2.3167,
      "step": 37258
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1488356590270996,
      "learning_rate": 1.057261831109603e-05,
      "loss": 2.5501,
      "step": 37259
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.012453317642212,
      "learning_rate": 1.0572207249430269e-05,
      "loss": 2.381,
      "step": 37260
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0813095569610596,
      "learning_rate": 1.0571796186794462e-05,
      "loss": 2.3439,
      "step": 37261
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.118011713027954,
      "learning_rate": 1.0571385123189301e-05,
      "loss": 2.2388,
      "step": 37262
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1219661235809326,
      "learning_rate": 1.0570974058615491e-05,
      "loss": 2.3687,
      "step": 37263
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.101958990097046,
      "learning_rate": 1.0570562993073726e-05,
      "loss": 2.3119,
      "step": 37264
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0296553373336792,
      "learning_rate": 1.05701519265647e-05,
      "loss": 2.27,
      "step": 37265
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0025289058685303,
      "learning_rate": 1.056974085908911e-05,
      "loss": 2.4005,
      "step": 37266
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.158039927482605,
      "learning_rate": 1.0569329790647656e-05,
      "loss": 2.2574,
      "step": 37267
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2807574272155762,
      "learning_rate": 1.0568918721241032e-05,
      "loss": 2.3568,
      "step": 37268
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1628587245941162,
      "learning_rate": 1.0568507650869942e-05,
      "loss": 2.4851,
      "step": 37269
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1538618803024292,
      "learning_rate": 1.0568096579535072e-05,
      "loss": 2.4228,
      "step": 37270
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0436017513275146,
      "learning_rate": 1.0567685507237125e-05,
      "loss": 2.4073,
      "step": 37271
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.039263129234314,
      "learning_rate": 1.0567274433976797e-05,
      "loss": 2.3408,
      "step": 37272
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3767012357711792,
      "learning_rate": 1.0566863359754784e-05,
      "loss": 2.2743,
      "step": 37273
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0006667375564575,
      "learning_rate": 1.0566452284571784e-05,
      "loss": 2.2331,
      "step": 37274
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9666184782981873,
      "learning_rate": 1.0566041208428495e-05,
      "loss": 2.0685,
      "step": 37275
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1506128311157227,
      "learning_rate": 1.056563013132561e-05,
      "loss": 2.4511,
      "step": 37276
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0448520183563232,
      "learning_rate": 1.0565219053263831e-05,
      "loss": 2.3897,
      "step": 37277
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0753813982009888,
      "learning_rate": 1.0564807974243852e-05,
      "loss": 2.3443,
      "step": 37278
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1235500574111938,
      "learning_rate": 1.0564396894266366e-05,
      "loss": 2.3341,
      "step": 37279
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.171846866607666,
      "learning_rate": 1.0563985813332078e-05,
      "loss": 2.4528,
      "step": 37280
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1485000848770142,
      "learning_rate": 1.0563574731441678e-05,
      "loss": 2.4714,
      "step": 37281
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9906587600708008,
      "learning_rate": 1.0563163648595867e-05,
      "loss": 2.3408,
      "step": 37282
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0197608470916748,
      "learning_rate": 1.0562752564795342e-05,
      "loss": 2.2758,
      "step": 37283
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0699926614761353,
      "learning_rate": 1.05623414800408e-05,
      "loss": 2.3439,
      "step": 37284
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9927539229393005,
      "learning_rate": 1.0561930394332933e-05,
      "loss": 2.3723,
      "step": 37285
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2562576532363892,
      "learning_rate": 1.056151930767244e-05,
      "loss": 2.293,
      "step": 37286
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1821070909500122,
      "learning_rate": 1.0561108220060023e-05,
      "loss": 2.3317,
      "step": 37287
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2146008014678955,
      "learning_rate": 1.0560697131496373e-05,
      "loss": 2.4849,
      "step": 37288
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.065864086151123,
      "learning_rate": 1.0560286041982191e-05,
      "loss": 2.5331,
      "step": 37289
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9974269270896912,
      "learning_rate": 1.0559874951518172e-05,
      "loss": 2.1513,
      "step": 37290
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0661381483078003,
      "learning_rate": 1.055946386010501e-05,
      "loss": 2.4117,
      "step": 37291
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.050145149230957,
      "learning_rate": 1.0559052767743407e-05,
      "loss": 2.3279,
      "step": 37292
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.000581979751587,
      "learning_rate": 1.0558641674434058e-05,
      "loss": 1.9917,
      "step": 37293
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1911799907684326,
      "learning_rate": 1.0558230580177657e-05,
      "loss": 2.5305,
      "step": 37294
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1147465705871582,
      "learning_rate": 1.0557819484974905e-05,
      "loss": 2.5292,
      "step": 37295
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9947388172149658,
      "learning_rate": 1.0557408388826499e-05,
      "loss": 2.3647,
      "step": 37296
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0717424154281616,
      "learning_rate": 1.0556997291733135e-05,
      "loss": 2.1145,
      "step": 37297
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1350651979446411,
      "learning_rate": 1.0556586193695506e-05,
      "loss": 2.3856,
      "step": 37298
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1018749475479126,
      "learning_rate": 1.0556175094714313e-05,
      "loss": 2.2127,
      "step": 37299
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2207534313201904,
      "learning_rate": 1.0555763994790256e-05,
      "loss": 2.5679,
      "step": 37300
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0172029733657837,
      "learning_rate": 1.0555352893924025e-05,
      "loss": 2.4723,
      "step": 37301
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1368659734725952,
      "learning_rate": 1.055494179211632e-05,
      "loss": 2.5015,
      "step": 37302
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0746837854385376,
      "learning_rate": 1.055453068936784e-05,
      "loss": 2.5122,
      "step": 37303
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1112865209579468,
      "learning_rate": 1.0554119585679276e-05,
      "loss": 2.3739,
      "step": 37304
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9253544807434082,
      "learning_rate": 1.0553708481051333e-05,
      "loss": 2.2158,
      "step": 37305
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1529014110565186,
      "learning_rate": 1.05532973754847e-05,
      "loss": 2.2213,
      "step": 37306
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0821597576141357,
      "learning_rate": 1.0552886268980078e-05,
      "loss": 2.2869,
      "step": 37307
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2243685722351074,
      "learning_rate": 1.0552475161538164e-05,
      "loss": 2.3223,
      "step": 37308
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1544183492660522,
      "learning_rate": 1.0552064053159656e-05,
      "loss": 2.3939,
      "step": 37309
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.056128978729248,
      "learning_rate": 1.0551652943845252e-05,
      "loss": 2.3457,
      "step": 37310
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0202401876449585,
      "learning_rate": 1.0551241833595642e-05,
      "loss": 2.0838,
      "step": 37311
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.96525639295578,
      "learning_rate": 1.0550830722411529e-05,
      "loss": 2.2499,
      "step": 37312
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0058420896530151,
      "learning_rate": 1.0550419610293607e-05,
      "loss": 2.2929,
      "step": 37313
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2301719188690186,
      "learning_rate": 1.0550008497242575e-05,
      "loss": 2.2776,
      "step": 37314
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0636872053146362,
      "learning_rate": 1.0549597383259129e-05,
      "loss": 2.1458,
      "step": 37315
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1572693586349487,
      "learning_rate": 1.054918626834397e-05,
      "loss": 2.3904,
      "step": 37316
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2086827754974365,
      "learning_rate": 1.0548775152497785e-05,
      "loss": 2.3385,
      "step": 37317
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9636045098304749,
      "learning_rate": 1.054836403572128e-05,
      "loss": 2.5248,
      "step": 37318
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2599120140075684,
      "learning_rate": 1.054795291801515e-05,
      "loss": 2.3755,
      "step": 37319
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1178650856018066,
      "learning_rate": 1.054754179938009e-05,
      "loss": 2.4607,
      "step": 37320
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2828819751739502,
      "learning_rate": 1.0547130679816797e-05,
      "loss": 2.306,
      "step": 37321
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1486248970031738,
      "learning_rate": 1.054671955932597e-05,
      "loss": 2.4271,
      "step": 37322
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1409121751785278,
      "learning_rate": 1.0546308437908307e-05,
      "loss": 2.6446,
      "step": 37323
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2183315753936768,
      "learning_rate": 1.0545897315564496e-05,
      "loss": 2.5835,
      "step": 37324
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1726939678192139,
      "learning_rate": 1.0545486192295247e-05,
      "loss": 2.5554,
      "step": 37325
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2971851825714111,
      "learning_rate": 1.0545075068101249e-05,
      "loss": 2.2893,
      "step": 37326
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1213265657424927,
      "learning_rate": 1.05446639429832e-05,
      "loss": 2.2969,
      "step": 37327
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0313825607299805,
      "learning_rate": 1.05442528169418e-05,
      "loss": 2.134,
      "step": 37328
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1452373266220093,
      "learning_rate": 1.054384168997774e-05,
      "loss": 2.1402,
      "step": 37329
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0002436637878418,
      "learning_rate": 1.0543430562091723e-05,
      "loss": 2.6021,
      "step": 37330
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9784189462661743,
      "learning_rate": 1.0543019433284445e-05,
      "loss": 2.3163,
      "step": 37331
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.091363549232483,
      "learning_rate": 1.0542608303556601e-05,
      "loss": 2.2339,
      "step": 37332
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0078037977218628,
      "learning_rate": 1.0542197172908885e-05,
      "loss": 2.5696,
      "step": 37333
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0564078092575073,
      "learning_rate": 1.0541786041342e-05,
      "loss": 2.1158,
      "step": 37334
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.003011703491211,
      "learning_rate": 1.0541374908856641e-05,
      "loss": 2.3044,
      "step": 37335
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0297977924346924,
      "learning_rate": 1.0540963775453505e-05,
      "loss": 2.3366,
      "step": 37336
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9932360649108887,
      "learning_rate": 1.0540552641133287e-05,
      "loss": 2.3425,
      "step": 37337
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6565450429916382,
      "learning_rate": 1.0540141505896685e-05,
      "loss": 2.378,
      "step": 37338
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0669523477554321,
      "learning_rate": 1.0539730369744398e-05,
      "loss": 2.4778,
      "step": 37339
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0833138227462769,
      "learning_rate": 1.0539319232677121e-05,
      "loss": 2.4463,
      "step": 37340
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0475600957870483,
      "learning_rate": 1.053890809469555e-05,
      "loss": 2.2645,
      "step": 37341
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0841156244277954,
      "learning_rate": 1.0538496955800384e-05,
      "loss": 2.4415,
      "step": 37342
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0251191854476929,
      "learning_rate": 1.0538085815992323e-05,
      "loss": 2.3891,
      "step": 37343
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.019750952720642,
      "learning_rate": 1.0537674675272056e-05,
      "loss": 2.4459,
      "step": 37344
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.032891035079956,
      "learning_rate": 1.0537263533640285e-05,
      "loss": 2.5202,
      "step": 37345
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.041783332824707,
      "learning_rate": 1.0536852391097707e-05,
      "loss": 2.2633,
      "step": 37346
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.995901346206665,
      "learning_rate": 1.0536441247645017e-05,
      "loss": 2.3335,
      "step": 37347
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0713934898376465,
      "learning_rate": 1.0536030103282914e-05,
      "loss": 2.3526,
      "step": 37348
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1737091541290283,
      "learning_rate": 1.0535618958012099e-05,
      "loss": 2.3331,
      "step": 37349
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0427943468093872,
      "learning_rate": 1.0535207811833261e-05,
      "loss": 2.1035,
      "step": 37350
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9786615371704102,
      "learning_rate": 1.05347966647471e-05,
      "loss": 2.4689,
      "step": 37351
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0620019435882568,
      "learning_rate": 1.0534385516754312e-05,
      "loss": 2.2308,
      "step": 37352
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9902092814445496,
      "learning_rate": 1.0533974367855597e-05,
      "loss": 2.4125,
      "step": 37353
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0302379131317139,
      "learning_rate": 1.053356321805165e-05,
      "loss": 2.4995,
      "step": 37354
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.102423906326294,
      "learning_rate": 1.0533152067343168e-05,
      "loss": 2.5671,
      "step": 37355
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9515507817268372,
      "learning_rate": 1.053274091573085e-05,
      "loss": 2.3146,
      "step": 37356
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1425037384033203,
      "learning_rate": 1.0532329763215392e-05,
      "loss": 2.5455,
      "step": 37357
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3047072887420654,
      "learning_rate": 1.0531918609797488e-05,
      "loss": 2.3971,
      "step": 37358
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.144389271736145,
      "learning_rate": 1.053150745547784e-05,
      "loss": 2.3634,
      "step": 37359
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1114965677261353,
      "learning_rate": 1.0531096300257143e-05,
      "loss": 2.2275,
      "step": 37360
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0313912630081177,
      "learning_rate": 1.053068514413609e-05,
      "loss": 2.5658,
      "step": 37361
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0817439556121826,
      "learning_rate": 1.0530273987115382e-05,
      "loss": 2.2369,
      "step": 37362
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.218851089477539,
      "learning_rate": 1.052986282919572e-05,
      "loss": 2.6089,
      "step": 37363
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0006588697433472,
      "learning_rate": 1.0529451670377793e-05,
      "loss": 2.2538,
      "step": 37364
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0699784755706787,
      "learning_rate": 1.0529040510662301e-05,
      "loss": 2.2706,
      "step": 37365
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.135219693183899,
      "learning_rate": 1.0528629350049942e-05,
      "loss": 2.2633,
      "step": 37366
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0443230867385864,
      "learning_rate": 1.0528218188541412e-05,
      "loss": 2.2226,
      "step": 37367
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0372565984725952,
      "learning_rate": 1.0527807026137411e-05,
      "loss": 2.039,
      "step": 37368
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0143153667449951,
      "learning_rate": 1.0527395862838634e-05,
      "loss": 2.4185,
      "step": 37369
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.957419753074646,
      "learning_rate": 1.0526984698645777e-05,
      "loss": 2.4794,
      "step": 37370
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0908740758895874,
      "learning_rate": 1.0526573533559536e-05,
      "loss": 2.3881,
      "step": 37371
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0529392957687378,
      "learning_rate": 1.0526162367580612e-05,
      "loss": 2.4166,
      "step": 37372
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1916745901107788,
      "learning_rate": 1.0525751200709698e-05,
      "loss": 2.3069,
      "step": 37373
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2308975458145142,
      "learning_rate": 1.0525340032947493e-05,
      "loss": 2.4932,
      "step": 37374
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0067174434661865,
      "learning_rate": 1.0524928864294694e-05,
      "loss": 2.6325,
      "step": 37375
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0528993606567383,
      "learning_rate": 1.0524517694752004e-05,
      "loss": 2.3652,
      "step": 37376
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0925993919372559,
      "learning_rate": 1.0524106524320107e-05,
      "loss": 2.4154,
      "step": 37377
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.064702033996582,
      "learning_rate": 1.0523695352999707e-05,
      "loss": 2.4138,
      "step": 37378
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.054605484008789,
      "learning_rate": 1.0523284180791504e-05,
      "loss": 2.4243,
      "step": 37379
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1194660663604736,
      "learning_rate": 1.0522873007696192e-05,
      "loss": 2.6741,
      "step": 37380
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1249003410339355,
      "learning_rate": 1.0522461833714468e-05,
      "loss": 2.4237,
      "step": 37381
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0878713130950928,
      "learning_rate": 1.052205065884703e-05,
      "loss": 2.4938,
      "step": 37382
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.25714910030365,
      "learning_rate": 1.0521639483094572e-05,
      "loss": 2.4061,
      "step": 37383
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0956594944000244,
      "learning_rate": 1.0521228306457796e-05,
      "loss": 2.3452,
      "step": 37384
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.027519941329956,
      "learning_rate": 1.0520817128937396e-05,
      "loss": 2.2558,
      "step": 37385
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1160566806793213,
      "learning_rate": 1.0520405950534067e-05,
      "loss": 2.2595,
      "step": 37386
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.080844759941101,
      "learning_rate": 1.0519994771248513e-05,
      "loss": 2.3635,
      "step": 37387
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1535323858261108,
      "learning_rate": 1.0519583591081422e-05,
      "loss": 2.2639,
      "step": 37388
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1131863594055176,
      "learning_rate": 1.0519172410033497e-05,
      "loss": 2.4207,
      "step": 37389
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.050652027130127,
      "learning_rate": 1.0518761228105436e-05,
      "loss": 2.3911,
      "step": 37390
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0205684900283813,
      "learning_rate": 1.0518350045297932e-05,
      "loss": 2.308,
      "step": 37391
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.11116623878479,
      "learning_rate": 1.0517938861611686e-05,
      "loss": 2.5193,
      "step": 37392
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0910085439682007,
      "learning_rate": 1.051752767704739e-05,
      "loss": 2.3486,
      "step": 37393
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1084582805633545,
      "learning_rate": 1.0517116491605744e-05,
      "loss": 2.4022,
      "step": 37394
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0621951818466187,
      "learning_rate": 1.0516705305287445e-05,
      "loss": 2.3343,
      "step": 37395
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.093569040298462,
      "learning_rate": 1.0516294118093193e-05,
      "loss": 2.276,
      "step": 37396
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1434929370880127,
      "learning_rate": 1.0515882930023681e-05,
      "loss": 2.1538,
      "step": 37397
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1131623983383179,
      "learning_rate": 1.0515471741079605e-05,
      "loss": 2.5915,
      "step": 37398
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3398542404174805,
      "learning_rate": 1.0515060551261667e-05,
      "loss": 2.288,
      "step": 37399
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.014467716217041,
      "learning_rate": 1.0514649360570562e-05,
      "loss": 2.4057,
      "step": 37400
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.050791621208191,
      "learning_rate": 1.0514238169006985e-05,
      "loss": 2.2298,
      "step": 37401
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.096221685409546,
      "learning_rate": 1.0513826976571634e-05,
      "loss": 2.3441,
      "step": 37402
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1097345352172852,
      "learning_rate": 1.0513415783265208e-05,
      "loss": 2.2862,
      "step": 37403
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1012322902679443,
      "learning_rate": 1.0513004589088403e-05,
      "loss": 2.1074,
      "step": 37404
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.137070894241333,
      "learning_rate": 1.0512593394041915e-05,
      "loss": 2.5533,
      "step": 37405
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0659849643707275,
      "learning_rate": 1.051218219812644e-05,
      "loss": 2.3821,
      "step": 37406
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0553663969039917,
      "learning_rate": 1.0511771001342682e-05,
      "loss": 2.2118,
      "step": 37407
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0832924842834473,
      "learning_rate": 1.051135980369133e-05,
      "loss": 2.2156,
      "step": 37408
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1987850666046143,
      "learning_rate": 1.0510948605173087e-05,
      "loss": 2.3854,
      "step": 37409
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.058294415473938,
      "learning_rate": 1.0510537405788643e-05,
      "loss": 2.3217,
      "step": 37410
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0759258270263672,
      "learning_rate": 1.0510126205538702e-05,
      "loss": 2.592,
      "step": 37411
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1506778001785278,
      "learning_rate": 1.050971500442396e-05,
      "loss": 2.4528,
      "step": 37412
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.032447338104248,
      "learning_rate": 1.0509303802445109e-05,
      "loss": 2.6305,
      "step": 37413
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1081186532974243,
      "learning_rate": 1.0508892599602851e-05,
      "loss": 2.2329,
      "step": 37414
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9870256781578064,
      "learning_rate": 1.0508481395897884e-05,
      "loss": 2.5294,
      "step": 37415
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0879278182983398,
      "learning_rate": 1.0508070191330902e-05,
      "loss": 2.217,
      "step": 37416
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2011066675186157,
      "learning_rate": 1.05076589859026e-05,
      "loss": 2.2593,
      "step": 37417
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1281886100769043,
      "learning_rate": 1.0507247779613684e-05,
      "loss": 2.4267,
      "step": 37418
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0926408767700195,
      "learning_rate": 1.050683657246484e-05,
      "loss": 2.268,
      "step": 37419
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.079979419708252,
      "learning_rate": 1.0506425364456771e-05,
      "loss": 2.1768,
      "step": 37420
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0379211902618408,
      "learning_rate": 1.0506014155590177e-05,
      "loss": 2.4677,
      "step": 37421
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0658844709396362,
      "learning_rate": 1.050560294586575e-05,
      "loss": 2.321,
      "step": 37422
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1902369260787964,
      "learning_rate": 1.0505191735284188e-05,
      "loss": 2.2865,
      "step": 37423
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9397465586662292,
      "learning_rate": 1.0504780523846188e-05,
      "loss": 2.4011,
      "step": 37424
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1872280836105347,
      "learning_rate": 1.0504369311552448e-05,
      "loss": 2.2518,
      "step": 37425
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0223301649093628,
      "learning_rate": 1.0503958098403665e-05,
      "loss": 2.2667,
      "step": 37426
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0511707067489624,
      "learning_rate": 1.0503546884400535e-05,
      "loss": 2.3782,
      "step": 37427
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0290472507476807,
      "learning_rate": 1.0503135669543758e-05,
      "loss": 2.3148,
      "step": 37428
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1465984582901,
      "learning_rate": 1.0502724453834032e-05,
      "loss": 2.2047,
      "step": 37429
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.199570655822754,
      "learning_rate": 1.0502313237272048e-05,
      "loss": 2.3758,
      "step": 37430
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1193230152130127,
      "learning_rate": 1.0501902019858507e-05,
      "loss": 2.6463,
      "step": 37431
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1276869773864746,
      "learning_rate": 1.0501490801594107e-05,
      "loss": 2.3153,
      "step": 37432
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0586494207382202,
      "learning_rate": 1.0501079582479543e-05,
      "loss": 2.4492,
      "step": 37433
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1584833860397339,
      "learning_rate": 1.0500668362515512e-05,
      "loss": 2.4004,
      "step": 37434
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1969348192214966,
      "learning_rate": 1.0500257141702714e-05,
      "loss": 2.146,
      "step": 37435
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0725064277648926,
      "learning_rate": 1.0499845920041845e-05,
      "loss": 2.3016,
      "step": 37436
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0696518421173096,
      "learning_rate": 1.04994346975336e-05,
      "loss": 2.394,
      "step": 37437
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.147876262664795,
      "learning_rate": 1.0499023474178675e-05,
      "loss": 2.3213,
      "step": 37438
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1070520877838135,
      "learning_rate": 1.0498612249977774e-05,
      "loss": 2.3766,
      "step": 37439
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0889075994491577,
      "learning_rate": 1.0498201024931586e-05,
      "loss": 2.244,
      "step": 37440
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2621105909347534,
      "learning_rate": 1.0497789799040814e-05,
      "loss": 2.2601,
      "step": 37441
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2494897842407227,
      "learning_rate": 1.0497378572306156e-05,
      "loss": 2.0526,
      "step": 37442
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0772281885147095,
      "learning_rate": 1.0496967344728302e-05,
      "loss": 2.3498,
      "step": 37443
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1623536348342896,
      "learning_rate": 1.0496556116307954e-05,
      "loss": 2.4872,
      "step": 37444
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0226974487304688,
      "learning_rate": 1.049614488704581e-05,
      "loss": 2.4387,
      "step": 37445
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0638073682785034,
      "learning_rate": 1.0495733656942566e-05,
      "loss": 2.2436,
      "step": 37446
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0457425117492676,
      "learning_rate": 1.0495322425998917e-05,
      "loss": 2.2919,
      "step": 37447
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3784196376800537,
      "learning_rate": 1.0494911194215563e-05,
      "loss": 2.0323,
      "step": 37448
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0156121253967285,
      "learning_rate": 1.0494499961593201e-05,
      "loss": 2.4668,
      "step": 37449
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9818880558013916,
      "learning_rate": 1.0494088728132526e-05,
      "loss": 2.3755,
      "step": 37450
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1215593814849854,
      "learning_rate": 1.0493677493834236e-05,
      "loss": 2.3332,
      "step": 37451
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.010672926902771,
      "learning_rate": 1.049326625869903e-05,
      "loss": 2.1099,
      "step": 37452
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1048130989074707,
      "learning_rate": 1.0492855022727601e-05,
      "loss": 2.2362,
      "step": 37453
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.066426157951355,
      "learning_rate": 1.0492443785920649e-05,
      "loss": 2.2051,
      "step": 37454
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9932700991630554,
      "learning_rate": 1.0492032548278877e-05,
      "loss": 2.445,
      "step": 37455
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.052842617034912,
      "learning_rate": 1.049162130980297e-05,
      "loss": 2.4284,
      "step": 37456
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0811666250228882,
      "learning_rate": 1.0491210070493634e-05,
      "loss": 2.1615,
      "step": 37457
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9822971224784851,
      "learning_rate": 1.0490798830351562e-05,
      "loss": 2.2448,
      "step": 37458
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.021163821220398,
      "learning_rate": 1.0490387589377452e-05,
      "loss": 2.1114,
      "step": 37459
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0151389837265015,
      "learning_rate": 1.0489976347572003e-05,
      "loss": 2.3394,
      "step": 37460
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0389974117279053,
      "learning_rate": 1.0489565104935915e-05,
      "loss": 2.2362,
      "step": 37461
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1320829391479492,
      "learning_rate": 1.0489153861469876e-05,
      "loss": 2.368,
      "step": 37462
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.026421070098877,
      "learning_rate": 1.048874261717459e-05,
      "loss": 2.1647,
      "step": 37463
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.018898606300354,
      "learning_rate": 1.048833137205075e-05,
      "loss": 2.3315,
      "step": 37464
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.01975679397583,
      "learning_rate": 1.0487920126099058e-05,
      "loss": 2.3514,
      "step": 37465
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1403281688690186,
      "learning_rate": 1.048750887932021e-05,
      "loss": 2.2762,
      "step": 37466
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.131978154182434,
      "learning_rate": 1.04870976317149e-05,
      "loss": 2.451,
      "step": 37467
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0537854433059692,
      "learning_rate": 1.048668638328383e-05,
      "loss": 2.2849,
      "step": 37468
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0753268003463745,
      "learning_rate": 1.0486275134027692e-05,
      "loss": 2.393,
      "step": 37469
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1224675178527832,
      "learning_rate": 1.0485863883947186e-05,
      "loss": 2.3062,
      "step": 37470
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0807209014892578,
      "learning_rate": 1.048545263304301e-05,
      "loss": 2.3657,
      "step": 37471
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.954330325126648,
      "learning_rate": 1.0485041381315857e-05,
      "loss": 2.3244,
      "step": 37472
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1589136123657227,
      "learning_rate": 1.0484630128766428e-05,
      "loss": 2.458,
      "step": 37473
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2158695459365845,
      "learning_rate": 1.0484218875395422e-05,
      "loss": 2.3012,
      "step": 37474
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1288806200027466,
      "learning_rate": 1.0483807621203532e-05,
      "loss": 2.4917,
      "step": 37475
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.025320053100586,
      "learning_rate": 1.0483396366191457e-05,
      "loss": 2.4125,
      "step": 37476
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.004947304725647,
      "learning_rate": 1.0482985110359894e-05,
      "loss": 2.4008,
      "step": 37477
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.019731879234314,
      "learning_rate": 1.0482573853709538e-05,
      "loss": 2.4082,
      "step": 37478
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0081250667572021,
      "learning_rate": 1.048216259624109e-05,
      "loss": 2.4937,
      "step": 37479
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0555790662765503,
      "learning_rate": 1.0481751337955246e-05,
      "loss": 2.2693,
      "step": 37480
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9382525682449341,
      "learning_rate": 1.0481340078852703e-05,
      "loss": 2.3503,
      "step": 37481
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0963102579116821,
      "learning_rate": 1.0480928818934155e-05,
      "loss": 2.1307,
      "step": 37482
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.05890953540802,
      "learning_rate": 1.0480517558200305e-05,
      "loss": 2.3452,
      "step": 37483
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.090047836303711,
      "learning_rate": 1.0480106296651844e-05,
      "loss": 2.5024,
      "step": 37484
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.187907099723816,
      "learning_rate": 1.0479695034289475e-05,
      "loss": 2.396,
      "step": 37485
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1479554176330566,
      "learning_rate": 1.047928377111389e-05,
      "loss": 2.4249,
      "step": 37486
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1011444330215454,
      "learning_rate": 1.0478872507125792e-05,
      "loss": 2.4326,
      "step": 37487
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9952298402786255,
      "learning_rate": 1.0478461242325875e-05,
      "loss": 2.5048,
      "step": 37488
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0234787464141846,
      "learning_rate": 1.0478049976714834e-05,
      "loss": 2.3803,
      "step": 37489
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1738511323928833,
      "learning_rate": 1.047763871029337e-05,
      "loss": 2.2994,
      "step": 37490
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0402106046676636,
      "learning_rate": 1.0477227443062176e-05,
      "loss": 2.3006,
      "step": 37491
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0524415969848633,
      "learning_rate": 1.0476816175021955e-05,
      "loss": 2.3909,
      "step": 37492
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1619669198989868,
      "learning_rate": 1.04764049061734e-05,
      "loss": 2.21,
      "step": 37493
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2532281875610352,
      "learning_rate": 1.0475993636517211e-05,
      "loss": 2.2141,
      "step": 37494
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0843523740768433,
      "learning_rate": 1.0475582366054083e-05,
      "loss": 2.3066,
      "step": 37495
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1035293340682983,
      "learning_rate": 1.0475171094784711e-05,
      "loss": 2.5125,
      "step": 37496
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0472161769866943,
      "learning_rate": 1.0474759822709796e-05,
      "loss": 2.528,
      "step": 37497
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.084196925163269,
      "learning_rate": 1.0474348549830033e-05,
      "loss": 2.5184,
      "step": 37498
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1335488557815552,
      "learning_rate": 1.0473937276146122e-05,
      "loss": 2.5819,
      "step": 37499
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1970537900924683,
      "learning_rate": 1.047352600165876e-05,
      "loss": 2.2835,
      "step": 37500
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0248777866363525,
      "learning_rate": 1.0473114726368643e-05,
      "loss": 2.4137,
      "step": 37501
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0838241577148438,
      "learning_rate": 1.0472703450276467e-05,
      "loss": 2.3257,
      "step": 37502
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0301120281219482,
      "learning_rate": 1.0472292173382927e-05,
      "loss": 2.355,
      "step": 37503
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0624750852584839,
      "learning_rate": 1.0471880895688726e-05,
      "loss": 2.3325,
      "step": 37504
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0711785554885864,
      "learning_rate": 1.0471469617194563e-05,
      "loss": 2.3684,
      "step": 37505
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0199624300003052,
      "learning_rate": 1.0471058337901125e-05,
      "loss": 2.5578,
      "step": 37506
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.062124490737915,
      "learning_rate": 1.0470647057809117e-05,
      "loss": 2.1263,
      "step": 37507
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0603224039077759,
      "learning_rate": 1.0470235776919237e-05,
      "loss": 2.3542,
      "step": 37508
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.117234230041504,
      "learning_rate": 1.0469824495232177e-05,
      "loss": 2.2023,
      "step": 37509
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0455900430679321,
      "learning_rate": 1.0469413212748636e-05,
      "loss": 2.2147,
      "step": 37510
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1158630847930908,
      "learning_rate": 1.0469001929469313e-05,
      "loss": 2.2091,
      "step": 37511
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.011474609375,
      "learning_rate": 1.0468590645394905e-05,
      "loss": 2.3457,
      "step": 37512
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.07951021194458,
      "learning_rate": 1.0468179360526108e-05,
      "loss": 2.4303,
      "step": 37513
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2049397230148315,
      "learning_rate": 1.046776807486362e-05,
      "loss": 2.4459,
      "step": 37514
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0064352750778198,
      "learning_rate": 1.0467356788408139e-05,
      "loss": 2.4027,
      "step": 37515
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0248463153839111,
      "learning_rate": 1.046694550116036e-05,
      "loss": 2.4859,
      "step": 37516
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2271085977554321,
      "learning_rate": 1.0466534213120981e-05,
      "loss": 2.6051,
      "step": 37517
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0695401430130005,
      "learning_rate": 1.04661229242907e-05,
      "loss": 2.3479,
      "step": 37518
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0302931070327759,
      "learning_rate": 1.0465711634670214e-05,
      "loss": 2.2061,
      "step": 37519
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0367259979248047,
      "learning_rate": 1.046530034426022e-05,
      "loss": 2.3115,
      "step": 37520
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0304334163665771,
      "learning_rate": 1.046488905306142e-05,
      "loss": 2.4332,
      "step": 37521
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1564254760742188,
      "learning_rate": 1.0464477761074504e-05,
      "loss": 2.6154,
      "step": 37522
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0027492046356201,
      "learning_rate": 1.0464066468300168e-05,
      "loss": 2.1585,
      "step": 37523
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0655295848846436,
      "learning_rate": 1.0463655174739117e-05,
      "loss": 2.4531,
      "step": 37524
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0972758531570435,
      "learning_rate": 1.0463243880392045e-05,
      "loss": 2.4033,
      "step": 37525
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.013885736465454,
      "learning_rate": 1.0462832585259648e-05,
      "loss": 2.4061,
      "step": 37526
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9881218671798706,
      "learning_rate": 1.0462421289342625e-05,
      "loss": 2.2844,
      "step": 37527
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0148727893829346,
      "learning_rate": 1.046200999264167e-05,
      "loss": 2.4495,
      "step": 37528
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0752325057983398,
      "learning_rate": 1.0461598695157485e-05,
      "loss": 2.4632,
      "step": 37529
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9918451905250549,
      "learning_rate": 1.0461187396890764e-05,
      "loss": 2.387,
      "step": 37530
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0963778495788574,
      "learning_rate": 1.0460776097842205e-05,
      "loss": 2.4545,
      "step": 37531
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0843758583068848,
      "learning_rate": 1.0460364798012505e-05,
      "loss": 2.4511,
      "step": 37532
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1284903287887573,
      "learning_rate": 1.0459953497402364e-05,
      "loss": 2.3039,
      "step": 37533
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0627758502960205,
      "learning_rate": 1.0459542196012474e-05,
      "loss": 2.4623,
      "step": 37534
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2050249576568604,
      "learning_rate": 1.0459130893843537e-05,
      "loss": 2.173,
      "step": 37535
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9752926230430603,
      "learning_rate": 1.0458719590896248e-05,
      "loss": 2.4115,
      "step": 37536
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0353648662567139,
      "learning_rate": 1.0458308287171304e-05,
      "loss": 2.381,
      "step": 37537
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0038816928863525,
      "learning_rate": 1.0457896982669401e-05,
      "loss": 2.1488,
      "step": 37538
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0557396411895752,
      "learning_rate": 1.0457485677391242e-05,
      "loss": 2.3008,
      "step": 37539
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.112828254699707,
      "learning_rate": 1.0457074371337519e-05,
      "loss": 2.632,
      "step": 37540
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0113242864608765,
      "learning_rate": 1.0456663064508931e-05,
      "loss": 2.5084,
      "step": 37541
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.170684576034546,
      "learning_rate": 1.0456251756906175e-05,
      "loss": 2.3601,
      "step": 37542
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0323153734207153,
      "learning_rate": 1.0455840448529948e-05,
      "loss": 2.4796,
      "step": 37543
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0255781412124634,
      "learning_rate": 1.0455429139380948e-05,
      "loss": 2.199,
      "step": 37544
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0280532836914062,
      "learning_rate": 1.045501782945987e-05,
      "loss": 2.518,
      "step": 37545
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.052657127380371,
      "learning_rate": 1.0454606518767417e-05,
      "loss": 2.3608,
      "step": 37546
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0552693605422974,
      "learning_rate": 1.045419520730428e-05,
      "loss": 2.158,
      "step": 37547
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2805871963500977,
      "learning_rate": 1.0453783895071159e-05,
      "loss": 2.3167,
      "step": 37548
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0257848501205444,
      "learning_rate": 1.045337258206875e-05,
      "loss": 2.2361,
      "step": 37549
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0711853504180908,
      "learning_rate": 1.0452961268297755e-05,
      "loss": 2.2242,
      "step": 37550
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0183649063110352,
      "learning_rate": 1.0452549953758864e-05,
      "loss": 2.5432,
      "step": 37551
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9827296733856201,
      "learning_rate": 1.0452138638452778e-05,
      "loss": 2.2174,
      "step": 37552
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.976926326751709,
      "learning_rate": 1.0451727322380193e-05,
      "loss": 2.4128,
      "step": 37553
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.047324538230896,
      "learning_rate": 1.0451316005541815e-05,
      "loss": 2.2686,
      "step": 37554
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.082503318786621,
      "learning_rate": 1.0450904687938328e-05,
      "loss": 2.3507,
      "step": 37555
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2374249696731567,
      "learning_rate": 1.0450493369570433e-05,
      "loss": 2.4585,
      "step": 37556
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0539751052856445,
      "learning_rate": 1.0450082050438833e-05,
      "loss": 2.3969,
      "step": 37557
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.099429965019226,
      "learning_rate": 1.0449670730544222e-05,
      "loss": 2.3174,
      "step": 37558
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1711770296096802,
      "learning_rate": 1.0449259409887293e-05,
      "loss": 2.393,
      "step": 37559
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0353407859802246,
      "learning_rate": 1.0448848088468753e-05,
      "loss": 2.2682,
      "step": 37560
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0763846635818481,
      "learning_rate": 1.044843676628929e-05,
      "loss": 2.3297,
      "step": 37561
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0106685161590576,
      "learning_rate": 1.0448025443349605e-05,
      "loss": 2.397,
      "step": 37562
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0452563762664795,
      "learning_rate": 1.0447614119650395e-05,
      "loss": 2.3957,
      "step": 37563
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1001003980636597,
      "learning_rate": 1.0447202795192362e-05,
      "loss": 2.4009,
      "step": 37564
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0312119722366333,
      "learning_rate": 1.0446791469976194e-05,
      "loss": 2.1807,
      "step": 37565
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1731138229370117,
      "learning_rate": 1.0446380144002593e-05,
      "loss": 2.2828,
      "step": 37566
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0365471839904785,
      "learning_rate": 1.0445968817272259e-05,
      "loss": 2.513,
      "step": 37567
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.032422423362732,
      "learning_rate": 1.0445557489785885e-05,
      "loss": 2.0541,
      "step": 37568
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1883130073547363,
      "learning_rate": 1.044514616154417e-05,
      "loss": 2.5539,
      "step": 37569
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0450217723846436,
      "learning_rate": 1.0444734832547811e-05,
      "loss": 2.3675,
      "step": 37570
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.196876883506775,
      "learning_rate": 1.0444323502797506e-05,
      "loss": 2.2175,
      "step": 37571
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9446025490760803,
      "learning_rate": 1.0443912172293953e-05,
      "loss": 2.4085,
      "step": 37572
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0885907411575317,
      "learning_rate": 1.0443500841037845e-05,
      "loss": 2.2333,
      "step": 37573
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2148232460021973,
      "learning_rate": 1.044308950902989e-05,
      "loss": 2.3667,
      "step": 37574
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1178916692733765,
      "learning_rate": 1.044267817627077e-05,
      "loss": 2.3395,
      "step": 37575
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.416770577430725,
      "learning_rate": 1.0442266842761194e-05,
      "loss": 2.3713,
      "step": 37576
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0090140104293823,
      "learning_rate": 1.0441855508501855e-05,
      "loss": 2.3079,
      "step": 37577
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0304025411605835,
      "learning_rate": 1.044144417349345e-05,
      "loss": 2.1936,
      "step": 37578
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1048786640167236,
      "learning_rate": 1.0441032837736679e-05,
      "loss": 2.2869,
      "step": 37579
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0343692302703857,
      "learning_rate": 1.0440621501232238e-05,
      "loss": 2.3677,
      "step": 37580
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6616088151931763,
      "learning_rate": 1.0440210163980821e-05,
      "loss": 2.4013,
      "step": 37581
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1226615905761719,
      "learning_rate": 1.0439798825983132e-05,
      "loss": 2.1092,
      "step": 37582
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1163102388381958,
      "learning_rate": 1.043938748723986e-05,
      "loss": 2.5124,
      "step": 37583
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.071772575378418,
      "learning_rate": 1.043897614775171e-05,
      "loss": 2.4044,
      "step": 37584
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.30507493019104,
      "learning_rate": 1.0438564807519375e-05,
      "loss": 2.1915,
      "step": 37585
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.213686227798462,
      "learning_rate": 1.0438153466543554e-05,
      "loss": 2.5165,
      "step": 37586
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.207004427909851,
      "learning_rate": 1.0437742124824945e-05,
      "loss": 2.4971,
      "step": 37587
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.250964641571045,
      "learning_rate": 1.0437330782364243e-05,
      "loss": 2.5037,
      "step": 37588
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.249089241027832,
      "learning_rate": 1.0436919439162145e-05,
      "loss": 2.3203,
      "step": 37589
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0742847919464111,
      "learning_rate": 1.0436508095219354e-05,
      "loss": 2.5309,
      "step": 37590
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2165542840957642,
      "learning_rate": 1.0436096750536559e-05,
      "loss": 2.2916,
      "step": 37591
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0092405080795288,
      "learning_rate": 1.0435685405114464e-05,
      "loss": 2.3761,
      "step": 37592
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9668604731559753,
      "learning_rate": 1.0435274058953765e-05,
      "loss": 2.2463,
      "step": 37593
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.08309805393219,
      "learning_rate": 1.0434862712055155e-05,
      "loss": 2.3363,
      "step": 37594
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0688525438308716,
      "learning_rate": 1.0434451364419338e-05,
      "loss": 2.2146,
      "step": 37595
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.924748420715332,
      "learning_rate": 1.0434040016047005e-05,
      "loss": 2.4962,
      "step": 37596
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9224449992179871,
      "learning_rate": 1.0433628666938858e-05,
      "loss": 2.2303,
      "step": 37597
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1559854745864868,
      "learning_rate": 1.0433217317095591e-05,
      "loss": 2.1697,
      "step": 37598
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0164903402328491,
      "learning_rate": 1.0432805966517903e-05,
      "loss": 2.3416,
      "step": 37599
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1721159219741821,
      "learning_rate": 1.0432394615206493e-05,
      "loss": 2.5231,
      "step": 37600
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1896644830703735,
      "learning_rate": 1.0431983263162057e-05,
      "loss": 2.3578,
      "step": 37601
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.243911623954773,
      "learning_rate": 1.043157191038529e-05,
      "loss": 2.2462,
      "step": 37602
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.018498182296753,
      "learning_rate": 1.0431160556876893e-05,
      "loss": 2.4399,
      "step": 37603
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1240888833999634,
      "learning_rate": 1.043074920263756e-05,
      "loss": 2.0732,
      "step": 37604
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9995251893997192,
      "learning_rate": 1.043033784766799e-05,
      "loss": 2.1715,
      "step": 37605
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0190128087997437,
      "learning_rate": 1.0429926491968885e-05,
      "loss": 2.3358,
      "step": 37606
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0491105318069458,
      "learning_rate": 1.0429515135540935e-05,
      "loss": 2.4339,
      "step": 37607
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.108343243598938,
      "learning_rate": 1.042910377838484e-05,
      "loss": 2.3212,
      "step": 37608
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0443153381347656,
      "learning_rate": 1.0428692420501296e-05,
      "loss": 2.195,
      "step": 37609
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.025841236114502,
      "learning_rate": 1.0428281061891001e-05,
      "loss": 2.2538,
      "step": 37610
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.025710105895996,
      "learning_rate": 1.0427869702554657e-05,
      "loss": 2.0953,
      "step": 37611
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9753035306930542,
      "learning_rate": 1.0427458342492956e-05,
      "loss": 2.4141,
      "step": 37612
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.002285122871399,
      "learning_rate": 1.04270469817066e-05,
      "loss": 2.6219,
      "step": 37613
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1120884418487549,
      "learning_rate": 1.0426635620196279e-05,
      "loss": 2.5101,
      "step": 37614
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.018530011177063,
      "learning_rate": 1.0426224257962696e-05,
      "loss": 2.2069,
      "step": 37615
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1208388805389404,
      "learning_rate": 1.0425812895006546e-05,
      "loss": 2.5938,
      "step": 37616
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0907020568847656,
      "learning_rate": 1.042540153132853e-05,
      "loss": 2.4915,
      "step": 37617
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0452477931976318,
      "learning_rate": 1.042499016692934e-05,
      "loss": 2.1692,
      "step": 37618
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1739563941955566,
      "learning_rate": 1.0424578801809677e-05,
      "loss": 2.4809,
      "step": 37619
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0439050197601318,
      "learning_rate": 1.042416743597024e-05,
      "loss": 2.3825,
      "step": 37620
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0477733612060547,
      "learning_rate": 1.0423756069411724e-05,
      "loss": 2.4333,
      "step": 37621
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0874931812286377,
      "learning_rate": 1.0423344702134823e-05,
      "loss": 2.1824,
      "step": 37622
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0196101665496826,
      "learning_rate": 1.0422933334140242e-05,
      "loss": 2.4679,
      "step": 37623
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0782992839813232,
      "learning_rate": 1.0422521965428671e-05,
      "loss": 2.1871,
      "step": 37624
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0310337543487549,
      "learning_rate": 1.0422110596000812e-05,
      "loss": 2.4138,
      "step": 37625
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0755873918533325,
      "learning_rate": 1.0421699225857362e-05,
      "loss": 2.3789,
      "step": 37626
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.116357445716858,
      "learning_rate": 1.0421287854999017e-05,
      "loss": 2.5218,
      "step": 37627
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1234688758850098,
      "learning_rate": 1.0420876483426472e-05,
      "loss": 2.4759,
      "step": 37628
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.05709707736969,
      "learning_rate": 1.0420465111140427e-05,
      "loss": 2.5672,
      "step": 37629
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2288466691970825,
      "learning_rate": 1.042005373814158e-05,
      "loss": 2.3051,
      "step": 37630
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0643192529678345,
      "learning_rate": 1.0419642364430629e-05,
      "loss": 2.5943,
      "step": 37631
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.108832597732544,
      "learning_rate": 1.0419230990008269e-05,
      "loss": 2.3495,
      "step": 37632
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0970745086669922,
      "learning_rate": 1.0418819614875202e-05,
      "loss": 2.339,
      "step": 37633
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1158111095428467,
      "learning_rate": 1.0418408239032119e-05,
      "loss": 2.4302,
      "step": 37634
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0296584367752075,
      "learning_rate": 1.041799686247972e-05,
      "loss": 2.6021,
      "step": 37635
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.006506085395813,
      "learning_rate": 1.0417585485218701e-05,
      "loss": 2.1886,
      "step": 37636
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1450709104537964,
      "learning_rate": 1.0417174107249766e-05,
      "loss": 2.3522,
      "step": 37637
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.972160816192627,
      "learning_rate": 1.0416762728573603e-05,
      "loss": 2.4624,
      "step": 37638
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1604913473129272,
      "learning_rate": 1.0416351349190918e-05,
      "loss": 2.456,
      "step": 37639
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1723378896713257,
      "learning_rate": 1.0415939969102404e-05,
      "loss": 2.5413,
      "step": 37640
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1029818058013916,
      "learning_rate": 1.0415528588308757e-05,
      "loss": 2.4343,
      "step": 37641
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1026030778884888,
      "learning_rate": 1.0415117206810676e-05,
      "loss": 2.4049,
      "step": 37642
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.093946933746338,
      "learning_rate": 1.0414705824608859e-05,
      "loss": 2.394,
      "step": 37643
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1334295272827148,
      "learning_rate": 1.0414294441704002e-05,
      "loss": 2.3444,
      "step": 37644
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.125845193862915,
      "learning_rate": 1.0413883058096805e-05,
      "loss": 2.0552,
      "step": 37645
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1752132177352905,
      "learning_rate": 1.0413471673787965e-05,
      "loss": 2.4738,
      "step": 37646
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0301499366760254,
      "learning_rate": 1.0413060288778176e-05,
      "loss": 2.1968,
      "step": 37647
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.081598162651062,
      "learning_rate": 1.0412648903068138e-05,
      "loss": 2.2215,
      "step": 37648
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0529754161834717,
      "learning_rate": 1.041223751665855e-05,
      "loss": 2.3903,
      "step": 37649
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0474337339401245,
      "learning_rate": 1.0411826129550105e-05,
      "loss": 2.2773,
      "step": 37650
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0865763425827026,
      "learning_rate": 1.0411414741743504e-05,
      "loss": 2.5684,
      "step": 37651
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1350921392440796,
      "learning_rate": 1.0411003353239443e-05,
      "loss": 2.2517,
      "step": 37652
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0601919889450073,
      "learning_rate": 1.041059196403862e-05,
      "loss": 2.3332,
      "step": 37653
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0616213083267212,
      "learning_rate": 1.0410180574141733e-05,
      "loss": 2.375,
      "step": 37654
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9744997024536133,
      "learning_rate": 1.0409769183549477e-05,
      "loss": 2.1964,
      "step": 37655
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9868162870407104,
      "learning_rate": 1.0409357792262551e-05,
      "loss": 2.289,
      "step": 37656
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0336830615997314,
      "learning_rate": 1.0408946400281652e-05,
      "loss": 2.429,
      "step": 37657
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2791415452957153,
      "learning_rate": 1.0408535007607478e-05,
      "loss": 2.295,
      "step": 37658
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.106835961341858,
      "learning_rate": 1.0408123614240728e-05,
      "loss": 2.0719,
      "step": 37659
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2477115392684937,
      "learning_rate": 1.0407712220182096e-05,
      "loss": 2.3494,
      "step": 37660
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.362018346786499,
      "learning_rate": 1.040730082543228e-05,
      "loss": 2.4924,
      "step": 37661
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0305235385894775,
      "learning_rate": 1.040688942999198e-05,
      "loss": 2.3555,
      "step": 37662
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.110779047012329,
      "learning_rate": 1.0406478033861891e-05,
      "loss": 2.0498,
      "step": 37663
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0313493013381958,
      "learning_rate": 1.0406066637042711e-05,
      "loss": 2.3231,
      "step": 37664
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.237655758857727,
      "learning_rate": 1.040565523953514e-05,
      "loss": 2.2971,
      "step": 37665
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1133652925491333,
      "learning_rate": 1.0405243841339874e-05,
      "loss": 2.34,
      "step": 37666
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.069298505783081,
      "learning_rate": 1.0404832442457606e-05,
      "loss": 2.3679,
      "step": 37667
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5922542810440063,
      "learning_rate": 1.040442104288904e-05,
      "loss": 2.4656,
      "step": 37668
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1311941146850586,
      "learning_rate": 1.0404009642634866e-05,
      "loss": 2.5139,
      "step": 37669
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0439417362213135,
      "learning_rate": 1.0403598241695789e-05,
      "loss": 2.3445,
      "step": 37670
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0800200700759888,
      "learning_rate": 1.0403186840072504e-05,
      "loss": 2.2761,
      "step": 37671
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0327363014221191,
      "learning_rate": 1.0402775437765709e-05,
      "loss": 2.569,
      "step": 37672
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0147204399108887,
      "learning_rate": 1.0402364034776098e-05,
      "loss": 2.1587,
      "step": 37673
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0455721616744995,
      "learning_rate": 1.0401952631104371e-05,
      "loss": 2.3116,
      "step": 37674
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.021798849105835,
      "learning_rate": 1.0401541226751224e-05,
      "loss": 2.4102,
      "step": 37675
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9686524271965027,
      "learning_rate": 1.0401129821717357e-05,
      "loss": 2.4886,
      "step": 37676
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1317170858383179,
      "learning_rate": 1.0400718416003465e-05,
      "loss": 2.3664,
      "step": 37677
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0945347547531128,
      "learning_rate": 1.0400307009610246e-05,
      "loss": 2.3846,
      "step": 37678
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0392357110977173,
      "learning_rate": 1.0399895602538401e-05,
      "loss": 2.4558,
      "step": 37679
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0182141065597534,
      "learning_rate": 1.0399484194788623e-05,
      "loss": 2.2484,
      "step": 37680
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0255037546157837,
      "learning_rate": 1.039907278636161e-05,
      "loss": 2.2733,
      "step": 37681
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1659345626831055,
      "learning_rate": 1.0398661377258062e-05,
      "loss": 2.3698,
      "step": 37682
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0633536577224731,
      "learning_rate": 1.0398249967478673e-05,
      "loss": 2.4177,
      "step": 37683
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0246317386627197,
      "learning_rate": 1.0397838557024143e-05,
      "loss": 2.3829,
      "step": 37684
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0790555477142334,
      "learning_rate": 1.0397427145895168e-05,
      "loss": 2.2419,
      "step": 37685
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0569803714752197,
      "learning_rate": 1.0397015734092445e-05,
      "loss": 2.3818,
      "step": 37686
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1105523109436035,
      "learning_rate": 1.0396604321616674e-05,
      "loss": 2.3106,
      "step": 37687
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.06101393699646,
      "learning_rate": 1.039619290846855e-05,
      "loss": 2.2096,
      "step": 37688
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0472171306610107,
      "learning_rate": 1.0395781494648772e-05,
      "loss": 2.0968,
      "step": 37689
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0202127695083618,
      "learning_rate": 1.0395370080158036e-05,
      "loss": 2.1969,
      "step": 37690
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.14211106300354,
      "learning_rate": 1.0394958664997042e-05,
      "loss": 2.4838,
      "step": 37691
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0779228210449219,
      "learning_rate": 1.0394547249166487e-05,
      "loss": 2.5444,
      "step": 37692
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.012740969657898,
      "learning_rate": 1.0394135832667064e-05,
      "loss": 2.3799,
      "step": 37693
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0107637643814087,
      "learning_rate": 1.0393724415499474e-05,
      "loss": 2.3752,
      "step": 37694
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.083868145942688,
      "learning_rate": 1.0393312997664415e-05,
      "loss": 2.1908,
      "step": 37695
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.053220510482788,
      "learning_rate": 1.0392901579162582e-05,
      "loss": 2.1521,
      "step": 37696
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2047456502914429,
      "learning_rate": 1.0392490159994676e-05,
      "loss": 2.4646,
      "step": 37697
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1309010982513428,
      "learning_rate": 1.0392078740161392e-05,
      "loss": 2.2016,
      "step": 37698
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0597217082977295,
      "learning_rate": 1.039166731966343e-05,
      "loss": 2.2472,
      "step": 37699
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1677265167236328,
      "learning_rate": 1.0391255898501485e-05,
      "loss": 2.4541,
      "step": 37700
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1605812311172485,
      "learning_rate": 1.0390844476676252e-05,
      "loss": 2.1772,
      "step": 37701
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9922007322311401,
      "learning_rate": 1.0390433054188433e-05,
      "loss": 2.2924,
      "step": 37702
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9708894491195679,
      "learning_rate": 1.0390021631038725e-05,
      "loss": 2.4158,
      "step": 37703
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0250557661056519,
      "learning_rate": 1.0389610207227822e-05,
      "loss": 2.3892,
      "step": 37704
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9953950047492981,
      "learning_rate": 1.038919878275643e-05,
      "loss": 2.5642,
      "step": 37705
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0111218690872192,
      "learning_rate": 1.0388787357625234e-05,
      "loss": 2.3379,
      "step": 37706
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0011664628982544,
      "learning_rate": 1.038837593183494e-05,
      "loss": 2.4046,
      "step": 37707
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9886720180511475,
      "learning_rate": 1.0387964505386243e-05,
      "loss": 2.3428,
      "step": 37708
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0556747913360596,
      "learning_rate": 1.0387553078279839e-05,
      "loss": 2.5046,
      "step": 37709
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1024491786956787,
      "learning_rate": 1.0387141650516433e-05,
      "loss": 2.2533,
      "step": 37710
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1206953525543213,
      "learning_rate": 1.038673022209671e-05,
      "loss": 2.264,
      "step": 37711
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9689244627952576,
      "learning_rate": 1.0386318793021376e-05,
      "loss": 2.4721,
      "step": 37712
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1524447202682495,
      "learning_rate": 1.0385907363291133e-05,
      "loss": 2.1804,
      "step": 37713
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0488835573196411,
      "learning_rate": 1.0385495932906666e-05,
      "loss": 2.3733,
      "step": 37714
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0719736814498901,
      "learning_rate": 1.038508450186868e-05,
      "loss": 2.2813,
      "step": 37715
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1218376159667969,
      "learning_rate": 1.0384673070177869e-05,
      "loss": 2.4134,
      "step": 37716
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1778937578201294,
      "learning_rate": 1.0384261637834935e-05,
      "loss": 2.4287,
      "step": 37717
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0001509189605713,
      "learning_rate": 1.0383850204840575e-05,
      "loss": 2.4615,
      "step": 37718
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.044303297996521,
      "learning_rate": 1.0383438771195482e-05,
      "loss": 2.419,
      "step": 37719
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1574978828430176,
      "learning_rate": 1.0383027336900356e-05,
      "loss": 2.3988,
      "step": 37720
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.033950924873352,
      "learning_rate": 1.0382615901955894e-05,
      "loss": 2.3345,
      "step": 37721
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.037071704864502,
      "learning_rate": 1.0382204466362795e-05,
      "loss": 2.2987,
      "step": 37722
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1178919076919556,
      "learning_rate": 1.0381793030121756e-05,
      "loss": 2.4573,
      "step": 37723
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9966049790382385,
      "learning_rate": 1.0381381593233475e-05,
      "loss": 2.1885,
      "step": 37724
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9342920184135437,
      "learning_rate": 1.038097015569865e-05,
      "loss": 2.3201,
      "step": 37725
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0234981775283813,
      "learning_rate": 1.0380558717517973e-05,
      "loss": 2.136,
      "step": 37726
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.250787615776062,
      "learning_rate": 1.0380147278692147e-05,
      "loss": 2.2763,
      "step": 37727
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0959291458129883,
      "learning_rate": 1.0379735839221868e-05,
      "loss": 2.3135,
      "step": 37728
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2105014324188232,
      "learning_rate": 1.0379324399107835e-05,
      "loss": 2.2193,
      "step": 37729
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9245404005050659,
      "learning_rate": 1.0378912958350742e-05,
      "loss": 2.2966,
      "step": 37730
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1862365007400513,
      "learning_rate": 1.0378501516951289e-05,
      "loss": 2.1842,
      "step": 37731
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0596866607666016,
      "learning_rate": 1.0378090074910177e-05,
      "loss": 2.6001,
      "step": 37732
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0551081895828247,
      "learning_rate": 1.0377678632228096e-05,
      "loss": 2.3921,
      "step": 37733
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0097215175628662,
      "learning_rate": 1.0377267188905746e-05,
      "loss": 2.4308,
      "step": 37734
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9661890268325806,
      "learning_rate": 1.0376855744943828e-05,
      "loss": 2.1468,
      "step": 37735
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1451201438903809,
      "learning_rate": 1.0376444300343036e-05,
      "loss": 2.4379,
      "step": 37736
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1996335983276367,
      "learning_rate": 1.037603285510407e-05,
      "loss": 2.4371,
      "step": 37737
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9887668490409851,
      "learning_rate": 1.0375621409227628e-05,
      "loss": 2.2377,
      "step": 37738
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1732640266418457,
      "learning_rate": 1.0375209962714402e-05,
      "loss": 2.3073,
      "step": 37739
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0735493898391724,
      "learning_rate": 1.0374798515565095e-05,
      "loss": 2.4908,
      "step": 37740
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1141512393951416,
      "learning_rate": 1.0374387067780403e-05,
      "loss": 2.5137,
      "step": 37741
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0525439977645874,
      "learning_rate": 1.037397561936102e-05,
      "loss": 2.389,
      "step": 37742
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2984315156936646,
      "learning_rate": 1.0373564170307651e-05,
      "loss": 2.3869,
      "step": 37743
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0047684907913208,
      "learning_rate": 1.0373152720620987e-05,
      "loss": 2.3271,
      "step": 37744
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1845279932022095,
      "learning_rate": 1.0372741270301729e-05,
      "loss": 2.3063,
      "step": 37745
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0550867319107056,
      "learning_rate": 1.0372329819350572e-05,
      "loss": 2.0858,
      "step": 37746
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0455310344696045,
      "learning_rate": 1.0371918367768215e-05,
      "loss": 2.359,
      "step": 37747
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0252528190612793,
      "learning_rate": 1.0371506915555357e-05,
      "loss": 2.3106,
      "step": 37748
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1000218391418457,
      "learning_rate": 1.037109546271269e-05,
      "loss": 2.3239,
      "step": 37749
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.056697964668274,
      "learning_rate": 1.037068400924092e-05,
      "loss": 2.5898,
      "step": 37750
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0190829038619995,
      "learning_rate": 1.037027255514074e-05,
      "loss": 2.3243,
      "step": 37751
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2039549350738525,
      "learning_rate": 1.0369861100412844e-05,
      "loss": 2.4003,
      "step": 37752
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1360129117965698,
      "learning_rate": 1.0369449645057934e-05,
      "loss": 2.2598,
      "step": 37753
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2595040798187256,
      "learning_rate": 1.0369038189076706e-05,
      "loss": 2.2009,
      "step": 37754
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9926018118858337,
      "learning_rate": 1.0368626732469858e-05,
      "loss": 2.34,
      "step": 37755
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2359364032745361,
      "learning_rate": 1.0368215275238089e-05,
      "loss": 2.3063,
      "step": 37756
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1326264142990112,
      "learning_rate": 1.0367803817382094e-05,
      "loss": 2.3078,
      "step": 37757
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3932111263275146,
      "learning_rate": 1.0367392358902573e-05,
      "loss": 2.306,
      "step": 37758
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0918564796447754,
      "learning_rate": 1.0366980899800221e-05,
      "loss": 2.2857,
      "step": 37759
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1389585733413696,
      "learning_rate": 1.0366569440075738e-05,
      "loss": 2.4079,
      "step": 37760
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.176513910293579,
      "learning_rate": 1.0366157979729818e-05,
      "loss": 2.3928,
      "step": 37761
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0785123109817505,
      "learning_rate": 1.0365746518763162e-05,
      "loss": 2.4406,
      "step": 37762
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0105605125427246,
      "learning_rate": 1.0365335057176467e-05,
      "loss": 2.3073,
      "step": 37763
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2453378438949585,
      "learning_rate": 1.0364923594970429e-05,
      "loss": 2.5829,
      "step": 37764
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0942624807357788,
      "learning_rate": 1.0364512132145747e-05,
      "loss": 2.5082,
      "step": 37765
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0615003108978271,
      "learning_rate": 1.0364100668703116e-05,
      "loss": 2.3611,
      "step": 37766
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.266389012336731,
      "learning_rate": 1.0363689204643238e-05,
      "loss": 2.265,
      "step": 37767
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1184618473052979,
      "learning_rate": 1.0363277739966805e-05,
      "loss": 2.3974,
      "step": 37768
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9692980051040649,
      "learning_rate": 1.0362866274674523e-05,
      "loss": 2.2712,
      "step": 37769
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1491000652313232,
      "learning_rate": 1.0362454808767079e-05,
      "loss": 2.286,
      "step": 37770
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0506662130355835,
      "learning_rate": 1.0362043342245178e-05,
      "loss": 2.3409,
      "step": 37771
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0653895139694214,
      "learning_rate": 1.0361631875109517e-05,
      "loss": 2.5116,
      "step": 37772
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0418736934661865,
      "learning_rate": 1.0361220407360789e-05,
      "loss": 2.5615,
      "step": 37773
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.062353253364563,
      "learning_rate": 1.0360808938999693e-05,
      "loss": 2.2799,
      "step": 37774
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.226704716682434,
      "learning_rate": 1.0360397470026931e-05,
      "loss": 2.4099,
      "step": 37775
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1102533340454102,
      "learning_rate": 1.0359986000443194e-05,
      "loss": 2.2885,
      "step": 37776
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0639575719833374,
      "learning_rate": 1.0359574530249187e-05,
      "loss": 2.1679,
      "step": 37777
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1822216510772705,
      "learning_rate": 1.0359163059445603e-05,
      "loss": 2.4401,
      "step": 37778
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.993781328201294,
      "learning_rate": 1.0358751588033138e-05,
      "loss": 2.4332,
      "step": 37779
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.06436288356781,
      "learning_rate": 1.035834011601249e-05,
      "loss": 2.2696,
      "step": 37780
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.087266445159912,
      "learning_rate": 1.0357928643384361e-05,
      "loss": 2.6121,
      "step": 37781
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0227460861206055,
      "learning_rate": 1.0357517170149444e-05,
      "loss": 2.4942,
      "step": 37782
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1704460382461548,
      "learning_rate": 1.0357105696308438e-05,
      "loss": 2.4148,
      "step": 37783
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0115654468536377,
      "learning_rate": 1.0356694221862044e-05,
      "loss": 2.3139,
      "step": 37784
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.148036003112793,
      "learning_rate": 1.0356282746810955e-05,
      "loss": 2.4192,
      "step": 37785
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.158503770828247,
      "learning_rate": 1.035587127115587e-05,
      "loss": 2.2105,
      "step": 37786
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.202217936515808,
      "learning_rate": 1.0355459794897484e-05,
      "loss": 2.38,
      "step": 37787
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0884265899658203,
      "learning_rate": 1.03550483180365e-05,
      "loss": 2.3386,
      "step": 37788
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.070112943649292,
      "learning_rate": 1.035463684057361e-05,
      "loss": 2.4725,
      "step": 37789
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1533527374267578,
      "learning_rate": 1.0354225362509518e-05,
      "loss": 2.2971,
      "step": 37790
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1333311796188354,
      "learning_rate": 1.0353813883844916e-05,
      "loss": 2.3532,
      "step": 37791
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.028586745262146,
      "learning_rate": 1.0353402404580504e-05,
      "loss": 2.3714,
      "step": 37792
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1495113372802734,
      "learning_rate": 1.0352990924716976e-05,
      "loss": 2.4108,
      "step": 37793
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.084979772567749,
      "learning_rate": 1.0352579444255036e-05,
      "loss": 2.4394,
      "step": 37794
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.170771598815918,
      "learning_rate": 1.0352167963195376e-05,
      "loss": 2.4517,
      "step": 37795
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0889630317687988,
      "learning_rate": 1.0351756481538697e-05,
      "loss": 2.0926,
      "step": 37796
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9940429925918579,
      "learning_rate": 1.0351344999285695e-05,
      "loss": 2.2786,
      "step": 37797
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0240734815597534,
      "learning_rate": 1.035093351643707e-05,
      "loss": 2.1326,
      "step": 37798
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0953301191329956,
      "learning_rate": 1.0350522032993513e-05,
      "loss": 2.3141,
      "step": 37799
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1468842029571533,
      "learning_rate": 1.035011054895573e-05,
      "loss": 2.4914,
      "step": 37800
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1931172609329224,
      "learning_rate": 1.0349699064324413e-05,
      "loss": 2.3262,
      "step": 37801
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0196077823638916,
      "learning_rate": 1.0349287579100258e-05,
      "loss": 2.1522,
      "step": 37802
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2597867250442505,
      "learning_rate": 1.0348876093283968e-05,
      "loss": 2.2592,
      "step": 37803
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.077710747718811,
      "learning_rate": 1.0348464606876241e-05,
      "loss": 2.198,
      "step": 37804
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.019295334815979,
      "learning_rate": 1.0348053119877767e-05,
      "loss": 2.2149,
      "step": 37805
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1750710010528564,
      "learning_rate": 1.034764163228925e-05,
      "loss": 2.1898,
      "step": 37806
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0755480527877808,
      "learning_rate": 1.0347230144111389e-05,
      "loss": 2.6079,
      "step": 37807
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.066747784614563,
      "learning_rate": 1.0346818655344875e-05,
      "loss": 2.4191,
      "step": 37808
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2149710655212402,
      "learning_rate": 1.0346407165990411e-05,
      "loss": 2.3776,
      "step": 37809
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.043186902999878,
      "learning_rate": 1.0345995676048693e-05,
      "loss": 2.38,
      "step": 37810
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3023966550827026,
      "learning_rate": 1.0345584185520418e-05,
      "loss": 2.3729,
      "step": 37811
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0835521221160889,
      "learning_rate": 1.0345172694406283e-05,
      "loss": 2.4925,
      "step": 37812
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1692763566970825,
      "learning_rate": 1.0344761202706988e-05,
      "loss": 2.2724,
      "step": 37813
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1339302062988281,
      "learning_rate": 1.0344349710423227e-05,
      "loss": 2.5239,
      "step": 37814
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0808696746826172,
      "learning_rate": 1.03439382175557e-05,
      "loss": 2.3015,
      "step": 37815
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9800588488578796,
      "learning_rate": 1.0343526724105107e-05,
      "loss": 2.1877,
      "step": 37816
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0710816383361816,
      "learning_rate": 1.034311523007214e-05,
      "loss": 2.3718,
      "step": 37817
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1664828062057495,
      "learning_rate": 1.0342703735457502e-05,
      "loss": 2.2362,
      "step": 37818
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0321836471557617,
      "learning_rate": 1.0342292240261887e-05,
      "loss": 2.3426,
      "step": 37819
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9827679395675659,
      "learning_rate": 1.0341880744485992e-05,
      "loss": 2.4162,
      "step": 37820
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.045802354812622,
      "learning_rate": 1.0341469248130519e-05,
      "loss": 2.2153,
      "step": 37821
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1940232515335083,
      "learning_rate": 1.0341057751196159e-05,
      "loss": 2.4228,
      "step": 37822
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1170618534088135,
      "learning_rate": 1.0340646253683618e-05,
      "loss": 2.3309,
      "step": 37823
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0654850006103516,
      "learning_rate": 1.0340234755593588e-05,
      "loss": 2.2086,
      "step": 37824
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2451038360595703,
      "learning_rate": 1.0339823256926767e-05,
      "loss": 2.5442,
      "step": 37825
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0737979412078857,
      "learning_rate": 1.0339411757683853e-05,
      "loss": 2.264,
      "step": 37826
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0506170988082886,
      "learning_rate": 1.033900025786554e-05,
      "loss": 2.5186,
      "step": 37827
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0577318668365479,
      "learning_rate": 1.0338588757472539e-05,
      "loss": 2.4116,
      "step": 37828
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0070598125457764,
      "learning_rate": 1.0338177256505531e-05,
      "loss": 2.2983,
      "step": 37829
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0531411170959473,
      "learning_rate": 1.0337765754965222e-05,
      "loss": 2.205,
      "step": 37830
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.137291669845581,
      "learning_rate": 1.0337354252852309e-05,
      "loss": 2.3151,
      "step": 37831
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2015610933303833,
      "learning_rate": 1.0336942750167488e-05,
      "loss": 2.4201,
      "step": 37832
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2230654954910278,
      "learning_rate": 1.033653124691146e-05,
      "loss": 2.2987,
      "step": 37833
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0863161087036133,
      "learning_rate": 1.0336119743084918e-05,
      "loss": 2.2033,
      "step": 37834
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.131502389907837,
      "learning_rate": 1.033570823868856e-05,
      "loss": 2.2426,
      "step": 37835
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9904347062110901,
      "learning_rate": 1.0335296733723088e-05,
      "loss": 2.3091,
      "step": 37836
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1519219875335693,
      "learning_rate": 1.0334885228189198e-05,
      "loss": 2.3957,
      "step": 37837
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.069594383239746,
      "learning_rate": 1.0334473722087583e-05,
      "loss": 2.3682,
      "step": 37838
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0692428350448608,
      "learning_rate": 1.0334062215418948e-05,
      "loss": 2.3735,
      "step": 37839
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1295589208602905,
      "learning_rate": 1.0333650708183985e-05,
      "loss": 2.449,
      "step": 37840
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3948880434036255,
      "learning_rate": 1.0333239200383392e-05,
      "loss": 2.2293,
      "step": 37841
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0632973909378052,
      "learning_rate": 1.0332827692017869e-05,
      "loss": 2.1416,
      "step": 37842
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.132185697555542,
      "learning_rate": 1.0332416183088111e-05,
      "loss": 2.3821,
      "step": 37843
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0325144529342651,
      "learning_rate": 1.0332004673594823e-05,
      "loss": 2.2231,
      "step": 37844
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9905569553375244,
      "learning_rate": 1.0331593163538691e-05,
      "loss": 2.3021,
      "step": 37845
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.001945972442627,
      "learning_rate": 1.0331181652920421e-05,
      "loss": 2.2858,
      "step": 37846
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1962238550186157,
      "learning_rate": 1.0330770141740709e-05,
      "loss": 2.4382,
      "step": 37847
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.091689109802246,
      "learning_rate": 1.033035863000025e-05,
      "loss": 2.2525,
      "step": 37848
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0311814546585083,
      "learning_rate": 1.0329947117699744e-05,
      "loss": 2.259,
      "step": 37849
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1648045778274536,
      "learning_rate": 1.0329535604839889e-05,
      "loss": 2.4007,
      "step": 37850
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0678404569625854,
      "learning_rate": 1.032912409142138e-05,
      "loss": 2.355,
      "step": 37851
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0648068189620972,
      "learning_rate": 1.0328712577444918e-05,
      "loss": 2.4207,
      "step": 37852
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.288055658340454,
      "learning_rate": 1.0328301062911197e-05,
      "loss": 2.3083,
      "step": 37853
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1095672845840454,
      "learning_rate": 1.0327889547820917e-05,
      "loss": 2.5585,
      "step": 37854
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1248971223831177,
      "learning_rate": 1.0327478032174776e-05,
      "loss": 2.3247,
      "step": 37855
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0083619356155396,
      "learning_rate": 1.0327066515973472e-05,
      "loss": 2.4105,
      "step": 37856
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1211613416671753,
      "learning_rate": 1.0326654999217698e-05,
      "loss": 2.4504,
      "step": 37857
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0984101295471191,
      "learning_rate": 1.0326243481908157e-05,
      "loss": 2.0757,
      "step": 37858
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0893391370773315,
      "learning_rate": 1.0325831964045548e-05,
      "loss": 2.0914,
      "step": 37859
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.971713662147522,
      "learning_rate": 1.032542044563056e-05,
      "loss": 2.357,
      "step": 37860
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.01618230342865,
      "learning_rate": 1.0325008926663897e-05,
      "loss": 2.2152,
      "step": 37861
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1367545127868652,
      "learning_rate": 1.0324597407146257e-05,
      "loss": 2.2299,
      "step": 37862
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0081768035888672,
      "learning_rate": 1.0324185887078338e-05,
      "loss": 2.46,
      "step": 37863
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.063693881034851,
      "learning_rate": 1.0323774366460832e-05,
      "loss": 2.2668,
      "step": 37864
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0730804204940796,
      "learning_rate": 1.0323362845294442e-05,
      "loss": 2.1924,
      "step": 37865
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0113993883132935,
      "learning_rate": 1.0322951323579864e-05,
      "loss": 2.3309,
      "step": 37866
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0230413675308228,
      "learning_rate": 1.0322539801317796e-05,
      "loss": 2.1218,
      "step": 37867
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1365432739257812,
      "learning_rate": 1.0322128278508936e-05,
      "loss": 2.3477,
      "step": 37868
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1121141910552979,
      "learning_rate": 1.032171675515398e-05,
      "loss": 2.264,
      "step": 37869
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.107129454612732,
      "learning_rate": 1.032130523125363e-05,
      "loss": 2.2025,
      "step": 37870
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0996837615966797,
      "learning_rate": 1.0320893706808576e-05,
      "loss": 2.3039,
      "step": 37871
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0523797273635864,
      "learning_rate": 1.0320482181819521e-05,
      "loss": 2.3215,
      "step": 37872
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1056342124938965,
      "learning_rate": 1.0320070656287163e-05,
      "loss": 2.2833,
      "step": 37873
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.09505295753479,
      "learning_rate": 1.0319659130212196e-05,
      "loss": 2.261,
      "step": 37874
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0514973402023315,
      "learning_rate": 1.0319247603595322e-05,
      "loss": 2.5031,
      "step": 37875
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0330723524093628,
      "learning_rate": 1.0318836076437236e-05,
      "loss": 2.4289,
      "step": 37876
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.068021535873413,
      "learning_rate": 1.0318424548738636e-05,
      "loss": 2.4215,
      "step": 37877
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0629693269729614,
      "learning_rate": 1.031801302050022e-05,
      "loss": 2.3696,
      "step": 37878
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1099755764007568,
      "learning_rate": 1.0317601491722686e-05,
      "loss": 2.409,
      "step": 37879
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1176291704177856,
      "learning_rate": 1.0317189962406732e-05,
      "loss": 2.5366,
      "step": 37880
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.080438256263733,
      "learning_rate": 1.0316778432553054e-05,
      "loss": 2.2848,
      "step": 37881
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0750302076339722,
      "learning_rate": 1.031636690216235e-05,
      "loss": 2.458,
      "step": 37882
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0049561262130737,
      "learning_rate": 1.031595537123532e-05,
      "loss": 2.2423,
      "step": 37883
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1330573558807373,
      "learning_rate": 1.031554383977266e-05,
      "loss": 2.3642,
      "step": 37884
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2849843502044678,
      "learning_rate": 1.0315132307775064e-05,
      "loss": 2.3675,
      "step": 37885
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0895755290985107,
      "learning_rate": 1.0314720775243235e-05,
      "loss": 2.362,
      "step": 37886
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0447708368301392,
      "learning_rate": 1.0314309242177872e-05,
      "loss": 2.2479,
      "step": 37887
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3424594402313232,
      "learning_rate": 1.0313897708579666e-05,
      "loss": 2.475,
      "step": 37888
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1694082021713257,
      "learning_rate": 1.0313486174449316e-05,
      "loss": 2.0891,
      "step": 37889
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.066272258758545,
      "learning_rate": 1.0313074639787528e-05,
      "loss": 2.2054,
      "step": 37890
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.037882685661316,
      "learning_rate": 1.0312663104594989e-05,
      "loss": 2.3188,
      "step": 37891
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0579490661621094,
      "learning_rate": 1.0312251568872402e-05,
      "loss": 2.3717,
      "step": 37892
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.256460428237915,
      "learning_rate": 1.0311840032620463e-05,
      "loss": 2.5012,
      "step": 37893
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1218594312667847,
      "learning_rate": 1.0311428495839871e-05,
      "loss": 2.2626,
      "step": 37894
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0258755683898926,
      "learning_rate": 1.0311016958531323e-05,
      "loss": 2.2706,
      "step": 37895
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0784558057785034,
      "learning_rate": 1.0310605420695518e-05,
      "loss": 2.3962,
      "step": 37896
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0931756496429443,
      "learning_rate": 1.0310193882333153e-05,
      "loss": 2.546,
      "step": 37897
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0182585716247559,
      "learning_rate": 1.0309782343444923e-05,
      "loss": 2.5359,
      "step": 37898
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2191439867019653,
      "learning_rate": 1.0309370804031527e-05,
      "loss": 2.3702,
      "step": 37899
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1195788383483887,
      "learning_rate": 1.0308959264093665e-05,
      "loss": 2.5215,
      "step": 37900
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.121285080909729,
      "learning_rate": 1.0308547723632032e-05,
      "loss": 2.3891,
      "step": 37901
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.587666630744934,
      "learning_rate": 1.0308136182647328e-05,
      "loss": 2.5361,
      "step": 37902
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2307369709014893,
      "learning_rate": 1.0307724641140251e-05,
      "loss": 2.3547,
      "step": 37903
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1564363241195679,
      "learning_rate": 1.0307313099111496e-05,
      "loss": 2.3053,
      "step": 37904
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0556126832962036,
      "learning_rate": 1.0306901556561759e-05,
      "loss": 2.3621,
      "step": 37905
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0166146755218506,
      "learning_rate": 1.0306490013491744e-05,
      "loss": 2.4516,
      "step": 37906
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.002922773361206,
      "learning_rate": 1.0306078469902143e-05,
      "loss": 2.2007,
      "step": 37907
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9923383593559265,
      "learning_rate": 1.0305666925793656e-05,
      "loss": 2.3096,
      "step": 37908
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9973229169845581,
      "learning_rate": 1.030525538116698e-05,
      "loss": 2.2082,
      "step": 37909
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1087020635604858,
      "learning_rate": 1.0304843836022818e-05,
      "loss": 2.477,
      "step": 37910
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.125370740890503,
      "learning_rate": 1.030443229036186e-05,
      "loss": 2.5307,
      "step": 37911
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.063705325126648,
      "learning_rate": 1.0304020744184805e-05,
      "loss": 2.5643,
      "step": 37912
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1028791666030884,
      "learning_rate": 1.0303609197492352e-05,
      "loss": 2.3079,
      "step": 37913
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2832624912261963,
      "learning_rate": 1.0303197650285202e-05,
      "loss": 2.3368,
      "step": 37914
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2364957332611084,
      "learning_rate": 1.0302786102564049e-05,
      "loss": 2.332,
      "step": 37915
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.214904546737671,
      "learning_rate": 1.0302374554329591e-05,
      "loss": 2.4081,
      "step": 37916
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1067488193511963,
      "learning_rate": 1.0301963005582524e-05,
      "loss": 2.2422,
      "step": 37917
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1420410871505737,
      "learning_rate": 1.0301551456323551e-05,
      "loss": 2.5629,
      "step": 37918
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.748262643814087,
      "learning_rate": 1.0301139906553363e-05,
      "loss": 2.2888,
      "step": 37919
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0317046642303467,
      "learning_rate": 1.0300728356272662e-05,
      "loss": 2.4904,
      "step": 37920
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1272375583648682,
      "learning_rate": 1.0300316805482147e-05,
      "loss": 2.2946,
      "step": 37921
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1603771448135376,
      "learning_rate": 1.029990525418251e-05,
      "loss": 2.4301,
      "step": 37922
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2493466138839722,
      "learning_rate": 1.0299493702374456e-05,
      "loss": 2.2331,
      "step": 37923
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.249835729598999,
      "learning_rate": 1.0299082150058677e-05,
      "loss": 2.2569,
      "step": 37924
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0184154510498047,
      "learning_rate": 1.0298670597235872e-05,
      "loss": 2.3418,
      "step": 37925
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0416104793548584,
      "learning_rate": 1.0298259043906738e-05,
      "loss": 2.2205,
      "step": 37926
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0938265323638916,
      "learning_rate": 1.0297847490071974e-05,
      "loss": 2.2454,
      "step": 37927
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9810279607772827,
      "learning_rate": 1.0297435935732279e-05,
      "loss": 2.4012,
      "step": 37928
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0962363481521606,
      "learning_rate": 1.0297024380888352e-05,
      "loss": 2.2656,
      "step": 37929
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0226331949234009,
      "learning_rate": 1.0296612825540884e-05,
      "loss": 2.2935,
      "step": 37930
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1144912242889404,
      "learning_rate": 1.0296201269690578e-05,
      "loss": 2.5177,
      "step": 37931
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0504329204559326,
      "learning_rate": 1.029578971333813e-05,
      "loss": 2.2557,
      "step": 37932
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.149916410446167,
      "learning_rate": 1.0295378156484237e-05,
      "loss": 2.1934,
      "step": 37933
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1770668029785156,
      "learning_rate": 1.02949665991296e-05,
      "loss": 2.1704,
      "step": 37934
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0480093955993652,
      "learning_rate": 1.0294555041274914e-05,
      "loss": 2.3317,
      "step": 37935
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3833420276641846,
      "learning_rate": 1.0294143482920878e-05,
      "loss": 2.2745,
      "step": 37936
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9746193289756775,
      "learning_rate": 1.0293731924068186e-05,
      "loss": 2.3071,
      "step": 37937
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0101372003555298,
      "learning_rate": 1.029332036471754e-05,
      "loss": 2.3069,
      "step": 37938
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1588845252990723,
      "learning_rate": 1.0292908804869637e-05,
      "loss": 2.3859,
      "step": 37939
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0734907388687134,
      "learning_rate": 1.0292497244525175e-05,
      "loss": 2.3754,
      "step": 37940
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.163636565208435,
      "learning_rate": 1.0292085683684849e-05,
      "loss": 2.5151,
      "step": 37941
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9841744303703308,
      "learning_rate": 1.0291674122349358e-05,
      "loss": 2.3063,
      "step": 37942
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0479414463043213,
      "learning_rate": 1.0291262560519405e-05,
      "loss": 2.218,
      "step": 37943
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1738027334213257,
      "learning_rate": 1.0290850998195677e-05,
      "loss": 2.3824,
      "step": 37944
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0241612195968628,
      "learning_rate": 1.0290439435378878e-05,
      "loss": 2.0978,
      "step": 37945
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1045485734939575,
      "learning_rate": 1.029002787206971e-05,
      "loss": 2.1919,
      "step": 37946
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1768717765808105,
      "learning_rate": 1.0289616308268862e-05,
      "loss": 2.3664,
      "step": 37947
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0668610334396362,
      "learning_rate": 1.0289204743977038e-05,
      "loss": 2.3296,
      "step": 37948
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0728323459625244,
      "learning_rate": 1.028879317919493e-05,
      "loss": 2.1449,
      "step": 37949
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1869450807571411,
      "learning_rate": 1.0288381613923242e-05,
      "loss": 2.3415,
      "step": 37950
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2933084964752197,
      "learning_rate": 1.0287970048162668e-05,
      "loss": 2.4502,
      "step": 37951
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0044705867767334,
      "learning_rate": 1.0287558481913906e-05,
      "loss": 2.3314,
      "step": 37952
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1107455492019653,
      "learning_rate": 1.0287146915177655e-05,
      "loss": 2.2834,
      "step": 37953
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.01288902759552,
      "learning_rate": 1.0286735347954611e-05,
      "loss": 2.1721,
      "step": 37954
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0417455434799194,
      "learning_rate": 1.0286323780245473e-05,
      "loss": 2.3693,
      "step": 37955
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0926584005355835,
      "learning_rate": 1.0285912212050941e-05,
      "loss": 2.5306,
      "step": 37956
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0694935321807861,
      "learning_rate": 1.0285500643371707e-05,
      "loss": 2.298,
      "step": 37957
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0162203311920166,
      "learning_rate": 1.0285089074208473e-05,
      "loss": 2.1978,
      "step": 37958
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2241196632385254,
      "learning_rate": 1.0284677504561935e-05,
      "loss": 2.1272,
      "step": 37959
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1091827154159546,
      "learning_rate": 1.0284265934432792e-05,
      "loss": 2.356,
      "step": 37960
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0299460887908936,
      "learning_rate": 1.028385436382174e-05,
      "loss": 2.3218,
      "step": 37961
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0590990781784058,
      "learning_rate": 1.0283442792729479e-05,
      "loss": 2.5494,
      "step": 37962
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1878429651260376,
      "learning_rate": 1.0283031221156705e-05,
      "loss": 2.341,
      "step": 37963
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0528805255889893,
      "learning_rate": 1.0282619649104115e-05,
      "loss": 2.1863,
      "step": 37964
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3547896146774292,
      "learning_rate": 1.028220807657241e-05,
      "loss": 2.3521,
      "step": 37965
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0757110118865967,
      "learning_rate": 1.0281796503562284e-05,
      "loss": 2.3561,
      "step": 37966
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.119269609451294,
      "learning_rate": 1.0281384930074439e-05,
      "loss": 2.2579,
      "step": 37967
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1743485927581787,
      "learning_rate": 1.0280973356109568e-05,
      "loss": 2.3457,
      "step": 37968
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0340906381607056,
      "learning_rate": 1.0280561781668373e-05,
      "loss": 2.3063,
      "step": 37969
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9662755131721497,
      "learning_rate": 1.0280150206751547e-05,
      "loss": 2.4027,
      "step": 37970
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9933568835258484,
      "learning_rate": 1.027973863135979e-05,
      "loss": 2.3772,
      "step": 37971
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0512489080429077,
      "learning_rate": 1.02793270554938e-05,
      "loss": 2.1896,
      "step": 37972
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1235713958740234,
      "learning_rate": 1.0278915479154277e-05,
      "loss": 2.3707,
      "step": 37973
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2004671096801758,
      "learning_rate": 1.0278503902341917e-05,
      "loss": 2.3765,
      "step": 37974
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.146482229232788,
      "learning_rate": 1.0278092325057413e-05,
      "loss": 2.3617,
      "step": 37975
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.145087480545044,
      "learning_rate": 1.027768074730147e-05,
      "loss": 2.5542,
      "step": 37976
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0691450834274292,
      "learning_rate": 1.0277269169074784e-05,
      "loss": 2.4423,
      "step": 37977
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0742141008377075,
      "learning_rate": 1.0276857590378051e-05,
      "loss": 2.422,
      "step": 37978
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.12864089012146,
      "learning_rate": 1.0276446011211966e-05,
      "loss": 2.3655,
      "step": 37979
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1258635520935059,
      "learning_rate": 1.0276034431577233e-05,
      "loss": 2.3044,
      "step": 37980
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0190941095352173,
      "learning_rate": 1.0275622851474545e-05,
      "loss": 2.3476,
      "step": 37981
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0128798484802246,
      "learning_rate": 1.0275211270904604e-05,
      "loss": 2.181,
      "step": 37982
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2459369897842407,
      "learning_rate": 1.0274799689868102e-05,
      "loss": 2.4787,
      "step": 37983
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1097135543823242,
      "learning_rate": 1.0274388108365741e-05,
      "loss": 2.3793,
      "step": 37984
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0604933500289917,
      "learning_rate": 1.0273976526398216e-05,
      "loss": 2.294,
      "step": 37985
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0149929523468018,
      "learning_rate": 1.027356494396623e-05,
      "loss": 2.2712,
      "step": 37986
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1575360298156738,
      "learning_rate": 1.0273153361070474e-05,
      "loss": 2.413,
      "step": 37987
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0677108764648438,
      "learning_rate": 1.027274177771165e-05,
      "loss": 2.5371,
      "step": 37988
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.123469352722168,
      "learning_rate": 1.0272330193890457e-05,
      "loss": 2.4247,
      "step": 37989
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0927550792694092,
      "learning_rate": 1.0271918609607588e-05,
      "loss": 2.3961,
      "step": 37990
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.162983775138855,
      "learning_rate": 1.027150702486374e-05,
      "loss": 2.347,
      "step": 37991
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9560102820396423,
      "learning_rate": 1.0271095439659617e-05,
      "loss": 2.5427,
      "step": 37992
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0199007987976074,
      "learning_rate": 1.0270683853995915e-05,
      "loss": 2.3159,
      "step": 37993
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0447614192962646,
      "learning_rate": 1.0270272267873327e-05,
      "loss": 2.5931,
      "step": 37994
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2780848741531372,
      "learning_rate": 1.0269860681292559e-05,
      "loss": 2.6271,
      "step": 37995
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.01914381980896,
      "learning_rate": 1.02694490942543e-05,
      "loss": 2.2122,
      "step": 37996
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1363739967346191,
      "learning_rate": 1.0269037506759252e-05,
      "loss": 2.2385,
      "step": 37997
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0679668188095093,
      "learning_rate": 1.0268625918808112e-05,
      "loss": 2.3727,
      "step": 37998
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.046397089958191,
      "learning_rate": 1.026821433040158e-05,
      "loss": 2.1325,
      "step": 37999
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.9958606362342834,
      "learning_rate": 1.026780274154035e-05,
      "loss": 2.2592,
      "step": 38000
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.291925072669983,
      "learning_rate": 1.0267391152225122e-05,
      "loss": 2.4469,
      "step": 38001
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0226866006851196,
      "learning_rate": 1.0266979562456597e-05,
      "loss": 2.4492,
      "step": 38002
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0236822366714478,
      "learning_rate": 1.0266567972235465e-05,
      "loss": 2.4141,
      "step": 38003
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1176494359970093,
      "learning_rate": 1.0266156381562427e-05,
      "loss": 2.3786,
      "step": 38004
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0932222604751587,
      "learning_rate": 1.0265744790438186e-05,
      "loss": 2.2851,
      "step": 38005
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.073809027671814,
      "learning_rate": 1.0265333198863432e-05,
      "loss": 2.4987,
      "step": 38006
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0780504941940308,
      "learning_rate": 1.0264921606838867e-05,
      "loss": 2.255,
      "step": 38007
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0777864456176758,
      "learning_rate": 1.026451001436519e-05,
      "loss": 2.4693,
      "step": 38008
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0860618352890015,
      "learning_rate": 1.0264098421443095e-05,
      "loss": 2.2489,
      "step": 38009
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1781214475631714,
      "learning_rate": 1.026368682807328e-05,
      "loss": 2.4812,
      "step": 38010
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1944022178649902,
      "learning_rate": 1.0263275234256445e-05,
      "loss": 2.4095,
      "step": 38011
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0686917304992676,
      "learning_rate": 1.0262863639993287e-05,
      "loss": 2.4475,
      "step": 38012
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2522931098937988,
      "learning_rate": 1.0262452045284503e-05,
      "loss": 2.4617,
      "step": 38013
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0663373470306396,
      "learning_rate": 1.0262040450130793e-05,
      "loss": 2.3878,
      "step": 38014
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1156529188156128,
      "learning_rate": 1.0261628854532855e-05,
      "loss": 2.2881,
      "step": 38015
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.0227757692337036,
      "learning_rate": 1.0261217258491383e-05,
      "loss": 2.4222,
      "step": 38016
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0711597204208374,
      "learning_rate": 1.0260805662007074e-05,
      "loss": 2.4463,
      "step": 38017
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0125406980514526,
      "learning_rate": 1.0260394065080632e-05,
      "loss": 2.3569,
      "step": 38018
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0606108903884888,
      "learning_rate": 1.025998246771275e-05,
      "loss": 2.5423,
      "step": 38019
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0701757669448853,
      "learning_rate": 1.0259570869904128e-05,
      "loss": 2.2785,
      "step": 38020
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1147780418395996,
      "learning_rate": 1.0259159271655461e-05,
      "loss": 2.2661,
      "step": 38021
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0194778442382812,
      "learning_rate": 1.0258747672967452e-05,
      "loss": 2.3981,
      "step": 38022
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.967117190361023,
      "learning_rate": 1.0258336073840793e-05,
      "loss": 2.2428,
      "step": 38023
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2198315858840942,
      "learning_rate": 1.0257924474276185e-05,
      "loss": 2.4811,
      "step": 38024
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.6252355575561523,
      "learning_rate": 1.0257512874274324e-05,
      "loss": 2.4522,
      "step": 38025
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2243443727493286,
      "learning_rate": 1.025710127383591e-05,
      "loss": 2.2234,
      "step": 38026
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0826314687728882,
      "learning_rate": 1.0256689672961637e-05,
      "loss": 2.3644,
      "step": 38027
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9768279194831848,
      "learning_rate": 1.025627807165221e-05,
      "loss": 2.2536,
      "step": 38028
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1279391050338745,
      "learning_rate": 1.025586646990832e-05,
      "loss": 2.3029,
      "step": 38029
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.144521951675415,
      "learning_rate": 1.0255454867730666e-05,
      "loss": 2.3416,
      "step": 38030
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1438544988632202,
      "learning_rate": 1.0255043265119946e-05,
      "loss": 2.4339,
      "step": 38031
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.215538740158081,
      "learning_rate": 1.0254631662076859e-05,
      "loss": 2.2123,
      "step": 38032
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0652135610580444,
      "learning_rate": 1.0254220058602104e-05,
      "loss": 2.5593,
      "step": 38033
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2244603633880615,
      "learning_rate": 1.0253808454696375e-05,
      "loss": 2.0185,
      "step": 38034
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9394292831420898,
      "learning_rate": 1.025339685036037e-05,
      "loss": 2.3606,
      "step": 38035
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2057589292526245,
      "learning_rate": 1.0252985245594793e-05,
      "loss": 2.5441,
      "step": 38036
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2152591943740845,
      "learning_rate": 1.0252573640400336e-05,
      "loss": 2.2943,
      "step": 38037
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.052680253982544,
      "learning_rate": 1.0252162034777695e-05,
      "loss": 2.3291,
      "step": 38038
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1308226585388184,
      "learning_rate": 1.0251750428727573e-05,
      "loss": 2.3013,
      "step": 38039
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.069693684577942,
      "learning_rate": 1.0251338822250664e-05,
      "loss": 2.5238,
      "step": 38040
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1134754419326782,
      "learning_rate": 1.0250927215347672e-05,
      "loss": 2.1871,
      "step": 38041
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9690481424331665,
      "learning_rate": 1.0250515608019286e-05,
      "loss": 2.3204,
      "step": 38042
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2103110551834106,
      "learning_rate": 1.025010400026621e-05,
      "loss": 2.3194,
      "step": 38043
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.087674617767334,
      "learning_rate": 1.0249692392089138e-05,
      "loss": 2.4797,
      "step": 38044
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0907431840896606,
      "learning_rate": 1.0249280783488772e-05,
      "loss": 2.4847,
      "step": 38045
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.060168743133545,
      "learning_rate": 1.0248869174465803e-05,
      "loss": 2.227,
      "step": 38046
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0600465536117554,
      "learning_rate": 1.0248457565020936e-05,
      "loss": 2.3475,
      "step": 38047
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1927579641342163,
      "learning_rate": 1.0248045955154869e-05,
      "loss": 2.3384,
      "step": 38048
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0513173341751099,
      "learning_rate": 1.0247634344868293e-05,
      "loss": 2.3903,
      "step": 38049
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9706948399543762,
      "learning_rate": 1.024722273416191e-05,
      "loss": 2.3853,
      "step": 38050
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0035988092422485,
      "learning_rate": 1.0246811123036418e-05,
      "loss": 2.4775,
      "step": 38051
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1101220846176147,
      "learning_rate": 1.0246399511492514e-05,
      "loss": 2.0553,
      "step": 38052
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0514789819717407,
      "learning_rate": 1.0245987899530893e-05,
      "loss": 2.4036,
      "step": 38053
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0607432126998901,
      "learning_rate": 1.0245576287152258e-05,
      "loss": 2.4627,
      "step": 38054
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.040333867073059,
      "learning_rate": 1.0245164674357307e-05,
      "loss": 2.206,
      "step": 38055
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.362878441810608,
      "learning_rate": 1.0244753061146733e-05,
      "loss": 2.0904,
      "step": 38056
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0358415842056274,
      "learning_rate": 1.0244341447521235e-05,
      "loss": 2.1087,
      "step": 38057
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.029579520225525,
      "learning_rate": 1.0243929833481514e-05,
      "loss": 2.3965,
      "step": 38058
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0361685752868652,
      "learning_rate": 1.0243518219028263e-05,
      "loss": 2.2162,
      "step": 38059
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0725430250167847,
      "learning_rate": 1.0243106604162186e-05,
      "loss": 2.3507,
      "step": 38060
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9851121306419373,
      "learning_rate": 1.0242694988883977e-05,
      "loss": 2.212,
      "step": 38061
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.065877914428711,
      "learning_rate": 1.0242283373194331e-05,
      "loss": 2.3898,
      "step": 38062
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1799135208129883,
      "learning_rate": 1.024187175709395e-05,
      "loss": 2.3204,
      "step": 38063
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0473108291625977,
      "learning_rate": 1.0241460140583533e-05,
      "loss": 2.516,
      "step": 38064
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.071143388748169,
      "learning_rate": 1.0241048523663773e-05,
      "loss": 2.6468,
      "step": 38065
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0618853569030762,
      "learning_rate": 1.024063690633537e-05,
      "loss": 2.2445,
      "step": 38066
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0316376686096191,
      "learning_rate": 1.0240225288599023e-05,
      "loss": 2.462,
      "step": 38067
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0799787044525146,
      "learning_rate": 1.0239813670455431e-05,
      "loss": 2.5544,
      "step": 38068
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1120684146881104,
      "learning_rate": 1.0239402051905287e-05,
      "loss": 2.4254,
      "step": 38069
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9610036015510559,
      "learning_rate": 1.0238990432949292e-05,
      "loss": 2.3769,
      "step": 38070
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.149154543876648,
      "learning_rate": 1.0238578813588143e-05,
      "loss": 2.3166,
      "step": 38071
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0122699737548828,
      "learning_rate": 1.0238167193822539e-05,
      "loss": 2.359,
      "step": 38072
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0383477210998535,
      "learning_rate": 1.0237755573653174e-05,
      "loss": 2.3881,
      "step": 38073
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0639574527740479,
      "learning_rate": 1.0237343953080754e-05,
      "loss": 2.4117,
      "step": 38074
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.616560459136963,
      "learning_rate": 1.0236932332105968e-05,
      "loss": 2.1994,
      "step": 38075
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.119989275932312,
      "learning_rate": 1.0236520710729519e-05,
      "loss": 2.1165,
      "step": 38076
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1752206087112427,
      "learning_rate": 1.0236109088952102e-05,
      "loss": 2.3458,
      "step": 38077
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1345787048339844,
      "learning_rate": 1.0235697466774415e-05,
      "loss": 2.3777,
      "step": 38078
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0245189666748047,
      "learning_rate": 1.0235285844197158e-05,
      "loss": 2.3007,
      "step": 38079
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0433930158615112,
      "learning_rate": 1.0234874221221028e-05,
      "loss": 2.1385,
      "step": 38080
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9998322129249573,
      "learning_rate": 1.0234462597846721e-05,
      "loss": 2.3436,
      "step": 38081
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4163978099823,
      "learning_rate": 1.0234050974074939e-05,
      "loss": 2.327,
      "step": 38082
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9868815541267395,
      "learning_rate": 1.0233639349906374e-05,
      "loss": 2.5396,
      "step": 38083
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0295391082763672,
      "learning_rate": 1.0233227725341727e-05,
      "loss": 2.3835,
      "step": 38084
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0982978343963623,
      "learning_rate": 1.0232816100381697e-05,
      "loss": 2.3999,
      "step": 38085
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0112959146499634,
      "learning_rate": 1.023240447502698e-05,
      "loss": 2.3058,
      "step": 38086
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0857031345367432,
      "learning_rate": 1.0231992849278273e-05,
      "loss": 2.2216,
      "step": 38087
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0963777303695679,
      "learning_rate": 1.023158122313628e-05,
      "loss": 2.3988,
      "step": 38088
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0017842054367065,
      "learning_rate": 1.023116959660169e-05,
      "loss": 2.319,
      "step": 38089
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0285520553588867,
      "learning_rate": 1.0230757969675205e-05,
      "loss": 2.3156,
      "step": 38090
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.113236665725708,
      "learning_rate": 1.0230346342357522e-05,
      "loss": 2.3449,
      "step": 38091
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0963681936264038,
      "learning_rate": 1.0229934714649341e-05,
      "loss": 2.3148,
      "step": 38092
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.264288306236267,
      "learning_rate": 1.0229523086551358e-05,
      "loss": 2.3084,
      "step": 38093
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0336854457855225,
      "learning_rate": 1.022911145806427e-05,
      "loss": 2.2602,
      "step": 38094
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0584723949432373,
      "learning_rate": 1.0228699829188777e-05,
      "loss": 2.4147,
      "step": 38095
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1590803861618042,
      "learning_rate": 1.0228288199925575e-05,
      "loss": 2.3915,
      "step": 38096
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3754169940948486,
      "learning_rate": 1.0227876570275364e-05,
      "loss": 2.2532,
      "step": 38097
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.037545084953308,
      "learning_rate": 1.0227464940238836e-05,
      "loss": 2.2894,
      "step": 38098
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9598405957221985,
      "learning_rate": 1.0227053309816697e-05,
      "loss": 2.2216,
      "step": 38099
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.105075716972351,
      "learning_rate": 1.0226641679009639e-05,
      "loss": 2.2169,
      "step": 38100
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0415306091308594,
      "learning_rate": 1.0226230047818363e-05,
      "loss": 2.5401,
      "step": 38101
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0573649406433105,
      "learning_rate": 1.0225818416243566e-05,
      "loss": 2.3518,
      "step": 38102
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2089368104934692,
      "learning_rate": 1.0225406784285944e-05,
      "loss": 2.1757,
      "step": 38103
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9891823530197144,
      "learning_rate": 1.0224995151946197e-05,
      "loss": 2.415,
      "step": 38104
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1694145202636719,
      "learning_rate": 1.0224583519225022e-05,
      "loss": 2.1259,
      "step": 38105
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0916203260421753,
      "learning_rate": 1.0224171886123118e-05,
      "loss": 2.1728,
      "step": 38106
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1184120178222656,
      "learning_rate": 1.0223760252641182e-05,
      "loss": 2.5327,
      "step": 38107
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0071176290512085,
      "learning_rate": 1.022334861877991e-05,
      "loss": 2.5142,
      "step": 38108
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9814181923866272,
      "learning_rate": 1.0222936984540002e-05,
      "loss": 2.3896,
      "step": 38109
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9259427189826965,
      "learning_rate": 1.0222525349922154e-05,
      "loss": 2.4322,
      "step": 38110
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2583080530166626,
      "learning_rate": 1.0222113714927067e-05,
      "loss": 2.4788,
      "step": 38111
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3799246549606323,
      "learning_rate": 1.0221702079555436e-05,
      "loss": 2.4648,
      "step": 38112
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0941097736358643,
      "learning_rate": 1.0221290443807959e-05,
      "loss": 2.2144,
      "step": 38113
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0306293964385986,
      "learning_rate": 1.0220878807685337e-05,
      "loss": 2.3241,
      "step": 38114
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.103589415550232,
      "learning_rate": 1.0220467171188263e-05,
      "loss": 2.4384,
      "step": 38115
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0962421894073486,
      "learning_rate": 1.0220055534317439e-05,
      "loss": 2.4149,
      "step": 38116
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0704618692398071,
      "learning_rate": 1.0219643897073559e-05,
      "loss": 2.3333,
      "step": 38117
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0314949750900269,
      "learning_rate": 1.0219232259457326e-05,
      "loss": 2.0879,
      "step": 38118
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9844167828559875,
      "learning_rate": 1.0218820621469431e-05,
      "loss": 2.5509,
      "step": 38119
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1210517883300781,
      "learning_rate": 1.021840898311058e-05,
      "loss": 2.4319,
      "step": 38120
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0626413822174072,
      "learning_rate": 1.0217997344381463e-05,
      "loss": 2.4296,
      "step": 38121
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1369096040725708,
      "learning_rate": 1.0217585705282782e-05,
      "loss": 2.3097,
      "step": 38122
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2333751916885376,
      "learning_rate": 1.0217174065815236e-05,
      "loss": 2.3656,
      "step": 38123
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1361507177352905,
      "learning_rate": 1.0216762425979521e-05,
      "loss": 2.4278,
      "step": 38124
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.200102686882019,
      "learning_rate": 1.0216350785776332e-05,
      "loss": 2.4159,
      "step": 38125
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0570805072784424,
      "learning_rate": 1.021593914520637e-05,
      "loss": 2.2171,
      "step": 38126
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1922783851623535,
      "learning_rate": 1.0215527504270338e-05,
      "loss": 2.3142,
      "step": 38127
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0238873958587646,
      "learning_rate": 1.0215115862968923e-05,
      "loss": 2.2383,
      "step": 38128
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1180803775787354,
      "learning_rate": 1.0214704221302828e-05,
      "loss": 2.2782,
      "step": 38129
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0898799896240234,
      "learning_rate": 1.021429257927275e-05,
      "loss": 2.3551,
      "step": 38130
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1619869470596313,
      "learning_rate": 1.021388093687939e-05,
      "loss": 2.2806,
      "step": 38131
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9430814981460571,
      "learning_rate": 1.0213469294123444e-05,
      "loss": 2.3623,
      "step": 38132
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9599156975746155,
      "learning_rate": 1.021305765100561e-05,
      "loss": 2.0823,
      "step": 38133
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0567845106124878,
      "learning_rate": 1.0212646007526586e-05,
      "loss": 2.0417,
      "step": 38134
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1432924270629883,
      "learning_rate": 1.0212234363687069e-05,
      "loss": 2.1129,
      "step": 38135
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1988674402236938,
      "learning_rate": 1.0211822719487754e-05,
      "loss": 2.3715,
      "step": 38136
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0792993307113647,
      "learning_rate": 1.0211411074929343e-05,
      "loss": 2.3603,
      "step": 38137
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1452761888504028,
      "learning_rate": 1.0210999430012534e-05,
      "loss": 2.1818,
      "step": 38138
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0427742004394531,
      "learning_rate": 1.0210587784738024e-05,
      "loss": 2.4393,
      "step": 38139
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9739007949829102,
      "learning_rate": 1.0210176139106512e-05,
      "loss": 2.3081,
      "step": 38140
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9773148894309998,
      "learning_rate": 1.020976449311869e-05,
      "loss": 2.3474,
      "step": 38141
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0804895162582397,
      "learning_rate": 1.0209352846775262e-05,
      "loss": 2.4142,
      "step": 38142
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0390175580978394,
      "learning_rate": 1.0208941200076924e-05,
      "loss": 2.6106,
      "step": 38143
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.132670283317566,
      "learning_rate": 1.0208529553024376e-05,
      "loss": 2.3265,
      "step": 38144
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1507585048675537,
      "learning_rate": 1.0208117905618311e-05,
      "loss": 2.0917,
      "step": 38145
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2234522104263306,
      "learning_rate": 1.0207706257859432e-05,
      "loss": 2.4237,
      "step": 38146
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0921765565872192,
      "learning_rate": 1.0207294609748436e-05,
      "loss": 2.3635,
      "step": 38147
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0397347211837769,
      "learning_rate": 1.0206882961286016e-05,
      "loss": 2.2233,
      "step": 38148
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0713011026382446,
      "learning_rate": 1.0206471312472872e-05,
      "loss": 2.4755,
      "step": 38149
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6169919967651367,
      "learning_rate": 1.0206059663309704e-05,
      "loss": 2.3057,
      "step": 38150
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1259198188781738,
      "learning_rate": 1.0205648013797214e-05,
      "loss": 2.2755,
      "step": 38151
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1230170726776123,
      "learning_rate": 1.0205236363936088e-05,
      "loss": 2.2663,
      "step": 38152
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1597846746444702,
      "learning_rate": 1.0204824713727036e-05,
      "loss": 2.4332,
      "step": 38153
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1356548070907593,
      "learning_rate": 1.0204413063170749e-05,
      "loss": 2.3617,
      "step": 38154
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1329600811004639,
      "learning_rate": 1.0204001412267924e-05,
      "loss": 2.3289,
      "step": 38155
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.932198703289032,
      "learning_rate": 1.0203589761019262e-05,
      "loss": 2.1586,
      "step": 38156
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1266992092132568,
      "learning_rate": 1.020317810942546e-05,
      "loss": 2.3992,
      "step": 38157
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0233218669891357,
      "learning_rate": 1.0202766457487216e-05,
      "loss": 2.2219,
      "step": 38158
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0308542251586914,
      "learning_rate": 1.0202354805205229e-05,
      "loss": 2.282,
      "step": 38159
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.05833899974823,
      "learning_rate": 1.0201943152580194e-05,
      "loss": 2.3614,
      "step": 38160
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1627719402313232,
      "learning_rate": 1.0201531499612811e-05,
      "loss": 2.3791,
      "step": 38161
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9718337655067444,
      "learning_rate": 1.0201119846303777e-05,
      "loss": 2.4457,
      "step": 38162
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0065550804138184,
      "learning_rate": 1.0200708192653792e-05,
      "loss": 2.4921,
      "step": 38163
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1023255586624146,
      "learning_rate": 1.020029653866355e-05,
      "loss": 2.147,
      "step": 38164
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0981738567352295,
      "learning_rate": 1.019988488433375e-05,
      "loss": 2.2299,
      "step": 38165
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1508443355560303,
      "learning_rate": 1.0199473229665093e-05,
      "loss": 2.3298,
      "step": 38166
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0121989250183105,
      "learning_rate": 1.0199061574658278e-05,
      "loss": 2.3403,
      "step": 38167
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2188204526901245,
      "learning_rate": 1.0198649919313994e-05,
      "loss": 2.3971,
      "step": 38168
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0850578546524048,
      "learning_rate": 1.0198238263632948e-05,
      "loss": 2.3396,
      "step": 38169
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1093478202819824,
      "learning_rate": 1.019782660761583e-05,
      "loss": 2.442,
      "step": 38170
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0336323976516724,
      "learning_rate": 1.0197414951263346e-05,
      "loss": 2.1633,
      "step": 38171
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0888242721557617,
      "learning_rate": 1.019700329457619e-05,
      "loss": 2.4269,
      "step": 38172
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0542304515838623,
      "learning_rate": 1.019659163755506e-05,
      "loss": 2.3286,
      "step": 38173
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.053636908531189,
      "learning_rate": 1.0196179980200652e-05,
      "loss": 2.4069,
      "step": 38174
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1291913986206055,
      "learning_rate": 1.0195768322513665e-05,
      "loss": 2.4812,
      "step": 38175
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1961255073547363,
      "learning_rate": 1.01953566644948e-05,
      "loss": 2.3627,
      "step": 38176
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.016430377960205,
      "learning_rate": 1.0194945006144749e-05,
      "loss": 2.0724,
      "step": 38177
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1056885719299316,
      "learning_rate": 1.0194533347464215e-05,
      "loss": 2.4987,
      "step": 38178
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0002588033676147,
      "learning_rate": 1.0194121688453897e-05,
      "loss": 2.3972,
      "step": 38179
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0405561923980713,
      "learning_rate": 1.0193710029114487e-05,
      "loss": 2.4285,
      "step": 38180
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0207419395446777,
      "learning_rate": 1.0193298369446687e-05,
      "loss": 2.2806,
      "step": 38181
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0517007112503052,
      "learning_rate": 1.0192886709451195e-05,
      "loss": 2.4198,
      "step": 38182
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2803010940551758,
      "learning_rate": 1.0192475049128705e-05,
      "loss": 2.1753,
      "step": 38183
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0159275531768799,
      "learning_rate": 1.0192063388479919e-05,
      "loss": 2.3985,
      "step": 38184
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.031911015510559,
      "learning_rate": 1.0191651727505531e-05,
      "loss": 2.1941,
      "step": 38185
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1252191066741943,
      "learning_rate": 1.0191240066206248e-05,
      "loss": 2.321,
      "step": 38186
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9307346343994141,
      "learning_rate": 1.0190828404582754e-05,
      "loss": 2.4309,
      "step": 38187
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1214739084243774,
      "learning_rate": 1.0190416742635756e-05,
      "loss": 2.185,
      "step": 38188
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.063500165939331,
      "learning_rate": 1.019000508036595e-05,
      "loss": 2.1557,
      "step": 38189
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9905858635902405,
      "learning_rate": 1.0189593417774033e-05,
      "loss": 2.4146,
      "step": 38190
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0058083534240723,
      "learning_rate": 1.0189181754860704e-05,
      "loss": 2.2327,
      "step": 38191
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0790114402770996,
      "learning_rate": 1.018877009162666e-05,
      "loss": 2.3021,
      "step": 38192
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9956169724464417,
      "learning_rate": 1.0188358428072605e-05,
      "loss": 2.2737,
      "step": 38193
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2343229055404663,
      "learning_rate": 1.0187946764199226e-05,
      "loss": 2.5186,
      "step": 38194
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9988512396812439,
      "learning_rate": 1.0187535100007226e-05,
      "loss": 2.4721,
      "step": 38195
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1181696653366089,
      "learning_rate": 1.0187123435497303e-05,
      "loss": 2.4347,
      "step": 38196
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1926960945129395,
      "learning_rate": 1.0186711770670158e-05,
      "loss": 2.1986,
      "step": 38197
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1985658407211304,
      "learning_rate": 1.0186300105526483e-05,
      "loss": 2.3457,
      "step": 38198
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1891616582870483,
      "learning_rate": 1.0185888440066978e-05,
      "loss": 2.4102,
      "step": 38199
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0801857709884644,
      "learning_rate": 1.0185476774292347e-05,
      "loss": 2.1173,
      "step": 38200
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1144930124282837,
      "learning_rate": 1.0185065108203278e-05,
      "loss": 2.316,
      "step": 38201
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1898691654205322,
      "learning_rate": 1.0184653441800473e-05,
      "loss": 2.3708,
      "step": 38202
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.126950740814209,
      "learning_rate": 1.018424177508463e-05,
      "loss": 2.4371,
      "step": 38203
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1680842638015747,
      "learning_rate": 1.018383010805645e-05,
      "loss": 2.4867,
      "step": 38204
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0876989364624023,
      "learning_rate": 1.0183418440716625e-05,
      "loss": 2.64,
      "step": 38205
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.214656114578247,
      "learning_rate": 1.018300677306586e-05,
      "loss": 2.3896,
      "step": 38206
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0254653692245483,
      "learning_rate": 1.0182595105104847e-05,
      "loss": 2.2007,
      "step": 38207
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1571437120437622,
      "learning_rate": 1.0182183436834284e-05,
      "loss": 2.351,
      "step": 38208
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.938136875629425,
      "learning_rate": 1.0181771768254872e-05,
      "loss": 2.3611,
      "step": 38209
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.068038821220398,
      "learning_rate": 1.0181360099367308e-05,
      "loss": 2.1476,
      "step": 38210
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2399966716766357,
      "learning_rate": 1.0180948430172288e-05,
      "loss": 2.2105,
      "step": 38211
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0017536878585815,
      "learning_rate": 1.0180536760670512e-05,
      "loss": 2.6944,
      "step": 38212
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9840841889381409,
      "learning_rate": 1.0180125090862679e-05,
      "loss": 2.3129,
      "step": 38213
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2614161968231201,
      "learning_rate": 1.0179713420749482e-05,
      "loss": 2.325,
      "step": 38214
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0213055610656738,
      "learning_rate": 1.0179301750331623e-05,
      "loss": 2.3218,
      "step": 38215
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1210016012191772,
      "learning_rate": 1.0178890079609797e-05,
      "loss": 2.4333,
      "step": 38216
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2340091466903687,
      "learning_rate": 1.0178478408584706e-05,
      "loss": 2.3623,
      "step": 38217
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.116127848625183,
      "learning_rate": 1.0178066737257046e-05,
      "loss": 2.2997,
      "step": 38218
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0702446699142456,
      "learning_rate": 1.0177655065627514e-05,
      "loss": 2.4391,
      "step": 38219
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1326643228530884,
      "learning_rate": 1.017724339369681e-05,
      "loss": 2.2641,
      "step": 38220
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0629559755325317,
      "learning_rate": 1.0176831721465627e-05,
      "loss": 2.3377,
      "step": 38221
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.124227523803711,
      "learning_rate": 1.0176420048934668e-05,
      "loss": 2.4339,
      "step": 38222
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0095858573913574,
      "learning_rate": 1.0176008376104627e-05,
      "loss": 2.291,
      "step": 38223
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.110737919807434,
      "learning_rate": 1.0175596702976204e-05,
      "loss": 2.2534,
      "step": 38224
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1966619491577148,
      "learning_rate": 1.0175185029550099e-05,
      "loss": 2.0949,
      "step": 38225
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0210092067718506,
      "learning_rate": 1.017477335582701e-05,
      "loss": 2.2632,
      "step": 38226
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1330442428588867,
      "learning_rate": 1.017436168180763e-05,
      "loss": 2.2662,
      "step": 38227
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0009580850601196,
      "learning_rate": 1.017395000749266e-05,
      "loss": 2.4634,
      "step": 38228
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9852112531661987,
      "learning_rate": 1.0173538332882795e-05,
      "loss": 2.3036,
      "step": 38229
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2546395063400269,
      "learning_rate": 1.0173126657978738e-05,
      "loss": 2.259,
      "step": 38230
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0380438566207886,
      "learning_rate": 1.0172714982781185e-05,
      "loss": 2.3158,
      "step": 38231
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0622502565383911,
      "learning_rate": 1.017230330729083e-05,
      "loss": 2.1692,
      "step": 38232
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0505067110061646,
      "learning_rate": 1.0171891631508377e-05,
      "loss": 2.2653,
      "step": 38233
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0929709672927856,
      "learning_rate": 1.017147995543452e-05,
      "loss": 2.3617,
      "step": 38234
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1392455101013184,
      "learning_rate": 1.0171068279069957e-05,
      "loss": 2.4647,
      "step": 38235
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1727946996688843,
      "learning_rate": 1.0170656602415389e-05,
      "loss": 2.2697,
      "step": 38236
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0983648300170898,
      "learning_rate": 1.017024492547151e-05,
      "loss": 2.4341,
      "step": 38237
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1138403415679932,
      "learning_rate": 1.0169833248239023e-05,
      "loss": 2.352,
      "step": 38238
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0225961208343506,
      "learning_rate": 1.0169421570718618e-05,
      "loss": 2.1922,
      "step": 38239
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1071960926055908,
      "learning_rate": 1.0169009892911e-05,
      "loss": 2.4366,
      "step": 38240
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.121286153793335,
      "learning_rate": 1.0168598214816865e-05,
      "loss": 2.5142,
      "step": 38241
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0944160223007202,
      "learning_rate": 1.0168186536436908e-05,
      "loss": 2.262,
      "step": 38242
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0594979524612427,
      "learning_rate": 1.0167774857771831e-05,
      "loss": 2.2067,
      "step": 38243
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2655775547027588,
      "learning_rate": 1.0167363178822327e-05,
      "loss": 2.488,
      "step": 38244
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0114063024520874,
      "learning_rate": 1.01669514995891e-05,
      "loss": 2.3737,
      "step": 38245
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0889400243759155,
      "learning_rate": 1.0166539820072846e-05,
      "loss": 2.1385,
      "step": 38246
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.047570824623108,
      "learning_rate": 1.0166128140274256e-05,
      "loss": 2.2635,
      "step": 38247
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1000338792800903,
      "learning_rate": 1.0165716460194037e-05,
      "loss": 2.4885,
      "step": 38248
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1016188859939575,
      "learning_rate": 1.0165304779832885e-05,
      "loss": 2.1618,
      "step": 38249
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1985563039779663,
      "learning_rate": 1.0164893099191496e-05,
      "loss": 2.2041,
      "step": 38250
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0865365266799927,
      "learning_rate": 1.0164481418270563e-05,
      "loss": 2.5241,
      "step": 38251
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.034328818321228,
      "learning_rate": 1.0164069737070798e-05,
      "loss": 2.217,
      "step": 38252
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1261463165283203,
      "learning_rate": 1.0163658055592887e-05,
      "loss": 2.2405,
      "step": 38253
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.052112340927124,
      "learning_rate": 1.016324637383753e-05,
      "loss": 2.3416,
      "step": 38254
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0424247980117798,
      "learning_rate": 1.0162834691805425e-05,
      "loss": 2.2874,
      "step": 38255
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4722261428833008,
      "learning_rate": 1.0162423009497273e-05,
      "loss": 2.3289,
      "step": 38256
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.116971731185913,
      "learning_rate": 1.0162011326913767e-05,
      "loss": 2.4585,
      "step": 38257
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2505381107330322,
      "learning_rate": 1.0161599644055611e-05,
      "loss": 2.1397,
      "step": 38258
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.126786231994629,
      "learning_rate": 1.01611879609235e-05,
      "loss": 2.4127,
      "step": 38259
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0815794467926025,
      "learning_rate": 1.016077627751813e-05,
      "loss": 2.5583,
      "step": 38260
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.051867127418518,
      "learning_rate": 1.0160364593840202e-05,
      "loss": 2.1988,
      "step": 38261
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.960455596446991,
      "learning_rate": 1.015995290989041e-05,
      "loss": 2.4469,
      "step": 38262
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.055458903312683,
      "learning_rate": 1.0159541225669456e-05,
      "loss": 2.1397,
      "step": 38263
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0873326063156128,
      "learning_rate": 1.0159129541178034e-05,
      "loss": 2.2816,
      "step": 38264
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0286294221878052,
      "learning_rate": 1.0158717856416849e-05,
      "loss": 2.3377,
      "step": 38265
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0831007957458496,
      "learning_rate": 1.0158306171386592e-05,
      "loss": 2.3425,
      "step": 38266
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.134971022605896,
      "learning_rate": 1.0157894486087963e-05,
      "loss": 2.1665,
      "step": 38267
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0461503267288208,
      "learning_rate": 1.0157482800521658e-05,
      "loss": 2.5261,
      "step": 38268
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.138657569885254,
      "learning_rate": 1.015707111468838e-05,
      "loss": 2.1851,
      "step": 38269
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.004542350769043,
      "learning_rate": 1.0156659428588822e-05,
      "loss": 2.2941,
      "step": 38270
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1410142183303833,
      "learning_rate": 1.0156247742223684e-05,
      "loss": 2.3391,
      "step": 38271
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0173001289367676,
      "learning_rate": 1.0155836055593664e-05,
      "loss": 2.352,
      "step": 38272
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0130467414855957,
      "learning_rate": 1.0155424368699457e-05,
      "loss": 2.2868,
      "step": 38273
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0718448162078857,
      "learning_rate": 1.0155012681541767e-05,
      "loss": 2.2654,
      "step": 38274
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0629526376724243,
      "learning_rate": 1.0154600994121287e-05,
      "loss": 2.2201,
      "step": 38275
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.064658761024475,
      "learning_rate": 1.0154189306438716e-05,
      "loss": 2.4814,
      "step": 38276
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0242533683776855,
      "learning_rate": 1.0153777618494752e-05,
      "loss": 2.5656,
      "step": 38277
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.115132212638855,
      "learning_rate": 1.0153365930290093e-05,
      "loss": 2.2305,
      "step": 38278
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2066508531570435,
      "learning_rate": 1.0152954241825438e-05,
      "loss": 2.523,
      "step": 38279
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9846090078353882,
      "learning_rate": 1.0152542553101486e-05,
      "loss": 2.5115,
      "step": 38280
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1585876941680908,
      "learning_rate": 1.015213086411893e-05,
      "loss": 2.1371,
      "step": 38281
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9632755517959595,
      "learning_rate": 1.0151719174878471e-05,
      "loss": 2.2256,
      "step": 38282
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3594406843185425,
      "learning_rate": 1.0151307485380805e-05,
      "loss": 2.4929,
      "step": 38283
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0607843399047852,
      "learning_rate": 1.0150895795626636e-05,
      "loss": 2.4184,
      "step": 38284
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9872586131095886,
      "learning_rate": 1.0150484105616655e-05,
      "loss": 2.2986,
      "step": 38285
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1388434171676636,
      "learning_rate": 1.0150072415351564e-05,
      "loss": 2.3593,
      "step": 38286
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0791566371917725,
      "learning_rate": 1.014966072483206e-05,
      "loss": 2.379,
      "step": 38287
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0322279930114746,
      "learning_rate": 1.0149249034058839e-05,
      "loss": 2.2678,
      "step": 38288
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.047404170036316,
      "learning_rate": 1.01488373430326e-05,
      "loss": 2.3105,
      "step": 38289
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.037796139717102,
      "learning_rate": 1.014842565175404e-05,
      "loss": 2.2259,
      "step": 38290
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2142434120178223,
      "learning_rate": 1.0148013960223861e-05,
      "loss": 2.4273,
      "step": 38291
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2724783420562744,
      "learning_rate": 1.0147602268442757e-05,
      "loss": 2.374,
      "step": 38292
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9399126768112183,
      "learning_rate": 1.0147190576411427e-05,
      "loss": 2.2354,
      "step": 38293
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0337332487106323,
      "learning_rate": 1.014677888413057e-05,
      "loss": 2.4445,
      "step": 38294
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.00093412399292,
      "learning_rate": 1.0146367191600884e-05,
      "loss": 2.3827,
      "step": 38295
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0050545930862427,
      "learning_rate": 1.0145955498823062e-05,
      "loss": 2.5582,
      "step": 38296
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.144067645072937,
      "learning_rate": 1.014554380579781e-05,
      "loss": 2.3329,
      "step": 38297
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.292341709136963,
      "learning_rate": 1.014513211252582e-05,
      "loss": 2.4968,
      "step": 38298
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0243364572525024,
      "learning_rate": 1.014472041900779e-05,
      "loss": 2.2507,
      "step": 38299
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.984630823135376,
      "learning_rate": 1.0144308725244423e-05,
      "loss": 2.1447,
      "step": 38300
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.025289535522461,
      "learning_rate": 1.014389703123641e-05,
      "loss": 2.2097,
      "step": 38301
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0282129049301147,
      "learning_rate": 1.0143485336984453e-05,
      "loss": 2.1882,
      "step": 38302
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0733014345169067,
      "learning_rate": 1.014307364248925e-05,
      "loss": 2.2501,
      "step": 38303
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2051392793655396,
      "learning_rate": 1.0142661947751497e-05,
      "loss": 2.5361,
      "step": 38304
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0190134048461914,
      "learning_rate": 1.0142250252771898e-05,
      "loss": 2.4648,
      "step": 38305
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1169929504394531,
      "learning_rate": 1.0141838557551142e-05,
      "loss": 2.141,
      "step": 38306
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0899823904037476,
      "learning_rate": 1.0141426862089932e-05,
      "loss": 2.2875,
      "step": 38307
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1216195821762085,
      "learning_rate": 1.0141015166388966e-05,
      "loss": 2.478,
      "step": 38308
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1283037662506104,
      "learning_rate": 1.014060347044894e-05,
      "loss": 2.2056,
      "step": 38309
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0441765785217285,
      "learning_rate": 1.014019177427055e-05,
      "loss": 2.1299,
      "step": 38310
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.084626317024231,
      "learning_rate": 1.0139780077854502e-05,
      "loss": 2.2453,
      "step": 38311
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2173688411712646,
      "learning_rate": 1.0139368381201487e-05,
      "loss": 2.4202,
      "step": 38312
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1913150548934937,
      "learning_rate": 1.0138956684312204e-05,
      "loss": 2.3867,
      "step": 38313
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.261183738708496,
      "learning_rate": 1.0138544987187354e-05,
      "loss": 2.4622,
      "step": 38314
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.070586085319519,
      "learning_rate": 1.0138133289827628e-05,
      "loss": 2.5982,
      "step": 38315
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1176438331604004,
      "learning_rate": 1.0137721592233731e-05,
      "loss": 2.4464,
      "step": 38316
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0988688468933105,
      "learning_rate": 1.013730989440636e-05,
      "loss": 2.3764,
      "step": 38317
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.114571452140808,
      "learning_rate": 1.013689819634621e-05,
      "loss": 2.294,
      "step": 38318
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1612632274627686,
      "learning_rate": 1.013648649805398e-05,
      "loss": 2.4178,
      "step": 38319
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.016880989074707,
      "learning_rate": 1.0136074799530369e-05,
      "loss": 2.2227,
      "step": 38320
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.119829773902893,
      "learning_rate": 1.0135663100776074e-05,
      "loss": 2.2641,
      "step": 38321
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.05417799949646,
      "learning_rate": 1.0135251401791793e-05,
      "loss": 2.449,
      "step": 38322
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0958153009414673,
      "learning_rate": 1.0134839702578226e-05,
      "loss": 2.2574,
      "step": 38323
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0510700941085815,
      "learning_rate": 1.0134428003136066e-05,
      "loss": 2.4438,
      "step": 38324
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0535205602645874,
      "learning_rate": 1.0134016303466017e-05,
      "loss": 2.3114,
      "step": 38325
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0854178667068481,
      "learning_rate": 1.0133604603568771e-05,
      "loss": 2.408,
      "step": 38326
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.10009765625,
      "learning_rate": 1.0133192903445033e-05,
      "loss": 2.3882,
      "step": 38327
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1476789712905884,
      "learning_rate": 1.0132781203095494e-05,
      "loss": 2.196,
      "step": 38328
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0794552564620972,
      "learning_rate": 1.0132369502520856e-05,
      "loss": 2.4569,
      "step": 38329
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0907763242721558,
      "learning_rate": 1.0131957801721814e-05,
      "loss": 2.2862,
      "step": 38330
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.157566785812378,
      "learning_rate": 1.013154610069907e-05,
      "loss": 2.4154,
      "step": 38331
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9412557482719421,
      "learning_rate": 1.0131134399453318e-05,
      "loss": 2.4456,
      "step": 38332
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0995631217956543,
      "learning_rate": 1.0130722697985258e-05,
      "loss": 2.2389,
      "step": 38333
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0388580560684204,
      "learning_rate": 1.0130310996295586e-05,
      "loss": 2.3535,
      "step": 38334
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0776383876800537,
      "learning_rate": 1.0129899294385003e-05,
      "loss": 2.5788,
      "step": 38335
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9765859246253967,
      "learning_rate": 1.0129487592254206e-05,
      "loss": 2.3187,
      "step": 38336
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1344025135040283,
      "learning_rate": 1.012907588990389e-05,
      "loss": 2.3016,
      "step": 38337
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.116758108139038,
      "learning_rate": 1.0128664187334758e-05,
      "loss": 2.4716,
      "step": 38338
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0691410303115845,
      "learning_rate": 1.0128252484547504e-05,
      "loss": 2.3547,
      "step": 38339
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.110791563987732,
      "learning_rate": 1.0127840781542829e-05,
      "loss": 2.2365,
      "step": 38340
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.050071120262146,
      "learning_rate": 1.0127429078321425e-05,
      "loss": 2.3019,
      "step": 38341
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0283087491989136,
      "learning_rate": 1.0127017374883997e-05,
      "loss": 2.4391,
      "step": 38342
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.06186842918396,
      "learning_rate": 1.012660567123124e-05,
      "loss": 2.3285,
      "step": 38343
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1594330072402954,
      "learning_rate": 1.0126193967363851e-05,
      "loss": 2.5059,
      "step": 38344
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.039228916168213,
      "learning_rate": 1.012578226328253e-05,
      "loss": 2.4081,
      "step": 38345
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.065310001373291,
      "learning_rate": 1.0125370558987975e-05,
      "loss": 2.3377,
      "step": 38346
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0140725374221802,
      "learning_rate": 1.012495885448088e-05,
      "loss": 2.5027,
      "step": 38347
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0213594436645508,
      "learning_rate": 1.0124547149761947e-05,
      "loss": 2.2699,
      "step": 38348
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0850695371627808,
      "learning_rate": 1.0124135444831872e-05,
      "loss": 2.2664,
      "step": 38349
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0966923236846924,
      "learning_rate": 1.0123723739691354e-05,
      "loss": 2.3019,
      "step": 38350
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0512052774429321,
      "learning_rate": 1.0123312034341092e-05,
      "loss": 2.5416,
      "step": 38351
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1693106889724731,
      "learning_rate": 1.0122900328781784e-05,
      "loss": 2.4829,
      "step": 38352
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0389788150787354,
      "learning_rate": 1.0122488623014124e-05,
      "loss": 2.3538,
      "step": 38353
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1108235120773315,
      "learning_rate": 1.0122076917038813e-05,
      "loss": 2.5893,
      "step": 38354
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1237549781799316,
      "learning_rate": 1.0121665210856546e-05,
      "loss": 2.6353,
      "step": 38355
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0377614498138428,
      "learning_rate": 1.012125350446803e-05,
      "loss": 2.573,
      "step": 38356
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0336304903030396,
      "learning_rate": 1.012084179787395e-05,
      "loss": 2.3678,
      "step": 38357
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1891525983810425,
      "learning_rate": 1.0120430091075013e-05,
      "loss": 2.1821,
      "step": 38358
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1568340063095093,
      "learning_rate": 1.0120018384071915e-05,
      "loss": 2.3699,
      "step": 38359
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9982422590255737,
      "learning_rate": 1.0119606676865351e-05,
      "loss": 1.969,
      "step": 38360
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1179817914962769,
      "learning_rate": 1.0119194969456023e-05,
      "loss": 2.4538,
      "step": 38361
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1533252000808716,
      "learning_rate": 1.0118783261844627e-05,
      "loss": 2.4968,
      "step": 38362
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1693722009658813,
      "learning_rate": 1.0118371554031859e-05,
      "loss": 2.2991,
      "step": 38363
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.105991005897522,
      "learning_rate": 1.0117959846018424e-05,
      "loss": 2.4657,
      "step": 38364
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.142101764678955,
      "learning_rate": 1.0117548137805009e-05,
      "loss": 2.4606,
      "step": 38365
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1237199306488037,
      "learning_rate": 1.011713642939232e-05,
      "loss": 2.2958,
      "step": 38366
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0696158409118652,
      "learning_rate": 1.0116724720781055e-05,
      "loss": 2.4527,
      "step": 38367
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.125899314880371,
      "learning_rate": 1.0116313011971908e-05,
      "loss": 2.5097,
      "step": 38368
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0460444688796997,
      "learning_rate": 1.0115901302965578e-05,
      "loss": 2.5047,
      "step": 38369
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0304580926895142,
      "learning_rate": 1.0115489593762766e-05,
      "loss": 2.275,
      "step": 38370
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.972629725933075,
      "learning_rate": 1.0115077884364169e-05,
      "loss": 2.2672,
      "step": 38371
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1703248023986816,
      "learning_rate": 1.011466617477048e-05,
      "loss": 2.4153,
      "step": 38372
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.030868649482727,
      "learning_rate": 1.0114254464982402e-05,
      "loss": 2.3778,
      "step": 38373
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.20066237449646,
      "learning_rate": 1.0113842755000632e-05,
      "loss": 2.3807,
      "step": 38374
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.944671869277954,
      "learning_rate": 1.0113431044825868e-05,
      "loss": 2.4846,
      "step": 38375
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0572447776794434,
      "learning_rate": 1.0113019334458804e-05,
      "loss": 2.3906,
      "step": 38376
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1171296834945679,
      "learning_rate": 1.0112607623900145e-05,
      "loss": 2.4965,
      "step": 38377
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0948553085327148,
      "learning_rate": 1.0112195913150588e-05,
      "loss": 2.1913,
      "step": 38378
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1258013248443604,
      "learning_rate": 1.0111784202210825e-05,
      "loss": 2.1932,
      "step": 38379
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2819664478302002,
      "learning_rate": 1.0111372491081557e-05,
      "loss": 2.3546,
      "step": 38380
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0585981607437134,
      "learning_rate": 1.0110960779763484e-05,
      "loss": 2.2785,
      "step": 38381
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0137282609939575,
      "learning_rate": 1.0110549068257301e-05,
      "loss": 2.4391,
      "step": 38382
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0432193279266357,
      "learning_rate": 1.0110137356563708e-05,
      "loss": 2.2196,
      "step": 38383
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0670431852340698,
      "learning_rate": 1.0109725644683404e-05,
      "loss": 2.6036,
      "step": 38384
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9416545629501343,
      "learning_rate": 1.0109313932617084e-05,
      "loss": 2.3208,
      "step": 38385
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.087884545326233,
      "learning_rate": 1.0108902220365446e-05,
      "loss": 2.3248,
      "step": 38386
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1536357402801514,
      "learning_rate": 1.0108490507929191e-05,
      "loss": 2.4283,
      "step": 38387
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0357173681259155,
      "learning_rate": 1.0108078795309015e-05,
      "loss": 2.6653,
      "step": 38388
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0018339157104492,
      "learning_rate": 1.0107667082505615e-05,
      "loss": 2.4771,
      "step": 38389
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2169928550720215,
      "learning_rate": 1.0107255369519688e-05,
      "loss": 2.3432,
      "step": 38390
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0708671808242798,
      "learning_rate": 1.0106843656351938e-05,
      "loss": 2.2527,
      "step": 38391
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1141316890716553,
      "learning_rate": 1.010643194300306e-05,
      "loss": 2.2337,
      "step": 38392
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1097179651260376,
      "learning_rate": 1.0106020229473745e-05,
      "loss": 2.3676,
      "step": 38393
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0016412734985352,
      "learning_rate": 1.01056085157647e-05,
      "loss": 2.2725,
      "step": 38394
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0715477466583252,
      "learning_rate": 1.010519680187662e-05,
      "loss": 2.3711,
      "step": 38395
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1848326921463013,
      "learning_rate": 1.0104785087810202e-05,
      "loss": 2.2336,
      "step": 38396
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.050462007522583,
      "learning_rate": 1.0104373373566147e-05,
      "loss": 2.3427,
      "step": 38397
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2132607698440552,
      "learning_rate": 1.010396165914515e-05,
      "loss": 2.3122,
      "step": 38398
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1187840700149536,
      "learning_rate": 1.0103549944547908e-05,
      "loss": 2.2173,
      "step": 38399
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2112523317337036,
      "learning_rate": 1.0103138229775123e-05,
      "loss": 2.2562,
      "step": 38400
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0517901182174683,
      "learning_rate": 1.0102726514827488e-05,
      "loss": 2.4666,
      "step": 38401
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.087938904762268,
      "learning_rate": 1.0102314799705706e-05,
      "loss": 2.2932,
      "step": 38402
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0025919675827026,
      "learning_rate": 1.0101903084410471e-05,
      "loss": 2.5592,
      "step": 38403
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1369330883026123,
      "learning_rate": 1.0101491368942485e-05,
      "loss": 2.4385,
      "step": 38404
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0374418497085571,
      "learning_rate": 1.0101079653302441e-05,
      "loss": 2.3488,
      "step": 38405
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.278070330619812,
      "learning_rate": 1.010066793749104e-05,
      "loss": 2.4474,
      "step": 38406
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.001060962677002,
      "learning_rate": 1.0100256221508979e-05,
      "loss": 2.61,
      "step": 38407
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.926756739616394,
      "learning_rate": 1.009984450535696e-05,
      "loss": 2.4322,
      "step": 38408
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0010675191879272,
      "learning_rate": 1.0099432789035672e-05,
      "loss": 2.3182,
      "step": 38409
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.012015461921692,
      "learning_rate": 1.0099021072545824e-05,
      "loss": 2.55,
      "step": 38410
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.166351079940796,
      "learning_rate": 1.0098609355888104e-05,
      "loss": 2.4213,
      "step": 38411
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.96686190366745,
      "learning_rate": 1.0098197639063217e-05,
      "loss": 2.3197,
      "step": 38412
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9576833248138428,
      "learning_rate": 1.0097785922071856e-05,
      "loss": 2.3962,
      "step": 38413
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.072770595550537,
      "learning_rate": 1.0097374204914722e-05,
      "loss": 2.4065,
      "step": 38414
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9701164364814758,
      "learning_rate": 1.0096962487592517e-05,
      "loss": 2.5786,
      "step": 38415
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0308371782302856,
      "learning_rate": 1.009655077010593e-05,
      "loss": 2.3642,
      "step": 38416
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1041008234024048,
      "learning_rate": 1.0096139052455663e-05,
      "loss": 2.3052,
      "step": 38417
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0061951875686646,
      "learning_rate": 1.0095727334642415e-05,
      "loss": 2.399,
      "step": 38418
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0337084531784058,
      "learning_rate": 1.0095315616666883e-05,
      "loss": 2.3736,
      "step": 38419
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0188007354736328,
      "learning_rate": 1.0094903898529766e-05,
      "loss": 2.3156,
      "step": 38420
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0961482524871826,
      "learning_rate": 1.0094492180231758e-05,
      "loss": 2.4491,
      "step": 38421
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.067221760749817,
      "learning_rate": 1.0094080461773563e-05,
      "loss": 2.4981,
      "step": 38422
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.045884132385254,
      "learning_rate": 1.0093668743155876e-05,
      "loss": 2.412,
      "step": 38423
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0977915525436401,
      "learning_rate": 1.0093257024379395e-05,
      "loss": 2.4745,
      "step": 38424
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.186245083808899,
      "learning_rate": 1.0092845305444816e-05,
      "loss": 2.1475,
      "step": 38425
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.032752513885498,
      "learning_rate": 1.009243358635284e-05,
      "loss": 2.2524,
      "step": 38426
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0066351890563965,
      "learning_rate": 1.0092021867104165e-05,
      "loss": 2.2292,
      "step": 38427
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0551471710205078,
      "learning_rate": 1.0091610147699487e-05,
      "loss": 2.2296,
      "step": 38428
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0704922676086426,
      "learning_rate": 1.0091198428139504e-05,
      "loss": 2.2743,
      "step": 38429
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1543418169021606,
      "learning_rate": 1.0090786708424918e-05,
      "loss": 2.3839,
      "step": 38430
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0590721368789673,
      "learning_rate": 1.0090374988556421e-05,
      "loss": 2.2205,
      "step": 38431
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.100207805633545,
      "learning_rate": 1.0089963268534715e-05,
      "loss": 2.353,
      "step": 38432
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0102622509002686,
      "learning_rate": 1.0089551548360497e-05,
      "loss": 2.3129,
      "step": 38433
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1196033954620361,
      "learning_rate": 1.0089139828034464e-05,
      "loss": 2.502,
      "step": 38434
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.026005744934082,
      "learning_rate": 1.0088728107557315e-05,
      "loss": 2.386,
      "step": 38435
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0273199081420898,
      "learning_rate": 1.0088316386929746e-05,
      "loss": 2.3333,
      "step": 38436
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1495510339736938,
      "learning_rate": 1.0087904666152461e-05,
      "loss": 2.3232,
      "step": 38437
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.127850890159607,
      "learning_rate": 1.0087492945226152e-05,
      "loss": 2.3929,
      "step": 38438
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2043579816818237,
      "learning_rate": 1.0087081224151518e-05,
      "loss": 2.4305,
      "step": 38439
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0739293098449707,
      "learning_rate": 1.0086669502929256e-05,
      "loss": 2.1363,
      "step": 38440
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9845311045646667,
      "learning_rate": 1.0086257781560068e-05,
      "loss": 2.0473,
      "step": 38441
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1397333145141602,
      "learning_rate": 1.0085846060044648e-05,
      "loss": 2.4168,
      "step": 38442
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1020766496658325,
      "learning_rate": 1.0085434338383699e-05,
      "loss": 2.2309,
      "step": 38443
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.00168776512146,
      "learning_rate": 1.0085022616577913e-05,
      "loss": 2.4179,
      "step": 38444
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.136472225189209,
      "learning_rate": 1.008461089462799e-05,
      "loss": 2.511,
      "step": 38445
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0048447847366333,
      "learning_rate": 1.0084199172534633e-05,
      "loss": 2.5015,
      "step": 38446
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.309828519821167,
      "learning_rate": 1.008378745029853e-05,
      "loss": 2.532,
      "step": 38447
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.16374671459198,
      "learning_rate": 1.0083375727920386e-05,
      "loss": 2.5021,
      "step": 38448
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.115065336227417,
      "learning_rate": 1.0082964005400897e-05,
      "loss": 2.4421,
      "step": 38449
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.076728105545044,
      "learning_rate": 1.0082552282740764e-05,
      "loss": 2.1778,
      "step": 38450
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.074090838432312,
      "learning_rate": 1.008214055994068e-05,
      "loss": 2.4321,
      "step": 38451
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0560038089752197,
      "learning_rate": 1.0081728837001348e-05,
      "loss": 2.2995,
      "step": 38452
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0422852039337158,
      "learning_rate": 1.008131711392346e-05,
      "loss": 2.5011,
      "step": 38453
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1091283559799194,
      "learning_rate": 1.008090539070772e-05,
      "loss": 2.2661,
      "step": 38454
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2475162744522095,
      "learning_rate": 1.0080493667354823e-05,
      "loss": 2.4129,
      "step": 38455
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2650012969970703,
      "learning_rate": 1.0080081943865466e-05,
      "loss": 2.2656,
      "step": 38456
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.071787714958191,
      "learning_rate": 1.007967022024035e-05,
      "loss": 2.1567,
      "step": 38457
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0496362447738647,
      "learning_rate": 1.0079258496480172e-05,
      "loss": 2.2593,
      "step": 38458
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1377458572387695,
      "learning_rate": 1.0078846772585626e-05,
      "loss": 2.4796,
      "step": 38459
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1121163368225098,
      "learning_rate": 1.0078435048557416e-05,
      "loss": 2.4208,
      "step": 38460
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.014636754989624,
      "learning_rate": 1.0078023324396236e-05,
      "loss": 2.2943,
      "step": 38461
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1766092777252197,
      "learning_rate": 1.0077611600102785e-05,
      "loss": 2.4854,
      "step": 38462
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1803641319274902,
      "learning_rate": 1.0077199875677763e-05,
      "loss": 2.159,
      "step": 38463
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9841715097427368,
      "learning_rate": 1.0076788151121866e-05,
      "loss": 2.2961,
      "step": 38464
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.122306227684021,
      "learning_rate": 1.007637642643579e-05,
      "loss": 2.2931,
      "step": 38465
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0167951583862305,
      "learning_rate": 1.0075964701620238e-05,
      "loss": 2.3055,
      "step": 38466
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0657858848571777,
      "learning_rate": 1.0075552976675901e-05,
      "loss": 2.2976,
      "step": 38467
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3752111196517944,
      "learning_rate": 1.0075141251603487e-05,
      "loss": 2.1836,
      "step": 38468
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0964233875274658,
      "learning_rate": 1.0074729526403683e-05,
      "loss": 2.0766,
      "step": 38469
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0695128440856934,
      "learning_rate": 1.0074317801077196e-05,
      "loss": 2.1729,
      "step": 38470
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.137967586517334,
      "learning_rate": 1.007390607562472e-05,
      "loss": 2.1664,
      "step": 38471
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.028140902519226,
      "learning_rate": 1.0073494350046951e-05,
      "loss": 2.5699,
      "step": 38472
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0785037279129028,
      "learning_rate": 1.0073082624344588e-05,
      "loss": 2.2259,
      "step": 38473
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1347358226776123,
      "learning_rate": 1.0072670898518334e-05,
      "loss": 2.4284,
      "step": 38474
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9965214133262634,
      "learning_rate": 1.0072259172568882e-05,
      "loss": 2.1893,
      "step": 38475
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1721999645233154,
      "learning_rate": 1.0071847446496929e-05,
      "loss": 2.3758,
      "step": 38476
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0672298669815063,
      "learning_rate": 1.0071435720303178e-05,
      "loss": 2.4518,
      "step": 38477
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1769001483917236,
      "learning_rate": 1.0071023993988322e-05,
      "loss": 2.5055,
      "step": 38478
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1187465190887451,
      "learning_rate": 1.0070612267553062e-05,
      "loss": 2.4698,
      "step": 38479
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.125005841255188,
      "learning_rate": 1.0070200540998092e-05,
      "loss": 2.3733,
      "step": 38480
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1482619047164917,
      "learning_rate": 1.0069788814324115e-05,
      "loss": 2.3286,
      "step": 38481
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9690588712692261,
      "learning_rate": 1.0069377087531828e-05,
      "loss": 2.1575,
      "step": 38482
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1665953397750854,
      "learning_rate": 1.006896536062193e-05,
      "loss": 2.4149,
      "step": 38483
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0195873975753784,
      "learning_rate": 1.0068553633595112e-05,
      "loss": 2.2891,
      "step": 38484
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.15741765499115,
      "learning_rate": 1.006814190645208e-05,
      "loss": 2.334,
      "step": 38485
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0806214809417725,
      "learning_rate": 1.0067730179193527e-05,
      "loss": 2.276,
      "step": 38486
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0922359228134155,
      "learning_rate": 1.0067318451820154e-05,
      "loss": 2.2491,
      "step": 38487
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0515445470809937,
      "learning_rate": 1.0066906724332659e-05,
      "loss": 2.4163,
      "step": 38488
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.014808177947998,
      "learning_rate": 1.0066494996731738e-05,
      "loss": 2.0036,
      "step": 38489
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0149494409561157,
      "learning_rate": 1.006608326901809e-05,
      "loss": 2.2769,
      "step": 38490
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.032875418663025,
      "learning_rate": 1.0065671541192413e-05,
      "loss": 2.3374,
      "step": 38491
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0747853517532349,
      "learning_rate": 1.0065259813255404e-05,
      "loss": 2.3962,
      "step": 38492
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0404267311096191,
      "learning_rate": 1.0064848085207764e-05,
      "loss": 2.2874,
      "step": 38493
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2229204177856445,
      "learning_rate": 1.0064436357050186e-05,
      "loss": 2.4208,
      "step": 38494
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0698248147964478,
      "learning_rate": 1.0064024628783371e-05,
      "loss": 2.2066,
      "step": 38495
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0755226612091064,
      "learning_rate": 1.0063612900408021e-05,
      "loss": 2.4686,
      "step": 38496
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9453892111778259,
      "learning_rate": 1.0063201171924828e-05,
      "loss": 2.4922,
      "step": 38497
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0199397802352905,
      "learning_rate": 1.0062789443334488e-05,
      "loss": 2.3227,
      "step": 38498
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0494916439056396,
      "learning_rate": 1.0062377714637707e-05,
      "loss": 2.2301,
      "step": 38499
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.098616361618042,
      "learning_rate": 1.0061965985835178e-05,
      "loss": 2.2581,
      "step": 38500
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.054066777229309,
      "learning_rate": 1.00615542569276e-05,
      "loss": 2.2014,
      "step": 38501
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2036731243133545,
      "learning_rate": 1.006114252791567e-05,
      "loss": 2.5845,
      "step": 38502
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2200431823730469,
      "learning_rate": 1.0060730798800086e-05,
      "loss": 2.4749,
      "step": 38503
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.221238136291504,
      "learning_rate": 1.0060319069581547e-05,
      "loss": 2.4696,
      "step": 38504
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0615203380584717,
      "learning_rate": 1.0059907340260756e-05,
      "loss": 2.3968,
      "step": 38505
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9759595990180969,
      "learning_rate": 1.0059495610838402e-05,
      "loss": 2.168,
      "step": 38506
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.023861289024353,
      "learning_rate": 1.0059083881315183e-05,
      "loss": 2.4708,
      "step": 38507
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1376625299453735,
      "learning_rate": 1.0058672151691806e-05,
      "loss": 2.4755,
      "step": 38508
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2155036926269531,
      "learning_rate": 1.0058260421968964e-05,
      "loss": 2.3301,
      "step": 38509
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1966181993484497,
      "learning_rate": 1.0057848692147352e-05,
      "loss": 2.4607,
      "step": 38510
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1854138374328613,
      "learning_rate": 1.005743696222767e-05,
      "loss": 2.5793,
      "step": 38511
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9768896698951721,
      "learning_rate": 1.0057025232210617e-05,
      "loss": 2.3165,
      "step": 38512
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0844390392303467,
      "learning_rate": 1.0056613502096893e-05,
      "loss": 2.2831,
      "step": 38513
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1027684211730957,
      "learning_rate": 1.0056201771887191e-05,
      "loss": 2.0744,
      "step": 38514
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1042304039001465,
      "learning_rate": 1.0055790041582213e-05,
      "loss": 2.5869,
      "step": 38515
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0495357513427734,
      "learning_rate": 1.0055378311182658e-05,
      "loss": 2.4527,
      "step": 38516
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.044359803199768,
      "learning_rate": 1.005496658068922e-05,
      "loss": 2.4494,
      "step": 38517
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0660945177078247,
      "learning_rate": 1.0054554850102597e-05,
      "loss": 2.3005,
      "step": 38518
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0360476970672607,
      "learning_rate": 1.0054143119423489e-05,
      "loss": 2.3019,
      "step": 38519
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2160966396331787,
      "learning_rate": 1.0053731388652597e-05,
      "loss": 2.243,
      "step": 38520
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1675342321395874,
      "learning_rate": 1.0053319657790613e-05,
      "loss": 2.0636,
      "step": 38521
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0897969007492065,
      "learning_rate": 1.005290792683824e-05,
      "loss": 2.3355,
      "step": 38522
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0296083688735962,
      "learning_rate": 1.005249619579617e-05,
      "loss": 2.4255,
      "step": 38523
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.994998574256897,
      "learning_rate": 1.0052084464665108e-05,
      "loss": 2.4794,
      "step": 38524
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0729215145111084,
      "learning_rate": 1.0051672733445745e-05,
      "loss": 2.4254,
      "step": 38525
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0344258546829224,
      "learning_rate": 1.0051261002138785e-05,
      "loss": 2.2437,
      "step": 38526
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0342087745666504,
      "learning_rate": 1.0050849270744925e-05,
      "loss": 2.4263,
      "step": 38527
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0751498937606812,
      "learning_rate": 1.0050437539264858e-05,
      "loss": 2.4702,
      "step": 38528
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0471529960632324,
      "learning_rate": 1.005002580769929e-05,
      "loss": 2.3933,
      "step": 38529
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0853493213653564,
      "learning_rate": 1.0049614076048915e-05,
      "loss": 2.2381,
      "step": 38530
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0884172916412354,
      "learning_rate": 1.0049202344314427e-05,
      "loss": 2.291,
      "step": 38531
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0400042533874512,
      "learning_rate": 1.0048790612496528e-05,
      "loss": 2.2724,
      "step": 38532
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.067298173904419,
      "learning_rate": 1.004837888059592e-05,
      "loss": 2.288,
      "step": 38533
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0445213317871094,
      "learning_rate": 1.0047967148613292e-05,
      "loss": 2.4113,
      "step": 38534
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1443333625793457,
      "learning_rate": 1.0047555416549349e-05,
      "loss": 2.4415,
      "step": 38535
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1204885244369507,
      "learning_rate": 1.0047143684404785e-05,
      "loss": 2.3177,
      "step": 38536
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0737435817718506,
      "learning_rate": 1.0046731952180302e-05,
      "loss": 2.3121,
      "step": 38537
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9813777804374695,
      "learning_rate": 1.0046320219876592e-05,
      "loss": 2.3578,
      "step": 38538
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3226087093353271,
      "learning_rate": 1.004590848749436e-05,
      "loss": 2.4904,
      "step": 38539
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1030319929122925,
      "learning_rate": 1.0045496755034298e-05,
      "loss": 2.2889,
      "step": 38540
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2004822492599487,
      "learning_rate": 1.0045085022497107e-05,
      "loss": 2.467,
      "step": 38541
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1747320890426636,
      "learning_rate": 1.0044673289883489e-05,
      "loss": 2.4915,
      "step": 38542
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0614694356918335,
      "learning_rate": 1.0044261557194132e-05,
      "loss": 2.3143,
      "step": 38543
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3265421390533447,
      "learning_rate": 1.0043849824429744e-05,
      "loss": 2.2817,
      "step": 38544
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1774208545684814,
      "learning_rate": 1.0043438091591016e-05,
      "loss": 2.4063,
      "step": 38545
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0212008953094482,
      "learning_rate": 1.0043026358678647e-05,
      "loss": 2.4166,
      "step": 38546
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2779909372329712,
      "learning_rate": 1.0042614625693339e-05,
      "loss": 2.154,
      "step": 38547
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1085281372070312,
      "learning_rate": 1.0042202892635786e-05,
      "loss": 2.1554,
      "step": 38548
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.120195984840393,
      "learning_rate": 1.004179115950669e-05,
      "loss": 2.4451,
      "step": 38549
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0917160511016846,
      "learning_rate": 1.0041379426306747e-05,
      "loss": 2.0277,
      "step": 38550
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2621856927871704,
      "learning_rate": 1.0040967693036654e-05,
      "loss": 2.4397,
      "step": 38551
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1684679985046387,
      "learning_rate": 1.0040555959697106e-05,
      "loss": 2.2707,
      "step": 38552
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.053267240524292,
      "learning_rate": 1.0040144226288809e-05,
      "loss": 2.4297,
      "step": 38553
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0701308250427246,
      "learning_rate": 1.0039732492812455e-05,
      "loss": 2.2102,
      "step": 38554
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.083376169204712,
      "learning_rate": 1.0039320759268745e-05,
      "loss": 2.2682,
      "step": 38555
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0835179090499878,
      "learning_rate": 1.0038909025658374e-05,
      "loss": 2.4401,
      "step": 38556
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.133455753326416,
      "learning_rate": 1.003849729198204e-05,
      "loss": 2.3403,
      "step": 38557
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2451733350753784,
      "learning_rate": 1.0038085558240447e-05,
      "loss": 2.7474,
      "step": 38558
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1390324831008911,
      "learning_rate": 1.0037673824434286e-05,
      "loss": 2.3175,
      "step": 38559
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.053957223892212,
      "learning_rate": 1.0037262090564257e-05,
      "loss": 2.0627,
      "step": 38560
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0577327013015747,
      "learning_rate": 1.0036850356631063e-05,
      "loss": 2.2208,
      "step": 38561
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1587790250778198,
      "learning_rate": 1.0036438622635391e-05,
      "loss": 2.5837,
      "step": 38562
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1956820487976074,
      "learning_rate": 1.003602688857795e-05,
      "loss": 2.4812,
      "step": 38563
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0549644231796265,
      "learning_rate": 1.0035615154459435e-05,
      "loss": 2.2178,
      "step": 38564
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1661287546157837,
      "learning_rate": 1.003520342028054e-05,
      "loss": 2.4342,
      "step": 38565
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0246212482452393,
      "learning_rate": 1.0034791686041965e-05,
      "loss": 2.4171,
      "step": 38566
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0671979188919067,
      "learning_rate": 1.0034379951744408e-05,
      "loss": 2.4668,
      "step": 38567
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0464022159576416,
      "learning_rate": 1.003396821738857e-05,
      "loss": 2.1616,
      "step": 38568
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1912482976913452,
      "learning_rate": 1.0033556482975147e-05,
      "loss": 2.4114,
      "step": 38569
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1811809539794922,
      "learning_rate": 1.0033144748504836e-05,
      "loss": 2.2255,
      "step": 38570
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0092097520828247,
      "learning_rate": 1.0032733013978336e-05,
      "loss": 2.3868,
      "step": 38571
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0894051790237427,
      "learning_rate": 1.0032321279396341e-05,
      "loss": 2.138,
      "step": 38572
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.112318992614746,
      "learning_rate": 1.0031909544759554e-05,
      "loss": 2.1926,
      "step": 38573
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.129408359527588,
      "learning_rate": 1.0031497810068674e-05,
      "loss": 2.4727,
      "step": 38574
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1516010761260986,
      "learning_rate": 1.0031086075324398e-05,
      "loss": 2.2455,
      "step": 38575
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0787644386291504,
      "learning_rate": 1.003067434052742e-05,
      "loss": 2.3452,
      "step": 38576
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.107908010482788,
      "learning_rate": 1.003026260567844e-05,
      "loss": 2.3918,
      "step": 38577
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1311659812927246,
      "learning_rate": 1.0029850870778157e-05,
      "loss": 2.1724,
      "step": 38578
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1805330514907837,
      "learning_rate": 1.002943913582727e-05,
      "loss": 2.3747,
      "step": 38579
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1234912872314453,
      "learning_rate": 1.0029027400826474e-05,
      "loss": 2.2599,
      "step": 38580
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2042063474655151,
      "learning_rate": 1.0028615665776469e-05,
      "loss": 2.5713,
      "step": 38581
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0748546123504639,
      "learning_rate": 1.0028203930677955e-05,
      "loss": 2.3612,
      "step": 38582
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.081609845161438,
      "learning_rate": 1.0027792195531627e-05,
      "loss": 2.3646,
      "step": 38583
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1183351278305054,
      "learning_rate": 1.0027380460338184e-05,
      "loss": 2.327,
      "step": 38584
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1327155828475952,
      "learning_rate": 1.0026968725098321e-05,
      "loss": 2.3285,
      "step": 38585
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3343091011047363,
      "learning_rate": 1.0026556989812739e-05,
      "loss": 2.5942,
      "step": 38586
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0633009672164917,
      "learning_rate": 1.0026145254482135e-05,
      "loss": 2.3525,
      "step": 38587
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0613194704055786,
      "learning_rate": 1.0025733519107213e-05,
      "loss": 2.5052,
      "step": 38588
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1309657096862793,
      "learning_rate": 1.0025321783688662e-05,
      "loss": 2.1574,
      "step": 38589
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3339924812316895,
      "learning_rate": 1.0024910048227183e-05,
      "loss": 2.2347,
      "step": 38590
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0452088117599487,
      "learning_rate": 1.0024498312723475e-05,
      "loss": 2.255,
      "step": 38591
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0667078495025635,
      "learning_rate": 1.0024086577178238e-05,
      "loss": 2.4139,
      "step": 38592
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0880197286605835,
      "learning_rate": 1.0023674841592166e-05,
      "loss": 2.3789,
      "step": 38593
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1083884239196777,
      "learning_rate": 1.002326310596596e-05,
      "loss": 2.1038,
      "step": 38594
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.061788558959961,
      "learning_rate": 1.0022851370300315e-05,
      "loss": 2.3227,
      "step": 38595
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.110822081565857,
      "learning_rate": 1.0022439634595931e-05,
      "loss": 2.461,
      "step": 38596
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1703948974609375,
      "learning_rate": 1.0022027898853505e-05,
      "loss": 2.3504,
      "step": 38597
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0394881963729858,
      "learning_rate": 1.0021616163073737e-05,
      "loss": 2.1765,
      "step": 38598
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2293516397476196,
      "learning_rate": 1.0021204427257323e-05,
      "loss": 2.1025,
      "step": 38599
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1226545572280884,
      "learning_rate": 1.0020792691404962e-05,
      "loss": 2.1759,
      "step": 38600
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1149057149887085,
      "learning_rate": 1.0020380955517351e-05,
      "loss": 2.4092,
      "step": 38601
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.368782639503479,
      "learning_rate": 1.0019969219595191e-05,
      "loss": 2.2765,
      "step": 38602
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.056709885597229,
      "learning_rate": 1.0019557483639176e-05,
      "loss": 2.0768,
      "step": 38603
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0915671586990356,
      "learning_rate": 1.0019145747650006e-05,
      "loss": 2.4459,
      "step": 38604
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2124419212341309,
      "learning_rate": 1.0018734011628379e-05,
      "loss": 2.5559,
      "step": 38605
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.057863712310791,
      "learning_rate": 1.0018322275574992e-05,
      "loss": 2.294,
      "step": 38606
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0125069618225098,
      "learning_rate": 1.0017910539490543e-05,
      "loss": 2.381,
      "step": 38607
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1657530069351196,
      "learning_rate": 1.0017498803375734e-05,
      "loss": 2.0237,
      "step": 38608
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2582354545593262,
      "learning_rate": 1.0017087067231257e-05,
      "loss": 2.3083,
      "step": 38609
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.037327766418457,
      "learning_rate": 1.0016675331057815e-05,
      "loss": 2.5092,
      "step": 38610
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.127004861831665,
      "learning_rate": 1.0016263594856102e-05,
      "loss": 2.5672,
      "step": 38611
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1620420217514038,
      "learning_rate": 1.0015851858626817e-05,
      "loss": 2.1611,
      "step": 38612
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1348249912261963,
      "learning_rate": 1.0015440122370659e-05,
      "loss": 2.4439,
      "step": 38613
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1804746389389038,
      "learning_rate": 1.0015028386088329e-05,
      "loss": 2.3862,
      "step": 38614
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2060155868530273,
      "learning_rate": 1.001461664978052e-05,
      "loss": 2.3892,
      "step": 38615
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0150072574615479,
      "learning_rate": 1.001420491344793e-05,
      "loss": 2.1792,
      "step": 38616
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1002693176269531,
      "learning_rate": 1.001379317709126e-05,
      "loss": 2.4802,
      "step": 38617
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0980541706085205,
      "learning_rate": 1.0013381440711208e-05,
      "loss": 2.4904,
      "step": 38618
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2025511264801025,
      "learning_rate": 1.0012969704308469e-05,
      "loss": 2.3143,
      "step": 38619
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3010932207107544,
      "learning_rate": 1.0012557967883744e-05,
      "loss": 2.2355,
      "step": 38620
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0154104232788086,
      "learning_rate": 1.0012146231437732e-05,
      "loss": 2.5135,
      "step": 38621
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1926789283752441,
      "learning_rate": 1.0011734494971123e-05,
      "loss": 2.3994,
      "step": 38622
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0631251335144043,
      "learning_rate": 1.0011322758484628e-05,
      "loss": 2.23,
      "step": 38623
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.11944580078125,
      "learning_rate": 1.0010911021978932e-05,
      "loss": 2.3873,
      "step": 38624
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1330691576004028,
      "learning_rate": 1.0010499285454742e-05,
      "loss": 2.6712,
      "step": 38625
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0627484321594238,
      "learning_rate": 1.0010087548912753e-05,
      "loss": 2.1176,
      "step": 38626
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1653565168380737,
      "learning_rate": 1.0009675812353662e-05,
      "loss": 2.4157,
      "step": 38627
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.195288896560669,
      "learning_rate": 1.0009264075778167e-05,
      "loss": 2.4137,
      "step": 38628
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0159579515457153,
      "learning_rate": 1.0008852339186968e-05,
      "loss": 2.2186,
      "step": 38629
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9674634337425232,
      "learning_rate": 1.000844060258076e-05,
      "loss": 2.1648,
      "step": 38630
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1061890125274658,
      "learning_rate": 1.0008028865960244e-05,
      "loss": 2.3771,
      "step": 38631
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0478832721710205,
      "learning_rate": 1.0007617129326117e-05,
      "loss": 2.4099,
      "step": 38632
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1442418098449707,
      "learning_rate": 1.0007205392679075e-05,
      "loss": 2.2528,
      "step": 38633
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1508029699325562,
      "learning_rate": 1.0006793656019823e-05,
      "loss": 2.472,
      "step": 38634
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.108819603919983,
      "learning_rate": 1.000638191934905e-05,
      "loss": 2.3793,
      "step": 38635
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3718788623809814,
      "learning_rate": 1.000597018266746e-05,
      "loss": 2.4143,
      "step": 38636
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2093627452850342,
      "learning_rate": 1.0005558445975747e-05,
      "loss": 2.2896,
      "step": 38637
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1489404439926147,
      "learning_rate": 1.0005146709274612e-05,
      "loss": 2.4841,
      "step": 38638
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0674035549163818,
      "learning_rate": 1.0004734972564749e-05,
      "loss": 2.2786,
      "step": 38639
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0939579010009766,
      "learning_rate": 1.0004323235846862e-05,
      "loss": 2.3083,
      "step": 38640
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0111777782440186,
      "learning_rate": 1.0003911499121646e-05,
      "loss": 2.3456,
      "step": 38641
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1431066989898682,
      "learning_rate": 1.00034997623898e-05,
      "loss": 2.3347,
      "step": 38642
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0308723449707031,
      "learning_rate": 1.0003088025652016e-05,
      "loss": 2.2931,
      "step": 38643
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0910438299179077,
      "learning_rate": 1.0002676288909e-05,
      "loss": 2.6144,
      "step": 38644
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.032496690750122,
      "learning_rate": 1.000226455216145e-05,
      "loss": 2.2634,
      "step": 38645
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0833872556686401,
      "learning_rate": 1.0001852815410057e-05,
      "loss": 2.1829,
      "step": 38646
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.100059151649475,
      "learning_rate": 1.0001441078655523e-05,
      "loss": 2.3799,
      "step": 38647
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0622889995574951,
      "learning_rate": 1.000102934189855e-05,
      "loss": 2.6274,
      "step": 38648
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1189701557159424,
      "learning_rate": 1.0000617605139826e-05,
      "loss": 2.0147,
      "step": 38649
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0415223836898804,
      "learning_rate": 1.000020586838006e-05,
      "loss": 2.2463,
      "step": 38650
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9969229102134705,
      "learning_rate": 9.999794131619944e-06,
      "loss": 2.4628,
      "step": 38651
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1311800479888916,
      "learning_rate": 9.999382394860175e-06,
      "loss": 2.4628,
      "step": 38652
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9893664121627808,
      "learning_rate": 9.998970658101454e-06,
      "loss": 2.2371,
      "step": 38653
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.217046856880188,
      "learning_rate": 9.99855892134448e-06,
      "loss": 2.3073,
      "step": 38654
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0465000867843628,
      "learning_rate": 9.998147184589947e-06,
      "loss": 2.2304,
      "step": 38655
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0264376401901245,
      "learning_rate": 9.997735447838555e-06,
      "loss": 2.2878,
      "step": 38656
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1846204996109009,
      "learning_rate": 9.997323711091001e-06,
      "loss": 2.4188,
      "step": 38657
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0668646097183228,
      "learning_rate": 9.996911974347985e-06,
      "loss": 2.1966,
      "step": 38658
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1122151613235474,
      "learning_rate": 9.996500237610204e-06,
      "loss": 2.308,
      "step": 38659
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1052700281143188,
      "learning_rate": 9.996088500878355e-06,
      "loss": 2.155,
      "step": 38660
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0360230207443237,
      "learning_rate": 9.99567676415314e-06,
      "loss": 2.3906,
      "step": 38661
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0757938623428345,
      "learning_rate": 9.995265027435251e-06,
      "loss": 2.4088,
      "step": 38662
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0634543895721436,
      "learning_rate": 9.99485329072539e-06,
      "loss": 2.3408,
      "step": 38663
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9948243498802185,
      "learning_rate": 9.994441554024258e-06,
      "loss": 2.2686,
      "step": 38664
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1634910106658936,
      "learning_rate": 9.994029817332546e-06,
      "loss": 2.6708,
      "step": 38665
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0400501489639282,
      "learning_rate": 9.993618080650955e-06,
      "loss": 2.246,
      "step": 38666
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1032383441925049,
      "learning_rate": 9.993206343980182e-06,
      "loss": 2.5203,
      "step": 38667
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.055292010307312,
      "learning_rate": 9.992794607320926e-06,
      "loss": 2.3097,
      "step": 38668
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0624393224716187,
      "learning_rate": 9.992382870673886e-06,
      "loss": 2.4817,
      "step": 38669
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0866918563842773,
      "learning_rate": 9.991971134039758e-06,
      "loss": 2.5912,
      "step": 38670
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0069139003753662,
      "learning_rate": 9.991559397419243e-06,
      "loss": 2.1588,
      "step": 38671
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0687564611434937,
      "learning_rate": 9.991147660813035e-06,
      "loss": 2.2887,
      "step": 38672
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0536563396453857,
      "learning_rate": 9.990735924221836e-06,
      "loss": 2.3842,
      "step": 38673
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0392022132873535,
      "learning_rate": 9.99032418764634e-06,
      "loss": 2.4307,
      "step": 38674
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0826737880706787,
      "learning_rate": 9.989912451087249e-06,
      "loss": 2.317,
      "step": 38675
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9692473411560059,
      "learning_rate": 9.989500714545261e-06,
      "loss": 2.47,
      "step": 38676
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0992629528045654,
      "learning_rate": 9.989088978021071e-06,
      "loss": 2.4277,
      "step": 38677
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0658729076385498,
      "learning_rate": 9.988677241515377e-06,
      "loss": 2.4567,
      "step": 38678
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0620386600494385,
      "learning_rate": 9.988265505028878e-06,
      "loss": 2.4895,
      "step": 38679
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0540751218795776,
      "learning_rate": 9.987853768562273e-06,
      "loss": 2.2528,
      "step": 38680
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9567081928253174,
      "learning_rate": 9.987442032116257e-06,
      "loss": 2.197,
      "step": 38681
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0215537548065186,
      "learning_rate": 9.987030295691534e-06,
      "loss": 2.3766,
      "step": 38682
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0913524627685547,
      "learning_rate": 9.986618559288795e-06,
      "loss": 2.4146,
      "step": 38683
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.83049476146698,
      "learning_rate": 9.986206822908742e-06,
      "loss": 2.095,
      "step": 38684
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1311945915222168,
      "learning_rate": 9.985795086552073e-06,
      "loss": 2.5038,
      "step": 38685
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0513325929641724,
      "learning_rate": 9.985383350219483e-06,
      "loss": 2.3247,
      "step": 38686
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2202383279800415,
      "learning_rate": 9.984971613911673e-06,
      "loss": 2.4082,
      "step": 38687
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7048351764678955,
      "learning_rate": 9.984559877629341e-06,
      "loss": 2.4098,
      "step": 38688
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1570544242858887,
      "learning_rate": 9.984148141373183e-06,
      "loss": 2.3109,
      "step": 38689
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9962157011032104,
      "learning_rate": 9.9837364051439e-06,
      "loss": 2.3406,
      "step": 38690
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1608518362045288,
      "learning_rate": 9.98332466894219e-06,
      "loss": 2.3371,
      "step": 38691
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.053324818611145,
      "learning_rate": 9.982912932768746e-06,
      "loss": 2.2758,
      "step": 38692
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2343872785568237,
      "learning_rate": 9.98250119662427e-06,
      "loss": 2.3654,
      "step": 38693
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0707963705062866,
      "learning_rate": 9.98208946050946e-06,
      "loss": 2.4005,
      "step": 38694
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0113961696624756,
      "learning_rate": 9.981677724425011e-06,
      "loss": 2.4618,
      "step": 38695
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5645499229431152,
      "learning_rate": 9.981265988371624e-06,
      "loss": 2.2415,
      "step": 38696
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1428433656692505,
      "learning_rate": 9.980854252349997e-06,
      "loss": 2.1682,
      "step": 38697
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0893968343734741,
      "learning_rate": 9.980442516360826e-06,
      "loss": 2.2476,
      "step": 38698
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2240355014801025,
      "learning_rate": 9.980030780404812e-06,
      "loss": 2.2441,
      "step": 38699
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.217381477355957,
      "learning_rate": 9.97961904448265e-06,
      "loss": 2.2495,
      "step": 38700
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0077128410339355,
      "learning_rate": 9.97920730859504e-06,
      "loss": 2.3115,
      "step": 38701
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.064522624015808,
      "learning_rate": 9.978795572742677e-06,
      "loss": 2.3154,
      "step": 38702
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0644627809524536,
      "learning_rate": 9.978383836926265e-06,
      "loss": 2.2501,
      "step": 38703
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0324699878692627,
      "learning_rate": 9.9779721011465e-06,
      "loss": 2.3889,
      "step": 38704
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0426617860794067,
      "learning_rate": 9.977560365404074e-06,
      "loss": 2.4742,
      "step": 38705
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0689777135849,
      "learning_rate": 9.97714862969969e-06,
      "loss": 2.2829,
      "step": 38706
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2530813217163086,
      "learning_rate": 9.976736894034046e-06,
      "loss": 2.3807,
      "step": 38707
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2045845985412598,
      "learning_rate": 9.976325158407838e-06,
      "loss": 2.2582,
      "step": 38708
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1109399795532227,
      "learning_rate": 9.975913422821765e-06,
      "loss": 2.3646,
      "step": 38709
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3616465330123901,
      "learning_rate": 9.975501687276527e-06,
      "loss": 2.4016,
      "step": 38710
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0492430925369263,
      "learning_rate": 9.97508995177282e-06,
      "loss": 2.1952,
      "step": 38711
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1125562191009521,
      "learning_rate": 9.974678216311342e-06,
      "loss": 2.4961,
      "step": 38712
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.097118616104126,
      "learning_rate": 9.97426648089279e-06,
      "loss": 2.3349,
      "step": 38713
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.027757167816162,
      "learning_rate": 9.973854745517865e-06,
      "loss": 2.4154,
      "step": 38714
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0003135204315186,
      "learning_rate": 9.973443010187261e-06,
      "loss": 2.3379,
      "step": 38715
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0323907136917114,
      "learning_rate": 9.97303127490168e-06,
      "loss": 2.2959,
      "step": 38716
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.232051968574524,
      "learning_rate": 9.972619539661823e-06,
      "loss": 2.0963,
      "step": 38717
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0469565391540527,
      "learning_rate": 9.97220780446838e-06,
      "loss": 2.4333,
      "step": 38718
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1018494367599487,
      "learning_rate": 9.97179606932205e-06,
      "loss": 2.5074,
      "step": 38719
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.965670108795166,
      "learning_rate": 9.971384334223533e-06,
      "loss": 2.52,
      "step": 38720
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0705831050872803,
      "learning_rate": 9.97097259917353e-06,
      "loss": 2.2381,
      "step": 38721
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1710679531097412,
      "learning_rate": 9.970560864172733e-06,
      "loss": 2.3996,
      "step": 38722
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1299216747283936,
      "learning_rate": 9.970149129221846e-06,
      "loss": 2.3059,
      "step": 38723
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0803866386413574,
      "learning_rate": 9.969737394321562e-06,
      "loss": 2.4212,
      "step": 38724
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1335984468460083,
      "learning_rate": 9.969325659472583e-06,
      "loss": 2.6028,
      "step": 38725
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9872013926506042,
      "learning_rate": 9.968913924675605e-06,
      "loss": 2.3268,
      "step": 38726
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.216399908065796,
      "learning_rate": 9.968502189931326e-06,
      "loss": 2.4971,
      "step": 38727
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.953222930431366,
      "learning_rate": 9.968090455240444e-06,
      "loss": 2.2712,
      "step": 38728
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1264548301696777,
      "learning_rate": 9.967678720603659e-06,
      "loss": 2.3075,
      "step": 38729
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2934428453445435,
      "learning_rate": 9.967266986021671e-06,
      "loss": 2.3593,
      "step": 38730
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0555846691131592,
      "learning_rate": 9.96685525149517e-06,
      "loss": 2.5373,
      "step": 38731
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0657908916473389,
      "learning_rate": 9.966443517024858e-06,
      "loss": 2.2762,
      "step": 38732
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0601142644882202,
      "learning_rate": 9.966031782611434e-06,
      "loss": 2.4315,
      "step": 38733
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0832831859588623,
      "learning_rate": 9.965620048255594e-06,
      "loss": 2.3529,
      "step": 38734
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9898302555084229,
      "learning_rate": 9.965208313958038e-06,
      "loss": 2.3938,
      "step": 38735
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.191135287284851,
      "learning_rate": 9.964796579719463e-06,
      "loss": 2.4588,
      "step": 38736
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3076298236846924,
      "learning_rate": 9.964384845540569e-06,
      "loss": 2.5867,
      "step": 38737
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.229952335357666,
      "learning_rate": 9.963973111422052e-06,
      "loss": 2.2328,
      "step": 38738
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0824147462844849,
      "learning_rate": 9.96356137736461e-06,
      "loss": 2.3189,
      "step": 38739
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1523821353912354,
      "learning_rate": 9.96314964336894e-06,
      "loss": 2.3568,
      "step": 38740
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0872489213943481,
      "learning_rate": 9.962737909435743e-06,
      "loss": 2.2998,
      "step": 38741
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.324654459953308,
      "learning_rate": 9.962326175565716e-06,
      "loss": 2.1606,
      "step": 38742
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1822532415390015,
      "learning_rate": 9.961914441759556e-06,
      "loss": 2.3774,
      "step": 38743
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.085266351699829,
      "learning_rate": 9.961502708017963e-06,
      "loss": 2.3642,
      "step": 38744
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.179366946220398,
      "learning_rate": 9.961090974341631e-06,
      "loss": 2.1812,
      "step": 38745
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1204956769943237,
      "learning_rate": 9.96067924073126e-06,
      "loss": 2.3204,
      "step": 38746
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9998188614845276,
      "learning_rate": 9.960267507187548e-06,
      "loss": 2.2427,
      "step": 38747
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0266023874282837,
      "learning_rate": 9.959855773711195e-06,
      "loss": 2.3774,
      "step": 38748
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0492160320281982,
      "learning_rate": 9.959444040302895e-06,
      "loss": 2.299,
      "step": 38749
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1735126972198486,
      "learning_rate": 9.959032306963349e-06,
      "loss": 2.4625,
      "step": 38750
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0140228271484375,
      "learning_rate": 9.958620573693256e-06,
      "loss": 2.2368,
      "step": 38751
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.05485999584198,
      "learning_rate": 9.958208840493312e-06,
      "loss": 2.5277,
      "step": 38752
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0849770307540894,
      "learning_rate": 9.957797107364213e-06,
      "loss": 2.1845,
      "step": 38753
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9556840062141418,
      "learning_rate": 9.957385374306661e-06,
      "loss": 2.4023,
      "step": 38754
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.941709578037262,
      "learning_rate": 9.956973641321354e-06,
      "loss": 2.2321,
      "step": 38755
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9824163913726807,
      "learning_rate": 9.95656190840899e-06,
      "loss": 2.2336,
      "step": 38756
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1906466484069824,
      "learning_rate": 9.956150175570263e-06,
      "loss": 2.3097,
      "step": 38757
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0589628219604492,
      "learning_rate": 9.955738442805873e-06,
      "loss": 1.9945,
      "step": 38758
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0869276523590088,
      "learning_rate": 9.955326710116516e-06,
      "loss": 2.2449,
      "step": 38759
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0973036289215088,
      "learning_rate": 9.954914977502896e-06,
      "loss": 2.2353,
      "step": 38760
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1775823831558228,
      "learning_rate": 9.954503244965704e-06,
      "loss": 2.2252,
      "step": 38761
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0737069845199585,
      "learning_rate": 9.954091512505644e-06,
      "loss": 2.2926,
      "step": 38762
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.114381194114685,
      "learning_rate": 9.95367978012341e-06,
      "loss": 2.1804,
      "step": 38763
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0917432308197021,
      "learning_rate": 9.953268047819702e-06,
      "loss": 2.2138,
      "step": 38764
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0115454196929932,
      "learning_rate": 9.952856315595217e-06,
      "loss": 2.3888,
      "step": 38765
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0911223888397217,
      "learning_rate": 9.952444583450655e-06,
      "loss": 2.2316,
      "step": 38766
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0637714862823486,
      "learning_rate": 9.95203285138671e-06,
      "loss": 2.3918,
      "step": 38767
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.071734070777893,
      "learning_rate": 9.951621119404084e-06,
      "loss": 2.5244,
      "step": 38768
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9194403886795044,
      "learning_rate": 9.951209387503472e-06,
      "loss": 2.1078,
      "step": 38769
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0436725616455078,
      "learning_rate": 9.950797655685575e-06,
      "loss": 2.4285,
      "step": 38770
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.066104531288147,
      "learning_rate": 9.950385923951089e-06,
      "loss": 2.2995,
      "step": 38771
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.106598973274231,
      "learning_rate": 9.949974192300713e-06,
      "loss": 2.1289,
      "step": 38772
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1453794240951538,
      "learning_rate": 9.949562460735143e-06,
      "loss": 2.5337,
      "step": 38773
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.098148226737976,
      "learning_rate": 9.949150729255079e-06,
      "loss": 2.5031,
      "step": 38774
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1607505083084106,
      "learning_rate": 9.948738997861217e-06,
      "loss": 2.3656,
      "step": 38775
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0672552585601807,
      "learning_rate": 9.948327266554257e-06,
      "loss": 2.2283,
      "step": 38776
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2290246486663818,
      "learning_rate": 9.947915535334895e-06,
      "loss": 2.2555,
      "step": 38777
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9908828735351562,
      "learning_rate": 9.947503804203832e-06,
      "loss": 2.3035,
      "step": 38778
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.992017924785614,
      "learning_rate": 9.947092073161764e-06,
      "loss": 2.4683,
      "step": 38779
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.011143445968628,
      "learning_rate": 9.946680342209387e-06,
      "loss": 2.4216,
      "step": 38780
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0723971128463745,
      "learning_rate": 9.946268611347404e-06,
      "loss": 2.4945,
      "step": 38781
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.04787278175354,
      "learning_rate": 9.94585688057651e-06,
      "loss": 2.397,
      "step": 38782
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0241538286209106,
      "learning_rate": 9.945445149897408e-06,
      "loss": 2.3429,
      "step": 38783
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.0834662914276123,
      "learning_rate": 9.945033419310786e-06,
      "loss": 2.4323,
      "step": 38784
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0151036977767944,
      "learning_rate": 9.944621688817346e-06,
      "loss": 2.34,
      "step": 38785
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1513923406600952,
      "learning_rate": 9.94420995841779e-06,
      "loss": 2.4732,
      "step": 38786
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0490922927856445,
      "learning_rate": 9.94379822811281e-06,
      "loss": 2.2991,
      "step": 38787
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.008562684059143,
      "learning_rate": 9.94338649790311e-06,
      "loss": 2.4634,
      "step": 38788
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0104671716690063,
      "learning_rate": 9.942974767789385e-06,
      "loss": 2.4945,
      "step": 38789
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1119463443756104,
      "learning_rate": 9.942563037772332e-06,
      "loss": 2.3052,
      "step": 38790
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2260771989822388,
      "learning_rate": 9.942151307852653e-06,
      "loss": 2.3744,
      "step": 38791
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1233744621276855,
      "learning_rate": 9.94173957803104e-06,
      "loss": 2.222,
      "step": 38792
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9671911001205444,
      "learning_rate": 9.941327848308196e-06,
      "loss": 2.346,
      "step": 38793
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1933294534683228,
      "learning_rate": 9.940916118684819e-06,
      "loss": 2.3687,
      "step": 38794
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1099655628204346,
      "learning_rate": 9.940504389161603e-06,
      "loss": 2.4142,
      "step": 38795
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0689594745635986,
      "learning_rate": 9.94009265973925e-06,
      "loss": 2.3736,
      "step": 38796
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0468206405639648,
      "learning_rate": 9.939680930418455e-06,
      "loss": 2.5798,
      "step": 38797
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.233268141746521,
      "learning_rate": 9.939269201199915e-06,
      "loss": 2.171,
      "step": 38798
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0768073797225952,
      "learning_rate": 9.938857472084333e-06,
      "loss": 2.5354,
      "step": 38799
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0424515008926392,
      "learning_rate": 9.938445743072405e-06,
      "loss": 2.5834,
      "step": 38800
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1820344924926758,
      "learning_rate": 9.938034014164825e-06,
      "loss": 2.2636,
      "step": 38801
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1150262355804443,
      "learning_rate": 9.937622285362296e-06,
      "loss": 2.3691,
      "step": 38802
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0327881574630737,
      "learning_rate": 9.937210556665514e-06,
      "loss": 2.3234,
      "step": 38803
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1566479206085205,
      "learning_rate": 9.936798828075177e-06,
      "loss": 2.3313,
      "step": 38804
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1498942375183105,
      "learning_rate": 9.936387099591982e-06,
      "loss": 2.1755,
      "step": 38805
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0361634492874146,
      "learning_rate": 9.935975371216629e-06,
      "loss": 2.2607,
      "step": 38806
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0634156465530396,
      "learning_rate": 9.935563642949816e-06,
      "loss": 2.3064,
      "step": 38807
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.071089267730713,
      "learning_rate": 9.935151914792238e-06,
      "loss": 2.3799,
      "step": 38808
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0569350719451904,
      "learning_rate": 9.934740186744601e-06,
      "loss": 2.2921,
      "step": 38809
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1960885524749756,
      "learning_rate": 9.934328458807592e-06,
      "loss": 2.5203,
      "step": 38810
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.140932559967041,
      "learning_rate": 9.933916730981916e-06,
      "loss": 2.3584,
      "step": 38811
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1310458183288574,
      "learning_rate": 9.933505003268268e-06,
      "loss": 2.4107,
      "step": 38812
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.077486276626587,
      "learning_rate": 9.933093275667344e-06,
      "loss": 2.3023,
      "step": 38813
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.004443645477295,
      "learning_rate": 9.932681548179849e-06,
      "loss": 2.2855,
      "step": 38814
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.171736478805542,
      "learning_rate": 9.932269820806474e-06,
      "loss": 2.39,
      "step": 38815
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1190239191055298,
      "learning_rate": 9.931858093547924e-06,
      "loss": 2.3281,
      "step": 38816
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.216371774673462,
      "learning_rate": 9.93144636640489e-06,
      "loss": 2.2951,
      "step": 38817
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0439311265945435,
      "learning_rate": 9.931034639378075e-06,
      "loss": 2.3719,
      "step": 38818
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1826425790786743,
      "learning_rate": 9.930622912468173e-06,
      "loss": 2.3592,
      "step": 38819
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0859284400939941,
      "learning_rate": 9.930211185675885e-06,
      "loss": 2.5023,
      "step": 38820
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0714845657348633,
      "learning_rate": 9.929799459001908e-06,
      "loss": 2.3283,
      "step": 38821
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.03650963306427,
      "learning_rate": 9.929387732446945e-06,
      "loss": 2.5145,
      "step": 38822
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9908475279808044,
      "learning_rate": 9.928976006011683e-06,
      "loss": 2.4751,
      "step": 38823
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1421698331832886,
      "learning_rate": 9.928564279696827e-06,
      "loss": 2.4444,
      "step": 38824
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0930548906326294,
      "learning_rate": 9.928152553503074e-06,
      "loss": 2.2043,
      "step": 38825
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.055664300918579,
      "learning_rate": 9.927740827431122e-06,
      "loss": 2.4122,
      "step": 38826
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.075147271156311,
      "learning_rate": 9.927329101481669e-06,
      "loss": 2.3778,
      "step": 38827
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0419683456420898,
      "learning_rate": 9.926917375655414e-06,
      "loss": 2.7011,
      "step": 38828
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.056722640991211,
      "learning_rate": 9.926505649953052e-06,
      "loss": 2.3727,
      "step": 38829
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0778342485427856,
      "learning_rate": 9.926093924375284e-06,
      "loss": 2.5669,
      "step": 38830
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3826841115951538,
      "learning_rate": 9.925682198922806e-06,
      "loss": 2.42,
      "step": 38831
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.999646782875061,
      "learning_rate": 9.925270473596317e-06,
      "loss": 2.4775,
      "step": 38832
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0007725954055786,
      "learning_rate": 9.924858748396515e-06,
      "loss": 2.4359,
      "step": 38833
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0267918109893799,
      "learning_rate": 9.924447023324097e-06,
      "loss": 2.1382,
      "step": 38834
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.079569697380066,
      "learning_rate": 9.924035298379764e-06,
      "loss": 2.0284,
      "step": 38835
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1128820180892944,
      "learning_rate": 9.923623573564214e-06,
      "loss": 2.3102,
      "step": 38836
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0601705312728882,
      "learning_rate": 9.92321184887814e-06,
      "loss": 2.4044,
      "step": 38837
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1106966733932495,
      "learning_rate": 9.922800124322242e-06,
      "loss": 2.3841,
      "step": 38838
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2930362224578857,
      "learning_rate": 9.922388399897218e-06,
      "loss": 2.2043,
      "step": 38839
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0490601062774658,
      "learning_rate": 9.921976675603767e-06,
      "loss": 2.136,
      "step": 38840
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5520495176315308,
      "learning_rate": 9.921564951442587e-06,
      "loss": 2.2425,
      "step": 38841
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.10762357711792,
      "learning_rate": 9.921153227414376e-06,
      "loss": 2.3401,
      "step": 38842
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.719519019126892,
      "learning_rate": 9.920741503519832e-06,
      "loss": 2.3356,
      "step": 38843
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0391968488693237,
      "learning_rate": 9.920329779759652e-06,
      "loss": 2.2204,
      "step": 38844
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.99068683385849,
      "learning_rate": 9.919918056134536e-06,
      "loss": 2.1922,
      "step": 38845
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1206995248794556,
      "learning_rate": 9.919506332645178e-06,
      "loss": 2.2826,
      "step": 38846
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.087266206741333,
      "learning_rate": 9.919094609292281e-06,
      "loss": 2.4755,
      "step": 38847
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1220436096191406,
      "learning_rate": 9.918682886076539e-06,
      "loss": 2.3253,
      "step": 38848
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.074170708656311,
      "learning_rate": 9.918271162998657e-06,
      "loss": 2.0895,
      "step": 38849
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1187020540237427,
      "learning_rate": 9.917859440059323e-06,
      "loss": 2.1459,
      "step": 38850
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1248599290847778,
      "learning_rate": 9.91744771725924e-06,
      "loss": 2.3918,
      "step": 38851
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1039563417434692,
      "learning_rate": 9.917035994599105e-06,
      "loss": 2.1907,
      "step": 38852
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.082212209701538,
      "learning_rate": 9.916624272079617e-06,
      "loss": 2.3473,
      "step": 38853
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9897827506065369,
      "learning_rate": 9.916212549701473e-06,
      "loss": 2.4328,
      "step": 38854
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1876527070999146,
      "learning_rate": 9.91580082746537e-06,
      "loss": 2.4728,
      "step": 38855
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0535967350006104,
      "learning_rate": 9.915389105372012e-06,
      "loss": 2.3651,
      "step": 38856
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9949532151222229,
      "learning_rate": 9.914977383422089e-06,
      "loss": 2.0901,
      "step": 38857
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2207430601119995,
      "learning_rate": 9.914565661616303e-06,
      "loss": 2.4144,
      "step": 38858
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2740612030029297,
      "learning_rate": 9.914153939955352e-06,
      "loss": 2.3291,
      "step": 38859
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.028792142868042,
      "learning_rate": 9.913742218439932e-06,
      "loss": 2.3969,
      "step": 38860
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1553417444229126,
      "learning_rate": 9.913330497070744e-06,
      "loss": 2.2066,
      "step": 38861
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2346097230911255,
      "learning_rate": 9.912918775848489e-06,
      "loss": 2.4441,
      "step": 38862
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2588130235671997,
      "learning_rate": 9.912507054773853e-06,
      "loss": 2.4225,
      "step": 38863
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1514261960983276,
      "learning_rate": 9.912095333847544e-06,
      "loss": 2.2675,
      "step": 38864
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.067452073097229,
      "learning_rate": 9.911683613070256e-06,
      "loss": 2.3921,
      "step": 38865
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9762516021728516,
      "learning_rate": 9.911271892442689e-06,
      "loss": 2.4174,
      "step": 38866
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1190160512924194,
      "learning_rate": 9.91086017196554e-06,
      "loss": 2.1316,
      "step": 38867
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0245006084442139,
      "learning_rate": 9.910448451639506e-06,
      "loss": 2.2432,
      "step": 38868
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0254782438278198,
      "learning_rate": 9.910036731465287e-06,
      "loss": 2.1669,
      "step": 38869
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0025525093078613,
      "learning_rate": 9.90962501144358e-06,
      "loss": 2.4997,
      "step": 38870
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.981800377368927,
      "learning_rate": 9.909213291575084e-06,
      "loss": 2.1679,
      "step": 38871
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0672162771224976,
      "learning_rate": 9.908801571860496e-06,
      "loss": 2.3239,
      "step": 38872
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1298785209655762,
      "learning_rate": 9.908389852300514e-06,
      "loss": 2.2945,
      "step": 38873
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1211011409759521,
      "learning_rate": 9.907978132895836e-06,
      "loss": 2.1836,
      "step": 38874
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0980205535888672,
      "learning_rate": 9.907566413647164e-06,
      "loss": 2.3277,
      "step": 38875
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.397355556488037,
      "learning_rate": 9.907154694555189e-06,
      "loss": 2.399,
      "step": 38876
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4314905405044556,
      "learning_rate": 9.90674297562061e-06,
      "loss": 2.388,
      "step": 38877
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.970405638217926,
      "learning_rate": 9.906331256844129e-06,
      "loss": 2.4587,
      "step": 38878
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.230689287185669,
      "learning_rate": 9.905919538226439e-06,
      "loss": 2.4328,
      "step": 38879
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.114175796508789,
      "learning_rate": 9.905507819768244e-06,
      "loss": 2.5415,
      "step": 38880
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0992127656936646,
      "learning_rate": 9.905096101470238e-06,
      "loss": 2.4147,
      "step": 38881
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9857369065284729,
      "learning_rate": 9.904684383333119e-06,
      "loss": 2.3215,
      "step": 38882
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0304416418075562,
      "learning_rate": 9.904272665357586e-06,
      "loss": 2.1316,
      "step": 38883
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0172853469848633,
      "learning_rate": 9.90386094754434e-06,
      "loss": 2.352,
      "step": 38884
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2239208221435547,
      "learning_rate": 9.903449229894073e-06,
      "loss": 2.4262,
      "step": 38885
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0139453411102295,
      "learning_rate": 9.903037512407487e-06,
      "loss": 2.3113,
      "step": 38886
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1154894828796387,
      "learning_rate": 9.902625795085276e-06,
      "loss": 2.2865,
      "step": 38887
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1700996160507202,
      "learning_rate": 9.902214077928146e-06,
      "loss": 2.358,
      "step": 38888
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.029595971107483,
      "learning_rate": 9.901802360936784e-06,
      "loss": 2.6796,
      "step": 38889
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0839389562606812,
      "learning_rate": 9.901390644111899e-06,
      "loss": 2.4334,
      "step": 38890
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0737699270248413,
      "learning_rate": 9.90097892745418e-06,
      "loss": 2.2949,
      "step": 38891
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.008264183998108,
      "learning_rate": 9.900567210964331e-06,
      "loss": 2.1747,
      "step": 38892
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9996837973594666,
      "learning_rate": 9.900155494643044e-06,
      "loss": 2.3362,
      "step": 38893
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.135886549949646,
      "learning_rate": 9.899743778491023e-06,
      "loss": 2.2702,
      "step": 38894
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0821778774261475,
      "learning_rate": 9.899332062508962e-06,
      "loss": 2.3433,
      "step": 38895
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1307905912399292,
      "learning_rate": 9.898920346697562e-06,
      "loss": 2.1276,
      "step": 38896
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0936380624771118,
      "learning_rate": 9.898508631057518e-06,
      "loss": 2.2171,
      "step": 38897
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1139193773269653,
      "learning_rate": 9.89809691558953e-06,
      "loss": 2.276,
      "step": 38898
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.037389874458313,
      "learning_rate": 9.897685200294295e-06,
      "loss": 2.1578,
      "step": 38899
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1678602695465088,
      "learning_rate": 9.897273485172512e-06,
      "loss": 2.264,
      "step": 38900
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1970824003219604,
      "learning_rate": 9.896861770224882e-06,
      "loss": 2.2849,
      "step": 38901
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.128539800643921,
      "learning_rate": 9.896450055452097e-06,
      "loss": 2.1933,
      "step": 38902
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1402902603149414,
      "learning_rate": 9.896038340854856e-06,
      "loss": 2.3062,
      "step": 38903
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1018040180206299,
      "learning_rate": 9.895626626433857e-06,
      "loss": 2.3072,
      "step": 38904
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3646929264068604,
      "learning_rate": 9.895214912189801e-06,
      "loss": 2.4248,
      "step": 38905
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0214776992797852,
      "learning_rate": 9.894803198123384e-06,
      "loss": 2.0584,
      "step": 38906
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0030192136764526,
      "learning_rate": 9.894391484235302e-06,
      "loss": 2.3066,
      "step": 38907
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.028086543083191,
      "learning_rate": 9.893979770526258e-06,
      "loss": 2.4605,
      "step": 38908
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9175854325294495,
      "learning_rate": 9.893568056996946e-06,
      "loss": 2.3723,
      "step": 38909
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.071605920791626,
      "learning_rate": 9.893156343648063e-06,
      "loss": 2.2966,
      "step": 38910
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0507289171218872,
      "learning_rate": 9.892744630480312e-06,
      "loss": 2.2554,
      "step": 38911
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0894927978515625,
      "learning_rate": 9.892332917494389e-06,
      "loss": 2.2648,
      "step": 38912
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.014248251914978,
      "learning_rate": 9.891921204690989e-06,
      "loss": 2.3331,
      "step": 38913
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.026719093322754,
      "learning_rate": 9.89150949207081e-06,
      "loss": 2.4203,
      "step": 38914
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1197059154510498,
      "learning_rate": 9.891097779634557e-06,
      "loss": 2.3262,
      "step": 38915
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0634230375289917,
      "learning_rate": 9.89068606738292e-06,
      "loss": 2.304,
      "step": 38916
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9891291260719299,
      "learning_rate": 9.890274355316597e-06,
      "loss": 2.0074,
      "step": 38917
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0995237827301025,
      "learning_rate": 9.889862643436294e-06,
      "loss": 2.4176,
      "step": 38918
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1765122413635254,
      "learning_rate": 9.8894509317427e-06,
      "loss": 2.3358,
      "step": 38919
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1080682277679443,
      "learning_rate": 9.88903922023652e-06,
      "loss": 2.2987,
      "step": 38920
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0659561157226562,
      "learning_rate": 9.888627508918445e-06,
      "loss": 2.476,
      "step": 38921
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9854937195777893,
      "learning_rate": 9.888215797789178e-06,
      "loss": 2.0298,
      "step": 38922
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9357441663742065,
      "learning_rate": 9.887804086849415e-06,
      "loss": 2.2807,
      "step": 38923
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1627529859542847,
      "learning_rate": 9.887392376099855e-06,
      "loss": 2.417,
      "step": 38924
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0630990266799927,
      "learning_rate": 9.886980665541196e-06,
      "loss": 2.319,
      "step": 38925
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.004868507385254,
      "learning_rate": 9.886568955174134e-06,
      "loss": 2.333,
      "step": 38926
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.223569631576538,
      "learning_rate": 9.88615724499937e-06,
      "loss": 2.3266,
      "step": 38927
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.169080138206482,
      "learning_rate": 9.885745535017603e-06,
      "loss": 2.4214,
      "step": 38928
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1137443780899048,
      "learning_rate": 9.885333825229525e-06,
      "loss": 2.3225,
      "step": 38929
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0244625806808472,
      "learning_rate": 9.884922115635838e-06,
      "loss": 2.3289,
      "step": 38930
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.363502860069275,
      "learning_rate": 9.884510406237238e-06,
      "loss": 2.319,
      "step": 38931
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1208821535110474,
      "learning_rate": 9.884098697034424e-06,
      "loss": 2.4419,
      "step": 38932
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1751887798309326,
      "learning_rate": 9.883686988028095e-06,
      "loss": 2.2718,
      "step": 38933
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2048529386520386,
      "learning_rate": 9.883275279218948e-06,
      "loss": 2.3342,
      "step": 38934
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1082520484924316,
      "learning_rate": 9.882863570607682e-06,
      "loss": 2.2181,
      "step": 38935
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.988910436630249,
      "learning_rate": 9.882451862194993e-06,
      "loss": 2.2583,
      "step": 38936
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1044480800628662,
      "learning_rate": 9.88204015398158e-06,
      "loss": 2.0115,
      "step": 38937
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1213433742523193,
      "learning_rate": 9.881628445968141e-06,
      "loss": 2.3516,
      "step": 38938
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.184492588043213,
      "learning_rate": 9.881216738155375e-06,
      "loss": 2.3188,
      "step": 38939
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1793019771575928,
      "learning_rate": 9.88080503054398e-06,
      "loss": 2.3301,
      "step": 38940
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0831735134124756,
      "learning_rate": 9.880393323134652e-06,
      "loss": 2.2737,
      "step": 38941
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9669040441513062,
      "learning_rate": 9.87998161592809e-06,
      "loss": 2.3026,
      "step": 38942
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.100601315498352,
      "learning_rate": 9.87956990892499e-06,
      "loss": 2.1597,
      "step": 38943
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1814074516296387,
      "learning_rate": 9.879158202126053e-06,
      "loss": 2.3654,
      "step": 38944
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0810526609420776,
      "learning_rate": 9.878746495531974e-06,
      "loss": 2.3899,
      "step": 38945
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2123922109603882,
      "learning_rate": 9.878334789143456e-06,
      "loss": 2.3619,
      "step": 38946
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0457369089126587,
      "learning_rate": 9.877923082961192e-06,
      "loss": 2.2008,
      "step": 38947
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9616295695304871,
      "learning_rate": 9.87751137698588e-06,
      "loss": 2.4905,
      "step": 38948
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0334420204162598,
      "learning_rate": 9.87709967121822e-06,
      "loss": 2.329,
      "step": 38949
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9852625131607056,
      "learning_rate": 9.87668796565891e-06,
      "loss": 2.2641,
      "step": 38950
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0935081243515015,
      "learning_rate": 9.876276260308646e-06,
      "loss": 2.4631,
      "step": 38951
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.148482084274292,
      "learning_rate": 9.87586455516813e-06,
      "loss": 2.2562,
      "step": 38952
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0684007406234741,
      "learning_rate": 9.875452850238055e-06,
      "loss": 2.5411,
      "step": 38953
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0352245569229126,
      "learning_rate": 9.875041145519125e-06,
      "loss": 2.3612,
      "step": 38954
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0220447778701782,
      "learning_rate": 9.874629441012032e-06,
      "loss": 2.4186,
      "step": 38955
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.982176661491394,
      "learning_rate": 9.874217736717474e-06,
      "loss": 2.203,
      "step": 38956
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0488183498382568,
      "learning_rate": 9.873806032636152e-06,
      "loss": 2.2018,
      "step": 38957
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2714402675628662,
      "learning_rate": 9.873394328768764e-06,
      "loss": 2.3393,
      "step": 38958
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1838047504425049,
      "learning_rate": 9.872982625116005e-06,
      "loss": 2.2258,
      "step": 38959
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0860577821731567,
      "learning_rate": 9.872570921678577e-06,
      "loss": 2.5737,
      "step": 38960
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.035304307937622,
      "learning_rate": 9.872159218457176e-06,
      "loss": 2.2136,
      "step": 38961
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1633069515228271,
      "learning_rate": 9.871747515452497e-06,
      "loss": 2.3122,
      "step": 38962
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0883857011795044,
      "learning_rate": 9.871335812665243e-06,
      "loss": 2.2219,
      "step": 38963
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.169558048248291,
      "learning_rate": 9.87092411009611e-06,
      "loss": 2.3716,
      "step": 38964
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0271707773208618,
      "learning_rate": 9.870512407745795e-06,
      "loss": 2.3596,
      "step": 38965
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0150779485702515,
      "learning_rate": 9.870100705614999e-06,
      "loss": 2.3511,
      "step": 38966
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3137006759643555,
      "learning_rate": 9.869689003704419e-06,
      "loss": 2.5214,
      "step": 38967
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.128228783607483,
      "learning_rate": 9.869277302014747e-06,
      "loss": 2.0663,
      "step": 38968
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0779632329940796,
      "learning_rate": 9.868865600546687e-06,
      "loss": 2.3537,
      "step": 38969
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1252031326293945,
      "learning_rate": 9.868453899300935e-06,
      "loss": 2.4466,
      "step": 38970
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1902453899383545,
      "learning_rate": 9.868042198278188e-06,
      "loss": 2.3059,
      "step": 38971
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1212161779403687,
      "learning_rate": 9.867630497479147e-06,
      "loss": 2.2979,
      "step": 38972
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.054903268814087,
      "learning_rate": 9.867218796904508e-06,
      "loss": 2.5517,
      "step": 38973
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1775556802749634,
      "learning_rate": 9.86680709655497e-06,
      "loss": 2.4003,
      "step": 38974
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.138700008392334,
      "learning_rate": 9.86639539643123e-06,
      "loss": 2.2638,
      "step": 38975
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.04082453250885,
      "learning_rate": 9.865983696533985e-06,
      "loss": 2.7864,
      "step": 38976
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9963046908378601,
      "learning_rate": 9.865571996863936e-06,
      "loss": 2.3848,
      "step": 38977
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.215689778327942,
      "learning_rate": 9.865160297421776e-06,
      "loss": 2.1995,
      "step": 38978
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.196850061416626,
      "learning_rate": 9.864748598208207e-06,
      "loss": 2.2328,
      "step": 38979
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0370463132858276,
      "learning_rate": 9.864336899223926e-06,
      "loss": 2.2381,
      "step": 38980
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9597389101982117,
      "learning_rate": 9.863925200469635e-06,
      "loss": 2.3979,
      "step": 38981
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0994948148727417,
      "learning_rate": 9.863513501946024e-06,
      "loss": 2.456,
      "step": 38982
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2196823358535767,
      "learning_rate": 9.863101803653795e-06,
      "loss": 2.4353,
      "step": 38983
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2724827527999878,
      "learning_rate": 9.862690105593643e-06,
      "loss": 2.625,
      "step": 38984
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1018062829971313,
      "learning_rate": 9.86227840776627e-06,
      "loss": 2.3716,
      "step": 38985
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9998806118965149,
      "learning_rate": 9.861866710172374e-06,
      "loss": 2.3611,
      "step": 38986
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1498243808746338,
      "learning_rate": 9.861455012812651e-06,
      "loss": 2.1823,
      "step": 38987
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.037576675415039,
      "learning_rate": 9.861043315687799e-06,
      "loss": 2.429,
      "step": 38988
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9965187311172485,
      "learning_rate": 9.860631618798515e-06,
      "loss": 2.3523,
      "step": 38989
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0485893487930298,
      "learning_rate": 9.8602199221455e-06,
      "loss": 2.4036,
      "step": 38990
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.198088526725769,
      "learning_rate": 9.85980822572945e-06,
      "loss": 2.2426,
      "step": 38991
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9883264303207397,
      "learning_rate": 9.859396529551061e-06,
      "loss": 2.289,
      "step": 38992
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2763020992279053,
      "learning_rate": 9.858984833611035e-06,
      "loss": 2.373,
      "step": 38993
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1296814680099487,
      "learning_rate": 9.858573137910073e-06,
      "loss": 2.4009,
      "step": 38994
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0535005331039429,
      "learning_rate": 9.858161442448863e-06,
      "loss": 2.4055,
      "step": 38995
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0453110933303833,
      "learning_rate": 9.857749747228107e-06,
      "loss": 2.5022,
      "step": 38996
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9669805765151978,
      "learning_rate": 9.857338052248504e-06,
      "loss": 2.2598,
      "step": 38997
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.037531852722168,
      "learning_rate": 9.856926357510752e-06,
      "loss": 2.241,
      "step": 38998
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0335174798965454,
      "learning_rate": 9.856514663015549e-06,
      "loss": 2.4449,
      "step": 38999
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0146716833114624,
      "learning_rate": 9.856102968763592e-06,
      "loss": 2.4176,
      "step": 39000
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0353195667266846,
      "learning_rate": 9.85569127475558e-06,
      "loss": 2.3788,
      "step": 39001
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0198146104812622,
      "learning_rate": 9.855279580992212e-06,
      "loss": 2.2605,
      "step": 39002
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0421395301818848,
      "learning_rate": 9.854867887474184e-06,
      "loss": 2.4012,
      "step": 39003
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1525200605392456,
      "learning_rate": 9.854456194202193e-06,
      "loss": 2.2014,
      "step": 39004
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0045288801193237,
      "learning_rate": 9.85404450117694e-06,
      "loss": 2.5385,
      "step": 39005
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.132641315460205,
      "learning_rate": 9.853632808399118e-06,
      "loss": 2.5007,
      "step": 39006
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1333240270614624,
      "learning_rate": 9.853221115869433e-06,
      "loss": 2.562,
      "step": 39007
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.362655758857727,
      "learning_rate": 9.852809423588576e-06,
      "loss": 2.2793,
      "step": 39008
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1116122007369995,
      "learning_rate": 9.852397731557246e-06,
      "loss": 2.5974,
      "step": 39009
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1664825677871704,
      "learning_rate": 9.851986039776142e-06,
      "loss": 2.2383,
      "step": 39010
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.120863676071167,
      "learning_rate": 9.851574348245963e-06,
      "loss": 2.5401,
      "step": 39011
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.03167724609375,
      "learning_rate": 9.851162656967403e-06,
      "loss": 2.4265,
      "step": 39012
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.110169529914856,
      "learning_rate": 9.850750965941164e-06,
      "loss": 2.2624,
      "step": 39013
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1420255899429321,
      "learning_rate": 9.850339275167944e-06,
      "loss": 2.5603,
      "step": 39014
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.140936017036438,
      "learning_rate": 9.849927584648438e-06,
      "loss": 2.1276,
      "step": 39015
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9965466856956482,
      "learning_rate": 9.849515894383347e-06,
      "loss": 2.2253,
      "step": 39016
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2054444551467896,
      "learning_rate": 9.849104204373366e-06,
      "loss": 2.3514,
      "step": 39017
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0530507564544678,
      "learning_rate": 9.848692514619193e-06,
      "loss": 2.2974,
      "step": 39018
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0712138414382935,
      "learning_rate": 9.84828082512153e-06,
      "loss": 2.3278,
      "step": 39019
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0270740985870361,
      "learning_rate": 9.847869135881076e-06,
      "loss": 2.593,
      "step": 39020
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2204123735427856,
      "learning_rate": 9.847457446898521e-06,
      "loss": 2.2518,
      "step": 39021
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0152230262756348,
      "learning_rate": 9.847045758174567e-06,
      "loss": 2.2705,
      "step": 39022
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0853307247161865,
      "learning_rate": 9.84663406970991e-06,
      "loss": 2.1866,
      "step": 39023
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0240960121154785,
      "learning_rate": 9.846222381505251e-06,
      "loss": 2.4311,
      "step": 39024
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.052533745765686,
      "learning_rate": 9.845810693561288e-06,
      "loss": 2.3718,
      "step": 39025
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.208860993385315,
      "learning_rate": 9.845399005878717e-06,
      "loss": 2.5878,
      "step": 39026
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9883891344070435,
      "learning_rate": 9.844987318458236e-06,
      "loss": 2.3962,
      "step": 39027
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0798919200897217,
      "learning_rate": 9.844575631300544e-06,
      "loss": 2.2227,
      "step": 39028
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0096608400344849,
      "learning_rate": 9.844163944406339e-06,
      "loss": 2.3429,
      "step": 39029
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1388639211654663,
      "learning_rate": 9.84375225777632e-06,
      "loss": 2.2357,
      "step": 39030
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.133231282234192,
      "learning_rate": 9.843340571411181e-06,
      "loss": 2.2055,
      "step": 39031
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9076299667358398,
      "learning_rate": 9.842928885311623e-06,
      "loss": 2.1423,
      "step": 39032
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0485931634902954,
      "learning_rate": 9.842517199478345e-06,
      "loss": 2.487,
      "step": 39033
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0064021348953247,
      "learning_rate": 9.842105513912042e-06,
      "loss": 2.2088,
      "step": 39034
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0133241415023804,
      "learning_rate": 9.841693828613411e-06,
      "loss": 2.3357,
      "step": 39035
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0187722444534302,
      "learning_rate": 9.841282143583156e-06,
      "loss": 2.1304,
      "step": 39036
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9578328132629395,
      "learning_rate": 9.840870458821968e-06,
      "loss": 2.4699,
      "step": 39037
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.04079270362854,
      "learning_rate": 9.840458774330548e-06,
      "loss": 2.1914,
      "step": 39038
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0781137943267822,
      "learning_rate": 9.840047090109592e-06,
      "loss": 2.2382,
      "step": 39039
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.085579752922058,
      "learning_rate": 9.839635406159802e-06,
      "loss": 2.3102,
      "step": 39040
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2350201606750488,
      "learning_rate": 9.839223722481872e-06,
      "loss": 2.2476,
      "step": 39041
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1726775169372559,
      "learning_rate": 9.838812039076502e-06,
      "loss": 2.2429,
      "step": 39042
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1009970903396606,
      "learning_rate": 9.83840035594439e-06,
      "loss": 2.2794,
      "step": 39043
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0340059995651245,
      "learning_rate": 9.837988673086233e-06,
      "loss": 2.4017,
      "step": 39044
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.079872965812683,
      "learning_rate": 9.837576990502729e-06,
      "loss": 2.4546,
      "step": 39045
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2068337202072144,
      "learning_rate": 9.83716530819458e-06,
      "loss": 2.4206,
      "step": 39046
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.114737629890442,
      "learning_rate": 9.836753626162476e-06,
      "loss": 2.4711,
      "step": 39047
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0917078256607056,
      "learning_rate": 9.83634194440712e-06,
      "loss": 2.3614,
      "step": 39048
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1874576807022095,
      "learning_rate": 9.835930262929207e-06,
      "loss": 2.4073,
      "step": 39049
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1498817205429077,
      "learning_rate": 9.835518581729438e-06,
      "loss": 2.4136,
      "step": 39050
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.965005099773407,
      "learning_rate": 9.83510690080851e-06,
      "loss": 2.2793,
      "step": 39051
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1354180574417114,
      "learning_rate": 9.834695220167118e-06,
      "loss": 2.2161,
      "step": 39052
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9642167687416077,
      "learning_rate": 9.834283539805966e-06,
      "loss": 2.02,
      "step": 39053
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9591976404190063,
      "learning_rate": 9.833871859725746e-06,
      "loss": 2.2631,
      "step": 39054
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9811419248580933,
      "learning_rate": 9.83346017992716e-06,
      "loss": 2.3197,
      "step": 39055
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9813600778579712,
      "learning_rate": 9.833048500410902e-06,
      "loss": 2.3244,
      "step": 39056
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1855354309082031,
      "learning_rate": 9.832636821177673e-06,
      "loss": 2.2708,
      "step": 39057
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0641676187515259,
      "learning_rate": 9.832225142228174e-06,
      "loss": 2.3775,
      "step": 39058
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0708457231521606,
      "learning_rate": 9.831813463563094e-06,
      "loss": 2.4659,
      "step": 39059
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0123916864395142,
      "learning_rate": 9.83140178518314e-06,
      "loss": 2.5521,
      "step": 39060
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1117632389068604,
      "learning_rate": 9.830990107089004e-06,
      "loss": 2.313,
      "step": 39061
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9842503666877747,
      "learning_rate": 9.830578429281385e-06,
      "loss": 2.3877,
      "step": 39062
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.142843246459961,
      "learning_rate": 9.830166751760982e-06,
      "loss": 2.2744,
      "step": 39063
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.067448616027832,
      "learning_rate": 9.829755074528493e-06,
      "loss": 2.4184,
      "step": 39064
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0593793392181396,
      "learning_rate": 9.829343397584614e-06,
      "loss": 2.4502,
      "step": 39065
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9908950924873352,
      "learning_rate": 9.828931720930045e-06,
      "loss": 2.0478,
      "step": 39066
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1040353775024414,
      "learning_rate": 9.828520044565482e-06,
      "loss": 2.5169,
      "step": 39067
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1539896726608276,
      "learning_rate": 9.828108368491625e-06,
      "loss": 2.6125,
      "step": 39068
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0238004922866821,
      "learning_rate": 9.827696692709172e-06,
      "loss": 2.1765,
      "step": 39069
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9905194640159607,
      "learning_rate": 9.827285017218817e-06,
      "loss": 2.1758,
      "step": 39070
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9706029891967773,
      "learning_rate": 9.826873342021262e-06,
      "loss": 2.4286,
      "step": 39071
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9401468634605408,
      "learning_rate": 9.826461667117205e-06,
      "loss": 2.3621,
      "step": 39072
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0327221155166626,
      "learning_rate": 9.826049992507345e-06,
      "loss": 2.2139,
      "step": 39073
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1885327100753784,
      "learning_rate": 9.825638318192375e-06,
      "loss": 2.0605,
      "step": 39074
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0355910062789917,
      "learning_rate": 9.825226644172995e-06,
      "loss": 2.3464,
      "step": 39075
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1996368169784546,
      "learning_rate": 9.824814970449903e-06,
      "loss": 2.3894,
      "step": 39076
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0664376020431519,
      "learning_rate": 9.824403297023797e-06,
      "loss": 2.3789,
      "step": 39077
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.551009178161621,
      "learning_rate": 9.823991623895377e-06,
      "loss": 2.3635,
      "step": 39078
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2441715002059937,
      "learning_rate": 9.823579951065337e-06,
      "loss": 2.1335,
      "step": 39079
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1305843591690063,
      "learning_rate": 9.823168278534376e-06,
      "loss": 2.3818,
      "step": 39080
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0845857858657837,
      "learning_rate": 9.822756606303193e-06,
      "loss": 2.2092,
      "step": 39081
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.223757028579712,
      "learning_rate": 9.822344934372488e-06,
      "loss": 2.4275,
      "step": 39082
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0029277801513672,
      "learning_rate": 9.821933262742956e-06,
      "loss": 2.2041,
      "step": 39083
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0477867126464844,
      "learning_rate": 9.821521591415293e-06,
      "loss": 2.4424,
      "step": 39084
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0909801721572876,
      "learning_rate": 9.821109920390205e-06,
      "loss": 2.292,
      "step": 39085
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2270104885101318,
      "learning_rate": 9.820698249668382e-06,
      "loss": 2.3774,
      "step": 39086
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0460902452468872,
      "learning_rate": 9.820286579250523e-06,
      "loss": 2.224,
      "step": 39087
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.222404956817627,
      "learning_rate": 9.819874909137326e-06,
      "loss": 2.3819,
      "step": 39088
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.047594428062439,
      "learning_rate": 9.819463239329491e-06,
      "loss": 2.3485,
      "step": 39089
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0289404392242432,
      "learning_rate": 9.819051569827715e-06,
      "loss": 2.4314,
      "step": 39090
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.078178882598877,
      "learning_rate": 9.818639900632695e-06,
      "loss": 2.4161,
      "step": 39091
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.199166178703308,
      "learning_rate": 9.818228231745131e-06,
      "loss": 2.4606,
      "step": 39092
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.201824426651001,
      "learning_rate": 9.817816563165718e-06,
      "loss": 2.2622,
      "step": 39093
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9384130835533142,
      "learning_rate": 9.817404894895157e-06,
      "loss": 2.4855,
      "step": 39094
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1574513912200928,
      "learning_rate": 9.816993226934143e-06,
      "loss": 2.4578,
      "step": 39095
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.114336609840393,
      "learning_rate": 9.816581559283375e-06,
      "loss": 2.1075,
      "step": 39096
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1612097024917603,
      "learning_rate": 9.816169891943552e-06,
      "loss": 2.2652,
      "step": 39097
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9716281890869141,
      "learning_rate": 9.81575822491537e-06,
      "loss": 2.4294,
      "step": 39098
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1101874113082886,
      "learning_rate": 9.815346558199532e-06,
      "loss": 2.4008,
      "step": 39099
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0279221534729004,
      "learning_rate": 9.814934891796729e-06,
      "loss": 2.6175,
      "step": 39100
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1042815446853638,
      "learning_rate": 9.81452322570766e-06,
      "loss": 2.3205,
      "step": 39101
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.217200517654419,
      "learning_rate": 9.814111559933024e-06,
      "loss": 2.3152,
      "step": 39102
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1146718263626099,
      "learning_rate": 9.81369989447352e-06,
      "loss": 2.3727,
      "step": 39103
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1907609701156616,
      "learning_rate": 9.813288229329846e-06,
      "loss": 2.3067,
      "step": 39104
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0224361419677734,
      "learning_rate": 9.812876564502699e-06,
      "loss": 2.384,
      "step": 39105
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1289204359054565,
      "learning_rate": 9.812464899992775e-06,
      "loss": 2.4754,
      "step": 39106
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0898120403289795,
      "learning_rate": 9.812053235800778e-06,
      "loss": 2.4864,
      "step": 39107
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0436289310455322,
      "learning_rate": 9.8116415719274e-06,
      "loss": 2.2242,
      "step": 39108
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0614389181137085,
      "learning_rate": 9.811229908373338e-06,
      "loss": 2.5383,
      "step": 39109
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.088576078414917,
      "learning_rate": 9.810818245139296e-06,
      "loss": 2.3838,
      "step": 39110
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0922764539718628,
      "learning_rate": 9.810406582225967e-06,
      "loss": 2.2622,
      "step": 39111
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0192339420318604,
      "learning_rate": 9.809994919634055e-06,
      "loss": 2.2179,
      "step": 39112
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1519602537155151,
      "learning_rate": 9.80958325736425e-06,
      "loss": 2.43,
      "step": 39113
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.298472285270691,
      "learning_rate": 9.809171595417251e-06,
      "loss": 2.478,
      "step": 39114
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0554763078689575,
      "learning_rate": 9.808759933793759e-06,
      "loss": 2.4441,
      "step": 39115
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1832823753356934,
      "learning_rate": 9.808348272494472e-06,
      "loss": 2.3402,
      "step": 39116
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0057051181793213,
      "learning_rate": 9.807936611520085e-06,
      "loss": 2.3188,
      "step": 39117
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0694695711135864,
      "learning_rate": 9.807524950871299e-06,
      "loss": 2.4132,
      "step": 39118
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0989197492599487,
      "learning_rate": 9.807113290548808e-06,
      "loss": 2.1775,
      "step": 39119
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1772249937057495,
      "learning_rate": 9.806701630553316e-06,
      "loss": 2.4056,
      "step": 39120
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9457758665084839,
      "learning_rate": 9.806289970885515e-06,
      "loss": 2.0769,
      "step": 39121
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1014485359191895,
      "learning_rate": 9.805878311546106e-06,
      "loss": 2.3378,
      "step": 39122
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.118094563484192,
      "learning_rate": 9.805466652535785e-06,
      "loss": 2.4136,
      "step": 39123
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.107247233390808,
      "learning_rate": 9.805054993855251e-06,
      "loss": 2.3288,
      "step": 39124
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.081388235092163,
      "learning_rate": 9.804643335505204e-06,
      "loss": 2.3552,
      "step": 39125
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9965454339981079,
      "learning_rate": 9.80423167748634e-06,
      "loss": 2.1453,
      "step": 39126
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1937522888183594,
      "learning_rate": 9.803820019799353e-06,
      "loss": 2.4098,
      "step": 39127
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9921450614929199,
      "learning_rate": 9.803408362444946e-06,
      "loss": 2.3233,
      "step": 39128
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0445585250854492,
      "learning_rate": 9.802996705423814e-06,
      "loss": 2.4107,
      "step": 39129
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.026919960975647,
      "learning_rate": 9.802585048736657e-06,
      "loss": 2.1461,
      "step": 39130
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0888675451278687,
      "learning_rate": 9.802173392384171e-06,
      "loss": 2.2895,
      "step": 39131
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.077024221420288,
      "learning_rate": 9.801761736367056e-06,
      "loss": 2.339,
      "step": 39132
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2174185514450073,
      "learning_rate": 9.801350080686008e-06,
      "loss": 2.3515,
      "step": 39133
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0845311880111694,
      "learning_rate": 9.800938425341727e-06,
      "loss": 2.3646,
      "step": 39134
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1202428340911865,
      "learning_rate": 9.800526770334907e-06,
      "loss": 2.0543,
      "step": 39135
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0575147867202759,
      "learning_rate": 9.80011511566625e-06,
      "loss": 2.3004,
      "step": 39136
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1389561891555786,
      "learning_rate": 9.799703461336451e-06,
      "loss": 2.4312,
      "step": 39137
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.983727753162384,
      "learning_rate": 9.79929180734621e-06,
      "loss": 2.3913,
      "step": 39138
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1073980331420898,
      "learning_rate": 9.798880153696228e-06,
      "loss": 2.3099,
      "step": 39139
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1292589902877808,
      "learning_rate": 9.798468500387194e-06,
      "loss": 2.5978,
      "step": 39140
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1545801162719727,
      "learning_rate": 9.79805684741981e-06,
      "loss": 2.4426,
      "step": 39141
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0922627449035645,
      "learning_rate": 9.797645194794776e-06,
      "loss": 2.3763,
      "step": 39142
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9984363913536072,
      "learning_rate": 9.797233542512787e-06,
      "loss": 2.4138,
      "step": 39143
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0239471197128296,
      "learning_rate": 9.796821890574544e-06,
      "loss": 2.6028,
      "step": 39144
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0131587982177734,
      "learning_rate": 9.796410238980741e-06,
      "loss": 2.2485,
      "step": 39145
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9495946764945984,
      "learning_rate": 9.79599858773208e-06,
      "loss": 2.4789,
      "step": 39146
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.701113224029541,
      "learning_rate": 9.795586936829256e-06,
      "loss": 2.3034,
      "step": 39147
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1272974014282227,
      "learning_rate": 9.79517528627297e-06,
      "loss": 2.4172,
      "step": 39148
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0142948627471924,
      "learning_rate": 9.794763636063913e-06,
      "loss": 2.2093,
      "step": 39149
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2353819608688354,
      "learning_rate": 9.79435198620279e-06,
      "loss": 2.4454,
      "step": 39150
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0208661556243896,
      "learning_rate": 9.793940336690295e-06,
      "loss": 2.3666,
      "step": 39151
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3352779150009155,
      "learning_rate": 9.793528687527131e-06,
      "loss": 2.3873,
      "step": 39152
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3442587852478027,
      "learning_rate": 9.793117038713987e-06,
      "loss": 2.351,
      "step": 39153
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.027670979499817,
      "learning_rate": 9.79270539025157e-06,
      "loss": 2.1912,
      "step": 39154
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0743485689163208,
      "learning_rate": 9.792293742140572e-06,
      "loss": 2.1662,
      "step": 39155
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0090683698654175,
      "learning_rate": 9.79188209438169e-06,
      "loss": 2.2958,
      "step": 39156
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9932503700256348,
      "learning_rate": 9.791470446975628e-06,
      "loss": 2.3368,
      "step": 39157
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.653867244720459,
      "learning_rate": 9.791058799923077e-06,
      "loss": 2.2466,
      "step": 39158
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1475385427474976,
      "learning_rate": 9.79064715322474e-06,
      "loss": 2.437,
      "step": 39159
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.024856686592102,
      "learning_rate": 9.790235506881311e-06,
      "loss": 2.331,
      "step": 39160
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1064083576202393,
      "learning_rate": 9.789823860893491e-06,
      "loss": 2.4357,
      "step": 39161
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0269017219543457,
      "learning_rate": 9.789412215261978e-06,
      "loss": 2.3937,
      "step": 39162
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1232424974441528,
      "learning_rate": 9.789000569987466e-06,
      "loss": 2.4,
      "step": 39163
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0506030321121216,
      "learning_rate": 9.788588925070657e-06,
      "loss": 2.4505,
      "step": 39164
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1184310913085938,
      "learning_rate": 9.788177280512251e-06,
      "loss": 2.211,
      "step": 39165
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0854289531707764,
      "learning_rate": 9.787765636312938e-06,
      "loss": 2.5985,
      "step": 39166
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9607023000717163,
      "learning_rate": 9.787353992473419e-06,
      "loss": 2.2379,
      "step": 39167
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0014221668243408,
      "learning_rate": 9.786942348994394e-06,
      "loss": 2.5424,
      "step": 39168
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0938843488693237,
      "learning_rate": 9.786530705876557e-06,
      "loss": 2.1837,
      "step": 39169
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0363295078277588,
      "learning_rate": 9.786119063120612e-06,
      "loss": 2.261,
      "step": 39170
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.107326865196228,
      "learning_rate": 9.785707420727251e-06,
      "loss": 2.258,
      "step": 39171
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.061300277709961,
      "learning_rate": 9.785295778697174e-06,
      "loss": 2.2858,
      "step": 39172
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0630712509155273,
      "learning_rate": 9.78488413703108e-06,
      "loss": 2.2246,
      "step": 39173
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0250802040100098,
      "learning_rate": 9.784472495729667e-06,
      "loss": 2.5274,
      "step": 39174
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.050092339515686,
      "learning_rate": 9.784060854793628e-06,
      "loss": 2.436,
      "step": 39175
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.6713786125183105,
      "learning_rate": 9.78364921422367e-06,
      "loss": 2.422,
      "step": 39176
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0996426343917847,
      "learning_rate": 9.783237574020482e-06,
      "loss": 2.4005,
      "step": 39177
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.137465000152588,
      "learning_rate": 9.782825934184768e-06,
      "loss": 2.4639,
      "step": 39178
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0743842124938965,
      "learning_rate": 9.78241429471722e-06,
      "loss": 2.421,
      "step": 39179
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1310627460479736,
      "learning_rate": 9.782002655618539e-06,
      "loss": 2.5404,
      "step": 39180
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.043379783630371,
      "learning_rate": 9.781591016889422e-06,
      "loss": 2.3076,
      "step": 39181
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0871858596801758,
      "learning_rate": 9.78117937853057e-06,
      "loss": 2.3266,
      "step": 39182
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0858287811279297,
      "learning_rate": 9.780767740542679e-06,
      "loss": 2.0109,
      "step": 39183
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.017686128616333,
      "learning_rate": 9.780356102926443e-06,
      "loss": 2.0766,
      "step": 39184
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1260783672332764,
      "learning_rate": 9.779944465682565e-06,
      "loss": 2.3748,
      "step": 39185
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0216479301452637,
      "learning_rate": 9.779532828811738e-06,
      "loss": 2.3398,
      "step": 39186
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9880026578903198,
      "learning_rate": 9.779121192314665e-06,
      "loss": 2.5553,
      "step": 39187
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1356204748153687,
      "learning_rate": 9.778709556192041e-06,
      "loss": 2.0572,
      "step": 39188
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.078516960144043,
      "learning_rate": 9.778297920444565e-06,
      "loss": 2.4818,
      "step": 39189
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2247517108917236,
      "learning_rate": 9.777886285072935e-06,
      "loss": 2.4469,
      "step": 39190
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0641448497772217,
      "learning_rate": 9.777474650077851e-06,
      "loss": 2.4396,
      "step": 39191
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0804646015167236,
      "learning_rate": 9.777063015460003e-06,
      "loss": 2.3845,
      "step": 39192
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0989806652069092,
      "learning_rate": 9.776651381220095e-06,
      "loss": 2.3606,
      "step": 39193
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2060667276382446,
      "learning_rate": 9.776239747358823e-06,
      "loss": 2.3831,
      "step": 39194
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1041834354400635,
      "learning_rate": 9.775828113876885e-06,
      "loss": 2.2368,
      "step": 39195
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.18446683883667,
      "learning_rate": 9.775416480774981e-06,
      "loss": 2.2616,
      "step": 39196
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.041435956954956,
      "learning_rate": 9.775004848053805e-06,
      "loss": 2.2524,
      "step": 39197
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.120687484741211,
      "learning_rate": 9.774593215714057e-06,
      "loss": 2.3039,
      "step": 39198
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0183035135269165,
      "learning_rate": 9.774181583756435e-06,
      "loss": 2.409,
      "step": 39199
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1256685256958008,
      "learning_rate": 9.773769952181638e-06,
      "loss": 2.3679,
      "step": 39200
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9943780303001404,
      "learning_rate": 9.773358320990361e-06,
      "loss": 2.5487,
      "step": 39201
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2619144916534424,
      "learning_rate": 9.772946690183303e-06,
      "loss": 2.3092,
      "step": 39202
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.01608407497406,
      "learning_rate": 9.772535059761164e-06,
      "loss": 2.457,
      "step": 39203
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9486843347549438,
      "learning_rate": 9.772123429724641e-06,
      "loss": 2.2747,
      "step": 39204
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0936765670776367,
      "learning_rate": 9.77171180007443e-06,
      "loss": 2.312,
      "step": 39205
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.146417260169983,
      "learning_rate": 9.771300170811228e-06,
      "loss": 2.6783,
      "step": 39206
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9907873272895813,
      "learning_rate": 9.770888541935734e-06,
      "loss": 2.3535,
      "step": 39207
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0537254810333252,
      "learning_rate": 9.770476913448645e-06,
      "loss": 2.1884,
      "step": 39208
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1220877170562744,
      "learning_rate": 9.770065285350662e-06,
      "loss": 2.5637,
      "step": 39209
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3372137546539307,
      "learning_rate": 9.769653657642481e-06,
      "loss": 2.2588,
      "step": 39210
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0663236379623413,
      "learning_rate": 9.769242030324799e-06,
      "loss": 2.1679,
      "step": 39211
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.994459867477417,
      "learning_rate": 9.768830403398314e-06,
      "loss": 2.4303,
      "step": 39212
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9570334553718567,
      "learning_rate": 9.768418776863724e-06,
      "loss": 2.2879,
      "step": 39213
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9692428112030029,
      "learning_rate": 9.768007150721727e-06,
      "loss": 2.2794,
      "step": 39214
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0494012832641602,
      "learning_rate": 9.767595524973022e-06,
      "loss": 2.2213,
      "step": 39215
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.024878740310669,
      "learning_rate": 9.767183899618305e-06,
      "loss": 2.1976,
      "step": 39216
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.058272361755371,
      "learning_rate": 9.766772274658273e-06,
      "loss": 2.3566,
      "step": 39217
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.081599473953247,
      "learning_rate": 9.766360650093632e-06,
      "loss": 2.3885,
      "step": 39218
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1337765455245972,
      "learning_rate": 9.765949025925066e-06,
      "loss": 2.4004,
      "step": 39219
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1390109062194824,
      "learning_rate": 9.765537402153284e-06,
      "loss": 2.4555,
      "step": 39220
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1088472604751587,
      "learning_rate": 9.765125778778977e-06,
      "loss": 2.2833,
      "step": 39221
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2406615018844604,
      "learning_rate": 9.764714155802845e-06,
      "loss": 2.0951,
      "step": 39222
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1322450637817383,
      "learning_rate": 9.764302533225588e-06,
      "loss": 2.2709,
      "step": 39223
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0222797393798828,
      "learning_rate": 9.763890911047902e-06,
      "loss": 2.0536,
      "step": 39224
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1008291244506836,
      "learning_rate": 9.763479289270484e-06,
      "loss": 2.2582,
      "step": 39225
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2348089218139648,
      "learning_rate": 9.763067667894033e-06,
      "loss": 2.1586,
      "step": 39226
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.034055233001709,
      "learning_rate": 9.762656046919249e-06,
      "loss": 2.5792,
      "step": 39227
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0661886930465698,
      "learning_rate": 9.762244426346826e-06,
      "loss": 2.47,
      "step": 39228
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1374480724334717,
      "learning_rate": 9.761832806177463e-06,
      "loss": 2.3958,
      "step": 39229
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.016748309135437,
      "learning_rate": 9.761421186411859e-06,
      "loss": 2.3679,
      "step": 39230
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3522380590438843,
      "learning_rate": 9.761009567050713e-06,
      "loss": 2.3724,
      "step": 39231
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0102413892745972,
      "learning_rate": 9.760597948094718e-06,
      "loss": 2.3275,
      "step": 39232
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.083783745765686,
      "learning_rate": 9.760186329544574e-06,
      "loss": 2.6469,
      "step": 39233
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3581864833831787,
      "learning_rate": 9.75977471140098e-06,
      "loss": 2.2328,
      "step": 39234
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.137122392654419,
      "learning_rate": 9.759363093664633e-06,
      "loss": 2.2866,
      "step": 39235
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0220755338668823,
      "learning_rate": 9.758951476336229e-06,
      "loss": 2.6561,
      "step": 39236
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1438612937927246,
      "learning_rate": 9.75853985941647e-06,
      "loss": 2.3614,
      "step": 39237
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.101144790649414,
      "learning_rate": 9.758128242906053e-06,
      "loss": 2.23,
      "step": 39238
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0902037620544434,
      "learning_rate": 9.757716626805672e-06,
      "loss": 2.2036,
      "step": 39239
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0844218730926514,
      "learning_rate": 9.757305011116028e-06,
      "loss": 2.4103,
      "step": 39240
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0452030897140503,
      "learning_rate": 9.756893395837816e-06,
      "loss": 2.2557,
      "step": 39241
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1920115947723389,
      "learning_rate": 9.756481780971737e-06,
      "loss": 2.3498,
      "step": 39242
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.148219347000122,
      "learning_rate": 9.756070166518488e-06,
      "loss": 2.3849,
      "step": 39243
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1111714839935303,
      "learning_rate": 9.75565855247877e-06,
      "loss": 2.2738,
      "step": 39244
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9789130091667175,
      "learning_rate": 9.755246938853272e-06,
      "loss": 2.256,
      "step": 39245
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0218005180358887,
      "learning_rate": 9.754835325642698e-06,
      "loss": 2.2499,
      "step": 39246
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4141952991485596,
      "learning_rate": 9.754423712847745e-06,
      "loss": 2.3627,
      "step": 39247
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4094401597976685,
      "learning_rate": 9.754012100469108e-06,
      "loss": 2.4617,
      "step": 39248
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0619820356369019,
      "learning_rate": 9.753600488507491e-06,
      "loss": 2.3444,
      "step": 39249
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0220361948013306,
      "learning_rate": 9.753188876963585e-06,
      "loss": 2.3326,
      "step": 39250
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.209992527961731,
      "learning_rate": 9.752777265838093e-06,
      "loss": 2.3376,
      "step": 39251
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.007099986076355,
      "learning_rate": 9.75236565513171e-06,
      "loss": 2.3866,
      "step": 39252
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0664806365966797,
      "learning_rate": 9.751954044845134e-06,
      "loss": 2.6089,
      "step": 39253
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0714198350906372,
      "learning_rate": 9.751542434979065e-06,
      "loss": 2.3471,
      "step": 39254
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0893332958221436,
      "learning_rate": 9.751130825534197e-06,
      "loss": 2.3929,
      "step": 39255
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.019020915031433,
      "learning_rate": 9.75071921651123e-06,
      "loss": 2.358,
      "step": 39256
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.984739363193512,
      "learning_rate": 9.750307607910867e-06,
      "loss": 2.199,
      "step": 39257
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.991642951965332,
      "learning_rate": 9.749895999733795e-06,
      "loss": 2.1461,
      "step": 39258
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0530039072036743,
      "learning_rate": 9.749484391980719e-06,
      "loss": 2.3751,
      "step": 39259
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0828264951705933,
      "learning_rate": 9.749072784652333e-06,
      "loss": 2.2587,
      "step": 39260
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1459001302719116,
      "learning_rate": 9.748661177749337e-06,
      "loss": 2.2144,
      "step": 39261
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2180606126785278,
      "learning_rate": 9.74824957127243e-06,
      "loss": 2.2364,
      "step": 39262
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9466733336448669,
      "learning_rate": 9.747837965222307e-06,
      "loss": 2.2602,
      "step": 39263
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0604249238967896,
      "learning_rate": 9.747426359599668e-06,
      "loss": 2.282,
      "step": 39264
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1473640203475952,
      "learning_rate": 9.74701475440521e-06,
      "loss": 2.5293,
      "step": 39265
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0217087268829346,
      "learning_rate": 9.746603149639631e-06,
      "loss": 2.1351,
      "step": 39266
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0199873447418213,
      "learning_rate": 9.746191545303628e-06,
      "loss": 2.1037,
      "step": 39267
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0631707906723022,
      "learning_rate": 9.7457799413979e-06,
      "loss": 2.2803,
      "step": 39268
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0951342582702637,
      "learning_rate": 9.745368337923143e-06,
      "loss": 2.4107,
      "step": 39269
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0040309429168701,
      "learning_rate": 9.744956734880058e-06,
      "loss": 2.463,
      "step": 39270
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.353015661239624,
      "learning_rate": 9.744545132269337e-06,
      "loss": 2.1914,
      "step": 39271
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0201455354690552,
      "learning_rate": 9.744133530091686e-06,
      "loss": 2.3234,
      "step": 39272
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1229033470153809,
      "learning_rate": 9.743721928347795e-06,
      "loss": 2.3255,
      "step": 39273
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.308036208152771,
      "learning_rate": 9.743310327038365e-06,
      "loss": 2.5726,
      "step": 39274
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.102900505065918,
      "learning_rate": 9.742898726164092e-06,
      "loss": 2.6835,
      "step": 39275
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1423839330673218,
      "learning_rate": 9.742487125725678e-06,
      "loss": 2.3032,
      "step": 39276
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.083923101425171,
      "learning_rate": 9.742075525723817e-06,
      "loss": 2.3066,
      "step": 39277
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1080479621887207,
      "learning_rate": 9.74166392615921e-06,
      "loss": 2.5223,
      "step": 39278
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0054055452346802,
      "learning_rate": 9.74125232703255e-06,
      "loss": 2.159,
      "step": 39279
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0310579538345337,
      "learning_rate": 9.740840728344539e-06,
      "loss": 2.2273,
      "step": 39280
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0954320430755615,
      "learning_rate": 9.740429130095873e-06,
      "loss": 2.3062,
      "step": 39281
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.159681797027588,
      "learning_rate": 9.74001753228725e-06,
      "loss": 2.5752,
      "step": 39282
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0799195766448975,
      "learning_rate": 9.73960593491937e-06,
      "loss": 2.3135,
      "step": 39283
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.048031210899353,
      "learning_rate": 9.73919433799293e-06,
      "loss": 2.4847,
      "step": 39284
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0294233560562134,
      "learning_rate": 9.738782741508624e-06,
      "loss": 2.2654,
      "step": 39285
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1024360656738281,
      "learning_rate": 9.73837114546715e-06,
      "loss": 2.4085,
      "step": 39286
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1947059631347656,
      "learning_rate": 9.73795954986921e-06,
      "loss": 2.3415,
      "step": 39287
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0520867109298706,
      "learning_rate": 9.7375479547155e-06,
      "loss": 2.4567,
      "step": 39288
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1169060468673706,
      "learning_rate": 9.737136360006716e-06,
      "loss": 2.5514,
      "step": 39289
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1798508167266846,
      "learning_rate": 9.736724765743558e-06,
      "loss": 2.3684,
      "step": 39290
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2878209352493286,
      "learning_rate": 9.736313171926722e-06,
      "loss": 2.1311,
      "step": 39291
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1237220764160156,
      "learning_rate": 9.735901578556909e-06,
      "loss": 2.4625,
      "step": 39292
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.166022539138794,
      "learning_rate": 9.735489985634812e-06,
      "loss": 2.1446,
      "step": 39293
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0153329372406006,
      "learning_rate": 9.735078393161136e-06,
      "loss": 2.1282,
      "step": 39294
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.954195499420166,
      "learning_rate": 9.73466680113657e-06,
      "loss": 2.2934,
      "step": 39295
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.050371766090393,
      "learning_rate": 9.734255209561817e-06,
      "loss": 2.2249,
      "step": 39296
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0112179517745972,
      "learning_rate": 9.733843618437575e-06,
      "loss": 2.5835,
      "step": 39297
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0943294763565063,
      "learning_rate": 9.733432027764539e-06,
      "loss": 2.4721,
      "step": 39298
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1359078884124756,
      "learning_rate": 9.733020437543407e-06,
      "loss": 2.3602,
      "step": 39299
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9631047248840332,
      "learning_rate": 9.73260884777488e-06,
      "loss": 2.1307,
      "step": 39300
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2220101356506348,
      "learning_rate": 9.732197258459652e-06,
      "loss": 2.3988,
      "step": 39301
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1282038688659668,
      "learning_rate": 9.731785669598423e-06,
      "loss": 2.3994,
      "step": 39302
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3018157482147217,
      "learning_rate": 9.73137408119189e-06,
      "loss": 2.2791,
      "step": 39303
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1626054048538208,
      "learning_rate": 9.730962493240751e-06,
      "loss": 2.4799,
      "step": 39304
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1543854475021362,
      "learning_rate": 9.730550905745703e-06,
      "loss": 2.2467,
      "step": 39305
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0254318714141846,
      "learning_rate": 9.730139318707444e-06,
      "loss": 2.4221,
      "step": 39306
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0762771368026733,
      "learning_rate": 9.729727732126673e-06,
      "loss": 2.2692,
      "step": 39307
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.073620080947876,
      "learning_rate": 9.729316146004086e-06,
      "loss": 2.1495,
      "step": 39308
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.092484712600708,
      "learning_rate": 9.728904560340382e-06,
      "loss": 2.2995,
      "step": 39309
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1259329319000244,
      "learning_rate": 9.728492975136264e-06,
      "loss": 2.3907,
      "step": 39310
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9660278558731079,
      "learning_rate": 9.728081390392419e-06,
      "loss": 2.2759,
      "step": 39311
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1174572706222534,
      "learning_rate": 9.727669806109548e-06,
      "loss": 2.26,
      "step": 39312
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0750123262405396,
      "learning_rate": 9.727258222288353e-06,
      "loss": 2.5619,
      "step": 39313
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.120512843132019,
      "learning_rate": 9.72684663892953e-06,
      "loss": 2.246,
      "step": 39314
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0708578824996948,
      "learning_rate": 9.726435056033774e-06,
      "loss": 2.1968,
      "step": 39315
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.097629189491272,
      "learning_rate": 9.726023473601785e-06,
      "loss": 2.483,
      "step": 39316
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2200536727905273,
      "learning_rate": 9.72561189163426e-06,
      "loss": 2.3958,
      "step": 39317
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.014679193496704,
      "learning_rate": 9.725200310131901e-06,
      "loss": 2.2447,
      "step": 39318
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0414341688156128,
      "learning_rate": 9.7247887290954e-06,
      "loss": 2.4013,
      "step": 39319
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7345715761184692,
      "learning_rate": 9.724377148525455e-06,
      "loss": 2.3471,
      "step": 39320
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.166222333908081,
      "learning_rate": 9.723965568422769e-06,
      "loss": 2.5854,
      "step": 39321
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2397160530090332,
      "learning_rate": 9.723553988788035e-06,
      "loss": 2.5221,
      "step": 39322
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.062258243560791,
      "learning_rate": 9.723142409621955e-06,
      "loss": 2.4775,
      "step": 39323
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.169724464416504,
      "learning_rate": 9.72273083092522e-06,
      "loss": 2.2512,
      "step": 39324
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1893104314804077,
      "learning_rate": 9.722319252698533e-06,
      "loss": 2.4672,
      "step": 39325
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1058025360107422,
      "learning_rate": 9.721907674942588e-06,
      "loss": 2.2195,
      "step": 39326
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9855558276176453,
      "learning_rate": 9.721496097658087e-06,
      "loss": 2.2664,
      "step": 39327
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.135933518409729,
      "learning_rate": 9.721084520845727e-06,
      "loss": 2.0563,
      "step": 39328
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0084972381591797,
      "learning_rate": 9.720672944506202e-06,
      "loss": 2.4689,
      "step": 39329
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0442588329315186,
      "learning_rate": 9.720261368640213e-06,
      "loss": 2.3242,
      "step": 39330
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.238944172859192,
      "learning_rate": 9.719849793248458e-06,
      "loss": 2.4973,
      "step": 39331
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0802967548370361,
      "learning_rate": 9.719438218331632e-06,
      "loss": 2.3565,
      "step": 39332
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1173222064971924,
      "learning_rate": 9.719026643890434e-06,
      "loss": 2.4487,
      "step": 39333
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2128268480300903,
      "learning_rate": 9.718615069925563e-06,
      "loss": 2.2429,
      "step": 39334
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0671907663345337,
      "learning_rate": 9.718203496437716e-06,
      "loss": 2.5194,
      "step": 39335
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9215280413627625,
      "learning_rate": 9.717791923427595e-06,
      "loss": 2.2609,
      "step": 39336
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2304600477218628,
      "learning_rate": 9.717380350895889e-06,
      "loss": 2.4693,
      "step": 39337
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0775858163833618,
      "learning_rate": 9.7169687788433e-06,
      "loss": 2.2392,
      "step": 39338
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1057299375534058,
      "learning_rate": 9.716557207270524e-06,
      "loss": 2.3392,
      "step": 39339
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1385231018066406,
      "learning_rate": 9.716145636178264e-06,
      "loss": 2.6498,
      "step": 39340
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1534074544906616,
      "learning_rate": 9.715734065567212e-06,
      "loss": 2.4856,
      "step": 39341
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1348286867141724,
      "learning_rate": 9.715322495438068e-06,
      "loss": 2.3817,
      "step": 39342
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2480449676513672,
      "learning_rate": 9.71491092579153e-06,
      "loss": 2.581,
      "step": 39343
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1436904668807983,
      "learning_rate": 9.714499356628297e-06,
      "loss": 2.1475,
      "step": 39344
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1010487079620361,
      "learning_rate": 9.714087787949062e-06,
      "loss": 2.2548,
      "step": 39345
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0628752708435059,
      "learning_rate": 9.713676219754527e-06,
      "loss": 2.2896,
      "step": 39346
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0348150730133057,
      "learning_rate": 9.71326465204539e-06,
      "loss": 2.2235,
      "step": 39347
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0733487606048584,
      "learning_rate": 9.712853084822346e-06,
      "loss": 2.3668,
      "step": 39348
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0436207056045532,
      "learning_rate": 9.712441518086097e-06,
      "loss": 2.2714,
      "step": 39349
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.074648380279541,
      "learning_rate": 9.712029951837337e-06,
      "loss": 2.4099,
      "step": 39350
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0717434883117676,
      "learning_rate": 9.711618386076761e-06,
      "loss": 2.3788,
      "step": 39351
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9839763045310974,
      "learning_rate": 9.711206820805074e-06,
      "loss": 2.5481,
      "step": 39352
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1161364316940308,
      "learning_rate": 9.710795256022967e-06,
      "loss": 2.366,
      "step": 39353
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1133466958999634,
      "learning_rate": 9.710383691731142e-06,
      "loss": 2.5322,
      "step": 39354
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0373246669769287,
      "learning_rate": 9.709972127930293e-06,
      "loss": 2.3805,
      "step": 39355
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.133303165435791,
      "learning_rate": 9.709560564621124e-06,
      "loss": 2.2756,
      "step": 39356
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0795769691467285,
      "learning_rate": 9.709149001804327e-06,
      "loss": 2.2057,
      "step": 39357
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2371810674667358,
      "learning_rate": 9.7087374394806e-06,
      "loss": 2.4487,
      "step": 39358
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0162649154663086,
      "learning_rate": 9.708325877650642e-06,
      "loss": 2.357,
      "step": 39359
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.196470022201538,
      "learning_rate": 9.707914316315153e-06,
      "loss": 2.4673,
      "step": 39360
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0381137132644653,
      "learning_rate": 9.707502755474825e-06,
      "loss": 2.3546,
      "step": 39361
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1568481922149658,
      "learning_rate": 9.707091195130363e-06,
      "loss": 2.4811,
      "step": 39362
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.004306674003601,
      "learning_rate": 9.706679635282463e-06,
      "loss": 2.2334,
      "step": 39363
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0075953006744385,
      "learning_rate": 9.706268075931818e-06,
      "loss": 2.3681,
      "step": 39364
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0176246166229248,
      "learning_rate": 9.705856517079128e-06,
      "loss": 2.5829,
      "step": 39365
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.176973581314087,
      "learning_rate": 9.705444958725089e-06,
      "loss": 2.3093,
      "step": 39366
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0675523281097412,
      "learning_rate": 9.705033400870402e-06,
      "loss": 2.2338,
      "step": 39367
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1135149002075195,
      "learning_rate": 9.704621843515764e-06,
      "loss": 2.3058,
      "step": 39368
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1135200262069702,
      "learning_rate": 9.704210286661872e-06,
      "loss": 2.5004,
      "step": 39369
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1961710453033447,
      "learning_rate": 9.703798730309425e-06,
      "loss": 2.3987,
      "step": 39370
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9805766344070435,
      "learning_rate": 9.70338717445912e-06,
      "loss": 2.4212,
      "step": 39371
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0432113409042358,
      "learning_rate": 9.702975619111652e-06,
      "loss": 2.5833,
      "step": 39372
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.127179741859436,
      "learning_rate": 9.702564064267721e-06,
      "loss": 2.4464,
      "step": 39373
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0031936168670654,
      "learning_rate": 9.702152509928026e-06,
      "loss": 2.0787,
      "step": 39374
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9910933971405029,
      "learning_rate": 9.701740956093262e-06,
      "loss": 2.4657,
      "step": 39375
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0302958488464355,
      "learning_rate": 9.701329402764135e-06,
      "loss": 2.1908,
      "step": 39376
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0523970127105713,
      "learning_rate": 9.700917849941328e-06,
      "loss": 2.289,
      "step": 39377
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1506835222244263,
      "learning_rate": 9.700506297625549e-06,
      "loss": 2.443,
      "step": 39378
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.098466157913208,
      "learning_rate": 9.700094745817493e-06,
      "loss": 2.6417,
      "step": 39379
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.584411859512329,
      "learning_rate": 9.699683194517857e-06,
      "loss": 2.3969,
      "step": 39380
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0817763805389404,
      "learning_rate": 9.69927164372734e-06,
      "loss": 2.4371,
      "step": 39381
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.01473867893219,
      "learning_rate": 9.69886009344664e-06,
      "loss": 2.4363,
      "step": 39382
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1313505172729492,
      "learning_rate": 9.698448543676452e-06,
      "loss": 2.709,
      "step": 39383
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9688218235969543,
      "learning_rate": 9.698036994417479e-06,
      "loss": 2.4792,
      "step": 39384
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0887242555618286,
      "learning_rate": 9.697625445670414e-06,
      "loss": 2.3603,
      "step": 39385
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0405935049057007,
      "learning_rate": 9.697213897435954e-06,
      "loss": 2.0636,
      "step": 39386
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0978784561157227,
      "learning_rate": 9.696802349714798e-06,
      "loss": 2.2409,
      "step": 39387
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0523163080215454,
      "learning_rate": 9.696390802507648e-06,
      "loss": 2.3658,
      "step": 39388
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1045318841934204,
      "learning_rate": 9.695979255815198e-06,
      "loss": 2.3494,
      "step": 39389
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1075903177261353,
      "learning_rate": 9.695567709638146e-06,
      "loss": 2.2581,
      "step": 39390
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0992729663848877,
      "learning_rate": 9.695156163977186e-06,
      "loss": 2.1813,
      "step": 39391
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0757699012756348,
      "learning_rate": 9.694744618833021e-06,
      "loss": 2.2757,
      "step": 39392
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1635643243789673,
      "learning_rate": 9.694333074206346e-06,
      "loss": 2.3801,
      "step": 39393
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0708017349243164,
      "learning_rate": 9.69392153009786e-06,
      "loss": 2.517,
      "step": 39394
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1809327602386475,
      "learning_rate": 9.69350998650826e-06,
      "loss": 2.1206,
      "step": 39395
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1007826328277588,
      "learning_rate": 9.693098443438243e-06,
      "loss": 2.2422,
      "step": 39396
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0990773439407349,
      "learning_rate": 9.692686900888508e-06,
      "loss": 2.3884,
      "step": 39397
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0088547468185425,
      "learning_rate": 9.692275358859752e-06,
      "loss": 2.4356,
      "step": 39398
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.112837314605713,
      "learning_rate": 9.691863817352672e-06,
      "loss": 2.3667,
      "step": 39399
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0222300291061401,
      "learning_rate": 9.69145227636797e-06,
      "loss": 2.3853,
      "step": 39400
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1097527742385864,
      "learning_rate": 9.691040735906336e-06,
      "loss": 2.2561,
      "step": 39401
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1688742637634277,
      "learning_rate": 9.690629195968478e-06,
      "loss": 2.4442,
      "step": 39402
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0574182271957397,
      "learning_rate": 9.690217656555082e-06,
      "loss": 2.2664,
      "step": 39403
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9833082556724548,
      "learning_rate": 9.689806117666854e-06,
      "loss": 2.3277,
      "step": 39404
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0734655857086182,
      "learning_rate": 9.689394579304487e-06,
      "loss": 2.1648,
      "step": 39405
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2188119888305664,
      "learning_rate": 9.68898304146868e-06,
      "loss": 2.5511,
      "step": 39406
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.232253074645996,
      "learning_rate": 9.688571504160132e-06,
      "loss": 2.3938,
      "step": 39407
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.106413722038269,
      "learning_rate": 9.68815996737954e-06,
      "loss": 2.206,
      "step": 39408
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9827712774276733,
      "learning_rate": 9.687748431127602e-06,
      "loss": 2.3424,
      "step": 39409
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1953647136688232,
      "learning_rate": 9.687336895405015e-06,
      "loss": 2.3881,
      "step": 39410
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.078809142112732,
      "learning_rate": 9.686925360212476e-06,
      "loss": 2.2333,
      "step": 39411
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0757659673690796,
      "learning_rate": 9.686513825550686e-06,
      "loss": 2.2943,
      "step": 39412
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1079719066619873,
      "learning_rate": 9.686102291420338e-06,
      "loss": 2.0742,
      "step": 39413
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9834416508674622,
      "learning_rate": 9.685690757822132e-06,
      "loss": 2.3012,
      "step": 39414
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5454649925231934,
      "learning_rate": 9.685279224756767e-06,
      "loss": 2.2863,
      "step": 39415
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.087795376777649,
      "learning_rate": 9.68486769222494e-06,
      "loss": 2.2561,
      "step": 39416
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1239067316055298,
      "learning_rate": 9.684456160227344e-06,
      "loss": 2.4757,
      "step": 39417
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2080059051513672,
      "learning_rate": 9.684044628764686e-06,
      "loss": 2.6193,
      "step": 39418
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0874818563461304,
      "learning_rate": 9.683633097837652e-06,
      "loss": 2.067,
      "step": 39419
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0324162244796753,
      "learning_rate": 9.68322156744695e-06,
      "loss": 2.1789,
      "step": 39420
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.01896333694458,
      "learning_rate": 9.682810037593271e-06,
      "loss": 2.2728,
      "step": 39421
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.034084677696228,
      "learning_rate": 9.682398508277316e-06,
      "loss": 2.4126,
      "step": 39422
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1503677368164062,
      "learning_rate": 9.681986979499781e-06,
      "loss": 2.47,
      "step": 39423
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0725548267364502,
      "learning_rate": 9.681575451261365e-06,
      "loss": 2.4538,
      "step": 39424
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.063635230064392,
      "learning_rate": 9.681163923562766e-06,
      "loss": 2.3725,
      "step": 39425
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1188831329345703,
      "learning_rate": 9.68075239640468e-06,
      "loss": 2.3382,
      "step": 39426
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9382228255271912,
      "learning_rate": 9.680340869787804e-06,
      "loss": 2.2943,
      "step": 39427
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2184760570526123,
      "learning_rate": 9.679929343712839e-06,
      "loss": 2.1436,
      "step": 39428
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1687862873077393,
      "learning_rate": 9.679517818180484e-06,
      "loss": 2.129,
      "step": 39429
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9106783866882324,
      "learning_rate": 9.679106293191429e-06,
      "loss": 2.3383,
      "step": 39430
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0621699094772339,
      "learning_rate": 9.678694768746376e-06,
      "loss": 2.4383,
      "step": 39431
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9907950758934021,
      "learning_rate": 9.678283244846022e-06,
      "loss": 2.3156,
      "step": 39432
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.225000262260437,
      "learning_rate": 9.677871721491069e-06,
      "loss": 2.2909,
      "step": 39433
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1600089073181152,
      "learning_rate": 9.677460198682207e-06,
      "loss": 2.167,
      "step": 39434
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0693657398223877,
      "learning_rate": 9.67704867642014e-06,
      "loss": 2.5348,
      "step": 39435
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.165553092956543,
      "learning_rate": 9.676637154705561e-06,
      "loss": 2.4866,
      "step": 39436
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4595929384231567,
      "learning_rate": 9.67622563353917e-06,
      "loss": 2.2899,
      "step": 39437
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.122927188873291,
      "learning_rate": 9.675814112921665e-06,
      "loss": 2.4229,
      "step": 39438
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1413230895996094,
      "learning_rate": 9.675402592853743e-06,
      "loss": 2.38,
      "step": 39439
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0962562561035156,
      "learning_rate": 9.674991073336106e-06,
      "loss": 2.3559,
      "step": 39440
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.136323094367981,
      "learning_rate": 9.674579554369442e-06,
      "loss": 2.3149,
      "step": 39441
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0119658708572388,
      "learning_rate": 9.674168035954459e-06,
      "loss": 2.2228,
      "step": 39442
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.270363450050354,
      "learning_rate": 9.673756518091846e-06,
      "loss": 2.2101,
      "step": 39443
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1493455171585083,
      "learning_rate": 9.673345000782303e-06,
      "loss": 2.5062,
      "step": 39444
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1846287250518799,
      "learning_rate": 9.672933484026533e-06,
      "loss": 2.1868,
      "step": 39445
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0684144496917725,
      "learning_rate": 9.672521967825228e-06,
      "loss": 2.2208,
      "step": 39446
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0345109701156616,
      "learning_rate": 9.672110452179087e-06,
      "loss": 2.4039,
      "step": 39447
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.256556510925293,
      "learning_rate": 9.671698937088806e-06,
      "loss": 2.4262,
      "step": 39448
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2485789060592651,
      "learning_rate": 9.671287422555086e-06,
      "loss": 2.296,
      "step": 39449
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0697388648986816,
      "learning_rate": 9.670875908578621e-06,
      "loss": 2.5239,
      "step": 39450
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0381008386611938,
      "learning_rate": 9.670464395160114e-06,
      "loss": 2.4309,
      "step": 39451
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.182124137878418,
      "learning_rate": 9.670052882300258e-06,
      "loss": 2.4838,
      "step": 39452
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0147385597229004,
      "learning_rate": 9.669641369999751e-06,
      "loss": 2.432,
      "step": 39453
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0785983800888062,
      "learning_rate": 9.669229858259293e-06,
      "loss": 2.2834,
      "step": 39454
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.090602993965149,
      "learning_rate": 9.668818347079584e-06,
      "loss": 2.1136,
      "step": 39455
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.336562991142273,
      "learning_rate": 9.668406836461312e-06,
      "loss": 2.2642,
      "step": 39456
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.058107852935791,
      "learning_rate": 9.667995326405182e-06,
      "loss": 2.5063,
      "step": 39457
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1433992385864258,
      "learning_rate": 9.66758381691189e-06,
      "loss": 2.3792,
      "step": 39458
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0484012365341187,
      "learning_rate": 9.667172307982135e-06,
      "loss": 2.4162,
      "step": 39459
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9905135035514832,
      "learning_rate": 9.666760799616611e-06,
      "loss": 2.1873,
      "step": 39460
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1552269458770752,
      "learning_rate": 9.66634929181602e-06,
      "loss": 2.3141,
      "step": 39461
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1695932149887085,
      "learning_rate": 9.665937784581055e-06,
      "loss": 2.3569,
      "step": 39462
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.166351318359375,
      "learning_rate": 9.665526277912418e-06,
      "loss": 2.3982,
      "step": 39463
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0592209100723267,
      "learning_rate": 9.665114771810806e-06,
      "loss": 2.3361,
      "step": 39464
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1062473058700562,
      "learning_rate": 9.664703266276912e-06,
      "loss": 2.4696,
      "step": 39465
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1707247495651245,
      "learning_rate": 9.66429176131144e-06,
      "loss": 2.3318,
      "step": 39466
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1175689697265625,
      "learning_rate": 9.663880256915086e-06,
      "loss": 2.4301,
      "step": 39467
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1376591920852661,
      "learning_rate": 9.663468753088546e-06,
      "loss": 2.4708,
      "step": 39468
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.042917251586914,
      "learning_rate": 9.663057249832517e-06,
      "loss": 2.2267,
      "step": 39469
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.149402141571045,
      "learning_rate": 9.662645747147694e-06,
      "loss": 2.1932,
      "step": 39470
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.162495493888855,
      "learning_rate": 9.66223424503478e-06,
      "loss": 2.4493,
      "step": 39471
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1641244888305664,
      "learning_rate": 9.661822743494472e-06,
      "loss": 2.4915,
      "step": 39472
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0220754146575928,
      "learning_rate": 9.661411242527465e-06,
      "loss": 2.3661,
      "step": 39473
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9961255192756653,
      "learning_rate": 9.660999742134461e-06,
      "loss": 2.239,
      "step": 39474
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6645923852920532,
      "learning_rate": 9.66058824231615e-06,
      "loss": 2.3934,
      "step": 39475
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0576417446136475,
      "learning_rate": 9.660176743073238e-06,
      "loss": 2.4047,
      "step": 39476
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1394567489624023,
      "learning_rate": 9.659765244406417e-06,
      "loss": 2.5207,
      "step": 39477
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0532249212265015,
      "learning_rate": 9.659353746316384e-06,
      "loss": 2.3156,
      "step": 39478
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1342933177947998,
      "learning_rate": 9.658942248803841e-06,
      "loss": 2.2333,
      "step": 39479
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0494533777236938,
      "learning_rate": 9.658530751869483e-06,
      "loss": 2.5633,
      "step": 39480
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3234646320343018,
      "learning_rate": 9.658119255514014e-06,
      "loss": 2.5219,
      "step": 39481
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1054259538650513,
      "learning_rate": 9.65770775973812e-06,
      "loss": 2.4947,
      "step": 39482
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9646281003952026,
      "learning_rate": 9.657296264542503e-06,
      "loss": 2.1709,
      "step": 39483
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1476314067840576,
      "learning_rate": 9.656884769927864e-06,
      "loss": 2.1676,
      "step": 39484
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0427132844924927,
      "learning_rate": 9.656473275894898e-06,
      "loss": 2.2843,
      "step": 39485
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0183709859848022,
      "learning_rate": 9.656061782444302e-06,
      "loss": 2.3967,
      "step": 39486
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1389354467391968,
      "learning_rate": 9.655650289576776e-06,
      "loss": 2.3564,
      "step": 39487
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0494766235351562,
      "learning_rate": 9.655238797293017e-06,
      "loss": 2.537,
      "step": 39488
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0841922760009766,
      "learning_rate": 9.65482730559372e-06,
      "loss": 2.116,
      "step": 39489
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.079814076423645,
      "learning_rate": 9.654415814479585e-06,
      "loss": 2.5318,
      "step": 39490
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1221420764923096,
      "learning_rate": 9.654004323951309e-06,
      "loss": 2.1775,
      "step": 39491
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2005759477615356,
      "learning_rate": 9.65359283400959e-06,
      "loss": 2.1707,
      "step": 39492
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1255286931991577,
      "learning_rate": 9.653181344655125e-06,
      "loss": 2.4368,
      "step": 39493
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1167492866516113,
      "learning_rate": 9.652769855888611e-06,
      "loss": 2.3741,
      "step": 39494
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1520613431930542,
      "learning_rate": 9.652358367710754e-06,
      "loss": 2.481,
      "step": 39495
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.00022292137146,
      "learning_rate": 9.651946880122236e-06,
      "loss": 2.39,
      "step": 39496
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2687565088272095,
      "learning_rate": 9.651535393123764e-06,
      "loss": 2.2157,
      "step": 39497
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1333301067352295,
      "learning_rate": 9.651123906716035e-06,
      "loss": 2.1018,
      "step": 39498
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.101981282234192,
      "learning_rate": 9.650712420899743e-06,
      "loss": 2.3525,
      "step": 39499
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.215840458869934,
      "learning_rate": 9.650300935675592e-06,
      "loss": 2.3234,
      "step": 39500
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0773638486862183,
      "learning_rate": 9.649889451044272e-06,
      "loss": 2.2854,
      "step": 39501
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0715397596359253,
      "learning_rate": 9.649477967006489e-06,
      "loss": 2.2149,
      "step": 39502
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.300650954246521,
      "learning_rate": 9.649066483562935e-06,
      "loss": 2.3275,
      "step": 39503
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1235988140106201,
      "learning_rate": 9.648655000714306e-06,
      "loss": 2.461,
      "step": 39504
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9481210708618164,
      "learning_rate": 9.648243518461304e-06,
      "loss": 2.3465,
      "step": 39505
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.021738052368164,
      "learning_rate": 9.647832036804624e-06,
      "loss": 2.5188,
      "step": 39506
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.067415475845337,
      "learning_rate": 9.647420555744964e-06,
      "loss": 2.3886,
      "step": 39507
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1273937225341797,
      "learning_rate": 9.647009075283027e-06,
      "loss": 2.4082,
      "step": 39508
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1702961921691895,
      "learning_rate": 9.646597595419501e-06,
      "loss": 2.5244,
      "step": 39509
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1287260055541992,
      "learning_rate": 9.64618611615509e-06,
      "loss": 2.407,
      "step": 39510
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0413111448287964,
      "learning_rate": 9.645774637490487e-06,
      "loss": 2.3499,
      "step": 39511
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.201991081237793,
      "learning_rate": 9.645363159426391e-06,
      "loss": 2.387,
      "step": 39512
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0978869199752808,
      "learning_rate": 9.644951681963503e-06,
      "loss": 2.3694,
      "step": 39513
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.254093050956726,
      "learning_rate": 9.644540205102518e-06,
      "loss": 2.2825,
      "step": 39514
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1883682012557983,
      "learning_rate": 9.644128728844132e-06,
      "loss": 2.1965,
      "step": 39515
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.001062273979187,
      "learning_rate": 9.643717253189048e-06,
      "loss": 2.2411,
      "step": 39516
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0254732370376587,
      "learning_rate": 9.643305778137957e-06,
      "loss": 2.1777,
      "step": 39517
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.135172724723816,
      "learning_rate": 9.642894303691562e-06,
      "loss": 2.2473,
      "step": 39518
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9937936067581177,
      "learning_rate": 9.642482829850557e-06,
      "loss": 2.3823,
      "step": 39519
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.017006754875183,
      "learning_rate": 9.642071356615639e-06,
      "loss": 2.2481,
      "step": 39520
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.020448923110962,
      "learning_rate": 9.641659883987515e-06,
      "loss": 2.3249,
      "step": 39521
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.017941951751709,
      "learning_rate": 9.641248411966867e-06,
      "loss": 2.3342,
      "step": 39522
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1478044986724854,
      "learning_rate": 9.640836940554403e-06,
      "loss": 2.2381,
      "step": 39523
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0800937414169312,
      "learning_rate": 9.640425469750817e-06,
      "loss": 2.351,
      "step": 39524
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.051352620124817,
      "learning_rate": 9.640013999556808e-06,
      "loss": 2.2673,
      "step": 39525
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1565110683441162,
      "learning_rate": 9.639602529973072e-06,
      "loss": 2.4853,
      "step": 39526
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1542142629623413,
      "learning_rate": 9.639191061000309e-06,
      "loss": 2.561,
      "step": 39527
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1690974235534668,
      "learning_rate": 9.638779592639213e-06,
      "loss": 2.3185,
      "step": 39528
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0949658155441284,
      "learning_rate": 9.638368124890486e-06,
      "loss": 2.2312,
      "step": 39529
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.119493007659912,
      "learning_rate": 9.637956657754824e-06,
      "loss": 2.2719,
      "step": 39530
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.033458948135376,
      "learning_rate": 9.637545191232923e-06,
      "loss": 2.261,
      "step": 39531
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0338600873947144,
      "learning_rate": 9.63713372532548e-06,
      "loss": 2.2852,
      "step": 39532
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1142191886901855,
      "learning_rate": 9.636722260033195e-06,
      "loss": 2.3505,
      "step": 39533
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0567585229873657,
      "learning_rate": 9.636310795356765e-06,
      "loss": 2.5124,
      "step": 39534
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0742418766021729,
      "learning_rate": 9.635899331296886e-06,
      "loss": 2.5143,
      "step": 39535
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.051127552986145,
      "learning_rate": 9.635487867854258e-06,
      "loss": 2.2573,
      "step": 39536
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1022413969039917,
      "learning_rate": 9.635076405029576e-06,
      "loss": 2.5281,
      "step": 39537
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.075289249420166,
      "learning_rate": 9.634664942823537e-06,
      "loss": 2.4636,
      "step": 39538
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0064514875411987,
      "learning_rate": 9.634253481236841e-06,
      "loss": 2.2147,
      "step": 39539
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0322201251983643,
      "learning_rate": 9.633842020270184e-06,
      "loss": 2.2789,
      "step": 39540
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9769060611724854,
      "learning_rate": 9.633430559924266e-06,
      "loss": 2.6536,
      "step": 39541
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1962864398956299,
      "learning_rate": 9.63301910019978e-06,
      "loss": 2.3164,
      "step": 39542
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0406643152236938,
      "learning_rate": 9.632607641097429e-06,
      "loss": 2.4264,
      "step": 39543
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0812910795211792,
      "learning_rate": 9.632196182617906e-06,
      "loss": 2.1314,
      "step": 39544
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9877396821975708,
      "learning_rate": 9.631784724761913e-06,
      "loss": 2.2966,
      "step": 39545
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1061285734176636,
      "learning_rate": 9.631373267530142e-06,
      "loss": 2.3771,
      "step": 39546
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.040071725845337,
      "learning_rate": 9.630961810923299e-06,
      "loss": 2.0775,
      "step": 39547
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.0316252708435059,
      "learning_rate": 9.630550354942072e-06,
      "loss": 2.4551,
      "step": 39548
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1259373426437378,
      "learning_rate": 9.63013889958716e-06,
      "loss": 2.237,
      "step": 39549
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1346551179885864,
      "learning_rate": 9.629727444859267e-06,
      "loss": 2.2632,
      "step": 39550
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.9975730180740356,
      "learning_rate": 9.629315990759084e-06,
      "loss": 2.3111,
      "step": 39551
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1994308233261108,
      "learning_rate": 9.628904537287311e-06,
      "loss": 2.1758,
      "step": 39552
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.07256019115448,
      "learning_rate": 9.628493084444647e-06,
      "loss": 2.2849,
      "step": 39553
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0521174669265747,
      "learning_rate": 9.628081632231787e-06,
      "loss": 2.2977,
      "step": 39554
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1797027587890625,
      "learning_rate": 9.62767018064943e-06,
      "loss": 2.4545,
      "step": 39555
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.073944330215454,
      "learning_rate": 9.627258729698273e-06,
      "loss": 2.3595,
      "step": 39556
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3150705099105835,
      "learning_rate": 9.626847279379014e-06,
      "loss": 2.3343,
      "step": 39557
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9644219279289246,
      "learning_rate": 9.626435829692352e-06,
      "loss": 2.3875,
      "step": 39558
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0333735942840576,
      "learning_rate": 9.626024380638981e-06,
      "loss": 2.2229,
      "step": 39559
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0857118368148804,
      "learning_rate": 9.625612932219602e-06,
      "loss": 2.2513,
      "step": 39560
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1994073390960693,
      "learning_rate": 9.625201484434909e-06,
      "loss": 2.3947,
      "step": 39561
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1370948553085327,
      "learning_rate": 9.6247900372856e-06,
      "loss": 2.3865,
      "step": 39562
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1426231861114502,
      "learning_rate": 9.624378590772376e-06,
      "loss": 2.2442,
      "step": 39563
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.585646867752075,
      "learning_rate": 9.623967144895934e-06,
      "loss": 2.2485,
      "step": 39564
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0381767749786377,
      "learning_rate": 9.623555699656966e-06,
      "loss": 2.2108,
      "step": 39565
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.153018593788147,
      "learning_rate": 9.623144255056176e-06,
      "loss": 2.3465,
      "step": 39566
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0980650186538696,
      "learning_rate": 9.622732811094255e-06,
      "loss": 2.3353,
      "step": 39567
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0328731536865234,
      "learning_rate": 9.622321367771907e-06,
      "loss": 2.5445,
      "step": 39568
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0733280181884766,
      "learning_rate": 9.621909925089827e-06,
      "loss": 2.3989,
      "step": 39569
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0375261306762695,
      "learning_rate": 9.621498483048711e-06,
      "loss": 2.6873,
      "step": 39570
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0705293416976929,
      "learning_rate": 9.621087041649258e-06,
      "loss": 2.3575,
      "step": 39571
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0120853185653687,
      "learning_rate": 9.620675600892167e-06,
      "loss": 2.3432,
      "step": 39572
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2031952142715454,
      "learning_rate": 9.620264160778132e-06,
      "loss": 2.4333,
      "step": 39573
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.193170428276062,
      "learning_rate": 9.619852721307858e-06,
      "loss": 2.5426,
      "step": 39574
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.233579158782959,
      "learning_rate": 9.619441282482032e-06,
      "loss": 2.3955,
      "step": 39575
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1171941757202148,
      "learning_rate": 9.619029844301356e-06,
      "loss": 2.2766,
      "step": 39576
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.100846529006958,
      "learning_rate": 9.618618406766528e-06,
      "loss": 2.5223,
      "step": 39577
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9995872378349304,
      "learning_rate": 9.618206969878246e-06,
      "loss": 2.4067,
      "step": 39578
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0749927759170532,
      "learning_rate": 9.617795533637207e-06,
      "loss": 2.4264,
      "step": 39579
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.120482087135315,
      "learning_rate": 9.617384098044108e-06,
      "loss": 2.3201,
      "step": 39580
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0533498525619507,
      "learning_rate": 9.616972663099648e-06,
      "loss": 2.4422,
      "step": 39581
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1206833124160767,
      "learning_rate": 9.616561228804521e-06,
      "loss": 2.2875,
      "step": 39582
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1236752271652222,
      "learning_rate": 9.616149795159428e-06,
      "loss": 2.3309,
      "step": 39583
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0841975212097168,
      "learning_rate": 9.615738362165067e-06,
      "loss": 2.2966,
      "step": 39584
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2605905532836914,
      "learning_rate": 9.615326929822133e-06,
      "loss": 2.2991,
      "step": 39585
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1343332529067993,
      "learning_rate": 9.614915498131324e-06,
      "loss": 2.276,
      "step": 39586
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.137891173362732,
      "learning_rate": 9.61450406709334e-06,
      "loss": 2.5284,
      "step": 39587
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9779128432273865,
      "learning_rate": 9.614092636708873e-06,
      "loss": 2.2172,
      "step": 39588
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0505280494689941,
      "learning_rate": 9.613681206978625e-06,
      "loss": 2.2554,
      "step": 39589
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.095784068107605,
      "learning_rate": 9.613269777903292e-06,
      "loss": 2.5038,
      "step": 39590
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2840235233306885,
      "learning_rate": 9.612858349483572e-06,
      "loss": 2.248,
      "step": 39591
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9253061413764954,
      "learning_rate": 9.612446921720163e-06,
      "loss": 2.6036,
      "step": 39592
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.070834755897522,
      "learning_rate": 9.61203549461376e-06,
      "loss": 2.2767,
      "step": 39593
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2147388458251953,
      "learning_rate": 9.611624068165063e-06,
      "loss": 2.3934,
      "step": 39594
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0572030544281006,
      "learning_rate": 9.61121264237477e-06,
      "loss": 2.2515,
      "step": 39595
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.036370873451233,
      "learning_rate": 9.610801217243574e-06,
      "loss": 2.4095,
      "step": 39596
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0299005508422852,
      "learning_rate": 9.610389792772178e-06,
      "loss": 2.1659,
      "step": 39597
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.016369342803955,
      "learning_rate": 9.609978368961276e-06,
      "loss": 2.1007,
      "step": 39598
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0857892036437988,
      "learning_rate": 9.609566945811567e-06,
      "loss": 2.5826,
      "step": 39599
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0814518928527832,
      "learning_rate": 9.609155523323753e-06,
      "loss": 2.3184,
      "step": 39600
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0369640588760376,
      "learning_rate": 9.60874410149852e-06,
      "loss": 2.3743,
      "step": 39601
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0306947231292725,
      "learning_rate": 9.608332680336574e-06,
      "loss": 2.435,
      "step": 39602
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.119547724723816,
      "learning_rate": 9.607921259838611e-06,
      "loss": 2.178,
      "step": 39603
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1391059160232544,
      "learning_rate": 9.607509840005327e-06,
      "loss": 2.3896,
      "step": 39604
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1136304140090942,
      "learning_rate": 9.60709842083742e-06,
      "loss": 2.4182,
      "step": 39605
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1049487590789795,
      "learning_rate": 9.606687002335588e-06,
      "loss": 2.3132,
      "step": 39606
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1320880651474,
      "learning_rate": 9.606275584500528e-06,
      "loss": 2.1881,
      "step": 39607
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9925567507743835,
      "learning_rate": 9.605864167332938e-06,
      "loss": 2.3907,
      "step": 39608
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0741363763809204,
      "learning_rate": 9.605452750833517e-06,
      "loss": 2.2957,
      "step": 39609
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1825875043869019,
      "learning_rate": 9.60504133500296e-06,
      "loss": 2.3948,
      "step": 39610
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1886993646621704,
      "learning_rate": 9.604629919841964e-06,
      "loss": 2.444,
      "step": 39611
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1576576232910156,
      "learning_rate": 9.604218505351228e-06,
      "loss": 2.184,
      "step": 39612
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2427784204483032,
      "learning_rate": 9.603807091531455e-06,
      "loss": 2.3378,
      "step": 39613
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0093269348144531,
      "learning_rate": 9.60339567838333e-06,
      "loss": 2.6781,
      "step": 39614
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0957778692245483,
      "learning_rate": 9.60298426590756e-06,
      "loss": 2.4486,
      "step": 39615
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3656561374664307,
      "learning_rate": 9.602572854104836e-06,
      "loss": 2.1114,
      "step": 39616
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1638033390045166,
      "learning_rate": 9.60216144297586e-06,
      "loss": 2.3913,
      "step": 39617
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.194864273071289,
      "learning_rate": 9.60175003252133e-06,
      "loss": 2.1612,
      "step": 39618
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0242886543273926,
      "learning_rate": 9.60133862274194e-06,
      "loss": 2.3315,
      "step": 39619
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.017461895942688,
      "learning_rate": 9.600927213638393e-06,
      "loss": 2.491,
      "step": 39620
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.129812240600586,
      "learning_rate": 9.60051580521138e-06,
      "loss": 2.3901,
      "step": 39621
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1229974031448364,
      "learning_rate": 9.6001043974616e-06,
      "loss": 2.4981,
      "step": 39622
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9878091216087341,
      "learning_rate": 9.599692990389754e-06,
      "loss": 2.4659,
      "step": 39623
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0598737001419067,
      "learning_rate": 9.599281583996536e-06,
      "loss": 2.3765,
      "step": 39624
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0513968467712402,
      "learning_rate": 9.598870178282644e-06,
      "loss": 2.4709,
      "step": 39625
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9474822878837585,
      "learning_rate": 9.598458773248781e-06,
      "loss": 2.179,
      "step": 39626
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0600098371505737,
      "learning_rate": 9.598047368895634e-06,
      "loss": 2.5084,
      "step": 39627
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.171162486076355,
      "learning_rate": 9.597635965223907e-06,
      "loss": 2.4601,
      "step": 39628
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1641910076141357,
      "learning_rate": 9.597224562234296e-06,
      "loss": 2.3488,
      "step": 39629
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0972282886505127,
      "learning_rate": 9.5968131599275e-06,
      "loss": 2.3806,
      "step": 39630
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1828259229660034,
      "learning_rate": 9.596401758304213e-06,
      "loss": 2.1924,
      "step": 39631
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.113120675086975,
      "learning_rate": 9.595990357365135e-06,
      "loss": 2.3432,
      "step": 39632
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1548047065734863,
      "learning_rate": 9.595578957110965e-06,
      "loss": 2.5794,
      "step": 39633
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1115647554397583,
      "learning_rate": 9.595167557542396e-06,
      "loss": 2.3216,
      "step": 39634
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0253952741622925,
      "learning_rate": 9.59475615866013e-06,
      "loss": 2.515,
      "step": 39635
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0249269008636475,
      "learning_rate": 9.594344760464861e-06,
      "loss": 2.3436,
      "step": 39636
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.10345458984375,
      "learning_rate": 9.593933362957289e-06,
      "loss": 2.4016,
      "step": 39637
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9704903960227966,
      "learning_rate": 9.59352196613811e-06,
      "loss": 2.494,
      "step": 39638
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1586629152297974,
      "learning_rate": 9.59311057000802e-06,
      "loss": 2.4084,
      "step": 39639
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0905070304870605,
      "learning_rate": 9.592699174567725e-06,
      "loss": 2.4225,
      "step": 39640
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0528161525726318,
      "learning_rate": 9.592287779817909e-06,
      "loss": 2.4764,
      "step": 39641
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0422248840332031,
      "learning_rate": 9.591876385759277e-06,
      "loss": 2.3589,
      "step": 39642
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1212226152420044,
      "learning_rate": 9.591464992392525e-06,
      "loss": 2.2758,
      "step": 39643
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1452150344848633,
      "learning_rate": 9.591053599718351e-06,
      "loss": 2.2823,
      "step": 39644
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1189227104187012,
      "learning_rate": 9.590642207737452e-06,
      "loss": 2.2068,
      "step": 39645
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0214205980300903,
      "learning_rate": 9.590230816450527e-06,
      "loss": 2.4144,
      "step": 39646
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.157275676727295,
      "learning_rate": 9.58981942585827e-06,
      "loss": 2.3715,
      "step": 39647
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.030794620513916,
      "learning_rate": 9.589408035961382e-06,
      "loss": 2.5593,
      "step": 39648
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9185066223144531,
      "learning_rate": 9.58899664676056e-06,
      "loss": 2.4687,
      "step": 39649
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0221465826034546,
      "learning_rate": 9.588585258256499e-06,
      "loss": 2.4185,
      "step": 39650
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0194816589355469,
      "learning_rate": 9.588173870449897e-06,
      "loss": 2.2296,
      "step": 39651
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0320512056350708,
      "learning_rate": 9.587762483341451e-06,
      "loss": 2.4173,
      "step": 39652
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0603317022323608,
      "learning_rate": 9.587351096931864e-06,
      "loss": 2.4592,
      "step": 39653
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0967079401016235,
      "learning_rate": 9.586939711221827e-06,
      "loss": 2.0286,
      "step": 39654
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.265486478805542,
      "learning_rate": 9.58652832621204e-06,
      "loss": 2.5238,
      "step": 39655
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0664606094360352,
      "learning_rate": 9.586116941903197e-06,
      "loss": 2.5128,
      "step": 39656
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9858327507972717,
      "learning_rate": 9.585705558296e-06,
      "loss": 2.4313,
      "step": 39657
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4659171104431152,
      "learning_rate": 9.585294175391145e-06,
      "loss": 2.2267,
      "step": 39658
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9556710720062256,
      "learning_rate": 9.584882793189325e-06,
      "loss": 2.1192,
      "step": 39659
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0607914924621582,
      "learning_rate": 9.584471411691247e-06,
      "loss": 2.4,
      "step": 39660
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6081688404083252,
      "learning_rate": 9.5840600308976e-06,
      "loss": 2.3981,
      "step": 39661
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.060835838317871,
      "learning_rate": 9.583648650809083e-06,
      "loss": 2.5251,
      "step": 39662
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1781467199325562,
      "learning_rate": 9.583237271426397e-06,
      "loss": 2.3328,
      "step": 39663
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0300620794296265,
      "learning_rate": 9.582825892750235e-06,
      "loss": 2.6269,
      "step": 39664
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.040175437927246,
      "learning_rate": 9.582414514781297e-06,
      "loss": 2.3624,
      "step": 39665
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1460886001586914,
      "learning_rate": 9.582003137520285e-06,
      "loss": 2.301,
      "step": 39666
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2116976976394653,
      "learning_rate": 9.581591760967886e-06,
      "loss": 2.2741,
      "step": 39667
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0429847240447998,
      "learning_rate": 9.581180385124805e-06,
      "loss": 2.5366,
      "step": 39668
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1587696075439453,
      "learning_rate": 9.580769009991734e-06,
      "loss": 2.296,
      "step": 39669
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1481380462646484,
      "learning_rate": 9.580357635569374e-06,
      "loss": 2.3968,
      "step": 39670
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0946866273880005,
      "learning_rate": 9.579946261858423e-06,
      "loss": 2.6381,
      "step": 39671
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1107088327407837,
      "learning_rate": 9.579534888859575e-06,
      "loss": 2.3118,
      "step": 39672
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1143547296524048,
      "learning_rate": 9.579123516573532e-06,
      "loss": 2.4784,
      "step": 39673
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0397242307662964,
      "learning_rate": 9.578712145000988e-06,
      "loss": 2.2195,
      "step": 39674
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1491129398345947,
      "learning_rate": 9.578300774142641e-06,
      "loss": 2.2254,
      "step": 39675
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2173937559127808,
      "learning_rate": 9.577889403999191e-06,
      "loss": 2.3187,
      "step": 39676
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.019648551940918,
      "learning_rate": 9.57747803457133e-06,
      "loss": 2.4037,
      "step": 39677
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.005204677581787,
      "learning_rate": 9.57706666585976e-06,
      "loss": 2.13,
      "step": 39678
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.288365364074707,
      "learning_rate": 9.576655297865178e-06,
      "loss": 2.2857,
      "step": 39679
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1309555768966675,
      "learning_rate": 9.57624393058828e-06,
      "loss": 2.1571,
      "step": 39680
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.060657024383545,
      "learning_rate": 9.575832564029761e-06,
      "loss": 2.1777,
      "step": 39681
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9723988175392151,
      "learning_rate": 9.575421198190325e-06,
      "loss": 2.3005,
      "step": 39682
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1623274087905884,
      "learning_rate": 9.575009833070663e-06,
      "loss": 2.1516,
      "step": 39683
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1007733345031738,
      "learning_rate": 9.574598468671474e-06,
      "loss": 2.3801,
      "step": 39684
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.141613245010376,
      "learning_rate": 9.574187104993456e-06,
      "loss": 2.3889,
      "step": 39685
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0929887294769287,
      "learning_rate": 9.573775742037308e-06,
      "loss": 2.5018,
      "step": 39686
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9509648680686951,
      "learning_rate": 9.573364379803724e-06,
      "loss": 2.4585,
      "step": 39687
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0443618297576904,
      "learning_rate": 9.572953018293406e-06,
      "loss": 2.3668,
      "step": 39688
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1076194047927856,
      "learning_rate": 9.572541657507046e-06,
      "loss": 2.279,
      "step": 39689
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0758605003356934,
      "learning_rate": 9.572130297445345e-06,
      "loss": 2.0811,
      "step": 39690
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.058498740196228,
      "learning_rate": 9.571718938108999e-06,
      "loss": 2.374,
      "step": 39691
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0035793781280518,
      "learning_rate": 9.571307579498709e-06,
      "loss": 2.1581,
      "step": 39692
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2287147045135498,
      "learning_rate": 9.570896221615167e-06,
      "loss": 2.2515,
      "step": 39693
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0862144231796265,
      "learning_rate": 9.57048486445907e-06,
      "loss": 2.4077,
      "step": 39694
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0011646747589111,
      "learning_rate": 9.57007350803112e-06,
      "loss": 2.2876,
      "step": 39695
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0410298109054565,
      "learning_rate": 9.569662152332011e-06,
      "loss": 2.5246,
      "step": 39696
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1413260698318481,
      "learning_rate": 9.569250797362442e-06,
      "loss": 2.3805,
      "step": 39697
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1482999324798584,
      "learning_rate": 9.56883944312311e-06,
      "loss": 2.3781,
      "step": 39698
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9796454310417175,
      "learning_rate": 9.568428089614712e-06,
      "loss": 2.3965,
      "step": 39699
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9978113770484924,
      "learning_rate": 9.568016736837946e-06,
      "loss": 2.1285,
      "step": 39700
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0942997932434082,
      "learning_rate": 9.567605384793509e-06,
      "loss": 2.2877,
      "step": 39701
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0667418241500854,
      "learning_rate": 9.567194033482097e-06,
      "loss": 2.29,
      "step": 39702
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9938788414001465,
      "learning_rate": 9.566782682904412e-06,
      "loss": 2.174,
      "step": 39703
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.15273916721344,
      "learning_rate": 9.566371333061145e-06,
      "loss": 2.3152,
      "step": 39704
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1370675563812256,
      "learning_rate": 9.565959983953e-06,
      "loss": 2.412,
      "step": 39705
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1318387985229492,
      "learning_rate": 9.565548635580667e-06,
      "loss": 2.4727,
      "step": 39706
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0876878499984741,
      "learning_rate": 9.565137287944848e-06,
      "loss": 2.1834,
      "step": 39707
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0129902362823486,
      "learning_rate": 9.564725941046239e-06,
      "loss": 2.4127,
      "step": 39708
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0359700918197632,
      "learning_rate": 9.564314594885538e-06,
      "loss": 2.2959,
      "step": 39709
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.040431261062622,
      "learning_rate": 9.563903249463444e-06,
      "loss": 2.375,
      "step": 39710
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.058105707168579,
      "learning_rate": 9.563491904780651e-06,
      "loss": 2.4284,
      "step": 39711
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.11481773853302,
      "learning_rate": 9.563080560837857e-06,
      "loss": 2.483,
      "step": 39712
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9638488292694092,
      "learning_rate": 9.562669217635759e-06,
      "loss": 2.584,
      "step": 39713
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0502893924713135,
      "learning_rate": 9.562257875175057e-06,
      "loss": 2.0915,
      "step": 39714
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9886722564697266,
      "learning_rate": 9.561846533456447e-06,
      "loss": 2.3562,
      "step": 39715
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.058872938156128,
      "learning_rate": 9.561435192480626e-06,
      "loss": 2.3123,
      "step": 39716
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.080734372138977,
      "learning_rate": 9.56102385224829e-06,
      "loss": 2.3758,
      "step": 39717
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0875766277313232,
      "learning_rate": 9.560612512760138e-06,
      "loss": 2.5111,
      "step": 39718
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.006649136543274,
      "learning_rate": 9.560201174016874e-06,
      "loss": 2.4486,
      "step": 39719
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.03083074092865,
      "learning_rate": 9.559789836019182e-06,
      "loss": 2.2999,
      "step": 39720
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.064117431640625,
      "learning_rate": 9.559378498767767e-06,
      "loss": 2.443,
      "step": 39721
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1555814743041992,
      "learning_rate": 9.558967162263324e-06,
      "loss": 2.469,
      "step": 39722
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0775158405303955,
      "learning_rate": 9.558555826506551e-06,
      "loss": 2.505,
      "step": 39723
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1711605787277222,
      "learning_rate": 9.558144491498147e-06,
      "loss": 2.3931,
      "step": 39724
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.998211681842804,
      "learning_rate": 9.557733157238807e-06,
      "loss": 2.2797,
      "step": 39725
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.212010383605957,
      "learning_rate": 9.557321823729231e-06,
      "loss": 2.346,
      "step": 39726
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0044739246368408,
      "learning_rate": 9.556910490970114e-06,
      "loss": 2.3349,
      "step": 39727
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0028983354568481,
      "learning_rate": 9.556499158962153e-06,
      "loss": 2.2762,
      "step": 39728
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9812026023864746,
      "learning_rate": 9.556087827706048e-06,
      "loss": 2.4586,
      "step": 39729
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0938746929168701,
      "learning_rate": 9.555676497202494e-06,
      "loss": 2.4918,
      "step": 39730
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0048949718475342,
      "learning_rate": 9.55526516745219e-06,
      "loss": 2.2118,
      "step": 39731
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0332820415496826,
      "learning_rate": 9.554853838455836e-06,
      "loss": 2.2869,
      "step": 39732
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.045090913772583,
      "learning_rate": 9.55444251021412e-06,
      "loss": 2.5314,
      "step": 39733
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.212559700012207,
      "learning_rate": 9.554031182727745e-06,
      "loss": 2.3563,
      "step": 39734
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2628353834152222,
      "learning_rate": 9.55361985599741e-06,
      "loss": 2.4232,
      "step": 39735
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0497331619262695,
      "learning_rate": 9.55320853002381e-06,
      "loss": 2.4663,
      "step": 39736
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0160095691680908,
      "learning_rate": 9.552797204807642e-06,
      "loss": 2.2226,
      "step": 39737
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0857114791870117,
      "learning_rate": 9.552385880349607e-06,
      "loss": 2.3592,
      "step": 39738
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.018061876296997,
      "learning_rate": 9.551974556650397e-06,
      "loss": 2.3848,
      "step": 39739
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1321355104446411,
      "learning_rate": 9.551563233710712e-06,
      "loss": 2.2996,
      "step": 39740
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0129673480987549,
      "learning_rate": 9.55115191153125e-06,
      "loss": 2.2236,
      "step": 39741
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.009699821472168,
      "learning_rate": 9.550740590112707e-06,
      "loss": 2.1185,
      "step": 39742
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1756504774093628,
      "learning_rate": 9.55032926945578e-06,
      "loss": 2.2944,
      "step": 39743
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0801984071731567,
      "learning_rate": 9.549917949561167e-06,
      "loss": 2.3921,
      "step": 39744
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9985566139221191,
      "learning_rate": 9.54950663042957e-06,
      "loss": 2.2315,
      "step": 39745
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.042280912399292,
      "learning_rate": 9.549095312061677e-06,
      "loss": 2.1235,
      "step": 39746
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.025867223739624,
      "learning_rate": 9.54868399445819e-06,
      "loss": 2.4664,
      "step": 39747
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0960346460342407,
      "learning_rate": 9.548272677619808e-06,
      "loss": 2.2033,
      "step": 39748
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1054304838180542,
      "learning_rate": 9.547861361547225e-06,
      "loss": 2.2959,
      "step": 39749
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2224459648132324,
      "learning_rate": 9.54745004624114e-06,
      "loss": 2.5241,
      "step": 39750
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.060578465461731,
      "learning_rate": 9.54703873170225e-06,
      "loss": 2.3728,
      "step": 39751
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0266363620758057,
      "learning_rate": 9.546627417931252e-06,
      "loss": 2.52,
      "step": 39752
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0746567249298096,
      "learning_rate": 9.546216104928843e-06,
      "loss": 2.5302,
      "step": 39753
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.078905701637268,
      "learning_rate": 9.545804792695721e-06,
      "loss": 2.3687,
      "step": 39754
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2975808382034302,
      "learning_rate": 9.545393481232585e-06,
      "loss": 2.4742,
      "step": 39755
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9644938111305237,
      "learning_rate": 9.54498217054013e-06,
      "loss": 2.5049,
      "step": 39756
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1190074682235718,
      "learning_rate": 9.544570860619054e-06,
      "loss": 2.2549,
      "step": 39757
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0934383869171143,
      "learning_rate": 9.544159551470057e-06,
      "loss": 2.3617,
      "step": 39758
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2532795667648315,
      "learning_rate": 9.54374824309383e-06,
      "loss": 2.2604,
      "step": 39759
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.027795672416687,
      "learning_rate": 9.543336935491074e-06,
      "loss": 2.2566,
      "step": 39760
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0509803295135498,
      "learning_rate": 9.542925628662485e-06,
      "loss": 2.3768,
      "step": 39761
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.185670256614685,
      "learning_rate": 9.542514322608761e-06,
      "loss": 2.3328,
      "step": 39762
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.055666446685791,
      "learning_rate": 9.5421030173306e-06,
      "loss": 2.3445,
      "step": 39763
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1375458240509033,
      "learning_rate": 9.541691712828699e-06,
      "loss": 2.2311,
      "step": 39764
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9758949279785156,
      "learning_rate": 9.541280409103754e-06,
      "loss": 2.2875,
      "step": 39765
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0882467031478882,
      "learning_rate": 9.540869106156468e-06,
      "loss": 2.3534,
      "step": 39766
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9409374594688416,
      "learning_rate": 9.540457803987528e-06,
      "loss": 2.5832,
      "step": 39767
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2045193910598755,
      "learning_rate": 9.540046502597639e-06,
      "loss": 2.32,
      "step": 39768
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1529887914657593,
      "learning_rate": 9.539635201987497e-06,
      "loss": 2.3686,
      "step": 39769
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0071035623550415,
      "learning_rate": 9.539223902157795e-06,
      "loss": 2.2307,
      "step": 39770
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0737686157226562,
      "learning_rate": 9.53881260310924e-06,
      "loss": 2.205,
      "step": 39771
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1123253107070923,
      "learning_rate": 9.538401304842519e-06,
      "loss": 2.2786,
      "step": 39772
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9879166483879089,
      "learning_rate": 9.537990007358333e-06,
      "loss": 2.4938,
      "step": 39773
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.02887761592865,
      "learning_rate": 9.53757871065738e-06,
      "loss": 2.1352,
      "step": 39774
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3416835069656372,
      "learning_rate": 9.537167414740356e-06,
      "loss": 2.251,
      "step": 39775
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1015498638153076,
      "learning_rate": 9.536756119607958e-06,
      "loss": 2.2961,
      "step": 39776
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2401412725448608,
      "learning_rate": 9.536344825260884e-06,
      "loss": 2.297,
      "step": 39777
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0481551885604858,
      "learning_rate": 9.535933531699833e-06,
      "loss": 2.2514,
      "step": 39778
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9899122714996338,
      "learning_rate": 9.5355222389255e-06,
      "loss": 2.4138,
      "step": 39779
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1616941690444946,
      "learning_rate": 9.535110946938583e-06,
      "loss": 2.4445,
      "step": 39780
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0852400064468384,
      "learning_rate": 9.53469965573978e-06,
      "loss": 2.4531,
      "step": 39781
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.224866271018982,
      "learning_rate": 9.534288365329786e-06,
      "loss": 2.4515,
      "step": 39782
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1395004987716675,
      "learning_rate": 9.5338770757093e-06,
      "loss": 2.326,
      "step": 39783
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1475330591201782,
      "learning_rate": 9.533465786879019e-06,
      "loss": 2.4109,
      "step": 39784
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0699650049209595,
      "learning_rate": 9.533054498839645e-06,
      "loss": 2.3729,
      "step": 39785
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.069393277168274,
      "learning_rate": 9.532643211591866e-06,
      "loss": 2.2514,
      "step": 39786
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0778785943984985,
      "learning_rate": 9.532231925136383e-06,
      "loss": 2.2526,
      "step": 39787
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2766398191452026,
      "learning_rate": 9.531820639473895e-06,
      "loss": 2.3884,
      "step": 39788
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9519124031066895,
      "learning_rate": 9.5314093546051e-06,
      "loss": 2.2373,
      "step": 39789
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1509788036346436,
      "learning_rate": 9.530998070530689e-06,
      "loss": 2.2041,
      "step": 39790
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9961214065551758,
      "learning_rate": 9.530586787251368e-06,
      "loss": 2.6192,
      "step": 39791
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1816186904907227,
      "learning_rate": 9.530175504767825e-06,
      "loss": 2.3289,
      "step": 39792
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3657559156417847,
      "learning_rate": 9.529764223080766e-06,
      "loss": 2.2437,
      "step": 39793
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2879160642623901,
      "learning_rate": 9.529352942190886e-06,
      "loss": 2.3723,
      "step": 39794
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0317693948745728,
      "learning_rate": 9.528941662098877e-06,
      "loss": 2.351,
      "step": 39795
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1561894416809082,
      "learning_rate": 9.528530382805442e-06,
      "loss": 2.4642,
      "step": 39796
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0708101987838745,
      "learning_rate": 9.528119104311274e-06,
      "loss": 2.4526,
      "step": 39797
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9974578022956848,
      "learning_rate": 9.527707826617074e-06,
      "loss": 2.4586,
      "step": 39798
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9692306518554688,
      "learning_rate": 9.527296549723538e-06,
      "loss": 2.3951,
      "step": 39799
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.95750892162323,
      "learning_rate": 9.526885273631362e-06,
      "loss": 2.3648,
      "step": 39800
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.093257188796997,
      "learning_rate": 9.526473998341243e-06,
      "loss": 2.3823,
      "step": 39801
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0752358436584473,
      "learning_rate": 9.52606272385388e-06,
      "loss": 2.3896,
      "step": 39802
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1142158508300781,
      "learning_rate": 9.525651450169968e-06,
      "loss": 2.2928,
      "step": 39803
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0515906810760498,
      "learning_rate": 9.525240177290207e-06,
      "loss": 2.5103,
      "step": 39804
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0597481727600098,
      "learning_rate": 9.524828905215292e-06,
      "loss": 2.1608,
      "step": 39805
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0815445184707642,
      "learning_rate": 9.524417633945922e-06,
      "loss": 2.3763,
      "step": 39806
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0252934694290161,
      "learning_rate": 9.524006363482792e-06,
      "loss": 2.3732,
      "step": 39807
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0490919351577759,
      "learning_rate": 9.523595093826601e-06,
      "loss": 2.384,
      "step": 39808
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0162464380264282,
      "learning_rate": 9.523183824978045e-06,
      "loss": 2.3439,
      "step": 39809
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0263738632202148,
      "learning_rate": 9.522772556937822e-06,
      "loss": 2.4763,
      "step": 39810
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0490888357162476,
      "learning_rate": 9.522361289706636e-06,
      "loss": 2.4222,
      "step": 39811
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.167885661125183,
      "learning_rate": 9.52195002328517e-06,
      "loss": 2.4539,
      "step": 39812
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0703388452529907,
      "learning_rate": 9.52153875767413e-06,
      "loss": 2.291,
      "step": 39813
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9987342357635498,
      "learning_rate": 9.521127492874211e-06,
      "loss": 2.1552,
      "step": 39814
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0739645957946777,
      "learning_rate": 9.520716228886113e-06,
      "loss": 2.3307,
      "step": 39815
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9995331168174744,
      "learning_rate": 9.520304965710529e-06,
      "loss": 2.2572,
      "step": 39816
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0213664770126343,
      "learning_rate": 9.519893703348157e-06,
      "loss": 2.5007,
      "step": 39817
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0510846376419067,
      "learning_rate": 9.519482441799698e-06,
      "loss": 2.2411,
      "step": 39818
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0754903554916382,
      "learning_rate": 9.519071181065846e-06,
      "loss": 2.5869,
      "step": 39819
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.064363956451416,
      "learning_rate": 9.518659921147299e-06,
      "loss": 2.1125,
      "step": 39820
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0832520723342896,
      "learning_rate": 9.518248662044758e-06,
      "loss": 2.5215,
      "step": 39821
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1136574745178223,
      "learning_rate": 9.517837403758912e-06,
      "loss": 2.4029,
      "step": 39822
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.179249882698059,
      "learning_rate": 9.517426146290463e-06,
      "loss": 2.5334,
      "step": 39823
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9944847822189331,
      "learning_rate": 9.517014889640111e-06,
      "loss": 2.3079,
      "step": 39824
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0283840894699097,
      "learning_rate": 9.516603633808547e-06,
      "loss": 2.4893,
      "step": 39825
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0266070365905762,
      "learning_rate": 9.51619237879647e-06,
      "loss": 2.2921,
      "step": 39826
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.170145869255066,
      "learning_rate": 9.51578112460458e-06,
      "loss": 2.3573,
      "step": 39827
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.052103877067566,
      "learning_rate": 9.515369871233574e-06,
      "loss": 2.4738,
      "step": 39828
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9618053436279297,
      "learning_rate": 9.514958618684145e-06,
      "loss": 2.3318,
      "step": 39829
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0709593296051025,
      "learning_rate": 9.514547366956994e-06,
      "loss": 2.1409,
      "step": 39830
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0614614486694336,
      "learning_rate": 9.514136116052817e-06,
      "loss": 2.3256,
      "step": 39831
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1417030096054077,
      "learning_rate": 9.51372486597231e-06,
      "loss": 2.5324,
      "step": 39832
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2037599086761475,
      "learning_rate": 9.513313616716173e-06,
      "loss": 2.5507,
      "step": 39833
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0497140884399414,
      "learning_rate": 9.512902368285101e-06,
      "loss": 2.5358,
      "step": 39834
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.035889983177185,
      "learning_rate": 9.51249112067979e-06,
      "loss": 2.4092,
      "step": 39835
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0057095289230347,
      "learning_rate": 9.512079873900942e-06,
      "loss": 2.3526,
      "step": 39836
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1107079982757568,
      "learning_rate": 9.511668627949253e-06,
      "loss": 2.3173,
      "step": 39837
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9910479187965393,
      "learning_rate": 9.511257382825416e-06,
      "loss": 2.3403,
      "step": 39838
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1159281730651855,
      "learning_rate": 9.510846138530129e-06,
      "loss": 2.2278,
      "step": 39839
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0690457820892334,
      "learning_rate": 9.51043489506409e-06,
      "loss": 2.3983,
      "step": 39840
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0783816576004028,
      "learning_rate": 9.510023652427998e-06,
      "loss": 2.6377,
      "step": 39841
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0789589881896973,
      "learning_rate": 9.50961241062255e-06,
      "loss": 2.2288,
      "step": 39842
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2058627605438232,
      "learning_rate": 9.50920116964844e-06,
      "loss": 2.2896,
      "step": 39843
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2238526344299316,
      "learning_rate": 9.50878992950637e-06,
      "loss": 2.2523,
      "step": 39844
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1427890062332153,
      "learning_rate": 9.508378690197032e-06,
      "loss": 2.3202,
      "step": 39845
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1123791933059692,
      "learning_rate": 9.507967451721127e-06,
      "loss": 2.5095,
      "step": 39846
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2705764770507812,
      "learning_rate": 9.50755621407935e-06,
      "loss": 2.4184,
      "step": 39847
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0004518032073975,
      "learning_rate": 9.507144977272399e-06,
      "loss": 2.4054,
      "step": 39848
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0887013673782349,
      "learning_rate": 9.506733741300974e-06,
      "loss": 2.2681,
      "step": 39849
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0269798040390015,
      "learning_rate": 9.506322506165769e-06,
      "loss": 2.4911,
      "step": 39850
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0790799856185913,
      "learning_rate": 9.505911271867478e-06,
      "loss": 2.2474,
      "step": 39851
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1672972440719604,
      "learning_rate": 9.505500038406802e-06,
      "loss": 2.1228,
      "step": 39852
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0481210947036743,
      "learning_rate": 9.505088805784439e-06,
      "loss": 2.4849,
      "step": 39853
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0457398891448975,
      "learning_rate": 9.504677574001085e-06,
      "loss": 2.4603,
      "step": 39854
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0538604259490967,
      "learning_rate": 9.504266343057438e-06,
      "loss": 2.2451,
      "step": 39855
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1520252227783203,
      "learning_rate": 9.503855112954194e-06,
      "loss": 2.5094,
      "step": 39856
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0485583543777466,
      "learning_rate": 9.50344388369205e-06,
      "loss": 2.4454,
      "step": 39857
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0671722888946533,
      "learning_rate": 9.5030326552717e-06,
      "loss": 2.1655,
      "step": 39858
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0795793533325195,
      "learning_rate": 9.502621427693847e-06,
      "loss": 2.3745,
      "step": 39859
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2357820272445679,
      "learning_rate": 9.502210200959186e-06,
      "loss": 2.4767,
      "step": 39860
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.048802137374878,
      "learning_rate": 9.501798975068414e-06,
      "loss": 2.3719,
      "step": 39861
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1360889673233032,
      "learning_rate": 9.501387750022228e-06,
      "loss": 2.4828,
      "step": 39862
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1584810018539429,
      "learning_rate": 9.500976525821323e-06,
      "loss": 2.3861,
      "step": 39863
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1377735137939453,
      "learning_rate": 9.500565302466405e-06,
      "loss": 2.277,
      "step": 39864
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0942808389663696,
      "learning_rate": 9.50015407995816e-06,
      "loss": 2.5227,
      "step": 39865
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9694200158119202,
      "learning_rate": 9.49974285829729e-06,
      "loss": 2.5723,
      "step": 39866
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0718430280685425,
      "learning_rate": 9.499331637484491e-06,
      "loss": 2.3009,
      "step": 39867
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2304655313491821,
      "learning_rate": 9.49892041752046e-06,
      "loss": 2.2756,
      "step": 39868
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0053790807724,
      "learning_rate": 9.498509198405896e-06,
      "loss": 2.5385,
      "step": 39869
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0861053466796875,
      "learning_rate": 9.498097980141495e-06,
      "loss": 2.4955,
      "step": 39870
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1664553880691528,
      "learning_rate": 9.497686762727955e-06,
      "loss": 2.3052,
      "step": 39871
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0837358236312866,
      "learning_rate": 9.49727554616597e-06,
      "loss": 2.4033,
      "step": 39872
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.405526041984558,
      "learning_rate": 9.496864330456242e-06,
      "loss": 2.3575,
      "step": 39873
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0577232837677002,
      "learning_rate": 9.496453115599465e-06,
      "loss": 2.4322,
      "step": 39874
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0077040195465088,
      "learning_rate": 9.496041901596335e-06,
      "loss": 2.327,
      "step": 39875
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0776704549789429,
      "learning_rate": 9.495630688447554e-06,
      "loss": 2.2365,
      "step": 39876
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0309301614761353,
      "learning_rate": 9.495219476153817e-06,
      "loss": 2.3801,
      "step": 39877
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.180302381515503,
      "learning_rate": 9.494808264715819e-06,
      "loss": 2.4036,
      "step": 39878
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0649559497833252,
      "learning_rate": 9.494397054134255e-06,
      "loss": 2.2743,
      "step": 39879
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.316758394241333,
      "learning_rate": 9.493985844409828e-06,
      "loss": 2.1948,
      "step": 39880
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1861608028411865,
      "learning_rate": 9.49357463554323e-06,
      "loss": 2.4438,
      "step": 39881
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.094225287437439,
      "learning_rate": 9.493163427535162e-06,
      "loss": 2.3237,
      "step": 39882
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1931121349334717,
      "learning_rate": 9.49275222038632e-06,
      "loss": 2.4778,
      "step": 39883
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1541441679000854,
      "learning_rate": 9.4923410140974e-06,
      "loss": 2.4598,
      "step": 39884
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0679309368133545,
      "learning_rate": 9.491929808669101e-06,
      "loss": 2.1423,
      "step": 39885
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.125258445739746,
      "learning_rate": 9.491518604102119e-06,
      "loss": 2.1115,
      "step": 39886
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1347320079803467,
      "learning_rate": 9.491107400397149e-06,
      "loss": 2.2782,
      "step": 39887
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0126956701278687,
      "learning_rate": 9.490696197554891e-06,
      "loss": 2.3328,
      "step": 39888
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0878500938415527,
      "learning_rate": 9.490284995576042e-06,
      "loss": 2.2783,
      "step": 39889
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0275720357894897,
      "learning_rate": 9.489873794461303e-06,
      "loss": 2.4395,
      "step": 39890
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9293544292449951,
      "learning_rate": 9.48946259421136e-06,
      "loss": 2.494,
      "step": 39891
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1225011348724365,
      "learning_rate": 9.489051394826918e-06,
      "loss": 2.443,
      "step": 39892
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0356297492980957,
      "learning_rate": 9.488640196308673e-06,
      "loss": 2.199,
      "step": 39893
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9542086720466614,
      "learning_rate": 9.488228998657322e-06,
      "loss": 2.3562,
      "step": 39894
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1176564693450928,
      "learning_rate": 9.487817801873561e-06,
      "loss": 2.2238,
      "step": 39895
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0646435022354126,
      "learning_rate": 9.487406605958087e-06,
      "loss": 2.3566,
      "step": 39896
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1329880952835083,
      "learning_rate": 9.486995410911599e-06,
      "loss": 2.3954,
      "step": 39897
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0792254209518433,
      "learning_rate": 9.486584216734794e-06,
      "loss": 2.3787,
      "step": 39898
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0774413347244263,
      "learning_rate": 9.486173023428367e-06,
      "loss": 2.3364,
      "step": 39899
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2107205390930176,
      "learning_rate": 9.485761830993016e-06,
      "loss": 2.4379,
      "step": 39900
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3634811639785767,
      "learning_rate": 9.48535063942944e-06,
      "loss": 2.384,
      "step": 39901
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1828689575195312,
      "learning_rate": 9.484939448738333e-06,
      "loss": 2.5214,
      "step": 39902
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1144683361053467,
      "learning_rate": 9.484528258920398e-06,
      "loss": 2.5367,
      "step": 39903
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0281494855880737,
      "learning_rate": 9.484117069976324e-06,
      "loss": 2.3172,
      "step": 39904
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9393455982208252,
      "learning_rate": 9.483705881906812e-06,
      "loss": 2.4923,
      "step": 39905
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.076311469078064,
      "learning_rate": 9.483294694712557e-06,
      "loss": 2.4919,
      "step": 39906
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.05131995677948,
      "learning_rate": 9.48288350839426e-06,
      "loss": 2.4236,
      "step": 39907
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2702876329421997,
      "learning_rate": 9.482472322952614e-06,
      "loss": 2.3421,
      "step": 39908
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.090110182762146,
      "learning_rate": 9.48206113838832e-06,
      "loss": 2.3953,
      "step": 39909
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.158610224723816,
      "learning_rate": 9.48164995470207e-06,
      "loss": 2.3612,
      "step": 39910
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9633854031562805,
      "learning_rate": 9.481238771894566e-06,
      "loss": 2.2131,
      "step": 39911
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9831377267837524,
      "learning_rate": 9.480827589966506e-06,
      "loss": 2.3476,
      "step": 39912
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.019072413444519,
      "learning_rate": 9.48041640891858e-06,
      "loss": 2.1689,
      "step": 39913
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9133225083351135,
      "learning_rate": 9.48000522875149e-06,
      "loss": 2.4623,
      "step": 39914
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1950067281723022,
      "learning_rate": 9.479594049465935e-06,
      "loss": 2.307,
      "step": 39915
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0652867555618286,
      "learning_rate": 9.47918287106261e-06,
      "loss": 2.2515,
      "step": 39916
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.104103446006775,
      "learning_rate": 9.478771693542207e-06,
      "loss": 2.3704,
      "step": 39917
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.134627342224121,
      "learning_rate": 9.478360516905432e-06,
      "loss": 2.3845,
      "step": 39918
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0317729711532593,
      "learning_rate": 9.477949341152975e-06,
      "loss": 2.0854,
      "step": 39919
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.060288667678833,
      "learning_rate": 9.477538166285537e-06,
      "loss": 2.4103,
      "step": 39920
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.078128695487976,
      "learning_rate": 9.477126992303811e-06,
      "loss": 2.3453,
      "step": 39921
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0903807878494263,
      "learning_rate": 9.476715819208499e-06,
      "loss": 2.4923,
      "step": 39922
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.019083023071289,
      "learning_rate": 9.476304647000295e-06,
      "loss": 2.4141,
      "step": 39923
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1127351522445679,
      "learning_rate": 9.475893475679896e-06,
      "loss": 2.4621,
      "step": 39924
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0556758642196655,
      "learning_rate": 9.475482305248001e-06,
      "loss": 2.375,
      "step": 39925
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0714975595474243,
      "learning_rate": 9.475071135705304e-06,
      "loss": 2.3658,
      "step": 39926
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9942708015441895,
      "learning_rate": 9.474659967052507e-06,
      "loss": 2.4117,
      "step": 39927
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0357426404953003,
      "learning_rate": 9.474248799290302e-06,
      "loss": 2.1455,
      "step": 39928
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1297342777252197,
      "learning_rate": 9.473837632419393e-06,
      "loss": 2.3876,
      "step": 39929
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2502442598342896,
      "learning_rate": 9.473426466440469e-06,
      "loss": 2.1529,
      "step": 39930
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0653432607650757,
      "learning_rate": 9.473015301354228e-06,
      "loss": 2.358,
      "step": 39931
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0646260976791382,
      "learning_rate": 9.47260413716137e-06,
      "loss": 2.2422,
      "step": 39932
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1618108749389648,
      "learning_rate": 9.472192973862592e-06,
      "loss": 2.5275,
      "step": 39933
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0845471620559692,
      "learning_rate": 9.47178181145859e-06,
      "loss": 2.3624,
      "step": 39934
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0654913187026978,
      "learning_rate": 9.47137064995006e-06,
      "loss": 2.2503,
      "step": 39935
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1614183187484741,
      "learning_rate": 9.470959489337702e-06,
      "loss": 2.5049,
      "step": 39936
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.979429304599762,
      "learning_rate": 9.47054832962221e-06,
      "loss": 2.3465,
      "step": 39937
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9456658363342285,
      "learning_rate": 9.470137170804282e-06,
      "loss": 2.4476,
      "step": 39938
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0150041580200195,
      "learning_rate": 9.469726012884621e-06,
      "loss": 2.3388,
      "step": 39939
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1397265195846558,
      "learning_rate": 9.469314855863914e-06,
      "loss": 2.4117,
      "step": 39940
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0688978433609009,
      "learning_rate": 9.468903699742862e-06,
      "loss": 2.3707,
      "step": 39941
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0893058776855469,
      "learning_rate": 9.468492544522161e-06,
      "loss": 2.4306,
      "step": 39942
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0324956178665161,
      "learning_rate": 9.468081390202515e-06,
      "loss": 2.3197,
      "step": 39943
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1957718133926392,
      "learning_rate": 9.467670236784612e-06,
      "loss": 2.3301,
      "step": 39944
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.112695574760437,
      "learning_rate": 9.467259084269151e-06,
      "loss": 2.2614,
      "step": 39945
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0815352201461792,
      "learning_rate": 9.466847932656835e-06,
      "loss": 2.3731,
      "step": 39946
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0144518613815308,
      "learning_rate": 9.466436781948353e-06,
      "loss": 2.2667,
      "step": 39947
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.990983247756958,
      "learning_rate": 9.466025632144406e-06,
      "loss": 2.4869,
      "step": 39948
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1200590133666992,
      "learning_rate": 9.465614483245691e-06,
      "loss": 2.3479,
      "step": 39949
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1601461172103882,
      "learning_rate": 9.465203335252903e-06,
      "loss": 2.6272,
      "step": 39950
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1745884418487549,
      "learning_rate": 9.464792188166742e-06,
      "loss": 2.3461,
      "step": 39951
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.025721549987793,
      "learning_rate": 9.464381041987904e-06,
      "loss": 2.4135,
      "step": 39952
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0427709817886353,
      "learning_rate": 9.463969896717084e-06,
      "loss": 2.4832,
      "step": 39953
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9912174940109253,
      "learning_rate": 9.463558752354983e-06,
      "loss": 2.1477,
      "step": 39954
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0222752094268799,
      "learning_rate": 9.463147608902293e-06,
      "loss": 2.3957,
      "step": 39955
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0666334629058838,
      "learning_rate": 9.46273646635972e-06,
      "loss": 2.4962,
      "step": 39956
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1481702327728271,
      "learning_rate": 9.462325324727949e-06,
      "loss": 2.4567,
      "step": 39957
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0755091905593872,
      "learning_rate": 9.461914184007684e-06,
      "loss": 2.4209,
      "step": 39958
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.065165400505066,
      "learning_rate": 9.46150304419962e-06,
      "loss": 2.4423,
      "step": 39959
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9482043981552124,
      "learning_rate": 9.461091905304452e-06,
      "loss": 2.3046,
      "step": 39960
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9639798402786255,
      "learning_rate": 9.460680767322882e-06,
      "loss": 2.5124,
      "step": 39961
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3443650007247925,
      "learning_rate": 9.460269630255605e-06,
      "loss": 2.2392,
      "step": 39962
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1916418075561523,
      "learning_rate": 9.459858494103316e-06,
      "loss": 2.4098,
      "step": 39963
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2055819034576416,
      "learning_rate": 9.459447358866716e-06,
      "loss": 2.4565,
      "step": 39964
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.043162226676941,
      "learning_rate": 9.459036224546497e-06,
      "loss": 2.3452,
      "step": 39965
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0321303606033325,
      "learning_rate": 9.45862509114336e-06,
      "loss": 2.2462,
      "step": 39966
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0869349241256714,
      "learning_rate": 9.458213958658003e-06,
      "loss": 2.3857,
      "step": 39967
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1312427520751953,
      "learning_rate": 9.457802827091117e-06,
      "loss": 2.4663,
      "step": 39968
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.136057734489441,
      "learning_rate": 9.457391696443405e-06,
      "loss": 2.4288,
      "step": 39969
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0605546236038208,
      "learning_rate": 9.456980566715558e-06,
      "loss": 2.3441,
      "step": 39970
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0113098621368408,
      "learning_rate": 9.456569437908279e-06,
      "loss": 2.6412,
      "step": 39971
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0836529731750488,
      "learning_rate": 9.45615831002226e-06,
      "loss": 2.2655,
      "step": 39972
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0650098323822021,
      "learning_rate": 9.455747183058203e-06,
      "loss": 2.4543,
      "step": 39973
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.014925479888916,
      "learning_rate": 9.455336057016802e-06,
      "loss": 2.3612,
      "step": 39974
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0242830514907837,
      "learning_rate": 9.454924931898754e-06,
      "loss": 2.2953,
      "step": 39975
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1116821765899658,
      "learning_rate": 9.454513807704756e-06,
      "loss": 2.2732,
      "step": 39976
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9805033802986145,
      "learning_rate": 9.454102684435505e-06,
      "loss": 2.3482,
      "step": 39977
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.100731372833252,
      "learning_rate": 9.453691562091698e-06,
      "loss": 2.6143,
      "step": 39978
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2436244487762451,
      "learning_rate": 9.453280440674032e-06,
      "loss": 2.3098,
      "step": 39979
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9676676392555237,
      "learning_rate": 9.452869320183205e-06,
      "loss": 2.1728,
      "step": 39980
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0284810066223145,
      "learning_rate": 9.452458200619911e-06,
      "loss": 2.3333,
      "step": 39981
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2553496360778809,
      "learning_rate": 9.452047081984855e-06,
      "loss": 2.32,
      "step": 39982
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1469038724899292,
      "learning_rate": 9.451635964278725e-06,
      "loss": 2.3156,
      "step": 39983
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2099026441574097,
      "learning_rate": 9.451224847502218e-06,
      "loss": 2.6194,
      "step": 39984
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0434277057647705,
      "learning_rate": 9.450813731656036e-06,
      "loss": 2.3688,
      "step": 39985
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1056907176971436,
      "learning_rate": 9.450402616740873e-06,
      "loss": 2.4892,
      "step": 39986
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1169294118881226,
      "learning_rate": 9.449991502757427e-06,
      "loss": 2.1558,
      "step": 39987
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0112627744674683,
      "learning_rate": 9.449580389706395e-06,
      "loss": 2.3701,
      "step": 39988
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9938499331474304,
      "learning_rate": 9.449169277588475e-06,
      "loss": 2.3177,
      "step": 39989
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1958993673324585,
      "learning_rate": 9.448758166404361e-06,
      "loss": 2.2398,
      "step": 39990
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0246069431304932,
      "learning_rate": 9.448347056154751e-06,
      "loss": 2.3831,
      "step": 39991
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0845224857330322,
      "learning_rate": 9.447935946840344e-06,
      "loss": 2.391,
      "step": 39992
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.207698941230774,
      "learning_rate": 9.447524838461834e-06,
      "loss": 2.2897,
      "step": 39993
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2078486680984497,
      "learning_rate": 9.447113731019922e-06,
      "loss": 2.3309,
      "step": 39994
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.179211139678955,
      "learning_rate": 9.446702624515304e-06,
      "loss": 2.2444,
      "step": 39995
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1164705753326416,
      "learning_rate": 9.446291518948672e-06,
      "loss": 2.2681,
      "step": 39996
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0436501502990723,
      "learning_rate": 9.445880414320727e-06,
      "loss": 2.2909,
      "step": 39997
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0703125,
      "learning_rate": 9.445469310632166e-06,
      "loss": 2.4157,
      "step": 39998
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0462863445281982,
      "learning_rate": 9.445058207883683e-06,
      "loss": 2.1663,
      "step": 39999
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0559029579162598,
      "learning_rate": 9.444647106075977e-06,
      "loss": 2.4755,
      "step": 40000
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0295906066894531,
      "learning_rate": 9.444236005209746e-06,
      "loss": 2.1343,
      "step": 40001
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9871832132339478,
      "learning_rate": 9.443824905285688e-06,
      "loss": 2.3663,
      "step": 40002
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0404562950134277,
      "learning_rate": 9.443413806304495e-06,
      "loss": 2.4209,
      "step": 40003
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0607588291168213,
      "learning_rate": 9.443002708266868e-06,
      "loss": 2.2124,
      "step": 40004
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1401201486587524,
      "learning_rate": 9.442591611173501e-06,
      "loss": 2.3803,
      "step": 40005
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1483752727508545,
      "learning_rate": 9.442180515025095e-06,
      "loss": 2.3828,
      "step": 40006
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0732543468475342,
      "learning_rate": 9.441769419822343e-06,
      "loss": 2.2798,
      "step": 40007
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0931038856506348,
      "learning_rate": 9.441358325565944e-06,
      "loss": 2.4442,
      "step": 40008
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.103395700454712,
      "learning_rate": 9.440947232256598e-06,
      "loss": 2.3394,
      "step": 40009
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1166070699691772,
      "learning_rate": 9.440536139894995e-06,
      "loss": 2.3927,
      "step": 40010
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1529496908187866,
      "learning_rate": 9.440125048481835e-06,
      "loss": 2.5728,
      "step": 40011
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2250694036483765,
      "learning_rate": 9.439713958017814e-06,
      "loss": 2.1334,
      "step": 40012
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0950039625167847,
      "learning_rate": 9.43930286850363e-06,
      "loss": 2.3116,
      "step": 40013
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2222319841384888,
      "learning_rate": 9.43889177993998e-06,
      "loss": 2.2102,
      "step": 40014
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.071887731552124,
      "learning_rate": 9.438480692327561e-06,
      "loss": 2.3278,
      "step": 40015
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0679826736450195,
      "learning_rate": 9.43806960566707e-06,
      "loss": 2.1601,
      "step": 40016
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0542926788330078,
      "learning_rate": 9.437658519959204e-06,
      "loss": 2.4277,
      "step": 40017
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0866976976394653,
      "learning_rate": 9.43724743520466e-06,
      "loss": 2.4091,
      "step": 40018
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2143012285232544,
      "learning_rate": 9.436836351404133e-06,
      "loss": 2.233,
      "step": 40019
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2024691104888916,
      "learning_rate": 9.436425268558323e-06,
      "loss": 2.2485,
      "step": 40020
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0866007804870605,
      "learning_rate": 9.436014186667924e-06,
      "loss": 2.4768,
      "step": 40021
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0909478664398193,
      "learning_rate": 9.435603105733639e-06,
      "loss": 2.6548,
      "step": 40022
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1449323892593384,
      "learning_rate": 9.435192025756155e-06,
      "loss": 2.2053,
      "step": 40023
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1183574199676514,
      "learning_rate": 9.434780946736174e-06,
      "loss": 2.336,
      "step": 40024
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.123464584350586,
      "learning_rate": 9.434369868674393e-06,
      "loss": 2.3452,
      "step": 40025
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9972379803657532,
      "learning_rate": 9.433958791571508e-06,
      "loss": 2.3317,
      "step": 40026
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0227710008621216,
      "learning_rate": 9.433547715428219e-06,
      "loss": 2.1294,
      "step": 40027
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1133062839508057,
      "learning_rate": 9.433136640245218e-06,
      "loss": 2.4852,
      "step": 40028
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1283084154129028,
      "learning_rate": 9.432725566023205e-06,
      "loss": 2.2891,
      "step": 40029
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0735217332839966,
      "learning_rate": 9.432314492762878e-06,
      "loss": 2.2674,
      "step": 40030
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.011094570159912,
      "learning_rate": 9.431903420464931e-06,
      "loss": 2.1301,
      "step": 40031
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0134276151657104,
      "learning_rate": 9.431492349130061e-06,
      "loss": 2.5166,
      "step": 40032
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0136401653289795,
      "learning_rate": 9.431081278758966e-06,
      "loss": 2.2911,
      "step": 40033
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0858826637268066,
      "learning_rate": 9.430670209352344e-06,
      "loss": 2.3825,
      "step": 40034
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.240716814994812,
      "learning_rate": 9.430259140910893e-06,
      "loss": 2.2471,
      "step": 40035
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.034910798072815,
      "learning_rate": 9.429848073435306e-06,
      "loss": 2.2855,
      "step": 40036
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0683199167251587,
      "learning_rate": 9.42943700692628e-06,
      "loss": 2.3875,
      "step": 40037
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0150362253189087,
      "learning_rate": 9.429025941384512e-06,
      "loss": 2.2503,
      "step": 40038
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2903631925582886,
      "learning_rate": 9.4286148768107e-06,
      "loss": 2.4214,
      "step": 40039
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0072379112243652,
      "learning_rate": 9.428203813205544e-06,
      "loss": 2.3188,
      "step": 40040
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.090194582939148,
      "learning_rate": 9.427792750569735e-06,
      "loss": 2.3624,
      "step": 40041
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0950989723205566,
      "learning_rate": 9.427381688903973e-06,
      "loss": 2.2273,
      "step": 40042
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9942811727523804,
      "learning_rate": 9.426970628208956e-06,
      "loss": 2.3676,
      "step": 40043
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.102571964263916,
      "learning_rate": 9.426559568485378e-06,
      "loss": 2.452,
      "step": 40044
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.039435863494873,
      "learning_rate": 9.426148509733937e-06,
      "loss": 2.3407,
      "step": 40045
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9941074252128601,
      "learning_rate": 9.425737451955332e-06,
      "loss": 2.4871,
      "step": 40046
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3354876041412354,
      "learning_rate": 9.425326395150256e-06,
      "loss": 2.4589,
      "step": 40047
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.007176160812378,
      "learning_rate": 9.424915339319414e-06,
      "loss": 2.2783,
      "step": 40048
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1247135400772095,
      "learning_rate": 9.424504284463491e-06,
      "loss": 2.4257,
      "step": 40049
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9956098198890686,
      "learning_rate": 9.42409323058319e-06,
      "loss": 2.3542,
      "step": 40050
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2371903657913208,
      "learning_rate": 9.423682177679208e-06,
      "loss": 2.295,
      "step": 40051
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8290570974349976,
      "learning_rate": 9.42327112575224e-06,
      "loss": 2.1641,
      "step": 40052
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0038444995880127,
      "learning_rate": 9.422860074802984e-06,
      "loss": 2.3774,
      "step": 40053
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0311199426651,
      "learning_rate": 9.422449024832138e-06,
      "loss": 2.3947,
      "step": 40054
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.055274248123169,
      "learning_rate": 9.422037975840397e-06,
      "loss": 2.3407,
      "step": 40055
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1554123163223267,
      "learning_rate": 9.42162692782846e-06,
      "loss": 2.1828,
      "step": 40056
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0142812728881836,
      "learning_rate": 9.42121588079702e-06,
      "loss": 2.4708,
      "step": 40057
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.040029764175415,
      "learning_rate": 9.42080483474678e-06,
      "loss": 2.0871,
      "step": 40058
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0496448278427124,
      "learning_rate": 9.420393789678432e-06,
      "loss": 2.3895,
      "step": 40059
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0549595355987549,
      "learning_rate": 9.419982745592671e-06,
      "loss": 2.4916,
      "step": 40060
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2381280660629272,
      "learning_rate": 9.419571702490201e-06,
      "loss": 2.1632,
      "step": 40061
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1857693195343018,
      "learning_rate": 9.419160660371712e-06,
      "loss": 2.278,
      "step": 40062
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.11781907081604,
      "learning_rate": 9.418749619237904e-06,
      "loss": 2.2142,
      "step": 40063
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1203320026397705,
      "learning_rate": 9.418338579089475e-06,
      "loss": 2.3769,
      "step": 40064
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0489892959594727,
      "learning_rate": 9.41792753992712e-06,
      "loss": 2.5322,
      "step": 40065
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.147019386291504,
      "learning_rate": 9.417516501751531e-06,
      "loss": 2.2388,
      "step": 40066
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1419402360916138,
      "learning_rate": 9.417105464563413e-06,
      "loss": 2.2833,
      "step": 40067
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1647752523422241,
      "learning_rate": 9.41669442836346e-06,
      "loss": 2.1472,
      "step": 40068
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0691289901733398,
      "learning_rate": 9.416283393152367e-06,
      "loss": 2.4941,
      "step": 40069
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0932153463363647,
      "learning_rate": 9.415872358930832e-06,
      "loss": 2.3523,
      "step": 40070
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2449588775634766,
      "learning_rate": 9.415461325699554e-06,
      "loss": 2.2269,
      "step": 40071
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9932246804237366,
      "learning_rate": 9.415050293459226e-06,
      "loss": 2.2591,
      "step": 40072
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0455485582351685,
      "learning_rate": 9.414639262210546e-06,
      "loss": 2.1946,
      "step": 40073
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.154061198234558,
      "learning_rate": 9.414228231954218e-06,
      "loss": 2.2641,
      "step": 40074
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0183521509170532,
      "learning_rate": 9.413817202690926e-06,
      "loss": 2.2865,
      "step": 40075
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0597028732299805,
      "learning_rate": 9.413406174421374e-06,
      "loss": 2.2127,
      "step": 40076
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0644093751907349,
      "learning_rate": 9.412995147146258e-06,
      "loss": 2.4578,
      "step": 40077
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1262860298156738,
      "learning_rate": 9.412584120866274e-06,
      "loss": 2.2136,
      "step": 40078
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1886650323867798,
      "learning_rate": 9.412173095582119e-06,
      "loss": 2.3997,
      "step": 40079
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0935308933258057,
      "learning_rate": 9.411762071294492e-06,
      "loss": 2.3948,
      "step": 40080
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0557637214660645,
      "learning_rate": 9.411351048004087e-06,
      "loss": 2.3482,
      "step": 40081
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2295414209365845,
      "learning_rate": 9.410940025711601e-06,
      "loss": 2.4253,
      "step": 40082
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0572246313095093,
      "learning_rate": 9.410529004417733e-06,
      "loss": 2.5821,
      "step": 40083
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.187341332435608,
      "learning_rate": 9.410117984123178e-06,
      "loss": 2.3732,
      "step": 40084
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0640674829483032,
      "learning_rate": 9.409706964828636e-06,
      "loss": 2.3766,
      "step": 40085
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0377187728881836,
      "learning_rate": 9.409295946534798e-06,
      "loss": 2.2301,
      "step": 40086
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1739495992660522,
      "learning_rate": 9.408884929242364e-06,
      "loss": 2.2541,
      "step": 40087
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1298909187316895,
      "learning_rate": 9.408473912952032e-06,
      "loss": 2.3212,
      "step": 40088
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0905749797821045,
      "learning_rate": 9.408062897664496e-06,
      "loss": 2.3629,
      "step": 40089
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1011408567428589,
      "learning_rate": 9.407651883380455e-06,
      "loss": 2.321,
      "step": 40090
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.148804783821106,
      "learning_rate": 9.407240870100604e-06,
      "loss": 2.0041,
      "step": 40091
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1183348894119263,
      "learning_rate": 9.406829857825642e-06,
      "loss": 2.4277,
      "step": 40092
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9903051257133484,
      "learning_rate": 9.406418846556264e-06,
      "loss": 2.3229,
      "step": 40093
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1051151752471924,
      "learning_rate": 9.406007836293166e-06,
      "loss": 2.2161,
      "step": 40094
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0955110788345337,
      "learning_rate": 9.405596827037046e-06,
      "loss": 2.5297,
      "step": 40095
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0284459590911865,
      "learning_rate": 9.405185818788603e-06,
      "loss": 2.4864,
      "step": 40096
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1051216125488281,
      "learning_rate": 9.404774811548529e-06,
      "loss": 2.4974,
      "step": 40097
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1431291103363037,
      "learning_rate": 9.404363805317525e-06,
      "loss": 2.2794,
      "step": 40098
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9612247943878174,
      "learning_rate": 9.403952800096284e-06,
      "loss": 2.1552,
      "step": 40099
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1686488389968872,
      "learning_rate": 9.403541795885508e-06,
      "loss": 2.2479,
      "step": 40100
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1328120231628418,
      "learning_rate": 9.403130792685892e-06,
      "loss": 2.3475,
      "step": 40101
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0697576999664307,
      "learning_rate": 9.402719790498129e-06,
      "loss": 2.1157,
      "step": 40102
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0257397890090942,
      "learning_rate": 9.402308789322917e-06,
      "loss": 2.3959,
      "step": 40103
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0792248249053955,
      "learning_rate": 9.401897789160955e-06,
      "loss": 2.4808,
      "step": 40104
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0774853229522705,
      "learning_rate": 9.401486790012939e-06,
      "loss": 2.3141,
      "step": 40105
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0809065103530884,
      "learning_rate": 9.401075791879563e-06,
      "loss": 2.257,
      "step": 40106
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0609664916992188,
      "learning_rate": 9.400664794761529e-06,
      "loss": 2.1773,
      "step": 40107
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.024694800376892,
      "learning_rate": 9.40025379865953e-06,
      "loss": 2.2699,
      "step": 40108
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1096577644348145,
      "learning_rate": 9.399842803574263e-06,
      "loss": 2.2704,
      "step": 40109
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0860636234283447,
      "learning_rate": 9.399431809506426e-06,
      "loss": 2.4089,
      "step": 40110
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0772026777267456,
      "learning_rate": 9.399020816456715e-06,
      "loss": 2.1012,
      "step": 40111
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1261060237884521,
      "learning_rate": 9.398609824425827e-06,
      "loss": 2.3829,
      "step": 40112
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1026674509048462,
      "learning_rate": 9.398198833414462e-06,
      "loss": 2.4329,
      "step": 40113
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0890552997589111,
      "learning_rate": 9.397787843423312e-06,
      "loss": 2.3111,
      "step": 40114
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2190179824829102,
      "learning_rate": 9.397376854453073e-06,
      "loss": 2.4677,
      "step": 40115
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0951621532440186,
      "learning_rate": 9.396965866504443e-06,
      "loss": 2.4654,
      "step": 40116
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0708985328674316,
      "learning_rate": 9.396554879578123e-06,
      "loss": 2.2879,
      "step": 40117
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9360520839691162,
      "learning_rate": 9.396143893674804e-06,
      "loss": 2.3254,
      "step": 40118
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.092793583869934,
      "learning_rate": 9.395732908795184e-06,
      "loss": 2.4045,
      "step": 40119
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1072478294372559,
      "learning_rate": 9.395321924939965e-06,
      "loss": 2.4262,
      "step": 40120
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1829382181167603,
      "learning_rate": 9.394910942109837e-06,
      "loss": 2.5682,
      "step": 40121
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2029577493667603,
      "learning_rate": 9.394499960305498e-06,
      "loss": 2.2444,
      "step": 40122
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0370060205459595,
      "learning_rate": 9.394088979527648e-06,
      "loss": 2.71,
      "step": 40123
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.035681128501892,
      "learning_rate": 9.39367799977698e-06,
      "loss": 2.2119,
      "step": 40124
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1376733779907227,
      "learning_rate": 9.393267021054195e-06,
      "loss": 2.4549,
      "step": 40125
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.161690592765808,
      "learning_rate": 9.392856043359985e-06,
      "loss": 2.3316,
      "step": 40126
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0794637203216553,
      "learning_rate": 9.392445066695054e-06,
      "loss": 2.4391,
      "step": 40127
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.189597249031067,
      "learning_rate": 9.392034091060088e-06,
      "loss": 2.3917,
      "step": 40128
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9786728620529175,
      "learning_rate": 9.39162311645579e-06,
      "loss": 2.3784,
      "step": 40129
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0866798162460327,
      "learning_rate": 9.391212142882858e-06,
      "loss": 2.3651,
      "step": 40130
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9246422648429871,
      "learning_rate": 9.390801170341984e-06,
      "loss": 2.4512,
      "step": 40131
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3581098318099976,
      "learning_rate": 9.39039019883387e-06,
      "loss": 2.5242,
      "step": 40132
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0827680826187134,
      "learning_rate": 9.389979228359208e-06,
      "loss": 2.38,
      "step": 40133
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2513859272003174,
      "learning_rate": 9.389568258918698e-06,
      "loss": 2.4605,
      "step": 40134
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.010537028312683,
      "learning_rate": 9.389157290513035e-06,
      "loss": 2.2526,
      "step": 40135
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0860899686813354,
      "learning_rate": 9.388746323142916e-06,
      "loss": 2.3895,
      "step": 40136
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0145632028579712,
      "learning_rate": 9.388335356809038e-06,
      "loss": 2.2372,
      "step": 40137
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1108546257019043,
      "learning_rate": 9.387924391512099e-06,
      "loss": 2.5195,
      "step": 40138
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1269350051879883,
      "learning_rate": 9.387513427252794e-06,
      "loss": 2.5172,
      "step": 40139
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2361794710159302,
      "learning_rate": 9.387102464031824e-06,
      "loss": 2.4335,
      "step": 40140
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1672853231430054,
      "learning_rate": 9.386691501849877e-06,
      "loss": 2.3632,
      "step": 40141
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0473653078079224,
      "learning_rate": 9.386280540707655e-06,
      "loss": 2.3998,
      "step": 40142
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0204105377197266,
      "learning_rate": 9.385869580605855e-06,
      "loss": 2.3002,
      "step": 40143
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0988001823425293,
      "learning_rate": 9.385458621545172e-06,
      "loss": 2.4632,
      "step": 40144
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.02018141746521,
      "learning_rate": 9.385047663526303e-06,
      "loss": 2.2076,
      "step": 40145
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1040375232696533,
      "learning_rate": 9.384636706549946e-06,
      "loss": 2.5327,
      "step": 40146
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9986619353294373,
      "learning_rate": 9.384225750616797e-06,
      "loss": 2.6137,
      "step": 40147
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1075551509857178,
      "learning_rate": 9.383814795727555e-06,
      "loss": 2.4386,
      "step": 40148
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.17604660987854,
      "learning_rate": 9.383403841882911e-06,
      "loss": 2.0396,
      "step": 40149
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1953803300857544,
      "learning_rate": 9.382992889083565e-06,
      "loss": 2.3421,
      "step": 40150
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.973329484462738,
      "learning_rate": 9.382581937330216e-06,
      "loss": 2.6049,
      "step": 40151
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1325640678405762,
      "learning_rate": 9.382170986623556e-06,
      "loss": 2.3035,
      "step": 40152
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0718663930892944,
      "learning_rate": 9.381760036964283e-06,
      "loss": 2.3552,
      "step": 40153
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0109081268310547,
      "learning_rate": 9.3813490883531e-06,
      "loss": 2.2808,
      "step": 40154
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0281264781951904,
      "learning_rate": 9.380938140790697e-06,
      "loss": 2.1844,
      "step": 40155
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0277880430221558,
      "learning_rate": 9.380527194277769e-06,
      "loss": 2.326,
      "step": 40156
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.000488519668579,
      "learning_rate": 9.380116248815016e-06,
      "loss": 2.345,
      "step": 40157
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1395549774169922,
      "learning_rate": 9.379705304403134e-06,
      "loss": 2.3216,
      "step": 40158
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.167952060699463,
      "learning_rate": 9.37929436104282e-06,
      "loss": 2.3556,
      "step": 40159
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0435798168182373,
      "learning_rate": 9.37888341873477e-06,
      "loss": 2.434,
      "step": 40160
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0359363555908203,
      "learning_rate": 9.378472477479682e-06,
      "loss": 2.5017,
      "step": 40161
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1055999994277954,
      "learning_rate": 9.378061537278254e-06,
      "loss": 2.4153,
      "step": 40162
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1828795671463013,
      "learning_rate": 9.377650598131179e-06,
      "loss": 2.3188,
      "step": 40163
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1622295379638672,
      "learning_rate": 9.377239660039154e-06,
      "loss": 2.4083,
      "step": 40164
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1409313678741455,
      "learning_rate": 9.376828723002878e-06,
      "loss": 2.3134,
      "step": 40165
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0415794849395752,
      "learning_rate": 9.376417787023047e-06,
      "loss": 2.1668,
      "step": 40166
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1112252473831177,
      "learning_rate": 9.376006852100362e-06,
      "loss": 2.3018,
      "step": 40167
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0586796998977661,
      "learning_rate": 9.37559591823551e-06,
      "loss": 2.3549,
      "step": 40168
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.371247410774231,
      "learning_rate": 9.37518498542919e-06,
      "loss": 2.3395,
      "step": 40169
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.071465253829956,
      "learning_rate": 9.374774053682105e-06,
      "loss": 2.4266,
      "step": 40170
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0834660530090332,
      "learning_rate": 9.374363122994945e-06,
      "loss": 2.3066,
      "step": 40171
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1679917573928833,
      "learning_rate": 9.373952193368412e-06,
      "loss": 2.3283,
      "step": 40172
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1657743453979492,
      "learning_rate": 9.373541264803197e-06,
      "loss": 2.3641,
      "step": 40173
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.037415623664856,
      "learning_rate": 9.373130337300002e-06,
      "loss": 2.2681,
      "step": 40174
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9916051030158997,
      "learning_rate": 9.37271941085952e-06,
      "loss": 2.3233,
      "step": 40175
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0080540180206299,
      "learning_rate": 9.372308485482453e-06,
      "loss": 2.2628,
      "step": 40176
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1121717691421509,
      "learning_rate": 9.37189756116949e-06,
      "loss": 2.3895,
      "step": 40177
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1745773553848267,
      "learning_rate": 9.371486637921333e-06,
      "loss": 2.6448,
      "step": 40178
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.6375598907470703,
      "learning_rate": 9.371075715738674e-06,
      "loss": 2.3287,
      "step": 40179
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0973817110061646,
      "learning_rate": 9.370664794622216e-06,
      "loss": 2.3599,
      "step": 40180
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1528687477111816,
      "learning_rate": 9.37025387457265e-06,
      "loss": 2.127,
      "step": 40181
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0157368183135986,
      "learning_rate": 9.369842955590676e-06,
      "loss": 2.3515,
      "step": 40182
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0516855716705322,
      "learning_rate": 9.369432037676989e-06,
      "loss": 2.3053,
      "step": 40183
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1771104335784912,
      "learning_rate": 9.369021120832284e-06,
      "loss": 2.3753,
      "step": 40184
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.145216703414917,
      "learning_rate": 9.36861020505726e-06,
      "loss": 2.4476,
      "step": 40185
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.051443099975586,
      "learning_rate": 9.368199290352615e-06,
      "loss": 2.241,
      "step": 40186
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0694866180419922,
      "learning_rate": 9.367788376719041e-06,
      "loss": 2.2857,
      "step": 40187
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0788758993148804,
      "learning_rate": 9.36737746415724e-06,
      "loss": 2.2123,
      "step": 40188
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.383925199508667,
      "learning_rate": 9.366966552667904e-06,
      "loss": 2.2425,
      "step": 40189
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2793920040130615,
      "learning_rate": 9.366555642251734e-06,
      "loss": 2.2265,
      "step": 40190
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0744167566299438,
      "learning_rate": 9.366144732909422e-06,
      "loss": 2.6402,
      "step": 40191
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.249833106994629,
      "learning_rate": 9.365733824641668e-06,
      "loss": 2.4336,
      "step": 40192
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1512619256973267,
      "learning_rate": 9.365322917449171e-06,
      "loss": 2.3634,
      "step": 40193
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.039504885673523,
      "learning_rate": 9.36491201133262e-06,
      "loss": 2.482,
      "step": 40194
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9901043176651001,
      "learning_rate": 9.364501106292715e-06,
      "loss": 2.1506,
      "step": 40195
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0180418491363525,
      "learning_rate": 9.364090202330154e-06,
      "loss": 2.5124,
      "step": 40196
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0546046495437622,
      "learning_rate": 9.363679299445633e-06,
      "loss": 2.2826,
      "step": 40197
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1124839782714844,
      "learning_rate": 9.363268397639847e-06,
      "loss": 2.3854,
      "step": 40198
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0260869264602661,
      "learning_rate": 9.362857496913495e-06,
      "loss": 2.2301,
      "step": 40199
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0457249879837036,
      "learning_rate": 9.362446597267274e-06,
      "loss": 2.4274,
      "step": 40200
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1134905815124512,
      "learning_rate": 9.362035698701878e-06,
      "loss": 2.2508,
      "step": 40201
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9580283164978027,
      "learning_rate": 9.361624801218005e-06,
      "loss": 2.377,
      "step": 40202
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.228519082069397,
      "learning_rate": 9.361213904816352e-06,
      "loss": 2.5418,
      "step": 40203
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.147679090499878,
      "learning_rate": 9.360803009497615e-06,
      "loss": 2.3043,
      "step": 40204
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0438270568847656,
      "learning_rate": 9.36039211526249e-06,
      "loss": 2.3928,
      "step": 40205
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.061931848526001,
      "learning_rate": 9.359981222111675e-06,
      "loss": 2.3734,
      "step": 40206
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1094543933868408,
      "learning_rate": 9.359570330045863e-06,
      "loss": 2.4983,
      "step": 40207
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9617084860801697,
      "learning_rate": 9.359159439065755e-06,
      "loss": 2.2581,
      "step": 40208
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0926793813705444,
      "learning_rate": 9.358748549172045e-06,
      "loss": 2.4539,
      "step": 40209
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0947750806808472,
      "learning_rate": 9.358337660365432e-06,
      "loss": 2.2047,
      "step": 40210
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9995629191398621,
      "learning_rate": 9.357926772646609e-06,
      "loss": 2.2104,
      "step": 40211
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0358070135116577,
      "learning_rate": 9.357515886016276e-06,
      "loss": 2.3734,
      "step": 40212
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.065817952156067,
      "learning_rate": 9.357105000475125e-06,
      "loss": 2.2905,
      "step": 40213
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0485888719558716,
      "learning_rate": 9.35669411602386e-06,
      "loss": 2.258,
      "step": 40214
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0433331727981567,
      "learning_rate": 9.35628323266317e-06,
      "loss": 2.3364,
      "step": 40215
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1917623281478882,
      "learning_rate": 9.355872350393755e-06,
      "loss": 2.1424,
      "step": 40216
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.038719892501831,
      "learning_rate": 9.355461469216312e-06,
      "loss": 2.5128,
      "step": 40217
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0677101612091064,
      "learning_rate": 9.355050589131538e-06,
      "loss": 2.5109,
      "step": 40218
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.997811496257782,
      "learning_rate": 9.354639710140131e-06,
      "loss": 2.3743,
      "step": 40219
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0144463777542114,
      "learning_rate": 9.35422883224278e-06,
      "loss": 2.5879,
      "step": 40220
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0342005491256714,
      "learning_rate": 9.353817955440188e-06,
      "loss": 2.2987,
      "step": 40221
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.26823353767395,
      "learning_rate": 9.353407079733051e-06,
      "loss": 2.3382,
      "step": 40222
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0883697271347046,
      "learning_rate": 9.352996205122063e-06,
      "loss": 2.5686,
      "step": 40223
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1817964315414429,
      "learning_rate": 9.352585331607923e-06,
      "loss": 2.3015,
      "step": 40224
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0398902893066406,
      "learning_rate": 9.352174459191325e-06,
      "loss": 2.3685,
      "step": 40225
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1207178831100464,
      "learning_rate": 9.35176358787297e-06,
      "loss": 2.2141,
      "step": 40226
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0911900997161865,
      "learning_rate": 9.351352717653549e-06,
      "loss": 2.1385,
      "step": 40227
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1141566038131714,
      "learning_rate": 9.350941848533764e-06,
      "loss": 2.3246,
      "step": 40228
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0709589719772339,
      "learning_rate": 9.350530980514306e-06,
      "loss": 2.3092,
      "step": 40229
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0927374362945557,
      "learning_rate": 9.350120113595877e-06,
      "loss": 2.5128,
      "step": 40230
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1567984819412231,
      "learning_rate": 9.349709247779171e-06,
      "loss": 2.3549,
      "step": 40231
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0707670450210571,
      "learning_rate": 9.349298383064884e-06,
      "loss": 2.3197,
      "step": 40232
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1776598691940308,
      "learning_rate": 9.348887519453713e-06,
      "loss": 2.2375,
      "step": 40233
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.076666235923767,
      "learning_rate": 9.348476656946353e-06,
      "loss": 2.2866,
      "step": 40234
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.162619948387146,
      "learning_rate": 9.348065795543504e-06,
      "loss": 2.3741,
      "step": 40235
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1501225233078003,
      "learning_rate": 9.34765493524586e-06,
      "loss": 2.1492,
      "step": 40236
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0548875331878662,
      "learning_rate": 9.347244076054116e-06,
      "loss": 2.4594,
      "step": 40237
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2415001392364502,
      "learning_rate": 9.346833217968974e-06,
      "loss": 2.3786,
      "step": 40238
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0113930702209473,
      "learning_rate": 9.346422360991125e-06,
      "loss": 2.2906,
      "step": 40239
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1055474281311035,
      "learning_rate": 9.346011505121268e-06,
      "loss": 2.4719,
      "step": 40240
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0790148973464966,
      "learning_rate": 9.345600650360099e-06,
      "loss": 2.2114,
      "step": 40241
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1368623971939087,
      "learning_rate": 9.345189796708312e-06,
      "loss": 2.3241,
      "step": 40242
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.035664439201355,
      "learning_rate": 9.344778944166609e-06,
      "loss": 2.3481,
      "step": 40243
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0238921642303467,
      "learning_rate": 9.344368092735683e-06,
      "loss": 2.3293,
      "step": 40244
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1050931215286255,
      "learning_rate": 9.343957242416231e-06,
      "loss": 2.4467,
      "step": 40245
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1826509237289429,
      "learning_rate": 9.343546393208956e-06,
      "loss": 2.201,
      "step": 40246
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.106525182723999,
      "learning_rate": 9.343135545114541e-06,
      "loss": 2.4618,
      "step": 40247
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9943562746047974,
      "learning_rate": 9.34272469813369e-06,
      "loss": 2.2002,
      "step": 40248
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1282495260238647,
      "learning_rate": 9.3423138522671e-06,
      "loss": 2.4515,
      "step": 40249
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0368274450302124,
      "learning_rate": 9.341903007515467e-06,
      "loss": 2.4204,
      "step": 40250
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9851095080375671,
      "learning_rate": 9.341492163879486e-06,
      "loss": 2.3081,
      "step": 40251
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.172687292098999,
      "learning_rate": 9.341081321359856e-06,
      "loss": 2.2474,
      "step": 40252
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.027693510055542,
      "learning_rate": 9.34067047995727e-06,
      "loss": 2.3506,
      "step": 40253
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0578866004943848,
      "learning_rate": 9.340259639672428e-06,
      "loss": 2.3716,
      "step": 40254
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0888316631317139,
      "learning_rate": 9.339848800506027e-06,
      "loss": 2.163,
      "step": 40255
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0251814126968384,
      "learning_rate": 9.339437962458759e-06,
      "loss": 2.4297,
      "step": 40256
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.050700068473816,
      "learning_rate": 9.339027125531323e-06,
      "loss": 2.2608,
      "step": 40257
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0409787893295288,
      "learning_rate": 9.338616289724416e-06,
      "loss": 2.2523,
      "step": 40258
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1497308015823364,
      "learning_rate": 9.338205455038739e-06,
      "loss": 2.3795,
      "step": 40259
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0257906913757324,
      "learning_rate": 9.337794621474978e-06,
      "loss": 2.3074,
      "step": 40260
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0794188976287842,
      "learning_rate": 9.337383789033837e-06,
      "loss": 2.3065,
      "step": 40261
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1344311237335205,
      "learning_rate": 9.336972957716009e-06,
      "loss": 2.2966,
      "step": 40262
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0584001541137695,
      "learning_rate": 9.33656212752219e-06,
      "loss": 2.3112,
      "step": 40263
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.172615885734558,
      "learning_rate": 9.336151298453082e-06,
      "loss": 2.3746,
      "step": 40264
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0814763307571411,
      "learning_rate": 9.335740470509376e-06,
      "loss": 2.2661,
      "step": 40265
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0698658227920532,
      "learning_rate": 9.335329643691773e-06,
      "loss": 2.3476,
      "step": 40266
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0056419372558594,
      "learning_rate": 9.334918818000964e-06,
      "loss": 2.2713,
      "step": 40267
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2465182542800903,
      "learning_rate": 9.334507993437648e-06,
      "loss": 2.3865,
      "step": 40268
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2653065919876099,
      "learning_rate": 9.334097170002522e-06,
      "loss": 2.4706,
      "step": 40269
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9717122912406921,
      "learning_rate": 9.333686347696283e-06,
      "loss": 2.316,
      "step": 40270
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0394067764282227,
      "learning_rate": 9.333275526519626e-06,
      "loss": 2.222,
      "step": 40271
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0387053489685059,
      "learning_rate": 9.332864706473252e-06,
      "loss": 2.4879,
      "step": 40272
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0292543172836304,
      "learning_rate": 9.33245388755785e-06,
      "loss": 2.2873,
      "step": 40273
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1490694284439087,
      "learning_rate": 9.332043069774121e-06,
      "loss": 2.242,
      "step": 40274
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9844443202018738,
      "learning_rate": 9.331632253122757e-06,
      "loss": 2.2598,
      "step": 40275
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0734565258026123,
      "learning_rate": 9.33122143760446e-06,
      "loss": 2.1427,
      "step": 40276
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1435824632644653,
      "learning_rate": 9.330810623219925e-06,
      "loss": 2.415,
      "step": 40277
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0542337894439697,
      "learning_rate": 9.330399809969847e-06,
      "loss": 2.4656,
      "step": 40278
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2196658849716187,
      "learning_rate": 9.329988997854923e-06,
      "loss": 2.2508,
      "step": 40279
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2327165603637695,
      "learning_rate": 9.329578186875849e-06,
      "loss": 2.5853,
      "step": 40280
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9980885982513428,
      "learning_rate": 9.329167377033323e-06,
      "loss": 2.2421,
      "step": 40281
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2499971389770508,
      "learning_rate": 9.328756568328041e-06,
      "loss": 2.3914,
      "step": 40282
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0612845420837402,
      "learning_rate": 9.328345760760698e-06,
      "loss": 2.365,
      "step": 40283
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9805936813354492,
      "learning_rate": 9.327934954331992e-06,
      "loss": 2.2241,
      "step": 40284
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.043237328529358,
      "learning_rate": 9.327524149042622e-06,
      "loss": 2.2552,
      "step": 40285
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3143339157104492,
      "learning_rate": 9.327113344893278e-06,
      "loss": 2.2205,
      "step": 40286
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1002081632614136,
      "learning_rate": 9.32670254188466e-06,
      "loss": 2.5151,
      "step": 40287
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0720210075378418,
      "learning_rate": 9.32629174001746e-06,
      "loss": 2.1044,
      "step": 40288
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.14889657497406,
      "learning_rate": 9.325880939292383e-06,
      "loss": 2.4579,
      "step": 40289
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2063380479812622,
      "learning_rate": 9.32547013971012e-06,
      "loss": 2.3621,
      "step": 40290
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0156166553497314,
      "learning_rate": 9.325059341271368e-06,
      "loss": 2.3632,
      "step": 40291
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0936998128890991,
      "learning_rate": 9.324648543976824e-06,
      "loss": 2.3791,
      "step": 40292
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.137853980064392,
      "learning_rate": 9.324237747827182e-06,
      "loss": 2.4308,
      "step": 40293
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0038683414459229,
      "learning_rate": 9.323826952823146e-06,
      "loss": 2.2839,
      "step": 40294
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1694475412368774,
      "learning_rate": 9.323416158965404e-06,
      "loss": 2.2137,
      "step": 40295
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1403077840805054,
      "learning_rate": 9.323005366254653e-06,
      "loss": 2.3564,
      "step": 40296
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9984341859817505,
      "learning_rate": 9.322594574691595e-06,
      "loss": 2.4131,
      "step": 40297
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0301775932312012,
      "learning_rate": 9.322183784276922e-06,
      "loss": 2.3184,
      "step": 40298
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.182727575302124,
      "learning_rate": 9.321772995011332e-06,
      "loss": 2.3462,
      "step": 40299
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.050339698791504,
      "learning_rate": 9.321362206895522e-06,
      "loss": 2.413,
      "step": 40300
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1044219732284546,
      "learning_rate": 9.320951419930187e-06,
      "loss": 2.227,
      "step": 40301
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.9996779561042786,
      "learning_rate": 9.320540634116021e-06,
      "loss": 2.3609,
      "step": 40302
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1369709968566895,
      "learning_rate": 9.320129849453725e-06,
      "loss": 2.5954,
      "step": 40303
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.270282506942749,
      "learning_rate": 9.319719065943994e-06,
      "loss": 2.2939,
      "step": 40304
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0248095989227295,
      "learning_rate": 9.319308283587521e-06,
      "loss": 2.4456,
      "step": 40305
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0596778392791748,
      "learning_rate": 9.318897502385009e-06,
      "loss": 2.5213,
      "step": 40306
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.986176609992981,
      "learning_rate": 9.318486722337148e-06,
      "loss": 2.3251,
      "step": 40307
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0739741325378418,
      "learning_rate": 9.318075943444638e-06,
      "loss": 2.2345,
      "step": 40308
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0498981475830078,
      "learning_rate": 9.317665165708176e-06,
      "loss": 2.5402,
      "step": 40309
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0238571166992188,
      "learning_rate": 9.317254389128454e-06,
      "loss": 2.2604,
      "step": 40310
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0217649936676025,
      "learning_rate": 9.316843613706173e-06,
      "loss": 2.2493,
      "step": 40311
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.157833218574524,
      "learning_rate": 9.316432839442031e-06,
      "loss": 2.5217,
      "step": 40312
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0207958221435547,
      "learning_rate": 9.316022066336719e-06,
      "loss": 2.3822,
      "step": 40313
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0982325077056885,
      "learning_rate": 9.315611294390932e-06,
      "loss": 2.5112,
      "step": 40314
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.069067120552063,
      "learning_rate": 9.315200523605372e-06,
      "loss": 2.1737,
      "step": 40315
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1437873840332031,
      "learning_rate": 9.314789753980733e-06,
      "loss": 2.478,
      "step": 40316
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.0255571603775024,
      "learning_rate": 9.31437898551771e-06,
      "loss": 2.2974,
      "step": 40317
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.126511812210083,
      "learning_rate": 9.313968218217002e-06,
      "loss": 2.2464,
      "step": 40318
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.005856990814209,
      "learning_rate": 9.313557452079305e-06,
      "loss": 2.3231,
      "step": 40319
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.102981686592102,
      "learning_rate": 9.313146687105314e-06,
      "loss": 2.2998,
      "step": 40320
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.059186577796936,
      "learning_rate": 9.312735923295725e-06,
      "loss": 2.2906,
      "step": 40321
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2352138757705688,
      "learning_rate": 9.312325160651237e-06,
      "loss": 2.2465,
      "step": 40322
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1526674032211304,
      "learning_rate": 9.311914399172543e-06,
      "loss": 2.2327,
      "step": 40323
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0343902111053467,
      "learning_rate": 9.311503638860342e-06,
      "loss": 2.4336,
      "step": 40324
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0022681951522827,
      "learning_rate": 9.311092879715332e-06,
      "loss": 2.5006,
      "step": 40325
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.066489577293396,
      "learning_rate": 9.310682121738202e-06,
      "loss": 2.2684,
      "step": 40326
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.037600040435791,
      "learning_rate": 9.310271364929655e-06,
      "loss": 2.2957,
      "step": 40327
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0205528736114502,
      "learning_rate": 9.309860609290387e-06,
      "loss": 2.2055,
      "step": 40328
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0958681106567383,
      "learning_rate": 9.30944985482109e-06,
      "loss": 2.3071,
      "step": 40329
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1269490718841553,
      "learning_rate": 9.309039101522464e-06,
      "loss": 2.2606,
      "step": 40330
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2235255241394043,
      "learning_rate": 9.308628349395203e-06,
      "loss": 1.8637,
      "step": 40331
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1887847185134888,
      "learning_rate": 9.308217598440005e-06,
      "loss": 2.1671,
      "step": 40332
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4559390544891357,
      "learning_rate": 9.307806848657567e-06,
      "loss": 2.3186,
      "step": 40333
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0222827196121216,
      "learning_rate": 9.307396100048585e-06,
      "loss": 2.0233,
      "step": 40334
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0848760604858398,
      "learning_rate": 9.306985352613753e-06,
      "loss": 2.3946,
      "step": 40335
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0880731344223022,
      "learning_rate": 9.30657460635377e-06,
      "loss": 2.615,
      "step": 40336
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0844392776489258,
      "learning_rate": 9.306163861269331e-06,
      "loss": 2.2605,
      "step": 40337
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1143923997879028,
      "learning_rate": 9.305753117361138e-06,
      "loss": 2.4235,
      "step": 40338
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1898765563964844,
      "learning_rate": 9.305342374629876e-06,
      "loss": 2.3559,
      "step": 40339
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0255366563796997,
      "learning_rate": 9.304931633076248e-06,
      "loss": 2.3949,
      "step": 40340
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0793884992599487,
      "learning_rate": 9.30452089270095e-06,
      "loss": 2.1781,
      "step": 40341
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0995632410049438,
      "learning_rate": 9.304110153504678e-06,
      "loss": 2.2375,
      "step": 40342
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0828580856323242,
      "learning_rate": 9.303699415488129e-06,
      "loss": 2.4041,
      "step": 40343
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3321254253387451,
      "learning_rate": 9.303288678651998e-06,
      "loss": 2.178,
      "step": 40344
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.418385624885559,
      "learning_rate": 9.30287794299698e-06,
      "loss": 2.4415,
      "step": 40345
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3120862245559692,
      "learning_rate": 9.302467208523776e-06,
      "loss": 2.3794,
      "step": 40346
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.131758689880371,
      "learning_rate": 9.302056475233078e-06,
      "loss": 2.1226,
      "step": 40347
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1359517574310303,
      "learning_rate": 9.301645743125585e-06,
      "loss": 2.2744,
      "step": 40348
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1993778944015503,
      "learning_rate": 9.301235012201993e-06,
      "loss": 2.6193,
      "step": 40349
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1782037019729614,
      "learning_rate": 9.300824282462996e-06,
      "loss": 2.421,
      "step": 40350
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9536689519882202,
      "learning_rate": 9.300413553909294e-06,
      "loss": 2.1938,
      "step": 40351
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1158093214035034,
      "learning_rate": 9.300002826541578e-06,
      "loss": 2.4141,
      "step": 40352
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1351816654205322,
      "learning_rate": 9.299592100360548e-06,
      "loss": 2.3959,
      "step": 40353
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.104119062423706,
      "learning_rate": 9.2991813753669e-06,
      "loss": 2.2545,
      "step": 40354
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0995824337005615,
      "learning_rate": 9.298770651561329e-06,
      "loss": 2.3793,
      "step": 40355
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0903749465942383,
      "learning_rate": 9.298359928944535e-06,
      "loss": 2.3699,
      "step": 40356
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0478475093841553,
      "learning_rate": 9.29794920751721e-06,
      "loss": 2.2801,
      "step": 40357
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.074635624885559,
      "learning_rate": 9.29753848728005e-06,
      "loss": 2.4576,
      "step": 40358
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0273176431655884,
      "learning_rate": 9.297127768233753e-06,
      "loss": 2.2755,
      "step": 40359
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1365962028503418,
      "learning_rate": 9.296717050379018e-06,
      "loss": 2.2414,
      "step": 40360
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2532038688659668,
      "learning_rate": 9.296306333716537e-06,
      "loss": 2.477,
      "step": 40361
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2604008913040161,
      "learning_rate": 9.295895618247007e-06,
      "loss": 2.0961,
      "step": 40362
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0416905879974365,
      "learning_rate": 9.295484903971126e-06,
      "loss": 2.2021,
      "step": 40363
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.02911376953125,
      "learning_rate": 9.295074190889594e-06,
      "loss": 2.3582,
      "step": 40364
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.085669994354248,
      "learning_rate": 9.294663479003098e-06,
      "loss": 2.2951,
      "step": 40365
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0400593280792236,
      "learning_rate": 9.294252768312338e-06,
      "loss": 2.3542,
      "step": 40366
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0985127687454224,
      "learning_rate": 9.293842058818012e-06,
      "loss": 2.2546,
      "step": 40367
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0090932846069336,
      "learning_rate": 9.293431350520815e-06,
      "loss": 2.1296,
      "step": 40368
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2579116821289062,
      "learning_rate": 9.293020643421447e-06,
      "loss": 2.5668,
      "step": 40369
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1153528690338135,
      "learning_rate": 9.292609937520599e-06,
      "loss": 2.3339,
      "step": 40370
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1108216047286987,
      "learning_rate": 9.292199232818967e-06,
      "loss": 2.4108,
      "step": 40371
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9306352138519287,
      "learning_rate": 9.291788529317252e-06,
      "loss": 2.3287,
      "step": 40372
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.053006887435913,
      "learning_rate": 9.291377827016147e-06,
      "loss": 2.3294,
      "step": 40373
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1482048034667969,
      "learning_rate": 9.29096712591635e-06,
      "loss": 2.3512,
      "step": 40374
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1410319805145264,
      "learning_rate": 9.290556426018555e-06,
      "loss": 2.4545,
      "step": 40375
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0987584590911865,
      "learning_rate": 9.29014572732346e-06,
      "loss": 2.2051,
      "step": 40376
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0931744575500488,
      "learning_rate": 9.289735029831763e-06,
      "loss": 2.3628,
      "step": 40377
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9781783819198608,
      "learning_rate": 9.289324333544158e-06,
      "loss": 2.3115,
      "step": 40378
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0452253818511963,
      "learning_rate": 9.288913638461339e-06,
      "loss": 2.4451,
      "step": 40379
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0105024576187134,
      "learning_rate": 9.288502944584005e-06,
      "loss": 2.3244,
      "step": 40380
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.285486102104187,
      "learning_rate": 9.28809225191285e-06,
      "loss": 2.3374,
      "step": 40381
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1324145793914795,
      "learning_rate": 9.287681560448574e-06,
      "loss": 2.4756,
      "step": 40382
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0545681715011597,
      "learning_rate": 9.287270870191871e-06,
      "loss": 2.5202,
      "step": 40383
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1239205598831177,
      "learning_rate": 9.286860181143439e-06,
      "loss": 2.2292,
      "step": 40384
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2481157779693604,
      "learning_rate": 9.286449493303971e-06,
      "loss": 2.094,
      "step": 40385
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0999083518981934,
      "learning_rate": 9.286038806674165e-06,
      "loss": 2.0352,
      "step": 40386
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.066332221031189,
      "learning_rate": 9.285628121254716e-06,
      "loss": 2.3428,
      "step": 40387
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0584582090377808,
      "learning_rate": 9.285217437046323e-06,
      "loss": 2.177,
      "step": 40388
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2099549770355225,
      "learning_rate": 9.28480675404968e-06,
      "loss": 2.4063,
      "step": 40389
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.089884638786316,
      "learning_rate": 9.284396072265483e-06,
      "loss": 2.4825,
      "step": 40390
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0831785202026367,
      "learning_rate": 9.283985391694434e-06,
      "loss": 2.2319,
      "step": 40391
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0842816829681396,
      "learning_rate": 9.283574712337218e-06,
      "loss": 2.2606,
      "step": 40392
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.028009057044983,
      "learning_rate": 9.28316403419454e-06,
      "loss": 2.2478,
      "step": 40393
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.052755355834961,
      "learning_rate": 9.282753357267093e-06,
      "loss": 2.2101,
      "step": 40394
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.137255072593689,
      "learning_rate": 9.282342681555573e-06,
      "loss": 2.3233,
      "step": 40395
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0319573879241943,
      "learning_rate": 9.281932007060678e-06,
      "loss": 2.1543,
      "step": 40396
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.115187644958496,
      "learning_rate": 9.281521333783104e-06,
      "loss": 2.309,
      "step": 40397
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1075087785720825,
      "learning_rate": 9.281110661723545e-06,
      "loss": 2.4112,
      "step": 40398
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0406254529953003,
      "learning_rate": 9.280699990882699e-06,
      "loss": 2.2648,
      "step": 40399
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.156471848487854,
      "learning_rate": 9.280289321261262e-06,
      "loss": 2.3461,
      "step": 40400
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.089416742324829,
      "learning_rate": 9.27987865285993e-06,
      "loss": 2.5589,
      "step": 40401
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.99738609790802,
      "learning_rate": 9.2794679856794e-06,
      "loss": 2.6793,
      "step": 40402
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.095991849899292,
      "learning_rate": 9.279057319720366e-06,
      "loss": 2.4142,
      "step": 40403
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.517930269241333,
      "learning_rate": 9.278646654983531e-06,
      "loss": 2.4009,
      "step": 40404
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1411352157592773,
      "learning_rate": 9.278235991469583e-06,
      "loss": 2.3957,
      "step": 40405
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0305211544036865,
      "learning_rate": 9.277825329179217e-06,
      "loss": 2.3624,
      "step": 40406
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0137890577316284,
      "learning_rate": 9.277414668113136e-06,
      "loss": 2.255,
      "step": 40407
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0000770092010498,
      "learning_rate": 9.277004008272034e-06,
      "loss": 2.5855,
      "step": 40408
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0106631517410278,
      "learning_rate": 9.276593349656604e-06,
      "loss": 2.2306,
      "step": 40409
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.230385661125183,
      "learning_rate": 9.276182692267546e-06,
      "loss": 2.1232,
      "step": 40410
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.08247709274292,
      "learning_rate": 9.275772036105556e-06,
      "loss": 2.5592,
      "step": 40411
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0612292289733887,
      "learning_rate": 9.27536138117133e-06,
      "loss": 2.2347,
      "step": 40412
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0992125272750854,
      "learning_rate": 9.274950727465563e-06,
      "loss": 2.3726,
      "step": 40413
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9596870541572571,
      "learning_rate": 9.274540074988948e-06,
      "loss": 2.3332,
      "step": 40414
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.090287208557129,
      "learning_rate": 9.274129423742187e-06,
      "loss": 2.2352,
      "step": 40415
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9476087689399719,
      "learning_rate": 9.273718773725974e-06,
      "loss": 2.2551,
      "step": 40416
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0208115577697754,
      "learning_rate": 9.273308124941005e-06,
      "loss": 2.3704,
      "step": 40417
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0772753953933716,
      "learning_rate": 9.272897477387978e-06,
      "loss": 2.5459,
      "step": 40418
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.013892412185669,
      "learning_rate": 9.272486831067586e-06,
      "loss": 2.3153,
      "step": 40419
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2137644290924072,
      "learning_rate": 9.272076185980525e-06,
      "loss": 2.2498,
      "step": 40420
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0470322370529175,
      "learning_rate": 9.271665542127492e-06,
      "loss": 2.5721,
      "step": 40421
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0135126113891602,
      "learning_rate": 9.271254899509184e-06,
      "loss": 2.1801,
      "step": 40422
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9854925274848938,
      "learning_rate": 9.270844258126297e-06,
      "loss": 2.2895,
      "step": 40423
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.048884391784668,
      "learning_rate": 9.270433617979528e-06,
      "loss": 2.2392,
      "step": 40424
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2680124044418335,
      "learning_rate": 9.270022979069573e-06,
      "loss": 2.2422,
      "step": 40425
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0092010498046875,
      "learning_rate": 9.269612341397124e-06,
      "loss": 2.3155,
      "step": 40426
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0223044157028198,
      "learning_rate": 9.269201704962883e-06,
      "loss": 2.3422,
      "step": 40427
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0243467092514038,
      "learning_rate": 9.268791069767541e-06,
      "loss": 2.3976,
      "step": 40428
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.215228796005249,
      "learning_rate": 9.268380435811798e-06,
      "loss": 2.4752,
      "step": 40429
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0151054859161377,
      "learning_rate": 9.267969803096356e-06,
      "loss": 2.2648,
      "step": 40430
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4279547929763794,
      "learning_rate": 9.267559171621898e-06,
      "loss": 2.3894,
      "step": 40431
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.994789183139801,
      "learning_rate": 9.267148541389124e-06,
      "loss": 2.1091,
      "step": 40432
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0629810094833374,
      "learning_rate": 9.266737912398734e-06,
      "loss": 2.5604,
      "step": 40433
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0336625576019287,
      "learning_rate": 9.266327284651423e-06,
      "loss": 2.1592,
      "step": 40434
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1333796977996826,
      "learning_rate": 9.265916658147884e-06,
      "loss": 2.6398,
      "step": 40435
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1271973848342896,
      "learning_rate": 9.265506032888818e-06,
      "loss": 2.3593,
      "step": 40436
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0543320178985596,
      "learning_rate": 9.265095408874918e-06,
      "loss": 2.2285,
      "step": 40437
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.159281611442566,
      "learning_rate": 9.264684786106881e-06,
      "loss": 2.2819,
      "step": 40438
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1684502363204956,
      "learning_rate": 9.264274164585406e-06,
      "loss": 2.4224,
      "step": 40439
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0984604358673096,
      "learning_rate": 9.263863544311183e-06,
      "loss": 2.3273,
      "step": 40440
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2006824016571045,
      "learning_rate": 9.263452925284911e-06,
      "loss": 2.2858,
      "step": 40441
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1009653806686401,
      "learning_rate": 9.263042307507286e-06,
      "loss": 2.3185,
      "step": 40442
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1921441555023193,
      "learning_rate": 9.262631690979005e-06,
      "loss": 2.2788,
      "step": 40443
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2081068754196167,
      "learning_rate": 9.262221075700766e-06,
      "loss": 2.5293,
      "step": 40444
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0177536010742188,
      "learning_rate": 9.261810461673259e-06,
      "loss": 2.3626,
      "step": 40445
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0155140161514282,
      "learning_rate": 9.261399848897189e-06,
      "loss": 2.2517,
      "step": 40446
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0505766868591309,
      "learning_rate": 9.260989237373241e-06,
      "loss": 2.3528,
      "step": 40447
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1333717107772827,
      "learning_rate": 9.260578627102119e-06,
      "loss": 2.4722,
      "step": 40448
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.188080072402954,
      "learning_rate": 9.260168018084518e-06,
      "loss": 2.2696,
      "step": 40449
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2230215072631836,
      "learning_rate": 9.259757410321132e-06,
      "loss": 2.4548,
      "step": 40450
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0772159099578857,
      "learning_rate": 9.259346803812658e-06,
      "loss": 2.4873,
      "step": 40451
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1715725660324097,
      "learning_rate": 9.258936198559792e-06,
      "loss": 2.2164,
      "step": 40452
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0690109729766846,
      "learning_rate": 9.258525594563232e-06,
      "loss": 2.3633,
      "step": 40453
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0841596126556396,
      "learning_rate": 9.25811499182367e-06,
      "loss": 2.3385,
      "step": 40454
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0593842267990112,
      "learning_rate": 9.257704390341807e-06,
      "loss": 2.2807,
      "step": 40455
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2128478288650513,
      "learning_rate": 9.257293790118336e-06,
      "loss": 2.2737,
      "step": 40456
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.168783187866211,
      "learning_rate": 9.256883191153957e-06,
      "loss": 2.1636,
      "step": 40457
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0334794521331787,
      "learning_rate": 9.256472593449359e-06,
      "loss": 2.3354,
      "step": 40458
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1371431350708008,
      "learning_rate": 9.256061997005243e-06,
      "loss": 2.2928,
      "step": 40459
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0325927734375,
      "learning_rate": 9.255651401822303e-06,
      "loss": 2.3834,
      "step": 40460
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4547333717346191,
      "learning_rate": 9.255240807901237e-06,
      "loss": 2.4307,
      "step": 40461
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1270464658737183,
      "learning_rate": 9.254830215242738e-06,
      "loss": 2.2979,
      "step": 40462
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.126672387123108,
      "learning_rate": 9.254419623847505e-06,
      "loss": 2.447,
      "step": 40463
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0558346509933472,
      "learning_rate": 9.254009033716234e-06,
      "loss": 2.3521,
      "step": 40464
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1605089902877808,
      "learning_rate": 9.25359844484962e-06,
      "loss": 2.44,
      "step": 40465
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0330976247787476,
      "learning_rate": 9.25318785724836e-06,
      "loss": 2.4873,
      "step": 40466
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1743577718734741,
      "learning_rate": 9.25277727091315e-06,
      "loss": 2.2923,
      "step": 40467
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0224230289459229,
      "learning_rate": 9.252366685844685e-06,
      "loss": 2.2659,
      "step": 40468
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.210864782333374,
      "learning_rate": 9.25195610204366e-06,
      "loss": 2.1822,
      "step": 40469
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9480436444282532,
      "learning_rate": 9.251545519510776e-06,
      "loss": 2.3457,
      "step": 40470
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.11772620677948,
      "learning_rate": 9.251134938246723e-06,
      "loss": 2.5469,
      "step": 40471
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.989195704460144,
      "learning_rate": 9.2507243582522e-06,
      "loss": 2.1831,
      "step": 40472
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0972819328308105,
      "learning_rate": 9.250313779527902e-06,
      "loss": 2.3753,
      "step": 40473
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.036792516708374,
      "learning_rate": 9.249903202074528e-06,
      "loss": 2.441,
      "step": 40474
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.185032844543457,
      "learning_rate": 9.24949262589277e-06,
      "loss": 2.0792,
      "step": 40475
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1119621992111206,
      "learning_rate": 9.249082050983326e-06,
      "loss": 2.4274,
      "step": 40476
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0145690441131592,
      "learning_rate": 9.24867147734689e-06,
      "loss": 2.3543,
      "step": 40477
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.170516848564148,
      "learning_rate": 9.248260904984163e-06,
      "loss": 2.2615,
      "step": 40478
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2606074810028076,
      "learning_rate": 9.247850333895836e-06,
      "loss": 2.2844,
      "step": 40479
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.078241229057312,
      "learning_rate": 9.247439764082608e-06,
      "loss": 2.3388,
      "step": 40480
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0773710012435913,
      "learning_rate": 9.247029195545173e-06,
      "loss": 2.3757,
      "step": 40481
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0776712894439697,
      "learning_rate": 9.246618628284229e-06,
      "loss": 2.3637,
      "step": 40482
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.037561297416687,
      "learning_rate": 9.246208062300474e-06,
      "loss": 2.3384,
      "step": 40483
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0283989906311035,
      "learning_rate": 9.245797497594598e-06,
      "loss": 2.0606,
      "step": 40484
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.044845700263977,
      "learning_rate": 9.245386934167299e-06,
      "loss": 2.3593,
      "step": 40485
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0532439947128296,
      "learning_rate": 9.244976372019276e-06,
      "loss": 2.542,
      "step": 40486
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0403348207473755,
      "learning_rate": 9.244565811151221e-06,
      "loss": 2.4732,
      "step": 40487
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.032891869544983,
      "learning_rate": 9.244155251563832e-06,
      "loss": 2.3192,
      "step": 40488
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1776381731033325,
      "learning_rate": 9.243744693257807e-06,
      "loss": 2.2898,
      "step": 40489
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0968961715698242,
      "learning_rate": 9.24333413623384e-06,
      "loss": 2.3338,
      "step": 40490
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0833895206451416,
      "learning_rate": 9.242923580492625e-06,
      "loss": 2.3826,
      "step": 40491
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0480265617370605,
      "learning_rate": 9.242513026034861e-06,
      "loss": 2.4196,
      "step": 40492
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9992101788520813,
      "learning_rate": 9.242102472861243e-06,
      "loss": 2.2174,
      "step": 40493
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.102091908454895,
      "learning_rate": 9.24169192097247e-06,
      "loss": 2.413,
      "step": 40494
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.128896951675415,
      "learning_rate": 9.241281370369234e-06,
      "loss": 2.3754,
      "step": 40495
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.061568021774292,
      "learning_rate": 9.240870821052234e-06,
      "loss": 2.4708,
      "step": 40496
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2879549264907837,
      "learning_rate": 9.240460273022161e-06,
      "loss": 2.1072,
      "step": 40497
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.125430703163147,
      "learning_rate": 9.240049726279714e-06,
      "loss": 2.4392,
      "step": 40498
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0664739608764648,
      "learning_rate": 9.239639180825588e-06,
      "loss": 2.3697,
      "step": 40499
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1737803220748901,
      "learning_rate": 9.239228636660483e-06,
      "loss": 2.1568,
      "step": 40500
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.069906234741211,
      "learning_rate": 9.23881809378509e-06,
      "loss": 2.426,
      "step": 40501
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1144001483917236,
      "learning_rate": 9.23840755220011e-06,
      "loss": 2.3893,
      "step": 40502
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1816415786743164,
      "learning_rate": 9.237997011906234e-06,
      "loss": 2.2709,
      "step": 40503
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2281782627105713,
      "learning_rate": 9.23758647290416e-06,
      "loss": 2.2763,
      "step": 40504
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9600663185119629,
      "learning_rate": 9.237175935194583e-06,
      "loss": 2.324,
      "step": 40505
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2068368196487427,
      "learning_rate": 9.2367653987782e-06,
      "loss": 2.2097,
      "step": 40506
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1817116737365723,
      "learning_rate": 9.236354863655709e-06,
      "loss": 2.2664,
      "step": 40507
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.266229510307312,
      "learning_rate": 9.235944329827802e-06,
      "loss": 2.2798,
      "step": 40508
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1105334758758545,
      "learning_rate": 9.235533797295181e-06,
      "loss": 2.2595,
      "step": 40509
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0624111890792847,
      "learning_rate": 9.235123266058536e-06,
      "loss": 2.2662,
      "step": 40510
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1025313138961792,
      "learning_rate": 9.234712736118563e-06,
      "loss": 2.2147,
      "step": 40511
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0889981985092163,
      "learning_rate": 9.23430220747596e-06,
      "loss": 2.4043,
      "step": 40512
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0759645700454712,
      "learning_rate": 9.233891680131422e-06,
      "loss": 2.3498,
      "step": 40513
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3149827718734741,
      "learning_rate": 9.233481154085646e-06,
      "loss": 2.3788,
      "step": 40514
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9939830303192139,
      "learning_rate": 9.233070629339328e-06,
      "loss": 2.3215,
      "step": 40515
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0044325590133667,
      "learning_rate": 9.232660105893163e-06,
      "loss": 2.4362,
      "step": 40516
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1526345014572144,
      "learning_rate": 9.232249583747848e-06,
      "loss": 2.4474,
      "step": 40517
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9970293045043945,
      "learning_rate": 9.231839062904079e-06,
      "loss": 2.3792,
      "step": 40518
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0055843591690063,
      "learning_rate": 9.231428543362551e-06,
      "loss": 2.2493,
      "step": 40519
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0837374925613403,
      "learning_rate": 9.23101802512396e-06,
      "loss": 2.5426,
      "step": 40520
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1387158632278442,
      "learning_rate": 9.230607508189003e-06,
      "loss": 2.2895,
      "step": 40521
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1562119722366333,
      "learning_rate": 9.230196992558374e-06,
      "loss": 2.4174,
      "step": 40522
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9860625267028809,
      "learning_rate": 9.229786478232777e-06,
      "loss": 2.3225,
      "step": 40523
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.229590892791748,
      "learning_rate": 9.229375965212895e-06,
      "loss": 2.4377,
      "step": 40524
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0383800268173218,
      "learning_rate": 9.22896545349943e-06,
      "loss": 2.1349,
      "step": 40525
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0818243026733398,
      "learning_rate": 9.228554943093078e-06,
      "loss": 2.582,
      "step": 40526
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9802556037902832,
      "learning_rate": 9.228144433994533e-06,
      "loss": 2.1004,
      "step": 40527
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5075709819793701,
      "learning_rate": 9.227733926204497e-06,
      "loss": 2.2919,
      "step": 40528
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9228962659835815,
      "learning_rate": 9.227323419723658e-06,
      "loss": 2.2131,
      "step": 40529
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.056001901626587,
      "learning_rate": 9.22691291455272e-06,
      "loss": 2.2466,
      "step": 40530
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2279386520385742,
      "learning_rate": 9.226502410692372e-06,
      "loss": 2.3051,
      "step": 40531
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0680835247039795,
      "learning_rate": 9.226091908143312e-06,
      "loss": 2.1493,
      "step": 40532
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0576797723770142,
      "learning_rate": 9.225681406906236e-06,
      "loss": 2.5389,
      "step": 40533
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9914516806602478,
      "learning_rate": 9.225270906981842e-06,
      "loss": 2.7261,
      "step": 40534
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1606073379516602,
      "learning_rate": 9.224860408370822e-06,
      "loss": 2.5831,
      "step": 40535
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0875751972198486,
      "learning_rate": 9.224449911073881e-06,
      "loss": 2.4152,
      "step": 40536
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1571943759918213,
      "learning_rate": 9.224039415091701e-06,
      "loss": 2.2855,
      "step": 40537
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.098789095878601,
      "learning_rate": 9.223628920424987e-06,
      "loss": 2.2846,
      "step": 40538
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0190951824188232,
      "learning_rate": 9.223218427074431e-06,
      "loss": 2.5209,
      "step": 40539
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0424752235412598,
      "learning_rate": 9.222807935040733e-06,
      "loss": 2.3751,
      "step": 40540
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.248578667640686,
      "learning_rate": 9.222397444324585e-06,
      "loss": 2.3811,
      "step": 40541
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1224571466445923,
      "learning_rate": 9.221986954926685e-06,
      "loss": 2.3895,
      "step": 40542
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0843372344970703,
      "learning_rate": 9.221576466847727e-06,
      "loss": 2.1178,
      "step": 40543
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2523986101150513,
      "learning_rate": 9.221165980088411e-06,
      "loss": 2.3182,
      "step": 40544
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.020708441734314,
      "learning_rate": 9.22075549464943e-06,
      "loss": 2.4199,
      "step": 40545
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1747021675109863,
      "learning_rate": 9.220345010531478e-06,
      "loss": 2.1727,
      "step": 40546
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0305432081222534,
      "learning_rate": 9.219934527735254e-06,
      "loss": 2.3735,
      "step": 40547
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.352777123451233,
      "learning_rate": 9.219524046261453e-06,
      "loss": 2.3471,
      "step": 40548
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0942683219909668,
      "learning_rate": 9.219113566110776e-06,
      "loss": 2.3727,
      "step": 40549
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0821815729141235,
      "learning_rate": 9.218703087283906e-06,
      "loss": 2.3551,
      "step": 40550
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1467193365097046,
      "learning_rate": 9.218292609781549e-06,
      "loss": 2.3002,
      "step": 40551
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1213704347610474,
      "learning_rate": 9.217882133604398e-06,
      "loss": 2.3161,
      "step": 40552
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0391771793365479,
      "learning_rate": 9.21747165875315e-06,
      "loss": 2.3538,
      "step": 40553
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0698747634887695,
      "learning_rate": 9.2170611852285e-06,
      "loss": 2.3953,
      "step": 40554
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0838501453399658,
      "learning_rate": 9.216650713031142e-06,
      "loss": 2.2947,
      "step": 40555
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.151128888130188,
      "learning_rate": 9.216240242161774e-06,
      "loss": 2.2928,
      "step": 40556
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0485670566558838,
      "learning_rate": 9.215829772621096e-06,
      "loss": 2.478,
      "step": 40557
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1402674913406372,
      "learning_rate": 9.215419304409796e-06,
      "loss": 2.3207,
      "step": 40558
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9430999159812927,
      "learning_rate": 9.215008837528573e-06,
      "loss": 2.3294,
      "step": 40559
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0781344175338745,
      "learning_rate": 9.214598371978125e-06,
      "loss": 2.3436,
      "step": 40560
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.136046290397644,
      "learning_rate": 9.214187907759143e-06,
      "loss": 2.373,
      "step": 40561
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0879963636398315,
      "learning_rate": 9.213777444872331e-06,
      "loss": 2.4033,
      "step": 40562
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0355783700942993,
      "learning_rate": 9.213366983318376e-06,
      "loss": 2.3406,
      "step": 40563
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0470980405807495,
      "learning_rate": 9.21295652309798e-06,
      "loss": 2.5322,
      "step": 40564
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9820085763931274,
      "learning_rate": 9.212546064211834e-06,
      "loss": 2.3039,
      "step": 40565
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.991962194442749,
      "learning_rate": 9.212135606660637e-06,
      "loss": 2.2298,
      "step": 40566
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0540332794189453,
      "learning_rate": 9.211725150445085e-06,
      "loss": 2.4277,
      "step": 40567
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4337612390518188,
      "learning_rate": 9.211314695565871e-06,
      "loss": 2.3429,
      "step": 40568
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1374776363372803,
      "learning_rate": 9.210904242023694e-06,
      "loss": 2.2957,
      "step": 40569
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1305906772613525,
      "learning_rate": 9.210493789819248e-06,
      "loss": 2.3664,
      "step": 40570
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0016509294509888,
      "learning_rate": 9.210083338953231e-06,
      "loss": 2.3807,
      "step": 40571
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3619308471679688,
      "learning_rate": 9.209672889426336e-06,
      "loss": 2.3435,
      "step": 40572
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1242882013320923,
      "learning_rate": 9.20926244123926e-06,
      "loss": 2.3155,
      "step": 40573
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0170663595199585,
      "learning_rate": 9.208851994392697e-06,
      "loss": 2.2493,
      "step": 40574
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2045754194259644,
      "learning_rate": 9.208441548887352e-06,
      "loss": 2.2985,
      "step": 40575
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1671205759048462,
      "learning_rate": 9.20803110472391e-06,
      "loss": 2.3095,
      "step": 40576
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0775606632232666,
      "learning_rate": 9.207620661903068e-06,
      "loss": 2.2986,
      "step": 40577
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1952037811279297,
      "learning_rate": 9.207210220425525e-06,
      "loss": 2.1196,
      "step": 40578
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0336377620697021,
      "learning_rate": 9.206799780291976e-06,
      "loss": 2.3136,
      "step": 40579
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1019611358642578,
      "learning_rate": 9.206389341503115e-06,
      "loss": 2.4417,
      "step": 40580
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0508222579956055,
      "learning_rate": 9.205978904059643e-06,
      "loss": 2.3011,
      "step": 40581
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0960111618041992,
      "learning_rate": 9.20556846796225e-06,
      "loss": 2.3864,
      "step": 40582
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1257121562957764,
      "learning_rate": 9.205158033211635e-06,
      "loss": 2.2457,
      "step": 40583
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9618666768074036,
      "learning_rate": 9.204747599808493e-06,
      "loss": 2.526,
      "step": 40584
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1952781677246094,
      "learning_rate": 9.204337167753522e-06,
      "loss": 2.1872,
      "step": 40585
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1163157224655151,
      "learning_rate": 9.203926737047414e-06,
      "loss": 2.461,
      "step": 40586
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1160695552825928,
      "learning_rate": 9.203516307690866e-06,
      "loss": 2.2737,
      "step": 40587
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0729137659072876,
      "learning_rate": 9.203105879684574e-06,
      "loss": 2.148,
      "step": 40588
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.168105959892273,
      "learning_rate": 9.202695453029235e-06,
      "loss": 2.3247,
      "step": 40589
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0836529731750488,
      "learning_rate": 9.202285027725542e-06,
      "loss": 2.2564,
      "step": 40590
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0751614570617676,
      "learning_rate": 9.201874603774195e-06,
      "loss": 2.1541,
      "step": 40591
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2238831520080566,
      "learning_rate": 9.201464181175887e-06,
      "loss": 2.4343,
      "step": 40592
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1059879064559937,
      "learning_rate": 9.201053759931311e-06,
      "loss": 2.1252,
      "step": 40593
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0569491386413574,
      "learning_rate": 9.200643340041169e-06,
      "loss": 2.362,
      "step": 40594
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1854490041732788,
      "learning_rate": 9.200232921506152e-06,
      "loss": 2.4819,
      "step": 40595
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0083266496658325,
      "learning_rate": 9.199822504326957e-06,
      "loss": 2.4342,
      "step": 40596
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0304358005523682,
      "learning_rate": 9.199412088504282e-06,
      "loss": 2.5093,
      "step": 40597
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1515123844146729,
      "learning_rate": 9.199001674038818e-06,
      "loss": 2.2313,
      "step": 40598
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1163021326065063,
      "learning_rate": 9.198591260931267e-06,
      "loss": 2.2515,
      "step": 40599
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.122934103012085,
      "learning_rate": 9.198180849182319e-06,
      "loss": 2.299,
      "step": 40600
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0450977087020874,
      "learning_rate": 9.197770438792672e-06,
      "loss": 2.2973,
      "step": 40601
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0507301092147827,
      "learning_rate": 9.197360029763028e-06,
      "loss": 2.5225,
      "step": 40602
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2979611158370972,
      "learning_rate": 9.196949622094072e-06,
      "loss": 2.2064,
      "step": 40603
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0363253355026245,
      "learning_rate": 9.196539215786504e-06,
      "loss": 2.2301,
      "step": 40604
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1203311681747437,
      "learning_rate": 9.19612881084102e-06,
      "loss": 2.3515,
      "step": 40605
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.007561206817627,
      "learning_rate": 9.195718407258317e-06,
      "loss": 2.4053,
      "step": 40606
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0618890523910522,
      "learning_rate": 9.195308005039089e-06,
      "loss": 2.4372,
      "step": 40607
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9946087598800659,
      "learning_rate": 9.194897604184033e-06,
      "loss": 2.2549,
      "step": 40608
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9870803952217102,
      "learning_rate": 9.194487204693846e-06,
      "loss": 2.4584,
      "step": 40609
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1437853574752808,
      "learning_rate": 9.19407680656922e-06,
      "loss": 2.6336,
      "step": 40610
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0292885303497314,
      "learning_rate": 9.19366640981085e-06,
      "loss": 2.3557,
      "step": 40611
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1663386821746826,
      "learning_rate": 9.193256014419438e-06,
      "loss": 2.1213,
      "step": 40612
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0060313940048218,
      "learning_rate": 9.192845620395678e-06,
      "loss": 2.4131,
      "step": 40613
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0609606504440308,
      "learning_rate": 9.19243522774026e-06,
      "loss": 2.4541,
      "step": 40614
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0358781814575195,
      "learning_rate": 9.192024836453887e-06,
      "loss": 2.4204,
      "step": 40615
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1791729927062988,
      "learning_rate": 9.19161444653725e-06,
      "loss": 2.2284,
      "step": 40616
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.078565001487732,
      "learning_rate": 9.191204057991045e-06,
      "loss": 2.3398,
      "step": 40617
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.028557300567627,
      "learning_rate": 9.19079367081597e-06,
      "loss": 2.0987,
      "step": 40618
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9867023229598999,
      "learning_rate": 9.190383285012717e-06,
      "loss": 2.3722,
      "step": 40619
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0350455045700073,
      "learning_rate": 9.189972900581987e-06,
      "loss": 2.181,
      "step": 40620
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0790541172027588,
      "learning_rate": 9.189562517524473e-06,
      "loss": 2.2287,
      "step": 40621
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0494948625564575,
      "learning_rate": 9.189152135840868e-06,
      "loss": 2.2409,
      "step": 40622
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1634622812271118,
      "learning_rate": 9.188741755531872e-06,
      "loss": 2.352,
      "step": 40623
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1304212808609009,
      "learning_rate": 9.18833137659818e-06,
      "loss": 2.3456,
      "step": 40624
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.082737684249878,
      "learning_rate": 9.187920999040484e-06,
      "loss": 2.3645,
      "step": 40625
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1823670864105225,
      "learning_rate": 9.187510622859485e-06,
      "loss": 2.287,
      "step": 40626
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4183523654937744,
      "learning_rate": 9.187100248055874e-06,
      "loss": 2.5085,
      "step": 40627
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0706437826156616,
      "learning_rate": 9.186689874630354e-06,
      "loss": 2.5198,
      "step": 40628
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4497288465499878,
      "learning_rate": 9.186279502583612e-06,
      "loss": 2.3782,
      "step": 40629
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0522797107696533,
      "learning_rate": 9.185869131916347e-06,
      "loss": 2.2353,
      "step": 40630
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1990052461624146,
      "learning_rate": 9.185458762629255e-06,
      "loss": 2.3697,
      "step": 40631
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0241254568099976,
      "learning_rate": 9.185048394723031e-06,
      "loss": 2.3195,
      "step": 40632
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0129410028457642,
      "learning_rate": 9.184638028198371e-06,
      "loss": 2.2829,
      "step": 40633
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0816408395767212,
      "learning_rate": 9.184227663055973e-06,
      "loss": 2.2649,
      "step": 40634
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0087995529174805,
      "learning_rate": 9.183817299296529e-06,
      "loss": 2.437,
      "step": 40635
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1053225994110107,
      "learning_rate": 9.183406936920736e-06,
      "loss": 2.2323,
      "step": 40636
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0135093927383423,
      "learning_rate": 9.18299657592929e-06,
      "loss": 2.2647,
      "step": 40637
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0854072570800781,
      "learning_rate": 9.182586216322888e-06,
      "loss": 2.2526,
      "step": 40638
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0636928081512451,
      "learning_rate": 9.182175858102223e-06,
      "loss": 2.1537,
      "step": 40639
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0483037233352661,
      "learning_rate": 9.181765501267994e-06,
      "loss": 2.267,
      "step": 40640
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0263173580169678,
      "learning_rate": 9.181355145820898e-06,
      "loss": 2.498,
      "step": 40641
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0888091325759888,
      "learning_rate": 9.180944791761622e-06,
      "loss": 2.2108,
      "step": 40642
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0318814516067505,
      "learning_rate": 9.180534439090868e-06,
      "loss": 2.3525,
      "step": 40643
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0073273181915283,
      "learning_rate": 9.18012408780933e-06,
      "loss": 2.4177,
      "step": 40644
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0767852067947388,
      "learning_rate": 9.179713737917706e-06,
      "loss": 2.199,
      "step": 40645
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9850573539733887,
      "learning_rate": 9.179303389416688e-06,
      "loss": 2.3192,
      "step": 40646
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1569736003875732,
      "learning_rate": 9.178893042306975e-06,
      "loss": 2.196,
      "step": 40647
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.010587453842163,
      "learning_rate": 9.178482696589263e-06,
      "loss": 2.3412,
      "step": 40648
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0626001358032227,
      "learning_rate": 9.178072352264243e-06,
      "loss": 2.4891,
      "step": 40649
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0526455640792847,
      "learning_rate": 9.177662009332615e-06,
      "loss": 2.4861,
      "step": 40650
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1600871086120605,
      "learning_rate": 9.177251667795074e-06,
      "loss": 2.2808,
      "step": 40651
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1579886674880981,
      "learning_rate": 9.176841327652312e-06,
      "loss": 2.2543,
      "step": 40652
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.056745171546936,
      "learning_rate": 9.17643098890503e-06,
      "loss": 2.4425,
      "step": 40653
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0876727104187012,
      "learning_rate": 9.176020651553924e-06,
      "loss": 2.4635,
      "step": 40654
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0487773418426514,
      "learning_rate": 9.175610315599682e-06,
      "loss": 2.4314,
      "step": 40655
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2209579944610596,
      "learning_rate": 9.175199981043006e-06,
      "loss": 2.3547,
      "step": 40656
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1757147312164307,
      "learning_rate": 9.17478964788459e-06,
      "loss": 2.2918,
      "step": 40657
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.112518548965454,
      "learning_rate": 9.174379316125128e-06,
      "loss": 2.188,
      "step": 40658
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.156868577003479,
      "learning_rate": 9.173968985765319e-06,
      "loss": 2.4397,
      "step": 40659
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0919455289840698,
      "learning_rate": 9.173558656805856e-06,
      "loss": 2.3811,
      "step": 40660
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.178220272064209,
      "learning_rate": 9.173148329247436e-06,
      "loss": 2.5648,
      "step": 40661
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.035080075263977,
      "learning_rate": 9.172738003090754e-06,
      "loss": 2.09,
      "step": 40662
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2170032262802124,
      "learning_rate": 9.172327678336506e-06,
      "loss": 2.4981,
      "step": 40663
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9754100441932678,
      "learning_rate": 9.171917354985387e-06,
      "loss": 2.3598,
      "step": 40664
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.31838858127594,
      "learning_rate": 9.171507033038093e-06,
      "loss": 2.4401,
      "step": 40665
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.176567792892456,
      "learning_rate": 9.17109671249532e-06,
      "loss": 2.3677,
      "step": 40666
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2218385934829712,
      "learning_rate": 9.170686393357762e-06,
      "loss": 2.4284,
      "step": 40667
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.098915696144104,
      "learning_rate": 9.17027607562612e-06,
      "loss": 2.3776,
      "step": 40668
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1030784845352173,
      "learning_rate": 9.169865759301084e-06,
      "loss": 2.0147,
      "step": 40669
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.060875415802002,
      "learning_rate": 9.169455444383348e-06,
      "loss": 2.4032,
      "step": 40670
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2134004831314087,
      "learning_rate": 9.169045130873614e-06,
      "loss": 2.2757,
      "step": 40671
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2192771434783936,
      "learning_rate": 9.16863481877257e-06,
      "loss": 2.3406,
      "step": 40672
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1179883480072021,
      "learning_rate": 9.168224508080918e-06,
      "loss": 2.3676,
      "step": 40673
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1022800207138062,
      "learning_rate": 9.167814198799352e-06,
      "loss": 2.2315,
      "step": 40674
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0510447025299072,
      "learning_rate": 9.167403890928568e-06,
      "loss": 2.23,
      "step": 40675
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0681703090667725,
      "learning_rate": 9.166993584469259e-06,
      "loss": 2.3378,
      "step": 40676
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1299985647201538,
      "learning_rate": 9.16658327942212e-06,
      "loss": 2.3368,
      "step": 40677
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0312347412109375,
      "learning_rate": 9.166172975787851e-06,
      "loss": 2.2861,
      "step": 40678
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0980572700500488,
      "learning_rate": 9.165762673567144e-06,
      "loss": 2.1682,
      "step": 40679
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0353418588638306,
      "learning_rate": 9.165352372760697e-06,
      "loss": 2.376,
      "step": 40680
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1179457902908325,
      "learning_rate": 9.164942073369205e-06,
      "loss": 2.3447,
      "step": 40681
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3369743824005127,
      "learning_rate": 9.164531775393365e-06,
      "loss": 2.4538,
      "step": 40682
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2057799100875854,
      "learning_rate": 9.164121478833868e-06,
      "loss": 2.3444,
      "step": 40683
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1905275583267212,
      "learning_rate": 9.16371118369141e-06,
      "loss": 2.3707,
      "step": 40684
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1298905611038208,
      "learning_rate": 9.16330088996669e-06,
      "loss": 2.2111,
      "step": 40685
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1136515140533447,
      "learning_rate": 9.162890597660402e-06,
      "loss": 2.3353,
      "step": 40686
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.98993319272995,
      "learning_rate": 9.162480306773244e-06,
      "loss": 2.3848,
      "step": 40687
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0944411754608154,
      "learning_rate": 9.162070017305908e-06,
      "loss": 2.4777,
      "step": 40688
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9839105010032654,
      "learning_rate": 9.16165972925909e-06,
      "loss": 2.2281,
      "step": 40689
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.050747036933899,
      "learning_rate": 9.161249442633486e-06,
      "loss": 2.6286,
      "step": 40690
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9927340149879456,
      "learning_rate": 9.160839157429793e-06,
      "loss": 2.1458,
      "step": 40691
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0832422971725464,
      "learning_rate": 9.160428873648706e-06,
      "loss": 2.2768,
      "step": 40692
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1820839643478394,
      "learning_rate": 9.160018591290918e-06,
      "loss": 2.4213,
      "step": 40693
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0764219760894775,
      "learning_rate": 9.159608310357134e-06,
      "loss": 2.7968,
      "step": 40694
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0059044361114502,
      "learning_rate": 9.159198030848035e-06,
      "loss": 2.3187,
      "step": 40695
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1255782842636108,
      "learning_rate": 9.158787752764327e-06,
      "loss": 2.3447,
      "step": 40696
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1355735063552856,
      "learning_rate": 9.158377476106699e-06,
      "loss": 2.2884,
      "step": 40697
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1211493015289307,
      "learning_rate": 9.157967200875851e-06,
      "loss": 2.5843,
      "step": 40698
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2432641983032227,
      "learning_rate": 9.157556927072477e-06,
      "loss": 2.4781,
      "step": 40699
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9889055490493774,
      "learning_rate": 9.157146654697272e-06,
      "loss": 2.1341,
      "step": 40700
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1774771213531494,
      "learning_rate": 9.156736383750935e-06,
      "loss": 2.557,
      "step": 40701
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0792149305343628,
      "learning_rate": 9.156326114234157e-06,
      "loss": 2.5122,
      "step": 40702
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9905449151992798,
      "learning_rate": 9.155915846147638e-06,
      "loss": 2.1853,
      "step": 40703
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0464733839035034,
      "learning_rate": 9.15550557949207e-06,
      "loss": 2.4841,
      "step": 40704
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0092275142669678,
      "learning_rate": 9.155095314268147e-06,
      "loss": 2.0862,
      "step": 40705
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0651438236236572,
      "learning_rate": 9.154685050476567e-06,
      "loss": 2.2466,
      "step": 40706
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.02078378200531,
      "learning_rate": 9.15427478811803e-06,
      "loss": 2.3488,
      "step": 40707
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.110050916671753,
      "learning_rate": 9.153864527193222e-06,
      "loss": 2.3625,
      "step": 40708
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0962255001068115,
      "learning_rate": 9.153454267702844e-06,
      "loss": 2.4985,
      "step": 40709
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0661308765411377,
      "learning_rate": 9.153044009647594e-06,
      "loss": 2.3089,
      "step": 40710
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0300081968307495,
      "learning_rate": 9.152633753028163e-06,
      "loss": 2.3797,
      "step": 40711
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9993073344230652,
      "learning_rate": 9.152223497845248e-06,
      "loss": 2.2681,
      "step": 40712
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0404356718063354,
      "learning_rate": 9.151813244099542e-06,
      "loss": 2.3019,
      "step": 40713
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2062090635299683,
      "learning_rate": 9.151402991791744e-06,
      "loss": 2.3215,
      "step": 40714
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1482475996017456,
      "learning_rate": 9.150992740922549e-06,
      "loss": 2.4783,
      "step": 40715
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1007847785949707,
      "learning_rate": 9.150582491492652e-06,
      "loss": 2.4611,
      "step": 40716
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0387229919433594,
      "learning_rate": 9.150172243502747e-06,
      "loss": 2.3777,
      "step": 40717
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0495753288269043,
      "learning_rate": 9.149761996953532e-06,
      "loss": 2.4304,
      "step": 40718
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.07858145236969,
      "learning_rate": 9.149351751845702e-06,
      "loss": 2.1529,
      "step": 40719
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.103948950767517,
      "learning_rate": 9.148941508179955e-06,
      "loss": 2.2331,
      "step": 40720
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0055111646652222,
      "learning_rate": 9.148531265956979e-06,
      "loss": 2.3316,
      "step": 40721
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1197248697280884,
      "learning_rate": 9.148121025177474e-06,
      "loss": 2.3732,
      "step": 40722
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1593024730682373,
      "learning_rate": 9.147710785842135e-06,
      "loss": 2.3082,
      "step": 40723
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0735077857971191,
      "learning_rate": 9.14730054795166e-06,
      "loss": 2.2688,
      "step": 40724
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1405593156814575,
      "learning_rate": 9.146890311506739e-06,
      "loss": 2.2325,
      "step": 40725
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4529396295547485,
      "learning_rate": 9.146480076508071e-06,
      "loss": 2.1375,
      "step": 40726
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0118604898452759,
      "learning_rate": 9.146069842956353e-06,
      "loss": 2.4053,
      "step": 40727
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0583691596984863,
      "learning_rate": 9.145659610852276e-06,
      "loss": 2.3715,
      "step": 40728
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0509523153305054,
      "learning_rate": 9.14524938019654e-06,
      "loss": 2.1857,
      "step": 40729
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0141016244888306,
      "learning_rate": 9.144839150989838e-06,
      "loss": 2.0989,
      "step": 40730
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0005316734313965,
      "learning_rate": 9.144428923232868e-06,
      "loss": 2.2526,
      "step": 40731
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0340570211410522,
      "learning_rate": 9.14401869692632e-06,
      "loss": 2.2927,
      "step": 40732
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0402158498764038,
      "learning_rate": 9.143608472070898e-06,
      "loss": 2.3521,
      "step": 40733
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0982173681259155,
      "learning_rate": 9.143198248667286e-06,
      "loss": 2.3393,
      "step": 40734
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1181961297988892,
      "learning_rate": 9.142788026716189e-06,
      "loss": 2.3377,
      "step": 40735
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0212196111679077,
      "learning_rate": 9.142377806218298e-06,
      "loss": 2.2231,
      "step": 40736
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1640112400054932,
      "learning_rate": 9.14196758717431e-06,
      "loss": 2.335,
      "step": 40737
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1492384672164917,
      "learning_rate": 9.14155736958492e-06,
      "loss": 2.2097,
      "step": 40738
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0761758089065552,
      "learning_rate": 9.141147153450823e-06,
      "loss": 2.3751,
      "step": 40739
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0450338125228882,
      "learning_rate": 9.140736938772715e-06,
      "loss": 2.1452,
      "step": 40740
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0389617681503296,
      "learning_rate": 9.140326725551291e-06,
      "loss": 2.2962,
      "step": 40741
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0775501728057861,
      "learning_rate": 9.139916513787246e-06,
      "loss": 2.1797,
      "step": 40742
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0138330459594727,
      "learning_rate": 9.139506303481277e-06,
      "loss": 2.3322,
      "step": 40743
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9638322591781616,
      "learning_rate": 9.139096094634078e-06,
      "loss": 2.1332,
      "step": 40744
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.018699288368225,
      "learning_rate": 9.138685887246346e-06,
      "loss": 2.2516,
      "step": 40745
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1465662717819214,
      "learning_rate": 9.138275681318773e-06,
      "loss": 2.3854,
      "step": 40746
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1033656597137451,
      "learning_rate": 9.137865476852063e-06,
      "loss": 2.4087,
      "step": 40747
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1264766454696655,
      "learning_rate": 9.1374552738469e-06,
      "loss": 2.2386,
      "step": 40748
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.044510841369629,
      "learning_rate": 9.137045072303987e-06,
      "loss": 2.4146,
      "step": 40749
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.137859582901001,
      "learning_rate": 9.136634872224013e-06,
      "loss": 2.4362,
      "step": 40750
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0763901472091675,
      "learning_rate": 9.136224673607679e-06,
      "loss": 2.2422,
      "step": 40751
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2673028707504272,
      "learning_rate": 9.13581447645568e-06,
      "loss": 2.5127,
      "step": 40752
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0744954347610474,
      "learning_rate": 9.13540428076871e-06,
      "loss": 2.584,
      "step": 40753
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2044148445129395,
      "learning_rate": 9.134994086547463e-06,
      "loss": 2.1855,
      "step": 40754
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1263248920440674,
      "learning_rate": 9.134583893792636e-06,
      "loss": 2.1558,
      "step": 40755
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1430680751800537,
      "learning_rate": 9.134173702504928e-06,
      "loss": 2.633,
      "step": 40756
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0361050367355347,
      "learning_rate": 9.133763512685027e-06,
      "loss": 2.4598,
      "step": 40757
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0944815874099731,
      "learning_rate": 9.133353324333633e-06,
      "loss": 2.5123,
      "step": 40758
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1194194555282593,
      "learning_rate": 9.132943137451443e-06,
      "loss": 2.3806,
      "step": 40759
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0433576107025146,
      "learning_rate": 9.132532952039148e-06,
      "loss": 2.3275,
      "step": 40760
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2541762590408325,
      "learning_rate": 9.132122768097446e-06,
      "loss": 2.1146,
      "step": 40761
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0472337007522583,
      "learning_rate": 9.13171258562703e-06,
      "loss": 2.3291,
      "step": 40762
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0746128559112549,
      "learning_rate": 9.131302404628597e-06,
      "loss": 2.4048,
      "step": 40763
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1710246801376343,
      "learning_rate": 9.130892225102842e-06,
      "loss": 2.3985,
      "step": 40764
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1222189664840698,
      "learning_rate": 9.130482047050462e-06,
      "loss": 2.3227,
      "step": 40765
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.011082649230957,
      "learning_rate": 9.130071870472153e-06,
      "loss": 2.2951,
      "step": 40766
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1634989976882935,
      "learning_rate": 9.129661695368605e-06,
      "loss": 2.324,
      "step": 40767
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0825591087341309,
      "learning_rate": 9.129251521740518e-06,
      "loss": 2.2976,
      "step": 40768
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0484246015548706,
      "learning_rate": 9.128841349588585e-06,
      "loss": 2.5274,
      "step": 40769
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1306660175323486,
      "learning_rate": 9.128431178913505e-06,
      "loss": 2.3926,
      "step": 40770
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1054446697235107,
      "learning_rate": 9.128021009715968e-06,
      "loss": 2.4196,
      "step": 40771
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9763549566268921,
      "learning_rate": 9.127610841996674e-06,
      "loss": 2.2675,
      "step": 40772
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0023069381713867,
      "learning_rate": 9.12720067575632e-06,
      "loss": 2.2778,
      "step": 40773
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.023155927658081,
      "learning_rate": 9.126790510995593e-06,
      "loss": 2.4177,
      "step": 40774
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2663936614990234,
      "learning_rate": 9.126380347715195e-06,
      "loss": 2.1548,
      "step": 40775
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1206088066101074,
      "learning_rate": 9.125970185915819e-06,
      "loss": 2.3842,
      "step": 40776
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1196403503417969,
      "learning_rate": 9.12556002559816e-06,
      "loss": 2.3337,
      "step": 40777
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1620923280715942,
      "learning_rate": 9.125149866762915e-06,
      "loss": 2.3518,
      "step": 40778
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.274174451828003,
      "learning_rate": 9.124739709410779e-06,
      "loss": 2.5646,
      "step": 40779
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4662905931472778,
      "learning_rate": 9.124329553542446e-06,
      "loss": 2.2868,
      "step": 40780
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0945161581039429,
      "learning_rate": 9.123919399158613e-06,
      "loss": 2.4409,
      "step": 40781
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7025294303894043,
      "learning_rate": 9.123509246259974e-06,
      "loss": 2.2267,
      "step": 40782
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0824285745620728,
      "learning_rate": 9.123099094847224e-06,
      "loss": 2.4397,
      "step": 40783
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0772851705551147,
      "learning_rate": 9.12268894492106e-06,
      "loss": 2.3931,
      "step": 40784
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.132436752319336,
      "learning_rate": 9.122278796482177e-06,
      "loss": 2.2362,
      "step": 40785
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.141929268836975,
      "learning_rate": 9.121868649531275e-06,
      "loss": 2.3114,
      "step": 40786
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.045339584350586,
      "learning_rate": 9.121458504069038e-06,
      "loss": 2.2316,
      "step": 40787
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0944828987121582,
      "learning_rate": 9.121048360096168e-06,
      "loss": 2.4133,
      "step": 40788
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0855458974838257,
      "learning_rate": 9.12063821761336e-06,
      "loss": 2.2222,
      "step": 40789
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0599310398101807,
      "learning_rate": 9.120228076621309e-06,
      "loss": 2.271,
      "step": 40790
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1598442792892456,
      "learning_rate": 9.11981793712071e-06,
      "loss": 2.4028,
      "step": 40791
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1159180402755737,
      "learning_rate": 9.11940779911226e-06,
      "loss": 2.3713,
      "step": 40792
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0541881322860718,
      "learning_rate": 9.118997662596653e-06,
      "loss": 2.3911,
      "step": 40793
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0675033330917358,
      "learning_rate": 9.118587527574584e-06,
      "loss": 2.2839,
      "step": 40794
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.163526177406311,
      "learning_rate": 9.118177394046748e-06,
      "loss": 2.3411,
      "step": 40795
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2202948331832886,
      "learning_rate": 9.11776726201384e-06,
      "loss": 2.3693,
      "step": 40796
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.06242036819458,
      "learning_rate": 9.117357131476557e-06,
      "loss": 2.345,
      "step": 40797
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2100245952606201,
      "learning_rate": 9.116947002435592e-06,
      "loss": 2.2954,
      "step": 40798
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0662353038787842,
      "learning_rate": 9.116536874891646e-06,
      "loss": 2.3857,
      "step": 40799
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5876353979110718,
      "learning_rate": 9.116126748845408e-06,
      "loss": 2.2874,
      "step": 40800
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1918814182281494,
      "learning_rate": 9.115716624297574e-06,
      "loss": 2.4535,
      "step": 40801
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0201270580291748,
      "learning_rate": 9.11530650124884e-06,
      "loss": 2.2766,
      "step": 40802
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0275752544403076,
      "learning_rate": 9.114896379699902e-06,
      "loss": 2.2374,
      "step": 40803
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1005425453186035,
      "learning_rate": 9.114486259651456e-06,
      "loss": 2.1518,
      "step": 40804
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.143994927406311,
      "learning_rate": 9.114076141104194e-06,
      "loss": 2.3631,
      "step": 40805
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1849974393844604,
      "learning_rate": 9.113666024058816e-06,
      "loss": 2.4329,
      "step": 40806
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0194967985153198,
      "learning_rate": 9.113255908516013e-06,
      "loss": 2.4459,
      "step": 40807
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1463571786880493,
      "learning_rate": 9.112845794476484e-06,
      "loss": 2.2333,
      "step": 40808
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1091679334640503,
      "learning_rate": 9.112435681940922e-06,
      "loss": 2.3756,
      "step": 40809
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9628368020057678,
      "learning_rate": 9.11202557091002e-06,
      "loss": 2.5673,
      "step": 40810
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3360981941223145,
      "learning_rate": 9.111615461384478e-06,
      "loss": 2.3251,
      "step": 40811
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.256482481956482,
      "learning_rate": 9.111205353364989e-06,
      "loss": 2.436,
      "step": 40812
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.166054129600525,
      "learning_rate": 9.110795246852252e-06,
      "loss": 2.2621,
      "step": 40813
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0520663261413574,
      "learning_rate": 9.110385141846954e-06,
      "loss": 2.1876,
      "step": 40814
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.210542917251587,
      "learning_rate": 9.109975038349795e-06,
      "loss": 2.3963,
      "step": 40815
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0908607244491577,
      "learning_rate": 9.10956493636147e-06,
      "loss": 2.181,
      "step": 40816
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.120707631111145,
      "learning_rate": 9.109154835882675e-06,
      "loss": 2.4468,
      "step": 40817
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.511088252067566,
      "learning_rate": 9.108744736914105e-06,
      "loss": 2.3662,
      "step": 40818
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0509878396987915,
      "learning_rate": 9.108334639456453e-06,
      "loss": 2.656,
      "step": 40819
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0562207698822021,
      "learning_rate": 9.107924543510417e-06,
      "loss": 2.3626,
      "step": 40820
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.203269600868225,
      "learning_rate": 9.107514449076693e-06,
      "loss": 2.3403,
      "step": 40821
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1910895109176636,
      "learning_rate": 9.107104356155972e-06,
      "loss": 2.3295,
      "step": 40822
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0942713022232056,
      "learning_rate": 9.106694264748952e-06,
      "loss": 2.4673,
      "step": 40823
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.123231291770935,
      "learning_rate": 9.106284174856327e-06,
      "loss": 2.379,
      "step": 40824
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0704835653305054,
      "learning_rate": 9.105874086478795e-06,
      "loss": 2.3207,
      "step": 40825
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2221819162368774,
      "learning_rate": 9.10546399961705e-06,
      "loss": 2.2668,
      "step": 40826
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0513694286346436,
      "learning_rate": 9.105053914271785e-06,
      "loss": 2.2917,
      "step": 40827
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9590888023376465,
      "learning_rate": 9.104643830443699e-06,
      "loss": 2.2901,
      "step": 40828
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9964153170585632,
      "learning_rate": 9.104233748133482e-06,
      "loss": 2.3125,
      "step": 40829
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0363866090774536,
      "learning_rate": 9.103823667341832e-06,
      "loss": 2.5392,
      "step": 40830
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1234989166259766,
      "learning_rate": 9.103413588069445e-06,
      "loss": 2.4792,
      "step": 40831
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.231040120124817,
      "learning_rate": 9.103003510317016e-06,
      "loss": 2.438,
      "step": 40832
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0275107622146606,
      "learning_rate": 9.102593434085238e-06,
      "loss": 2.3749,
      "step": 40833
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2285622358322144,
      "learning_rate": 9.102183359374808e-06,
      "loss": 2.3204,
      "step": 40834
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9691668152809143,
      "learning_rate": 9.101773286186421e-06,
      "loss": 2.2004,
      "step": 40835
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0678778886795044,
      "learning_rate": 9.101363214520775e-06,
      "loss": 2.3565,
      "step": 40836
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1821705102920532,
      "learning_rate": 9.10095314437856e-06,
      "loss": 2.2723,
      "step": 40837
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0020759105682373,
      "learning_rate": 9.100543075760473e-06,
      "loss": 2.2845,
      "step": 40838
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9961031675338745,
      "learning_rate": 9.100133008667215e-06,
      "loss": 2.2558,
      "step": 40839
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0429792404174805,
      "learning_rate": 9.099722943099472e-06,
      "loss": 2.0605,
      "step": 40840
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0348244905471802,
      "learning_rate": 9.099312879057942e-06,
      "loss": 2.1842,
      "step": 40841
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9938788414001465,
      "learning_rate": 9.098902816543323e-06,
      "loss": 2.3036,
      "step": 40842
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0008772611618042,
      "learning_rate": 9.098492755556307e-06,
      "loss": 2.4169,
      "step": 40843
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0546780824661255,
      "learning_rate": 9.098082696097591e-06,
      "loss": 2.3848,
      "step": 40844
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.021407961845398,
      "learning_rate": 9.097672638167871e-06,
      "loss": 2.5544,
      "step": 40845
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.088420033454895,
      "learning_rate": 9.097262581767839e-06,
      "loss": 2.3618,
      "step": 40846
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.066835641860962,
      "learning_rate": 9.096852526898193e-06,
      "loss": 2.5087,
      "step": 40847
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.8929558992385864,
      "learning_rate": 9.096442473559627e-06,
      "loss": 2.2642,
      "step": 40848
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.025374174118042,
      "learning_rate": 9.096032421752839e-06,
      "loss": 2.4632,
      "step": 40849
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1016860008239746,
      "learning_rate": 9.095622371478518e-06,
      "loss": 2.4455,
      "step": 40850
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.097054362297058,
      "learning_rate": 9.095212322737364e-06,
      "loss": 2.3313,
      "step": 40851
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0531679391860962,
      "learning_rate": 9.094802275530074e-06,
      "loss": 2.1953,
      "step": 40852
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.138954758644104,
      "learning_rate": 9.094392229857336e-06,
      "loss": 2.1487,
      "step": 40853
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0613255500793457,
      "learning_rate": 9.093982185719852e-06,
      "loss": 2.4954,
      "step": 40854
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0347288846969604,
      "learning_rate": 9.093572143118311e-06,
      "loss": 2.5639,
      "step": 40855
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0904326438903809,
      "learning_rate": 9.093162102053415e-06,
      "loss": 2.2106,
      "step": 40856
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.102170705795288,
      "learning_rate": 9.092752062525852e-06,
      "loss": 2.4564,
      "step": 40857
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2826777696609497,
      "learning_rate": 9.092342024536323e-06,
      "loss": 2.3692,
      "step": 40858
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9911341071128845,
      "learning_rate": 9.091931988085519e-06,
      "loss": 2.2116,
      "step": 40859
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0478324890136719,
      "learning_rate": 9.091521953174138e-06,
      "loss": 2.1894,
      "step": 40860
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9361767172813416,
      "learning_rate": 9.091111919802873e-06,
      "loss": 2.253,
      "step": 40861
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.137306809425354,
      "learning_rate": 9.09070188797242e-06,
      "loss": 2.2948,
      "step": 40862
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2577011585235596,
      "learning_rate": 9.090291857683476e-06,
      "loss": 2.4114,
      "step": 40863
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1216539144515991,
      "learning_rate": 9.089881828936733e-06,
      "loss": 2.3558,
      "step": 40864
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1740813255310059,
      "learning_rate": 9.08947180173289e-06,
      "loss": 2.3755,
      "step": 40865
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1907918453216553,
      "learning_rate": 9.08906177607264e-06,
      "loss": 2.3108,
      "step": 40866
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.086151123046875,
      "learning_rate": 9.088651751956674e-06,
      "loss": 2.3873,
      "step": 40867
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.105849027633667,
      "learning_rate": 9.088241729385691e-06,
      "loss": 2.5505,
      "step": 40868
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1798049211502075,
      "learning_rate": 9.087831708360387e-06,
      "loss": 2.2351,
      "step": 40869
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0373578071594238,
      "learning_rate": 9.087421688881457e-06,
      "loss": 2.2971,
      "step": 40870
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0198777914047241,
      "learning_rate": 9.087011670949593e-06,
      "loss": 2.3776,
      "step": 40871
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1117064952850342,
      "learning_rate": 9.086601654565493e-06,
      "loss": 2.2423,
      "step": 40872
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0245708227157593,
      "learning_rate": 9.086191639729851e-06,
      "loss": 2.4822,
      "step": 40873
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1531907320022583,
      "learning_rate": 9.085781626443362e-06,
      "loss": 2.3254,
      "step": 40874
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9937134385108948,
      "learning_rate": 9.085371614706723e-06,
      "loss": 2.1304,
      "step": 40875
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2089850902557373,
      "learning_rate": 9.084961604520625e-06,
      "loss": 2.3157,
      "step": 40876
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1367621421813965,
      "learning_rate": 9.08455159588577e-06,
      "loss": 2.3365,
      "step": 40877
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9743168354034424,
      "learning_rate": 9.084141588802848e-06,
      "loss": 2.5398,
      "step": 40878
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1451411247253418,
      "learning_rate": 9.083731583272553e-06,
      "loss": 2.3505,
      "step": 40879
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9992671608924866,
      "learning_rate": 9.083321579295581e-06,
      "loss": 2.6045,
      "step": 40880
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0608900785446167,
      "learning_rate": 9.082911576872628e-06,
      "loss": 2.3167,
      "step": 40881
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9990739822387695,
      "learning_rate": 9.08250157600439e-06,
      "loss": 2.2657,
      "step": 40882
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0568281412124634,
      "learning_rate": 9.08209157669156e-06,
      "loss": 2.1671,
      "step": 40883
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.967104434967041,
      "learning_rate": 9.081681578934837e-06,
      "loss": 2.329,
      "step": 40884
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0400503873825073,
      "learning_rate": 9.08127158273491e-06,
      "loss": 2.1367,
      "step": 40885
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.039150595664978,
      "learning_rate": 9.080861588092478e-06,
      "loss": 2.21,
      "step": 40886
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1799870729446411,
      "learning_rate": 9.080451595008234e-06,
      "loss": 2.2436,
      "step": 40887
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1665385961532593,
      "learning_rate": 9.080041603482877e-06,
      "loss": 2.2484,
      "step": 40888
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0376276969909668,
      "learning_rate": 9.079631613517097e-06,
      "loss": 2.3974,
      "step": 40889
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3223282098770142,
      "learning_rate": 9.079221625111591e-06,
      "loss": 2.515,
      "step": 40890
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.053367018699646,
      "learning_rate": 9.078811638267056e-06,
      "loss": 2.3841,
      "step": 40891
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9813132882118225,
      "learning_rate": 9.07840165298419e-06,
      "loss": 2.4677,
      "step": 40892
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3129503726959229,
      "learning_rate": 9.077991669263677e-06,
      "loss": 2.5023,
      "step": 40893
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1090657711029053,
      "learning_rate": 9.07758168710622e-06,
      "loss": 2.5074,
      "step": 40894
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6167172193527222,
      "learning_rate": 9.077171706512513e-06,
      "loss": 2.3078,
      "step": 40895
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1762440204620361,
      "learning_rate": 9.07676172748325e-06,
      "loss": 2.4802,
      "step": 40896
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0169196128845215,
      "learning_rate": 9.076351750019126e-06,
      "loss": 2.5467,
      "step": 40897
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.077316164970398,
      "learning_rate": 9.075941774120838e-06,
      "loss": 2.2628,
      "step": 40898
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1868767738342285,
      "learning_rate": 9.075531799789078e-06,
      "loss": 2.4481,
      "step": 40899
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1002838611602783,
      "learning_rate": 9.075121827024545e-06,
      "loss": 2.3309,
      "step": 40900
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.048636794090271,
      "learning_rate": 9.074711855827929e-06,
      "loss": 2.162,
      "step": 40901
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.070032000541687,
      "learning_rate": 9.07430188619993e-06,
      "loss": 2.2911,
      "step": 40902
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.058372139930725,
      "learning_rate": 9.07389191814124e-06,
      "loss": 2.3269,
      "step": 40903
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.324665904045105,
      "learning_rate": 9.073481951652555e-06,
      "loss": 2.4593,
      "step": 40904
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0557681322097778,
      "learning_rate": 9.073071986734573e-06,
      "loss": 2.317,
      "step": 40905
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5475447177886963,
      "learning_rate": 9.072662023387982e-06,
      "loss": 2.3922,
      "step": 40906
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0995275974273682,
      "learning_rate": 9.07225206161348e-06,
      "loss": 2.28,
      "step": 40907
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.030246376991272,
      "learning_rate": 9.071842101411763e-06,
      "loss": 2.5789,
      "step": 40908
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9637373685836792,
      "learning_rate": 9.071432142783526e-06,
      "loss": 2.2047,
      "step": 40909
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0259965658187866,
      "learning_rate": 9.071022185729465e-06,
      "loss": 2.2244,
      "step": 40910
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0852341651916504,
      "learning_rate": 9.070612230250274e-06,
      "loss": 2.3546,
      "step": 40911
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.048392415046692,
      "learning_rate": 9.070202276346646e-06,
      "loss": 2.2315,
      "step": 40912
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.074810266494751,
      "learning_rate": 9.069792324019278e-06,
      "loss": 2.02,
      "step": 40913
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.123041033744812,
      "learning_rate": 9.069382373268865e-06,
      "loss": 2.3328,
      "step": 40914
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.046365737915039,
      "learning_rate": 9.0689724240961e-06,
      "loss": 2.6315,
      "step": 40915
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.061716914176941,
      "learning_rate": 9.06856247650168e-06,
      "loss": 2.3018,
      "step": 40916
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9849188923835754,
      "learning_rate": 9.068152530486302e-06,
      "loss": 2.2161,
      "step": 40917
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.164875864982605,
      "learning_rate": 9.06774258605066e-06,
      "loss": 2.2949,
      "step": 40918
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0465236902236938,
      "learning_rate": 9.067332643195446e-06,
      "loss": 2.1508,
      "step": 40919
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1634608507156372,
      "learning_rate": 9.066922701921355e-06,
      "loss": 2.361,
      "step": 40920
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0218948125839233,
      "learning_rate": 9.066512762229082e-06,
      "loss": 2.3292,
      "step": 40921
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1457664966583252,
      "learning_rate": 9.066102824119325e-06,
      "loss": 2.2463,
      "step": 40922
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0137319564819336,
      "learning_rate": 9.065692887592778e-06,
      "loss": 2.1442,
      "step": 40923
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2145811319351196,
      "learning_rate": 9.065282952650134e-06,
      "loss": 2.2177,
      "step": 40924
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0802241563796997,
      "learning_rate": 9.06487301929209e-06,
      "loss": 2.2376,
      "step": 40925
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9998158812522888,
      "learning_rate": 9.06446308751934e-06,
      "loss": 2.4385,
      "step": 40926
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1539918184280396,
      "learning_rate": 9.064053157332579e-06,
      "loss": 2.4377,
      "step": 40927
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1914011240005493,
      "learning_rate": 9.063643228732501e-06,
      "loss": 2.4237,
      "step": 40928
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0016071796417236,
      "learning_rate": 9.063233301719804e-06,
      "loss": 2.3678,
      "step": 40929
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.103773832321167,
      "learning_rate": 9.06282337629518e-06,
      "loss": 2.6287,
      "step": 40930
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9122059941291809,
      "learning_rate": 9.06241345245933e-06,
      "loss": 2.2502,
      "step": 40931
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1990593671798706,
      "learning_rate": 9.062003530212939e-06,
      "loss": 2.5386,
      "step": 40932
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1082453727722168,
      "learning_rate": 9.061593609556705e-06,
      "loss": 2.2377,
      "step": 40933
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0688369274139404,
      "learning_rate": 9.061183690491327e-06,
      "loss": 2.1778,
      "step": 40934
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9753151535987854,
      "learning_rate": 9.060773773017496e-06,
      "loss": 2.1922,
      "step": 40935
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1762853860855103,
      "learning_rate": 9.060363857135912e-06,
      "loss": 2.1006,
      "step": 40936
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9994221925735474,
      "learning_rate": 9.059953942847262e-06,
      "loss": 2.0722,
      "step": 40937
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2051531076431274,
      "learning_rate": 9.05954403015225e-06,
      "loss": 2.3601,
      "step": 40938
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1374589204788208,
      "learning_rate": 9.059134119051564e-06,
      "loss": 2.507,
      "step": 40939
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.109104871749878,
      "learning_rate": 9.058724209545901e-06,
      "loss": 2.376,
      "step": 40940
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0945795774459839,
      "learning_rate": 9.058314301635957e-06,
      "loss": 2.2141,
      "step": 40941
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0230001211166382,
      "learning_rate": 9.057904395322424e-06,
      "loss": 2.4592,
      "step": 40942
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3446731567382812,
      "learning_rate": 9.057494490606e-06,
      "loss": 2.3169,
      "step": 40943
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0884466171264648,
      "learning_rate": 9.057084587487381e-06,
      "loss": 2.2166,
      "step": 40944
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.237074851989746,
      "learning_rate": 9.056674685967257e-06,
      "loss": 2.1005,
      "step": 40945
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9800922870635986,
      "learning_rate": 9.05626478604633e-06,
      "loss": 2.4761,
      "step": 40946
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.06064772605896,
      "learning_rate": 9.055854887725284e-06,
      "loss": 2.4503,
      "step": 40947
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0815068483352661,
      "learning_rate": 9.055444991004825e-06,
      "loss": 2.0928,
      "step": 40948
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0363458395004272,
      "learning_rate": 9.05503509588564e-06,
      "loss": 2.2122,
      "step": 40949
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.073248267173767,
      "learning_rate": 9.054625202368429e-06,
      "loss": 2.2572,
      "step": 40950
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0759111642837524,
      "learning_rate": 9.054215310453884e-06,
      "loss": 2.4273,
      "step": 40951
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0874226093292236,
      "learning_rate": 9.053805420142702e-06,
      "loss": 2.3093,
      "step": 40952
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1791338920593262,
      "learning_rate": 9.053395531435576e-06,
      "loss": 2.1194,
      "step": 40953
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1412042379379272,
      "learning_rate": 9.052985644333203e-06,
      "loss": 2.2482,
      "step": 40954
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1152554750442505,
      "learning_rate": 9.052575758836274e-06,
      "loss": 2.5495,
      "step": 40955
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0885233879089355,
      "learning_rate": 9.052165874945487e-06,
      "loss": 2.4225,
      "step": 40956
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2107436656951904,
      "learning_rate": 9.051755992661539e-06,
      "loss": 2.3084,
      "step": 40957
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9896706938743591,
      "learning_rate": 9.051346111985125e-06,
      "loss": 2.2775,
      "step": 40958
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.025202989578247,
      "learning_rate": 9.050936232916932e-06,
      "loss": 2.2297,
      "step": 40959
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1158263683319092,
      "learning_rate": 9.050526355457658e-06,
      "loss": 2.4082,
      "step": 40960
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9879835247993469,
      "learning_rate": 9.050116479608002e-06,
      "loss": 2.2338,
      "step": 40961
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0627201795578003,
      "learning_rate": 9.049706605368656e-06,
      "loss": 2.3104,
      "step": 40962
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0594779253005981,
      "learning_rate": 9.049296732740317e-06,
      "loss": 2.5889,
      "step": 40963
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0823824405670166,
      "learning_rate": 9.048886861723677e-06,
      "loss": 2.3475,
      "step": 40964
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.057318091392517,
      "learning_rate": 9.04847699231943e-06,
      "loss": 2.2593,
      "step": 40965
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.134971022605896,
      "learning_rate": 9.048067124528277e-06,
      "loss": 2.3691,
      "step": 40966
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9800230860710144,
      "learning_rate": 9.047657258350907e-06,
      "loss": 2.3263,
      "step": 40967
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.040310263633728,
      "learning_rate": 9.047247393788018e-06,
      "loss": 2.3796,
      "step": 40968
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1519205570220947,
      "learning_rate": 9.0468375308403e-06,
      "loss": 2.3649,
      "step": 40969
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2680888175964355,
      "learning_rate": 9.046427669508454e-06,
      "loss": 2.3176,
      "step": 40970
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0436500310897827,
      "learning_rate": 9.046017809793174e-06,
      "loss": 2.2475,
      "step": 40971
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0422669649124146,
      "learning_rate": 9.04560795169515e-06,
      "loss": 2.2265,
      "step": 40972
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.981362521648407,
      "learning_rate": 9.04519809521508e-06,
      "loss": 2.6682,
      "step": 40973
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0277278423309326,
      "learning_rate": 9.04478824035366e-06,
      "loss": 2.1449,
      "step": 40974
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1267454624176025,
      "learning_rate": 9.044378387111581e-06,
      "loss": 2.2375,
      "step": 40975
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.048682451248169,
      "learning_rate": 9.043968535489541e-06,
      "loss": 2.198,
      "step": 40976
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.142971396446228,
      "learning_rate": 9.043558685488234e-06,
      "loss": 2.3245,
      "step": 40977
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0667338371276855,
      "learning_rate": 9.043148837108355e-06,
      "loss": 2.342,
      "step": 40978
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.068505883216858,
      "learning_rate": 9.042738990350598e-06,
      "loss": 2.4012,
      "step": 40979
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.073824405670166,
      "learning_rate": 9.042329145215658e-06,
      "loss": 2.5632,
      "step": 40980
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0264092683792114,
      "learning_rate": 9.041919301704231e-06,
      "loss": 2.3558,
      "step": 40981
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3429746627807617,
      "learning_rate": 9.04150945981701e-06,
      "loss": 2.4165,
      "step": 40982
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9904412627220154,
      "learning_rate": 9.041099619554693e-06,
      "loss": 2.2052,
      "step": 40983
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1200847625732422,
      "learning_rate": 9.040689780917975e-06,
      "loss": 2.2774,
      "step": 40984
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9224210977554321,
      "learning_rate": 9.040279943907544e-06,
      "loss": 2.1409,
      "step": 40985
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.042565107345581,
      "learning_rate": 9.0398701085241e-06,
      "loss": 2.292,
      "step": 40986
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9827699661254883,
      "learning_rate": 9.039460274768336e-06,
      "loss": 2.1049,
      "step": 40987
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0452831983566284,
      "learning_rate": 9.039050442640949e-06,
      "loss": 2.5093,
      "step": 40988
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1844751834869385,
      "learning_rate": 9.038640612142632e-06,
      "loss": 2.2886,
      "step": 40989
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0304133892059326,
      "learning_rate": 9.03823078327408e-06,
      "loss": 2.4878,
      "step": 40990
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1083918809890747,
      "learning_rate": 9.037820956035989e-06,
      "loss": 2.5057,
      "step": 40991
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0563781261444092,
      "learning_rate": 9.037411130429051e-06,
      "loss": 2.4905,
      "step": 40992
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0784910917282104,
      "learning_rate": 9.037001306453963e-06,
      "loss": 2.1334,
      "step": 40993
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0203633308410645,
      "learning_rate": 9.036591484111421e-06,
      "loss": 2.4168,
      "step": 40994
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9708954095840454,
      "learning_rate": 9.036181663402118e-06,
      "loss": 2.2593,
      "step": 40995
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0331417322158813,
      "learning_rate": 9.035771844326748e-06,
      "loss": 2.3424,
      "step": 40996
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0655046701431274,
      "learning_rate": 9.03536202688601e-06,
      "loss": 2.3827,
      "step": 40997
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0421825647354126,
      "learning_rate": 9.034952211080591e-06,
      "loss": 2.2992,
      "step": 40998
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.10344660282135,
      "learning_rate": 9.03454239691119e-06,
      "loss": 2.4691,
      "step": 40999
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.38051176071167,
      "learning_rate": 9.034132584378503e-06,
      "loss": 2.383,
      "step": 41000
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0641906261444092,
      "learning_rate": 9.033722773483223e-06,
      "loss": 2.3266,
      "step": 41001
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0245310068130493,
      "learning_rate": 9.033312964226049e-06,
      "loss": 2.5052,
      "step": 41002
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.045418620109558,
      "learning_rate": 9.032903156607667e-06,
      "loss": 2.1254,
      "step": 41003
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1781072616577148,
      "learning_rate": 9.03249335062878e-06,
      "loss": 2.455,
      "step": 41004
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9978067874908447,
      "learning_rate": 9.032083546290079e-06,
      "loss": 2.3594,
      "step": 41005
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1230260133743286,
      "learning_rate": 9.031673743592257e-06,
      "loss": 2.3664,
      "step": 41006
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0917881727218628,
      "learning_rate": 9.031263942536012e-06,
      "loss": 2.4825,
      "step": 41007
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1406335830688477,
      "learning_rate": 9.030854143122038e-06,
      "loss": 2.4957,
      "step": 41008
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.067594289779663,
      "learning_rate": 9.03044434535103e-06,
      "loss": 2.2897,
      "step": 41009
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0807595252990723,
      "learning_rate": 9.030034549223686e-06,
      "loss": 2.1865,
      "step": 41010
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2588884830474854,
      "learning_rate": 9.029624754740693e-06,
      "loss": 2.3051,
      "step": 41011
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1194543838500977,
      "learning_rate": 9.029214961902748e-06,
      "loss": 2.3672,
      "step": 41012
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0046162605285645,
      "learning_rate": 9.028805170710549e-06,
      "loss": 2.1972,
      "step": 41013
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.052361011505127,
      "learning_rate": 9.028395381164788e-06,
      "loss": 2.2525,
      "step": 41014
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0146691799163818,
      "learning_rate": 9.02798559326616e-06,
      "loss": 2.3096,
      "step": 41015
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0563291311264038,
      "learning_rate": 9.027575807015363e-06,
      "loss": 2.608,
      "step": 41016
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0952181816101074,
      "learning_rate": 9.027166022413087e-06,
      "loss": 2.5288,
      "step": 41017
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7588540315628052,
      "learning_rate": 9.02675623946003e-06,
      "loss": 2.1762,
      "step": 41018
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0318629741668701,
      "learning_rate": 9.026346458156885e-06,
      "loss": 2.3793,
      "step": 41019
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1631990671157837,
      "learning_rate": 9.025936678504346e-06,
      "loss": 2.304,
      "step": 41020
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.049880862236023,
      "learning_rate": 9.025526900503111e-06,
      "loss": 2.4117,
      "step": 41021
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0055385828018188,
      "learning_rate": 9.025117124153872e-06,
      "loss": 2.2818,
      "step": 41022
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1300352811813354,
      "learning_rate": 9.024707349457328e-06,
      "loss": 2.3687,
      "step": 41023
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2406580448150635,
      "learning_rate": 9.024297576414167e-06,
      "loss": 2.4457,
      "step": 41024
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1063297986984253,
      "learning_rate": 9.023887805025085e-06,
      "loss": 2.6023,
      "step": 41025
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1738489866256714,
      "learning_rate": 9.023478035290778e-06,
      "loss": 2.4555,
      "step": 41026
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0287644863128662,
      "learning_rate": 9.023068267211941e-06,
      "loss": 2.2665,
      "step": 41027
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0677406787872314,
      "learning_rate": 9.02265850078927e-06,
      "loss": 2.2943,
      "step": 41028
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1527701616287231,
      "learning_rate": 9.022248736023456e-06,
      "loss": 2.4179,
      "step": 41029
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3749899864196777,
      "learning_rate": 9.0218389729152e-06,
      "loss": 2.4898,
      "step": 41030
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.08746337890625,
      "learning_rate": 9.021429211465191e-06,
      "loss": 2.4711,
      "step": 41031
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1745203733444214,
      "learning_rate": 9.021019451674125e-06,
      "loss": 2.2113,
      "step": 41032
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4561718702316284,
      "learning_rate": 9.020609693542696e-06,
      "loss": 2.4324,
      "step": 41033
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0434062480926514,
      "learning_rate": 9.020199937071599e-06,
      "loss": 2.3105,
      "step": 41034
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0610169172286987,
      "learning_rate": 9.01979018226153e-06,
      "loss": 2.3454,
      "step": 41035
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.059367299079895,
      "learning_rate": 9.019380429113182e-06,
      "loss": 2.1779,
      "step": 41036
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1122642755508423,
      "learning_rate": 9.018970677627256e-06,
      "loss": 2.058,
      "step": 41037
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0682710409164429,
      "learning_rate": 9.018560927804438e-06,
      "loss": 2.5076,
      "step": 41038
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2564520835876465,
      "learning_rate": 9.018151179645424e-06,
      "loss": 2.2062,
      "step": 41039
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.10456383228302,
      "learning_rate": 9.01774143315091e-06,
      "loss": 2.5303,
      "step": 41040
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.217810869216919,
      "learning_rate": 9.017331688321593e-06,
      "loss": 2.3467,
      "step": 41041
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.145922303199768,
      "learning_rate": 9.016921945158164e-06,
      "loss": 2.2474,
      "step": 41042
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.07899010181427,
      "learning_rate": 9.016512203661319e-06,
      "loss": 2.3305,
      "step": 41043
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0453083515167236,
      "learning_rate": 9.016102463831754e-06,
      "loss": 2.3055,
      "step": 41044
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0513508319854736,
      "learning_rate": 9.015692725670162e-06,
      "loss": 2.4581,
      "step": 41045
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4058411121368408,
      "learning_rate": 9.015282989177238e-06,
      "loss": 2.3001,
      "step": 41046
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1483721733093262,
      "learning_rate": 9.014873254353677e-06,
      "loss": 2.3286,
      "step": 41047
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.023013949394226,
      "learning_rate": 9.014463521200176e-06,
      "loss": 2.4175,
      "step": 41048
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0865492820739746,
      "learning_rate": 9.014053789717423e-06,
      "loss": 2.5775,
      "step": 41049
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0451184511184692,
      "learning_rate": 9.013644059906122e-06,
      "loss": 2.3443,
      "step": 41050
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0622562170028687,
      "learning_rate": 9.01323433176696e-06,
      "loss": 2.196,
      "step": 41051
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.086954116821289,
      "learning_rate": 9.012824605300632e-06,
      "loss": 2.3284,
      "step": 41052
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1144366264343262,
      "learning_rate": 9.012414880507834e-06,
      "loss": 2.1257,
      "step": 41053
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0210158824920654,
      "learning_rate": 9.012005157389262e-06,
      "loss": 2.1757,
      "step": 41054
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0011608600616455,
      "learning_rate": 9.01159543594561e-06,
      "loss": 2.529,
      "step": 41055
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9710819125175476,
      "learning_rate": 9.01118571617757e-06,
      "loss": 2.4945,
      "step": 41056
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.052796721458435,
      "learning_rate": 9.010775998085844e-06,
      "loss": 2.3739,
      "step": 41057
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1515644788742065,
      "learning_rate": 9.010366281671116e-06,
      "loss": 2.3752,
      "step": 41058
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9770220518112183,
      "learning_rate": 9.009956566934088e-06,
      "loss": 2.4339,
      "step": 41059
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.002892017364502,
      "learning_rate": 9.009546853875452e-06,
      "loss": 2.3067,
      "step": 41060
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0351742506027222,
      "learning_rate": 9.009137142495903e-06,
      "loss": 2.316,
      "step": 41061
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2536300420761108,
      "learning_rate": 9.008727432796136e-06,
      "loss": 2.3035,
      "step": 41062
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0512696504592896,
      "learning_rate": 9.008317724776848e-06,
      "loss": 2.1902,
      "step": 41063
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0360568761825562,
      "learning_rate": 9.007908018438729e-06,
      "loss": 2.2848,
      "step": 41064
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0186400413513184,
      "learning_rate": 9.007498313782474e-06,
      "loss": 2.115,
      "step": 41065
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0042941570281982,
      "learning_rate": 9.007088610808778e-06,
      "loss": 2.2563,
      "step": 41066
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0548239946365356,
      "learning_rate": 9.006678909518339e-06,
      "loss": 2.1337,
      "step": 41067
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0702934265136719,
      "learning_rate": 9.006269209911846e-06,
      "loss": 2.5546,
      "step": 41068
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.063137173652649,
      "learning_rate": 9.005859511989998e-06,
      "loss": 2.3164,
      "step": 41069
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0310031175613403,
      "learning_rate": 9.005449815753489e-06,
      "loss": 2.3143,
      "step": 41070
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.098060131072998,
      "learning_rate": 9.005040121203012e-06,
      "loss": 2.51,
      "step": 41071
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.025212287902832,
      "learning_rate": 9.004630428339261e-06,
      "loss": 2.17,
      "step": 41072
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.298264503479004,
      "learning_rate": 9.004220737162934e-06,
      "loss": 2.4523,
      "step": 41073
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9702044129371643,
      "learning_rate": 9.003811047674723e-06,
      "loss": 2.3816,
      "step": 41074
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1176786422729492,
      "learning_rate": 9.003401359875321e-06,
      "loss": 2.594,
      "step": 41075
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1132533550262451,
      "learning_rate": 9.00299167376543e-06,
      "loss": 2.2697,
      "step": 41076
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.9833523035049438,
      "learning_rate": 9.002581989345735e-06,
      "loss": 2.2371,
      "step": 41077
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1257238388061523,
      "learning_rate": 9.002172306616933e-06,
      "loss": 2.4132,
      "step": 41078
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1954340934753418,
      "learning_rate": 9.00176262557972e-06,
      "loss": 2.2855,
      "step": 41079
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1096395254135132,
      "learning_rate": 9.001352946234792e-06,
      "loss": 2.2193,
      "step": 41080
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.076425552368164,
      "learning_rate": 9.000943268582841e-06,
      "loss": 2.5064,
      "step": 41081
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.06809401512146,
      "learning_rate": 9.000533592624563e-06,
      "loss": 2.2779,
      "step": 41082
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2412974834442139,
      "learning_rate": 9.000123918360652e-06,
      "loss": 2.2797,
      "step": 41083
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0709136724472046,
      "learning_rate": 8.9997142457918e-06,
      "loss": 2.3642,
      "step": 41084
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.023505687713623,
      "learning_rate": 8.999304574918708e-06,
      "loss": 2.3042,
      "step": 41085
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2344496250152588,
      "learning_rate": 8.998894905742066e-06,
      "loss": 2.2625,
      "step": 41086
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3347898721694946,
      "learning_rate": 8.998485238262568e-06,
      "loss": 2.434,
      "step": 41087
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.0810534954071045,
      "learning_rate": 8.998075572480908e-06,
      "loss": 2.3452,
      "step": 41088
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1235243082046509,
      "learning_rate": 8.997665908397787e-06,
      "loss": 2.3699,
      "step": 41089
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0915641784667969,
      "learning_rate": 8.99725624601389e-06,
      "loss": 2.2167,
      "step": 41090
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0139983892440796,
      "learning_rate": 8.996846585329917e-06,
      "loss": 2.2879,
      "step": 41091
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.132425308227539,
      "learning_rate": 8.996436926346564e-06,
      "loss": 2.3113,
      "step": 41092
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1035997867584229,
      "learning_rate": 8.99602726906452e-06,
      "loss": 2.3178,
      "step": 41093
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0527641773223877,
      "learning_rate": 8.995617613484484e-06,
      "loss": 2.4129,
      "step": 41094
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9946426749229431,
      "learning_rate": 8.995207959607147e-06,
      "loss": 2.2487,
      "step": 41095
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0861573219299316,
      "learning_rate": 8.994798307433205e-06,
      "loss": 2.239,
      "step": 41096
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1538177728652954,
      "learning_rate": 8.994388656963354e-06,
      "loss": 2.3016,
      "step": 41097
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1333829164505005,
      "learning_rate": 8.993979008198288e-06,
      "loss": 2.2327,
      "step": 41098
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0422699451446533,
      "learning_rate": 8.993569361138699e-06,
      "loss": 2.2832,
      "step": 41099
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9802418351173401,
      "learning_rate": 8.993159715785285e-06,
      "loss": 2.3787,
      "step": 41100
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0159984827041626,
      "learning_rate": 8.992750072138738e-06,
      "loss": 2.2158,
      "step": 41101
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0983974933624268,
      "learning_rate": 8.992340430199755e-06,
      "loss": 2.33,
      "step": 41102
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.174813151359558,
      "learning_rate": 8.99193078996903e-06,
      "loss": 2.3857,
      "step": 41103
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3222826719284058,
      "learning_rate": 8.991521151447254e-06,
      "loss": 2.5565,
      "step": 41104
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1578484773635864,
      "learning_rate": 8.99111151463512e-06,
      "loss": 2.4307,
      "step": 41105
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1148368120193481,
      "learning_rate": 8.99070187953333e-06,
      "loss": 2.2061,
      "step": 41106
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1294026374816895,
      "learning_rate": 8.990292246142573e-06,
      "loss": 2.4189,
      "step": 41107
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9794171452522278,
      "learning_rate": 8.989882614463545e-06,
      "loss": 2.3922,
      "step": 41108
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.013167142868042,
      "learning_rate": 8.989472984496941e-06,
      "loss": 2.3547,
      "step": 41109
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9635435938835144,
      "learning_rate": 8.989063356243454e-06,
      "loss": 2.0515,
      "step": 41110
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.118546724319458,
      "learning_rate": 8.988653729703779e-06,
      "loss": 2.1722,
      "step": 41111
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0094001293182373,
      "learning_rate": 8.98824410487861e-06,
      "loss": 2.4482,
      "step": 41112
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9218077659606934,
      "learning_rate": 8.987834481768644e-06,
      "loss": 2.4437,
      "step": 41113
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1608614921569824,
      "learning_rate": 8.987424860374574e-06,
      "loss": 2.6111,
      "step": 41114
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0597115755081177,
      "learning_rate": 8.987015240697092e-06,
      "loss": 2.1545,
      "step": 41115
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9409583210945129,
      "learning_rate": 8.986605622736897e-06,
      "loss": 2.3363,
      "step": 41116
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0853264331817627,
      "learning_rate": 8.986196006494677e-06,
      "loss": 2.3269,
      "step": 41117
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0936293601989746,
      "learning_rate": 8.985786391971132e-06,
      "loss": 2.476,
      "step": 41118
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.105103850364685,
      "learning_rate": 8.985376779166953e-06,
      "loss": 2.3169,
      "step": 41119
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.099522352218628,
      "learning_rate": 8.984967168082839e-06,
      "loss": 2.5595,
      "step": 41120
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1444264650344849,
      "learning_rate": 8.984557558719479e-06,
      "loss": 2.5264,
      "step": 41121
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9931246638298035,
      "learning_rate": 8.98414795107757e-06,
      "loss": 2.2534,
      "step": 41122
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9647390842437744,
      "learning_rate": 8.983738345157805e-06,
      "loss": 2.3423,
      "step": 41123
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2139097452163696,
      "learning_rate": 8.983328740960882e-06,
      "loss": 2.5327,
      "step": 41124
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0538066625595093,
      "learning_rate": 8.98291913848749e-06,
      "loss": 2.3547,
      "step": 41125
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3042783737182617,
      "learning_rate": 8.982509537738328e-06,
      "loss": 2.24,
      "step": 41126
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9923263192176819,
      "learning_rate": 8.982099938714088e-06,
      "loss": 2.3027,
      "step": 41127
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0852930545806885,
      "learning_rate": 8.981690341415466e-06,
      "loss": 2.3638,
      "step": 41128
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.025876760482788,
      "learning_rate": 8.981280745843158e-06,
      "loss": 2.358,
      "step": 41129
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0206010341644287,
      "learning_rate": 8.980871151997853e-06,
      "loss": 2.2146,
      "step": 41130
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.107468843460083,
      "learning_rate": 8.980461559880248e-06,
      "loss": 2.4074,
      "step": 41131
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1315675973892212,
      "learning_rate": 8.980051969491036e-06,
      "loss": 2.3367,
      "step": 41132
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0394277572631836,
      "learning_rate": 8.979642380830915e-06,
      "loss": 2.4632,
      "step": 41133
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0216140747070312,
      "learning_rate": 8.979232793900577e-06,
      "loss": 2.2128,
      "step": 41134
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1603424549102783,
      "learning_rate": 8.978823208700716e-06,
      "loss": 2.184,
      "step": 41135
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0389823913574219,
      "learning_rate": 8.978413625232028e-06,
      "loss": 2.1413,
      "step": 41136
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.187625527381897,
      "learning_rate": 8.978004043495205e-06,
      "loss": 2.4213,
      "step": 41137
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0790379047393799,
      "learning_rate": 8.977594463490942e-06,
      "loss": 2.296,
      "step": 41138
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1496589183807373,
      "learning_rate": 8.977184885219936e-06,
      "loss": 2.214,
      "step": 41139
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9901059865951538,
      "learning_rate": 8.976775308682879e-06,
      "loss": 2.2145,
      "step": 41140
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.144937515258789,
      "learning_rate": 8.976365733880467e-06,
      "loss": 2.4237,
      "step": 41141
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0582858324050903,
      "learning_rate": 8.975956160813394e-06,
      "loss": 2.4015,
      "step": 41142
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0607564449310303,
      "learning_rate": 8.975546589482352e-06,
      "loss": 2.5461,
      "step": 41143
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1281489133834839,
      "learning_rate": 8.975137019888037e-06,
      "loss": 2.2888,
      "step": 41144
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1597576141357422,
      "learning_rate": 8.97472745203114e-06,
      "loss": 2.363,
      "step": 41145
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.043960452079773,
      "learning_rate": 8.97431788591236e-06,
      "loss": 2.2537,
      "step": 41146
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1337720155715942,
      "learning_rate": 8.973908321532391e-06,
      "loss": 2.5811,
      "step": 41147
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2453584671020508,
      "learning_rate": 8.973498758891928e-06,
      "loss": 2.2594,
      "step": 41148
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1429016590118408,
      "learning_rate": 8.97308919799166e-06,
      "loss": 2.3415,
      "step": 41149
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0522522926330566,
      "learning_rate": 8.972679638832285e-06,
      "loss": 2.4142,
      "step": 41150
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2995649576187134,
      "learning_rate": 8.972270081414498e-06,
      "loss": 2.1602,
      "step": 41151
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0251824855804443,
      "learning_rate": 8.971860525738992e-06,
      "loss": 2.4835,
      "step": 41152
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.96692955493927,
      "learning_rate": 8.97145097180646e-06,
      "loss": 2.3886,
      "step": 41153
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.990612268447876,
      "learning_rate": 8.9710414196176e-06,
      "loss": 2.4568,
      "step": 41154
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0904384851455688,
      "learning_rate": 8.970631869173108e-06,
      "loss": 2.6632,
      "step": 41155
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1073882579803467,
      "learning_rate": 8.97022232047367e-06,
      "loss": 2.4036,
      "step": 41156
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1299644708633423,
      "learning_rate": 8.969812773519984e-06,
      "loss": 2.2538,
      "step": 41157
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.135099172592163,
      "learning_rate": 8.969403228312746e-06,
      "loss": 2.3213,
      "step": 41158
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.056064486503601,
      "learning_rate": 8.968993684852651e-06,
      "loss": 2.5184,
      "step": 41159
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0387336015701294,
      "learning_rate": 8.96858414314039e-06,
      "loss": 2.5516,
      "step": 41160
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1765968799591064,
      "learning_rate": 8.968174603176658e-06,
      "loss": 2.3616,
      "step": 41161
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0273224115371704,
      "learning_rate": 8.967765064962152e-06,
      "loss": 2.2579,
      "step": 41162
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9870483875274658,
      "learning_rate": 8.967355528497564e-06,
      "loss": 2.2906,
      "step": 41163
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1279668807983398,
      "learning_rate": 8.96694599378359e-06,
      "loss": 2.1261,
      "step": 41164
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0300490856170654,
      "learning_rate": 8.96653646082092e-06,
      "loss": 2.1955,
      "step": 41165
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0638084411621094,
      "learning_rate": 8.966126929610255e-06,
      "loss": 2.2587,
      "step": 41166
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0290836095809937,
      "learning_rate": 8.965717400152284e-06,
      "loss": 2.1857,
      "step": 41167
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1234279870986938,
      "learning_rate": 8.965307872447708e-06,
      "loss": 2.3801,
      "step": 41168
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2681645154953003,
      "learning_rate": 8.96489834649721e-06,
      "loss": 2.3904,
      "step": 41169
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5364396572113037,
      "learning_rate": 8.964488822301492e-06,
      "loss": 2.3569,
      "step": 41170
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0520848035812378,
      "learning_rate": 8.964079299861246e-06,
      "loss": 2.3769,
      "step": 41171
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0874710083007812,
      "learning_rate": 8.963669779177167e-06,
      "loss": 2.3586,
      "step": 41172
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2047370672225952,
      "learning_rate": 8.96326026024995e-06,
      "loss": 2.4174,
      "step": 41173
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3500134944915771,
      "learning_rate": 8.962850743080287e-06,
      "loss": 2.4643,
      "step": 41174
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.098153829574585,
      "learning_rate": 8.962441227668876e-06,
      "loss": 2.3829,
      "step": 41175
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1221843957901,
      "learning_rate": 8.962031714016407e-06,
      "loss": 2.4029,
      "step": 41176
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1236004829406738,
      "learning_rate": 8.961622202123578e-06,
      "loss": 2.2901,
      "step": 41177
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0833325386047363,
      "learning_rate": 8.96121269199108e-06,
      "loss": 2.3656,
      "step": 41178
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1539851427078247,
      "learning_rate": 8.960803183619608e-06,
      "loss": 2.4422,
      "step": 41179
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0352245569229126,
      "learning_rate": 8.960393677009857e-06,
      "loss": 2.3707,
      "step": 41180
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0988128185272217,
      "learning_rate": 8.959984172162523e-06,
      "loss": 2.2547,
      "step": 41181
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0999438762664795,
      "learning_rate": 8.9595746690783e-06,
      "loss": 2.385,
      "step": 41182
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.033898115158081,
      "learning_rate": 8.959165167757879e-06,
      "loss": 2.3137,
      "step": 41183
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0462309122085571,
      "learning_rate": 8.958755668201952e-06,
      "loss": 2.0876,
      "step": 41184
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1363166570663452,
      "learning_rate": 8.958346170411219e-06,
      "loss": 2.4721,
      "step": 41185
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.100180745124817,
      "learning_rate": 8.957936674386373e-06,
      "loss": 2.2562,
      "step": 41186
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0660561323165894,
      "learning_rate": 8.957527180128106e-06,
      "loss": 2.375,
      "step": 41187
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0161545276641846,
      "learning_rate": 8.957117687637114e-06,
      "loss": 2.301,
      "step": 41188
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0329575538635254,
      "learning_rate": 8.956708196914092e-06,
      "loss": 2.3383,
      "step": 41189
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0608227252960205,
      "learning_rate": 8.956298707959731e-06,
      "loss": 2.417,
      "step": 41190
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1373573541641235,
      "learning_rate": 8.955889220774728e-06,
      "loss": 2.492,
      "step": 41191
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0042967796325684,
      "learning_rate": 8.955479735359776e-06,
      "loss": 2.3125,
      "step": 41192
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1841906309127808,
      "learning_rate": 8.955070251715569e-06,
      "loss": 2.2253,
      "step": 41193
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1421948671340942,
      "learning_rate": 8.954660769842804e-06,
      "loss": 2.2296,
      "step": 41194
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.048827052116394,
      "learning_rate": 8.954251289742176e-06,
      "loss": 2.447,
      "step": 41195
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0044786930084229,
      "learning_rate": 8.953841811414371e-06,
      "loss": 2.3141,
      "step": 41196
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0934579372406006,
      "learning_rate": 8.953432334860089e-06,
      "loss": 2.3625,
      "step": 41197
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.002858281135559,
      "learning_rate": 8.953022860080023e-06,
      "loss": 2.3879,
      "step": 41198
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1249723434448242,
      "learning_rate": 8.952613387074868e-06,
      "loss": 2.2343,
      "step": 41199
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.152154564857483,
      "learning_rate": 8.952203915845318e-06,
      "loss": 2.2498,
      "step": 41200
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9644744396209717,
      "learning_rate": 8.951794446392065e-06,
      "loss": 2.4057,
      "step": 41201
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.095655918121338,
      "learning_rate": 8.951384978715806e-06,
      "loss": 2.4823,
      "step": 41202
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0655213594436646,
      "learning_rate": 8.950975512817239e-06,
      "loss": 2.3827,
      "step": 41203
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1165257692337036,
      "learning_rate": 8.95056604869705e-06,
      "loss": 2.4706,
      "step": 41204
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1069419384002686,
      "learning_rate": 8.950156586355934e-06,
      "loss": 2.2738,
      "step": 41205
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9992989897727966,
      "learning_rate": 8.94974712579459e-06,
      "loss": 2.2701,
      "step": 41206
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.118718147277832,
      "learning_rate": 8.94933766701371e-06,
      "loss": 2.4145,
      "step": 41207
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0030080080032349,
      "learning_rate": 8.94892821001399e-06,
      "loss": 2.4967,
      "step": 41208
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.088690996170044,
      "learning_rate": 8.94851875479612e-06,
      "loss": 2.3675,
      "step": 41209
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9831352829933167,
      "learning_rate": 8.948109301360797e-06,
      "loss": 2.4701,
      "step": 41210
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1575372219085693,
      "learning_rate": 8.947699849708714e-06,
      "loss": 2.6311,
      "step": 41211
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1107733249664307,
      "learning_rate": 8.947290399840565e-06,
      "loss": 2.3788,
      "step": 41212
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0735396146774292,
      "learning_rate": 8.946880951757044e-06,
      "loss": 2.1848,
      "step": 41213
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.192142128944397,
      "learning_rate": 8.946471505458846e-06,
      "loss": 2.436,
      "step": 41214
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0722237825393677,
      "learning_rate": 8.946062060946665e-06,
      "loss": 2.268,
      "step": 41215
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.129119634628296,
      "learning_rate": 8.945652618221196e-06,
      "loss": 2.4917,
      "step": 41216
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3284626007080078,
      "learning_rate": 8.945243177283132e-06,
      "loss": 2.4805,
      "step": 41217
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3105859756469727,
      "learning_rate": 8.944833738133167e-06,
      "loss": 2.3375,
      "step": 41218
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.178601622581482,
      "learning_rate": 8.944424300771995e-06,
      "loss": 2.6075,
      "step": 41219
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.947385311126709,
      "learning_rate": 8.944014865200311e-06,
      "loss": 2.3988,
      "step": 41220
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1548701524734497,
      "learning_rate": 8.943605431418812e-06,
      "loss": 2.1308,
      "step": 41221
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0009405612945557,
      "learning_rate": 8.943195999428185e-06,
      "loss": 2.3459,
      "step": 41222
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0490349531173706,
      "learning_rate": 8.942786569229128e-06,
      "loss": 2.1574,
      "step": 41223
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0278791189193726,
      "learning_rate": 8.942377140822334e-06,
      "loss": 2.6512,
      "step": 41224
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0676960945129395,
      "learning_rate": 8.9419677142085e-06,
      "loss": 2.4663,
      "step": 41225
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.976593554019928,
      "learning_rate": 8.941558289388316e-06,
      "loss": 2.5014,
      "step": 41226
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4597469568252563,
      "learning_rate": 8.941148866362479e-06,
      "loss": 2.3025,
      "step": 41227
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1207146644592285,
      "learning_rate": 8.940739445131683e-06,
      "loss": 2.1888,
      "step": 41228
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1071653366088867,
      "learning_rate": 8.94033002569662e-06,
      "loss": 2.4925,
      "step": 41229
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1054116487503052,
      "learning_rate": 8.939920608057985e-06,
      "loss": 2.4696,
      "step": 41230
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1185734272003174,
      "learning_rate": 8.939511192216477e-06,
      "loss": 2.1614,
      "step": 41231
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1720315217971802,
      "learning_rate": 8.939101778172782e-06,
      "loss": 2.3729,
      "step": 41232
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0271058082580566,
      "learning_rate": 8.938692365927597e-06,
      "loss": 2.2914,
      "step": 41233
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.023545265197754,
      "learning_rate": 8.93828295548162e-06,
      "loss": 2.4566,
      "step": 41234
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1379926204681396,
      "learning_rate": 8.937873546835539e-06,
      "loss": 2.2434,
      "step": 41235
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1163002252578735,
      "learning_rate": 8.93746413999005e-06,
      "loss": 2.4998,
      "step": 41236
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1196380853652954,
      "learning_rate": 8.937054734945849e-06,
      "loss": 2.4291,
      "step": 41237
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2257264852523804,
      "learning_rate": 8.936645331703631e-06,
      "loss": 2.2992,
      "step": 41238
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0806496143341064,
      "learning_rate": 8.936235930264087e-06,
      "loss": 2.4545,
      "step": 41239
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.062986135482788,
      "learning_rate": 8.935826530627911e-06,
      "loss": 2.2941,
      "step": 41240
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0623425245285034,
      "learning_rate": 8.935417132795797e-06,
      "loss": 2.1093,
      "step": 41241
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2826683521270752,
      "learning_rate": 8.93500773676844e-06,
      "loss": 2.3173,
      "step": 41242
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0321546792984009,
      "learning_rate": 8.934598342546536e-06,
      "loss": 2.4004,
      "step": 41243
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9761255979537964,
      "learning_rate": 8.934188950130776e-06,
      "loss": 2.4913,
      "step": 41244
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0216014385223389,
      "learning_rate": 8.933779559521855e-06,
      "loss": 2.2991,
      "step": 41245
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0985251665115356,
      "learning_rate": 8.933370170720467e-06,
      "loss": 2.3323,
      "step": 41246
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1122851371765137,
      "learning_rate": 8.932960783727307e-06,
      "loss": 2.3571,
      "step": 41247
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.062615990638733,
      "learning_rate": 8.932551398543072e-06,
      "loss": 2.4318,
      "step": 41248
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9968454837799072,
      "learning_rate": 8.93214201516845e-06,
      "loss": 2.096,
      "step": 41249
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9957227110862732,
      "learning_rate": 8.931732633604135e-06,
      "loss": 2.0919,
      "step": 41250
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0317349433898926,
      "learning_rate": 8.931323253850822e-06,
      "loss": 2.3288,
      "step": 41251
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.083298921585083,
      "learning_rate": 8.93091387590921e-06,
      "loss": 2.1243,
      "step": 41252
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9805769324302673,
      "learning_rate": 8.930504499779988e-06,
      "loss": 2.3145,
      "step": 41253
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1077539920806885,
      "learning_rate": 8.93009512546385e-06,
      "loss": 2.4361,
      "step": 41254
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0365053415298462,
      "learning_rate": 8.929685752961493e-06,
      "loss": 2.5053,
      "step": 41255
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2323518991470337,
      "learning_rate": 8.929276382273608e-06,
      "loss": 2.4695,
      "step": 41256
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2218713760375977,
      "learning_rate": 8.928867013400891e-06,
      "loss": 2.2873,
      "step": 41257
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0279372930526733,
      "learning_rate": 8.928457646344035e-06,
      "loss": 2.3296,
      "step": 41258
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0609526634216309,
      "learning_rate": 8.928048281103738e-06,
      "loss": 2.3189,
      "step": 41259
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0573087930679321,
      "learning_rate": 8.927638917680687e-06,
      "loss": 2.474,
      "step": 41260
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1524335145950317,
      "learning_rate": 8.92722955607558e-06,
      "loss": 2.4779,
      "step": 41261
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9949278831481934,
      "learning_rate": 8.92682019628911e-06,
      "loss": 2.3679,
      "step": 41262
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0668365955352783,
      "learning_rate": 8.926410838321971e-06,
      "loss": 2.4629,
      "step": 41263
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0326664447784424,
      "learning_rate": 8.926001482174857e-06,
      "loss": 2.3138,
      "step": 41264
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1229641437530518,
      "learning_rate": 8.925592127848462e-06,
      "loss": 2.2451,
      "step": 41265
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0827659368515015,
      "learning_rate": 8.925182775343483e-06,
      "loss": 2.3047,
      "step": 41266
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.281340479850769,
      "learning_rate": 8.92477342466061e-06,
      "loss": 2.4563,
      "step": 41267
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1087286472320557,
      "learning_rate": 8.924364075800535e-06,
      "loss": 2.4687,
      "step": 41268
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4735220670700073,
      "learning_rate": 8.923954728763957e-06,
      "loss": 2.3471,
      "step": 41269
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9772441983222961,
      "learning_rate": 8.923545383551567e-06,
      "loss": 2.5479,
      "step": 41270
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.064183235168457,
      "learning_rate": 8.92313604016406e-06,
      "loss": 2.0985,
      "step": 41271
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.998569667339325,
      "learning_rate": 8.92272669860213e-06,
      "loss": 2.1943,
      "step": 41272
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1315723657608032,
      "learning_rate": 8.922317358866471e-06,
      "loss": 2.4976,
      "step": 41273
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0214581489562988,
      "learning_rate": 8.92190802095778e-06,
      "loss": 2.6032,
      "step": 41274
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.024411916732788,
      "learning_rate": 8.921498684876747e-06,
      "loss": 2.4294,
      "step": 41275
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2062604427337646,
      "learning_rate": 8.921089350624064e-06,
      "loss": 2.433,
      "step": 41276
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.943692147731781,
      "learning_rate": 8.920680018200427e-06,
      "loss": 2.2644,
      "step": 41277
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3450405597686768,
      "learning_rate": 8.920270687606532e-06,
      "loss": 2.4996,
      "step": 41278
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.181086540222168,
      "learning_rate": 8.919861358843071e-06,
      "loss": 2.3717,
      "step": 41279
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0315535068511963,
      "learning_rate": 8.919452031910738e-06,
      "loss": 2.2117,
      "step": 41280
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.941730797290802,
      "learning_rate": 8.919042706810226e-06,
      "loss": 2.3161,
      "step": 41281
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.071373462677002,
      "learning_rate": 8.918633383542234e-06,
      "loss": 2.4685,
      "step": 41282
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0473129749298096,
      "learning_rate": 8.918224062107449e-06,
      "loss": 2.248,
      "step": 41283
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1118924617767334,
      "learning_rate": 8.917814742506569e-06,
      "loss": 2.43,
      "step": 41284
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0565626621246338,
      "learning_rate": 8.917405424740286e-06,
      "loss": 2.1965,
      "step": 41285
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9719426035881042,
      "learning_rate": 8.916996108809295e-06,
      "loss": 2.336,
      "step": 41286
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1001312732696533,
      "learning_rate": 8.916586794714294e-06,
      "loss": 2.4079,
      "step": 41287
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.269995093345642,
      "learning_rate": 8.91617748245597e-06,
      "loss": 2.2673,
      "step": 41288
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0755445957183838,
      "learning_rate": 8.915768172035018e-06,
      "loss": 2.3076,
      "step": 41289
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1536003351211548,
      "learning_rate": 8.915358863452135e-06,
      "loss": 2.4405,
      "step": 41290
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2278269529342651,
      "learning_rate": 8.914949556708011e-06,
      "loss": 2.2916,
      "step": 41291
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3009353876113892,
      "learning_rate": 8.914540251803344e-06,
      "loss": 2.3533,
      "step": 41292
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2312722206115723,
      "learning_rate": 8.914130948738828e-06,
      "loss": 2.4713,
      "step": 41293
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0633795261383057,
      "learning_rate": 8.913721647515153e-06,
      "loss": 2.2388,
      "step": 41294
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.036520004272461,
      "learning_rate": 8.913312348133012e-06,
      "loss": 2.2557,
      "step": 41295
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.093029260635376,
      "learning_rate": 8.912903050593104e-06,
      "loss": 2.1858,
      "step": 41296
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.146400809288025,
      "learning_rate": 8.912493754896123e-06,
      "loss": 2.3957,
      "step": 41297
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1656789779663086,
      "learning_rate": 8.912084461042757e-06,
      "loss": 2.2585,
      "step": 41298
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.980827808380127,
      "learning_rate": 8.911675169033705e-06,
      "loss": 2.2684,
      "step": 41299
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0239958763122559,
      "learning_rate": 8.911265878869661e-06,
      "loss": 2.1605,
      "step": 41300
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1991854906082153,
      "learning_rate": 8.910856590551316e-06,
      "loss": 2.4089,
      "step": 41301
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0789449214935303,
      "learning_rate": 8.910447304079362e-06,
      "loss": 2.4647,
      "step": 41302
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.345496416091919,
      "learning_rate": 8.910038019454499e-06,
      "loss": 2.2517,
      "step": 41303
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1022883653640747,
      "learning_rate": 8.909628736677414e-06,
      "loss": 2.4586,
      "step": 41304
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.049426555633545,
      "learning_rate": 8.909219455748806e-06,
      "loss": 2.0334,
      "step": 41305
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0275851488113403,
      "learning_rate": 8.908810176669368e-06,
      "loss": 2.4156,
      "step": 41306
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0580081939697266,
      "learning_rate": 8.908400899439793e-06,
      "loss": 2.4653,
      "step": 41307
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.974678635597229,
      "learning_rate": 8.907991624060774e-06,
      "loss": 2.4075,
      "step": 41308
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1585546731948853,
      "learning_rate": 8.907582350533006e-06,
      "loss": 2.2981,
      "step": 41309
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0776700973510742,
      "learning_rate": 8.907173078857183e-06,
      "loss": 2.2196,
      "step": 41310
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2225303649902344,
      "learning_rate": 8.906763809033999e-06,
      "loss": 2.3265,
      "step": 41311
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.090409278869629,
      "learning_rate": 8.906354541064146e-06,
      "loss": 2.2315,
      "step": 41312
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.156664490699768,
      "learning_rate": 8.905945274948325e-06,
      "loss": 2.2556,
      "step": 41313
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9900496006011963,
      "learning_rate": 8.905536010687218e-06,
      "loss": 2.3081,
      "step": 41314
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9566376209259033,
      "learning_rate": 8.905126748281525e-06,
      "loss": 2.3926,
      "step": 41315
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0424535274505615,
      "learning_rate": 8.90471748773194e-06,
      "loss": 2.366,
      "step": 41316
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9877657294273376,
      "learning_rate": 8.904308229039156e-06,
      "loss": 2.3308,
      "step": 41317
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.016405463218689,
      "learning_rate": 8.903898972203867e-06,
      "loss": 2.4566,
      "step": 41318
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0567017793655396,
      "learning_rate": 8.903489717226768e-06,
      "loss": 2.3935,
      "step": 41319
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1358718872070312,
      "learning_rate": 8.90308046410855e-06,
      "loss": 2.1301,
      "step": 41320
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.036874532699585,
      "learning_rate": 8.90267121284991e-06,
      "loss": 2.178,
      "step": 41321
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1053169965744019,
      "learning_rate": 8.902261963451542e-06,
      "loss": 2.2703,
      "step": 41322
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1775705814361572,
      "learning_rate": 8.901852715914135e-06,
      "loss": 2.4388,
      "step": 41323
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0907750129699707,
      "learning_rate": 8.901443470238386e-06,
      "loss": 2.4509,
      "step": 41324
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2576673030853271,
      "learning_rate": 8.90103422642499e-06,
      "loss": 2.3328,
      "step": 41325
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1655904054641724,
      "learning_rate": 8.900624984474639e-06,
      "loss": 2.3394,
      "step": 41326
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1131888628005981,
      "learning_rate": 8.90021574438803e-06,
      "loss": 2.2401,
      "step": 41327
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.194342017173767,
      "learning_rate": 8.899806506165853e-06,
      "loss": 2.4335,
      "step": 41328
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0205073356628418,
      "learning_rate": 8.899397269808801e-06,
      "loss": 2.585,
      "step": 41329
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2416541576385498,
      "learning_rate": 8.89898803531757e-06,
      "loss": 2.367,
      "step": 41330
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.277347207069397,
      "learning_rate": 8.898578802692852e-06,
      "loss": 2.2802,
      "step": 41331
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.326215147972107,
      "learning_rate": 8.898169571935343e-06,
      "loss": 2.1944,
      "step": 41332
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.167039155960083,
      "learning_rate": 8.897760343045737e-06,
      "loss": 2.4125,
      "step": 41333
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9832430481910706,
      "learning_rate": 8.897351116024725e-06,
      "loss": 2.3992,
      "step": 41334
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0852093696594238,
      "learning_rate": 8.896941890873003e-06,
      "loss": 2.3034,
      "step": 41335
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0213555097579956,
      "learning_rate": 8.896532667591263e-06,
      "loss": 2.1439,
      "step": 41336
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4717687368392944,
      "learning_rate": 8.896123446180201e-06,
      "loss": 2.3317,
      "step": 41337
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.142072319984436,
      "learning_rate": 8.895714226640508e-06,
      "loss": 2.4274,
      "step": 41338
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2520179748535156,
      "learning_rate": 8.89530500897288e-06,
      "loss": 2.2976,
      "step": 41339
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1588531732559204,
      "learning_rate": 8.894895793178016e-06,
      "loss": 2.4303,
      "step": 41340
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0822064876556396,
      "learning_rate": 8.894486579256597e-06,
      "loss": 2.503,
      "step": 41341
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1941503286361694,
      "learning_rate": 8.894077367209325e-06,
      "loss": 2.5395,
      "step": 41342
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1308330297470093,
      "learning_rate": 8.893668157036893e-06,
      "loss": 2.3183,
      "step": 41343
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1332091093063354,
      "learning_rate": 8.893258948739993e-06,
      "loss": 2.4356,
      "step": 41344
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2149957418441772,
      "learning_rate": 8.892849742319319e-06,
      "loss": 2.4834,
      "step": 41345
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.150184988975525,
      "learning_rate": 8.892440537775566e-06,
      "loss": 2.4632,
      "step": 41346
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0156117677688599,
      "learning_rate": 8.892031335109428e-06,
      "loss": 2.329,
      "step": 41347
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0164026021957397,
      "learning_rate": 8.891622134321595e-06,
      "loss": 2.2727,
      "step": 41348
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1929798126220703,
      "learning_rate": 8.891212935412768e-06,
      "loss": 2.564,
      "step": 41349
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1201752424240112,
      "learning_rate": 8.890803738383634e-06,
      "loss": 2.4153,
      "step": 41350
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.985285222530365,
      "learning_rate": 8.89039454323489e-06,
      "loss": 2.447,
      "step": 41351
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0780274868011475,
      "learning_rate": 8.889985349967226e-06,
      "loss": 2.2659,
      "step": 41352
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1666179895401,
      "learning_rate": 8.889576158581342e-06,
      "loss": 2.4141,
      "step": 41353
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0577197074890137,
      "learning_rate": 8.889166969077925e-06,
      "loss": 2.265,
      "step": 41354
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0321922302246094,
      "learning_rate": 8.888757781457672e-06,
      "loss": 2.3835,
      "step": 41355
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1230138540267944,
      "learning_rate": 8.88834859572128e-06,
      "loss": 2.3854,
      "step": 41356
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.02389657497406,
      "learning_rate": 8.887939411869435e-06,
      "loss": 2.3671,
      "step": 41357
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2718253135681152,
      "learning_rate": 8.887530229902836e-06,
      "loss": 2.2639,
      "step": 41358
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1281872987747192,
      "learning_rate": 8.887121049822174e-06,
      "loss": 2.3019,
      "step": 41359
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9521706700325012,
      "learning_rate": 8.886711871628144e-06,
      "loss": 2.1177,
      "step": 41360
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0987839698791504,
      "learning_rate": 8.88630269532144e-06,
      "loss": 2.4451,
      "step": 41361
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0274583101272583,
      "learning_rate": 8.885893520902757e-06,
      "loss": 2.186,
      "step": 41362
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0365180969238281,
      "learning_rate": 8.885484348372785e-06,
      "loss": 2.2458,
      "step": 41363
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1427830457687378,
      "learning_rate": 8.885075177732223e-06,
      "loss": 2.2617,
      "step": 41364
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0287251472473145,
      "learning_rate": 8.884666008981758e-06,
      "loss": 2.3839,
      "step": 41365
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0704706907272339,
      "learning_rate": 8.884256842122093e-06,
      "loss": 2.5083,
      "step": 41366
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2053077220916748,
      "learning_rate": 8.88384767715391e-06,
      "loss": 2.4273,
      "step": 41367
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0932389497756958,
      "learning_rate": 8.883438514077908e-06,
      "loss": 2.1928,
      "step": 41368
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.111941933631897,
      "learning_rate": 8.883029352894784e-06,
      "loss": 2.316,
      "step": 41369
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9786065816879272,
      "learning_rate": 8.882620193605226e-06,
      "loss": 2.3216,
      "step": 41370
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1067951917648315,
      "learning_rate": 8.882211036209931e-06,
      "loss": 2.2157,
      "step": 41371
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.162673830986023,
      "learning_rate": 8.881801880709591e-06,
      "loss": 2.2681,
      "step": 41372
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2954288721084595,
      "learning_rate": 8.881392727104902e-06,
      "loss": 2.5072,
      "step": 41373
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0514689683914185,
      "learning_rate": 8.880983575396556e-06,
      "loss": 2.3531,
      "step": 41374
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.050620198249817,
      "learning_rate": 8.880574425585245e-06,
      "loss": 2.2591,
      "step": 41375
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.091482162475586,
      "learning_rate": 8.880165277671667e-06,
      "loss": 2.5522,
      "step": 41376
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.087368369102478,
      "learning_rate": 8.879756131656514e-06,
      "loss": 2.3878,
      "step": 41377
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1372315883636475,
      "learning_rate": 8.879346987540476e-06,
      "loss": 2.2906,
      "step": 41378
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1732399463653564,
      "learning_rate": 8.878937845324252e-06,
      "loss": 2.0916,
      "step": 41379
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.026102900505066,
      "learning_rate": 8.878528705008529e-06,
      "loss": 2.6032,
      "step": 41380
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.035506248474121,
      "learning_rate": 8.878119566594005e-06,
      "loss": 2.3463,
      "step": 41381
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1510648727416992,
      "learning_rate": 8.877710430081376e-06,
      "loss": 2.3006,
      "step": 41382
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1943942308425903,
      "learning_rate": 8.87730129547133e-06,
      "loss": 2.3467,
      "step": 41383
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0705909729003906,
      "learning_rate": 8.876892162764565e-06,
      "loss": 2.5275,
      "step": 41384
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0386676788330078,
      "learning_rate": 8.87648303196177e-06,
      "loss": 2.2292,
      "step": 41385
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0938886404037476,
      "learning_rate": 8.876073903063644e-06,
      "loss": 2.3578,
      "step": 41386
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9757729768753052,
      "learning_rate": 8.875664776070877e-06,
      "loss": 2.2146,
      "step": 41387
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1728615760803223,
      "learning_rate": 8.875255650984164e-06,
      "loss": 2.2945,
      "step": 41388
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3983298540115356,
      "learning_rate": 8.874846527804198e-06,
      "loss": 2.5749,
      "step": 41389
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.11171555519104,
      "learning_rate": 8.874437406531671e-06,
      "loss": 2.3999,
      "step": 41390
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0894509553909302,
      "learning_rate": 8.87402828716728e-06,
      "loss": 2.49,
      "step": 41391
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1018774509429932,
      "learning_rate": 8.873619169711717e-06,
      "loss": 2.5091,
      "step": 41392
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0818908214569092,
      "learning_rate": 8.873210054165679e-06,
      "loss": 2.3172,
      "step": 41393
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1158889532089233,
      "learning_rate": 8.872800940529853e-06,
      "loss": 2.3229,
      "step": 41394
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0885974168777466,
      "learning_rate": 8.872391828804932e-06,
      "loss": 2.6056,
      "step": 41395
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.230961799621582,
      "learning_rate": 8.871982718991617e-06,
      "loss": 2.2481,
      "step": 41396
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.334960699081421,
      "learning_rate": 8.871573611090596e-06,
      "loss": 2.3798,
      "step": 41397
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.055855631828308,
      "learning_rate": 8.871164505102564e-06,
      "loss": 2.3159,
      "step": 41398
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0321664810180664,
      "learning_rate": 8.870755401028215e-06,
      "loss": 2.1981,
      "step": 41399
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3736348152160645,
      "learning_rate": 8.870346298868241e-06,
      "loss": 2.2924,
      "step": 41400
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9974033236503601,
      "learning_rate": 8.86993719862334e-06,
      "loss": 2.302,
      "step": 41401
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0299627780914307,
      "learning_rate": 8.8695281002942e-06,
      "loss": 2.4449,
      "step": 41402
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1451172828674316,
      "learning_rate": 8.869119003881518e-06,
      "loss": 2.3073,
      "step": 41403
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0851551294326782,
      "learning_rate": 8.868709909385987e-06,
      "loss": 2.432,
      "step": 41404
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9704495072364807,
      "learning_rate": 8.868300816808299e-06,
      "loss": 2.315,
      "step": 41405
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1046090126037598,
      "learning_rate": 8.86789172614915e-06,
      "loss": 2.2153,
      "step": 41406
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0703928470611572,
      "learning_rate": 8.867482637409232e-06,
      "loss": 2.2688,
      "step": 41407
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1522109508514404,
      "learning_rate": 8.867073550589237e-06,
      "loss": 2.434,
      "step": 41408
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0963003635406494,
      "learning_rate": 8.866664465689859e-06,
      "loss": 2.4287,
      "step": 41409
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1015605926513672,
      "learning_rate": 8.866255382711795e-06,
      "loss": 2.4115,
      "step": 41410
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2367347478866577,
      "learning_rate": 8.865846301655736e-06,
      "loss": 2.3159,
      "step": 41411
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1157704591751099,
      "learning_rate": 8.865437222522373e-06,
      "loss": 2.5116,
      "step": 41412
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9925212264060974,
      "learning_rate": 8.865028145312404e-06,
      "loss": 2.4168,
      "step": 41413
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0452550649642944,
      "learning_rate": 8.86461907002652e-06,
      "loss": 2.6472,
      "step": 41414
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.329856276512146,
      "learning_rate": 8.864209996665414e-06,
      "loss": 2.3365,
      "step": 41415
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2959190607070923,
      "learning_rate": 8.863800925229782e-06,
      "loss": 2.0846,
      "step": 41416
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.096012830734253,
      "learning_rate": 8.863391855720315e-06,
      "loss": 2.5698,
      "step": 41417
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1529431343078613,
      "learning_rate": 8.862982788137707e-06,
      "loss": 2.3822,
      "step": 41418
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0906540155410767,
      "learning_rate": 8.862573722482658e-06,
      "loss": 2.4306,
      "step": 41419
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.043556809425354,
      "learning_rate": 8.862164658755849e-06,
      "loss": 2.4416,
      "step": 41420
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1943926811218262,
      "learning_rate": 8.861755596957981e-06,
      "loss": 2.2274,
      "step": 41421
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0767406225204468,
      "learning_rate": 8.861346537089748e-06,
      "loss": 2.5514,
      "step": 41422
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9834476113319397,
      "learning_rate": 8.86093747915184e-06,
      "loss": 2.2612,
      "step": 41423
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.008396863937378,
      "learning_rate": 8.860528423144952e-06,
      "loss": 2.3292,
      "step": 41424
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1106880903244019,
      "learning_rate": 8.860119369069779e-06,
      "loss": 2.296,
      "step": 41425
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.010875940322876,
      "learning_rate": 8.859710316927013e-06,
      "loss": 2.5428,
      "step": 41426
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.179741621017456,
      "learning_rate": 8.859301266717349e-06,
      "loss": 2.3775,
      "step": 41427
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2023800611495972,
      "learning_rate": 8.858892218441476e-06,
      "loss": 2.3213,
      "step": 41428
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.132175326347351,
      "learning_rate": 8.858483172100093e-06,
      "loss": 2.1845,
      "step": 41429
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.159096121788025,
      "learning_rate": 8.85807412769389e-06,
      "loss": 2.3347,
      "step": 41430
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.138785481452942,
      "learning_rate": 8.857665085223562e-06,
      "loss": 2.2209,
      "step": 41431
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1268396377563477,
      "learning_rate": 8.857256044689806e-06,
      "loss": 2.1467,
      "step": 41432
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2242698669433594,
      "learning_rate": 8.856847006093308e-06,
      "loss": 2.5808,
      "step": 41433
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0053447484970093,
      "learning_rate": 8.856437969434763e-06,
      "loss": 2.5548,
      "step": 41434
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.003104329109192,
      "learning_rate": 8.856028934714868e-06,
      "loss": 2.3957,
      "step": 41435
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.104396104812622,
      "learning_rate": 8.855619901934313e-06,
      "loss": 2.3076,
      "step": 41436
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0471594333648682,
      "learning_rate": 8.855210871093794e-06,
      "loss": 2.4327,
      "step": 41437
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0314745903015137,
      "learning_rate": 8.854801842194004e-06,
      "loss": 2.3755,
      "step": 41438
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4077153205871582,
      "learning_rate": 8.854392815235638e-06,
      "loss": 2.547,
      "step": 41439
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9925306439399719,
      "learning_rate": 8.853983790219383e-06,
      "loss": 2.2807,
      "step": 41440
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2008795738220215,
      "learning_rate": 8.85357476714594e-06,
      "loss": 2.1257,
      "step": 41441
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3377619981765747,
      "learning_rate": 8.853165746015997e-06,
      "loss": 2.3206,
      "step": 41442
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0394974946975708,
      "learning_rate": 8.85275672683025e-06,
      "loss": 2.4641,
      "step": 41443
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.050490379333496,
      "learning_rate": 8.852347709589392e-06,
      "loss": 2.3386,
      "step": 41444
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1167669296264648,
      "learning_rate": 8.851938694294118e-06,
      "loss": 2.3626,
      "step": 41445
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.952954888343811,
      "learning_rate": 8.85152968094512e-06,
      "loss": 2.3332,
      "step": 41446
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0965888500213623,
      "learning_rate": 8.851120669543089e-06,
      "loss": 2.4759,
      "step": 41447
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.998582661151886,
      "learning_rate": 8.850711660088722e-06,
      "loss": 2.07,
      "step": 41448
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1317265033721924,
      "learning_rate": 8.850302652582708e-06,
      "loss": 2.4898,
      "step": 41449
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.182769536972046,
      "learning_rate": 8.849893647025747e-06,
      "loss": 2.3817,
      "step": 41450
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1125167608261108,
      "learning_rate": 8.849484643418524e-06,
      "loss": 2.6814,
      "step": 41451
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0587878227233887,
      "learning_rate": 8.84907564176174e-06,
      "loss": 2.3597,
      "step": 41452
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.156149983406067,
      "learning_rate": 8.848666642056086e-06,
      "loss": 2.2172,
      "step": 41453
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0977333784103394,
      "learning_rate": 8.848257644302252e-06,
      "loss": 2.4476,
      "step": 41454
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0210375785827637,
      "learning_rate": 8.847848648500937e-06,
      "loss": 2.1308,
      "step": 41455
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0973657369613647,
      "learning_rate": 8.84743965465283e-06,
      "loss": 2.1646,
      "step": 41456
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0579041242599487,
      "learning_rate": 8.847030662758626e-06,
      "loss": 2.5218,
      "step": 41457
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1803001165390015,
      "learning_rate": 8.84662167281902e-06,
      "loss": 2.429,
      "step": 41458
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0433847904205322,
      "learning_rate": 8.846212684834703e-06,
      "loss": 2.4457,
      "step": 41459
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0005205869674683,
      "learning_rate": 8.845803698806368e-06,
      "loss": 2.2684,
      "step": 41460
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2121522426605225,
      "learning_rate": 8.845394714734709e-06,
      "loss": 2.2696,
      "step": 41461
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3243658542633057,
      "learning_rate": 8.844985732620419e-06,
      "loss": 2.2786,
      "step": 41462
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.019904375076294,
      "learning_rate": 8.844576752464191e-06,
      "loss": 2.2397,
      "step": 41463
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0350195169448853,
      "learning_rate": 8.84416777426672e-06,
      "loss": 2.3885,
      "step": 41464
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0708121061325073,
      "learning_rate": 8.8437587980287e-06,
      "loss": 2.271,
      "step": 41465
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1208957433700562,
      "learning_rate": 8.843349823750822e-06,
      "loss": 2.4309,
      "step": 41466
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1677093505859375,
      "learning_rate": 8.84294085143378e-06,
      "loss": 2.5412,
      "step": 41467
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.003295660018921,
      "learning_rate": 8.84253188107827e-06,
      "loss": 2.5139,
      "step": 41468
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0510506629943848,
      "learning_rate": 8.84212291268498e-06,
      "loss": 2.2933,
      "step": 41469
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1891990900039673,
      "learning_rate": 8.841713946254605e-06,
      "loss": 2.2984,
      "step": 41470
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9460503458976746,
      "learning_rate": 8.841304981787841e-06,
      "loss": 2.3142,
      "step": 41471
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0180003643035889,
      "learning_rate": 8.840896019285383e-06,
      "loss": 2.4358,
      "step": 41472
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0642554759979248,
      "learning_rate": 8.840487058747917e-06,
      "loss": 2.1809,
      "step": 41473
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1371612548828125,
      "learning_rate": 8.840078100176142e-06,
      "loss": 2.4568,
      "step": 41474
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9537184834480286,
      "learning_rate": 8.83966914357075e-06,
      "loss": 2.4313,
      "step": 41475
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2009589672088623,
      "learning_rate": 8.839260188932433e-06,
      "loss": 2.4443,
      "step": 41476
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0233254432678223,
      "learning_rate": 8.838851236261884e-06,
      "loss": 2.2965,
      "step": 41477
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3516684770584106,
      "learning_rate": 8.838442285559799e-06,
      "loss": 2.3984,
      "step": 41478
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1609838008880615,
      "learning_rate": 8.838033336826867e-06,
      "loss": 2.0127,
      "step": 41479
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.066043734550476,
      "learning_rate": 8.837624390063789e-06,
      "loss": 2.2641,
      "step": 41480
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0516085624694824,
      "learning_rate": 8.83721544527125e-06,
      "loss": 2.1685,
      "step": 41481
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0052248239517212,
      "learning_rate": 8.836806502449946e-06,
      "loss": 2.2226,
      "step": 41482
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.251690149307251,
      "learning_rate": 8.836397561600573e-06,
      "loss": 2.4192,
      "step": 41483
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1507163047790527,
      "learning_rate": 8.83598862272382e-06,
      "loss": 2.2518,
      "step": 41484
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1331080198287964,
      "learning_rate": 8.835579685820388e-06,
      "loss": 2.4752,
      "step": 41485
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0239123106002808,
      "learning_rate": 8.835170750890961e-06,
      "loss": 2.5106,
      "step": 41486
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0347687005996704,
      "learning_rate": 8.834761817936234e-06,
      "loss": 2.4502,
      "step": 41487
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0202457904815674,
      "learning_rate": 8.834352886956905e-06,
      "loss": 2.4691,
      "step": 41488
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.017246127128601,
      "learning_rate": 8.833943957953663e-06,
      "loss": 2.4025,
      "step": 41489
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0647298097610474,
      "learning_rate": 8.833535030927203e-06,
      "loss": 2.4121,
      "step": 41490
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0464197397232056,
      "learning_rate": 8.833126105878217e-06,
      "loss": 2.3918,
      "step": 41491
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1067639589309692,
      "learning_rate": 8.8327171828074e-06,
      "loss": 2.6542,
      "step": 41492
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2784875631332397,
      "learning_rate": 8.832308261715444e-06,
      "loss": 2.3247,
      "step": 41493
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4864274263381958,
      "learning_rate": 8.831899342603043e-06,
      "loss": 2.3755,
      "step": 41494
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0760571956634521,
      "learning_rate": 8.831490425470891e-06,
      "loss": 2.0383,
      "step": 41495
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1624674797058105,
      "learning_rate": 8.831081510319679e-06,
      "loss": 2.3334,
      "step": 41496
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.13788640499115,
      "learning_rate": 8.830672597150101e-06,
      "loss": 2.4058,
      "step": 41497
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.02914559841156,
      "learning_rate": 8.830263685962854e-06,
      "loss": 2.3367,
      "step": 41498
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0361409187316895,
      "learning_rate": 8.829854776758624e-06,
      "loss": 2.4293,
      "step": 41499
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1531437635421753,
      "learning_rate": 8.829445869538108e-06,
      "loss": 2.1716,
      "step": 41500
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0627411603927612,
      "learning_rate": 8.829036964301999e-06,
      "loss": 2.3491,
      "step": 41501
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0902268886566162,
      "learning_rate": 8.828628061050993e-06,
      "loss": 2.4345,
      "step": 41502
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1046265363693237,
      "learning_rate": 8.828219159785777e-06,
      "loss": 2.1994,
      "step": 41503
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.064825415611267,
      "learning_rate": 8.82781026050705e-06,
      "loss": 2.3973,
      "step": 41504
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.045757532119751,
      "learning_rate": 8.827401363215502e-06,
      "loss": 2.3431,
      "step": 41505
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0009267330169678,
      "learning_rate": 8.826992467911829e-06,
      "loss": 2.2513,
      "step": 41506
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2036813497543335,
      "learning_rate": 8.82658357459672e-06,
      "loss": 2.3426,
      "step": 41507
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1917624473571777,
      "learning_rate": 8.826174683270871e-06,
      "loss": 2.3338,
      "step": 41508
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.106162667274475,
      "learning_rate": 8.825765793934975e-06,
      "loss": 2.6013,
      "step": 41509
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0947370529174805,
      "learning_rate": 8.825356906589724e-06,
      "loss": 2.5176,
      "step": 41510
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0092414617538452,
      "learning_rate": 8.824948021235818e-06,
      "loss": 2.356,
      "step": 41511
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1191582679748535,
      "learning_rate": 8.82453913787394e-06,
      "loss": 2.3771,
      "step": 41512
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1326220035552979,
      "learning_rate": 8.824130256504786e-06,
      "loss": 2.5995,
      "step": 41513
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1452736854553223,
      "learning_rate": 8.82372137712905e-06,
      "loss": 2.4748,
      "step": 41514
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0555214881896973,
      "learning_rate": 8.823312499747427e-06,
      "loss": 2.2749,
      "step": 41515
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.269337773323059,
      "learning_rate": 8.82290362436061e-06,
      "loss": 2.3683,
      "step": 41516
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0146807432174683,
      "learning_rate": 8.82249475096929e-06,
      "loss": 2.4134,
      "step": 41517
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0448246002197266,
      "learning_rate": 8.822085879574161e-06,
      "loss": 2.3193,
      "step": 41518
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.036102533340454,
      "learning_rate": 8.821677010175916e-06,
      "loss": 2.1648,
      "step": 41519
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.254089593887329,
      "learning_rate": 8.821268142775249e-06,
      "loss": 2.4341,
      "step": 41520
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0676696300506592,
      "learning_rate": 8.820859277372851e-06,
      "loss": 2.2813,
      "step": 41521
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5463542938232422,
      "learning_rate": 8.820450413969419e-06,
      "loss": 2.5561,
      "step": 41522
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3507241010665894,
      "learning_rate": 8.820041552565645e-06,
      "loss": 2.1397,
      "step": 41523
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0910677909851074,
      "learning_rate": 8.819632693162223e-06,
      "loss": 2.4194,
      "step": 41524
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0687559843063354,
      "learning_rate": 8.81922383575984e-06,
      "loss": 2.1026,
      "step": 41525
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1225688457489014,
      "learning_rate": 8.818814980359194e-06,
      "loss": 2.2591,
      "step": 41526
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1054631471633911,
      "learning_rate": 8.818406126960977e-06,
      "loss": 2.5552,
      "step": 41527
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.096457600593567,
      "learning_rate": 8.817997275565882e-06,
      "loss": 2.4462,
      "step": 41528
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0333737134933472,
      "learning_rate": 8.817588426174606e-06,
      "loss": 2.3156,
      "step": 41529
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0840661525726318,
      "learning_rate": 8.817179578787835e-06,
      "loss": 2.2197,
      "step": 41530
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.157569169998169,
      "learning_rate": 8.816770733406267e-06,
      "loss": 2.3485,
      "step": 41531
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0115762948989868,
      "learning_rate": 8.816361890030594e-06,
      "loss": 2.3854,
      "step": 41532
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0649325847625732,
      "learning_rate": 8.815953048661509e-06,
      "loss": 2.1393,
      "step": 41533
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0697790384292603,
      "learning_rate": 8.815544209299704e-06,
      "loss": 2.2794,
      "step": 41534
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1151119470596313,
      "learning_rate": 8.815135371945876e-06,
      "loss": 2.3755,
      "step": 41535
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9834410548210144,
      "learning_rate": 8.814726536600713e-06,
      "loss": 2.3816,
      "step": 41536
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1810622215270996,
      "learning_rate": 8.814317703264916e-06,
      "loss": 2.1629,
      "step": 41537
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0379419326782227,
      "learning_rate": 8.813908871939167e-06,
      "loss": 2.3591,
      "step": 41538
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2601428031921387,
      "learning_rate": 8.813500042624166e-06,
      "loss": 2.4918,
      "step": 41539
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0502326488494873,
      "learning_rate": 8.813091215320604e-06,
      "loss": 2.2789,
      "step": 41540
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1039180755615234,
      "learning_rate": 8.812682390029175e-06,
      "loss": 2.3856,
      "step": 41541
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.110317349433899,
      "learning_rate": 8.812273566750571e-06,
      "loss": 2.377,
      "step": 41542
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0899591445922852,
      "learning_rate": 8.811864745485485e-06,
      "loss": 2.6485,
      "step": 41543
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1032885313034058,
      "learning_rate": 8.811455926234613e-06,
      "loss": 2.369,
      "step": 41544
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0652543306350708,
      "learning_rate": 8.811047108998645e-06,
      "loss": 2.255,
      "step": 41545
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.050538182258606,
      "learning_rate": 8.810638293778276e-06,
      "loss": 2.3561,
      "step": 41546
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.039874792098999,
      "learning_rate": 8.810229480574197e-06,
      "loss": 2.2791,
      "step": 41547
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1257303953170776,
      "learning_rate": 8.809820669387104e-06,
      "loss": 2.5034,
      "step": 41548
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0835155248641968,
      "learning_rate": 8.809411860217687e-06,
      "loss": 2.1827,
      "step": 41549
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.215046763420105,
      "learning_rate": 8.80900305306664e-06,
      "loss": 2.2925,
      "step": 41550
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1745116710662842,
      "learning_rate": 8.808594247934661e-06,
      "loss": 2.2597,
      "step": 41551
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1185564994812012,
      "learning_rate": 8.808185444822434e-06,
      "loss": 2.3671,
      "step": 41552
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1392338275909424,
      "learning_rate": 8.807776643730656e-06,
      "loss": 2.4766,
      "step": 41553
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2194002866744995,
      "learning_rate": 8.80736784466002e-06,
      "loss": 2.2594,
      "step": 41554
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0567197799682617,
      "learning_rate": 8.80695904761122e-06,
      "loss": 2.0395,
      "step": 41555
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0164611339569092,
      "learning_rate": 8.806550252584948e-06,
      "loss": 2.5762,
      "step": 41556
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0613415241241455,
      "learning_rate": 8.8061414595819e-06,
      "loss": 2.4369,
      "step": 41557
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1530570983886719,
      "learning_rate": 8.805732668602764e-06,
      "loss": 2.5258,
      "step": 41558
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0646456480026245,
      "learning_rate": 8.805323879648237e-06,
      "loss": 2.1526,
      "step": 41559
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1487243175506592,
      "learning_rate": 8.804915092719008e-06,
      "loss": 2.156,
      "step": 41560
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2709989547729492,
      "learning_rate": 8.804506307815774e-06,
      "loss": 2.5185,
      "step": 41561
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0391111373901367,
      "learning_rate": 8.804097524939227e-06,
      "loss": 2.4083,
      "step": 41562
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0540589094161987,
      "learning_rate": 8.803688744090059e-06,
      "loss": 2.3291,
      "step": 41563
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0005708932876587,
      "learning_rate": 8.803279965268967e-06,
      "loss": 2.4019,
      "step": 41564
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.096076488494873,
      "learning_rate": 8.802871188476637e-06,
      "loss": 2.2557,
      "step": 41565
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1246588230133057,
      "learning_rate": 8.802462413713767e-06,
      "loss": 2.4989,
      "step": 41566
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1203186511993408,
      "learning_rate": 8.802053640981046e-06,
      "loss": 2.4816,
      "step": 41567
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.041455864906311,
      "learning_rate": 8.80164487027917e-06,
      "loss": 2.3079,
      "step": 41568
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1010640859603882,
      "learning_rate": 8.80123610160883e-06,
      "loss": 2.5029,
      "step": 41569
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.042715311050415,
      "learning_rate": 8.800827334970723e-06,
      "loss": 2.3858,
      "step": 41570
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.063066005706787,
      "learning_rate": 8.80041857036554e-06,
      "loss": 2.0182,
      "step": 41571
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0903465747833252,
      "learning_rate": 8.80000980779397e-06,
      "loss": 2.3758,
      "step": 41572
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9882153868675232,
      "learning_rate": 8.79960104725671e-06,
      "loss": 2.0072,
      "step": 41573
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0738121271133423,
      "learning_rate": 8.799192288754455e-06,
      "loss": 2.1376,
      "step": 41574
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1082713603973389,
      "learning_rate": 8.798783532287892e-06,
      "loss": 2.3173,
      "step": 41575
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.009199857711792,
      "learning_rate": 8.798374777857719e-06,
      "loss": 2.5238,
      "step": 41576
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0692267417907715,
      "learning_rate": 8.79796602546463e-06,
      "loss": 2.3886,
      "step": 41577
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0572476387023926,
      "learning_rate": 8.797557275109313e-06,
      "loss": 2.4323,
      "step": 41578
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0263274908065796,
      "learning_rate": 8.797148526792462e-06,
      "loss": 2.2089,
      "step": 41579
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0883314609527588,
      "learning_rate": 8.79673978051477e-06,
      "loss": 2.4498,
      "step": 41580
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.114168405532837,
      "learning_rate": 8.796331036276931e-06,
      "loss": 2.4198,
      "step": 41581
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9560215473175049,
      "learning_rate": 8.79592229407964e-06,
      "loss": 2.5625,
      "step": 41582
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1443597078323364,
      "learning_rate": 8.795513553923585e-06,
      "loss": 2.4823,
      "step": 41583
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0619858503341675,
      "learning_rate": 8.795104815809464e-06,
      "loss": 2.3865,
      "step": 41584
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.177981972694397,
      "learning_rate": 8.794696079737967e-06,
      "loss": 2.4469,
      "step": 41585
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9980075359344482,
      "learning_rate": 8.794287345709786e-06,
      "loss": 2.3062,
      "step": 41586
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0369192361831665,
      "learning_rate": 8.793878613725617e-06,
      "loss": 2.3712,
      "step": 41587
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1960644721984863,
      "learning_rate": 8.79346988378615e-06,
      "loss": 2.2645,
      "step": 41588
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0864759683609009,
      "learning_rate": 8.793061155892081e-06,
      "loss": 2.3524,
      "step": 41589
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1333072185516357,
      "learning_rate": 8.792652430044103e-06,
      "loss": 2.3442,
      "step": 41590
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0071253776550293,
      "learning_rate": 8.792243706242903e-06,
      "loss": 2.361,
      "step": 41591
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9995130300521851,
      "learning_rate": 8.79183498448918e-06,
      "loss": 2.357,
      "step": 41592
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1647793054580688,
      "learning_rate": 8.791426264783624e-06,
      "loss": 2.5524,
      "step": 41593
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2585331201553345,
      "learning_rate": 8.791017547126928e-06,
      "loss": 2.3229,
      "step": 41594
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0485085248947144,
      "learning_rate": 8.790608831519787e-06,
      "loss": 2.3647,
      "step": 41595
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0416814088821411,
      "learning_rate": 8.790200117962891e-06,
      "loss": 2.4129,
      "step": 41596
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1113048791885376,
      "learning_rate": 8.789791406456936e-06,
      "loss": 2.1867,
      "step": 41597
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.029906153678894,
      "learning_rate": 8.789382697002613e-06,
      "loss": 2.3541,
      "step": 41598
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1884101629257202,
      "learning_rate": 8.788973989600613e-06,
      "loss": 2.2623,
      "step": 41599
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.075385332107544,
      "learning_rate": 8.788565284251633e-06,
      "loss": 2.2635,
      "step": 41600
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1184595823287964,
      "learning_rate": 8.788156580956362e-06,
      "loss": 2.2496,
      "step": 41601
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9526345133781433,
      "learning_rate": 8.787747879715498e-06,
      "loss": 2.3437,
      "step": 41602
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.055090069770813,
      "learning_rate": 8.787339180529731e-06,
      "loss": 2.4504,
      "step": 41603
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0415217876434326,
      "learning_rate": 8.786930483399752e-06,
      "loss": 2.3803,
      "step": 41604
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.079035997390747,
      "learning_rate": 8.786521788326253e-06,
      "loss": 2.2761,
      "step": 41605
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3202929496765137,
      "learning_rate": 8.786113095309932e-06,
      "loss": 2.4593,
      "step": 41606
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0679998397827148,
      "learning_rate": 8.785704404351476e-06,
      "loss": 2.4262,
      "step": 41607
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1540156602859497,
      "learning_rate": 8.785295715451583e-06,
      "loss": 2.2987,
      "step": 41608
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2217748165130615,
      "learning_rate": 8.784887028610942e-06,
      "loss": 2.2956,
      "step": 41609
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9881227016448975,
      "learning_rate": 8.78447834383025e-06,
      "loss": 2.3293,
      "step": 41610
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0666264295578003,
      "learning_rate": 8.784069661110194e-06,
      "loss": 2.3961,
      "step": 41611
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0147101879119873,
      "learning_rate": 8.783660980451473e-06,
      "loss": 2.3694,
      "step": 41612
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.334585189819336,
      "learning_rate": 8.783252301854778e-06,
      "loss": 2.1531,
      "step": 41613
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0234577655792236,
      "learning_rate": 8.782843625320799e-06,
      "loss": 2.3296,
      "step": 41614
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1987377405166626,
      "learning_rate": 8.782434950850229e-06,
      "loss": 2.5671,
      "step": 41615
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.172603726387024,
      "learning_rate": 8.782026278443765e-06,
      "loss": 2.1907,
      "step": 41616
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1019984483718872,
      "learning_rate": 8.781617608102097e-06,
      "loss": 2.4677,
      "step": 41617
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0366201400756836,
      "learning_rate": 8.781208939825918e-06,
      "loss": 2.4302,
      "step": 41618
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4021133184432983,
      "learning_rate": 8.780800273615919e-06,
      "loss": 2.5913,
      "step": 41619
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0762957334518433,
      "learning_rate": 8.780391609472798e-06,
      "loss": 2.3743,
      "step": 41620
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0701179504394531,
      "learning_rate": 8.779982947397244e-06,
      "loss": 2.2983,
      "step": 41621
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1236157417297363,
      "learning_rate": 8.779574287389946e-06,
      "loss": 2.4674,
      "step": 41622
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1870408058166504,
      "learning_rate": 8.779165629451604e-06,
      "loss": 2.2666,
      "step": 41623
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2089197635650635,
      "learning_rate": 8.778756973582906e-06,
      "loss": 2.4605,
      "step": 41624
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1287325620651245,
      "learning_rate": 8.77834831978455e-06,
      "loss": 2.3333,
      "step": 41625
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.969538152217865,
      "learning_rate": 8.777939668057223e-06,
      "loss": 2.3141,
      "step": 41626
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1238516569137573,
      "learning_rate": 8.77753101840162e-06,
      "loss": 2.2975,
      "step": 41627
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0182616710662842,
      "learning_rate": 8.777122370818434e-06,
      "loss": 2.4025,
      "step": 41628
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0345242023468018,
      "learning_rate": 8.776713725308359e-06,
      "loss": 2.1695,
      "step": 41629
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9865810871124268,
      "learning_rate": 8.77630508187209e-06,
      "loss": 2.25,
      "step": 41630
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1024959087371826,
      "learning_rate": 8.77589644051031e-06,
      "loss": 2.3138,
      "step": 41631
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1783519983291626,
      "learning_rate": 8.77548780122372e-06,
      "loss": 2.1236,
      "step": 41632
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9809486865997314,
      "learning_rate": 8.775079164013012e-06,
      "loss": 2.4398,
      "step": 41633
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0555939674377441,
      "learning_rate": 8.774670528878875e-06,
      "loss": 2.4748,
      "step": 41634
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0965778827667236,
      "learning_rate": 8.774261895822006e-06,
      "loss": 2.2272,
      "step": 41635
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0496251583099365,
      "learning_rate": 8.773853264843095e-06,
      "loss": 2.5232,
      "step": 41636
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9745321869850159,
      "learning_rate": 8.773444635942837e-06,
      "loss": 2.2042,
      "step": 41637
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.054526925086975,
      "learning_rate": 8.773036009121922e-06,
      "loss": 2.3139,
      "step": 41638
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.101048469543457,
      "learning_rate": 8.772627384381044e-06,
      "loss": 2.3728,
      "step": 41639
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.873967170715332,
      "learning_rate": 8.772218761720896e-06,
      "loss": 2.3806,
      "step": 41640
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0695173740386963,
      "learning_rate": 8.771810141142174e-06,
      "loss": 2.3188,
      "step": 41641
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1213947534561157,
      "learning_rate": 8.771401522645564e-06,
      "loss": 2.1792,
      "step": 41642
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0605491399765015,
      "learning_rate": 8.770992906231766e-06,
      "loss": 2.415,
      "step": 41643
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0716099739074707,
      "learning_rate": 8.770584291901465e-06,
      "loss": 2.2588,
      "step": 41644
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9672019481658936,
      "learning_rate": 8.770175679655359e-06,
      "loss": 2.2411,
      "step": 41645
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0740667581558228,
      "learning_rate": 8.769767069494137e-06,
      "loss": 2.2197,
      "step": 41646
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1186230182647705,
      "learning_rate": 8.769358461418497e-06,
      "loss": 2.3627,
      "step": 41647
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1008424758911133,
      "learning_rate": 8.768949855429128e-06,
      "loss": 2.4137,
      "step": 41648
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0348799228668213,
      "learning_rate": 8.768541251526721e-06,
      "loss": 2.2849,
      "step": 41649
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2314050197601318,
      "learning_rate": 8.768132649711973e-06,
      "loss": 2.3678,
      "step": 41650
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9997051954269409,
      "learning_rate": 8.767724049985574e-06,
      "loss": 2.3518,
      "step": 41651
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0210387706756592,
      "learning_rate": 8.767315452348219e-06,
      "loss": 2.2859,
      "step": 41652
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1305739879608154,
      "learning_rate": 8.766906856800596e-06,
      "loss": 2.409,
      "step": 41653
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0154114961624146,
      "learning_rate": 8.766498263343403e-06,
      "loss": 2.1541,
      "step": 41654
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.031908392906189,
      "learning_rate": 8.76608967197733e-06,
      "loss": 2.3488,
      "step": 41655
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2428535223007202,
      "learning_rate": 8.765681082703075e-06,
      "loss": 2.2593,
      "step": 41656
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.107788324356079,
      "learning_rate": 8.76527249552132e-06,
      "loss": 2.3847,
      "step": 41657
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2349292039871216,
      "learning_rate": 8.764863910432764e-06,
      "loss": 2.1546,
      "step": 41658
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0829662084579468,
      "learning_rate": 8.7644553274381e-06,
      "loss": 2.3007,
      "step": 41659
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0807386636734009,
      "learning_rate": 8.764046746538018e-06,
      "loss": 2.3422,
      "step": 41660
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2576987743377686,
      "learning_rate": 8.763638167733214e-06,
      "loss": 2.2246,
      "step": 41661
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2482554912567139,
      "learning_rate": 8.763229591024378e-06,
      "loss": 2.4004,
      "step": 41662
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1155321598052979,
      "learning_rate": 8.762821016412205e-06,
      "loss": 2.3924,
      "step": 41663
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.035420536994934,
      "learning_rate": 8.762412443897386e-06,
      "loss": 2.3284,
      "step": 41664
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.111277461051941,
      "learning_rate": 8.762003873480614e-06,
      "loss": 2.3739,
      "step": 41665
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.097723126411438,
      "learning_rate": 8.761595305162582e-06,
      "loss": 2.2438,
      "step": 41666
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5650272369384766,
      "learning_rate": 8.761186738943981e-06,
      "loss": 2.3438,
      "step": 41667
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.076819658279419,
      "learning_rate": 8.760778174825505e-06,
      "loss": 2.4302,
      "step": 41668
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1648533344268799,
      "learning_rate": 8.760369612807851e-06,
      "loss": 2.4594,
      "step": 41669
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0648417472839355,
      "learning_rate": 8.759961052891704e-06,
      "loss": 2.4768,
      "step": 41670
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1364719867706299,
      "learning_rate": 8.75955249507776e-06,
      "loss": 2.2148,
      "step": 41671
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.045420527458191,
      "learning_rate": 8.759143939366711e-06,
      "loss": 2.3133,
      "step": 41672
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.076133370399475,
      "learning_rate": 8.758735385759249e-06,
      "loss": 2.3791,
      "step": 41673
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0768617391586304,
      "learning_rate": 8.758326834256068e-06,
      "loss": 2.2354,
      "step": 41674
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1341094970703125,
      "learning_rate": 8.757918284857863e-06,
      "loss": 2.2945,
      "step": 41675
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0460457801818848,
      "learning_rate": 8.757509737565321e-06,
      "loss": 2.2481,
      "step": 41676
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.027160882949829,
      "learning_rate": 8.75710119237914e-06,
      "loss": 2.5391,
      "step": 41677
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0591579675674438,
      "learning_rate": 8.756692649300007e-06,
      "loss": 2.3647,
      "step": 41678
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1233714818954468,
      "learning_rate": 8.756284108328618e-06,
      "loss": 2.4061,
      "step": 41679
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2726728916168213,
      "learning_rate": 8.755875569465667e-06,
      "loss": 2.296,
      "step": 41680
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0475496053695679,
      "learning_rate": 8.755467032711842e-06,
      "loss": 2.5289,
      "step": 41681
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9438266754150391,
      "learning_rate": 8.755058498067845e-06,
      "loss": 2.4987,
      "step": 41682
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.192895531654358,
      "learning_rate": 8.754649965534357e-06,
      "loss": 2.5841,
      "step": 41683
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0152205228805542,
      "learning_rate": 8.754241435112076e-06,
      "loss": 2.5797,
      "step": 41684
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0730760097503662,
      "learning_rate": 8.753832906801694e-06,
      "loss": 2.2544,
      "step": 41685
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0316050052642822,
      "learning_rate": 8.753424380603903e-06,
      "loss": 2.2888,
      "step": 41686
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.116189956665039,
      "learning_rate": 8.753015856519396e-06,
      "loss": 2.295,
      "step": 41687
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2505860328674316,
      "learning_rate": 8.752607334548867e-06,
      "loss": 2.3587,
      "step": 41688
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1114325523376465,
      "learning_rate": 8.752198814693008e-06,
      "loss": 2.216,
      "step": 41689
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1048938035964966,
      "learning_rate": 8.751790296952508e-06,
      "loss": 2.3463,
      "step": 41690
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1583929061889648,
      "learning_rate": 8.751381781328065e-06,
      "loss": 2.3267,
      "step": 41691
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2201381921768188,
      "learning_rate": 8.750973267820367e-06,
      "loss": 2.3967,
      "step": 41692
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9775282740592957,
      "learning_rate": 8.750564756430109e-06,
      "loss": 2.3721,
      "step": 41693
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2574604749679565,
      "learning_rate": 8.750156247157984e-06,
      "loss": 2.6276,
      "step": 41694
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3265902996063232,
      "learning_rate": 8.749747740004683e-06,
      "loss": 2.3391,
      "step": 41695
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0560060739517212,
      "learning_rate": 8.749339234970905e-06,
      "loss": 2.3137,
      "step": 41696
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0267385244369507,
      "learning_rate": 8.748930732057331e-06,
      "loss": 2.531,
      "step": 41697
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1607208251953125,
      "learning_rate": 8.74852223126466e-06,
      "loss": 2.3273,
      "step": 41698
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.040572166442871,
      "learning_rate": 8.748113732593582e-06,
      "loss": 2.3335,
      "step": 41699
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1086626052856445,
      "learning_rate": 8.747705236044794e-06,
      "loss": 2.4545,
      "step": 41700
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0999857187271118,
      "learning_rate": 8.747296741618983e-06,
      "loss": 2.4569,
      "step": 41701
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0883313417434692,
      "learning_rate": 8.746888249316845e-06,
      "loss": 2.1994,
      "step": 41702
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0125346183776855,
      "learning_rate": 8.746479759139075e-06,
      "loss": 2.3771,
      "step": 41703
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1358450651168823,
      "learning_rate": 8.746071271086359e-06,
      "loss": 2.0864,
      "step": 41704
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.184037208557129,
      "learning_rate": 8.745662785159392e-06,
      "loss": 2.3582,
      "step": 41705
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0508447885513306,
      "learning_rate": 8.74525430135887e-06,
      "loss": 2.1948,
      "step": 41706
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9321531057357788,
      "learning_rate": 8.74484581968548e-06,
      "loss": 2.2963,
      "step": 41707
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0304839611053467,
      "learning_rate": 8.744437340139919e-06,
      "loss": 2.2979,
      "step": 41708
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.990896999835968,
      "learning_rate": 8.744028862722879e-06,
      "loss": 2.2429,
      "step": 41709
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0327571630477905,
      "learning_rate": 8.74362038743505e-06,
      "loss": 2.3206,
      "step": 41710
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0216572284698486,
      "learning_rate": 8.743211914277126e-06,
      "loss": 2.2404,
      "step": 41711
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.121158480644226,
      "learning_rate": 8.742803443249797e-06,
      "loss": 2.4635,
      "step": 41712
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0897225141525269,
      "learning_rate": 8.74239497435376e-06,
      "loss": 2.3014,
      "step": 41713
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1028194427490234,
      "learning_rate": 8.741986507589703e-06,
      "loss": 2.3002,
      "step": 41714
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.132774829864502,
      "learning_rate": 8.741578042958322e-06,
      "loss": 2.2549,
      "step": 41715
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0232446193695068,
      "learning_rate": 8.741169580460307e-06,
      "loss": 2.4896,
      "step": 41716
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0968536138534546,
      "learning_rate": 8.740761120096352e-06,
      "loss": 2.4427,
      "step": 41717
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0575287342071533,
      "learning_rate": 8.740352661867148e-06,
      "loss": 2.6552,
      "step": 41718
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5873746871948242,
      "learning_rate": 8.73994420577339e-06,
      "loss": 2.365,
      "step": 41719
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.983616828918457,
      "learning_rate": 8.739535751815767e-06,
      "loss": 2.5947,
      "step": 41720
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0488970279693604,
      "learning_rate": 8.739127299994973e-06,
      "loss": 2.4494,
      "step": 41721
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3244034051895142,
      "learning_rate": 8.738718850311708e-06,
      "loss": 2.3542,
      "step": 41722
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.008562684059143,
      "learning_rate": 8.73831040276665e-06,
      "loss": 2.3854,
      "step": 41723
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1556552648544312,
      "learning_rate": 8.737901957360499e-06,
      "loss": 2.3537,
      "step": 41724
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.136913537979126,
      "learning_rate": 8.737493514093948e-06,
      "loss": 2.4998,
      "step": 41725
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.063179850578308,
      "learning_rate": 8.737085072967688e-06,
      "loss": 2.2975,
      "step": 41726
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1643260717391968,
      "learning_rate": 8.73667663398241e-06,
      "loss": 2.2913,
      "step": 41727
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.050888180732727,
      "learning_rate": 8.73626819713881e-06,
      "loss": 2.4786,
      "step": 41728
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9794625639915466,
      "learning_rate": 8.735859762437578e-06,
      "loss": 2.3539,
      "step": 41729
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0703654289245605,
      "learning_rate": 8.735451329879408e-06,
      "loss": 2.3028,
      "step": 41730
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1331335306167603,
      "learning_rate": 8.735042899464993e-06,
      "loss": 2.2115,
      "step": 41731
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0538688898086548,
      "learning_rate": 8.734634471195022e-06,
      "loss": 2.4037,
      "step": 41732
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0689390897750854,
      "learning_rate": 8.734226045070189e-06,
      "loss": 2.3939,
      "step": 41733
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0003278255462646,
      "learning_rate": 8.733817621091185e-06,
      "loss": 2.4439,
      "step": 41734
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1646232604980469,
      "learning_rate": 8.733409199258709e-06,
      "loss": 2.6317,
      "step": 41735
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0925737619400024,
      "learning_rate": 8.733000779573443e-06,
      "loss": 2.2791,
      "step": 41736
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1487174034118652,
      "learning_rate": 8.732592362036088e-06,
      "loss": 2.5272,
      "step": 41737
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.077994465827942,
      "learning_rate": 8.732183946647335e-06,
      "loss": 2.6236,
      "step": 41738
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0702717304229736,
      "learning_rate": 8.731775533407872e-06,
      "loss": 2.3273,
      "step": 41739
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1045218706130981,
      "learning_rate": 8.731367122318392e-06,
      "loss": 2.3762,
      "step": 41740
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0010863542556763,
      "learning_rate": 8.73095871337959e-06,
      "loss": 2.327,
      "step": 41741
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.025631070137024,
      "learning_rate": 8.73055030659216e-06,
      "loss": 2.387,
      "step": 41742
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0174857378005981,
      "learning_rate": 8.73014190195679e-06,
      "loss": 2.3854,
      "step": 41743
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.057646632194519,
      "learning_rate": 8.729733499474173e-06,
      "loss": 2.4209,
      "step": 41744
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1329188346862793,
      "learning_rate": 8.729325099145005e-06,
      "loss": 2.504,
      "step": 41745
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0288151502609253,
      "learning_rate": 8.728916700969977e-06,
      "loss": 2.2064,
      "step": 41746
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9909052848815918,
      "learning_rate": 8.728508304949777e-06,
      "loss": 2.3061,
      "step": 41747
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.081562876701355,
      "learning_rate": 8.728099911085109e-06,
      "loss": 2.5293,
      "step": 41748
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.122158408164978,
      "learning_rate": 8.727691519376649e-06,
      "loss": 2.2397,
      "step": 41749
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0885859727859497,
      "learning_rate": 8.7272831298251e-06,
      "loss": 2.154,
      "step": 41750
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9905291199684143,
      "learning_rate": 8.726874742431151e-06,
      "loss": 2.2188,
      "step": 41751
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9376325607299805,
      "learning_rate": 8.726466357195494e-06,
      "loss": 1.9938,
      "step": 41752
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0099098682403564,
      "learning_rate": 8.726057974118825e-06,
      "loss": 2.3827,
      "step": 41753
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2237915992736816,
      "learning_rate": 8.72564959320183e-06,
      "loss": 2.428,
      "step": 41754
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.182566523551941,
      "learning_rate": 8.725241214445208e-06,
      "loss": 2.3445,
      "step": 41755
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0752778053283691,
      "learning_rate": 8.724832837849649e-06,
      "loss": 2.4867,
      "step": 41756
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1594990491867065,
      "learning_rate": 8.724424463415842e-06,
      "loss": 2.3023,
      "step": 41757
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9674820899963379,
      "learning_rate": 8.724016091144482e-06,
      "loss": 2.0484,
      "step": 41758
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9529306888580322,
      "learning_rate": 8.723607721036266e-06,
      "loss": 2.4224,
      "step": 41759
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0060343742370605,
      "learning_rate": 8.723199353091878e-06,
      "loss": 2.2359,
      "step": 41760
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.024721384048462,
      "learning_rate": 8.722790987312015e-06,
      "loss": 2.4103,
      "step": 41761
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0923923254013062,
      "learning_rate": 8.722382623697368e-06,
      "loss": 2.3944,
      "step": 41762
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0415066480636597,
      "learning_rate": 8.721974262248628e-06,
      "loss": 2.3865,
      "step": 41763
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.075465202331543,
      "learning_rate": 8.72156590296649e-06,
      "loss": 2.3509,
      "step": 41764
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0654524564743042,
      "learning_rate": 8.721157545851646e-06,
      "loss": 2.3829,
      "step": 41765
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.051378607749939,
      "learning_rate": 8.720749190904785e-06,
      "loss": 2.31,
      "step": 41766
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0828460454940796,
      "learning_rate": 8.720340838126604e-06,
      "loss": 2.1861,
      "step": 41767
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1274423599243164,
      "learning_rate": 8.71993248751779e-06,
      "loss": 2.3184,
      "step": 41768
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0262421369552612,
      "learning_rate": 8.719524139079038e-06,
      "loss": 2.1902,
      "step": 41769
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1679831743240356,
      "learning_rate": 8.719115792811043e-06,
      "loss": 2.2079,
      "step": 41770
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.024854302406311,
      "learning_rate": 8.718707448714493e-06,
      "loss": 2.4693,
      "step": 41771
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2492567300796509,
      "learning_rate": 8.718299106790082e-06,
      "loss": 2.3633,
      "step": 41772
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0632421970367432,
      "learning_rate": 8.717890767038501e-06,
      "loss": 2.4159,
      "step": 41773
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.046770453453064,
      "learning_rate": 8.717482429460444e-06,
      "loss": 2.3243,
      "step": 41774
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3063055276870728,
      "learning_rate": 8.717074094056607e-06,
      "loss": 2.1626,
      "step": 41775
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.045438289642334,
      "learning_rate": 8.716665760827675e-06,
      "loss": 2.4446,
      "step": 41776
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9732146859169006,
      "learning_rate": 8.716257429774342e-06,
      "loss": 2.3667,
      "step": 41777
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1126073598861694,
      "learning_rate": 8.7158491008973e-06,
      "loss": 2.6108,
      "step": 41778
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0361443758010864,
      "learning_rate": 8.715440774197244e-06,
      "loss": 2.392,
      "step": 41779
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2591688632965088,
      "learning_rate": 8.715032449674865e-06,
      "loss": 2.4435,
      "step": 41780
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0463327169418335,
      "learning_rate": 8.714624127330855e-06,
      "loss": 2.2503,
      "step": 41781
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0389670133590698,
      "learning_rate": 8.714215807165905e-06,
      "loss": 2.4465,
      "step": 41782
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9786521196365356,
      "learning_rate": 8.71380748918071e-06,
      "loss": 2.5533,
      "step": 41783
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.114292860031128,
      "learning_rate": 8.713399173375961e-06,
      "loss": 2.4367,
      "step": 41784
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.450452208518982,
      "learning_rate": 8.712990859752347e-06,
      "loss": 2.3401,
      "step": 41785
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0868273973464966,
      "learning_rate": 8.712582548310566e-06,
      "loss": 2.1195,
      "step": 41786
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1111363172531128,
      "learning_rate": 8.712174239051307e-06,
      "loss": 2.2219,
      "step": 41787
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9764291048049927,
      "learning_rate": 8.711765931975264e-06,
      "loss": 2.2398,
      "step": 41788
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1797230243682861,
      "learning_rate": 8.711357627083124e-06,
      "loss": 2.2721,
      "step": 41789
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9993240833282471,
      "learning_rate": 8.710949324375584e-06,
      "loss": 2.3496,
      "step": 41790
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.080501914024353,
      "learning_rate": 8.710541023853335e-06,
      "loss": 2.34,
      "step": 41791
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0958400964736938,
      "learning_rate": 8.71013272551707e-06,
      "loss": 2.289,
      "step": 41792
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.034635066986084,
      "learning_rate": 8.70972442936748e-06,
      "loss": 2.3057,
      "step": 41793
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1074512004852295,
      "learning_rate": 8.709316135405257e-06,
      "loss": 2.518,
      "step": 41794
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.151393175125122,
      "learning_rate": 8.708907843631093e-06,
      "loss": 2.2901,
      "step": 41795
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9901745319366455,
      "learning_rate": 8.708499554045683e-06,
      "loss": 2.3007,
      "step": 41796
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.033586859703064,
      "learning_rate": 8.708091266649715e-06,
      "loss": 2.1502,
      "step": 41797
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.123803734779358,
      "learning_rate": 8.707682981443883e-06,
      "loss": 2.4148,
      "step": 41798
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3418174982070923,
      "learning_rate": 8.70727469842888e-06,
      "loss": 2.2356,
      "step": 41799
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1716046333312988,
      "learning_rate": 8.706866417605398e-06,
      "loss": 2.1968,
      "step": 41800
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1244349479675293,
      "learning_rate": 8.706458138974133e-06,
      "loss": 2.213,
      "step": 41801
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0977022647857666,
      "learning_rate": 8.706049862535768e-06,
      "loss": 2.3051,
      "step": 41802
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1328047513961792,
      "learning_rate": 8.705641588290998e-06,
      "loss": 2.3601,
      "step": 41803
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.079608678817749,
      "learning_rate": 8.70523331624052e-06,
      "loss": 2.3316,
      "step": 41804
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.131667137145996,
      "learning_rate": 8.704825046385022e-06,
      "loss": 2.2044,
      "step": 41805
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0300099849700928,
      "learning_rate": 8.704416778725198e-06,
      "loss": 2.1758,
      "step": 41806
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1454358100891113,
      "learning_rate": 8.704008513261737e-06,
      "loss": 2.7309,
      "step": 41807
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1317822933197021,
      "learning_rate": 8.703600249995336e-06,
      "loss": 2.2607,
      "step": 41808
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1067006587982178,
      "learning_rate": 8.703191988926684e-06,
      "loss": 2.3507,
      "step": 41809
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1251901388168335,
      "learning_rate": 8.702783730056475e-06,
      "loss": 2.7027,
      "step": 41810
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0357164144515991,
      "learning_rate": 8.702375473385398e-06,
      "loss": 2.1322,
      "step": 41811
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.079961895942688,
      "learning_rate": 8.701967218914146e-06,
      "loss": 2.1725,
      "step": 41812
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0423370599746704,
      "learning_rate": 8.701558966643415e-06,
      "loss": 2.2964,
      "step": 41813
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.238242745399475,
      "learning_rate": 8.701150716573897e-06,
      "loss": 2.3963,
      "step": 41814
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.020889163017273,
      "learning_rate": 8.700742468706277e-06,
      "loss": 2.5974,
      "step": 41815
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2093818187713623,
      "learning_rate": 8.700334223041251e-06,
      "loss": 2.4229,
      "step": 41816
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0957785844802856,
      "learning_rate": 8.699925979579514e-06,
      "loss": 2.5955,
      "step": 41817
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1904693841934204,
      "learning_rate": 8.699517738321752e-06,
      "loss": 2.3859,
      "step": 41818
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.097242832183838,
      "learning_rate": 8.699109499268664e-06,
      "loss": 2.4106,
      "step": 41819
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2094305753707886,
      "learning_rate": 8.698701262420937e-06,
      "loss": 2.6705,
      "step": 41820
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0203264951705933,
      "learning_rate": 8.698293027779266e-06,
      "loss": 2.2409,
      "step": 41821
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0679386854171753,
      "learning_rate": 8.69788479534434e-06,
      "loss": 2.3999,
      "step": 41822
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0815656185150146,
      "learning_rate": 8.697476565116854e-06,
      "loss": 2.2574,
      "step": 41823
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0701278448104858,
      "learning_rate": 8.697068337097498e-06,
      "loss": 2.2227,
      "step": 41824
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3177013397216797,
      "learning_rate": 8.696660111286966e-06,
      "loss": 2.3206,
      "step": 41825
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0941718816757202,
      "learning_rate": 8.696251887685949e-06,
      "loss": 2.2992,
      "step": 41826
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0486583709716797,
      "learning_rate": 8.69584366629514e-06,
      "loss": 2.2302,
      "step": 41827
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0741643905639648,
      "learning_rate": 8.695435447115232e-06,
      "loss": 2.3121,
      "step": 41828
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0502099990844727,
      "learning_rate": 8.695027230146913e-06,
      "loss": 2.349,
      "step": 41829
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9659748673439026,
      "learning_rate": 8.694619015390877e-06,
      "loss": 2.4146,
      "step": 41830
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.174737572669983,
      "learning_rate": 8.694210802847815e-06,
      "loss": 2.4872,
      "step": 41831
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0885223150253296,
      "learning_rate": 8.69380259251842e-06,
      "loss": 2.1507,
      "step": 41832
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.027867317199707,
      "learning_rate": 8.693394384403386e-06,
      "loss": 2.3663,
      "step": 41833
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2014622688293457,
      "learning_rate": 8.692986178503404e-06,
      "loss": 2.4006,
      "step": 41834
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1172438859939575,
      "learning_rate": 8.692577974819164e-06,
      "loss": 2.296,
      "step": 41835
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2026238441467285,
      "learning_rate": 8.69216977335136e-06,
      "loss": 2.2046,
      "step": 41836
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1262792348861694,
      "learning_rate": 8.691761574100684e-06,
      "loss": 2.3362,
      "step": 41837
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.108680009841919,
      "learning_rate": 8.691353377067825e-06,
      "loss": 2.5374,
      "step": 41838
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1870226860046387,
      "learning_rate": 8.69094518225348e-06,
      "loss": 2.4034,
      "step": 41839
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0540788173675537,
      "learning_rate": 8.690536989658338e-06,
      "loss": 2.5425,
      "step": 41840
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.146605134010315,
      "learning_rate": 8.690128799283094e-06,
      "loss": 2.2513,
      "step": 41841
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0503555536270142,
      "learning_rate": 8.689720611128436e-06,
      "loss": 2.1322,
      "step": 41842
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.074537992477417,
      "learning_rate": 8.689312425195055e-06,
      "loss": 2.3123,
      "step": 41843
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2364190816879272,
      "learning_rate": 8.688904241483647e-06,
      "loss": 2.2537,
      "step": 41844
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.0002411603927612,
      "learning_rate": 8.6884960599949e-06,
      "loss": 2.4864,
      "step": 41845
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1190131902694702,
      "learning_rate": 8.68808788072951e-06,
      "loss": 2.5076,
      "step": 41846
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9976622462272644,
      "learning_rate": 8.687679703688168e-06,
      "loss": 2.4551,
      "step": 41847
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.126905918121338,
      "learning_rate": 8.687271528871566e-06,
      "loss": 2.6762,
      "step": 41848
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1582982540130615,
      "learning_rate": 8.686863356280396e-06,
      "loss": 2.3359,
      "step": 41849
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2516474723815918,
      "learning_rate": 8.686455185915347e-06,
      "loss": 2.532,
      "step": 41850
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1397380828857422,
      "learning_rate": 8.686047017777114e-06,
      "loss": 2.3452,
      "step": 41851
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1731820106506348,
      "learning_rate": 8.685638851866388e-06,
      "loss": 2.3442,
      "step": 41852
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9747477173805237,
      "learning_rate": 8.68523068818386e-06,
      "loss": 2.3085,
      "step": 41853
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.999874472618103,
      "learning_rate": 8.684822526730225e-06,
      "loss": 2.3031,
      "step": 41854
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1773371696472168,
      "learning_rate": 8.684414367506174e-06,
      "loss": 2.245,
      "step": 41855
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9984362125396729,
      "learning_rate": 8.684006210512397e-06,
      "loss": 2.3351,
      "step": 41856
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.298508882522583,
      "learning_rate": 8.683598055749586e-06,
      "loss": 2.4885,
      "step": 41857
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0617674589157104,
      "learning_rate": 8.683189903218433e-06,
      "loss": 2.1812,
      "step": 41858
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0955708026885986,
      "learning_rate": 8.68278175291963e-06,
      "loss": 2.1627,
      "step": 41859
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.162792444229126,
      "learning_rate": 8.682373604853872e-06,
      "loss": 2.251,
      "step": 41860
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0888036489486694,
      "learning_rate": 8.681965459021849e-06,
      "loss": 2.4865,
      "step": 41861
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0368596315383911,
      "learning_rate": 8.68155731542425e-06,
      "loss": 2.2355,
      "step": 41862
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1675701141357422,
      "learning_rate": 8.68114917406177e-06,
      "loss": 2.2537,
      "step": 41863
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1483657360076904,
      "learning_rate": 8.680741034935102e-06,
      "loss": 2.4531,
      "step": 41864
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0186450481414795,
      "learning_rate": 8.680332898044934e-06,
      "loss": 2.3297,
      "step": 41865
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9806846976280212,
      "learning_rate": 8.679924763391963e-06,
      "loss": 2.453,
      "step": 41866
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1645478010177612,
      "learning_rate": 8.67951663097688e-06,
      "loss": 2.3483,
      "step": 41867
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1825933456420898,
      "learning_rate": 8.679108500800371e-06,
      "loss": 2.3233,
      "step": 41868
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.004686713218689,
      "learning_rate": 8.678700372863133e-06,
      "loss": 2.3155,
      "step": 41869
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0995479822158813,
      "learning_rate": 8.678292247165855e-06,
      "loss": 2.1899,
      "step": 41870
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.129945993423462,
      "learning_rate": 8.677884123709231e-06,
      "loss": 2.2782,
      "step": 41871
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9999122619628906,
      "learning_rate": 8.677476002493954e-06,
      "loss": 2.0534,
      "step": 41872
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0187208652496338,
      "learning_rate": 8.677067883520713e-06,
      "loss": 2.4437,
      "step": 41873
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1492129564285278,
      "learning_rate": 8.676659766790202e-06,
      "loss": 2.2415,
      "step": 41874
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.046359658241272,
      "learning_rate": 8.676251652303111e-06,
      "loss": 2.159,
      "step": 41875
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0502725839614868,
      "learning_rate": 8.675843540060133e-06,
      "loss": 2.3212,
      "step": 41876
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0992642641067505,
      "learning_rate": 8.675435430061964e-06,
      "loss": 2.2276,
      "step": 41877
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2397124767303467,
      "learning_rate": 8.675027322309287e-06,
      "loss": 2.3789,
      "step": 41878
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1364742517471313,
      "learning_rate": 8.6746192168028e-06,
      "loss": 2.6205,
      "step": 41879
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0802969932556152,
      "learning_rate": 8.674211113543195e-06,
      "loss": 2.3973,
      "step": 41880
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9696109890937805,
      "learning_rate": 8.673803012531161e-06,
      "loss": 2.3982,
      "step": 41881
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1991127729415894,
      "learning_rate": 8.67339491376739e-06,
      "loss": 2.2068,
      "step": 41882
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2040432691574097,
      "learning_rate": 8.672986817252578e-06,
      "loss": 2.4362,
      "step": 41883
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0873901844024658,
      "learning_rate": 8.672578722987412e-06,
      "loss": 2.2442,
      "step": 41884
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3693792819976807,
      "learning_rate": 8.672170630972584e-06,
      "loss": 2.2965,
      "step": 41885
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0468475818634033,
      "learning_rate": 8.671762541208788e-06,
      "loss": 2.1685,
      "step": 41886
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9946452975273132,
      "learning_rate": 8.671354453696715e-06,
      "loss": 2.3828,
      "step": 41887
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0097697973251343,
      "learning_rate": 8.670946368437058e-06,
      "loss": 2.1956,
      "step": 41888
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0425317287445068,
      "learning_rate": 8.670538285430508e-06,
      "loss": 2.2926,
      "step": 41889
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9839459657669067,
      "learning_rate": 8.670130204677755e-06,
      "loss": 2.1217,
      "step": 41890
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0166748762130737,
      "learning_rate": 8.669722126179494e-06,
      "loss": 2.152,
      "step": 41891
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0079312324523926,
      "learning_rate": 8.669314049936414e-06,
      "loss": 2.379,
      "step": 41892
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0522997379302979,
      "learning_rate": 8.668905975949213e-06,
      "loss": 2.3461,
      "step": 41893
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1095073223114014,
      "learning_rate": 8.668497904218574e-06,
      "loss": 2.2466,
      "step": 41894
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.075203776359558,
      "learning_rate": 8.668089834745192e-06,
      "loss": 2.2609,
      "step": 41895
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1402069330215454,
      "learning_rate": 8.66768176752976e-06,
      "loss": 2.3343,
      "step": 41896
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.063451886177063,
      "learning_rate": 8.667273702572971e-06,
      "loss": 2.6027,
      "step": 41897
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3789302110671997,
      "learning_rate": 8.666865639875513e-06,
      "loss": 2.296,
      "step": 41898
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2157504558563232,
      "learning_rate": 8.666457579438078e-06,
      "loss": 2.4819,
      "step": 41899
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0187140703201294,
      "learning_rate": 8.666049521261362e-06,
      "loss": 2.3888,
      "step": 41900
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0000320672988892,
      "learning_rate": 8.665641465346055e-06,
      "loss": 2.5436,
      "step": 41901
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9895070791244507,
      "learning_rate": 8.665233411692847e-06,
      "loss": 2.3503,
      "step": 41902
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.068560242652893,
      "learning_rate": 8.66482536030243e-06,
      "loss": 2.3422,
      "step": 41903
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1646294593811035,
      "learning_rate": 8.6644173111755e-06,
      "loss": 2.4262,
      "step": 41904
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9838100671768188,
      "learning_rate": 8.664009264312743e-06,
      "loss": 2.2062,
      "step": 41905
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0283738374710083,
      "learning_rate": 8.663601219714855e-06,
      "loss": 2.2494,
      "step": 41906
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9865911602973938,
      "learning_rate": 8.663193177382526e-06,
      "loss": 2.3764,
      "step": 41907
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.040848731994629,
      "learning_rate": 8.662785137316445e-06,
      "loss": 2.3958,
      "step": 41908
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9600013494491577,
      "learning_rate": 8.662377099517307e-06,
      "loss": 2.2269,
      "step": 41909
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1347719430923462,
      "learning_rate": 8.661969063985804e-06,
      "loss": 2.4512,
      "step": 41910
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2070986032485962,
      "learning_rate": 8.661561030722627e-06,
      "loss": 2.4306,
      "step": 41911
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0920774936676025,
      "learning_rate": 8.661152999728467e-06,
      "loss": 2.194,
      "step": 41912
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0508962869644165,
      "learning_rate": 8.660744971004015e-06,
      "loss": 2.3882,
      "step": 41913
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.010017991065979,
      "learning_rate": 8.660336944549966e-06,
      "loss": 2.3371,
      "step": 41914
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0828171968460083,
      "learning_rate": 8.659928920367008e-06,
      "loss": 2.3522,
      "step": 41915
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0965877771377563,
      "learning_rate": 8.659520898455835e-06,
      "loss": 2.2945,
      "step": 41916
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1886186599731445,
      "learning_rate": 8.659112878817138e-06,
      "loss": 2.3654,
      "step": 41917
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0679349899291992,
      "learning_rate": 8.65870486145161e-06,
      "loss": 2.2179,
      "step": 41918
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.106431245803833,
      "learning_rate": 8.658296846359941e-06,
      "loss": 2.475,
      "step": 41919
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2031292915344238,
      "learning_rate": 8.657888833542827e-06,
      "loss": 2.4929,
      "step": 41920
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.098196268081665,
      "learning_rate": 8.65748082300095e-06,
      "loss": 2.289,
      "step": 41921
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1390666961669922,
      "learning_rate": 8.65707281473501e-06,
      "loss": 2.2651,
      "step": 41922
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1123342514038086,
      "learning_rate": 8.656664808745695e-06,
      "loss": 2.2456,
      "step": 41923
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1240630149841309,
      "learning_rate": 8.6562568050337e-06,
      "loss": 2.1112,
      "step": 41924
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0787475109100342,
      "learning_rate": 8.655848803599714e-06,
      "loss": 2.5127,
      "step": 41925
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0403202772140503,
      "learning_rate": 8.655440804444428e-06,
      "loss": 2.4994,
      "step": 41926
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9610206484794617,
      "learning_rate": 8.655032807568536e-06,
      "loss": 2.4585,
      "step": 41927
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1648463010787964,
      "learning_rate": 8.654624812972728e-06,
      "loss": 2.3477,
      "step": 41928
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.117104172706604,
      "learning_rate": 8.654216820657698e-06,
      "loss": 2.2489,
      "step": 41929
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9583392143249512,
      "learning_rate": 8.653808830624134e-06,
      "loss": 2.3276,
      "step": 41930
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1604126691818237,
      "learning_rate": 8.65340084287273e-06,
      "loss": 2.4475,
      "step": 41931
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2353541851043701,
      "learning_rate": 8.65299285740418e-06,
      "loss": 2.2308,
      "step": 41932
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1436994075775146,
      "learning_rate": 8.652584874219173e-06,
      "loss": 2.4891,
      "step": 41933
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9825790524482727,
      "learning_rate": 8.652176893318398e-06,
      "loss": 2.4917,
      "step": 41934
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1437262296676636,
      "learning_rate": 8.65176891470255e-06,
      "loss": 2.2489,
      "step": 41935
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.200439214706421,
      "learning_rate": 8.65136093837232e-06,
      "loss": 2.3191,
      "step": 41936
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0004701614379883,
      "learning_rate": 8.650952964328399e-06,
      "loss": 2.3971,
      "step": 41937
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1264556646347046,
      "learning_rate": 8.650544992571478e-06,
      "loss": 2.3352,
      "step": 41938
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0876916646957397,
      "learning_rate": 8.650137023102253e-06,
      "loss": 2.2457,
      "step": 41939
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0963561534881592,
      "learning_rate": 8.64972905592141e-06,
      "loss": 2.4712,
      "step": 41940
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.008329153060913,
      "learning_rate": 8.649321091029643e-06,
      "loss": 2.094,
      "step": 41941
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9846673011779785,
      "learning_rate": 8.648913128427644e-06,
      "loss": 2.4262,
      "step": 41942
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1506543159484863,
      "learning_rate": 8.648505168116103e-06,
      "loss": 2.4432,
      "step": 41943
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1095157861709595,
      "learning_rate": 8.648097210095714e-06,
      "loss": 2.2977,
      "step": 41944
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0365710258483887,
      "learning_rate": 8.647689254367167e-06,
      "loss": 2.3759,
      "step": 41945
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2186827659606934,
      "learning_rate": 8.647281300931159e-06,
      "loss": 2.3866,
      "step": 41946
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1286594867706299,
      "learning_rate": 8.646873349788372e-06,
      "loss": 2.3583,
      "step": 41947
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.016506552696228,
      "learning_rate": 8.646465400939502e-06,
      "loss": 2.5351,
      "step": 41948
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0866525173187256,
      "learning_rate": 8.64605745438524e-06,
      "loss": 2.2858,
      "step": 41949
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2036683559417725,
      "learning_rate": 8.64564951012628e-06,
      "loss": 2.3003,
      "step": 41950
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9832481741905212,
      "learning_rate": 8.64524156816331e-06,
      "loss": 2.3325,
      "step": 41951
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0923227071762085,
      "learning_rate": 8.644833628497025e-06,
      "loss": 2.3459,
      "step": 41952
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0766607522964478,
      "learning_rate": 8.644425691128114e-06,
      "loss": 2.4273,
      "step": 41953
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0086627006530762,
      "learning_rate": 8.64401775605727e-06,
      "loss": 2.5149,
      "step": 41954
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9987513422966003,
      "learning_rate": 8.643609823285184e-06,
      "loss": 2.3412,
      "step": 41955
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9961777329444885,
      "learning_rate": 8.643201892812547e-06,
      "loss": 2.2055,
      "step": 41956
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0669682025909424,
      "learning_rate": 8.642793964640052e-06,
      "loss": 2.3061,
      "step": 41957
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.052257776260376,
      "learning_rate": 8.642386038768392e-06,
      "loss": 2.388,
      "step": 41958
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0334516763687134,
      "learning_rate": 8.641978115198257e-06,
      "loss": 2.3451,
      "step": 41959
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1381897926330566,
      "learning_rate": 8.641570193930336e-06,
      "loss": 2.3795,
      "step": 41960
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0685573816299438,
      "learning_rate": 8.641162274965322e-06,
      "loss": 2.1844,
      "step": 41961
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0642566680908203,
      "learning_rate": 8.640754358303906e-06,
      "loss": 2.1639,
      "step": 41962
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0866894721984863,
      "learning_rate": 8.640346443946781e-06,
      "loss": 2.4448,
      "step": 41963
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0087517499923706,
      "learning_rate": 8.63993853189464e-06,
      "loss": 2.525,
      "step": 41964
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0707789659500122,
      "learning_rate": 8.639530622148171e-06,
      "loss": 2.2952,
      "step": 41965
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2656285762786865,
      "learning_rate": 8.639122714708066e-06,
      "loss": 2.1795,
      "step": 41966
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1534769535064697,
      "learning_rate": 8.638714809575022e-06,
      "loss": 2.2342,
      "step": 41967
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.131239652633667,
      "learning_rate": 8.638306906749723e-06,
      "loss": 2.4573,
      "step": 41968
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0948543548583984,
      "learning_rate": 8.637899006232863e-06,
      "loss": 2.2895,
      "step": 41969
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.119840383529663,
      "learning_rate": 8.637491108025134e-06,
      "loss": 2.2679,
      "step": 41970
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.064894437789917,
      "learning_rate": 8.637083212127229e-06,
      "loss": 2.2618,
      "step": 41971
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1708683967590332,
      "learning_rate": 8.636675318539838e-06,
      "loss": 2.6474,
      "step": 41972
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2463459968566895,
      "learning_rate": 8.636267427263655e-06,
      "loss": 2.404,
      "step": 41973
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0124688148498535,
      "learning_rate": 8.635859538299366e-06,
      "loss": 2.3215,
      "step": 41974
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1432466506958008,
      "learning_rate": 8.635451651647666e-06,
      "loss": 2.3247,
      "step": 41975
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0485398769378662,
      "learning_rate": 8.635043767309246e-06,
      "loss": 2.4599,
      "step": 41976
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0637527704238892,
      "learning_rate": 8.634635885284797e-06,
      "loss": 2.429,
      "step": 41977
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1770814657211304,
      "learning_rate": 8.634228005575011e-06,
      "loss": 2.4886,
      "step": 41978
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1002603769302368,
      "learning_rate": 8.63382012818058e-06,
      "loss": 2.2108,
      "step": 41979
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0638506412506104,
      "learning_rate": 8.633412253102193e-06,
      "loss": 2.3997,
      "step": 41980
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2819310426712036,
      "learning_rate": 8.633004380340546e-06,
      "loss": 2.3552,
      "step": 41981
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9852681159973145,
      "learning_rate": 8.632596509896328e-06,
      "loss": 2.4664,
      "step": 41982
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0879663228988647,
      "learning_rate": 8.632188641770229e-06,
      "loss": 2.5053,
      "step": 41983
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1205840110778809,
      "learning_rate": 8.631780775962942e-06,
      "loss": 2.4307,
      "step": 41984
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.098016619682312,
      "learning_rate": 8.631372912475158e-06,
      "loss": 2.4315,
      "step": 41985
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9939536452293396,
      "learning_rate": 8.63096505130757e-06,
      "loss": 2.1189,
      "step": 41986
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1695042848587036,
      "learning_rate": 8.630557192460869e-06,
      "loss": 2.2702,
      "step": 41987
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.962144672870636,
      "learning_rate": 8.630149335935743e-06,
      "loss": 2.2677,
      "step": 41988
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.063557744026184,
      "learning_rate": 8.629741481732886e-06,
      "loss": 2.2781,
      "step": 41989
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1256273984909058,
      "learning_rate": 8.629333629852989e-06,
      "loss": 2.2206,
      "step": 41990
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.019773244857788,
      "learning_rate": 8.628925780296746e-06,
      "loss": 2.4461,
      "step": 41991
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.057749629020691,
      "learning_rate": 8.628517933064844e-06,
      "loss": 2.3004,
      "step": 41992
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.015906810760498,
      "learning_rate": 8.628110088157977e-06,
      "loss": 2.3773,
      "step": 41993
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0558719635009766,
      "learning_rate": 8.627702245576836e-06,
      "loss": 2.2977,
      "step": 41994
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0708101987838745,
      "learning_rate": 8.627294405322114e-06,
      "loss": 2.4983,
      "step": 41995
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0007107257843018,
      "learning_rate": 8.626886567394498e-06,
      "loss": 2.2491,
      "step": 41996
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9850972890853882,
      "learning_rate": 8.626478731794685e-06,
      "loss": 2.4559,
      "step": 41997
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.206721305847168,
      "learning_rate": 8.626070898523362e-06,
      "loss": 2.2885,
      "step": 41998
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.063375473022461,
      "learning_rate": 8.625663067581225e-06,
      "loss": 2.2809,
      "step": 41999
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.098062515258789,
      "learning_rate": 8.625255238968959e-06,
      "loss": 2.4605,
      "step": 42000
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.12765634059906,
      "learning_rate": 8.624847412687258e-06,
      "loss": 2.5417,
      "step": 42001
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2278251647949219,
      "learning_rate": 8.624439588736818e-06,
      "loss": 2.3741,
      "step": 42002
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0917068719863892,
      "learning_rate": 8.624031767118323e-06,
      "loss": 2.3899,
      "step": 42003
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.113998293876648,
      "learning_rate": 8.62362394783247e-06,
      "loss": 2.4946,
      "step": 42004
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2255003452301025,
      "learning_rate": 8.623216130879948e-06,
      "loss": 2.1937,
      "step": 42005
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9879331588745117,
      "learning_rate": 8.622808316261447e-06,
      "loss": 2.3924,
      "step": 42006
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.063215732574463,
      "learning_rate": 8.622400503977659e-06,
      "loss": 2.4485,
      "step": 42007
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5796356201171875,
      "learning_rate": 8.621992694029277e-06,
      "loss": 2.3024,
      "step": 42008
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2351062297821045,
      "learning_rate": 8.621584886416995e-06,
      "loss": 2.4382,
      "step": 42009
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1260571479797363,
      "learning_rate": 8.621177081141496e-06,
      "loss": 2.3584,
      "step": 42010
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1961368322372437,
      "learning_rate": 8.62076927820348e-06,
      "loss": 2.3254,
      "step": 42011
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0595107078552246,
      "learning_rate": 8.620361477603637e-06,
      "loss": 2.3244,
      "step": 42012
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0962791442871094,
      "learning_rate": 8.619953679342652e-06,
      "loss": 2.111,
      "step": 42013
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1692206859588623,
      "learning_rate": 8.619545883421221e-06,
      "loss": 2.4828,
      "step": 42014
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0498883724212646,
      "learning_rate": 8.619138089840034e-06,
      "loss": 2.1618,
      "step": 42015
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.156521201133728,
      "learning_rate": 8.618730298599783e-06,
      "loss": 2.289,
      "step": 42016
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0246729850769043,
      "learning_rate": 8.61832250970116e-06,
      "loss": 2.3656,
      "step": 42017
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0285227298736572,
      "learning_rate": 8.617914723144856e-06,
      "loss": 2.3301,
      "step": 42018
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0481414794921875,
      "learning_rate": 8.61750693893156e-06,
      "loss": 2.2607,
      "step": 42019
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2478549480438232,
      "learning_rate": 8.617099157061967e-06,
      "loss": 2.3288,
      "step": 42020
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0342628955841064,
      "learning_rate": 8.616691377536766e-06,
      "loss": 2.3784,
      "step": 42021
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1094099283218384,
      "learning_rate": 8.616283600356647e-06,
      "loss": 2.2752,
      "step": 42022
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9974688291549683,
      "learning_rate": 8.615875825522308e-06,
      "loss": 2.2298,
      "step": 42023
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0046616792678833,
      "learning_rate": 8.615468053034432e-06,
      "loss": 2.2445,
      "step": 42024
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9984851479530334,
      "learning_rate": 8.615060282893718e-06,
      "loss": 2.1224,
      "step": 42025
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9963381886482239,
      "learning_rate": 8.614652515100848e-06,
      "loss": 2.5038,
      "step": 42026
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.030371904373169,
      "learning_rate": 8.614244749656518e-06,
      "loss": 2.4367,
      "step": 42027
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1596952676773071,
      "learning_rate": 8.61383698656142e-06,
      "loss": 2.476,
      "step": 42028
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0241870880126953,
      "learning_rate": 8.613429225816248e-06,
      "loss": 2.4176,
      "step": 42029
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.098744511604309,
      "learning_rate": 8.613021467421687e-06,
      "loss": 2.3652,
      "step": 42030
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.290509581565857,
      "learning_rate": 8.612613711378432e-06,
      "loss": 2.3003,
      "step": 42031
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1020283699035645,
      "learning_rate": 8.612205957687174e-06,
      "loss": 2.1297,
      "step": 42032
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3206877708435059,
      "learning_rate": 8.611798206348602e-06,
      "loss": 2.3078,
      "step": 42033
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.303774356842041,
      "learning_rate": 8.611390457363412e-06,
      "loss": 2.3125,
      "step": 42034
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0235365629196167,
      "learning_rate": 8.61098271073229e-06,
      "loss": 2.387,
      "step": 42035
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1178711652755737,
      "learning_rate": 8.610574966455928e-06,
      "loss": 2.3562,
      "step": 42036
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1027798652648926,
      "learning_rate": 8.610167224535021e-06,
      "loss": 2.4149,
      "step": 42037
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1251471042633057,
      "learning_rate": 8.609759484970264e-06,
      "loss": 2.3989,
      "step": 42038
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0352509021759033,
      "learning_rate": 8.609351747762336e-06,
      "loss": 2.1404,
      "step": 42039
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.011715054512024,
      "learning_rate": 8.608944012911933e-06,
      "loss": 2.2442,
      "step": 42040
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0168825387954712,
      "learning_rate": 8.60853628041975e-06,
      "loss": 2.4244,
      "step": 42041
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.15892493724823,
      "learning_rate": 8.608128550286475e-06,
      "loss": 2.4423,
      "step": 42042
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0385321378707886,
      "learning_rate": 8.6077208225128e-06,
      "loss": 2.2359,
      "step": 42043
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0618176460266113,
      "learning_rate": 8.607313097099417e-06,
      "loss": 2.6213,
      "step": 42044
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2013474702835083,
      "learning_rate": 8.606905374047016e-06,
      "loss": 2.4195,
      "step": 42045
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.02436101436615,
      "learning_rate": 8.606497653356289e-06,
      "loss": 2.4527,
      "step": 42046
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1343785524368286,
      "learning_rate": 8.606089935027928e-06,
      "loss": 2.2461,
      "step": 42047
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0772404670715332,
      "learning_rate": 8.605682219062622e-06,
      "loss": 2.39,
      "step": 42048
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0264029502868652,
      "learning_rate": 8.605274505461063e-06,
      "loss": 2.2093,
      "step": 42049
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0471575260162354,
      "learning_rate": 8.604866794223943e-06,
      "loss": 2.5786,
      "step": 42050
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2501026391983032,
      "learning_rate": 8.604459085351954e-06,
      "loss": 2.2766,
      "step": 42051
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1259336471557617,
      "learning_rate": 8.604051378845788e-06,
      "loss": 2.4565,
      "step": 42052
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.163710355758667,
      "learning_rate": 8.60364367470613e-06,
      "loss": 2.4594,
      "step": 42053
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0024335384368896,
      "learning_rate": 8.603235972933675e-06,
      "loss": 2.2881,
      "step": 42054
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0245734453201294,
      "learning_rate": 8.602828273529116e-06,
      "loss": 2.2147,
      "step": 42055
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0472044944763184,
      "learning_rate": 8.602420576493142e-06,
      "loss": 2.3339,
      "step": 42056
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.975033164024353,
      "learning_rate": 8.602012881826446e-06,
      "loss": 2.3586,
      "step": 42057
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9744388461112976,
      "learning_rate": 8.601605189529717e-06,
      "loss": 2.1348,
      "step": 42058
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1401509046554565,
      "learning_rate": 8.601197499603646e-06,
      "loss": 2.3707,
      "step": 42059
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.928793728351593,
      "learning_rate": 8.600789812048925e-06,
      "loss": 2.3908,
      "step": 42060
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.022150993347168,
      "learning_rate": 8.600382126866246e-06,
      "loss": 2.4378,
      "step": 42061
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2539196014404297,
      "learning_rate": 8.599974444056299e-06,
      "loss": 2.4655,
      "step": 42062
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.073621153831482,
      "learning_rate": 8.599566763619775e-06,
      "loss": 2.1709,
      "step": 42063
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0871620178222656,
      "learning_rate": 8.599159085557367e-06,
      "loss": 2.5113,
      "step": 42064
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0656330585479736,
      "learning_rate": 8.598751409869769e-06,
      "loss": 2.3953,
      "step": 42065
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0397804975509644,
      "learning_rate": 8.598343736557663e-06,
      "loss": 2.2696,
      "step": 42066
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.11519455909729,
      "learning_rate": 8.597936065621744e-06,
      "loss": 2.6623,
      "step": 42067
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1162896156311035,
      "learning_rate": 8.597528397062707e-06,
      "loss": 2.2212,
      "step": 42068
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9332911372184753,
      "learning_rate": 8.597120730881238e-06,
      "loss": 2.1979,
      "step": 42069
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1458384990692139,
      "learning_rate": 8.59671306707803e-06,
      "loss": 2.5731,
      "step": 42070
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0504683256149292,
      "learning_rate": 8.596305405653777e-06,
      "loss": 2.4181,
      "step": 42071
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0917502641677856,
      "learning_rate": 8.595897746609165e-06,
      "loss": 2.3014,
      "step": 42072
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2324562072753906,
      "learning_rate": 8.595490089944889e-06,
      "loss": 2.1363,
      "step": 42073
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0942323207855225,
      "learning_rate": 8.595082435661638e-06,
      "loss": 2.6402,
      "step": 42074
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.035301923751831,
      "learning_rate": 8.594674783760104e-06,
      "loss": 2.3473,
      "step": 42075
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.061896800994873,
      "learning_rate": 8.59426713424098e-06,
      "loss": 2.3796,
      "step": 42076
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.197869896888733,
      "learning_rate": 8.593859487104952e-06,
      "loss": 2.4044,
      "step": 42077
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0163569450378418,
      "learning_rate": 8.59345184235272e-06,
      "loss": 2.2733,
      "step": 42078
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.093622088432312,
      "learning_rate": 8.593044199984964e-06,
      "loss": 2.1182,
      "step": 42079
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.014565348625183,
      "learning_rate": 8.59263656000238e-06,
      "loss": 2.4102,
      "step": 42080
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1750035285949707,
      "learning_rate": 8.592228922405661e-06,
      "loss": 2.599,
      "step": 42081
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.224780797958374,
      "learning_rate": 8.591821287195495e-06,
      "loss": 2.1373,
      "step": 42082
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1508524417877197,
      "learning_rate": 8.591413654372575e-06,
      "loss": 2.2767,
      "step": 42083
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1183288097381592,
      "learning_rate": 8.591006023937591e-06,
      "loss": 2.3287,
      "step": 42084
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.136492133140564,
      "learning_rate": 8.590598395891236e-06,
      "loss": 2.3072,
      "step": 42085
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0642434358596802,
      "learning_rate": 8.590190770234198e-06,
      "loss": 2.4934,
      "step": 42086
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1061056852340698,
      "learning_rate": 8.58978314696717e-06,
      "loss": 2.3958,
      "step": 42087
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9929147958755493,
      "learning_rate": 8.589375526090842e-06,
      "loss": 2.4121,
      "step": 42088
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2148911952972412,
      "learning_rate": 8.588967907605905e-06,
      "loss": 2.2909,
      "step": 42089
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1881964206695557,
      "learning_rate": 8.588560291513052e-06,
      "loss": 2.4553,
      "step": 42090
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0954710245132446,
      "learning_rate": 8.588152677812975e-06,
      "loss": 2.4308,
      "step": 42091
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9556474089622498,
      "learning_rate": 8.587745066506361e-06,
      "loss": 2.2839,
      "step": 42092
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0456585884094238,
      "learning_rate": 8.587337457593901e-06,
      "loss": 2.2861,
      "step": 42093
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1450543403625488,
      "learning_rate": 8.58692985107629e-06,
      "loss": 2.3364,
      "step": 42094
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3426254987716675,
      "learning_rate": 8.586522246954213e-06,
      "loss": 2.2137,
      "step": 42095
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1076927185058594,
      "learning_rate": 8.586114645228366e-06,
      "loss": 2.3813,
      "step": 42096
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0327606201171875,
      "learning_rate": 8.58570704589944e-06,
      "loss": 2.4525,
      "step": 42097
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.018402099609375,
      "learning_rate": 8.585299448968123e-06,
      "loss": 2.3705,
      "step": 42098
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0219923257827759,
      "learning_rate": 8.584891854435109e-06,
      "loss": 2.4758,
      "step": 42099
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1457566022872925,
      "learning_rate": 8.584484262301088e-06,
      "loss": 2.3287,
      "step": 42100
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0077483654022217,
      "learning_rate": 8.584076672566748e-06,
      "loss": 2.5108,
      "step": 42101
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.065033197402954,
      "learning_rate": 8.583669085232784e-06,
      "loss": 2.4168,
      "step": 42102
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0686438083648682,
      "learning_rate": 8.583261500299886e-06,
      "loss": 2.3551,
      "step": 42103
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4177122116088867,
      "learning_rate": 8.58285391776875e-06,
      "loss": 2.4405,
      "step": 42104
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4614078998565674,
      "learning_rate": 8.582446337640055e-06,
      "loss": 2.4224,
      "step": 42105
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0113073587417603,
      "learning_rate": 8.5820387599145e-06,
      "loss": 2.373,
      "step": 42106
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1219695806503296,
      "learning_rate": 8.581631184592773e-06,
      "loss": 2.1983,
      "step": 42107
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1483850479125977,
      "learning_rate": 8.581223611675568e-06,
      "loss": 2.2303,
      "step": 42108
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0585579872131348,
      "learning_rate": 8.580816041163573e-06,
      "loss": 2.1234,
      "step": 42109
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9952407479286194,
      "learning_rate": 8.58040847305748e-06,
      "loss": 2.5912,
      "step": 42110
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1294126510620117,
      "learning_rate": 8.58000090735798e-06,
      "loss": 2.5511,
      "step": 42111
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.146416425704956,
      "learning_rate": 8.579593344065767e-06,
      "loss": 2.2882,
      "step": 42112
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2187086343765259,
      "learning_rate": 8.579185783181528e-06,
      "loss": 2.1892,
      "step": 42113
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.103468894958496,
      "learning_rate": 8.578778224705955e-06,
      "loss": 2.4292,
      "step": 42114
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3252094984054565,
      "learning_rate": 8.578370668639737e-06,
      "loss": 2.1593,
      "step": 42115
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1689600944519043,
      "learning_rate": 8.57796311498357e-06,
      "loss": 2.244,
      "step": 42116
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2784181833267212,
      "learning_rate": 8.57755556373814e-06,
      "loss": 2.4445,
      "step": 42117
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.079738736152649,
      "learning_rate": 8.57714801490414e-06,
      "loss": 2.5165,
      "step": 42118
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0783216953277588,
      "learning_rate": 8.57674046848226e-06,
      "loss": 2.3377,
      "step": 42119
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0182368755340576,
      "learning_rate": 8.576332924473192e-06,
      "loss": 2.2886,
      "step": 42120
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0252020359039307,
      "learning_rate": 8.575925382877626e-06,
      "loss": 2.3558,
      "step": 42121
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1157997846603394,
      "learning_rate": 8.575517843696253e-06,
      "loss": 2.5033,
      "step": 42122
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2300513982772827,
      "learning_rate": 8.575110306929764e-06,
      "loss": 2.3319,
      "step": 42123
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.136328935623169,
      "learning_rate": 8.57470277257885e-06,
      "loss": 2.1852,
      "step": 42124
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2271586656570435,
      "learning_rate": 8.574295240644201e-06,
      "loss": 2.1951,
      "step": 42125
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.055326223373413,
      "learning_rate": 8.57388771112651e-06,
      "loss": 2.4776,
      "step": 42126
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9888570308685303,
      "learning_rate": 8.573480184026467e-06,
      "loss": 2.2233,
      "step": 42127
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.143881916999817,
      "learning_rate": 8.57307265934476e-06,
      "loss": 2.3284,
      "step": 42128
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2555630207061768,
      "learning_rate": 8.572665137082085e-06,
      "loss": 2.4619,
      "step": 42129
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1199625730514526,
      "learning_rate": 8.572257617239131e-06,
      "loss": 2.3175,
      "step": 42130
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2265889644622803,
      "learning_rate": 8.57185009981659e-06,
      "loss": 2.4385,
      "step": 42131
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9810588955879211,
      "learning_rate": 8.571442584815147e-06,
      "loss": 2.1347,
      "step": 42132
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0019679069519043,
      "learning_rate": 8.571035072235499e-06,
      "loss": 2.3781,
      "step": 42133
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.950658917427063,
      "learning_rate": 8.570627562078332e-06,
      "loss": 2.2369,
      "step": 42134
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.247397780418396,
      "learning_rate": 8.57022005434434e-06,
      "loss": 2.2515,
      "step": 42135
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1689250469207764,
      "learning_rate": 8.569812549034214e-06,
      "loss": 2.5454,
      "step": 42136
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0902496576309204,
      "learning_rate": 8.569405046148644e-06,
      "loss": 2.2353,
      "step": 42137
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1576273441314697,
      "learning_rate": 8.56899754568832e-06,
      "loss": 2.4275,
      "step": 42138
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1403025388717651,
      "learning_rate": 8.568590047653937e-06,
      "loss": 2.31,
      "step": 42139
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0206971168518066,
      "learning_rate": 8.56818255204618e-06,
      "loss": 2.2184,
      "step": 42140
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9345811605453491,
      "learning_rate": 8.567775058865745e-06,
      "loss": 2.1912,
      "step": 42141
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1281994581222534,
      "learning_rate": 8.567367568113319e-06,
      "loss": 2.4362,
      "step": 42142
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.04417085647583,
      "learning_rate": 8.566960079789592e-06,
      "loss": 2.4864,
      "step": 42143
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0674092769622803,
      "learning_rate": 8.56655259389526e-06,
      "loss": 2.5901,
      "step": 42144
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0494033098220825,
      "learning_rate": 8.56614511043101e-06,
      "loss": 2.3413,
      "step": 42145
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.158799648284912,
      "learning_rate": 8.565737629397531e-06,
      "loss": 2.2859,
      "step": 42146
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.094597578048706,
      "learning_rate": 8.56533015079552e-06,
      "loss": 2.2046,
      "step": 42147
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0563994646072388,
      "learning_rate": 8.564922674625662e-06,
      "loss": 2.3938,
      "step": 42148
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0282167196273804,
      "learning_rate": 8.56451520088865e-06,
      "loss": 2.3684,
      "step": 42149
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1425198316574097,
      "learning_rate": 8.564107729585172e-06,
      "loss": 2.3673,
      "step": 42150
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0767041444778442,
      "learning_rate": 8.563700260715924e-06,
      "loss": 2.5176,
      "step": 42151
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.083130121231079,
      "learning_rate": 8.563292794281593e-06,
      "loss": 2.337,
      "step": 42152
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1586878299713135,
      "learning_rate": 8.562885330282872e-06,
      "loss": 2.3144,
      "step": 42153
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.016502857208252,
      "learning_rate": 8.56247786872045e-06,
      "loss": 2.5677,
      "step": 42154
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1138025522232056,
      "learning_rate": 8.56207040959502e-06,
      "loss": 2.0666,
      "step": 42155
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1030384302139282,
      "learning_rate": 8.561662952907269e-06,
      "loss": 2.5797,
      "step": 42156
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.189221739768982,
      "learning_rate": 8.561255498657894e-06,
      "loss": 2.2906,
      "step": 42157
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0892014503479004,
      "learning_rate": 8.560848046847579e-06,
      "loss": 2.2096,
      "step": 42158
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0780936479568481,
      "learning_rate": 8.560440597477017e-06,
      "loss": 2.2814,
      "step": 42159
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0278595685958862,
      "learning_rate": 8.560033150546897e-06,
      "loss": 2.5361,
      "step": 42160
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0043606758117676,
      "learning_rate": 8.559625706057915e-06,
      "loss": 2.4915,
      "step": 42161
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0142457485198975,
      "learning_rate": 8.559218264010758e-06,
      "loss": 2.1989,
      "step": 42162
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1421992778778076,
      "learning_rate": 8.558810824406116e-06,
      "loss": 2.1121,
      "step": 42163
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0851871967315674,
      "learning_rate": 8.558403387244682e-06,
      "loss": 2.2868,
      "step": 42164
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0120694637298584,
      "learning_rate": 8.557995952527146e-06,
      "loss": 2.5094,
      "step": 42165
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0157815217971802,
      "learning_rate": 8.557588520254199e-06,
      "loss": 2.44,
      "step": 42166
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9749533534049988,
      "learning_rate": 8.55718109042653e-06,
      "loss": 2.0384,
      "step": 42167
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9582452178001404,
      "learning_rate": 8.556773663044832e-06,
      "loss": 2.4925,
      "step": 42168
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.027557134628296,
      "learning_rate": 8.556366238109796e-06,
      "loss": 2.533,
      "step": 42169
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0574674606323242,
      "learning_rate": 8.55595881562211e-06,
      "loss": 2.3819,
      "step": 42170
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.051851511001587,
      "learning_rate": 8.555551395582466e-06,
      "loss": 2.2805,
      "step": 42171
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.021996021270752,
      "learning_rate": 8.555143977991554e-06,
      "loss": 2.3201,
      "step": 42172
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.078795313835144,
      "learning_rate": 8.554736562850066e-06,
      "loss": 2.4862,
      "step": 42173
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1087316274642944,
      "learning_rate": 8.55432915015869e-06,
      "loss": 2.3637,
      "step": 42174
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3104101419448853,
      "learning_rate": 8.553921739918123e-06,
      "loss": 2.3233,
      "step": 42175
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.086540699005127,
      "learning_rate": 8.553514332129049e-06,
      "loss": 2.3448,
      "step": 42176
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.02230966091156,
      "learning_rate": 8.55310692679216e-06,
      "loss": 2.3597,
      "step": 42177
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2475380897521973,
      "learning_rate": 8.55269952390815e-06,
      "loss": 2.2378,
      "step": 42178
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0887508392333984,
      "learning_rate": 8.552292123477704e-06,
      "loss": 2.5551,
      "step": 42179
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1669251918792725,
      "learning_rate": 8.551884725501519e-06,
      "loss": 2.4202,
      "step": 42180
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9705538153648376,
      "learning_rate": 8.55147732998028e-06,
      "loss": 2.4602,
      "step": 42181
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0955231189727783,
      "learning_rate": 8.551069936914682e-06,
      "loss": 2.5637,
      "step": 42182
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0618597269058228,
      "learning_rate": 8.550662546305418e-06,
      "loss": 2.56,
      "step": 42183
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2610714435577393,
      "learning_rate": 8.550255158153171e-06,
      "loss": 2.1281,
      "step": 42184
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0248653888702393,
      "learning_rate": 8.549847772458634e-06,
      "loss": 2.5389,
      "step": 42185
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1432186365127563,
      "learning_rate": 8.5494403892225e-06,
      "loss": 2.1643,
      "step": 42186
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1435999870300293,
      "learning_rate": 8.549033008445458e-06,
      "loss": 2.3805,
      "step": 42187
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.034003734588623,
      "learning_rate": 8.548625630128199e-06,
      "loss": 2.5391,
      "step": 42188
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1180357933044434,
      "learning_rate": 8.548218254271415e-06,
      "loss": 2.3524,
      "step": 42189
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1158370971679688,
      "learning_rate": 8.547810880875793e-06,
      "loss": 2.3593,
      "step": 42190
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.063942551612854,
      "learning_rate": 8.547403509942027e-06,
      "loss": 2.1934,
      "step": 42191
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.231305480003357,
      "learning_rate": 8.546996141470806e-06,
      "loss": 2.4751,
      "step": 42192
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1660195589065552,
      "learning_rate": 8.546588775462823e-06,
      "loss": 2.5633,
      "step": 42193
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0905518531799316,
      "learning_rate": 8.546181411918766e-06,
      "loss": 2.4658,
      "step": 42194
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.144821286201477,
      "learning_rate": 8.545774050839327e-06,
      "loss": 2.2088,
      "step": 42195
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1088447570800781,
      "learning_rate": 8.545366692225194e-06,
      "loss": 2.2914,
      "step": 42196
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0059276819229126,
      "learning_rate": 8.544959336077066e-06,
      "loss": 2.4212,
      "step": 42197
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.140449047088623,
      "learning_rate": 8.544551982395621e-06,
      "loss": 2.4935,
      "step": 42198
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1137336492538452,
      "learning_rate": 8.544144631181556e-06,
      "loss": 2.1806,
      "step": 42199
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1335201263427734,
      "learning_rate": 8.543737282435563e-06,
      "loss": 2.6281,
      "step": 42200
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1231156587600708,
      "learning_rate": 8.54332993615833e-06,
      "loss": 2.4645,
      "step": 42201
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0795222520828247,
      "learning_rate": 8.542922592350546e-06,
      "loss": 2.5818,
      "step": 42202
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0638078451156616,
      "learning_rate": 8.542515251012909e-06,
      "loss": 2.4952,
      "step": 42203
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1528210639953613,
      "learning_rate": 8.542107912146101e-06,
      "loss": 2.3426,
      "step": 42204
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1132732629776,
      "learning_rate": 8.541700575750815e-06,
      "loss": 2.2392,
      "step": 42205
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0088081359863281,
      "learning_rate": 8.541293241827744e-06,
      "loss": 2.2082,
      "step": 42206
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1147124767303467,
      "learning_rate": 8.540885910377578e-06,
      "loss": 2.3157,
      "step": 42207
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9973108172416687,
      "learning_rate": 8.540478581401005e-06,
      "loss": 2.3689,
      "step": 42208
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.14605712890625,
      "learning_rate": 8.540071254898718e-06,
      "loss": 2.2666,
      "step": 42209
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9600393772125244,
      "learning_rate": 8.53966393087141e-06,
      "loss": 2.1975,
      "step": 42210
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0964844226837158,
      "learning_rate": 8.539256609319765e-06,
      "loss": 2.2866,
      "step": 42211
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.066084384918213,
      "learning_rate": 8.538849290244477e-06,
      "loss": 2.4014,
      "step": 42212
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1187151670455933,
      "learning_rate": 8.538441973646236e-06,
      "loss": 2.2425,
      "step": 42213
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1004159450531006,
      "learning_rate": 8.538034659525733e-06,
      "loss": 2.399,
      "step": 42214
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0648653507232666,
      "learning_rate": 8.53762734788366e-06,
      "loss": 2.5218,
      "step": 42215
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.982147753238678,
      "learning_rate": 8.537220038720703e-06,
      "loss": 2.6294,
      "step": 42216
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.137280821800232,
      "learning_rate": 8.536812732037556e-06,
      "loss": 2.101,
      "step": 42217
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9478536248207092,
      "learning_rate": 8.536405427834909e-06,
      "loss": 2.4138,
      "step": 42218
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0385316610336304,
      "learning_rate": 8.535998126113452e-06,
      "loss": 2.5566,
      "step": 42219
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0238008499145508,
      "learning_rate": 8.535590826873876e-06,
      "loss": 2.2293,
      "step": 42220
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0083011388778687,
      "learning_rate": 8.535183530116872e-06,
      "loss": 2.4547,
      "step": 42221
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.093186855316162,
      "learning_rate": 8.534776235843128e-06,
      "loss": 2.4609,
      "step": 42222
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2246770858764648,
      "learning_rate": 8.534368944053343e-06,
      "loss": 2.2888,
      "step": 42223
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.275159239768982,
      "learning_rate": 8.533961654748194e-06,
      "loss": 2.6558,
      "step": 42224
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0081838369369507,
      "learning_rate": 8.533554367928381e-06,
      "loss": 2.1074,
      "step": 42225
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0457969903945923,
      "learning_rate": 8.533147083594588e-06,
      "loss": 2.2924,
      "step": 42226
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0893712043762207,
      "learning_rate": 8.532739801747512e-06,
      "loss": 2.5613,
      "step": 42227
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0501314401626587,
      "learning_rate": 8.53233252238784e-06,
      "loss": 2.2883,
      "step": 42228
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0293819904327393,
      "learning_rate": 8.531925245516261e-06,
      "loss": 2.2266,
      "step": 42229
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0329374074935913,
      "learning_rate": 8.53151797113347e-06,
      "loss": 2.0362,
      "step": 42230
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.882325530052185,
      "learning_rate": 8.531110699240156e-06,
      "loss": 2.6027,
      "step": 42231
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2508251667022705,
      "learning_rate": 8.530703429837006e-06,
      "loss": 2.2523,
      "step": 42232
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1143109798431396,
      "learning_rate": 8.530296162924713e-06,
      "loss": 2.1774,
      "step": 42233
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0364415645599365,
      "learning_rate": 8.529888898503968e-06,
      "loss": 2.2361,
      "step": 42234
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0701615810394287,
      "learning_rate": 8.529481636575458e-06,
      "loss": 2.3932,
      "step": 42235
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0883980989456177,
      "learning_rate": 8.529074377139881e-06,
      "loss": 2.3561,
      "step": 42236
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.088025450706482,
      "learning_rate": 8.528667120197919e-06,
      "loss": 2.3538,
      "step": 42237
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3431051969528198,
      "learning_rate": 8.528259865750268e-06,
      "loss": 2.5812,
      "step": 42238
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.104434847831726,
      "learning_rate": 8.527852613797613e-06,
      "loss": 2.3741,
      "step": 42239
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.072077751159668,
      "learning_rate": 8.52744536434065e-06,
      "loss": 2.4681,
      "step": 42240
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1402798891067505,
      "learning_rate": 8.527038117380066e-06,
      "loss": 2.233,
      "step": 42241
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0367857217788696,
      "learning_rate": 8.526630872916552e-06,
      "loss": 2.3308,
      "step": 42242
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0493144989013672,
      "learning_rate": 8.5262236309508e-06,
      "loss": 2.2418,
      "step": 42243
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0892339944839478,
      "learning_rate": 8.525816391483496e-06,
      "loss": 2.5027,
      "step": 42244
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0756868124008179,
      "learning_rate": 8.525409154515338e-06,
      "loss": 2.2,
      "step": 42245
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1579644680023193,
      "learning_rate": 8.52500192004701e-06,
      "loss": 2.3249,
      "step": 42246
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9968867897987366,
      "learning_rate": 8.524594688079203e-06,
      "loss": 2.1248,
      "step": 42247
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0541073083877563,
      "learning_rate": 8.524187458612611e-06,
      "loss": 2.2263,
      "step": 42248
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.112691879272461,
      "learning_rate": 8.523780231647924e-06,
      "loss": 2.0269,
      "step": 42249
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0579785108566284,
      "learning_rate": 8.52337300718583e-06,
      "loss": 2.4705,
      "step": 42250
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0877987146377563,
      "learning_rate": 8.522965785227015e-06,
      "loss": 2.2531,
      "step": 42251
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1648528575897217,
      "learning_rate": 8.522558565772178e-06,
      "loss": 2.3396,
      "step": 42252
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1609361171722412,
      "learning_rate": 8.522151348822004e-06,
      "loss": 2.3128,
      "step": 42253
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.331288456916809,
      "learning_rate": 8.521744134377185e-06,
      "loss": 2.3465,
      "step": 42254
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.040897250175476,
      "learning_rate": 8.52133692243841e-06,
      "loss": 2.3712,
      "step": 42255
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9848933815956116,
      "learning_rate": 8.520929713006371e-06,
      "loss": 2.1052,
      "step": 42256
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0075172185897827,
      "learning_rate": 8.520522506081758e-06,
      "loss": 2.6168,
      "step": 42257
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1460193395614624,
      "learning_rate": 8.520115301665263e-06,
      "loss": 2.4437,
      "step": 42258
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0879350900650024,
      "learning_rate": 8.519708099757575e-06,
      "loss": 2.3918,
      "step": 42259
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0975509881973267,
      "learning_rate": 8.519300900359382e-06,
      "loss": 2.2646,
      "step": 42260
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0466805696487427,
      "learning_rate": 8.518893703471376e-06,
      "loss": 2.5204,
      "step": 42261
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9779025912284851,
      "learning_rate": 8.518486509094248e-06,
      "loss": 2.6761,
      "step": 42262
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0720384120941162,
      "learning_rate": 8.518079317228689e-06,
      "loss": 2.6141,
      "step": 42263
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0926421880722046,
      "learning_rate": 8.517672127875387e-06,
      "loss": 2.4,
      "step": 42264
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0718284845352173,
      "learning_rate": 8.517264941035034e-06,
      "loss": 2.3694,
      "step": 42265
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0867098569869995,
      "learning_rate": 8.516857756708317e-06,
      "loss": 2.2891,
      "step": 42266
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9998660087585449,
      "learning_rate": 8.51645057489593e-06,
      "loss": 2.3403,
      "step": 42267
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.174433946609497,
      "learning_rate": 8.516043395598563e-06,
      "loss": 2.182,
      "step": 42268
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9935506582260132,
      "learning_rate": 8.515636218816905e-06,
      "loss": 2.2943,
      "step": 42269
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9706808924674988,
      "learning_rate": 8.515229044551646e-06,
      "loss": 2.2248,
      "step": 42270
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.150909662246704,
      "learning_rate": 8.514821872803478e-06,
      "loss": 2.378,
      "step": 42271
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0985674858093262,
      "learning_rate": 8.514414703573089e-06,
      "loss": 2.3519,
      "step": 42272
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1357611417770386,
      "learning_rate": 8.514007536861172e-06,
      "loss": 2.3454,
      "step": 42273
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1151137351989746,
      "learning_rate": 8.513600372668416e-06,
      "loss": 2.4125,
      "step": 42274
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4068877696990967,
      "learning_rate": 8.51319321099551e-06,
      "loss": 2.1337,
      "step": 42275
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0579416751861572,
      "learning_rate": 8.512786051843149e-06,
      "loss": 2.3664,
      "step": 42276
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1292423009872437,
      "learning_rate": 8.512378895212017e-06,
      "loss": 2.3718,
      "step": 42277
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0777122974395752,
      "learning_rate": 8.511971741102805e-06,
      "loss": 2.1811,
      "step": 42278
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0614819526672363,
      "learning_rate": 8.511564589516207e-06,
      "loss": 2.206,
      "step": 42279
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0131912231445312,
      "learning_rate": 8.511157440452909e-06,
      "loss": 2.403,
      "step": 42280
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1862391233444214,
      "learning_rate": 8.510750293913605e-06,
      "loss": 2.4944,
      "step": 42281
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0954172611236572,
      "learning_rate": 8.510343149898983e-06,
      "loss": 2.2869,
      "step": 42282
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.160791277885437,
      "learning_rate": 8.509936008409736e-06,
      "loss": 2.3439,
      "step": 42283
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1927917003631592,
      "learning_rate": 8.50952886944655e-06,
      "loss": 2.4686,
      "step": 42284
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1299240589141846,
      "learning_rate": 8.509121733010119e-06,
      "loss": 2.4312,
      "step": 42285
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1154916286468506,
      "learning_rate": 8.50871459910113e-06,
      "loss": 2.2201,
      "step": 42286
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0898717641830444,
      "learning_rate": 8.508307467720277e-06,
      "loss": 2.2266,
      "step": 42287
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.245775580406189,
      "learning_rate": 8.507900338868247e-06,
      "loss": 2.3406,
      "step": 42288
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0252376794815063,
      "learning_rate": 8.507493212545732e-06,
      "loss": 2.5065,
      "step": 42289
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1106270551681519,
      "learning_rate": 8.50708608875342e-06,
      "loss": 2.3077,
      "step": 42290
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1068049669265747,
      "learning_rate": 8.506678967492002e-06,
      "loss": 2.4225,
      "step": 42291
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0324742794036865,
      "learning_rate": 8.50627184876217e-06,
      "loss": 2.0933,
      "step": 42292
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9394009709358215,
      "learning_rate": 8.505864732564614e-06,
      "loss": 2.3837,
      "step": 42293
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0565662384033203,
      "learning_rate": 8.505457618900021e-06,
      "loss": 2.3437,
      "step": 42294
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.034877896308899,
      "learning_rate": 8.505050507769083e-06,
      "loss": 2.5472,
      "step": 42295
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.072205901145935,
      "learning_rate": 8.504643399172492e-06,
      "loss": 2.5546,
      "step": 42296
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0621130466461182,
      "learning_rate": 8.504236293110934e-06,
      "loss": 2.3988,
      "step": 42297
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0453897714614868,
      "learning_rate": 8.503829189585104e-06,
      "loss": 2.2176,
      "step": 42298
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0062817335128784,
      "learning_rate": 8.503422088595688e-06,
      "loss": 2.4739,
      "step": 42299
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1250665187835693,
      "learning_rate": 8.50301499014338e-06,
      "loss": 2.3627,
      "step": 42300
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0695172548294067,
      "learning_rate": 8.502607894228867e-06,
      "loss": 2.3801,
      "step": 42301
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1348984241485596,
      "learning_rate": 8.502200800852844e-06,
      "loss": 2.5609,
      "step": 42302
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1866358518600464,
      "learning_rate": 8.501793710015996e-06,
      "loss": 2.4562,
      "step": 42303
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0641965866088867,
      "learning_rate": 8.501386621719012e-06,
      "loss": 2.5282,
      "step": 42304
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.001203179359436,
      "learning_rate": 8.500979535962584e-06,
      "loss": 2.3573,
      "step": 42305
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1403594017028809,
      "learning_rate": 8.500572452747404e-06,
      "loss": 2.4314,
      "step": 42306
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1167584657669067,
      "learning_rate": 8.500165372074162e-06,
      "loss": 2.4529,
      "step": 42307
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.074476718902588,
      "learning_rate": 8.499758293943544e-06,
      "loss": 2.2323,
      "step": 42308
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0257290601730347,
      "learning_rate": 8.499351218356247e-06,
      "loss": 2.3798,
      "step": 42309
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0293481349945068,
      "learning_rate": 8.498944145312955e-06,
      "loss": 2.4485,
      "step": 42310
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1370553970336914,
      "learning_rate": 8.49853707481436e-06,
      "loss": 2.441,
      "step": 42311
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1535056829452515,
      "learning_rate": 8.498130006861154e-06,
      "loss": 2.3528,
      "step": 42312
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1217025518417358,
      "learning_rate": 8.497722941454026e-06,
      "loss": 2.2922,
      "step": 42313
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0489317178726196,
      "learning_rate": 8.497315878593663e-06,
      "loss": 2.3323,
      "step": 42314
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0308616161346436,
      "learning_rate": 8.496908818280763e-06,
      "loss": 2.315,
      "step": 42315
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0117133855819702,
      "learning_rate": 8.496501760516008e-06,
      "loss": 2.5334,
      "step": 42316
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0147802829742432,
      "learning_rate": 8.496094705300088e-06,
      "loss": 2.0478,
      "step": 42317
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9592832326889038,
      "learning_rate": 8.495687652633699e-06,
      "loss": 2.5016,
      "step": 42318
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0667777061462402,
      "learning_rate": 8.495280602517526e-06,
      "loss": 2.1584,
      "step": 42319
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.123928189277649,
      "learning_rate": 8.494873554952262e-06,
      "loss": 2.2272,
      "step": 42320
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0978069305419922,
      "learning_rate": 8.494466509938598e-06,
      "loss": 2.0971,
      "step": 42321
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.194980502128601,
      "learning_rate": 8.494059467477221e-06,
      "loss": 2.326,
      "step": 42322
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2145916223526,
      "learning_rate": 8.49365242756882e-06,
      "loss": 2.2751,
      "step": 42323
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0300037860870361,
      "learning_rate": 8.49324539021409e-06,
      "loss": 2.2925,
      "step": 42324
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0630285739898682,
      "learning_rate": 8.492838355413715e-06,
      "loss": 2.1763,
      "step": 42325
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1009767055511475,
      "learning_rate": 8.49243132316839e-06,
      "loss": 2.4552,
      "step": 42326
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3739912509918213,
      "learning_rate": 8.492024293478803e-06,
      "loss": 2.1499,
      "step": 42327
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9234722852706909,
      "learning_rate": 8.49161726634565e-06,
      "loss": 2.4227,
      "step": 42328
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0931836366653442,
      "learning_rate": 8.49121024176961e-06,
      "loss": 2.1905,
      "step": 42329
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2298717498779297,
      "learning_rate": 8.490803219751378e-06,
      "loss": 2.342,
      "step": 42330
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.170545220375061,
      "learning_rate": 8.490396200291647e-06,
      "loss": 2.2326,
      "step": 42331
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2474510669708252,
      "learning_rate": 8.4899891833911e-06,
      "loss": 2.1706,
      "step": 42332
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1678725481033325,
      "learning_rate": 8.489582169050436e-06,
      "loss": 2.3332,
      "step": 42333
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0919746160507202,
      "learning_rate": 8.489175157270337e-06,
      "loss": 2.373,
      "step": 42334
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0647825002670288,
      "learning_rate": 8.488768148051498e-06,
      "loss": 2.6128,
      "step": 42335
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0811448097229004,
      "learning_rate": 8.488361141394608e-06,
      "loss": 2.3754,
      "step": 42336
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1279464960098267,
      "learning_rate": 8.487954137300354e-06,
      "loss": 2.4055,
      "step": 42337
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.035415768623352,
      "learning_rate": 8.487547135769432e-06,
      "loss": 2.3714,
      "step": 42338
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0474543571472168,
      "learning_rate": 8.487140136802526e-06,
      "loss": 2.3325,
      "step": 42339
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1989624500274658,
      "learning_rate": 8.486733140400329e-06,
      "loss": 2.3873,
      "step": 42340
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9718599319458008,
      "learning_rate": 8.486326146563534e-06,
      "loss": 2.3503,
      "step": 42341
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0896871089935303,
      "learning_rate": 8.485919155292823e-06,
      "loss": 2.2996,
      "step": 42342
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0333448648452759,
      "learning_rate": 8.485512166588892e-06,
      "loss": 2.2208,
      "step": 42343
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1320173740386963,
      "learning_rate": 8.485105180452427e-06,
      "loss": 2.3335,
      "step": 42344
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0182290077209473,
      "learning_rate": 8.484698196884121e-06,
      "loss": 2.4109,
      "step": 42345
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.036612629890442,
      "learning_rate": 8.484291215884664e-06,
      "loss": 2.11,
      "step": 42346
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2492296695709229,
      "learning_rate": 8.483884237454743e-06,
      "loss": 2.1319,
      "step": 42347
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1401950120925903,
      "learning_rate": 8.483477261595051e-06,
      "loss": 2.3304,
      "step": 42348
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1122312545776367,
      "learning_rate": 8.48307028830628e-06,
      "loss": 2.3448,
      "step": 42349
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1781903505325317,
      "learning_rate": 8.482663317589113e-06,
      "loss": 2.4425,
      "step": 42350
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9892334938049316,
      "learning_rate": 8.482256349444245e-06,
      "loss": 2.4113,
      "step": 42351
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1249113082885742,
      "learning_rate": 8.481849383872365e-06,
      "loss": 2.356,
      "step": 42352
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1360331773757935,
      "learning_rate": 8.481442420874163e-06,
      "loss": 2.4432,
      "step": 42353
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1515681743621826,
      "learning_rate": 8.481035460450327e-06,
      "loss": 2.2727,
      "step": 42354
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4312658309936523,
      "learning_rate": 8.480628502601551e-06,
      "loss": 2.2229,
      "step": 42355
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1282902956008911,
      "learning_rate": 8.480221547328523e-06,
      "loss": 2.405,
      "step": 42356
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.189797282218933,
      "learning_rate": 8.47981459463193e-06,
      "loss": 2.3443,
      "step": 42357
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5288983583450317,
      "learning_rate": 8.479407644512463e-06,
      "loss": 2.2096,
      "step": 42358
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0904682874679565,
      "learning_rate": 8.479000696970815e-06,
      "loss": 2.238,
      "step": 42359
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0762052536010742,
      "learning_rate": 8.478593752007671e-06,
      "loss": 2.3036,
      "step": 42360
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0334056615829468,
      "learning_rate": 8.478186809623727e-06,
      "loss": 2.2902,
      "step": 42361
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.174574613571167,
      "learning_rate": 8.477779869819668e-06,
      "loss": 2.3335,
      "step": 42362
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0222433805465698,
      "learning_rate": 8.477372932596185e-06,
      "loss": 2.4672,
      "step": 42363
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0746510028839111,
      "learning_rate": 8.47696599795397e-06,
      "loss": 2.2707,
      "step": 42364
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0192410945892334,
      "learning_rate": 8.47655906589371e-06,
      "loss": 2.4229,
      "step": 42365
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.129220724105835,
      "learning_rate": 8.476152136416097e-06,
      "loss": 2.1669,
      "step": 42366
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0889132022857666,
      "learning_rate": 8.47574520952182e-06,
      "loss": 2.4996,
      "step": 42367
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2181636095046997,
      "learning_rate": 8.475338285211573e-06,
      "loss": 2.4234,
      "step": 42368
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.072921872138977,
      "learning_rate": 8.474931363486036e-06,
      "loss": 2.1695,
      "step": 42369
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.111821174621582,
      "learning_rate": 8.474524444345905e-06,
      "loss": 2.3002,
      "step": 42370
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0550978183746338,
      "learning_rate": 8.474117527791871e-06,
      "loss": 2.3797,
      "step": 42371
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1657655239105225,
      "learning_rate": 8.473710613824621e-06,
      "loss": 2.2806,
      "step": 42372
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0879594087600708,
      "learning_rate": 8.473303702444846e-06,
      "loss": 2.2523,
      "step": 42373
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0483653545379639,
      "learning_rate": 8.472896793653236e-06,
      "loss": 2.4559,
      "step": 42374
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9184972047805786,
      "learning_rate": 8.472489887450482e-06,
      "loss": 2.293,
      "step": 42375
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9875892996788025,
      "learning_rate": 8.472082983837271e-06,
      "loss": 2.1657,
      "step": 42376
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.017490267753601,
      "learning_rate": 8.471676082814298e-06,
      "loss": 2.256,
      "step": 42377
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9743567109107971,
      "learning_rate": 8.471269184382245e-06,
      "loss": 2.2221,
      "step": 42378
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9691490530967712,
      "learning_rate": 8.470862288541806e-06,
      "loss": 2.4408,
      "step": 42379
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9865838885307312,
      "learning_rate": 8.470455395293671e-06,
      "loss": 2.044,
      "step": 42380
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0435912609100342,
      "learning_rate": 8.470048504638533e-06,
      "loss": 2.4047,
      "step": 42381
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.029768705368042,
      "learning_rate": 8.469641616577076e-06,
      "loss": 2.2695,
      "step": 42382
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0370956659317017,
      "learning_rate": 8.469234731109993e-06,
      "loss": 2.3365,
      "step": 42383
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.074832797050476,
      "learning_rate": 8.468827848237972e-06,
      "loss": 2.3871,
      "step": 42384
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1407533884048462,
      "learning_rate": 8.468420967961703e-06,
      "loss": 2.3999,
      "step": 42385
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3810468912124634,
      "learning_rate": 8.468014090281875e-06,
      "loss": 2.3151,
      "step": 42386
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0810340642929077,
      "learning_rate": 8.467607215199182e-06,
      "loss": 2.4017,
      "step": 42387
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0819886922836304,
      "learning_rate": 8.467200342714309e-06,
      "loss": 2.5353,
      "step": 42388
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1980229616165161,
      "learning_rate": 8.466793472827948e-06,
      "loss": 2.371,
      "step": 42389
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2438818216323853,
      "learning_rate": 8.466386605540789e-06,
      "loss": 2.1597,
      "step": 42390
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9833452701568604,
      "learning_rate": 8.465979740853521e-06,
      "loss": 2.198,
      "step": 42391
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1055283546447754,
      "learning_rate": 8.465572878766834e-06,
      "loss": 2.274,
      "step": 42392
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.110837459564209,
      "learning_rate": 8.465166019281418e-06,
      "loss": 2.4394,
      "step": 42393
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.070395588874817,
      "learning_rate": 8.464759162397966e-06,
      "loss": 2.5021,
      "step": 42394
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1424719095230103,
      "learning_rate": 8.46435230811716e-06,
      "loss": 2.3621,
      "step": 42395
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0882487297058105,
      "learning_rate": 8.463945456439696e-06,
      "loss": 2.2611,
      "step": 42396
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0045636892318726,
      "learning_rate": 8.463538607366259e-06,
      "loss": 2.2624,
      "step": 42397
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1579636335372925,
      "learning_rate": 8.463131760897542e-06,
      "loss": 2.0881,
      "step": 42398
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1905635595321655,
      "learning_rate": 8.462724917034234e-06,
      "loss": 2.4875,
      "step": 42399
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.11233389377594,
      "learning_rate": 8.462318075777026e-06,
      "loss": 2.2368,
      "step": 42400
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1305502653121948,
      "learning_rate": 8.461911237126604e-06,
      "loss": 2.522,
      "step": 42401
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1683719158172607,
      "learning_rate": 8.461504401083663e-06,
      "loss": 2.2439,
      "step": 42402
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0270720720291138,
      "learning_rate": 8.461097567648889e-06,
      "loss": 2.4051,
      "step": 42403
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0772573947906494,
      "learning_rate": 8.460690736822973e-06,
      "loss": 2.1758,
      "step": 42404
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2169252634048462,
      "learning_rate": 8.460283908606604e-06,
      "loss": 2.3495,
      "step": 42405
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.021172285079956,
      "learning_rate": 8.459877083000471e-06,
      "loss": 2.348,
      "step": 42406
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0727132558822632,
      "learning_rate": 8.459470260005268e-06,
      "loss": 2.3482,
      "step": 42407
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1050183773040771,
      "learning_rate": 8.459063439621679e-06,
      "loss": 2.1782,
      "step": 42408
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0578224658966064,
      "learning_rate": 8.458656621850394e-06,
      "loss": 2.3649,
      "step": 42409
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0061004161834717,
      "learning_rate": 8.458249806692107e-06,
      "loss": 2.4233,
      "step": 42410
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9781875014305115,
      "learning_rate": 8.457842994147505e-06,
      "loss": 2.1794,
      "step": 42411
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1959973573684692,
      "learning_rate": 8.457436184217279e-06,
      "loss": 2.2182,
      "step": 42412
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1538689136505127,
      "learning_rate": 8.457029376902116e-06,
      "loss": 2.3141,
      "step": 42413
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1141924858093262,
      "learning_rate": 8.456622572202706e-06,
      "loss": 2.1304,
      "step": 42414
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.083950400352478,
      "learning_rate": 8.456215770119742e-06,
      "loss": 2.4051,
      "step": 42415
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0243536233901978,
      "learning_rate": 8.45580897065391e-06,
      "loss": 2.1765,
      "step": 42416
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.147200584411621,
      "learning_rate": 8.455402173805902e-06,
      "loss": 2.342,
      "step": 42417
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.083389401435852,
      "learning_rate": 8.454995379576407e-06,
      "loss": 2.4742,
      "step": 42418
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0302106142044067,
      "learning_rate": 8.454588587966115e-06,
      "loss": 2.2895,
      "step": 42419
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1424139738082886,
      "learning_rate": 8.454181798975714e-06,
      "loss": 2.1793,
      "step": 42420
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4597558975219727,
      "learning_rate": 8.453775012605899e-06,
      "loss": 2.3509,
      "step": 42421
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9903869032859802,
      "learning_rate": 8.45336822885735e-06,
      "loss": 2.2408,
      "step": 42422
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0805675983428955,
      "learning_rate": 8.452961447730764e-06,
      "loss": 2.3925,
      "step": 42423
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9851976037025452,
      "learning_rate": 8.452554669226829e-06,
      "loss": 2.2291,
      "step": 42424
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0672876834869385,
      "learning_rate": 8.452147893346232e-06,
      "loss": 2.1931,
      "step": 42425
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1957511901855469,
      "learning_rate": 8.451741120089664e-06,
      "loss": 2.3984,
      "step": 42426
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9942627549171448,
      "learning_rate": 8.451334349457818e-06,
      "loss": 2.3999,
      "step": 42427
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0726443529129028,
      "learning_rate": 8.450927581451379e-06,
      "loss": 2.2801,
      "step": 42428
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.02581787109375,
      "learning_rate": 8.45052081607104e-06,
      "loss": 2.3604,
      "step": 42429
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1418569087982178,
      "learning_rate": 8.450114053317487e-06,
      "loss": 2.2291,
      "step": 42430
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.125314712524414,
      "learning_rate": 8.449707293191414e-06,
      "loss": 2.4087,
      "step": 42431
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0782415866851807,
      "learning_rate": 8.449300535693506e-06,
      "loss": 2.2548,
      "step": 42432
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0659793615341187,
      "learning_rate": 8.448893780824458e-06,
      "loss": 2.3618,
      "step": 42433
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0305546522140503,
      "learning_rate": 8.448487028584958e-06,
      "loss": 2.281,
      "step": 42434
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.109826683998108,
      "learning_rate": 8.448080278975688e-06,
      "loss": 2.3089,
      "step": 42435
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1476244926452637,
      "learning_rate": 8.447673531997347e-06,
      "loss": 2.1385,
      "step": 42436
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0801174640655518,
      "learning_rate": 8.447266787650618e-06,
      "loss": 2.4269,
      "step": 42437
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0519870519638062,
      "learning_rate": 8.446860045936195e-06,
      "loss": 2.4014,
      "step": 42438
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3147828578948975,
      "learning_rate": 8.446453306854769e-06,
      "loss": 2.3442,
      "step": 42439
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.076071858406067,
      "learning_rate": 8.446046570407022e-06,
      "loss": 2.4432,
      "step": 42440
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0115596055984497,
      "learning_rate": 8.445639836593652e-06,
      "loss": 2.2895,
      "step": 42441
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9439120292663574,
      "learning_rate": 8.445233105415341e-06,
      "loss": 2.4031,
      "step": 42442
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1852459907531738,
      "learning_rate": 8.444826376872784e-06,
      "loss": 2.3107,
      "step": 42443
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1566356420516968,
      "learning_rate": 8.444419650966667e-06,
      "loss": 2.5016,
      "step": 42444
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1122006177902222,
      "learning_rate": 8.444012927697682e-06,
      "loss": 2.3246,
      "step": 42445
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0479573011398315,
      "learning_rate": 8.443606207066518e-06,
      "loss": 2.189,
      "step": 42446
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2270913124084473,
      "learning_rate": 8.443199489073869e-06,
      "loss": 2.409,
      "step": 42447
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0310389995574951,
      "learning_rate": 8.442792773720414e-06,
      "loss": 2.3425,
      "step": 42448
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0121031999588013,
      "learning_rate": 8.442386061006849e-06,
      "loss": 2.4353,
      "step": 42449
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0756133794784546,
      "learning_rate": 8.441979350933864e-06,
      "loss": 2.1704,
      "step": 42450
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.161163568496704,
      "learning_rate": 8.441572643502145e-06,
      "loss": 2.2542,
      "step": 42451
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1294879913330078,
      "learning_rate": 8.441165938712384e-06,
      "loss": 2.2667,
      "step": 42452
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1205415725708008,
      "learning_rate": 8.44075923656527e-06,
      "loss": 2.434,
      "step": 42453
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0655804872512817,
      "learning_rate": 8.440352537061494e-06,
      "loss": 2.29,
      "step": 42454
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0398602485656738,
      "learning_rate": 8.439945840201743e-06,
      "loss": 2.587,
      "step": 42455
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.111576795578003,
      "learning_rate": 8.439539145986709e-06,
      "loss": 2.3867,
      "step": 42456
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.095196008682251,
      "learning_rate": 8.439132454417079e-06,
      "loss": 2.2364,
      "step": 42457
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1884852647781372,
      "learning_rate": 8.438725765493542e-06,
      "loss": 2.3532,
      "step": 42458
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.991473913192749,
      "learning_rate": 8.43831907921679e-06,
      "loss": 2.1852,
      "step": 42459
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9752995371818542,
      "learning_rate": 8.437912395587516e-06,
      "loss": 2.3397,
      "step": 42460
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.251257061958313,
      "learning_rate": 8.4375057146064e-06,
      "loss": 2.189,
      "step": 42461
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1860527992248535,
      "learning_rate": 8.437099036274135e-06,
      "loss": 2.2019,
      "step": 42462
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.054017424583435,
      "learning_rate": 8.436692360591412e-06,
      "loss": 2.3079,
      "step": 42463
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0178829431533813,
      "learning_rate": 8.436285687558922e-06,
      "loss": 2.3255,
      "step": 42464
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9859473705291748,
      "learning_rate": 8.435879017177351e-06,
      "loss": 2.1964,
      "step": 42465
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.251239538192749,
      "learning_rate": 8.43547234944739e-06,
      "loss": 2.3236,
      "step": 42466
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0386931896209717,
      "learning_rate": 8.43506568436973e-06,
      "loss": 2.3578,
      "step": 42467
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1526713371276855,
      "learning_rate": 8.434659021945057e-06,
      "loss": 2.4012,
      "step": 42468
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0685598850250244,
      "learning_rate": 8.434252362174063e-06,
      "loss": 2.2005,
      "step": 42469
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.126693606376648,
      "learning_rate": 8.433845705057437e-06,
      "loss": 2.3691,
      "step": 42470
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9964484572410583,
      "learning_rate": 8.433439050595864e-06,
      "loss": 2.3684,
      "step": 42471
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1752345561981201,
      "learning_rate": 8.433032398790042e-06,
      "loss": 2.3794,
      "step": 42472
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0603303909301758,
      "learning_rate": 8.432625749640655e-06,
      "loss": 2.3243,
      "step": 42473
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0321857929229736,
      "learning_rate": 8.432219103148393e-06,
      "loss": 2.4254,
      "step": 42474
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1334298849105835,
      "learning_rate": 8.431812459313945e-06,
      "loss": 2.4487,
      "step": 42475
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0462297201156616,
      "learning_rate": 8.431405818138e-06,
      "loss": 2.6537,
      "step": 42476
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1331216096878052,
      "learning_rate": 8.430999179621248e-06,
      "loss": 2.2256,
      "step": 42477
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.237227439880371,
      "learning_rate": 8.430592543764378e-06,
      "loss": 2.4154,
      "step": 42478
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0050166845321655,
      "learning_rate": 8.43018591056808e-06,
      "loss": 2.4045,
      "step": 42479
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.070918321609497,
      "learning_rate": 8.429779280033044e-06,
      "loss": 2.3571,
      "step": 42480
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1168683767318726,
      "learning_rate": 8.429372652159958e-06,
      "loss": 2.3232,
      "step": 42481
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1085704565048218,
      "learning_rate": 8.428966026949512e-06,
      "loss": 2.3474,
      "step": 42482
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1597241163253784,
      "learning_rate": 8.428559404402396e-06,
      "loss": 2.0757,
      "step": 42483
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0732688903808594,
      "learning_rate": 8.428152784519297e-06,
      "loss": 2.1743,
      "step": 42484
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.122902750968933,
      "learning_rate": 8.427746167300908e-06,
      "loss": 2.3249,
      "step": 42485
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0839815139770508,
      "learning_rate": 8.42733955274792e-06,
      "loss": 2.4687,
      "step": 42486
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1436121463775635,
      "learning_rate": 8.426932940861013e-06,
      "loss": 2.4213,
      "step": 42487
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1574394702911377,
      "learning_rate": 8.426526331640883e-06,
      "loss": 2.4572,
      "step": 42488
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1168475151062012,
      "learning_rate": 8.426119725088218e-06,
      "loss": 2.3741,
      "step": 42489
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1511303186416626,
      "learning_rate": 8.425713121203708e-06,
      "loss": 2.5548,
      "step": 42490
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.121134638786316,
      "learning_rate": 8.425306519988041e-06,
      "loss": 2.2065,
      "step": 42491
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0549099445343018,
      "learning_rate": 8.424899921441907e-06,
      "loss": 2.1509,
      "step": 42492
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9997822046279907,
      "learning_rate": 8.424493325565997e-06,
      "loss": 2.1887,
      "step": 42493
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.346793293952942,
      "learning_rate": 8.424086732360996e-06,
      "loss": 2.2277,
      "step": 42494
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.115004539489746,
      "learning_rate": 8.4236801418276e-06,
      "loss": 2.4971,
      "step": 42495
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0811508893966675,
      "learning_rate": 8.423273553966492e-06,
      "loss": 2.3865,
      "step": 42496
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9578971266746521,
      "learning_rate": 8.422866968778364e-06,
      "loss": 2.2467,
      "step": 42497
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1022462844848633,
      "learning_rate": 8.422460386263904e-06,
      "loss": 2.2417,
      "step": 42498
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0034642219543457,
      "learning_rate": 8.422053806423803e-06,
      "loss": 2.1723,
      "step": 42499
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9572174549102783,
      "learning_rate": 8.42164722925875e-06,
      "loss": 2.4865,
      "step": 42500
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0753388404846191,
      "learning_rate": 8.421240654769434e-06,
      "loss": 2.47,
      "step": 42501
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1016182899475098,
      "learning_rate": 8.420834082956542e-06,
      "loss": 2.4154,
      "step": 42502
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0229207277297974,
      "learning_rate": 8.420427513820766e-06,
      "loss": 2.5102,
      "step": 42503
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0717682838439941,
      "learning_rate": 8.420020947362794e-06,
      "loss": 2.5409,
      "step": 42504
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.127625584602356,
      "learning_rate": 8.419614383583315e-06,
      "loss": 2.1399,
      "step": 42505
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.10651695728302,
      "learning_rate": 8.41920782248302e-06,
      "loss": 2.3973,
      "step": 42506
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9874910116195679,
      "learning_rate": 8.418801264062595e-06,
      "loss": 2.2175,
      "step": 42507
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0836435556411743,
      "learning_rate": 8.418394708322733e-06,
      "loss": 2.2208,
      "step": 42508
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1578067541122437,
      "learning_rate": 8.417988155264121e-06,
      "loss": 2.3479,
      "step": 42509
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0387663841247559,
      "learning_rate": 8.41758160488745e-06,
      "loss": 2.1806,
      "step": 42510
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.202211856842041,
      "learning_rate": 8.417175057193406e-06,
      "loss": 2.4983,
      "step": 42511
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0810635089874268,
      "learning_rate": 8.416768512182681e-06,
      "loss": 2.3332,
      "step": 42512
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3155065774917603,
      "learning_rate": 8.416361969855967e-06,
      "loss": 2.2436,
      "step": 42513
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0690019130706787,
      "learning_rate": 8.415955430213947e-06,
      "loss": 2.1963,
      "step": 42514
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1202441453933716,
      "learning_rate": 8.415548893257311e-06,
      "loss": 2.4704,
      "step": 42515
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0534967184066772,
      "learning_rate": 8.41514235898675e-06,
      "loss": 2.4878,
      "step": 42516
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.143372893333435,
      "learning_rate": 8.414735827402955e-06,
      "loss": 2.4742,
      "step": 42517
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0019580125808716,
      "learning_rate": 8.414329298506612e-06,
      "loss": 2.3672,
      "step": 42518
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0719739198684692,
      "learning_rate": 8.413922772298411e-06,
      "loss": 2.3503,
      "step": 42519
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5120325088500977,
      "learning_rate": 8.413516248779042e-06,
      "loss": 2.3559,
      "step": 42520
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0896469354629517,
      "learning_rate": 8.413109727949194e-06,
      "loss": 2.2028,
      "step": 42521
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5680104494094849,
      "learning_rate": 8.412703209809556e-06,
      "loss": 2.4312,
      "step": 42522
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9880316853523254,
      "learning_rate": 8.41229669436082e-06,
      "loss": 2.3864,
      "step": 42523
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.996883749961853,
      "learning_rate": 8.41189018160367e-06,
      "loss": 2.5127,
      "step": 42524
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1076666116714478,
      "learning_rate": 8.411483671538796e-06,
      "loss": 2.3462,
      "step": 42525
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0412973165512085,
      "learning_rate": 8.411077164166891e-06,
      "loss": 2.3004,
      "step": 42526
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0286612510681152,
      "learning_rate": 8.410670659488642e-06,
      "loss": 2.1468,
      "step": 42527
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0911091566085815,
      "learning_rate": 8.410264157504734e-06,
      "loss": 2.4535,
      "step": 42528
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2093360424041748,
      "learning_rate": 8.409857658215866e-06,
      "loss": 2.4956,
      "step": 42529
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2338324785232544,
      "learning_rate": 8.409451161622716e-06,
      "loss": 2.5269,
      "step": 42530
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.062808632850647,
      "learning_rate": 8.40904466772598e-06,
      "loss": 2.3157,
      "step": 42531
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9213054180145264,
      "learning_rate": 8.408638176526347e-06,
      "loss": 2.0399,
      "step": 42532
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0406206846237183,
      "learning_rate": 8.408231688024502e-06,
      "loss": 2.5055,
      "step": 42533
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1867698431015015,
      "learning_rate": 8.407825202221137e-06,
      "loss": 2.367,
      "step": 42534
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0488280057907104,
      "learning_rate": 8.407418719116941e-06,
      "loss": 2.289,
      "step": 42535
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9950430393218994,
      "learning_rate": 8.407012238712603e-06,
      "loss": 2.6991,
      "step": 42536
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1579458713531494,
      "learning_rate": 8.406605761008812e-06,
      "loss": 2.5629,
      "step": 42537
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.250557541847229,
      "learning_rate": 8.406199286006256e-06,
      "loss": 2.4085,
      "step": 42538
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0430330038070679,
      "learning_rate": 8.40579281370563e-06,
      "loss": 2.2409,
      "step": 42539
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.07835054397583,
      "learning_rate": 8.405386344107615e-06,
      "loss": 2.4082,
      "step": 42540
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1915442943572998,
      "learning_rate": 8.404979877212901e-06,
      "loss": 2.4637,
      "step": 42541
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1674704551696777,
      "learning_rate": 8.404573413022181e-06,
      "loss": 2.5469,
      "step": 42542
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0853848457336426,
      "learning_rate": 8.40416695153614e-06,
      "loss": 2.2147,
      "step": 42543
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0995527505874634,
      "learning_rate": 8.403760492755472e-06,
      "loss": 2.1879,
      "step": 42544
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.056077241897583,
      "learning_rate": 8.403354036680864e-06,
      "loss": 2.4231,
      "step": 42545
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9886137843132019,
      "learning_rate": 8.402947583313002e-06,
      "loss": 2.2545,
      "step": 42546
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0892215967178345,
      "learning_rate": 8.40254113265258e-06,
      "loss": 2.4608,
      "step": 42547
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0385552644729614,
      "learning_rate": 8.402134684700282e-06,
      "loss": 2.1585,
      "step": 42548
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3535029888153076,
      "learning_rate": 8.401728239456801e-06,
      "loss": 2.3893,
      "step": 42549
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.063707709312439,
      "learning_rate": 8.401321796922827e-06,
      "loss": 2.2643,
      "step": 42550
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2959766387939453,
      "learning_rate": 8.400915357099047e-06,
      "loss": 2.336,
      "step": 42551
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.151028037071228,
      "learning_rate": 8.400508919986149e-06,
      "loss": 2.3952,
      "step": 42552
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1109699010849,
      "learning_rate": 8.400102485584822e-06,
      "loss": 2.466,
      "step": 42553
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.106773853302002,
      "learning_rate": 8.399696053895754e-06,
      "loss": 2.377,
      "step": 42554
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1172856092453003,
      "learning_rate": 8.399289624919637e-06,
      "loss": 2.3118,
      "step": 42555
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.082707166671753,
      "learning_rate": 8.398883198657158e-06,
      "loss": 2.3198,
      "step": 42556
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1736201047897339,
      "learning_rate": 8.39847677510901e-06,
      "loss": 2.1529,
      "step": 42557
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1442984342575073,
      "learning_rate": 8.398070354275877e-06,
      "loss": 2.7134,
      "step": 42558
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0419135093688965,
      "learning_rate": 8.397663936158448e-06,
      "loss": 2.365,
      "step": 42559
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1232807636260986,
      "learning_rate": 8.397257520757415e-06,
      "loss": 2.4643,
      "step": 42560
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9204996228218079,
      "learning_rate": 8.396851108073468e-06,
      "loss": 2.646,
      "step": 42561
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7573661804199219,
      "learning_rate": 8.396444698107291e-06,
      "loss": 2.3655,
      "step": 42562
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0068923234939575,
      "learning_rate": 8.396038290859575e-06,
      "loss": 2.2741,
      "step": 42563
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1078213453292847,
      "learning_rate": 8.395631886331013e-06,
      "loss": 2.4107,
      "step": 42564
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.046006441116333,
      "learning_rate": 8.395225484522288e-06,
      "loss": 2.3489,
      "step": 42565
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1290570497512817,
      "learning_rate": 8.394819085434096e-06,
      "loss": 2.3092,
      "step": 42566
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.964130699634552,
      "learning_rate": 8.394412689067118e-06,
      "loss": 2.1786,
      "step": 42567
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1442615985870361,
      "learning_rate": 8.394006295422049e-06,
      "loss": 2.2024,
      "step": 42568
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.067916750907898,
      "learning_rate": 8.393599904499572e-06,
      "loss": 2.3588,
      "step": 42569
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.242161512374878,
      "learning_rate": 8.39319351630038e-06,
      "loss": 2.2972,
      "step": 42570
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0820907354354858,
      "learning_rate": 8.392787130825162e-06,
      "loss": 2.3713,
      "step": 42571
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2270658016204834,
      "learning_rate": 8.392380748074606e-06,
      "loss": 2.4834,
      "step": 42572
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1616898775100708,
      "learning_rate": 8.391974368049403e-06,
      "loss": 2.3258,
      "step": 42573
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.035841703414917,
      "learning_rate": 8.391567990750238e-06,
      "loss": 2.3921,
      "step": 42574
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1151225566864014,
      "learning_rate": 8.391161616177802e-06,
      "loss": 2.2997,
      "step": 42575
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1156245470046997,
      "learning_rate": 8.390755244332786e-06,
      "loss": 2.224,
      "step": 42576
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1251546144485474,
      "learning_rate": 8.390348875215874e-06,
      "loss": 2.3917,
      "step": 42577
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0217832326889038,
      "learning_rate": 8.38994250882776e-06,
      "loss": 2.3126,
      "step": 42578
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1566880941390991,
      "learning_rate": 8.389536145169136e-06,
      "loss": 2.3011,
      "step": 42579
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0780835151672363,
      "learning_rate": 8.389129784240679e-06,
      "loss": 2.0284,
      "step": 42580
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2721024751663208,
      "learning_rate": 8.388723426043086e-06,
      "loss": 2.3196,
      "step": 42581
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0042883157730103,
      "learning_rate": 8.388317070577044e-06,
      "loss": 2.41,
      "step": 42582
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9995154142379761,
      "learning_rate": 8.387910717843242e-06,
      "loss": 2.4605,
      "step": 42583
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9493402242660522,
      "learning_rate": 8.38750436784237e-06,
      "loss": 2.2221,
      "step": 42584
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1269582509994507,
      "learning_rate": 8.387098020575117e-06,
      "loss": 2.3433,
      "step": 42585
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.993911862373352,
      "learning_rate": 8.386691676042168e-06,
      "loss": 2.3695,
      "step": 42586
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0592877864837646,
      "learning_rate": 8.386285334244217e-06,
      "loss": 2.1619,
      "step": 42587
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0697113275527954,
      "learning_rate": 8.385878995181951e-06,
      "loss": 2.4546,
      "step": 42588
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1693717241287231,
      "learning_rate": 8.385472658856057e-06,
      "loss": 2.5575,
      "step": 42589
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1140546798706055,
      "learning_rate": 8.385066325267225e-06,
      "loss": 2.4579,
      "step": 42590
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0793237686157227,
      "learning_rate": 8.384659994416144e-06,
      "loss": 2.2409,
      "step": 42591
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1175868511199951,
      "learning_rate": 8.38425366630351e-06,
      "loss": 2.3706,
      "step": 42592
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.08732008934021,
      "learning_rate": 8.383847340929998e-06,
      "loss": 2.407,
      "step": 42593
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.061823844909668,
      "learning_rate": 8.383441018296305e-06,
      "loss": 2.4228,
      "step": 42594
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0342286825180054,
      "learning_rate": 8.383034698403118e-06,
      "loss": 2.2808,
      "step": 42595
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0192856788635254,
      "learning_rate": 8.382628381251125e-06,
      "loss": 2.399,
      "step": 42596
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0474048852920532,
      "learning_rate": 8.382222066841018e-06,
      "loss": 2.3136,
      "step": 42597
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.115832805633545,
      "learning_rate": 8.381815755173484e-06,
      "loss": 2.239,
      "step": 42598
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1279815435409546,
      "learning_rate": 8.38140944624921e-06,
      "loss": 2.2847,
      "step": 42599
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0489156246185303,
      "learning_rate": 8.381003140068888e-06,
      "loss": 2.5048,
      "step": 42600
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.225380301475525,
      "learning_rate": 8.380596836633205e-06,
      "loss": 2.5841,
      "step": 42601
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0155798196792603,
      "learning_rate": 8.380190535942851e-06,
      "loss": 2.0111,
      "step": 42602
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1567970514297485,
      "learning_rate": 8.379784237998513e-06,
      "loss": 2.2648,
      "step": 42603
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4130281209945679,
      "learning_rate": 8.379377942800881e-06,
      "loss": 2.3559,
      "step": 42604
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1155140399932861,
      "learning_rate": 8.378971650350648e-06,
      "loss": 2.4295,
      "step": 42605
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0951759815216064,
      "learning_rate": 8.378565360648495e-06,
      "loss": 2.3363,
      "step": 42606
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0439451932907104,
      "learning_rate": 8.37815907369511e-06,
      "loss": 2.3458,
      "step": 42607
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.068246603012085,
      "learning_rate": 8.377752789491192e-06,
      "loss": 2.2651,
      "step": 42608
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0631568431854248,
      "learning_rate": 8.37734650803742e-06,
      "loss": 2.4684,
      "step": 42609
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0818836688995361,
      "learning_rate": 8.376940229334486e-06,
      "loss": 2.3977,
      "step": 42610
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1407667398452759,
      "learning_rate": 8.37653395338308e-06,
      "loss": 2.3902,
      "step": 42611
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0209691524505615,
      "learning_rate": 8.37612768018389e-06,
      "loss": 2.0978,
      "step": 42612
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9777048230171204,
      "learning_rate": 8.375721409737607e-06,
      "loss": 2.2097,
      "step": 42613
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1847383975982666,
      "learning_rate": 8.375315142044915e-06,
      "loss": 2.3796,
      "step": 42614
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1928387880325317,
      "learning_rate": 8.374908877106505e-06,
      "loss": 2.2086,
      "step": 42615
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1131824254989624,
      "learning_rate": 8.374502614923066e-06,
      "loss": 2.3456,
      "step": 42616
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1320631504058838,
      "learning_rate": 8.374096355495285e-06,
      "loss": 2.1391,
      "step": 42617
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9957135915756226,
      "learning_rate": 8.373690098823858e-06,
      "loss": 2.3237,
      "step": 42618
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3455840349197388,
      "learning_rate": 8.373283844909465e-06,
      "loss": 2.4365,
      "step": 42619
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9843401908874512,
      "learning_rate": 8.372877593752796e-06,
      "loss": 2.2503,
      "step": 42620
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.185410976409912,
      "learning_rate": 8.372471345354542e-06,
      "loss": 2.453,
      "step": 42621
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9702674150466919,
      "learning_rate": 8.37206509971539e-06,
      "loss": 2.2641,
      "step": 42622
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1388285160064697,
      "learning_rate": 8.37165885683603e-06,
      "loss": 2.416,
      "step": 42623
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.0766361951828003,
      "learning_rate": 8.371252616717151e-06,
      "loss": 2.3571,
      "step": 42624
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.041365146636963,
      "learning_rate": 8.370846379359442e-06,
      "loss": 2.148,
      "step": 42625
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.23891019821167,
      "learning_rate": 8.37044014476359e-06,
      "loss": 2.4994,
      "step": 42626
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0199614763259888,
      "learning_rate": 8.370033912930284e-06,
      "loss": 2.5612,
      "step": 42627
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1171249151229858,
      "learning_rate": 8.369627683860215e-06,
      "loss": 2.2479,
      "step": 42628
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0641170740127563,
      "learning_rate": 8.36922145755407e-06,
      "loss": 2.448,
      "step": 42629
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0919139385223389,
      "learning_rate": 8.368815234012535e-06,
      "loss": 2.2124,
      "step": 42630
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.057565689086914,
      "learning_rate": 8.368409013236307e-06,
      "loss": 2.0637,
      "step": 42631
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0841598510742188,
      "learning_rate": 8.368002795226063e-06,
      "loss": 2.2049,
      "step": 42632
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.043798804283142,
      "learning_rate": 8.3675965799825e-06,
      "loss": 2.3518,
      "step": 42633
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1895818710327148,
      "learning_rate": 8.367190367506303e-06,
      "loss": 2.3265,
      "step": 42634
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0980092287063599,
      "learning_rate": 8.366784157798161e-06,
      "loss": 2.289,
      "step": 42635
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9969915747642517,
      "learning_rate": 8.366377950858763e-06,
      "loss": 2.3007,
      "step": 42636
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0499223470687866,
      "learning_rate": 8.3659717466888e-06,
      "loss": 2.3295,
      "step": 42637
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1008108854293823,
      "learning_rate": 8.365565545288958e-06,
      "loss": 2.4899,
      "step": 42638
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0322457551956177,
      "learning_rate": 8.365159346659926e-06,
      "loss": 2.0842,
      "step": 42639
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.753327488899231,
      "learning_rate": 8.364753150802393e-06,
      "loss": 2.2809,
      "step": 42640
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9594206809997559,
      "learning_rate": 8.364346957717051e-06,
      "loss": 2.2017,
      "step": 42641
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1460872888565063,
      "learning_rate": 8.363940767404582e-06,
      "loss": 2.4752,
      "step": 42642
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0378271341323853,
      "learning_rate": 8.363534579865678e-06,
      "loss": 2.2981,
      "step": 42643
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0684908628463745,
      "learning_rate": 8.363128395101025e-06,
      "loss": 2.4402,
      "step": 42644
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1144741773605347,
      "learning_rate": 8.36272221311132e-06,
      "loss": 2.1213,
      "step": 42645
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0295684337615967,
      "learning_rate": 8.36231603389724e-06,
      "loss": 2.3112,
      "step": 42646
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0457268953323364,
      "learning_rate": 8.361909857459484e-06,
      "loss": 2.3728,
      "step": 42647
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0288163423538208,
      "learning_rate": 8.361503683798732e-06,
      "loss": 2.2861,
      "step": 42648
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0161492824554443,
      "learning_rate": 8.361097512915675e-06,
      "loss": 2.2374,
      "step": 42649
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0695686340332031,
      "learning_rate": 8.360691344811005e-06,
      "loss": 2.3041,
      "step": 42650
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1815645694732666,
      "learning_rate": 8.360285179485408e-06,
      "loss": 2.1339,
      "step": 42651
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1399670839309692,
      "learning_rate": 8.359879016939573e-06,
      "loss": 2.3921,
      "step": 42652
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.173322319984436,
      "learning_rate": 8.359472857174188e-06,
      "loss": 2.5168,
      "step": 42653
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0597968101501465,
      "learning_rate": 8.359066700189941e-06,
      "loss": 2.3824,
      "step": 42654
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1139857769012451,
      "learning_rate": 8.358660545987523e-06,
      "loss": 2.2221,
      "step": 42655
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.240169882774353,
      "learning_rate": 8.358254394567622e-06,
      "loss": 2.3531,
      "step": 42656
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.052071213722229,
      "learning_rate": 8.357848245930925e-06,
      "loss": 2.2611,
      "step": 42657
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1523898839950562,
      "learning_rate": 8.357442100078124e-06,
      "loss": 2.431,
      "step": 42658
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9986776113510132,
      "learning_rate": 8.357035957009903e-06,
      "loss": 2.31,
      "step": 42659
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9983615875244141,
      "learning_rate": 8.35662981672695e-06,
      "loss": 2.1123,
      "step": 42660
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1464557647705078,
      "learning_rate": 8.356223679229958e-06,
      "loss": 2.4028,
      "step": 42661
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1124539375305176,
      "learning_rate": 8.355817544519611e-06,
      "loss": 2.187,
      "step": 42662
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1032694578170776,
      "learning_rate": 8.355411412596601e-06,
      "loss": 2.3585,
      "step": 42663
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2421358823776245,
      "learning_rate": 8.355005283461616e-06,
      "loss": 2.3036,
      "step": 42664
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.097099781036377,
      "learning_rate": 8.354599157115344e-06,
      "loss": 2.3066,
      "step": 42665
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0776715278625488,
      "learning_rate": 8.354193033558471e-06,
      "loss": 2.476,
      "step": 42666
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0038913488388062,
      "learning_rate": 8.353786912791689e-06,
      "loss": 2.2648,
      "step": 42667
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.055781602859497,
      "learning_rate": 8.353380794815686e-06,
      "loss": 2.5124,
      "step": 42668
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0556567907333374,
      "learning_rate": 8.352974679631151e-06,
      "loss": 2.4507,
      "step": 42669
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2650201320648193,
      "learning_rate": 8.35256856723877e-06,
      "loss": 2.3644,
      "step": 42670
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1099830865859985,
      "learning_rate": 8.352162457639235e-06,
      "loss": 2.2417,
      "step": 42671
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0831338167190552,
      "learning_rate": 8.351756350833229e-06,
      "loss": 2.6255,
      "step": 42672
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0405054092407227,
      "learning_rate": 8.351350246821443e-06,
      "loss": 2.361,
      "step": 42673
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1040140390396118,
      "learning_rate": 8.350944145604568e-06,
      "loss": 2.2472,
      "step": 42674
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0144816637039185,
      "learning_rate": 8.35053804718329e-06,
      "loss": 2.3727,
      "step": 42675
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0868620872497559,
      "learning_rate": 8.350131951558298e-06,
      "loss": 2.3935,
      "step": 42676
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.165048599243164,
      "learning_rate": 8.349725858730282e-06,
      "loss": 2.3178,
      "step": 42677
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1174734830856323,
      "learning_rate": 8.349319768699927e-06,
      "loss": 2.1613,
      "step": 42678
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.201722502708435,
      "learning_rate": 8.348913681467923e-06,
      "loss": 2.254,
      "step": 42679
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0835552215576172,
      "learning_rate": 8.348507597034958e-06,
      "loss": 2.3938,
      "step": 42680
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.138029932975769,
      "learning_rate": 8.348101515401722e-06,
      "loss": 2.3226,
      "step": 42681
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0766105651855469,
      "learning_rate": 8.347695436568903e-06,
      "loss": 2.285,
      "step": 42682
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9594473838806152,
      "learning_rate": 8.34728936053719e-06,
      "loss": 2.2852,
      "step": 42683
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2133350372314453,
      "learning_rate": 8.346883287307272e-06,
      "loss": 2.2707,
      "step": 42684
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0498361587524414,
      "learning_rate": 8.346477216879834e-06,
      "loss": 2.1113,
      "step": 42685
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0636879205703735,
      "learning_rate": 8.346071149255564e-06,
      "loss": 2.4147,
      "step": 42686
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.005435585975647,
      "learning_rate": 8.345665084435154e-06,
      "loss": 2.4588,
      "step": 42687
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0344337224960327,
      "learning_rate": 8.34525902241929e-06,
      "loss": 2.2837,
      "step": 42688
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.143536925315857,
      "learning_rate": 8.344852963208663e-06,
      "loss": 2.4218,
      "step": 42689
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0692459344863892,
      "learning_rate": 8.34444690680396e-06,
      "loss": 2.5108,
      "step": 42690
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0081372261047363,
      "learning_rate": 8.344040853205867e-06,
      "loss": 2.1585,
      "step": 42691
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0789613723754883,
      "learning_rate": 8.343634802415076e-06,
      "loss": 2.2465,
      "step": 42692
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0389643907546997,
      "learning_rate": 8.343228754432273e-06,
      "loss": 2.4269,
      "step": 42693
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1119589805603027,
      "learning_rate": 8.342822709258147e-06,
      "loss": 2.3146,
      "step": 42694
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.098396897315979,
      "learning_rate": 8.342416666893389e-06,
      "loss": 2.6391,
      "step": 42695
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1630779504776,
      "learning_rate": 8.34201062733868e-06,
      "loss": 2.3604,
      "step": 42696
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0574995279312134,
      "learning_rate": 8.341604590594723e-06,
      "loss": 2.3448,
      "step": 42697
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0410335063934326,
      "learning_rate": 8.34119855666219e-06,
      "loss": 2.325,
      "step": 42698
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.002223014831543,
      "learning_rate": 8.340792525541776e-06,
      "loss": 2.362,
      "step": 42699
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1707546710968018,
      "learning_rate": 8.34038649723417e-06,
      "loss": 2.299,
      "step": 42700
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1893783807754517,
      "learning_rate": 8.339980471740058e-06,
      "loss": 2.3037,
      "step": 42701
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1409517526626587,
      "learning_rate": 8.339574449060132e-06,
      "loss": 2.4509,
      "step": 42702
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0484734773635864,
      "learning_rate": 8.33916842919508e-06,
      "loss": 2.2159,
      "step": 42703
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0172491073608398,
      "learning_rate": 8.338762412145585e-06,
      "loss": 2.1817,
      "step": 42704
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0318584442138672,
      "learning_rate": 8.338356397912339e-06,
      "loss": 2.404,
      "step": 42705
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0766717195510864,
      "learning_rate": 8.337950386496032e-06,
      "loss": 2.4261,
      "step": 42706
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0634456872940063,
      "learning_rate": 8.337544377897349e-06,
      "loss": 2.4053,
      "step": 42707
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0996793508529663,
      "learning_rate": 8.33713837211698e-06,
      "loss": 2.4646,
      "step": 42708
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0012465715408325,
      "learning_rate": 8.336732369155614e-06,
      "loss": 2.1111,
      "step": 42709
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.142599105834961,
      "learning_rate": 8.336326369013937e-06,
      "loss": 2.3799,
      "step": 42710
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.235154390335083,
      "learning_rate": 8.335920371692644e-06,
      "loss": 2.3647,
      "step": 42711
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1016159057617188,
      "learning_rate": 8.335514377192413e-06,
      "loss": 2.4275,
      "step": 42712
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0312464237213135,
      "learning_rate": 8.335108385513937e-06,
      "loss": 2.4885,
      "step": 42713
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.072888970375061,
      "learning_rate": 8.334702396657905e-06,
      "loss": 2.5149,
      "step": 42714
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1189255714416504,
      "learning_rate": 8.334296410625005e-06,
      "loss": 2.3544,
      "step": 42715
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9796435236930847,
      "learning_rate": 8.333890427415925e-06,
      "loss": 2.3884,
      "step": 42716
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1956512928009033,
      "learning_rate": 8.333484447031352e-06,
      "loss": 2.2964,
      "step": 42717
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0035291910171509,
      "learning_rate": 8.333078469471977e-06,
      "loss": 2.4143,
      "step": 42718
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1053266525268555,
      "learning_rate": 8.332672494738486e-06,
      "loss": 2.417,
      "step": 42719
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1139254570007324,
      "learning_rate": 8.332266522831568e-06,
      "loss": 2.3664,
      "step": 42720
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0313239097595215,
      "learning_rate": 8.331860553751912e-06,
      "loss": 2.4986,
      "step": 42721
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2445281744003296,
      "learning_rate": 8.331454587500205e-06,
      "loss": 2.2181,
      "step": 42722
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0401713848114014,
      "learning_rate": 8.331048624077133e-06,
      "loss": 2.3041,
      "step": 42723
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1531250476837158,
      "learning_rate": 8.330642663483396e-06,
      "loss": 2.3089,
      "step": 42724
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0238040685653687,
      "learning_rate": 8.330236705719665e-06,
      "loss": 2.4019,
      "step": 42725
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0611933469772339,
      "learning_rate": 8.329830750786636e-06,
      "loss": 2.3908,
      "step": 42726
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.039197564125061,
      "learning_rate": 8.329424798685e-06,
      "loss": 2.24,
      "step": 42727
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1612238883972168,
      "learning_rate": 8.329018849415443e-06,
      "loss": 2.4881,
      "step": 42728
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0867233276367188,
      "learning_rate": 8.328612902978651e-06,
      "loss": 2.2605,
      "step": 42729
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0118571519851685,
      "learning_rate": 8.328206959375314e-06,
      "loss": 2.2023,
      "step": 42730
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1188613176345825,
      "learning_rate": 8.327801018606123e-06,
      "loss": 2.3842,
      "step": 42731
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1454367637634277,
      "learning_rate": 8.327395080671762e-06,
      "loss": 2.4055,
      "step": 42732
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0803426504135132,
      "learning_rate": 8.32698914557292e-06,
      "loss": 2.2842,
      "step": 42733
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0749001502990723,
      "learning_rate": 8.326583213310285e-06,
      "loss": 2.4594,
      "step": 42734
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1215847730636597,
      "learning_rate": 8.326177283884546e-06,
      "loss": 2.5016,
      "step": 42735
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0873396396636963,
      "learning_rate": 8.325771357296393e-06,
      "loss": 2.2228,
      "step": 42736
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1210978031158447,
      "learning_rate": 8.325365433546515e-06,
      "loss": 2.1283,
      "step": 42737
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.021729588508606,
      "learning_rate": 8.324959512635594e-06,
      "loss": 2.5494,
      "step": 42738
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0911442041397095,
      "learning_rate": 8.324553594564321e-06,
      "loss": 2.3957,
      "step": 42739
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9960671067237854,
      "learning_rate": 8.324147679333386e-06,
      "loss": 2.2504,
      "step": 42740
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0924232006072998,
      "learning_rate": 8.323741766943473e-06,
      "loss": 2.2417,
      "step": 42741
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1294009685516357,
      "learning_rate": 8.323335857395275e-06,
      "loss": 2.2515,
      "step": 42742
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0212212800979614,
      "learning_rate": 8.322929950689477e-06,
      "loss": 2.1633,
      "step": 42743
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0538197755813599,
      "learning_rate": 8.322524046826769e-06,
      "loss": 2.264,
      "step": 42744
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0864691734313965,
      "learning_rate": 8.322118145807839e-06,
      "loss": 2.5481,
      "step": 42745
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0495929718017578,
      "learning_rate": 8.321712247633373e-06,
      "loss": 2.1227,
      "step": 42746
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.997551441192627,
      "learning_rate": 8.321306352304062e-06,
      "loss": 2.3513,
      "step": 42747
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1237797737121582,
      "learning_rate": 8.32090045982059e-06,
      "loss": 2.3863,
      "step": 42748
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0485092401504517,
      "learning_rate": 8.32049457018365e-06,
      "loss": 2.4484,
      "step": 42749
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1557661294937134,
      "learning_rate": 8.320088683393933e-06,
      "loss": 2.4752,
      "step": 42750
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9326133728027344,
      "learning_rate": 8.319682799452115e-06,
      "loss": 2.3701,
      "step": 42751
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0745346546173096,
      "learning_rate": 8.319276918358894e-06,
      "loss": 2.3327,
      "step": 42752
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4031397104263306,
      "learning_rate": 8.318871040114953e-06,
      "loss": 2.2952,
      "step": 42753
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0761829614639282,
      "learning_rate": 8.318465164720984e-06,
      "loss": 2.3226,
      "step": 42754
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.991763174533844,
      "learning_rate": 8.318059292177671e-06,
      "loss": 2.4863,
      "step": 42755
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.213227391242981,
      "learning_rate": 8.317653422485705e-06,
      "loss": 2.3358,
      "step": 42756
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0587042570114136,
      "learning_rate": 8.317247555645772e-06,
      "loss": 2.3767,
      "step": 42757
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0859805345535278,
      "learning_rate": 8.316841691658565e-06,
      "loss": 2.3795,
      "step": 42758
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9054247736930847,
      "learning_rate": 8.316435830524768e-06,
      "loss": 2.3747,
      "step": 42759
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.048794150352478,
      "learning_rate": 8.316029972245068e-06,
      "loss": 2.455,
      "step": 42760
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.193895936012268,
      "learning_rate": 8.315624116820156e-06,
      "loss": 2.4195,
      "step": 42761
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0699058771133423,
      "learning_rate": 8.315218264250716e-06,
      "loss": 2.4088,
      "step": 42762
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.161105990409851,
      "learning_rate": 8.314812414537441e-06,
      "loss": 2.2214,
      "step": 42763
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1315239667892456,
      "learning_rate": 8.314406567681016e-06,
      "loss": 2.0764,
      "step": 42764
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.210083246231079,
      "learning_rate": 8.314000723682131e-06,
      "loss": 2.466,
      "step": 42765
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0252623558044434,
      "learning_rate": 8.313594882541471e-06,
      "loss": 2.1667,
      "step": 42766
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1053558588027954,
      "learning_rate": 8.313189044259727e-06,
      "loss": 2.4051,
      "step": 42767
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0694231986999512,
      "learning_rate": 8.312783208837584e-06,
      "loss": 2.4151,
      "step": 42768
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0678727626800537,
      "learning_rate": 8.312377376275732e-06,
      "loss": 2.4071,
      "step": 42769
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9991509318351746,
      "learning_rate": 8.311971546574859e-06,
      "loss": 2.3692,
      "step": 42770
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0471704006195068,
      "learning_rate": 8.311565719735652e-06,
      "loss": 2.179,
      "step": 42771
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1894780397415161,
      "learning_rate": 8.311159895758801e-06,
      "loss": 2.459,
      "step": 42772
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0497968196868896,
      "learning_rate": 8.31075407464499e-06,
      "loss": 2.3968,
      "step": 42773
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0859756469726562,
      "learning_rate": 8.310348256394912e-06,
      "loss": 2.4207,
      "step": 42774
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1216628551483154,
      "learning_rate": 8.30994244100925e-06,
      "loss": 2.1301,
      "step": 42775
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2587039470672607,
      "learning_rate": 8.309536628488702e-06,
      "loss": 2.3723,
      "step": 42776
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1444010734558105,
      "learning_rate": 8.309130818833943e-06,
      "loss": 2.1673,
      "step": 42777
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3836777210235596,
      "learning_rate": 8.308725012045666e-06,
      "loss": 2.399,
      "step": 42778
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3566055297851562,
      "learning_rate": 8.30831920812456e-06,
      "loss": 2.2849,
      "step": 42779
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2375160455703735,
      "learning_rate": 8.307913407071313e-06,
      "loss": 2.3442,
      "step": 42780
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0212719440460205,
      "learning_rate": 8.30750760888661e-06,
      "loss": 2.3602,
      "step": 42781
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1323273181915283,
      "learning_rate": 8.307101813571144e-06,
      "loss": 2.2588,
      "step": 42782
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0962185859680176,
      "learning_rate": 8.3066960211256e-06,
      "loss": 2.3819,
      "step": 42783
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1421953439712524,
      "learning_rate": 8.306290231550666e-06,
      "loss": 2.4452,
      "step": 42784
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.25371515750885,
      "learning_rate": 8.305884444847029e-06,
      "loss": 2.5849,
      "step": 42785
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1348304748535156,
      "learning_rate": 8.305478661015378e-06,
      "loss": 2.3754,
      "step": 42786
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1345276832580566,
      "learning_rate": 8.305072880056403e-06,
      "loss": 2.3008,
      "step": 42787
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0043683052062988,
      "learning_rate": 8.30466710197079e-06,
      "loss": 2.2057,
      "step": 42788
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.857952356338501,
      "learning_rate": 8.304261326759224e-06,
      "loss": 2.2779,
      "step": 42789
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0412229299545288,
      "learning_rate": 8.303855554422397e-06,
      "loss": 2.5547,
      "step": 42790
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1179770231246948,
      "learning_rate": 8.303449784960995e-06,
      "loss": 2.3548,
      "step": 42791
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0938794612884521,
      "learning_rate": 8.303044018375708e-06,
      "loss": 2.4245,
      "step": 42792
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.113092303276062,
      "learning_rate": 8.302638254667221e-06,
      "loss": 2.5632,
      "step": 42793
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.128037691116333,
      "learning_rate": 8.302232493836223e-06,
      "loss": 2.4497,
      "step": 42794
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9996402859687805,
      "learning_rate": 8.301826735883402e-06,
      "loss": 2.2913,
      "step": 42795
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0232306718826294,
      "learning_rate": 8.301420980809446e-06,
      "loss": 2.4087,
      "step": 42796
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0876697301864624,
      "learning_rate": 8.301015228615043e-06,
      "loss": 2.4713,
      "step": 42797
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2791506052017212,
      "learning_rate": 8.300609479300879e-06,
      "loss": 2.1752,
      "step": 42798
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0671906471252441,
      "learning_rate": 8.300203732867645e-06,
      "loss": 2.2785,
      "step": 42799
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.173242449760437,
      "learning_rate": 8.299797989316027e-06,
      "loss": 2.4359,
      "step": 42800
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1305367946624756,
      "learning_rate": 8.299392248646713e-06,
      "loss": 2.367,
      "step": 42801
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0610800981521606,
      "learning_rate": 8.29898651086039e-06,
      "loss": 2.3737,
      "step": 42802
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0263590812683105,
      "learning_rate": 8.298580775957753e-06,
      "loss": 2.5254,
      "step": 42803
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1906554698944092,
      "learning_rate": 8.298175043939478e-06,
      "loss": 2.2786,
      "step": 42804
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9614422917366028,
      "learning_rate": 8.29776931480626e-06,
      "loss": 2.6165,
      "step": 42805
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1224448680877686,
      "learning_rate": 8.297363588558785e-06,
      "loss": 2.2552,
      "step": 42806
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.032062292098999,
      "learning_rate": 8.29695786519774e-06,
      "loss": 2.2995,
      "step": 42807
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0453734397888184,
      "learning_rate": 8.296552144723814e-06,
      "loss": 2.3227,
      "step": 42808
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0520237684249878,
      "learning_rate": 8.296146427137696e-06,
      "loss": 2.1788,
      "step": 42809
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1073713302612305,
      "learning_rate": 8.295740712440071e-06,
      "loss": 2.4184,
      "step": 42810
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.030360460281372,
      "learning_rate": 8.29533500063163e-06,
      "loss": 2.3519,
      "step": 42811
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0922812223434448,
      "learning_rate": 8.294929291713058e-06,
      "loss": 2.2749,
      "step": 42812
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0141011476516724,
      "learning_rate": 8.294523585685044e-06,
      "loss": 2.037,
      "step": 42813
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1060717105865479,
      "learning_rate": 8.294117882548275e-06,
      "loss": 2.3543,
      "step": 42814
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0949766635894775,
      "learning_rate": 8.293712182303445e-06,
      "loss": 2.1796,
      "step": 42815
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0713469982147217,
      "learning_rate": 8.293306484951232e-06,
      "loss": 2.2451,
      "step": 42816
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9711352586746216,
      "learning_rate": 8.292900790492329e-06,
      "loss": 2.2767,
      "step": 42817
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.112369418144226,
      "learning_rate": 8.292495098927421e-06,
      "loss": 2.2601,
      "step": 42818
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0978155136108398,
      "learning_rate": 8.292089410257197e-06,
      "loss": 2.395,
      "step": 42819
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0007719993591309,
      "learning_rate": 8.291683724482348e-06,
      "loss": 2.2799,
      "step": 42820
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9753283262252808,
      "learning_rate": 8.29127804160356e-06,
      "loss": 2.3534,
      "step": 42821
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0808045864105225,
      "learning_rate": 8.290872361621517e-06,
      "loss": 2.3196,
      "step": 42822
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0272547006607056,
      "learning_rate": 8.29046668453691e-06,
      "loss": 2.5262,
      "step": 42823
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9811088442802429,
      "learning_rate": 8.290061010350426e-06,
      "loss": 2.2117,
      "step": 42824
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.178977370262146,
      "learning_rate": 8.289655339062754e-06,
      "loss": 2.4979,
      "step": 42825
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1403151750564575,
      "learning_rate": 8.28924967067458e-06,
      "loss": 2.2826,
      "step": 42826
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1243139505386353,
      "learning_rate": 8.28884400518659e-06,
      "loss": 2.3478,
      "step": 42827
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1719486713409424,
      "learning_rate": 8.288438342599478e-06,
      "loss": 2.3754,
      "step": 42828
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0802006721496582,
      "learning_rate": 8.28803268291393e-06,
      "loss": 2.5025,
      "step": 42829
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.026253342628479,
      "learning_rate": 8.28762702613063e-06,
      "loss": 2.326,
      "step": 42830
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2379589080810547,
      "learning_rate": 8.287221372250264e-06,
      "loss": 2.4206,
      "step": 42831
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.106956958770752,
      "learning_rate": 8.286815721273524e-06,
      "loss": 2.0104,
      "step": 42832
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0692689418792725,
      "learning_rate": 8.286410073201097e-06,
      "loss": 2.5219,
      "step": 42833
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0081995725631714,
      "learning_rate": 8.28600442803367e-06,
      "loss": 2.3631,
      "step": 42834
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9618228673934937,
      "learning_rate": 8.285598785771932e-06,
      "loss": 2.4047,
      "step": 42835
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0722888708114624,
      "learning_rate": 8.285193146416568e-06,
      "loss": 2.3948,
      "step": 42836
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.026315450668335,
      "learning_rate": 8.284787509968268e-06,
      "loss": 2.1859,
      "step": 42837
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.315151572227478,
      "learning_rate": 8.28438187642772e-06,
      "loss": 2.5699,
      "step": 42838
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.05588960647583,
      "learning_rate": 8.28397624579561e-06,
      "loss": 2.475,
      "step": 42839
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.145336627960205,
      "learning_rate": 8.283570618072624e-06,
      "loss": 2.4537,
      "step": 42840
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0963265895843506,
      "learning_rate": 8.283164993259456e-06,
      "loss": 2.359,
      "step": 42841
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1696221828460693,
      "learning_rate": 8.282759371356792e-06,
      "loss": 2.217,
      "step": 42842
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1517412662506104,
      "learning_rate": 8.282353752365313e-06,
      "loss": 2.4112,
      "step": 42843
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0512088537216187,
      "learning_rate": 8.281948136285711e-06,
      "loss": 2.2146,
      "step": 42844
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.078169822692871,
      "learning_rate": 8.281542523118673e-06,
      "loss": 2.159,
      "step": 42845
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0835597515106201,
      "learning_rate": 8.281136912864888e-06,
      "loss": 2.3328,
      "step": 42846
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0825960636138916,
      "learning_rate": 8.280731305525043e-06,
      "loss": 2.1533,
      "step": 42847
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.108483910560608,
      "learning_rate": 8.280325701099824e-06,
      "loss": 2.1928,
      "step": 42848
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1955422163009644,
      "learning_rate": 8.279920099589922e-06,
      "loss": 2.3893,
      "step": 42849
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0445586442947388,
      "learning_rate": 8.279514500996022e-06,
      "loss": 2.2839,
      "step": 42850
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1283100843429565,
      "learning_rate": 8.279108905318813e-06,
      "loss": 2.3946,
      "step": 42851
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9750227928161621,
      "learning_rate": 8.27870331255898e-06,
      "loss": 2.2392,
      "step": 42852
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1118971109390259,
      "learning_rate": 8.278297722717212e-06,
      "loss": 2.456,
      "step": 42853
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0210607051849365,
      "learning_rate": 8.277892135794198e-06,
      "loss": 2.324,
      "step": 42854
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0967789888381958,
      "learning_rate": 8.277486551790625e-06,
      "loss": 2.2678,
      "step": 42855
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0506134033203125,
      "learning_rate": 8.277080970707184e-06,
      "loss": 2.4656,
      "step": 42856
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2007728815078735,
      "learning_rate": 8.276675392544553e-06,
      "loss": 2.3559,
      "step": 42857
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9801191091537476,
      "learning_rate": 8.276269817303427e-06,
      "loss": 2.3112,
      "step": 42858
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3261544704437256,
      "learning_rate": 8.275864244984491e-06,
      "loss": 2.1375,
      "step": 42859
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.370785117149353,
      "learning_rate": 8.275458675588433e-06,
      "loss": 2.3639,
      "step": 42860
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0430163145065308,
      "learning_rate": 8.27505310911594e-06,
      "loss": 2.2974,
      "step": 42861
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.05928373336792,
      "learning_rate": 8.274647545567703e-06,
      "loss": 1.9896,
      "step": 42862
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2282638549804688,
      "learning_rate": 8.274241984944407e-06,
      "loss": 2.3405,
      "step": 42863
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.256133794784546,
      "learning_rate": 8.273836427246738e-06,
      "loss": 2.1278,
      "step": 42864
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3536641597747803,
      "learning_rate": 8.273430872475386e-06,
      "loss": 2.4358,
      "step": 42865
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0926769971847534,
      "learning_rate": 8.273025320631036e-06,
      "loss": 2.4166,
      "step": 42866
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1159145832061768,
      "learning_rate": 8.27261977171438e-06,
      "loss": 2.5877,
      "step": 42867
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2124212980270386,
      "learning_rate": 8.2722142257261e-06,
      "loss": 2.3076,
      "step": 42868
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1988606452941895,
      "learning_rate": 8.271808682666891e-06,
      "loss": 2.33,
      "step": 42869
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2463717460632324,
      "learning_rate": 8.271403142537432e-06,
      "loss": 2.2788,
      "step": 42870
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1466965675354004,
      "learning_rate": 8.270997605338414e-06,
      "loss": 2.1615,
      "step": 42871
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.080673336982727,
      "learning_rate": 8.270592071070525e-06,
      "loss": 2.5348,
      "step": 42872
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0912690162658691,
      "learning_rate": 8.270186539734452e-06,
      "loss": 2.289,
      "step": 42873
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.8526220321655273,
      "learning_rate": 8.269781011330881e-06,
      "loss": 2.3398,
      "step": 42874
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2072787284851074,
      "learning_rate": 8.269375485860503e-06,
      "loss": 2.2883,
      "step": 42875
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0399898290634155,
      "learning_rate": 8.268969963324003e-06,
      "loss": 2.3367,
      "step": 42876
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9403125047683716,
      "learning_rate": 8.26856444372207e-06,
      "loss": 2.2633,
      "step": 42877
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1134042739868164,
      "learning_rate": 8.26815892705539e-06,
      "loss": 2.2223,
      "step": 42878
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0150244235992432,
      "learning_rate": 8.26775341332465e-06,
      "loss": 2.5371,
      "step": 42879
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0246151685714722,
      "learning_rate": 8.26734790253054e-06,
      "loss": 2.6131,
      "step": 42880
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.093086838722229,
      "learning_rate": 8.266942394673745e-06,
      "loss": 2.0556,
      "step": 42881
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0036365985870361,
      "learning_rate": 8.266536889754956e-06,
      "loss": 2.2722,
      "step": 42882
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1540577411651611,
      "learning_rate": 8.266131387774856e-06,
      "loss": 2.4157,
      "step": 42883
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1310020685195923,
      "learning_rate": 8.265725888734134e-06,
      "loss": 2.5421,
      "step": 42884
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1822153329849243,
      "learning_rate": 8.265320392633476e-06,
      "loss": 2.2796,
      "step": 42885
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0253517627716064,
      "learning_rate": 8.264914899473573e-06,
      "loss": 2.3513,
      "step": 42886
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0786893367767334,
      "learning_rate": 8.264509409255108e-06,
      "loss": 2.4589,
      "step": 42887
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.600944995880127,
      "learning_rate": 8.264103921978774e-06,
      "loss": 2.3114,
      "step": 42888
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0536848306655884,
      "learning_rate": 8.263698437645254e-06,
      "loss": 2.1378,
      "step": 42889
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0710620880126953,
      "learning_rate": 8.263292956255236e-06,
      "loss": 2.4276,
      "step": 42890
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1744513511657715,
      "learning_rate": 8.262887477809406e-06,
      "loss": 2.1666,
      "step": 42891
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0924396514892578,
      "learning_rate": 8.262482002308458e-06,
      "loss": 2.3903,
      "step": 42892
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0703935623168945,
      "learning_rate": 8.262076529753073e-06,
      "loss": 2.2814,
      "step": 42893
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0562996864318848,
      "learning_rate": 8.26167106014394e-06,
      "loss": 2.2966,
      "step": 42894
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1259405612945557,
      "learning_rate": 8.261265593481752e-06,
      "loss": 2.339,
      "step": 42895
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1533855199813843,
      "learning_rate": 8.260860129767185e-06,
      "loss": 2.4463,
      "step": 42896
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0441994667053223,
      "learning_rate": 8.260454669000932e-06,
      "loss": 2.4516,
      "step": 42897
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.098267674446106,
      "learning_rate": 8.260049211183682e-06,
      "loss": 2.2127,
      "step": 42898
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0737110376358032,
      "learning_rate": 8.259643756316122e-06,
      "loss": 2.3925,
      "step": 42899
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0318454504013062,
      "learning_rate": 8.259238304398938e-06,
      "loss": 2.3858,
      "step": 42900
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1630183458328247,
      "learning_rate": 8.258832855432818e-06,
      "loss": 2.3796,
      "step": 42901
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0150035619735718,
      "learning_rate": 8.258427409418448e-06,
      "loss": 2.2224,
      "step": 42902
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1347850561141968,
      "learning_rate": 8.258021966356517e-06,
      "loss": 2.3754,
      "step": 42903
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.259027361869812,
      "learning_rate": 8.257616526247713e-06,
      "loss": 2.1721,
      "step": 42904
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.21320378780365,
      "learning_rate": 8.257211089092724e-06,
      "loss": 2.2273,
      "step": 42905
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1757231950759888,
      "learning_rate": 8.256805654892233e-06,
      "loss": 2.4003,
      "step": 42906
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0482045412063599,
      "learning_rate": 8.25640022364693e-06,
      "loss": 2.4143,
      "step": 42907
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0111730098724365,
      "learning_rate": 8.255994795357504e-06,
      "loss": 2.2582,
      "step": 42908
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0202765464782715,
      "learning_rate": 8.255589370024638e-06,
      "loss": 2.4732,
      "step": 42909
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1215780973434448,
      "learning_rate": 8.255183947649022e-06,
      "loss": 2.566,
      "step": 42910
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2052005529403687,
      "learning_rate": 8.254778528231346e-06,
      "loss": 2.3531,
      "step": 42911
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0299062728881836,
      "learning_rate": 8.254373111772292e-06,
      "loss": 2.177,
      "step": 42912
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0421808958053589,
      "learning_rate": 8.25396769827255e-06,
      "loss": 2.1766,
      "step": 42913
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1873397827148438,
      "learning_rate": 8.253562287732806e-06,
      "loss": 2.2945,
      "step": 42914
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.15328848361969,
      "learning_rate": 8.253156880153749e-06,
      "loss": 2.3358,
      "step": 42915
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0775402784347534,
      "learning_rate": 8.252751475536065e-06,
      "loss": 2.2606,
      "step": 42916
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0525153875350952,
      "learning_rate": 8.252346073880441e-06,
      "loss": 2.6033,
      "step": 42917
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1991732120513916,
      "learning_rate": 8.251940675187568e-06,
      "loss": 2.2432,
      "step": 42918
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1370095014572144,
      "learning_rate": 8.251535279458127e-06,
      "loss": 2.1877,
      "step": 42919
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1973907947540283,
      "learning_rate": 8.251129886692811e-06,
      "loss": 2.5298,
      "step": 42920
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1181046962738037,
      "learning_rate": 8.250724496892308e-06,
      "loss": 2.5141,
      "step": 42921
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0869420766830444,
      "learning_rate": 8.250319110057298e-06,
      "loss": 2.2812,
      "step": 42922
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0522973537445068,
      "learning_rate": 8.249913726188472e-06,
      "loss": 2.2261,
      "step": 42923
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1129300594329834,
      "learning_rate": 8.249508345286518e-06,
      "loss": 2.2269,
      "step": 42924
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0831266641616821,
      "learning_rate": 8.249102967352123e-06,
      "loss": 2.4129,
      "step": 42925
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1493620872497559,
      "learning_rate": 8.248697592385973e-06,
      "loss": 2.4319,
      "step": 42926
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.031886100769043,
      "learning_rate": 8.248292220388756e-06,
      "loss": 2.2259,
      "step": 42927
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0476621389389038,
      "learning_rate": 8.247886851361161e-06,
      "loss": 2.4664,
      "step": 42928
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0627347230911255,
      "learning_rate": 8.247481485303871e-06,
      "loss": 2.2451,
      "step": 42929
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1496005058288574,
      "learning_rate": 8.247076122217578e-06,
      "loss": 2.2935,
      "step": 42930
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.091895341873169,
      "learning_rate": 8.246670762102968e-06,
      "loss": 2.3148,
      "step": 42931
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3328787088394165,
      "learning_rate": 8.246265404960726e-06,
      "loss": 2.3489,
      "step": 42932
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9804239273071289,
      "learning_rate": 8.245860050791544e-06,
      "loss": 2.1665,
      "step": 42933
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0420219898223877,
      "learning_rate": 8.2454546995961e-06,
      "loss": 2.4702,
      "step": 42934
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0589797496795654,
      "learning_rate": 8.245049351375091e-06,
      "loss": 2.3561,
      "step": 42935
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1508411169052124,
      "learning_rate": 8.2446440061292e-06,
      "loss": 2.4477,
      "step": 42936
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0544073581695557,
      "learning_rate": 8.244238663859112e-06,
      "loss": 2.4121,
      "step": 42937
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1486437320709229,
      "learning_rate": 8.243833324565516e-06,
      "loss": 2.5578,
      "step": 42938
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1725648641586304,
      "learning_rate": 8.243427988249101e-06,
      "loss": 2.5208,
      "step": 42939
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1257896423339844,
      "learning_rate": 8.243022654910553e-06,
      "loss": 2.2602,
      "step": 42940
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1488081216812134,
      "learning_rate": 8.242617324550556e-06,
      "loss": 2.339,
      "step": 42941
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1042065620422363,
      "learning_rate": 8.242211997169802e-06,
      "loss": 2.4556,
      "step": 42942
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0509755611419678,
      "learning_rate": 8.241806672768976e-06,
      "loss": 2.5848,
      "step": 42943
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1837973594665527,
      "learning_rate": 8.241401351348763e-06,
      "loss": 2.3578,
      "step": 42944
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0492771863937378,
      "learning_rate": 8.240996032909854e-06,
      "loss": 2.3914,
      "step": 42945
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9851332306861877,
      "learning_rate": 8.240590717452934e-06,
      "loss": 2.1813,
      "step": 42946
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.017398715019226,
      "learning_rate": 8.240185404978692e-06,
      "loss": 2.3601,
      "step": 42947
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0338191986083984,
      "learning_rate": 8.239780095487817e-06,
      "loss": 2.3265,
      "step": 42948
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1463185548782349,
      "learning_rate": 8.239374788980989e-06,
      "loss": 2.3046,
      "step": 42949
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6858420372009277,
      "learning_rate": 8.238969485458897e-06,
      "loss": 2.2988,
      "step": 42950
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2604317665100098,
      "learning_rate": 8.238564184922232e-06,
      "loss": 2.3506,
      "step": 42951
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.256278157234192,
      "learning_rate": 8.238158887371679e-06,
      "loss": 2.3512,
      "step": 42952
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.098929762840271,
      "learning_rate": 8.237753592807925e-06,
      "loss": 2.3715,
      "step": 42953
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.009732723236084,
      "learning_rate": 8.237348301231656e-06,
      "loss": 2.4701,
      "step": 42954
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.184751033782959,
      "learning_rate": 8.23694301264356e-06,
      "loss": 2.3303,
      "step": 42955
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0653512477874756,
      "learning_rate": 8.236537727044328e-06,
      "loss": 2.4074,
      "step": 42956
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1310911178588867,
      "learning_rate": 8.23613244443464e-06,
      "loss": 2.5301,
      "step": 42957
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2330400943756104,
      "learning_rate": 8.235727164815188e-06,
      "loss": 2.4427,
      "step": 42958
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.099217176437378,
      "learning_rate": 8.235321888186658e-06,
      "loss": 2.4639,
      "step": 42959
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1190568208694458,
      "learning_rate": 8.234916614549735e-06,
      "loss": 2.1656,
      "step": 42960
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0674864053726196,
      "learning_rate": 8.234511343905115e-06,
      "loss": 2.4258,
      "step": 42961
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2403062582015991,
      "learning_rate": 8.23410607625347e-06,
      "loss": 2.344,
      "step": 42962
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0528753995895386,
      "learning_rate": 8.233700811595496e-06,
      "loss": 2.2693,
      "step": 42963
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3438688516616821,
      "learning_rate": 8.23329554993188e-06,
      "loss": 2.2608,
      "step": 42964
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0218697786331177,
      "learning_rate": 8.232890291263307e-06,
      "loss": 2.518,
      "step": 42965
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0948480367660522,
      "learning_rate": 8.232485035590465e-06,
      "loss": 2.2173,
      "step": 42966
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1759564876556396,
      "learning_rate": 8.232079782914043e-06,
      "loss": 2.2941,
      "step": 42967
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2088559865951538,
      "learning_rate": 8.231674533234723e-06,
      "loss": 2.3473,
      "step": 42968
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.113134741783142,
      "learning_rate": 8.231269286553196e-06,
      "loss": 2.4112,
      "step": 42969
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0807024240493774,
      "learning_rate": 8.230864042870148e-06,
      "loss": 2.2289,
      "step": 42970
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1892369985580444,
      "learning_rate": 8.230458802186266e-06,
      "loss": 2.3935,
      "step": 42971
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.079466462135315,
      "learning_rate": 8.230053564502235e-06,
      "loss": 2.3933,
      "step": 42972
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0951757431030273,
      "learning_rate": 8.229648329818747e-06,
      "loss": 2.2507,
      "step": 42973
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0152335166931152,
      "learning_rate": 8.229243098136489e-06,
      "loss": 2.3386,
      "step": 42974
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0804200172424316,
      "learning_rate": 8.228837869456142e-06,
      "loss": 2.3715,
      "step": 42975
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.106821060180664,
      "learning_rate": 8.228432643778393e-06,
      "loss": 2.5932,
      "step": 42976
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.081912875175476,
      "learning_rate": 8.228027421103933e-06,
      "loss": 2.2033,
      "step": 42977
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2843884229660034,
      "learning_rate": 8.227622201433446e-06,
      "loss": 2.2199,
      "step": 42978
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0908864736557007,
      "learning_rate": 8.227216984767623e-06,
      "loss": 2.5002,
      "step": 42979
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9993339776992798,
      "learning_rate": 8.226811771107149e-06,
      "loss": 2.396,
      "step": 42980
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0196435451507568,
      "learning_rate": 8.22640656045271e-06,
      "loss": 2.321,
      "step": 42981
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9805105328559875,
      "learning_rate": 8.226001352804993e-06,
      "loss": 2.281,
      "step": 42982
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0566983222961426,
      "learning_rate": 8.225596148164686e-06,
      "loss": 2.157,
      "step": 42983
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0825750827789307,
      "learning_rate": 8.225190946532476e-06,
      "loss": 2.3417,
      "step": 42984
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.046913743019104,
      "learning_rate": 8.224785747909049e-06,
      "loss": 2.2653,
      "step": 42985
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0802339315414429,
      "learning_rate": 8.224380552295091e-06,
      "loss": 2.3821,
      "step": 42986
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0475525856018066,
      "learning_rate": 8.223975359691297e-06,
      "loss": 2.4313,
      "step": 42987
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1193499565124512,
      "learning_rate": 8.223570170098341e-06,
      "loss": 2.493,
      "step": 42988
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0837959051132202,
      "learning_rate": 8.223164983516915e-06,
      "loss": 2.6925,
      "step": 42989
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9955779910087585,
      "learning_rate": 8.222759799947709e-06,
      "loss": 2.3585,
      "step": 42990
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0255075693130493,
      "learning_rate": 8.222354619391408e-06,
      "loss": 2.4618,
      "step": 42991
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0759159326553345,
      "learning_rate": 8.221949441848698e-06,
      "loss": 2.3975,
      "step": 42992
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9559333324432373,
      "learning_rate": 8.221544267320265e-06,
      "loss": 2.2555,
      "step": 42993
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1120564937591553,
      "learning_rate": 8.2211390958068e-06,
      "loss": 2.2322,
      "step": 42994
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1310245990753174,
      "learning_rate": 8.220733927308988e-06,
      "loss": 2.2929,
      "step": 42995
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0687018632888794,
      "learning_rate": 8.220328761827514e-06,
      "loss": 2.2876,
      "step": 42996
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0670077800750732,
      "learning_rate": 8.219923599363065e-06,
      "loss": 2.4024,
      "step": 42997
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0665892362594604,
      "learning_rate": 8.219518439916332e-06,
      "loss": 2.5012,
      "step": 42998
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1875888109207153,
      "learning_rate": 8.219113283487995e-06,
      "loss": 2.3701,
      "step": 42999
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0605899095535278,
      "learning_rate": 8.218708130078746e-06,
      "loss": 2.3469,
      "step": 43000
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.062880516052246,
      "learning_rate": 8.218302979689275e-06,
      "loss": 2.3735,
      "step": 43001
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2611916065216064,
      "learning_rate": 8.217897832320261e-06,
      "loss": 2.4086,
      "step": 43002
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9883527159690857,
      "learning_rate": 8.217492687972393e-06,
      "loss": 2.2461,
      "step": 43003
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1449649333953857,
      "learning_rate": 8.217087546646359e-06,
      "loss": 2.3281,
      "step": 43004
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0659925937652588,
      "learning_rate": 8.216682408342847e-06,
      "loss": 2.2573,
      "step": 43005
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0389248132705688,
      "learning_rate": 8.216277273062541e-06,
      "loss": 2.1483,
      "step": 43006
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1312005519866943,
      "learning_rate": 8.21587214080613e-06,
      "loss": 2.29,
      "step": 43007
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.001533031463623,
      "learning_rate": 8.215467011574301e-06,
      "loss": 2.2419,
      "step": 43008
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0645321607589722,
      "learning_rate": 8.21506188536774e-06,
      "loss": 2.2353,
      "step": 43009
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1554932594299316,
      "learning_rate": 8.214656762187132e-06,
      "loss": 2.0947,
      "step": 43010
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.112236499786377,
      "learning_rate": 8.214251642033167e-06,
      "loss": 2.386,
      "step": 43011
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.01985764503479,
      "learning_rate": 8.213846524906531e-06,
      "loss": 2.3227,
      "step": 43012
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.053606629371643,
      "learning_rate": 8.21344141080791e-06,
      "loss": 2.413,
      "step": 43013
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1777138710021973,
      "learning_rate": 8.213036299737995e-06,
      "loss": 2.1705,
      "step": 43014
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0010371208190918,
      "learning_rate": 8.212631191697464e-06,
      "loss": 2.2004,
      "step": 43015
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0258146524429321,
      "learning_rate": 8.21222608668701e-06,
      "loss": 2.415,
      "step": 43016
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9739105105400085,
      "learning_rate": 8.211820984707319e-06,
      "loss": 2.0121,
      "step": 43017
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0607267618179321,
      "learning_rate": 8.211415885759073e-06,
      "loss": 2.4408,
      "step": 43018
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.096971035003662,
      "learning_rate": 8.211010789842967e-06,
      "loss": 2.4635,
      "step": 43019
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9682816863059998,
      "learning_rate": 8.210605696959683e-06,
      "loss": 2.3257,
      "step": 43020
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0958082675933838,
      "learning_rate": 8.210200607109908e-06,
      "loss": 2.4232,
      "step": 43021
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1073474884033203,
      "learning_rate": 8.209795520294328e-06,
      "loss": 2.3281,
      "step": 43022
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9512530565261841,
      "learning_rate": 8.209390436513634e-06,
      "loss": 2.374,
      "step": 43023
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.167472004890442,
      "learning_rate": 8.208985355768506e-06,
      "loss": 2.4647,
      "step": 43024
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3858460187911987,
      "learning_rate": 8.208580278059634e-06,
      "loss": 2.2471,
      "step": 43025
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1808276176452637,
      "learning_rate": 8.208175203387707e-06,
      "loss": 2.612,
      "step": 43026
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1974620819091797,
      "learning_rate": 8.207770131753411e-06,
      "loss": 2.3757,
      "step": 43027
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.997397243976593,
      "learning_rate": 8.207365063157429e-06,
      "loss": 2.5039,
      "step": 43028
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0673880577087402,
      "learning_rate": 8.206959997600452e-06,
      "loss": 2.3324,
      "step": 43029
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1246968507766724,
      "learning_rate": 8.206554935083163e-06,
      "loss": 2.3748,
      "step": 43030
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1105414628982544,
      "learning_rate": 8.206149875606252e-06,
      "loss": 2.1197,
      "step": 43031
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1688413619995117,
      "learning_rate": 8.205744819170402e-06,
      "loss": 2.2175,
      "step": 43032
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1070770025253296,
      "learning_rate": 8.205339765776303e-06,
      "loss": 2.3076,
      "step": 43033
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0142267942428589,
      "learning_rate": 8.20493471542464e-06,
      "loss": 2.4132,
      "step": 43034
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2249903678894043,
      "learning_rate": 8.2045296681161e-06,
      "loss": 2.2022,
      "step": 43035
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1844667196273804,
      "learning_rate": 8.20412462385137e-06,
      "loss": 2.3432,
      "step": 43036
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1331675052642822,
      "learning_rate": 8.203719582631136e-06,
      "loss": 2.3544,
      "step": 43037
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.074483871459961,
      "learning_rate": 8.203314544456087e-06,
      "loss": 2.3968,
      "step": 43038
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.048752784729004,
      "learning_rate": 8.202909509326907e-06,
      "loss": 2.2941,
      "step": 43039
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0162426233291626,
      "learning_rate": 8.202504477244286e-06,
      "loss": 2.2472,
      "step": 43040
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9956054091453552,
      "learning_rate": 8.202099448208907e-06,
      "loss": 2.5269,
      "step": 43041
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0049821138381958,
      "learning_rate": 8.201694422221455e-06,
      "loss": 2.3573,
      "step": 43042
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.017077088356018,
      "learning_rate": 8.201289399282621e-06,
      "loss": 2.397,
      "step": 43043
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2624074220657349,
      "learning_rate": 8.200884379393088e-06,
      "loss": 2.3202,
      "step": 43044
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0458556413650513,
      "learning_rate": 8.200479362553545e-06,
      "loss": 2.5402,
      "step": 43045
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0382682085037231,
      "learning_rate": 8.200074348764681e-06,
      "loss": 2.2465,
      "step": 43046
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.104620099067688,
      "learning_rate": 8.199669338027177e-06,
      "loss": 2.3463,
      "step": 43047
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1208404302597046,
      "learning_rate": 8.199264330341724e-06,
      "loss": 2.1671,
      "step": 43048
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1056275367736816,
      "learning_rate": 8.198859325709007e-06,
      "loss": 2.3544,
      "step": 43049
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1266919374465942,
      "learning_rate": 8.198454324129712e-06,
      "loss": 2.3714,
      "step": 43050
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.039217472076416,
      "learning_rate": 8.198049325604528e-06,
      "loss": 2.1893,
      "step": 43051
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1161980628967285,
      "learning_rate": 8.197644330134138e-06,
      "loss": 2.4876,
      "step": 43052
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0305248498916626,
      "learning_rate": 8.197239337719233e-06,
      "loss": 2.1834,
      "step": 43053
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1493875980377197,
      "learning_rate": 8.196834348360494e-06,
      "loss": 2.1802,
      "step": 43054
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0951896905899048,
      "learning_rate": 8.196429362058612e-06,
      "loss": 2.3102,
      "step": 43055
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.04954993724823,
      "learning_rate": 8.19602437881427e-06,
      "loss": 2.1307,
      "step": 43056
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2081925868988037,
      "learning_rate": 8.195619398628159e-06,
      "loss": 2.2985,
      "step": 43057
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2158377170562744,
      "learning_rate": 8.195214421500963e-06,
      "loss": 2.3419,
      "step": 43058
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1003819704055786,
      "learning_rate": 8.194809447433367e-06,
      "loss": 2.3564,
      "step": 43059
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0525798797607422,
      "learning_rate": 8.19440447642606e-06,
      "loss": 1.973,
      "step": 43060
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0524837970733643,
      "learning_rate": 8.193999508479728e-06,
      "loss": 2.4983,
      "step": 43061
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0356556177139282,
      "learning_rate": 8.193594543595057e-06,
      "loss": 2.5041,
      "step": 43062
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1831271648406982,
      "learning_rate": 8.193189581772733e-06,
      "loss": 2.3751,
      "step": 43063
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0636388063430786,
      "learning_rate": 8.192784623013444e-06,
      "loss": 2.5321,
      "step": 43064
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1179444789886475,
      "learning_rate": 8.192379667317878e-06,
      "loss": 2.388,
      "step": 43065
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1320674419403076,
      "learning_rate": 8.19197471468672e-06,
      "loss": 2.3402,
      "step": 43066
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0188649892807007,
      "learning_rate": 8.191569765120653e-06,
      "loss": 2.4034,
      "step": 43067
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0317177772521973,
      "learning_rate": 8.191164818620367e-06,
      "loss": 2.2712,
      "step": 43068
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0477770566940308,
      "learning_rate": 8.190759875186548e-06,
      "loss": 2.4173,
      "step": 43069
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0708272457122803,
      "learning_rate": 8.190354934819882e-06,
      "loss": 2.4666,
      "step": 43070
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9813265204429626,
      "learning_rate": 8.189949997521056e-06,
      "loss": 2.1007,
      "step": 43071
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1425014734268188,
      "learning_rate": 8.189545063290757e-06,
      "loss": 2.382,
      "step": 43072
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.10231351852417,
      "learning_rate": 8.18914013212967e-06,
      "loss": 2.2869,
      "step": 43073
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1667081117630005,
      "learning_rate": 8.188735204038483e-06,
      "loss": 2.3197,
      "step": 43074
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0415043830871582,
      "learning_rate": 8.18833027901788e-06,
      "loss": 2.3831,
      "step": 43075
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0858654975891113,
      "learning_rate": 8.187925357068552e-06,
      "loss": 2.3427,
      "step": 43076
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0886869430541992,
      "learning_rate": 8.18752043819118e-06,
      "loss": 2.4601,
      "step": 43077
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2039839029312134,
      "learning_rate": 8.187115522386455e-06,
      "loss": 2.1866,
      "step": 43078
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0982012748718262,
      "learning_rate": 8.186710609655062e-06,
      "loss": 2.5354,
      "step": 43079
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0485879182815552,
      "learning_rate": 8.186305699997689e-06,
      "loss": 2.2938,
      "step": 43080
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2477946281433105,
      "learning_rate": 8.185900793415017e-06,
      "loss": 2.3096,
      "step": 43081
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.143626093864441,
      "learning_rate": 8.185495889907735e-06,
      "loss": 2.3255,
      "step": 43082
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.097737431526184,
      "learning_rate": 8.185090989476531e-06,
      "loss": 2.2444,
      "step": 43083
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.8026492595672607,
      "learning_rate": 8.184686092122092e-06,
      "loss": 2.2716,
      "step": 43084
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0415219068527222,
      "learning_rate": 8.184281197845106e-06,
      "loss": 2.389,
      "step": 43085
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9839969277381897,
      "learning_rate": 8.183876306646251e-06,
      "loss": 2.575,
      "step": 43086
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.030000925064087,
      "learning_rate": 8.183471418526221e-06,
      "loss": 2.2862,
      "step": 43087
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0291434526443481,
      "learning_rate": 8.183066533485699e-06,
      "loss": 2.4674,
      "step": 43088
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0932884216308594,
      "learning_rate": 8.182661651525375e-06,
      "loss": 2.2022,
      "step": 43089
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1573622226715088,
      "learning_rate": 8.182256772645932e-06,
      "loss": 2.5136,
      "step": 43090
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0630559921264648,
      "learning_rate": 8.181851896848056e-06,
      "loss": 2.4842,
      "step": 43091
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.13458251953125,
      "learning_rate": 8.181447024132436e-06,
      "loss": 2.4123,
      "step": 43092
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0493113994598389,
      "learning_rate": 8.181042154499762e-06,
      "loss": 2.3524,
      "step": 43093
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0609149932861328,
      "learning_rate": 8.18063728795071e-06,
      "loss": 2.0722,
      "step": 43094
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.01612389087677,
      "learning_rate": 8.180232424485973e-06,
      "loss": 2.2812,
      "step": 43095
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1229312419891357,
      "learning_rate": 8.179827564106236e-06,
      "loss": 2.3802,
      "step": 43096
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9669157266616821,
      "learning_rate": 8.179422706812186e-06,
      "loss": 2.385,
      "step": 43097
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0976866483688354,
      "learning_rate": 8.179017852604509e-06,
      "loss": 2.2386,
      "step": 43098
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.20110285282135,
      "learning_rate": 8.17861300148389e-06,
      "loss": 2.1464,
      "step": 43099
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0265213251113892,
      "learning_rate": 8.178208153451019e-06,
      "loss": 2.2431,
      "step": 43100
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2037383317947388,
      "learning_rate": 8.177803308506577e-06,
      "loss": 2.3567,
      "step": 43101
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1823487281799316,
      "learning_rate": 8.177398466651256e-06,
      "loss": 2.4491,
      "step": 43102
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0275979042053223,
      "learning_rate": 8.176993627885738e-06,
      "loss": 2.4241,
      "step": 43103
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1586942672729492,
      "learning_rate": 8.176588792210711e-06,
      "loss": 2.3162,
      "step": 43104
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0031602382659912,
      "learning_rate": 8.176183959626862e-06,
      "loss": 2.3314,
      "step": 43105
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1337698698043823,
      "learning_rate": 8.17577913013488e-06,
      "loss": 2.186,
      "step": 43106
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0886116027832031,
      "learning_rate": 8.175374303735444e-06,
      "loss": 2.2651,
      "step": 43107
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0986392498016357,
      "learning_rate": 8.174969480429245e-06,
      "loss": 2.3802,
      "step": 43108
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9491040110588074,
      "learning_rate": 8.174564660216966e-06,
      "loss": 2.5413,
      "step": 43109
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1533842086791992,
      "learning_rate": 8.174159843099297e-06,
      "loss": 2.2685,
      "step": 43110
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1424230337142944,
      "learning_rate": 8.173755029076925e-06,
      "loss": 2.3588,
      "step": 43111
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1469255685806274,
      "learning_rate": 8.17335021815053e-06,
      "loss": 2.3528,
      "step": 43112
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9776439070701599,
      "learning_rate": 8.172945410320809e-06,
      "loss": 2.2724,
      "step": 43113
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.062631368637085,
      "learning_rate": 8.172540605588438e-06,
      "loss": 2.392,
      "step": 43114
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1387728452682495,
      "learning_rate": 8.172135803954105e-06,
      "loss": 2.3914,
      "step": 43115
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0132330656051636,
      "learning_rate": 8.1717310054185e-06,
      "loss": 2.2562,
      "step": 43116
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2909668684005737,
      "learning_rate": 8.171326209982307e-06,
      "loss": 2.1888,
      "step": 43117
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4185227155685425,
      "learning_rate": 8.170921417646214e-06,
      "loss": 2.3824,
      "step": 43118
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0790411233901978,
      "learning_rate": 8.170516628410909e-06,
      "loss": 2.537,
      "step": 43119
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.265828013420105,
      "learning_rate": 8.17011184227707e-06,
      "loss": 2.4237,
      "step": 43120
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.095745325088501,
      "learning_rate": 8.16970705924539e-06,
      "loss": 2.1307,
      "step": 43121
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9956545233726501,
      "learning_rate": 8.169302279316553e-06,
      "loss": 2.4234,
      "step": 43122
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0079376697540283,
      "learning_rate": 8.168897502491246e-06,
      "loss": 2.3453,
      "step": 43123
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.060699701309204,
      "learning_rate": 8.168492728770156e-06,
      "loss": 2.2421,
      "step": 43124
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2241398096084595,
      "learning_rate": 8.168087958153968e-06,
      "loss": 2.3684,
      "step": 43125
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0690959692001343,
      "learning_rate": 8.167683190643367e-06,
      "loss": 2.2977,
      "step": 43126
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9822576642036438,
      "learning_rate": 8.167278426239043e-06,
      "loss": 2.2634,
      "step": 43127
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1313576698303223,
      "learning_rate": 8.166873664941676e-06,
      "loss": 2.411,
      "step": 43128
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0505415201187134,
      "learning_rate": 8.16646890675196e-06,
      "loss": 2.33,
      "step": 43129
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2851145267486572,
      "learning_rate": 8.166064151670575e-06,
      "loss": 2.4922,
      "step": 43130
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.111935019493103,
      "learning_rate": 8.16565939969821e-06,
      "loss": 2.1948,
      "step": 43131
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1309820413589478,
      "learning_rate": 8.165254650835555e-06,
      "loss": 2.1071,
      "step": 43132
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0217552185058594,
      "learning_rate": 8.164849905083287e-06,
      "loss": 2.2098,
      "step": 43133
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1577223539352417,
      "learning_rate": 8.164445162442096e-06,
      "loss": 2.4384,
      "step": 43134
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1055601835250854,
      "learning_rate": 8.16404042291267e-06,
      "loss": 2.4842,
      "step": 43135
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1404306888580322,
      "learning_rate": 8.163635686495694e-06,
      "loss": 2.2905,
      "step": 43136
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2456848621368408,
      "learning_rate": 8.163230953191855e-06,
      "loss": 2.1076,
      "step": 43137
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1100306510925293,
      "learning_rate": 8.162826223001837e-06,
      "loss": 2.4545,
      "step": 43138
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1483596563339233,
      "learning_rate": 8.16242149592633e-06,
      "loss": 2.3979,
      "step": 43139
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.011427879333496,
      "learning_rate": 8.162016771966016e-06,
      "loss": 2.2977,
      "step": 43140
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0940536260604858,
      "learning_rate": 8.161612051121584e-06,
      "loss": 2.5732,
      "step": 43141
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0734773874282837,
      "learning_rate": 8.161207333393716e-06,
      "loss": 2.3972,
      "step": 43142
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9619319438934326,
      "learning_rate": 8.160802618783103e-06,
      "loss": 2.4763,
      "step": 43143
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1511127948760986,
      "learning_rate": 8.16039790729043e-06,
      "loss": 2.6675,
      "step": 43144
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2718746662139893,
      "learning_rate": 8.159993198916381e-06,
      "loss": 2.0577,
      "step": 43145
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.954613208770752,
      "learning_rate": 8.159588493661643e-06,
      "loss": 2.2979,
      "step": 43146
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0736057758331299,
      "learning_rate": 8.159183791526905e-06,
      "loss": 2.3996,
      "step": 43147
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1107114553451538,
      "learning_rate": 8.158779092512847e-06,
      "loss": 2.2031,
      "step": 43148
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.059098720550537,
      "learning_rate": 8.15837439662016e-06,
      "loss": 2.2548,
      "step": 43149
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.211789846420288,
      "learning_rate": 8.157969703849528e-06,
      "loss": 2.2614,
      "step": 43150
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.218140959739685,
      "learning_rate": 8.157565014201638e-06,
      "loss": 2.4126,
      "step": 43151
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0991848707199097,
      "learning_rate": 8.157160327677175e-06,
      "loss": 2.428,
      "step": 43152
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.029862880706787,
      "learning_rate": 8.156755644276826e-06,
      "loss": 2.2573,
      "step": 43153
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1112620830535889,
      "learning_rate": 8.156350964001276e-06,
      "loss": 2.3513,
      "step": 43154
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.118226408958435,
      "learning_rate": 8.155946286851213e-06,
      "loss": 2.2667,
      "step": 43155
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0502301454544067,
      "learning_rate": 8.155541612827321e-06,
      "loss": 2.2539,
      "step": 43156
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0600996017456055,
      "learning_rate": 8.155136941930288e-06,
      "loss": 2.6675,
      "step": 43157
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.159274697303772,
      "learning_rate": 8.154732274160798e-06,
      "loss": 2.1961,
      "step": 43158
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0692329406738281,
      "learning_rate": 8.154327609519543e-06,
      "loss": 2.2712,
      "step": 43159
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0755983591079712,
      "learning_rate": 8.1539229480072e-06,
      "loss": 2.3413,
      "step": 43160
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1162105798721313,
      "learning_rate": 8.153518289624458e-06,
      "loss": 2.3638,
      "step": 43161
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0863611698150635,
      "learning_rate": 8.153113634372003e-06,
      "loss": 2.3473,
      "step": 43162
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1322669982910156,
      "learning_rate": 8.152708982250523e-06,
      "loss": 2.2854,
      "step": 43163
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9517849087715149,
      "learning_rate": 8.152304333260704e-06,
      "loss": 2.3815,
      "step": 43164
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1503010988235474,
      "learning_rate": 8.15189968740323e-06,
      "loss": 2.4142,
      "step": 43165
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2153528928756714,
      "learning_rate": 8.151495044678789e-06,
      "loss": 2.3317,
      "step": 43166
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1700605154037476,
      "learning_rate": 8.151090405088064e-06,
      "loss": 2.2896,
      "step": 43167
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0535658597946167,
      "learning_rate": 8.150685768631746e-06,
      "loss": 2.3963,
      "step": 43168
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0685126781463623,
      "learning_rate": 8.150281135310517e-06,
      "loss": 2.269,
      "step": 43169
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.983523964881897,
      "learning_rate": 8.149876505125063e-06,
      "loss": 2.2897,
      "step": 43170
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1386120319366455,
      "learning_rate": 8.149471878076069e-06,
      "loss": 2.4807,
      "step": 43171
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2120332717895508,
      "learning_rate": 8.149067254164227e-06,
      "loss": 2.2506,
      "step": 43172
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0291283130645752,
      "learning_rate": 8.148662633390216e-06,
      "loss": 2.1,
      "step": 43173
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1881749629974365,
      "learning_rate": 8.148258015754726e-06,
      "loss": 2.0696,
      "step": 43174
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1474864482879639,
      "learning_rate": 8.147853401258442e-06,
      "loss": 2.4126,
      "step": 43175
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0921499729156494,
      "learning_rate": 8.147448789902048e-06,
      "loss": 2.1866,
      "step": 43176
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0920331478118896,
      "learning_rate": 8.147044181686232e-06,
      "loss": 2.3039,
      "step": 43177
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2301315069198608,
      "learning_rate": 8.146639576611678e-06,
      "loss": 2.3085,
      "step": 43178
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0495115518569946,
      "learning_rate": 8.146234974679074e-06,
      "loss": 2.388,
      "step": 43179
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2220885753631592,
      "learning_rate": 8.145830375889105e-06,
      "loss": 2.2018,
      "step": 43180
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0315237045288086,
      "learning_rate": 8.145425780242458e-06,
      "loss": 2.276,
      "step": 43181
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0558539628982544,
      "learning_rate": 8.145021187739816e-06,
      "loss": 2.1341,
      "step": 43182
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2099270820617676,
      "learning_rate": 8.144616598381869e-06,
      "loss": 2.2677,
      "step": 43183
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0497337579727173,
      "learning_rate": 8.144212012169299e-06,
      "loss": 2.2851,
      "step": 43184
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.632794976234436,
      "learning_rate": 8.143807429102799e-06,
      "loss": 2.2895,
      "step": 43185
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.159637689590454,
      "learning_rate": 8.143402849183045e-06,
      "loss": 2.2252,
      "step": 43186
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0646001100540161,
      "learning_rate": 8.142998272410728e-06,
      "loss": 2.1832,
      "step": 43187
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.243552565574646,
      "learning_rate": 8.142593698786533e-06,
      "loss": 2.3358,
      "step": 43188
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2154648303985596,
      "learning_rate": 8.142189128311146e-06,
      "loss": 2.3206,
      "step": 43189
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9647194147109985,
      "learning_rate": 8.141784560985252e-06,
      "loss": 2.4232,
      "step": 43190
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0801652669906616,
      "learning_rate": 8.14137999680954e-06,
      "loss": 2.5402,
      "step": 43191
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1229329109191895,
      "learning_rate": 8.140975435784693e-06,
      "loss": 2.0878,
      "step": 43192
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6820100545883179,
      "learning_rate": 8.140570877911395e-06,
      "loss": 2.1869,
      "step": 43193
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1935291290283203,
      "learning_rate": 8.140166323190338e-06,
      "loss": 2.1608,
      "step": 43194
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9575824737548828,
      "learning_rate": 8.139761771622202e-06,
      "loss": 2.2916,
      "step": 43195
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0156489610671997,
      "learning_rate": 8.139357223207675e-06,
      "loss": 2.2922,
      "step": 43196
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0767842531204224,
      "learning_rate": 8.138952677947448e-06,
      "loss": 2.3491,
      "step": 43197
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.046436071395874,
      "learning_rate": 8.138548135842199e-06,
      "loss": 2.3425,
      "step": 43198
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2718987464904785,
      "learning_rate": 8.138143596892614e-06,
      "loss": 2.3947,
      "step": 43199
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.041152000427246,
      "learning_rate": 8.137739061099382e-06,
      "loss": 2.161,
      "step": 43200
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1601312160491943,
      "learning_rate": 8.137334528463187e-06,
      "loss": 2.3232,
      "step": 43201
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0924322605133057,
      "learning_rate": 8.136929998984718e-06,
      "loss": 2.502,
      "step": 43202
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0958539247512817,
      "learning_rate": 8.13652547266466e-06,
      "loss": 2.3913,
      "step": 43203
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1365671157836914,
      "learning_rate": 8.136120949503694e-06,
      "loss": 2.3221,
      "step": 43204
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.100150227546692,
      "learning_rate": 8.13571642950251e-06,
      "loss": 2.4433,
      "step": 43205
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9937050938606262,
      "learning_rate": 8.135311912661793e-06,
      "loss": 2.2681,
      "step": 43206
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0060173273086548,
      "learning_rate": 8.134907398982228e-06,
      "loss": 2.1355,
      "step": 43207
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1115190982818604,
      "learning_rate": 8.134502888464504e-06,
      "loss": 2.3352,
      "step": 43208
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1798701286315918,
      "learning_rate": 8.134098381109302e-06,
      "loss": 2.3215,
      "step": 43209
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3101458549499512,
      "learning_rate": 8.133693876917312e-06,
      "loss": 2.226,
      "step": 43210
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0668553113937378,
      "learning_rate": 8.13328937588922e-06,
      "loss": 2.2662,
      "step": 43211
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0979071855545044,
      "learning_rate": 8.132884878025706e-06,
      "loss": 2.4723,
      "step": 43212
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.14204740524292,
      "learning_rate": 8.13248038332746e-06,
      "loss": 2.318,
      "step": 43213
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.978502094745636,
      "learning_rate": 8.132075891795165e-06,
      "loss": 2.4861,
      "step": 43214
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1197905540466309,
      "learning_rate": 8.13167140342951e-06,
      "loss": 2.3899,
      "step": 43215
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0552045106887817,
      "learning_rate": 8.131266918231179e-06,
      "loss": 2.1757,
      "step": 43216
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3358932733535767,
      "learning_rate": 8.13086243620086e-06,
      "loss": 2.3737,
      "step": 43217
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1367671489715576,
      "learning_rate": 8.130457957339234e-06,
      "loss": 2.2657,
      "step": 43218
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1923351287841797,
      "learning_rate": 8.130053481646991e-06,
      "loss": 2.3812,
      "step": 43219
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0351084470748901,
      "learning_rate": 8.129649009124815e-06,
      "loss": 2.1424,
      "step": 43220
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1616911888122559,
      "learning_rate": 8.129244539773391e-06,
      "loss": 2.2712,
      "step": 43221
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0614386796951294,
      "learning_rate": 8.128840073593408e-06,
      "loss": 2.4509,
      "step": 43222
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1482369899749756,
      "learning_rate": 8.128435610585547e-06,
      "loss": 2.3876,
      "step": 43223
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.091264247894287,
      "learning_rate": 8.128031150750496e-06,
      "loss": 2.3537,
      "step": 43224
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.028847098350525,
      "learning_rate": 8.127626694088946e-06,
      "loss": 2.5821,
      "step": 43225
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1816811561584473,
      "learning_rate": 8.127222240601571e-06,
      "loss": 2.1447,
      "step": 43226
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1089067459106445,
      "learning_rate": 8.126817790289066e-06,
      "loss": 2.361,
      "step": 43227
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9813082814216614,
      "learning_rate": 8.126413343152111e-06,
      "loss": 2.2495,
      "step": 43228
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.07108736038208,
      "learning_rate": 8.126008899191396e-06,
      "loss": 2.3326,
      "step": 43229
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1276935338974,
      "learning_rate": 8.125604458407604e-06,
      "loss": 2.3281,
      "step": 43230
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0200215578079224,
      "learning_rate": 8.125200020801425e-06,
      "loss": 2.1883,
      "step": 43231
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0453708171844482,
      "learning_rate": 8.124795586373537e-06,
      "loss": 2.3164,
      "step": 43232
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.030236840248108,
      "learning_rate": 8.124391155124631e-06,
      "loss": 2.1048,
      "step": 43233
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0999791622161865,
      "learning_rate": 8.123986727055391e-06,
      "loss": 2.2961,
      "step": 43234
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1132584810256958,
      "learning_rate": 8.123582302166504e-06,
      "loss": 2.3739,
      "step": 43235
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0597965717315674,
      "learning_rate": 8.123177880458652e-06,
      "loss": 2.3799,
      "step": 43236
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0064921379089355,
      "learning_rate": 8.122773461932525e-06,
      "loss": 2.5215,
      "step": 43237
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0995700359344482,
      "learning_rate": 8.122369046588812e-06,
      "loss": 2.5291,
      "step": 43238
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1415082216262817,
      "learning_rate": 8.121964634428188e-06,
      "loss": 2.2223,
      "step": 43239
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1801520586013794,
      "learning_rate": 8.121560225451345e-06,
      "loss": 2.4194,
      "step": 43240
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.095726728439331,
      "learning_rate": 8.121155819658966e-06,
      "loss": 2.6549,
      "step": 43241
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1115554571151733,
      "learning_rate": 8.12075141705174e-06,
      "loss": 2.5422,
      "step": 43242
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0798695087432861,
      "learning_rate": 8.120347017630348e-06,
      "loss": 2.2756,
      "step": 43243
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0220085382461548,
      "learning_rate": 8.11994262139548e-06,
      "loss": 2.4867,
      "step": 43244
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.179787278175354,
      "learning_rate": 8.11953822834782e-06,
      "loss": 2.4105,
      "step": 43245
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0279842615127563,
      "learning_rate": 8.119133838488055e-06,
      "loss": 2.4753,
      "step": 43246
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.097328543663025,
      "learning_rate": 8.118729451816868e-06,
      "loss": 2.535,
      "step": 43247
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0993010997772217,
      "learning_rate": 8.118325068334944e-06,
      "loss": 2.3302,
      "step": 43248
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0846503973007202,
      "learning_rate": 8.117920688042971e-06,
      "loss": 2.3396,
      "step": 43249
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0777257680892944,
      "learning_rate": 8.117516310941633e-06,
      "loss": 2.4835,
      "step": 43250
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0166161060333252,
      "learning_rate": 8.117111937031622e-06,
      "loss": 2.5214,
      "step": 43251
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1615972518920898,
      "learning_rate": 8.11670756631361e-06,
      "loss": 2.2674,
      "step": 43252
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0243803262710571,
      "learning_rate": 8.116303198788294e-06,
      "loss": 2.5032,
      "step": 43253
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9853038787841797,
      "learning_rate": 8.115898834456354e-06,
      "loss": 2.2992,
      "step": 43254
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0756120681762695,
      "learning_rate": 8.115494473318478e-06,
      "loss": 2.4006,
      "step": 43255
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0148786306381226,
      "learning_rate": 8.11509011537535e-06,
      "loss": 2.4015,
      "step": 43256
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1402829885482788,
      "learning_rate": 8.114685760627654e-06,
      "loss": 2.561,
      "step": 43257
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1648088693618774,
      "learning_rate": 8.11428140907608e-06,
      "loss": 2.3094,
      "step": 43258
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.138152003288269,
      "learning_rate": 8.113877060721314e-06,
      "loss": 2.2713,
      "step": 43259
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9670509099960327,
      "learning_rate": 8.113472715564036e-06,
      "loss": 2.2779,
      "step": 43260
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0602444410324097,
      "learning_rate": 8.113068373604935e-06,
      "loss": 2.4474,
      "step": 43261
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.105330228805542,
      "learning_rate": 8.112664034844693e-06,
      "loss": 2.389,
      "step": 43262
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0639110803604126,
      "learning_rate": 8.112259699283999e-06,
      "loss": 2.654,
      "step": 43263
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0508071184158325,
      "learning_rate": 8.11185536692354e-06,
      "loss": 2.3224,
      "step": 43264
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.22053062915802,
      "learning_rate": 8.111451037764e-06,
      "loss": 2.4313,
      "step": 43265
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0634806156158447,
      "learning_rate": 8.111046711806062e-06,
      "loss": 2.2935,
      "step": 43266
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2174513339996338,
      "learning_rate": 8.11064238905041e-06,
      "loss": 2.543,
      "step": 43267
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0670634508132935,
      "learning_rate": 8.110238069497733e-06,
      "loss": 2.5198,
      "step": 43268
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1006762981414795,
      "learning_rate": 8.109833753148717e-06,
      "loss": 2.2247,
      "step": 43269
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.034444808959961,
      "learning_rate": 8.109429440004044e-06,
      "loss": 2.4577,
      "step": 43270
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0798428058624268,
      "learning_rate": 8.109025130064404e-06,
      "loss": 2.469,
      "step": 43271
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9876356720924377,
      "learning_rate": 8.10862082333048e-06,
      "loss": 2.4908,
      "step": 43272
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0123168230056763,
      "learning_rate": 8.108216519802956e-06,
      "loss": 2.0713,
      "step": 43273
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9483691453933716,
      "learning_rate": 8.107812219482518e-06,
      "loss": 2.467,
      "step": 43274
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1635868549346924,
      "learning_rate": 8.107407922369854e-06,
      "loss": 2.5172,
      "step": 43275
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.219416618347168,
      "learning_rate": 8.107003628465647e-06,
      "loss": 2.295,
      "step": 43276
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9669783115386963,
      "learning_rate": 8.106599337770587e-06,
      "loss": 2.1923,
      "step": 43277
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0788875818252563,
      "learning_rate": 8.106195050285351e-06,
      "loss": 2.3651,
      "step": 43278
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9875605702400208,
      "learning_rate": 8.10579076601063e-06,
      "loss": 2.1925,
      "step": 43279
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.094915747642517,
      "learning_rate": 8.105386484947107e-06,
      "loss": 2.3898,
      "step": 43280
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.055072546005249,
      "learning_rate": 8.104982207095468e-06,
      "loss": 2.2959,
      "step": 43281
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0713828802108765,
      "learning_rate": 8.1045779324564e-06,
      "loss": 2.395,
      "step": 43282
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.045897126197815,
      "learning_rate": 8.104173661030586e-06,
      "loss": 2.3362,
      "step": 43283
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1466141939163208,
      "learning_rate": 8.103769392818713e-06,
      "loss": 2.2306,
      "step": 43284
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0814568996429443,
      "learning_rate": 8.103365127821467e-06,
      "loss": 2.5006,
      "step": 43285
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0015699863433838,
      "learning_rate": 8.10296086603953e-06,
      "loss": 2.405,
      "step": 43286
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.046895146369934,
      "learning_rate": 8.102556607473594e-06,
      "loss": 2.2139,
      "step": 43287
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.281122088432312,
      "learning_rate": 8.102152352124338e-06,
      "loss": 2.5242,
      "step": 43288
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.095570683479309,
      "learning_rate": 8.101748099992446e-06,
      "loss": 2.0969,
      "step": 43289
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1136956214904785,
      "learning_rate": 8.101343851078611e-06,
      "loss": 2.3903,
      "step": 43290
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9940482974052429,
      "learning_rate": 8.100939605383511e-06,
      "loss": 2.4853,
      "step": 43291
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.038668155670166,
      "learning_rate": 8.100535362907835e-06,
      "loss": 2.2108,
      "step": 43292
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1414990425109863,
      "learning_rate": 8.100131123652269e-06,
      "loss": 2.2967,
      "step": 43293
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0707696676254272,
      "learning_rate": 8.099726887617495e-06,
      "loss": 2.2668,
      "step": 43294
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.083754301071167,
      "learning_rate": 8.099322654804201e-06,
      "loss": 2.414,
      "step": 43295
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1580288410186768,
      "learning_rate": 8.09891842521307e-06,
      "loss": 2.5395,
      "step": 43296
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9949395060539246,
      "learning_rate": 8.098514198844786e-06,
      "loss": 2.3582,
      "step": 43297
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1798467636108398,
      "learning_rate": 8.098109975700041e-06,
      "loss": 2.6077,
      "step": 43298
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.212667465209961,
      "learning_rate": 8.097705755779515e-06,
      "loss": 2.3094,
      "step": 43299
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9550322890281677,
      "learning_rate": 8.097301539083895e-06,
      "loss": 2.208,
      "step": 43300
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.015687346458435,
      "learning_rate": 8.096897325613863e-06,
      "loss": 2.1683,
      "step": 43301
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0984187126159668,
      "learning_rate": 8.09649311537011e-06,
      "loss": 2.5857,
      "step": 43302
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1079168319702148,
      "learning_rate": 8.096088908353316e-06,
      "loss": 2.1064,
      "step": 43303
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1818221807479858,
      "learning_rate": 8.095684704564173e-06,
      "loss": 2.2587,
      "step": 43304
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.988426923751831,
      "learning_rate": 8.095280504003357e-06,
      "loss": 2.27,
      "step": 43305
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.16234290599823,
      "learning_rate": 8.09487630667156e-06,
      "loss": 2.5769,
      "step": 43306
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.053099274635315,
      "learning_rate": 8.094472112569464e-06,
      "loss": 2.4217,
      "step": 43307
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0071995258331299,
      "learning_rate": 8.094067921697755e-06,
      "loss": 2.2396,
      "step": 43308
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1752463579177856,
      "learning_rate": 8.093663734057118e-06,
      "loss": 2.4373,
      "step": 43309
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0746896266937256,
      "learning_rate": 8.093259549648238e-06,
      "loss": 2.2729,
      "step": 43310
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2270983457565308,
      "learning_rate": 8.092855368471804e-06,
      "loss": 2.2574,
      "step": 43311
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1589583158493042,
      "learning_rate": 8.092451190528495e-06,
      "loss": 2.2863,
      "step": 43312
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1202924251556396,
      "learning_rate": 8.092047015819001e-06,
      "loss": 2.2302,
      "step": 43313
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0445556640625,
      "learning_rate": 8.091642844344006e-06,
      "loss": 2.5234,
      "step": 43314
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0814361572265625,
      "learning_rate": 8.091238676104194e-06,
      "loss": 2.3345,
      "step": 43315
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1048301458358765,
      "learning_rate": 8.090834511100251e-06,
      "loss": 2.2591,
      "step": 43316
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9905722737312317,
      "learning_rate": 8.090430349332866e-06,
      "loss": 2.3795,
      "step": 43317
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0186505317687988,
      "learning_rate": 8.090026190802714e-06,
      "loss": 2.3152,
      "step": 43318
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5530778169631958,
      "learning_rate": 8.089622035510489e-06,
      "loss": 2.3806,
      "step": 43319
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9763182401657104,
      "learning_rate": 8.089217883456872e-06,
      "loss": 2.1633,
      "step": 43320
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0448449850082397,
      "learning_rate": 8.088813734642553e-06,
      "loss": 2.4203,
      "step": 43321
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2228143215179443,
      "learning_rate": 8.088409589068211e-06,
      "loss": 2.3289,
      "step": 43322
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.078332543373108,
      "learning_rate": 8.088005446734534e-06,
      "loss": 2.4691,
      "step": 43323
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0907907485961914,
      "learning_rate": 8.087601307642209e-06,
      "loss": 2.7273,
      "step": 43324
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1296244859695435,
      "learning_rate": 8.087197171791917e-06,
      "loss": 2.4225,
      "step": 43325
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2242556810379028,
      "learning_rate": 8.086793039184345e-06,
      "loss": 2.3984,
      "step": 43326
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.99335116147995,
      "learning_rate": 8.086388909820179e-06,
      "loss": 2.3753,
      "step": 43327
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1270837783813477,
      "learning_rate": 8.085984783700104e-06,
      "loss": 2.2026,
      "step": 43328
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0836329460144043,
      "learning_rate": 8.085580660824803e-06,
      "loss": 2.3367,
      "step": 43329
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0315744876861572,
      "learning_rate": 8.085176541194969e-06,
      "loss": 2.4625,
      "step": 43330
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1551847457885742,
      "learning_rate": 8.084772424811276e-06,
      "loss": 2.1852,
      "step": 43331
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1230158805847168,
      "learning_rate": 8.084368311674414e-06,
      "loss": 2.2273,
      "step": 43332
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1234909296035767,
      "learning_rate": 8.083964201785067e-06,
      "loss": 2.376,
      "step": 43333
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0637333393096924,
      "learning_rate": 8.083560095143922e-06,
      "loss": 2.1548,
      "step": 43334
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1316957473754883,
      "learning_rate": 8.083155991751663e-06,
      "loss": 2.3174,
      "step": 43335
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.003197193145752,
      "learning_rate": 8.082751891608975e-06,
      "loss": 2.3355,
      "step": 43336
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1866930723190308,
      "learning_rate": 8.082347794716542e-06,
      "loss": 2.4477,
      "step": 43337
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3465206623077393,
      "learning_rate": 8.081943701075053e-06,
      "loss": 2.455,
      "step": 43338
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5196263790130615,
      "learning_rate": 8.08153961068519e-06,
      "loss": 2.4836,
      "step": 43339
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1109768152236938,
      "learning_rate": 8.081135523547636e-06,
      "loss": 2.3284,
      "step": 43340
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0900092124938965,
      "learning_rate": 8.08073143966308e-06,
      "loss": 2.2295,
      "step": 43341
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.070317029953003,
      "learning_rate": 8.080327359032204e-06,
      "loss": 2.399,
      "step": 43342
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.01746666431427,
      "learning_rate": 8.079923281655702e-06,
      "loss": 2.474,
      "step": 43343
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0369101762771606,
      "learning_rate": 8.079519207534246e-06,
      "loss": 2.5266,
      "step": 43344
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1707276105880737,
      "learning_rate": 8.079115136668525e-06,
      "loss": 2.2404,
      "step": 43345
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.075928807258606,
      "learning_rate": 8.078711069059226e-06,
      "loss": 2.3143,
      "step": 43346
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1177576780319214,
      "learning_rate": 8.078307004707034e-06,
      "loss": 2.3814,
      "step": 43347
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0229469537734985,
      "learning_rate": 8.077902943612635e-06,
      "loss": 2.3961,
      "step": 43348
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.337656855583191,
      "learning_rate": 8.077498885776712e-06,
      "loss": 2.2149,
      "step": 43349
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0471479892730713,
      "learning_rate": 8.07709483119995e-06,
      "loss": 2.4382,
      "step": 43350
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.17308509349823,
      "learning_rate": 8.076690779883034e-06,
      "loss": 2.1647,
      "step": 43351
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0617214441299438,
      "learning_rate": 8.07628673182665e-06,
      "loss": 2.5627,
      "step": 43352
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2254817485809326,
      "learning_rate": 8.075882687031482e-06,
      "loss": 2.5622,
      "step": 43353
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0458812713623047,
      "learning_rate": 8.075478645498216e-06,
      "loss": 2.2811,
      "step": 43354
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0126689672470093,
      "learning_rate": 8.075074607227536e-06,
      "loss": 2.433,
      "step": 43355
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0246659517288208,
      "learning_rate": 8.074670572220132e-06,
      "loss": 2.3953,
      "step": 43356
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1103696823120117,
      "learning_rate": 8.074266540476679e-06,
      "loss": 2.4787,
      "step": 43357
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1135798692703247,
      "learning_rate": 8.073862511997867e-06,
      "loss": 2.3367,
      "step": 43358
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0547088384628296,
      "learning_rate": 8.073458486784382e-06,
      "loss": 2.4444,
      "step": 43359
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0087205171585083,
      "learning_rate": 8.073054464836909e-06,
      "loss": 2.2314,
      "step": 43360
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.089877724647522,
      "learning_rate": 8.072650446156131e-06,
      "loss": 2.1515,
      "step": 43361
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.091736078262329,
      "learning_rate": 8.072246430742733e-06,
      "loss": 2.2886,
      "step": 43362
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0138581991195679,
      "learning_rate": 8.071842418597404e-06,
      "loss": 2.5713,
      "step": 43363
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1722805500030518,
      "learning_rate": 8.071438409720822e-06,
      "loss": 2.3535,
      "step": 43364
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.055511236190796,
      "learning_rate": 8.071034404113677e-06,
      "loss": 2.1981,
      "step": 43365
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0102547407150269,
      "learning_rate": 8.070630401776654e-06,
      "loss": 2.2589,
      "step": 43366
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1021625995635986,
      "learning_rate": 8.070226402710435e-06,
      "loss": 2.4053,
      "step": 43367
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.13862943649292,
      "learning_rate": 8.069822406915707e-06,
      "loss": 2.3108,
      "step": 43368
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2345247268676758,
      "learning_rate": 8.069418414393155e-06,
      "loss": 2.313,
      "step": 43369
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0851044654846191,
      "learning_rate": 8.069014425143465e-06,
      "loss": 2.2899,
      "step": 43370
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.314645767211914,
      "learning_rate": 8.068610439167318e-06,
      "loss": 2.2697,
      "step": 43371
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.9730988144874573,
      "learning_rate": 8.068206456465401e-06,
      "loss": 2.2838,
      "step": 43372
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.071158528327942,
      "learning_rate": 8.067802477038396e-06,
      "loss": 2.1656,
      "step": 43373
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1524602174758911,
      "learning_rate": 8.067398500886994e-06,
      "loss": 2.6837,
      "step": 43374
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0739126205444336,
      "learning_rate": 8.066994528011875e-06,
      "loss": 2.273,
      "step": 43375
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1851805448532104,
      "learning_rate": 8.066590558413724e-06,
      "loss": 2.2624,
      "step": 43376
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0207955837249756,
      "learning_rate": 8.06618659209323e-06,
      "loss": 2.3236,
      "step": 43377
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1282280683517456,
      "learning_rate": 8.065782629051074e-06,
      "loss": 2.2952,
      "step": 43378
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1218005418777466,
      "learning_rate": 8.06537866928794e-06,
      "loss": 2.4833,
      "step": 43379
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.965126633644104,
      "learning_rate": 8.064974712804516e-06,
      "loss": 2.2583,
      "step": 43380
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0179526805877686,
      "learning_rate": 8.064570759601485e-06,
      "loss": 2.4712,
      "step": 43381
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0655871629714966,
      "learning_rate": 8.064166809679533e-06,
      "loss": 2.4184,
      "step": 43382
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1133283376693726,
      "learning_rate": 8.063762863039346e-06,
      "loss": 2.2958,
      "step": 43383
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0353155136108398,
      "learning_rate": 8.063358919681606e-06,
      "loss": 2.405,
      "step": 43384
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1200740337371826,
      "learning_rate": 8.062954979606995e-06,
      "loss": 2.4649,
      "step": 43385
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0735111236572266,
      "learning_rate": 8.062551042816203e-06,
      "loss": 2.2241,
      "step": 43386
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0207127332687378,
      "learning_rate": 8.062147109309914e-06,
      "loss": 2.4306,
      "step": 43387
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0791449546813965,
      "learning_rate": 8.06174317908881e-06,
      "loss": 2.2676,
      "step": 43388
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.0652778148651123,
      "learning_rate": 8.061339252153579e-06,
      "loss": 2.2792,
      "step": 43389
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2766019105911255,
      "learning_rate": 8.060935328504905e-06,
      "loss": 2.5049,
      "step": 43390
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.060901165008545,
      "learning_rate": 8.060531408143471e-06,
      "loss": 2.3472,
      "step": 43391
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.1440845727920532,
      "learning_rate": 8.060127491069965e-06,
      "loss": 2.404,
      "step": 43392
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0929116010665894,
      "learning_rate": 8.059723577285068e-06,
      "loss": 2.2391,
      "step": 43393
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9686184525489807,
      "learning_rate": 8.059319666789467e-06,
      "loss": 2.3758,
      "step": 43394
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1924110651016235,
      "learning_rate": 8.058915759583846e-06,
      "loss": 2.4274,
      "step": 43395
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0919981002807617,
      "learning_rate": 8.058511855668893e-06,
      "loss": 2.3418,
      "step": 43396
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2357202768325806,
      "learning_rate": 8.058107955045288e-06,
      "loss": 2.346,
      "step": 43397
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0411441326141357,
      "learning_rate": 8.057704057713716e-06,
      "loss": 2.1695,
      "step": 43398
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0976723432540894,
      "learning_rate": 8.057300163674863e-06,
      "loss": 2.3669,
      "step": 43399
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2540416717529297,
      "learning_rate": 8.056896272929416e-06,
      "loss": 2.339,
      "step": 43400
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0967395305633545,
      "learning_rate": 8.056492385478055e-06,
      "loss": 2.3514,
      "step": 43401
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0242016315460205,
      "learning_rate": 8.056088501321469e-06,
      "loss": 2.3156,
      "step": 43402
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0637948513031006,
      "learning_rate": 8.055684620460339e-06,
      "loss": 2.4127,
      "step": 43403
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1070220470428467,
      "learning_rate": 8.055280742895353e-06,
      "loss": 2.4317,
      "step": 43404
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1169533729553223,
      "learning_rate": 8.054876868627196e-06,
      "loss": 2.2872,
      "step": 43405
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0886650085449219,
      "learning_rate": 8.054472997656548e-06,
      "loss": 2.2531,
      "step": 43406
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.203489065170288,
      "learning_rate": 8.054069129984099e-06,
      "loss": 2.3756,
      "step": 43407
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1461172103881836,
      "learning_rate": 8.05366526561053e-06,
      "loss": 2.3671,
      "step": 43408
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1754344701766968,
      "learning_rate": 8.053261404536528e-06,
      "loss": 2.2349,
      "step": 43409
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0018349885940552,
      "learning_rate": 8.052857546762776e-06,
      "loss": 2.2504,
      "step": 43410
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0257445573806763,
      "learning_rate": 8.052453692289961e-06,
      "loss": 2.3968,
      "step": 43411
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1559685468673706,
      "learning_rate": 8.052049841118764e-06,
      "loss": 2.4833,
      "step": 43412
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.097383737564087,
      "learning_rate": 8.051645993249871e-06,
      "loss": 2.2528,
      "step": 43413
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9735119342803955,
      "learning_rate": 8.051242148683969e-06,
      "loss": 2.4923,
      "step": 43414
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9983835816383362,
      "learning_rate": 8.05083830742174e-06,
      "loss": 2.1818,
      "step": 43415
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2918535470962524,
      "learning_rate": 8.050434469463869e-06,
      "loss": 2.3158,
      "step": 43416
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1614248752593994,
      "learning_rate": 8.050030634811039e-06,
      "loss": 2.5521,
      "step": 43417
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0512373447418213,
      "learning_rate": 8.049626803463938e-06,
      "loss": 2.5192,
      "step": 43418
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9570736289024353,
      "learning_rate": 8.049222975423251e-06,
      "loss": 2.2401,
      "step": 43419
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1545675992965698,
      "learning_rate": 8.04881915068966e-06,
      "loss": 2.4927,
      "step": 43420
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0910342931747437,
      "learning_rate": 8.04841532926385e-06,
      "loss": 2.1848,
      "step": 43421
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1321431398391724,
      "learning_rate": 8.048011511146511e-06,
      "loss": 2.2291,
      "step": 43422
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0842883586883545,
      "learning_rate": 8.047607696338318e-06,
      "loss": 2.496,
      "step": 43423
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0505436658859253,
      "learning_rate": 8.047203884839961e-06,
      "loss": 2.3379,
      "step": 43424
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0935406684875488,
      "learning_rate": 8.046800076652122e-06,
      "loss": 2.4794,
      "step": 43425
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0960721969604492,
      "learning_rate": 8.04639627177549e-06,
      "loss": 2.4294,
      "step": 43426
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1641370058059692,
      "learning_rate": 8.045992470210744e-06,
      "loss": 2.4745,
      "step": 43427
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9950312972068787,
      "learning_rate": 8.045588671958573e-06,
      "loss": 2.3417,
      "step": 43428
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1615771055221558,
      "learning_rate": 8.04518487701966e-06,
      "loss": 2.2411,
      "step": 43429
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0156476497650146,
      "learning_rate": 8.044781085394688e-06,
      "loss": 2.3626,
      "step": 43430
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0720064640045166,
      "learning_rate": 8.044377297084346e-06,
      "loss": 2.3409,
      "step": 43431
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0620062351226807,
      "learning_rate": 8.043973512089314e-06,
      "loss": 2.3022,
      "step": 43432
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.031907320022583,
      "learning_rate": 8.04356973041028e-06,
      "loss": 2.3,
      "step": 43433
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.333064317703247,
      "learning_rate": 8.043165952047925e-06,
      "loss": 2.4483,
      "step": 43434
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0805034637451172,
      "learning_rate": 8.042762177002936e-06,
      "loss": 2.3544,
      "step": 43435
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0344610214233398,
      "learning_rate": 8.042358405275996e-06,
      "loss": 2.3409,
      "step": 43436
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0096594095230103,
      "learning_rate": 8.04195463686779e-06,
      "loss": 2.1066,
      "step": 43437
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0161367654800415,
      "learning_rate": 8.041550871779002e-06,
      "loss": 2.5846,
      "step": 43438
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.035804033279419,
      "learning_rate": 8.041147110010322e-06,
      "loss": 2.2736,
      "step": 43439
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1703234910964966,
      "learning_rate": 8.040743351562424e-06,
      "loss": 2.3611,
      "step": 43440
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.041655421257019,
      "learning_rate": 8.040339596436e-06,
      "loss": 2.4528,
      "step": 43441
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0961655378341675,
      "learning_rate": 8.039935844631732e-06,
      "loss": 2.5369,
      "step": 43442
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3529870510101318,
      "learning_rate": 8.039532096150305e-06,
      "loss": 2.4421,
      "step": 43443
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9933128356933594,
      "learning_rate": 8.039128350992405e-06,
      "loss": 2.2225,
      "step": 43444
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0846768617630005,
      "learning_rate": 8.038724609158713e-06,
      "loss": 2.6034,
      "step": 43445
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0668655633926392,
      "learning_rate": 8.038320870649917e-06,
      "loss": 2.3074,
      "step": 43446
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0431175231933594,
      "learning_rate": 8.037917135466699e-06,
      "loss": 1.9775,
      "step": 43447
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0916197299957275,
      "learning_rate": 8.037513403609745e-06,
      "loss": 2.3148,
      "step": 43448
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0200001001358032,
      "learning_rate": 8.037109675079742e-06,
      "loss": 2.4264,
      "step": 43449
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1016148328781128,
      "learning_rate": 8.036705949877368e-06,
      "loss": 2.1384,
      "step": 43450
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.06388521194458,
      "learning_rate": 8.036302228003309e-06,
      "loss": 2.1855,
      "step": 43451
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.125978708267212,
      "learning_rate": 8.035898509458252e-06,
      "loss": 2.1769,
      "step": 43452
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9716573357582092,
      "learning_rate": 8.035494794242882e-06,
      "loss": 2.1584,
      "step": 43453
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9826456904411316,
      "learning_rate": 8.035091082357882e-06,
      "loss": 2.4194,
      "step": 43454
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0739527940750122,
      "learning_rate": 8.034687373803933e-06,
      "loss": 2.3552,
      "step": 43455
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0505375862121582,
      "learning_rate": 8.034283668581725e-06,
      "loss": 2.1825,
      "step": 43456
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.101271629333496,
      "learning_rate": 8.03387996669194e-06,
      "loss": 2.2585,
      "step": 43457
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0132429599761963,
      "learning_rate": 8.033476268135262e-06,
      "loss": 2.2413,
      "step": 43458
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1023356914520264,
      "learning_rate": 8.033072572912377e-06,
      "loss": 2.3215,
      "step": 43459
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.124196171760559,
      "learning_rate": 8.032668881023965e-06,
      "loss": 2.2313,
      "step": 43460
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0901389122009277,
      "learning_rate": 8.03226519247072e-06,
      "loss": 2.2028,
      "step": 43461
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.223392128944397,
      "learning_rate": 8.031861507253316e-06,
      "loss": 2.1063,
      "step": 43462
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1394716501235962,
      "learning_rate": 8.031457825372441e-06,
      "loss": 2.2844,
      "step": 43463
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.119225025177002,
      "learning_rate": 8.03105414682878e-06,
      "loss": 2.3138,
      "step": 43464
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9910492897033691,
      "learning_rate": 8.030650471623016e-06,
      "loss": 2.2325,
      "step": 43465
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9983219504356384,
      "learning_rate": 8.030246799755836e-06,
      "loss": 2.3035,
      "step": 43466
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9622082114219666,
      "learning_rate": 8.029843131227923e-06,
      "loss": 2.4266,
      "step": 43467
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.054201364517212,
      "learning_rate": 8.029439466039958e-06,
      "loss": 2.1619,
      "step": 43468
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0704538822174072,
      "learning_rate": 8.029035804192631e-06,
      "loss": 2.2455,
      "step": 43469
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.000869631767273,
      "learning_rate": 8.028632145686622e-06,
      "loss": 2.4705,
      "step": 43470
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0931419134140015,
      "learning_rate": 8.028228490522618e-06,
      "loss": 2.1532,
      "step": 43471
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0465149879455566,
      "learning_rate": 8.027824838701301e-06,
      "loss": 2.4212,
      "step": 43472
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.211703896522522,
      "learning_rate": 8.027421190223357e-06,
      "loss": 2.3028,
      "step": 43473
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0837243795394897,
      "learning_rate": 8.02701754508947e-06,
      "loss": 2.4862,
      "step": 43474
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1416701078414917,
      "learning_rate": 8.026613903300329e-06,
      "loss": 2.2378,
      "step": 43475
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1390143632888794,
      "learning_rate": 8.026210264856608e-06,
      "loss": 2.5214,
      "step": 43476
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1620529890060425,
      "learning_rate": 8.025806629758996e-06,
      "loss": 2.3655,
      "step": 43477
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9806882739067078,
      "learning_rate": 8.025402998008181e-06,
      "loss": 2.3649,
      "step": 43478
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.060563087463379,
      "learning_rate": 8.024999369604842e-06,
      "loss": 2.3272,
      "step": 43479
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0719667673110962,
      "learning_rate": 8.024595744549664e-06,
      "loss": 2.1421,
      "step": 43480
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9840867519378662,
      "learning_rate": 8.024192122843335e-06,
      "loss": 2.4007,
      "step": 43481
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.076915979385376,
      "learning_rate": 8.023788504486537e-06,
      "loss": 2.164,
      "step": 43482
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0040483474731445,
      "learning_rate": 8.023384889479953e-06,
      "loss": 2.1988,
      "step": 43483
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2923908233642578,
      "learning_rate": 8.02298127782427e-06,
      "loss": 2.5013,
      "step": 43484
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0459851026535034,
      "learning_rate": 8.02257766952017e-06,
      "loss": 2.1615,
      "step": 43485
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0754611492156982,
      "learning_rate": 8.022174064568337e-06,
      "loss": 2.3997,
      "step": 43486
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1088298559188843,
      "learning_rate": 8.021770462969457e-06,
      "loss": 2.2299,
      "step": 43487
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.308480143547058,
      "learning_rate": 8.021366864724217e-06,
      "loss": 2.1808,
      "step": 43488
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9782236218452454,
      "learning_rate": 8.020963269833295e-06,
      "loss": 2.3805,
      "step": 43489
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0878926515579224,
      "learning_rate": 8.020559678297374e-06,
      "loss": 2.2661,
      "step": 43490
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0345669984817505,
      "learning_rate": 8.020156090117146e-06,
      "loss": 2.1534,
      "step": 43491
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1200168132781982,
      "learning_rate": 8.019752505293291e-06,
      "loss": 2.2841,
      "step": 43492
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.003403663635254,
      "learning_rate": 8.019348923826492e-06,
      "loss": 2.3208,
      "step": 43493
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0440810918807983,
      "learning_rate": 8.018945345717433e-06,
      "loss": 2.4399,
      "step": 43494
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1064331531524658,
      "learning_rate": 8.018541770966803e-06,
      "loss": 2.3601,
      "step": 43495
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0711357593536377,
      "learning_rate": 8.01813819957528e-06,
      "loss": 2.1802,
      "step": 43496
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1245239973068237,
      "learning_rate": 8.017734631543551e-06,
      "loss": 2.5645,
      "step": 43497
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0462281703948975,
      "learning_rate": 8.017331066872303e-06,
      "loss": 2.2539,
      "step": 43498
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2064892053604126,
      "learning_rate": 8.016927505562216e-06,
      "loss": 2.4576,
      "step": 43499
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0070791244506836,
      "learning_rate": 8.016523947613974e-06,
      "loss": 2.1293,
      "step": 43500
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0707476139068604,
      "learning_rate": 8.016120393028269e-06,
      "loss": 2.5976,
      "step": 43501
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0996638536453247,
      "learning_rate": 8.015716841805774e-06,
      "loss": 2.3957,
      "step": 43502
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1380224227905273,
      "learning_rate": 8.015313293947175e-06,
      "loss": 2.3754,
      "step": 43503
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.110877275466919,
      "learning_rate": 8.01490974945316e-06,
      "loss": 2.3592,
      "step": 43504
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9932169318199158,
      "learning_rate": 8.014506208324415e-06,
      "loss": 2.2545,
      "step": 43505
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1326277256011963,
      "learning_rate": 8.014102670561619e-06,
      "loss": 2.4452,
      "step": 43506
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1148287057876587,
      "learning_rate": 8.01369913616546e-06,
      "loss": 2.1795,
      "step": 43507
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1524864435195923,
      "learning_rate": 8.013295605136619e-06,
      "loss": 2.3812,
      "step": 43508
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1017001867294312,
      "learning_rate": 8.012892077475782e-06,
      "loss": 2.3557,
      "step": 43509
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.129364252090454,
      "learning_rate": 8.012488553183632e-06,
      "loss": 2.5623,
      "step": 43510
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0147634744644165,
      "learning_rate": 8.012085032260854e-06,
      "loss": 2.3704,
      "step": 43511
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0692126750946045,
      "learning_rate": 8.011681514708133e-06,
      "loss": 2.4618,
      "step": 43512
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1293275356292725,
      "learning_rate": 8.01127800052615e-06,
      "loss": 2.3632,
      "step": 43513
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5384860038757324,
      "learning_rate": 8.010874489715592e-06,
      "loss": 2.3527,
      "step": 43514
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.156304955482483,
      "learning_rate": 8.010470982277145e-06,
      "loss": 2.3137,
      "step": 43515
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.086037278175354,
      "learning_rate": 8.010067478211488e-06,
      "loss": 2.3032,
      "step": 43516
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1518142223358154,
      "learning_rate": 8.009663977519306e-06,
      "loss": 2.2825,
      "step": 43517
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.987284243106842,
      "learning_rate": 8.009260480201282e-06,
      "loss": 2.2723,
      "step": 43518
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0557087659835815,
      "learning_rate": 8.008856986258106e-06,
      "loss": 2.2601,
      "step": 43519
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0003082752227783,
      "learning_rate": 8.008453495690456e-06,
      "loss": 2.3062,
      "step": 43520
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1111253499984741,
      "learning_rate": 8.008050008499019e-06,
      "loss": 2.5907,
      "step": 43521
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.028226613998413,
      "learning_rate": 8.007646524684476e-06,
      "loss": 2.3964,
      "step": 43522
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0734325647354126,
      "learning_rate": 8.007243044247518e-06,
      "loss": 2.3113,
      "step": 43523
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1864240169525146,
      "learning_rate": 8.006839567188822e-06,
      "loss": 2.5498,
      "step": 43524
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1253876686096191,
      "learning_rate": 8.006436093509073e-06,
      "loss": 2.3679,
      "step": 43525
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1993014812469482,
      "learning_rate": 8.006032623208957e-06,
      "loss": 2.2295,
      "step": 43526
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.319472074508667,
      "learning_rate": 8.005629156289157e-06,
      "loss": 2.2327,
      "step": 43527
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.186688780784607,
      "learning_rate": 8.005225692750359e-06,
      "loss": 2.4009,
      "step": 43528
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.025908350944519,
      "learning_rate": 8.004822232593245e-06,
      "loss": 2.4062,
      "step": 43529
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.077324628829956,
      "learning_rate": 8.004418775818498e-06,
      "loss": 2.7511,
      "step": 43530
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1164519786834717,
      "learning_rate": 8.004015322426804e-06,
      "loss": 2.2384,
      "step": 43531
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.056941270828247,
      "learning_rate": 8.003611872418846e-06,
      "loss": 2.2703,
      "step": 43532
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0476235151290894,
      "learning_rate": 8.003208425795306e-06,
      "loss": 2.2413,
      "step": 43533
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1268864870071411,
      "learning_rate": 8.00280498255687e-06,
      "loss": 2.3419,
      "step": 43534
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0438536405563354,
      "learning_rate": 8.002401542704224e-06,
      "loss": 2.3428,
      "step": 43535
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9893512725830078,
      "learning_rate": 8.001998106238049e-06,
      "loss": 2.1834,
      "step": 43536
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0228155851364136,
      "learning_rate": 8.00159467315903e-06,
      "loss": 2.3441,
      "step": 43537
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2330056428909302,
      "learning_rate": 8.001191243467851e-06,
      "loss": 2.5083,
      "step": 43538
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0998053550720215,
      "learning_rate": 8.000787817165196e-06,
      "loss": 2.2468,
      "step": 43539
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1145859956741333,
      "learning_rate": 8.000384394251748e-06,
      "loss": 2.0111,
      "step": 43540
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0108418464660645,
      "learning_rate": 7.999980974728195e-06,
      "loss": 2.3487,
      "step": 43541
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1404404640197754,
      "learning_rate": 7.999577558595214e-06,
      "loss": 2.2988,
      "step": 43542
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0816510915756226,
      "learning_rate": 7.999174145853492e-06,
      "loss": 2.4338,
      "step": 43543
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2361751794815063,
      "learning_rate": 7.998770736503715e-06,
      "loss": 2.4865,
      "step": 43544
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.298859715461731,
      "learning_rate": 7.998367330546563e-06,
      "loss": 2.4602,
      "step": 43545
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0575767755508423,
      "learning_rate": 7.997963927982723e-06,
      "loss": 2.378,
      "step": 43546
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0097931623458862,
      "learning_rate": 7.997560528812877e-06,
      "loss": 2.278,
      "step": 43547
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0322458744049072,
      "learning_rate": 7.997157133037711e-06,
      "loss": 2.3415,
      "step": 43548
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1199544668197632,
      "learning_rate": 7.996753740657907e-06,
      "loss": 2.2595,
      "step": 43549
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.127285361289978,
      "learning_rate": 7.99635035167415e-06,
      "loss": 2.1768,
      "step": 43550
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0151253938674927,
      "learning_rate": 7.995946966087124e-06,
      "loss": 2.3468,
      "step": 43551
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0433553457260132,
      "learning_rate": 7.995543583897511e-06,
      "loss": 2.3104,
      "step": 43552
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1022586822509766,
      "learning_rate": 7.995140205105996e-06,
      "loss": 2.6652,
      "step": 43553
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0547807216644287,
      "learning_rate": 7.994736829713265e-06,
      "loss": 2.2636,
      "step": 43554
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0725750923156738,
      "learning_rate": 7.994333457719999e-06,
      "loss": 2.4008,
      "step": 43555
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0323116779327393,
      "learning_rate": 7.99393008912688e-06,
      "loss": 2.0628,
      "step": 43556
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0983163118362427,
      "learning_rate": 7.993526723934598e-06,
      "loss": 2.277,
      "step": 43557
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1057027578353882,
      "learning_rate": 7.993123362143832e-06,
      "loss": 2.4417,
      "step": 43558
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0653480291366577,
      "learning_rate": 7.992720003755265e-06,
      "loss": 2.6253,
      "step": 43559
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2540241479873657,
      "learning_rate": 7.992316648769582e-06,
      "loss": 2.2422,
      "step": 43560
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0368692874908447,
      "learning_rate": 7.99191329718747e-06,
      "loss": 2.4104,
      "step": 43561
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9938257932662964,
      "learning_rate": 7.99150994900961e-06,
      "loss": 2.116,
      "step": 43562
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9777246713638306,
      "learning_rate": 7.991106604236684e-06,
      "loss": 2.4678,
      "step": 43563
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0246073007583618,
      "learning_rate": 7.99070326286938e-06,
      "loss": 2.4586,
      "step": 43564
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.255116581916809,
      "learning_rate": 7.99029992490838e-06,
      "loss": 2.308,
      "step": 43565
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0586109161376953,
      "learning_rate": 7.989896590354366e-06,
      "loss": 2.4338,
      "step": 43566
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0392903089523315,
      "learning_rate": 7.989493259208028e-06,
      "loss": 2.5749,
      "step": 43567
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1709425449371338,
      "learning_rate": 7.98908993147004e-06,
      "loss": 2.606,
      "step": 43568
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0410205125808716,
      "learning_rate": 7.988686607141092e-06,
      "loss": 2.3635,
      "step": 43569
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.051330327987671,
      "learning_rate": 7.988283286221865e-06,
      "loss": 2.2071,
      "step": 43570
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.199204921722412,
      "learning_rate": 7.987879968713046e-06,
      "loss": 2.3952,
      "step": 43571
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0808357000350952,
      "learning_rate": 7.987476654615316e-06,
      "loss": 2.4273,
      "step": 43572
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2162690162658691,
      "learning_rate": 7.98707334392936e-06,
      "loss": 2.2067,
      "step": 43573
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.095205545425415,
      "learning_rate": 7.98667003665586e-06,
      "loss": 2.2925,
      "step": 43574
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1013665199279785,
      "learning_rate": 7.986266732795503e-06,
      "loss": 2.286,
      "step": 43575
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0477755069732666,
      "learning_rate": 7.985863432348969e-06,
      "loss": 2.3453,
      "step": 43576
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2719531059265137,
      "learning_rate": 7.985460135316944e-06,
      "loss": 2.3655,
      "step": 43577
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.06413996219635,
      "learning_rate": 7.985056841700111e-06,
      "loss": 2.3991,
      "step": 43578
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.085838794708252,
      "learning_rate": 7.984653551499157e-06,
      "loss": 2.3008,
      "step": 43579
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0555869340896606,
      "learning_rate": 7.984250264714763e-06,
      "loss": 2.2636,
      "step": 43580
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0583440065383911,
      "learning_rate": 7.98384698134761e-06,
      "loss": 2.2747,
      "step": 43581
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1339242458343506,
      "learning_rate": 7.983443701398383e-06,
      "loss": 2.4005,
      "step": 43582
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0463073253631592,
      "learning_rate": 7.983040424867768e-06,
      "loss": 2.2304,
      "step": 43583
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1621562242507935,
      "learning_rate": 7.982637151756446e-06,
      "loss": 2.4953,
      "step": 43584
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0087693929672241,
      "learning_rate": 7.982233882065105e-06,
      "loss": 2.5416,
      "step": 43585
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1553704738616943,
      "learning_rate": 7.981830615794422e-06,
      "loss": 2.4255,
      "step": 43586
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1548089981079102,
      "learning_rate": 7.981427352945085e-06,
      "loss": 2.1345,
      "step": 43587
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1998628377914429,
      "learning_rate": 7.981024093517778e-06,
      "loss": 2.1537,
      "step": 43588
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0127428770065308,
      "learning_rate": 7.98062083751318e-06,
      "loss": 2.2488,
      "step": 43589
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9704937934875488,
      "learning_rate": 7.980217584931982e-06,
      "loss": 2.2036,
      "step": 43590
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0326364040374756,
      "learning_rate": 7.979814335774863e-06,
      "loss": 2.1695,
      "step": 43591
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0429104566574097,
      "learning_rate": 7.979411090042506e-06,
      "loss": 2.348,
      "step": 43592
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1923104524612427,
      "learning_rate": 7.979007847735595e-06,
      "loss": 2.3613,
      "step": 43593
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1187121868133545,
      "learning_rate": 7.978604608854822e-06,
      "loss": 2.2147,
      "step": 43594
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1775367259979248,
      "learning_rate": 7.978201373400858e-06,
      "loss": 2.4942,
      "step": 43595
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1584120988845825,
      "learning_rate": 7.977798141374391e-06,
      "loss": 2.3669,
      "step": 43596
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.094006896018982,
      "learning_rate": 7.977394912776105e-06,
      "loss": 2.3665,
      "step": 43597
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1060070991516113,
      "learning_rate": 7.976991687606685e-06,
      "loss": 2.382,
      "step": 43598
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0617069005966187,
      "learning_rate": 7.976588465866813e-06,
      "loss": 2.3544,
      "step": 43599
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0618518590927124,
      "learning_rate": 7.976185247557174e-06,
      "loss": 2.3093,
      "step": 43600
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.192957878112793,
      "learning_rate": 7.97578203267845e-06,
      "loss": 2.3096,
      "step": 43601
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1409833431243896,
      "learning_rate": 7.975378821231324e-06,
      "loss": 2.2985,
      "step": 43602
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.201014757156372,
      "learning_rate": 7.974975613216482e-06,
      "loss": 2.1643,
      "step": 43603
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0545133352279663,
      "learning_rate": 7.974572408634606e-06,
      "loss": 2.2599,
      "step": 43604
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.064961314201355,
      "learning_rate": 7.974169207486381e-06,
      "loss": 2.4384,
      "step": 43605
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.095073938369751,
      "learning_rate": 7.97376600977249e-06,
      "loss": 2.2728,
      "step": 43606
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9996705651283264,
      "learning_rate": 7.973362815493618e-06,
      "loss": 2.5142,
      "step": 43607
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1396137475967407,
      "learning_rate": 7.972959624650441e-06,
      "loss": 2.2497,
      "step": 43608
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0194035768508911,
      "learning_rate": 7.97255643724365e-06,
      "loss": 2.269,
      "step": 43609
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0754492282867432,
      "learning_rate": 7.972153253273926e-06,
      "loss": 2.5482,
      "step": 43610
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1288374662399292,
      "learning_rate": 7.971750072741953e-06,
      "loss": 2.2968,
      "step": 43611
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9147363305091858,
      "learning_rate": 7.971346895648415e-06,
      "loss": 2.1715,
      "step": 43612
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.972630500793457,
      "learning_rate": 7.970943721993997e-06,
      "loss": 2.2057,
      "step": 43613
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0039136409759521,
      "learning_rate": 7.970540551779378e-06,
      "loss": 2.3248,
      "step": 43614
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1042592525482178,
      "learning_rate": 7.970137385005246e-06,
      "loss": 2.3913,
      "step": 43615
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9228610396385193,
      "learning_rate": 7.96973422167228e-06,
      "loss": 2.2019,
      "step": 43616
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1563520431518555,
      "learning_rate": 7.969331061781167e-06,
      "loss": 2.2563,
      "step": 43617
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1137118339538574,
      "learning_rate": 7.968927905332588e-06,
      "loss": 2.1854,
      "step": 43618
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0207114219665527,
      "learning_rate": 7.96852475232723e-06,
      "loss": 2.1937,
      "step": 43619
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1515028476715088,
      "learning_rate": 7.968121602765777e-06,
      "loss": 2.4219,
      "step": 43620
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0534595251083374,
      "learning_rate": 7.967718456648905e-06,
      "loss": 2.3798,
      "step": 43621
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1405309438705444,
      "learning_rate": 7.967315313977304e-06,
      "loss": 2.3146,
      "step": 43622
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1018106937408447,
      "learning_rate": 7.966912174751655e-06,
      "loss": 2.3378,
      "step": 43623
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1986682415008545,
      "learning_rate": 7.96650903897264e-06,
      "loss": 2.2493,
      "step": 43624
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.103121280670166,
      "learning_rate": 7.966105906640946e-06,
      "loss": 2.4257,
      "step": 43625
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.176047682762146,
      "learning_rate": 7.965702777757256e-06,
      "loss": 2.3587,
      "step": 43626
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0828534364700317,
      "learning_rate": 7.965299652322252e-06,
      "loss": 2.354,
      "step": 43627
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1347404718399048,
      "learning_rate": 7.964896530336616e-06,
      "loss": 2.1739,
      "step": 43628
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1128429174423218,
      "learning_rate": 7.964493411801034e-06,
      "loss": 2.2257,
      "step": 43629
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.224155068397522,
      "learning_rate": 7.964090296716188e-06,
      "loss": 2.4373,
      "step": 43630
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1265310049057007,
      "learning_rate": 7.963687185082763e-06,
      "loss": 2.3225,
      "step": 43631
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1697009801864624,
      "learning_rate": 7.96328407690144e-06,
      "loss": 2.4192,
      "step": 43632
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0088694095611572,
      "learning_rate": 7.962880972172909e-06,
      "loss": 2.1667,
      "step": 43633
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0454540252685547,
      "learning_rate": 7.962477870897844e-06,
      "loss": 2.3169,
      "step": 43634
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1105098724365234,
      "learning_rate": 7.962074773076932e-06,
      "loss": 2.4061,
      "step": 43635
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0927985906600952,
      "learning_rate": 7.961671678710856e-06,
      "loss": 2.4882,
      "step": 43636
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1177787780761719,
      "learning_rate": 7.961268587800301e-06,
      "loss": 2.2273,
      "step": 43637
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.075792670249939,
      "learning_rate": 7.96086550034595e-06,
      "loss": 2.4448,
      "step": 43638
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0662637948989868,
      "learning_rate": 7.960462416348485e-06,
      "loss": 2.3787,
      "step": 43639
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0448976755142212,
      "learning_rate": 7.96005933580859e-06,
      "loss": 2.411,
      "step": 43640
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0602980852127075,
      "learning_rate": 7.95965625872695e-06,
      "loss": 2.3342,
      "step": 43641
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.056406855583191,
      "learning_rate": 7.959253185104246e-06,
      "loss": 2.409,
      "step": 43642
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2382506132125854,
      "learning_rate": 7.95885011494116e-06,
      "loss": 2.1852,
      "step": 43643
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0993049144744873,
      "learning_rate": 7.95844704823838e-06,
      "loss": 2.3564,
      "step": 43644
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0631272792816162,
      "learning_rate": 7.958043984996584e-06,
      "loss": 2.2226,
      "step": 43645
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0456677675247192,
      "learning_rate": 7.957640925216463e-06,
      "loss": 2.3851,
      "step": 43646
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9882659316062927,
      "learning_rate": 7.957237868898695e-06,
      "loss": 2.2163,
      "step": 43647
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0709898471832275,
      "learning_rate": 7.95683481604396e-06,
      "loss": 2.3959,
      "step": 43648
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.104368805885315,
      "learning_rate": 7.956431766652946e-06,
      "loss": 2.4293,
      "step": 43649
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0598597526550293,
      "learning_rate": 7.956028720726335e-06,
      "loss": 2.3528,
      "step": 43650
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.138808012008667,
      "learning_rate": 7.95562567826481e-06,
      "loss": 2.3386,
      "step": 43651
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.047540545463562,
      "learning_rate": 7.955222639269053e-06,
      "loss": 2.4356,
      "step": 43652
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0900909900665283,
      "learning_rate": 7.954819603739753e-06,
      "loss": 2.4025,
      "step": 43653
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.093422770500183,
      "learning_rate": 7.954416571677586e-06,
      "loss": 2.1708,
      "step": 43654
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1116358041763306,
      "learning_rate": 7.954013543083239e-06,
      "loss": 2.3734,
      "step": 43655
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.136933445930481,
      "learning_rate": 7.953610517957395e-06,
      "loss": 2.0523,
      "step": 43656
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1033605337142944,
      "learning_rate": 7.953207496300737e-06,
      "loss": 2.3125,
      "step": 43657
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.171087384223938,
      "learning_rate": 7.95280447811395e-06,
      "loss": 2.2221,
      "step": 43658
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0842913389205933,
      "learning_rate": 7.952401463397714e-06,
      "loss": 2.2486,
      "step": 43659
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9873108863830566,
      "learning_rate": 7.951998452152717e-06,
      "loss": 2.4785,
      "step": 43660
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.032080054283142,
      "learning_rate": 7.951595444379635e-06,
      "loss": 2.5068,
      "step": 43661
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9997991323471069,
      "learning_rate": 7.951192440079157e-06,
      "loss": 2.1625,
      "step": 43662
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0463799238204956,
      "learning_rate": 7.95078943925196e-06,
      "loss": 2.1031,
      "step": 43663
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1634927988052368,
      "learning_rate": 7.950386441898735e-06,
      "loss": 2.122,
      "step": 43664
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1416981220245361,
      "learning_rate": 7.949983448020161e-06,
      "loss": 2.3291,
      "step": 43665
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1459969282150269,
      "learning_rate": 7.949580457616922e-06,
      "loss": 2.3519,
      "step": 43666
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.101899266242981,
      "learning_rate": 7.9491774706897e-06,
      "loss": 2.0564,
      "step": 43667
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0726927518844604,
      "learning_rate": 7.94877448723918e-06,
      "loss": 2.3167,
      "step": 43668
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1550965309143066,
      "learning_rate": 7.948371507266046e-06,
      "loss": 2.3725,
      "step": 43669
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2651389837265015,
      "learning_rate": 7.947968530770979e-06,
      "loss": 2.4936,
      "step": 43670
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1874895095825195,
      "learning_rate": 7.947565557754661e-06,
      "loss": 2.3413,
      "step": 43671
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1964002847671509,
      "learning_rate": 7.947162588217777e-06,
      "loss": 2.2657,
      "step": 43672
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2173906564712524,
      "learning_rate": 7.946759622161012e-06,
      "loss": 2.2152,
      "step": 43673
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1002721786499023,
      "learning_rate": 7.946356659585044e-06,
      "loss": 2.0377,
      "step": 43674
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2632335424423218,
      "learning_rate": 7.945953700490563e-06,
      "loss": 2.375,
      "step": 43675
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.110859990119934,
      "learning_rate": 7.945550744878247e-06,
      "loss": 2.2972,
      "step": 43676
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9891233444213867,
      "learning_rate": 7.945147792748779e-06,
      "loss": 2.418,
      "step": 43677
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1934735774993896,
      "learning_rate": 7.944744844102844e-06,
      "loss": 2.3102,
      "step": 43678
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0408179759979248,
      "learning_rate": 7.944341898941124e-06,
      "loss": 2.5153,
      "step": 43679
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1039413213729858,
      "learning_rate": 7.943938957264303e-06,
      "loss": 2.3298,
      "step": 43680
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1182829141616821,
      "learning_rate": 7.943536019073066e-06,
      "loss": 2.4148,
      "step": 43681
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1453989744186401,
      "learning_rate": 7.94313308436809e-06,
      "loss": 2.2746,
      "step": 43682
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2108211517333984,
      "learning_rate": 7.942730153150065e-06,
      "loss": 2.229,
      "step": 43683
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2376738786697388,
      "learning_rate": 7.942327225419671e-06,
      "loss": 2.3908,
      "step": 43684
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1832488775253296,
      "learning_rate": 7.94192430117759e-06,
      "loss": 2.3478,
      "step": 43685
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0274029970169067,
      "learning_rate": 7.94152138042451e-06,
      "loss": 2.25,
      "step": 43686
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0357896089553833,
      "learning_rate": 7.941118463161109e-06,
      "loss": 2.2219,
      "step": 43687
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1636962890625,
      "learning_rate": 7.94071554938807e-06,
      "loss": 2.3947,
      "step": 43688
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1772353649139404,
      "learning_rate": 7.940312639106076e-06,
      "loss": 2.4447,
      "step": 43689
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0459054708480835,
      "learning_rate": 7.939909732315812e-06,
      "loss": 2.1312,
      "step": 43690
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1509281396865845,
      "learning_rate": 7.939506829017961e-06,
      "loss": 2.2794,
      "step": 43691
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0946201086044312,
      "learning_rate": 7.939103929213205e-06,
      "loss": 2.2543,
      "step": 43692
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.131395936012268,
      "learning_rate": 7.93870103290223e-06,
      "loss": 2.3426,
      "step": 43693
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.13798189163208,
      "learning_rate": 7.938298140085713e-06,
      "loss": 2.3714,
      "step": 43694
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2242439985275269,
      "learning_rate": 7.937895250764343e-06,
      "loss": 2.2574,
      "step": 43695
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2158637046813965,
      "learning_rate": 7.937492364938798e-06,
      "loss": 2.1918,
      "step": 43696
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0082523822784424,
      "learning_rate": 7.93708948260977e-06,
      "loss": 2.2074,
      "step": 43697
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1312614679336548,
      "learning_rate": 7.93668660377793e-06,
      "loss": 2.4047,
      "step": 43698
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.136445164680481,
      "learning_rate": 7.936283728443971e-06,
      "loss": 2.1648,
      "step": 43699
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1102356910705566,
      "learning_rate": 7.935880856608568e-06,
      "loss": 2.5606,
      "step": 43700
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0822263956069946,
      "learning_rate": 7.935477988272407e-06,
      "loss": 2.2693,
      "step": 43701
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.102532982826233,
      "learning_rate": 7.935075123436173e-06,
      "loss": 2.3908,
      "step": 43702
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1251696348190308,
      "learning_rate": 7.93467226210055e-06,
      "loss": 2.4521,
      "step": 43703
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0715229511260986,
      "learning_rate": 7.934269404266215e-06,
      "loss": 2.2859,
      "step": 43704
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2410929203033447,
      "learning_rate": 7.933866549933855e-06,
      "loss": 2.3368,
      "step": 43705
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6946226358413696,
      "learning_rate": 7.933463699104152e-06,
      "loss": 2.3763,
      "step": 43706
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0711590051651,
      "learning_rate": 7.933060851777788e-06,
      "loss": 2.3495,
      "step": 43707
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.049714207649231,
      "learning_rate": 7.93265800795545e-06,
      "loss": 2.4462,
      "step": 43708
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9950138926506042,
      "learning_rate": 7.932255167637816e-06,
      "loss": 2.2635,
      "step": 43709
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0809246301651,
      "learning_rate": 7.931852330825572e-06,
      "loss": 2.2352,
      "step": 43710
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0768612623214722,
      "learning_rate": 7.9314494975194e-06,
      "loss": 2.288,
      "step": 43711
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1018118858337402,
      "learning_rate": 7.931046667719987e-06,
      "loss": 2.3345,
      "step": 43712
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0871338844299316,
      "learning_rate": 7.930643841428007e-06,
      "loss": 2.0325,
      "step": 43713
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.145134449005127,
      "learning_rate": 7.930241018644148e-06,
      "loss": 2.291,
      "step": 43714
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0538628101348877,
      "learning_rate": 7.929838199369093e-06,
      "loss": 2.2051,
      "step": 43715
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0768831968307495,
      "learning_rate": 7.929435383603525e-06,
      "loss": 2.3779,
      "step": 43716
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9956062436103821,
      "learning_rate": 7.929032571348125e-06,
      "loss": 2.2996,
      "step": 43717
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2029500007629395,
      "learning_rate": 7.92862976260358e-06,
      "loss": 2.2966,
      "step": 43718
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1530519723892212,
      "learning_rate": 7.928226957370567e-06,
      "loss": 2.3239,
      "step": 43719
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9746993780136108,
      "learning_rate": 7.927824155649773e-06,
      "loss": 2.2208,
      "step": 43720
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0215628147125244,
      "learning_rate": 7.92742135744188e-06,
      "loss": 2.2301,
      "step": 43721
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0216059684753418,
      "learning_rate": 7.92701856274757e-06,
      "loss": 2.4079,
      "step": 43722
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1209442615509033,
      "learning_rate": 7.926615771567528e-06,
      "loss": 2.4172,
      "step": 43723
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1058710813522339,
      "learning_rate": 7.926212983902436e-06,
      "loss": 2.3185,
      "step": 43724
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9977510571479797,
      "learning_rate": 7.925810199752978e-06,
      "loss": 2.3491,
      "step": 43725
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1607309579849243,
      "learning_rate": 7.925407419119833e-06,
      "loss": 2.3475,
      "step": 43726
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0851956605911255,
      "learning_rate": 7.925004642003685e-06,
      "loss": 2.3723,
      "step": 43727
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0032657384872437,
      "learning_rate": 7.924601868405216e-06,
      "loss": 2.3025,
      "step": 43728
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1245131492614746,
      "learning_rate": 7.924199098325113e-06,
      "loss": 2.2908,
      "step": 43729
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.127047061920166,
      "learning_rate": 7.923796331764055e-06,
      "loss": 2.4604,
      "step": 43730
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.173463225364685,
      "learning_rate": 7.923393568722729e-06,
      "loss": 2.2167,
      "step": 43731
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0676041841506958,
      "learning_rate": 7.922990809201811e-06,
      "loss": 2.1696,
      "step": 43732
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0686091184616089,
      "learning_rate": 7.92258805320199e-06,
      "loss": 2.361,
      "step": 43733
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0473712682724,
      "learning_rate": 7.922185300723946e-06,
      "loss": 2.4236,
      "step": 43734
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1262818574905396,
      "learning_rate": 7.921782551768361e-06,
      "loss": 2.3986,
      "step": 43735
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1067808866500854,
      "learning_rate": 7.92137980633592e-06,
      "loss": 2.1614,
      "step": 43736
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9731394052505493,
      "learning_rate": 7.920977064427304e-06,
      "loss": 2.1409,
      "step": 43737
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.179660439491272,
      "learning_rate": 7.920574326043197e-06,
      "loss": 2.3547,
      "step": 43738
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0600218772888184,
      "learning_rate": 7.920171591184286e-06,
      "loss": 2.1878,
      "step": 43739
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2499935626983643,
      "learning_rate": 7.919768859851245e-06,
      "loss": 2.2991,
      "step": 43740
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.164119839668274,
      "learning_rate": 7.919366132044758e-06,
      "loss": 2.4446,
      "step": 43741
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1733072996139526,
      "learning_rate": 7.918963407765513e-06,
      "loss": 2.3968,
      "step": 43742
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.060012698173523,
      "learning_rate": 7.918560687014191e-06,
      "loss": 2.4402,
      "step": 43743
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0040264129638672,
      "learning_rate": 7.918157969791473e-06,
      "loss": 2.1798,
      "step": 43744
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0374170541763306,
      "learning_rate": 7.917755256098043e-06,
      "loss": 2.3191,
      "step": 43745
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1024041175842285,
      "learning_rate": 7.917352545934583e-06,
      "loss": 2.2953,
      "step": 43746
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1084593534469604,
      "learning_rate": 7.916949839301775e-06,
      "loss": 2.0829,
      "step": 43747
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.024004340171814,
      "learning_rate": 7.916547136200304e-06,
      "loss": 2.5863,
      "step": 43748
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1987484693527222,
      "learning_rate": 7.91614443663085e-06,
      "loss": 2.3497,
      "step": 43749
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0337504148483276,
      "learning_rate": 7.9157417405941e-06,
      "loss": 2.2184,
      "step": 43750
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.095928430557251,
      "learning_rate": 7.915339048090731e-06,
      "loss": 2.4036,
      "step": 43751
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9599211812019348,
      "learning_rate": 7.914936359121434e-06,
      "loss": 2.2646,
      "step": 43752
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1616021394729614,
      "learning_rate": 7.914533673686881e-06,
      "loss": 2.2647,
      "step": 43753
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0317226648330688,
      "learning_rate": 7.914130991787762e-06,
      "loss": 2.2739,
      "step": 43754
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1321697235107422,
      "learning_rate": 7.913728313424756e-06,
      "loss": 2.3275,
      "step": 43755
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.207557201385498,
      "learning_rate": 7.913325638598547e-06,
      "loss": 2.4411,
      "step": 43756
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0447051525115967,
      "learning_rate": 7.912922967309817e-06,
      "loss": 2.3122,
      "step": 43757
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.027881383895874,
      "learning_rate": 7.91252029955925e-06,
      "loss": 2.3466,
      "step": 43758
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.104236364364624,
      "learning_rate": 7.912117635347531e-06,
      "loss": 2.5164,
      "step": 43759
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.107304334640503,
      "learning_rate": 7.911714974675338e-06,
      "loss": 2.2418,
      "step": 43760
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1377092599868774,
      "learning_rate": 7.911312317543354e-06,
      "loss": 2.2809,
      "step": 43761
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2938592433929443,
      "learning_rate": 7.910909663952265e-06,
      "loss": 2.2887,
      "step": 43762
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0775179862976074,
      "learning_rate": 7.910507013902749e-06,
      "loss": 2.5121,
      "step": 43763
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1405377388000488,
      "learning_rate": 7.91010436739549e-06,
      "loss": 2.4768,
      "step": 43764
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0321612358093262,
      "learning_rate": 7.909701724431178e-06,
      "loss": 2.152,
      "step": 43765
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0598187446594238,
      "learning_rate": 7.909299085010486e-06,
      "loss": 2.4155,
      "step": 43766
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0968785285949707,
      "learning_rate": 7.9088964491341e-06,
      "loss": 2.267,
      "step": 43767
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2092031240463257,
      "learning_rate": 7.9084938168027e-06,
      "loss": 2.2653,
      "step": 43768
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0599758625030518,
      "learning_rate": 7.908091188016972e-06,
      "loss": 2.3183,
      "step": 43769
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1004862785339355,
      "learning_rate": 7.907688562777599e-06,
      "loss": 2.2441,
      "step": 43770
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0082459449768066,
      "learning_rate": 7.907285941085262e-06,
      "loss": 2.4431,
      "step": 43771
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1586143970489502,
      "learning_rate": 7.906883322940641e-06,
      "loss": 2.4526,
      "step": 43772
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0395722389221191,
      "learning_rate": 7.906480708344424e-06,
      "loss": 2.432,
      "step": 43773
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1219075918197632,
      "learning_rate": 7.906078097297289e-06,
      "loss": 2.5036,
      "step": 43774
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0388238430023193,
      "learning_rate": 7.905675489799922e-06,
      "loss": 2.4023,
      "step": 43775
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0908929109573364,
      "learning_rate": 7.905272885853003e-06,
      "loss": 2.5267,
      "step": 43776
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0781558752059937,
      "learning_rate": 7.904870285457214e-06,
      "loss": 2.1484,
      "step": 43777
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1403394937515259,
      "learning_rate": 7.904467688613246e-06,
      "loss": 2.467,
      "step": 43778
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1294106245040894,
      "learning_rate": 7.904065095321768e-06,
      "loss": 2.4981,
      "step": 43779
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0503300428390503,
      "learning_rate": 7.90366250558347e-06,
      "loss": 2.0487,
      "step": 43780
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.971896767616272,
      "learning_rate": 7.903259919399031e-06,
      "loss": 2.1708,
      "step": 43781
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1202847957611084,
      "learning_rate": 7.902857336769139e-06,
      "loss": 2.2245,
      "step": 43782
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0757991075515747,
      "learning_rate": 7.902454757694472e-06,
      "loss": 2.3591,
      "step": 43783
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0705499649047852,
      "learning_rate": 7.902052182175715e-06,
      "loss": 2.3585,
      "step": 43784
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0371665954589844,
      "learning_rate": 7.901649610213548e-06,
      "loss": 2.542,
      "step": 43785
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1789954900741577,
      "learning_rate": 7.901247041808655e-06,
      "loss": 2.5979,
      "step": 43786
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1354013681411743,
      "learning_rate": 7.900844476961721e-06,
      "loss": 2.2766,
      "step": 43787
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0500041246414185,
      "learning_rate": 7.900441915673423e-06,
      "loss": 2.4921,
      "step": 43788
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1240549087524414,
      "learning_rate": 7.900039357944447e-06,
      "loss": 2.3386,
      "step": 43789
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0914555788040161,
      "learning_rate": 7.899636803775474e-06,
      "loss": 2.2105,
      "step": 43790
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1319003105163574,
      "learning_rate": 7.89923425316719e-06,
      "loss": 2.2311,
      "step": 43791
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1613497734069824,
      "learning_rate": 7.898831706120271e-06,
      "loss": 2.3461,
      "step": 43792
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0763126611709595,
      "learning_rate": 7.898429162635406e-06,
      "loss": 2.1676,
      "step": 43793
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1974130868911743,
      "learning_rate": 7.898026622713272e-06,
      "loss": 2.4907,
      "step": 43794
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2603989839553833,
      "learning_rate": 7.897624086354554e-06,
      "loss": 2.2971,
      "step": 43795
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1954067945480347,
      "learning_rate": 7.897221553559933e-06,
      "loss": 2.1985,
      "step": 43796
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2104541063308716,
      "learning_rate": 7.896819024330094e-06,
      "loss": 2.1532,
      "step": 43797
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9797336459159851,
      "learning_rate": 7.896416498665718e-06,
      "loss": 2.5591,
      "step": 43798
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1097897291183472,
      "learning_rate": 7.896013976567486e-06,
      "loss": 2.07,
      "step": 43799
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0730009078979492,
      "learning_rate": 7.895611458036083e-06,
      "loss": 2.3689,
      "step": 43800
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.053183913230896,
      "learning_rate": 7.89520894307219e-06,
      "loss": 2.4405,
      "step": 43801
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0890716314315796,
      "learning_rate": 7.894806431676488e-06,
      "loss": 2.4077,
      "step": 43802
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0771058797836304,
      "learning_rate": 7.894403923849662e-06,
      "loss": 2.205,
      "step": 43803
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7325425148010254,
      "learning_rate": 7.894001419592394e-06,
      "loss": 2.4043,
      "step": 43804
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0281763076782227,
      "learning_rate": 7.893598918905368e-06,
      "loss": 2.2542,
      "step": 43805
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0666675567626953,
      "learning_rate": 7.893196421789262e-06,
      "loss": 2.2667,
      "step": 43806
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.070495367050171,
      "learning_rate": 7.892793928244758e-06,
      "loss": 2.2368,
      "step": 43807
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1258127689361572,
      "learning_rate": 7.892391438272543e-06,
      "loss": 2.3408,
      "step": 43808
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.232378363609314,
      "learning_rate": 7.891988951873295e-06,
      "loss": 2.2323,
      "step": 43809
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0747277736663818,
      "learning_rate": 7.891586469047699e-06,
      "loss": 2.4258,
      "step": 43810
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.161760926246643,
      "learning_rate": 7.891183989796436e-06,
      "loss": 2.2707,
      "step": 43811
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1168233156204224,
      "learning_rate": 7.89078151412019e-06,
      "loss": 2.3055,
      "step": 43812
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0613218545913696,
      "learning_rate": 7.89037904201964e-06,
      "loss": 2.2942,
      "step": 43813
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.249865174293518,
      "learning_rate": 7.889976573495473e-06,
      "loss": 2.3347,
      "step": 43814
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.104778528213501,
      "learning_rate": 7.889574108548369e-06,
      "loss": 2.4379,
      "step": 43815
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2296638488769531,
      "learning_rate": 7.88917164717901e-06,
      "loss": 2.3409,
      "step": 43816
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2137869596481323,
      "learning_rate": 7.888769189388076e-06,
      "loss": 2.5321,
      "step": 43817
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1889656782150269,
      "learning_rate": 7.888366735176256e-06,
      "loss": 2.3815,
      "step": 43818
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.074350357055664,
      "learning_rate": 7.887964284544225e-06,
      "loss": 2.1883,
      "step": 43819
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1971254348754883,
      "learning_rate": 7.887561837492667e-06,
      "loss": 2.445,
      "step": 43820
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2138437032699585,
      "learning_rate": 7.887159394022267e-06,
      "loss": 2.4006,
      "step": 43821
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0329922437667847,
      "learning_rate": 7.886756954133706e-06,
      "loss": 2.4378,
      "step": 43822
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4281810522079468,
      "learning_rate": 7.886354517827665e-06,
      "loss": 2.3966,
      "step": 43823
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2070965766906738,
      "learning_rate": 7.885952085104826e-06,
      "loss": 2.0888,
      "step": 43824
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.130415439605713,
      "learning_rate": 7.885549655965874e-06,
      "loss": 2.3651,
      "step": 43825
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0677931308746338,
      "learning_rate": 7.88514723041149e-06,
      "loss": 2.3784,
      "step": 43826
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1556388139724731,
      "learning_rate": 7.884744808442354e-06,
      "loss": 2.5262,
      "step": 43827
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.168290376663208,
      "learning_rate": 7.884342390059152e-06,
      "loss": 2.3126,
      "step": 43828
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9779332876205444,
      "learning_rate": 7.883939975262562e-06,
      "loss": 2.4562,
      "step": 43829
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0551854372024536,
      "learning_rate": 7.88353756405327e-06,
      "loss": 2.2851,
      "step": 43830
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.038888692855835,
      "learning_rate": 7.88313515643196e-06,
      "loss": 2.2464,
      "step": 43831
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.141875982284546,
      "learning_rate": 7.882732752399307e-06,
      "loss": 2.313,
      "step": 43832
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0720596313476562,
      "learning_rate": 7.882330351955997e-06,
      "loss": 2.2566,
      "step": 43833
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1585400104522705,
      "learning_rate": 7.881927955102712e-06,
      "loss": 2.4681,
      "step": 43834
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0834511518478394,
      "learning_rate": 7.881525561840134e-06,
      "loss": 2.1977,
      "step": 43835
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.185815691947937,
      "learning_rate": 7.881123172168948e-06,
      "loss": 2.3964,
      "step": 43836
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1696293354034424,
      "learning_rate": 7.88072078608983e-06,
      "loss": 2.2514,
      "step": 43837
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1816352605819702,
      "learning_rate": 7.880318403603468e-06,
      "loss": 2.3624,
      "step": 43838
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0731401443481445,
      "learning_rate": 7.879916024710542e-06,
      "loss": 2.184,
      "step": 43839
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0172728300094604,
      "learning_rate": 7.879513649411735e-06,
      "loss": 2.1577,
      "step": 43840
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9487731456756592,
      "learning_rate": 7.879111277707726e-06,
      "loss": 2.2622,
      "step": 43841
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0941072702407837,
      "learning_rate": 7.8787089095992e-06,
      "loss": 2.4246,
      "step": 43842
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0953011512756348,
      "learning_rate": 7.878306545086842e-06,
      "loss": 2.3821,
      "step": 43843
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0397800207138062,
      "learning_rate": 7.87790418417133e-06,
      "loss": 2.3576,
      "step": 43844
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0928003787994385,
      "learning_rate": 7.877501826853344e-06,
      "loss": 2.3045,
      "step": 43845
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0577298402786255,
      "learning_rate": 7.87709947313357e-06,
      "loss": 2.2575,
      "step": 43846
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.984113872051239,
      "learning_rate": 7.87669712301269e-06,
      "loss": 2.5727,
      "step": 43847
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1187492609024048,
      "learning_rate": 7.876294776491382e-06,
      "loss": 2.1958,
      "step": 43848
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1496703624725342,
      "learning_rate": 7.875892433570335e-06,
      "loss": 2.43,
      "step": 43849
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2138956785202026,
      "learning_rate": 7.875490094250224e-06,
      "loss": 2.3632,
      "step": 43850
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1253645420074463,
      "learning_rate": 7.875087758531736e-06,
      "loss": 2.3702,
      "step": 43851
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2425439357757568,
      "learning_rate": 7.874685426415549e-06,
      "loss": 2.4766,
      "step": 43852
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0675753355026245,
      "learning_rate": 7.87428309790235e-06,
      "loss": 2.2292,
      "step": 43853
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9976723194122314,
      "learning_rate": 7.873880772992816e-06,
      "loss": 2.2898,
      "step": 43854
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0662388801574707,
      "learning_rate": 7.873478451687634e-06,
      "loss": 2.338,
      "step": 43855
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0428966283798218,
      "learning_rate": 7.873076133987483e-06,
      "loss": 2.2684,
      "step": 43856
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2485939264297485,
      "learning_rate": 7.872673819893049e-06,
      "loss": 2.1908,
      "step": 43857
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1340664625167847,
      "learning_rate": 7.872271509405006e-06,
      "loss": 2.4761,
      "step": 43858
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0608938932418823,
      "learning_rate": 7.871869202524041e-06,
      "loss": 2.4696,
      "step": 43859
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1472867727279663,
      "learning_rate": 7.871466899250837e-06,
      "loss": 2.3295,
      "step": 43860
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1997404098510742,
      "learning_rate": 7.871064599586073e-06,
      "loss": 2.2982,
      "step": 43861
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0822038650512695,
      "learning_rate": 7.870662303530435e-06,
      "loss": 2.3624,
      "step": 43862
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.024800419807434,
      "learning_rate": 7.8702600110846e-06,
      "loss": 2.35,
      "step": 43863
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0033090114593506,
      "learning_rate": 7.869857722249254e-06,
      "loss": 2.2742,
      "step": 43864
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2341907024383545,
      "learning_rate": 7.869455437025079e-06,
      "loss": 2.4481,
      "step": 43865
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0518089532852173,
      "learning_rate": 7.869053155412753e-06,
      "loss": 2.2039,
      "step": 43866
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1756101846694946,
      "learning_rate": 7.868650877412963e-06,
      "loss": 2.2778,
      "step": 43867
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0093985795974731,
      "learning_rate": 7.868248603026387e-06,
      "loss": 2.3466,
      "step": 43868
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1642452478408813,
      "learning_rate": 7.86784633225371e-06,
      "loss": 2.578,
      "step": 43869
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0362931489944458,
      "learning_rate": 7.867444065095616e-06,
      "loss": 2.4209,
      "step": 43870
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3409134149551392,
      "learning_rate": 7.86704180155278e-06,
      "loss": 2.4946,
      "step": 43871
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.999645471572876,
      "learning_rate": 7.866639541625886e-06,
      "loss": 2.3952,
      "step": 43872
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0215511322021484,
      "learning_rate": 7.866237285315617e-06,
      "loss": 2.248,
      "step": 43873
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0539634227752686,
      "learning_rate": 7.865835032622657e-06,
      "loss": 2.1504,
      "step": 43874
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.95173180103302,
      "learning_rate": 7.865432783547686e-06,
      "loss": 2.1874,
      "step": 43875
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.111327052116394,
      "learning_rate": 7.865030538091385e-06,
      "loss": 2.35,
      "step": 43876
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.083890438079834,
      "learning_rate": 7.864628296254441e-06,
      "loss": 2.0578,
      "step": 43877
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1060396432876587,
      "learning_rate": 7.864226058037528e-06,
      "loss": 2.2347,
      "step": 43878
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0688350200653076,
      "learning_rate": 7.863823823441334e-06,
      "loss": 2.2193,
      "step": 43879
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0673686265945435,
      "learning_rate": 7.863421592466538e-06,
      "loss": 2.3034,
      "step": 43880
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.104575276374817,
      "learning_rate": 7.863019365113822e-06,
      "loss": 2.1774,
      "step": 43881
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1930352449417114,
      "learning_rate": 7.862617141383868e-06,
      "loss": 2.3331,
      "step": 43882
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0794376134872437,
      "learning_rate": 7.86221492127736e-06,
      "loss": 2.322,
      "step": 43883
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.141929030418396,
      "learning_rate": 7.861812704794983e-06,
      "loss": 2.336,
      "step": 43884
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0486797094345093,
      "learning_rate": 7.861410491937408e-06,
      "loss": 2.1338,
      "step": 43885
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1046638488769531,
      "learning_rate": 7.861008282705323e-06,
      "loss": 2.2752,
      "step": 43886
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.113035798072815,
      "learning_rate": 7.860606077099413e-06,
      "loss": 2.5872,
      "step": 43887
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4789502620697021,
      "learning_rate": 7.860203875120353e-06,
      "loss": 2.3394,
      "step": 43888
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9823529720306396,
      "learning_rate": 7.85980167676883e-06,
      "loss": 2.3899,
      "step": 43889
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0874688625335693,
      "learning_rate": 7.859399482045524e-06,
      "loss": 2.3058,
      "step": 43890
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.055466651916504,
      "learning_rate": 7.858997290951119e-06,
      "loss": 2.2939,
      "step": 43891
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1476383209228516,
      "learning_rate": 7.858595103486294e-06,
      "loss": 2.3049,
      "step": 43892
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1948351860046387,
      "learning_rate": 7.858192919651732e-06,
      "loss": 2.1559,
      "step": 43893
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1552302837371826,
      "learning_rate": 7.857790739448115e-06,
      "loss": 2.4031,
      "step": 43894
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0724821090698242,
      "learning_rate": 7.857388562876122e-06,
      "loss": 2.3051,
      "step": 43895
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4641667604446411,
      "learning_rate": 7.856986389936441e-06,
      "loss": 2.5913,
      "step": 43896
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1052799224853516,
      "learning_rate": 7.856584220629753e-06,
      "loss": 2.2805,
      "step": 43897
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.077162504196167,
      "learning_rate": 7.856182054956731e-06,
      "loss": 2.4206,
      "step": 43898
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.129653811454773,
      "learning_rate": 7.855779892918065e-06,
      "loss": 2.2648,
      "step": 43899
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0199098587036133,
      "learning_rate": 7.855377734514434e-06,
      "loss": 2.1145,
      "step": 43900
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1519907712936401,
      "learning_rate": 7.854975579746519e-06,
      "loss": 2.404,
      "step": 43901
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9539346098899841,
      "learning_rate": 7.854573428615003e-06,
      "loss": 2.3931,
      "step": 43902
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0837315320968628,
      "learning_rate": 7.854171281120568e-06,
      "loss": 2.1871,
      "step": 43903
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9759538173675537,
      "learning_rate": 7.853769137263898e-06,
      "loss": 2.4259,
      "step": 43904
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9920159578323364,
      "learning_rate": 7.85336699704567e-06,
      "loss": 2.3323,
      "step": 43905
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0866742134094238,
      "learning_rate": 7.85296486046657e-06,
      "loss": 2.464,
      "step": 43906
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0527750253677368,
      "learning_rate": 7.852562727527275e-06,
      "loss": 2.4025,
      "step": 43907
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0456252098083496,
      "learning_rate": 7.85216059822847e-06,
      "loss": 2.5452,
      "step": 43908
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9637956023216248,
      "learning_rate": 7.851758472570837e-06,
      "loss": 2.5393,
      "step": 43909
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1360762119293213,
      "learning_rate": 7.851356350555057e-06,
      "loss": 2.5187,
      "step": 43910
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.043141484260559,
      "learning_rate": 7.850954232181813e-06,
      "loss": 2.265,
      "step": 43911
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0412122011184692,
      "learning_rate": 7.850552117451783e-06,
      "loss": 2.2224,
      "step": 43912
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.078532338142395,
      "learning_rate": 7.85015000636565e-06,
      "loss": 2.3221,
      "step": 43913
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1320466995239258,
      "learning_rate": 7.849747898924099e-06,
      "loss": 2.3152,
      "step": 43914
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9902585744857788,
      "learning_rate": 7.849345795127808e-06,
      "loss": 2.4445,
      "step": 43915
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0996122360229492,
      "learning_rate": 7.848943694977458e-06,
      "loss": 2.3949,
      "step": 43916
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0361660718917847,
      "learning_rate": 7.848541598473736e-06,
      "loss": 2.3459,
      "step": 43917
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1083085536956787,
      "learning_rate": 7.848139505617318e-06,
      "loss": 2.3281,
      "step": 43918
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.066451072692871,
      "learning_rate": 7.847737416408889e-06,
      "loss": 2.4173,
      "step": 43919
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1007338762283325,
      "learning_rate": 7.847335330849129e-06,
      "loss": 2.4596,
      "step": 43920
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1152657270431519,
      "learning_rate": 7.846933248938722e-06,
      "loss": 2.4913,
      "step": 43921
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0265861749649048,
      "learning_rate": 7.846531170678345e-06,
      "loss": 2.367,
      "step": 43922
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0140304565429688,
      "learning_rate": 7.846129096068688e-06,
      "loss": 2.4427,
      "step": 43923
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0663594007492065,
      "learning_rate": 7.845727025110425e-06,
      "loss": 2.3134,
      "step": 43924
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3091766834259033,
      "learning_rate": 7.845324957804236e-06,
      "loss": 2.3907,
      "step": 43925
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9818557500839233,
      "learning_rate": 7.84492289415081e-06,
      "loss": 2.3082,
      "step": 43926
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1070036888122559,
      "learning_rate": 7.844520834150824e-06,
      "loss": 2.5236,
      "step": 43927
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9493961334228516,
      "learning_rate": 7.844118777804959e-06,
      "loss": 2.0992,
      "step": 43928
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0909523963928223,
      "learning_rate": 7.8437167251139e-06,
      "loss": 2.3222,
      "step": 43929
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0961811542510986,
      "learning_rate": 7.843314676078325e-06,
      "loss": 2.4798,
      "step": 43930
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0777918100357056,
      "learning_rate": 7.842912630698918e-06,
      "loss": 2.3204,
      "step": 43931
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0010366439819336,
      "learning_rate": 7.842510588976363e-06,
      "loss": 2.4424,
      "step": 43932
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0030181407928467,
      "learning_rate": 7.842108550911336e-06,
      "loss": 2.238,
      "step": 43933
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1086708307266235,
      "learning_rate": 7.841706516504521e-06,
      "loss": 2.3587,
      "step": 43934
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.172285795211792,
      "learning_rate": 7.8413044857566e-06,
      "loss": 2.2357,
      "step": 43935
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0173137187957764,
      "learning_rate": 7.840902458668256e-06,
      "loss": 2.2712,
      "step": 43936
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0618536472320557,
      "learning_rate": 7.840500435240166e-06,
      "loss": 2.6251,
      "step": 43937
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1023694276809692,
      "learning_rate": 7.840098415473016e-06,
      "loss": 2.5433,
      "step": 43938
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1576919555664062,
      "learning_rate": 7.839696399367486e-06,
      "loss": 2.419,
      "step": 43939
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.055768370628357,
      "learning_rate": 7.839294386924257e-06,
      "loss": 2.3192,
      "step": 43940
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2247689962387085,
      "learning_rate": 7.838892378144009e-06,
      "loss": 2.6678,
      "step": 43941
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1383391618728638,
      "learning_rate": 7.838490373027425e-06,
      "loss": 2.4357,
      "step": 43942
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.030787706375122,
      "learning_rate": 7.83808837157519e-06,
      "loss": 2.1374,
      "step": 43943
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1975317001342773,
      "learning_rate": 7.837686373787979e-06,
      "loss": 2.2413,
      "step": 43944
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.091891884803772,
      "learning_rate": 7.837284379666479e-06,
      "loss": 2.4384,
      "step": 43945
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0974591970443726,
      "learning_rate": 7.836882389211368e-06,
      "loss": 2.3353,
      "step": 43946
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0857796669006348,
      "learning_rate": 7.83648040242333e-06,
      "loss": 2.3641,
      "step": 43947
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0380908250808716,
      "learning_rate": 7.836078419303045e-06,
      "loss": 2.2288,
      "step": 43948
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1925204992294312,
      "learning_rate": 7.835676439851199e-06,
      "loss": 2.2398,
      "step": 43949
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0316122770309448,
      "learning_rate": 7.835274464068464e-06,
      "loss": 2.2915,
      "step": 43950
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2420077323913574,
      "learning_rate": 7.834872491955528e-06,
      "loss": 2.415,
      "step": 43951
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0635641813278198,
      "learning_rate": 7.83447052351307e-06,
      "loss": 2.2917,
      "step": 43952
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0961252450942993,
      "learning_rate": 7.834068558741774e-06,
      "loss": 2.3371,
      "step": 43953
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0765973329544067,
      "learning_rate": 7.83366659764232e-06,
      "loss": 2.3728,
      "step": 43954
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1061311960220337,
      "learning_rate": 7.833264640215387e-06,
      "loss": 2.4352,
      "step": 43955
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1815613508224487,
      "learning_rate": 7.832862686461662e-06,
      "loss": 2.2364,
      "step": 43956
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.055515170097351,
      "learning_rate": 7.832460736381822e-06,
      "loss": 2.5487,
      "step": 43957
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1035765409469604,
      "learning_rate": 7.83205878997655e-06,
      "loss": 2.2283,
      "step": 43958
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0481175184249878,
      "learning_rate": 7.831656847246528e-06,
      "loss": 2.0817,
      "step": 43959
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.098315715789795,
      "learning_rate": 7.831254908192434e-06,
      "loss": 2.379,
      "step": 43960
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1262331008911133,
      "learning_rate": 7.830852972814957e-06,
      "loss": 2.3892,
      "step": 43961
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1187736988067627,
      "learning_rate": 7.830451041114769e-06,
      "loss": 2.245,
      "step": 43962
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0138916969299316,
      "learning_rate": 7.83004911309256e-06,
      "loss": 2.3408,
      "step": 43963
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.203163981437683,
      "learning_rate": 7.829647188749002e-06,
      "loss": 2.2731,
      "step": 43964
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.184918761253357,
      "learning_rate": 7.829245268084784e-06,
      "loss": 2.4254,
      "step": 43965
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0669525861740112,
      "learning_rate": 7.828843351100584e-06,
      "loss": 2.3225,
      "step": 43966
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1004490852355957,
      "learning_rate": 7.828441437797087e-06,
      "loss": 2.241,
      "step": 43967
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0485365390777588,
      "learning_rate": 7.828039528174969e-06,
      "loss": 2.2434,
      "step": 43968
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2591831684112549,
      "learning_rate": 7.827637622234913e-06,
      "loss": 2.2293,
      "step": 43969
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.993759274482727,
      "learning_rate": 7.827235719977601e-06,
      "loss": 2.3812,
      "step": 43970
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.098991870880127,
      "learning_rate": 7.826833821403717e-06,
      "loss": 2.3206,
      "step": 43971
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0814236402511597,
      "learning_rate": 7.826431926513938e-06,
      "loss": 2.6183,
      "step": 43972
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.020028829574585,
      "learning_rate": 7.826030035308948e-06,
      "loss": 2.3911,
      "step": 43973
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1435725688934326,
      "learning_rate": 7.825628147789426e-06,
      "loss": 2.6911,
      "step": 43974
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2065999507904053,
      "learning_rate": 7.825226263956056e-06,
      "loss": 2.4365,
      "step": 43975
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9832029342651367,
      "learning_rate": 7.824824383809523e-06,
      "loss": 2.2836,
      "step": 43976
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0312035083770752,
      "learning_rate": 7.824422507350498e-06,
      "loss": 2.3337,
      "step": 43977
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1386241912841797,
      "learning_rate": 7.824020634579669e-06,
      "loss": 2.1933,
      "step": 43978
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0517780780792236,
      "learning_rate": 7.823618765497715e-06,
      "loss": 2.0924,
      "step": 43979
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1021084785461426,
      "learning_rate": 7.82321690010532e-06,
      "loss": 2.3216,
      "step": 43980
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.032355785369873,
      "learning_rate": 7.822815038403161e-06,
      "loss": 2.4762,
      "step": 43981
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1874825954437256,
      "learning_rate": 7.822413180391923e-06,
      "loss": 2.1843,
      "step": 43982
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.167022466659546,
      "learning_rate": 7.822011326072285e-06,
      "loss": 2.24,
      "step": 43983
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1349291801452637,
      "learning_rate": 7.82160947544493e-06,
      "loss": 2.2633,
      "step": 43984
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3126373291015625,
      "learning_rate": 7.82120762851054e-06,
      "loss": 2.3112,
      "step": 43985
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1036450862884521,
      "learning_rate": 7.820805785269793e-06,
      "loss": 2.2573,
      "step": 43986
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0315132141113281,
      "learning_rate": 7.820403945723374e-06,
      "loss": 2.1975,
      "step": 43987
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0285279750823975,
      "learning_rate": 7.820002109871962e-06,
      "loss": 2.3069,
      "step": 43988
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1668370962142944,
      "learning_rate": 7.819600277716242e-06,
      "loss": 2.5946,
      "step": 43989
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0342602729797363,
      "learning_rate": 7.819198449256886e-06,
      "loss": 2.2269,
      "step": 43990
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1059669256210327,
      "learning_rate": 7.818796624494583e-06,
      "loss": 2.3754,
      "step": 43991
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0149962902069092,
      "learning_rate": 7.818394803430012e-06,
      "loss": 2.3106,
      "step": 43992
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0211278200149536,
      "learning_rate": 7.817992986063853e-06,
      "loss": 2.1172,
      "step": 43993
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0098196268081665,
      "learning_rate": 7.817591172396788e-06,
      "loss": 2.3192,
      "step": 43994
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1203120946884155,
      "learning_rate": 7.817189362429503e-06,
      "loss": 2.4956,
      "step": 43995
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0598478317260742,
      "learning_rate": 7.816787556162672e-06,
      "loss": 2.1477,
      "step": 43996
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1255005598068237,
      "learning_rate": 7.81638575359698e-06,
      "loss": 2.2904,
      "step": 43997
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1384459733963013,
      "learning_rate": 7.815983954733105e-06,
      "loss": 2.3502,
      "step": 43998
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1295430660247803,
      "learning_rate": 7.815582159571732e-06,
      "loss": 2.3654,
      "step": 43999
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.104190468788147,
      "learning_rate": 7.815180368113539e-06,
      "loss": 2.0911,
      "step": 44000
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0390092134475708,
      "learning_rate": 7.81477858035921e-06,
      "loss": 2.4251,
      "step": 44001
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0638036727905273,
      "learning_rate": 7.814376796309428e-06,
      "loss": 2.3188,
      "step": 44002
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0469368696212769,
      "learning_rate": 7.813975015964867e-06,
      "loss": 2.2471,
      "step": 44003
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.10573148727417,
      "learning_rate": 7.813573239326214e-06,
      "loss": 2.1385,
      "step": 44004
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1765377521514893,
      "learning_rate": 7.813171466394146e-06,
      "loss": 2.2785,
      "step": 44005
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1582472324371338,
      "learning_rate": 7.812769697169347e-06,
      "loss": 2.4496,
      "step": 44006
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2158952951431274,
      "learning_rate": 7.812367931652497e-06,
      "loss": 2.3803,
      "step": 44007
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.165549635887146,
      "learning_rate": 7.811966169844278e-06,
      "loss": 2.4467,
      "step": 44008
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1016815900802612,
      "learning_rate": 7.81156441174537e-06,
      "loss": 2.2574,
      "step": 44009
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1804767847061157,
      "learning_rate": 7.811162657356456e-06,
      "loss": 2.1933,
      "step": 44010
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1346386671066284,
      "learning_rate": 7.810760906678214e-06,
      "loss": 2.2935,
      "step": 44011
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1434078216552734,
      "learning_rate": 7.810359159711329e-06,
      "loss": 2.388,
      "step": 44012
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1886165142059326,
      "learning_rate": 7.809957416456478e-06,
      "loss": 2.3154,
      "step": 44013
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0196995735168457,
      "learning_rate": 7.809555676914344e-06,
      "loss": 1.9884,
      "step": 44014
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1220800876617432,
      "learning_rate": 7.809153941085615e-06,
      "loss": 2.3602,
      "step": 44015
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.089139699935913,
      "learning_rate": 7.808752208970957e-06,
      "loss": 2.2179,
      "step": 44016
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2243869304656982,
      "learning_rate": 7.808350480571062e-06,
      "loss": 2.4226,
      "step": 44017
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.003108024597168,
      "learning_rate": 7.807948755886607e-06,
      "loss": 2.1181,
      "step": 44018
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.961983859539032,
      "learning_rate": 7.807547034918274e-06,
      "loss": 2.4689,
      "step": 44019
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0305519104003906,
      "learning_rate": 7.807145317666744e-06,
      "loss": 2.4031,
      "step": 44020
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1515307426452637,
      "learning_rate": 7.806743604132699e-06,
      "loss": 2.5029,
      "step": 44021
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2916316986083984,
      "learning_rate": 7.80634189431682e-06,
      "loss": 2.3939,
      "step": 44022
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2146896123886108,
      "learning_rate": 7.805940188219788e-06,
      "loss": 2.2934,
      "step": 44023
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1318106651306152,
      "learning_rate": 7.805538485842282e-06,
      "loss": 2.4513,
      "step": 44024
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0515131950378418,
      "learning_rate": 7.805136787184983e-06,
      "loss": 2.5754,
      "step": 44025
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.040123462677002,
      "learning_rate": 7.804735092248573e-06,
      "loss": 2.4262,
      "step": 44026
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9575037956237793,
      "learning_rate": 7.804333401033737e-06,
      "loss": 2.3873,
      "step": 44027
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0037928819656372,
      "learning_rate": 7.803931713541149e-06,
      "loss": 2.4886,
      "step": 44028
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1175636053085327,
      "learning_rate": 7.803530029771498e-06,
      "loss": 2.4467,
      "step": 44029
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.084857702255249,
      "learning_rate": 7.803128349725457e-06,
      "loss": 2.5024,
      "step": 44030
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0499794483184814,
      "learning_rate": 7.802726673403709e-06,
      "loss": 2.1668,
      "step": 44031
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.982060432434082,
      "learning_rate": 7.802325000806934e-06,
      "loss": 2.4328,
      "step": 44032
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0099241733551025,
      "learning_rate": 7.801923331935819e-06,
      "loss": 2.3841,
      "step": 44033
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1849777698516846,
      "learning_rate": 7.801521666791038e-06,
      "loss": 2.4544,
      "step": 44034
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0254251956939697,
      "learning_rate": 7.801120005373277e-06,
      "loss": 2.4589,
      "step": 44035
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9761664867401123,
      "learning_rate": 7.800718347683214e-06,
      "loss": 2.4332,
      "step": 44036
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0861144065856934,
      "learning_rate": 7.80031669372153e-06,
      "loss": 2.2352,
      "step": 44037
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.112744927406311,
      "learning_rate": 7.799915043488907e-06,
      "loss": 2.0864,
      "step": 44038
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2045038938522339,
      "learning_rate": 7.799513396986025e-06,
      "loss": 2.3231,
      "step": 44039
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0627350807189941,
      "learning_rate": 7.799111754213567e-06,
      "loss": 2.4357,
      "step": 44040
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0498484373092651,
      "learning_rate": 7.798710115172211e-06,
      "loss": 2.4636,
      "step": 44041
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0604469776153564,
      "learning_rate": 7.798308479862643e-06,
      "loss": 1.9831,
      "step": 44042
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0999773740768433,
      "learning_rate": 7.797906848285538e-06,
      "loss": 2.3267,
      "step": 44043
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1926437616348267,
      "learning_rate": 7.797505220441578e-06,
      "loss": 2.3424,
      "step": 44044
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.053227186203003,
      "learning_rate": 7.797103596331444e-06,
      "loss": 2.241,
      "step": 44045
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.155385971069336,
      "learning_rate": 7.796701975955818e-06,
      "loss": 2.2678,
      "step": 44046
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9645285606384277,
      "learning_rate": 7.796300359315381e-06,
      "loss": 2.2684,
      "step": 44047
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.120992660522461,
      "learning_rate": 7.795898746410814e-06,
      "loss": 2.6266,
      "step": 44048
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9955874681472778,
      "learning_rate": 7.795497137242795e-06,
      "loss": 2.2257,
      "step": 44049
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1250557899475098,
      "learning_rate": 7.79509553181201e-06,
      "loss": 2.2876,
      "step": 44050
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2246333360671997,
      "learning_rate": 7.794693930119137e-06,
      "loss": 2.3624,
      "step": 44051
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1344568729400635,
      "learning_rate": 7.794292332164855e-06,
      "loss": 2.4008,
      "step": 44052
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0482630729675293,
      "learning_rate": 7.793890737949848e-06,
      "loss": 2.3359,
      "step": 44053
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.096321940422058,
      "learning_rate": 7.793489147474793e-06,
      "loss": 2.4355,
      "step": 44054
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2061412334442139,
      "learning_rate": 7.793087560740378e-06,
      "loss": 2.458,
      "step": 44055
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9952901601791382,
      "learning_rate": 7.792685977747275e-06,
      "loss": 2.3685,
      "step": 44056
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0622162818908691,
      "learning_rate": 7.79228439849617e-06,
      "loss": 2.4149,
      "step": 44057
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.09925377368927,
      "learning_rate": 7.791882822987742e-06,
      "loss": 2.5024,
      "step": 44058
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.045960783958435,
      "learning_rate": 7.791481251222672e-06,
      "loss": 2.3898,
      "step": 44059
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0574252605438232,
      "learning_rate": 7.79107968320164e-06,
      "loss": 2.0954,
      "step": 44060
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0870925188064575,
      "learning_rate": 7.79067811892533e-06,
      "loss": 2.3223,
      "step": 44061
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.072415828704834,
      "learning_rate": 7.79027655839442e-06,
      "loss": 2.4258,
      "step": 44062
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1005600690841675,
      "learning_rate": 7.789875001609591e-06,
      "loss": 2.5334,
      "step": 44063
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1731938123703003,
      "learning_rate": 7.789473448571524e-06,
      "loss": 2.3222,
      "step": 44064
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.050025463104248,
      "learning_rate": 7.7890718992809e-06,
      "loss": 2.3795,
      "step": 44065
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.098386287689209,
      "learning_rate": 7.7886703537384e-06,
      "loss": 2.3563,
      "step": 44066
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.042401909828186,
      "learning_rate": 7.788268811944704e-06,
      "loss": 2.5963,
      "step": 44067
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.030738353729248,
      "learning_rate": 7.787867273900497e-06,
      "loss": 2.103,
      "step": 44068
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1987919807434082,
      "learning_rate": 7.787465739606451e-06,
      "loss": 2.3389,
      "step": 44069
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1234830617904663,
      "learning_rate": 7.787064209063253e-06,
      "loss": 2.5006,
      "step": 44070
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0959035158157349,
      "learning_rate": 7.786662682271582e-06,
      "loss": 2.4327,
      "step": 44071
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.978471577167511,
      "learning_rate": 7.786261159232118e-06,
      "loss": 2.4295,
      "step": 44072
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0031570196151733,
      "learning_rate": 7.785859639945542e-06,
      "loss": 2.2014,
      "step": 44073
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2060282230377197,
      "learning_rate": 7.785458124412537e-06,
      "loss": 2.4231,
      "step": 44074
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2005774974822998,
      "learning_rate": 7.785056612633781e-06,
      "loss": 2.2341,
      "step": 44075
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0629616975784302,
      "learning_rate": 7.784655104609955e-06,
      "loss": 2.5942,
      "step": 44076
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0344408750534058,
      "learning_rate": 7.784253600341741e-06,
      "loss": 2.1395,
      "step": 44077
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0717220306396484,
      "learning_rate": 7.78385209982982e-06,
      "loss": 2.4677,
      "step": 44078
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1059046983718872,
      "learning_rate": 7.783450603074872e-06,
      "loss": 2.3064,
      "step": 44079
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0211660861968994,
      "learning_rate": 7.783049110077577e-06,
      "loss": 2.2754,
      "step": 44080
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1133923530578613,
      "learning_rate": 7.782647620838615e-06,
      "loss": 2.3857,
      "step": 44081
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6803759336471558,
      "learning_rate": 7.782246135358668e-06,
      "loss": 2.3001,
      "step": 44082
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1435420513153076,
      "learning_rate": 7.781844653638415e-06,
      "loss": 2.3404,
      "step": 44083
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0663951635360718,
      "learning_rate": 7.781443175678539e-06,
      "loss": 2.1829,
      "step": 44084
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.034956932067871,
      "learning_rate": 7.78104170147972e-06,
      "loss": 2.485,
      "step": 44085
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1275765895843506,
      "learning_rate": 7.780640231042635e-06,
      "loss": 2.3149,
      "step": 44086
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0679420232772827,
      "learning_rate": 7.78023876436797e-06,
      "loss": 2.3265,
      "step": 44087
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.058838963508606,
      "learning_rate": 7.779837301456403e-06,
      "loss": 2.3023,
      "step": 44088
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.074986457824707,
      "learning_rate": 7.779435842308614e-06,
      "loss": 2.3507,
      "step": 44089
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.093306541442871,
      "learning_rate": 7.779034386925283e-06,
      "loss": 2.5142,
      "step": 44090
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1124188899993896,
      "learning_rate": 7.778632935307094e-06,
      "loss": 2.4015,
      "step": 44091
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0983679294586182,
      "learning_rate": 7.778231487454724e-06,
      "loss": 2.3714,
      "step": 44092
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0256853103637695,
      "learning_rate": 7.777830043368855e-06,
      "loss": 2.6271,
      "step": 44093
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1631019115447998,
      "learning_rate": 7.777428603050173e-06,
      "loss": 2.1855,
      "step": 44094
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.001918911933899,
      "learning_rate": 7.77702716649935e-06,
      "loss": 2.3559,
      "step": 44095
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0185054540634155,
      "learning_rate": 7.776625733717067e-06,
      "loss": 2.2423,
      "step": 44096
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0527000427246094,
      "learning_rate": 7.776224304704008e-06,
      "loss": 2.2938,
      "step": 44097
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1789815425872803,
      "learning_rate": 7.775822879460854e-06,
      "loss": 2.6523,
      "step": 44098
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.147638201713562,
      "learning_rate": 7.775421457988283e-06,
      "loss": 2.4454,
      "step": 44099
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0887571573257446,
      "learning_rate": 7.775020040286978e-06,
      "loss": 2.3541,
      "step": 44100
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9704415202140808,
      "learning_rate": 7.774618626357618e-06,
      "loss": 2.3383,
      "step": 44101
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1535100936889648,
      "learning_rate": 7.774217216200883e-06,
      "loss": 2.4498,
      "step": 44102
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0502192974090576,
      "learning_rate": 7.773815809817455e-06,
      "loss": 2.2089,
      "step": 44103
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1528165340423584,
      "learning_rate": 7.773414407208014e-06,
      "loss": 2.313,
      "step": 44104
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2006832361221313,
      "learning_rate": 7.77301300837324e-06,
      "loss": 2.4571,
      "step": 44105
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.106082558631897,
      "learning_rate": 7.772611613313813e-06,
      "loss": 2.3771,
      "step": 44106
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0411385297775269,
      "learning_rate": 7.772210222030416e-06,
      "loss": 2.337,
      "step": 44107
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0436913967132568,
      "learning_rate": 7.77180883452373e-06,
      "loss": 2.3606,
      "step": 44108
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1696908473968506,
      "learning_rate": 7.77140745079443e-06,
      "loss": 2.4916,
      "step": 44109
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0377053022384644,
      "learning_rate": 7.771006070843198e-06,
      "loss": 2.3699,
      "step": 44110
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.213578462600708,
      "learning_rate": 7.770604694670718e-06,
      "loss": 2.5391,
      "step": 44111
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0583404302597046,
      "learning_rate": 7.770203322277668e-06,
      "loss": 2.2581,
      "step": 44112
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.130348563194275,
      "learning_rate": 7.769801953664733e-06,
      "loss": 2.465,
      "step": 44113
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1332019567489624,
      "learning_rate": 7.769400588832586e-06,
      "loss": 2.2204,
      "step": 44114
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0942479372024536,
      "learning_rate": 7.76899922778191e-06,
      "loss": 2.46,
      "step": 44115
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0584094524383545,
      "learning_rate": 7.768597870513386e-06,
      "loss": 2.2796,
      "step": 44116
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0120247602462769,
      "learning_rate": 7.768196517027695e-06,
      "loss": 2.2902,
      "step": 44117
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0889465808868408,
      "learning_rate": 7.767795167325518e-06,
      "loss": 2.1768,
      "step": 44118
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0714702606201172,
      "learning_rate": 7.767393821407535e-06,
      "loss": 2.3789,
      "step": 44119
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1162697076797485,
      "learning_rate": 7.766992479274422e-06,
      "loss": 2.5055,
      "step": 44120
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1406195163726807,
      "learning_rate": 7.766591140926872e-06,
      "loss": 2.4757,
      "step": 44121
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1770141124725342,
      "learning_rate": 7.766189806365549e-06,
      "loss": 2.3883,
      "step": 44122
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0641313791275024,
      "learning_rate": 7.765788475591143e-06,
      "loss": 2.5224,
      "step": 44123
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0409252643585205,
      "learning_rate": 7.765387148604332e-06,
      "loss": 2.3364,
      "step": 44124
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9969385862350464,
      "learning_rate": 7.764985825405796e-06,
      "loss": 2.3005,
      "step": 44125
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1692965030670166,
      "learning_rate": 7.764584505996217e-06,
      "loss": 2.2992,
      "step": 44126
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.088111162185669,
      "learning_rate": 7.764183190376274e-06,
      "loss": 2.3813,
      "step": 44127
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0811225175857544,
      "learning_rate": 7.763781878546649e-06,
      "loss": 2.3674,
      "step": 44128
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9904043674468994,
      "learning_rate": 7.763380570508018e-06,
      "loss": 2.2168,
      "step": 44129
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0322316884994507,
      "learning_rate": 7.762979266261066e-06,
      "loss": 2.4836,
      "step": 44130
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.055812120437622,
      "learning_rate": 7.762577965806472e-06,
      "loss": 2.3484,
      "step": 44131
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0159924030303955,
      "learning_rate": 7.762176669144916e-06,
      "loss": 2.3555,
      "step": 44132
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1976845264434814,
      "learning_rate": 7.761775376277078e-06,
      "loss": 2.1812,
      "step": 44133
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.016099452972412,
      "learning_rate": 7.761374087203643e-06,
      "loss": 2.1754,
      "step": 44134
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0304456949234009,
      "learning_rate": 7.760972801925283e-06,
      "loss": 2.2353,
      "step": 44135
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.106160283088684,
      "learning_rate": 7.760571520442681e-06,
      "loss": 2.3543,
      "step": 44136
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2112560272216797,
      "learning_rate": 7.760170242756519e-06,
      "loss": 2.398,
      "step": 44137
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1020525693893433,
      "learning_rate": 7.759768968867479e-06,
      "loss": 2.3229,
      "step": 44138
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.9795291423797607,
      "learning_rate": 7.759367698776237e-06,
      "loss": 2.1564,
      "step": 44139
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1420239210128784,
      "learning_rate": 7.758966432483474e-06,
      "loss": 2.3331,
      "step": 44140
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1782938241958618,
      "learning_rate": 7.758565169989876e-06,
      "loss": 2.2987,
      "step": 44141
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0927393436431885,
      "learning_rate": 7.758163911296116e-06,
      "loss": 2.2166,
      "step": 44142
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0087807178497314,
      "learning_rate": 7.757762656402877e-06,
      "loss": 2.4863,
      "step": 44143
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1108901500701904,
      "learning_rate": 7.75736140531084e-06,
      "loss": 2.1482,
      "step": 44144
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2467849254608154,
      "learning_rate": 7.756960158020684e-06,
      "loss": 2.4047,
      "step": 44145
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.041569471359253,
      "learning_rate": 7.756558914533087e-06,
      "loss": 2.1447,
      "step": 44146
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0592366456985474,
      "learning_rate": 7.75615767484874e-06,
      "loss": 2.3466,
      "step": 44147
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0231420993804932,
      "learning_rate": 7.755756438968309e-06,
      "loss": 2.4458,
      "step": 44148
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0691256523132324,
      "learning_rate": 7.75535520689248e-06,
      "loss": 2.2733,
      "step": 44149
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.245505452156067,
      "learning_rate": 7.754953978621936e-06,
      "loss": 2.2442,
      "step": 44150
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1476564407348633,
      "learning_rate": 7.754552754157352e-06,
      "loss": 2.2464,
      "step": 44151
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1207817792892456,
      "learning_rate": 7.754151533499413e-06,
      "loss": 2.5418,
      "step": 44152
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.127968192100525,
      "learning_rate": 7.753750316648797e-06,
      "loss": 2.4673,
      "step": 44153
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.114070177078247,
      "learning_rate": 7.753349103606183e-06,
      "loss": 2.3469,
      "step": 44154
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0340149402618408,
      "learning_rate": 7.752947894372253e-06,
      "loss": 2.5183,
      "step": 44155
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0769116878509521,
      "learning_rate": 7.752546688947687e-06,
      "loss": 2.3312,
      "step": 44156
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3686537742614746,
      "learning_rate": 7.752145487333165e-06,
      "loss": 2.2702,
      "step": 44157
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2198699712753296,
      "learning_rate": 7.751744289529366e-06,
      "loss": 2.2035,
      "step": 44158
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.0199590921401978,
      "learning_rate": 7.75134309553697e-06,
      "loss": 2.2473,
      "step": 44159
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1914749145507812,
      "learning_rate": 7.750941905356664e-06,
      "loss": 2.1021,
      "step": 44160
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0787937641143799,
      "learning_rate": 7.750540718989119e-06,
      "loss": 2.2587,
      "step": 44161
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1019734144210815,
      "learning_rate": 7.750139536435017e-06,
      "loss": 2.2754,
      "step": 44162
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1340924501419067,
      "learning_rate": 7.74973835769504e-06,
      "loss": 2.1458,
      "step": 44163
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1615407466888428,
      "learning_rate": 7.749337182769867e-06,
      "loss": 2.3712,
      "step": 44164
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.056479573249817,
      "learning_rate": 7.748936011660179e-06,
      "loss": 2.3442,
      "step": 44165
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9778984189033508,
      "learning_rate": 7.748534844366657e-06,
      "loss": 2.0328,
      "step": 44166
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0082697868347168,
      "learning_rate": 7.748133680889977e-06,
      "loss": 2.1552,
      "step": 44167
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1241399049758911,
      "learning_rate": 7.747732521230826e-06,
      "loss": 2.3276,
      "step": 44168
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1488893032073975,
      "learning_rate": 7.74733136538988e-06,
      "loss": 2.4066,
      "step": 44169
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1294829845428467,
      "learning_rate": 7.746930213367817e-06,
      "loss": 2.4495,
      "step": 44170
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1218178272247314,
      "learning_rate": 7.74652906516532e-06,
      "loss": 2.3893,
      "step": 44171
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.011458158493042,
      "learning_rate": 7.746127920783067e-06,
      "loss": 2.5126,
      "step": 44172
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2156157493591309,
      "learning_rate": 7.74572678022174e-06,
      "loss": 2.4476,
      "step": 44173
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1103425025939941,
      "learning_rate": 7.74532564348202e-06,
      "loss": 2.1216,
      "step": 44174
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0847268104553223,
      "learning_rate": 7.744924510564586e-06,
      "loss": 2.4579,
      "step": 44175
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2392096519470215,
      "learning_rate": 7.744523381470115e-06,
      "loss": 2.2361,
      "step": 44176
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0960184335708618,
      "learning_rate": 7.74412225619929e-06,
      "loss": 2.413,
      "step": 44177
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1382083892822266,
      "learning_rate": 7.74372113475279e-06,
      "loss": 2.2295,
      "step": 44178
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1196645498275757,
      "learning_rate": 7.743320017131296e-06,
      "loss": 2.4523,
      "step": 44179
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0478923320770264,
      "learning_rate": 7.742918903335487e-06,
      "loss": 2.3601,
      "step": 44180
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1327080726623535,
      "learning_rate": 7.742517793366043e-06,
      "loss": 2.2704,
      "step": 44181
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2003273963928223,
      "learning_rate": 7.742116687223645e-06,
      "loss": 2.1346,
      "step": 44182
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0511904954910278,
      "learning_rate": 7.741715584908973e-06,
      "loss": 2.3148,
      "step": 44183
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.054099202156067,
      "learning_rate": 7.741314486422707e-06,
      "loss": 2.2942,
      "step": 44184
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9934234619140625,
      "learning_rate": 7.740913391765523e-06,
      "loss": 2.1269,
      "step": 44185
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1978795528411865,
      "learning_rate": 7.740512300938108e-06,
      "loss": 2.2488,
      "step": 44186
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0818959474563599,
      "learning_rate": 7.74011121394114e-06,
      "loss": 2.366,
      "step": 44187
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0319055318832397,
      "learning_rate": 7.739710130775296e-06,
      "loss": 2.4282,
      "step": 44188
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0277431011199951,
      "learning_rate": 7.739309051441255e-06,
      "loss": 2.1372,
      "step": 44189
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2815431356430054,
      "learning_rate": 7.738907975939699e-06,
      "loss": 2.1511,
      "step": 44190
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1171422004699707,
      "learning_rate": 7.738506904271309e-06,
      "loss": 2.3833,
      "step": 44191
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0992450714111328,
      "learning_rate": 7.738105836436763e-06,
      "loss": 2.2686,
      "step": 44192
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0254613161087036,
      "learning_rate": 7.737704772436742e-06,
      "loss": 2.6075,
      "step": 44193
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.188754916191101,
      "learning_rate": 7.737303712271928e-06,
      "loss": 2.4436,
      "step": 44194
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.12264883518219,
      "learning_rate": 7.736902655942998e-06,
      "loss": 2.1436,
      "step": 44195
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1100990772247314,
      "learning_rate": 7.73650160345063e-06,
      "loss": 2.5347,
      "step": 44196
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1294806003570557,
      "learning_rate": 7.736100554795512e-06,
      "loss": 2.3019,
      "step": 44197
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.264816403388977,
      "learning_rate": 7.735699509978314e-06,
      "loss": 2.2244,
      "step": 44198
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0416512489318848,
      "learning_rate": 7.735298468999722e-06,
      "loss": 2.1168,
      "step": 44199
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0399945974349976,
      "learning_rate": 7.734897431860413e-06,
      "loss": 2.4163,
      "step": 44200
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1445205211639404,
      "learning_rate": 7.73449639856107e-06,
      "loss": 2.2443,
      "step": 44201
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0233032703399658,
      "learning_rate": 7.73409536910237e-06,
      "loss": 2.3933,
      "step": 44202
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.164766788482666,
      "learning_rate": 7.733694343484995e-06,
      "loss": 2.4504,
      "step": 44203
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.111413836479187,
      "learning_rate": 7.73329332170962e-06,
      "loss": 2.3532,
      "step": 44204
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2533085346221924,
      "learning_rate": 7.732892303776931e-06,
      "loss": 2.3804,
      "step": 44205
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1203035116195679,
      "learning_rate": 7.732491289687605e-06,
      "loss": 2.4284,
      "step": 44206
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.169288992881775,
      "learning_rate": 7.73209027944232e-06,
      "loss": 2.2476,
      "step": 44207
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.050356388092041,
      "learning_rate": 7.73168927304176e-06,
      "loss": 2.4523,
      "step": 44208
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1355440616607666,
      "learning_rate": 7.731288270486603e-06,
      "loss": 2.4203,
      "step": 44209
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0109831094741821,
      "learning_rate": 7.730887271777528e-06,
      "loss": 2.1785,
      "step": 44210
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.08597731590271,
      "learning_rate": 7.730486276915215e-06,
      "loss": 2.355,
      "step": 44211
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.009162187576294,
      "learning_rate": 7.730085285900343e-06,
      "loss": 2.3828,
      "step": 44212
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.040293574333191,
      "learning_rate": 7.729684298733599e-06,
      "loss": 2.2692,
      "step": 44213
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0520416498184204,
      "learning_rate": 7.729283315415651e-06,
      "loss": 2.4312,
      "step": 44214
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.135819435119629,
      "learning_rate": 7.728882335947186e-06,
      "loss": 2.2106,
      "step": 44215
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0868866443634033,
      "learning_rate": 7.728481360328882e-06,
      "loss": 2.4448,
      "step": 44216
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0804564952850342,
      "learning_rate": 7.728080388561418e-06,
      "loss": 2.2644,
      "step": 44217
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0929278135299683,
      "learning_rate": 7.727679420645476e-06,
      "loss": 2.1865,
      "step": 44218
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1571365594863892,
      "learning_rate": 7.727278456581734e-06,
      "loss": 2.1835,
      "step": 44219
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1723353862762451,
      "learning_rate": 7.726877496370871e-06,
      "loss": 2.5496,
      "step": 44220
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3275033235549927,
      "learning_rate": 7.726476540013569e-06,
      "loss": 2.1342,
      "step": 44221
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1254791021347046,
      "learning_rate": 7.72607558751051e-06,
      "loss": 2.3632,
      "step": 44222
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0987763404846191,
      "learning_rate": 7.725674638862366e-06,
      "loss": 2.4246,
      "step": 44223
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1057956218719482,
      "learning_rate": 7.725273694069824e-06,
      "loss": 2.2824,
      "step": 44224
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1106595993041992,
      "learning_rate": 7.72487275313356e-06,
      "loss": 2.5041,
      "step": 44225
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9661865830421448,
      "learning_rate": 7.724471816054256e-06,
      "loss": 2.4247,
      "step": 44226
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0731499195098877,
      "learning_rate": 7.724070882832589e-06,
      "loss": 2.6598,
      "step": 44227
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3110520839691162,
      "learning_rate": 7.72366995346924e-06,
      "loss": 2.3784,
      "step": 44228
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.293558120727539,
      "learning_rate": 7.723269027964887e-06,
      "loss": 2.4486,
      "step": 44229
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0761882066726685,
      "learning_rate": 7.722868106320212e-06,
      "loss": 2.2339,
      "step": 44230
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0200995206832886,
      "learning_rate": 7.722467188535898e-06,
      "loss": 2.2017,
      "step": 44231
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1272366046905518,
      "learning_rate": 7.722066274612617e-06,
      "loss": 2.3506,
      "step": 44232
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0101850032806396,
      "learning_rate": 7.721665364551053e-06,
      "loss": 2.1918,
      "step": 44233
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0879076719284058,
      "learning_rate": 7.721264458351884e-06,
      "loss": 2.228,
      "step": 44234
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0510035753250122,
      "learning_rate": 7.72086355601579e-06,
      "loss": 2.476,
      "step": 44235
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0589739084243774,
      "learning_rate": 7.720462657543453e-06,
      "loss": 2.2859,
      "step": 44236
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2192884683609009,
      "learning_rate": 7.720061762935551e-06,
      "loss": 2.2606,
      "step": 44237
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9911022782325745,
      "learning_rate": 7.719660872192762e-06,
      "loss": 2.3131,
      "step": 44238
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0941661596298218,
      "learning_rate": 7.719259985315772e-06,
      "loss": 2.2888,
      "step": 44239
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0822851657867432,
      "learning_rate": 7.718859102305252e-06,
      "loss": 2.3693,
      "step": 44240
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.008427381515503,
      "learning_rate": 7.718458223161886e-06,
      "loss": 2.1085,
      "step": 44241
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.298791527748108,
      "learning_rate": 7.718057347886352e-06,
      "loss": 2.4052,
      "step": 44242
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1091498136520386,
      "learning_rate": 7.71765647647933e-06,
      "loss": 2.4406,
      "step": 44243
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1548762321472168,
      "learning_rate": 7.717255608941501e-06,
      "loss": 2.4013,
      "step": 44244
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1548316478729248,
      "learning_rate": 7.716854745273544e-06,
      "loss": 2.2247,
      "step": 44245
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3717597723007202,
      "learning_rate": 7.716453885476138e-06,
      "loss": 2.2036,
      "step": 44246
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0588384866714478,
      "learning_rate": 7.716053029549963e-06,
      "loss": 2.3429,
      "step": 44247
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.134903073310852,
      "learning_rate": 7.715652177495698e-06,
      "loss": 2.2983,
      "step": 44248
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2129400968551636,
      "learning_rate": 7.715251329314023e-06,
      "loss": 2.2346,
      "step": 44249
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1226489543914795,
      "learning_rate": 7.714850485005619e-06,
      "loss": 2.2945,
      "step": 44250
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2096575498580933,
      "learning_rate": 7.714449644571162e-06,
      "loss": 2.1864,
      "step": 44251
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0383310317993164,
      "learning_rate": 7.714048808011336e-06,
      "loss": 1.977,
      "step": 44252
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0485548973083496,
      "learning_rate": 7.71364797532682e-06,
      "loss": 2.2178,
      "step": 44253
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1449813842773438,
      "learning_rate": 7.713247146518287e-06,
      "loss": 2.2838,
      "step": 44254
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1255602836608887,
      "learning_rate": 7.712846321586422e-06,
      "loss": 2.1202,
      "step": 44255
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1444177627563477,
      "learning_rate": 7.712445500531905e-06,
      "loss": 2.4202,
      "step": 44256
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0304168462753296,
      "learning_rate": 7.712044683355411e-06,
      "loss": 2.4096,
      "step": 44257
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.191581130027771,
      "learning_rate": 7.711643870057625e-06,
      "loss": 2.4569,
      "step": 44258
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0616209506988525,
      "learning_rate": 7.711243060639227e-06,
      "loss": 2.1779,
      "step": 44259
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.138110637664795,
      "learning_rate": 7.71084225510089e-06,
      "loss": 2.3328,
      "step": 44260
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2509287595748901,
      "learning_rate": 7.710441453443298e-06,
      "loss": 2.466,
      "step": 44261
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0356296300888062,
      "learning_rate": 7.710040655667129e-06,
      "loss": 2.2543,
      "step": 44262
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1561657190322876,
      "learning_rate": 7.709639861773064e-06,
      "loss": 2.2409,
      "step": 44263
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1033557653427124,
      "learning_rate": 7.70923907176178e-06,
      "loss": 2.3698,
      "step": 44264
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1279100179672241,
      "learning_rate": 7.708838285633959e-06,
      "loss": 2.304,
      "step": 44265
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2986971139907837,
      "learning_rate": 7.708437503390283e-06,
      "loss": 2.1063,
      "step": 44266
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0182781219482422,
      "learning_rate": 7.708036725031424e-06,
      "loss": 2.2529,
      "step": 44267
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0261824131011963,
      "learning_rate": 7.707635950558066e-06,
      "loss": 2.3254,
      "step": 44268
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1486862897872925,
      "learning_rate": 7.707235179970886e-06,
      "loss": 2.5084,
      "step": 44269
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0818406343460083,
      "learning_rate": 7.706834413270567e-06,
      "loss": 2.3679,
      "step": 44270
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1051729917526245,
      "learning_rate": 7.706433650457784e-06,
      "loss": 2.4627,
      "step": 44271
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.069991111755371,
      "learning_rate": 7.70603289153322e-06,
      "loss": 2.4034,
      "step": 44272
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.301779866218567,
      "learning_rate": 7.705632136497554e-06,
      "loss": 2.2116,
      "step": 44273
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.436751127243042,
      "learning_rate": 7.705231385351463e-06,
      "loss": 2.1851,
      "step": 44274
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1269608736038208,
      "learning_rate": 7.70483063809563e-06,
      "loss": 2.3664,
      "step": 44275
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.103708267211914,
      "learning_rate": 7.704429894730732e-06,
      "loss": 2.2684,
      "step": 44276
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.337659478187561,
      "learning_rate": 7.704029155257448e-06,
      "loss": 2.5189,
      "step": 44277
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1449064016342163,
      "learning_rate": 7.703628419676458e-06,
      "loss": 2.6165,
      "step": 44278
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0826292037963867,
      "learning_rate": 7.703227687988449e-06,
      "loss": 2.5192,
      "step": 44279
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0169548988342285,
      "learning_rate": 7.702826960194085e-06,
      "loss": 2.343,
      "step": 44280
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0537186861038208,
      "learning_rate": 7.702426236294054e-06,
      "loss": 2.3864,
      "step": 44281
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.263933777809143,
      "learning_rate": 7.702025516289035e-06,
      "loss": 2.2766,
      "step": 44282
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0780608654022217,
      "learning_rate": 7.701624800179706e-06,
      "loss": 2.1587,
      "step": 44283
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.02335786819458,
      "learning_rate": 7.701224087966747e-06,
      "loss": 2.2307,
      "step": 44284
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0164995193481445,
      "learning_rate": 7.700823379650838e-06,
      "loss": 2.2804,
      "step": 44285
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.082546591758728,
      "learning_rate": 7.70042267523266e-06,
      "loss": 2.4834,
      "step": 44286
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.175337791442871,
      "learning_rate": 7.70002197471289e-06,
      "loss": 2.1629,
      "step": 44287
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1040555238723755,
      "learning_rate": 7.699621278092205e-06,
      "loss": 2.2802,
      "step": 44288
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1330595016479492,
      "learning_rate": 7.699220585371287e-06,
      "loss": 2.2822,
      "step": 44289
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.135849952697754,
      "learning_rate": 7.698819896550817e-06,
      "loss": 2.3389,
      "step": 44290
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4871464967727661,
      "learning_rate": 7.69841921163147e-06,
      "loss": 2.3006,
      "step": 44291
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.083603024482727,
      "learning_rate": 7.698018530613931e-06,
      "loss": 2.455,
      "step": 44292
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0671435594558716,
      "learning_rate": 7.697617853498874e-06,
      "loss": 2.415,
      "step": 44293
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0628913640975952,
      "learning_rate": 7.69721718028698e-06,
      "loss": 2.3974,
      "step": 44294
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2399303913116455,
      "learning_rate": 7.696816510978927e-06,
      "loss": 2.3384,
      "step": 44295
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9455381631851196,
      "learning_rate": 7.696415845575396e-06,
      "loss": 2.1281,
      "step": 44296
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0943316221237183,
      "learning_rate": 7.696015184077066e-06,
      "loss": 2.2966,
      "step": 44297
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0999767780303955,
      "learning_rate": 7.695614526484616e-06,
      "loss": 2.375,
      "step": 44298
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.036531925201416,
      "learning_rate": 7.695213872798723e-06,
      "loss": 2.1501,
      "step": 44299
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.100049376487732,
      "learning_rate": 7.694813223020072e-06,
      "loss": 2.5643,
      "step": 44300
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0069262981414795,
      "learning_rate": 7.694412577149338e-06,
      "loss": 2.2705,
      "step": 44301
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.12860906124115,
      "learning_rate": 7.694011935187198e-06,
      "loss": 2.2241,
      "step": 44302
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.215149998664856,
      "learning_rate": 7.693611297134337e-06,
      "loss": 2.2789,
      "step": 44303
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.088982343673706,
      "learning_rate": 7.69321066299143e-06,
      "loss": 2.3522,
      "step": 44304
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.130731225013733,
      "learning_rate": 7.692810032759161e-06,
      "loss": 2.2385,
      "step": 44305
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9652771949768066,
      "learning_rate": 7.692409406438203e-06,
      "loss": 2.2896,
      "step": 44306
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1194554567337036,
      "learning_rate": 7.692008784029236e-06,
      "loss": 2.1216,
      "step": 44307
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1973395347595215,
      "learning_rate": 7.691608165532943e-06,
      "loss": 2.3406,
      "step": 44308
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1118239164352417,
      "learning_rate": 7.69120755095e-06,
      "loss": 2.489,
      "step": 44309
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.121583342552185,
      "learning_rate": 7.690806940281087e-06,
      "loss": 2.3843,
      "step": 44310
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1328028440475464,
      "learning_rate": 7.690406333526885e-06,
      "loss": 2.1573,
      "step": 44311
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1514427661895752,
      "learning_rate": 7.69000573068807e-06,
      "loss": 2.5103,
      "step": 44312
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9762585163116455,
      "learning_rate": 7.689605131765323e-06,
      "loss": 2.1196,
      "step": 44313
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.064865231513977,
      "learning_rate": 7.689204536759322e-06,
      "loss": 2.3062,
      "step": 44314
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.149581789970398,
      "learning_rate": 7.68880394567075e-06,
      "loss": 2.3816,
      "step": 44315
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2333972454071045,
      "learning_rate": 7.688403358500282e-06,
      "loss": 2.3234,
      "step": 44316
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.161408543586731,
      "learning_rate": 7.688002775248596e-06,
      "loss": 2.328,
      "step": 44317
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0551213026046753,
      "learning_rate": 7.687602195916376e-06,
      "loss": 2.4644,
      "step": 44318
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9863893985748291,
      "learning_rate": 7.687201620504298e-06,
      "loss": 2.3472,
      "step": 44319
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0213652849197388,
      "learning_rate": 7.68680104901304e-06,
      "loss": 2.3059,
      "step": 44320
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9814780950546265,
      "learning_rate": 7.686400481443283e-06,
      "loss": 2.2848,
      "step": 44321
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0468124151229858,
      "learning_rate": 7.685999917795705e-06,
      "loss": 2.3816,
      "step": 44322
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.998195469379425,
      "learning_rate": 7.685599358070987e-06,
      "loss": 2.4019,
      "step": 44323
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.107427954673767,
      "learning_rate": 7.685198802269804e-06,
      "loss": 2.3256,
      "step": 44324
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0563511848449707,
      "learning_rate": 7.68479825039284e-06,
      "loss": 2.3678,
      "step": 44325
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0768760442733765,
      "learning_rate": 7.68439770244077e-06,
      "loss": 2.325,
      "step": 44326
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0813534259796143,
      "learning_rate": 7.683997158414275e-06,
      "loss": 2.4563,
      "step": 44327
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1339389085769653,
      "learning_rate": 7.683596618314034e-06,
      "loss": 2.4311,
      "step": 44328
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1906073093414307,
      "learning_rate": 7.683196082140727e-06,
      "loss": 2.1903,
      "step": 44329
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0917991399765015,
      "learning_rate": 7.68279554989503e-06,
      "loss": 2.4089,
      "step": 44330
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2126723527908325,
      "learning_rate": 7.682395021577625e-06,
      "loss": 2.345,
      "step": 44331
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2910301685333252,
      "learning_rate": 7.681994497189194e-06,
      "loss": 2.3426,
      "step": 44332
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1294505596160889,
      "learning_rate": 7.681593976730409e-06,
      "loss": 2.2856,
      "step": 44333
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0594278573989868,
      "learning_rate": 7.68119346020195e-06,
      "loss": 2.4312,
      "step": 44334
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1733219623565674,
      "learning_rate": 7.680792947604499e-06,
      "loss": 2.418,
      "step": 44335
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1516655683517456,
      "learning_rate": 7.680392438938733e-06,
      "loss": 2.4614,
      "step": 44336
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.064424991607666,
      "learning_rate": 7.679991934205333e-06,
      "loss": 2.2427,
      "step": 44337
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0359039306640625,
      "learning_rate": 7.679591433404976e-06,
      "loss": 2.269,
      "step": 44338
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0578296184539795,
      "learning_rate": 7.67919093653834e-06,
      "loss": 2.2933,
      "step": 44339
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0436596870422363,
      "learning_rate": 7.678790443606107e-06,
      "loss": 2.2411,
      "step": 44340
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5419328212738037,
      "learning_rate": 7.678389954608956e-06,
      "loss": 2.3183,
      "step": 44341
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.155123233795166,
      "learning_rate": 7.677989469547566e-06,
      "loss": 2.3438,
      "step": 44342
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.090183138847351,
      "learning_rate": 7.67758898842261e-06,
      "loss": 2.3673,
      "step": 44343
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0982019901275635,
      "learning_rate": 7.677188511234774e-06,
      "loss": 2.2049,
      "step": 44344
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1502654552459717,
      "learning_rate": 7.676788037984736e-06,
      "loss": 2.0225,
      "step": 44345
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9989232420921326,
      "learning_rate": 7.676387568673171e-06,
      "loss": 2.3068,
      "step": 44346
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0528682470321655,
      "learning_rate": 7.67598710330076e-06,
      "loss": 2.2,
      "step": 44347
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0672283172607422,
      "learning_rate": 7.675586641868182e-06,
      "loss": 2.3295,
      "step": 44348
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.192260503768921,
      "learning_rate": 7.675186184376119e-06,
      "loss": 2.2918,
      "step": 44349
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0604721307754517,
      "learning_rate": 7.674785730825243e-06,
      "loss": 2.4871,
      "step": 44350
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1121736764907837,
      "learning_rate": 7.674385281216239e-06,
      "loss": 2.2406,
      "step": 44351
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1602016687393188,
      "learning_rate": 7.673984835549782e-06,
      "loss": 2.3581,
      "step": 44352
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.246349573135376,
      "learning_rate": 7.673584393826553e-06,
      "loss": 2.3104,
      "step": 44353
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.046332836151123,
      "learning_rate": 7.67318395604723e-06,
      "loss": 2.3743,
      "step": 44354
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3058563470840454,
      "learning_rate": 7.672783522212492e-06,
      "loss": 2.2295,
      "step": 44355
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0162321329116821,
      "learning_rate": 7.672383092323018e-06,
      "loss": 2.2315,
      "step": 44356
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0078707933425903,
      "learning_rate": 7.671982666379488e-06,
      "loss": 2.2395,
      "step": 44357
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0508277416229248,
      "learning_rate": 7.671582244382584e-06,
      "loss": 2.6123,
      "step": 44358
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1398950815200806,
      "learning_rate": 7.671181826332973e-06,
      "loss": 2.472,
      "step": 44359
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1131007671356201,
      "learning_rate": 7.670781412231344e-06,
      "loss": 2.3092,
      "step": 44360
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0614129304885864,
      "learning_rate": 7.670381002078371e-06,
      "loss": 2.3337,
      "step": 44361
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0830565690994263,
      "learning_rate": 7.669980595874737e-06,
      "loss": 2.3312,
      "step": 44362
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0614036321640015,
      "learning_rate": 7.669580193621119e-06,
      "loss": 2.6758,
      "step": 44363
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0891103744506836,
      "learning_rate": 7.669179795318195e-06,
      "loss": 2.4975,
      "step": 44364
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1007364988327026,
      "learning_rate": 7.668779400966643e-06,
      "loss": 2.4719,
      "step": 44365
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0540835857391357,
      "learning_rate": 7.668379010567144e-06,
      "loss": 2.5179,
      "step": 44366
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9704880714416504,
      "learning_rate": 7.667978624120376e-06,
      "loss": 2.2426,
      "step": 44367
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0515711307525635,
      "learning_rate": 7.667578241627018e-06,
      "loss": 2.1719,
      "step": 44368
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9488933086395264,
      "learning_rate": 7.667177863087747e-06,
      "loss": 2.2716,
      "step": 44369
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.119664192199707,
      "learning_rate": 7.666777488503247e-06,
      "loss": 2.2738,
      "step": 44370
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0472562313079834,
      "learning_rate": 7.666377117874191e-06,
      "loss": 2.3014,
      "step": 44371
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1463029384613037,
      "learning_rate": 7.66597675120126e-06,
      "loss": 2.2105,
      "step": 44372
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0840153694152832,
      "learning_rate": 7.66557638848513e-06,
      "loss": 2.2713,
      "step": 44373
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.464131236076355,
      "learning_rate": 7.665176029726482e-06,
      "loss": 2.3653,
      "step": 44374
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1000690460205078,
      "learning_rate": 7.664775674925996e-06,
      "loss": 2.2248,
      "step": 44375
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1235765218734741,
      "learning_rate": 7.664375324084349e-06,
      "loss": 2.2497,
      "step": 44376
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.16573166847229,
      "learning_rate": 7.663974977202221e-06,
      "loss": 2.3792,
      "step": 44377
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1544193029403687,
      "learning_rate": 7.663574634280292e-06,
      "loss": 2.4417,
      "step": 44378
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1256376504898071,
      "learning_rate": 7.663174295319233e-06,
      "loss": 2.21,
      "step": 44379
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0128307342529297,
      "learning_rate": 7.662773960319731e-06,
      "loss": 2.4886,
      "step": 44380
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.044778823852539,
      "learning_rate": 7.662373629282463e-06,
      "loss": 2.2455,
      "step": 44381
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0873759984970093,
      "learning_rate": 7.661973302208105e-06,
      "loss": 2.2886,
      "step": 44382
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1294599771499634,
      "learning_rate": 7.661572979097337e-06,
      "loss": 2.2073,
      "step": 44383
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0556308031082153,
      "learning_rate": 7.661172659950842e-06,
      "loss": 2.3905,
      "step": 44384
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.193549394607544,
      "learning_rate": 7.660772344769292e-06,
      "loss": 2.4634,
      "step": 44385
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.114227294921875,
      "learning_rate": 7.660372033553368e-06,
      "loss": 2.4483,
      "step": 44386
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0609890222549438,
      "learning_rate": 7.659971726303747e-06,
      "loss": 2.3884,
      "step": 44387
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0658469200134277,
      "learning_rate": 7.659571423021111e-06,
      "loss": 2.5806,
      "step": 44388
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1012314558029175,
      "learning_rate": 7.659171123706136e-06,
      "loss": 2.4731,
      "step": 44389
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1837385892868042,
      "learning_rate": 7.658770828359503e-06,
      "loss": 2.2279,
      "step": 44390
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7345569133758545,
      "learning_rate": 7.658370536981889e-06,
      "loss": 2.366,
      "step": 44391
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.995786190032959,
      "learning_rate": 7.657970249573972e-06,
      "loss": 2.3079,
      "step": 44392
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2022675275802612,
      "learning_rate": 7.657569966136431e-06,
      "loss": 2.3654,
      "step": 44393
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0860463380813599,
      "learning_rate": 7.657169686669948e-06,
      "loss": 2.4998,
      "step": 44394
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0791635513305664,
      "learning_rate": 7.656769411175196e-06,
      "loss": 2.184,
      "step": 44395
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.059551477432251,
      "learning_rate": 7.656369139652857e-06,
      "loss": 2.0538,
      "step": 44396
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1178756952285767,
      "learning_rate": 7.65596887210361e-06,
      "loss": 2.21,
      "step": 44397
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1728731393814087,
      "learning_rate": 7.655568608528135e-06,
      "loss": 2.3474,
      "step": 44398
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.340835690498352,
      "learning_rate": 7.655168348927105e-06,
      "loss": 2.2991,
      "step": 44399
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9913159012794495,
      "learning_rate": 7.6547680933012e-06,
      "loss": 2.3544,
      "step": 44400
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0455073118209839,
      "learning_rate": 7.654367841651102e-06,
      "loss": 2.5782,
      "step": 44401
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1479369401931763,
      "learning_rate": 7.653967593977485e-06,
      "loss": 2.3422,
      "step": 44402
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9410845637321472,
      "learning_rate": 7.653567350281033e-06,
      "loss": 2.3491,
      "step": 44403
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1377750635147095,
      "learning_rate": 7.653167110562419e-06,
      "loss": 2.2738,
      "step": 44404
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.034798264503479,
      "learning_rate": 7.652766874822329e-06,
      "loss": 2.6438,
      "step": 44405
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2356570959091187,
      "learning_rate": 7.652366643061432e-06,
      "loss": 2.252,
      "step": 44406
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.084693193435669,
      "learning_rate": 7.651966415280412e-06,
      "loss": 2.2885,
      "step": 44407
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0953092575073242,
      "learning_rate": 7.651566191479946e-06,
      "loss": 2.2738,
      "step": 44408
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9889910817146301,
      "learning_rate": 7.651165971660715e-06,
      "loss": 2.476,
      "step": 44409
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1829506158828735,
      "learning_rate": 7.650765755823393e-06,
      "loss": 2.3646,
      "step": 44410
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1867259740829468,
      "learning_rate": 7.650365543968668e-06,
      "loss": 2.3377,
      "step": 44411
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.16392982006073,
      "learning_rate": 7.649965336097206e-06,
      "loss": 2.3931,
      "step": 44412
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1038284301757812,
      "learning_rate": 7.649565132209691e-06,
      "loss": 2.3234,
      "step": 44413
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.08551824092865,
      "learning_rate": 7.649164932306801e-06,
      "loss": 2.3952,
      "step": 44414
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1950759887695312,
      "learning_rate": 7.648764736389216e-06,
      "loss": 2.1829,
      "step": 44415
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1435136795043945,
      "learning_rate": 7.648364544457613e-06,
      "loss": 2.417,
      "step": 44416
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1209793090820312,
      "learning_rate": 7.647964356512671e-06,
      "loss": 2.3426,
      "step": 44417
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1543525457382202,
      "learning_rate": 7.647564172555068e-06,
      "loss": 2.2455,
      "step": 44418
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0192110538482666,
      "learning_rate": 7.647163992585482e-06,
      "loss": 2.344,
      "step": 44419
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1955783367156982,
      "learning_rate": 7.646763816604593e-06,
      "loss": 2.4569,
      "step": 44420
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.097846269607544,
      "learning_rate": 7.646363644613077e-06,
      "loss": 2.2905,
      "step": 44421
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0756733417510986,
      "learning_rate": 7.645963476611616e-06,
      "loss": 2.263,
      "step": 44422
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1790663003921509,
      "learning_rate": 7.645563312600884e-06,
      "loss": 2.1838,
      "step": 44423
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1553783416748047,
      "learning_rate": 7.645163152581567e-06,
      "loss": 2.2815,
      "step": 44424
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0961793661117554,
      "learning_rate": 7.644762996554335e-06,
      "loss": 2.4337,
      "step": 44425
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.114725112915039,
      "learning_rate": 7.644362844519869e-06,
      "loss": 2.296,
      "step": 44426
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0312423706054688,
      "learning_rate": 7.643962696478846e-06,
      "loss": 2.354,
      "step": 44427
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2287770509719849,
      "learning_rate": 7.643562552431947e-06,
      "loss": 2.4302,
      "step": 44428
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9732706546783447,
      "learning_rate": 7.64316241237985e-06,
      "loss": 2.2817,
      "step": 44429
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.198970913887024,
      "learning_rate": 7.642762276323232e-06,
      "loss": 2.3057,
      "step": 44430
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0748172998428345,
      "learning_rate": 7.642362144262771e-06,
      "loss": 2.3195,
      "step": 44431
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0649089813232422,
      "learning_rate": 7.64196201619915e-06,
      "loss": 2.3598,
      "step": 44432
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.097163200378418,
      "learning_rate": 7.641561892133042e-06,
      "loss": 2.4853,
      "step": 44433
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1324235200881958,
      "learning_rate": 7.641161772065128e-06,
      "loss": 2.1311,
      "step": 44434
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2737356424331665,
      "learning_rate": 7.640761655996085e-06,
      "loss": 2.3615,
      "step": 44435
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.118557333946228,
      "learning_rate": 7.64036154392659e-06,
      "loss": 2.3579,
      "step": 44436
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1012049913406372,
      "learning_rate": 7.639961435857327e-06,
      "loss": 2.3656,
      "step": 44437
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0267565250396729,
      "learning_rate": 7.639561331788967e-06,
      "loss": 2.1709,
      "step": 44438
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.031803011894226,
      "learning_rate": 7.639161231722195e-06,
      "loss": 2.2716,
      "step": 44439
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0944329500198364,
      "learning_rate": 7.638761135657684e-06,
      "loss": 2.3057,
      "step": 44440
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1206005811691284,
      "learning_rate": 7.638361043596113e-06,
      "loss": 2.1602,
      "step": 44441
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2309931516647339,
      "learning_rate": 7.637960955538162e-06,
      "loss": 2.1542,
      "step": 44442
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0263887643814087,
      "learning_rate": 7.637560871484508e-06,
      "loss": 2.5022,
      "step": 44443
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1436357498168945,
      "learning_rate": 7.63716079143583e-06,
      "loss": 2.6038,
      "step": 44444
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9854044914245605,
      "learning_rate": 7.636760715392807e-06,
      "loss": 2.3369,
      "step": 44445
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0634268522262573,
      "learning_rate": 7.636360643356115e-06,
      "loss": 2.2976,
      "step": 44446
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.103590488433838,
      "learning_rate": 7.635960575326435e-06,
      "loss": 2.3592,
      "step": 44447
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0705342292785645,
      "learning_rate": 7.635560511304445e-06,
      "loss": 2.358,
      "step": 44448
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0513291358947754,
      "learning_rate": 7.63516045129082e-06,
      "loss": 2.4684,
      "step": 44449
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1117830276489258,
      "learning_rate": 7.634760395286245e-06,
      "loss": 2.3825,
      "step": 44450
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9490796327590942,
      "learning_rate": 7.63436034329139e-06,
      "loss": 2.1319,
      "step": 44451
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.177581787109375,
      "learning_rate": 7.633960295306937e-06,
      "loss": 2.5291,
      "step": 44452
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0250701904296875,
      "learning_rate": 7.633560251333562e-06,
      "loss": 2.4634,
      "step": 44453
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1059402227401733,
      "learning_rate": 7.633160211371946e-06,
      "loss": 2.2033,
      "step": 44454
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1181507110595703,
      "learning_rate": 7.632760175422766e-06,
      "loss": 2.1954,
      "step": 44455
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.062251091003418,
      "learning_rate": 7.6323601434867e-06,
      "loss": 2.4958,
      "step": 44456
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0388762950897217,
      "learning_rate": 7.63196011556443e-06,
      "loss": 2.2245,
      "step": 44457
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1071826219558716,
      "learning_rate": 7.631560091656628e-06,
      "loss": 2.3319,
      "step": 44458
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1310157775878906,
      "learning_rate": 7.631160071763975e-06,
      "loss": 2.4002,
      "step": 44459
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2189313173294067,
      "learning_rate": 7.630760055887151e-06,
      "loss": 2.4456,
      "step": 44460
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.185859203338623,
      "learning_rate": 7.63036004402683e-06,
      "loss": 2.3092,
      "step": 44461
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.054606556892395,
      "learning_rate": 7.629960036183693e-06,
      "loss": 2.3113,
      "step": 44462
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0822217464447021,
      "learning_rate": 7.629560032358417e-06,
      "loss": 2.3393,
      "step": 44463
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1013199090957642,
      "learning_rate": 7.629160032551683e-06,
      "loss": 2.3506,
      "step": 44464
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0683038234710693,
      "learning_rate": 7.628760036764164e-06,
      "loss": 2.5375,
      "step": 44465
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1345676183700562,
      "learning_rate": 7.628360044996542e-06,
      "loss": 2.2844,
      "step": 44466
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.116280436515808,
      "learning_rate": 7.627960057249495e-06,
      "loss": 2.048,
      "step": 44467
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.15753972530365,
      "learning_rate": 7.6275600735236975e-06,
      "loss": 2.1702,
      "step": 44468
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3247584104537964,
      "learning_rate": 7.627160093819831e-06,
      "loss": 2.4802,
      "step": 44469
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2029180526733398,
      "learning_rate": 7.626760118138572e-06,
      "loss": 2.4577,
      "step": 44470
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1324225664138794,
      "learning_rate": 7.626360146480599e-06,
      "loss": 2.4711,
      "step": 44471
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1904088258743286,
      "learning_rate": 7.625960178846591e-06,
      "loss": 2.239,
      "step": 44472
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1160058975219727,
      "learning_rate": 7.625560215237224e-06,
      "loss": 2.4474,
      "step": 44473
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0651811361312866,
      "learning_rate": 7.625160255653179e-06,
      "loss": 2.58,
      "step": 44474
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.156040072441101,
      "learning_rate": 7.624760300095131e-06,
      "loss": 2.5061,
      "step": 44475
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7842971086502075,
      "learning_rate": 7.62436034856376e-06,
      "loss": 2.2768,
      "step": 44476
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1470993757247925,
      "learning_rate": 7.623960401059747e-06,
      "loss": 2.2746,
      "step": 44477
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9853155612945557,
      "learning_rate": 7.623560457583763e-06,
      "loss": 2.1965,
      "step": 44478
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1221740245819092,
      "learning_rate": 7.623160518136489e-06,
      "loss": 2.3772,
      "step": 44479
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0251340866088867,
      "learning_rate": 7.622760582718604e-06,
      "loss": 2.3488,
      "step": 44480
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1284767389297485,
      "learning_rate": 7.622360651330785e-06,
      "loss": 2.2527,
      "step": 44481
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0833446979522705,
      "learning_rate": 7.62196072397371e-06,
      "loss": 2.2858,
      "step": 44482
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9870660305023193,
      "learning_rate": 7.621560800648058e-06,
      "loss": 2.1743,
      "step": 44483
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.25436270236969,
      "learning_rate": 7.621160881354506e-06,
      "loss": 2.2004,
      "step": 44484
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1351450681686401,
      "learning_rate": 7.6207609660937335e-06,
      "loss": 2.2705,
      "step": 44485
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.020586609840393,
      "learning_rate": 7.620361054866417e-06,
      "loss": 2.2443,
      "step": 44486
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1871894598007202,
      "learning_rate": 7.619961147673234e-06,
      "loss": 2.4373,
      "step": 44487
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4146945476531982,
      "learning_rate": 7.619561244514866e-06,
      "loss": 2.6489,
      "step": 44488
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.004416584968567,
      "learning_rate": 7.619161345391985e-06,
      "loss": 2.5066,
      "step": 44489
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9874699115753174,
      "learning_rate": 7.618761450305275e-06,
      "loss": 2.304,
      "step": 44490
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.152035117149353,
      "learning_rate": 7.61836155925541e-06,
      "loss": 2.3933,
      "step": 44491
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1309120655059814,
      "learning_rate": 7.6179616722430684e-06,
      "loss": 2.2154,
      "step": 44492
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0837849378585815,
      "learning_rate": 7.617561789268928e-06,
      "loss": 2.3603,
      "step": 44493
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0993276834487915,
      "learning_rate": 7.617161910333668e-06,
      "loss": 2.4098,
      "step": 44494
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0637035369873047,
      "learning_rate": 7.616762035437968e-06,
      "loss": 2.3717,
      "step": 44495
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0777190923690796,
      "learning_rate": 7.6163621645825e-06,
      "loss": 1.9969,
      "step": 44496
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9831404089927673,
      "learning_rate": 7.615962297767947e-06,
      "loss": 2.1113,
      "step": 44497
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.078105092048645,
      "learning_rate": 7.615562434994985e-06,
      "loss": 2.4107,
      "step": 44498
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0825204849243164,
      "learning_rate": 7.615162576264292e-06,
      "loss": 2.2542,
      "step": 44499
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0618196725845337,
      "learning_rate": 7.6147627215765475e-06,
      "loss": 2.1868,
      "step": 44500
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1501438617706299,
      "learning_rate": 7.614362870932427e-06,
      "loss": 2.2151,
      "step": 44501
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0284620523452759,
      "learning_rate": 7.613963024332609e-06,
      "loss": 2.3123,
      "step": 44502
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9716081619262695,
      "learning_rate": 7.6135631817777766e-06,
      "loss": 2.5627,
      "step": 44503
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2397944927215576,
      "learning_rate": 7.613163343268599e-06,
      "loss": 2.4565,
      "step": 44504
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9481030702590942,
      "learning_rate": 7.612763508805756e-06,
      "loss": 2.2844,
      "step": 44505
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1529862880706787,
      "learning_rate": 7.612363678389929e-06,
      "loss": 2.44,
      "step": 44506
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9860529899597168,
      "learning_rate": 7.611963852021793e-06,
      "loss": 2.4864,
      "step": 44507
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1822489500045776,
      "learning_rate": 7.611564029702028e-06,
      "loss": 2.5322,
      "step": 44508
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.157852292060852,
      "learning_rate": 7.61116421143131e-06,
      "loss": 2.5867,
      "step": 44509
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1091482639312744,
      "learning_rate": 7.610764397210317e-06,
      "loss": 2.3291,
      "step": 44510
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.18265962600708,
      "learning_rate": 7.610364587039727e-06,
      "loss": 2.4767,
      "step": 44511
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.135481595993042,
      "learning_rate": 7.60996478092022e-06,
      "loss": 2.3619,
      "step": 44512
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1001524925231934,
      "learning_rate": 7.60956497885247e-06,
      "loss": 2.34,
      "step": 44513
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9688312411308289,
      "learning_rate": 7.609165180837157e-06,
      "loss": 2.0895,
      "step": 44514
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0286598205566406,
      "learning_rate": 7.608765386874957e-06,
      "loss": 2.1639,
      "step": 44515
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1849638223648071,
      "learning_rate": 7.608365596966556e-06,
      "loss": 2.5973,
      "step": 44516
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1082861423492432,
      "learning_rate": 7.6079658111126185e-06,
      "loss": 2.5091,
      "step": 44517
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0270891189575195,
      "learning_rate": 7.60756602931383e-06,
      "loss": 2.2913,
      "step": 44518
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.100930094718933,
      "learning_rate": 7.607166251570867e-06,
      "loss": 2.3814,
      "step": 44519
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2866387367248535,
      "learning_rate": 7.606766477884407e-06,
      "loss": 2.1035,
      "step": 44520
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0438166856765747,
      "learning_rate": 7.606366708255126e-06,
      "loss": 2.4266,
      "step": 44521
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2912346124649048,
      "learning_rate": 7.605966942683705e-06,
      "loss": 2.1744,
      "step": 44522
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0875526666641235,
      "learning_rate": 7.605567181170822e-06,
      "loss": 2.5886,
      "step": 44523
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.155458688735962,
      "learning_rate": 7.605167423717151e-06,
      "loss": 2.4904,
      "step": 44524
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9499102830886841,
      "learning_rate": 7.604767670323372e-06,
      "loss": 2.4472,
      "step": 44525
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1488710641860962,
      "learning_rate": 7.604367920990161e-06,
      "loss": 2.4936,
      "step": 44526
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.056625247001648,
      "learning_rate": 7.603968175718198e-06,
      "loss": 2.1641,
      "step": 44527
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1944148540496826,
      "learning_rate": 7.6035684345081605e-06,
      "loss": 2.5067,
      "step": 44528
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0753157138824463,
      "learning_rate": 7.603168697360729e-06,
      "loss": 2.2252,
      "step": 44529
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.242161512374878,
      "learning_rate": 7.602768964276572e-06,
      "loss": 2.2971,
      "step": 44530
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1093443632125854,
      "learning_rate": 7.602369235256375e-06,
      "loss": 2.4152,
      "step": 44531
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.077754020690918,
      "learning_rate": 7.601969510300812e-06,
      "loss": 2.3153,
      "step": 44532
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0955677032470703,
      "learning_rate": 7.601569789410561e-06,
      "loss": 2.4646,
      "step": 44533
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0510191917419434,
      "learning_rate": 7.601170072586303e-06,
      "loss": 2.205,
      "step": 44534
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1847748756408691,
      "learning_rate": 7.600770359828711e-06,
      "loss": 2.227,
      "step": 44535
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1075726747512817,
      "learning_rate": 7.600370651138466e-06,
      "loss": 2.5503,
      "step": 44536
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1800365447998047,
      "learning_rate": 7.599970946516244e-06,
      "loss": 2.4669,
      "step": 44537
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.273294448852539,
      "learning_rate": 7.599571245962724e-06,
      "loss": 2.4417,
      "step": 44538
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0088669061660767,
      "learning_rate": 7.599171549478581e-06,
      "loss": 2.3834,
      "step": 44539
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.110111117362976,
      "learning_rate": 7.5987718570644955e-06,
      "loss": 2.3856,
      "step": 44540
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1896395683288574,
      "learning_rate": 7.598372168721144e-06,
      "loss": 2.2142,
      "step": 44541
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1118743419647217,
      "learning_rate": 7.597972484449203e-06,
      "loss": 2.2246,
      "step": 44542
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2959612607955933,
      "learning_rate": 7.597572804249357e-06,
      "loss": 2.2593,
      "step": 44543
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0106213092803955,
      "learning_rate": 7.5971731281222715e-06,
      "loss": 2.1973,
      "step": 44544
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0866341590881348,
      "learning_rate": 7.596773456068631e-06,
      "loss": 2.2071,
      "step": 44545
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.143357753753662,
      "learning_rate": 7.596373788089112e-06,
      "loss": 2.6133,
      "step": 44546
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0207467079162598,
      "learning_rate": 7.5959741241843935e-06,
      "loss": 2.3861,
      "step": 44547
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0126129388809204,
      "learning_rate": 7.59557446435515e-06,
      "loss": 2.3563,
      "step": 44548
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0079702138900757,
      "learning_rate": 7.5951748086020615e-06,
      "loss": 2.3477,
      "step": 44549
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2404451370239258,
      "learning_rate": 7.594775156925804e-06,
      "loss": 2.3919,
      "step": 44550
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1583858728408813,
      "learning_rate": 7.5943755093270586e-06,
      "loss": 2.2521,
      "step": 44551
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0459554195404053,
      "learning_rate": 7.593975865806499e-06,
      "loss": 2.3607,
      "step": 44552
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1563730239868164,
      "learning_rate": 7.593576226364803e-06,
      "loss": 2.1043,
      "step": 44553
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0670825242996216,
      "learning_rate": 7.5931765910026486e-06,
      "loss": 2.4304,
      "step": 44554
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0615979433059692,
      "learning_rate": 7.5927769597207136e-06,
      "loss": 2.3304,
      "step": 44555
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1717534065246582,
      "learning_rate": 7.592377332519678e-06,
      "loss": 2.4377,
      "step": 44556
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.05833101272583,
      "learning_rate": 7.591977709400215e-06,
      "loss": 2.5994,
      "step": 44557
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.118330478668213,
      "learning_rate": 7.591578090363005e-06,
      "loss": 2.2736,
      "step": 44558
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0563281774520874,
      "learning_rate": 7.5911784754087216e-06,
      "loss": 2.2118,
      "step": 44559
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0919158458709717,
      "learning_rate": 7.590778864538045e-06,
      "loss": 2.4344,
      "step": 44560
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0362727642059326,
      "learning_rate": 7.5903792577516535e-06,
      "loss": 2.3024,
      "step": 44561
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0185675621032715,
      "learning_rate": 7.589979655050224e-06,
      "loss": 2.4418,
      "step": 44562
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.139779806137085,
      "learning_rate": 7.589580056434433e-06,
      "loss": 2.3727,
      "step": 44563
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.077284574508667,
      "learning_rate": 7.589180461904957e-06,
      "loss": 2.2109,
      "step": 44564
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0429571866989136,
      "learning_rate": 7.5887808714624764e-06,
      "loss": 2.533,
      "step": 44565
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.049101710319519,
      "learning_rate": 7.588381285107666e-06,
      "loss": 2.4299,
      "step": 44566
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9863414168357849,
      "learning_rate": 7.587981702841206e-06,
      "loss": 2.4638,
      "step": 44567
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0070122480392456,
      "learning_rate": 7.587582124663771e-06,
      "loss": 2.1987,
      "step": 44568
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0719881057739258,
      "learning_rate": 7.587182550576045e-06,
      "loss": 2.2907,
      "step": 44569
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.067472219467163,
      "learning_rate": 7.586782980578694e-06,
      "loss": 2.365,
      "step": 44570
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2092704772949219,
      "learning_rate": 7.586383414672403e-06,
      "loss": 2.3497,
      "step": 44571
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0089834928512573,
      "learning_rate": 7.5859838528578455e-06,
      "loss": 2.3622,
      "step": 44572
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9992860555648804,
      "learning_rate": 7.5855842951357025e-06,
      "loss": 2.3783,
      "step": 44573
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1012907028198242,
      "learning_rate": 7.58518474150665e-06,
      "loss": 2.2908,
      "step": 44574
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1024326086044312,
      "learning_rate": 7.584785191971365e-06,
      "loss": 2.3591,
      "step": 44575
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.05314040184021,
      "learning_rate": 7.584385646530524e-06,
      "loss": 2.3804,
      "step": 44576
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9305835366249084,
      "learning_rate": 7.583986105184806e-06,
      "loss": 2.2107,
      "step": 44577
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0936777591705322,
      "learning_rate": 7.58358656793489e-06,
      "loss": 2.3838,
      "step": 44578
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.18477201461792,
      "learning_rate": 7.58318703478145e-06,
      "loss": 2.2695,
      "step": 44579
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.155545949935913,
      "learning_rate": 7.582787505725162e-06,
      "loss": 2.3671,
      "step": 44580
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0998311042785645,
      "learning_rate": 7.582387980766708e-06,
      "loss": 2.4211,
      "step": 44581
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.03364896774292,
      "learning_rate": 7.581988459906764e-06,
      "loss": 2.0058,
      "step": 44582
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0240780115127563,
      "learning_rate": 7.581588943146003e-06,
      "loss": 2.2191,
      "step": 44583
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0250329971313477,
      "learning_rate": 7.581189430485107e-06,
      "loss": 2.2561,
      "step": 44584
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.106443166732788,
      "learning_rate": 7.580789921924755e-06,
      "loss": 2.4365,
      "step": 44585
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2404416799545288,
      "learning_rate": 7.580390417465617e-06,
      "loss": 2.1736,
      "step": 44586
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1776398420333862,
      "learning_rate": 7.5799909171083756e-06,
      "loss": 2.3955,
      "step": 44587
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.993844747543335,
      "learning_rate": 7.579591420853707e-06,
      "loss": 2.5633,
      "step": 44588
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1956707239151,
      "learning_rate": 7.5791919287022875e-06,
      "loss": 2.1896,
      "step": 44589
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9953764081001282,
      "learning_rate": 7.578792440654795e-06,
      "loss": 2.5693,
      "step": 44590
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9450145959854126,
      "learning_rate": 7.578392956711907e-06,
      "loss": 2.1988,
      "step": 44591
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.044887900352478,
      "learning_rate": 7.577993476874301e-06,
      "loss": 2.2506,
      "step": 44592
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0715001821517944,
      "learning_rate": 7.577594001142654e-06,
      "loss": 2.1511,
      "step": 44593
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1771447658538818,
      "learning_rate": 7.577194529517644e-06,
      "loss": 2.3353,
      "step": 44594
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1342979669570923,
      "learning_rate": 7.57679506199995e-06,
      "loss": 2.3226,
      "step": 44595
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.099800944328308,
      "learning_rate": 7.5763955985902425e-06,
      "loss": 2.247,
      "step": 44596
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9757789373397827,
      "learning_rate": 7.575996139289204e-06,
      "loss": 2.5739,
      "step": 44597
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0804238319396973,
      "learning_rate": 7.575596684097508e-06,
      "loss": 2.2569,
      "step": 44598
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1501214504241943,
      "learning_rate": 7.5751972330158365e-06,
      "loss": 2.2202,
      "step": 44599
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9716143608093262,
      "learning_rate": 7.574797786044863e-06,
      "loss": 2.572,
      "step": 44600
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1337206363677979,
      "learning_rate": 7.574398343185267e-06,
      "loss": 2.3569,
      "step": 44601
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0701102018356323,
      "learning_rate": 7.573998904437723e-06,
      "loss": 2.258,
      "step": 44602
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1037074327468872,
      "learning_rate": 7.5735994698029105e-06,
      "loss": 2.2136,
      "step": 44603
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1399040222167969,
      "learning_rate": 7.573200039281505e-06,
      "loss": 2.4126,
      "step": 44604
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0779242515563965,
      "learning_rate": 7.572800612874186e-06,
      "loss": 2.3589,
      "step": 44605
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0484986305236816,
      "learning_rate": 7.57240119058163e-06,
      "loss": 2.2736,
      "step": 44606
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.109015941619873,
      "learning_rate": 7.572001772404512e-06,
      "loss": 2.1674,
      "step": 44607
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9985873103141785,
      "learning_rate": 7.571602358343511e-06,
      "loss": 2.2661,
      "step": 44608
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1056420803070068,
      "learning_rate": 7.5712029483993054e-06,
      "loss": 2.2282,
      "step": 44609
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.174012303352356,
      "learning_rate": 7.570803542572568e-06,
      "loss": 2.5687,
      "step": 44610
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1563103199005127,
      "learning_rate": 7.5704041408639784e-06,
      "loss": 2.1746,
      "step": 44611
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.005883812904358,
      "learning_rate": 7.570004743274213e-06,
      "loss": 2.4547,
      "step": 44612
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0447356700897217,
      "learning_rate": 7.569605349803952e-06,
      "loss": 2.2583,
      "step": 44613
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0521774291992188,
      "learning_rate": 7.569205960453868e-06,
      "loss": 2.3138,
      "step": 44614
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0359941720962524,
      "learning_rate": 7.568806575224641e-06,
      "loss": 2.3525,
      "step": 44615
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1573201417922974,
      "learning_rate": 7.5684071941169455e-06,
      "loss": 2.4,
      "step": 44616
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.041261911392212,
      "learning_rate": 7.568007817131462e-06,
      "loss": 2.2028,
      "step": 44617
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0066646337509155,
      "learning_rate": 7.567608444268863e-06,
      "loss": 2.2742,
      "step": 44618
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.146655559539795,
      "learning_rate": 7.567209075529831e-06,
      "loss": 2.6364,
      "step": 44619
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1141693592071533,
      "learning_rate": 7.5668097109150395e-06,
      "loss": 2.123,
      "step": 44620
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1130162477493286,
      "learning_rate": 7.566410350425166e-06,
      "loss": 2.1715,
      "step": 44621
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.070739507675171,
      "learning_rate": 7.566010994060893e-06,
      "loss": 2.4286,
      "step": 44622
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.053012728691101,
      "learning_rate": 7.565611641822888e-06,
      "loss": 2.4548,
      "step": 44623
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0492645502090454,
      "learning_rate": 7.5652122937118325e-06,
      "loss": 2.417,
      "step": 44624
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0920816659927368,
      "learning_rate": 7.564812949728403e-06,
      "loss": 2.1596,
      "step": 44625
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.016243577003479,
      "learning_rate": 7.564413609873277e-06,
      "loss": 2.2447,
      "step": 44626
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0823640823364258,
      "learning_rate": 7.564014274147131e-06,
      "loss": 2.3038,
      "step": 44627
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1524806022644043,
      "learning_rate": 7.5636149425506435e-06,
      "loss": 2.4938,
      "step": 44628
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1712725162506104,
      "learning_rate": 7.563215615084489e-06,
      "loss": 2.3466,
      "step": 44629
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0976141691207886,
      "learning_rate": 7.562816291749348e-06,
      "loss": 2.3869,
      "step": 44630
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4226707220077515,
      "learning_rate": 7.562416972545893e-06,
      "loss": 2.51,
      "step": 44631
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2334058284759521,
      "learning_rate": 7.562017657474804e-06,
      "loss": 2.3968,
      "step": 44632
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0976831912994385,
      "learning_rate": 7.561618346536758e-06,
      "loss": 2.4786,
      "step": 44633
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1804978847503662,
      "learning_rate": 7.561219039732433e-06,
      "loss": 2.451,
      "step": 44634
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1700103282928467,
      "learning_rate": 7.560819737062504e-06,
      "loss": 2.3443,
      "step": 44635
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2295950651168823,
      "learning_rate": 7.560420438527645e-06,
      "loss": 2.4425,
      "step": 44636
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1546207666397095,
      "learning_rate": 7.5600211441285365e-06,
      "loss": 2.4121,
      "step": 44637
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.126063585281372,
      "learning_rate": 7.559621853865855e-06,
      "loss": 2.1139,
      "step": 44638
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9703539609909058,
      "learning_rate": 7.559222567740278e-06,
      "loss": 2.3399,
      "step": 44639
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1124520301818848,
      "learning_rate": 7.55882328575248e-06,
      "loss": 2.0623,
      "step": 44640
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2104270458221436,
      "learning_rate": 7.558424007903142e-06,
      "loss": 2.3942,
      "step": 44641
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1092000007629395,
      "learning_rate": 7.558024734192938e-06,
      "loss": 2.4444,
      "step": 44642
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0498894453048706,
      "learning_rate": 7.557625464622544e-06,
      "loss": 2.3238,
      "step": 44643
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0516740083694458,
      "learning_rate": 7.557226199192637e-06,
      "loss": 2.7382,
      "step": 44644
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.090207815170288,
      "learning_rate": 7.556826937903897e-06,
      "loss": 2.3426,
      "step": 44645
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9979285597801208,
      "learning_rate": 7.556427680756999e-06,
      "loss": 2.3876,
      "step": 44646
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1378575563430786,
      "learning_rate": 7.5560284277526185e-06,
      "loss": 2.428,
      "step": 44647
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0201969146728516,
      "learning_rate": 7.555629178891439e-06,
      "loss": 2.2828,
      "step": 44648
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0424820184707642,
      "learning_rate": 7.555229934174127e-06,
      "loss": 2.5297,
      "step": 44649
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0250667333602905,
      "learning_rate": 7.554830693601364e-06,
      "loss": 2.2278,
      "step": 44650
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9336422681808472,
      "learning_rate": 7.554431457173827e-06,
      "loss": 2.0735,
      "step": 44651
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0752112865447998,
      "learning_rate": 7.554032224892193e-06,
      "loss": 2.3306,
      "step": 44652
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.090315818786621,
      "learning_rate": 7.55363299675714e-06,
      "loss": 2.4059,
      "step": 44653
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0722017288208008,
      "learning_rate": 7.5532337727693415e-06,
      "loss": 2.5373,
      "step": 44654
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0729472637176514,
      "learning_rate": 7.552834552929478e-06,
      "loss": 2.3718,
      "step": 44655
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.070129632949829,
      "learning_rate": 7.552435337238224e-06,
      "loss": 2.4563,
      "step": 44656
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1229168176651,
      "learning_rate": 7.552036125696256e-06,
      "loss": 2.4409,
      "step": 44657
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.049404263496399,
      "learning_rate": 7.551636918304253e-06,
      "loss": 2.2066,
      "step": 44658
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1929025650024414,
      "learning_rate": 7.551237715062889e-06,
      "loss": 2.4362,
      "step": 44659
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0937609672546387,
      "learning_rate": 7.550838515972843e-06,
      "loss": 2.2668,
      "step": 44660
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1073083877563477,
      "learning_rate": 7.550439321034795e-06,
      "loss": 2.4282,
      "step": 44661
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2758444547653198,
      "learning_rate": 7.550040130249414e-06,
      "loss": 2.5293,
      "step": 44662
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0504764318466187,
      "learning_rate": 7.549640943617379e-06,
      "loss": 2.2418,
      "step": 44663
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3997938632965088,
      "learning_rate": 7.54924176113937e-06,
      "loss": 2.4751,
      "step": 44664
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2993439435958862,
      "learning_rate": 7.5488425828160605e-06,
      "loss": 2.345,
      "step": 44665
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5448224544525146,
      "learning_rate": 7.548443408648129e-06,
      "loss": 2.2961,
      "step": 44666
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1640793085098267,
      "learning_rate": 7.548044238636251e-06,
      "loss": 2.3092,
      "step": 44667
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0122110843658447,
      "learning_rate": 7.5476450727811045e-06,
      "loss": 2.4351,
      "step": 44668
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1651368141174316,
      "learning_rate": 7.547245911083367e-06,
      "loss": 2.31,
      "step": 44669
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.063391089439392,
      "learning_rate": 7.546846753543713e-06,
      "loss": 2.425,
      "step": 44670
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0180494785308838,
      "learning_rate": 7.546447600162819e-06,
      "loss": 2.1838,
      "step": 44671
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0637050867080688,
      "learning_rate": 7.546048450941363e-06,
      "loss": 2.4932,
      "step": 44672
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1088507175445557,
      "learning_rate": 7.545649305880022e-06,
      "loss": 2.2678,
      "step": 44673
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0775213241577148,
      "learning_rate": 7.545250164979474e-06,
      "loss": 2.302,
      "step": 44674
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0687148571014404,
      "learning_rate": 7.544851028240393e-06,
      "loss": 2.3491,
      "step": 44675
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0833829641342163,
      "learning_rate": 7.544451895663454e-06,
      "loss": 2.4601,
      "step": 44676
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9374187588691711,
      "learning_rate": 7.544052767249337e-06,
      "loss": 2.1585,
      "step": 44677
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9884668588638306,
      "learning_rate": 7.5436536429987175e-06,
      "loss": 2.3318,
      "step": 44678
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0401524305343628,
      "learning_rate": 7.543254522912271e-06,
      "loss": 2.2858,
      "step": 44679
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1547142267227173,
      "learning_rate": 7.542855406990677e-06,
      "loss": 2.1808,
      "step": 44680
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.053562879562378,
      "learning_rate": 7.542456295234609e-06,
      "loss": 2.329,
      "step": 44681
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.101335883140564,
      "learning_rate": 7.542057187644745e-06,
      "loss": 2.4909,
      "step": 44682
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5104609727859497,
      "learning_rate": 7.541658084221763e-06,
      "loss": 2.1357,
      "step": 44683
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0919039249420166,
      "learning_rate": 7.541258984966338e-06,
      "loss": 2.398,
      "step": 44684
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.005780816078186,
      "learning_rate": 7.540859889879145e-06,
      "loss": 2.5787,
      "step": 44685
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1888617277145386,
      "learning_rate": 7.5404607989608635e-06,
      "loss": 2.3947,
      "step": 44686
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.061687707901001,
      "learning_rate": 7.54006171221217e-06,
      "loss": 2.3153,
      "step": 44687
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1104038953781128,
      "learning_rate": 7.539662629633743e-06,
      "loss": 2.5104,
      "step": 44688
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1083780527114868,
      "learning_rate": 7.539263551226252e-06,
      "loss": 2.6004,
      "step": 44689
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0028586387634277,
      "learning_rate": 7.5388644769903774e-06,
      "loss": 2.3126,
      "step": 44690
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1627247333526611,
      "learning_rate": 7.538465406926796e-06,
      "loss": 2.097,
      "step": 44691
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1329346895217896,
      "learning_rate": 7.538066341036186e-06,
      "loss": 2.314,
      "step": 44692
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.167851209640503,
      "learning_rate": 7.53766727931922e-06,
      "loss": 2.2239,
      "step": 44693
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0907022953033447,
      "learning_rate": 7.537268221776577e-06,
      "loss": 2.5004,
      "step": 44694
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1557590961456299,
      "learning_rate": 7.5368691684089346e-06,
      "loss": 2.2372,
      "step": 44695
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.023833990097046,
      "learning_rate": 7.536470119216966e-06,
      "loss": 2.3071,
      "step": 44696
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0728145837783813,
      "learning_rate": 7.536071074201353e-06,
      "loss": 2.2718,
      "step": 44697
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0830460786819458,
      "learning_rate": 7.535672033362767e-06,
      "loss": 2.2652,
      "step": 44698
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0993242263793945,
      "learning_rate": 7.535272996701886e-06,
      "loss": 2.4033,
      "step": 44699
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0732523202896118,
      "learning_rate": 7.534873964219385e-06,
      "loss": 2.445,
      "step": 44700
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0882971286773682,
      "learning_rate": 7.5344749359159465e-06,
      "loss": 2.3206,
      "step": 44701
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0464881658554077,
      "learning_rate": 7.53407591179224e-06,
      "loss": 2.083,
      "step": 44702
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0290223360061646,
      "learning_rate": 7.533676891848945e-06,
      "loss": 2.1711,
      "step": 44703
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0981003046035767,
      "learning_rate": 7.533277876086737e-06,
      "loss": 2.3532,
      "step": 44704
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0667804479599,
      "learning_rate": 7.5328788645062935e-06,
      "loss": 2.5458,
      "step": 44705
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1102627515792847,
      "learning_rate": 7.532479857108289e-06,
      "loss": 2.3411,
      "step": 44706
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0781958103179932,
      "learning_rate": 7.532080853893402e-06,
      "loss": 2.1054,
      "step": 44707
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0664664506912231,
      "learning_rate": 7.531681854862308e-06,
      "loss": 2.2904,
      "step": 44708
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.13226318359375,
      "learning_rate": 7.531282860015684e-06,
      "loss": 2.4379,
      "step": 44709
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1495978832244873,
      "learning_rate": 7.5308838693542055e-06,
      "loss": 2.5548,
      "step": 44710
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0852779150009155,
      "learning_rate": 7.5304848828785495e-06,
      "loss": 2.3169,
      "step": 44711
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0560302734375,
      "learning_rate": 7.530085900589392e-06,
      "loss": 2.1082,
      "step": 44712
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0868488550186157,
      "learning_rate": 7.529686922487411e-06,
      "loss": 2.4048,
      "step": 44713
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0688635110855103,
      "learning_rate": 7.529287948573285e-06,
      "loss": 2.4481,
      "step": 44714
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1157530546188354,
      "learning_rate": 7.528888978847682e-06,
      "loss": 2.2091,
      "step": 44715
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.131771445274353,
      "learning_rate": 7.528490013311285e-06,
      "loss": 2.3503,
      "step": 44716
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0440481901168823,
      "learning_rate": 7.528091051964768e-06,
      "loss": 2.3739,
      "step": 44717
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1188379526138306,
      "learning_rate": 7.5276920948088075e-06,
      "loss": 2.2148,
      "step": 44718
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.146978735923767,
      "learning_rate": 7.5272931418440805e-06,
      "loss": 2.3183,
      "step": 44719
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.191373586654663,
      "learning_rate": 7.526894193071264e-06,
      "loss": 2.4036,
      "step": 44720
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9765443205833435,
      "learning_rate": 7.526495248491032e-06,
      "loss": 2.0193,
      "step": 44721
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1655946969985962,
      "learning_rate": 7.526096308104063e-06,
      "loss": 2.4827,
      "step": 44722
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.135123372077942,
      "learning_rate": 7.525697371911034e-06,
      "loss": 2.3518,
      "step": 44723
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1335173845291138,
      "learning_rate": 7.525298439912621e-06,
      "loss": 2.4449,
      "step": 44724
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0479635000228882,
      "learning_rate": 7.524899512109496e-06,
      "loss": 2.3494,
      "step": 44725
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1162874698638916,
      "learning_rate": 7.52450058850234e-06,
      "loss": 2.2355,
      "step": 44726
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.064842700958252,
      "learning_rate": 7.52410166909183e-06,
      "loss": 2.4075,
      "step": 44727
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1851497888565063,
      "learning_rate": 7.523702753878638e-06,
      "loss": 2.1639,
      "step": 44728
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1900897026062012,
      "learning_rate": 7.523303842863442e-06,
      "loss": 2.2636,
      "step": 44729
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1390470266342163,
      "learning_rate": 7.5229049360469185e-06,
      "loss": 2.4044,
      "step": 44730
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.025831699371338,
      "learning_rate": 7.522506033429746e-06,
      "loss": 2.4505,
      "step": 44731
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1330959796905518,
      "learning_rate": 7.522107135012598e-06,
      "loss": 2.3708,
      "step": 44732
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.029051661491394,
      "learning_rate": 7.521708240796148e-06,
      "loss": 2.371,
      "step": 44733
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.096481442451477,
      "learning_rate": 7.5213093507810784e-06,
      "loss": 2.3008,
      "step": 44734
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0867241621017456,
      "learning_rate": 7.520910464968062e-06,
      "loss": 2.1614,
      "step": 44735
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0128337144851685,
      "learning_rate": 7.520511583357776e-06,
      "loss": 2.3998,
      "step": 44736
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0245654582977295,
      "learning_rate": 7.520112705950896e-06,
      "loss": 2.383,
      "step": 44737
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1775819063186646,
      "learning_rate": 7.519713832748099e-06,
      "loss": 2.3327,
      "step": 44738
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0432546138763428,
      "learning_rate": 7.51931496375006e-06,
      "loss": 2.2108,
      "step": 44739
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0031181573867798,
      "learning_rate": 7.5189160989574605e-06,
      "loss": 2.2034,
      "step": 44740
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.057181477546692,
      "learning_rate": 7.518517238370967e-06,
      "loss": 2.5151,
      "step": 44741
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2181942462921143,
      "learning_rate": 7.518118381991262e-06,
      "loss": 2.3506,
      "step": 44742
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.390190601348877,
      "learning_rate": 7.517719529819018e-06,
      "loss": 2.3404,
      "step": 44743
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.073534607887268,
      "learning_rate": 7.517320681854916e-06,
      "loss": 2.3167,
      "step": 44744
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0698449611663818,
      "learning_rate": 7.516921838099629e-06,
      "loss": 2.3294,
      "step": 44745
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1983562707901,
      "learning_rate": 7.516522998553835e-06,
      "loss": 2.2741,
      "step": 44746
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0571426153182983,
      "learning_rate": 7.516124163218208e-06,
      "loss": 2.3213,
      "step": 44747
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0956416130065918,
      "learning_rate": 7.515725332093424e-06,
      "loss": 2.4287,
      "step": 44748
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2447535991668701,
      "learning_rate": 7.515326505180163e-06,
      "loss": 2.2898,
      "step": 44749
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0779474973678589,
      "learning_rate": 7.514927682479096e-06,
      "loss": 2.2755,
      "step": 44750
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.087258219718933,
      "learning_rate": 7.514528863990902e-06,
      "loss": 2.254,
      "step": 44751
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1189714670181274,
      "learning_rate": 7.514130049716259e-06,
      "loss": 2.28,
      "step": 44752
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1978825330734253,
      "learning_rate": 7.513731239655842e-06,
      "loss": 2.4036,
      "step": 44753
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0609744787216187,
      "learning_rate": 7.513332433810321e-06,
      "loss": 2.6038,
      "step": 44754
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.336333990097046,
      "learning_rate": 7.51293363218038e-06,
      "loss": 2.4056,
      "step": 44755
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9877228736877441,
      "learning_rate": 7.512534834766689e-06,
      "loss": 2.3667,
      "step": 44756
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1605744361877441,
      "learning_rate": 7.512136041569929e-06,
      "loss": 2.2171,
      "step": 44757
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0565412044525146,
      "learning_rate": 7.511737252590773e-06,
      "loss": 2.1861,
      "step": 44758
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.101188063621521,
      "learning_rate": 7.5113384678299e-06,
      "loss": 2.2534,
      "step": 44759
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0389070510864258,
      "learning_rate": 7.510939687287984e-06,
      "loss": 2.4203,
      "step": 44760
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0443589687347412,
      "learning_rate": 7.5105409109657004e-06,
      "loss": 2.2717,
      "step": 44761
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9991427659988403,
      "learning_rate": 7.510142138863725e-06,
      "loss": 2.5531,
      "step": 44762
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9904520511627197,
      "learning_rate": 7.509743370982736e-06,
      "loss": 2.5025,
      "step": 44763
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1085455417633057,
      "learning_rate": 7.509344607323408e-06,
      "loss": 2.1637,
      "step": 44764
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0177927017211914,
      "learning_rate": 7.508945847886418e-06,
      "loss": 2.3605,
      "step": 44765
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1955509185791016,
      "learning_rate": 7.50854709267244e-06,
      "loss": 2.4134,
      "step": 44766
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0854554176330566,
      "learning_rate": 7.508148341682156e-06,
      "loss": 2.3359,
      "step": 44767
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9980031251907349,
      "learning_rate": 7.507749594916234e-06,
      "loss": 2.1505,
      "step": 44768
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.12161386013031,
      "learning_rate": 7.507350852375353e-06,
      "loss": 2.3556,
      "step": 44769
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0998643636703491,
      "learning_rate": 7.5069521140601885e-06,
      "loss": 2.2537,
      "step": 44770
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1321606636047363,
      "learning_rate": 7.506553379971417e-06,
      "loss": 2.0555,
      "step": 44771
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1609370708465576,
      "learning_rate": 7.506154650109716e-06,
      "loss": 2.3685,
      "step": 44772
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0325552225112915,
      "learning_rate": 7.50575592447576e-06,
      "loss": 2.3994,
      "step": 44773
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3754935264587402,
      "learning_rate": 7.505357203070225e-06,
      "loss": 2.4655,
      "step": 44774
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2380112409591675,
      "learning_rate": 7.504958485893788e-06,
      "loss": 2.3344,
      "step": 44775
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1418572664260864,
      "learning_rate": 7.504559772947122e-06,
      "loss": 2.3722,
      "step": 44776
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1652663946151733,
      "learning_rate": 7.504161064230906e-06,
      "loss": 2.2397,
      "step": 44777
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0420005321502686,
      "learning_rate": 7.503762359745815e-06,
      "loss": 2.1988,
      "step": 44778
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1584951877593994,
      "learning_rate": 7.503363659492524e-06,
      "loss": 2.3675,
      "step": 44779
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2124117612838745,
      "learning_rate": 7.502964963471715e-06,
      "loss": 2.3806,
      "step": 44780
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0704058408737183,
      "learning_rate": 7.502566271684053e-06,
      "loss": 2.4264,
      "step": 44781
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1252061128616333,
      "learning_rate": 7.5021675841302195e-06,
      "loss": 2.2757,
      "step": 44782
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.076648235321045,
      "learning_rate": 7.501768900810892e-06,
      "loss": 2.1829,
      "step": 44783
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1169958114624023,
      "learning_rate": 7.501370221726743e-06,
      "loss": 2.5597,
      "step": 44784
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0622925758361816,
      "learning_rate": 7.500971546878452e-06,
      "loss": 2.4495,
      "step": 44785
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.977043628692627,
      "learning_rate": 7.500572876266692e-06,
      "loss": 2.3738,
      "step": 44786
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.061747431755066,
      "learning_rate": 7.500174209892141e-06,
      "loss": 2.2452,
      "step": 44787
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1068676710128784,
      "learning_rate": 7.499775547755472e-06,
      "loss": 2.2838,
      "step": 44788
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5975992679595947,
      "learning_rate": 7.499376889857363e-06,
      "loss": 2.3161,
      "step": 44789
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0048507452011108,
      "learning_rate": 7.4989782361984886e-06,
      "loss": 2.5689,
      "step": 44790
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1035679578781128,
      "learning_rate": 7.498579586779526e-06,
      "loss": 2.239,
      "step": 44791
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.064817190170288,
      "learning_rate": 7.498180941601151e-06,
      "loss": 2.4036,
      "step": 44792
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0641157627105713,
      "learning_rate": 7.497782300664043e-06,
      "loss": 2.5353,
      "step": 44793
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0853859186172485,
      "learning_rate": 7.497383663968868e-06,
      "loss": 2.6559,
      "step": 44794
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0711185932159424,
      "learning_rate": 7.496985031516308e-06,
      "loss": 2.5784,
      "step": 44795
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.008939266204834,
      "learning_rate": 7.496586403307038e-06,
      "loss": 2.3802,
      "step": 44796
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.071426510810852,
      "learning_rate": 7.496187779341735e-06,
      "loss": 2.1125,
      "step": 44797
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1724934577941895,
      "learning_rate": 7.495789159621072e-06,
      "loss": 2.2529,
      "step": 44798
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.076838731765747,
      "learning_rate": 7.495390544145728e-06,
      "loss": 2.2781,
      "step": 44799
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1414204835891724,
      "learning_rate": 7.4949919329163755e-06,
      "loss": 2.4846,
      "step": 44800
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0932823419570923,
      "learning_rate": 7.494593325933694e-06,
      "loss": 2.3759,
      "step": 44801
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1996147632598877,
      "learning_rate": 7.494194723198356e-06,
      "loss": 2.2274,
      "step": 44802
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1144945621490479,
      "learning_rate": 7.493796124711039e-06,
      "loss": 2.3698,
      "step": 44803
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1833208799362183,
      "learning_rate": 7.493397530472419e-06,
      "loss": 2.4696,
      "step": 44804
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2460637092590332,
      "learning_rate": 7.49299894048317e-06,
      "loss": 2.4505,
      "step": 44805
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.080819010734558,
      "learning_rate": 7.492600354743973e-06,
      "loss": 2.4921,
      "step": 44806
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0488203763961792,
      "learning_rate": 7.492201773255496e-06,
      "loss": 2.3608,
      "step": 44807
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.100998044013977,
      "learning_rate": 7.491803196018416e-06,
      "loss": 2.4877,
      "step": 44808
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0148171186447144,
      "learning_rate": 7.491404623033413e-06,
      "loss": 2.1625,
      "step": 44809
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.940366268157959,
      "learning_rate": 7.49100605430116e-06,
      "loss": 2.389,
      "step": 44810
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.149715781211853,
      "learning_rate": 7.490607489822332e-06,
      "loss": 2.4195,
      "step": 44811
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1896387338638306,
      "learning_rate": 7.4902089295976076e-06,
      "loss": 2.2743,
      "step": 44812
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0036226511001587,
      "learning_rate": 7.489810373627659e-06,
      "loss": 2.2766,
      "step": 44813
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1694597005844116,
      "learning_rate": 7.489411821913165e-06,
      "loss": 2.4047,
      "step": 44814
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0972415208816528,
      "learning_rate": 7.489013274454802e-06,
      "loss": 2.2497,
      "step": 44815
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0121322870254517,
      "learning_rate": 7.488614731253241e-06,
      "loss": 2.4624,
      "step": 44816
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3403511047363281,
      "learning_rate": 7.4882161923091586e-06,
      "loss": 2.128,
      "step": 44817
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1209591627120972,
      "learning_rate": 7.487817657623233e-06,
      "loss": 2.4192,
      "step": 44818
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0006215572357178,
      "learning_rate": 7.487419127196142e-06,
      "loss": 2.0968,
      "step": 44819
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2063782215118408,
      "learning_rate": 7.487020601028554e-06,
      "loss": 2.4862,
      "step": 44820
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.040012240409851,
      "learning_rate": 7.486622079121153e-06,
      "loss": 2.3097,
      "step": 44821
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0085639953613281,
      "learning_rate": 7.486223561474606e-06,
      "loss": 2.5799,
      "step": 44822
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.195852518081665,
      "learning_rate": 7.485825048089593e-06,
      "loss": 2.124,
      "step": 44823
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0592422485351562,
      "learning_rate": 7.4854265389667906e-06,
      "loss": 2.1753,
      "step": 44824
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9925411939620972,
      "learning_rate": 7.485028034106871e-06,
      "loss": 2.3385,
      "step": 44825
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1593539714813232,
      "learning_rate": 7.484629533510515e-06,
      "loss": 2.4987,
      "step": 44826
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0512977838516235,
      "learning_rate": 7.484231037178393e-06,
      "loss": 2.4509,
      "step": 44827
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0034745931625366,
      "learning_rate": 7.4838325451111824e-06,
      "loss": 2.4281,
      "step": 44828
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0627095699310303,
      "learning_rate": 7.483434057309561e-06,
      "loss": 2.3629,
      "step": 44829
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1121944189071655,
      "learning_rate": 7.483035573774201e-06,
      "loss": 1.9955,
      "step": 44830
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1945698261260986,
      "learning_rate": 7.482637094505779e-06,
      "loss": 2.1636,
      "step": 44831
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1036205291748047,
      "learning_rate": 7.482238619504971e-06,
      "loss": 2.3325,
      "step": 44832
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0018703937530518,
      "learning_rate": 7.481840148772458e-06,
      "loss": 2.3059,
      "step": 44833
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0037190914154053,
      "learning_rate": 7.481441682308904e-06,
      "loss": 2.3218,
      "step": 44834
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2417908906936646,
      "learning_rate": 7.481043220114992e-06,
      "loss": 2.1483,
      "step": 44835
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0808647871017456,
      "learning_rate": 7.480644762191393e-06,
      "loss": 2.0664,
      "step": 44836
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0779013633728027,
      "learning_rate": 7.480246308538788e-06,
      "loss": 2.3988,
      "step": 44837
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1033096313476562,
      "learning_rate": 7.47984785915785e-06,
      "loss": 2.3061,
      "step": 44838
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0695794820785522,
      "learning_rate": 7.479449414049252e-06,
      "loss": 2.1696,
      "step": 44839
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1755067110061646,
      "learning_rate": 7.4790509732136726e-06,
      "loss": 2.4,
      "step": 44840
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0355644226074219,
      "learning_rate": 7.478652536651788e-06,
      "loss": 2.5105,
      "step": 44841
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1264114379882812,
      "learning_rate": 7.478254104364273e-06,
      "loss": 2.367,
      "step": 44842
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0619173049926758,
      "learning_rate": 7.4778556763518005e-06,
      "loss": 2.373,
      "step": 44843
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0792410373687744,
      "learning_rate": 7.477457252615048e-06,
      "loss": 2.5789,
      "step": 44844
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1316245794296265,
      "learning_rate": 7.47705883315469e-06,
      "loss": 2.3663,
      "step": 44845
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9910693764686584,
      "learning_rate": 7.476660417971404e-06,
      "loss": 2.3889,
      "step": 44846
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.253432035446167,
      "learning_rate": 7.476262007065863e-06,
      "loss": 2.5788,
      "step": 44847
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1372994184494019,
      "learning_rate": 7.475863600438743e-06,
      "loss": 2.4768,
      "step": 44848
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1666746139526367,
      "learning_rate": 7.4754651980907215e-06,
      "loss": 2.2814,
      "step": 44849
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4147286415100098,
      "learning_rate": 7.475066800022471e-06,
      "loss": 2.257,
      "step": 44850
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1369818449020386,
      "learning_rate": 7.474668406234666e-06,
      "loss": 2.2992,
      "step": 44851
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1947436332702637,
      "learning_rate": 7.474270016727986e-06,
      "loss": 2.3271,
      "step": 44852
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.173445463180542,
      "learning_rate": 7.4738716315031035e-06,
      "loss": 2.3847,
      "step": 44853
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0661091804504395,
      "learning_rate": 7.473473250560694e-06,
      "loss": 2.1238,
      "step": 44854
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0607436895370483,
      "learning_rate": 7.473074873901435e-06,
      "loss": 2.3661,
      "step": 44855
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1242965459823608,
      "learning_rate": 7.472676501526e-06,
      "loss": 2.2785,
      "step": 44856
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0607812404632568,
      "learning_rate": 7.472278133435064e-06,
      "loss": 2.5867,
      "step": 44857
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1361398696899414,
      "learning_rate": 7.471879769629303e-06,
      "loss": 2.4733,
      "step": 44858
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1394925117492676,
      "learning_rate": 7.471481410109398e-06,
      "loss": 2.2972,
      "step": 44859
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.012246012687683,
      "learning_rate": 7.4710830548760126e-06,
      "loss": 2.4899,
      "step": 44860
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1565725803375244,
      "learning_rate": 7.470684703929829e-06,
      "loss": 2.3475,
      "step": 44861
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9999328255653381,
      "learning_rate": 7.470286357271522e-06,
      "loss": 2.185,
      "step": 44862
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.277176856994629,
      "learning_rate": 7.4698880149017676e-06,
      "loss": 2.4655,
      "step": 44863
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1413450241088867,
      "learning_rate": 7.469489676821239e-06,
      "loss": 2.3355,
      "step": 44864
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0423858165740967,
      "learning_rate": 7.4690913430306124e-06,
      "loss": 2.4622,
      "step": 44865
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.071720004081726,
      "learning_rate": 7.468693013530565e-06,
      "loss": 2.4254,
      "step": 44866
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3811438083648682,
      "learning_rate": 7.468294688321769e-06,
      "loss": 2.2169,
      "step": 44867
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1458405256271362,
      "learning_rate": 7.467896367404901e-06,
      "loss": 2.3486,
      "step": 44868
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0066888332366943,
      "learning_rate": 7.467498050780637e-06,
      "loss": 2.2308,
      "step": 44869
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.093896508216858,
      "learning_rate": 7.467099738449653e-06,
      "loss": 2.3782,
      "step": 44870
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.096488356590271,
      "learning_rate": 7.466701430412621e-06,
      "loss": 2.3323,
      "step": 44871
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0811092853546143,
      "learning_rate": 7.466303126670222e-06,
      "loss": 2.1841,
      "step": 44872
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1185637712478638,
      "learning_rate": 7.465904827223123e-06,
      "loss": 2.4967,
      "step": 44873
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2397981882095337,
      "learning_rate": 7.465506532072004e-06,
      "loss": 2.1965,
      "step": 44874
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.038992166519165,
      "learning_rate": 7.4651082412175405e-06,
      "loss": 2.0757,
      "step": 44875
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1130906343460083,
      "learning_rate": 7.464709954660407e-06,
      "loss": 2.3003,
      "step": 44876
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1437089443206787,
      "learning_rate": 7.464311672401281e-06,
      "loss": 2.5782,
      "step": 44877
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1364154815673828,
      "learning_rate": 7.463913394440831e-06,
      "loss": 2.3163,
      "step": 44878
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1006672382354736,
      "learning_rate": 7.46351512077974e-06,
      "loss": 2.4588,
      "step": 44879
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0526626110076904,
      "learning_rate": 7.4631168514186765e-06,
      "loss": 2.1335,
      "step": 44880
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.186787724494934,
      "learning_rate": 7.462718586358322e-06,
      "loss": 2.5346,
      "step": 44881
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0023800134658813,
      "learning_rate": 7.4623203255993474e-06,
      "loss": 2.2987,
      "step": 44882
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.021506905555725,
      "learning_rate": 7.4619220691424286e-06,
      "loss": 2.2951,
      "step": 44883
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.094158411026001,
      "learning_rate": 7.461523816988241e-06,
      "loss": 2.174,
      "step": 44884
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0387792587280273,
      "learning_rate": 7.461125569137465e-06,
      "loss": 2.3024,
      "step": 44885
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.182215929031372,
      "learning_rate": 7.460727325590766e-06,
      "loss": 2.344,
      "step": 44886
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.076692819595337,
      "learning_rate": 7.460329086348823e-06,
      "loss": 2.4097,
      "step": 44887
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0079065561294556,
      "learning_rate": 7.459930851412313e-06,
      "loss": 2.366,
      "step": 44888
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1026462316513062,
      "learning_rate": 7.459532620781911e-06,
      "loss": 2.4715,
      "step": 44889
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0770381689071655,
      "learning_rate": 7.459134394458289e-06,
      "loss": 2.2761,
      "step": 44890
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0569933652877808,
      "learning_rate": 7.458736172442127e-06,
      "loss": 2.2532,
      "step": 44891
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0476205348968506,
      "learning_rate": 7.458337954734096e-06,
      "loss": 2.3092,
      "step": 44892
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1254724264144897,
      "learning_rate": 7.457939741334872e-06,
      "loss": 2.474,
      "step": 44893
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.00843346118927,
      "learning_rate": 7.457541532245131e-06,
      "loss": 2.184,
      "step": 44894
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.038859486579895,
      "learning_rate": 7.457143327465548e-06,
      "loss": 2.4096,
      "step": 44895
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0645990371704102,
      "learning_rate": 7.456745126996796e-06,
      "loss": 2.3997,
      "step": 44896
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0348314046859741,
      "learning_rate": 7.456346930839553e-06,
      "loss": 2.3215,
      "step": 44897
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0769569873809814,
      "learning_rate": 7.455948738994498e-06,
      "loss": 2.422,
      "step": 44898
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1244006156921387,
      "learning_rate": 7.455550551462295e-06,
      "loss": 2.2673,
      "step": 44899
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0699708461761475,
      "learning_rate": 7.4551523682436255e-06,
      "loss": 2.3128,
      "step": 44900
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0937862396240234,
      "learning_rate": 7.454754189339164e-06,
      "loss": 2.3003,
      "step": 44901
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2270499467849731,
      "learning_rate": 7.454356014749585e-06,
      "loss": 2.268,
      "step": 44902
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1300525665283203,
      "learning_rate": 7.453957844475564e-06,
      "loss": 2.3528,
      "step": 44903
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0418998003005981,
      "learning_rate": 7.4535596785177766e-06,
      "loss": 2.3514,
      "step": 44904
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0859416723251343,
      "learning_rate": 7.453161516876899e-06,
      "loss": 2.3485,
      "step": 44905
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0595943927764893,
      "learning_rate": 7.452763359553603e-06,
      "loss": 2.2549,
      "step": 44906
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0834017992019653,
      "learning_rate": 7.452365206548564e-06,
      "loss": 2.3641,
      "step": 44907
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0764055252075195,
      "learning_rate": 7.451967057862457e-06,
      "loss": 2.117,
      "step": 44908
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2523741722106934,
      "learning_rate": 7.451568913495959e-06,
      "loss": 2.3202,
      "step": 44909
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0675241947174072,
      "learning_rate": 7.451170773449744e-06,
      "loss": 2.5833,
      "step": 44910
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0602458715438843,
      "learning_rate": 7.450772637724486e-06,
      "loss": 2.265,
      "step": 44911
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.180398941040039,
      "learning_rate": 7.450374506320866e-06,
      "loss": 2.4375,
      "step": 44912
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9663175344467163,
      "learning_rate": 7.449976379239548e-06,
      "loss": 2.2847,
      "step": 44913
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.037866234779358,
      "learning_rate": 7.4495782564812135e-06,
      "loss": 2.3542,
      "step": 44914
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.9702056646347046,
      "learning_rate": 7.449180138046536e-06,
      "loss": 2.2665,
      "step": 44915
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1741447448730469,
      "learning_rate": 7.448782023936192e-06,
      "loss": 2.294,
      "step": 44916
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1494972705841064,
      "learning_rate": 7.448383914150855e-06,
      "loss": 2.3733,
      "step": 44917
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0887097120285034,
      "learning_rate": 7.447985808691201e-06,
      "loss": 2.2869,
      "step": 44918
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1343965530395508,
      "learning_rate": 7.4475877075579025e-06,
      "loss": 2.4654,
      "step": 44919
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.062476396560669,
      "learning_rate": 7.447189610751637e-06,
      "loss": 2.3325,
      "step": 44920
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1820886135101318,
      "learning_rate": 7.446791518273078e-06,
      "loss": 2.3313,
      "step": 44921
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.058358073234558,
      "learning_rate": 7.446393430122902e-06,
      "loss": 2.1893,
      "step": 44922
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0391813516616821,
      "learning_rate": 7.445995346301782e-06,
      "loss": 2.3566,
      "step": 44923
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0974708795547485,
      "learning_rate": 7.445597266810392e-06,
      "loss": 2.3969,
      "step": 44924
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.038128137588501,
      "learning_rate": 7.445199191649415e-06,
      "loss": 2.254,
      "step": 44925
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1361982822418213,
      "learning_rate": 7.444801120819514e-06,
      "loss": 2.2299,
      "step": 44926
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.0440746545791626,
      "learning_rate": 7.444403054321368e-06,
      "loss": 2.5122,
      "step": 44927
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.1717041730880737,
      "learning_rate": 7.444004992155655e-06,
      "loss": 2.2661,
      "step": 44928
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2585943937301636,
      "learning_rate": 7.443606934323047e-06,
      "loss": 2.495,
      "step": 44929
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9860138297080994,
      "learning_rate": 7.443208880824218e-06,
      "loss": 2.3312,
      "step": 44930
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1093400716781616,
      "learning_rate": 7.442810831659846e-06,
      "loss": 2.3964,
      "step": 44931
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0463216304779053,
      "learning_rate": 7.442412786830604e-06,
      "loss": 2.1466,
      "step": 44932
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0684453248977661,
      "learning_rate": 7.442014746337169e-06,
      "loss": 2.3127,
      "step": 44933
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.055121898651123,
      "learning_rate": 7.441616710180212e-06,
      "loss": 2.3984,
      "step": 44934
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1563782691955566,
      "learning_rate": 7.441218678360409e-06,
      "loss": 2.3673,
      "step": 44935
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.042878270149231,
      "learning_rate": 7.440820650878435e-06,
      "loss": 2.5444,
      "step": 44936
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.964999258518219,
      "learning_rate": 7.440422627734965e-06,
      "loss": 2.6283,
      "step": 44937
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0167715549468994,
      "learning_rate": 7.440024608930676e-06,
      "loss": 2.4284,
      "step": 44938
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1234956979751587,
      "learning_rate": 7.439626594466241e-06,
      "loss": 2.3013,
      "step": 44939
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.117596983909607,
      "learning_rate": 7.439228584342333e-06,
      "loss": 2.1419,
      "step": 44940
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0273245573043823,
      "learning_rate": 7.4388305785596275e-06,
      "loss": 2.3204,
      "step": 44941
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0578675270080566,
      "learning_rate": 7.4384325771187995e-06,
      "loss": 2.273,
      "step": 44942
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.141033411026001,
      "learning_rate": 7.438034580020523e-06,
      "loss": 2.4486,
      "step": 44943
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.196240782737732,
      "learning_rate": 7.437636587265476e-06,
      "loss": 2.5643,
      "step": 44944
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.102907419204712,
      "learning_rate": 7.437238598854329e-06,
      "loss": 2.2127,
      "step": 44945
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1599853038787842,
      "learning_rate": 7.436840614787759e-06,
      "loss": 2.1898,
      "step": 44946
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0909463167190552,
      "learning_rate": 7.43644263506644e-06,
      "loss": 2.228,
      "step": 44947
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1200660467147827,
      "learning_rate": 7.4360446596910475e-06,
      "loss": 2.0783,
      "step": 44948
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.019345760345459,
      "learning_rate": 7.435646688662255e-06,
      "loss": 2.4976,
      "step": 44949
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1787470579147339,
      "learning_rate": 7.435248721980739e-06,
      "loss": 2.4345,
      "step": 44950
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5258080959320068,
      "learning_rate": 7.434850759647177e-06,
      "loss": 2.3097,
      "step": 44951
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.074683427810669,
      "learning_rate": 7.434452801662235e-06,
      "loss": 2.3694,
      "step": 44952
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1885223388671875,
      "learning_rate": 7.434054848026592e-06,
      "loss": 2.371,
      "step": 44953
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1349554061889648,
      "learning_rate": 7.433656898740923e-06,
      "loss": 2.4014,
      "step": 44954
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0936810970306396,
      "learning_rate": 7.433258953805903e-06,
      "loss": 2.4022,
      "step": 44955
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.060651421546936,
      "learning_rate": 7.432861013222206e-06,
      "loss": 2.3511,
      "step": 44956
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.950355589389801,
      "learning_rate": 7.432463076990507e-06,
      "loss": 2.4109,
      "step": 44957
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0109130144119263,
      "learning_rate": 7.43206514511148e-06,
      "loss": 2.3022,
      "step": 44958
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1404788494110107,
      "learning_rate": 7.431667217585801e-06,
      "loss": 2.393,
      "step": 44959
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0563193559646606,
      "learning_rate": 7.431269294414145e-06,
      "loss": 2.3987,
      "step": 44960
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.134932518005371,
      "learning_rate": 7.430871375597181e-06,
      "loss": 2.2301,
      "step": 44961
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0720967054367065,
      "learning_rate": 7.430473461135592e-06,
      "loss": 2.1759,
      "step": 44962
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1258653402328491,
      "learning_rate": 7.430075551030046e-06,
      "loss": 2.2577,
      "step": 44963
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0094091892242432,
      "learning_rate": 7.429677645281221e-06,
      "loss": 2.1078,
      "step": 44964
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0432627201080322,
      "learning_rate": 7.42927974388979e-06,
      "loss": 2.4258,
      "step": 44965
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1221274137496948,
      "learning_rate": 7.428881846856428e-06,
      "loss": 2.2638,
      "step": 44966
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0867191553115845,
      "learning_rate": 7.428483954181811e-06,
      "loss": 2.4281,
      "step": 44967
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1683616638183594,
      "learning_rate": 7.428086065866611e-06,
      "loss": 2.4526,
      "step": 44968
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0943529605865479,
      "learning_rate": 7.4276881819115034e-06,
      "loss": 2.5151,
      "step": 44969
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0903149843215942,
      "learning_rate": 7.427290302317163e-06,
      "loss": 2.3073,
      "step": 44970
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0507303476333618,
      "learning_rate": 7.4268924270842626e-06,
      "loss": 2.3298,
      "step": 44971
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9905403256416321,
      "learning_rate": 7.4264945562134795e-06,
      "loss": 2.3637,
      "step": 44972
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1524204015731812,
      "learning_rate": 7.426096689705489e-06,
      "loss": 2.452,
      "step": 44973
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0264010429382324,
      "learning_rate": 7.425698827560961e-06,
      "loss": 2.3546,
      "step": 44974
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0721639394760132,
      "learning_rate": 7.425300969780573e-06,
      "loss": 2.3618,
      "step": 44975
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9667860269546509,
      "learning_rate": 7.424903116365e-06,
      "loss": 2.1942,
      "step": 44976
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1534500122070312,
      "learning_rate": 7.424505267314915e-06,
      "loss": 2.471,
      "step": 44977
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0875385999679565,
      "learning_rate": 7.424107422630997e-06,
      "loss": 2.3768,
      "step": 44978
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9947391748428345,
      "learning_rate": 7.423709582313914e-06,
      "loss": 2.1917,
      "step": 44979
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.172993779182434,
      "learning_rate": 7.423311746364341e-06,
      "loss": 2.4046,
      "step": 44980
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1703139543533325,
      "learning_rate": 7.422913914782957e-06,
      "loss": 2.5127,
      "step": 44981
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0754683017730713,
      "learning_rate": 7.42251608757043e-06,
      "loss": 2.3236,
      "step": 44982
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.115793228149414,
      "learning_rate": 7.422118264727442e-06,
      "loss": 2.3754,
      "step": 44983
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1430280208587646,
      "learning_rate": 7.421720446254661e-06,
      "loss": 2.5324,
      "step": 44984
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.159788966178894,
      "learning_rate": 7.421322632152765e-06,
      "loss": 2.4079,
      "step": 44985
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9630997776985168,
      "learning_rate": 7.420924822422428e-06,
      "loss": 2.0938,
      "step": 44986
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.107805609703064,
      "learning_rate": 7.420527017064323e-06,
      "loss": 2.3163,
      "step": 44987
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0891927480697632,
      "learning_rate": 7.4201292160791286e-06,
      "loss": 2.6277,
      "step": 44988
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9827012419700623,
      "learning_rate": 7.419731419467513e-06,
      "loss": 2.1953,
      "step": 44989
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1489360332489014,
      "learning_rate": 7.4193336272301545e-06,
      "loss": 2.3473,
      "step": 44990
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.033589243888855,
      "learning_rate": 7.418935839367727e-06,
      "loss": 2.3196,
      "step": 44991
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1405909061431885,
      "learning_rate": 7.4185380558809014e-06,
      "loss": 2.309,
      "step": 44992
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.163499355316162,
      "learning_rate": 7.418140276770357e-06,
      "loss": 2.3829,
      "step": 44993
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0193837881088257,
      "learning_rate": 7.417742502036766e-06,
      "loss": 2.261,
      "step": 44994
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0359231233596802,
      "learning_rate": 7.4173447316808034e-06,
      "loss": 2.3575,
      "step": 44995
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0675262212753296,
      "learning_rate": 7.4169469657031425e-06,
      "loss": 1.9691,
      "step": 44996
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2229362726211548,
      "learning_rate": 7.416549204104456e-06,
      "loss": 2.2574,
      "step": 44997
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0328264236450195,
      "learning_rate": 7.416151446885423e-06,
      "loss": 2.2013,
      "step": 44998
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1050578355789185,
      "learning_rate": 7.415753694046712e-06,
      "loss": 2.2695,
      "step": 44999
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2141520977020264,
      "learning_rate": 7.415355945589002e-06,
      "loss": 2.4148,
      "step": 45000
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9704494476318359,
      "learning_rate": 7.414958201512965e-06,
      "loss": 2.3036,
      "step": 45001
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0991301536560059,
      "learning_rate": 7.414560461819275e-06,
      "loss": 2.4366,
      "step": 45002
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.14120352268219,
      "learning_rate": 7.41416272650861e-06,
      "loss": 2.138,
      "step": 45003
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0391277074813843,
      "learning_rate": 7.413764995581645e-06,
      "loss": 2.3916,
      "step": 45004
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1228983402252197,
      "learning_rate": 7.413367269039045e-06,
      "loss": 2.5221,
      "step": 45005
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.164874792098999,
      "learning_rate": 7.412969546881491e-06,
      "loss": 2.4098,
      "step": 45006
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.292757511138916,
      "learning_rate": 7.412571829109656e-06,
      "loss": 2.3154,
      "step": 45007
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0153096914291382,
      "learning_rate": 7.412174115724214e-06,
      "loss": 2.3585,
      "step": 45008
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1198310852050781,
      "learning_rate": 7.411776406725841e-06,
      "loss": 2.4278,
      "step": 45009
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0767594575881958,
      "learning_rate": 7.411378702115208e-06,
      "loss": 2.3403,
      "step": 45010
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0376567840576172,
      "learning_rate": 7.410981001892994e-06,
      "loss": 2.2791,
      "step": 45011
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0838005542755127,
      "learning_rate": 7.410583306059868e-06,
      "loss": 2.4336,
      "step": 45012
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1307053565979004,
      "learning_rate": 7.410185614616507e-06,
      "loss": 2.2815,
      "step": 45013
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9580345749855042,
      "learning_rate": 7.409787927563585e-06,
      "loss": 2.1111,
      "step": 45014
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.117281198501587,
      "learning_rate": 7.409390244901776e-06,
      "loss": 2.2843,
      "step": 45015
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0993529558181763,
      "learning_rate": 7.408992566631756e-06,
      "loss": 2.5023,
      "step": 45016
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1244701147079468,
      "learning_rate": 7.408594892754198e-06,
      "loss": 2.313,
      "step": 45017
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1055532693862915,
      "learning_rate": 7.408197223269773e-06,
      "loss": 2.7096,
      "step": 45018
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0774260759353638,
      "learning_rate": 7.407799558179158e-06,
      "loss": 2.4475,
      "step": 45019
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0703800916671753,
      "learning_rate": 7.4074018974830265e-06,
      "loss": 2.3909,
      "step": 45020
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0238462686538696,
      "learning_rate": 7.407004241182052e-06,
      "loss": 2.4645,
      "step": 45021
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1239169836044312,
      "learning_rate": 7.40660658927691e-06,
      "loss": 2.274,
      "step": 45022
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.057584524154663,
      "learning_rate": 7.406208941768277e-06,
      "loss": 2.1134,
      "step": 45023
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9661172032356262,
      "learning_rate": 7.405811298656822e-06,
      "loss": 2.3926,
      "step": 45024
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0034235715866089,
      "learning_rate": 7.405413659943221e-06,
      "loss": 2.5026,
      "step": 45025
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1240218877792358,
      "learning_rate": 7.405016025628149e-06,
      "loss": 2.2702,
      "step": 45026
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1098265647888184,
      "learning_rate": 7.4046183957122795e-06,
      "loss": 2.4676,
      "step": 45027
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1402969360351562,
      "learning_rate": 7.404220770196286e-06,
      "loss": 2.5512,
      "step": 45028
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1615263223648071,
      "learning_rate": 7.403823149080844e-06,
      "loss": 2.3409,
      "step": 45029
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.075205683708191,
      "learning_rate": 7.4034255323666314e-06,
      "loss": 2.1467,
      "step": 45030
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0624864101409912,
      "learning_rate": 7.403027920054313e-06,
      "loss": 2.3118,
      "step": 45031
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9955970048904419,
      "learning_rate": 7.402630312144567e-06,
      "loss": 2.3986,
      "step": 45032
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0171524286270142,
      "learning_rate": 7.40223270863807e-06,
      "loss": 2.2766,
      "step": 45033
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1204394102096558,
      "learning_rate": 7.401835109535493e-06,
      "loss": 2.316,
      "step": 45034
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9907500147819519,
      "learning_rate": 7.401437514837511e-06,
      "loss": 2.4094,
      "step": 45035
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.038033366203308,
      "learning_rate": 7.4010399245447975e-06,
      "loss": 2.1169,
      "step": 45036
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.171704649925232,
      "learning_rate": 7.4006423386580285e-06,
      "loss": 2.3089,
      "step": 45037
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0728118419647217,
      "learning_rate": 7.400244757177876e-06,
      "loss": 2.1764,
      "step": 45038
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.035011649131775,
      "learning_rate": 7.3998471801050155e-06,
      "loss": 2.5068,
      "step": 45039
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.208101511001587,
      "learning_rate": 7.399449607440119e-06,
      "loss": 2.3784,
      "step": 45040
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1968687772750854,
      "learning_rate": 7.399052039183861e-06,
      "loss": 2.4447,
      "step": 45041
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9673266410827637,
      "learning_rate": 7.3986544753369185e-06,
      "loss": 2.3759,
      "step": 45042
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0756429433822632,
      "learning_rate": 7.398256915899966e-06,
      "loss": 2.4845,
      "step": 45043
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0403714179992676,
      "learning_rate": 7.397859360873671e-06,
      "loss": 2.2998,
      "step": 45044
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9308002591133118,
      "learning_rate": 7.39746181025871e-06,
      "loss": 2.1045,
      "step": 45045
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0860190391540527,
      "learning_rate": 7.39706426405576e-06,
      "loss": 2.2541,
      "step": 45046
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0353797674179077,
      "learning_rate": 7.396666722265492e-06,
      "loss": 2.2866,
      "step": 45047
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0280156135559082,
      "learning_rate": 7.396269184888581e-06,
      "loss": 2.5603,
      "step": 45048
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1128087043762207,
      "learning_rate": 7.3958716519257015e-06,
      "loss": 2.5472,
      "step": 45049
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2277716398239136,
      "learning_rate": 7.395474123377525e-06,
      "loss": 2.3246,
      "step": 45050
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0835471153259277,
      "learning_rate": 7.395076599244731e-06,
      "loss": 2.4292,
      "step": 45051
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0018925666809082,
      "learning_rate": 7.394679079527987e-06,
      "loss": 2.4734,
      "step": 45052
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1061763763427734,
      "learning_rate": 7.39428156422797e-06,
      "loss": 2.3182,
      "step": 45053
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0647839307785034,
      "learning_rate": 7.393884053345352e-06,
      "loss": 2.3813,
      "step": 45054
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0164592266082764,
      "learning_rate": 7.393486546880811e-06,
      "loss": 2.5325,
      "step": 45055
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9714513421058655,
      "learning_rate": 7.393089044835015e-06,
      "loss": 2.5036,
      "step": 45056
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1999067068099976,
      "learning_rate": 7.392691547208648e-06,
      "loss": 2.4209,
      "step": 45057
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0643653869628906,
      "learning_rate": 7.392294054002371e-06,
      "loss": 2.3035,
      "step": 45058
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.011361837387085,
      "learning_rate": 7.3918965652168655e-06,
      "loss": 2.3941,
      "step": 45059
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0152467489242554,
      "learning_rate": 7.391499080852802e-06,
      "loss": 2.3637,
      "step": 45060
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.147861123085022,
      "learning_rate": 7.391101600910857e-06,
      "loss": 2.2887,
      "step": 45061
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2316328287124634,
      "learning_rate": 7.3907041253917035e-06,
      "loss": 2.2961,
      "step": 45062
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1664763689041138,
      "learning_rate": 7.390306654296014e-06,
      "loss": 2.4492,
      "step": 45063
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0518596172332764,
      "learning_rate": 7.389909187624465e-06,
      "loss": 1.9858,
      "step": 45064
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1516344547271729,
      "learning_rate": 7.389511725377727e-06,
      "loss": 2.4678,
      "step": 45065
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3581095933914185,
      "learning_rate": 7.389114267556476e-06,
      "loss": 2.3452,
      "step": 45066
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0100339651107788,
      "learning_rate": 7.388716814161387e-06,
      "loss": 2.506,
      "step": 45067
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2026655673980713,
      "learning_rate": 7.388319365193132e-06,
      "loss": 2.279,
      "step": 45068
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9697726964950562,
      "learning_rate": 7.387921920652383e-06,
      "loss": 2.1283,
      "step": 45069
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1254066228866577,
      "learning_rate": 7.3875244805398224e-06,
      "loss": 2.5971,
      "step": 45070
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.049114465713501,
      "learning_rate": 7.3871270448561125e-06,
      "loss": 2.5277,
      "step": 45071
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0158904790878296,
      "learning_rate": 7.386729613601931e-06,
      "loss": 2.4117,
      "step": 45072
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.043521761894226,
      "learning_rate": 7.386332186777954e-06,
      "loss": 2.2248,
      "step": 45073
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1709033250808716,
      "learning_rate": 7.385934764384852e-06,
      "loss": 2.0804,
      "step": 45074
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0095763206481934,
      "learning_rate": 7.385537346423302e-06,
      "loss": 2.0986,
      "step": 45075
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0178738832473755,
      "learning_rate": 7.385139932893974e-06,
      "loss": 2.3142,
      "step": 45076
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0669642686843872,
      "learning_rate": 7.384742523797546e-06,
      "loss": 2.4221,
      "step": 45077
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0509045124053955,
      "learning_rate": 7.384345119134692e-06,
      "loss": 2.7205,
      "step": 45078
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1443215608596802,
      "learning_rate": 7.3839477189060815e-06,
      "loss": 2.4851,
      "step": 45079
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1451936960220337,
      "learning_rate": 7.383550323112389e-06,
      "loss": 2.3246,
      "step": 45080
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0401673316955566,
      "learning_rate": 7.383152931754289e-06,
      "loss": 2.3812,
      "step": 45081
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0502158403396606,
      "learning_rate": 7.382755544832456e-06,
      "loss": 2.3502,
      "step": 45082
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0098594427108765,
      "learning_rate": 7.382358162347566e-06,
      "loss": 2.3815,
      "step": 45083
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.028813123703003,
      "learning_rate": 7.381960784300287e-06,
      "loss": 2.1972,
      "step": 45084
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0930116176605225,
      "learning_rate": 7.381563410691298e-06,
      "loss": 2.3476,
      "step": 45085
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2638646364212036,
      "learning_rate": 7.381166041521268e-06,
      "loss": 2.196,
      "step": 45086
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0789743661880493,
      "learning_rate": 7.380768676790873e-06,
      "loss": 2.4516,
      "step": 45087
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1872754096984863,
      "learning_rate": 7.3803713165007864e-06,
      "loss": 2.4374,
      "step": 45088
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0657038688659668,
      "learning_rate": 7.379973960651682e-06,
      "loss": 2.1977,
      "step": 45089
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.151066780090332,
      "learning_rate": 7.379576609244232e-06,
      "loss": 2.2112,
      "step": 45090
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1976909637451172,
      "learning_rate": 7.3791792622791126e-06,
      "loss": 2.5009,
      "step": 45091
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0985047817230225,
      "learning_rate": 7.3787819197569965e-06,
      "loss": 2.7502,
      "step": 45092
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0510015487670898,
      "learning_rate": 7.3783845816785575e-06,
      "loss": 2.1542,
      "step": 45093
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.167570948600769,
      "learning_rate": 7.377987248044467e-06,
      "loss": 2.4526,
      "step": 45094
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.081886649131775,
      "learning_rate": 7.377589918855401e-06,
      "loss": 2.1963,
      "step": 45095
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1811938285827637,
      "learning_rate": 7.377192594112036e-06,
      "loss": 2.4873,
      "step": 45096
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0698866844177246,
      "learning_rate": 7.376795273815038e-06,
      "loss": 2.2465,
      "step": 45097
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0082794427871704,
      "learning_rate": 7.376397957965084e-06,
      "loss": 2.3523,
      "step": 45098
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1394509077072144,
      "learning_rate": 7.3760006465628485e-06,
      "loss": 2.2991,
      "step": 45099
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1042228937149048,
      "learning_rate": 7.375603339609005e-06,
      "loss": 2.3629,
      "step": 45100
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.027371883392334,
      "learning_rate": 7.3752060371042265e-06,
      "loss": 2.4902,
      "step": 45101
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1082818508148193,
      "learning_rate": 7.374808739049186e-06,
      "loss": 2.5721,
      "step": 45102
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1425819396972656,
      "learning_rate": 7.3744114454445584e-06,
      "loss": 2.5662,
      "step": 45103
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1217236518859863,
      "learning_rate": 7.374014156291016e-06,
      "loss": 2.448,
      "step": 45104
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.13237464427948,
      "learning_rate": 7.373616871589233e-06,
      "loss": 2.3032,
      "step": 45105
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.083128809928894,
      "learning_rate": 7.373219591339885e-06,
      "loss": 2.3733,
      "step": 45106
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.155705213546753,
      "learning_rate": 7.3728223155436415e-06,
      "loss": 2.43,
      "step": 45107
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2349622249603271,
      "learning_rate": 7.372425044201177e-06,
      "loss": 2.4023,
      "step": 45108
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0941787958145142,
      "learning_rate": 7.372027777313168e-06,
      "loss": 2.2044,
      "step": 45109
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.178600549697876,
      "learning_rate": 7.3716305148802834e-06,
      "loss": 2.5649,
      "step": 45110
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0444822311401367,
      "learning_rate": 7.371233256903198e-06,
      "loss": 2.2604,
      "step": 45111
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0862106084823608,
      "learning_rate": 7.370836003382589e-06,
      "loss": 2.4441,
      "step": 45112
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0740816593170166,
      "learning_rate": 7.370438754319127e-06,
      "loss": 2.0468,
      "step": 45113
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1489039659500122,
      "learning_rate": 7.370041509713483e-06,
      "loss": 2.2653,
      "step": 45114
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0873584747314453,
      "learning_rate": 7.369644269566334e-06,
      "loss": 2.128,
      "step": 45115
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1503887176513672,
      "learning_rate": 7.3692470338783515e-06,
      "loss": 2.3815,
      "step": 45116
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0691990852355957,
      "learning_rate": 7.368849802650211e-06,
      "loss": 2.129,
      "step": 45117
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0313653945922852,
      "learning_rate": 7.368452575882583e-06,
      "loss": 2.2421,
      "step": 45118
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1790335178375244,
      "learning_rate": 7.368055353576144e-06,
      "loss": 2.1576,
      "step": 45119
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1235909461975098,
      "learning_rate": 7.3676581357315655e-06,
      "loss": 2.1084,
      "step": 45120
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0691370964050293,
      "learning_rate": 7.3672609223495215e-06,
      "loss": 2.5401,
      "step": 45121
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.027942419052124,
      "learning_rate": 7.3668637134306855e-06,
      "loss": 2.1431,
      "step": 45122
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1656607389450073,
      "learning_rate": 7.366466508975736e-06,
      "loss": 2.3444,
      "step": 45123
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9953845739364624,
      "learning_rate": 7.366069308985335e-06,
      "loss": 2.409,
      "step": 45124
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0360039472579956,
      "learning_rate": 7.365672113460161e-06,
      "loss": 2.3522,
      "step": 45125
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9742710590362549,
      "learning_rate": 7.365274922400891e-06,
      "loss": 2.2628,
      "step": 45126
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.091219425201416,
      "learning_rate": 7.364877735808194e-06,
      "loss": 2.3892,
      "step": 45127
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1158791780471802,
      "learning_rate": 7.364480553682745e-06,
      "loss": 2.4088,
      "step": 45128
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3183584213256836,
      "learning_rate": 7.364083376025217e-06,
      "loss": 2.3579,
      "step": 45129
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0967216491699219,
      "learning_rate": 7.363686202836284e-06,
      "loss": 2.3998,
      "step": 45130
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2649023532867432,
      "learning_rate": 7.363289034116619e-06,
      "loss": 2.2099,
      "step": 45131
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.126116394996643,
      "learning_rate": 7.362891869866895e-06,
      "loss": 2.739,
      "step": 45132
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1252338886260986,
      "learning_rate": 7.362494710087786e-06,
      "loss": 2.2272,
      "step": 45133
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.037282109260559,
      "learning_rate": 7.362097554779965e-06,
      "loss": 2.4262,
      "step": 45134
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2180414199829102,
      "learning_rate": 7.3617004039441055e-06,
      "loss": 2.4338,
      "step": 45135
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9745762348175049,
      "learning_rate": 7.361303257580882e-06,
      "loss": 2.4829,
      "step": 45136
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0466845035552979,
      "learning_rate": 7.3609061156909624e-06,
      "loss": 2.4957,
      "step": 45137
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0686410665512085,
      "learning_rate": 7.360508978275025e-06,
      "loss": 2.3008,
      "step": 45138
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9995437860488892,
      "learning_rate": 7.360111845333742e-06,
      "loss": 2.2862,
      "step": 45139
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1033505201339722,
      "learning_rate": 7.359714716867785e-06,
      "loss": 2.2243,
      "step": 45140
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3398174047470093,
      "learning_rate": 7.359317592877833e-06,
      "loss": 2.5639,
      "step": 45141
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0390973091125488,
      "learning_rate": 7.35892047336455e-06,
      "loss": 1.907,
      "step": 45142
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.092836856842041,
      "learning_rate": 7.358523358328617e-06,
      "loss": 2.4402,
      "step": 45143
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1721137762069702,
      "learning_rate": 7.3581262477707025e-06,
      "loss": 2.3339,
      "step": 45144
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0719190835952759,
      "learning_rate": 7.357729141691482e-06,
      "loss": 2.6195,
      "step": 45145
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9428226947784424,
      "learning_rate": 7.357332040091628e-06,
      "loss": 2.2422,
      "step": 45146
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1523799896240234,
      "learning_rate": 7.356934942971814e-06,
      "loss": 2.3384,
      "step": 45147
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1196647882461548,
      "learning_rate": 7.356537850332713e-06,
      "loss": 2.5582,
      "step": 45148
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1032994985580444,
      "learning_rate": 7.356140762175004e-06,
      "loss": 2.4589,
      "step": 45149
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0851691961288452,
      "learning_rate": 7.355743678499347e-06,
      "loss": 2.3033,
      "step": 45150
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0209193229675293,
      "learning_rate": 7.355346599306426e-06,
      "loss": 2.404,
      "step": 45151
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.98347407579422,
      "learning_rate": 7.354949524596909e-06,
      "loss": 2.262,
      "step": 45152
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1746913194656372,
      "learning_rate": 7.354552454371471e-06,
      "loss": 2.4436,
      "step": 45153
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9873132109642029,
      "learning_rate": 7.354155388630786e-06,
      "loss": 2.3942,
      "step": 45154
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0876902341842651,
      "learning_rate": 7.353758327375526e-06,
      "loss": 2.3793,
      "step": 45155
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.146794080734253,
      "learning_rate": 7.353361270606364e-06,
      "loss": 2.4663,
      "step": 45156
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1085631847381592,
      "learning_rate": 7.3529642183239725e-06,
      "loss": 2.6111,
      "step": 45157
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.256442904472351,
      "learning_rate": 7.352567170529028e-06,
      "loss": 2.5172,
      "step": 45158
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1135506629943848,
      "learning_rate": 7.352170127222199e-06,
      "loss": 2.2374,
      "step": 45159
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9853115677833557,
      "learning_rate": 7.351773088404161e-06,
      "loss": 2.4114,
      "step": 45160
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1326783895492554,
      "learning_rate": 7.351376054075588e-06,
      "loss": 2.372,
      "step": 45161
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1650042533874512,
      "learning_rate": 7.3509790242371546e-06,
      "loss": 2.257,
      "step": 45162
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.146764874458313,
      "learning_rate": 7.350581998889529e-06,
      "loss": 2.2481,
      "step": 45163
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9733777642250061,
      "learning_rate": 7.350184978033386e-06,
      "loss": 2.3198,
      "step": 45164
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2858892679214478,
      "learning_rate": 7.349787961669397e-06,
      "loss": 2.4721,
      "step": 45165
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.983467161655426,
      "learning_rate": 7.349390949798238e-06,
      "loss": 2.1742,
      "step": 45166
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0344053506851196,
      "learning_rate": 7.348993942420584e-06,
      "loss": 2.4208,
      "step": 45167
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0834599733352661,
      "learning_rate": 7.348596939537103e-06,
      "loss": 2.2824,
      "step": 45168
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9840985536575317,
      "learning_rate": 7.348199941148473e-06,
      "loss": 2.4021,
      "step": 45169
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1654236316680908,
      "learning_rate": 7.34780294725536e-06,
      "loss": 2.1507,
      "step": 45170
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1939748525619507,
      "learning_rate": 7.347405957858445e-06,
      "loss": 2.3646,
      "step": 45171
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0466809272766113,
      "learning_rate": 7.347008972958395e-06,
      "loss": 2.3819,
      "step": 45172
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2595938444137573,
      "learning_rate": 7.346611992555885e-06,
      "loss": 2.3431,
      "step": 45173
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0123200416564941,
      "learning_rate": 7.34621501665159e-06,
      "loss": 2.3561,
      "step": 45174
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.983649492263794,
      "learning_rate": 7.345818045246184e-06,
      "loss": 2.3993,
      "step": 45175
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.167349100112915,
      "learning_rate": 7.345421078340335e-06,
      "loss": 2.5423,
      "step": 45176
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0706791877746582,
      "learning_rate": 7.3450241159347156e-06,
      "loss": 2.6187,
      "step": 45177
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2997088432312012,
      "learning_rate": 7.344627158030004e-06,
      "loss": 2.4334,
      "step": 45178
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1074907779693604,
      "learning_rate": 7.3442302046268685e-06,
      "loss": 2.3193,
      "step": 45179
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0381650924682617,
      "learning_rate": 7.343833255725985e-06,
      "loss": 2.1612,
      "step": 45180
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0846656560897827,
      "learning_rate": 7.343436311328026e-06,
      "loss": 2.4669,
      "step": 45181
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1011476516723633,
      "learning_rate": 7.3430393714336635e-06,
      "loss": 2.2595,
      "step": 45182
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1078009605407715,
      "learning_rate": 7.342642436043571e-06,
      "loss": 2.4484,
      "step": 45183
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0085055828094482,
      "learning_rate": 7.3422455051584206e-06,
      "loss": 2.5578,
      "step": 45184
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0237226486206055,
      "learning_rate": 7.341848578778887e-06,
      "loss": 2.1567,
      "step": 45185
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1174793243408203,
      "learning_rate": 7.341451656905642e-06,
      "loss": 2.5541,
      "step": 45186
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.070094347000122,
      "learning_rate": 7.341054739539357e-06,
      "loss": 2.4403,
      "step": 45187
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1143271923065186,
      "learning_rate": 7.340657826680713e-06,
      "loss": 2.3276,
      "step": 45188
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1392420530319214,
      "learning_rate": 7.34026091833037e-06,
      "loss": 2.3347,
      "step": 45189
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0641758441925049,
      "learning_rate": 7.3398640144890064e-06,
      "loss": 2.0978,
      "step": 45190
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0642889738082886,
      "learning_rate": 7.3394671151572974e-06,
      "loss": 2.2445,
      "step": 45191
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.123855710029602,
      "learning_rate": 7.339070220335914e-06,
      "loss": 2.4153,
      "step": 45192
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1094236373901367,
      "learning_rate": 7.33867333002553e-06,
      "loss": 2.4247,
      "step": 45193
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1096901893615723,
      "learning_rate": 7.338276444226816e-06,
      "loss": 2.2391,
      "step": 45194
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1384071111679077,
      "learning_rate": 7.337879562940448e-06,
      "loss": 2.4145,
      "step": 45195
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5444676876068115,
      "learning_rate": 7.337482686167098e-06,
      "loss": 2.1945,
      "step": 45196
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0565922260284424,
      "learning_rate": 7.337085813907436e-06,
      "loss": 2.2084,
      "step": 45197
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0513440370559692,
      "learning_rate": 7.336688946162137e-06,
      "loss": 2.3947,
      "step": 45198
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1015188694000244,
      "learning_rate": 7.336292082931873e-06,
      "loss": 2.4291,
      "step": 45199
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0665748119354248,
      "learning_rate": 7.335895224217317e-06,
      "loss": 2.4581,
      "step": 45200
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.246442198753357,
      "learning_rate": 7.335498370019145e-06,
      "loss": 2.3773,
      "step": 45201
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0045987367630005,
      "learning_rate": 7.335101520338026e-06,
      "loss": 2.2999,
      "step": 45202
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1006762981414795,
      "learning_rate": 7.334704675174636e-06,
      "loss": 2.119,
      "step": 45203
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.251407265663147,
      "learning_rate": 7.334307834529642e-06,
      "loss": 2.4939,
      "step": 45204
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2453703880310059,
      "learning_rate": 7.333910998403719e-06,
      "loss": 2.2839,
      "step": 45205
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1094460487365723,
      "learning_rate": 7.333514166797544e-06,
      "loss": 2.3468,
      "step": 45206
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1188138723373413,
      "learning_rate": 7.333117339711784e-06,
      "loss": 2.2596,
      "step": 45207
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1146690845489502,
      "learning_rate": 7.332720517147116e-06,
      "loss": 2.308,
      "step": 45208
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1082288026809692,
      "learning_rate": 7.332323699104211e-06,
      "loss": 2.4369,
      "step": 45209
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0119796991348267,
      "learning_rate": 7.331926885583741e-06,
      "loss": 2.3331,
      "step": 45210
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1081852912902832,
      "learning_rate": 7.331530076586379e-06,
      "loss": 2.4947,
      "step": 45211
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1351289749145508,
      "learning_rate": 7.331133272112801e-06,
      "loss": 2.4212,
      "step": 45212
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0357921123504639,
      "learning_rate": 7.330736472163676e-06,
      "loss": 2.1506,
      "step": 45213
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0734331607818604,
      "learning_rate": 7.330339676739675e-06,
      "loss": 2.4335,
      "step": 45214
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1982520818710327,
      "learning_rate": 7.32994288584148e-06,
      "loss": 2.3726,
      "step": 45215
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0218232870101929,
      "learning_rate": 7.329546099469752e-06,
      "loss": 2.3799,
      "step": 45216
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.094766616821289,
      "learning_rate": 7.329149317625168e-06,
      "loss": 2.1118,
      "step": 45217
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0893361568450928,
      "learning_rate": 7.328752540308403e-06,
      "loss": 2.4845,
      "step": 45218
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9815815091133118,
      "learning_rate": 7.3283557675201275e-06,
      "loss": 2.3408,
      "step": 45219
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0374360084533691,
      "learning_rate": 7.327958999261013e-06,
      "loss": 2.3965,
      "step": 45220
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0837219953536987,
      "learning_rate": 7.327562235531735e-06,
      "loss": 2.4601,
      "step": 45221
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.427963376045227,
      "learning_rate": 7.327165476332966e-06,
      "loss": 2.2629,
      "step": 45222
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0782277584075928,
      "learning_rate": 7.326768721665376e-06,
      "loss": 2.0764,
      "step": 45223
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1857954263687134,
      "learning_rate": 7.326371971529641e-06,
      "loss": 2.7461,
      "step": 45224
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0065408945083618,
      "learning_rate": 7.32597522592643e-06,
      "loss": 2.3139,
      "step": 45225
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.072333574295044,
      "learning_rate": 7.325578484856417e-06,
      "loss": 2.1213,
      "step": 45226
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1919621229171753,
      "learning_rate": 7.325181748320276e-06,
      "loss": 2.345,
      "step": 45227
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.028416395187378,
      "learning_rate": 7.324785016318679e-06,
      "loss": 2.2595,
      "step": 45228
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.99901282787323,
      "learning_rate": 7.324388288852295e-06,
      "loss": 2.4935,
      "step": 45229
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.987604558467865,
      "learning_rate": 7.323991565921802e-06,
      "loss": 2.336,
      "step": 45230
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9753507971763611,
      "learning_rate": 7.323594847527871e-06,
      "loss": 2.4751,
      "step": 45231
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0430450439453125,
      "learning_rate": 7.323198133671172e-06,
      "loss": 2.4171,
      "step": 45232
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1462934017181396,
      "learning_rate": 7.322801424352379e-06,
      "loss": 2.1836,
      "step": 45233
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.026952862739563,
      "learning_rate": 7.322404719572165e-06,
      "loss": 2.2882,
      "step": 45234
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0621414184570312,
      "learning_rate": 7.322008019331202e-06,
      "loss": 2.364,
      "step": 45235
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.092932105064392,
      "learning_rate": 7.321611323630161e-06,
      "loss": 2.4151,
      "step": 45236
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.053788185119629,
      "learning_rate": 7.321214632469718e-06,
      "loss": 2.2266,
      "step": 45237
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0597162246704102,
      "learning_rate": 7.3208179458505435e-06,
      "loss": 2.3998,
      "step": 45238
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0553613901138306,
      "learning_rate": 7.32042126377331e-06,
      "loss": 2.2739,
      "step": 45239
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0379880666732788,
      "learning_rate": 7.32002458623869e-06,
      "loss": 2.3556,
      "step": 45240
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0425161123275757,
      "learning_rate": 7.319627913247362e-06,
      "loss": 2.3747,
      "step": 45241
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3138810396194458,
      "learning_rate": 7.319231244799987e-06,
      "loss": 2.4434,
      "step": 45242
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4070003032684326,
      "learning_rate": 7.318834580897243e-06,
      "loss": 2.1795,
      "step": 45243
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0431681871414185,
      "learning_rate": 7.318437921539803e-06,
      "loss": 2.3079,
      "step": 45244
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0630167722702026,
      "learning_rate": 7.3180412667283395e-06,
      "loss": 2.4299,
      "step": 45245
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0806052684783936,
      "learning_rate": 7.317644616463524e-06,
      "loss": 2.3749,
      "step": 45246
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.050714135169983,
      "learning_rate": 7.317247970746029e-06,
      "loss": 2.1937,
      "step": 45247
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0762805938720703,
      "learning_rate": 7.316851329576527e-06,
      "loss": 2.3315,
      "step": 45248
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0997068881988525,
      "learning_rate": 7.316454692955693e-06,
      "loss": 2.0041,
      "step": 45249
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9948753714561462,
      "learning_rate": 7.316058060884195e-06,
      "loss": 2.2487,
      "step": 45250
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1637749671936035,
      "learning_rate": 7.315661433362707e-06,
      "loss": 2.3092,
      "step": 45251
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1822326183319092,
      "learning_rate": 7.315264810391905e-06,
      "loss": 2.4911,
      "step": 45252
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0956089496612549,
      "learning_rate": 7.314868191972458e-06,
      "loss": 2.4329,
      "step": 45253
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1002542972564697,
      "learning_rate": 7.314471578105038e-06,
      "loss": 2.4913,
      "step": 45254
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.372941493988037,
      "learning_rate": 7.314074968790317e-06,
      "loss": 2.5871,
      "step": 45255
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9980330467224121,
      "learning_rate": 7.313678364028969e-06,
      "loss": 2.4063,
      "step": 45256
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1096103191375732,
      "learning_rate": 7.313281763821664e-06,
      "loss": 2.4387,
      "step": 45257
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0503281354904175,
      "learning_rate": 7.312885168169078e-06,
      "loss": 2.1894,
      "step": 45258
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2624684572219849,
      "learning_rate": 7.312488577071884e-06,
      "loss": 2.4119,
      "step": 45259
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.22963285446167,
      "learning_rate": 7.312091990530747e-06,
      "loss": 2.2877,
      "step": 45260
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1510995626449585,
      "learning_rate": 7.3116954085463465e-06,
      "loss": 2.4417,
      "step": 45261
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.994247555732727,
      "learning_rate": 7.3112988311193515e-06,
      "loss": 2.2435,
      "step": 45262
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.067408561706543,
      "learning_rate": 7.3109022582504356e-06,
      "loss": 2.1596,
      "step": 45263
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.092990517616272,
      "learning_rate": 7.3105056899402704e-06,
      "loss": 2.3398,
      "step": 45264
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9974606037139893,
      "learning_rate": 7.310109126189529e-06,
      "loss": 2.4099,
      "step": 45265
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1002343893051147,
      "learning_rate": 7.309712566998882e-06,
      "loss": 2.337,
      "step": 45266
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9856995940208435,
      "learning_rate": 7.309316012369003e-06,
      "loss": 2.3829,
      "step": 45267
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0071430206298828,
      "learning_rate": 7.30891946230057e-06,
      "loss": 2.3452,
      "step": 45268
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2009482383728027,
      "learning_rate": 7.308522916794244e-06,
      "loss": 2.4813,
      "step": 45269
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.139145851135254,
      "learning_rate": 7.308126375850704e-06,
      "loss": 2.3156,
      "step": 45270
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.079045295715332,
      "learning_rate": 7.307729839470619e-06,
      "loss": 2.2752,
      "step": 45271
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0989019870758057,
      "learning_rate": 7.3073333076546626e-06,
      "loss": 2.578,
      "step": 45272
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0353440046310425,
      "learning_rate": 7.30693678040351e-06,
      "loss": 2.3316,
      "step": 45273
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0708141326904297,
      "learning_rate": 7.306540257717829e-06,
      "loss": 2.1088,
      "step": 45274
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0514166355133057,
      "learning_rate": 7.306143739598295e-06,
      "loss": 2.5129,
      "step": 45275
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0516098737716675,
      "learning_rate": 7.305747226045578e-06,
      "loss": 2.11,
      "step": 45276
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0489190816879272,
      "learning_rate": 7.305350717060352e-06,
      "loss": 2.358,
      "step": 45277
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1133893728256226,
      "learning_rate": 7.304954212643289e-06,
      "loss": 2.3566,
      "step": 45278
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.113995909690857,
      "learning_rate": 7.304557712795059e-06,
      "loss": 2.3519,
      "step": 45279
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1253509521484375,
      "learning_rate": 7.304161217516339e-06,
      "loss": 2.2354,
      "step": 45280
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0905377864837646,
      "learning_rate": 7.303764726807798e-06,
      "loss": 2.2578,
      "step": 45281
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1819992065429688,
      "learning_rate": 7.303368240670106e-06,
      "loss": 2.4002,
      "step": 45282
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1681106090545654,
      "learning_rate": 7.302971759103936e-06,
      "loss": 2.4803,
      "step": 45283
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.055193543434143,
      "learning_rate": 7.302575282109963e-06,
      "loss": 2.6481,
      "step": 45284
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1062275171279907,
      "learning_rate": 7.302178809688855e-06,
      "loss": 2.3633,
      "step": 45285
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0107470750808716,
      "learning_rate": 7.301782341841288e-06,
      "loss": 2.443,
      "step": 45286
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.456120491027832,
      "learning_rate": 7.3013858785679355e-06,
      "loss": 2.4025,
      "step": 45287
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9753655791282654,
      "learning_rate": 7.300989419869464e-06,
      "loss": 2.4022,
      "step": 45288
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0555086135864258,
      "learning_rate": 7.300592965746549e-06,
      "loss": 2.1815,
      "step": 45289
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.033779501914978,
      "learning_rate": 7.300196516199862e-06,
      "loss": 2.2248,
      "step": 45290
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9524509906768799,
      "learning_rate": 7.299800071230075e-06,
      "loss": 2.5363,
      "step": 45291
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0550942420959473,
      "learning_rate": 7.29940363083786e-06,
      "loss": 2.2483,
      "step": 45292
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0404611825942993,
      "learning_rate": 7.2990071950238904e-06,
      "loss": 2.6348,
      "step": 45293
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.046052098274231,
      "learning_rate": 7.29861076378884e-06,
      "loss": 2.5689,
      "step": 45294
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1835447549819946,
      "learning_rate": 7.298214337133376e-06,
      "loss": 2.3619,
      "step": 45295
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0586949586868286,
      "learning_rate": 7.29781791505817e-06,
      "loss": 2.2408,
      "step": 45296
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0532628297805786,
      "learning_rate": 7.297421497563898e-06,
      "loss": 2.3601,
      "step": 45297
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1260377168655396,
      "learning_rate": 7.29702508465123e-06,
      "loss": 2.3658,
      "step": 45298
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0578373670578003,
      "learning_rate": 7.296628676320838e-06,
      "loss": 2.1213,
      "step": 45299
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4230927228927612,
      "learning_rate": 7.2962322725733954e-06,
      "loss": 2.3822,
      "step": 45300
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1100413799285889,
      "learning_rate": 7.295835873409573e-06,
      "loss": 2.4913,
      "step": 45301
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.020429015159607,
      "learning_rate": 7.295439478830044e-06,
      "loss": 2.4105,
      "step": 45302
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1977143287658691,
      "learning_rate": 7.2950430888354795e-06,
      "loss": 2.4799,
      "step": 45303
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1909725666046143,
      "learning_rate": 7.2946467034265515e-06,
      "loss": 2.393,
      "step": 45304
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0006728172302246,
      "learning_rate": 7.294250322603932e-06,
      "loss": 2.3978,
      "step": 45305
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2292735576629639,
      "learning_rate": 7.293853946368293e-06,
      "loss": 2.096,
      "step": 45306
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.000131607055664,
      "learning_rate": 7.293457574720311e-06,
      "loss": 2.332,
      "step": 45307
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0136545896530151,
      "learning_rate": 7.293061207660648e-06,
      "loss": 2.1755,
      "step": 45308
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1057592630386353,
      "learning_rate": 7.292664845189983e-06,
      "loss": 2.4141,
      "step": 45309
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.125738263130188,
      "learning_rate": 7.292268487308985e-06,
      "loss": 2.2561,
      "step": 45310
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9846914410591125,
      "learning_rate": 7.291872134018329e-06,
      "loss": 2.1721,
      "step": 45311
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0938222408294678,
      "learning_rate": 7.291475785318684e-06,
      "loss": 2.3611,
      "step": 45312
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.03775155544281,
      "learning_rate": 7.291079441210725e-06,
      "loss": 2.2738,
      "step": 45313
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.926383376121521,
      "learning_rate": 7.290683101695122e-06,
      "loss": 2.2139,
      "step": 45314
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0265240669250488,
      "learning_rate": 7.290286766772545e-06,
      "loss": 2.4117,
      "step": 45315
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.11409592628479,
      "learning_rate": 7.28989043644367e-06,
      "loss": 2.1971,
      "step": 45316
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0123109817504883,
      "learning_rate": 7.289494110709164e-06,
      "loss": 2.1637,
      "step": 45317
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0115034580230713,
      "learning_rate": 7.289097789569703e-06,
      "loss": 2.2463,
      "step": 45318
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.149917721748352,
      "learning_rate": 7.288701473025957e-06,
      "loss": 2.4042,
      "step": 45319
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.147973656654358,
      "learning_rate": 7.288305161078602e-06,
      "loss": 2.4161,
      "step": 45320
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0963345766067505,
      "learning_rate": 7.287908853728305e-06,
      "loss": 2.4375,
      "step": 45321
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.044520378112793,
      "learning_rate": 7.287512550975737e-06,
      "loss": 2.3238,
      "step": 45322
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.225329875946045,
      "learning_rate": 7.287116252821572e-06,
      "loss": 2.3205,
      "step": 45323
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0480672121047974,
      "learning_rate": 7.286719959266482e-06,
      "loss": 2.2906,
      "step": 45324
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0035250186920166,
      "learning_rate": 7.286323670311139e-06,
      "loss": 2.4281,
      "step": 45325
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9831024408340454,
      "learning_rate": 7.285927385956213e-06,
      "loss": 2.4641,
      "step": 45326
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1574208736419678,
      "learning_rate": 7.285531106202379e-06,
      "loss": 2.4204,
      "step": 45327
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0809919834136963,
      "learning_rate": 7.285134831050306e-06,
      "loss": 2.4281,
      "step": 45328
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3371673822402954,
      "learning_rate": 7.284738560500666e-06,
      "loss": 2.4246,
      "step": 45329
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2745789289474487,
      "learning_rate": 7.284342294554133e-06,
      "loss": 2.3581,
      "step": 45330
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0770282745361328,
      "learning_rate": 7.2839460332113765e-06,
      "loss": 2.3586,
      "step": 45331
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0567606687545776,
      "learning_rate": 7.283549776473069e-06,
      "loss": 2.5466,
      "step": 45332
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.077085018157959,
      "learning_rate": 7.283153524339888e-06,
      "loss": 2.4847,
      "step": 45333
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.070760726928711,
      "learning_rate": 7.282757276812495e-06,
      "loss": 2.2879,
      "step": 45334
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0883597135543823,
      "learning_rate": 7.282361033891565e-06,
      "loss": 2.4031,
      "step": 45335
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9849286079406738,
      "learning_rate": 7.281964795577772e-06,
      "loss": 2.2359,
      "step": 45336
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.11411452293396,
      "learning_rate": 7.281568561871788e-06,
      "loss": 2.406,
      "step": 45337
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9244436025619507,
      "learning_rate": 7.281172332774282e-06,
      "loss": 2.4237,
      "step": 45338
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1105257272720337,
      "learning_rate": 7.280776108285928e-06,
      "loss": 2.3819,
      "step": 45339
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0640895366668701,
      "learning_rate": 7.280379888407397e-06,
      "loss": 2.263,
      "step": 45340
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0756468772888184,
      "learning_rate": 7.279983673139362e-06,
      "loss": 2.2921,
      "step": 45341
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.070948600769043,
      "learning_rate": 7.279587462482494e-06,
      "loss": 2.4342,
      "step": 45342
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.080819845199585,
      "learning_rate": 7.279191256437461e-06,
      "loss": 2.4345,
      "step": 45343
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1622999906539917,
      "learning_rate": 7.278795055004939e-06,
      "loss": 2.3812,
      "step": 45344
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.200275182723999,
      "learning_rate": 7.278398858185598e-06,
      "loss": 2.4453,
      "step": 45345
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1594970226287842,
      "learning_rate": 7.2780026659801106e-06,
      "loss": 2.2645,
      "step": 45346
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2951887845993042,
      "learning_rate": 7.277606478389151e-06,
      "loss": 2.3085,
      "step": 45347
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.158926010131836,
      "learning_rate": 7.2772102954133836e-06,
      "loss": 2.3745,
      "step": 45348
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1181509494781494,
      "learning_rate": 7.276814117053488e-06,
      "loss": 2.4882,
      "step": 45349
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1570240259170532,
      "learning_rate": 7.27641794331013e-06,
      "loss": 2.235,
      "step": 45350
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1231058835983276,
      "learning_rate": 7.276021774183983e-06,
      "loss": 2.6549,
      "step": 45351
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.112378716468811,
      "learning_rate": 7.275625609675718e-06,
      "loss": 2.5513,
      "step": 45352
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0278488397598267,
      "learning_rate": 7.275229449786009e-06,
      "loss": 2.2906,
      "step": 45353
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0294536352157593,
      "learning_rate": 7.274833294515524e-06,
      "loss": 2.5361,
      "step": 45354
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0966278314590454,
      "learning_rate": 7.274437143864938e-06,
      "loss": 2.4666,
      "step": 45355
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1456925868988037,
      "learning_rate": 7.274040997834922e-06,
      "loss": 2.2878,
      "step": 45356
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.007175326347351,
      "learning_rate": 7.273644856426147e-06,
      "loss": 2.499,
      "step": 45357
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1612685918807983,
      "learning_rate": 7.273248719639283e-06,
      "loss": 2.5022,
      "step": 45358
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1358047723770142,
      "learning_rate": 7.272852587475004e-06,
      "loss": 2.216,
      "step": 45359
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0898686647415161,
      "learning_rate": 7.272456459933984e-06,
      "loss": 2.071,
      "step": 45360
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.728135347366333,
      "learning_rate": 7.272060337016887e-06,
      "loss": 2.3404,
      "step": 45361
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.10769522190094,
      "learning_rate": 7.271664218724389e-06,
      "loss": 2.3488,
      "step": 45362
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5740288496017456,
      "learning_rate": 7.271268105057161e-06,
      "loss": 2.2294,
      "step": 45363
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1540483236312866,
      "learning_rate": 7.270871996015876e-06,
      "loss": 2.3545,
      "step": 45364
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0040093660354614,
      "learning_rate": 7.270475891601202e-06,
      "loss": 2.2644,
      "step": 45365
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1688364744186401,
      "learning_rate": 7.270079791813813e-06,
      "loss": 2.441,
      "step": 45366
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1679213047027588,
      "learning_rate": 7.269683696654381e-06,
      "loss": 2.6813,
      "step": 45367
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9838184714317322,
      "learning_rate": 7.269287606123576e-06,
      "loss": 2.3599,
      "step": 45368
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0308563709259033,
      "learning_rate": 7.268891520222072e-06,
      "loss": 2.2307,
      "step": 45369
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0355656147003174,
      "learning_rate": 7.268495438950538e-06,
      "loss": 2.2818,
      "step": 45370
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1508373022079468,
      "learning_rate": 7.268099362309646e-06,
      "loss": 2.5507,
      "step": 45371
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1040107011795044,
      "learning_rate": 7.267703290300068e-06,
      "loss": 2.3701,
      "step": 45372
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0977275371551514,
      "learning_rate": 7.267307222922475e-06,
      "loss": 2.3535,
      "step": 45373
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.137736201286316,
      "learning_rate": 7.266911160177538e-06,
      "loss": 2.4185,
      "step": 45374
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1216667890548706,
      "learning_rate": 7.266515102065928e-06,
      "loss": 2.5449,
      "step": 45375
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.223781704902649,
      "learning_rate": 7.266119048588317e-06,
      "loss": 2.3985,
      "step": 45376
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0801336765289307,
      "learning_rate": 7.2657229997453795e-06,
      "loss": 2.2074,
      "step": 45377
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.079877257347107,
      "learning_rate": 7.265326955537781e-06,
      "loss": 2.2622,
      "step": 45378
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1586638689041138,
      "learning_rate": 7.264930915966197e-06,
      "loss": 2.316,
      "step": 45379
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0468440055847168,
      "learning_rate": 7.264534881031299e-06,
      "loss": 2.39,
      "step": 45380
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1313576698303223,
      "learning_rate": 7.264138850733756e-06,
      "loss": 2.1524,
      "step": 45381
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1065242290496826,
      "learning_rate": 7.26374282507424e-06,
      "loss": 2.345,
      "step": 45382
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.195972204208374,
      "learning_rate": 7.263346804053425e-06,
      "loss": 2.2138,
      "step": 45383
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9782840013504028,
      "learning_rate": 7.262950787671979e-06,
      "loss": 2.2784,
      "step": 45384
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.077232003211975,
      "learning_rate": 7.2625547759305755e-06,
      "loss": 2.5313,
      "step": 45385
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0755144357681274,
      "learning_rate": 7.26215876882989e-06,
      "loss": 2.3863,
      "step": 45386
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0503753423690796,
      "learning_rate": 7.261762766370584e-06,
      "loss": 2.3838,
      "step": 45387
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.229262113571167,
      "learning_rate": 7.261366768553333e-06,
      "loss": 2.1543,
      "step": 45388
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0785465240478516,
      "learning_rate": 7.260970775378809e-06,
      "loss": 2.2905,
      "step": 45389
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.026276707649231,
      "learning_rate": 7.260574786847685e-06,
      "loss": 2.295,
      "step": 45390
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0317269563674927,
      "learning_rate": 7.260178802960629e-06,
      "loss": 2.1446,
      "step": 45391
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0732260942459106,
      "learning_rate": 7.259782823718316e-06,
      "loss": 2.2962,
      "step": 45392
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1729736328125,
      "learning_rate": 7.259386849121414e-06,
      "loss": 2.4377,
      "step": 45393
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.085259199142456,
      "learning_rate": 7.258990879170596e-06,
      "loss": 2.2352,
      "step": 45394
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1030327081680298,
      "learning_rate": 7.258594913866532e-06,
      "loss": 2.1737,
      "step": 45395
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1353631019592285,
      "learning_rate": 7.258198953209897e-06,
      "loss": 2.5493,
      "step": 45396
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1326113939285278,
      "learning_rate": 7.257802997201357e-06,
      "loss": 2.2997,
      "step": 45397
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.099839448928833,
      "learning_rate": 7.257407045841587e-06,
      "loss": 2.3819,
      "step": 45398
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0290498733520508,
      "learning_rate": 7.2570110991312595e-06,
      "loss": 2.2685,
      "step": 45399
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0189177989959717,
      "learning_rate": 7.256615157071039e-06,
      "loss": 2.385,
      "step": 45400
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0437755584716797,
      "learning_rate": 7.256219219661602e-06,
      "loss": 2.4936,
      "step": 45401
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0494544506072998,
      "learning_rate": 7.255823286903618e-06,
      "loss": 2.2636,
      "step": 45402
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0396884679794312,
      "learning_rate": 7.25542735879776e-06,
      "loss": 2.2338,
      "step": 45403
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0050921440124512,
      "learning_rate": 7.255031435344697e-06,
      "loss": 2.2157,
      "step": 45404
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0211564302444458,
      "learning_rate": 7.2546355165451034e-06,
      "loss": 2.4835,
      "step": 45405
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1290525197982788,
      "learning_rate": 7.2542396023996465e-06,
      "loss": 2.3532,
      "step": 45406
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1437102556228638,
      "learning_rate": 7.253843692908998e-06,
      "loss": 2.3089,
      "step": 45407
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0320366621017456,
      "learning_rate": 7.253447788073831e-06,
      "loss": 2.5583,
      "step": 45408
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1662379503250122,
      "learning_rate": 7.253051887894817e-06,
      "loss": 2.3023,
      "step": 45409
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.055338978767395,
      "learning_rate": 7.2526559923726255e-06,
      "loss": 2.2261,
      "step": 45410
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.067042350769043,
      "learning_rate": 7.2522601015079275e-06,
      "loss": 2.3931,
      "step": 45411
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1241040229797363,
      "learning_rate": 7.251864215301397e-06,
      "loss": 2.5162,
      "step": 45412
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0084935426712036,
      "learning_rate": 7.251468333753704e-06,
      "loss": 2.5208,
      "step": 45413
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0339105129241943,
      "learning_rate": 7.251072456865517e-06,
      "loss": 2.1495,
      "step": 45414
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.04950749874115,
      "learning_rate": 7.250676584637508e-06,
      "loss": 2.414,
      "step": 45415
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2530421018600464,
      "learning_rate": 7.250280717070348e-06,
      "loss": 2.1536,
      "step": 45416
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9764275550842285,
      "learning_rate": 7.249884854164711e-06,
      "loss": 2.2665,
      "step": 45417
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1340676546096802,
      "learning_rate": 7.249488995921264e-06,
      "loss": 2.3145,
      "step": 45418
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0306199789047241,
      "learning_rate": 7.249093142340682e-06,
      "loss": 2.2737,
      "step": 45419
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0890530347824097,
      "learning_rate": 7.2486972934236325e-06,
      "loss": 2.3095,
      "step": 45420
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1405901908874512,
      "learning_rate": 7.248301449170789e-06,
      "loss": 2.3022,
      "step": 45421
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0488343238830566,
      "learning_rate": 7.247905609582823e-06,
      "loss": 2.3492,
      "step": 45422
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.00137460231781,
      "learning_rate": 7.247509774660404e-06,
      "loss": 2.5127,
      "step": 45423
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0130767822265625,
      "learning_rate": 7.247113944404203e-06,
      "loss": 2.3543,
      "step": 45424
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0710313320159912,
      "learning_rate": 7.246718118814891e-06,
      "loss": 2.3188,
      "step": 45425
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0045260190963745,
      "learning_rate": 7.246322297893145e-06,
      "loss": 2.3792,
      "step": 45426
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0595628023147583,
      "learning_rate": 7.245926481639624e-06,
      "loss": 2.3049,
      "step": 45427
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0804098844528198,
      "learning_rate": 7.245530670055009e-06,
      "loss": 2.4782,
      "step": 45428
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0767278671264648,
      "learning_rate": 7.245134863139966e-06,
      "loss": 2.5098,
      "step": 45429
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2743078470230103,
      "learning_rate": 7.244739060895167e-06,
      "loss": 2.3694,
      "step": 45430
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5464210510253906,
      "learning_rate": 7.244343263321285e-06,
      "loss": 2.4301,
      "step": 45431
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1035776138305664,
      "learning_rate": 7.243947470418991e-06,
      "loss": 2.2936,
      "step": 45432
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1510095596313477,
      "learning_rate": 7.243551682188952e-06,
      "loss": 2.3805,
      "step": 45433
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0538051128387451,
      "learning_rate": 7.243155898631842e-06,
      "loss": 2.4554,
      "step": 45434
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.136810541152954,
      "learning_rate": 7.242760119748331e-06,
      "loss": 2.3979,
      "step": 45435
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0776907205581665,
      "learning_rate": 7.242364345539092e-06,
      "loss": 2.3828,
      "step": 45436
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0730427503585815,
      "learning_rate": 7.241968576004792e-06,
      "loss": 2.1239,
      "step": 45437
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1187541484832764,
      "learning_rate": 7.2415728111461056e-06,
      "loss": 2.2751,
      "step": 45438
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2543803453445435,
      "learning_rate": 7.241177050963708e-06,
      "loss": 2.256,
      "step": 45439
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2389044761657715,
      "learning_rate": 7.240781295458259e-06,
      "loss": 2.7184,
      "step": 45440
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9748836755752563,
      "learning_rate": 7.240385544630435e-06,
      "loss": 2.6114,
      "step": 45441
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2137147188186646,
      "learning_rate": 7.239989798480906e-06,
      "loss": 2.349,
      "step": 45442
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0622917413711548,
      "learning_rate": 7.2395940570103466e-06,
      "loss": 2.2886,
      "step": 45443
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.150516390800476,
      "learning_rate": 7.239198320219423e-06,
      "loss": 2.2758,
      "step": 45444
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.054998755455017,
      "learning_rate": 7.238802588108809e-06,
      "loss": 2.247,
      "step": 45445
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.020593285560608,
      "learning_rate": 7.238406860679172e-06,
      "loss": 2.328,
      "step": 45446
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.277122974395752,
      "learning_rate": 7.238011137931189e-06,
      "loss": 2.2752,
      "step": 45447
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0148561000823975,
      "learning_rate": 7.2376154198655255e-06,
      "loss": 2.5391,
      "step": 45448
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2288364171981812,
      "learning_rate": 7.2372197064828546e-06,
      "loss": 2.3665,
      "step": 45449
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0641506910324097,
      "learning_rate": 7.2368239977838464e-06,
      "loss": 2.3217,
      "step": 45450
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1006779670715332,
      "learning_rate": 7.236428293769172e-06,
      "loss": 2.3564,
      "step": 45451
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0402346849441528,
      "learning_rate": 7.236032594439506e-06,
      "loss": 2.3753,
      "step": 45452
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0756455659866333,
      "learning_rate": 7.235636899795512e-06,
      "loss": 2.3799,
      "step": 45453
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.252482533454895,
      "learning_rate": 7.235241209837864e-06,
      "loss": 2.3544,
      "step": 45454
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0524981021881104,
      "learning_rate": 7.2348455245672335e-06,
      "loss": 2.3295,
      "step": 45455
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9707306027412415,
      "learning_rate": 7.234449843984291e-06,
      "loss": 2.2579,
      "step": 45456
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1045653820037842,
      "learning_rate": 7.2340541680897055e-06,
      "loss": 2.4092,
      "step": 45457
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1050142049789429,
      "learning_rate": 7.233658496884151e-06,
      "loss": 2.3636,
      "step": 45458
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1125285625457764,
      "learning_rate": 7.233262830368295e-06,
      "loss": 2.3063,
      "step": 45459
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2917920351028442,
      "learning_rate": 7.232867168542813e-06,
      "loss": 2.103,
      "step": 45460
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.033115267753601,
      "learning_rate": 7.232471511408371e-06,
      "loss": 2.1741,
      "step": 45461
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1454694271087646,
      "learning_rate": 7.232075858965641e-06,
      "loss": 2.1358,
      "step": 45462
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.674408197402954,
      "learning_rate": 7.231680211215294e-06,
      "loss": 2.4142,
      "step": 45463
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1031568050384521,
      "learning_rate": 7.231284568158001e-06,
      "loss": 2.3208,
      "step": 45464
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0678889751434326,
      "learning_rate": 7.230888929794435e-06,
      "loss": 2.4718,
      "step": 45465
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1017825603485107,
      "learning_rate": 7.2304932961252625e-06,
      "loss": 2.4175,
      "step": 45466
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0621590614318848,
      "learning_rate": 7.230097667151158e-06,
      "loss": 2.4611,
      "step": 45467
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.041191577911377,
      "learning_rate": 7.229702042872788e-06,
      "loss": 2.2723,
      "step": 45468
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2169666290283203,
      "learning_rate": 7.229306423290825e-06,
      "loss": 2.4199,
      "step": 45469
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.136666178703308,
      "learning_rate": 7.22891080840594e-06,
      "loss": 2.5436,
      "step": 45470
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.120990514755249,
      "learning_rate": 7.228515198218805e-06,
      "loss": 2.2529,
      "step": 45471
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.084970474243164,
      "learning_rate": 7.228119592730089e-06,
      "loss": 2.3484,
      "step": 45472
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4307196140289307,
      "learning_rate": 7.227723991940463e-06,
      "loss": 2.2668,
      "step": 45473
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0527042150497437,
      "learning_rate": 7.227328395850597e-06,
      "loss": 2.2394,
      "step": 45474
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.267149806022644,
      "learning_rate": 7.226932804461164e-06,
      "loss": 2.4533,
      "step": 45475
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0021064281463623,
      "learning_rate": 7.226537217772831e-06,
      "loss": 2.2625,
      "step": 45476
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.22671639919281,
      "learning_rate": 7.226141635786272e-06,
      "loss": 2.3098,
      "step": 45477
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0400842428207397,
      "learning_rate": 7.225746058502161e-06,
      "loss": 2.3959,
      "step": 45478
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.142792820930481,
      "learning_rate": 7.225350485921159e-06,
      "loss": 2.4465,
      "step": 45479
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1996824741363525,
      "learning_rate": 7.224954918043942e-06,
      "loss": 2.4127,
      "step": 45480
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0204741954803467,
      "learning_rate": 7.224559354871179e-06,
      "loss": 2.1042,
      "step": 45481
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2051115036010742,
      "learning_rate": 7.224163796403542e-06,
      "loss": 2.4213,
      "step": 45482
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1052151918411255,
      "learning_rate": 7.223768242641702e-06,
      "loss": 2.3812,
      "step": 45483
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.465544581413269,
      "learning_rate": 7.223372693586328e-06,
      "loss": 2.2202,
      "step": 45484
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6424719095230103,
      "learning_rate": 7.222977149238093e-06,
      "loss": 2.4405,
      "step": 45485
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0509657859802246,
      "learning_rate": 7.2225816095976645e-06,
      "loss": 2.431,
      "step": 45486
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1680781841278076,
      "learning_rate": 7.222186074665715e-06,
      "loss": 2.3806,
      "step": 45487
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0182480812072754,
      "learning_rate": 7.221790544442916e-06,
      "loss": 2.4693,
      "step": 45488
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.237724781036377,
      "learning_rate": 7.221395018929935e-06,
      "loss": 2.3813,
      "step": 45489
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9704245328903198,
      "learning_rate": 7.220999498127443e-06,
      "loss": 2.2983,
      "step": 45490
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0735286474227905,
      "learning_rate": 7.220603982036114e-06,
      "loss": 2.2187,
      "step": 45491
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0480483770370483,
      "learning_rate": 7.220208470656617e-06,
      "loss": 2.7044,
      "step": 45492
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0760620832443237,
      "learning_rate": 7.219812963989619e-06,
      "loss": 2.3915,
      "step": 45493
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.011918306350708,
      "learning_rate": 7.219417462035794e-06,
      "loss": 2.2064,
      "step": 45494
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0678399801254272,
      "learning_rate": 7.219021964795812e-06,
      "loss": 2.433,
      "step": 45495
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2458279132843018,
      "learning_rate": 7.2186264722703425e-06,
      "loss": 2.3175,
      "step": 45496
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0791544914245605,
      "learning_rate": 7.218230984460057e-06,
      "loss": 2.2922,
      "step": 45497
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0616545677185059,
      "learning_rate": 7.217835501365624e-06,
      "loss": 2.5327,
      "step": 45498
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1002436876296997,
      "learning_rate": 7.217440022987716e-06,
      "loss": 2.0979,
      "step": 45499
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0622856616973877,
      "learning_rate": 7.217044549327003e-06,
      "loss": 2.4759,
      "step": 45500
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0868393182754517,
      "learning_rate": 7.216649080384156e-06,
      "loss": 2.3312,
      "step": 45501
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.125360369682312,
      "learning_rate": 7.216253616159845e-06,
      "loss": 2.3147,
      "step": 45502
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0384045839309692,
      "learning_rate": 7.215858156654738e-06,
      "loss": 2.3007,
      "step": 45503
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0314610004425049,
      "learning_rate": 7.215462701869509e-06,
      "loss": 2.4486,
      "step": 45504
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1181401014328003,
      "learning_rate": 7.2150672518048326e-06,
      "loss": 2.3921,
      "step": 45505
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0330582857131958,
      "learning_rate": 7.214671806461366e-06,
      "loss": 2.3573,
      "step": 45506
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.255802869796753,
      "learning_rate": 7.214276365839789e-06,
      "loss": 2.3541,
      "step": 45507
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2121878862380981,
      "learning_rate": 7.213880929940771e-06,
      "loss": 2.4078,
      "step": 45508
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.167604684829712,
      "learning_rate": 7.21348549876498e-06,
      "loss": 2.3774,
      "step": 45509
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1963547468185425,
      "learning_rate": 7.213090072313089e-06,
      "loss": 2.6038,
      "step": 45510
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0874018669128418,
      "learning_rate": 7.212694650585768e-06,
      "loss": 2.3994,
      "step": 45511
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.094044804573059,
      "learning_rate": 7.212299233583685e-06,
      "loss": 2.3972,
      "step": 45512
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.082063913345337,
      "learning_rate": 7.211903821307513e-06,
      "loss": 2.5145,
      "step": 45513
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0300222635269165,
      "learning_rate": 7.21150841375792e-06,
      "loss": 2.1635,
      "step": 45514
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.011081337928772,
      "learning_rate": 7.211113010935579e-06,
      "loss": 2.3856,
      "step": 45515
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0748347043991089,
      "learning_rate": 7.210717612841161e-06,
      "loss": 2.1656,
      "step": 45516
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0658034086227417,
      "learning_rate": 7.21032221947533e-06,
      "loss": 2.177,
      "step": 45517
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.048125982284546,
      "learning_rate": 7.2099268308387645e-06,
      "loss": 2.3964,
      "step": 45518
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2011348009109497,
      "learning_rate": 7.209531446932128e-06,
      "loss": 2.1684,
      "step": 45519
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0748475790023804,
      "learning_rate": 7.209136067756093e-06,
      "loss": 2.5507,
      "step": 45520
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9969282746315002,
      "learning_rate": 7.2087406933113314e-06,
      "loss": 2.1268,
      "step": 45521
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1209317445755005,
      "learning_rate": 7.208345323598512e-06,
      "loss": 2.2472,
      "step": 45522
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0777502059936523,
      "learning_rate": 7.207949958618308e-06,
      "loss": 2.0929,
      "step": 45523
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1575188636779785,
      "learning_rate": 7.207554598371384e-06,
      "loss": 2.171,
      "step": 45524
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1148622035980225,
      "learning_rate": 7.207159242858415e-06,
      "loss": 2.6425,
      "step": 45525
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4185987710952759,
      "learning_rate": 7.206763892080068e-06,
      "loss": 2.4497,
      "step": 45526
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1947462558746338,
      "learning_rate": 7.2063685460370155e-06,
      "loss": 2.2685,
      "step": 45527
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1999061107635498,
      "learning_rate": 7.205973204729928e-06,
      "loss": 2.3892,
      "step": 45528
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.075616717338562,
      "learning_rate": 7.205577868159473e-06,
      "loss": 2.2545,
      "step": 45529
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6766506433486938,
      "learning_rate": 7.205182536326324e-06,
      "loss": 2.3209,
      "step": 45530
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1516413688659668,
      "learning_rate": 7.204787209231153e-06,
      "loss": 2.2208,
      "step": 45531
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.808872938156128,
      "learning_rate": 7.204391886874623e-06,
      "loss": 2.4557,
      "step": 45532
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9794614315032959,
      "learning_rate": 7.203996569257408e-06,
      "loss": 2.409,
      "step": 45533
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0332562923431396,
      "learning_rate": 7.2036012563801775e-06,
      "loss": 2.3644,
      "step": 45534
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0530641078948975,
      "learning_rate": 7.203205948243603e-06,
      "loss": 2.352,
      "step": 45535
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1964174509048462,
      "learning_rate": 7.202810644848355e-06,
      "loss": 2.4945,
      "step": 45536
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1015759706497192,
      "learning_rate": 7.202415346195101e-06,
      "loss": 2.2396,
      "step": 45537
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.784250020980835,
      "learning_rate": 7.202020052284513e-06,
      "loss": 2.2144,
      "step": 45538
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1270298957824707,
      "learning_rate": 7.201624763117261e-06,
      "loss": 2.2132,
      "step": 45539
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0390102863311768,
      "learning_rate": 7.201229478694017e-06,
      "loss": 2.1858,
      "step": 45540
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1155978441238403,
      "learning_rate": 7.200834199015447e-06,
      "loss": 2.4798,
      "step": 45541
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3178588151931763,
      "learning_rate": 7.200438924082224e-06,
      "loss": 2.1548,
      "step": 45542
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1053352355957031,
      "learning_rate": 7.200043653895017e-06,
      "loss": 2.1047,
      "step": 45543
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2977321147918701,
      "learning_rate": 7.199648388454501e-06,
      "loss": 2.1156,
      "step": 45544
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3066520690917969,
      "learning_rate": 7.199253127761337e-06,
      "loss": 2.3364,
      "step": 45545
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0888948440551758,
      "learning_rate": 7.198857871816199e-06,
      "loss": 2.5666,
      "step": 45546
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9610234498977661,
      "learning_rate": 7.19846262061976e-06,
      "loss": 2.5668,
      "step": 45547
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.014454960823059,
      "learning_rate": 7.198067374172686e-06,
      "loss": 2.3802,
      "step": 45548
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1108412742614746,
      "learning_rate": 7.197672132475648e-06,
      "loss": 2.6177,
      "step": 45549
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.033291220664978,
      "learning_rate": 7.1972768955293205e-06,
      "loss": 2.2967,
      "step": 45550
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0908316373825073,
      "learning_rate": 7.196881663334368e-06,
      "loss": 2.331,
      "step": 45551
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.068566083908081,
      "learning_rate": 7.196486435891461e-06,
      "loss": 2.2256,
      "step": 45552
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0180448293685913,
      "learning_rate": 7.196091213201272e-06,
      "loss": 2.294,
      "step": 45553
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0646110773086548,
      "learning_rate": 7.195695995264469e-06,
      "loss": 2.3837,
      "step": 45554
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1389235258102417,
      "learning_rate": 7.195300782081724e-06,
      "loss": 2.5006,
      "step": 45555
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0330209732055664,
      "learning_rate": 7.1949055736537054e-06,
      "loss": 2.3146,
      "step": 45556
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.066948413848877,
      "learning_rate": 7.194510369981087e-06,
      "loss": 2.5158,
      "step": 45557
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0720789432525635,
      "learning_rate": 7.194115171064533e-06,
      "loss": 2.4569,
      "step": 45558
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9881476759910583,
      "learning_rate": 7.193719976904714e-06,
      "loss": 2.5215,
      "step": 45559
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2989200353622437,
      "learning_rate": 7.193324787502304e-06,
      "loss": 2.3138,
      "step": 45560
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.081217646598816,
      "learning_rate": 7.19292960285797e-06,
      "loss": 2.2882,
      "step": 45561
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1124863624572754,
      "learning_rate": 7.192534422972382e-06,
      "loss": 2.2828,
      "step": 45562
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0885822772979736,
      "learning_rate": 7.19213924784621e-06,
      "loss": 2.2541,
      "step": 45563
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.103341817855835,
      "learning_rate": 7.191744077480126e-06,
      "loss": 2.1961,
      "step": 45564
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0053212642669678,
      "learning_rate": 7.191348911874798e-06,
      "loss": 2.3529,
      "step": 45565
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2410997152328491,
      "learning_rate": 7.190953751030895e-06,
      "loss": 2.2886,
      "step": 45566
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1159614324569702,
      "learning_rate": 7.190558594949091e-06,
      "loss": 2.4493,
      "step": 45567
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.117737054824829,
      "learning_rate": 7.190163443630052e-06,
      "loss": 2.4694,
      "step": 45568
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0329527854919434,
      "learning_rate": 7.189768297074449e-06,
      "loss": 2.1988,
      "step": 45569
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9534081816673279,
      "learning_rate": 7.1893731552829525e-06,
      "loss": 2.2889,
      "step": 45570
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0631272792816162,
      "learning_rate": 7.1889780182562366e-06,
      "loss": 2.2783,
      "step": 45571
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1443184614181519,
      "learning_rate": 7.18858288599496e-06,
      "loss": 2.2206,
      "step": 45572
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1106836795806885,
      "learning_rate": 7.1881877584998e-06,
      "loss": 2.346,
      "step": 45573
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.195560097694397,
      "learning_rate": 7.1877926357714265e-06,
      "loss": 2.3923,
      "step": 45574
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0711430311203003,
      "learning_rate": 7.187397517810507e-06,
      "loss": 2.371,
      "step": 45575
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0832502841949463,
      "learning_rate": 7.187002404617713e-06,
      "loss": 2.5917,
      "step": 45576
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1185531616210938,
      "learning_rate": 7.186607296193714e-06,
      "loss": 2.4386,
      "step": 45577
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0225328207015991,
      "learning_rate": 7.1862121925391815e-06,
      "loss": 2.1618,
      "step": 45578
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1713091135025024,
      "learning_rate": 7.185817093654782e-06,
      "loss": 2.2286,
      "step": 45579
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9276300072669983,
      "learning_rate": 7.185421999541187e-06,
      "loss": 2.3977,
      "step": 45580
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1684664487838745,
      "learning_rate": 7.185026910199066e-06,
      "loss": 2.1379,
      "step": 45581
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0645095109939575,
      "learning_rate": 7.184631825629088e-06,
      "loss": 2.3025,
      "step": 45582
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0570303201675415,
      "learning_rate": 7.184236745831925e-06,
      "loss": 2.3306,
      "step": 45583
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4544461965560913,
      "learning_rate": 7.183841670808247e-06,
      "loss": 2.3349,
      "step": 45584
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1845450401306152,
      "learning_rate": 7.183446600558721e-06,
      "loss": 2.4146,
      "step": 45585
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1615270376205444,
      "learning_rate": 7.183051535084018e-06,
      "loss": 2.0869,
      "step": 45586
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1562272310256958,
      "learning_rate": 7.182656474384806e-06,
      "loss": 2.3056,
      "step": 45587
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2233234643936157,
      "learning_rate": 7.182261418461758e-06,
      "loss": 2.3183,
      "step": 45588
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.158426284790039,
      "learning_rate": 7.181866367315541e-06,
      "loss": 2.1449,
      "step": 45589
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1083317995071411,
      "learning_rate": 7.181471320946827e-06,
      "loss": 2.3805,
      "step": 45590
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.102293610572815,
      "learning_rate": 7.181076279356283e-06,
      "loss": 2.4987,
      "step": 45591
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1309891939163208,
      "learning_rate": 7.18068124254458e-06,
      "loss": 2.2117,
      "step": 45592
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9927124381065369,
      "learning_rate": 7.180286210512391e-06,
      "loss": 2.173,
      "step": 45593
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9829421043395996,
      "learning_rate": 7.17989118326038e-06,
      "loss": 2.4436,
      "step": 45594
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1695818901062012,
      "learning_rate": 7.179496160789221e-06,
      "loss": 2.1003,
      "step": 45595
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2246805429458618,
      "learning_rate": 7.179101143099581e-06,
      "loss": 2.2056,
      "step": 45596
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1129648685455322,
      "learning_rate": 7.178706130192135e-06,
      "loss": 2.3676,
      "step": 45597
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1155595779418945,
      "learning_rate": 7.178311122067545e-06,
      "loss": 2.4444,
      "step": 45598
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.082452416419983,
      "learning_rate": 7.177916118726482e-06,
      "loss": 2.4208,
      "step": 45599
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.050389051437378,
      "learning_rate": 7.177521120169619e-06,
      "loss": 2.3359,
      "step": 45600
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1521023511886597,
      "learning_rate": 7.177126126397626e-06,
      "loss": 2.3251,
      "step": 45601
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9979492425918579,
      "learning_rate": 7.1767311374111684e-06,
      "loss": 2.3725,
      "step": 45602
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1094387769699097,
      "learning_rate": 7.176336153210919e-06,
      "loss": 2.1082,
      "step": 45603
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0971486568450928,
      "learning_rate": 7.175941173797547e-06,
      "loss": 2.3491,
      "step": 45604
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0129761695861816,
      "learning_rate": 7.175546199171721e-06,
      "loss": 2.1318,
      "step": 45605
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.99200838804245,
      "learning_rate": 7.175151229334114e-06,
      "loss": 2.3114,
      "step": 45606
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.117674469947815,
      "learning_rate": 7.174756264285392e-06,
      "loss": 2.0538,
      "step": 45607
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0745481252670288,
      "learning_rate": 7.174361304026224e-06,
      "loss": 2.3447,
      "step": 45608
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0503615140914917,
      "learning_rate": 7.173966348557282e-06,
      "loss": 2.3906,
      "step": 45609
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9891780614852905,
      "learning_rate": 7.173571397879235e-06,
      "loss": 2.3628,
      "step": 45610
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.981700599193573,
      "learning_rate": 7.173176451992752e-06,
      "loss": 2.3156,
      "step": 45611
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9877352714538574,
      "learning_rate": 7.1727815108985014e-06,
      "loss": 2.5236,
      "step": 45612
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.204822063446045,
      "learning_rate": 7.172386574597156e-06,
      "loss": 2.0945,
      "step": 45613
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0024033784866333,
      "learning_rate": 7.171991643089381e-06,
      "loss": 2.4539,
      "step": 45614
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0788730382919312,
      "learning_rate": 7.1715967163758485e-06,
      "loss": 2.4722,
      "step": 45615
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.217589020729065,
      "learning_rate": 7.171201794457229e-06,
      "loss": 2.3099,
      "step": 45616
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0747560262680054,
      "learning_rate": 7.170806877334189e-06,
      "loss": 2.2133,
      "step": 45617
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1558891534805298,
      "learning_rate": 7.1704119650074015e-06,
      "loss": 2.0963,
      "step": 45618
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9729412794113159,
      "learning_rate": 7.170017057477532e-06,
      "loss": 2.1289,
      "step": 45619
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0955291986465454,
      "learning_rate": 7.169622154745255e-06,
      "loss": 2.3698,
      "step": 45620
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9787024855613708,
      "learning_rate": 7.169227256811235e-06,
      "loss": 2.3903,
      "step": 45621
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.11075758934021,
      "learning_rate": 7.168832363676144e-06,
      "loss": 2.5272,
      "step": 45622
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1050060987472534,
      "learning_rate": 7.168437475340655e-06,
      "loss": 2.6736,
      "step": 45623
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.071047067642212,
      "learning_rate": 7.168042591805431e-06,
      "loss": 2.4226,
      "step": 45624
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.212258219718933,
      "learning_rate": 7.167647713071142e-06,
      "loss": 2.4154,
      "step": 45625
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1793478727340698,
      "learning_rate": 7.167252839138461e-06,
      "loss": 2.3415,
      "step": 45626
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0583661794662476,
      "learning_rate": 7.166857970008054e-06,
      "loss": 2.1958,
      "step": 45627
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.088422179222107,
      "learning_rate": 7.166463105680595e-06,
      "loss": 2.5634,
      "step": 45628
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0481042861938477,
      "learning_rate": 7.166068246156748e-06,
      "loss": 2.2192,
      "step": 45629
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1484612226486206,
      "learning_rate": 7.165673391437185e-06,
      "loss": 2.22,
      "step": 45630
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0539321899414062,
      "learning_rate": 7.165278541522577e-06,
      "loss": 2.3786,
      "step": 45631
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1831603050231934,
      "learning_rate": 7.16488369641359e-06,
      "loss": 2.5451,
      "step": 45632
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0199189186096191,
      "learning_rate": 7.164488856110896e-06,
      "loss": 2.4005,
      "step": 45633
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2247180938720703,
      "learning_rate": 7.164094020615167e-06,
      "loss": 2.6028,
      "step": 45634
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9838370084762573,
      "learning_rate": 7.163699189927065e-06,
      "loss": 2.2142,
      "step": 45635
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.046235203742981,
      "learning_rate": 7.163304364047263e-06,
      "loss": 2.6034,
      "step": 45636
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1019374132156372,
      "learning_rate": 7.162909542976434e-06,
      "loss": 2.1904,
      "step": 45637
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.174941897392273,
      "learning_rate": 7.162514726715242e-06,
      "loss": 2.2439,
      "step": 45638
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1324599981307983,
      "learning_rate": 7.162119915264356e-06,
      "loss": 2.2634,
      "step": 45639
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1942592859268188,
      "learning_rate": 7.161725108624449e-06,
      "loss": 2.374,
      "step": 45640
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0089693069458008,
      "learning_rate": 7.161330306796191e-06,
      "loss": 2.2573,
      "step": 45641
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1104283332824707,
      "learning_rate": 7.160935509780247e-06,
      "loss": 2.338,
      "step": 45642
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0610976219177246,
      "learning_rate": 7.160540717577288e-06,
      "loss": 2.2935,
      "step": 45643
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1045657396316528,
      "learning_rate": 7.160145930187983e-06,
      "loss": 2.3028,
      "step": 45644
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0752828121185303,
      "learning_rate": 7.159751147613003e-06,
      "loss": 2.2906,
      "step": 45645
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2013018131256104,
      "learning_rate": 7.159356369853016e-06,
      "loss": 2.3104,
      "step": 45646
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0373321771621704,
      "learning_rate": 7.1589615969086914e-06,
      "loss": 2.3659,
      "step": 45647
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.024832010269165,
      "learning_rate": 7.158566828780698e-06,
      "loss": 2.1259,
      "step": 45648
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1625218391418457,
      "learning_rate": 7.158172065469706e-06,
      "loss": 2.2575,
      "step": 45649
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2435827255249023,
      "learning_rate": 7.157777306976389e-06,
      "loss": 2.2418,
      "step": 45650
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.059234380722046,
      "learning_rate": 7.1573825533014065e-06,
      "loss": 2.4508,
      "step": 45651
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0488934516906738,
      "learning_rate": 7.156987804445432e-06,
      "loss": 2.3056,
      "step": 45652
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0281164646148682,
      "learning_rate": 7.156593060409137e-06,
      "loss": 2.1592,
      "step": 45653
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.07109534740448,
      "learning_rate": 7.156198321193188e-06,
      "loss": 2.5142,
      "step": 45654
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1041741371154785,
      "learning_rate": 7.155803586798256e-06,
      "loss": 2.2794,
      "step": 45655
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0204004049301147,
      "learning_rate": 7.155408857225008e-06,
      "loss": 2.3798,
      "step": 45656
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.068386197090149,
      "learning_rate": 7.155014132474115e-06,
      "loss": 2.1558,
      "step": 45657
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1029329299926758,
      "learning_rate": 7.154619412546246e-06,
      "loss": 2.3384,
      "step": 45658
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0106526613235474,
      "learning_rate": 7.15422469744207e-06,
      "loss": 2.4802,
      "step": 45659
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0929096937179565,
      "learning_rate": 7.153829987162256e-06,
      "loss": 2.5887,
      "step": 45660
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1163058280944824,
      "learning_rate": 7.153435281707473e-06,
      "loss": 2.2065,
      "step": 45661
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0469540357589722,
      "learning_rate": 7.153040581078392e-06,
      "loss": 2.3352,
      "step": 45662
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1633983850479126,
      "learning_rate": 7.1526458852756815e-06,
      "loss": 2.159,
      "step": 45663
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1026232242584229,
      "learning_rate": 7.152251194300007e-06,
      "loss": 2.1605,
      "step": 45664
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0981178283691406,
      "learning_rate": 7.151856508152039e-06,
      "loss": 2.4189,
      "step": 45665
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1205893754959106,
      "learning_rate": 7.15146182683245e-06,
      "loss": 2.4785,
      "step": 45666
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1335214376449585,
      "learning_rate": 7.151067150341906e-06,
      "loss": 2.2245,
      "step": 45667
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.00919508934021,
      "learning_rate": 7.150672478681076e-06,
      "loss": 2.2465,
      "step": 45668
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0946931838989258,
      "learning_rate": 7.150277811850632e-06,
      "loss": 2.2965,
      "step": 45669
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1269724369049072,
      "learning_rate": 7.14988314985124e-06,
      "loss": 2.3136,
      "step": 45670
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0506949424743652,
      "learning_rate": 7.149488492683571e-06,
      "loss": 2.3625,
      "step": 45671
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0573385953903198,
      "learning_rate": 7.149093840348291e-06,
      "loss": 2.3455,
      "step": 45672
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.029098629951477,
      "learning_rate": 7.1486991928460736e-06,
      "loss": 2.1792,
      "step": 45673
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1478090286254883,
      "learning_rate": 7.148304550177583e-06,
      "loss": 2.3824,
      "step": 45674
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2110785245895386,
      "learning_rate": 7.147909912343492e-06,
      "loss": 2.3675,
      "step": 45675
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0995535850524902,
      "learning_rate": 7.147515279344472e-06,
      "loss": 2.2337,
      "step": 45676
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0743223428726196,
      "learning_rate": 7.147120651181186e-06,
      "loss": 2.0507,
      "step": 45677
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0563853979110718,
      "learning_rate": 7.146726027854302e-06,
      "loss": 2.2243,
      "step": 45678
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2436543703079224,
      "learning_rate": 7.146331409364496e-06,
      "loss": 2.4472,
      "step": 45679
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1022509336471558,
      "learning_rate": 7.145936795712431e-06,
      "loss": 2.2531,
      "step": 45680
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9870467185974121,
      "learning_rate": 7.145542186898778e-06,
      "loss": 2.057,
      "step": 45681
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2920334339141846,
      "learning_rate": 7.145147582924208e-06,
      "loss": 2.4525,
      "step": 45682
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1056265830993652,
      "learning_rate": 7.144752983789387e-06,
      "loss": 2.1208,
      "step": 45683
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0010520219802856,
      "learning_rate": 7.144358389494985e-06,
      "loss": 2.1501,
      "step": 45684
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1521210670471191,
      "learning_rate": 7.143963800041671e-06,
      "loss": 2.5457,
      "step": 45685
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1247729063034058,
      "learning_rate": 7.143569215430115e-06,
      "loss": 2.1232,
      "step": 45686
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0411081314086914,
      "learning_rate": 7.143174635660986e-06,
      "loss": 2.3631,
      "step": 45687
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1929844617843628,
      "learning_rate": 7.142780060734948e-06,
      "loss": 2.3874,
      "step": 45688
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.9586480855941772,
      "learning_rate": 7.142385490652681e-06,
      "loss": 2.1493,
      "step": 45689
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.007419466972351,
      "learning_rate": 7.141990925414842e-06,
      "loss": 2.3457,
      "step": 45690
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0678231716156006,
      "learning_rate": 7.141596365022106e-06,
      "loss": 2.3649,
      "step": 45691
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.042545199394226,
      "learning_rate": 7.141201809475138e-06,
      "loss": 2.4411,
      "step": 45692
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2142146825790405,
      "learning_rate": 7.14080725877461e-06,
      "loss": 2.33,
      "step": 45693
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0668562650680542,
      "learning_rate": 7.1404127129211905e-06,
      "loss": 2.1382,
      "step": 45694
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.0946553945541382,
      "learning_rate": 7.140018171915548e-06,
      "loss": 2.3417,
      "step": 45695
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3599271774291992,
      "learning_rate": 7.139623635758355e-06,
      "loss": 2.3158,
      "step": 45696
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9968743920326233,
      "learning_rate": 7.139229104450274e-06,
      "loss": 2.4378,
      "step": 45697
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.090922474861145,
      "learning_rate": 7.138834577991975e-06,
      "loss": 2.378,
      "step": 45698
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4851365089416504,
      "learning_rate": 7.138440056384131e-06,
      "loss": 2.4651,
      "step": 45699
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2323431968688965,
      "learning_rate": 7.138045539627405e-06,
      "loss": 2.4678,
      "step": 45700
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9889650940895081,
      "learning_rate": 7.137651027722472e-06,
      "loss": 2.3479,
      "step": 45701
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.105884075164795,
      "learning_rate": 7.13725652067e-06,
      "loss": 2.2665,
      "step": 45702
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0203274488449097,
      "learning_rate": 7.136862018470654e-06,
      "loss": 2.4581,
      "step": 45703
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1041382551193237,
      "learning_rate": 7.136467521125103e-06,
      "loss": 2.2858,
      "step": 45704
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.08466374874115,
      "learning_rate": 7.136073028634017e-06,
      "loss": 2.6164,
      "step": 45705
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1712617874145508,
      "learning_rate": 7.1356785409980655e-06,
      "loss": 2.3958,
      "step": 45706
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1127673387527466,
      "learning_rate": 7.1352840582179175e-06,
      "loss": 2.2586,
      "step": 45707
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0348553657531738,
      "learning_rate": 7.13488958029424e-06,
      "loss": 2.2145,
      "step": 45708
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3527085781097412,
      "learning_rate": 7.134495107227704e-06,
      "loss": 2.2164,
      "step": 45709
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0772972106933594,
      "learning_rate": 7.134100639018975e-06,
      "loss": 2.3368,
      "step": 45710
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0860408544540405,
      "learning_rate": 7.133706175668726e-06,
      "loss": 2.4327,
      "step": 45711
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0885246992111206,
      "learning_rate": 7.133311717177622e-06,
      "loss": 2.4448,
      "step": 45712
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0452474355697632,
      "learning_rate": 7.132917263546333e-06,
      "loss": 2.4998,
      "step": 45713
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.022581696510315,
      "learning_rate": 7.132522814775529e-06,
      "loss": 2.3195,
      "step": 45714
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2396636009216309,
      "learning_rate": 7.132128370865877e-06,
      "loss": 2.3413,
      "step": 45715
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9929617047309875,
      "learning_rate": 7.131733931818051e-06,
      "loss": 2.3101,
      "step": 45716
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1466604471206665,
      "learning_rate": 7.131339497632711e-06,
      "loss": 2.2742,
      "step": 45717
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0032037496566772,
      "learning_rate": 7.130945068310528e-06,
      "loss": 2.3318,
      "step": 45718
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9434301853179932,
      "learning_rate": 7.130550643852174e-06,
      "loss": 2.2151,
      "step": 45719
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2512822151184082,
      "learning_rate": 7.130156224258316e-06,
      "loss": 2.3499,
      "step": 45720
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2311700582504272,
      "learning_rate": 7.129761809529623e-06,
      "loss": 2.1084,
      "step": 45721
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1480507850646973,
      "learning_rate": 7.129367399666762e-06,
      "loss": 2.241,
      "step": 45722
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1504337787628174,
      "learning_rate": 7.128972994670404e-06,
      "loss": 2.3702,
      "step": 45723
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1516482830047607,
      "learning_rate": 7.1285785945412175e-06,
      "loss": 2.2598,
      "step": 45724
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.114068627357483,
      "learning_rate": 7.128184199279869e-06,
      "loss": 2.386,
      "step": 45725
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.023987889289856,
      "learning_rate": 7.1277898088870265e-06,
      "loss": 2.3927,
      "step": 45726
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1232978105545044,
      "learning_rate": 7.127395423363363e-06,
      "loss": 2.4455,
      "step": 45727
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.044592022895813,
      "learning_rate": 7.1270010427095424e-06,
      "loss": 2.3159,
      "step": 45728
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0379482507705688,
      "learning_rate": 7.126606666926239e-06,
      "loss": 2.2781,
      "step": 45729
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0832030773162842,
      "learning_rate": 7.126212296014114e-06,
      "loss": 2.493,
      "step": 45730
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.044882893562317,
      "learning_rate": 7.125817929973843e-06,
      "loss": 2.3324,
      "step": 45731
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.355423927307129,
      "learning_rate": 7.1254235688060894e-06,
      "loss": 2.2172,
      "step": 45732
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.10117769241333,
      "learning_rate": 7.125029212511522e-06,
      "loss": 2.3646,
      "step": 45733
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0250771045684814,
      "learning_rate": 7.124634861090813e-06,
      "loss": 2.5873,
      "step": 45734
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1033754348754883,
      "learning_rate": 7.124240514544627e-06,
      "loss": 2.186,
      "step": 45735
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0035830736160278,
      "learning_rate": 7.1238461728736345e-06,
      "loss": 2.457,
      "step": 45736
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0681474208831787,
      "learning_rate": 7.1234518360785045e-06,
      "loss": 2.2976,
      "step": 45737
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.187852382659912,
      "learning_rate": 7.123057504159905e-06,
      "loss": 2.1897,
      "step": 45738
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.112108588218689,
      "learning_rate": 7.122663177118504e-06,
      "loss": 2.3752,
      "step": 45739
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0763649940490723,
      "learning_rate": 7.12226885495497e-06,
      "loss": 2.3263,
      "step": 45740
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9925790429115295,
      "learning_rate": 7.121874537669972e-06,
      "loss": 2.2229,
      "step": 45741
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0479105710983276,
      "learning_rate": 7.121480225264184e-06,
      "loss": 2.1863,
      "step": 45742
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.069957971572876,
      "learning_rate": 7.121085917738264e-06,
      "loss": 2.2232,
      "step": 45743
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.06025230884552,
      "learning_rate": 7.120691615092885e-06,
      "loss": 2.5027,
      "step": 45744
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1364232301712036,
      "learning_rate": 7.120297317328715e-06,
      "loss": 2.151,
      "step": 45745
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.034935712814331,
      "learning_rate": 7.119903024446424e-06,
      "loss": 2.3329,
      "step": 45746
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1022815704345703,
      "learning_rate": 7.119508736446678e-06,
      "loss": 2.5833,
      "step": 45747
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.186924695968628,
      "learning_rate": 7.11911445333015e-06,
      "loss": 2.2359,
      "step": 45748
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1843827962875366,
      "learning_rate": 7.118720175097503e-06,
      "loss": 2.259,
      "step": 45749
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0746614933013916,
      "learning_rate": 7.118325901749409e-06,
      "loss": 2.2693,
      "step": 45750
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0106791257858276,
      "learning_rate": 7.117931633286534e-06,
      "loss": 2.5234,
      "step": 45751
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0769423246383667,
      "learning_rate": 7.117537369709551e-06,
      "loss": 2.2493,
      "step": 45752
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0508676767349243,
      "learning_rate": 7.117143111019123e-06,
      "loss": 2.377,
      "step": 45753
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0033513307571411,
      "learning_rate": 7.11674885721592e-06,
      "loss": 2.4006,
      "step": 45754
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0145559310913086,
      "learning_rate": 7.116354608300613e-06,
      "loss": 2.3831,
      "step": 45755
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0707628726959229,
      "learning_rate": 7.115960364273866e-06,
      "loss": 2.7005,
      "step": 45756
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9728046655654907,
      "learning_rate": 7.115566125136349e-06,
      "loss": 2.2039,
      "step": 45757
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0431623458862305,
      "learning_rate": 7.115171890888731e-06,
      "loss": 2.498,
      "step": 45758
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.054301142692566,
      "learning_rate": 7.114777661531684e-06,
      "loss": 2.4051,
      "step": 45759
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1065102815628052,
      "learning_rate": 7.114383437065868e-06,
      "loss": 2.2417,
      "step": 45760
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1909195184707642,
      "learning_rate": 7.113989217491957e-06,
      "loss": 2.1884,
      "step": 45761
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1355363130569458,
      "learning_rate": 7.113595002810617e-06,
      "loss": 2.2051,
      "step": 45762
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1158950328826904,
      "learning_rate": 7.113200793022519e-06,
      "loss": 2.1418,
      "step": 45763
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0393271446228027,
      "learning_rate": 7.1128065881283285e-06,
      "loss": 2.2391,
      "step": 45764
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.065482497215271,
      "learning_rate": 7.1124123881287155e-06,
      "loss": 2.6366,
      "step": 45765
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1082819700241089,
      "learning_rate": 7.112018193024347e-06,
      "loss": 2.346,
      "step": 45766
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0486538410186768,
      "learning_rate": 7.111624002815892e-06,
      "loss": 2.3694,
      "step": 45767
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.008201241493225,
      "learning_rate": 7.111229817504024e-06,
      "loss": 2.2627,
      "step": 45768
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0377891063690186,
      "learning_rate": 7.110835637089402e-06,
      "loss": 2.4099,
      "step": 45769
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9984694123268127,
      "learning_rate": 7.110441461572697e-06,
      "loss": 2.2881,
      "step": 45770
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1394022703170776,
      "learning_rate": 7.110047290954579e-06,
      "loss": 2.33,
      "step": 45771
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.036180853843689,
      "learning_rate": 7.109653125235716e-06,
      "loss": 2.3858,
      "step": 45772
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1337881088256836,
      "learning_rate": 7.109258964416776e-06,
      "loss": 2.354,
      "step": 45773
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1665042638778687,
      "learning_rate": 7.108864808498428e-06,
      "loss": 2.2117,
      "step": 45774
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0924381017684937,
      "learning_rate": 7.108470657481338e-06,
      "loss": 2.2056,
      "step": 45775
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0420572757720947,
      "learning_rate": 7.1080765113661755e-06,
      "loss": 2.4566,
      "step": 45776
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.167361855506897,
      "learning_rate": 7.10768237015361e-06,
      "loss": 2.4026,
      "step": 45777
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1194064617156982,
      "learning_rate": 7.107288233844307e-06,
      "loss": 2.2407,
      "step": 45778
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.588259220123291,
      "learning_rate": 7.106894102438937e-06,
      "loss": 2.0487,
      "step": 45779
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1286096572875977,
      "learning_rate": 7.1064999759381695e-06,
      "loss": 2.3573,
      "step": 45780
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.249413013458252,
      "learning_rate": 7.106105854342668e-06,
      "loss": 2.2344,
      "step": 45781
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.267710566520691,
      "learning_rate": 7.105711737653105e-06,
      "loss": 2.0905,
      "step": 45782
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9996781945228577,
      "learning_rate": 7.105317625870145e-06,
      "loss": 2.3178,
      "step": 45783
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.016669750213623,
      "learning_rate": 7.104923518994458e-06,
      "loss": 2.4297,
      "step": 45784
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0883225202560425,
      "learning_rate": 7.104529417026711e-06,
      "loss": 2.4688,
      "step": 45785
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0592663288116455,
      "learning_rate": 7.1041353199675736e-06,
      "loss": 2.3563,
      "step": 45786
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1056418418884277,
      "learning_rate": 7.1037412278177154e-06,
      "loss": 2.5091,
      "step": 45787
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0689064264297485,
      "learning_rate": 7.103347140577799e-06,
      "loss": 2.4826,
      "step": 45788
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0874571800231934,
      "learning_rate": 7.1029530582484985e-06,
      "loss": 2.3072,
      "step": 45789
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1503530740737915,
      "learning_rate": 7.102558980830477e-06,
      "loss": 2.3556,
      "step": 45790
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2590545415878296,
      "learning_rate": 7.102164908324406e-06,
      "loss": 2.6489,
      "step": 45791
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1162885427474976,
      "learning_rate": 7.101770840730953e-06,
      "loss": 2.5369,
      "step": 45792
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1278622150421143,
      "learning_rate": 7.1013767780507844e-06,
      "loss": 2.3176,
      "step": 45793
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0493420362472534,
      "learning_rate": 7.10098272028457e-06,
      "loss": 2.2904,
      "step": 45794
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0488224029541016,
      "learning_rate": 7.100588667432982e-06,
      "loss": 2.6785,
      "step": 45795
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0956614017486572,
      "learning_rate": 7.100194619496678e-06,
      "loss": 2.1937,
      "step": 45796
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0125094652175903,
      "learning_rate": 7.099800576476335e-06,
      "loss": 2.2646,
      "step": 45797
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1035547256469727,
      "learning_rate": 7.099406538372614e-06,
      "loss": 2.2554,
      "step": 45798
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1555784940719604,
      "learning_rate": 7.099012505186189e-06,
      "loss": 2.5318,
      "step": 45799
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9602203965187073,
      "learning_rate": 7.098618476917724e-06,
      "loss": 2.3412,
      "step": 45800
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0940591096878052,
      "learning_rate": 7.0982244535678904e-06,
      "loss": 2.4519,
      "step": 45801
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.051315426826477,
      "learning_rate": 7.097830435137353e-06,
      "loss": 2.3121,
      "step": 45802
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.062971591949463,
      "learning_rate": 7.097436421626782e-06,
      "loss": 2.3671,
      "step": 45803
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.09527587890625,
      "learning_rate": 7.097042413036845e-06,
      "loss": 2.362,
      "step": 45804
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1790966987609863,
      "learning_rate": 7.0966484093682075e-06,
      "loss": 2.3605,
      "step": 45805
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0522890090942383,
      "learning_rate": 7.0962544106215415e-06,
      "loss": 2.3617,
      "step": 45806
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2041550874710083,
      "learning_rate": 7.095860416797513e-06,
      "loss": 2.2636,
      "step": 45807
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1728057861328125,
      "learning_rate": 7.095466427896793e-06,
      "loss": 2.285,
      "step": 45808
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.075927495956421,
      "learning_rate": 7.095072443920042e-06,
      "loss": 2.2967,
      "step": 45809
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.07681143283844,
      "learning_rate": 7.0946784648679325e-06,
      "loss": 2.4349,
      "step": 45810
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9992889761924744,
      "learning_rate": 7.094284490741132e-06,
      "loss": 2.327,
      "step": 45811
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0825620889663696,
      "learning_rate": 7.093890521540309e-06,
      "loss": 2.4232,
      "step": 45812
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.052868366241455,
      "learning_rate": 7.093496557266131e-06,
      "loss": 2.476,
      "step": 45813
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.160448670387268,
      "learning_rate": 7.093102597919268e-06,
      "loss": 2.2281,
      "step": 45814
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0898268222808838,
      "learning_rate": 7.092708643500382e-06,
      "loss": 2.1343,
      "step": 45815
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0666617155075073,
      "learning_rate": 7.092314694010145e-06,
      "loss": 2.1842,
      "step": 45816
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0620578527450562,
      "learning_rate": 7.091920749449226e-06,
      "loss": 2.2003,
      "step": 45817
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1059131622314453,
      "learning_rate": 7.09152680981829e-06,
      "loss": 2.1661,
      "step": 45818
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1776050329208374,
      "learning_rate": 7.091132875118005e-06,
      "loss": 2.4164,
      "step": 45819
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0805529356002808,
      "learning_rate": 7.09073894534904e-06,
      "loss": 2.3853,
      "step": 45820
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0625758171081543,
      "learning_rate": 7.090345020512069e-06,
      "loss": 2.285,
      "step": 45821
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.993105947971344,
      "learning_rate": 7.089951100607747e-06,
      "loss": 2.2417,
      "step": 45822
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0706133842468262,
      "learning_rate": 7.089557185636748e-06,
      "loss": 2.3832,
      "step": 45823
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1906250715255737,
      "learning_rate": 7.089163275599742e-06,
      "loss": 2.2398,
      "step": 45824
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1055269241333008,
      "learning_rate": 7.088769370497393e-06,
      "loss": 2.3418,
      "step": 45825
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0326405763626099,
      "learning_rate": 7.0883754703303716e-06,
      "loss": 2.1634,
      "step": 45826
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0397107601165771,
      "learning_rate": 7.0879815750993454e-06,
      "loss": 2.3663,
      "step": 45827
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1422830820083618,
      "learning_rate": 7.08758768480498e-06,
      "loss": 2.2168,
      "step": 45828
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0307021141052246,
      "learning_rate": 7.087193799447945e-06,
      "loss": 2.0839,
      "step": 45829
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2325152158737183,
      "learning_rate": 7.086799919028906e-06,
      "loss": 2.2043,
      "step": 45830
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0200308561325073,
      "learning_rate": 7.086406043548535e-06,
      "loss": 2.123,
      "step": 45831
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.06479012966156,
      "learning_rate": 7.086012173007495e-06,
      "loss": 2.3851,
      "step": 45832
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1076152324676514,
      "learning_rate": 7.085618307406456e-06,
      "loss": 2.2907,
      "step": 45833
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0545071363449097,
      "learning_rate": 7.085224446746091e-06,
      "loss": 2.3462,
      "step": 45834
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0699043273925781,
      "learning_rate": 7.084830591027058e-06,
      "loss": 2.1833,
      "step": 45835
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1924564838409424,
      "learning_rate": 7.0844367402500265e-06,
      "loss": 2.3938,
      "step": 45836
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2818833589553833,
      "learning_rate": 7.084042894415669e-06,
      "loss": 2.2563,
      "step": 45837
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0206265449523926,
      "learning_rate": 7.083649053524651e-06,
      "loss": 2.3725,
      "step": 45838
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0260978937149048,
      "learning_rate": 7.0832552175776384e-06,
      "loss": 2.2688,
      "step": 45839
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1954753398895264,
      "learning_rate": 7.082861386575301e-06,
      "loss": 2.258,
      "step": 45840
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.373237133026123,
      "learning_rate": 7.082467560518306e-06,
      "loss": 2.1032,
      "step": 45841
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.997576892375946,
      "learning_rate": 7.082073739407322e-06,
      "loss": 2.1141,
      "step": 45842
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1895039081573486,
      "learning_rate": 7.081679923243014e-06,
      "loss": 2.0982,
      "step": 45843
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1335562467575073,
      "learning_rate": 7.081286112026051e-06,
      "loss": 2.062,
      "step": 45844
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0565738677978516,
      "learning_rate": 7.080892305757102e-06,
      "loss": 2.0495,
      "step": 45845
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1102725267410278,
      "learning_rate": 7.0804985044368326e-06,
      "loss": 2.2019,
      "step": 45846
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.08808171749115,
      "learning_rate": 7.0801047080659135e-06,
      "loss": 2.298,
      "step": 45847
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2137471437454224,
      "learning_rate": 7.079710916645006e-06,
      "loss": 2.3644,
      "step": 45848
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1017740964889526,
      "learning_rate": 7.0793171301747855e-06,
      "loss": 2.2177,
      "step": 45849
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0696966648101807,
      "learning_rate": 7.078923348655914e-06,
      "loss": 2.3126,
      "step": 45850
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1424616575241089,
      "learning_rate": 7.078529572089059e-06,
      "loss": 2.3054,
      "step": 45851
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1521855592727661,
      "learning_rate": 7.078135800474893e-06,
      "loss": 2.3172,
      "step": 45852
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.239952564239502,
      "learning_rate": 7.077742033814077e-06,
      "loss": 2.2137,
      "step": 45853
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.153519868850708,
      "learning_rate": 7.077348272107282e-06,
      "loss": 2.4532,
      "step": 45854
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0703232288360596,
      "learning_rate": 7.0769545153551775e-06,
      "loss": 2.1821,
      "step": 45855
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.097198724746704,
      "learning_rate": 7.076560763558429e-06,
      "loss": 2.2961,
      "step": 45856
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0549671649932861,
      "learning_rate": 7.076167016717704e-06,
      "loss": 2.4938,
      "step": 45857
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0643061399459839,
      "learning_rate": 7.075773274833668e-06,
      "loss": 2.3062,
      "step": 45858
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1038802862167358,
      "learning_rate": 7.075379537906991e-06,
      "loss": 2.4114,
      "step": 45859
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0317106246948242,
      "learning_rate": 7.0749858059383415e-06,
      "loss": 2.3427,
      "step": 45860
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.113135576248169,
      "learning_rate": 7.074592078928388e-06,
      "loss": 2.439,
      "step": 45861
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1743324995040894,
      "learning_rate": 7.0741983568777925e-06,
      "loss": 2.2674,
      "step": 45862
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2219754457473755,
      "learning_rate": 7.073804639787225e-06,
      "loss": 2.1641,
      "step": 45863
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.09521484375,
      "learning_rate": 7.073410927657354e-06,
      "loss": 2.5108,
      "step": 45864
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1497666835784912,
      "learning_rate": 7.073017220488845e-06,
      "loss": 2.4581,
      "step": 45865
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0936757326126099,
      "learning_rate": 7.0726235182823676e-06,
      "loss": 2.3501,
      "step": 45866
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1374670267105103,
      "learning_rate": 7.072229821038588e-06,
      "loss": 2.504,
      "step": 45867
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0012516975402832,
      "learning_rate": 7.071836128758174e-06,
      "loss": 2.5281,
      "step": 45868
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0389747619628906,
      "learning_rate": 7.071442441441794e-06,
      "loss": 2.192,
      "step": 45869
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2387158870697021,
      "learning_rate": 7.071048759090116e-06,
      "loss": 2.4054,
      "step": 45870
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0498343706130981,
      "learning_rate": 7.070655081703803e-06,
      "loss": 2.1596,
      "step": 45871
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1777088642120361,
      "learning_rate": 7.0702614092835265e-06,
      "loss": 2.288,
      "step": 45872
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0791863203048706,
      "learning_rate": 7.069867741829952e-06,
      "loss": 2.6037,
      "step": 45873
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.158136248588562,
      "learning_rate": 7.06947407934375e-06,
      "loss": 2.1811,
      "step": 45874
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1551755666732788,
      "learning_rate": 7.0690804218255825e-06,
      "loss": 2.3552,
      "step": 45875
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1188631057739258,
      "learning_rate": 7.06868676927612e-06,
      "loss": 2.3165,
      "step": 45876
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1373066902160645,
      "learning_rate": 7.068293121696031e-06,
      "loss": 2.3367,
      "step": 45877
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1244922876358032,
      "learning_rate": 7.06789947908598e-06,
      "loss": 2.7062,
      "step": 45878
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1071891784667969,
      "learning_rate": 7.067505841446635e-06,
      "loss": 2.2843,
      "step": 45879
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.189083218574524,
      "learning_rate": 7.067112208778666e-06,
      "loss": 2.3975,
      "step": 45880
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.019864797592163,
      "learning_rate": 7.066718581082737e-06,
      "loss": 2.3858,
      "step": 45881
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2729105949401855,
      "learning_rate": 7.066324958359517e-06,
      "loss": 2.2618,
      "step": 45882
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0717560052871704,
      "learning_rate": 7.065931340609673e-06,
      "loss": 2.5023,
      "step": 45883
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2603909969329834,
      "learning_rate": 7.065537727833872e-06,
      "loss": 2.3565,
      "step": 45884
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.078067660331726,
      "learning_rate": 7.065144120032782e-06,
      "loss": 2.1493,
      "step": 45885
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0336863994598389,
      "learning_rate": 7.064750517207069e-06,
      "loss": 2.3334,
      "step": 45886
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.065378189086914,
      "learning_rate": 7.064356919357407e-06,
      "loss": 2.3598,
      "step": 45887
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0561155080795288,
      "learning_rate": 7.063963326484451e-06,
      "loss": 2.2941,
      "step": 45888
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0561857223510742,
      "learning_rate": 7.063569738588876e-06,
      "loss": 2.4612,
      "step": 45889
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0953136682510376,
      "learning_rate": 7.063176155671346e-06,
      "loss": 2.4564,
      "step": 45890
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.101895809173584,
      "learning_rate": 7.062782577732532e-06,
      "loss": 2.3705,
      "step": 45891
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0117110013961792,
      "learning_rate": 7.062389004773098e-06,
      "loss": 2.4992,
      "step": 45892
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1204867362976074,
      "learning_rate": 7.061995436793714e-06,
      "loss": 2.4131,
      "step": 45893
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9986264705657959,
      "learning_rate": 7.061601873795044e-06,
      "loss": 2.0779,
      "step": 45894
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.086103081703186,
      "learning_rate": 7.0612083157777575e-06,
      "loss": 2.4591,
      "step": 45895
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1256603002548218,
      "learning_rate": 7.060814762742522e-06,
      "loss": 2.2389,
      "step": 45896
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0384199619293213,
      "learning_rate": 7.0604212146900035e-06,
      "loss": 2.4396,
      "step": 45897
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1632720232009888,
      "learning_rate": 7.06002767162087e-06,
      "loss": 2.1965,
      "step": 45898
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.116335153579712,
      "learning_rate": 7.059634133535787e-06,
      "loss": 2.2805,
      "step": 45899
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.211414098739624,
      "learning_rate": 7.059240600435424e-06,
      "loss": 2.2636,
      "step": 45900
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4065836668014526,
      "learning_rate": 7.058847072320446e-06,
      "loss": 2.5676,
      "step": 45901
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1529476642608643,
      "learning_rate": 7.05845354919152e-06,
      "loss": 2.4301,
      "step": 45902
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0474923849105835,
      "learning_rate": 7.058060031049315e-06,
      "loss": 2.3019,
      "step": 45903
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0392502546310425,
      "learning_rate": 7.057666517894497e-06,
      "loss": 2.428,
      "step": 45904
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.084509015083313,
      "learning_rate": 7.057273009727737e-06,
      "loss": 2.3183,
      "step": 45905
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6346451044082642,
      "learning_rate": 7.056879506549694e-06,
      "loss": 2.3976,
      "step": 45906
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9001866579055786,
      "learning_rate": 7.0564860083610395e-06,
      "loss": 2.3664,
      "step": 45907
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0718153715133667,
      "learning_rate": 7.0560925151624424e-06,
      "loss": 2.3778,
      "step": 45908
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9586193561553955,
      "learning_rate": 7.055699026954567e-06,
      "loss": 2.4565,
      "step": 45909
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0086530447006226,
      "learning_rate": 7.055305543738081e-06,
      "loss": 2.5218,
      "step": 45910
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0165114402770996,
      "learning_rate": 7.054912065513653e-06,
      "loss": 2.3915,
      "step": 45911
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0398881435394287,
      "learning_rate": 7.054518592281949e-06,
      "loss": 2.4408,
      "step": 45912
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1752138137817383,
      "learning_rate": 7.054125124043639e-06,
      "loss": 2.4285,
      "step": 45913
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2001031637191772,
      "learning_rate": 7.053731660799383e-06,
      "loss": 2.3266,
      "step": 45914
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0408155918121338,
      "learning_rate": 7.053338202549853e-06,
      "loss": 2.4172,
      "step": 45915
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1000654697418213,
      "learning_rate": 7.052944749295714e-06,
      "loss": 2.4354,
      "step": 45916
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1225049495697021,
      "learning_rate": 7.052551301037636e-06,
      "loss": 2.3012,
      "step": 45917
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0539592504501343,
      "learning_rate": 7.052157857776282e-06,
      "loss": 2.4407,
      "step": 45918
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2278099060058594,
      "learning_rate": 7.051764419512322e-06,
      "loss": 2.4814,
      "step": 45919
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0609309673309326,
      "learning_rate": 7.051370986246421e-06,
      "loss": 2.3687,
      "step": 45920
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2080391645431519,
      "learning_rate": 7.0509775579792486e-06,
      "loss": 2.3633,
      "step": 45921
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0033669471740723,
      "learning_rate": 7.050584134711469e-06,
      "loss": 2.462,
      "step": 45922
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0507266521453857,
      "learning_rate": 7.05019071644375e-06,
      "loss": 2.5298,
      "step": 45923
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0829709768295288,
      "learning_rate": 7.0497973031767596e-06,
      "loss": 2.4685,
      "step": 45924
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1603672504425049,
      "learning_rate": 7.049403894911164e-06,
      "loss": 2.4265,
      "step": 45925
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1527082920074463,
      "learning_rate": 7.049010491647632e-06,
      "loss": 2.2941,
      "step": 45926
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0313057899475098,
      "learning_rate": 7.04861709338683e-06,
      "loss": 2.1118,
      "step": 45927
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1129772663116455,
      "learning_rate": 7.0482237001294196e-06,
      "loss": 2.1913,
      "step": 45928
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0152075290679932,
      "learning_rate": 7.047830311876072e-06,
      "loss": 2.3724,
      "step": 45929
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1925352811813354,
      "learning_rate": 7.047436928627455e-06,
      "loss": 2.278,
      "step": 45930
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0344089269638062,
      "learning_rate": 7.047043550384234e-06,
      "loss": 2.3054,
      "step": 45931
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.019400954246521,
      "learning_rate": 7.046650177147078e-06,
      "loss": 2.2736,
      "step": 45932
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.024529218673706,
      "learning_rate": 7.0462568089166495e-06,
      "loss": 2.3838,
      "step": 45933
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.163037657737732,
      "learning_rate": 7.045863445693618e-06,
      "loss": 2.4563,
      "step": 45934
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.066226601600647,
      "learning_rate": 7.045470087478651e-06,
      "loss": 2.4519,
      "step": 45935
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.16237211227417,
      "learning_rate": 7.045076734272414e-06,
      "loss": 2.2776,
      "step": 45936
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0407626628875732,
      "learning_rate": 7.044683386075575e-06,
      "loss": 2.3763,
      "step": 45937
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1465954780578613,
      "learning_rate": 7.0442900428888e-06,
      "loss": 2.2904,
      "step": 45938
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.03990638256073,
      "learning_rate": 7.043896704712756e-06,
      "loss": 2.3652,
      "step": 45939
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0102744102478027,
      "learning_rate": 7.043503371548115e-06,
      "loss": 2.183,
      "step": 45940
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1532539129257202,
      "learning_rate": 7.0431100433955335e-06,
      "loss": 2.0965,
      "step": 45941
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0696030855178833,
      "learning_rate": 7.0427167202556845e-06,
      "loss": 2.1819,
      "step": 45942
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0750796794891357,
      "learning_rate": 7.042323402129233e-06,
      "loss": 2.4446,
      "step": 45943
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0158324241638184,
      "learning_rate": 7.041930089016846e-06,
      "loss": 2.3503,
      "step": 45944
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0918620824813843,
      "learning_rate": 7.041536780919194e-06,
      "loss": 2.0323,
      "step": 45945
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1009052991867065,
      "learning_rate": 7.041143477836938e-06,
      "loss": 2.2552,
      "step": 45946
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0785739421844482,
      "learning_rate": 7.040750179770748e-06,
      "loss": 2.2089,
      "step": 45947
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1306949853897095,
      "learning_rate": 7.040356886721289e-06,
      "loss": 2.3495,
      "step": 45948
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1470736265182495,
      "learning_rate": 7.03996359868923e-06,
      "loss": 2.1655,
      "step": 45949
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0296649932861328,
      "learning_rate": 7.039570315675237e-06,
      "loss": 2.1989,
      "step": 45950
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0884793996810913,
      "learning_rate": 7.039177037679976e-06,
      "loss": 2.0746,
      "step": 45951
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2326886653900146,
      "learning_rate": 7.038783764704115e-06,
      "loss": 2.2871,
      "step": 45952
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0680022239685059,
      "learning_rate": 7.038390496748322e-06,
      "loss": 2.373,
      "step": 45953
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.134127140045166,
      "learning_rate": 7.037997233813258e-06,
      "loss": 2.1471,
      "step": 45954
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1788356304168701,
      "learning_rate": 7.037603975899594e-06,
      "loss": 2.1837,
      "step": 45955
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0840471982955933,
      "learning_rate": 7.037210723007995e-06,
      "loss": 2.2883,
      "step": 45956
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.959756076335907,
      "learning_rate": 7.0368174751391295e-06,
      "loss": 2.4099,
      "step": 45957
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1299673318862915,
      "learning_rate": 7.036424232293663e-06,
      "loss": 2.5303,
      "step": 45958
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0536253452301025,
      "learning_rate": 7.036030994472261e-06,
      "loss": 2.5973,
      "step": 45959
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.213780641555786,
      "learning_rate": 7.035637761675595e-06,
      "loss": 2.4544,
      "step": 45960
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1001867055892944,
      "learning_rate": 7.035244533904325e-06,
      "loss": 2.5329,
      "step": 45961
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1282474994659424,
      "learning_rate": 7.034851311159121e-06,
      "loss": 2.556,
      "step": 45962
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0601260662078857,
      "learning_rate": 7.034458093440651e-06,
      "loss": 2.3012,
      "step": 45963
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0901771783828735,
      "learning_rate": 7.034064880749578e-06,
      "loss": 2.5283,
      "step": 45964
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1058008670806885,
      "learning_rate": 7.03367167308657e-06,
      "loss": 2.4614,
      "step": 45965
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0284050703048706,
      "learning_rate": 7.033278470452298e-06,
      "loss": 2.311,
      "step": 45966
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.235124945640564,
      "learning_rate": 7.032885272847424e-06,
      "loss": 2.3316,
      "step": 45967
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.005002737045288,
      "learning_rate": 7.0324920802726125e-06,
      "loss": 2.316,
      "step": 45968
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1146644353866577,
      "learning_rate": 7.032098892728534e-06,
      "loss": 2.3136,
      "step": 45969
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1283401250839233,
      "learning_rate": 7.031705710215853e-06,
      "loss": 2.2143,
      "step": 45970
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1431269645690918,
      "learning_rate": 7.031312532735238e-06,
      "loss": 2.3503,
      "step": 45971
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1237106323242188,
      "learning_rate": 7.030919360287354e-06,
      "loss": 2.2964,
      "step": 45972
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1185178756713867,
      "learning_rate": 7.030526192872868e-06,
      "loss": 2.209,
      "step": 45973
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2763416767120361,
      "learning_rate": 7.030133030492448e-06,
      "loss": 2.3393,
      "step": 45974
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2293208837509155,
      "learning_rate": 7.029739873146757e-06,
      "loss": 2.2446,
      "step": 45975
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0791221857070923,
      "learning_rate": 7.029346720836465e-06,
      "loss": 2.4612,
      "step": 45976
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0580910444259644,
      "learning_rate": 7.028953573562236e-06,
      "loss": 2.5467,
      "step": 45977
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.142322301864624,
      "learning_rate": 7.028560431324738e-06,
      "loss": 2.3324,
      "step": 45978
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.097727656364441,
      "learning_rate": 7.028167294124643e-06,
      "loss": 2.2575,
      "step": 45979
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0597641468048096,
      "learning_rate": 7.027774161962606e-06,
      "loss": 2.3323,
      "step": 45980
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0576677322387695,
      "learning_rate": 7.027381034839298e-06,
      "loss": 2.2876,
      "step": 45981
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1594171524047852,
      "learning_rate": 7.026987912755387e-06,
      "loss": 2.3855,
      "step": 45982
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0347729921340942,
      "learning_rate": 7.026594795711541e-06,
      "loss": 2.1905,
      "step": 45983
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1659592390060425,
      "learning_rate": 7.026201683708422e-06,
      "loss": 2.2342,
      "step": 45984
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0569753646850586,
      "learning_rate": 7.0258085767467e-06,
      "loss": 2.4289,
      "step": 45985
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6325730085372925,
      "learning_rate": 7.02541547482704e-06,
      "loss": 2.422,
      "step": 45986
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0874747037887573,
      "learning_rate": 7.025022377950108e-06,
      "loss": 2.2888,
      "step": 45987
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.19681978225708,
      "learning_rate": 7.024629286116573e-06,
      "loss": 2.3455,
      "step": 45988
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1348356008529663,
      "learning_rate": 7.024236199327099e-06,
      "loss": 2.6052,
      "step": 45989
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9844667315483093,
      "learning_rate": 7.023843117582352e-06,
      "loss": 2.2508,
      "step": 45990
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9857882857322693,
      "learning_rate": 7.0234500408829995e-06,
      "loss": 2.1038,
      "step": 45991
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.187957525253296,
      "learning_rate": 7.023056969229709e-06,
      "loss": 2.1625,
      "step": 45992
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3373823165893555,
      "learning_rate": 7.022663902623144e-06,
      "loss": 2.3562,
      "step": 45993
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1429564952850342,
      "learning_rate": 7.022270841063971e-06,
      "loss": 2.4595,
      "step": 45994
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3259648084640503,
      "learning_rate": 7.021877784552862e-06,
      "loss": 2.5363,
      "step": 45995
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.033461332321167,
      "learning_rate": 7.021484733090476e-06,
      "loss": 2.3144,
      "step": 45996
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1675647497177124,
      "learning_rate": 7.021091686677482e-06,
      "loss": 2.2522,
      "step": 45997
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.077130675315857,
      "learning_rate": 7.020698645314545e-06,
      "loss": 2.3117,
      "step": 45998
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0309572219848633,
      "learning_rate": 7.020305609002336e-06,
      "loss": 2.3761,
      "step": 45999
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0912501811981201,
      "learning_rate": 7.019912577741517e-06,
      "loss": 2.2223,
      "step": 46000
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1576488018035889,
      "learning_rate": 7.0195195515327555e-06,
      "loss": 2.4755,
      "step": 46001
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1238374710083008,
      "learning_rate": 7.019126530376718e-06,
      "loss": 2.3893,
      "step": 46002
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1366779804229736,
      "learning_rate": 7.01873351427407e-06,
      "loss": 2.4857,
      "step": 46003
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1069132089614868,
      "learning_rate": 7.018340503225478e-06,
      "loss": 2.3045,
      "step": 46004
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0860247611999512,
      "learning_rate": 7.01794749723161e-06,
      "loss": 2.3144,
      "step": 46005
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0290902853012085,
      "learning_rate": 7.017554496293135e-06,
      "loss": 2.353,
      "step": 46006
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.434397578239441,
      "learning_rate": 7.017161500410709e-06,
      "loss": 2.3824,
      "step": 46007
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0235260725021362,
      "learning_rate": 7.016768509585008e-06,
      "loss": 2.3557,
      "step": 46008
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0282843112945557,
      "learning_rate": 7.016375523816692e-06,
      "loss": 2.3731,
      "step": 46009
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.080330491065979,
      "learning_rate": 7.01598254310643e-06,
      "loss": 2.3142,
      "step": 46010
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0433721542358398,
      "learning_rate": 7.015589567454888e-06,
      "loss": 2.4911,
      "step": 46011
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1144689321517944,
      "learning_rate": 7.015196596862734e-06,
      "loss": 2.3055,
      "step": 46012
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0953620672225952,
      "learning_rate": 7.014803631330631e-06,
      "loss": 2.2561,
      "step": 46013
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1657018661499023,
      "learning_rate": 7.014410670859246e-06,
      "loss": 2.5263,
      "step": 46014
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0831888914108276,
      "learning_rate": 7.014017715449246e-06,
      "loss": 2.3915,
      "step": 46015
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0370322465896606,
      "learning_rate": 7.0136247651013e-06,
      "loss": 2.1917,
      "step": 46016
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.158354640007019,
      "learning_rate": 7.013231819816068e-06,
      "loss": 2.3203,
      "step": 46017
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9961906671524048,
      "learning_rate": 7.01283887959422e-06,
      "loss": 2.3002,
      "step": 46018
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.119960069656372,
      "learning_rate": 7.012445944436423e-06,
      "loss": 2.402,
      "step": 46019
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0719075202941895,
      "learning_rate": 7.012053014343341e-06,
      "loss": 2.3846,
      "step": 46020
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0347968339920044,
      "learning_rate": 7.01166008931564e-06,
      "loss": 2.4548,
      "step": 46021
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1402863264083862,
      "learning_rate": 7.011267169353986e-06,
      "loss": 2.4584,
      "step": 46022
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.086840271949768,
      "learning_rate": 7.010874254459048e-06,
      "loss": 2.3845,
      "step": 46023
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0920469760894775,
      "learning_rate": 7.010481344631488e-06,
      "loss": 2.3157,
      "step": 46024
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0541024208068848,
      "learning_rate": 7.010088439871974e-06,
      "loss": 2.1696,
      "step": 46025
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1060045957565308,
      "learning_rate": 7.009695540181171e-06,
      "loss": 2.4221,
      "step": 46026
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1157277822494507,
      "learning_rate": 7.009302645559748e-06,
      "loss": 2.5903,
      "step": 46027
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0863046646118164,
      "learning_rate": 7.008909756008369e-06,
      "loss": 2.0773,
      "step": 46028
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.168606162071228,
      "learning_rate": 7.0085168715277e-06,
      "loss": 2.4948,
      "step": 46029
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0486321449279785,
      "learning_rate": 7.008123992118407e-06,
      "loss": 2.3403,
      "step": 46030
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3352458477020264,
      "learning_rate": 7.007731117781158e-06,
      "loss": 2.1825,
      "step": 46031
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2601617574691772,
      "learning_rate": 7.0073382485166194e-06,
      "loss": 2.5148,
      "step": 46032
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0976370573043823,
      "learning_rate": 7.006945384325452e-06,
      "loss": 2.1172,
      "step": 46033
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0949275493621826,
      "learning_rate": 7.006552525208326e-06,
      "loss": 2.3022,
      "step": 46034
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1503058671951294,
      "learning_rate": 7.006159671165905e-06,
      "loss": 2.4507,
      "step": 46035
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1352958679199219,
      "learning_rate": 7.005766822198859e-06,
      "loss": 2.5208,
      "step": 46036
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0637577772140503,
      "learning_rate": 7.0053739783078475e-06,
      "loss": 2.333,
      "step": 46037
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0491137504577637,
      "learning_rate": 7.004981139493543e-06,
      "loss": 2.3757,
      "step": 46038
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3092875480651855,
      "learning_rate": 7.004588305756609e-06,
      "loss": 2.5089,
      "step": 46039
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1401082277297974,
      "learning_rate": 7.0041954770977105e-06,
      "loss": 2.1511,
      "step": 46040
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0543094873428345,
      "learning_rate": 7.003802653517515e-06,
      "loss": 2.5683,
      "step": 46041
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1136049032211304,
      "learning_rate": 7.003409835016688e-06,
      "loss": 2.216,
      "step": 46042
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1819759607315063,
      "learning_rate": 7.003017021595894e-06,
      "loss": 2.4005,
      "step": 46043
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.127179741859436,
      "learning_rate": 7.002624213255802e-06,
      "loss": 2.4981,
      "step": 46044
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.096648931503296,
      "learning_rate": 7.002231409997078e-06,
      "loss": 2.3964,
      "step": 46045
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0652464628219604,
      "learning_rate": 7.001838611820382e-06,
      "loss": 2.2484,
      "step": 46046
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1033116579055786,
      "learning_rate": 7.0014458187263845e-06,
      "loss": 2.416,
      "step": 46047
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0170888900756836,
      "learning_rate": 7.001053030715752e-06,
      "loss": 2.2705,
      "step": 46048
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0528905391693115,
      "learning_rate": 7.000660247789148e-06,
      "loss": 2.2524,
      "step": 46049
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.029929757118225,
      "learning_rate": 7.0002674699472405e-06,
      "loss": 2.3174,
      "step": 46050
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.009522557258606,
      "learning_rate": 6.999874697190694e-06,
      "loss": 2.3308,
      "step": 46051
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0677640438079834,
      "learning_rate": 6.999481929520174e-06,
      "loss": 2.3247,
      "step": 46052
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.185759425163269,
      "learning_rate": 6.999089166936346e-06,
      "loss": 2.5557,
      "step": 46053
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.184726357460022,
      "learning_rate": 6.998696409439878e-06,
      "loss": 2.2742,
      "step": 46054
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2302467823028564,
      "learning_rate": 6.9983036570314355e-06,
      "loss": 2.3713,
      "step": 46055
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0673471689224243,
      "learning_rate": 6.997910909711683e-06,
      "loss": 2.3951,
      "step": 46056
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0085949897766113,
      "learning_rate": 6.997518167481286e-06,
      "loss": 2.2079,
      "step": 46057
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0122121572494507,
      "learning_rate": 6.997125430340917e-06,
      "loss": 2.2737,
      "step": 46058
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9903190732002258,
      "learning_rate": 6.996732698291229e-06,
      "loss": 2.3828,
      "step": 46059
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0614701509475708,
      "learning_rate": 6.996339971332897e-06,
      "loss": 2.478,
      "step": 46060
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.143615961074829,
      "learning_rate": 6.995947249466584e-06,
      "loss": 2.4195,
      "step": 46061
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0880578756332397,
      "learning_rate": 6.995554532692956e-06,
      "loss": 2.2375,
      "step": 46062
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.094472050666809,
      "learning_rate": 6.995161821012679e-06,
      "loss": 2.384,
      "step": 46063
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.146634578704834,
      "learning_rate": 6.99476911442642e-06,
      "loss": 2.338,
      "step": 46064
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0706448554992676,
      "learning_rate": 6.994376412934842e-06,
      "loss": 2.2641,
      "step": 46065
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2053931951522827,
      "learning_rate": 6.993983716538612e-06,
      "loss": 2.5159,
      "step": 46066
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0401725769042969,
      "learning_rate": 6.9935910252383985e-06,
      "loss": 2.1406,
      "step": 46067
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0301222801208496,
      "learning_rate": 6.993198339034863e-06,
      "loss": 2.3166,
      "step": 46068
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0762368440628052,
      "learning_rate": 6.992805657928674e-06,
      "loss": 2.3989,
      "step": 46069
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1286417245864868,
      "learning_rate": 6.992412981920496e-06,
      "loss": 2.2982,
      "step": 46070
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0191000699996948,
      "learning_rate": 6.992020311010993e-06,
      "loss": 2.1429,
      "step": 46071
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.005735993385315,
      "learning_rate": 6.991627645200838e-06,
      "loss": 2.3056,
      "step": 46072
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2239991426467896,
      "learning_rate": 6.991234984490687e-06,
      "loss": 2.3259,
      "step": 46073
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0917891263961792,
      "learning_rate": 6.99084232888121e-06,
      "loss": 2.3802,
      "step": 46074
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1015177965164185,
      "learning_rate": 6.990449678373072e-06,
      "loss": 2.1755,
      "step": 46075
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.959785521030426,
      "learning_rate": 6.990057032966939e-06,
      "loss": 2.3404,
      "step": 46076
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2713323831558228,
      "learning_rate": 6.989664392663478e-06,
      "loss": 2.4188,
      "step": 46077
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2170929908752441,
      "learning_rate": 6.989271757463355e-06,
      "loss": 2.2058,
      "step": 46078
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.10771906375885,
      "learning_rate": 6.9888791273672315e-06,
      "loss": 2.5139,
      "step": 46079
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1633801460266113,
      "learning_rate": 6.9884865023757765e-06,
      "loss": 2.4292,
      "step": 46080
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0503005981445312,
      "learning_rate": 6.988093882489655e-06,
      "loss": 2.3553,
      "step": 46081
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1596901416778564,
      "learning_rate": 6.987701267709531e-06,
      "loss": 2.3063,
      "step": 46082
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1219804286956787,
      "learning_rate": 6.9873086580360724e-06,
      "loss": 2.3875,
      "step": 46083
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0849577188491821,
      "learning_rate": 6.986916053469943e-06,
      "loss": 2.1454,
      "step": 46084
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2362968921661377,
      "learning_rate": 6.986523454011816e-06,
      "loss": 2.4871,
      "step": 46085
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0858267545700073,
      "learning_rate": 6.9861308596623435e-06,
      "loss": 2.5397,
      "step": 46086
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0548688173294067,
      "learning_rate": 6.985738270422198e-06,
      "loss": 2.3655,
      "step": 46087
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1026995182037354,
      "learning_rate": 6.985345686292044e-06,
      "loss": 2.2692,
      "step": 46088
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0552064180374146,
      "learning_rate": 6.984953107272549e-06,
      "loss": 2.3452,
      "step": 46089
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0790200233459473,
      "learning_rate": 6.984560533364378e-06,
      "loss": 2.508,
      "step": 46090
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0522363185882568,
      "learning_rate": 6.984167964568196e-06,
      "loss": 2.1639,
      "step": 46091
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0736515522003174,
      "learning_rate": 6.983775400884667e-06,
      "loss": 2.2083,
      "step": 46092
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0649062395095825,
      "learning_rate": 6.983382842314459e-06,
      "loss": 2.6098,
      "step": 46093
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0827692747116089,
      "learning_rate": 6.982990288858236e-06,
      "loss": 2.3781,
      "step": 46094
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0496573448181152,
      "learning_rate": 6.9825977405166635e-06,
      "loss": 2.2832,
      "step": 46095
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1093151569366455,
      "learning_rate": 6.982205197290408e-06,
      "loss": 2.213,
      "step": 46096
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2048819065093994,
      "learning_rate": 6.9818126591801335e-06,
      "loss": 2.2597,
      "step": 46097
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.005043387413025,
      "learning_rate": 6.981420126186511e-06,
      "loss": 2.3482,
      "step": 46098
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3372385501861572,
      "learning_rate": 6.981027598310197e-06,
      "loss": 2.1999,
      "step": 46099
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0589815378189087,
      "learning_rate": 6.980635075551861e-06,
      "loss": 2.2974,
      "step": 46100
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4614871740341187,
      "learning_rate": 6.980242557912169e-06,
      "loss": 2.0673,
      "step": 46101
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1855766773223877,
      "learning_rate": 6.979850045391785e-06,
      "loss": 2.3071,
      "step": 46102
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9961175322532654,
      "learning_rate": 6.979457537991376e-06,
      "loss": 2.1578,
      "step": 46103
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.006062626838684,
      "learning_rate": 6.979065035711608e-06,
      "loss": 2.2837,
      "step": 46104
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9936009645462036,
      "learning_rate": 6.978672538553144e-06,
      "loss": 2.429,
      "step": 46105
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4198503494262695,
      "learning_rate": 6.978280046516652e-06,
      "loss": 2.4115,
      "step": 46106
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2039004564285278,
      "learning_rate": 6.977887559602796e-06,
      "loss": 2.3181,
      "step": 46107
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0748460292816162,
      "learning_rate": 6.97749507781224e-06,
      "loss": 2.6178,
      "step": 46108
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0945045948028564,
      "learning_rate": 6.977102601145651e-06,
      "loss": 2.358,
      "step": 46109
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0659700632095337,
      "learning_rate": 6.976710129603694e-06,
      "loss": 2.1044,
      "step": 46110
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1262589693069458,
      "learning_rate": 6.9763176631870366e-06,
      "loss": 2.4052,
      "step": 46111
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1057034730911255,
      "learning_rate": 6.975925201896339e-06,
      "loss": 2.1561,
      "step": 46112
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0536433458328247,
      "learning_rate": 6.9755327457322744e-06,
      "loss": 2.5049,
      "step": 46113
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.129849910736084,
      "learning_rate": 6.975140294695498e-06,
      "loss": 2.2654,
      "step": 46114
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9071232676506042,
      "learning_rate": 6.974747848786683e-06,
      "loss": 2.2828,
      "step": 46115
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0891932249069214,
      "learning_rate": 6.9743554080064905e-06,
      "loss": 2.4337,
      "step": 46116
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1454479694366455,
      "learning_rate": 6.973962972355586e-06,
      "loss": 2.237,
      "step": 46117
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0425952672958374,
      "learning_rate": 6.973570541834638e-06,
      "loss": 2.4035,
      "step": 46118
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1086301803588867,
      "learning_rate": 6.9731781164443105e-06,
      "loss": 2.2152,
      "step": 46119
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0286494493484497,
      "learning_rate": 6.972785696185267e-06,
      "loss": 2.3341,
      "step": 46120
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.035717487335205,
      "learning_rate": 6.972393281058174e-06,
      "loss": 2.1881,
      "step": 46121
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.973504364490509,
      "learning_rate": 6.972000871063696e-06,
      "loss": 2.4113,
      "step": 46122
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0489813089370728,
      "learning_rate": 6.9716084662025e-06,
      "loss": 2.3309,
      "step": 46123
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.060096263885498,
      "learning_rate": 6.971216066475254e-06,
      "loss": 2.3442,
      "step": 46124
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9140686392784119,
      "learning_rate": 6.970823671882615e-06,
      "loss": 2.2374,
      "step": 46125
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0904181003570557,
      "learning_rate": 6.970431282425253e-06,
      "loss": 2.3776,
      "step": 46126
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9969882965087891,
      "learning_rate": 6.970038898103832e-06,
      "loss": 2.3255,
      "step": 46127
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.26887845993042,
      "learning_rate": 6.969646518919019e-06,
      "loss": 2.4066,
      "step": 46128
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2738701105117798,
      "learning_rate": 6.9692541448714775e-06,
      "loss": 2.2677,
      "step": 46129
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0668668746948242,
      "learning_rate": 6.968861775961874e-06,
      "loss": 2.3734,
      "step": 46130
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0547630786895752,
      "learning_rate": 6.968469412190873e-06,
      "loss": 2.3425,
      "step": 46131
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.132491946220398,
      "learning_rate": 6.968077053559139e-06,
      "loss": 2.3845,
      "step": 46132
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0697077512741089,
      "learning_rate": 6.9676847000673385e-06,
      "loss": 2.3168,
      "step": 46133
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1686984300613403,
      "learning_rate": 6.967292351716138e-06,
      "loss": 2.2341,
      "step": 46134
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0651729106903076,
      "learning_rate": 6.966900008506198e-06,
      "loss": 2.5564,
      "step": 46135
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.030729055404663,
      "learning_rate": 6.9665076704381865e-06,
      "loss": 2.3372,
      "step": 46136
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.112396478652954,
      "learning_rate": 6.966115337512771e-06,
      "loss": 2.127,
      "step": 46137
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.053123116493225,
      "learning_rate": 6.965723009730612e-06,
      "loss": 2.3409,
      "step": 46138
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9775931239128113,
      "learning_rate": 6.965330687092375e-06,
      "loss": 2.4222,
      "step": 46139
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1128315925598145,
      "learning_rate": 6.964938369598728e-06,
      "loss": 2.5498,
      "step": 46140
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0696953535079956,
      "learning_rate": 6.964546057250337e-06,
      "loss": 2.3338,
      "step": 46141
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0650817155838013,
      "learning_rate": 6.964153750047862e-06,
      "loss": 2.2347,
      "step": 46142
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2603063583374023,
      "learning_rate": 6.963761447991971e-06,
      "loss": 2.1654,
      "step": 46143
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0300644636154175,
      "learning_rate": 6.96336915108333e-06,
      "loss": 2.4364,
      "step": 46144
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1392627954483032,
      "learning_rate": 6.962976859322602e-06,
      "loss": 2.1499,
      "step": 46145
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0330321788787842,
      "learning_rate": 6.9625845727104535e-06,
      "loss": 2.2481,
      "step": 46146
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.172831416130066,
      "learning_rate": 6.962192291247549e-06,
      "loss": 2.2233,
      "step": 46147
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0886505842208862,
      "learning_rate": 6.961800014934553e-06,
      "loss": 2.3292,
      "step": 46148
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0795695781707764,
      "learning_rate": 6.961407743772133e-06,
      "loss": 2.2101,
      "step": 46149
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3058074712753296,
      "learning_rate": 6.961015477760951e-06,
      "loss": 2.4007,
      "step": 46150
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2811075448989868,
      "learning_rate": 6.960623216901676e-06,
      "loss": 2.3183,
      "step": 46151
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0090447664260864,
      "learning_rate": 6.960230961194967e-06,
      "loss": 2.4127,
      "step": 46152
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1614911556243896,
      "learning_rate": 6.9598387106414935e-06,
      "loss": 2.1991,
      "step": 46153
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9863317608833313,
      "learning_rate": 6.959446465241917e-06,
      "loss": 2.3459,
      "step": 46154
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0327661037445068,
      "learning_rate": 6.959054224996906e-06,
      "loss": 2.4645,
      "step": 46155
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0959806442260742,
      "learning_rate": 6.958661989907123e-06,
      "loss": 2.2771,
      "step": 46156
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.10728120803833,
      "learning_rate": 6.958269759973235e-06,
      "loss": 2.3446,
      "step": 46157
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1330782175064087,
      "learning_rate": 6.957877535195905e-06,
      "loss": 2.4378,
      "step": 46158
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1742393970489502,
      "learning_rate": 6.957485315575798e-06,
      "loss": 2.395,
      "step": 46159
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0823091268539429,
      "learning_rate": 6.957093101113581e-06,
      "loss": 2.2974,
      "step": 46160
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.010581612586975,
      "learning_rate": 6.9567008918099174e-06,
      "loss": 2.1596,
      "step": 46161
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0535601377487183,
      "learning_rate": 6.956308687665475e-06,
      "loss": 2.4732,
      "step": 46162
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1223355531692505,
      "learning_rate": 6.955916488680914e-06,
      "loss": 2.2212,
      "step": 46163
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1717617511749268,
      "learning_rate": 6.955524294856902e-06,
      "loss": 2.3273,
      "step": 46164
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0272551774978638,
      "learning_rate": 6.955132106194103e-06,
      "loss": 2.3803,
      "step": 46165
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.113107442855835,
      "learning_rate": 6.95473992269318e-06,
      "loss": 2.3096,
      "step": 46166
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2230042219161987,
      "learning_rate": 6.954347744354801e-06,
      "loss": 2.0823,
      "step": 46167
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1502721309661865,
      "learning_rate": 6.953955571179632e-06,
      "loss": 2.2481,
      "step": 46168
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1320054531097412,
      "learning_rate": 6.953563403168333e-06,
      "loss": 2.6624,
      "step": 46169
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2617311477661133,
      "learning_rate": 6.9531712403215714e-06,
      "loss": 2.3567,
      "step": 46170
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0448282957077026,
      "learning_rate": 6.952779082640012e-06,
      "loss": 2.287,
      "step": 46171
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2996270656585693,
      "learning_rate": 6.952386930124319e-06,
      "loss": 2.3748,
      "step": 46172
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0921828746795654,
      "learning_rate": 6.95199478277516e-06,
      "loss": 2.3666,
      "step": 46173
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1195377111434937,
      "learning_rate": 6.951602640593196e-06,
      "loss": 2.0795,
      "step": 46174
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.180496096611023,
      "learning_rate": 6.9512105035790945e-06,
      "loss": 2.2612,
      "step": 46175
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1783664226531982,
      "learning_rate": 6.950818371733519e-06,
      "loss": 2.5383,
      "step": 46176
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0173351764678955,
      "learning_rate": 6.950426245057138e-06,
      "loss": 2.2101,
      "step": 46177
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1401175260543823,
      "learning_rate": 6.9500341235506084e-06,
      "loss": 2.2961,
      "step": 46178
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0316288471221924,
      "learning_rate": 6.949642007214601e-06,
      "loss": 2.0887,
      "step": 46179
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2959990501403809,
      "learning_rate": 6.949249896049777e-06,
      "loss": 2.1529,
      "step": 46180
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.06449294090271,
      "learning_rate": 6.948857790056805e-06,
      "loss": 2.4087,
      "step": 46181
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1391048431396484,
      "learning_rate": 6.948465689236346e-06,
      "loss": 2.3759,
      "step": 46182
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1030356884002686,
      "learning_rate": 6.948073593589066e-06,
      "loss": 2.3458,
      "step": 46183
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0392862558364868,
      "learning_rate": 6.947681503115631e-06,
      "loss": 2.0798,
      "step": 46184
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0967512130737305,
      "learning_rate": 6.947289417816705e-06,
      "loss": 2.1438,
      "step": 46185
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1339603662490845,
      "learning_rate": 6.946897337692953e-06,
      "loss": 2.3586,
      "step": 46186
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1186439990997314,
      "learning_rate": 6.946505262745038e-06,
      "loss": 2.4044,
      "step": 46187
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0704870223999023,
      "learning_rate": 6.946113192973627e-06,
      "loss": 2.2016,
      "step": 46188
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1135333776474,
      "learning_rate": 6.945721128379383e-06,
      "loss": 2.2735,
      "step": 46189
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0547959804534912,
      "learning_rate": 6.945329068962976e-06,
      "loss": 2.2619,
      "step": 46190
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9677282571792603,
      "learning_rate": 6.944937014725062e-06,
      "loss": 2.3329,
      "step": 46191
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.045206904411316,
      "learning_rate": 6.944544965666308e-06,
      "loss": 2.426,
      "step": 46192
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0335066318511963,
      "learning_rate": 6.944152921787382e-06,
      "loss": 2.3157,
      "step": 46193
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2076902389526367,
      "learning_rate": 6.9437608830889435e-06,
      "loss": 2.444,
      "step": 46194
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.360522747039795,
      "learning_rate": 6.943368849571664e-06,
      "loss": 2.1167,
      "step": 46195
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.122823715209961,
      "learning_rate": 6.942976821236204e-06,
      "loss": 2.4662,
      "step": 46196
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0147567987442017,
      "learning_rate": 6.942584798083227e-06,
      "loss": 2.3081,
      "step": 46197
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2420268058776855,
      "learning_rate": 6.942192780113401e-06,
      "loss": 2.3003,
      "step": 46198
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0522547960281372,
      "learning_rate": 6.941800767327387e-06,
      "loss": 2.2962,
      "step": 46199
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.197147011756897,
      "learning_rate": 6.941408759725852e-06,
      "loss": 2.3989,
      "step": 46200
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1489399671554565,
      "learning_rate": 6.94101675730946e-06,
      "loss": 2.2588,
      "step": 46201
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1424016952514648,
      "learning_rate": 6.940624760078873e-06,
      "loss": 2.2891,
      "step": 46202
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1152865886688232,
      "learning_rate": 6.940232768034765e-06,
      "loss": 2.5682,
      "step": 46203
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.060181975364685,
      "learning_rate": 6.939840781177789e-06,
      "loss": 2.2023,
      "step": 46204
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1000832319259644,
      "learning_rate": 6.9394487995086125e-06,
      "loss": 2.0646,
      "step": 46205
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0947632789611816,
      "learning_rate": 6.939056823027903e-06,
      "loss": 2.2425,
      "step": 46206
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1761751174926758,
      "learning_rate": 6.938664851736323e-06,
      "loss": 2.333,
      "step": 46207
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1206196546554565,
      "learning_rate": 6.938272885634537e-06,
      "loss": 2.3933,
      "step": 46208
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1754035949707031,
      "learning_rate": 6.93788092472321e-06,
      "loss": 2.2379,
      "step": 46209
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2929326295852661,
      "learning_rate": 6.937488969003008e-06,
      "loss": 2.5189,
      "step": 46210
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0367083549499512,
      "learning_rate": 6.937097018474593e-06,
      "loss": 2.2378,
      "step": 46211
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0120784044265747,
      "learning_rate": 6.93670507313863e-06,
      "loss": 2.1526,
      "step": 46212
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1312981843948364,
      "learning_rate": 6.936313132995784e-06,
      "loss": 2.3207,
      "step": 46213
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0216147899627686,
      "learning_rate": 6.93592119804672e-06,
      "loss": 2.0466,
      "step": 46214
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1270595788955688,
      "learning_rate": 6.935529268292101e-06,
      "loss": 2.5714,
      "step": 46215
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0670615434646606,
      "learning_rate": 6.935137343732592e-06,
      "loss": 2.4078,
      "step": 46216
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2037475109100342,
      "learning_rate": 6.934745424368863e-06,
      "loss": 2.3869,
      "step": 46217
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1006948947906494,
      "learning_rate": 6.934353510201568e-06,
      "loss": 2.3496,
      "step": 46218
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.100338101387024,
      "learning_rate": 6.933961601231376e-06,
      "loss": 2.1572,
      "step": 46219
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0047768354415894,
      "learning_rate": 6.933569697458953e-06,
      "loss": 2.4383,
      "step": 46220
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0925953388214111,
      "learning_rate": 6.933177798884962e-06,
      "loss": 2.2884,
      "step": 46221
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4137775897979736,
      "learning_rate": 6.932785905510066e-06,
      "loss": 2.1678,
      "step": 46222
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2688463926315308,
      "learning_rate": 6.9323940173349335e-06,
      "loss": 2.2338,
      "step": 46223
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.010499358177185,
      "learning_rate": 6.932002134360226e-06,
      "loss": 2.3836,
      "step": 46224
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0654408931732178,
      "learning_rate": 6.931610256586607e-06,
      "loss": 2.6237,
      "step": 46225
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2425481081008911,
      "learning_rate": 6.931218384014742e-06,
      "loss": 2.3877,
      "step": 46226
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2685978412628174,
      "learning_rate": 6.930826516645295e-06,
      "loss": 2.5014,
      "step": 46227
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1143622398376465,
      "learning_rate": 6.930434654478931e-06,
      "loss": 2.1464,
      "step": 46228
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0864537954330444,
      "learning_rate": 6.930042797516314e-06,
      "loss": 2.5856,
      "step": 46229
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0885564088821411,
      "learning_rate": 6.929650945758111e-06,
      "loss": 2.3818,
      "step": 46230
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.702017068862915,
      "learning_rate": 6.929259099204983e-06,
      "loss": 2.1839,
      "step": 46231
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1224740743637085,
      "learning_rate": 6.928867257857592e-06,
      "loss": 2.2598,
      "step": 46232
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0269349813461304,
      "learning_rate": 6.928475421716605e-06,
      "loss": 2.3174,
      "step": 46233
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.037568211555481,
      "learning_rate": 6.928083590782687e-06,
      "loss": 2.5022,
      "step": 46234
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0885735750198364,
      "learning_rate": 6.927691765056502e-06,
      "loss": 2.4877,
      "step": 46235
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0428531169891357,
      "learning_rate": 6.927299944538713e-06,
      "loss": 2.4183,
      "step": 46236
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.097678303718567,
      "learning_rate": 6.926908129229985e-06,
      "loss": 2.354,
      "step": 46237
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0306943655014038,
      "learning_rate": 6.926516319130984e-06,
      "loss": 2.2709,
      "step": 46238
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0983089208602905,
      "learning_rate": 6.926124514242372e-06,
      "loss": 2.3291,
      "step": 46239
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0885422229766846,
      "learning_rate": 6.925732714564812e-06,
      "loss": 2.6036,
      "step": 46240
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.161141276359558,
      "learning_rate": 6.925340920098973e-06,
      "loss": 2.3746,
      "step": 46241
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3904778957366943,
      "learning_rate": 6.924949130845513e-06,
      "loss": 2.2833,
      "step": 46242
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9941079020500183,
      "learning_rate": 6.9245573468051055e-06,
      "loss": 2.1773,
      "step": 46243
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1775490045547485,
      "learning_rate": 6.924165567978405e-06,
      "loss": 2.5315,
      "step": 46244
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7348054647445679,
      "learning_rate": 6.923773794366078e-06,
      "loss": 2.3086,
      "step": 46245
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1480860710144043,
      "learning_rate": 6.92338202596879e-06,
      "loss": 2.1734,
      "step": 46246
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.131523609161377,
      "learning_rate": 6.922990262787206e-06,
      "loss": 2.2596,
      "step": 46247
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2670674324035645,
      "learning_rate": 6.922598504821988e-06,
      "loss": 2.2482,
      "step": 46248
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0849213600158691,
      "learning_rate": 6.9222067520738015e-06,
      "loss": 2.2718,
      "step": 46249
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0817703008651733,
      "learning_rate": 6.921815004543311e-06,
      "loss": 2.5332,
      "step": 46250
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0383247137069702,
      "learning_rate": 6.921423262231179e-06,
      "loss": 2.4329,
      "step": 46251
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.188127875328064,
      "learning_rate": 6.921031525138074e-06,
      "loss": 2.2724,
      "step": 46252
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2053641080856323,
      "learning_rate": 6.920639793264654e-06,
      "loss": 2.2013,
      "step": 46253
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0985705852508545,
      "learning_rate": 6.9202480666115865e-06,
      "loss": 2.5315,
      "step": 46254
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0534167289733887,
      "learning_rate": 6.919856345179535e-06,
      "loss": 2.7277,
      "step": 46255
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0707292556762695,
      "learning_rate": 6.919464628969166e-06,
      "loss": 2.4972,
      "step": 46256
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1193617582321167,
      "learning_rate": 6.919072917981138e-06,
      "loss": 2.3386,
      "step": 46257
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0296862125396729,
      "learning_rate": 6.918681212216118e-06,
      "loss": 2.2246,
      "step": 46258
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0893727540969849,
      "learning_rate": 6.918289511674774e-06,
      "loss": 2.5474,
      "step": 46259
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0520106554031372,
      "learning_rate": 6.917897816357763e-06,
      "loss": 2.4426,
      "step": 46260
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9488939642906189,
      "learning_rate": 6.917506126265751e-06,
      "loss": 2.2965,
      "step": 46261
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0132206678390503,
      "learning_rate": 6.917114441399406e-06,
      "loss": 2.3982,
      "step": 46262
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0541067123413086,
      "learning_rate": 6.916722761759387e-06,
      "loss": 2.261,
      "step": 46263
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1568176746368408,
      "learning_rate": 6.916331087346362e-06,
      "loss": 2.2995,
      "step": 46264
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2612335681915283,
      "learning_rate": 6.915939418160993e-06,
      "loss": 2.3315,
      "step": 46265
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0947307348251343,
      "learning_rate": 6.915547754203944e-06,
      "loss": 2.4752,
      "step": 46266
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1653587818145752,
      "learning_rate": 6.915156095475879e-06,
      "loss": 2.254,
      "step": 46267
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0100669860839844,
      "learning_rate": 6.914764441977462e-06,
      "loss": 2.4051,
      "step": 46268
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1600826978683472,
      "learning_rate": 6.914372793709362e-06,
      "loss": 2.3909,
      "step": 46269
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0954891443252563,
      "learning_rate": 6.913981150672233e-06,
      "loss": 2.1925,
      "step": 46270
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0468003749847412,
      "learning_rate": 6.913589512866745e-06,
      "loss": 2.2459,
      "step": 46271
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.022544264793396,
      "learning_rate": 6.913197880293561e-06,
      "loss": 2.3724,
      "step": 46272
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1579103469848633,
      "learning_rate": 6.912806252953346e-06,
      "loss": 2.4009,
      "step": 46273
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1056469678878784,
      "learning_rate": 6.912414630846762e-06,
      "loss": 2.3918,
      "step": 46274
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0679638385772705,
      "learning_rate": 6.912023013974474e-06,
      "loss": 2.4897,
      "step": 46275
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9673216342926025,
      "learning_rate": 6.911631402337145e-06,
      "loss": 2.2644,
      "step": 46276
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9755362868309021,
      "learning_rate": 6.911239795935439e-06,
      "loss": 2.5664,
      "step": 46277
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1166967153549194,
      "learning_rate": 6.910848194770021e-06,
      "loss": 2.1962,
      "step": 46278
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0464286804199219,
      "learning_rate": 6.910456598841555e-06,
      "loss": 2.3629,
      "step": 46279
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1097999811172485,
      "learning_rate": 6.910065008150706e-06,
      "loss": 2.3668,
      "step": 46280
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1822277307510376,
      "learning_rate": 6.9096734226981335e-06,
      "loss": 2.2956,
      "step": 46281
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1797677278518677,
      "learning_rate": 6.909281842484506e-06,
      "loss": 2.2189,
      "step": 46282
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.027689814567566,
      "learning_rate": 6.908890267510484e-06,
      "loss": 2.0426,
      "step": 46283
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2069923877716064,
      "learning_rate": 6.908498697776731e-06,
      "loss": 2.1937,
      "step": 46284
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1067805290222168,
      "learning_rate": 6.908107133283915e-06,
      "loss": 2.3575,
      "step": 46285
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0325274467468262,
      "learning_rate": 6.907715574032696e-06,
      "loss": 2.646,
      "step": 46286
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1081453561782837,
      "learning_rate": 6.907324020023738e-06,
      "loss": 2.3318,
      "step": 46287
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0890569686889648,
      "learning_rate": 6.906932471257707e-06,
      "loss": 2.4525,
      "step": 46288
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1227316856384277,
      "learning_rate": 6.906540927735265e-06,
      "loss": 2.4018,
      "step": 46289
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.19815993309021,
      "learning_rate": 6.906149389457075e-06,
      "loss": 2.4831,
      "step": 46290
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0135879516601562,
      "learning_rate": 6.905757856423802e-06,
      "loss": 2.3227,
      "step": 46291
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2431998252868652,
      "learning_rate": 6.905366328636111e-06,
      "loss": 2.392,
      "step": 46292
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0239940881729126,
      "learning_rate": 6.9049748060946655e-06,
      "loss": 2.1247,
      "step": 46293
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1505320072174072,
      "learning_rate": 6.904583288800126e-06,
      "loss": 2.1982,
      "step": 46294
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1530342102050781,
      "learning_rate": 6.904191776753161e-06,
      "loss": 2.2966,
      "step": 46295
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9678218960762024,
      "learning_rate": 6.903800269954435e-06,
      "loss": 2.2884,
      "step": 46296
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0513032674789429,
      "learning_rate": 6.9034087684046025e-06,
      "loss": 2.4159,
      "step": 46297
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1542537212371826,
      "learning_rate": 6.903017272104336e-06,
      "loss": 2.4691,
      "step": 46298
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0665663480758667,
      "learning_rate": 6.902625781054294e-06,
      "loss": 2.3893,
      "step": 46299
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.280827283859253,
      "learning_rate": 6.9022342952551434e-06,
      "loss": 2.3334,
      "step": 46300
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2243435382843018,
      "learning_rate": 6.901842814707546e-06,
      "loss": 2.2383,
      "step": 46301
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0353798866271973,
      "learning_rate": 6.9014513394121685e-06,
      "loss": 2.576,
      "step": 46302
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0402469635009766,
      "learning_rate": 6.901059869369672e-06,
      "loss": 2.497,
      "step": 46303
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0655608177185059,
      "learning_rate": 6.900668404580721e-06,
      "loss": 2.2712,
      "step": 46304
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0617886781692505,
      "learning_rate": 6.900276945045977e-06,
      "loss": 2.2121,
      "step": 46305
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.076696753501892,
      "learning_rate": 6.899885490766107e-06,
      "loss": 2.4429,
      "step": 46306
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.059905767440796,
      "learning_rate": 6.899494041741772e-06,
      "loss": 2.4643,
      "step": 46307
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9115421175956726,
      "learning_rate": 6.8991025979736395e-06,
      "loss": 2.3597,
      "step": 46308
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1466968059539795,
      "learning_rate": 6.89871115946237e-06,
      "loss": 2.4064,
      "step": 46309
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2027721405029297,
      "learning_rate": 6.898319726208626e-06,
      "loss": 2.3875,
      "step": 46310
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1555982828140259,
      "learning_rate": 6.897928298213072e-06,
      "loss": 2.3524,
      "step": 46311
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0320369005203247,
      "learning_rate": 6.897536875476372e-06,
      "loss": 2.3993,
      "step": 46312
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0666812658309937,
      "learning_rate": 6.897145457999189e-06,
      "loss": 2.2975,
      "step": 46313
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1668965816497803,
      "learning_rate": 6.89675404578219e-06,
      "loss": 2.3281,
      "step": 46314
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0096784830093384,
      "learning_rate": 6.896362638826033e-06,
      "loss": 2.2297,
      "step": 46315
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0984910726547241,
      "learning_rate": 6.895971237131384e-06,
      "loss": 2.3648,
      "step": 46316
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.175172209739685,
      "learning_rate": 6.895579840698909e-06,
      "loss": 2.23,
      "step": 46317
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0152817964553833,
      "learning_rate": 6.895188449529267e-06,
      "loss": 2.4744,
      "step": 46318
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0238292217254639,
      "learning_rate": 6.894797063623125e-06,
      "loss": 2.2742,
      "step": 46319
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0775185823440552,
      "learning_rate": 6.894405682981146e-06,
      "loss": 2.3956,
      "step": 46320
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1200770139694214,
      "learning_rate": 6.894014307603991e-06,
      "loss": 2.4681,
      "step": 46321
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.153674840927124,
      "learning_rate": 6.89362293749233e-06,
      "loss": 2.4605,
      "step": 46322
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1006877422332764,
      "learning_rate": 6.893231572646819e-06,
      "loss": 2.4969,
      "step": 46323
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.105271339416504,
      "learning_rate": 6.892840213068123e-06,
      "loss": 2.2602,
      "step": 46324
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1401604413986206,
      "learning_rate": 6.892448858756907e-06,
      "loss": 2.3014,
      "step": 46325
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0592797994613647,
      "learning_rate": 6.892057509713834e-06,
      "loss": 2.3302,
      "step": 46326
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.994691014289856,
      "learning_rate": 6.8916661659395675e-06,
      "loss": 2.1682,
      "step": 46327
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0277525186538696,
      "learning_rate": 6.891274827434772e-06,
      "loss": 2.325,
      "step": 46328
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0860751867294312,
      "learning_rate": 6.890883494200109e-06,
      "loss": 2.2565,
      "step": 46329
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1470811367034912,
      "learning_rate": 6.8904921662362435e-06,
      "loss": 2.3853,
      "step": 46330
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0654621124267578,
      "learning_rate": 6.890100843543838e-06,
      "loss": 2.2295,
      "step": 46331
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.113283634185791,
      "learning_rate": 6.889709526123556e-06,
      "loss": 2.4512,
      "step": 46332
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0300955772399902,
      "learning_rate": 6.889318213976062e-06,
      "loss": 2.1518,
      "step": 46333
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.975993812084198,
      "learning_rate": 6.888926907102017e-06,
      "loss": 2.3397,
      "step": 46334
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0476441383361816,
      "learning_rate": 6.888535605502091e-06,
      "loss": 2.391,
      "step": 46335
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.017479658126831,
      "learning_rate": 6.888144309176938e-06,
      "loss": 2.0873,
      "step": 46336
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1245487928390503,
      "learning_rate": 6.887753018127226e-06,
      "loss": 2.3364,
      "step": 46337
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9425362944602966,
      "learning_rate": 6.887361732353617e-06,
      "loss": 2.0839,
      "step": 46338
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2366114854812622,
      "learning_rate": 6.886970451856776e-06,
      "loss": 2.4376,
      "step": 46339
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0449697971343994,
      "learning_rate": 6.8865791766373645e-06,
      "loss": 2.2311,
      "step": 46340
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1443744897842407,
      "learning_rate": 6.886187906696048e-06,
      "loss": 2.286,
      "step": 46341
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1433647871017456,
      "learning_rate": 6.885796642033491e-06,
      "loss": 2.4607,
      "step": 46342
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0342768430709839,
      "learning_rate": 6.885405382650351e-06,
      "loss": 2.3064,
      "step": 46343
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2255879640579224,
      "learning_rate": 6.885014128547296e-06,
      "loss": 2.4063,
      "step": 46344
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0481206178665161,
      "learning_rate": 6.884622879724987e-06,
      "loss": 2.3188,
      "step": 46345
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0350916385650635,
      "learning_rate": 6.884231636184089e-06,
      "loss": 2.3681,
      "step": 46346
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.085254192352295,
      "learning_rate": 6.883840397925266e-06,
      "loss": 2.2935,
      "step": 46347
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1120167970657349,
      "learning_rate": 6.883449164949178e-06,
      "loss": 2.3398,
      "step": 46348
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0447946786880493,
      "learning_rate": 6.883057937256495e-06,
      "loss": 2.6219,
      "step": 46349
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0679389238357544,
      "learning_rate": 6.88266671484787e-06,
      "loss": 2.2511,
      "step": 46350
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1454908847808838,
      "learning_rate": 6.882275497723972e-06,
      "loss": 2.3906,
      "step": 46351
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3388149738311768,
      "learning_rate": 6.881884285885465e-06,
      "loss": 2.1389,
      "step": 46352
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1619231700897217,
      "learning_rate": 6.88149307933301e-06,
      "loss": 2.3754,
      "step": 46353
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1136181354522705,
      "learning_rate": 6.881101878067271e-06,
      "loss": 2.4495,
      "step": 46354
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.133123517036438,
      "learning_rate": 6.880710682088911e-06,
      "loss": 2.4288,
      "step": 46355
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1431914567947388,
      "learning_rate": 6.880319491398595e-06,
      "loss": 2.3078,
      "step": 46356
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1684529781341553,
      "learning_rate": 6.8799283059969845e-06,
      "loss": 2.1449,
      "step": 46357
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0477709770202637,
      "learning_rate": 6.8795371258847435e-06,
      "loss": 2.6494,
      "step": 46358
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.006968379020691,
      "learning_rate": 6.879145951062533e-06,
      "loss": 2.3609,
      "step": 46359
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.083707571029663,
      "learning_rate": 6.878754781531019e-06,
      "loss": 2.4357,
      "step": 46360
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.110377550125122,
      "learning_rate": 6.8783636172908615e-06,
      "loss": 2.2434,
      "step": 46361
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0826506614685059,
      "learning_rate": 6.877972458342732e-06,
      "loss": 2.4716,
      "step": 46362
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9558955430984497,
      "learning_rate": 6.877581304687282e-06,
      "loss": 2.2235,
      "step": 46363
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.137913703918457,
      "learning_rate": 6.87719015632518e-06,
      "loss": 2.3891,
      "step": 46364
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.125192403793335,
      "learning_rate": 6.876799013257087e-06,
      "loss": 2.4283,
      "step": 46365
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0572633743286133,
      "learning_rate": 6.876407875483671e-06,
      "loss": 2.351,
      "step": 46366
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9689197540283203,
      "learning_rate": 6.876016743005589e-06,
      "loss": 2.2365,
      "step": 46367
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1091784238815308,
      "learning_rate": 6.875625615823509e-06,
      "loss": 2.1855,
      "step": 46368
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1335639953613281,
      "learning_rate": 6.875234493938092e-06,
      "loss": 2.3621,
      "step": 46369
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0083913803100586,
      "learning_rate": 6.874843377350002e-06,
      "loss": 2.378,
      "step": 46370
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0375422239303589,
      "learning_rate": 6.874452266059901e-06,
      "loss": 2.4582,
      "step": 46371
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2318717241287231,
      "learning_rate": 6.874061160068452e-06,
      "loss": 2.2963,
      "step": 46372
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0710464715957642,
      "learning_rate": 6.873670059376316e-06,
      "loss": 2.1175,
      "step": 46373
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0971118211746216,
      "learning_rate": 6.87327896398416e-06,
      "loss": 2.4343,
      "step": 46374
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1566283702850342,
      "learning_rate": 6.8728878738926485e-06,
      "loss": 2.4413,
      "step": 46375
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0302687883377075,
      "learning_rate": 6.872496789102438e-06,
      "loss": 2.181,
      "step": 46376
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1062073707580566,
      "learning_rate": 6.872105709614197e-06,
      "loss": 2.3823,
      "step": 46377
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1437628269195557,
      "learning_rate": 6.871714635428586e-06,
      "loss": 2.5468,
      "step": 46378
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0435378551483154,
      "learning_rate": 6.871323566546267e-06,
      "loss": 2.5183,
      "step": 46379
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0837547779083252,
      "learning_rate": 6.870932502967903e-06,
      "loss": 2.2726,
      "step": 46380
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0638270378112793,
      "learning_rate": 6.8705414446941584e-06,
      "loss": 2.4943,
      "step": 46381
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.083894968032837,
      "learning_rate": 6.870150391725698e-06,
      "loss": 2.3039,
      "step": 46382
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.158638834953308,
      "learning_rate": 6.869759344063182e-06,
      "loss": 2.2035,
      "step": 46383
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0479451417922974,
      "learning_rate": 6.869368301707273e-06,
      "loss": 2.5865,
      "step": 46384
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.078966498374939,
      "learning_rate": 6.868977264658636e-06,
      "loss": 2.2896,
      "step": 46385
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.08293879032135,
      "learning_rate": 6.868586232917933e-06,
      "loss": 2.3714,
      "step": 46386
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2572880983352661,
      "learning_rate": 6.868195206485826e-06,
      "loss": 2.169,
      "step": 46387
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.084317684173584,
      "learning_rate": 6.867804185362984e-06,
      "loss": 2.359,
      "step": 46388
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.120863914489746,
      "learning_rate": 6.867413169550059e-06,
      "loss": 2.2334,
      "step": 46389
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1617019176483154,
      "learning_rate": 6.86702215904772e-06,
      "loss": 2.1192,
      "step": 46390
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0239397287368774,
      "learning_rate": 6.866631153856631e-06,
      "loss": 2.2902,
      "step": 46391
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0504348278045654,
      "learning_rate": 6.866240153977451e-06,
      "loss": 2.2723,
      "step": 46392
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3059747219085693,
      "learning_rate": 6.865849159410847e-06,
      "loss": 2.4047,
      "step": 46393
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0880488157272339,
      "learning_rate": 6.865458170157477e-06,
      "loss": 2.2638,
      "step": 46394
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1366294622421265,
      "learning_rate": 6.865067186218009e-06,
      "loss": 2.2959,
      "step": 46395
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0229854583740234,
      "learning_rate": 6.864676207593104e-06,
      "loss": 2.221,
      "step": 46396
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0700092315673828,
      "learning_rate": 6.864285234283423e-06,
      "loss": 2.2589,
      "step": 46397
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0171947479248047,
      "learning_rate": 6.8638942662896325e-06,
      "loss": 2.2338,
      "step": 46398
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.194972038269043,
      "learning_rate": 6.863503303612391e-06,
      "loss": 2.5882,
      "step": 46399
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3540321588516235,
      "learning_rate": 6.863112346252363e-06,
      "loss": 2.5312,
      "step": 46400
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1152368783950806,
      "learning_rate": 6.862721394210215e-06,
      "loss": 2.2484,
      "step": 46401
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1029075384140015,
      "learning_rate": 6.862330447486603e-06,
      "loss": 2.2297,
      "step": 46402
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.073655128479004,
      "learning_rate": 6.861939506082194e-06,
      "loss": 2.2537,
      "step": 46403
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1992573738098145,
      "learning_rate": 6.86154856999765e-06,
      "loss": 2.4638,
      "step": 46404
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.152064323425293,
      "learning_rate": 6.8611576392336335e-06,
      "loss": 2.4104,
      "step": 46405
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0626001358032227,
      "learning_rate": 6.860766713790806e-06,
      "loss": 2.3586,
      "step": 46406
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2190289497375488,
      "learning_rate": 6.860375793669832e-06,
      "loss": 2.2803,
      "step": 46407
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.152949571609497,
      "learning_rate": 6.859984878871374e-06,
      "loss": 2.1753,
      "step": 46408
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0482620000839233,
      "learning_rate": 6.859593969396095e-06,
      "loss": 2.348,
      "step": 46409
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2494953870773315,
      "learning_rate": 6.859203065244655e-06,
      "loss": 2.2349,
      "step": 46410
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0577073097229004,
      "learning_rate": 6.858812166417721e-06,
      "loss": 2.4333,
      "step": 46411
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.085540771484375,
      "learning_rate": 6.858421272915951e-06,
      "loss": 2.357,
      "step": 46412
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1672788858413696,
      "learning_rate": 6.858030384740013e-06,
      "loss": 2.3517,
      "step": 46413
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1126693487167358,
      "learning_rate": 6.85763950189057e-06,
      "loss": 2.3291,
      "step": 46414
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.012620210647583,
      "learning_rate": 6.857248624368278e-06,
      "loss": 2.5114,
      "step": 46415
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.06244957447052,
      "learning_rate": 6.856857752173802e-06,
      "loss": 2.3597,
      "step": 46416
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1602187156677246,
      "learning_rate": 6.856466885307806e-06,
      "loss": 2.3269,
      "step": 46417
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1671806573867798,
      "learning_rate": 6.856076023770952e-06,
      "loss": 2.418,
      "step": 46418
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0402758121490479,
      "learning_rate": 6.855685167563903e-06,
      "loss": 2.4333,
      "step": 46419
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.069693684577942,
      "learning_rate": 6.855294316687323e-06,
      "loss": 2.1485,
      "step": 46420
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0799978971481323,
      "learning_rate": 6.854903471141872e-06,
      "loss": 2.1869,
      "step": 46421
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1150555610656738,
      "learning_rate": 6.854512630928214e-06,
      "loss": 2.232,
      "step": 46422
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1179413795471191,
      "learning_rate": 6.854121796047012e-06,
      "loss": 2.1753,
      "step": 46423
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.134764552116394,
      "learning_rate": 6.853730966498928e-06,
      "loss": 2.3069,
      "step": 46424
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.216483235359192,
      "learning_rate": 6.853340142284624e-06,
      "loss": 2.3038,
      "step": 46425
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1664705276489258,
      "learning_rate": 6.852949323404765e-06,
      "loss": 2.0366,
      "step": 46426
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.978984534740448,
      "learning_rate": 6.8525585098600124e-06,
      "loss": 2.3441,
      "step": 46427
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2074198722839355,
      "learning_rate": 6.852167701651024e-06,
      "loss": 2.4715,
      "step": 46428
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3348067998886108,
      "learning_rate": 6.851776898778468e-06,
      "loss": 2.3974,
      "step": 46429
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0155595541000366,
      "learning_rate": 6.851386101243005e-06,
      "loss": 2.3675,
      "step": 46430
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.074965238571167,
      "learning_rate": 6.8509953090452984e-06,
      "loss": 2.4509,
      "step": 46431
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.042757272720337,
      "learning_rate": 6.85060452218601e-06,
      "loss": 2.1903,
      "step": 46432
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0311498641967773,
      "learning_rate": 6.850213740665802e-06,
      "loss": 2.2934,
      "step": 46433
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0480906963348389,
      "learning_rate": 6.849822964485338e-06,
      "loss": 2.3579,
      "step": 46434
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0393027067184448,
      "learning_rate": 6.8494321936452775e-06,
      "loss": 2.2286,
      "step": 46435
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9566128253936768,
      "learning_rate": 6.849041428146286e-06,
      "loss": 2.1581,
      "step": 46436
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0158593654632568,
      "learning_rate": 6.848650667989025e-06,
      "loss": 2.1501,
      "step": 46437
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.9725930094718933,
      "learning_rate": 6.848259913174157e-06,
      "loss": 2.5401,
      "step": 46438
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0458424091339111,
      "learning_rate": 6.847869163702343e-06,
      "loss": 2.3422,
      "step": 46439
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1669021844863892,
      "learning_rate": 6.847478419574249e-06,
      "loss": 2.3886,
      "step": 46440
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1114943027496338,
      "learning_rate": 6.847087680790539e-06,
      "loss": 2.5521,
      "step": 46441
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0812910795211792,
      "learning_rate": 6.846696947351868e-06,
      "loss": 2.2988,
      "step": 46442
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1006346940994263,
      "learning_rate": 6.8463062192589005e-06,
      "loss": 2.3344,
      "step": 46443
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1582945585250854,
      "learning_rate": 6.845915496512301e-06,
      "loss": 2.4085,
      "step": 46444
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.020772099494934,
      "learning_rate": 6.845524779112732e-06,
      "loss": 2.4443,
      "step": 46445
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0599863529205322,
      "learning_rate": 6.845134067060855e-06,
      "loss": 2.2875,
      "step": 46446
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0465993881225586,
      "learning_rate": 6.844743360357333e-06,
      "loss": 2.439,
      "step": 46447
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0776116847991943,
      "learning_rate": 6.844352659002828e-06,
      "loss": 2.4305,
      "step": 46448
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.090221881866455,
      "learning_rate": 6.843961962998001e-06,
      "loss": 2.2238,
      "step": 46449
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2096138000488281,
      "learning_rate": 6.843571272343517e-06,
      "loss": 2.2539,
      "step": 46450
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1706736087799072,
      "learning_rate": 6.843180587040037e-06,
      "loss": 2.3628,
      "step": 46451
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.098209023475647,
      "learning_rate": 6.842789907088222e-06,
      "loss": 2.0731,
      "step": 46452
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0967764854431152,
      "learning_rate": 6.842399232488737e-06,
      "loss": 2.0874,
      "step": 46453
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.083518624305725,
      "learning_rate": 6.842008563242246e-06,
      "loss": 2.2379,
      "step": 46454
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0680981874465942,
      "learning_rate": 6.841617899349404e-06,
      "loss": 2.2055,
      "step": 46455
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0992228984832764,
      "learning_rate": 6.8412272408108786e-06,
      "loss": 2.2722,
      "step": 46456
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0926164388656616,
      "learning_rate": 6.840836587627331e-06,
      "loss": 2.3359,
      "step": 46457
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2827447652816772,
      "learning_rate": 6.840445939799422e-06,
      "loss": 2.34,
      "step": 46458
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0430173873901367,
      "learning_rate": 6.840055297327815e-06,
      "loss": 2.2868,
      "step": 46459
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.451835036277771,
      "learning_rate": 6.839664660213177e-06,
      "loss": 2.239,
      "step": 46460
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.04989492893219,
      "learning_rate": 6.839274028456161e-06,
      "loss": 2.4467,
      "step": 46461
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.051825761795044,
      "learning_rate": 6.838883402057434e-06,
      "loss": 2.3107,
      "step": 46462
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.057206153869629,
      "learning_rate": 6.8384927810176604e-06,
      "loss": 2.1774,
      "step": 46463
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0207293033599854,
      "learning_rate": 6.838102165337499e-06,
      "loss": 2.3766,
      "step": 46464
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2433850765228271,
      "learning_rate": 6.837711555017614e-06,
      "loss": 2.4601,
      "step": 46465
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2569926977157593,
      "learning_rate": 6.837320950058666e-06,
      "loss": 2.2156,
      "step": 46466
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1917592287063599,
      "learning_rate": 6.836930350461322e-06,
      "loss": 2.2759,
      "step": 46467
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0758428573608398,
      "learning_rate": 6.836539756226236e-06,
      "loss": 2.4145,
      "step": 46468
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.073885440826416,
      "learning_rate": 6.836149167354074e-06,
      "loss": 2.3772,
      "step": 46469
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0917330980300903,
      "learning_rate": 6.8357585838455e-06,
      "loss": 2.3068,
      "step": 46470
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0718626976013184,
      "learning_rate": 6.835368005701173e-06,
      "loss": 2.3894,
      "step": 46471
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9932539463043213,
      "learning_rate": 6.834977432921757e-06,
      "loss": 2.4683,
      "step": 46472
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0542970895767212,
      "learning_rate": 6.834586865507914e-06,
      "loss": 2.0001,
      "step": 46473
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0871644020080566,
      "learning_rate": 6.834196303460305e-06,
      "loss": 2.4427,
      "step": 46474
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1181765794754028,
      "learning_rate": 6.8338057467795956e-06,
      "loss": 2.3514,
      "step": 46475
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1747801303863525,
      "learning_rate": 6.833415195466444e-06,
      "loss": 2.3148,
      "step": 46476
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1117652654647827,
      "learning_rate": 6.8330246495215135e-06,
      "loss": 2.3512,
      "step": 46477
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0393558740615845,
      "learning_rate": 6.832634108945466e-06,
      "loss": 2.6286,
      "step": 46478
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1821978092193604,
      "learning_rate": 6.832243573738965e-06,
      "loss": 2.5763,
      "step": 46479
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9887153506278992,
      "learning_rate": 6.831853043902674e-06,
      "loss": 2.4369,
      "step": 46480
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.066814661026001,
      "learning_rate": 6.831462519437251e-06,
      "loss": 2.5355,
      "step": 46481
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.072189211845398,
      "learning_rate": 6.831072000343357e-06,
      "loss": 2.5514,
      "step": 46482
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0606919527053833,
      "learning_rate": 6.8306814866216574e-06,
      "loss": 2.4243,
      "step": 46483
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2318916320800781,
      "learning_rate": 6.830290978272813e-06,
      "loss": 2.4515,
      "step": 46484
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2055771350860596,
      "learning_rate": 6.829900475297489e-06,
      "loss": 2.3306,
      "step": 46485
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.139747977256775,
      "learning_rate": 6.829509977696341e-06,
      "loss": 2.5286,
      "step": 46486
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.070333480834961,
      "learning_rate": 6.829119485470037e-06,
      "loss": 2.2657,
      "step": 46487
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.112873911857605,
      "learning_rate": 6.8287289986192386e-06,
      "loss": 2.3974,
      "step": 46488
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.007642388343811,
      "learning_rate": 6.828338517144602e-06,
      "loss": 2.3045,
      "step": 46489
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.160176396369934,
      "learning_rate": 6.827948041046795e-06,
      "loss": 2.3683,
      "step": 46490
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0720717906951904,
      "learning_rate": 6.8275575703264775e-06,
      "loss": 2.3359,
      "step": 46491
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.102289080619812,
      "learning_rate": 6.8271671049843104e-06,
      "loss": 2.2726,
      "step": 46492
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0248948335647583,
      "learning_rate": 6.826776645020959e-06,
      "loss": 2.3779,
      "step": 46493
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0878046751022339,
      "learning_rate": 6.826386190437082e-06,
      "loss": 2.2532,
      "step": 46494
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0558805465698242,
      "learning_rate": 6.825995741233343e-06,
      "loss": 2.4531,
      "step": 46495
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9791696071624756,
      "learning_rate": 6.825605297410402e-06,
      "loss": 2.2929,
      "step": 46496
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0646402835845947,
      "learning_rate": 6.8252148589689225e-06,
      "loss": 2.2744,
      "step": 46497
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0678659677505493,
      "learning_rate": 6.824824425909566e-06,
      "loss": 2.336,
      "step": 46498
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.012197732925415,
      "learning_rate": 6.8244339982329935e-06,
      "loss": 2.446,
      "step": 46499
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0226876735687256,
      "learning_rate": 6.824043575939869e-06,
      "loss": 2.3548,
      "step": 46500
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0987783670425415,
      "learning_rate": 6.823653159030852e-06,
      "loss": 2.3884,
      "step": 46501
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1525851488113403,
      "learning_rate": 6.823262747506607e-06,
      "loss": 2.5172,
      "step": 46502
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1675140857696533,
      "learning_rate": 6.822872341367794e-06,
      "loss": 2.3018,
      "step": 46503
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0442299842834473,
      "learning_rate": 6.822481940615074e-06,
      "loss": 2.3663,
      "step": 46504
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.002132773399353,
      "learning_rate": 6.822091545249112e-06,
      "loss": 2.5012,
      "step": 46505
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2314682006835938,
      "learning_rate": 6.821701155270571e-06,
      "loss": 2.1547,
      "step": 46506
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.171039342880249,
      "learning_rate": 6.821310770680105e-06,
      "loss": 2.215,
      "step": 46507
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0726282596588135,
      "learning_rate": 6.820920391478383e-06,
      "loss": 2.2321,
      "step": 46508
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.047026515007019,
      "learning_rate": 6.820530017666063e-06,
      "loss": 2.4196,
      "step": 46509
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.171412467956543,
      "learning_rate": 6.820139649243807e-06,
      "loss": 2.5228,
      "step": 46510
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1966867446899414,
      "learning_rate": 6.819749286212278e-06,
      "loss": 2.0798,
      "step": 46511
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0656152963638306,
      "learning_rate": 6.81935892857214e-06,
      "loss": 2.2346,
      "step": 46512
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0048340559005737,
      "learning_rate": 6.81896857632405e-06,
      "loss": 2.408,
      "step": 46513
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.043296217918396,
      "learning_rate": 6.8185782294686734e-06,
      "loss": 2.3305,
      "step": 46514
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0023595094680786,
      "learning_rate": 6.818187888006671e-06,
      "loss": 2.3094,
      "step": 46515
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.175242304801941,
      "learning_rate": 6.817797551938705e-06,
      "loss": 2.0511,
      "step": 46516
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0126534700393677,
      "learning_rate": 6.8174072212654355e-06,
      "loss": 2.2244,
      "step": 46517
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9831118583679199,
      "learning_rate": 6.817016895987525e-06,
      "loss": 2.3827,
      "step": 46518
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3564075231552124,
      "learning_rate": 6.816626576105636e-06,
      "loss": 2.1544,
      "step": 46519
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3412697315216064,
      "learning_rate": 6.816236261620431e-06,
      "loss": 2.4217,
      "step": 46520
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2007614374160767,
      "learning_rate": 6.815845952532567e-06,
      "loss": 2.4403,
      "step": 46521
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0608789920806885,
      "learning_rate": 6.8154556488427125e-06,
      "loss": 2.2959,
      "step": 46522
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.201007604598999,
      "learning_rate": 6.815065350551524e-06,
      "loss": 2.3839,
      "step": 46523
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9980339407920837,
      "learning_rate": 6.814675057659663e-06,
      "loss": 2.089,
      "step": 46524
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1229335069656372,
      "learning_rate": 6.8142847701677936e-06,
      "loss": 2.0675,
      "step": 46525
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9975005388259888,
      "learning_rate": 6.813894488076578e-06,
      "loss": 2.3026,
      "step": 46526
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3870553970336914,
      "learning_rate": 6.813504211386674e-06,
      "loss": 2.5124,
      "step": 46527
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0380017757415771,
      "learning_rate": 6.813113940098748e-06,
      "loss": 2.3649,
      "step": 46528
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9624267220497131,
      "learning_rate": 6.812723674213458e-06,
      "loss": 2.3257,
      "step": 46529
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0405775308609009,
      "learning_rate": 6.812333413731469e-06,
      "loss": 2.3773,
      "step": 46530
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9905858635902405,
      "learning_rate": 6.811943158653438e-06,
      "loss": 2.2844,
      "step": 46531
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0377686023712158,
      "learning_rate": 6.8115529089800315e-06,
      "loss": 2.2981,
      "step": 46532
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1101561784744263,
      "learning_rate": 6.811162664711912e-06,
      "loss": 2.1731,
      "step": 46533
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9511873722076416,
      "learning_rate": 6.810772425849734e-06,
      "loss": 2.1835,
      "step": 46534
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0626479387283325,
      "learning_rate": 6.810382192394162e-06,
      "loss": 2.0521,
      "step": 46535
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2094146013259888,
      "learning_rate": 6.809991964345859e-06,
      "loss": 2.4614,
      "step": 46536
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9999595880508423,
      "learning_rate": 6.809601741705486e-06,
      "loss": 2.4358,
      "step": 46537
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0309619903564453,
      "learning_rate": 6.809211524473705e-06,
      "loss": 2.3583,
      "step": 46538
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1201566457748413,
      "learning_rate": 6.808821312651177e-06,
      "loss": 2.4319,
      "step": 46539
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0790268182754517,
      "learning_rate": 6.8084311062385625e-06,
      "loss": 2.6461,
      "step": 46540
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1014939546585083,
      "learning_rate": 6.808040905236525e-06,
      "loss": 2.3891,
      "step": 46541
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2111693620681763,
      "learning_rate": 6.807650709645725e-06,
      "loss": 2.4553,
      "step": 46542
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0549861192703247,
      "learning_rate": 6.807260519466824e-06,
      "loss": 2.3593,
      "step": 46543
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2226247787475586,
      "learning_rate": 6.806870334700486e-06,
      "loss": 2.3149,
      "step": 46544
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0666671991348267,
      "learning_rate": 6.806480155347368e-06,
      "loss": 2.2705,
      "step": 46545
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0703729391098022,
      "learning_rate": 6.806089981408135e-06,
      "loss": 2.5906,
      "step": 46546
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1691640615463257,
      "learning_rate": 6.805699812883444e-06,
      "loss": 2.4376,
      "step": 46547
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1936894655227661,
      "learning_rate": 6.805309649773962e-06,
      "loss": 2.3637,
      "step": 46548
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0911709070205688,
      "learning_rate": 6.804919492080345e-06,
      "loss": 2.1883,
      "step": 46549
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0752208232879639,
      "learning_rate": 6.8045293398032605e-06,
      "loss": 2.3557,
      "step": 46550
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0732135772705078,
      "learning_rate": 6.804139192943364e-06,
      "loss": 2.491,
      "step": 46551
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0293207168579102,
      "learning_rate": 6.803749051501321e-06,
      "loss": 2.3159,
      "step": 46552
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1246006488800049,
      "learning_rate": 6.803358915477791e-06,
      "loss": 2.0615,
      "step": 46553
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0394680500030518,
      "learning_rate": 6.802968784873436e-06,
      "loss": 2.2365,
      "step": 46554
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1769940853118896,
      "learning_rate": 6.802578659688916e-06,
      "loss": 2.1002,
      "step": 46555
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1739583015441895,
      "learning_rate": 6.802188539924895e-06,
      "loss": 2.2789,
      "step": 46556
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1960076093673706,
      "learning_rate": 6.8017984255820314e-06,
      "loss": 2.3284,
      "step": 46557
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1325279474258423,
      "learning_rate": 6.801408316660989e-06,
      "loss": 2.33,
      "step": 46558
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.03187894821167,
      "learning_rate": 6.801018213162432e-06,
      "loss": 2.1904,
      "step": 46559
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3334378004074097,
      "learning_rate": 6.800628115087013e-06,
      "loss": 2.1594,
      "step": 46560
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0193922519683838,
      "learning_rate": 6.8002380224354e-06,
      "loss": 2.3615,
      "step": 46561
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2603213787078857,
      "learning_rate": 6.799847935208253e-06,
      "loss": 2.32,
      "step": 46562
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.153476595878601,
      "learning_rate": 6.79945785340623e-06,
      "loss": 2.1384,
      "step": 46563
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9934903979301453,
      "learning_rate": 6.799067777029999e-06,
      "loss": 2.2879,
      "step": 46564
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2626972198486328,
      "learning_rate": 6.798677706080215e-06,
      "loss": 2.4995,
      "step": 46565
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0181177854537964,
      "learning_rate": 6.7982876405575415e-06,
      "loss": 2.0157,
      "step": 46566
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0043251514434814,
      "learning_rate": 6.797897580462642e-06,
      "loss": 2.2575,
      "step": 46567
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1070667505264282,
      "learning_rate": 6.797507525796174e-06,
      "loss": 2.3604,
      "step": 46568
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0555782318115234,
      "learning_rate": 6.797117476558801e-06,
      "loss": 2.2898,
      "step": 46569
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0286509990692139,
      "learning_rate": 6.7967274327511845e-06,
      "loss": 2.1735,
      "step": 46570
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1100226640701294,
      "learning_rate": 6.796337394373984e-06,
      "loss": 2.2781,
      "step": 46571
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9807734489440918,
      "learning_rate": 6.795947361427866e-06,
      "loss": 2.1614,
      "step": 46572
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0230787992477417,
      "learning_rate": 6.795557333913483e-06,
      "loss": 2.4908,
      "step": 46573
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0443530082702637,
      "learning_rate": 6.795167311831502e-06,
      "loss": 2.1411,
      "step": 46574
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0220592021942139,
      "learning_rate": 6.794777295182582e-06,
      "loss": 2.3638,
      "step": 46575
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0221564769744873,
      "learning_rate": 6.7943872839673856e-06,
      "loss": 2.4672,
      "step": 46576
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0372285842895508,
      "learning_rate": 6.793997278186573e-06,
      "loss": 2.459,
      "step": 46577
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.066493034362793,
      "learning_rate": 6.793607277840806e-06,
      "loss": 2.3781,
      "step": 46578
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.200406789779663,
      "learning_rate": 6.793217282930746e-06,
      "loss": 2.6045,
      "step": 46579
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0458626747131348,
      "learning_rate": 6.7928272934570516e-06,
      "loss": 2.2523,
      "step": 46580
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.206043004989624,
      "learning_rate": 6.792437309420388e-06,
      "loss": 2.2363,
      "step": 46581
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0289440155029297,
      "learning_rate": 6.792047330821413e-06,
      "loss": 2.4899,
      "step": 46582
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0576719045639038,
      "learning_rate": 6.791657357660791e-06,
      "loss": 2.0911,
      "step": 46583
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0849326848983765,
      "learning_rate": 6.791267389939178e-06,
      "loss": 2.3693,
      "step": 46584
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1101202964782715,
      "learning_rate": 6.79087742765724e-06,
      "loss": 2.4775,
      "step": 46585
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1668950319290161,
      "learning_rate": 6.790487470815641e-06,
      "loss": 2.2721,
      "step": 46586
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0688220262527466,
      "learning_rate": 6.790097519415032e-06,
      "loss": 2.2496,
      "step": 46587
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5432770252227783,
      "learning_rate": 6.7897075734560805e-06,
      "loss": 2.5402,
      "step": 46588
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1634594202041626,
      "learning_rate": 6.789317632939446e-06,
      "loss": 2.3472,
      "step": 46589
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1070704460144043,
      "learning_rate": 6.78892769786579e-06,
      "loss": 2.3007,
      "step": 46590
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1614757776260376,
      "learning_rate": 6.788537768235774e-06,
      "loss": 2.3389,
      "step": 46591
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2220332622528076,
      "learning_rate": 6.788147844050059e-06,
      "loss": 2.3192,
      "step": 46592
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1241674423217773,
      "learning_rate": 6.787757925309305e-06,
      "loss": 2.2595,
      "step": 46593
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1030447483062744,
      "learning_rate": 6.787368012014175e-06,
      "loss": 2.2831,
      "step": 46594
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.060125470161438,
      "learning_rate": 6.786978104165327e-06,
      "loss": 2.4278,
      "step": 46595
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0399501323699951,
      "learning_rate": 6.786588201763424e-06,
      "loss": 2.2681,
      "step": 46596
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3748489618301392,
      "learning_rate": 6.786198304809128e-06,
      "loss": 2.2656,
      "step": 46597
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0110212564468384,
      "learning_rate": 6.785808413303099e-06,
      "loss": 2.2461,
      "step": 46598
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.001575231552124,
      "learning_rate": 6.7854185272460015e-06,
      "loss": 2.3467,
      "step": 46599
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1872365474700928,
      "learning_rate": 6.785028646638488e-06,
      "loss": 2.3608,
      "step": 46600
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3502644300460815,
      "learning_rate": 6.7846387714812235e-06,
      "loss": 2.5006,
      "step": 46601
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0923537015914917,
      "learning_rate": 6.7842489017748704e-06,
      "loss": 2.2987,
      "step": 46602
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1771527528762817,
      "learning_rate": 6.783859037520088e-06,
      "loss": 2.2702,
      "step": 46603
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1144423484802246,
      "learning_rate": 6.783469178717539e-06,
      "loss": 2.2796,
      "step": 46604
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0673116445541382,
      "learning_rate": 6.783079325367883e-06,
      "loss": 1.9597,
      "step": 46605
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1153085231781006,
      "learning_rate": 6.782689477471783e-06,
      "loss": 2.2694,
      "step": 46606
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.045992374420166,
      "learning_rate": 6.782299635029897e-06,
      "loss": 2.1669,
      "step": 46607
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0188851356506348,
      "learning_rate": 6.781909798042886e-06,
      "loss": 2.1154,
      "step": 46608
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0285279750823975,
      "learning_rate": 6.781519966511411e-06,
      "loss": 2.231,
      "step": 46609
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0794768333435059,
      "learning_rate": 6.781130140436136e-06,
      "loss": 2.3806,
      "step": 46610
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0228784084320068,
      "learning_rate": 6.780740319817719e-06,
      "loss": 2.4055,
      "step": 46611
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1356251239776611,
      "learning_rate": 6.780350504656823e-06,
      "loss": 2.1835,
      "step": 46612
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.047682523727417,
      "learning_rate": 6.779960694954108e-06,
      "loss": 2.42,
      "step": 46613
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0162403583526611,
      "learning_rate": 6.779570890710232e-06,
      "loss": 2.3561,
      "step": 46614
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1406255960464478,
      "learning_rate": 6.779181091925857e-06,
      "loss": 2.0712,
      "step": 46615
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0537759065628052,
      "learning_rate": 6.778791298601646e-06,
      "loss": 2.1668,
      "step": 46616
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0057733058929443,
      "learning_rate": 6.7784015107382595e-06,
      "loss": 2.3243,
      "step": 46617
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1587836742401123,
      "learning_rate": 6.7780117283363554e-06,
      "loss": 2.1331,
      "step": 46618
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0852599143981934,
      "learning_rate": 6.777621951396597e-06,
      "loss": 2.3478,
      "step": 46619
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1991122961044312,
      "learning_rate": 6.777232179919645e-06,
      "loss": 2.4057,
      "step": 46620
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.982265830039978,
      "learning_rate": 6.77684241390616e-06,
      "loss": 2.3968,
      "step": 46621
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1448712348937988,
      "learning_rate": 6.776452653356802e-06,
      "loss": 2.1428,
      "step": 46622
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0640240907669067,
      "learning_rate": 6.7760628982722306e-06,
      "loss": 2.4373,
      "step": 46623
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0512807369232178,
      "learning_rate": 6.7756731486531105e-06,
      "loss": 2.223,
      "step": 46624
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.991461455821991,
      "learning_rate": 6.775283404500104e-06,
      "loss": 2.44,
      "step": 46625
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1433279514312744,
      "learning_rate": 6.774893665813864e-06,
      "loss": 2.3731,
      "step": 46626
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.157346248626709,
      "learning_rate": 6.774503932595054e-06,
      "loss": 2.3789,
      "step": 46627
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.971807599067688,
      "learning_rate": 6.774114204844336e-06,
      "loss": 2.3467,
      "step": 46628
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0988900661468506,
      "learning_rate": 6.773724482562372e-06,
      "loss": 2.4516,
      "step": 46629
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1056334972381592,
      "learning_rate": 6.77333476574982e-06,
      "loss": 2.4975,
      "step": 46630
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1230627298355103,
      "learning_rate": 6.7729450544073425e-06,
      "loss": 2.3008,
      "step": 46631
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0515350103378296,
      "learning_rate": 6.772555348535599e-06,
      "loss": 2.4748,
      "step": 46632
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1084643602371216,
      "learning_rate": 6.772165648135252e-06,
      "loss": 2.1056,
      "step": 46633
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.029436707496643,
      "learning_rate": 6.77177595320696e-06,
      "loss": 2.3754,
      "step": 46634
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0089764595031738,
      "learning_rate": 6.771386263751385e-06,
      "loss": 2.4668,
      "step": 46635
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.023775577545166,
      "learning_rate": 6.770996579769187e-06,
      "loss": 2.2968,
      "step": 46636
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0647701025009155,
      "learning_rate": 6.770606901261028e-06,
      "loss": 2.2802,
      "step": 46637
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0799500942230225,
      "learning_rate": 6.770217228227567e-06,
      "loss": 2.4449,
      "step": 46638
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0878028869628906,
      "learning_rate": 6.769827560669464e-06,
      "loss": 2.3015,
      "step": 46639
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.233945369720459,
      "learning_rate": 6.76943789858738e-06,
      "loss": 2.515,
      "step": 46640
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.040777564048767,
      "learning_rate": 6.7690482419819796e-06,
      "loss": 2.3777,
      "step": 46641
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2105414867401123,
      "learning_rate": 6.768658590853916e-06,
      "loss": 2.4466,
      "step": 46642
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1216864585876465,
      "learning_rate": 6.768268945203855e-06,
      "loss": 2.1183,
      "step": 46643
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1089730262756348,
      "learning_rate": 6.767879305032454e-06,
      "loss": 2.2832,
      "step": 46644
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9814411997795105,
      "learning_rate": 6.767489670340378e-06,
      "loss": 2.433,
      "step": 46645
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0367895364761353,
      "learning_rate": 6.767100041128284e-06,
      "loss": 2.2698,
      "step": 46646
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3209495544433594,
      "learning_rate": 6.766710417396833e-06,
      "loss": 2.328,
      "step": 46647
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1764687299728394,
      "learning_rate": 6.766320799146686e-06,
      "loss": 2.3009,
      "step": 46648
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.956012487411499,
      "learning_rate": 6.765931186378505e-06,
      "loss": 2.1027,
      "step": 46649
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0410676002502441,
      "learning_rate": 6.765541579092947e-06,
      "loss": 2.4983,
      "step": 46650
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1141841411590576,
      "learning_rate": 6.76515197729068e-06,
      "loss": 2.2548,
      "step": 46651
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.022267460823059,
      "learning_rate": 6.764762380972354e-06,
      "loss": 2.4001,
      "step": 46652
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1683239936828613,
      "learning_rate": 6.7643727901386345e-06,
      "loss": 2.3835,
      "step": 46653
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0461149215698242,
      "learning_rate": 6.763983204790182e-06,
      "loss": 2.0836,
      "step": 46654
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9658092260360718,
      "learning_rate": 6.763593624927658e-06,
      "loss": 2.4947,
      "step": 46655
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1283447742462158,
      "learning_rate": 6.763204050551719e-06,
      "loss": 2.3761,
      "step": 46656
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.978001058101654,
      "learning_rate": 6.762814481663031e-06,
      "loss": 2.243,
      "step": 46657
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.148316502571106,
      "learning_rate": 6.762424918262251e-06,
      "loss": 2.394,
      "step": 46658
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1075164079666138,
      "learning_rate": 6.762035360350039e-06,
      "loss": 2.3398,
      "step": 46659
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1054627895355225,
      "learning_rate": 6.761645807927058e-06,
      "loss": 2.3116,
      "step": 46660
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0263965129852295,
      "learning_rate": 6.761256260993966e-06,
      "loss": 2.4098,
      "step": 46661
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0789376497268677,
      "learning_rate": 6.760866719551427e-06,
      "loss": 2.0911,
      "step": 46662
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0045503377914429,
      "learning_rate": 6.7604771836000956e-06,
      "loss": 2.1288,
      "step": 46663
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.027985692024231,
      "learning_rate": 6.760087653140635e-06,
      "loss": 2.2289,
      "step": 46664
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0207537412643433,
      "learning_rate": 6.7596981281737085e-06,
      "loss": 2.1966,
      "step": 46665
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0448325872421265,
      "learning_rate": 6.7593086086999725e-06,
      "loss": 2.3435,
      "step": 46666
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1806752681732178,
      "learning_rate": 6.758919094720087e-06,
      "loss": 2.2032,
      "step": 46667
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1245453357696533,
      "learning_rate": 6.758529586234716e-06,
      "loss": 2.3596,
      "step": 46668
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0836076736450195,
      "learning_rate": 6.758140083244518e-06,
      "loss": 2.4083,
      "step": 46669
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0598971843719482,
      "learning_rate": 6.7577505857501516e-06,
      "loss": 2.3008,
      "step": 46670
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1671578884124756,
      "learning_rate": 6.757361093752277e-06,
      "loss": 2.2918,
      "step": 46671
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.046333909034729,
      "learning_rate": 6.756971607251558e-06,
      "loss": 2.3592,
      "step": 46672
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.056562900543213,
      "learning_rate": 6.756582126248652e-06,
      "loss": 2.2529,
      "step": 46673
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9702410697937012,
      "learning_rate": 6.75619265074422e-06,
      "loss": 2.3351,
      "step": 46674
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1599242687225342,
      "learning_rate": 6.7558031807389235e-06,
      "loss": 2.2243,
      "step": 46675
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0917500257492065,
      "learning_rate": 6.755413716233421e-06,
      "loss": 2.2864,
      "step": 46676
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.108161211013794,
      "learning_rate": 6.755024257228374e-06,
      "loss": 2.382,
      "step": 46677
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.245309591293335,
      "learning_rate": 6.754634803724446e-06,
      "loss": 2.5897,
      "step": 46678
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0917539596557617,
      "learning_rate": 6.754245355722289e-06,
      "loss": 2.4404,
      "step": 46679
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2152960300445557,
      "learning_rate": 6.753855913222569e-06,
      "loss": 2.1651,
      "step": 46680
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0763846635818481,
      "learning_rate": 6.753466476225942e-06,
      "loss": 2.1884,
      "step": 46681
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1301685571670532,
      "learning_rate": 6.753077044733074e-06,
      "loss": 2.4062,
      "step": 46682
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0006248950958252,
      "learning_rate": 6.752687618744621e-06,
      "loss": 2.4312,
      "step": 46683
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1002951860427856,
      "learning_rate": 6.752298198261245e-06,
      "loss": 2.3902,
      "step": 46684
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.103062391281128,
      "learning_rate": 6.751908783283605e-06,
      "loss": 2.3573,
      "step": 46685
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0853978395462036,
      "learning_rate": 6.751519373812362e-06,
      "loss": 2.4628,
      "step": 46686
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0583285093307495,
      "learning_rate": 6.751129969848176e-06,
      "loss": 2.0338,
      "step": 46687
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0445061922073364,
      "learning_rate": 6.750740571391707e-06,
      "loss": 2.2144,
      "step": 46688
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1643249988555908,
      "learning_rate": 6.750351178443615e-06,
      "loss": 2.4391,
      "step": 46689
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1271998882293701,
      "learning_rate": 6.749961791004562e-06,
      "loss": 2.2733,
      "step": 46690
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1039897203445435,
      "learning_rate": 6.749572409075208e-06,
      "loss": 2.2582,
      "step": 46691
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9346320033073425,
      "learning_rate": 6.749183032656208e-06,
      "loss": 2.343,
      "step": 46692
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0090060234069824,
      "learning_rate": 6.748793661748226e-06,
      "loss": 2.2842,
      "step": 46693
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.129517912864685,
      "learning_rate": 6.748404296351923e-06,
      "loss": 2.3074,
      "step": 46694
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.057292103767395,
      "learning_rate": 6.748014936467957e-06,
      "loss": 2.3056,
      "step": 46695
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1285579204559326,
      "learning_rate": 6.747625582096991e-06,
      "loss": 2.1701,
      "step": 46696
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.136334776878357,
      "learning_rate": 6.74723623323968e-06,
      "loss": 2.4544,
      "step": 46697
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0361849069595337,
      "learning_rate": 6.746846889896687e-06,
      "loss": 2.4513,
      "step": 46698
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0124199390411377,
      "learning_rate": 6.746457552068674e-06,
      "loss": 2.3141,
      "step": 46699
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0121121406555176,
      "learning_rate": 6.746068219756298e-06,
      "loss": 2.426,
      "step": 46700
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0467532873153687,
      "learning_rate": 6.745678892960219e-06,
      "loss": 2.2255,
      "step": 46701
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0404642820358276,
      "learning_rate": 6.745289571681099e-06,
      "loss": 2.2418,
      "step": 46702
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1726818084716797,
      "learning_rate": 6.744900255919598e-06,
      "loss": 2.2388,
      "step": 46703
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1175649166107178,
      "learning_rate": 6.744510945676379e-06,
      "loss": 2.2592,
      "step": 46704
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0996382236480713,
      "learning_rate": 6.744121640952093e-06,
      "loss": 2.4522,
      "step": 46705
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9877440929412842,
      "learning_rate": 6.7437323417474064e-06,
      "loss": 2.1283,
      "step": 46706
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0414305925369263,
      "learning_rate": 6.743343048062977e-06,
      "loss": 2.5466,
      "step": 46707
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1379197835922241,
      "learning_rate": 6.742953759899465e-06,
      "loss": 2.2206,
      "step": 46708
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0196993350982666,
      "learning_rate": 6.742564477257533e-06,
      "loss": 2.416,
      "step": 46709
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1144647598266602,
      "learning_rate": 6.742175200137837e-06,
      "loss": 2.386,
      "step": 46710
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1065912246704102,
      "learning_rate": 6.74178592854104e-06,
      "loss": 2.3436,
      "step": 46711
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0741689205169678,
      "learning_rate": 6.7413966624678005e-06,
      "loss": 2.2943,
      "step": 46712
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4792217016220093,
      "learning_rate": 6.741007401918778e-06,
      "loss": 2.2867,
      "step": 46713
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1946991682052612,
      "learning_rate": 6.740618146894633e-06,
      "loss": 2.3972,
      "step": 46714
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9878522157669067,
      "learning_rate": 6.740228897396026e-06,
      "loss": 2.184,
      "step": 46715
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.121330738067627,
      "learning_rate": 6.739839653423616e-06,
      "loss": 2.1975,
      "step": 46716
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0684058666229248,
      "learning_rate": 6.7394504149780684e-06,
      "loss": 2.3247,
      "step": 46717
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1409683227539062,
      "learning_rate": 6.739061182060034e-06,
      "loss": 2.2876,
      "step": 46718
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0726505517959595,
      "learning_rate": 6.738671954670175e-06,
      "loss": 2.5646,
      "step": 46719
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1448619365692139,
      "learning_rate": 6.738282732809154e-06,
      "loss": 2.1814,
      "step": 46720
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0789746046066284,
      "learning_rate": 6.737893516477629e-06,
      "loss": 2.3162,
      "step": 46721
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1935498714447021,
      "learning_rate": 6.737504305676261e-06,
      "loss": 2.2646,
      "step": 46722
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0619616508483887,
      "learning_rate": 6.737115100405709e-06,
      "loss": 2.3708,
      "step": 46723
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1565346717834473,
      "learning_rate": 6.736725900666634e-06,
      "loss": 2.3235,
      "step": 46724
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9541657567024231,
      "learning_rate": 6.736336706459694e-06,
      "loss": 2.3166,
      "step": 46725
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0857213735580444,
      "learning_rate": 6.735947517785551e-06,
      "loss": 2.292,
      "step": 46726
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0187269449234009,
      "learning_rate": 6.735558334644861e-06,
      "loss": 2.4844,
      "step": 46727
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.031069278717041,
      "learning_rate": 6.735169157038289e-06,
      "loss": 2.4565,
      "step": 46728
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.639312744140625,
      "learning_rate": 6.734779984966489e-06,
      "loss": 2.4745,
      "step": 46729
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1534582376480103,
      "learning_rate": 6.734390818430126e-06,
      "loss": 2.2606,
      "step": 46730
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0494383573532104,
      "learning_rate": 6.73400165742986e-06,
      "loss": 2.163,
      "step": 46731
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0171769857406616,
      "learning_rate": 6.733612501966345e-06,
      "loss": 2.243,
      "step": 46732
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0393339395523071,
      "learning_rate": 6.7332233520402436e-06,
      "loss": 2.4745,
      "step": 46733
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0911047458648682,
      "learning_rate": 6.732834207652215e-06,
      "loss": 2.3449,
      "step": 46734
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0505059957504272,
      "learning_rate": 6.732445068802921e-06,
      "loss": 2.4022,
      "step": 46735
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2132339477539062,
      "learning_rate": 6.73205593549302e-06,
      "loss": 2.5377,
      "step": 46736
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0338032245635986,
      "learning_rate": 6.731666807723172e-06,
      "loss": 2.432,
      "step": 46737
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2961444854736328,
      "learning_rate": 6.731277685494035e-06,
      "loss": 2.2548,
      "step": 46738
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0898290872573853,
      "learning_rate": 6.730888568806271e-06,
      "loss": 2.2922,
      "step": 46739
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0356392860412598,
      "learning_rate": 6.7304994576605385e-06,
      "loss": 2.3568,
      "step": 46740
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2511948347091675,
      "learning_rate": 6.730110352057499e-06,
      "loss": 2.3227,
      "step": 46741
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2000951766967773,
      "learning_rate": 6.7297212519978084e-06,
      "loss": 2.248,
      "step": 46742
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1854604482650757,
      "learning_rate": 6.729332157482129e-06,
      "loss": 2.2896,
      "step": 46743
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.287201166152954,
      "learning_rate": 6.728943068511124e-06,
      "loss": 2.3688,
      "step": 46744
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9344329833984375,
      "learning_rate": 6.728553985085447e-06,
      "loss": 2.272,
      "step": 46745
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0822842121124268,
      "learning_rate": 6.728164907205756e-06,
      "loss": 2.3731,
      "step": 46746
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.255863070487976,
      "learning_rate": 6.727775834872716e-06,
      "loss": 2.3322,
      "step": 46747
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4193918704986572,
      "learning_rate": 6.727386768086986e-06,
      "loss": 2.3798,
      "step": 46748
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9825026988983154,
      "learning_rate": 6.726997706849221e-06,
      "loss": 2.2137,
      "step": 46749
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0486760139465332,
      "learning_rate": 6.726608651160086e-06,
      "loss": 2.3119,
      "step": 46750
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9879080653190613,
      "learning_rate": 6.726219601020238e-06,
      "loss": 2.2174,
      "step": 46751
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0331556797027588,
      "learning_rate": 6.725830556430338e-06,
      "loss": 2.1342,
      "step": 46752
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1787583827972412,
      "learning_rate": 6.7254415173910435e-06,
      "loss": 2.3438,
      "step": 46753
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1026722192764282,
      "learning_rate": 6.725052483903015e-06,
      "loss": 2.5065,
      "step": 46754
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0796632766723633,
      "learning_rate": 6.724663455966911e-06,
      "loss": 2.5273,
      "step": 46755
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0786529779434204,
      "learning_rate": 6.724274433583393e-06,
      "loss": 2.3415,
      "step": 46756
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1845837831497192,
      "learning_rate": 6.723885416753122e-06,
      "loss": 2.2355,
      "step": 46757
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2577615976333618,
      "learning_rate": 6.723496405476753e-06,
      "loss": 2.4217,
      "step": 46758
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5176024436950684,
      "learning_rate": 6.723107399754948e-06,
      "loss": 2.2283,
      "step": 46759
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0651836395263672,
      "learning_rate": 6.722718399588364e-06,
      "loss": 2.2651,
      "step": 46760
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0307121276855469,
      "learning_rate": 6.7223294049776636e-06,
      "loss": 2.28,
      "step": 46761
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1421749591827393,
      "learning_rate": 6.7219404159235045e-06,
      "loss": 2.3157,
      "step": 46762
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.024920105934143,
      "learning_rate": 6.721551432426547e-06,
      "loss": 2.3026,
      "step": 46763
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1232874393463135,
      "learning_rate": 6.72116245448745e-06,
      "loss": 2.4682,
      "step": 46764
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1310902833938599,
      "learning_rate": 6.720773482106873e-06,
      "loss": 2.3069,
      "step": 46765
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1569401025772095,
      "learning_rate": 6.720384515285476e-06,
      "loss": 2.2018,
      "step": 46766
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.078580617904663,
      "learning_rate": 6.719995554023917e-06,
      "loss": 2.2863,
      "step": 46767
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1175861358642578,
      "learning_rate": 6.719606598322857e-06,
      "loss": 2.2417,
      "step": 46768
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.067381739616394,
      "learning_rate": 6.719217648182955e-06,
      "loss": 2.3075,
      "step": 46769
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0481230020523071,
      "learning_rate": 6.718828703604873e-06,
      "loss": 2.1337,
      "step": 46770
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9736687541007996,
      "learning_rate": 6.718439764589266e-06,
      "loss": 2.0851,
      "step": 46771
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2025835514068604,
      "learning_rate": 6.7180508311367934e-06,
      "loss": 2.3699,
      "step": 46772
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0753248929977417,
      "learning_rate": 6.717661903248116e-06,
      "loss": 2.1975,
      "step": 46773
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1646915674209595,
      "learning_rate": 6.717272980923893e-06,
      "loss": 2.4609,
      "step": 46774
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0494180917739868,
      "learning_rate": 6.716884064164783e-06,
      "loss": 2.229,
      "step": 46775
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.069015383720398,
      "learning_rate": 6.716495152971449e-06,
      "loss": 2.383,
      "step": 46776
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0939322710037231,
      "learning_rate": 6.7161062473445445e-06,
      "loss": 2.235,
      "step": 46777
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0967042446136475,
      "learning_rate": 6.715717347284734e-06,
      "loss": 2.4299,
      "step": 46778
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.192484736442566,
      "learning_rate": 6.715328452792673e-06,
      "loss": 2.4326,
      "step": 46779
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9903199076652527,
      "learning_rate": 6.714939563869025e-06,
      "loss": 2.4639,
      "step": 46780
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0900136232376099,
      "learning_rate": 6.7145506805144445e-06,
      "loss": 2.2494,
      "step": 46781
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1415441036224365,
      "learning_rate": 6.714161802729593e-06,
      "loss": 2.0252,
      "step": 46782
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.143479347229004,
      "learning_rate": 6.713772930515132e-06,
      "loss": 2.0903,
      "step": 46783
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1680784225463867,
      "learning_rate": 6.713384063871715e-06,
      "loss": 2.2768,
      "step": 46784
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.085018277168274,
      "learning_rate": 6.712995202800007e-06,
      "loss": 2.3122,
      "step": 46785
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0837135314941406,
      "learning_rate": 6.7126063473006655e-06,
      "loss": 2.3298,
      "step": 46786
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0997201204299927,
      "learning_rate": 6.712217497374347e-06,
      "loss": 2.5129,
      "step": 46787
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1325533390045166,
      "learning_rate": 6.711828653021712e-06,
      "loss": 2.1918,
      "step": 46788
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1152101755142212,
      "learning_rate": 6.7114398142434225e-06,
      "loss": 2.1486,
      "step": 46789
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1120606660842896,
      "learning_rate": 6.711050981040135e-06,
      "loss": 2.3344,
      "step": 46790
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9685041904449463,
      "learning_rate": 6.710662153412509e-06,
      "loss": 2.1603,
      "step": 46791
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.095709204673767,
      "learning_rate": 6.7102733313612035e-06,
      "loss": 2.1626,
      "step": 46792
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1506644487380981,
      "learning_rate": 6.7098845148868785e-06,
      "loss": 2.4773,
      "step": 46793
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0350122451782227,
      "learning_rate": 6.7094957039901934e-06,
      "loss": 2.1745,
      "step": 46794
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1121400594711304,
      "learning_rate": 6.7091068986718065e-06,
      "loss": 2.3967,
      "step": 46795
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.206287145614624,
      "learning_rate": 6.708718098932381e-06,
      "loss": 2.3748,
      "step": 46796
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0590366125106812,
      "learning_rate": 6.708329304772569e-06,
      "loss": 2.4028,
      "step": 46797
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1013399362564087,
      "learning_rate": 6.707940516193031e-06,
      "loss": 2.3756,
      "step": 46798
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1392812728881836,
      "learning_rate": 6.70755173319443e-06,
      "loss": 2.2015,
      "step": 46799
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1142076253890991,
      "learning_rate": 6.707162955777422e-06,
      "loss": 2.2595,
      "step": 46800
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9824573397636414,
      "learning_rate": 6.706774183942666e-06,
      "loss": 2.3296,
      "step": 46801
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0969542264938354,
      "learning_rate": 6.706385417690824e-06,
      "loss": 2.2528,
      "step": 46802
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0888171195983887,
      "learning_rate": 6.705996657022553e-06,
      "loss": 2.3541,
      "step": 46803
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9939190745353699,
      "learning_rate": 6.705607901938511e-06,
      "loss": 2.129,
      "step": 46804
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0719255208969116,
      "learning_rate": 6.705219152439359e-06,
      "loss": 2.5328,
      "step": 46805
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2091211080551147,
      "learning_rate": 6.704830408525757e-06,
      "loss": 2.3328,
      "step": 46806
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1474103927612305,
      "learning_rate": 6.70444167019836e-06,
      "loss": 2.3104,
      "step": 46807
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0959581136703491,
      "learning_rate": 6.704052937457834e-06,
      "loss": 2.3832,
      "step": 46808
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0358256101608276,
      "learning_rate": 6.70366421030483e-06,
      "loss": 2.2425,
      "step": 46809
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0708155632019043,
      "learning_rate": 6.703275488740012e-06,
      "loss": 2.3056,
      "step": 46810
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1781522035598755,
      "learning_rate": 6.702886772764035e-06,
      "loss": 2.2775,
      "step": 46811
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0175583362579346,
      "learning_rate": 6.702498062377563e-06,
      "loss": 2.4196,
      "step": 46812
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0501846075057983,
      "learning_rate": 6.70210935758125e-06,
      "loss": 2.3878,
      "step": 46813
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1677204370498657,
      "learning_rate": 6.70172065837576e-06,
      "loss": 2.4062,
      "step": 46814
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0780104398727417,
      "learning_rate": 6.701331964761747e-06,
      "loss": 2.261,
      "step": 46815
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.198970079421997,
      "learning_rate": 6.700943276739873e-06,
      "loss": 2.6227,
      "step": 46816
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9752569794654846,
      "learning_rate": 6.700554594310795e-06,
      "loss": 2.1255,
      "step": 46817
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5957388877868652,
      "learning_rate": 6.700165917475174e-06,
      "loss": 2.3853,
      "step": 46818
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.098197340965271,
      "learning_rate": 6.699777246233667e-06,
      "loss": 2.3692,
      "step": 46819
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1494815349578857,
      "learning_rate": 6.699388580586935e-06,
      "loss": 2.6966,
      "step": 46820
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0807743072509766,
      "learning_rate": 6.698999920535636e-06,
      "loss": 2.2631,
      "step": 46821
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0766173601150513,
      "learning_rate": 6.698611266080428e-06,
      "loss": 2.2747,
      "step": 46822
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0698086023330688,
      "learning_rate": 6.698222617221974e-06,
      "loss": 2.2912,
      "step": 46823
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2226816415786743,
      "learning_rate": 6.697833973960927e-06,
      "loss": 2.2376,
      "step": 46824
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.12187922000885,
      "learning_rate": 6.697445336297946e-06,
      "loss": 2.3284,
      "step": 46825
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.005373239517212,
      "learning_rate": 6.697056704233694e-06,
      "loss": 2.2427,
      "step": 46826
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1038600206375122,
      "learning_rate": 6.696668077768827e-06,
      "loss": 2.2604,
      "step": 46827
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2314200401306152,
      "learning_rate": 6.696279456904006e-06,
      "loss": 2.4491,
      "step": 46828
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.243281602859497,
      "learning_rate": 6.6958908416398875e-06,
      "loss": 2.3742,
      "step": 46829
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0878180265426636,
      "learning_rate": 6.695502231977131e-06,
      "loss": 2.3146,
      "step": 46830
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0139024257659912,
      "learning_rate": 6.6951136279163965e-06,
      "loss": 2.5905,
      "step": 46831
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0183844566345215,
      "learning_rate": 6.694725029458342e-06,
      "loss": 2.4165,
      "step": 46832
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.868099331855774,
      "learning_rate": 6.694336436603625e-06,
      "loss": 2.2728,
      "step": 46833
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9659740328788757,
      "learning_rate": 6.6939478493529085e-06,
      "loss": 2.1383,
      "step": 46834
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.011884093284607,
      "learning_rate": 6.693559267706846e-06,
      "loss": 2.318,
      "step": 46835
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1155623197555542,
      "learning_rate": 6.6931706916661036e-06,
      "loss": 2.3651,
      "step": 46836
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.102380394935608,
      "learning_rate": 6.692782121231329e-06,
      "loss": 2.2572,
      "step": 46837
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0633870363235474,
      "learning_rate": 6.69239355640319e-06,
      "loss": 2.3534,
      "step": 46838
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.117567539215088,
      "learning_rate": 6.69200499718234e-06,
      "loss": 2.2575,
      "step": 46839
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.129212737083435,
      "learning_rate": 6.69161644356944e-06,
      "loss": 2.5364,
      "step": 46840
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.081517219543457,
      "learning_rate": 6.69122789556515e-06,
      "loss": 2.312,
      "step": 46841
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1799204349517822,
      "learning_rate": 6.6908393531701286e-06,
      "loss": 2.3862,
      "step": 46842
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0975673198699951,
      "learning_rate": 6.6904508163850316e-06,
      "loss": 2.607,
      "step": 46843
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.190109133720398,
      "learning_rate": 6.690062285210519e-06,
      "loss": 2.291,
      "step": 46844
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0962539911270142,
      "learning_rate": 6.689673759647249e-06,
      "loss": 2.4841,
      "step": 46845
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0572279691696167,
      "learning_rate": 6.689285239695882e-06,
      "loss": 2.4494,
      "step": 46846
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0210689306259155,
      "learning_rate": 6.6888967253570755e-06,
      "loss": 2.2408,
      "step": 46847
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9859957098960876,
      "learning_rate": 6.688508216631488e-06,
      "loss": 2.3718,
      "step": 46848
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9691382646560669,
      "learning_rate": 6.688119713519784e-06,
      "loss": 1.9441,
      "step": 46849
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0398647785186768,
      "learning_rate": 6.687731216022611e-06,
      "loss": 2.402,
      "step": 46850
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1674199104309082,
      "learning_rate": 6.6873427241406345e-06,
      "loss": 2.4534,
      "step": 46851
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9690907597541809,
      "learning_rate": 6.68695423787451e-06,
      "loss": 2.374,
      "step": 46852
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.080367088317871,
      "learning_rate": 6.686565757224899e-06,
      "loss": 2.239,
      "step": 46853
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1550790071487427,
      "learning_rate": 6.686177282192459e-06,
      "loss": 2.2568,
      "step": 46854
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0580880641937256,
      "learning_rate": 6.685788812777848e-06,
      "loss": 2.2403,
      "step": 46855
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.205336570739746,
      "learning_rate": 6.685400348981725e-06,
      "loss": 2.2835,
      "step": 46856
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.137787103652954,
      "learning_rate": 6.685011890804749e-06,
      "loss": 2.3462,
      "step": 46857
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1408473253250122,
      "learning_rate": 6.684623438247579e-06,
      "loss": 2.2767,
      "step": 46858
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1703698635101318,
      "learning_rate": 6.684234991310872e-06,
      "loss": 2.2684,
      "step": 46859
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0579510927200317,
      "learning_rate": 6.683846549995286e-06,
      "loss": 2.3777,
      "step": 46860
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.113237977027893,
      "learning_rate": 6.683458114301482e-06,
      "loss": 2.4295,
      "step": 46861
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.063625693321228,
      "learning_rate": 6.683069684230121e-06,
      "loss": 2.0834,
      "step": 46862
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1999908685684204,
      "learning_rate": 6.682681259781855e-06,
      "loss": 2.2284,
      "step": 46863
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9656467437744141,
      "learning_rate": 6.682292840957344e-06,
      "loss": 2.1826,
      "step": 46864
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.136906385421753,
      "learning_rate": 6.681904427757248e-06,
      "loss": 2.298,
      "step": 46865
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0515003204345703,
      "learning_rate": 6.681516020182225e-06,
      "loss": 2.3535,
      "step": 46866
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0583515167236328,
      "learning_rate": 6.6811276182329335e-06,
      "loss": 2.4591,
      "step": 46867
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2639000415802002,
      "learning_rate": 6.680739221910032e-06,
      "loss": 2.5494,
      "step": 46868
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1303315162658691,
      "learning_rate": 6.6803508312141796e-06,
      "loss": 2.2816,
      "step": 46869
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0620179176330566,
      "learning_rate": 6.679962446146035e-06,
      "loss": 2.244,
      "step": 46870
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0070596933364868,
      "learning_rate": 6.679574066706256e-06,
      "loss": 2.2491,
      "step": 46871
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0697417259216309,
      "learning_rate": 6.679185692895498e-06,
      "loss": 2.2836,
      "step": 46872
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1019883155822754,
      "learning_rate": 6.678797324714424e-06,
      "loss": 2.4572,
      "step": 46873
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3451110124588013,
      "learning_rate": 6.67840896216369e-06,
      "loss": 2.3785,
      "step": 46874
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.072363018989563,
      "learning_rate": 6.678020605243955e-06,
      "loss": 2.3933,
      "step": 46875
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0537289381027222,
      "learning_rate": 6.6776322539558795e-06,
      "loss": 2.1854,
      "step": 46876
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1884303092956543,
      "learning_rate": 6.67724390830012e-06,
      "loss": 2.186,
      "step": 46877
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9534666538238525,
      "learning_rate": 6.6768555682773314e-06,
      "loss": 2.2027,
      "step": 46878
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9697257280349731,
      "learning_rate": 6.676467233888176e-06,
      "loss": 2.2039,
      "step": 46879
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1250362396240234,
      "learning_rate": 6.6760789051333115e-06,
      "loss": 2.4402,
      "step": 46880
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1434921026229858,
      "learning_rate": 6.675690582013395e-06,
      "loss": 2.4199,
      "step": 46881
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1118218898773193,
      "learning_rate": 6.6753022645290864e-06,
      "loss": 2.2689,
      "step": 46882
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.166279673576355,
      "learning_rate": 6.674913952681043e-06,
      "loss": 2.3571,
      "step": 46883
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1365398168563843,
      "learning_rate": 6.674525646469925e-06,
      "loss": 2.4855,
      "step": 46884
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0526787042617798,
      "learning_rate": 6.6741373458963875e-06,
      "loss": 2.3431,
      "step": 46885
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0122227668762207,
      "learning_rate": 6.673749050961091e-06,
      "loss": 2.2639,
      "step": 46886
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1011749505996704,
      "learning_rate": 6.6733607616646945e-06,
      "loss": 2.2099,
      "step": 46887
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1620361804962158,
      "learning_rate": 6.6729724780078535e-06,
      "loss": 2.3309,
      "step": 46888
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3122673034667969,
      "learning_rate": 6.672584199991232e-06,
      "loss": 2.336,
      "step": 46889
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0353482961654663,
      "learning_rate": 6.672195927615482e-06,
      "loss": 2.4091,
      "step": 46890
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1150635480880737,
      "learning_rate": 6.671807660881261e-06,
      "loss": 2.1513,
      "step": 46891
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0494574308395386,
      "learning_rate": 6.6714193997892305e-06,
      "loss": 2.2144,
      "step": 46892
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.14737069606781,
      "learning_rate": 6.671031144340049e-06,
      "loss": 2.1918,
      "step": 46893
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1384029388427734,
      "learning_rate": 6.670642894534374e-06,
      "loss": 2.4953,
      "step": 46894
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1701749563217163,
      "learning_rate": 6.670254650372863e-06,
      "loss": 2.4163,
      "step": 46895
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1284195184707642,
      "learning_rate": 6.669866411856174e-06,
      "loss": 2.3352,
      "step": 46896
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1659914255142212,
      "learning_rate": 6.669478178984967e-06,
      "loss": 2.5128,
      "step": 46897
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3098872900009155,
      "learning_rate": 6.669089951759899e-06,
      "loss": 2.4701,
      "step": 46898
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0470129251480103,
      "learning_rate": 6.668701730181628e-06,
      "loss": 2.4825,
      "step": 46899
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2168807983398438,
      "learning_rate": 6.668313514250812e-06,
      "loss": 2.3647,
      "step": 46900
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.23212730884552,
      "learning_rate": 6.6679253039681105e-06,
      "loss": 2.581,
      "step": 46901
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0381823778152466,
      "learning_rate": 6.667537099334182e-06,
      "loss": 2.3739,
      "step": 46902
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1894640922546387,
      "learning_rate": 6.667148900349681e-06,
      "loss": 2.2465,
      "step": 46903
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1125582456588745,
      "learning_rate": 6.66676070701527e-06,
      "loss": 2.1812,
      "step": 46904
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1430143117904663,
      "learning_rate": 6.666372519331602e-06,
      "loss": 2.3047,
      "step": 46905
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.006190299987793,
      "learning_rate": 6.6659843372993385e-06,
      "loss": 2.4575,
      "step": 46906
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7498400211334229,
      "learning_rate": 6.6655961609191364e-06,
      "loss": 2.4561,
      "step": 46907
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0650733709335327,
      "learning_rate": 6.665207990191655e-06,
      "loss": 2.2996,
      "step": 46908
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0103508234024048,
      "learning_rate": 6.6648198251175524e-06,
      "loss": 2.4523,
      "step": 46909
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.059289813041687,
      "learning_rate": 6.664431665697486e-06,
      "loss": 2.3626,
      "step": 46910
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0915528535842896,
      "learning_rate": 6.664043511932112e-06,
      "loss": 2.4814,
      "step": 46911
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0715280771255493,
      "learning_rate": 6.663655363822092e-06,
      "loss": 2.3085,
      "step": 46912
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1224067211151123,
      "learning_rate": 6.663267221368083e-06,
      "loss": 2.4848,
      "step": 46913
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9821698069572449,
      "learning_rate": 6.662879084570741e-06,
      "loss": 2.2757,
      "step": 46914
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0953725576400757,
      "learning_rate": 6.662490953430729e-06,
      "loss": 2.6135,
      "step": 46915
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0808804035186768,
      "learning_rate": 6.6621028279486976e-06,
      "loss": 2.2503,
      "step": 46916
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2124541997909546,
      "learning_rate": 6.66171470812531e-06,
      "loss": 2.3918,
      "step": 46917
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0696028470993042,
      "learning_rate": 6.661326593961222e-06,
      "loss": 2.4483,
      "step": 46918
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0638269186019897,
      "learning_rate": 6.66093848545709e-06,
      "loss": 2.3827,
      "step": 46919
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.116725206375122,
      "learning_rate": 6.6605503826135754e-06,
      "loss": 2.2357,
      "step": 46920
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1288022994995117,
      "learning_rate": 6.660162285431335e-06,
      "loss": 2.3521,
      "step": 46921
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1240085363388062,
      "learning_rate": 6.659774193911028e-06,
      "loss": 2.1936,
      "step": 46922
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.012434959411621,
      "learning_rate": 6.65938610805331e-06,
      "loss": 2.5594,
      "step": 46923
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1570630073547363,
      "learning_rate": 6.658998027858839e-06,
      "loss": 2.1685,
      "step": 46924
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1648999452590942,
      "learning_rate": 6.6586099533282745e-06,
      "loss": 2.3384,
      "step": 46925
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1324262619018555,
      "learning_rate": 6.658221884462274e-06,
      "loss": 2.3705,
      "step": 46926
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.092547059059143,
      "learning_rate": 6.657833821261496e-06,
      "loss": 2.2981,
      "step": 46927
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2096917629241943,
      "learning_rate": 6.657445763726598e-06,
      "loss": 2.2661,
      "step": 46928
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0620540380477905,
      "learning_rate": 6.657057711858235e-06,
      "loss": 2.2923,
      "step": 46929
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0201042890548706,
      "learning_rate": 6.656669665657068e-06,
      "loss": 2.2501,
      "step": 46930
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9695349931716919,
      "learning_rate": 6.656281625123753e-06,
      "loss": 2.2117,
      "step": 46931
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1212329864501953,
      "learning_rate": 6.655893590258951e-06,
      "loss": 2.2342,
      "step": 46932
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1281096935272217,
      "learning_rate": 6.655505561063314e-06,
      "loss": 2.4597,
      "step": 46933
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1314994096755981,
      "learning_rate": 6.6551175375375055e-06,
      "loss": 2.4899,
      "step": 46934
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2219462394714355,
      "learning_rate": 6.654729519682182e-06,
      "loss": 2.1656,
      "step": 46935
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1226367950439453,
      "learning_rate": 6.6543415074979985e-06,
      "loss": 2.3862,
      "step": 46936
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0573641061782837,
      "learning_rate": 6.653953500985615e-06,
      "loss": 2.3756,
      "step": 46937
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1385236978530884,
      "learning_rate": 6.6535655001456904e-06,
      "loss": 2.2665,
      "step": 46938
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9984991550445557,
      "learning_rate": 6.65317750497888e-06,
      "loss": 2.4599,
      "step": 46939
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3350794315338135,
      "learning_rate": 6.652789515485842e-06,
      "loss": 2.4498,
      "step": 46940
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1286247968673706,
      "learning_rate": 6.652401531667241e-06,
      "loss": 2.1483,
      "step": 46941
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0541235208511353,
      "learning_rate": 6.652013553523724e-06,
      "loss": 2.1463,
      "step": 46942
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.114946722984314,
      "learning_rate": 6.651625581055953e-06,
      "loss": 2.1836,
      "step": 46943
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0729187726974487,
      "learning_rate": 6.651237614264585e-06,
      "loss": 2.2136,
      "step": 46944
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.403085470199585,
      "learning_rate": 6.65084965315028e-06,
      "loss": 2.3279,
      "step": 46945
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0843636989593506,
      "learning_rate": 6.650461697713695e-06,
      "loss": 2.3277,
      "step": 46946
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0533841848373413,
      "learning_rate": 6.650073747955485e-06,
      "loss": 2.3276,
      "step": 46947
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.155177354812622,
      "learning_rate": 6.649685803876311e-06,
      "loss": 2.3803,
      "step": 46948
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2229348421096802,
      "learning_rate": 6.6492978654768315e-06,
      "loss": 2.4753,
      "step": 46949
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.064002513885498,
      "learning_rate": 6.648909932757699e-06,
      "loss": 2.2878,
      "step": 46950
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9947733879089355,
      "learning_rate": 6.648522005719576e-06,
      "loss": 2.3661,
      "step": 46951
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0089951753616333,
      "learning_rate": 6.648134084363118e-06,
      "loss": 2.3198,
      "step": 46952
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.035606026649475,
      "learning_rate": 6.647746168688983e-06,
      "loss": 2.3266,
      "step": 46953
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1278318166732788,
      "learning_rate": 6.6473582586978316e-06,
      "loss": 2.5224,
      "step": 46954
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9794653654098511,
      "learning_rate": 6.646970354390318e-06,
      "loss": 2.2633,
      "step": 46955
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1325418949127197,
      "learning_rate": 6.646582455767098e-06,
      "loss": 2.3498,
      "step": 46956
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0459357500076294,
      "learning_rate": 6.6461945628288314e-06,
      "loss": 2.3261,
      "step": 46957
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1335065364837646,
      "learning_rate": 6.645806675576176e-06,
      "loss": 2.4066,
      "step": 46958
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1559839248657227,
      "learning_rate": 6.645418794009789e-06,
      "loss": 2.4454,
      "step": 46959
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.123788595199585,
      "learning_rate": 6.645030918130331e-06,
      "loss": 2.4013,
      "step": 46960
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1429725885391235,
      "learning_rate": 6.644643047938455e-06,
      "loss": 2.1604,
      "step": 46961
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.106687307357788,
      "learning_rate": 6.644255183434819e-06,
      "loss": 2.1782,
      "step": 46962
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1170332431793213,
      "learning_rate": 6.643867324620083e-06,
      "loss": 2.3612,
      "step": 46963
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0207442045211792,
      "learning_rate": 6.6434794714949025e-06,
      "loss": 2.6203,
      "step": 46964
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1206386089324951,
      "learning_rate": 6.643091624059936e-06,
      "loss": 2.44,
      "step": 46965
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.034156084060669,
      "learning_rate": 6.642703782315843e-06,
      "loss": 2.1104,
      "step": 46966
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0883384943008423,
      "learning_rate": 6.6423159462632756e-06,
      "loss": 2.2031,
      "step": 46967
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9525402784347534,
      "learning_rate": 6.6419281159029e-06,
      "loss": 2.2211,
      "step": 46968
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.096253752708435,
      "learning_rate": 6.6415402912353655e-06,
      "loss": 2.4377,
      "step": 46969
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1556806564331055,
      "learning_rate": 6.641152472261332e-06,
      "loss": 2.2913,
      "step": 46970
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9811590313911438,
      "learning_rate": 6.640764658981456e-06,
      "loss": 2.3114,
      "step": 46971
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1990998983383179,
      "learning_rate": 6.640376851396397e-06,
      "loss": 2.3675,
      "step": 46972
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.153138518333435,
      "learning_rate": 6.639989049506812e-06,
      "loss": 2.4058,
      "step": 46973
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0857224464416504,
      "learning_rate": 6.639601253313357e-06,
      "loss": 2.4491,
      "step": 46974
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0475709438323975,
      "learning_rate": 6.639213462816691e-06,
      "loss": 2.2916,
      "step": 46975
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2399815320968628,
      "learning_rate": 6.638825678017472e-06,
      "loss": 2.3317,
      "step": 46976
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0360090732574463,
      "learning_rate": 6.638437898916356e-06,
      "loss": 2.4053,
      "step": 46977
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0956661701202393,
      "learning_rate": 6.638050125513999e-06,
      "loss": 2.2838,
      "step": 46978
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0599905252456665,
      "learning_rate": 6.6376623578110626e-06,
      "loss": 2.3029,
      "step": 46979
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1287890672683716,
      "learning_rate": 6.637274595808201e-06,
      "loss": 2.4529,
      "step": 46980
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.120357632637024,
      "learning_rate": 6.636886839506076e-06,
      "loss": 2.31,
      "step": 46981
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2189719676971436,
      "learning_rate": 6.636499088905337e-06,
      "loss": 2.4063,
      "step": 46982
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1252481937408447,
      "learning_rate": 6.636111344006646e-06,
      "loss": 2.1984,
      "step": 46983
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1387649774551392,
      "learning_rate": 6.635723604810659e-06,
      "loss": 2.3297,
      "step": 46984
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0416673421859741,
      "learning_rate": 6.635335871318034e-06,
      "loss": 2.4341,
      "step": 46985
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1705350875854492,
      "learning_rate": 6.634948143529429e-06,
      "loss": 2.3305,
      "step": 46986
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0978854894638062,
      "learning_rate": 6.6345604214455015e-06,
      "loss": 2.3809,
      "step": 46987
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.040935754776001,
      "learning_rate": 6.63417270506691e-06,
      "loss": 2.4612,
      "step": 46988
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0855928659439087,
      "learning_rate": 6.633784994394308e-06,
      "loss": 2.3376,
      "step": 46989
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0018316507339478,
      "learning_rate": 6.6333972894283536e-06,
      "loss": 2.4588,
      "step": 46990
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0193274021148682,
      "learning_rate": 6.6330095901697066e-06,
      "loss": 2.4852,
      "step": 46991
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.957812488079071,
      "learning_rate": 6.632621896619022e-06,
      "loss": 1.997,
      "step": 46992
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0257066488265991,
      "learning_rate": 6.632234208776957e-06,
      "loss": 2.318,
      "step": 46993
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1015366315841675,
      "learning_rate": 6.631846526644175e-06,
      "loss": 2.4742,
      "step": 46994
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1283657550811768,
      "learning_rate": 6.631458850221324e-06,
      "loss": 2.5569,
      "step": 46995
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1823852062225342,
      "learning_rate": 6.631071179509066e-06,
      "loss": 2.3363,
      "step": 46996
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9468178749084473,
      "learning_rate": 6.630683514508055e-06,
      "loss": 2.343,
      "step": 46997
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0210028886795044,
      "learning_rate": 6.630295855218951e-06,
      "loss": 2.3757,
      "step": 46998
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.347537875175476,
      "learning_rate": 6.629908201642412e-06,
      "loss": 2.2398,
      "step": 46999
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2271605730056763,
      "learning_rate": 6.629520553779094e-06,
      "loss": 2.3917,
      "step": 47000
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2809112071990967,
      "learning_rate": 6.629132911629652e-06,
      "loss": 2.2115,
      "step": 47001
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0726370811462402,
      "learning_rate": 6.628745275194748e-06,
      "loss": 2.3626,
      "step": 47002
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0450186729431152,
      "learning_rate": 6.628357644475034e-06,
      "loss": 2.3612,
      "step": 47003
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1471909284591675,
      "learning_rate": 6.62797001947117e-06,
      "loss": 2.3375,
      "step": 47004
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0614798069000244,
      "learning_rate": 6.627582400183814e-06,
      "loss": 2.1766,
      "step": 47005
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.132041573524475,
      "learning_rate": 6.62719478661362e-06,
      "loss": 2.3814,
      "step": 47006
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0945340394973755,
      "learning_rate": 6.626807178761252e-06,
      "loss": 2.3535,
      "step": 47007
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2940642833709717,
      "learning_rate": 6.626419576627358e-06,
      "loss": 2.4153,
      "step": 47008
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0785521268844604,
      "learning_rate": 6.626031980212599e-06,
      "loss": 2.1205,
      "step": 47009
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3575085401535034,
      "learning_rate": 6.625644389517632e-06,
      "loss": 2.0876,
      "step": 47010
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0426595211029053,
      "learning_rate": 6.6252568045431145e-06,
      "loss": 2.3111,
      "step": 47011
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0855613946914673,
      "learning_rate": 6.624869225289702e-06,
      "loss": 2.3618,
      "step": 47012
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1403831243515015,
      "learning_rate": 6.624481651758053e-06,
      "loss": 2.3813,
      "step": 47013
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1578466892242432,
      "learning_rate": 6.624094083948826e-06,
      "loss": 2.5901,
      "step": 47014
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.03363835811615,
      "learning_rate": 6.623706521862675e-06,
      "loss": 2.2516,
      "step": 47015
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.420180320739746,
      "learning_rate": 6.623318965500261e-06,
      "loss": 2.2535,
      "step": 47016
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.151113510131836,
      "learning_rate": 6.622931414862237e-06,
      "loss": 2.3005,
      "step": 47017
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0959677696228027,
      "learning_rate": 6.622543869949261e-06,
      "loss": 2.1693,
      "step": 47018
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.199973464012146,
      "learning_rate": 6.6221563307619905e-06,
      "loss": 2.2402,
      "step": 47019
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.240727424621582,
      "learning_rate": 6.621768797301081e-06,
      "loss": 2.2791,
      "step": 47020
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0656324625015259,
      "learning_rate": 6.621381269567196e-06,
      "loss": 2.3274,
      "step": 47021
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0676517486572266,
      "learning_rate": 6.620993747560984e-06,
      "loss": 2.0618,
      "step": 47022
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.148472547531128,
      "learning_rate": 6.6206062312831055e-06,
      "loss": 2.3507,
      "step": 47023
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.185487985610962,
      "learning_rate": 6.6202187207342165e-06,
      "loss": 2.425,
      "step": 47024
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1632490158081055,
      "learning_rate": 6.619831215914974e-06,
      "loss": 2.2274,
      "step": 47025
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0331391096115112,
      "learning_rate": 6.619443716826037e-06,
      "loss": 2.5785,
      "step": 47026
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2059301137924194,
      "learning_rate": 6.619056223468061e-06,
      "loss": 2.5582,
      "step": 47027
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0710361003875732,
      "learning_rate": 6.618668735841702e-06,
      "loss": 2.3785,
      "step": 47028
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.246775507926941,
      "learning_rate": 6.618281253947618e-06,
      "loss": 2.4272,
      "step": 47029
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.019155502319336,
      "learning_rate": 6.617893777786466e-06,
      "loss": 2.1728,
      "step": 47030
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0932854413986206,
      "learning_rate": 6.617506307358903e-06,
      "loss": 2.2463,
      "step": 47031
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0893356800079346,
      "learning_rate": 6.617118842665585e-06,
      "loss": 2.1888,
      "step": 47032
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0197722911834717,
      "learning_rate": 6.616731383707169e-06,
      "loss": 2.3523,
      "step": 47033
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1148245334625244,
      "learning_rate": 6.616343930484317e-06,
      "loss": 2.3081,
      "step": 47034
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0025866031646729,
      "learning_rate": 6.615956482997676e-06,
      "loss": 2.2655,
      "step": 47035
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1746972799301147,
      "learning_rate": 6.615569041247909e-06,
      "loss": 2.2479,
      "step": 47036
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.07318115234375,
      "learning_rate": 6.615181605235672e-06,
      "loss": 2.2986,
      "step": 47037
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1026757955551147,
      "learning_rate": 6.61479417496162e-06,
      "loss": 2.2211,
      "step": 47038
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0886647701263428,
      "learning_rate": 6.614406750426412e-06,
      "loss": 2.3208,
      "step": 47039
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1688593626022339,
      "learning_rate": 6.614019331630705e-06,
      "loss": 2.3383,
      "step": 47040
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.051196575164795,
      "learning_rate": 6.6136319185751545e-06,
      "loss": 2.4637,
      "step": 47041
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.043341875076294,
      "learning_rate": 6.613244511260416e-06,
      "loss": 2.35,
      "step": 47042
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0781375169754028,
      "learning_rate": 6.612857109687149e-06,
      "loss": 2.1988,
      "step": 47043
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.017250895500183,
      "learning_rate": 6.612469713856011e-06,
      "loss": 2.4388,
      "step": 47044
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0623655319213867,
      "learning_rate": 6.612082323767654e-06,
      "loss": 2.3711,
      "step": 47045
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2839833498001099,
      "learning_rate": 6.611694939422739e-06,
      "loss": 2.3221,
      "step": 47046
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0264469385147095,
      "learning_rate": 6.6113075608219235e-06,
      "loss": 2.2301,
      "step": 47047
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0593998432159424,
      "learning_rate": 6.610920187965859e-06,
      "loss": 2.4063,
      "step": 47048
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.158547043800354,
      "learning_rate": 6.610532820855206e-06,
      "loss": 2.399,
      "step": 47049
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0203746557235718,
      "learning_rate": 6.610145459490622e-06,
      "loss": 2.4402,
      "step": 47050
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.236200213432312,
      "learning_rate": 6.60975810387276e-06,
      "loss": 2.2764,
      "step": 47051
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2024455070495605,
      "learning_rate": 6.609370754002279e-06,
      "loss": 2.3545,
      "step": 47052
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1023839712142944,
      "learning_rate": 6.608983409879835e-06,
      "loss": 2.3546,
      "step": 47053
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0302599668502808,
      "learning_rate": 6.608596071506086e-06,
      "loss": 2.2081,
      "step": 47054
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1333949565887451,
      "learning_rate": 6.6082087388816874e-06,
      "loss": 2.2408,
      "step": 47055
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.068425178527832,
      "learning_rate": 6.607821412007296e-06,
      "loss": 2.2592,
      "step": 47056
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.150959849357605,
      "learning_rate": 6.607434090883569e-06,
      "loss": 2.3454,
      "step": 47057
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0264654159545898,
      "learning_rate": 6.607046775511161e-06,
      "loss": 2.3074,
      "step": 47058
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0871055126190186,
      "learning_rate": 6.606659465890733e-06,
      "loss": 2.2314,
      "step": 47059
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2049533128738403,
      "learning_rate": 6.606272162022941e-06,
      "loss": 2.4399,
      "step": 47060
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.167670488357544,
      "learning_rate": 6.6058848639084365e-06,
      "loss": 2.3049,
      "step": 47061
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0200837850570679,
      "learning_rate": 6.605497571547877e-06,
      "loss": 2.3579,
      "step": 47062
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0720548629760742,
      "learning_rate": 6.605110284941923e-06,
      "loss": 2.3674,
      "step": 47063
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0579732656478882,
      "learning_rate": 6.604723004091228e-06,
      "loss": 2.5416,
      "step": 47064
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1174534559249878,
      "learning_rate": 6.604335728996451e-06,
      "loss": 2.3873,
      "step": 47065
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.145296573638916,
      "learning_rate": 6.603948459658245e-06,
      "loss": 2.0709,
      "step": 47066
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9538428783416748,
      "learning_rate": 6.603561196077271e-06,
      "loss": 2.1533,
      "step": 47067
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0703928470611572,
      "learning_rate": 6.603173938254182e-06,
      "loss": 2.2297,
      "step": 47068
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1757285594940186,
      "learning_rate": 6.602786686189635e-06,
      "loss": 2.2577,
      "step": 47069
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0687669515609741,
      "learning_rate": 6.602399439884289e-06,
      "loss": 2.3865,
      "step": 47070
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0221186876296997,
      "learning_rate": 6.6020121993387975e-06,
      "loss": 2.2304,
      "step": 47071
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2393208742141724,
      "learning_rate": 6.6016249645538214e-06,
      "loss": 2.3911,
      "step": 47072
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.146026611328125,
      "learning_rate": 6.601237735530013e-06,
      "loss": 2.4124,
      "step": 47073
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1492085456848145,
      "learning_rate": 6.600850512268028e-06,
      "loss": 2.2465,
      "step": 47074
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3049348592758179,
      "learning_rate": 6.600463294768525e-06,
      "loss": 2.115,
      "step": 47075
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0238107442855835,
      "learning_rate": 6.600076083032159e-06,
      "loss": 2.4476,
      "step": 47076
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.137877106666565,
      "learning_rate": 6.599688877059587e-06,
      "loss": 2.3676,
      "step": 47077
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9904465079307556,
      "learning_rate": 6.599301676851469e-06,
      "loss": 2.3166,
      "step": 47078
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9839078783988953,
      "learning_rate": 6.5989144824084565e-06,
      "loss": 2.4226,
      "step": 47079
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2467210292816162,
      "learning_rate": 6.598527293731207e-06,
      "loss": 2.3076,
      "step": 47080
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.188502550125122,
      "learning_rate": 6.598140110820378e-06,
      "loss": 2.4583,
      "step": 47081
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1243107318878174,
      "learning_rate": 6.597752933676625e-06,
      "loss": 2.111,
      "step": 47082
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1617432832717896,
      "learning_rate": 6.597365762300605e-06,
      "loss": 2.3067,
      "step": 47083
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.068490982055664,
      "learning_rate": 6.596978596692976e-06,
      "loss": 2.1222,
      "step": 47084
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2329566478729248,
      "learning_rate": 6.596591436854389e-06,
      "loss": 2.4236,
      "step": 47085
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0868639945983887,
      "learning_rate": 6.59620428278551e-06,
      "loss": 2.2943,
      "step": 47086
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9264861345291138,
      "learning_rate": 6.595817134486986e-06,
      "loss": 2.3481,
      "step": 47087
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2409918308258057,
      "learning_rate": 6.5954299919594745e-06,
      "loss": 2.3684,
      "step": 47088
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0688673257827759,
      "learning_rate": 6.595042855203635e-06,
      "loss": 2.2108,
      "step": 47089
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0277721881866455,
      "learning_rate": 6.594655724220122e-06,
      "loss": 2.273,
      "step": 47090
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0827679634094238,
      "learning_rate": 6.5942685990095925e-06,
      "loss": 2.4198,
      "step": 47091
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1780012845993042,
      "learning_rate": 6.5938814795727035e-06,
      "loss": 2.332,
      "step": 47092
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1043336391448975,
      "learning_rate": 6.59349436591011e-06,
      "loss": 2.246,
      "step": 47093
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0751944780349731,
      "learning_rate": 6.593107258022469e-06,
      "loss": 2.4571,
      "step": 47094
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9963579177856445,
      "learning_rate": 6.592720155910436e-06,
      "loss": 2.4018,
      "step": 47095
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.040252447128296,
      "learning_rate": 6.592333059574667e-06,
      "loss": 2.4251,
      "step": 47096
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0853711366653442,
      "learning_rate": 6.591945969015821e-06,
      "loss": 2.3116,
      "step": 47097
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0944732427597046,
      "learning_rate": 6.59155888423455e-06,
      "loss": 2.2787,
      "step": 47098
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.674208402633667,
      "learning_rate": 6.591171805231514e-06,
      "loss": 2.3871,
      "step": 47099
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0087438821792603,
      "learning_rate": 6.5907847320073715e-06,
      "loss": 2.473,
      "step": 47100
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0488841533660889,
      "learning_rate": 6.590397664562771e-06,
      "loss": 2.1546,
      "step": 47101
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1369073390960693,
      "learning_rate": 6.590010602898371e-06,
      "loss": 2.2703,
      "step": 47102
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0984368324279785,
      "learning_rate": 6.58962354701483e-06,
      "loss": 2.2972,
      "step": 47103
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1459928750991821,
      "learning_rate": 6.589236496912804e-06,
      "loss": 2.3499,
      "step": 47104
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9396252632141113,
      "learning_rate": 6.588849452592948e-06,
      "loss": 2.2051,
      "step": 47105
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0332586765289307,
      "learning_rate": 6.58846241405592e-06,
      "loss": 2.3609,
      "step": 47106
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0705183744430542,
      "learning_rate": 6.588075381302375e-06,
      "loss": 2.3628,
      "step": 47107
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9895192384719849,
      "learning_rate": 6.5876883543329665e-06,
      "loss": 2.1697,
      "step": 47108
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0725446939468384,
      "learning_rate": 6.587301333148354e-06,
      "loss": 2.2396,
      "step": 47109
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.149850845336914,
      "learning_rate": 6.586914317749191e-06,
      "loss": 2.3618,
      "step": 47110
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0474357604980469,
      "learning_rate": 6.586527308136137e-06,
      "loss": 2.3954,
      "step": 47111
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.044892430305481,
      "learning_rate": 6.586140304309847e-06,
      "loss": 2.3682,
      "step": 47112
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.037153959274292,
      "learning_rate": 6.585753306270979e-06,
      "loss": 2.5076,
      "step": 47113
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1813451051712036,
      "learning_rate": 6.585366314020183e-06,
      "loss": 2.3924,
      "step": 47114
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2083561420440674,
      "learning_rate": 6.584979327558117e-06,
      "loss": 2.3058,
      "step": 47115
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1155613660812378,
      "learning_rate": 6.584592346885441e-06,
      "loss": 2.4689,
      "step": 47116
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.096350908279419,
      "learning_rate": 6.584205372002806e-06,
      "loss": 2.4651,
      "step": 47117
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9883655309677124,
      "learning_rate": 6.583818402910873e-06,
      "loss": 2.1762,
      "step": 47118
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9312747716903687,
      "learning_rate": 6.583431439610295e-06,
      "loss": 2.2708,
      "step": 47119
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9669069647789001,
      "learning_rate": 6.583044482101728e-06,
      "loss": 2.4236,
      "step": 47120
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0431185960769653,
      "learning_rate": 6.582657530385827e-06,
      "loss": 2.2869,
      "step": 47121
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0479998588562012,
      "learning_rate": 6.582270584463253e-06,
      "loss": 2.214,
      "step": 47122
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1093076467514038,
      "learning_rate": 6.581883644334657e-06,
      "loss": 2.2655,
      "step": 47123
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1882712841033936,
      "learning_rate": 6.581496710000697e-06,
      "loss": 2.4324,
      "step": 47124
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0473521947860718,
      "learning_rate": 6.581109781462027e-06,
      "loss": 2.1778,
      "step": 47125
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0744842290878296,
      "learning_rate": 6.580722858719311e-06,
      "loss": 2.2537,
      "step": 47126
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0685899257659912,
      "learning_rate": 6.580335941773193e-06,
      "loss": 2.3128,
      "step": 47127
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1297072172164917,
      "learning_rate": 6.579949030624334e-06,
      "loss": 2.5118,
      "step": 47128
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9669724702835083,
      "learning_rate": 6.579562125273391e-06,
      "loss": 2.3449,
      "step": 47129
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0781561136245728,
      "learning_rate": 6.579175225721018e-06,
      "loss": 2.2224,
      "step": 47130
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.175583839416504,
      "learning_rate": 6.578788331967874e-06,
      "loss": 2.2767,
      "step": 47131
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0154927968978882,
      "learning_rate": 6.578401444014611e-06,
      "loss": 2.4144,
      "step": 47132
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.304207444190979,
      "learning_rate": 6.5780145618618875e-06,
      "loss": 2.3873,
      "step": 47133
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2909080982208252,
      "learning_rate": 6.577627685510362e-06,
      "loss": 2.4847,
      "step": 47134
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9647356867790222,
      "learning_rate": 6.577240814960684e-06,
      "loss": 2.258,
      "step": 47135
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0246983766555786,
      "learning_rate": 6.576853950213512e-06,
      "loss": 2.2945,
      "step": 47136
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0284483432769775,
      "learning_rate": 6.576467091269503e-06,
      "loss": 2.2216,
      "step": 47137
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9916976690292358,
      "learning_rate": 6.576080238129312e-06,
      "loss": 2.3375,
      "step": 47138
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9691698551177979,
      "learning_rate": 6.575693390793596e-06,
      "loss": 2.2369,
      "step": 47139
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1493028402328491,
      "learning_rate": 6.575306549263011e-06,
      "loss": 2.3436,
      "step": 47140
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1159957647323608,
      "learning_rate": 6.574919713538208e-06,
      "loss": 2.5081,
      "step": 47141
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.120579719543457,
      "learning_rate": 6.574532883619849e-06,
      "loss": 2.3865,
      "step": 47142
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0492182970046997,
      "learning_rate": 6.574146059508585e-06,
      "loss": 2.2994,
      "step": 47143
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.400314211845398,
      "learning_rate": 6.573759241205073e-06,
      "loss": 2.3765,
      "step": 47144
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1238726377487183,
      "learning_rate": 6.573372428709972e-06,
      "loss": 2.4297,
      "step": 47145
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2123308181762695,
      "learning_rate": 6.572985622023934e-06,
      "loss": 2.332,
      "step": 47146
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1174594163894653,
      "learning_rate": 6.572598821147617e-06,
      "loss": 2.2603,
      "step": 47147
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.153549313545227,
      "learning_rate": 6.572212026081675e-06,
      "loss": 2.238,
      "step": 47148
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1201865673065186,
      "learning_rate": 6.571825236826765e-06,
      "loss": 2.2533,
      "step": 47149
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0153725147247314,
      "learning_rate": 6.571438453383543e-06,
      "loss": 2.3781,
      "step": 47150
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.050584316253662,
      "learning_rate": 6.571051675752662e-06,
      "loss": 2.528,
      "step": 47151
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0891566276550293,
      "learning_rate": 6.5706649039347854e-06,
      "loss": 2.4209,
      "step": 47152
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4486726522445679,
      "learning_rate": 6.570278137930559e-06,
      "loss": 2.1182,
      "step": 47153
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1075347661972046,
      "learning_rate": 6.569891377740642e-06,
      "loss": 2.5548,
      "step": 47154
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0042301416397095,
      "learning_rate": 6.569504623365692e-06,
      "loss": 2.3295,
      "step": 47155
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9843679070472717,
      "learning_rate": 6.569117874806362e-06,
      "loss": 2.2788,
      "step": 47156
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0840749740600586,
      "learning_rate": 6.56873113206331e-06,
      "loss": 2.4809,
      "step": 47157
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0598032474517822,
      "learning_rate": 6.568344395137189e-06,
      "loss": 2.153,
      "step": 47158
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2917840480804443,
      "learning_rate": 6.567957664028658e-06,
      "loss": 2.33,
      "step": 47159
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2033026218414307,
      "learning_rate": 6.56757093873837e-06,
      "loss": 2.1351,
      "step": 47160
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.08416748046875,
      "learning_rate": 6.567184219266982e-06,
      "loss": 2.278,
      "step": 47161
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1072982549667358,
      "learning_rate": 6.566797505615151e-06,
      "loss": 2.4497,
      "step": 47162
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0936800241470337,
      "learning_rate": 6.566410797783529e-06,
      "loss": 2.2696,
      "step": 47163
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2724387645721436,
      "learning_rate": 6.566024095772773e-06,
      "loss": 2.4338,
      "step": 47164
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0129079818725586,
      "learning_rate": 6.565637399583538e-06,
      "loss": 2.3243,
      "step": 47165
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0488492250442505,
      "learning_rate": 6.565250709216484e-06,
      "loss": 2.3867,
      "step": 47166
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0960888862609863,
      "learning_rate": 6.564864024672259e-06,
      "loss": 2.1729,
      "step": 47167
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1441082954406738,
      "learning_rate": 6.564477345951525e-06,
      "loss": 2.5878,
      "step": 47168
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1002572774887085,
      "learning_rate": 6.564090673054935e-06,
      "loss": 2.1182,
      "step": 47169
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1563606262207031,
      "learning_rate": 6.563704005983142e-06,
      "loss": 2.2723,
      "step": 47170
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1211308240890503,
      "learning_rate": 6.563317344736805e-06,
      "loss": 2.2425,
      "step": 47171
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.282096028327942,
      "learning_rate": 6.562930689316578e-06,
      "loss": 2.2988,
      "step": 47172
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2002366781234741,
      "learning_rate": 6.5625440397231176e-06,
      "loss": 2.2008,
      "step": 47173
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1242334842681885,
      "learning_rate": 6.562157395957078e-06,
      "loss": 2.3771,
      "step": 47174
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.145013689994812,
      "learning_rate": 6.561770758019116e-06,
      "loss": 2.2771,
      "step": 47175
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0786306858062744,
      "learning_rate": 6.5613841259098865e-06,
      "loss": 2.32,
      "step": 47176
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0774022340774536,
      "learning_rate": 6.560997499630044e-06,
      "loss": 2.4707,
      "step": 47177
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0146276950836182,
      "learning_rate": 6.560610879180245e-06,
      "loss": 2.3051,
      "step": 47178
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1452157497406006,
      "learning_rate": 6.560224264561148e-06,
      "loss": 2.164,
      "step": 47179
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1072256565093994,
      "learning_rate": 6.559837655773402e-06,
      "loss": 2.3926,
      "step": 47180
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9518495798110962,
      "learning_rate": 6.559451052817666e-06,
      "loss": 2.3071,
      "step": 47181
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.178449034690857,
      "learning_rate": 6.559064455694595e-06,
      "loss": 2.3541,
      "step": 47182
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0701181888580322,
      "learning_rate": 6.558677864404843e-06,
      "loss": 2.2082,
      "step": 47183
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0712004899978638,
      "learning_rate": 6.558291278949067e-06,
      "loss": 2.1808,
      "step": 47184
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.040642499923706,
      "learning_rate": 6.557904699327923e-06,
      "loss": 2.308,
      "step": 47185
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1523805856704712,
      "learning_rate": 6.557518125542064e-06,
      "loss": 2.2175,
      "step": 47186
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1095622777938843,
      "learning_rate": 6.557131557592148e-06,
      "loss": 2.4602,
      "step": 47187
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0483068227767944,
      "learning_rate": 6.556744995478828e-06,
      "loss": 2.394,
      "step": 47188
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0923113822937012,
      "learning_rate": 6.556358439202761e-06,
      "loss": 2.6056,
      "step": 47189
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0359785556793213,
      "learning_rate": 6.555971888764604e-06,
      "loss": 2.383,
      "step": 47190
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4225660562515259,
      "learning_rate": 6.555585344165008e-06,
      "loss": 2.3489,
      "step": 47191
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2054712772369385,
      "learning_rate": 6.555198805404632e-06,
      "loss": 2.2582,
      "step": 47192
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0731728076934814,
      "learning_rate": 6.5548122724841276e-06,
      "loss": 2.5272,
      "step": 47193
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9546202421188354,
      "learning_rate": 6.554425745404152e-06,
      "loss": 2.1574,
      "step": 47194
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1219501495361328,
      "learning_rate": 6.5540392241653606e-06,
      "loss": 2.214,
      "step": 47195
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0850071907043457,
      "learning_rate": 6.553652708768411e-06,
      "loss": 2.1321,
      "step": 47196
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9735634326934814,
      "learning_rate": 6.553266199213955e-06,
      "loss": 2.3491,
      "step": 47197
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1188476085662842,
      "learning_rate": 6.552879695502646e-06,
      "loss": 2.4198,
      "step": 47198
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0909608602523804,
      "learning_rate": 6.552493197635144e-06,
      "loss": 2.4636,
      "step": 47199
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0573424100875854,
      "learning_rate": 6.552106705612102e-06,
      "loss": 2.3734,
      "step": 47200
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.01114022731781,
      "learning_rate": 6.5517202194341764e-06,
      "loss": 2.3558,
      "step": 47201
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1278785467147827,
      "learning_rate": 6.551333739102022e-06,
      "loss": 2.3604,
      "step": 47202
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2083650827407837,
      "learning_rate": 6.550947264616292e-06,
      "loss": 2.3634,
      "step": 47203
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0068769454956055,
      "learning_rate": 6.550560795977643e-06,
      "loss": 2.1608,
      "step": 47204
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0778056383132935,
      "learning_rate": 6.550174333186735e-06,
      "loss": 2.0761,
      "step": 47205
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0737919807434082,
      "learning_rate": 6.549787876244215e-06,
      "loss": 2.4034,
      "step": 47206
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0871834754943848,
      "learning_rate": 6.549401425150741e-06,
      "loss": 2.6692,
      "step": 47207
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.083088755607605,
      "learning_rate": 6.549014979906969e-06,
      "loss": 2.5113,
      "step": 47208
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1155911684036255,
      "learning_rate": 6.548628540513553e-06,
      "loss": 2.4476,
      "step": 47209
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.143544316291809,
      "learning_rate": 6.54824210697115e-06,
      "loss": 2.2626,
      "step": 47210
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1005568504333496,
      "learning_rate": 6.547855679280414e-06,
      "loss": 2.3099,
      "step": 47211
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0925211906433105,
      "learning_rate": 6.547469257442001e-06,
      "loss": 2.5108,
      "step": 47212
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.161433219909668,
      "learning_rate": 6.547082841456564e-06,
      "loss": 2.546,
      "step": 47213
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0189512968063354,
      "learning_rate": 6.546696431324759e-06,
      "loss": 2.3174,
      "step": 47214
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1323662996292114,
      "learning_rate": 6.546310027047243e-06,
      "loss": 2.191,
      "step": 47215
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0029704570770264,
      "learning_rate": 6.545923628624668e-06,
      "loss": 2.0712,
      "step": 47216
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0862038135528564,
      "learning_rate": 6.545537236057691e-06,
      "loss": 2.455,
      "step": 47217
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1482584476470947,
      "learning_rate": 6.5451508493469705e-06,
      "loss": 2.4222,
      "step": 47218
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0835593938827515,
      "learning_rate": 6.544764468493154e-06,
      "loss": 2.4598,
      "step": 47219
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0665054321289062,
      "learning_rate": 6.544378093496901e-06,
      "loss": 2.3179,
      "step": 47220
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9689230918884277,
      "learning_rate": 6.5439917243588646e-06,
      "loss": 2.3397,
      "step": 47221
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0352002382278442,
      "learning_rate": 6.543605361079701e-06,
      "loss": 2.3658,
      "step": 47222
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0693928003311157,
      "learning_rate": 6.543219003660065e-06,
      "loss": 2.2106,
      "step": 47223
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1108002662658691,
      "learning_rate": 6.5428326521006145e-06,
      "loss": 2.2466,
      "step": 47224
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1032065153121948,
      "learning_rate": 6.542446306401998e-06,
      "loss": 2.0908,
      "step": 47225
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9949317574501038,
      "learning_rate": 6.5420599665648755e-06,
      "loss": 2.1309,
      "step": 47226
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.070433497428894,
      "learning_rate": 6.5416736325899e-06,
      "loss": 2.2757,
      "step": 47227
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1840152740478516,
      "learning_rate": 6.541287304477726e-06,
      "loss": 2.403,
      "step": 47228
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.9803977608680725,
      "learning_rate": 6.5409009822290105e-06,
      "loss": 2.3076,
      "step": 47229
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.0060127973556519,
      "learning_rate": 6.540514665844408e-06,
      "loss": 2.3105,
      "step": 47230
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7059135437011719,
      "learning_rate": 6.540128355324575e-06,
      "loss": 2.2279,
      "step": 47231
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.155875563621521,
      "learning_rate": 6.5397420506701616e-06,
      "loss": 2.5455,
      "step": 47232
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0201094150543213,
      "learning_rate": 6.539355751881824e-06,
      "loss": 2.2602,
      "step": 47233
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1973555088043213,
      "learning_rate": 6.538969458960219e-06,
      "loss": 2.4163,
      "step": 47234
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0416160821914673,
      "learning_rate": 6.538583171906001e-06,
      "loss": 2.368,
      "step": 47235
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1454479694366455,
      "learning_rate": 6.5381968907198246e-06,
      "loss": 2.349,
      "step": 47236
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0488044023513794,
      "learning_rate": 6.5378106154023444e-06,
      "loss": 2.3398,
      "step": 47237
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1732991933822632,
      "learning_rate": 6.537424345954216e-06,
      "loss": 2.1087,
      "step": 47238
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1665353775024414,
      "learning_rate": 6.537038082376093e-06,
      "loss": 2.3887,
      "step": 47239
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0457500219345093,
      "learning_rate": 6.53665182466863e-06,
      "loss": 2.4201,
      "step": 47240
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1599797010421753,
      "learning_rate": 6.536265572832486e-06,
      "loss": 2.4915,
      "step": 47241
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2414888143539429,
      "learning_rate": 6.53587932686831e-06,
      "loss": 2.268,
      "step": 47242
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1088751554489136,
      "learning_rate": 6.53549308677676e-06,
      "loss": 2.2935,
      "step": 47243
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1041682958602905,
      "learning_rate": 6.53510685255849e-06,
      "loss": 2.2299,
      "step": 47244
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.997724175453186,
      "learning_rate": 6.534720624214159e-06,
      "loss": 2.377,
      "step": 47245
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.049535870552063,
      "learning_rate": 6.534334401744413e-06,
      "loss": 2.3069,
      "step": 47246
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0886355638504028,
      "learning_rate": 6.533948185149913e-06,
      "loss": 2.2039,
      "step": 47247
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1430517435073853,
      "learning_rate": 6.533561974431311e-06,
      "loss": 2.4012,
      "step": 47248
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0657265186309814,
      "learning_rate": 6.533175769589264e-06,
      "loss": 2.18,
      "step": 47249
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.096350908279419,
      "learning_rate": 6.532789570624425e-06,
      "loss": 2.3986,
      "step": 47250
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1860384941101074,
      "learning_rate": 6.5324033775374485e-06,
      "loss": 2.3517,
      "step": 47251
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1773090362548828,
      "learning_rate": 6.5320171903289946e-06,
      "loss": 2.2548,
      "step": 47252
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0916447639465332,
      "learning_rate": 6.531631008999708e-06,
      "loss": 2.5321,
      "step": 47253
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0621775388717651,
      "learning_rate": 6.531244833550252e-06,
      "loss": 2.1834,
      "step": 47254
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0590623617172241,
      "learning_rate": 6.530858663981277e-06,
      "loss": 2.0948,
      "step": 47255
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1506211757659912,
      "learning_rate": 6.530472500293438e-06,
      "loss": 2.0605,
      "step": 47256
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1784381866455078,
      "learning_rate": 6.5300863424873906e-06,
      "loss": 2.3554,
      "step": 47257
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1191761493682861,
      "learning_rate": 6.529700190563795e-06,
      "loss": 2.1229,
      "step": 47258
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0892930030822754,
      "learning_rate": 6.529314044523294e-06,
      "loss": 2.3086,
      "step": 47259
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0305696725845337,
      "learning_rate": 6.528927904366549e-06,
      "loss": 2.4653,
      "step": 47260
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1547974348068237,
      "learning_rate": 6.528541770094213e-06,
      "loss": 2.3158,
      "step": 47261
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0472710132598877,
      "learning_rate": 6.528155641706944e-06,
      "loss": 2.4086,
      "step": 47262
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0262163877487183,
      "learning_rate": 6.527769519205392e-06,
      "loss": 2.0123,
      "step": 47263
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0382484197616577,
      "learning_rate": 6.5273834025902135e-06,
      "loss": 2.4512,
      "step": 47264
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0186785459518433,
      "learning_rate": 6.526997291862065e-06,
      "loss": 2.2324,
      "step": 47265
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1599795818328857,
      "learning_rate": 6.526611187021599e-06,
      "loss": 2.462,
      "step": 47266
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1136709451675415,
      "learning_rate": 6.526225088069469e-06,
      "loss": 2.5792,
      "step": 47267
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1877148151397705,
      "learning_rate": 6.525838995006332e-06,
      "loss": 2.4601,
      "step": 47268
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.090494155883789,
      "learning_rate": 6.525452907832841e-06,
      "loss": 2.4012,
      "step": 47269
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0473878383636475,
      "learning_rate": 6.525066826549651e-06,
      "loss": 2.3333,
      "step": 47270
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.123819351196289,
      "learning_rate": 6.524680751157422e-06,
      "loss": 2.3488,
      "step": 47271
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0347517728805542,
      "learning_rate": 6.524294681656798e-06,
      "loss": 2.2685,
      "step": 47272
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9784885048866272,
      "learning_rate": 6.523908618048438e-06,
      "loss": 2.5187,
      "step": 47273
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0736502408981323,
      "learning_rate": 6.523522560332997e-06,
      "loss": 2.4234,
      "step": 47274
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0389437675476074,
      "learning_rate": 6.5231365085111295e-06,
      "loss": 2.3161,
      "step": 47275
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1100399494171143,
      "learning_rate": 6.522750462583489e-06,
      "loss": 2.5764,
      "step": 47276
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2060288190841675,
      "learning_rate": 6.5223644225507335e-06,
      "loss": 2.4044,
      "step": 47277
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.107709527015686,
      "learning_rate": 6.5219783884135125e-06,
      "loss": 2.2696,
      "step": 47278
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1262792348861694,
      "learning_rate": 6.521592360172484e-06,
      "loss": 2.4694,
      "step": 47279
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.100441336631775,
      "learning_rate": 6.521206337828303e-06,
      "loss": 2.279,
      "step": 47280
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1579875946044922,
      "learning_rate": 6.52082032138162e-06,
      "loss": 2.4123,
      "step": 47281
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0861448049545288,
      "learning_rate": 6.52043431083309e-06,
      "loss": 2.3294,
      "step": 47282
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.203755497932434,
      "learning_rate": 6.520048306183371e-06,
      "loss": 2.2005,
      "step": 47283
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0616446733474731,
      "learning_rate": 6.519662307433117e-06,
      "loss": 2.3631,
      "step": 47284
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1605558395385742,
      "learning_rate": 6.519276314582977e-06,
      "loss": 2.4109,
      "step": 47285
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1783318519592285,
      "learning_rate": 6.518890327633613e-06,
      "loss": 2.4217,
      "step": 47286
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9853514432907104,
      "learning_rate": 6.518504346585672e-06,
      "loss": 2.3297,
      "step": 47287
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1597967147827148,
      "learning_rate": 6.518118371439812e-06,
      "loss": 2.171,
      "step": 47288
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1230990886688232,
      "learning_rate": 6.517732402196688e-06,
      "loss": 2.2615,
      "step": 47289
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.109151005744934,
      "learning_rate": 6.517346438856952e-06,
      "loss": 2.2812,
      "step": 47290
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1237869262695312,
      "learning_rate": 6.51696048142126e-06,
      "loss": 2.2439,
      "step": 47291
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0549354553222656,
      "learning_rate": 6.516574529890267e-06,
      "loss": 2.1789,
      "step": 47292
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0575034618377686,
      "learning_rate": 6.516188584264625e-06,
      "loss": 2.3105,
      "step": 47293
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1210311651229858,
      "learning_rate": 6.51580264454499e-06,
      "loss": 2.3581,
      "step": 47294
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.165081262588501,
      "learning_rate": 6.515416710732015e-06,
      "loss": 2.1894,
      "step": 47295
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0139787197113037,
      "learning_rate": 6.515030782826357e-06,
      "loss": 2.395,
      "step": 47296
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9886970520019531,
      "learning_rate": 6.514644860828671e-06,
      "loss": 2.3459,
      "step": 47297
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0207829475402832,
      "learning_rate": 6.514258944739604e-06,
      "loss": 2.1335,
      "step": 47298
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0219756364822388,
      "learning_rate": 6.513873034559817e-06,
      "loss": 2.5932,
      "step": 47299
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0115127563476562,
      "learning_rate": 6.51348713028996e-06,
      "loss": 2.4628,
      "step": 47300
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.211328387260437,
      "learning_rate": 6.51310123193069e-06,
      "loss": 2.1884,
      "step": 47301
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1470166444778442,
      "learning_rate": 6.512715339482661e-06,
      "loss": 2.0934,
      "step": 47302
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0199005603790283,
      "learning_rate": 6.512329452946525e-06,
      "loss": 2.3201,
      "step": 47303
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.236504077911377,
      "learning_rate": 6.5119435723229385e-06,
      "loss": 2.1114,
      "step": 47304
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.052474856376648,
      "learning_rate": 6.5115576976125545e-06,
      "loss": 2.2943,
      "step": 47305
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.086211085319519,
      "learning_rate": 6.5111718288160285e-06,
      "loss": 2.2854,
      "step": 47306
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1700904369354248,
      "learning_rate": 6.510785965934014e-06,
      "loss": 2.2664,
      "step": 47307
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9984048008918762,
      "learning_rate": 6.510400108967166e-06,
      "loss": 2.2675,
      "step": 47308
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0534751415252686,
      "learning_rate": 6.510014257916137e-06,
      "loss": 2.5649,
      "step": 47309
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0855119228363037,
      "learning_rate": 6.509628412781583e-06,
      "loss": 2.2149,
      "step": 47310
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.072622299194336,
      "learning_rate": 6.509242573564154e-06,
      "loss": 2.537,
      "step": 47311
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.082188367843628,
      "learning_rate": 6.508856740264509e-06,
      "loss": 2.4072,
      "step": 47312
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0734015703201294,
      "learning_rate": 6.508470912883298e-06,
      "loss": 2.3407,
      "step": 47313
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0206726789474487,
      "learning_rate": 6.508085091421179e-06,
      "loss": 2.3218,
      "step": 47314
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1424715518951416,
      "learning_rate": 6.507699275878803e-06,
      "loss": 2.3453,
      "step": 47315
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2591346502304077,
      "learning_rate": 6.507313466256825e-06,
      "loss": 2.3839,
      "step": 47316
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1550289392471313,
      "learning_rate": 6.506927662555899e-06,
      "loss": 2.3213,
      "step": 47317
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.730836272239685,
      "learning_rate": 6.506541864776681e-06,
      "loss": 2.1238,
      "step": 47318
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1150892972946167,
      "learning_rate": 6.5061560729198206e-06,
      "loss": 2.4524,
      "step": 47319
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0231035947799683,
      "learning_rate": 6.505770286985977e-06,
      "loss": 2.3334,
      "step": 47320
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0189954042434692,
      "learning_rate": 6.5053845069758005e-06,
      "loss": 2.2375,
      "step": 47321
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.111429214477539,
      "learning_rate": 6.504998732889947e-06,
      "loss": 2.4617,
      "step": 47322
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1483521461486816,
      "learning_rate": 6.504612964729069e-06,
      "loss": 2.3292,
      "step": 47323
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.041487455368042,
      "learning_rate": 6.504227202493826e-06,
      "loss": 2.3727,
      "step": 47324
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0468635559082031,
      "learning_rate": 6.503841446184863e-06,
      "loss": 2.2459,
      "step": 47325
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4118728637695312,
      "learning_rate": 6.5034556958028386e-06,
      "loss": 2.2265,
      "step": 47326
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0241599082946777,
      "learning_rate": 6.503069951348406e-06,
      "loss": 2.4075,
      "step": 47327
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2135848999023438,
      "learning_rate": 6.502684212822221e-06,
      "loss": 2.1848,
      "step": 47328
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1161329746246338,
      "learning_rate": 6.502298480224935e-06,
      "loss": 2.292,
      "step": 47329
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0226869583129883,
      "learning_rate": 6.501912753557202e-06,
      "loss": 2.3796,
      "step": 47330
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.086561918258667,
      "learning_rate": 6.501527032819679e-06,
      "loss": 2.5513,
      "step": 47331
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.070518136024475,
      "learning_rate": 6.501141318013017e-06,
      "loss": 2.2183,
      "step": 47332
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0423858165740967,
      "learning_rate": 6.50075560913787e-06,
      "loss": 2.2522,
      "step": 47333
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1111150979995728,
      "learning_rate": 6.500369906194893e-06,
      "loss": 2.25,
      "step": 47334
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.063534140586853,
      "learning_rate": 6.49998420918474e-06,
      "loss": 2.3348,
      "step": 47335
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.105588436126709,
      "learning_rate": 6.499598518108066e-06,
      "loss": 2.4634,
      "step": 47336
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1415480375289917,
      "learning_rate": 6.499212832965525e-06,
      "loss": 2.2755,
      "step": 47337
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.060634732246399,
      "learning_rate": 6.498827153757764e-06,
      "loss": 2.3872,
      "step": 47338
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2429096698760986,
      "learning_rate": 6.498441480485443e-06,
      "loss": 2.0839,
      "step": 47339
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1800239086151123,
      "learning_rate": 6.498055813149215e-06,
      "loss": 2.4014,
      "step": 47340
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0066852569580078,
      "learning_rate": 6.497670151749733e-06,
      "loss": 2.2312,
      "step": 47341
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2172333002090454,
      "learning_rate": 6.497284496287655e-06,
      "loss": 2.2574,
      "step": 47342
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0858649015426636,
      "learning_rate": 6.496898846763627e-06,
      "loss": 2.2474,
      "step": 47343
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1045993566513062,
      "learning_rate": 6.496513203178308e-06,
      "loss": 2.5175,
      "step": 47344
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0643877983093262,
      "learning_rate": 6.496127565532351e-06,
      "loss": 2.3367,
      "step": 47345
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0371507406234741,
      "learning_rate": 6.495741933826409e-06,
      "loss": 2.1129,
      "step": 47346
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.247375726699829,
      "learning_rate": 6.495356308061136e-06,
      "loss": 2.6752,
      "step": 47347
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0850839614868164,
      "learning_rate": 6.494970688237185e-06,
      "loss": 2.3416,
      "step": 47348
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3948806524276733,
      "learning_rate": 6.494585074355212e-06,
      "loss": 2.2613,
      "step": 47349
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.039603590965271,
      "learning_rate": 6.494199466415874e-06,
      "loss": 2.2478,
      "step": 47350
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1509150266647339,
      "learning_rate": 6.493813864419813e-06,
      "loss": 2.3379,
      "step": 47351
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.088908076286316,
      "learning_rate": 6.493428268367693e-06,
      "loss": 2.2732,
      "step": 47352
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0582526922225952,
      "learning_rate": 6.493042678260163e-06,
      "loss": 2.3153,
      "step": 47353
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0970045328140259,
      "learning_rate": 6.492657094097878e-06,
      "loss": 2.1085,
      "step": 47354
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0675303936004639,
      "learning_rate": 6.492271515881492e-06,
      "loss": 1.9922,
      "step": 47355
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2817325592041016,
      "learning_rate": 6.49188594361166e-06,
      "loss": 2.3094,
      "step": 47356
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2340960502624512,
      "learning_rate": 6.491500377289032e-06,
      "loss": 2.3016,
      "step": 47357
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.109742522239685,
      "learning_rate": 6.4911148169142644e-06,
      "loss": 2.3744,
      "step": 47358
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1704550981521606,
      "learning_rate": 6.4907292624880104e-06,
      "loss": 2.1717,
      "step": 47359
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0959157943725586,
      "learning_rate": 6.490343714010923e-06,
      "loss": 2.5508,
      "step": 47360
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1756850481033325,
      "learning_rate": 6.489958171483657e-06,
      "loss": 2.4805,
      "step": 47361
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.256801962852478,
      "learning_rate": 6.489572634906864e-06,
      "loss": 1.9595,
      "step": 47362
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0496480464935303,
      "learning_rate": 6.489187104281204e-06,
      "loss": 2.4013,
      "step": 47363
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0222957134246826,
      "learning_rate": 6.4888015796073204e-06,
      "loss": 2.1992,
      "step": 47364
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.155809998512268,
      "learning_rate": 6.488416060885873e-06,
      "loss": 2.766,
      "step": 47365
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.996829628944397,
      "learning_rate": 6.488030548117513e-06,
      "loss": 2.2983,
      "step": 47366
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9813410043716431,
      "learning_rate": 6.4876450413028945e-06,
      "loss": 2.2276,
      "step": 47367
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0733388662338257,
      "learning_rate": 6.4872595404426746e-06,
      "loss": 2.3955,
      "step": 47368
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0152549743652344,
      "learning_rate": 6.486874045537501e-06,
      "loss": 2.3804,
      "step": 47369
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1199352741241455,
      "learning_rate": 6.486488556588033e-06,
      "loss": 2.1964,
      "step": 47370
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.165321946144104,
      "learning_rate": 6.486103073594918e-06,
      "loss": 2.3725,
      "step": 47371
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0356894731521606,
      "learning_rate": 6.485717596558813e-06,
      "loss": 2.1661,
      "step": 47372
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.96556556224823,
      "learning_rate": 6.485332125480373e-06,
      "loss": 2.5563,
      "step": 47373
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.083114743232727,
      "learning_rate": 6.484946660360247e-06,
      "loss": 2.346,
      "step": 47374
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.242066740989685,
      "learning_rate": 6.484561201199093e-06,
      "loss": 2.6441,
      "step": 47375
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0602236986160278,
      "learning_rate": 6.484175747997567e-06,
      "loss": 2.4728,
      "step": 47376
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0539946556091309,
      "learning_rate": 6.483790300756312e-06,
      "loss": 2.3184,
      "step": 47377
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1541424989700317,
      "learning_rate": 6.483404859475988e-06,
      "loss": 2.4109,
      "step": 47378
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7267577648162842,
      "learning_rate": 6.483019424157249e-06,
      "loss": 2.2269,
      "step": 47379
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0035474300384521,
      "learning_rate": 6.482633994800747e-06,
      "loss": 2.4346,
      "step": 47380
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.05518639087677,
      "learning_rate": 6.4822485714071345e-06,
      "loss": 2.4457,
      "step": 47381
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.107908010482788,
      "learning_rate": 6.481863153977067e-06,
      "loss": 2.3635,
      "step": 47382
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9910616874694824,
      "learning_rate": 6.4814777425111976e-06,
      "loss": 2.2954,
      "step": 47383
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.056447982788086,
      "learning_rate": 6.481092337010177e-06,
      "loss": 2.3301,
      "step": 47384
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.090558648109436,
      "learning_rate": 6.4807069374746626e-06,
      "loss": 2.3269,
      "step": 47385
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0682264566421509,
      "learning_rate": 6.480321543905305e-06,
      "loss": 2.4467,
      "step": 47386
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2190282344818115,
      "learning_rate": 6.479936156302757e-06,
      "loss": 2.452,
      "step": 47387
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1259459257125854,
      "learning_rate": 6.4795507746676755e-06,
      "loss": 2.2887,
      "step": 47388
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.221598744392395,
      "learning_rate": 6.479165399000711e-06,
      "loss": 2.1644,
      "step": 47389
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1037653684616089,
      "learning_rate": 6.47878002930252e-06,
      "loss": 2.455,
      "step": 47390
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2139805555343628,
      "learning_rate": 6.478394665573751e-06,
      "loss": 2.4006,
      "step": 47391
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.226504921913147,
      "learning_rate": 6.478009307815059e-06,
      "loss": 2.2194,
      "step": 47392
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0686434507369995,
      "learning_rate": 6.477623956027097e-06,
      "loss": 2.4351,
      "step": 47393
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.109707236289978,
      "learning_rate": 6.477238610210519e-06,
      "loss": 2.5085,
      "step": 47394
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.265288233757019,
      "learning_rate": 6.476853270365979e-06,
      "loss": 2.4447,
      "step": 47395
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1973276138305664,
      "learning_rate": 6.4764679364941305e-06,
      "loss": 2.3355,
      "step": 47396
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1803017854690552,
      "learning_rate": 6.476082608595624e-06,
      "loss": 2.3744,
      "step": 47397
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0439790487289429,
      "learning_rate": 6.475697286671118e-06,
      "loss": 2.6686,
      "step": 47398
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2164311408996582,
      "learning_rate": 6.475311970721258e-06,
      "loss": 2.4313,
      "step": 47399
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1536729335784912,
      "learning_rate": 6.474926660746705e-06,
      "loss": 2.2513,
      "step": 47400
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0043257474899292,
      "learning_rate": 6.474541356748106e-06,
      "loss": 2.2658,
      "step": 47401
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0569658279418945,
      "learning_rate": 6.4741560587261176e-06,
      "loss": 2.65,
      "step": 47402
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0695440769195557,
      "learning_rate": 6.4737707666813954e-06,
      "loss": 2.2213,
      "step": 47403
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2269070148468018,
      "learning_rate": 6.473385480614588e-06,
      "loss": 2.448,
      "step": 47404
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0729426145553589,
      "learning_rate": 6.473000200526349e-06,
      "loss": 2.5694,
      "step": 47405
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1598093509674072,
      "learning_rate": 6.472614926417331e-06,
      "loss": 2.4209,
      "step": 47406
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.054823637008667,
      "learning_rate": 6.4722296582881905e-06,
      "loss": 2.2811,
      "step": 47407
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0118173360824585,
      "learning_rate": 6.4718443961395785e-06,
      "loss": 2.1336,
      "step": 47408
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1145514249801636,
      "learning_rate": 6.471459139972147e-06,
      "loss": 2.2852,
      "step": 47409
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0709648132324219,
      "learning_rate": 6.4710738897865525e-06,
      "loss": 2.4825,
      "step": 47410
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2842559814453125,
      "learning_rate": 6.470688645583445e-06,
      "loss": 2.1655,
      "step": 47411
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1506638526916504,
      "learning_rate": 6.4703034073634795e-06,
      "loss": 2.3689,
      "step": 47412
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5326412916183472,
      "learning_rate": 6.469918175127307e-06,
      "loss": 2.3285,
      "step": 47413
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0741453170776367,
      "learning_rate": 6.469532948875584e-06,
      "loss": 2.0751,
      "step": 47414
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2497400045394897,
      "learning_rate": 6.469147728608961e-06,
      "loss": 2.3523,
      "step": 47415
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.22314453125,
      "learning_rate": 6.468762514328095e-06,
      "loss": 2.1899,
      "step": 47416
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1795834302902222,
      "learning_rate": 6.468377306033632e-06,
      "loss": 2.5337,
      "step": 47417
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.083738923072815,
      "learning_rate": 6.467992103726229e-06,
      "loss": 2.3563,
      "step": 47418
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.08986234664917,
      "learning_rate": 6.467606907406538e-06,
      "loss": 2.1892,
      "step": 47419
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3654112815856934,
      "learning_rate": 6.467221717075212e-06,
      "loss": 2.3906,
      "step": 47420
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.7778828144073486,
      "learning_rate": 6.466836532732905e-06,
      "loss": 2.4695,
      "step": 47421
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1085660457611084,
      "learning_rate": 6.4664513543802706e-06,
      "loss": 2.4006,
      "step": 47422
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0256379842758179,
      "learning_rate": 6.466066182017961e-06,
      "loss": 2.4721,
      "step": 47423
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9796463251113892,
      "learning_rate": 6.4656810156466275e-06,
      "loss": 2.4086,
      "step": 47424
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0112884044647217,
      "learning_rate": 6.465295855266927e-06,
      "loss": 2.2885,
      "step": 47425
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1950218677520752,
      "learning_rate": 6.46491070087951e-06,
      "loss": 2.2492,
      "step": 47426
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1709487438201904,
      "learning_rate": 6.464525552485028e-06,
      "loss": 2.2193,
      "step": 47427
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0688022375106812,
      "learning_rate": 6.4641404100841344e-06,
      "loss": 2.4414,
      "step": 47428
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.082030177116394,
      "learning_rate": 6.463755273677486e-06,
      "loss": 2.3857,
      "step": 47429
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1384445428848267,
      "learning_rate": 6.4633701432657325e-06,
      "loss": 2.3383,
      "step": 47430
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9720543026924133,
      "learning_rate": 6.462985018849524e-06,
      "loss": 2.4803,
      "step": 47431
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9999160170555115,
      "learning_rate": 6.46259990042952e-06,
      "loss": 2.5155,
      "step": 47432
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1185314655303955,
      "learning_rate": 6.462214788006369e-06,
      "loss": 2.2489,
      "step": 47433
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.075779676437378,
      "learning_rate": 6.461829681580725e-06,
      "loss": 2.435,
      "step": 47434
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9799314141273499,
      "learning_rate": 6.461444581153238e-06,
      "loss": 2.1841,
      "step": 47435
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.04250967502594,
      "learning_rate": 6.461059486724566e-06,
      "loss": 2.362,
      "step": 47436
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.210403323173523,
      "learning_rate": 6.460674398295358e-06,
      "loss": 2.3922,
      "step": 47437
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.103945016860962,
      "learning_rate": 6.460289315866268e-06,
      "loss": 2.3858,
      "step": 47438
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1817588806152344,
      "learning_rate": 6.4599042394379495e-06,
      "loss": 2.5012,
      "step": 47439
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0799269676208496,
      "learning_rate": 6.459519169011055e-06,
      "loss": 2.2259,
      "step": 47440
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9945998787879944,
      "learning_rate": 6.459134104586236e-06,
      "loss": 2.2112,
      "step": 47441
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0819416046142578,
      "learning_rate": 6.458749046164152e-06,
      "loss": 2.4872,
      "step": 47442
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2331279516220093,
      "learning_rate": 6.458363993745446e-06,
      "loss": 2.487,
      "step": 47443
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1111869812011719,
      "learning_rate": 6.457978947330774e-06,
      "loss": 2.4865,
      "step": 47444
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.102151870727539,
      "learning_rate": 6.4575939069207895e-06,
      "loss": 2.1026,
      "step": 47445
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0776751041412354,
      "learning_rate": 6.457208872516146e-06,
      "loss": 1.9228,
      "step": 47446
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0923203229904175,
      "learning_rate": 6.456823844117496e-06,
      "loss": 2.4005,
      "step": 47447
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9835858345031738,
      "learning_rate": 6.456438821725491e-06,
      "loss": 2.2933,
      "step": 47448
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0167227983474731,
      "learning_rate": 6.456053805340785e-06,
      "loss": 2.237,
      "step": 47449
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.002933144569397,
      "learning_rate": 6.455668794964031e-06,
      "loss": 2.3772,
      "step": 47450
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0767443180084229,
      "learning_rate": 6.455283790595881e-06,
      "loss": 2.025,
      "step": 47451
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9727709889411926,
      "learning_rate": 6.454898792236987e-06,
      "loss": 2.201,
      "step": 47452
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1064345836639404,
      "learning_rate": 6.454513799888003e-06,
      "loss": 2.4139,
      "step": 47453
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1326539516448975,
      "learning_rate": 6.454128813549582e-06,
      "loss": 2.288,
      "step": 47454
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1017202138900757,
      "learning_rate": 6.453743833222376e-06,
      "loss": 2.47,
      "step": 47455
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9502344727516174,
      "learning_rate": 6.4533588589070354e-06,
      "loss": 2.2953,
      "step": 47456
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.132448673248291,
      "learning_rate": 6.452973890604215e-06,
      "loss": 2.2352,
      "step": 47457
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9995949268341064,
      "learning_rate": 6.452588928314567e-06,
      "loss": 2.1348,
      "step": 47458
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1017529964447021,
      "learning_rate": 6.452203972038744e-06,
      "loss": 2.4084,
      "step": 47459
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0213366746902466,
      "learning_rate": 6.451819021777403e-06,
      "loss": 2.4682,
      "step": 47460
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0640833377838135,
      "learning_rate": 6.4514340775311875e-06,
      "loss": 2.4984,
      "step": 47461
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0549813508987427,
      "learning_rate": 6.451049139300757e-06,
      "loss": 2.5982,
      "step": 47462
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0724427700042725,
      "learning_rate": 6.450664207086761e-06,
      "loss": 2.267,
      "step": 47463
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.08588445186615,
      "learning_rate": 6.450279280889853e-06,
      "loss": 2.1366,
      "step": 47464
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.948604941368103,
      "learning_rate": 6.449894360710687e-06,
      "loss": 2.1948,
      "step": 47465
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0362099409103394,
      "learning_rate": 6.449509446549913e-06,
      "loss": 2.515,
      "step": 47466
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9640932679176331,
      "learning_rate": 6.449124538408186e-06,
      "loss": 2.0992,
      "step": 47467
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0562738180160522,
      "learning_rate": 6.4487396362861565e-06,
      "loss": 2.1258,
      "step": 47468
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4327077865600586,
      "learning_rate": 6.448354740184482e-06,
      "loss": 2.3136,
      "step": 47469
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.965813159942627,
      "learning_rate": 6.447969850103806e-06,
      "loss": 2.3017,
      "step": 47470
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1758403778076172,
      "learning_rate": 6.447584966044787e-06,
      "loss": 2.4526,
      "step": 47471
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0241543054580688,
      "learning_rate": 6.447200088008077e-06,
      "loss": 2.3194,
      "step": 47472
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1890287399291992,
      "learning_rate": 6.446815215994326e-06,
      "loss": 2.3732,
      "step": 47473
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1819045543670654,
      "learning_rate": 6.446430350004188e-06,
      "loss": 2.5204,
      "step": 47474
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0177251100540161,
      "learning_rate": 6.4460454900383175e-06,
      "loss": 2.2519,
      "step": 47475
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0569909811019897,
      "learning_rate": 6.445660636097364e-06,
      "loss": 2.3059,
      "step": 47476
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0536773204803467,
      "learning_rate": 6.445275788181981e-06,
      "loss": 2.2719,
      "step": 47477
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2090264558792114,
      "learning_rate": 6.444890946292821e-06,
      "loss": 2.6632,
      "step": 47478
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.052648901939392,
      "learning_rate": 6.444506110430537e-06,
      "loss": 2.1281,
      "step": 47479
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1470311880111694,
      "learning_rate": 6.444121280595781e-06,
      "loss": 2.3355,
      "step": 47480
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9544000029563904,
      "learning_rate": 6.443736456789203e-06,
      "loss": 2.2297,
      "step": 47481
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1966358423233032,
      "learning_rate": 6.443351639011463e-06,
      "loss": 2.2592,
      "step": 47482
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0400975942611694,
      "learning_rate": 6.442966827263205e-06,
      "loss": 2.0466,
      "step": 47483
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0102429389953613,
      "learning_rate": 6.442582021545084e-06,
      "loss": 2.1739,
      "step": 47484
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1069252490997314,
      "learning_rate": 6.442197221857752e-06,
      "loss": 2.3181,
      "step": 47485
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.042490005493164,
      "learning_rate": 6.441812428201862e-06,
      "loss": 2.0727,
      "step": 47486
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0625030994415283,
      "learning_rate": 6.441427640578066e-06,
      "loss": 2.4032,
      "step": 47487
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1371724605560303,
      "learning_rate": 6.44104285898702e-06,
      "loss": 2.2152,
      "step": 47488
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.111072063446045,
      "learning_rate": 6.44065808342937e-06,
      "loss": 2.477,
      "step": 47489
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1116070747375488,
      "learning_rate": 6.440273313905771e-06,
      "loss": 2.2535,
      "step": 47490
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1131017208099365,
      "learning_rate": 6.439888550416875e-06,
      "loss": 2.2966,
      "step": 47491
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0199052095413208,
      "learning_rate": 6.439503792963337e-06,
      "loss": 2.2368,
      "step": 47492
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.143300175666809,
      "learning_rate": 6.439119041545805e-06,
      "loss": 2.2857,
      "step": 47493
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1564890146255493,
      "learning_rate": 6.438734296164935e-06,
      "loss": 2.1666,
      "step": 47494
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0027445554733276,
      "learning_rate": 6.43834955682138e-06,
      "loss": 2.4399,
      "step": 47495
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1040160655975342,
      "learning_rate": 6.437964823515786e-06,
      "loss": 2.1275,
      "step": 47496
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.988438069820404,
      "learning_rate": 6.43758009624881e-06,
      "loss": 2.5256,
      "step": 47497
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.050819993019104,
      "learning_rate": 6.437195375021104e-06,
      "loss": 2.3774,
      "step": 47498
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2698419094085693,
      "learning_rate": 6.436810659833317e-06,
      "loss": 2.3463,
      "step": 47499
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1820393800735474,
      "learning_rate": 6.4364259506861045e-06,
      "loss": 2.2696,
      "step": 47500
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2066131830215454,
      "learning_rate": 6.436041247580119e-06,
      "loss": 2.2235,
      "step": 47501
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2160351276397705,
      "learning_rate": 6.435656550516012e-06,
      "loss": 2.2504,
      "step": 47502
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2045308351516724,
      "learning_rate": 6.435271859494434e-06,
      "loss": 2.4098,
      "step": 47503
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0424466133117676,
      "learning_rate": 6.434887174516038e-06,
      "loss": 2.1895,
      "step": 47504
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0398404598236084,
      "learning_rate": 6.434502495581477e-06,
      "loss": 2.357,
      "step": 47505
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0680526494979858,
      "learning_rate": 6.434117822691402e-06,
      "loss": 2.1945,
      "step": 47506
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9937250018119812,
      "learning_rate": 6.433733155846466e-06,
      "loss": 2.4679,
      "step": 47507
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0581459999084473,
      "learning_rate": 6.433348495047326e-06,
      "loss": 2.7105,
      "step": 47508
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1973060369491577,
      "learning_rate": 6.4329638402946245e-06,
      "loss": 2.2326,
      "step": 47509
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9894121289253235,
      "learning_rate": 6.432579191589019e-06,
      "loss": 2.382,
      "step": 47510
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0758180618286133,
      "learning_rate": 6.432194548931159e-06,
      "loss": 2.4608,
      "step": 47511
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1001672744750977,
      "learning_rate": 6.431809912321698e-06,
      "loss": 2.1075,
      "step": 47512
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.179960012435913,
      "learning_rate": 6.43142528176129e-06,
      "loss": 2.4276,
      "step": 47513
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.038132667541504,
      "learning_rate": 6.431040657250585e-06,
      "loss": 2.2627,
      "step": 47514
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.282942295074463,
      "learning_rate": 6.4306560387902336e-06,
      "loss": 2.2748,
      "step": 47515
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0554425716400146,
      "learning_rate": 6.430271426380894e-06,
      "loss": 2.5356,
      "step": 47516
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1743313074111938,
      "learning_rate": 6.4298868200232105e-06,
      "loss": 2.3705,
      "step": 47517
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.063187599182129,
      "learning_rate": 6.429502219717838e-06,
      "loss": 2.4198,
      "step": 47518
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0620567798614502,
      "learning_rate": 6.429117625465431e-06,
      "loss": 2.2558,
      "step": 47519
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1335604190826416,
      "learning_rate": 6.428733037266637e-06,
      "loss": 2.4149,
      "step": 47520
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1027864217758179,
      "learning_rate": 6.428348455122114e-06,
      "loss": 2.4217,
      "step": 47521
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2226558923721313,
      "learning_rate": 6.427963879032511e-06,
      "loss": 2.2843,
      "step": 47522
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1069077253341675,
      "learning_rate": 6.427579308998475e-06,
      "loss": 2.3231,
      "step": 47523
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2161997556686401,
      "learning_rate": 6.427194745020664e-06,
      "loss": 2.1544,
      "step": 47524
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.07861328125,
      "learning_rate": 6.426810187099728e-06,
      "loss": 2.2269,
      "step": 47525
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1746238470077515,
      "learning_rate": 6.4264256352363196e-06,
      "loss": 2.2058,
      "step": 47526
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.161177635192871,
      "learning_rate": 6.42604108943109e-06,
      "loss": 2.2509,
      "step": 47527
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3399715423583984,
      "learning_rate": 6.4256565496846914e-06,
      "loss": 2.332,
      "step": 47528
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0871100425720215,
      "learning_rate": 6.425272015997775e-06,
      "loss": 2.532,
      "step": 47529
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9456160664558411,
      "learning_rate": 6.424887488370994e-06,
      "loss": 2.1272,
      "step": 47530
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.5858302116394043,
      "learning_rate": 6.424502966805001e-06,
      "loss": 2.4108,
      "step": 47531
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1730523109436035,
      "learning_rate": 6.424118451300445e-06,
      "loss": 2.4276,
      "step": 47532
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9854393601417542,
      "learning_rate": 6.423733941857979e-06,
      "loss": 2.166,
      "step": 47533
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1184718608856201,
      "learning_rate": 6.423349438478257e-06,
      "loss": 2.2638,
      "step": 47534
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0943918228149414,
      "learning_rate": 6.422964941161932e-06,
      "loss": 2.2302,
      "step": 47535
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0781192779541016,
      "learning_rate": 6.422580449909649e-06,
      "loss": 2.3522,
      "step": 47536
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0881023406982422,
      "learning_rate": 6.4221959647220645e-06,
      "loss": 2.1506,
      "step": 47537
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1061409711837769,
      "learning_rate": 6.421811485599828e-06,
      "loss": 2.5409,
      "step": 47538
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.184091567993164,
      "learning_rate": 6.421427012543594e-06,
      "loss": 2.4503,
      "step": 47539
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1602462530136108,
      "learning_rate": 6.4210425455540126e-06,
      "loss": 2.3336,
      "step": 47540
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2518147230148315,
      "learning_rate": 6.420658084631737e-06,
      "loss": 2.1706,
      "step": 47541
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2819833755493164,
      "learning_rate": 6.420273629777417e-06,
      "loss": 2.2605,
      "step": 47542
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1096818447113037,
      "learning_rate": 6.4198891809917065e-06,
      "loss": 2.3415,
      "step": 47543
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0436114072799683,
      "learning_rate": 6.419504738275257e-06,
      "loss": 2.5156,
      "step": 47544
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0984206199645996,
      "learning_rate": 6.419120301628719e-06,
      "loss": 2.4719,
      "step": 47545
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1058619022369385,
      "learning_rate": 6.418735871052743e-06,
      "loss": 2.5384,
      "step": 47546
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.078904628753662,
      "learning_rate": 6.418351446547983e-06,
      "loss": 2.1508,
      "step": 47547
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.10690438747406,
      "learning_rate": 6.417967028115092e-06,
      "loss": 2.2768,
      "step": 47548
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0375101566314697,
      "learning_rate": 6.4175826157547185e-06,
      "loss": 2.3905,
      "step": 47549
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1187608242034912,
      "learning_rate": 6.417198209467516e-06,
      "loss": 2.3394,
      "step": 47550
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1950629949569702,
      "learning_rate": 6.416813809254134e-06,
      "loss": 2.2212,
      "step": 47551
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0542963743209839,
      "learning_rate": 6.416429415115226e-06,
      "loss": 2.2451,
      "step": 47552
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9601641297340393,
      "learning_rate": 6.416045027051445e-06,
      "loss": 2.3826,
      "step": 47553
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9713448882102966,
      "learning_rate": 6.41566064506344e-06,
      "loss": 2.4722,
      "step": 47554
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.078908920288086,
      "learning_rate": 6.4152762691518624e-06,
      "loss": 2.3981,
      "step": 47555
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0446999073028564,
      "learning_rate": 6.414891899317366e-06,
      "loss": 2.3616,
      "step": 47556
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1110285520553589,
      "learning_rate": 6.414507535560601e-06,
      "loss": 2.0568,
      "step": 47557
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0446027517318726,
      "learning_rate": 6.414123177882221e-06,
      "loss": 2.3613,
      "step": 47558
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0670831203460693,
      "learning_rate": 6.413738826282876e-06,
      "loss": 2.3313,
      "step": 47559
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0427864789962769,
      "learning_rate": 6.413354480763217e-06,
      "loss": 2.2554,
      "step": 47560
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0267633199691772,
      "learning_rate": 6.4129701413239e-06,
      "loss": 2.5283,
      "step": 47561
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0551650524139404,
      "learning_rate": 6.412585807965568e-06,
      "loss": 2.4224,
      "step": 47562
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0551235675811768,
      "learning_rate": 6.412201480688879e-06,
      "loss": 2.4571,
      "step": 47563
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1238253116607666,
      "learning_rate": 6.411817159494482e-06,
      "loss": 2.4078,
      "step": 47564
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2027852535247803,
      "learning_rate": 6.411432844383029e-06,
      "loss": 2.2525,
      "step": 47565
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.012561321258545,
      "learning_rate": 6.411048535355174e-06,
      "loss": 2.5079,
      "step": 47566
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9918828010559082,
      "learning_rate": 6.410664232411565e-06,
      "loss": 2.2025,
      "step": 47567
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.186218023300171,
      "learning_rate": 6.410279935552856e-06,
      "loss": 2.5783,
      "step": 47568
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.01554274559021,
      "learning_rate": 6.409895644779696e-06,
      "loss": 2.2915,
      "step": 47569
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0494470596313477,
      "learning_rate": 6.40951136009274e-06,
      "loss": 2.4075,
      "step": 47570
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1770964860916138,
      "learning_rate": 6.409127081492636e-06,
      "loss": 2.2262,
      "step": 47571
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1760728359222412,
      "learning_rate": 6.408742808980037e-06,
      "loss": 2.3963,
      "step": 47572
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3674498796463013,
      "learning_rate": 6.408358542555595e-06,
      "loss": 2.3287,
      "step": 47573
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0410939455032349,
      "learning_rate": 6.407974282219962e-06,
      "loss": 2.4567,
      "step": 47574
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2172397375106812,
      "learning_rate": 6.407590027973787e-06,
      "loss": 2.1595,
      "step": 47575
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0104072093963623,
      "learning_rate": 6.4072057798177215e-06,
      "loss": 2.2839,
      "step": 47576
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.179337501525879,
      "learning_rate": 6.406821537752419e-06,
      "loss": 2.0853,
      "step": 47577
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0807024240493774,
      "learning_rate": 6.40643730177853e-06,
      "loss": 2.2512,
      "step": 47578
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9914839863777161,
      "learning_rate": 6.406053071896705e-06,
      "loss": 2.2545,
      "step": 47579
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0238043069839478,
      "learning_rate": 6.405668848107596e-06,
      "loss": 2.3624,
      "step": 47580
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.063887596130371,
      "learning_rate": 6.405284630411854e-06,
      "loss": 2.3223,
      "step": 47581
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0118510723114014,
      "learning_rate": 6.404900418810132e-06,
      "loss": 2.3387,
      "step": 47582
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0467369556427002,
      "learning_rate": 6.40451621330308e-06,
      "loss": 2.2829,
      "step": 47583
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0260272026062012,
      "learning_rate": 6.404132013891349e-06,
      "loss": 2.5999,
      "step": 47584
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0466282367706299,
      "learning_rate": 6.403747820575592e-06,
      "loss": 2.3583,
      "step": 47585
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9697705507278442,
      "learning_rate": 6.403363633356458e-06,
      "loss": 2.1247,
      "step": 47586
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0726922750473022,
      "learning_rate": 6.4029794522346035e-06,
      "loss": 2.2433,
      "step": 47587
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.882290005683899,
      "learning_rate": 6.402595277210672e-06,
      "loss": 2.3995,
      "step": 47588
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.14337158203125,
      "learning_rate": 6.402211108285319e-06,
      "loss": 2.1342,
      "step": 47589
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1279140710830688,
      "learning_rate": 6.401826945459194e-06,
      "loss": 2.4001,
      "step": 47590
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.067787766456604,
      "learning_rate": 6.40144278873295e-06,
      "loss": 2.3333,
      "step": 47591
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1365200281143188,
      "learning_rate": 6.401058638107238e-06,
      "loss": 2.3237,
      "step": 47592
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0078872442245483,
      "learning_rate": 6.4006744935827105e-06,
      "loss": 2.0914,
      "step": 47593
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1817512512207031,
      "learning_rate": 6.400290355160015e-06,
      "loss": 2.2875,
      "step": 47594
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.271214246749878,
      "learning_rate": 6.399906222839807e-06,
      "loss": 2.2671,
      "step": 47595
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0169435739517212,
      "learning_rate": 6.399522096622734e-06,
      "loss": 2.4842,
      "step": 47596
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1037611961364746,
      "learning_rate": 6.3991379765094505e-06,
      "loss": 2.5182,
      "step": 47597
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1201814413070679,
      "learning_rate": 6.398753862500605e-06,
      "loss": 2.2341,
      "step": 47598
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0502431392669678,
      "learning_rate": 6.3983697545968505e-06,
      "loss": 2.3275,
      "step": 47599
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1735951900482178,
      "learning_rate": 6.3979856527988415e-06,
      "loss": 2.5256,
      "step": 47600
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0960248708724976,
      "learning_rate": 6.39760155710722e-06,
      "loss": 2.4626,
      "step": 47601
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0771368741989136,
      "learning_rate": 6.3972174675226425e-06,
      "loss": 2.2317,
      "step": 47602
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1812664270401,
      "learning_rate": 6.39683338404576e-06,
      "loss": 2.4474,
      "step": 47603
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1403683423995972,
      "learning_rate": 6.396449306677225e-06,
      "loss": 2.5552,
      "step": 47604
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1058722734451294,
      "learning_rate": 6.396065235417685e-06,
      "loss": 2.2782,
      "step": 47605
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1291711330413818,
      "learning_rate": 6.395681170267798e-06,
      "loss": 2.0856,
      "step": 47606
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0143969058990479,
      "learning_rate": 6.395297111228206e-06,
      "loss": 2.1172,
      "step": 47607
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0593713521957397,
      "learning_rate": 6.394913058299565e-06,
      "loss": 2.3603,
      "step": 47608
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0549896955490112,
      "learning_rate": 6.394529011482526e-06,
      "loss": 2.4339,
      "step": 47609
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0599256753921509,
      "learning_rate": 6.394144970777738e-06,
      "loss": 2.1762,
      "step": 47610
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0336309671401978,
      "learning_rate": 6.393760936185855e-06,
      "loss": 2.5037,
      "step": 47611
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0289384126663208,
      "learning_rate": 6.393376907707527e-06,
      "loss": 2.2757,
      "step": 47612
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0377532243728638,
      "learning_rate": 6.392992885343403e-06,
      "loss": 2.2351,
      "step": 47613
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0937341451644897,
      "learning_rate": 6.392608869094141e-06,
      "loss": 1.9905,
      "step": 47614
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0871798992156982,
      "learning_rate": 6.392224858960383e-06,
      "loss": 2.394,
      "step": 47615
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.01997971534729,
      "learning_rate": 6.391840854942784e-06,
      "loss": 2.1901,
      "step": 47616
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0549867153167725,
      "learning_rate": 6.391456857041994e-06,
      "loss": 2.202,
      "step": 47617
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0124683380126953,
      "learning_rate": 6.3910728652586625e-06,
      "loss": 2.299,
      "step": 47618
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0246168375015259,
      "learning_rate": 6.390688879593445e-06,
      "loss": 2.3944,
      "step": 47619
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1115622520446777,
      "learning_rate": 6.390304900046991e-06,
      "loss": 2.38,
      "step": 47620
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0753912925720215,
      "learning_rate": 6.389920926619949e-06,
      "loss": 2.2649,
      "step": 47621
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1419715881347656,
      "learning_rate": 6.389536959312972e-06,
      "loss": 2.4108,
      "step": 47622
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2366811037063599,
      "learning_rate": 6.389152998126711e-06,
      "loss": 2.2584,
      "step": 47623
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0516250133514404,
      "learning_rate": 6.388769043061816e-06,
      "loss": 2.1859,
      "step": 47624
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0352282524108887,
      "learning_rate": 6.388385094118937e-06,
      "loss": 2.2925,
      "step": 47625
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2505004405975342,
      "learning_rate": 6.388001151298728e-06,
      "loss": 2.2051,
      "step": 47626
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.069109320640564,
      "learning_rate": 6.387617214601841e-06,
      "loss": 2.3637,
      "step": 47627
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2102710008621216,
      "learning_rate": 6.38723328402892e-06,
      "loss": 2.2948,
      "step": 47628
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1034870147705078,
      "learning_rate": 6.38684935958062e-06,
      "loss": 2.3448,
      "step": 47629
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1178690195083618,
      "learning_rate": 6.386465441257592e-06,
      "loss": 2.0611,
      "step": 47630
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9971075654029846,
      "learning_rate": 6.386081529060486e-06,
      "loss": 2.3161,
      "step": 47631
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1201871633529663,
      "learning_rate": 6.385697622989953e-06,
      "loss": 2.0588,
      "step": 47632
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.105709195137024,
      "learning_rate": 6.385313723046645e-06,
      "loss": 2.2899,
      "step": 47633
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1164065599441528,
      "learning_rate": 6.384929829231215e-06,
      "loss": 2.0418,
      "step": 47634
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4615590572357178,
      "learning_rate": 6.384545941544307e-06,
      "loss": 2.3876,
      "step": 47635
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0152840614318848,
      "learning_rate": 6.384162059986575e-06,
      "loss": 2.3844,
      "step": 47636
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9852803349494934,
      "learning_rate": 6.383778184558671e-06,
      "loss": 2.196,
      "step": 47637
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1146697998046875,
      "learning_rate": 6.3833943152612465e-06,
      "loss": 2.3125,
      "step": 47638
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.018723487854004,
      "learning_rate": 6.383010452094949e-06,
      "loss": 2.5747,
      "step": 47639
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2339060306549072,
      "learning_rate": 6.382626595060437e-06,
      "loss": 2.4655,
      "step": 47640
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0233829021453857,
      "learning_rate": 6.38224274415835e-06,
      "loss": 2.4711,
      "step": 47641
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2732911109924316,
      "learning_rate": 6.381858899389344e-06,
      "loss": 2.3212,
      "step": 47642
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9806735515594482,
      "learning_rate": 6.381475060754071e-06,
      "loss": 2.193,
      "step": 47643
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1408658027648926,
      "learning_rate": 6.38109122825318e-06,
      "loss": 2.2512,
      "step": 47644
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0792690515518188,
      "learning_rate": 6.380707401887322e-06,
      "loss": 2.2698,
      "step": 47645
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1717469692230225,
      "learning_rate": 6.380323581657148e-06,
      "loss": 2.3356,
      "step": 47646
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0655981302261353,
      "learning_rate": 6.379939767563308e-06,
      "loss": 2.4322,
      "step": 47647
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1389334201812744,
      "learning_rate": 6.3795559596064535e-06,
      "loss": 2.306,
      "step": 47648
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0787880420684814,
      "learning_rate": 6.379172157787237e-06,
      "loss": 2.4778,
      "step": 47649
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0958207845687866,
      "learning_rate": 6.378788362106305e-06,
      "loss": 2.5738,
      "step": 47650
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.224547028541565,
      "learning_rate": 6.378404572564311e-06,
      "loss": 2.2112,
      "step": 47651
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.111067771911621,
      "learning_rate": 6.378020789161905e-06,
      "loss": 2.3456,
      "step": 47652
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0919309854507446,
      "learning_rate": 6.37763701189974e-06,
      "loss": 2.3315,
      "step": 47653
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0693949460983276,
      "learning_rate": 6.377253240778462e-06,
      "loss": 2.3367,
      "step": 47654
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1225833892822266,
      "learning_rate": 6.376869475798723e-06,
      "loss": 2.2286,
      "step": 47655
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.075656533241272,
      "learning_rate": 6.376485716961174e-06,
      "loss": 2.254,
      "step": 47656
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0612186193466187,
      "learning_rate": 6.376101964266465e-06,
      "loss": 2.269,
      "step": 47657
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1312357187271118,
      "learning_rate": 6.37571821771525e-06,
      "loss": 2.374,
      "step": 47658
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2713266611099243,
      "learning_rate": 6.3753344773081745e-06,
      "loss": 2.4137,
      "step": 47659
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0700627565383911,
      "learning_rate": 6.374950743045892e-06,
      "loss": 2.3084,
      "step": 47660
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.22170889377594,
      "learning_rate": 6.374567014929054e-06,
      "loss": 2.1836,
      "step": 47661
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2210547924041748,
      "learning_rate": 6.374183292958309e-06,
      "loss": 2.2385,
      "step": 47662
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0168654918670654,
      "learning_rate": 6.373799577134308e-06,
      "loss": 2.1088,
      "step": 47663
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1026383638381958,
      "learning_rate": 6.373415867457702e-06,
      "loss": 2.3103,
      "step": 47664
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1247152090072632,
      "learning_rate": 6.373032163929141e-06,
      "loss": 2.3536,
      "step": 47665
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0470080375671387,
      "learning_rate": 6.372648466549276e-06,
      "loss": 2.3319,
      "step": 47666
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.146276831626892,
      "learning_rate": 6.372264775318755e-06,
      "loss": 2.3793,
      "step": 47667
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0162931680679321,
      "learning_rate": 6.371881090238234e-06,
      "loss": 2.2171,
      "step": 47668
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.985549807548523,
      "learning_rate": 6.371497411308356e-06,
      "loss": 2.313,
      "step": 47669
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1202303171157837,
      "learning_rate": 6.371113738529777e-06,
      "loss": 2.3741,
      "step": 47670
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.123478651046753,
      "learning_rate": 6.3707300719031465e-06,
      "loss": 2.43,
      "step": 47671
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1986769437789917,
      "learning_rate": 6.3703464114291125e-06,
      "loss": 2.2299,
      "step": 47672
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.053709864616394,
      "learning_rate": 6.369962757108328e-06,
      "loss": 2.3958,
      "step": 47673
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1686676740646362,
      "learning_rate": 6.3695791089414415e-06,
      "loss": 2.4685,
      "step": 47674
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0639628171920776,
      "learning_rate": 6.369195466929106e-06,
      "loss": 2.4071,
      "step": 47675
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1441575288772583,
      "learning_rate": 6.368811831071968e-06,
      "loss": 2.29,
      "step": 47676
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9993460774421692,
      "learning_rate": 6.368428201370682e-06,
      "loss": 2.3164,
      "step": 47677
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0739643573760986,
      "learning_rate": 6.368044577825897e-06,
      "loss": 2.4452,
      "step": 47678
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9772903323173523,
      "learning_rate": 6.367660960438262e-06,
      "loss": 2.3988,
      "step": 47679
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0558980703353882,
      "learning_rate": 6.367277349208432e-06,
      "loss": 2.3193,
      "step": 47680
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0548995733261108,
      "learning_rate": 6.36689374413705e-06,
      "loss": 2.2866,
      "step": 47681
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0382229089736938,
      "learning_rate": 6.366510145224769e-06,
      "loss": 2.4945,
      "step": 47682
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1436975002288818,
      "learning_rate": 6.36612655247224e-06,
      "loss": 2.3894,
      "step": 47683
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1316255331039429,
      "learning_rate": 6.365742965880115e-06,
      "loss": 2.4703,
      "step": 47684
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0290517807006836,
      "learning_rate": 6.365359385449042e-06,
      "loss": 2.5343,
      "step": 47685
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0064491033554077,
      "learning_rate": 6.364975811179672e-06,
      "loss": 2.3463,
      "step": 47686
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.125594139099121,
      "learning_rate": 6.364592243072656e-06,
      "loss": 2.2347,
      "step": 47687
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1970851421356201,
      "learning_rate": 6.364208681128642e-06,
      "loss": 2.4044,
      "step": 47688
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9760374426841736,
      "learning_rate": 6.363825125348282e-06,
      "loss": 2.3989,
      "step": 47689
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0341908931732178,
      "learning_rate": 6.363441575732229e-06,
      "loss": 2.4403,
      "step": 47690
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1974806785583496,
      "learning_rate": 6.363058032281127e-06,
      "loss": 2.3314,
      "step": 47691
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0867828130722046,
      "learning_rate": 6.362674494995631e-06,
      "loss": 2.2349,
      "step": 47692
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1647306680679321,
      "learning_rate": 6.36229096387639e-06,
      "loss": 2.2598,
      "step": 47693
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.020991325378418,
      "learning_rate": 6.361907438924053e-06,
      "loss": 2.3953,
      "step": 47694
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0650885105133057,
      "learning_rate": 6.3615239201392695e-06,
      "loss": 2.1735,
      "step": 47695
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1220756769180298,
      "learning_rate": 6.361140407522694e-06,
      "loss": 2.4815,
      "step": 47696
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1756391525268555,
      "learning_rate": 6.360756901074973e-06,
      "loss": 2.3417,
      "step": 47697
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.221838355064392,
      "learning_rate": 6.360373400796756e-06,
      "loss": 2.128,
      "step": 47698
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0065538883209229,
      "learning_rate": 6.359989906688695e-06,
      "loss": 2.2785,
      "step": 47699
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.029708981513977,
      "learning_rate": 6.3596064187514405e-06,
      "loss": 2.3177,
      "step": 47700
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.191164493560791,
      "learning_rate": 6.359222936985641e-06,
      "loss": 1.9889,
      "step": 47701
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0742402076721191,
      "learning_rate": 6.358839461391947e-06,
      "loss": 2.3111,
      "step": 47702
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0951106548309326,
      "learning_rate": 6.358455991971008e-06,
      "loss": 2.0333,
      "step": 47703
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9625242948532104,
      "learning_rate": 6.3580725287234776e-06,
      "loss": 2.3299,
      "step": 47704
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0573633909225464,
      "learning_rate": 6.357689071650003e-06,
      "loss": 2.3303,
      "step": 47705
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1480257511138916,
      "learning_rate": 6.3573056207512375e-06,
      "loss": 2.3854,
      "step": 47706
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.086391806602478,
      "learning_rate": 6.356922176027825e-06,
      "loss": 2.1753,
      "step": 47707
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0766223669052124,
      "learning_rate": 6.356538737480419e-06,
      "loss": 2.1123,
      "step": 47708
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.029111623764038,
      "learning_rate": 6.356155305109669e-06,
      "loss": 2.1877,
      "step": 47709
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0900284051895142,
      "learning_rate": 6.355771878916226e-06,
      "loss": 2.2016,
      "step": 47710
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9962533712387085,
      "learning_rate": 6.355388458900739e-06,
      "loss": 2.3398,
      "step": 47711
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.019582748413086,
      "learning_rate": 6.3550050450638575e-06,
      "loss": 2.3488,
      "step": 47712
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2634798288345337,
      "learning_rate": 6.354621637406234e-06,
      "loss": 2.3896,
      "step": 47713
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0834600925445557,
      "learning_rate": 6.354238235928516e-06,
      "loss": 2.215,
      "step": 47714
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9563963413238525,
      "learning_rate": 6.3538548406313546e-06,
      "loss": 2.2432,
      "step": 47715
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0700453519821167,
      "learning_rate": 6.353471451515399e-06,
      "loss": 2.3953,
      "step": 47716
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1008539199829102,
      "learning_rate": 6.3530880685813015e-06,
      "loss": 2.2116,
      "step": 47717
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0148073434829712,
      "learning_rate": 6.35270469182971e-06,
      "loss": 2.2007,
      "step": 47718
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0608160495758057,
      "learning_rate": 6.352321321261276e-06,
      "loss": 2.3589,
      "step": 47719
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1712291240692139,
      "learning_rate": 6.351937956876646e-06,
      "loss": 2.4404,
      "step": 47720
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.123409390449524,
      "learning_rate": 6.35155459867647e-06,
      "loss": 2.4739,
      "step": 47721
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1921621561050415,
      "learning_rate": 6.351171246661403e-06,
      "loss": 2.249,
      "step": 47722
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0883815288543701,
      "learning_rate": 6.3507879008320896e-06,
      "loss": 2.3661,
      "step": 47723
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1011921167373657,
      "learning_rate": 6.350404561189185e-06,
      "loss": 2.5791,
      "step": 47724
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.331952691078186,
      "learning_rate": 6.3500212277333325e-06,
      "loss": 2.2438,
      "step": 47725
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9914601445198059,
      "learning_rate": 6.349637900465186e-06,
      "loss": 2.2703,
      "step": 47726
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0461729764938354,
      "learning_rate": 6.349254579385396e-06,
      "loss": 2.2206,
      "step": 47727
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1340326070785522,
      "learning_rate": 6.348871264494609e-06,
      "loss": 2.2879,
      "step": 47728
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2946521043777466,
      "learning_rate": 6.3484879557934775e-06,
      "loss": 2.5017,
      "step": 47729
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0357009172439575,
      "learning_rate": 6.348104653282651e-06,
      "loss": 2.3097,
      "step": 47730
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0568984746932983,
      "learning_rate": 6.3477213569627785e-06,
      "loss": 2.2557,
      "step": 47731
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0974633693695068,
      "learning_rate": 6.347338066834514e-06,
      "loss": 2.1669,
      "step": 47732
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0228416919708252,
      "learning_rate": 6.3469547828985e-06,
      "loss": 2.1907,
      "step": 47733
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1109583377838135,
      "learning_rate": 6.34657150515539e-06,
      "loss": 2.3642,
      "step": 47734
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0712125301361084,
      "learning_rate": 6.346188233605835e-06,
      "loss": 2.3684,
      "step": 47735
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0112042427062988,
      "learning_rate": 6.34580496825048e-06,
      "loss": 2.5066,
      "step": 47736
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2646325826644897,
      "learning_rate": 6.3454217090899804e-06,
      "loss": 2.1972,
      "step": 47737
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1197457313537598,
      "learning_rate": 6.345038456124983e-06,
      "loss": 2.46,
      "step": 47738
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5424648523330688,
      "learning_rate": 6.344655209356138e-06,
      "loss": 2.1996,
      "step": 47739
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1661571264266968,
      "learning_rate": 6.3442719687840945e-06,
      "loss": 2.1279,
      "step": 47740
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0073590278625488,
      "learning_rate": 6.343888734409504e-06,
      "loss": 2.1962,
      "step": 47741
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.004168152809143,
      "learning_rate": 6.343505506233014e-06,
      "loss": 2.25,
      "step": 47742
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0227024555206299,
      "learning_rate": 6.3431222842552765e-06,
      "loss": 2.5051,
      "step": 47743
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.955055832862854,
      "learning_rate": 6.34273906847694e-06,
      "loss": 2.0602,
      "step": 47744
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1785171031951904,
      "learning_rate": 6.3423558588986564e-06,
      "loss": 2.3907,
      "step": 47745
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1299278736114502,
      "learning_rate": 6.34197265552107e-06,
      "loss": 2.3234,
      "step": 47746
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0681462287902832,
      "learning_rate": 6.341589458344835e-06,
      "loss": 2.3105,
      "step": 47747
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0476531982421875,
      "learning_rate": 6.341206267370597e-06,
      "loss": 2.0184,
      "step": 47748
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1012580394744873,
      "learning_rate": 6.340823082599009e-06,
      "loss": 2.3035,
      "step": 47749
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0477118492126465,
      "learning_rate": 6.3404399040307196e-06,
      "loss": 2.2463,
      "step": 47750
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9957298636436462,
      "learning_rate": 6.340056731666379e-06,
      "loss": 2.3612,
      "step": 47751
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2222471237182617,
      "learning_rate": 6.339673565506638e-06,
      "loss": 2.3409,
      "step": 47752
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.05709969997406,
      "learning_rate": 6.339290405552142e-06,
      "loss": 2.3124,
      "step": 47753
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9395546913146973,
      "learning_rate": 6.3389072518035435e-06,
      "loss": 2.2852,
      "step": 47754
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0700390338897705,
      "learning_rate": 6.338524104261492e-06,
      "loss": 2.4131,
      "step": 47755
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0672205686569214,
      "learning_rate": 6.338140962926636e-06,
      "loss": 2.2193,
      "step": 47756
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1113883256912231,
      "learning_rate": 6.337757827799625e-06,
      "loss": 2.3849,
      "step": 47757
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0377825498580933,
      "learning_rate": 6.337374698881109e-06,
      "loss": 2.2495,
      "step": 47758
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2023561000823975,
      "learning_rate": 6.336991576171742e-06,
      "loss": 2.339,
      "step": 47759
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.097856044769287,
      "learning_rate": 6.336608459672166e-06,
      "loss": 2.3904,
      "step": 47760
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0289815664291382,
      "learning_rate": 6.336225349383034e-06,
      "loss": 2.4065,
      "step": 47761
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0122116804122925,
      "learning_rate": 6.335842245304994e-06,
      "loss": 2.1584,
      "step": 47762
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2191722393035889,
      "learning_rate": 6.335459147438696e-06,
      "loss": 2.4218,
      "step": 47763
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.054872989654541,
      "learning_rate": 6.335076055784793e-06,
      "loss": 2.3791,
      "step": 47764
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9804320335388184,
      "learning_rate": 6.334692970343928e-06,
      "loss": 2.383,
      "step": 47765
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0798152685165405,
      "learning_rate": 6.334309891116754e-06,
      "loss": 2.6977,
      "step": 47766
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9731078743934631,
      "learning_rate": 6.333926818103923e-06,
      "loss": 2.2259,
      "step": 47767
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0219753980636597,
      "learning_rate": 6.333543751306079e-06,
      "loss": 2.283,
      "step": 47768
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.074633002281189,
      "learning_rate": 6.333160690723876e-06,
      "loss": 2.3723,
      "step": 47769
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1484266519546509,
      "learning_rate": 6.332777636357961e-06,
      "loss": 2.435,
      "step": 47770
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0775833129882812,
      "learning_rate": 6.332394588208984e-06,
      "loss": 2.297,
      "step": 47771
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9964659214019775,
      "learning_rate": 6.332011546277599e-06,
      "loss": 2.1178,
      "step": 47772
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0937604904174805,
      "learning_rate": 6.331628510564445e-06,
      "loss": 2.4438,
      "step": 47773
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0731512308120728,
      "learning_rate": 6.331245481070178e-06,
      "loss": 2.1929,
      "step": 47774
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0566431283950806,
      "learning_rate": 6.3308624577954456e-06,
      "loss": 2.3003,
      "step": 47775
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0261085033416748,
      "learning_rate": 6.330479440740899e-06,
      "loss": 2.4938,
      "step": 47776
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0246211290359497,
      "learning_rate": 6.330096429907185e-06,
      "loss": 2.2962,
      "step": 47777
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.063499927520752,
      "learning_rate": 6.329713425294955e-06,
      "loss": 2.1402,
      "step": 47778
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0132917165756226,
      "learning_rate": 6.329330426904858e-06,
      "loss": 2.2705,
      "step": 47779
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.023257851600647,
      "learning_rate": 6.328947434737544e-06,
      "loss": 2.3524,
      "step": 47780
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0780653953552246,
      "learning_rate": 6.328564448793659e-06,
      "loss": 2.2375,
      "step": 47781
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1218514442443848,
      "learning_rate": 6.328181469073856e-06,
      "loss": 2.501,
      "step": 47782
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0795011520385742,
      "learning_rate": 6.327798495578781e-06,
      "loss": 2.4201,
      "step": 47783
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1384426355361938,
      "learning_rate": 6.327415528309087e-06,
      "loss": 2.5249,
      "step": 47784
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.203622817993164,
      "learning_rate": 6.327032567265421e-06,
      "loss": 2.3705,
      "step": 47785
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.189512014389038,
      "learning_rate": 6.3266496124484325e-06,
      "loss": 2.5776,
      "step": 47786
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0747989416122437,
      "learning_rate": 6.326266663858769e-06,
      "loss": 2.4263,
      "step": 47787
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0943032503128052,
      "learning_rate": 6.325883721497082e-06,
      "loss": 2.4451,
      "step": 47788
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1653554439544678,
      "learning_rate": 6.32550078536402e-06,
      "loss": 2.2961,
      "step": 47789
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.046524167060852,
      "learning_rate": 6.325117855460231e-06,
      "loss": 2.3821,
      "step": 47790
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0405399799346924,
      "learning_rate": 6.324734931786365e-06,
      "loss": 2.3826,
      "step": 47791
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.003070592880249,
      "learning_rate": 6.3243520143430735e-06,
      "loss": 2.4737,
      "step": 47792
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1547998189926147,
      "learning_rate": 6.323969103131002e-06,
      "loss": 2.4752,
      "step": 47793
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0000097751617432,
      "learning_rate": 6.3235861981508015e-06,
      "loss": 2.1577,
      "step": 47794
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1166741847991943,
      "learning_rate": 6.323203299403121e-06,
      "loss": 2.2533,
      "step": 47795
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0826966762542725,
      "learning_rate": 6.32282040688861e-06,
      "loss": 2.2054,
      "step": 47796
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.099440336227417,
      "learning_rate": 6.322437520607916e-06,
      "loss": 2.3533,
      "step": 47797
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0496207475662231,
      "learning_rate": 6.3220546405616944e-06,
      "loss": 2.4614,
      "step": 47798
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0240094661712646,
      "learning_rate": 6.321671766750586e-06,
      "loss": 2.196,
      "step": 47799
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.201435923576355,
      "learning_rate": 6.3212888991752405e-06,
      "loss": 2.469,
      "step": 47800
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.008391261100769,
      "learning_rate": 6.320906037836311e-06,
      "loss": 2.2092,
      "step": 47801
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.036963939666748,
      "learning_rate": 6.320523182734444e-06,
      "loss": 2.1494,
      "step": 47802
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.108391284942627,
      "learning_rate": 6.32014033387029e-06,
      "loss": 2.4063,
      "step": 47803
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1815327405929565,
      "learning_rate": 6.319757491244498e-06,
      "loss": 2.3072,
      "step": 47804
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4175782203674316,
      "learning_rate": 6.319374654857716e-06,
      "loss": 2.2253,
      "step": 47805
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.032816767692566,
      "learning_rate": 6.3189918247105945e-06,
      "loss": 2.4401,
      "step": 47806
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.03708016872406,
      "learning_rate": 6.3186090008037805e-06,
      "loss": 2.4351,
      "step": 47807
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1290860176086426,
      "learning_rate": 6.318226183137926e-06,
      "loss": 2.1763,
      "step": 47808
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0701656341552734,
      "learning_rate": 6.317843371713677e-06,
      "loss": 2.3781,
      "step": 47809
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1627548933029175,
      "learning_rate": 6.317460566531684e-06,
      "loss": 2.5493,
      "step": 47810
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0930098295211792,
      "learning_rate": 6.3170777675925965e-06,
      "loss": 2.2816,
      "step": 47811
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.033125638961792,
      "learning_rate": 6.316694974897061e-06,
      "loss": 2.2597,
      "step": 47812
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.060549020767212,
      "learning_rate": 6.316312188445727e-06,
      "loss": 2.2641,
      "step": 47813
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9886159300804138,
      "learning_rate": 6.315929408239248e-06,
      "loss": 2.0676,
      "step": 47814
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1495463848114014,
      "learning_rate": 6.315546634278264e-06,
      "loss": 2.392,
      "step": 47815
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0140588283538818,
      "learning_rate": 6.315163866563432e-06,
      "loss": 2.3237,
      "step": 47816
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0943890810012817,
      "learning_rate": 6.3147811050953976e-06,
      "loss": 2.421,
      "step": 47817
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0367175340652466,
      "learning_rate": 6.31439834987481e-06,
      "loss": 2.314,
      "step": 47818
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0019352436065674,
      "learning_rate": 6.314015600902318e-06,
      "loss": 2.2166,
      "step": 47819
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2527315616607666,
      "learning_rate": 6.313632858178571e-06,
      "loss": 2.3148,
      "step": 47820
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0834137201309204,
      "learning_rate": 6.313250121704216e-06,
      "loss": 2.242,
      "step": 47821
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0576066970825195,
      "learning_rate": 6.3128673914799044e-06,
      "loss": 2.6202,
      "step": 47822
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.086219310760498,
      "learning_rate": 6.312484667506285e-06,
      "loss": 2.4679,
      "step": 47823
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1991690397262573,
      "learning_rate": 6.312101949784004e-06,
      "loss": 2.4392,
      "step": 47824
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.041487455368042,
      "learning_rate": 6.311719238313717e-06,
      "loss": 2.4028,
      "step": 47825
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0808546543121338,
      "learning_rate": 6.311336533096063e-06,
      "loss": 2.6034,
      "step": 47826
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0992485284805298,
      "learning_rate": 6.310953834131695e-06,
      "loss": 2.2699,
      "step": 47827
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2717149257659912,
      "learning_rate": 6.310571141421263e-06,
      "loss": 2.2722,
      "step": 47828
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.022016167640686,
      "learning_rate": 6.310188454965414e-06,
      "loss": 2.1024,
      "step": 47829
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.03030264377594,
      "learning_rate": 6.309805774764799e-06,
      "loss": 2.1468,
      "step": 47830
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0951299667358398,
      "learning_rate": 6.309423100820065e-06,
      "loss": 2.5147,
      "step": 47831
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0945193767547607,
      "learning_rate": 6.309040433131861e-06,
      "loss": 2.3465,
      "step": 47832
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1313916444778442,
      "learning_rate": 6.3086577717008356e-06,
      "loss": 2.4115,
      "step": 47833
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1059519052505493,
      "learning_rate": 6.308275116527638e-06,
      "loss": 2.4237,
      "step": 47834
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1819677352905273,
      "learning_rate": 6.307892467612917e-06,
      "loss": 2.4221,
      "step": 47835
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2333186864852905,
      "learning_rate": 6.307509824957323e-06,
      "loss": 2.3211,
      "step": 47836
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0318406820297241,
      "learning_rate": 6.307127188561502e-06,
      "loss": 2.2926,
      "step": 47837
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1458805799484253,
      "learning_rate": 6.3067445584261035e-06,
      "loss": 2.4189,
      "step": 47838
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2139861583709717,
      "learning_rate": 6.306361934551775e-06,
      "loss": 2.3288,
      "step": 47839
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1323542594909668,
      "learning_rate": 6.305979316939165e-06,
      "loss": 2.0294,
      "step": 47840
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0249847173690796,
      "learning_rate": 6.305596705588923e-06,
      "loss": 2.1448,
      "step": 47841
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1303142309188843,
      "learning_rate": 6.305214100501702e-06,
      "loss": 2.4046,
      "step": 47842
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0823849439620972,
      "learning_rate": 6.304831501678144e-06,
      "loss": 2.2293,
      "step": 47843
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0247167348861694,
      "learning_rate": 6.3044489091189e-06,
      "loss": 2.4865,
      "step": 47844
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.100530743598938,
      "learning_rate": 6.304066322824618e-06,
      "loss": 2.4502,
      "step": 47845
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0832961797714233,
      "learning_rate": 6.303683742795948e-06,
      "loss": 2.4939,
      "step": 47846
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1121186017990112,
      "learning_rate": 6.303301169033538e-06,
      "loss": 2.1882,
      "step": 47847
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.160517930984497,
      "learning_rate": 6.302918601538037e-06,
      "loss": 2.2714,
      "step": 47848
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0091280937194824,
      "learning_rate": 6.3025360403100925e-06,
      "loss": 2.4214,
      "step": 47849
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9482077360153198,
      "learning_rate": 6.302153485350353e-06,
      "loss": 2.1737,
      "step": 47850
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1472065448760986,
      "learning_rate": 6.301770936659473e-06,
      "loss": 2.6029,
      "step": 47851
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9883297085762024,
      "learning_rate": 6.30138839423809e-06,
      "loss": 2.316,
      "step": 47852
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0704272985458374,
      "learning_rate": 6.301005858086861e-06,
      "loss": 2.4273,
      "step": 47853
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0489434003829956,
      "learning_rate": 6.30062332820643e-06,
      "loss": 2.4408,
      "step": 47854
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.200380802154541,
      "learning_rate": 6.300240804597446e-06,
      "loss": 2.3399,
      "step": 47855
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1319053173065186,
      "learning_rate": 6.2998582872605605e-06,
      "loss": 2.4873,
      "step": 47856
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.031348466873169,
      "learning_rate": 6.29947577619642e-06,
      "loss": 2.2539,
      "step": 47857
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0528345108032227,
      "learning_rate": 6.299093271405672e-06,
      "loss": 2.6127,
      "step": 47858
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1325246095657349,
      "learning_rate": 6.298710772888967e-06,
      "loss": 2.411,
      "step": 47859
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0122474431991577,
      "learning_rate": 6.298328280646952e-06,
      "loss": 2.447,
      "step": 47860
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1798722743988037,
      "learning_rate": 6.297945794680276e-06,
      "loss": 2.3836,
      "step": 47861
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0366897583007812,
      "learning_rate": 6.297563314989588e-06,
      "loss": 2.2476,
      "step": 47862
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1287263631820679,
      "learning_rate": 6.297180841575534e-06,
      "loss": 2.4126,
      "step": 47863
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9665277004241943,
      "learning_rate": 6.2967983744387705e-06,
      "loss": 2.3479,
      "step": 47864
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1177198886871338,
      "learning_rate": 6.296415913579934e-06,
      "loss": 2.1259,
      "step": 47865
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0316035747528076,
      "learning_rate": 6.29603345899968e-06,
      "loss": 2.3535,
      "step": 47866
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0445541143417358,
      "learning_rate": 6.295651010698654e-06,
      "loss": 2.3279,
      "step": 47867
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9798622727394104,
      "learning_rate": 6.295268568677505e-06,
      "loss": 2.2759,
      "step": 47868
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0588501691818237,
      "learning_rate": 6.2948861329368826e-06,
      "loss": 2.4324,
      "step": 47869
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0503504276275635,
      "learning_rate": 6.294503703477437e-06,
      "loss": 2.3449,
      "step": 47870
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0384920835494995,
      "learning_rate": 6.294121280299811e-06,
      "loss": 2.3041,
      "step": 47871
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9816679358482361,
      "learning_rate": 6.293738863404657e-06,
      "loss": 2.4072,
      "step": 47872
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1305011510849,
      "learning_rate": 6.29335645279262e-06,
      "loss": 2.2023,
      "step": 47873
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1952694654464722,
      "learning_rate": 6.292974048464353e-06,
      "loss": 2.1053,
      "step": 47874
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.267743706703186,
      "learning_rate": 6.292591650420501e-06,
      "loss": 2.3389,
      "step": 47875
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0787512063980103,
      "learning_rate": 6.292209258661713e-06,
      "loss": 2.3649,
      "step": 47876
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1210469007492065,
      "learning_rate": 6.291826873188642e-06,
      "loss": 2.2845,
      "step": 47877
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.076762080192566,
      "learning_rate": 6.291444494001927e-06,
      "loss": 2.4158,
      "step": 47878
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0968838930130005,
      "learning_rate": 6.291062121102221e-06,
      "loss": 2.3807,
      "step": 47879
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1236027479171753,
      "learning_rate": 6.290679754490172e-06,
      "loss": 2.3272,
      "step": 47880
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0494966506958008,
      "learning_rate": 6.290297394166428e-06,
      "loss": 2.6883,
      "step": 47881
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1549426317214966,
      "learning_rate": 6.2899150401316374e-06,
      "loss": 2.2661,
      "step": 47882
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0165472030639648,
      "learning_rate": 6.289532692386449e-06,
      "loss": 2.622,
      "step": 47883
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1149955987930298,
      "learning_rate": 6.28915035093151e-06,
      "loss": 2.4499,
      "step": 47884
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0726468563079834,
      "learning_rate": 6.288768015767469e-06,
      "loss": 2.4742,
      "step": 47885
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.483691930770874,
      "learning_rate": 6.288385686894975e-06,
      "loss": 2.3728,
      "step": 47886
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1331051588058472,
      "learning_rate": 6.288003364314674e-06,
      "loss": 2.4494,
      "step": 47887
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0565142631530762,
      "learning_rate": 6.287621048027216e-06,
      "loss": 2.4993,
      "step": 47888
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1420613527297974,
      "learning_rate": 6.287238738033249e-06,
      "loss": 2.2784,
      "step": 47889
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.515697956085205,
      "learning_rate": 6.2868564343334236e-06,
      "loss": 2.573,
      "step": 47890
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.056667447090149,
      "learning_rate": 6.286474136928382e-06,
      "loss": 2.2356,
      "step": 47891
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1074076890945435,
      "learning_rate": 6.286091845818776e-06,
      "loss": 2.5085,
      "step": 47892
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0513522624969482,
      "learning_rate": 6.285709561005251e-06,
      "loss": 2.1175,
      "step": 47893
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1033977270126343,
      "learning_rate": 6.285327282488458e-06,
      "loss": 2.5485,
      "step": 47894
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0952903032302856,
      "learning_rate": 6.284945010269046e-06,
      "loss": 2.2009,
      "step": 47895
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.376848578453064,
      "learning_rate": 6.284562744347659e-06,
      "loss": 2.3595,
      "step": 47896
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1056636571884155,
      "learning_rate": 6.2841804847249465e-06,
      "loss": 2.3642,
      "step": 47897
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0735986232757568,
      "learning_rate": 6.283798231401561e-06,
      "loss": 2.2058,
      "step": 47898
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0528143644332886,
      "learning_rate": 6.283415984378145e-06,
      "loss": 2.5264,
      "step": 47899
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9673346281051636,
      "learning_rate": 6.283033743655347e-06,
      "loss": 2.4753,
      "step": 47900
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0764957666397095,
      "learning_rate": 6.282651509233817e-06,
      "loss": 2.2664,
      "step": 47901
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2385581731796265,
      "learning_rate": 6.282269281114203e-06,
      "loss": 2.2487,
      "step": 47902
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1187514066696167,
      "learning_rate": 6.2818870592971516e-06,
      "loss": 2.4297,
      "step": 47903
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.09214186668396,
      "learning_rate": 6.281504843783315e-06,
      "loss": 2.3955,
      "step": 47904
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0347727537155151,
      "learning_rate": 6.281122634573334e-06,
      "loss": 2.4892,
      "step": 47905
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.112858533859253,
      "learning_rate": 6.28074043166786e-06,
      "loss": 2.4632,
      "step": 47906
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1528618335723877,
      "learning_rate": 6.280358235067542e-06,
      "loss": 2.5059,
      "step": 47907
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1654983758926392,
      "learning_rate": 6.279976044773026e-06,
      "loss": 1.9792,
      "step": 47908
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2817420959472656,
      "learning_rate": 6.279593860784962e-06,
      "loss": 2.5102,
      "step": 47909
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1263450384140015,
      "learning_rate": 6.279211683103996e-06,
      "loss": 2.5185,
      "step": 47910
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7501270771026611,
      "learning_rate": 6.278829511730778e-06,
      "loss": 2.429,
      "step": 47911
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1192227602005005,
      "learning_rate": 6.278447346665952e-06,
      "loss": 2.4927,
      "step": 47912
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9941861629486084,
      "learning_rate": 6.278065187910171e-06,
      "loss": 2.5624,
      "step": 47913
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.079447627067566,
      "learning_rate": 6.27768303546408e-06,
      "loss": 2.3309,
      "step": 47914
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0761915445327759,
      "learning_rate": 6.277300889328326e-06,
      "loss": 2.3359,
      "step": 47915
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9428544640541077,
      "learning_rate": 6.276918749503558e-06,
      "loss": 2.3762,
      "step": 47916
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1616852283477783,
      "learning_rate": 6.276536615990429e-06,
      "loss": 2.141,
      "step": 47917
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0803775787353516,
      "learning_rate": 6.276154488789577e-06,
      "loss": 2.4316,
      "step": 47918
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0029398202896118,
      "learning_rate": 6.275772367901656e-06,
      "loss": 2.2983,
      "step": 47919
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0692594051361084,
      "learning_rate": 6.275390253327311e-06,
      "loss": 2.284,
      "step": 47920
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2402989864349365,
      "learning_rate": 6.27500814506719e-06,
      "loss": 2.362,
      "step": 47921
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0974276065826416,
      "learning_rate": 6.2746260431219445e-06,
      "loss": 2.3708,
      "step": 47922
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0602988004684448,
      "learning_rate": 6.274243947492218e-06,
      "loss": 2.3014,
      "step": 47923
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.050977110862732,
      "learning_rate": 6.27386185817866e-06,
      "loss": 2.2656,
      "step": 47924
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1092720031738281,
      "learning_rate": 6.273479775181919e-06,
      "loss": 2.1478,
      "step": 47925
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0938379764556885,
      "learning_rate": 6.273097698502643e-06,
      "loss": 2.3662,
      "step": 47926
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2320696115493774,
      "learning_rate": 6.272715628141477e-06,
      "loss": 2.4459,
      "step": 47927
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0357109308242798,
      "learning_rate": 6.27233356409907e-06,
      "loss": 2.0833,
      "step": 47928
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.134589433670044,
      "learning_rate": 6.27195150637607e-06,
      "loss": 2.3664,
      "step": 47929
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0858622789382935,
      "learning_rate": 6.271569454973128e-06,
      "loss": 2.2165,
      "step": 47930
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1109060049057007,
      "learning_rate": 6.2711874098908844e-06,
      "loss": 2.4664,
      "step": 47931
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0264923572540283,
      "learning_rate": 6.270805371129994e-06,
      "loss": 2.0474,
      "step": 47932
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0892976522445679,
      "learning_rate": 6.270423338691099e-06,
      "loss": 2.2125,
      "step": 47933
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0462372303009033,
      "learning_rate": 6.270041312574851e-06,
      "loss": 2.5488,
      "step": 47934
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.03488028049469,
      "learning_rate": 6.269659292781893e-06,
      "loss": 2.4092,
      "step": 47935
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1125954389572144,
      "learning_rate": 6.2692772793128776e-06,
      "loss": 2.2732,
      "step": 47936
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.131923794746399,
      "learning_rate": 6.26889527216845e-06,
      "loss": 2.2133,
      "step": 47937
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1102937459945679,
      "learning_rate": 6.268513271349259e-06,
      "loss": 2.3407,
      "step": 47938
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.045830249786377,
      "learning_rate": 6.268131276855951e-06,
      "loss": 2.2675,
      "step": 47939
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0672669410705566,
      "learning_rate": 6.267749288689173e-06,
      "loss": 2.2874,
      "step": 47940
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0686404705047607,
      "learning_rate": 6.267367306849574e-06,
      "loss": 2.3631,
      "step": 47941
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.023128628730774,
      "learning_rate": 6.266985331337802e-06,
      "loss": 2.3916,
      "step": 47942
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1529203653335571,
      "learning_rate": 6.2666033621545085e-06,
      "loss": 2.2788,
      "step": 47943
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1213171482086182,
      "learning_rate": 6.26622139930033e-06,
      "loss": 2.4527,
      "step": 47944
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0333651304244995,
      "learning_rate": 6.265839442775923e-06,
      "loss": 2.3844,
      "step": 47945
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.051339864730835,
      "learning_rate": 6.26545749258193e-06,
      "loss": 2.5245,
      "step": 47946
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0676853656768799,
      "learning_rate": 6.2650755487190016e-06,
      "loss": 2.2236,
      "step": 47947
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2228540182113647,
      "learning_rate": 6.264693611187784e-06,
      "loss": 2.4605,
      "step": 47948
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0362205505371094,
      "learning_rate": 6.264311679988927e-06,
      "loss": 2.4472,
      "step": 47949
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1641008853912354,
      "learning_rate": 6.263929755123075e-06,
      "loss": 2.2874,
      "step": 47950
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.133378028869629,
      "learning_rate": 6.263547836590877e-06,
      "loss": 2.4242,
      "step": 47951
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0084015130996704,
      "learning_rate": 6.26316592439298e-06,
      "loss": 2.352,
      "step": 47952
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1393777132034302,
      "learning_rate": 6.2627840185300315e-06,
      "loss": 2.3298,
      "step": 47953
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0984597206115723,
      "learning_rate": 6.262402119002682e-06,
      "loss": 2.3662,
      "step": 47954
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1181881427764893,
      "learning_rate": 6.262020225811574e-06,
      "loss": 2.2613,
      "step": 47955
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1343803405761719,
      "learning_rate": 6.2616383389573576e-06,
      "loss": 2.0274,
      "step": 47956
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0474470853805542,
      "learning_rate": 6.261256458440679e-06,
      "loss": 2.3277,
      "step": 47957
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9935150146484375,
      "learning_rate": 6.260874584262184e-06,
      "loss": 2.4759,
      "step": 47958
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9790512323379517,
      "learning_rate": 6.260492716422524e-06,
      "loss": 2.1654,
      "step": 47959
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0547670125961304,
      "learning_rate": 6.260110854922346e-06,
      "loss": 2.2375,
      "step": 47960
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9621797800064087,
      "learning_rate": 6.2597289997622935e-06,
      "loss": 2.0717,
      "step": 47961
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0480519533157349,
      "learning_rate": 6.259347150943017e-06,
      "loss": 2.2456,
      "step": 47962
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.163454294204712,
      "learning_rate": 6.258965308465164e-06,
      "loss": 2.2497,
      "step": 47963
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0294413566589355,
      "learning_rate": 6.258583472329378e-06,
      "loss": 2.3753,
      "step": 47964
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0264222621917725,
      "learning_rate": 6.25820164253631e-06,
      "loss": 2.2118,
      "step": 47965
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.09151291847229,
      "learning_rate": 6.257819819086608e-06,
      "loss": 2.2257,
      "step": 47966
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3352149724960327,
      "learning_rate": 6.257438001980916e-06,
      "loss": 2.3716,
      "step": 47967
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0013773441314697,
      "learning_rate": 6.2570561912198845e-06,
      "loss": 2.2741,
      "step": 47968
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.16360604763031,
      "learning_rate": 6.256674386804159e-06,
      "loss": 2.1999,
      "step": 47969
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2514245510101318,
      "learning_rate": 6.2562925887343915e-06,
      "loss": 2.3072,
      "step": 47970
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0194944143295288,
      "learning_rate": 6.25591079701122e-06,
      "loss": 2.3425,
      "step": 47971
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0802862644195557,
      "learning_rate": 6.255529011635296e-06,
      "loss": 2.586,
      "step": 47972
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.045775294303894,
      "learning_rate": 6.255147232607268e-06,
      "loss": 2.5832,
      "step": 47973
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0185121297836304,
      "learning_rate": 6.254765459927783e-06,
      "loss": 2.1828,
      "step": 47974
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.046502709388733,
      "learning_rate": 6.2543836935974875e-06,
      "loss": 2.3281,
      "step": 47975
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1393862962722778,
      "learning_rate": 6.254001933617029e-06,
      "loss": 2.4534,
      "step": 47976
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1835930347442627,
      "learning_rate": 6.2536201799870545e-06,
      "loss": 2.2362,
      "step": 47977
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0281800031661987,
      "learning_rate": 6.25323843270821e-06,
      "loss": 2.1721,
      "step": 47978
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.023085355758667,
      "learning_rate": 6.252856691781146e-06,
      "loss": 2.3037,
      "step": 47979
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0855493545532227,
      "learning_rate": 6.252474957206507e-06,
      "loss": 2.496,
      "step": 47980
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1206597089767456,
      "learning_rate": 6.25209322898494e-06,
      "loss": 2.3655,
      "step": 47981
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1946778297424316,
      "learning_rate": 6.251711507117096e-06,
      "loss": 2.4321,
      "step": 47982
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.9950321912765503,
      "learning_rate": 6.251329791603618e-06,
      "loss": 2.1824,
      "step": 47983
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0089266300201416,
      "learning_rate": 6.250948082445152e-06,
      "loss": 2.3401,
      "step": 47984
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0976433753967285,
      "learning_rate": 6.250566379642349e-06,
      "loss": 2.6423,
      "step": 47985
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0084503889083862,
      "learning_rate": 6.250184683195852e-06,
      "loss": 2.5236,
      "step": 47986
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0277620553970337,
      "learning_rate": 6.249802993106312e-06,
      "loss": 2.1509,
      "step": 47987
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1383600234985352,
      "learning_rate": 6.249421309374376e-06,
      "loss": 2.3237,
      "step": 47988
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0762909650802612,
      "learning_rate": 6.249039632000687e-06,
      "loss": 2.1916,
      "step": 47989
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1726093292236328,
      "learning_rate": 6.2486579609858955e-06,
      "loss": 2.1493,
      "step": 47990
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1222243309020996,
      "learning_rate": 6.2482762963306465e-06,
      "loss": 2.3372,
      "step": 47991
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0613174438476562,
      "learning_rate": 6.24789463803559e-06,
      "loss": 2.5831,
      "step": 47992
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0345088243484497,
      "learning_rate": 6.24751298610137e-06,
      "loss": 2.5475,
      "step": 47993
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1236939430236816,
      "learning_rate": 6.247131340528634e-06,
      "loss": 2.3703,
      "step": 47994
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1575889587402344,
      "learning_rate": 6.24674970131803e-06,
      "loss": 2.3736,
      "step": 47995
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0947067737579346,
      "learning_rate": 6.24636806847021e-06,
      "loss": 2.4862,
      "step": 47996
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1089352369308472,
      "learning_rate": 6.245986441985809e-06,
      "loss": 2.1194,
      "step": 47997
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0950194597244263,
      "learning_rate": 6.245604821865483e-06,
      "loss": 2.1803,
      "step": 47998
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.061855673789978,
      "learning_rate": 6.245223208109875e-06,
      "loss": 2.3749,
      "step": 47999
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.0416945219039917,
      "learning_rate": 6.244841600719634e-06,
      "loss": 2.3793,
      "step": 48000
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0874155759811401,
      "learning_rate": 6.244459999695407e-06,
      "loss": 2.2736,
      "step": 48001
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0120207071304321,
      "learning_rate": 6.244078405037839e-06,
      "loss": 2.4034,
      "step": 48002
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1493042707443237,
      "learning_rate": 6.24369681674758e-06,
      "loss": 2.4979,
      "step": 48003
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0736724138259888,
      "learning_rate": 6.2433152348252735e-06,
      "loss": 2.1646,
      "step": 48004
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1069828271865845,
      "learning_rate": 6.242933659271568e-06,
      "loss": 2.5182,
      "step": 48005
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0119752883911133,
      "learning_rate": 6.24255209008711e-06,
      "loss": 2.3244,
      "step": 48006
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2509440183639526,
      "learning_rate": 6.242170527272549e-06,
      "loss": 2.4498,
      "step": 48007
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0327421426773071,
      "learning_rate": 6.2417889708285275e-06,
      "loss": 2.3725,
      "step": 48008
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.10807466506958,
      "learning_rate": 6.2414074207556985e-06,
      "loss": 2.5775,
      "step": 48009
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1039563417434692,
      "learning_rate": 6.241025877054702e-06,
      "loss": 2.3989,
      "step": 48010
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.042667269706726,
      "learning_rate": 6.240644339726186e-06,
      "loss": 2.3298,
      "step": 48011
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0314416885375977,
      "learning_rate": 6.240262808770801e-06,
      "loss": 2.3595,
      "step": 48012
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1702077388763428,
      "learning_rate": 6.239881284189189e-06,
      "loss": 2.3937,
      "step": 48013
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1684563159942627,
      "learning_rate": 6.239499765982001e-06,
      "loss": 2.1857,
      "step": 48014
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0967772006988525,
      "learning_rate": 6.239118254149882e-06,
      "loss": 2.3181,
      "step": 48015
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2038124799728394,
      "learning_rate": 6.238736748693481e-06,
      "loss": 2.2792,
      "step": 48016
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1474664211273193,
      "learning_rate": 6.238355249613442e-06,
      "loss": 2.2534,
      "step": 48017
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.035210371017456,
      "learning_rate": 6.237973756910409e-06,
      "loss": 2.1313,
      "step": 48018
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0340484380722046,
      "learning_rate": 6.237592270585036e-06,
      "loss": 2.1102,
      "step": 48019
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0188013315200806,
      "learning_rate": 6.237210790637965e-06,
      "loss": 2.4785,
      "step": 48020
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1006228923797607,
      "learning_rate": 6.236829317069841e-06,
      "loss": 2.1932,
      "step": 48021
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.130133867263794,
      "learning_rate": 6.236447849881321e-06,
      "loss": 2.2395,
      "step": 48022
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0776665210723877,
      "learning_rate": 6.236066389073039e-06,
      "loss": 2.6451,
      "step": 48023
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.028385877609253,
      "learning_rate": 6.235684934645645e-06,
      "loss": 2.4002,
      "step": 48024
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1397994756698608,
      "learning_rate": 6.235303486599789e-06,
      "loss": 2.5444,
      "step": 48025
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.081100344657898,
      "learning_rate": 6.2349220449361155e-06,
      "loss": 2.2792,
      "step": 48026
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1444590091705322,
      "learning_rate": 6.234540609655273e-06,
      "loss": 2.3418,
      "step": 48027
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1325981616973877,
      "learning_rate": 6.234159180757905e-06,
      "loss": 2.6793,
      "step": 48028
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0495127439498901,
      "learning_rate": 6.233777758244662e-06,
      "loss": 2.1895,
      "step": 48029
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1188491582870483,
      "learning_rate": 6.233396342116187e-06,
      "loss": 2.4191,
      "step": 48030
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0331450700759888,
      "learning_rate": 6.233014932373128e-06,
      "loss": 2.5315,
      "step": 48031
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0931349992752075,
      "learning_rate": 6.232633529016132e-06,
      "loss": 2.2069,
      "step": 48032
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.003304362297058,
      "learning_rate": 6.2322521320458465e-06,
      "loss": 2.2158,
      "step": 48033
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0832024812698364,
      "learning_rate": 6.231870741462915e-06,
      "loss": 2.1862,
      "step": 48034
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1264681816101074,
      "learning_rate": 6.2314893572679925e-06,
      "loss": 2.5456,
      "step": 48035
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.032143235206604,
      "learning_rate": 6.231107979461713e-06,
      "loss": 2.4265,
      "step": 48036
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0163047313690186,
      "learning_rate": 6.23072660804473e-06,
      "loss": 2.2164,
      "step": 48037
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0455671548843384,
      "learning_rate": 6.230345243017689e-06,
      "loss": 2.2097,
      "step": 48038
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9541608095169067,
      "learning_rate": 6.229963884381236e-06,
      "loss": 2.3862,
      "step": 48039
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0739668607711792,
      "learning_rate": 6.22958253213602e-06,
      "loss": 2.3877,
      "step": 48040
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1021063327789307,
      "learning_rate": 6.229201186282684e-06,
      "loss": 2.3199,
      "step": 48041
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8180465698242188,
      "learning_rate": 6.228819846821877e-06,
      "loss": 2.2877,
      "step": 48042
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0861363410949707,
      "learning_rate": 6.228438513754244e-06,
      "loss": 2.1986,
      "step": 48043
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0589179992675781,
      "learning_rate": 6.228057187080434e-06,
      "loss": 2.1903,
      "step": 48044
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9692003130912781,
      "learning_rate": 6.22767586680109e-06,
      "loss": 2.458,
      "step": 48045
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.005098581314087,
      "learning_rate": 6.227294552916859e-06,
      "loss": 2.3354,
      "step": 48046
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1579537391662598,
      "learning_rate": 6.22691324542839e-06,
      "loss": 2.3108,
      "step": 48047
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0547773838043213,
      "learning_rate": 6.226531944336326e-06,
      "loss": 2.3863,
      "step": 48048
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0525743961334229,
      "learning_rate": 6.226150649641318e-06,
      "loss": 2.126,
      "step": 48049
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.016237735748291,
      "learning_rate": 6.22576936134401e-06,
      "loss": 2.1637,
      "step": 48050
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1622589826583862,
      "learning_rate": 6.225388079445045e-06,
      "loss": 2.3275,
      "step": 48051
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0976389646530151,
      "learning_rate": 6.225006803945073e-06,
      "loss": 2.3332,
      "step": 48052
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0340653657913208,
      "learning_rate": 6.22462553484474e-06,
      "loss": 2.1116,
      "step": 48053
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9802649021148682,
      "learning_rate": 6.224244272144693e-06,
      "loss": 2.2819,
      "step": 48054
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0867148637771606,
      "learning_rate": 6.223863015845575e-06,
      "loss": 2.3067,
      "step": 48055
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1515614986419678,
      "learning_rate": 6.223481765948036e-06,
      "loss": 2.4927,
      "step": 48056
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1530805826187134,
      "learning_rate": 6.223100522452722e-06,
      "loss": 2.3787,
      "step": 48057
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2282735109329224,
      "learning_rate": 6.222719285360277e-06,
      "loss": 2.4052,
      "step": 48058
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.276680588722229,
      "learning_rate": 6.22233805467135e-06,
      "loss": 2.2211,
      "step": 48059
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.12413489818573,
      "learning_rate": 6.221956830386585e-06,
      "loss": 2.151,
      "step": 48060
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0176305770874023,
      "learning_rate": 6.22157561250663e-06,
      "loss": 2.2391,
      "step": 48061
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0374504327774048,
      "learning_rate": 6.221194401032133e-06,
      "loss": 2.2253,
      "step": 48062
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9975455403327942,
      "learning_rate": 6.220813195963735e-06,
      "loss": 2.3055,
      "step": 48063
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0599569082260132,
      "learning_rate": 6.2204319973020855e-06,
      "loss": 2.3282,
      "step": 48064
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.178325891494751,
      "learning_rate": 6.22005080504783e-06,
      "loss": 2.4761,
      "step": 48065
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3497012853622437,
      "learning_rate": 6.219669619201614e-06,
      "loss": 2.0934,
      "step": 48066
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0016242265701294,
      "learning_rate": 6.219288439764086e-06,
      "loss": 2.2714,
      "step": 48067
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0172675848007202,
      "learning_rate": 6.218907266735889e-06,
      "loss": 2.2754,
      "step": 48068
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0400612354278564,
      "learning_rate": 6.218526100117674e-06,
      "loss": 2.3399,
      "step": 48069
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.101743221282959,
      "learning_rate": 6.218144939910082e-06,
      "loss": 2.4705,
      "step": 48070
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0600496530532837,
      "learning_rate": 6.217763786113763e-06,
      "loss": 2.2582,
      "step": 48071
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.252077341079712,
      "learning_rate": 6.217382638729363e-06,
      "loss": 2.2815,
      "step": 48072
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0332602262496948,
      "learning_rate": 6.217001497757524e-06,
      "loss": 2.5054,
      "step": 48073
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1610803604125977,
      "learning_rate": 6.216620363198895e-06,
      "loss": 2.4841,
      "step": 48074
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1444495916366577,
      "learning_rate": 6.216239235054125e-06,
      "loss": 2.2749,
      "step": 48075
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0994921922683716,
      "learning_rate": 6.215858113323856e-06,
      "loss": 2.2657,
      "step": 48076
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2382032871246338,
      "learning_rate": 6.2154769980087336e-06,
      "loss": 2.2592,
      "step": 48077
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0959770679473877,
      "learning_rate": 6.2150958891094085e-06,
      "loss": 2.4273,
      "step": 48078
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0148614645004272,
      "learning_rate": 6.2147147866265215e-06,
      "loss": 2.4481,
      "step": 48079
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1815826892852783,
      "learning_rate": 6.21433369056072e-06,
      "loss": 2.083,
      "step": 48080
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9997124671936035,
      "learning_rate": 6.213952600912652e-06,
      "loss": 2.1642,
      "step": 48081
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.032009482383728,
      "learning_rate": 6.213571517682963e-06,
      "loss": 2.1641,
      "step": 48082
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0990962982177734,
      "learning_rate": 6.2131904408722975e-06,
      "loss": 2.3031,
      "step": 48083
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1362663507461548,
      "learning_rate": 6.212809370481303e-06,
      "loss": 2.5159,
      "step": 48084
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1429972648620605,
      "learning_rate": 6.212428306510625e-06,
      "loss": 2.2578,
      "step": 48085
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0026593208312988,
      "learning_rate": 6.212047248960911e-06,
      "loss": 2.5041,
      "step": 48086
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0175540447235107,
      "learning_rate": 6.211666197832806e-06,
      "loss": 2.2662,
      "step": 48087
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0969626903533936,
      "learning_rate": 6.211285153126958e-06,
      "loss": 2.3565,
      "step": 48088
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0610418319702148,
      "learning_rate": 6.210904114844008e-06,
      "loss": 2.1932,
      "step": 48089
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.109427571296692,
      "learning_rate": 6.210523082984603e-06,
      "loss": 2.174,
      "step": 48090
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0365670919418335,
      "learning_rate": 6.210142057549391e-06,
      "loss": 2.3306,
      "step": 48091
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1082940101623535,
      "learning_rate": 6.2097610385390185e-06,
      "loss": 2.3842,
      "step": 48092
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0885486602783203,
      "learning_rate": 6.20938002595413e-06,
      "loss": 2.2224,
      "step": 48093
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0323065519332886,
      "learning_rate": 6.208999019795372e-06,
      "loss": 2.3582,
      "step": 48094
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.131327509880066,
      "learning_rate": 6.20861802006339e-06,
      "loss": 2.1901,
      "step": 48095
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1208465099334717,
      "learning_rate": 6.20823702675883e-06,
      "loss": 2.4075,
      "step": 48096
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9953389167785645,
      "learning_rate": 6.207856039882339e-06,
      "loss": 2.3962,
      "step": 48097
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.060203194618225,
      "learning_rate": 6.2074750594345615e-06,
      "loss": 2.3398,
      "step": 48098
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.130220651626587,
      "learning_rate": 6.207094085416143e-06,
      "loss": 2.1046,
      "step": 48099
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.071210265159607,
      "learning_rate": 6.206713117827734e-06,
      "loss": 2.2633,
      "step": 48100
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0897502899169922,
      "learning_rate": 6.206332156669974e-06,
      "loss": 2.2064,
      "step": 48101
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.095943808555603,
      "learning_rate": 6.2059512019435116e-06,
      "loss": 2.412,
      "step": 48102
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2781617641448975,
      "learning_rate": 6.2055702536489915e-06,
      "loss": 2.3573,
      "step": 48103
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1534152030944824,
      "learning_rate": 6.20518931178706e-06,
      "loss": 2.3142,
      "step": 48104
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0952290296554565,
      "learning_rate": 6.204808376358364e-06,
      "loss": 2.2288,
      "step": 48105
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1387605667114258,
      "learning_rate": 6.20442744736355e-06,
      "loss": 2.1753,
      "step": 48106
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0219223499298096,
      "learning_rate": 6.20404652480326e-06,
      "loss": 2.1283,
      "step": 48107
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0805491209030151,
      "learning_rate": 6.203665608678142e-06,
      "loss": 2.3612,
      "step": 48108
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.035160779953003,
      "learning_rate": 6.203284698988843e-06,
      "loss": 2.3279,
      "step": 48109
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.029272198677063,
      "learning_rate": 6.202903795736007e-06,
      "loss": 2.4313,
      "step": 48110
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.119594931602478,
      "learning_rate": 6.20252289892028e-06,
      "loss": 2.2589,
      "step": 48111
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1750322580337524,
      "learning_rate": 6.202142008542309e-06,
      "loss": 2.2056,
      "step": 48112
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0612174272537231,
      "learning_rate": 6.201761124602739e-06,
      "loss": 2.2875,
      "step": 48113
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1932133436203003,
      "learning_rate": 6.201380247102218e-06,
      "loss": 2.3075,
      "step": 48114
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1087894439697266,
      "learning_rate": 6.200999376041386e-06,
      "loss": 2.4755,
      "step": 48115
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.088965654373169,
      "learning_rate": 6.200618511420893e-06,
      "loss": 2.3079,
      "step": 48116
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0368421077728271,
      "learning_rate": 6.200237653241381e-06,
      "loss": 2.4978,
      "step": 48117
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.137589693069458,
      "learning_rate": 6.1998568015035e-06,
      "loss": 2.4244,
      "step": 48118
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1184515953063965,
      "learning_rate": 6.199475956207893e-06,
      "loss": 2.4486,
      "step": 48119
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0411100387573242,
      "learning_rate": 6.199095117355207e-06,
      "loss": 2.3352,
      "step": 48120
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0394766330718994,
      "learning_rate": 6.198714284946087e-06,
      "loss": 2.3067,
      "step": 48121
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9459125995635986,
      "learning_rate": 6.198333458981178e-06,
      "loss": 2.4606,
      "step": 48122
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0791606903076172,
      "learning_rate": 6.197952639461127e-06,
      "loss": 2.5061,
      "step": 48123
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0169261693954468,
      "learning_rate": 6.197571826386579e-06,
      "loss": 2.2384,
      "step": 48124
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1329870223999023,
      "learning_rate": 6.1971910197581795e-06,
      "loss": 2.4669,
      "step": 48125
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9986158609390259,
      "learning_rate": 6.196810219576573e-06,
      "loss": 2.3832,
      "step": 48126
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.125072956085205,
      "learning_rate": 6.19642942584241e-06,
      "loss": 2.5114,
      "step": 48127
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1433005332946777,
      "learning_rate": 6.1960486385563305e-06,
      "loss": 2.2333,
      "step": 48128
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9953651428222656,
      "learning_rate": 6.19566785771898e-06,
      "loss": 2.2693,
      "step": 48129
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0241862535476685,
      "learning_rate": 6.195287083331005e-06,
      "loss": 2.2227,
      "step": 48130
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1309645175933838,
      "learning_rate": 6.1949063153930545e-06,
      "loss": 2.1499,
      "step": 48131
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1162320375442505,
      "learning_rate": 6.194525553905768e-06,
      "loss": 2.4649,
      "step": 48132
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0115617513656616,
      "learning_rate": 6.194144798869797e-06,
      "loss": 2.5791,
      "step": 48133
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1930663585662842,
      "learning_rate": 6.193764050285784e-06,
      "loss": 2.2639,
      "step": 48134
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1906636953353882,
      "learning_rate": 6.193383308154373e-06,
      "loss": 2.2921,
      "step": 48135
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0448615550994873,
      "learning_rate": 6.193002572476212e-06,
      "loss": 2.4039,
      "step": 48136
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9356288909912109,
      "learning_rate": 6.192621843251946e-06,
      "loss": 2.2844,
      "step": 48137
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0771095752716064,
      "learning_rate": 6.19224112048222e-06,
      "loss": 2.4253,
      "step": 48138
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1315017938613892,
      "learning_rate": 6.191860404167678e-06,
      "loss": 2.3099,
      "step": 48139
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.142175316810608,
      "learning_rate": 6.191479694308968e-06,
      "loss": 2.2438,
      "step": 48140
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0939984321594238,
      "learning_rate": 6.1910989909067384e-06,
      "loss": 2.1774,
      "step": 48141
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2346051931381226,
      "learning_rate": 6.190718293961626e-06,
      "loss": 2.3677,
      "step": 48142
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0376535654067993,
      "learning_rate": 6.190337603474282e-06,
      "loss": 2.4193,
      "step": 48143
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0299464464187622,
      "learning_rate": 6.189956919445348e-06,
      "loss": 2.2929,
      "step": 48144
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0675089359283447,
      "learning_rate": 6.1895762418754735e-06,
      "loss": 2.2861,
      "step": 48145
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0230258703231812,
      "learning_rate": 6.189195570765302e-06,
      "loss": 2.4109,
      "step": 48146
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3953521251678467,
      "learning_rate": 6.188814906115479e-06,
      "loss": 2.4214,
      "step": 48147
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0174717903137207,
      "learning_rate": 6.188434247926649e-06,
      "loss": 2.3178,
      "step": 48148
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.035393238067627,
      "learning_rate": 6.188053596199459e-06,
      "loss": 2.4167,
      "step": 48149
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0273935794830322,
      "learning_rate": 6.187672950934553e-06,
      "loss": 2.2649,
      "step": 48150
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0146464109420776,
      "learning_rate": 6.187292312132578e-06,
      "loss": 2.486,
      "step": 48151
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0816502571105957,
      "learning_rate": 6.186911679794176e-06,
      "loss": 2.4875,
      "step": 48152
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.032385230064392,
      "learning_rate": 6.186531053919995e-06,
      "loss": 2.3092,
      "step": 48153
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1194548606872559,
      "learning_rate": 6.186150434510685e-06,
      "loss": 2.2954,
      "step": 48154
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0269339084625244,
      "learning_rate": 6.185769821566881e-06,
      "loss": 2.2873,
      "step": 48155
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0833104848861694,
      "learning_rate": 6.185389215089233e-06,
      "loss": 2.1543,
      "step": 48156
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2965439558029175,
      "learning_rate": 6.185008615078385e-06,
      "loss": 2.423,
      "step": 48157
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1859546899795532,
      "learning_rate": 6.184628021534985e-06,
      "loss": 2.6371,
      "step": 48158
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5786371231079102,
      "learning_rate": 6.184247434459677e-06,
      "loss": 2.4353,
      "step": 48159
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.123168706893921,
      "learning_rate": 6.1838668538531055e-06,
      "loss": 2.4428,
      "step": 48160
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2656947374343872,
      "learning_rate": 6.183486279715916e-06,
      "loss": 2.4098,
      "step": 48161
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0138559341430664,
      "learning_rate": 6.183105712048756e-06,
      "loss": 2.3724,
      "step": 48162
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0277212858200073,
      "learning_rate": 6.182725150852267e-06,
      "loss": 2.1447,
      "step": 48163
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.168096899986267,
      "learning_rate": 6.182344596127094e-06,
      "loss": 2.3503,
      "step": 48164
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.166002631187439,
      "learning_rate": 6.1819640478738856e-06,
      "loss": 2.2038,
      "step": 48165
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.055907964706421,
      "learning_rate": 6.181583506093282e-06,
      "loss": 2.3622,
      "step": 48166
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.122460961341858,
      "learning_rate": 6.181202970785938e-06,
      "loss": 2.2088,
      "step": 48167
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1544103622436523,
      "learning_rate": 6.18082244195249e-06,
      "loss": 2.1534,
      "step": 48168
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1548399925231934,
      "learning_rate": 6.180441919593582e-06,
      "loss": 2.468,
      "step": 48169
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.114027500152588,
      "learning_rate": 6.180061403709863e-06,
      "loss": 2.6142,
      "step": 48170
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1056557893753052,
      "learning_rate": 6.179680894301978e-06,
      "loss": 2.3781,
      "step": 48171
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1285063028335571,
      "learning_rate": 6.179300391370571e-06,
      "loss": 2.3318,
      "step": 48172
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2205467224121094,
      "learning_rate": 6.178919894916289e-06,
      "loss": 2.4972,
      "step": 48173
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2602646350860596,
      "learning_rate": 6.178539404939774e-06,
      "loss": 2.4656,
      "step": 48174
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1106994152069092,
      "learning_rate": 6.1781589214416726e-06,
      "loss": 2.1093,
      "step": 48175
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.33347487449646,
      "learning_rate": 6.177778444422631e-06,
      "loss": 2.275,
      "step": 48176
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1523315906524658,
      "learning_rate": 6.1773979738832916e-06,
      "loss": 2.3338,
      "step": 48177
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1401660442352295,
      "learning_rate": 6.177017509824301e-06,
      "loss": 2.1167,
      "step": 48178
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0112721920013428,
      "learning_rate": 6.176637052246304e-06,
      "loss": 2.4162,
      "step": 48179
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0404189825057983,
      "learning_rate": 6.17625660114995e-06,
      "loss": 2.4462,
      "step": 48180
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.019748330116272,
      "learning_rate": 6.175876156535876e-06,
      "loss": 2.5191,
      "step": 48181
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0822482109069824,
      "learning_rate": 6.1754957184047305e-06,
      "loss": 2.2797,
      "step": 48182
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1817750930786133,
      "learning_rate": 6.175115286757159e-06,
      "loss": 2.377,
      "step": 48183
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1151134967803955,
      "learning_rate": 6.1747348615938045e-06,
      "loss": 2.4218,
      "step": 48184
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0591610670089722,
      "learning_rate": 6.174354442915313e-06,
      "loss": 2.3938,
      "step": 48185
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0643380880355835,
      "learning_rate": 6.173974030722332e-06,
      "loss": 2.4351,
      "step": 48186
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1030250787734985,
      "learning_rate": 6.173593625015501e-06,
      "loss": 2.3219,
      "step": 48187
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0539387464523315,
      "learning_rate": 6.173213225795471e-06,
      "loss": 2.1739,
      "step": 48188
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0329854488372803,
      "learning_rate": 6.1728328330628824e-06,
      "loss": 2.3995,
      "step": 48189
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0754377841949463,
      "learning_rate": 6.172452446818384e-06,
      "loss": 2.2969,
      "step": 48190
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1200520992279053,
      "learning_rate": 6.172072067062616e-06,
      "loss": 2.4577,
      "step": 48191
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0525717735290527,
      "learning_rate": 6.171691693796227e-06,
      "loss": 2.2823,
      "step": 48192
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.170343041419983,
      "learning_rate": 6.171311327019858e-06,
      "loss": 2.5302,
      "step": 48193
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2715364694595337,
      "learning_rate": 6.17093096673416e-06,
      "loss": 2.2786,
      "step": 48194
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0042763948440552,
      "learning_rate": 6.170550612939771e-06,
      "loss": 2.4372,
      "step": 48195
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9921407699584961,
      "learning_rate": 6.1701702656373405e-06,
      "loss": 2.2784,
      "step": 48196
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1096653938293457,
      "learning_rate": 6.169789924827511e-06,
      "loss": 2.4205,
      "step": 48197
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.089521050453186,
      "learning_rate": 6.169409590510926e-06,
      "loss": 2.4524,
      "step": 48198
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.120750069618225,
      "learning_rate": 6.169029262688233e-06,
      "loss": 2.1004,
      "step": 48199
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0501011610031128,
      "learning_rate": 6.1686489413600755e-06,
      "loss": 2.2262,
      "step": 48200
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1669819355010986,
      "learning_rate": 6.1682686265270995e-06,
      "loss": 2.2574,
      "step": 48201
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1543080806732178,
      "learning_rate": 6.167888318189949e-06,
      "loss": 2.4042,
      "step": 48202
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1227277517318726,
      "learning_rate": 6.167508016349267e-06,
      "loss": 2.3651,
      "step": 48203
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0389964580535889,
      "learning_rate": 6.167127721005701e-06,
      "loss": 2.4607,
      "step": 48204
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1490004062652588,
      "learning_rate": 6.166747432159894e-06,
      "loss": 2.3975,
      "step": 48205
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.011234998703003,
      "learning_rate": 6.166367149812492e-06,
      "loss": 2.4278,
      "step": 48206
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0637245178222656,
      "learning_rate": 6.165986873964143e-06,
      "loss": 2.4993,
      "step": 48207
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1225671768188477,
      "learning_rate": 6.165606604615482e-06,
      "loss": 2.4109,
      "step": 48208
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.028502106666565,
      "learning_rate": 6.165226341767161e-06,
      "loss": 2.3565,
      "step": 48209
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.99439936876297,
      "learning_rate": 6.164846085419821e-06,
      "loss": 2.4323,
      "step": 48210
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.08200204372406,
      "learning_rate": 6.1644658355741095e-06,
      "loss": 2.3441,
      "step": 48211
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1556189060211182,
      "learning_rate": 6.164085592230671e-06,
      "loss": 2.5975,
      "step": 48212
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0991227626800537,
      "learning_rate": 6.1637053553901485e-06,
      "loss": 2.4815,
      "step": 48213
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0544214248657227,
      "learning_rate": 6.1633251250531855e-06,
      "loss": 2.579,
      "step": 48214
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1386691331863403,
      "learning_rate": 6.162944901220431e-06,
      "loss": 2.2527,
      "step": 48215
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9849739074707031,
      "learning_rate": 6.162564683892527e-06,
      "loss": 2.2211,
      "step": 48216
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1607991456985474,
      "learning_rate": 6.1621844730701165e-06,
      "loss": 2.3017,
      "step": 48217
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1609786748886108,
      "learning_rate": 6.161804268753848e-06,
      "loss": 2.3636,
      "step": 48218
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1012828350067139,
      "learning_rate": 6.1614240709443615e-06,
      "loss": 2.3268,
      "step": 48219
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.053566336631775,
      "learning_rate": 6.161043879642308e-06,
      "loss": 2.3122,
      "step": 48220
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2453210353851318,
      "learning_rate": 6.160663694848324e-06,
      "loss": 2.1765,
      "step": 48221
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.008027195930481,
      "learning_rate": 6.160283516563057e-06,
      "loss": 2.17,
      "step": 48222
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.259743094444275,
      "learning_rate": 6.159903344787154e-06,
      "loss": 2.4553,
      "step": 48223
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0829555988311768,
      "learning_rate": 6.1595231795212585e-06,
      "loss": 2.2879,
      "step": 48224
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2263151407241821,
      "learning_rate": 6.159143020766013e-06,
      "loss": 2.5967,
      "step": 48225
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9765722751617432,
      "learning_rate": 6.1587628685220635e-06,
      "loss": 2.2247,
      "step": 48226
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0152376890182495,
      "learning_rate": 6.158382722790053e-06,
      "loss": 2.4349,
      "step": 48227
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1054903268814087,
      "learning_rate": 6.158002583570628e-06,
      "loss": 2.3555,
      "step": 48228
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.975179135799408,
      "learning_rate": 6.157622450864431e-06,
      "loss": 2.5271,
      "step": 48229
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0869944095611572,
      "learning_rate": 6.1572423246721095e-06,
      "loss": 2.607,
      "step": 48230
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0196011066436768,
      "learning_rate": 6.156862204994305e-06,
      "loss": 2.2336,
      "step": 48231
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0447903871536255,
      "learning_rate": 6.1564820918316625e-06,
      "loss": 2.2528,
      "step": 48232
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2343169450759888,
      "learning_rate": 6.15610198518483e-06,
      "loss": 2.4088,
      "step": 48233
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3179022073745728,
      "learning_rate": 6.155721885054446e-06,
      "loss": 2.3411,
      "step": 48234
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0559114217758179,
      "learning_rate": 6.155341791441158e-06,
      "loss": 2.296,
      "step": 48235
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0196694135665894,
      "learning_rate": 6.154961704345608e-06,
      "loss": 2.3082,
      "step": 48236
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1243172883987427,
      "learning_rate": 6.154581623768443e-06,
      "loss": 2.6196,
      "step": 48237
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4132846593856812,
      "learning_rate": 6.154201549710308e-06,
      "loss": 2.2262,
      "step": 48238
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0256024599075317,
      "learning_rate": 6.153821482171844e-06,
      "loss": 2.3873,
      "step": 48239
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1029107570648193,
      "learning_rate": 6.153441421153699e-06,
      "loss": 2.5224,
      "step": 48240
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0592856407165527,
      "learning_rate": 6.1530613666565135e-06,
      "loss": 2.4219,
      "step": 48241
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.282027006149292,
      "learning_rate": 6.152681318680935e-06,
      "loss": 2.1404,
      "step": 48242
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1054545640945435,
      "learning_rate": 6.152301277227606e-06,
      "loss": 2.3407,
      "step": 48243
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2042889595031738,
      "learning_rate": 6.151921242297171e-06,
      "loss": 2.3467,
      "step": 48244
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0727441310882568,
      "learning_rate": 6.151541213890277e-06,
      "loss": 2.3898,
      "step": 48245
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.06718909740448,
      "learning_rate": 6.151161192007566e-06,
      "loss": 2.3399,
      "step": 48246
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.98772132396698,
      "learning_rate": 6.15078117664968e-06,
      "loss": 2.1829,
      "step": 48247
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1394853591918945,
      "learning_rate": 6.150401167817266e-06,
      "loss": 2.4799,
      "step": 48248
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9784661531448364,
      "learning_rate": 6.150021165510967e-06,
      "loss": 2.5547,
      "step": 48249
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0828051567077637,
      "learning_rate": 6.1496411697314275e-06,
      "loss": 2.146,
      "step": 48250
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.994458019733429,
      "learning_rate": 6.149261180479293e-06,
      "loss": 2.1982,
      "step": 48251
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0589566230773926,
      "learning_rate": 6.148881197755208e-06,
      "loss": 2.319,
      "step": 48252
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1977449655532837,
      "learning_rate": 6.148501221559813e-06,
      "loss": 2.363,
      "step": 48253
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9774909615516663,
      "learning_rate": 6.148121251893755e-06,
      "loss": 2.1527,
      "step": 48254
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0702245235443115,
      "learning_rate": 6.147741288757676e-06,
      "loss": 2.3725,
      "step": 48255
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.202905297279358,
      "learning_rate": 6.147361332152224e-06,
      "loss": 2.2701,
      "step": 48256
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0272619724273682,
      "learning_rate": 6.146981382078039e-06,
      "loss": 2.4881,
      "step": 48257
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0724186897277832,
      "learning_rate": 6.146601438535769e-06,
      "loss": 2.3003,
      "step": 48258
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.122010588645935,
      "learning_rate": 6.14622150152606e-06,
      "loss": 2.2308,
      "step": 48259
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1777883768081665,
      "learning_rate": 6.145841571049547e-06,
      "loss": 2.4411,
      "step": 48260
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0558664798736572,
      "learning_rate": 6.145461647106879e-06,
      "loss": 2.1688,
      "step": 48261
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0080550909042358,
      "learning_rate": 6.1450817296987005e-06,
      "loss": 2.2798,
      "step": 48262
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1235600709915161,
      "learning_rate": 6.144701818825657e-06,
      "loss": 2.3427,
      "step": 48263
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0979079008102417,
      "learning_rate": 6.144321914488389e-06,
      "loss": 2.4587,
      "step": 48264
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1212892532348633,
      "learning_rate": 6.143942016687544e-06,
      "loss": 2.4935,
      "step": 48265
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9046604037284851,
      "learning_rate": 6.143562125423764e-06,
      "loss": 2.3141,
      "step": 48266
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9739065766334534,
      "learning_rate": 6.143182240697694e-06,
      "loss": 2.3782,
      "step": 48267
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0782339572906494,
      "learning_rate": 6.142802362509977e-06,
      "loss": 2.2333,
      "step": 48268
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0764732360839844,
      "learning_rate": 6.142422490861259e-06,
      "loss": 2.3513,
      "step": 48269
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1428537368774414,
      "learning_rate": 6.14204262575218e-06,
      "loss": 2.5252,
      "step": 48270
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.00909423828125,
      "learning_rate": 6.1416627671833896e-06,
      "loss": 2.2505,
      "step": 48271
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1541742086410522,
      "learning_rate": 6.141282915155526e-06,
      "loss": 2.3301,
      "step": 48272
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9896101355552673,
      "learning_rate": 6.1409030696692415e-06,
      "loss": 2.4829,
      "step": 48273
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2432191371917725,
      "learning_rate": 6.140523230725171e-06,
      "loss": 2.3103,
      "step": 48274
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1670511960983276,
      "learning_rate": 6.1401433983239604e-06,
      "loss": 2.2368,
      "step": 48275
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9793313145637512,
      "learning_rate": 6.139763572466255e-06,
      "loss": 2.2021,
      "step": 48276
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2617579698562622,
      "learning_rate": 6.139383753152699e-06,
      "loss": 2.3306,
      "step": 48277
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.188844084739685,
      "learning_rate": 6.139003940383937e-06,
      "loss": 2.3303,
      "step": 48278
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0622960329055786,
      "learning_rate": 6.1386241341606115e-06,
      "loss": 2.3358,
      "step": 48279
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.119581699371338,
      "learning_rate": 6.138244334483368e-06,
      "loss": 2.3581,
      "step": 48280
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.047206163406372,
      "learning_rate": 6.137864541352848e-06,
      "loss": 2.2673,
      "step": 48281
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0620094537734985,
      "learning_rate": 6.137484754769696e-06,
      "loss": 2.277,
      "step": 48282
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1038135290145874,
      "learning_rate": 6.137104974734558e-06,
      "loss": 2.373,
      "step": 48283
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0808212757110596,
      "learning_rate": 6.136725201248075e-06,
      "loss": 2.3455,
      "step": 48284
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0885870456695557,
      "learning_rate": 6.136345434310891e-06,
      "loss": 2.4589,
      "step": 48285
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3138349056243896,
      "learning_rate": 6.135965673923656e-06,
      "loss": 2.3443,
      "step": 48286
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1116275787353516,
      "learning_rate": 6.135585920087004e-06,
      "loss": 2.0776,
      "step": 48287
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3199814558029175,
      "learning_rate": 6.135206172801585e-06,
      "loss": 2.2337,
      "step": 48288
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0631893873214722,
      "learning_rate": 6.13482643206804e-06,
      "loss": 2.241,
      "step": 48289
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.058318853378296,
      "learning_rate": 6.134446697887013e-06,
      "loss": 2.1373,
      "step": 48290
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3042019605636597,
      "learning_rate": 6.13406697025915e-06,
      "loss": 2.4003,
      "step": 48291
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1250648498535156,
      "learning_rate": 6.133687249185094e-06,
      "loss": 2.5571,
      "step": 48292
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9782724976539612,
      "learning_rate": 6.1333075346654865e-06,
      "loss": 2.3813,
      "step": 48293
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0354045629501343,
      "learning_rate": 6.132927826700974e-06,
      "loss": 2.0684,
      "step": 48294
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0231455564498901,
      "learning_rate": 6.132548125292199e-06,
      "loss": 2.3492,
      "step": 48295
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.057169795036316,
      "learning_rate": 6.132168430439805e-06,
      "loss": 2.3477,
      "step": 48296
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9599222540855408,
      "learning_rate": 6.131788742144436e-06,
      "loss": 2.4016,
      "step": 48297
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2928217649459839,
      "learning_rate": 6.1314090604067364e-06,
      "loss": 2.4464,
      "step": 48298
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2430378198623657,
      "learning_rate": 6.131029385227354e-06,
      "loss": 2.4927,
      "step": 48299
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0284818410873413,
      "learning_rate": 6.130649716606921e-06,
      "loss": 2.3291,
      "step": 48300
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2033427953720093,
      "learning_rate": 6.130270054546089e-06,
      "loss": 2.521,
      "step": 48301
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1427148580551147,
      "learning_rate": 6.129890399045501e-06,
      "loss": 2.2105,
      "step": 48302
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.020416259765625,
      "learning_rate": 6.129510750105798e-06,
      "loss": 2.3601,
      "step": 48303
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9907087683677673,
      "learning_rate": 6.129131107727627e-06,
      "loss": 2.4918,
      "step": 48304
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0476853847503662,
      "learning_rate": 6.128751471911629e-06,
      "loss": 2.1362,
      "step": 48305
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1792877912521362,
      "learning_rate": 6.128371842658449e-06,
      "loss": 2.5924,
      "step": 48306
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1433465480804443,
      "learning_rate": 6.1279922199687315e-06,
      "loss": 2.3759,
      "step": 48307
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1327321529388428,
      "learning_rate": 6.12761260384312e-06,
      "loss": 2.3583,
      "step": 48308
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9936681389808655,
      "learning_rate": 6.127232994282255e-06,
      "loss": 2.292,
      "step": 48309
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0942176580429077,
      "learning_rate": 6.126853391286781e-06,
      "loss": 2.6195,
      "step": 48310
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0367448329925537,
      "learning_rate": 6.126473794857344e-06,
      "loss": 2.2175,
      "step": 48311
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1409165859222412,
      "learning_rate": 6.126094204994586e-06,
      "loss": 2.3615,
      "step": 48312
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5323911905288696,
      "learning_rate": 6.12571462169915e-06,
      "loss": 2.3182,
      "step": 48313
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.059171199798584,
      "learning_rate": 6.125335044971681e-06,
      "loss": 2.4047,
      "step": 48314
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0511322021484375,
      "learning_rate": 6.1249554748128194e-06,
      "loss": 2.1237,
      "step": 48315
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0127862691879272,
      "learning_rate": 6.124575911223212e-06,
      "loss": 2.3779,
      "step": 48316
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2071349620819092,
      "learning_rate": 6.1241963542034995e-06,
      "loss": 2.4615,
      "step": 48317
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2732973098754883,
      "learning_rate": 6.1238168037543275e-06,
      "loss": 2.3729,
      "step": 48318
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0258725881576538,
      "learning_rate": 6.123437259876339e-06,
      "loss": 2.3594,
      "step": 48319
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1209899187088013,
      "learning_rate": 6.123057722570177e-06,
      "loss": 2.2948,
      "step": 48320
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1097015142440796,
      "learning_rate": 6.122678191836485e-06,
      "loss": 2.2547,
      "step": 48321
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.266804814338684,
      "learning_rate": 6.122298667675908e-06,
      "loss": 2.375,
      "step": 48322
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0388622283935547,
      "learning_rate": 6.121919150089086e-06,
      "loss": 2.2349,
      "step": 48323
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.093956470489502,
      "learning_rate": 6.121539639076664e-06,
      "loss": 2.648,
      "step": 48324
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.092458724975586,
      "learning_rate": 6.121160134639291e-06,
      "loss": 2.4317,
      "step": 48325
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.008432388305664,
      "learning_rate": 6.120780636777601e-06,
      "loss": 2.1167,
      "step": 48326
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1188240051269531,
      "learning_rate": 6.120401145492241e-06,
      "loss": 2.1871,
      "step": 48327
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0370190143585205,
      "learning_rate": 6.1200216607838545e-06,
      "loss": 2.4336,
      "step": 48328
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0991755723953247,
      "learning_rate": 6.119642182653085e-06,
      "loss": 2.3497,
      "step": 48329
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0609016418457031,
      "learning_rate": 6.119262711100577e-06,
      "loss": 2.3053,
      "step": 48330
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.997317910194397,
      "learning_rate": 6.118883246126972e-06,
      "loss": 2.3118,
      "step": 48331
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0962475538253784,
      "learning_rate": 6.118503787732913e-06,
      "loss": 2.3501,
      "step": 48332
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9838976263999939,
      "learning_rate": 6.118124335919046e-06,
      "loss": 2.1656,
      "step": 48333
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9978000521659851,
      "learning_rate": 6.11774489068601e-06,
      "loss": 2.272,
      "step": 48334
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1643906831741333,
      "learning_rate": 6.117365452034454e-06,
      "loss": 2.2314,
      "step": 48335
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0050493478775024,
      "learning_rate": 6.116986019965019e-06,
      "loss": 2.1621,
      "step": 48336
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.137190580368042,
      "learning_rate": 6.116606594478345e-06,
      "loss": 2.2911,
      "step": 48337
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1015264987945557,
      "learning_rate": 6.116227175575077e-06,
      "loss": 2.3411,
      "step": 48338
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0572128295898438,
      "learning_rate": 6.115847763255861e-06,
      "loss": 2.6294,
      "step": 48339
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.128415584564209,
      "learning_rate": 6.1154683575213365e-06,
      "loss": 2.4901,
      "step": 48340
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0183141231536865,
      "learning_rate": 6.115088958372148e-06,
      "loss": 2.2792,
      "step": 48341
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0527461767196655,
      "learning_rate": 6.114709565808941e-06,
      "loss": 2.2899,
      "step": 48342
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.30733323097229,
      "learning_rate": 6.1143301798323525e-06,
      "loss": 2.207,
      "step": 48343
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0261002779006958,
      "learning_rate": 6.113950800443033e-06,
      "loss": 2.1235,
      "step": 48344
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3212709426879883,
      "learning_rate": 6.11357142764162e-06,
      "loss": 2.3099,
      "step": 48345
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.997566819190979,
      "learning_rate": 6.11319206142876e-06,
      "loss": 2.4282,
      "step": 48346
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.247244954109192,
      "learning_rate": 6.112812701805095e-06,
      "loss": 2.2095,
      "step": 48347
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1021220684051514,
      "learning_rate": 6.112433348771267e-06,
      "loss": 2.2038,
      "step": 48348
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1236718893051147,
      "learning_rate": 6.112054002327922e-06,
      "loss": 2.0246,
      "step": 48349
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.057435154914856,
      "learning_rate": 6.1116746624757005e-06,
      "loss": 2.311,
      "step": 48350
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0613555908203125,
      "learning_rate": 6.111295329215248e-06,
      "loss": 2.3374,
      "step": 48351
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0898103713989258,
      "learning_rate": 6.110916002547208e-06,
      "loss": 2.3663,
      "step": 48352
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.217653751373291,
      "learning_rate": 6.110536682472219e-06,
      "loss": 2.3951,
      "step": 48353
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1051440238952637,
      "learning_rate": 6.110157368990925e-06,
      "loss": 2.2675,
      "step": 48354
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9394173622131348,
      "learning_rate": 6.109778062103972e-06,
      "loss": 2.3311,
      "step": 48355
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.151671051979065,
      "learning_rate": 6.109398761812002e-06,
      "loss": 2.2397,
      "step": 48356
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0152838230133057,
      "learning_rate": 6.1090194681156575e-06,
      "loss": 2.3732,
      "step": 48357
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1658414602279663,
      "learning_rate": 6.108640181015581e-06,
      "loss": 2.4178,
      "step": 48358
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0417639017105103,
      "learning_rate": 6.108260900512417e-06,
      "loss": 2.4761,
      "step": 48359
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0033931732177734,
      "learning_rate": 6.107881626606809e-06,
      "loss": 2.2186,
      "step": 48360
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0022449493408203,
      "learning_rate": 6.107502359299397e-06,
      "loss": 2.0591,
      "step": 48361
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.079067587852478,
      "learning_rate": 6.107123098590826e-06,
      "loss": 2.1829,
      "step": 48362
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1010164022445679,
      "learning_rate": 6.10674384448174e-06,
      "loss": 2.4007,
      "step": 48363
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.146713137626648,
      "learning_rate": 6.1063645969727805e-06,
      "loss": 2.3564,
      "step": 48364
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1388826370239258,
      "learning_rate": 6.105985356064591e-06,
      "loss": 2.3607,
      "step": 48365
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0812822580337524,
      "learning_rate": 6.1056061217578125e-06,
      "loss": 2.404,
      "step": 48366
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.100713849067688,
      "learning_rate": 6.105226894053089e-06,
      "loss": 2.1973,
      "step": 48367
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.173431634902954,
      "learning_rate": 6.104847672951064e-06,
      "loss": 2.3943,
      "step": 48368
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.044967532157898,
      "learning_rate": 6.1044684584523794e-06,
      "loss": 2.2564,
      "step": 48369
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2789462804794312,
      "learning_rate": 6.104089250557681e-06,
      "loss": 2.3523,
      "step": 48370
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9836801886558533,
      "learning_rate": 6.103710049267608e-06,
      "loss": 2.1618,
      "step": 48371
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1992958784103394,
      "learning_rate": 6.103330854582805e-06,
      "loss": 2.1854,
      "step": 48372
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1496820449829102,
      "learning_rate": 6.102951666503913e-06,
      "loss": 2.6717,
      "step": 48373
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1245934963226318,
      "learning_rate": 6.102572485031577e-06,
      "loss": 2.5071,
      "step": 48374
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1649287939071655,
      "learning_rate": 6.10219331016644e-06,
      "loss": 2.3258,
      "step": 48375
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0483478307724,
      "learning_rate": 6.1018141419091435e-06,
      "loss": 2.3892,
      "step": 48376
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.09364652633667,
      "learning_rate": 6.1014349802603305e-06,
      "loss": 2.3321,
      "step": 48377
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1445131301879883,
      "learning_rate": 6.101055825220649e-06,
      "loss": 2.3882,
      "step": 48378
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1977177858352661,
      "learning_rate": 6.100676676790731e-06,
      "loss": 2.1833,
      "step": 48379
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0736640691757202,
      "learning_rate": 6.100297534971227e-06,
      "loss": 2.3764,
      "step": 48380
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9230057597160339,
      "learning_rate": 6.099918399762776e-06,
      "loss": 2.4029,
      "step": 48381
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0064970254898071,
      "learning_rate": 6.099539271166024e-06,
      "loss": 2.1224,
      "step": 48382
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.302158236503601,
      "learning_rate": 6.099160149181611e-06,
      "loss": 2.4324,
      "step": 48383
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0418471097946167,
      "learning_rate": 6.098781033810183e-06,
      "loss": 2.1161,
      "step": 48384
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0596777200698853,
      "learning_rate": 6.09840192505238e-06,
      "loss": 2.261,
      "step": 48385
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0153319835662842,
      "learning_rate": 6.098022822908844e-06,
      "loss": 2.2378,
      "step": 48386
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2141467332839966,
      "learning_rate": 6.09764372738022e-06,
      "loss": 2.2275,
      "step": 48387
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2072501182556152,
      "learning_rate": 6.0972646384671496e-06,
      "loss": 2.3585,
      "step": 48388
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.200667142868042,
      "learning_rate": 6.096885556170275e-06,
      "loss": 2.3205,
      "step": 48389
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.084734559059143,
      "learning_rate": 6.0965064804902404e-06,
      "loss": 2.4174,
      "step": 48390
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1016896963119507,
      "learning_rate": 6.096127411427693e-06,
      "loss": 2.4438,
      "step": 48391
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1214553117752075,
      "learning_rate": 6.095748348983263e-06,
      "loss": 2.2381,
      "step": 48392
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1034116744995117,
      "learning_rate": 6.095369293157601e-06,
      "loss": 2.145,
      "step": 48393
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2548675537109375,
      "learning_rate": 6.094990243951349e-06,
      "loss": 2.5237,
      "step": 48394
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.155749797821045,
      "learning_rate": 6.094611201365148e-06,
      "loss": 2.3742,
      "step": 48395
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1974568367004395,
      "learning_rate": 6.0942321653996426e-06,
      "loss": 2.4874,
      "step": 48396
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1740081310272217,
      "learning_rate": 6.093853136055474e-06,
      "loss": 2.1929,
      "step": 48397
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.199936866760254,
      "learning_rate": 6.093474113333289e-06,
      "loss": 2.7812,
      "step": 48398
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1757197380065918,
      "learning_rate": 6.093095097233722e-06,
      "loss": 2.1069,
      "step": 48399
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0474764108657837,
      "learning_rate": 6.092716087757421e-06,
      "loss": 2.2789,
      "step": 48400
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.071713924407959,
      "learning_rate": 6.092337084905027e-06,
      "loss": 2.5061,
      "step": 48401
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1611535549163818,
      "learning_rate": 6.091958088677183e-06,
      "loss": 2.3676,
      "step": 48402
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9741812944412231,
      "learning_rate": 6.091579099074531e-06,
      "loss": 2.3505,
      "step": 48403
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1309690475463867,
      "learning_rate": 6.09120011609772e-06,
      "loss": 2.1279,
      "step": 48404
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1284774541854858,
      "learning_rate": 6.09082113974738e-06,
      "loss": 2.3624,
      "step": 48405
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0397393703460693,
      "learning_rate": 6.090442170024162e-06,
      "loss": 2.3995,
      "step": 48406
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7319351434707642,
      "learning_rate": 6.090063206928705e-06,
      "loss": 2.4808,
      "step": 48407
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0272736549377441,
      "learning_rate": 6.089684250461654e-06,
      "loss": 2.0268,
      "step": 48408
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9503095746040344,
      "learning_rate": 6.089305300623649e-06,
      "loss": 2.3796,
      "step": 48409
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1637014150619507,
      "learning_rate": 6.088926357415333e-06,
      "loss": 2.4406,
      "step": 48410
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.045831561088562,
      "learning_rate": 6.08854742083735e-06,
      "loss": 2.3056,
      "step": 48411
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0500860214233398,
      "learning_rate": 6.088168490890343e-06,
      "loss": 2.3818,
      "step": 48412
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2615373134613037,
      "learning_rate": 6.087789567574951e-06,
      "loss": 2.3093,
      "step": 48413
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0414936542510986,
      "learning_rate": 6.087410650891819e-06,
      "loss": 2.2828,
      "step": 48414
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0664663314819336,
      "learning_rate": 6.087031740841587e-06,
      "loss": 2.2873,
      "step": 48415
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1045552492141724,
      "learning_rate": 6.0866528374249e-06,
      "loss": 2.2035,
      "step": 48416
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0497982501983643,
      "learning_rate": 6.0862739406423995e-06,
      "loss": 2.4956,
      "step": 48417
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1045657396316528,
      "learning_rate": 6.085895050494731e-06,
      "loss": 2.4887,
      "step": 48418
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3886817693710327,
      "learning_rate": 6.08551616698253e-06,
      "loss": 2.3666,
      "step": 48419
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0479124784469604,
      "learning_rate": 6.085137290106442e-06,
      "loss": 2.3065,
      "step": 48420
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0384877920150757,
      "learning_rate": 6.084758419867109e-06,
      "loss": 2.3463,
      "step": 48421
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.183125376701355,
      "learning_rate": 6.084379556265176e-06,
      "loss": 2.3892,
      "step": 48422
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.020307183265686,
      "learning_rate": 6.084000699301281e-06,
      "loss": 2.5745,
      "step": 48423
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.241885781288147,
      "learning_rate": 6.083621848976069e-06,
      "loss": 2.1608,
      "step": 48424
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3137744665145874,
      "learning_rate": 6.083243005290182e-06,
      "loss": 2.1452,
      "step": 48425
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0700538158416748,
      "learning_rate": 6.082864168244263e-06,
      "loss": 2.3272,
      "step": 48426
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1617310047149658,
      "learning_rate": 6.0824853378389525e-06,
      "loss": 2.2751,
      "step": 48427
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1450194120407104,
      "learning_rate": 6.082106514074892e-06,
      "loss": 2.3465,
      "step": 48428
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0949307680130005,
      "learning_rate": 6.081727696952726e-06,
      "loss": 2.3479,
      "step": 48429
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0716607570648193,
      "learning_rate": 6.081348886473095e-06,
      "loss": 2.3456,
      "step": 48430
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1461552381515503,
      "learning_rate": 6.080970082636645e-06,
      "loss": 2.4506,
      "step": 48431
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3194427490234375,
      "learning_rate": 6.080591285444014e-06,
      "loss": 2.1753,
      "step": 48432
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1412062644958496,
      "learning_rate": 6.080212494895843e-06,
      "loss": 2.1048,
      "step": 48433
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0803879499435425,
      "learning_rate": 6.079833710992777e-06,
      "loss": 1.9735,
      "step": 48434
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.041211724281311,
      "learning_rate": 6.079454933735458e-06,
      "loss": 2.3022,
      "step": 48435
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.418775200843811,
      "learning_rate": 6.079076163124527e-06,
      "loss": 2.1764,
      "step": 48436
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0364351272583008,
      "learning_rate": 6.078697399160628e-06,
      "loss": 2.2369,
      "step": 48437
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0720478296279907,
      "learning_rate": 6.0783186418444004e-06,
      "loss": 2.2889,
      "step": 48438
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1380521059036255,
      "learning_rate": 6.07793989117649e-06,
      "loss": 2.3402,
      "step": 48439
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9922165870666504,
      "learning_rate": 6.0775611471575336e-06,
      "loss": 2.0061,
      "step": 48440
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2097339630126953,
      "learning_rate": 6.077182409788179e-06,
      "loss": 2.4988,
      "step": 48441
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0758049488067627,
      "learning_rate": 6.076803679069063e-06,
      "loss": 2.5371,
      "step": 48442
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0636285543441772,
      "learning_rate": 6.076424955000833e-06,
      "loss": 2.2713,
      "step": 48443
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1308197975158691,
      "learning_rate": 6.0760462375841315e-06,
      "loss": 2.4007,
      "step": 48444
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9366201758384705,
      "learning_rate": 6.075667526819593e-06,
      "loss": 2.3057,
      "step": 48445
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0645798444747925,
      "learning_rate": 6.075288822707863e-06,
      "loss": 2.1852,
      "step": 48446
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9667478799819946,
      "learning_rate": 6.074910125249587e-06,
      "loss": 2.1265,
      "step": 48447
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1992835998535156,
      "learning_rate": 6.074531434445401e-06,
      "loss": 2.2876,
      "step": 48448
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.204314947128296,
      "learning_rate": 6.074152750295953e-06,
      "loss": 2.1943,
      "step": 48449
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.144530177116394,
      "learning_rate": 6.073774072801881e-06,
      "loss": 2.2313,
      "step": 48450
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1063390970230103,
      "learning_rate": 6.0733954019638285e-06,
      "loss": 2.4622,
      "step": 48451
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3847744464874268,
      "learning_rate": 6.0730167377824375e-06,
      "loss": 2.2893,
      "step": 48452
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.042679786682129,
      "learning_rate": 6.0726380802583495e-06,
      "loss": 2.3442,
      "step": 48453
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0075328350067139,
      "learning_rate": 6.072259429392209e-06,
      "loss": 2.4989,
      "step": 48454
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2120287418365479,
      "learning_rate": 6.071880785184654e-06,
      "loss": 2.3303,
      "step": 48455
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0269384384155273,
      "learning_rate": 6.071502147636326e-06,
      "loss": 2.3263,
      "step": 48456
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0378624200820923,
      "learning_rate": 6.071123516747872e-06,
      "loss": 2.1668,
      "step": 48457
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9880462884902954,
      "learning_rate": 6.070744892519928e-06,
      "loss": 2.2246,
      "step": 48458
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0466513633728027,
      "learning_rate": 6.070366274953141e-06,
      "loss": 2.2438,
      "step": 48459
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1742360591888428,
      "learning_rate": 6.06998766404815e-06,
      "loss": 2.2412,
      "step": 48460
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0699188709259033,
      "learning_rate": 6.069609059805597e-06,
      "loss": 2.1582,
      "step": 48461
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1680908203125,
      "learning_rate": 6.069230462226122e-06,
      "loss": 2.4168,
      "step": 48462
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0276594161987305,
      "learning_rate": 6.068851871310371e-06,
      "loss": 1.9351,
      "step": 48463
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0730265378952026,
      "learning_rate": 6.068473287058982e-06,
      "loss": 2.4622,
      "step": 48464
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0920226573944092,
      "learning_rate": 6.068094709472599e-06,
      "loss": 2.3885,
      "step": 48465
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0167924165725708,
      "learning_rate": 6.067716138551864e-06,
      "loss": 2.3082,
      "step": 48466
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2024608850479126,
      "learning_rate": 6.067337574297418e-06,
      "loss": 2.1682,
      "step": 48467
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2019262313842773,
      "learning_rate": 6.0669590167099035e-06,
      "loss": 2.423,
      "step": 48468
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1551297903060913,
      "learning_rate": 6.066580465789961e-06,
      "loss": 2.3303,
      "step": 48469
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.176408290863037,
      "learning_rate": 6.066201921538237e-06,
      "loss": 2.4656,
      "step": 48470
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0585132837295532,
      "learning_rate": 6.0658233839553645e-06,
      "loss": 2.3787,
      "step": 48471
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0687507390975952,
      "learning_rate": 6.06544485304199e-06,
      "loss": 2.2471,
      "step": 48472
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1210213899612427,
      "learning_rate": 6.065066328798757e-06,
      "loss": 2.3774,
      "step": 48473
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1110873222351074,
      "learning_rate": 6.064687811226303e-06,
      "loss": 2.4168,
      "step": 48474
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1336911916732788,
      "learning_rate": 6.0643093003252725e-06,
      "loss": 2.4207,
      "step": 48475
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0305659770965576,
      "learning_rate": 6.063930796096307e-06,
      "loss": 2.0318,
      "step": 48476
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0780792236328125,
      "learning_rate": 6.063552298540047e-06,
      "loss": 2.3628,
      "step": 48477
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3199384212493896,
      "learning_rate": 6.063173807657137e-06,
      "loss": 2.4376,
      "step": 48478
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0884826183319092,
      "learning_rate": 6.062795323448215e-06,
      "loss": 2.3582,
      "step": 48479
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0551762580871582,
      "learning_rate": 6.062416845913925e-06,
      "loss": 2.4763,
      "step": 48480
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1606807708740234,
      "learning_rate": 6.062038375054907e-06,
      "loss": 2.3201,
      "step": 48481
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.7575528621673584,
      "learning_rate": 6.0616599108718065e-06,
      "loss": 2.3424,
      "step": 48482
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2114267349243164,
      "learning_rate": 6.0612814533652595e-06,
      "loss": 2.2274,
      "step": 48483
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9407504200935364,
      "learning_rate": 6.060903002535911e-06,
      "loss": 2.2265,
      "step": 48484
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1757822036743164,
      "learning_rate": 6.0605245583844015e-06,
      "loss": 2.1452,
      "step": 48485
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1254900693893433,
      "learning_rate": 6.060146120911372e-06,
      "loss": 2.3154,
      "step": 48486
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1026057004928589,
      "learning_rate": 6.059767690117466e-06,
      "loss": 2.2791,
      "step": 48487
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.6407833099365234,
      "learning_rate": 6.059389266003325e-06,
      "loss": 2.2388,
      "step": 48488
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0594336986541748,
      "learning_rate": 6.059010848569588e-06,
      "loss": 2.4195,
      "step": 48489
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.009124517440796,
      "learning_rate": 6.058632437816897e-06,
      "loss": 2.3437,
      "step": 48490
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0040305852890015,
      "learning_rate": 6.058254033745895e-06,
      "loss": 2.2784,
      "step": 48491
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.071937084197998,
      "learning_rate": 6.057875636357224e-06,
      "loss": 2.5775,
      "step": 48492
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.093140721321106,
      "learning_rate": 6.057497245651523e-06,
      "loss": 2.1498,
      "step": 48493
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1076140403747559,
      "learning_rate": 6.057118861629435e-06,
      "loss": 2.2442,
      "step": 48494
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1396119594573975,
      "learning_rate": 6.056740484291601e-06,
      "loss": 2.1641,
      "step": 48495
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0778383016586304,
      "learning_rate": 6.056362113638666e-06,
      "loss": 2.2651,
      "step": 48496
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1882902383804321,
      "learning_rate": 6.055983749671268e-06,
      "loss": 2.1905,
      "step": 48497
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0157238245010376,
      "learning_rate": 6.055605392390047e-06,
      "loss": 2.4333,
      "step": 48498
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1367534399032593,
      "learning_rate": 6.055227041795646e-06,
      "loss": 2.4438,
      "step": 48499
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.579002857208252,
      "learning_rate": 6.054848697888707e-06,
      "loss": 2.2628,
      "step": 48500
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1370117664337158,
      "learning_rate": 6.0544703606698695e-06,
      "loss": 2.4787,
      "step": 48501
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2290163040161133,
      "learning_rate": 6.054092030139778e-06,
      "loss": 2.4905,
      "step": 48502
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1350224018096924,
      "learning_rate": 6.05371370629907e-06,
      "loss": 2.609,
      "step": 48503
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2043291330337524,
      "learning_rate": 6.053335389148391e-06,
      "loss": 2.3129,
      "step": 48504
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1796650886535645,
      "learning_rate": 6.052957078688379e-06,
      "loss": 2.3023,
      "step": 48505
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1800549030303955,
      "learning_rate": 6.0525787749196785e-06,
      "loss": 2.2928,
      "step": 48506
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1777451038360596,
      "learning_rate": 6.0522004778429276e-06,
      "loss": 2.3174,
      "step": 48507
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2878347635269165,
      "learning_rate": 6.05182218745877e-06,
      "loss": 2.2681,
      "step": 48508
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.062649130821228,
      "learning_rate": 6.051443903767849e-06,
      "loss": 2.2674,
      "step": 48509
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0495532751083374,
      "learning_rate": 6.0510656267708e-06,
      "loss": 2.2798,
      "step": 48510
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0827851295471191,
      "learning_rate": 6.050687356468268e-06,
      "loss": 2.2322,
      "step": 48511
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0851523876190186,
      "learning_rate": 6.050309092860892e-06,
      "loss": 2.4246,
      "step": 48512
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3544553518295288,
      "learning_rate": 6.049930835949316e-06,
      "loss": 2.232,
      "step": 48513
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0710647106170654,
      "learning_rate": 6.04955258573418e-06,
      "loss": 2.3513,
      "step": 48514
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1084941625595093,
      "learning_rate": 6.049174342216125e-06,
      "loss": 2.3379,
      "step": 48515
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1560701131820679,
      "learning_rate": 6.048796105395793e-06,
      "loss": 2.3256,
      "step": 48516
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.226908564567566,
      "learning_rate": 6.048417875273825e-06,
      "loss": 1.9861,
      "step": 48517
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.194441318511963,
      "learning_rate": 6.04803965185086e-06,
      "loss": 2.1042,
      "step": 48518
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1512454748153687,
      "learning_rate": 6.047661435127542e-06,
      "loss": 2.3583,
      "step": 48519
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.139500617980957,
      "learning_rate": 6.047283225104511e-06,
      "loss": 2.3115,
      "step": 48520
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.223182201385498,
      "learning_rate": 6.0469050217824085e-06,
      "loss": 2.4338,
      "step": 48521
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.112571120262146,
      "learning_rate": 6.0465268251618755e-06,
      "loss": 2.498,
      "step": 48522
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2950431108474731,
      "learning_rate": 6.046148635243558e-06,
      "loss": 2.2614,
      "step": 48523
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0449528694152832,
      "learning_rate": 6.045770452028089e-06,
      "loss": 2.3244,
      "step": 48524
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.068158507347107,
      "learning_rate": 6.045392275516112e-06,
      "loss": 2.3529,
      "step": 48525
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0019896030426025,
      "learning_rate": 6.045014105708269e-06,
      "loss": 2.3154,
      "step": 48526
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3379868268966675,
      "learning_rate": 6.0446359426052015e-06,
      "loss": 2.2919,
      "step": 48527
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9339873194694519,
      "learning_rate": 6.044257786207552e-06,
      "loss": 2.3032,
      "step": 48528
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0675122737884521,
      "learning_rate": 6.043879636515957e-06,
      "loss": 2.1949,
      "step": 48529
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0893781185150146,
      "learning_rate": 6.043501493531062e-06,
      "loss": 2.3739,
      "step": 48530
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0071302652359009,
      "learning_rate": 6.0431233572535065e-06,
      "loss": 2.1916,
      "step": 48531
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0553009510040283,
      "learning_rate": 6.042745227683931e-06,
      "loss": 2.4736,
      "step": 48532
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9841613173484802,
      "learning_rate": 6.042367104822978e-06,
      "loss": 2.5107,
      "step": 48533
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1474051475524902,
      "learning_rate": 6.041988988671288e-06,
      "loss": 2.4408,
      "step": 48534
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0591884851455688,
      "learning_rate": 6.0416108792295e-06,
      "loss": 2.2698,
      "step": 48535
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.003637433052063,
      "learning_rate": 6.041232776498262e-06,
      "loss": 2.1621,
      "step": 48536
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0514246225357056,
      "learning_rate": 6.040854680478205e-06,
      "loss": 2.4236,
      "step": 48537
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9660323858261108,
      "learning_rate": 6.0404765911699744e-06,
      "loss": 2.337,
      "step": 48538
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.16068434715271,
      "learning_rate": 6.040098508574212e-06,
      "loss": 2.1926,
      "step": 48539
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.057913899421692,
      "learning_rate": 6.039720432691558e-06,
      "loss": 2.6002,
      "step": 48540
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0331698656082153,
      "learning_rate": 6.039342363522654e-06,
      "loss": 2.4886,
      "step": 48541
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2319737672805786,
      "learning_rate": 6.038964301068139e-06,
      "loss": 2.1904,
      "step": 48542
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1254404783248901,
      "learning_rate": 6.038586245328656e-06,
      "loss": 2.2814,
      "step": 48543
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1757885217666626,
      "learning_rate": 6.038208196304847e-06,
      "loss": 2.1109,
      "step": 48544
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5078821182250977,
      "learning_rate": 6.0378301539973496e-06,
      "loss": 2.2726,
      "step": 48545
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6043775081634521,
      "learning_rate": 6.037452118406806e-06,
      "loss": 2.5202,
      "step": 48546
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0851233005523682,
      "learning_rate": 6.037074089533857e-06,
      "loss": 2.335,
      "step": 48547
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0542148351669312,
      "learning_rate": 6.036696067379144e-06,
      "loss": 2.1939,
      "step": 48548
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0310715436935425,
      "learning_rate": 6.03631805194331e-06,
      "loss": 2.1879,
      "step": 48549
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3528944253921509,
      "learning_rate": 6.035940043226994e-06,
      "loss": 2.3772,
      "step": 48550
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1733143329620361,
      "learning_rate": 6.035562041230831e-06,
      "loss": 2.1584,
      "step": 48551
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0199925899505615,
      "learning_rate": 6.03518404595547e-06,
      "loss": 2.4505,
      "step": 48552
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0537134408950806,
      "learning_rate": 6.0348060574015475e-06,
      "loss": 2.2797,
      "step": 48553
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0591360330581665,
      "learning_rate": 6.034428075569707e-06,
      "loss": 2.4147,
      "step": 48554
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1280986070632935,
      "learning_rate": 6.034050100460587e-06,
      "loss": 2.3718,
      "step": 48555
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.09788179397583,
      "learning_rate": 6.033672132074831e-06,
      "loss": 2.3395,
      "step": 48556
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0197842121124268,
      "learning_rate": 6.033294170413075e-06,
      "loss": 2.2424,
      "step": 48557
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2488422393798828,
      "learning_rate": 6.032916215475963e-06,
      "loss": 2.4109,
      "step": 48558
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0403635501861572,
      "learning_rate": 6.032538267264139e-06,
      "loss": 2.3956,
      "step": 48559
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1783353090286255,
      "learning_rate": 6.0321603257782365e-06,
      "loss": 2.2988,
      "step": 48560
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1017009019851685,
      "learning_rate": 6.031782391018901e-06,
      "loss": 2.3724,
      "step": 48561
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0428061485290527,
      "learning_rate": 6.031404462986773e-06,
      "loss": 2.2983,
      "step": 48562
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0808168649673462,
      "learning_rate": 6.031026541682495e-06,
      "loss": 2.2905,
      "step": 48563
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0614782571792603,
      "learning_rate": 6.030648627106702e-06,
      "loss": 2.519,
      "step": 48564
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1473467350006104,
      "learning_rate": 6.030270719260037e-06,
      "loss": 2.4534,
      "step": 48565
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.237499713897705,
      "learning_rate": 6.0298928181431414e-06,
      "loss": 2.2544,
      "step": 48566
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0594791173934937,
      "learning_rate": 6.029514923756657e-06,
      "loss": 2.4775,
      "step": 48567
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0630735158920288,
      "learning_rate": 6.029137036101221e-06,
      "loss": 2.3921,
      "step": 48568
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3429144620895386,
      "learning_rate": 6.028759155177477e-06,
      "loss": 2.3463,
      "step": 48569
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2376748323440552,
      "learning_rate": 6.028381280986065e-06,
      "loss": 2.4606,
      "step": 48570
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0730584859848022,
      "learning_rate": 6.0280034135276254e-06,
      "loss": 2.3196,
      "step": 48571
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0983651876449585,
      "learning_rate": 6.027625552802802e-06,
      "loss": 2.3494,
      "step": 48572
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.029300332069397,
      "learning_rate": 6.027247698812229e-06,
      "loss": 2.4809,
      "step": 48573
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.118515968322754,
      "learning_rate": 6.02686985155655e-06,
      "loss": 2.2449,
      "step": 48574
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0689646005630493,
      "learning_rate": 6.026492011036404e-06,
      "loss": 2.5562,
      "step": 48575
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9891618490219116,
      "learning_rate": 6.026114177252438e-06,
      "loss": 2.3572,
      "step": 48576
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.071813941001892,
      "learning_rate": 6.025736350205285e-06,
      "loss": 2.3596,
      "step": 48577
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1004998683929443,
      "learning_rate": 6.0253585298955906e-06,
      "loss": 2.4798,
      "step": 48578
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0207802057266235,
      "learning_rate": 6.024980716323991e-06,
      "loss": 2.1702,
      "step": 48579
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0933594703674316,
      "learning_rate": 6.024602909491128e-06,
      "loss": 2.1142,
      "step": 48580
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.046876311302185,
      "learning_rate": 6.024225109397642e-06,
      "loss": 2.3695,
      "step": 48581
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0061804056167603,
      "learning_rate": 6.023847316044176e-06,
      "loss": 2.397,
      "step": 48582
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9693365097045898,
      "learning_rate": 6.0234695294313675e-06,
      "loss": 2.1691,
      "step": 48583
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2049323320388794,
      "learning_rate": 6.023091749559859e-06,
      "loss": 2.4286,
      "step": 48584
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1517796516418457,
      "learning_rate": 6.022713976430289e-06,
      "loss": 2.356,
      "step": 48585
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3008698225021362,
      "learning_rate": 6.0223362100433e-06,
      "loss": 2.3755,
      "step": 48586
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1055601835250854,
      "learning_rate": 6.021958450399531e-06,
      "loss": 2.5694,
      "step": 48587
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1905368566513062,
      "learning_rate": 6.021580697499623e-06,
      "loss": 2.1693,
      "step": 48588
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0734645128250122,
      "learning_rate": 6.021202951344221e-06,
      "loss": 2.1883,
      "step": 48589
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1201633214950562,
      "learning_rate": 6.020825211933955e-06,
      "loss": 2.205,
      "step": 48590
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1110374927520752,
      "learning_rate": 6.020447479269471e-06,
      "loss": 2.6403,
      "step": 48591
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9743661880493164,
      "learning_rate": 6.020069753351411e-06,
      "loss": 2.35,
      "step": 48592
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1592522859573364,
      "learning_rate": 6.019692034180414e-06,
      "loss": 2.3104,
      "step": 48593
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1574636697769165,
      "learning_rate": 6.019314321757117e-06,
      "loss": 2.2258,
      "step": 48594
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1388561725616455,
      "learning_rate": 6.018936616082166e-06,
      "loss": 2.4725,
      "step": 48595
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0534858703613281,
      "learning_rate": 6.018558917156199e-06,
      "loss": 2.3454,
      "step": 48596
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0901097059249878,
      "learning_rate": 6.0181812249798535e-06,
      "loss": 2.4296,
      "step": 48597
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.09883713722229,
      "learning_rate": 6.0178035395537745e-06,
      "loss": 2.4659,
      "step": 48598
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9915594458580017,
      "learning_rate": 6.017425860878599e-06,
      "loss": 2.2178,
      "step": 48599
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0834637880325317,
      "learning_rate": 6.01704818895497e-06,
      "loss": 2.3213,
      "step": 48600
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0606828927993774,
      "learning_rate": 6.016670523783525e-06,
      "loss": 2.3878,
      "step": 48601
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0172141790390015,
      "learning_rate": 6.016292865364908e-06,
      "loss": 2.3065,
      "step": 48602
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0374197959899902,
      "learning_rate": 6.015915213699752e-06,
      "loss": 2.3653,
      "step": 48603
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0277200937271118,
      "learning_rate": 6.015537568788704e-06,
      "loss": 2.4633,
      "step": 48604
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.155375361442566,
      "learning_rate": 6.015159930632401e-06,
      "loss": 2.2441,
      "step": 48605
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1336455345153809,
      "learning_rate": 6.014782299231487e-06,
      "loss": 2.2654,
      "step": 48606
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1727794408798218,
      "learning_rate": 6.014404674586597e-06,
      "loss": 2.3637,
      "step": 48607
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.149963140487671,
      "learning_rate": 6.014027056698373e-06,
      "loss": 2.1823,
      "step": 48608
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1553922891616821,
      "learning_rate": 6.013649445567455e-06,
      "loss": 2.4701,
      "step": 48609
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1274608373641968,
      "learning_rate": 6.013271841194486e-06,
      "loss": 2.1273,
      "step": 48610
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.570734977722168,
      "learning_rate": 6.012894243580103e-06,
      "loss": 2.2574,
      "step": 48611
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1531224250793457,
      "learning_rate": 6.012516652724947e-06,
      "loss": 2.347,
      "step": 48612
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1002932786941528,
      "learning_rate": 6.012139068629659e-06,
      "loss": 2.3324,
      "step": 48613
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1242363452911377,
      "learning_rate": 6.0117614912948775e-06,
      "loss": 2.2284,
      "step": 48614
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.142439365386963,
      "learning_rate": 6.0113839207212476e-06,
      "loss": 2.4372,
      "step": 48615
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.055754542350769,
      "learning_rate": 6.011006356909401e-06,
      "loss": 2.1376,
      "step": 48616
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9812156558036804,
      "learning_rate": 6.010628799859983e-06,
      "loss": 2.3511,
      "step": 48617
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1392067670822144,
      "learning_rate": 6.010251249573632e-06,
      "loss": 2.2552,
      "step": 48618
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1396355628967285,
      "learning_rate": 6.009873706050988e-06,
      "loss": 2.4981,
      "step": 48619
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1061193943023682,
      "learning_rate": 6.009496169292694e-06,
      "loss": 2.3686,
      "step": 48620
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.146477460861206,
      "learning_rate": 6.0091186392993865e-06,
      "loss": 2.0933,
      "step": 48621
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1498889923095703,
      "learning_rate": 6.008741116071706e-06,
      "loss": 2.4201,
      "step": 48622
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0029261112213135,
      "learning_rate": 6.008363599610294e-06,
      "loss": 2.2467,
      "step": 48623
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0629653930664062,
      "learning_rate": 6.00798608991579e-06,
      "loss": 2.3875,
      "step": 48624
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.073243498802185,
      "learning_rate": 6.007608586988833e-06,
      "loss": 2.3614,
      "step": 48625
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0798258781433105,
      "learning_rate": 6.007231090830066e-06,
      "loss": 2.229,
      "step": 48626
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0890568494796753,
      "learning_rate": 6.006853601440126e-06,
      "loss": 2.3815,
      "step": 48627
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0716235637664795,
      "learning_rate": 6.006476118819653e-06,
      "loss": 2.2156,
      "step": 48628
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0106489658355713,
      "learning_rate": 6.006098642969289e-06,
      "loss": 2.3649,
      "step": 48629
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0680893659591675,
      "learning_rate": 6.005721173889671e-06,
      "loss": 2.4183,
      "step": 48630
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0994454622268677,
      "learning_rate": 6.0053437115814404e-06,
      "loss": 2.3074,
      "step": 48631
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.233012080192566,
      "learning_rate": 6.004966256045237e-06,
      "loss": 2.5057,
      "step": 48632
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0395102500915527,
      "learning_rate": 6.004588807281702e-06,
      "loss": 2.4232,
      "step": 48633
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.885246992111206,
      "learning_rate": 6.004211365291475e-06,
      "loss": 2.536,
      "step": 48634
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9850316047668457,
      "learning_rate": 6.003833930075192e-06,
      "loss": 2.3682,
      "step": 48635
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0893367528915405,
      "learning_rate": 6.0034565016334975e-06,
      "loss": 2.0821,
      "step": 48636
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9932586550712585,
      "learning_rate": 6.003079079967029e-06,
      "loss": 2.3005,
      "step": 48637
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.027950644493103,
      "learning_rate": 6.002701665076427e-06,
      "loss": 2.1756,
      "step": 48638
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2736343145370483,
      "learning_rate": 6.002324256962331e-06,
      "loss": 2.4015,
      "step": 48639
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0665584802627563,
      "learning_rate": 6.001946855625381e-06,
      "loss": 2.2983,
      "step": 48640
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1031494140625,
      "learning_rate": 6.001569461066218e-06,
      "loss": 2.4928,
      "step": 48641
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.083762764930725,
      "learning_rate": 6.001192073285485e-06,
      "loss": 2.3769,
      "step": 48642
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9791336059570312,
      "learning_rate": 6.000814692283812e-06,
      "loss": 2.3199,
      "step": 48643
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9677894711494446,
      "learning_rate": 6.000437318061845e-06,
      "loss": 2.3213,
      "step": 48644
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0972825288772583,
      "learning_rate": 6.000059950620223e-06,
      "loss": 2.5244,
      "step": 48645
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.196142315864563,
      "learning_rate": 5.999682589959586e-06,
      "loss": 2.2378,
      "step": 48646
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0608510971069336,
      "learning_rate": 5.9993052360805726e-06,
      "loss": 2.3729,
      "step": 48647
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0493714809417725,
      "learning_rate": 5.9989278889838235e-06,
      "loss": 2.4065,
      "step": 48648
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9640229344367981,
      "learning_rate": 5.998550548669979e-06,
      "loss": 2.1486,
      "step": 48649
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0716400146484375,
      "learning_rate": 5.998173215139678e-06,
      "loss": 2.5466,
      "step": 48650
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0003582239151,
      "learning_rate": 5.99779588839356e-06,
      "loss": 2.0951,
      "step": 48651
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0563400983810425,
      "learning_rate": 5.997418568432265e-06,
      "loss": 2.2601,
      "step": 48652
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0718244314193726,
      "learning_rate": 5.997041255256433e-06,
      "loss": 2.3124,
      "step": 48653
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1435033082962036,
      "learning_rate": 5.996663948866703e-06,
      "loss": 2.4995,
      "step": 48654
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0743030309677124,
      "learning_rate": 5.9962866492637185e-06,
      "loss": 2.492,
      "step": 48655
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1063019037246704,
      "learning_rate": 5.995909356448113e-06,
      "loss": 2.2081,
      "step": 48656
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1244150400161743,
      "learning_rate": 5.9955320704205266e-06,
      "loss": 2.309,
      "step": 48657
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1174813508987427,
      "learning_rate": 5.995154791181602e-06,
      "loss": 2.3677,
      "step": 48658
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0687264204025269,
      "learning_rate": 5.994777518731978e-06,
      "loss": 2.3094,
      "step": 48659
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0160716772079468,
      "learning_rate": 5.994400253072293e-06,
      "loss": 2.4455,
      "step": 48660
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1644511222839355,
      "learning_rate": 5.994022994203188e-06,
      "loss": 2.3043,
      "step": 48661
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1331007480621338,
      "learning_rate": 5.993645742125305e-06,
      "loss": 2.2389,
      "step": 48662
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1801412105560303,
      "learning_rate": 5.9932684968392774e-06,
      "loss": 2.2997,
      "step": 48663
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0418224334716797,
      "learning_rate": 5.992891258345748e-06,
      "loss": 2.3142,
      "step": 48664
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0428043603897095,
      "learning_rate": 5.992514026645355e-06,
      "loss": 2.195,
      "step": 48665
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1582711935043335,
      "learning_rate": 5.992136801738741e-06,
      "loss": 2.2071,
      "step": 48666
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1880320310592651,
      "learning_rate": 5.991759583626544e-06,
      "loss": 2.1903,
      "step": 48667
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1279405355453491,
      "learning_rate": 5.991382372309407e-06,
      "loss": 2.3832,
      "step": 48668
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0852272510528564,
      "learning_rate": 5.99100516778796e-06,
      "loss": 2.4769,
      "step": 48669
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0513477325439453,
      "learning_rate": 5.990627970062849e-06,
      "loss": 2.2662,
      "step": 48670
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.9932194352149963,
      "learning_rate": 5.990250779134712e-06,
      "loss": 2.3523,
      "step": 48671
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1887232065200806,
      "learning_rate": 5.9898735950041895e-06,
      "loss": 2.5678,
      "step": 48672
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0441409349441528,
      "learning_rate": 5.989496417671921e-06,
      "loss": 2.4176,
      "step": 48673
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0233042240142822,
      "learning_rate": 5.989119247138544e-06,
      "loss": 2.1311,
      "step": 48674
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3309071063995361,
      "learning_rate": 5.988742083404699e-06,
      "loss": 2.2475,
      "step": 48675
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1064136028289795,
      "learning_rate": 5.9883649264710276e-06,
      "loss": 2.2516,
      "step": 48676
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0925182104110718,
      "learning_rate": 5.987987776338166e-06,
      "loss": 2.308,
      "step": 48677
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0391173362731934,
      "learning_rate": 5.987610633006755e-06,
      "loss": 2.4563,
      "step": 48678
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0844417810440063,
      "learning_rate": 5.987233496477434e-06,
      "loss": 2.2527,
      "step": 48679
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1304386854171753,
      "learning_rate": 5.98685636675084e-06,
      "loss": 2.3425,
      "step": 48680
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0879521369934082,
      "learning_rate": 5.986479243827621e-06,
      "loss": 2.4525,
      "step": 48681
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.060593605041504,
      "learning_rate": 5.986102127708404e-06,
      "loss": 2.2112,
      "step": 48682
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0856871604919434,
      "learning_rate": 5.985725018393836e-06,
      "loss": 2.4105,
      "step": 48683
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.009092092514038,
      "learning_rate": 5.985347915884552e-06,
      "loss": 2.1837,
      "step": 48684
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.054384708404541,
      "learning_rate": 5.9849708201811955e-06,
      "loss": 2.5825,
      "step": 48685
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.066554069519043,
      "learning_rate": 5.984593731284403e-06,
      "loss": 2.4175,
      "step": 48686
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0898702144622803,
      "learning_rate": 5.984216649194815e-06,
      "loss": 2.4455,
      "step": 48687
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0114929676055908,
      "learning_rate": 5.983839573913071e-06,
      "loss": 2.3227,
      "step": 48688
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0878140926361084,
      "learning_rate": 5.9834625054398085e-06,
      "loss": 2.4173,
      "step": 48689
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.374933123588562,
      "learning_rate": 5.9830854437756714e-06,
      "loss": 2.549,
      "step": 48690
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0807584524154663,
      "learning_rate": 5.982708388921292e-06,
      "loss": 2.1493,
      "step": 48691
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2324724197387695,
      "learning_rate": 5.982331340877313e-06,
      "loss": 2.2991,
      "step": 48692
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2494969367980957,
      "learning_rate": 5.981954299644374e-06,
      "loss": 2.1469,
      "step": 48693
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.217957615852356,
      "learning_rate": 5.9815772652231166e-06,
      "loss": 2.3352,
      "step": 48694
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2767056226730347,
      "learning_rate": 5.981200237614175e-06,
      "loss": 2.5173,
      "step": 48695
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1349562406539917,
      "learning_rate": 5.980823216818191e-06,
      "loss": 2.2555,
      "step": 48696
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0568995475769043,
      "learning_rate": 5.980446202835802e-06,
      "loss": 2.2034,
      "step": 48697
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0086420774459839,
      "learning_rate": 5.980069195667648e-06,
      "loss": 2.1542,
      "step": 48698
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.131132960319519,
      "learning_rate": 5.97969219531437e-06,
      "loss": 2.318,
      "step": 48699
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1821942329406738,
      "learning_rate": 5.979315201776604e-06,
      "loss": 2.4573,
      "step": 48700
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.112539529800415,
      "learning_rate": 5.978938215054993e-06,
      "loss": 2.2626,
      "step": 48701
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.106492519378662,
      "learning_rate": 5.978561235150171e-06,
      "loss": 2.6697,
      "step": 48702
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.15016770362854,
      "learning_rate": 5.978184262062781e-06,
      "loss": 2.262,
      "step": 48703
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.129492163658142,
      "learning_rate": 5.977807295793461e-06,
      "loss": 2.3449,
      "step": 48704
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0007833242416382,
      "learning_rate": 5.97743033634285e-06,
      "loss": 2.3017,
      "step": 48705
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.06260347366333,
      "learning_rate": 5.977053383711587e-06,
      "loss": 2.451,
      "step": 48706
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1485871076583862,
      "learning_rate": 5.976676437900312e-06,
      "loss": 2.1915,
      "step": 48707
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.055866003036499,
      "learning_rate": 5.9762994989096655e-06,
      "loss": 2.1874,
      "step": 48708
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0507497787475586,
      "learning_rate": 5.975922566740282e-06,
      "loss": 2.2553,
      "step": 48709
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0857772827148438,
      "learning_rate": 5.975545641392802e-06,
      "loss": 2.5125,
      "step": 48710
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.018017292022705,
      "learning_rate": 5.9751687228678655e-06,
      "loss": 2.1252,
      "step": 48711
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.075993299484253,
      "learning_rate": 5.97479181116611e-06,
      "loss": 2.3075,
      "step": 48712
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2481225728988647,
      "learning_rate": 5.974414906288176e-06,
      "loss": 2.47,
      "step": 48713
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1197322607040405,
      "learning_rate": 5.974038008234702e-06,
      "loss": 2.2778,
      "step": 48714
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0541770458221436,
      "learning_rate": 5.9736611170063286e-06,
      "loss": 2.2809,
      "step": 48715
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.979555070400238,
      "learning_rate": 5.973284232603691e-06,
      "loss": 1.9685,
      "step": 48716
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0461463928222656,
      "learning_rate": 5.972907355027431e-06,
      "loss": 2.543,
      "step": 48717
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.251217007637024,
      "learning_rate": 5.972530484278189e-06,
      "loss": 2.3325,
      "step": 48718
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0812244415283203,
      "learning_rate": 5.9721536203566e-06,
      "loss": 2.3778,
      "step": 48719
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0042178630828857,
      "learning_rate": 5.971776763263304e-06,
      "loss": 2.4031,
      "step": 48720
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0005273818969727,
      "learning_rate": 5.971399912998943e-06,
      "loss": 2.1237,
      "step": 48721
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0305966138839722,
      "learning_rate": 5.97102306956415e-06,
      "loss": 2.3272,
      "step": 48722
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.07097589969635,
      "learning_rate": 5.970646232959569e-06,
      "loss": 2.4153,
      "step": 48723
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.043168067932129,
      "learning_rate": 5.970269403185838e-06,
      "loss": 2.2813,
      "step": 48724
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1149498224258423,
      "learning_rate": 5.969892580243592e-06,
      "loss": 2.3321,
      "step": 48725
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1217384338378906,
      "learning_rate": 5.969515764133475e-06,
      "loss": 2.4092,
      "step": 48726
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0470184087753296,
      "learning_rate": 5.969138954856121e-06,
      "loss": 2.2295,
      "step": 48727
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0655269622802734,
      "learning_rate": 5.968762152412173e-06,
      "loss": 2.4583,
      "step": 48728
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1530344486236572,
      "learning_rate": 5.9683853568022665e-06,
      "loss": 2.2072,
      "step": 48729
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.111568808555603,
      "learning_rate": 5.968008568027044e-06,
      "loss": 2.5302,
      "step": 48730
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2629865407943726,
      "learning_rate": 5.96763178608714e-06,
      "loss": 2.2786,
      "step": 48731
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.098537802696228,
      "learning_rate": 5.967255010983195e-06,
      "loss": 2.3368,
      "step": 48732
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0460847616195679,
      "learning_rate": 5.96687824271585e-06,
      "loss": 2.3507,
      "step": 48733
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.081745982170105,
      "learning_rate": 5.966501481285745e-06,
      "loss": 2.2668,
      "step": 48734
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0283912420272827,
      "learning_rate": 5.9661247266935096e-06,
      "loss": 2.464,
      "step": 48735
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.069414496421814,
      "learning_rate": 5.965747978939791e-06,
      "loss": 2.344,
      "step": 48736
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0684694051742554,
      "learning_rate": 5.9653712380252235e-06,
      "loss": 2.0734,
      "step": 48737
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.057708501815796,
      "learning_rate": 5.964994503950449e-06,
      "loss": 2.3747,
      "step": 48738
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0531078577041626,
      "learning_rate": 5.964617776716104e-06,
      "loss": 2.312,
      "step": 48739
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3415772914886475,
      "learning_rate": 5.964241056322828e-06,
      "loss": 2.1952,
      "step": 48740
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0597461462020874,
      "learning_rate": 5.963864342771259e-06,
      "loss": 2.4729,
      "step": 48741
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1749811172485352,
      "learning_rate": 5.963487636062036e-06,
      "loss": 2.2791,
      "step": 48742
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0453639030456543,
      "learning_rate": 5.963110936195798e-06,
      "loss": 2.27,
      "step": 48743
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0975128412246704,
      "learning_rate": 5.962734243173184e-06,
      "loss": 2.2105,
      "step": 48744
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0848811864852905,
      "learning_rate": 5.962357556994833e-06,
      "loss": 2.0428,
      "step": 48745
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.169797658920288,
      "learning_rate": 5.961980877661382e-06,
      "loss": 2.3468,
      "step": 48746
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1294963359832764,
      "learning_rate": 5.96160420517347e-06,
      "loss": 2.2026,
      "step": 48747
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1268284320831299,
      "learning_rate": 5.961227539531736e-06,
      "loss": 2.1942,
      "step": 48748
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.34070885181427,
      "learning_rate": 5.960850880736817e-06,
      "loss": 2.4962,
      "step": 48749
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0322785377502441,
      "learning_rate": 5.960474228789353e-06,
      "loss": 2.5227,
      "step": 48750
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0527745485305786,
      "learning_rate": 5.960097583689982e-06,
      "loss": 2.3901,
      "step": 48751
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1377346515655518,
      "learning_rate": 5.959720945439345e-06,
      "loss": 2.3949,
      "step": 48752
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3787190914154053,
      "learning_rate": 5.959344314038076e-06,
      "loss": 2.1675,
      "step": 48753
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.083852767944336,
      "learning_rate": 5.958967689486816e-06,
      "loss": 2.347,
      "step": 48754
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1278101205825806,
      "learning_rate": 5.958591071786204e-06,
      "loss": 2.3213,
      "step": 48755
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.077852725982666,
      "learning_rate": 5.958214460936877e-06,
      "loss": 2.2286,
      "step": 48756
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0850988626480103,
      "learning_rate": 5.957837856939475e-06,
      "loss": 2.3784,
      "step": 48757
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1040900945663452,
      "learning_rate": 5.957461259794635e-06,
      "loss": 2.1747,
      "step": 48758
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.066288948059082,
      "learning_rate": 5.957084669502996e-06,
      "loss": 2.2249,
      "step": 48759
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1174043416976929,
      "learning_rate": 5.956708086065201e-06,
      "loss": 2.4357,
      "step": 48760
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.013430118560791,
      "learning_rate": 5.9563315094818795e-06,
      "loss": 2.0842,
      "step": 48761
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4256136417388916,
      "learning_rate": 5.955954939753674e-06,
      "loss": 2.3007,
      "step": 48762
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2241700887680054,
      "learning_rate": 5.955578376881224e-06,
      "loss": 2.7372,
      "step": 48763
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2075196504592896,
      "learning_rate": 5.9552018208651675e-06,
      "loss": 2.2069,
      "step": 48764
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0738614797592163,
      "learning_rate": 5.954825271706142e-06,
      "loss": 2.3048,
      "step": 48765
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1139386892318726,
      "learning_rate": 5.954448729404786e-06,
      "loss": 2.3904,
      "step": 48766
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0834159851074219,
      "learning_rate": 5.954072193961737e-06,
      "loss": 2.1745,
      "step": 48767
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.0360791683197021,
      "learning_rate": 5.953695665377637e-06,
      "loss": 2.3629,
      "step": 48768
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1122767925262451,
      "learning_rate": 5.95331914365312e-06,
      "loss": 2.5137,
      "step": 48769
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0490416288375854,
      "learning_rate": 5.952942628788827e-06,
      "loss": 2.2893,
      "step": 48770
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9990584254264832,
      "learning_rate": 5.952566120785395e-06,
      "loss": 2.2013,
      "step": 48771
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9357103705406189,
      "learning_rate": 5.952189619643462e-06,
      "loss": 2.3203,
      "step": 48772
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1062803268432617,
      "learning_rate": 5.95181312536367e-06,
      "loss": 2.2993,
      "step": 48773
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0246033668518066,
      "learning_rate": 5.951436637946655e-06,
      "loss": 2.539,
      "step": 48774
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0974080562591553,
      "learning_rate": 5.951060157393052e-06,
      "loss": 2.36,
      "step": 48775
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0090328454971313,
      "learning_rate": 5.9506836837035e-06,
      "loss": 2.5067,
      "step": 48776
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.163530945777893,
      "learning_rate": 5.950307216878641e-06,
      "loss": 2.5057,
      "step": 48777
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0141856670379639,
      "learning_rate": 5.94993075691911e-06,
      "loss": 2.3542,
      "step": 48778
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1143858432769775,
      "learning_rate": 5.949554303825547e-06,
      "loss": 2.2097,
      "step": 48779
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0895980596542358,
      "learning_rate": 5.949177857598592e-06,
      "loss": 2.5201,
      "step": 48780
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1473662853240967,
      "learning_rate": 5.948801418238876e-06,
      "loss": 2.1907,
      "step": 48781
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0891737937927246,
      "learning_rate": 5.948424985747044e-06,
      "loss": 2.2567,
      "step": 48782
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.200974941253662,
      "learning_rate": 5.948048560123732e-06,
      "loss": 2.3785,
      "step": 48783
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9722033739089966,
      "learning_rate": 5.9476721413695786e-06,
      "loss": 2.2666,
      "step": 48784
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1479471921920776,
      "learning_rate": 5.947295729485221e-06,
      "loss": 2.1768,
      "step": 48785
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0090522766113281,
      "learning_rate": 5.946919324471298e-06,
      "loss": 2.2638,
      "step": 48786
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0191380977630615,
      "learning_rate": 5.946542926328451e-06,
      "loss": 2.3342,
      "step": 48787
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2623567581176758,
      "learning_rate": 5.9461665350573104e-06,
      "loss": 2.3907,
      "step": 48788
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0725855827331543,
      "learning_rate": 5.945790150658519e-06,
      "loss": 2.2417,
      "step": 48789
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0906236171722412,
      "learning_rate": 5.945413773132715e-06,
      "loss": 2.3456,
      "step": 48790
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1349482536315918,
      "learning_rate": 5.945037402480536e-06,
      "loss": 2.2802,
      "step": 48791
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1416594982147217,
      "learning_rate": 5.944661038702618e-06,
      "loss": 2.3252,
      "step": 48792
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1461293697357178,
      "learning_rate": 5.944284681799602e-06,
      "loss": 2.2872,
      "step": 48793
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2738721370697021,
      "learning_rate": 5.943908331772125e-06,
      "loss": 2.1552,
      "step": 48794
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.131103754043579,
      "learning_rate": 5.9435319886208255e-06,
      "loss": 2.2859,
      "step": 48795
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0565921068191528,
      "learning_rate": 5.943155652346342e-06,
      "loss": 2.3209,
      "step": 48796
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9552994966506958,
      "learning_rate": 5.942779322949309e-06,
      "loss": 2.0678,
      "step": 48797
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1556203365325928,
      "learning_rate": 5.942403000430368e-06,
      "loss": 2.4432,
      "step": 48798
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.042314052581787,
      "learning_rate": 5.942026684790156e-06,
      "loss": 2.2659,
      "step": 48799
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.169232726097107,
      "learning_rate": 5.941650376029316e-06,
      "loss": 2.5771,
      "step": 48800
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1419470310211182,
      "learning_rate": 5.9412740741484755e-06,
      "loss": 2.2366,
      "step": 48801
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1009901762008667,
      "learning_rate": 5.940897779148278e-06,
      "loss": 2.1405,
      "step": 48802
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0540918111801147,
      "learning_rate": 5.940521491029361e-06,
      "loss": 2.2857,
      "step": 48803
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0435233116149902,
      "learning_rate": 5.940145209792363e-06,
      "loss": 2.4277,
      "step": 48804
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0546798706054688,
      "learning_rate": 5.939768935437922e-06,
      "loss": 2.1923,
      "step": 48805
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9681958556175232,
      "learning_rate": 5.939392667966675e-06,
      "loss": 2.3527,
      "step": 48806
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0993616580963135,
      "learning_rate": 5.93901640737926e-06,
      "loss": 2.1048,
      "step": 48807
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1075811386108398,
      "learning_rate": 5.938640153676317e-06,
      "loss": 2.276,
      "step": 48808
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0343178510665894,
      "learning_rate": 5.93826390685848e-06,
      "loss": 2.4744,
      "step": 48809
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0565173625946045,
      "learning_rate": 5.937887666926389e-06,
      "loss": 2.2499,
      "step": 48810
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4390019178390503,
      "learning_rate": 5.937511433880683e-06,
      "loss": 2.3981,
      "step": 48811
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1446861028671265,
      "learning_rate": 5.9371352077219954e-06,
      "loss": 2.412,
      "step": 48812
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.7253525257110596,
      "learning_rate": 5.9367589884509726e-06,
      "loss": 2.4364,
      "step": 48813
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0910719633102417,
      "learning_rate": 5.936382776068244e-06,
      "loss": 2.3458,
      "step": 48814
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0577179193496704,
      "learning_rate": 5.9360065705744505e-06,
      "loss": 2.2312,
      "step": 48815
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0637534856796265,
      "learning_rate": 5.935630371970228e-06,
      "loss": 2.4644,
      "step": 48816
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0997809171676636,
      "learning_rate": 5.9352541802562156e-06,
      "loss": 2.5891,
      "step": 48817
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0041526556015015,
      "learning_rate": 5.934877995433053e-06,
      "loss": 2.2627,
      "step": 48818
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0300604104995728,
      "learning_rate": 5.9345018175013756e-06,
      "loss": 2.3446,
      "step": 48819
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1012738943099976,
      "learning_rate": 5.9341256464618216e-06,
      "loss": 2.3307,
      "step": 48820
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9952098727226257,
      "learning_rate": 5.933749482315029e-06,
      "loss": 2.2392,
      "step": 48821
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0805150270462036,
      "learning_rate": 5.9333733250616355e-06,
      "loss": 2.5033,
      "step": 48822
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.145531415939331,
      "learning_rate": 5.932997174702278e-06,
      "loss": 2.2829,
      "step": 48823
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0225032567977905,
      "learning_rate": 5.932621031237596e-06,
      "loss": 2.58,
      "step": 48824
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0919206142425537,
      "learning_rate": 5.932244894668225e-06,
      "loss": 2.3433,
      "step": 48825
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0819381475448608,
      "learning_rate": 5.9318687649948085e-06,
      "loss": 2.3337,
      "step": 48826
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9973843097686768,
      "learning_rate": 5.9314926422179755e-06,
      "loss": 2.2065,
      "step": 48827
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.003629446029663,
      "learning_rate": 5.931116526338367e-06,
      "loss": 2.2337,
      "step": 48828
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.102874755859375,
      "learning_rate": 5.93074041735662e-06,
      "loss": 2.3415,
      "step": 48829
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.20767080783844,
      "learning_rate": 5.930364315273375e-06,
      "loss": 2.4143,
      "step": 48830
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.967437207698822,
      "learning_rate": 5.929988220089266e-06,
      "loss": 2.2584,
      "step": 48831
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.050640344619751,
      "learning_rate": 5.929612131804934e-06,
      "loss": 2.1895,
      "step": 48832
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1249613761901855,
      "learning_rate": 5.929236050421014e-06,
      "loss": 2.4615,
      "step": 48833
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9353447556495667,
      "learning_rate": 5.928859975938145e-06,
      "loss": 2.603,
      "step": 48834
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1687415838241577,
      "learning_rate": 5.928483908356963e-06,
      "loss": 2.3097,
      "step": 48835
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0462086200714111,
      "learning_rate": 5.92810784767811e-06,
      "loss": 2.2905,
      "step": 48836
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1188793182373047,
      "learning_rate": 5.9277317939022185e-06,
      "loss": 2.4733,
      "step": 48837
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1190259456634521,
      "learning_rate": 5.9273557470299255e-06,
      "loss": 2.4949,
      "step": 48838
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.009981632232666,
      "learning_rate": 5.926979707061875e-06,
      "loss": 2.4865,
      "step": 48839
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0532801151275635,
      "learning_rate": 5.926603673998696e-06,
      "loss": 2.3276,
      "step": 48840
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9738783240318298,
      "learning_rate": 5.926227647841032e-06,
      "loss": 2.2908,
      "step": 48841
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.992963969707489,
      "learning_rate": 5.925851628589519e-06,
      "loss": 2.538,
      "step": 48842
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0506627559661865,
      "learning_rate": 5.9254756162447924e-06,
      "loss": 2.4583,
      "step": 48843
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1065669059753418,
      "learning_rate": 5.925099610807492e-06,
      "loss": 2.3332,
      "step": 48844
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2673077583312988,
      "learning_rate": 5.924723612278254e-06,
      "loss": 2.3194,
      "step": 48845
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.007794976234436,
      "learning_rate": 5.924347620657716e-06,
      "loss": 2.224,
      "step": 48846
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0890353918075562,
      "learning_rate": 5.9239716359465145e-06,
      "loss": 2.5859,
      "step": 48847
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0389440059661865,
      "learning_rate": 5.923595658145291e-06,
      "loss": 2.1356,
      "step": 48848
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9513076543807983,
      "learning_rate": 5.923219687254678e-06,
      "loss": 2.4261,
      "step": 48849
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0467274188995361,
      "learning_rate": 5.922843723275316e-06,
      "loss": 2.3005,
      "step": 48850
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1022822856903076,
      "learning_rate": 5.92246776620784e-06,
      "loss": 2.2855,
      "step": 48851
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9544757604598999,
      "learning_rate": 5.922091816052889e-06,
      "loss": 2.2507,
      "step": 48852
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1437479257583618,
      "learning_rate": 5.921715872811105e-06,
      "loss": 2.2687,
      "step": 48853
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1689709424972534,
      "learning_rate": 5.921339936483115e-06,
      "loss": 2.316,
      "step": 48854
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0758309364318848,
      "learning_rate": 5.920964007069563e-06,
      "loss": 2.33,
      "step": 48855
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1113005876541138,
      "learning_rate": 5.920588084571082e-06,
      "loss": 2.4204,
      "step": 48856
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3526737689971924,
      "learning_rate": 5.920212168988315e-06,
      "loss": 2.4015,
      "step": 48857
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0709788799285889,
      "learning_rate": 5.919836260321895e-06,
      "loss": 2.1538,
      "step": 48858
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.187692403793335,
      "learning_rate": 5.9194603585724615e-06,
      "loss": 2.2412,
      "step": 48859
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1477664709091187,
      "learning_rate": 5.91908446374065e-06,
      "loss": 2.4421,
      "step": 48860
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3561081886291504,
      "learning_rate": 5.918708575827099e-06,
      "loss": 2.2496,
      "step": 48861
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1515121459960938,
      "learning_rate": 5.918332694832446e-06,
      "loss": 2.6156,
      "step": 48862
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1053404808044434,
      "learning_rate": 5.917956820757329e-06,
      "loss": 2.4187,
      "step": 48863
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9994054436683655,
      "learning_rate": 5.9175809536023825e-06,
      "loss": 2.3198,
      "step": 48864
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1373554468154907,
      "learning_rate": 5.9172050933682455e-06,
      "loss": 2.3105,
      "step": 48865
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0549269914627075,
      "learning_rate": 5.916829240055555e-06,
      "loss": 2.3484,
      "step": 48866
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0391408205032349,
      "learning_rate": 5.916453393664947e-06,
      "loss": 2.4356,
      "step": 48867
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.594444513320923,
      "learning_rate": 5.916077554197059e-06,
      "loss": 2.3573,
      "step": 48868
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1774165630340576,
      "learning_rate": 5.915701721652529e-06,
      "loss": 2.2755,
      "step": 48869
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0129114389419556,
      "learning_rate": 5.915325896031997e-06,
      "loss": 2.1543,
      "step": 48870
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0795656442642212,
      "learning_rate": 5.914950077336094e-06,
      "loss": 2.1019,
      "step": 48871
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0785712003707886,
      "learning_rate": 5.91457426556546e-06,
      "loss": 2.4305,
      "step": 48872
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0717008113861084,
      "learning_rate": 5.914198460720733e-06,
      "loss": 2.0779,
      "step": 48873
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.122611165046692,
      "learning_rate": 5.913822662802549e-06,
      "loss": 2.2665,
      "step": 48874
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0049346685409546,
      "learning_rate": 5.913446871811543e-06,
      "loss": 2.2586,
      "step": 48875
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2312508821487427,
      "learning_rate": 5.913071087748356e-06,
      "loss": 2.4697,
      "step": 48876
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1141839027404785,
      "learning_rate": 5.912695310613626e-06,
      "loss": 2.4468,
      "step": 48877
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2052565813064575,
      "learning_rate": 5.912319540407985e-06,
      "loss": 2.3404,
      "step": 48878
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1719292402267456,
      "learning_rate": 5.911943777132076e-06,
      "loss": 2.3185,
      "step": 48879
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2595772743225098,
      "learning_rate": 5.911568020786531e-06,
      "loss": 2.3712,
      "step": 48880
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1046090126037598,
      "learning_rate": 5.911192271371986e-06,
      "loss": 2.4243,
      "step": 48881
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0469862222671509,
      "learning_rate": 5.910816528889083e-06,
      "loss": 2.1896,
      "step": 48882
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1000717878341675,
      "learning_rate": 5.910440793338454e-06,
      "loss": 2.4488,
      "step": 48883
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0565701723098755,
      "learning_rate": 5.910065064720741e-06,
      "loss": 2.2029,
      "step": 48884
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0178203582763672,
      "learning_rate": 5.909689343036578e-06,
      "loss": 2.2374,
      "step": 48885
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0748015642166138,
      "learning_rate": 5.9093136282866014e-06,
      "loss": 2.4021,
      "step": 48886
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0676759481430054,
      "learning_rate": 5.908937920471451e-06,
      "loss": 2.4511,
      "step": 48887
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.036503791809082,
      "learning_rate": 5.90856221959176e-06,
      "loss": 2.1719,
      "step": 48888
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.157487154006958,
      "learning_rate": 5.908186525648167e-06,
      "loss": 2.3575,
      "step": 48889
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0137550830841064,
      "learning_rate": 5.907810838641312e-06,
      "loss": 2.3091,
      "step": 48890
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0090750455856323,
      "learning_rate": 5.907435158571828e-06,
      "loss": 2.5031,
      "step": 48891
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1061352491378784,
      "learning_rate": 5.907059485440354e-06,
      "loss": 2.3049,
      "step": 48892
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0483509302139282,
      "learning_rate": 5.9066838192475255e-06,
      "loss": 2.1356,
      "step": 48893
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0767287015914917,
      "learning_rate": 5.906308159993977e-06,
      "loss": 2.4576,
      "step": 48894
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1486269235610962,
      "learning_rate": 5.9059325076803495e-06,
      "loss": 2.248,
      "step": 48895
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0240706205368042,
      "learning_rate": 5.9055568623072776e-06,
      "loss": 2.1769,
      "step": 48896
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.198142647743225,
      "learning_rate": 5.9051812238754e-06,
      "loss": 2.3674,
      "step": 48897
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0817307233810425,
      "learning_rate": 5.904805592385353e-06,
      "loss": 2.3442,
      "step": 48898
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2354577779769897,
      "learning_rate": 5.904429967837771e-06,
      "loss": 2.2602,
      "step": 48899
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0759533643722534,
      "learning_rate": 5.904054350233294e-06,
      "loss": 2.4681,
      "step": 48900
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1130931377410889,
      "learning_rate": 5.903678739572556e-06,
      "loss": 2.3479,
      "step": 48901
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0676136016845703,
      "learning_rate": 5.903303135856195e-06,
      "loss": 2.3147,
      "step": 48902
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1975382566452026,
      "learning_rate": 5.902927539084848e-06,
      "loss": 2.3656,
      "step": 48903
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3206144571304321,
      "learning_rate": 5.902551949259152e-06,
      "loss": 2.5096,
      "step": 48904
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1043241024017334,
      "learning_rate": 5.902176366379747e-06,
      "loss": 2.1688,
      "step": 48905
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9967751502990723,
      "learning_rate": 5.901800790447262e-06,
      "loss": 2.3154,
      "step": 48906
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1382508277893066,
      "learning_rate": 5.901425221462338e-06,
      "loss": 2.4371,
      "step": 48907
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0535577535629272,
      "learning_rate": 5.901049659425611e-06,
      "loss": 2.3982,
      "step": 48908
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0525380373001099,
      "learning_rate": 5.900674104337718e-06,
      "loss": 2.4195,
      "step": 48909
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.034325361251831,
      "learning_rate": 5.900298556199295e-06,
      "loss": 2.3336,
      "step": 48910
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2547354698181152,
      "learning_rate": 5.899923015010982e-06,
      "loss": 2.4291,
      "step": 48911
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0748207569122314,
      "learning_rate": 5.899547480773411e-06,
      "loss": 2.5692,
      "step": 48912
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0668315887451172,
      "learning_rate": 5.899171953487221e-06,
      "loss": 2.4801,
      "step": 48913
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0323479175567627,
      "learning_rate": 5.898796433153049e-06,
      "loss": 2.4454,
      "step": 48914
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5575060844421387,
      "learning_rate": 5.8984209197715304e-06,
      "loss": 2.3536,
      "step": 48915
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1115975379943848,
      "learning_rate": 5.898045413343302e-06,
      "loss": 2.3711,
      "step": 48916
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0746225118637085,
      "learning_rate": 5.897669913869001e-06,
      "loss": 2.4871,
      "step": 48917
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.010546088218689,
      "learning_rate": 5.897294421349267e-06,
      "loss": 2.5243,
      "step": 48918
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0497074127197266,
      "learning_rate": 5.89691893578473e-06,
      "loss": 2.3979,
      "step": 48919
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.170809030532837,
      "learning_rate": 5.896543457176029e-06,
      "loss": 2.167,
      "step": 48920
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0061655044555664,
      "learning_rate": 5.896167985523803e-06,
      "loss": 2.3672,
      "step": 48921
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0158379077911377,
      "learning_rate": 5.895792520828686e-06,
      "loss": 2.2136,
      "step": 48922
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.07357919216156,
      "learning_rate": 5.895417063091315e-06,
      "loss": 2.2909,
      "step": 48923
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1643141508102417,
      "learning_rate": 5.8950416123123265e-06,
      "loss": 2.4767,
      "step": 48924
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1627930402755737,
      "learning_rate": 5.894666168492358e-06,
      "loss": 2.2035,
      "step": 48925
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7249507904052734,
      "learning_rate": 5.894290731632048e-06,
      "loss": 2.4379,
      "step": 48926
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2225630283355713,
      "learning_rate": 5.893915301732027e-06,
      "loss": 2.3316,
      "step": 48927
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1216273307800293,
      "learning_rate": 5.893539878792935e-06,
      "loss": 2.1403,
      "step": 48928
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0561630725860596,
      "learning_rate": 5.89316446281541e-06,
      "loss": 2.517,
      "step": 48929
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0501810312271118,
      "learning_rate": 5.892789053800085e-06,
      "loss": 2.1931,
      "step": 48930
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0147868394851685,
      "learning_rate": 5.892413651747597e-06,
      "loss": 2.4238,
      "step": 48931
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.996303141117096,
      "learning_rate": 5.89203825665859e-06,
      "loss": 2.1042,
      "step": 48932
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.285832166671753,
      "learning_rate": 5.891662868533688e-06,
      "loss": 2.2842,
      "step": 48933
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1199557781219482,
      "learning_rate": 5.891287487373534e-06,
      "loss": 2.4505,
      "step": 48934
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0190352201461792,
      "learning_rate": 5.890912113178764e-06,
      "loss": 2.4618,
      "step": 48935
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2382045984268188,
      "learning_rate": 5.890536745950013e-06,
      "loss": 2.2735,
      "step": 48936
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1543021202087402,
      "learning_rate": 5.890161385687918e-06,
      "loss": 2.3801,
      "step": 48937
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.10435152053833,
      "learning_rate": 5.8897860323931165e-06,
      "loss": 2.4196,
      "step": 48938
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.059077501296997,
      "learning_rate": 5.889410686066244e-06,
      "loss": 2.3495,
      "step": 48939
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9725605249404907,
      "learning_rate": 5.8890353467079365e-06,
      "loss": 2.2883,
      "step": 48940
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0998059511184692,
      "learning_rate": 5.8886600143188326e-06,
      "loss": 2.2934,
      "step": 48941
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0769814252853394,
      "learning_rate": 5.888284688899564e-06,
      "loss": 2.4404,
      "step": 48942
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0408287048339844,
      "learning_rate": 5.8879093704507705e-06,
      "loss": 2.3748,
      "step": 48943
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0572165250778198,
      "learning_rate": 5.887534058973088e-06,
      "loss": 2.4362,
      "step": 48944
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0870764255523682,
      "learning_rate": 5.887158754467155e-06,
      "loss": 2.2556,
      "step": 48945
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1344389915466309,
      "learning_rate": 5.886783456933602e-06,
      "loss": 2.133,
      "step": 48946
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0702847242355347,
      "learning_rate": 5.886408166373067e-06,
      "loss": 2.2133,
      "step": 48947
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.207847237586975,
      "learning_rate": 5.886032882786188e-06,
      "loss": 2.1793,
      "step": 48948
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1652641296386719,
      "learning_rate": 5.8856576061736e-06,
      "loss": 2.313,
      "step": 48949
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9932755827903748,
      "learning_rate": 5.885282336535941e-06,
      "loss": 2.1678,
      "step": 48950
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1606334447860718,
      "learning_rate": 5.884907073873846e-06,
      "loss": 2.2688,
      "step": 48951
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.008543610572815,
      "learning_rate": 5.884531818187951e-06,
      "loss": 2.3581,
      "step": 48952
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0022906064987183,
      "learning_rate": 5.884156569478892e-06,
      "loss": 2.3268,
      "step": 48953
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1734426021575928,
      "learning_rate": 5.883781327747307e-06,
      "loss": 2.1766,
      "step": 48954
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1075115203857422,
      "learning_rate": 5.883406092993829e-06,
      "loss": 2.297,
      "step": 48955
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1283409595489502,
      "learning_rate": 5.883030865219095e-06,
      "loss": 2.2681,
      "step": 48956
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0652892589569092,
      "learning_rate": 5.882655644423743e-06,
      "loss": 2.4172,
      "step": 48957
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0315580368041992,
      "learning_rate": 5.882280430608409e-06,
      "loss": 2.4109,
      "step": 48958
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1685068607330322,
      "learning_rate": 5.881905223773728e-06,
      "loss": 2.4013,
      "step": 48959
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4553966522216797,
      "learning_rate": 5.881530023920336e-06,
      "loss": 2.1247,
      "step": 48960
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1670705080032349,
      "learning_rate": 5.881154831048868e-06,
      "loss": 2.3978,
      "step": 48961
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9984872937202454,
      "learning_rate": 5.88077964515996e-06,
      "loss": 2.4446,
      "step": 48962
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1883344650268555,
      "learning_rate": 5.88040446625425e-06,
      "loss": 2.0967,
      "step": 48963
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0672396421432495,
      "learning_rate": 5.880029294332374e-06,
      "loss": 2.11,
      "step": 48964
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1822656393051147,
      "learning_rate": 5.879654129394967e-06,
      "loss": 2.4078,
      "step": 48965
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0130431652069092,
      "learning_rate": 5.879278971442666e-06,
      "loss": 2.3153,
      "step": 48966
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.19871187210083,
      "learning_rate": 5.878903820476105e-06,
      "loss": 2.3423,
      "step": 48967
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0236213207244873,
      "learning_rate": 5.878528676495922e-06,
      "loss": 2.3807,
      "step": 48968
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2128359079360962,
      "learning_rate": 5.878153539502751e-06,
      "loss": 2.2239,
      "step": 48969
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1871706247329712,
      "learning_rate": 5.87777840949723e-06,
      "loss": 2.5209,
      "step": 48970
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0793843269348145,
      "learning_rate": 5.877403286479999e-06,
      "loss": 2.1781,
      "step": 48971
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.362777829170227,
      "learning_rate": 5.8770281704516855e-06,
      "loss": 2.3072,
      "step": 48972
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0865625143051147,
      "learning_rate": 5.876653061412927e-06,
      "loss": 2.3402,
      "step": 48973
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1452008485794067,
      "learning_rate": 5.876277959364362e-06,
      "loss": 2.1967,
      "step": 48974
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0526237487792969,
      "learning_rate": 5.875902864306629e-06,
      "loss": 2.2741,
      "step": 48975
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.108222484588623,
      "learning_rate": 5.875527776240357e-06,
      "loss": 2.2362,
      "step": 48976
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0387331247329712,
      "learning_rate": 5.875152695166186e-06,
      "loss": 2.3161,
      "step": 48977
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0661214590072632,
      "learning_rate": 5.8747776210847525e-06,
      "loss": 2.6212,
      "step": 48978
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0429202318191528,
      "learning_rate": 5.874402553996692e-06,
      "loss": 2.343,
      "step": 48979
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.054137945175171,
      "learning_rate": 5.874027493902639e-06,
      "loss": 2.2315,
      "step": 48980
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0603832006454468,
      "learning_rate": 5.87365244080323e-06,
      "loss": 2.4026,
      "step": 48981
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1050411462783813,
      "learning_rate": 5.873277394699102e-06,
      "loss": 2.1988,
      "step": 48982
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2412978410720825,
      "learning_rate": 5.872902355590888e-06,
      "loss": 2.2665,
      "step": 48983
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1720103025436401,
      "learning_rate": 5.872527323479227e-06,
      "loss": 2.4001,
      "step": 48984
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9995683431625366,
      "learning_rate": 5.8721522983647526e-06,
      "loss": 2.2606,
      "step": 48985
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.177811861038208,
      "learning_rate": 5.8717772802481e-06,
      "loss": 2.4358,
      "step": 48986
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0683859586715698,
      "learning_rate": 5.871402269129907e-06,
      "loss": 2.3002,
      "step": 48987
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2923179864883423,
      "learning_rate": 5.871027265010809e-06,
      "loss": 2.4569,
      "step": 48988
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4822907447814941,
      "learning_rate": 5.870652267891441e-06,
      "loss": 2.2188,
      "step": 48989
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.083866000175476,
      "learning_rate": 5.870277277772438e-06,
      "loss": 2.3055,
      "step": 48990
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.052135705947876,
      "learning_rate": 5.869902294654437e-06,
      "loss": 2.0958,
      "step": 48991
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1368882656097412,
      "learning_rate": 5.869527318538073e-06,
      "loss": 2.1472,
      "step": 48992
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0543829202651978,
      "learning_rate": 5.869152349423981e-06,
      "loss": 2.3496,
      "step": 48993
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1665154695510864,
      "learning_rate": 5.868777387312798e-06,
      "loss": 2.1908,
      "step": 48994
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1155445575714111,
      "learning_rate": 5.8684024322051615e-06,
      "loss": 2.3867,
      "step": 48995
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.046905517578125,
      "learning_rate": 5.868027484101704e-06,
      "loss": 2.3166,
      "step": 48996
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0207736492156982,
      "learning_rate": 5.867652543003062e-06,
      "loss": 2.3378,
      "step": 48997
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1394139528274536,
      "learning_rate": 5.867277608909874e-06,
      "loss": 2.1344,
      "step": 48998
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2685587406158447,
      "learning_rate": 5.86690268182277e-06,
      "loss": 2.5294,
      "step": 48999
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1117124557495117,
      "learning_rate": 5.866527761742389e-06,
      "loss": 2.3238,
      "step": 49000
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0565158128738403,
      "learning_rate": 5.866152848669364e-06,
      "loss": 2.3423,
      "step": 49001
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0598260164260864,
      "learning_rate": 5.865777942604335e-06,
      "loss": 2.479,
      "step": 49002
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0788971185684204,
      "learning_rate": 5.8654030435479345e-06,
      "loss": 2.1539,
      "step": 49003
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9938332438468933,
      "learning_rate": 5.865028151500798e-06,
      "loss": 2.3189,
      "step": 49004
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0767029523849487,
      "learning_rate": 5.864653266463562e-06,
      "loss": 2.5359,
      "step": 49005
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.18427312374115,
      "learning_rate": 5.864278388436864e-06,
      "loss": 2.2032,
      "step": 49006
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.049005389213562,
      "learning_rate": 5.863903517421334e-06,
      "loss": 2.4103,
      "step": 49007
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.077735424041748,
      "learning_rate": 5.863528653417614e-06,
      "loss": 2.2417,
      "step": 49008
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1567994356155396,
      "learning_rate": 5.863153796426336e-06,
      "loss": 2.1902,
      "step": 49009
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0513007640838623,
      "learning_rate": 5.862778946448135e-06,
      "loss": 2.434,
      "step": 49010
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1297523975372314,
      "learning_rate": 5.86240410348365e-06,
      "loss": 2.4547,
      "step": 49011
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.084964632987976,
      "learning_rate": 5.86202926753351e-06,
      "loss": 2.5223,
      "step": 49012
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0916727781295776,
      "learning_rate": 5.861654438598355e-06,
      "loss": 2.163,
      "step": 49013
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1396338939666748,
      "learning_rate": 5.861279616678821e-06,
      "loss": 2.4794,
      "step": 49014
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1973576545715332,
      "learning_rate": 5.86090480177554e-06,
      "loss": 2.4087,
      "step": 49015
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0416358709335327,
      "learning_rate": 5.860529993889152e-06,
      "loss": 2.3419,
      "step": 49016
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1064133644104004,
      "learning_rate": 5.86015519302029e-06,
      "loss": 2.3755,
      "step": 49017
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1272598505020142,
      "learning_rate": 5.859780399169588e-06,
      "loss": 2.2971,
      "step": 49018
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.23284912109375,
      "learning_rate": 5.8594056123376805e-06,
      "loss": 2.3047,
      "step": 49019
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.060420036315918,
      "learning_rate": 5.859030832525208e-06,
      "loss": 2.2863,
      "step": 49020
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0539740324020386,
      "learning_rate": 5.8586560597328015e-06,
      "loss": 2.2795,
      "step": 49021
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.07587730884552,
      "learning_rate": 5.858281293961099e-06,
      "loss": 2.4939,
      "step": 49022
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2088618278503418,
      "learning_rate": 5.857906535210733e-06,
      "loss": 2.4532,
      "step": 49023
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9627395272254944,
      "learning_rate": 5.857531783482346e-06,
      "loss": 2.0376,
      "step": 49024
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.247188925743103,
      "learning_rate": 5.857157038776562e-06,
      "loss": 2.1578,
      "step": 49025
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.158307671546936,
      "learning_rate": 5.856782301094024e-06,
      "loss": 2.2142,
      "step": 49026
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0957375764846802,
      "learning_rate": 5.856407570435363e-06,
      "loss": 2.3572,
      "step": 49027
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.130022406578064,
      "learning_rate": 5.856032846801218e-06,
      "loss": 2.3596,
      "step": 49028
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0468722581863403,
      "learning_rate": 5.855658130192223e-06,
      "loss": 2.3872,
      "step": 49029
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4096330404281616,
      "learning_rate": 5.8552834206090125e-06,
      "loss": 2.341,
      "step": 49030
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.142835259437561,
      "learning_rate": 5.854908718052222e-06,
      "loss": 2.1606,
      "step": 49031
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.064845085144043,
      "learning_rate": 5.854534022522488e-06,
      "loss": 2.4304,
      "step": 49032
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1653929948806763,
      "learning_rate": 5.854159334020444e-06,
      "loss": 2.3508,
      "step": 49033
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0239311456680298,
      "learning_rate": 5.8537846525467254e-06,
      "loss": 2.4258,
      "step": 49034
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1055216789245605,
      "learning_rate": 5.853409978101969e-06,
      "loss": 2.3597,
      "step": 49035
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1929585933685303,
      "learning_rate": 5.8530353106868074e-06,
      "loss": 2.4414,
      "step": 49036
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0631023645401,
      "learning_rate": 5.8526606503018835e-06,
      "loss": 2.5136,
      "step": 49037
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2619330883026123,
      "learning_rate": 5.852285996947821e-06,
      "loss": 2.5106,
      "step": 49038
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0842169523239136,
      "learning_rate": 5.85191135062526e-06,
      "loss": 2.3859,
      "step": 49039
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0632144212722778,
      "learning_rate": 5.851536711334835e-06,
      "loss": 2.1986,
      "step": 49040
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.106661319732666,
      "learning_rate": 5.8511620790771835e-06,
      "loss": 2.2062,
      "step": 49041
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0260114669799805,
      "learning_rate": 5.850787453852939e-06,
      "loss": 2.412,
      "step": 49042
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1824572086334229,
      "learning_rate": 5.850412835662736e-06,
      "loss": 2.378,
      "step": 49043
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1376641988754272,
      "learning_rate": 5.850038224507213e-06,
      "loss": 2.3799,
      "step": 49044
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1544462442398071,
      "learning_rate": 5.849663620387001e-06,
      "loss": 2.1583,
      "step": 49045
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0130237340927124,
      "learning_rate": 5.849289023302734e-06,
      "loss": 2.2325,
      "step": 49046
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0419443845748901,
      "learning_rate": 5.848914433255051e-06,
      "loss": 2.2565,
      "step": 49047
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0808219909667969,
      "learning_rate": 5.848539850244585e-06,
      "loss": 2.5803,
      "step": 49048
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0982682704925537,
      "learning_rate": 5.8481652742719715e-06,
      "loss": 2.4216,
      "step": 49049
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9242180585861206,
      "learning_rate": 5.84779070533785e-06,
      "loss": 2.3385,
      "step": 49050
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.462094783782959,
      "learning_rate": 5.8474161434428465e-06,
      "loss": 2.4715,
      "step": 49051
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7956656217575073,
      "learning_rate": 5.847041588587601e-06,
      "loss": 2.1823,
      "step": 49052
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9723628759384155,
      "learning_rate": 5.846667040772747e-06,
      "loss": 2.1136,
      "step": 49053
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1454964876174927,
      "learning_rate": 5.8462924999989215e-06,
      "loss": 2.2869,
      "step": 49054
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1742725372314453,
      "learning_rate": 5.8459179662667565e-06,
      "loss": 2.3273,
      "step": 49055
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1760271787643433,
      "learning_rate": 5.84554343957689e-06,
      "loss": 2.5262,
      "step": 49056
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0360337495803833,
      "learning_rate": 5.845168919929957e-06,
      "loss": 2.2769,
      "step": 49057
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0780283212661743,
      "learning_rate": 5.84479440732659e-06,
      "loss": 2.4231,
      "step": 49058
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4563777446746826,
      "learning_rate": 5.844419901767424e-06,
      "loss": 2.2695,
      "step": 49059
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1463208198547363,
      "learning_rate": 5.844045403253096e-06,
      "loss": 2.4173,
      "step": 49060
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3588638305664062,
      "learning_rate": 5.8436709117842395e-06,
      "loss": 2.4536,
      "step": 49061
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1264747381210327,
      "learning_rate": 5.8432964273614886e-06,
      "loss": 2.2087,
      "step": 49062
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2027443647384644,
      "learning_rate": 5.842921949985485e-06,
      "loss": 2.406,
      "step": 49063
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1362513303756714,
      "learning_rate": 5.842547479656852e-06,
      "loss": 2.3797,
      "step": 49064
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0678027868270874,
      "learning_rate": 5.842173016376233e-06,
      "loss": 2.0719,
      "step": 49065
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1414436101913452,
      "learning_rate": 5.841798560144259e-06,
      "loss": 2.0646,
      "step": 49066
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1963640451431274,
      "learning_rate": 5.841424110961565e-06,
      "loss": 2.3186,
      "step": 49067
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1167147159576416,
      "learning_rate": 5.841049668828785e-06,
      "loss": 2.3814,
      "step": 49068
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1737498044967651,
      "learning_rate": 5.840675233746558e-06,
      "loss": 2.4806,
      "step": 49069
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1082426309585571,
      "learning_rate": 5.840300805715515e-06,
      "loss": 2.3162,
      "step": 49070
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1467840671539307,
      "learning_rate": 5.839926384736292e-06,
      "loss": 2.1171,
      "step": 49071
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0613930225372314,
      "learning_rate": 5.839551970809525e-06,
      "loss": 2.1436,
      "step": 49072
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1260573863983154,
      "learning_rate": 5.839177563935846e-06,
      "loss": 2.3228,
      "step": 49073
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1168054342269897,
      "learning_rate": 5.838803164115892e-06,
      "loss": 2.4863,
      "step": 49074
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.103765845298767,
      "learning_rate": 5.838428771350295e-06,
      "loss": 2.2441,
      "step": 49075
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2651716470718384,
      "learning_rate": 5.838054385639691e-06,
      "loss": 2.2709,
      "step": 49076
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1002998352050781,
      "learning_rate": 5.837680006984722e-06,
      "loss": 2.1665,
      "step": 49077
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0192116498947144,
      "learning_rate": 5.837305635386008e-06,
      "loss": 2.2644,
      "step": 49078
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1265922784805298,
      "learning_rate": 5.836931270844196e-06,
      "loss": 2.3648,
      "step": 49079
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3784842491149902,
      "learning_rate": 5.836556913359912e-06,
      "loss": 2.235,
      "step": 49080
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.6431028842926025,
      "learning_rate": 5.836182562933799e-06,
      "loss": 2.4051,
      "step": 49081
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0899591445922852,
      "learning_rate": 5.835808219566483e-06,
      "loss": 2.3417,
      "step": 49082
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1341626644134521,
      "learning_rate": 5.835433883258608e-06,
      "loss": 2.3008,
      "step": 49083
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0567344427108765,
      "learning_rate": 5.835059554010799e-06,
      "loss": 2.4374,
      "step": 49084
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1466938257217407,
      "learning_rate": 5.8346852318237e-06,
      "loss": 2.2133,
      "step": 49085
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0297574996948242,
      "learning_rate": 5.8343109166979395e-06,
      "loss": 2.1604,
      "step": 49086
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0527591705322266,
      "learning_rate": 5.83393660863415e-06,
      "loss": 2.1727,
      "step": 49087
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5384283065795898,
      "learning_rate": 5.833562307632974e-06,
      "loss": 1.9811,
      "step": 49088
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0447195768356323,
      "learning_rate": 5.833188013695037e-06,
      "loss": 2.4567,
      "step": 49089
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0893406867980957,
      "learning_rate": 5.83281372682098e-06,
      "loss": 2.307,
      "step": 49090
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9818918704986572,
      "learning_rate": 5.832439447011439e-06,
      "loss": 2.318,
      "step": 49091
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2632098197937012,
      "learning_rate": 5.832065174267043e-06,
      "loss": 2.4298,
      "step": 49092
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.227721929550171,
      "learning_rate": 5.831690908588425e-06,
      "loss": 2.4744,
      "step": 49093
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2171003818511963,
      "learning_rate": 5.831316649976228e-06,
      "loss": 2.1345,
      "step": 49094
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.123241662979126,
      "learning_rate": 5.8309423984310765e-06,
      "loss": 2.5243,
      "step": 49095
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1955549716949463,
      "learning_rate": 5.830568153953613e-06,
      "loss": 2.5199,
      "step": 49096
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.025061011314392,
      "learning_rate": 5.830193916544467e-06,
      "loss": 2.3291,
      "step": 49097
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1041479110717773,
      "learning_rate": 5.8298196862042765e-06,
      "loss": 2.2355,
      "step": 49098
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9082009196281433,
      "learning_rate": 5.829445462933671e-06,
      "loss": 2.2397,
      "step": 49099
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3637347221374512,
      "learning_rate": 5.829071246733293e-06,
      "loss": 2.3189,
      "step": 49100
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1668174266815186,
      "learning_rate": 5.8286970376037676e-06,
      "loss": 2.2569,
      "step": 49101
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4221843481063843,
      "learning_rate": 5.828322835545736e-06,
      "loss": 2.3642,
      "step": 49102
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0390797853469849,
      "learning_rate": 5.827948640559833e-06,
      "loss": 2.377,
      "step": 49103
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.12991464138031,
      "learning_rate": 5.827574452646689e-06,
      "loss": 2.4353,
      "step": 49104
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0406354665756226,
      "learning_rate": 5.827200271806935e-06,
      "loss": 2.3896,
      "step": 49105
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1009583473205566,
      "learning_rate": 5.826826098041215e-06,
      "loss": 2.3868,
      "step": 49106
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0100654363632202,
      "learning_rate": 5.8264519313501535e-06,
      "loss": 2.3883,
      "step": 49107
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1587913036346436,
      "learning_rate": 5.826077771734393e-06,
      "loss": 2.2704,
      "step": 49108
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2858078479766846,
      "learning_rate": 5.825703619194561e-06,
      "loss": 2.5139,
      "step": 49109
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.062882661819458,
      "learning_rate": 5.825329473731299e-06,
      "loss": 2.1541,
      "step": 49110
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9915173053741455,
      "learning_rate": 5.824955335345232e-06,
      "loss": 2.4156,
      "step": 49111
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1244513988494873,
      "learning_rate": 5.824581204037006e-06,
      "loss": 2.427,
      "step": 49112
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0663228034973145,
      "learning_rate": 5.824207079807248e-06,
      "loss": 2.198,
      "step": 49113
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0888347625732422,
      "learning_rate": 5.8238329626565886e-06,
      "loss": 2.1423,
      "step": 49114
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0613723993301392,
      "learning_rate": 5.82345885258567e-06,
      "loss": 2.2945,
      "step": 49115
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1022405624389648,
      "learning_rate": 5.823084749595125e-06,
      "loss": 2.1569,
      "step": 49116
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0824429988861084,
      "learning_rate": 5.822710653685581e-06,
      "loss": 2.3494,
      "step": 49117
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.051290512084961,
      "learning_rate": 5.82233656485768e-06,
      "loss": 2.2014,
      "step": 49118
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0726134777069092,
      "learning_rate": 5.8219624831120505e-06,
      "loss": 2.3664,
      "step": 49119
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0970165729522705,
      "learning_rate": 5.821588408449332e-06,
      "loss": 2.4326,
      "step": 49120
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0453901290893555,
      "learning_rate": 5.821214340870156e-06,
      "loss": 2.2796,
      "step": 49121
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0278503894805908,
      "learning_rate": 5.820840280375154e-06,
      "loss": 2.3672,
      "step": 49122
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2049083709716797,
      "learning_rate": 5.820466226964966e-06,
      "loss": 2.3146,
      "step": 49123
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0235176086425781,
      "learning_rate": 5.820092180640219e-06,
      "loss": 2.3176,
      "step": 49124
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1608272790908813,
      "learning_rate": 5.819718141401555e-06,
      "loss": 2.4496,
      "step": 49125
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0763373374938965,
      "learning_rate": 5.8193441092496e-06,
      "loss": 2.357,
      "step": 49126
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0754834413528442,
      "learning_rate": 5.818970084184996e-06,
      "loss": 2.2286,
      "step": 49127
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1630343198776245,
      "learning_rate": 5.818596066208369e-06,
      "loss": 2.3497,
      "step": 49128
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.152688980102539,
      "learning_rate": 5.818222055320366e-06,
      "loss": 2.2743,
      "step": 49129
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9933657050132751,
      "learning_rate": 5.817848051521604e-06,
      "loss": 2.2671,
      "step": 49130
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1043351888656616,
      "learning_rate": 5.81747405481273e-06,
      "loss": 2.3165,
      "step": 49131
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1132248640060425,
      "learning_rate": 5.817100065194369e-06,
      "loss": 2.2968,
      "step": 49132
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.010025978088379,
      "learning_rate": 5.816726082667163e-06,
      "loss": 2.5509,
      "step": 49133
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.118040919303894,
      "learning_rate": 5.81635210723174e-06,
      "loss": 2.3686,
      "step": 49134
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.105412483215332,
      "learning_rate": 5.815978138888738e-06,
      "loss": 2.4053,
      "step": 49135
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9673304557800293,
      "learning_rate": 5.8156041776387874e-06,
      "loss": 2.2634,
      "step": 49136
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2191861867904663,
      "learning_rate": 5.815230223482527e-06,
      "loss": 2.1546,
      "step": 49137
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0560131072998047,
      "learning_rate": 5.814856276420584e-06,
      "loss": 2.1981,
      "step": 49138
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1760754585266113,
      "learning_rate": 5.814482336453602e-06,
      "loss": 2.2975,
      "step": 49139
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.089186429977417,
      "learning_rate": 5.814108403582204e-06,
      "loss": 2.4164,
      "step": 49140
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.011773705482483,
      "learning_rate": 5.813734477807031e-06,
      "loss": 2.3812,
      "step": 49141
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.056641697883606,
      "learning_rate": 5.8133605591287175e-06,
      "loss": 2.374,
      "step": 49142
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0834647417068481,
      "learning_rate": 5.812986647547896e-06,
      "loss": 2.26,
      "step": 49143
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0408576726913452,
      "learning_rate": 5.812612743065194e-06,
      "loss": 2.3294,
      "step": 49144
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2270361185073853,
      "learning_rate": 5.8122388456812536e-06,
      "loss": 2.3959,
      "step": 49145
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2171097993850708,
      "learning_rate": 5.811864955396702e-06,
      "loss": 2.3088,
      "step": 49146
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.15438711643219,
      "learning_rate": 5.8114910722121834e-06,
      "loss": 2.1592,
      "step": 49147
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.078238606452942,
      "learning_rate": 5.8111171961283215e-06,
      "loss": 2.2807,
      "step": 49148
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0693110227584839,
      "learning_rate": 5.810743327145752e-06,
      "loss": 2.3827,
      "step": 49149
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1676253080368042,
      "learning_rate": 5.810369465265114e-06,
      "loss": 2.4719,
      "step": 49150
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.055653691291809,
      "learning_rate": 5.809995610487032e-06,
      "loss": 2.3383,
      "step": 49151
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1634474992752075,
      "learning_rate": 5.809621762812151e-06,
      "loss": 2.3878,
      "step": 49152
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0141961574554443,
      "learning_rate": 5.8092479222410946e-06,
      "loss": 2.2497,
      "step": 49153
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0237544775009155,
      "learning_rate": 5.808874088774505e-06,
      "loss": 2.4694,
      "step": 49154
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1592676639556885,
      "learning_rate": 5.80850026241301e-06,
      "loss": 2.2247,
      "step": 49155
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0608395338058472,
      "learning_rate": 5.8081264431572515e-06,
      "loss": 2.3332,
      "step": 49156
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0672800540924072,
      "learning_rate": 5.807752631007849e-06,
      "loss": 2.3929,
      "step": 49157
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.080802083015442,
      "learning_rate": 5.807378825965449e-06,
      "loss": 2.2459,
      "step": 49158
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0530821084976196,
      "learning_rate": 5.807005028030677e-06,
      "loss": 2.4579,
      "step": 49159
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0715219974517822,
      "learning_rate": 5.806631237204174e-06,
      "loss": 2.2206,
      "step": 49160
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1298032999038696,
      "learning_rate": 5.806257453486566e-06,
      "loss": 2.3651,
      "step": 49161
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1105623245239258,
      "learning_rate": 5.8058836768784945e-06,
      "loss": 2.3291,
      "step": 49162
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0579861402511597,
      "learning_rate": 5.805509907380584e-06,
      "loss": 2.36,
      "step": 49163
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1961356401443481,
      "learning_rate": 5.805136144993479e-06,
      "loss": 2.2983,
      "step": 49164
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2823715209960938,
      "learning_rate": 5.804762389717804e-06,
      "loss": 2.5634,
      "step": 49165
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.175933599472046,
      "learning_rate": 5.8043886415542e-06,
      "loss": 2.2769,
      "step": 49166
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0496853590011597,
      "learning_rate": 5.804014900503292e-06,
      "loss": 2.5841,
      "step": 49167
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9133113622665405,
      "learning_rate": 5.803641166565721e-06,
      "loss": 2.2628,
      "step": 49168
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.173965573310852,
      "learning_rate": 5.80326743974212e-06,
      "loss": 2.2771,
      "step": 49169
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3507877588272095,
      "learning_rate": 5.802893720033121e-06,
      "loss": 2.5238,
      "step": 49170
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1056491136550903,
      "learning_rate": 5.802520007439352e-06,
      "loss": 2.3297,
      "step": 49171
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0642812252044678,
      "learning_rate": 5.802146301961455e-06,
      "loss": 2.3924,
      "step": 49172
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0475397109985352,
      "learning_rate": 5.801772603600057e-06,
      "loss": 2.2891,
      "step": 49173
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0784891843795776,
      "learning_rate": 5.801398912355798e-06,
      "loss": 2.0909,
      "step": 49174
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1221002340316772,
      "learning_rate": 5.801025228229304e-06,
      "loss": 2.3181,
      "step": 49175
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2526047229766846,
      "learning_rate": 5.8006515512212165e-06,
      "loss": 2.2805,
      "step": 49176
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1389930248260498,
      "learning_rate": 5.800277881332167e-06,
      "loss": 2.3652,
      "step": 49177
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1259993314743042,
      "learning_rate": 5.799904218562782e-06,
      "loss": 2.4906,
      "step": 49178
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.05976140499115,
      "learning_rate": 5.799530562913704e-06,
      "loss": 2.2638,
      "step": 49179
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1439818143844604,
      "learning_rate": 5.799156914385559e-06,
      "loss": 2.3748,
      "step": 49180
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0961743593215942,
      "learning_rate": 5.798783272978987e-06,
      "loss": 2.2307,
      "step": 49181
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.084671139717102,
      "learning_rate": 5.79840963869462e-06,
      "loss": 2.2829,
      "step": 49182
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2140796184539795,
      "learning_rate": 5.798036011533088e-06,
      "loss": 2.1469,
      "step": 49183
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1260888576507568,
      "learning_rate": 5.797662391495022e-06,
      "loss": 2.3552,
      "step": 49184
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2411928176879883,
      "learning_rate": 5.797288778581065e-06,
      "loss": 2.1449,
      "step": 49185
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1350681781768799,
      "learning_rate": 5.796915172791842e-06,
      "loss": 2.3293,
      "step": 49186
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0754729509353638,
      "learning_rate": 5.796541574127991e-06,
      "loss": 2.3099,
      "step": 49187
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.058842420578003,
      "learning_rate": 5.7961679825901395e-06,
      "loss": 2.2786,
      "step": 49188
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1026860475540161,
      "learning_rate": 5.795794398178931e-06,
      "loss": 2.4675,
      "step": 49189
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0678237676620483,
      "learning_rate": 5.795420820894988e-06,
      "loss": 2.4954,
      "step": 49190
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.070149302482605,
      "learning_rate": 5.795047250738952e-06,
      "loss": 2.1861,
      "step": 49191
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1432303190231323,
      "learning_rate": 5.79467368771145e-06,
      "loss": 2.2011,
      "step": 49192
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0652960538864136,
      "learning_rate": 5.794300131813122e-06,
      "loss": 2.127,
      "step": 49193
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0901517868041992,
      "learning_rate": 5.793926583044594e-06,
      "loss": 2.4508,
      "step": 49194
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9876672625541687,
      "learning_rate": 5.793553041406509e-06,
      "loss": 2.1417,
      "step": 49195
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0893076658248901,
      "learning_rate": 5.793179506899487e-06,
      "loss": 2.2903,
      "step": 49196
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1635328531265259,
      "learning_rate": 5.792805979524172e-06,
      "loss": 2.2409,
      "step": 49197
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4476943016052246,
      "learning_rate": 5.7924324592811894e-06,
      "loss": 2.1316,
      "step": 49198
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0961371660232544,
      "learning_rate": 5.792058946171182e-06,
      "loss": 2.1796,
      "step": 49199
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1617772579193115,
      "learning_rate": 5.791685440194772e-06,
      "loss": 2.2105,
      "step": 49200
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.143925666809082,
      "learning_rate": 5.791311941352603e-06,
      "loss": 2.0758,
      "step": 49201
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1883814334869385,
      "learning_rate": 5.790938449645299e-06,
      "loss": 2.1861,
      "step": 49202
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0652881860733032,
      "learning_rate": 5.790564965073502e-06,
      "loss": 2.3351,
      "step": 49203
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1588512659072876,
      "learning_rate": 5.79019148763784e-06,
      "loss": 2.3766,
      "step": 49204
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0312632322311401,
      "learning_rate": 5.789818017338943e-06,
      "loss": 2.1897,
      "step": 49205
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0830597877502441,
      "learning_rate": 5.789444554177452e-06,
      "loss": 2.375,
      "step": 49206
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1319663524627686,
      "learning_rate": 5.7890710981539925e-06,
      "loss": 2.2829,
      "step": 49207
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0893667936325073,
      "learning_rate": 5.788697649269204e-06,
      "loss": 2.4343,
      "step": 49208
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1188921928405762,
      "learning_rate": 5.7883242075237175e-06,
      "loss": 2.3587,
      "step": 49209
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1418359279632568,
      "learning_rate": 5.787950772918165e-06,
      "loss": 2.1041,
      "step": 49210
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0836451053619385,
      "learning_rate": 5.787577345453177e-06,
      "loss": 2.4301,
      "step": 49211
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1664172410964966,
      "learning_rate": 5.787203925129392e-06,
      "loss": 2.4121,
      "step": 49212
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.179032802581787,
      "learning_rate": 5.7868305119474365e-06,
      "loss": 2.4127,
      "step": 49213
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1685069799423218,
      "learning_rate": 5.786457105907953e-06,
      "loss": 2.6128,
      "step": 49214
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0489147901535034,
      "learning_rate": 5.786083707011565e-06,
      "loss": 2.1227,
      "step": 49215
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1782383918762207,
      "learning_rate": 5.785710315258912e-06,
      "loss": 2.2877,
      "step": 49216
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0234854221343994,
      "learning_rate": 5.785336930650622e-06,
      "loss": 2.2458,
      "step": 49217
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0625073909759521,
      "learning_rate": 5.784963553187334e-06,
      "loss": 2.2533,
      "step": 49218
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0191162824630737,
      "learning_rate": 5.784590182869674e-06,
      "loss": 2.4445,
      "step": 49219
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0956212282180786,
      "learning_rate": 5.784216819698282e-06,
      "loss": 2.3281,
      "step": 49220
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.082356333732605,
      "learning_rate": 5.783843463673784e-06,
      "loss": 2.1494,
      "step": 49221
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1700365543365479,
      "learning_rate": 5.783470114796823e-06,
      "loss": 2.3087,
      "step": 49222
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.015434980392456,
      "learning_rate": 5.78309677306802e-06,
      "loss": 2.3151,
      "step": 49223
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1058940887451172,
      "learning_rate": 5.782723438488015e-06,
      "loss": 2.2338,
      "step": 49224
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9935046434402466,
      "learning_rate": 5.7823501110574355e-06,
      "loss": 2.3495,
      "step": 49225
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1251901388168335,
      "learning_rate": 5.781976790776923e-06,
      "loss": 2.3584,
      "step": 49226
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0916091203689575,
      "learning_rate": 5.781603477647101e-06,
      "loss": 2.3575,
      "step": 49227
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9842780828475952,
      "learning_rate": 5.781230171668611e-06,
      "loss": 2.3476,
      "step": 49228
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1776164770126343,
      "learning_rate": 5.780856872842076e-06,
      "loss": 2.4495,
      "step": 49229
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1533706188201904,
      "learning_rate": 5.7804835811681416e-06,
      "loss": 2.2414,
      "step": 49230
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0312933921813965,
      "learning_rate": 5.780110296647431e-06,
      "loss": 2.3762,
      "step": 49231
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1666045188903809,
      "learning_rate": 5.779737019280576e-06,
      "loss": 2.4284,
      "step": 49232
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0480012893676758,
      "learning_rate": 5.779363749068217e-06,
      "loss": 2.4863,
      "step": 49233
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0077415704727173,
      "learning_rate": 5.778990486010981e-06,
      "loss": 2.3131,
      "step": 49234
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.063132643699646,
      "learning_rate": 5.778617230109504e-06,
      "loss": 2.2444,
      "step": 49235
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2109713554382324,
      "learning_rate": 5.7782439813644175e-06,
      "loss": 2.2113,
      "step": 49236
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.103116750717163,
      "learning_rate": 5.77787073977635e-06,
      "loss": 2.5317,
      "step": 49237
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0372600555419922,
      "learning_rate": 5.777497505345943e-06,
      "loss": 2.5199,
      "step": 49238
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0809046030044556,
      "learning_rate": 5.777124278073824e-06,
      "loss": 2.0462,
      "step": 49239
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0627942085266113,
      "learning_rate": 5.776751057960624e-06,
      "loss": 2.2896,
      "step": 49240
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.210453748703003,
      "learning_rate": 5.77637784500698e-06,
      "loss": 2.597,
      "step": 49241
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1740453243255615,
      "learning_rate": 5.776004639213519e-06,
      "loss": 2.4238,
      "step": 49242
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0980993509292603,
      "learning_rate": 5.775631440580882e-06,
      "loss": 2.4087,
      "step": 49243
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1030101776123047,
      "learning_rate": 5.775258249109692e-06,
      "loss": 2.4074,
      "step": 49244
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0613720417022705,
      "learning_rate": 5.774885064800591e-06,
      "loss": 2.3737,
      "step": 49245
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0167001485824585,
      "learning_rate": 5.774511887654204e-06,
      "loss": 2.1826,
      "step": 49246
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0659737586975098,
      "learning_rate": 5.774138717671172e-06,
      "loss": 2.6425,
      "step": 49247
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1129356622695923,
      "learning_rate": 5.77376555485212e-06,
      "loss": 2.2902,
      "step": 49248
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1208853721618652,
      "learning_rate": 5.7733923991976845e-06,
      "loss": 2.5328,
      "step": 49249
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1228193044662476,
      "learning_rate": 5.773019250708494e-06,
      "loss": 2.1261,
      "step": 49250
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0590789318084717,
      "learning_rate": 5.772646109385185e-06,
      "loss": 2.3697,
      "step": 49251
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.127933382987976,
      "learning_rate": 5.772272975228388e-06,
      "loss": 2.3457,
      "step": 49252
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.023300051689148,
      "learning_rate": 5.771899848238738e-06,
      "loss": 2.1976,
      "step": 49253
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1018013954162598,
      "learning_rate": 5.771526728416863e-06,
      "loss": 2.015,
      "step": 49254
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1046264171600342,
      "learning_rate": 5.771153615763403e-06,
      "loss": 2.4687,
      "step": 49255
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.057238221168518,
      "learning_rate": 5.770780510278983e-06,
      "loss": 2.124,
      "step": 49256
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0686776638031006,
      "learning_rate": 5.770407411964242e-06,
      "loss": 2.4049,
      "step": 49257
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.117425799369812,
      "learning_rate": 5.770034320819804e-06,
      "loss": 2.2949,
      "step": 49258
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0031596422195435,
      "learning_rate": 5.7696612368463125e-06,
      "loss": 2.2144,
      "step": 49259
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2185229063034058,
      "learning_rate": 5.769288160044393e-06,
      "loss": 2.1531,
      "step": 49260
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2343504428863525,
      "learning_rate": 5.76891509041468e-06,
      "loss": 2.1826,
      "step": 49261
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0521950721740723,
      "learning_rate": 5.7685420279577995e-06,
      "loss": 2.4748,
      "step": 49262
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0829424858093262,
      "learning_rate": 5.768168972674395e-06,
      "loss": 2.5928,
      "step": 49263
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0555164813995361,
      "learning_rate": 5.76779592456509e-06,
      "loss": 2.4946,
      "step": 49264
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0001548528671265,
      "learning_rate": 5.767422883630522e-06,
      "loss": 2.2086,
      "step": 49265
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0235042572021484,
      "learning_rate": 5.767049849871322e-06,
      "loss": 2.3454,
      "step": 49266
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0029938220977783,
      "learning_rate": 5.76667682328812e-06,
      "loss": 2.4394,
      "step": 49267
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.11359703540802,
      "learning_rate": 5.766303803881554e-06,
      "loss": 2.2109,
      "step": 49268
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0725839138031006,
      "learning_rate": 5.765930791652248e-06,
      "loss": 2.3457,
      "step": 49269
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0282208919525146,
      "learning_rate": 5.765557786600844e-06,
      "loss": 2.3072,
      "step": 49270
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1461176872253418,
      "learning_rate": 5.765184788727967e-06,
      "loss": 2.2777,
      "step": 49271
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0198962688446045,
      "learning_rate": 5.764811798034253e-06,
      "loss": 2.3402,
      "step": 49272
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0877115726470947,
      "learning_rate": 5.764438814520331e-06,
      "loss": 2.2968,
      "step": 49273
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0382553339004517,
      "learning_rate": 5.764065838186843e-06,
      "loss": 2.4599,
      "step": 49274
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0997027158737183,
      "learning_rate": 5.763692869034407e-06,
      "loss": 2.2382,
      "step": 49275
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.167440414428711,
      "learning_rate": 5.7633199070636645e-06,
      "loss": 2.3614,
      "step": 49276
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.076206088066101,
      "learning_rate": 5.762946952275242e-06,
      "loss": 2.2457,
      "step": 49277
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2007858753204346,
      "learning_rate": 5.76257400466978e-06,
      "loss": 2.1576,
      "step": 49278
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0765442848205566,
      "learning_rate": 5.762201064247901e-06,
      "loss": 2.3625,
      "step": 49279
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0340086221694946,
      "learning_rate": 5.7618281310102475e-06,
      "loss": 2.5285,
      "step": 49280
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0865808725357056,
      "learning_rate": 5.76145520495744e-06,
      "loss": 2.2322,
      "step": 49281
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0422853231430054,
      "learning_rate": 5.761082286090123e-06,
      "loss": 2.3319,
      "step": 49282
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0709086656570435,
      "learning_rate": 5.76070937440892e-06,
      "loss": 2.4187,
      "step": 49283
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0529160499572754,
      "learning_rate": 5.760336469914467e-06,
      "loss": 2.4029,
      "step": 49284
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0901355743408203,
      "learning_rate": 5.759963572607392e-06,
      "loss": 2.2826,
      "step": 49285
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4181456565856934,
      "learning_rate": 5.759590682488335e-06,
      "loss": 2.5534,
      "step": 49286
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0465890169143677,
      "learning_rate": 5.759217799557923e-06,
      "loss": 2.4339,
      "step": 49287
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.030268669128418,
      "learning_rate": 5.758844923816789e-06,
      "loss": 2.1464,
      "step": 49288
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.125797986984253,
      "learning_rate": 5.75847205526556e-06,
      "loss": 2.0977,
      "step": 49289
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0475060939788818,
      "learning_rate": 5.7580991939048784e-06,
      "loss": 2.3514,
      "step": 49290
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.451918363571167,
      "learning_rate": 5.757726339735366e-06,
      "loss": 2.1531,
      "step": 49291
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1125218868255615,
      "learning_rate": 5.757353492757662e-06,
      "loss": 2.1653,
      "step": 49292
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.063494086265564,
      "learning_rate": 5.7569806529723945e-06,
      "loss": 2.4119,
      "step": 49293
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.310210943222046,
      "learning_rate": 5.7566078203802e-06,
      "loss": 2.3798,
      "step": 49294
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1688276529312134,
      "learning_rate": 5.756234994981708e-06,
      "loss": 2.5912,
      "step": 49295
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.217653512954712,
      "learning_rate": 5.755862176777547e-06,
      "loss": 2.4942,
      "step": 49296
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0765451192855835,
      "learning_rate": 5.755489365768355e-06,
      "loss": 2.4356,
      "step": 49297
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0902565717697144,
      "learning_rate": 5.755116561954759e-06,
      "loss": 2.3962,
      "step": 49298
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0532011985778809,
      "learning_rate": 5.7547437653373965e-06,
      "loss": 2.2964,
      "step": 49299
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.121998906135559,
      "learning_rate": 5.754370975916891e-06,
      "loss": 2.4953,
      "step": 49300
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.147068977355957,
      "learning_rate": 5.7539981936938885e-06,
      "loss": 2.4581,
      "step": 49301
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2200065851211548,
      "learning_rate": 5.753625418669004e-06,
      "loss": 2.4022,
      "step": 49302
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1492542028427124,
      "learning_rate": 5.753252650842883e-06,
      "loss": 2.5025,
      "step": 49303
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0985828638076782,
      "learning_rate": 5.752879890216148e-06,
      "loss": 2.538,
      "step": 49304
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.256089210510254,
      "learning_rate": 5.752507136789438e-06,
      "loss": 2.1784,
      "step": 49305
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1573022603988647,
      "learning_rate": 5.7521343905633785e-06,
      "loss": 2.298,
      "step": 49306
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9756553173065186,
      "learning_rate": 5.751761651538608e-06,
      "loss": 2.2493,
      "step": 49307
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1226907968521118,
      "learning_rate": 5.751388919715753e-06,
      "loss": 2.3145,
      "step": 49308
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1873856782913208,
      "learning_rate": 5.751016195095451e-06,
      "loss": 2.4243,
      "step": 49309
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0606317520141602,
      "learning_rate": 5.750643477678326e-06,
      "loss": 2.3641,
      "step": 49310
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.167404055595398,
      "learning_rate": 5.7502707674650196e-06,
      "loss": 2.3849,
      "step": 49311
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.063186764717102,
      "learning_rate": 5.749898064456154e-06,
      "loss": 2.2329,
      "step": 49312
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1091792583465576,
      "learning_rate": 5.749525368652369e-06,
      "loss": 2.1401,
      "step": 49313
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2105792760849,
      "learning_rate": 5.749152680054293e-06,
      "loss": 2.3573,
      "step": 49314
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1099165678024292,
      "learning_rate": 5.748779998662558e-06,
      "loss": 2.3386,
      "step": 49315
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.127490758895874,
      "learning_rate": 5.7484073244777916e-06,
      "loss": 2.3,
      "step": 49316
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.024214744567871,
      "learning_rate": 5.7480346575006326e-06,
      "loss": 2.3033,
      "step": 49317
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0730865001678467,
      "learning_rate": 5.747661997731706e-06,
      "loss": 2.1271,
      "step": 49318
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1606078147888184,
      "learning_rate": 5.747289345171653e-06,
      "loss": 2.3059,
      "step": 49319
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0617289543151855,
      "learning_rate": 5.746916699821093e-06,
      "loss": 2.3196,
      "step": 49320
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0018938779830933,
      "learning_rate": 5.74654406168067e-06,
      "loss": 2.5353,
      "step": 49321
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0484497547149658,
      "learning_rate": 5.74617143075101e-06,
      "loss": 2.3665,
      "step": 49322
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1478276252746582,
      "learning_rate": 5.745798807032741e-06,
      "loss": 2.4599,
      "step": 49323
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1032533645629883,
      "learning_rate": 5.745426190526501e-06,
      "loss": 2.4034,
      "step": 49324
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0453664064407349,
      "learning_rate": 5.745053581232916e-06,
      "loss": 2.5316,
      "step": 49325
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9695688486099243,
      "learning_rate": 5.7446809791526234e-06,
      "loss": 2.6766,
      "step": 49326
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0483113527297974,
      "learning_rate": 5.744308384286255e-06,
      "loss": 2.319,
      "step": 49327
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0078448057174683,
      "learning_rate": 5.743935796634438e-06,
      "loss": 2.443,
      "step": 49328
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0424708127975464,
      "learning_rate": 5.743563216197802e-06,
      "loss": 2.1263,
      "step": 49329
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1000858545303345,
      "learning_rate": 5.743190642976986e-06,
      "loss": 2.2195,
      "step": 49330
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1863371133804321,
      "learning_rate": 5.7428180769726134e-06,
      "loss": 2.3508,
      "step": 49331
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0417324304580688,
      "learning_rate": 5.742445518185327e-06,
      "loss": 2.3938,
      "step": 49332
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0600544214248657,
      "learning_rate": 5.742072966615746e-06,
      "loss": 2.1697,
      "step": 49333
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9654466509819031,
      "learning_rate": 5.741700422264511e-06,
      "loss": 2.3527,
      "step": 49334
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.084070086479187,
      "learning_rate": 5.741327885132249e-06,
      "loss": 2.1993,
      "step": 49335
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.376826524734497,
      "learning_rate": 5.740955355219594e-06,
      "loss": 2.5828,
      "step": 49336
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1467679738998413,
      "learning_rate": 5.740582832527174e-06,
      "loss": 2.3522,
      "step": 49337
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0548878908157349,
      "learning_rate": 5.740210317055625e-06,
      "loss": 2.1443,
      "step": 49338
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0241138935089111,
      "learning_rate": 5.739837808805575e-06,
      "loss": 2.1783,
      "step": 49339
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0921927690505981,
      "learning_rate": 5.739465307777664e-06,
      "loss": 2.3157,
      "step": 49340
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0242316722869873,
      "learning_rate": 5.739092813972509e-06,
      "loss": 2.5375,
      "step": 49341
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.051265835762024,
      "learning_rate": 5.738720327390752e-06,
      "loss": 2.0786,
      "step": 49342
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1525479555130005,
      "learning_rate": 5.738347848033017e-06,
      "loss": 2.4637,
      "step": 49343
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0579603910446167,
      "learning_rate": 5.737975375899945e-06,
      "loss": 2.572,
      "step": 49344
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1533677577972412,
      "learning_rate": 5.737602910992157e-06,
      "loss": 2.2638,
      "step": 49345
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.155661940574646,
      "learning_rate": 5.737230453310295e-06,
      "loss": 2.2028,
      "step": 49346
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0204823017120361,
      "learning_rate": 5.7368580028549815e-06,
      "loss": 2.234,
      "step": 49347
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0216933488845825,
      "learning_rate": 5.736485559626854e-06,
      "loss": 2.3147,
      "step": 49348
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1743465662002563,
      "learning_rate": 5.736113123626542e-06,
      "loss": 2.3963,
      "step": 49349
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0962163209915161,
      "learning_rate": 5.735740694854672e-06,
      "loss": 2.2094,
      "step": 49350
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1100115776062012,
      "learning_rate": 5.735368273311886e-06,
      "loss": 2.1167,
      "step": 49351
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0094819068908691,
      "learning_rate": 5.734995858998802e-06,
      "loss": 2.1101,
      "step": 49352
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0407224893569946,
      "learning_rate": 5.734623451916063e-06,
      "loss": 2.191,
      "step": 49353
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1485671997070312,
      "learning_rate": 5.734251052064297e-06,
      "loss": 2.4029,
      "step": 49354
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.049842357635498,
      "learning_rate": 5.73387865944413e-06,
      "loss": 2.1793,
      "step": 49355
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0165417194366455,
      "learning_rate": 5.7335062740562e-06,
      "loss": 2.1557,
      "step": 49356
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1792521476745605,
      "learning_rate": 5.733133895901136e-06,
      "loss": 2.4115,
      "step": 49357
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2326220273971558,
      "learning_rate": 5.732761524979565e-06,
      "loss": 2.1963,
      "step": 49358
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0407335758209229,
      "learning_rate": 5.732389161292127e-06,
      "loss": 2.2881,
      "step": 49359
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0569639205932617,
      "learning_rate": 5.7320168048394434e-06,
      "loss": 2.3223,
      "step": 49360
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2106273174285889,
      "learning_rate": 5.731644455622154e-06,
      "loss": 2.2567,
      "step": 49361
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2449562549591064,
      "learning_rate": 5.731272113640884e-06,
      "loss": 2.3116,
      "step": 49362
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1096851825714111,
      "learning_rate": 5.7308997788962704e-06,
      "loss": 2.3577,
      "step": 49363
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0955315828323364,
      "learning_rate": 5.730527451388937e-06,
      "loss": 2.1965,
      "step": 49364
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2479190826416016,
      "learning_rate": 5.730155131119524e-06,
      "loss": 2.4519,
      "step": 49365
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.073179841041565,
      "learning_rate": 5.729782818088654e-06,
      "loss": 2.0442,
      "step": 49366
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7513132095336914,
      "learning_rate": 5.729410512296968e-06,
      "loss": 2.4079,
      "step": 49367
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.184403419494629,
      "learning_rate": 5.729038213745084e-06,
      "loss": 2.4216,
      "step": 49368
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1315690279006958,
      "learning_rate": 5.728665922433645e-06,
      "loss": 2.3643,
      "step": 49369
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0258597135543823,
      "learning_rate": 5.728293638363271e-06,
      "loss": 2.4346,
      "step": 49370
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1611523628234863,
      "learning_rate": 5.727921361534606e-06,
      "loss": 2.4122,
      "step": 49371
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1807739734649658,
      "learning_rate": 5.7275490919482704e-06,
      "loss": 2.2114,
      "step": 49372
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1308940649032593,
      "learning_rate": 5.727176829604902e-06,
      "loss": 2.2374,
      "step": 49373
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0568134784698486,
      "learning_rate": 5.726804574505127e-06,
      "loss": 2.4278,
      "step": 49374
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1213520765304565,
      "learning_rate": 5.726432326649582e-06,
      "loss": 2.3574,
      "step": 49375
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0355859994888306,
      "learning_rate": 5.726060086038889e-06,
      "loss": 2.2904,
      "step": 49376
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0801026821136475,
      "learning_rate": 5.7256878526736915e-06,
      "loss": 2.2554,
      "step": 49377
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1050440073013306,
      "learning_rate": 5.725315626554613e-06,
      "loss": 2.253,
      "step": 49378
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4688563346862793,
      "learning_rate": 5.724943407682282e-06,
      "loss": 2.0693,
      "step": 49379
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1908482313156128,
      "learning_rate": 5.724571196057337e-06,
      "loss": 2.4371,
      "step": 49380
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1232802867889404,
      "learning_rate": 5.724198991680405e-06,
      "loss": 2.3197,
      "step": 49381
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0672335624694824,
      "learning_rate": 5.723826794552112e-06,
      "loss": 2.3185,
      "step": 49382
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9939961433410645,
      "learning_rate": 5.7234546046731e-06,
      "loss": 2.3767,
      "step": 49383
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.054328203201294,
      "learning_rate": 5.723082422043992e-06,
      "loss": 2.1447,
      "step": 49384
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.002297043800354,
      "learning_rate": 5.722710246665417e-06,
      "loss": 2.5076,
      "step": 49385
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0504136085510254,
      "learning_rate": 5.722338078538015e-06,
      "loss": 2.1547,
      "step": 49386
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1537517309188843,
      "learning_rate": 5.721965917662406e-06,
      "loss": 2.4586,
      "step": 49387
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9733226895332336,
      "learning_rate": 5.721593764039233e-06,
      "loss": 2.2466,
      "step": 49388
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.12241792678833,
      "learning_rate": 5.721221617669115e-06,
      "loss": 2.2053,
      "step": 49389
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.045561671257019,
      "learning_rate": 5.720849478552693e-06,
      "loss": 2.2072,
      "step": 49390
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9991975426673889,
      "learning_rate": 5.72047734669059e-06,
      "loss": 2.3289,
      "step": 49391
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2633371353149414,
      "learning_rate": 5.720105222083443e-06,
      "loss": 2.2927,
      "step": 49392
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1426994800567627,
      "learning_rate": 5.719733104731881e-06,
      "loss": 2.2009,
      "step": 49393
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.161151647567749,
      "learning_rate": 5.719360994636532e-06,
      "loss": 2.3136,
      "step": 49394
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1242561340332031,
      "learning_rate": 5.718988891798027e-06,
      "loss": 2.4459,
      "step": 49395
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.252632737159729,
      "learning_rate": 5.7186167962170015e-06,
      "loss": 2.3339,
      "step": 49396
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1105248928070068,
      "learning_rate": 5.718244707894078e-06,
      "loss": 2.334,
      "step": 49397
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.177173137664795,
      "learning_rate": 5.717872626829899e-06,
      "loss": 2.4349,
      "step": 49398
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1618579626083374,
      "learning_rate": 5.717500553025084e-06,
      "loss": 2.3747,
      "step": 49399
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0451740026474,
      "learning_rate": 5.717128486480272e-06,
      "loss": 2.3438,
      "step": 49400
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1895986795425415,
      "learning_rate": 5.716756427196089e-06,
      "loss": 2.4891,
      "step": 49401
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1938807964324951,
      "learning_rate": 5.716384375173167e-06,
      "loss": 2.3046,
      "step": 49402
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.181966781616211,
      "learning_rate": 5.716012330412136e-06,
      "loss": 2.2891,
      "step": 49403
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9741940498352051,
      "learning_rate": 5.715640292913631e-06,
      "loss": 2.3359,
      "step": 49404
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.096311330795288,
      "learning_rate": 5.71526826267828e-06,
      "loss": 2.0415,
      "step": 49405
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1544443368911743,
      "learning_rate": 5.714896239706711e-06,
      "loss": 2.2318,
      "step": 49406
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2639167308807373,
      "learning_rate": 5.7145242239995535e-06,
      "loss": 2.3644,
      "step": 49407
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.280381441116333,
      "learning_rate": 5.714152215557446e-06,
      "loss": 2.2156,
      "step": 49408
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1491189002990723,
      "learning_rate": 5.7137802143810105e-06,
      "loss": 2.176,
      "step": 49409
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1319431066513062,
      "learning_rate": 5.713408220470885e-06,
      "loss": 2.3376,
      "step": 49410
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0881108045578003,
      "learning_rate": 5.713036233827693e-06,
      "loss": 2.3075,
      "step": 49411
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0668268203735352,
      "learning_rate": 5.712664254452073e-06,
      "loss": 2.4091,
      "step": 49412
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2736811637878418,
      "learning_rate": 5.712292282344651e-06,
      "loss": 2.404,
      "step": 49413
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9835598468780518,
      "learning_rate": 5.711920317506054e-06,
      "loss": 2.176,
      "step": 49414
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0328818559646606,
      "learning_rate": 5.711548359936923e-06,
      "loss": 2.2242,
      "step": 49415
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0438891649246216,
      "learning_rate": 5.711176409637875e-06,
      "loss": 2.1261,
      "step": 49416
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9700613617897034,
      "learning_rate": 5.710804466609554e-06,
      "loss": 2.3052,
      "step": 49417
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.054316520690918,
      "learning_rate": 5.71043253085258e-06,
      "loss": 2.1431,
      "step": 49418
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.06354558467865,
      "learning_rate": 5.710060602367595e-06,
      "loss": 2.3202,
      "step": 49419
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.046957015991211,
      "learning_rate": 5.709688681155217e-06,
      "loss": 2.2755,
      "step": 49420
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1465160846710205,
      "learning_rate": 5.709316767216082e-06,
      "loss": 2.4183,
      "step": 49421
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0141483545303345,
      "learning_rate": 5.708944860550819e-06,
      "loss": 2.1575,
      "step": 49422
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.183403491973877,
      "learning_rate": 5.708572961160064e-06,
      "loss": 2.3444,
      "step": 49423
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.274058222770691,
      "learning_rate": 5.708201069044438e-06,
      "loss": 2.2883,
      "step": 49424
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1229984760284424,
      "learning_rate": 5.707829184204581e-06,
      "loss": 2.4187,
      "step": 49425
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0232149362564087,
      "learning_rate": 5.707457306641116e-06,
      "loss": 2.3287,
      "step": 49426
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1035277843475342,
      "learning_rate": 5.70708543635468e-06,
      "loss": 2.5269,
      "step": 49427
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1528260707855225,
      "learning_rate": 5.706713573345897e-06,
      "loss": 2.5397,
      "step": 49428
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0579266548156738,
      "learning_rate": 5.7063417176154045e-06,
      "loss": 2.3489,
      "step": 49429
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0498733520507812,
      "learning_rate": 5.705969869163825e-06,
      "loss": 2.5749,
      "step": 49430
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0656969547271729,
      "learning_rate": 5.705598027991796e-06,
      "loss": 2.3619,
      "step": 49431
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1480872631072998,
      "learning_rate": 5.70522619409994e-06,
      "loss": 2.3206,
      "step": 49432
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.037774682044983,
      "learning_rate": 5.704854367488901e-06,
      "loss": 2.2063,
      "step": 49433
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1231002807617188,
      "learning_rate": 5.704482548159294e-06,
      "loss": 2.4146,
      "step": 49434
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.054857850074768,
      "learning_rate": 5.704110736111758e-06,
      "loss": 2.2824,
      "step": 49435
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0431455373764038,
      "learning_rate": 5.703738931346916e-06,
      "loss": 2.3582,
      "step": 49436
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1276044845581055,
      "learning_rate": 5.70336713386541e-06,
      "loss": 2.3667,
      "step": 49437
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1530582904815674,
      "learning_rate": 5.7029953436678575e-06,
      "loss": 2.43,
      "step": 49438
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9957685470581055,
      "learning_rate": 5.7026235607549e-06,
      "loss": 2.0888,
      "step": 49439
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1501461267471313,
      "learning_rate": 5.702251785127162e-06,
      "loss": 2.4935,
      "step": 49440
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.160590648651123,
      "learning_rate": 5.701880016785271e-06,
      "loss": 2.5512,
      "step": 49441
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0136940479278564,
      "learning_rate": 5.701508255729864e-06,
      "loss": 2.4128,
      "step": 49442
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0793094635009766,
      "learning_rate": 5.701136501961565e-06,
      "loss": 2.4213,
      "step": 49443
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0654247999191284,
      "learning_rate": 5.700764755481011e-06,
      "loss": 2.32,
      "step": 49444
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0261913537979126,
      "learning_rate": 5.7003930162888244e-06,
      "loss": 2.0918,
      "step": 49445
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0807827711105347,
      "learning_rate": 5.700021284385646e-06,
      "loss": 2.3225,
      "step": 49446
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9800241589546204,
      "learning_rate": 5.699649559772094e-06,
      "loss": 2.3538,
      "step": 49447
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2137811183929443,
      "learning_rate": 5.699277842448806e-06,
      "loss": 2.3682,
      "step": 49448
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0428096055984497,
      "learning_rate": 5.698906132416406e-06,
      "loss": 2.2664,
      "step": 49449
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.075014591217041,
      "learning_rate": 5.698534429675533e-06,
      "loss": 2.5439,
      "step": 49450
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1263049840927124,
      "learning_rate": 5.698162734226809e-06,
      "loss": 2.3159,
      "step": 49451
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1942697763442993,
      "learning_rate": 5.697791046070871e-06,
      "loss": 2.38,
      "step": 49452
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1726157665252686,
      "learning_rate": 5.697419365208341e-06,
      "loss": 2.1399,
      "step": 49453
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.105580449104309,
      "learning_rate": 5.697047691639859e-06,
      "loss": 2.5008,
      "step": 49454
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9946666359901428,
      "learning_rate": 5.696676025366043e-06,
      "loss": 2.4715,
      "step": 49455
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1083096265792847,
      "learning_rate": 5.696304366387537e-06,
      "loss": 2.2801,
      "step": 49456
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1736748218536377,
      "learning_rate": 5.695932714704959e-06,
      "loss": 2.2561,
      "step": 49457
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5273407697677612,
      "learning_rate": 5.695561070318948e-06,
      "loss": 2.3114,
      "step": 49458
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1433683633804321,
      "learning_rate": 5.695189433230131e-06,
      "loss": 2.2498,
      "step": 49459
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9957221746444702,
      "learning_rate": 5.6948178034391345e-06,
      "loss": 2.4647,
      "step": 49460
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2006434202194214,
      "learning_rate": 5.694446180946589e-06,
      "loss": 2.6125,
      "step": 49461
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0734533071517944,
      "learning_rate": 5.69407456575313e-06,
      "loss": 2.2834,
      "step": 49462
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1237980127334595,
      "learning_rate": 5.693702957859381e-06,
      "loss": 2.5766,
      "step": 49463
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9439439177513123,
      "learning_rate": 5.693331357265978e-06,
      "loss": 2.096,
      "step": 49464
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9703131318092346,
      "learning_rate": 5.692959763973544e-06,
      "loss": 2.3914,
      "step": 49465
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2644331455230713,
      "learning_rate": 5.692588177982718e-06,
      "loss": 2.5081,
      "step": 49466
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1748851537704468,
      "learning_rate": 5.692216599294125e-06,
      "loss": 2.2954,
      "step": 49467
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0318500995635986,
      "learning_rate": 5.691845027908389e-06,
      "loss": 2.3158,
      "step": 49468
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.112890601158142,
      "learning_rate": 5.6914734638261515e-06,
      "loss": 2.161,
      "step": 49469
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0208231210708618,
      "learning_rate": 5.691101907048031e-06,
      "loss": 2.3454,
      "step": 49470
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0008420944213867,
      "learning_rate": 5.690730357574669e-06,
      "loss": 2.3787,
      "step": 49471
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0401684045791626,
      "learning_rate": 5.690358815406688e-06,
      "loss": 2.2794,
      "step": 49472
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.048380732536316,
      "learning_rate": 5.689987280544715e-06,
      "loss": 2.5134,
      "step": 49473
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1343257427215576,
      "learning_rate": 5.6896157529893905e-06,
      "loss": 2.2134,
      "step": 49474
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1211700439453125,
      "learning_rate": 5.689244232741335e-06,
      "loss": 2.4447,
      "step": 49475
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0404542684555054,
      "learning_rate": 5.688872719801179e-06,
      "loss": 2.3244,
      "step": 49476
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1727845668792725,
      "learning_rate": 5.688501214169559e-06,
      "loss": 2.2935,
      "step": 49477
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0531625747680664,
      "learning_rate": 5.688129715847096e-06,
      "loss": 2.5631,
      "step": 49478
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1960783004760742,
      "learning_rate": 5.6877582248344275e-06,
      "loss": 2.4364,
      "step": 49479
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.102728009223938,
      "learning_rate": 5.687386741132177e-06,
      "loss": 2.6659,
      "step": 49480
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1133469343185425,
      "learning_rate": 5.6870152647409804e-06,
      "loss": 2.4599,
      "step": 49481
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0513232946395874,
      "learning_rate": 5.686643795661462e-06,
      "loss": 2.2795,
      "step": 49482
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0985337495803833,
      "learning_rate": 5.686272333894256e-06,
      "loss": 2.1056,
      "step": 49483
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1660475730895996,
      "learning_rate": 5.685900879439986e-06,
      "loss": 2.3391,
      "step": 49484
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.184752106666565,
      "learning_rate": 5.685529432299295e-06,
      "loss": 2.4457,
      "step": 49485
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1211735010147095,
      "learning_rate": 5.685157992472794e-06,
      "loss": 2.191,
      "step": 49486
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1376076936721802,
      "learning_rate": 5.684786559961126e-06,
      "loss": 2.1683,
      "step": 49487
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9928386211395264,
      "learning_rate": 5.684415134764914e-06,
      "loss": 2.0215,
      "step": 49488
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0707306861877441,
      "learning_rate": 5.684043716884795e-06,
      "loss": 2.545,
      "step": 49489
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1691563129425049,
      "learning_rate": 5.6836723063213885e-06,
      "loss": 2.1389,
      "step": 49490
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1819682121276855,
      "learning_rate": 5.683300903075335e-06,
      "loss": 2.4952,
      "step": 49491
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0208954811096191,
      "learning_rate": 5.682929507147253e-06,
      "loss": 2.5163,
      "step": 49492
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0150336027145386,
      "learning_rate": 5.682558118537784e-06,
      "loss": 2.2731,
      "step": 49493
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.402856707572937,
      "learning_rate": 5.682186737247546e-06,
      "loss": 2.6402,
      "step": 49494
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1821060180664062,
      "learning_rate": 5.681815363277179e-06,
      "loss": 2.1536,
      "step": 49495
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9692234992980957,
      "learning_rate": 5.681443996627307e-06,
      "loss": 2.4238,
      "step": 49496
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0688046216964722,
      "learning_rate": 5.681072637298558e-06,
      "loss": 2.2781,
      "step": 49497
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0409045219421387,
      "learning_rate": 5.680701285291567e-06,
      "loss": 2.2849,
      "step": 49498
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.056919813156128,
      "learning_rate": 5.68032994060696e-06,
      "loss": 2.6235,
      "step": 49499
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0912868976593018,
      "learning_rate": 5.679958603245363e-06,
      "loss": 2.4413,
      "step": 49500
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0540122985839844,
      "learning_rate": 5.679587273207414e-06,
      "loss": 2.4631,
      "step": 49501
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0394716262817383,
      "learning_rate": 5.679215950493738e-06,
      "loss": 2.3799,
      "step": 49502
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0742559432983398,
      "learning_rate": 5.678844635104961e-06,
      "loss": 2.3307,
      "step": 49503
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2148948907852173,
      "learning_rate": 5.678473327041718e-06,
      "loss": 2.3322,
      "step": 49504
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1110869646072388,
      "learning_rate": 5.678102026304635e-06,
      "loss": 2.4215,
      "step": 49505
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0574266910552979,
      "learning_rate": 5.677730732894344e-06,
      "loss": 2.2588,
      "step": 49506
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2072398662567139,
      "learning_rate": 5.677359446811471e-06,
      "loss": 2.2159,
      "step": 49507
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0150477886199951,
      "learning_rate": 5.6769881680566516e-06,
      "loss": 2.3609,
      "step": 49508
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2220088243484497,
      "learning_rate": 5.676616896630508e-06,
      "loss": 2.3441,
      "step": 49509
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.061690092086792,
      "learning_rate": 5.676245632533677e-06,
      "loss": 2.4356,
      "step": 49510
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0612986087799072,
      "learning_rate": 5.675874375766779e-06,
      "loss": 2.1629,
      "step": 49511
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.046858787536621,
      "learning_rate": 5.675503126330456e-06,
      "loss": 2.2831,
      "step": 49512
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1185449361801147,
      "learning_rate": 5.675131884225322e-06,
      "loss": 2.5032,
      "step": 49513
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.318401575088501,
      "learning_rate": 5.674760649452018e-06,
      "loss": 2.2429,
      "step": 49514
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0939909219741821,
      "learning_rate": 5.674389422011166e-06,
      "loss": 2.4187,
      "step": 49515
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3017401695251465,
      "learning_rate": 5.674018201903403e-06,
      "loss": 2.5396,
      "step": 49516
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.165784478187561,
      "learning_rate": 5.67364698912935e-06,
      "loss": 2.2494,
      "step": 49517
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0387474298477173,
      "learning_rate": 5.673275783689644e-06,
      "loss": 2.474,
      "step": 49518
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.098516583442688,
      "learning_rate": 5.672904585584906e-06,
      "loss": 2.4473,
      "step": 49519
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.164871096611023,
      "learning_rate": 5.672533394815776e-06,
      "loss": 2.548,
      "step": 49520
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0594490766525269,
      "learning_rate": 5.6721622113828715e-06,
      "loss": 2.3378,
      "step": 49521
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.992516815662384,
      "learning_rate": 5.671791035286831e-06,
      "loss": 2.1249,
      "step": 49522
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.068374752998352,
      "learning_rate": 5.671419866528281e-06,
      "loss": 2.3281,
      "step": 49523
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.129299283027649,
      "learning_rate": 5.671048705107846e-06,
      "loss": 2.3916,
      "step": 49524
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0383224487304688,
      "learning_rate": 5.670677551026163e-06,
      "loss": 2.3842,
      "step": 49525
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9844582080841064,
      "learning_rate": 5.670306404283858e-06,
      "loss": 2.1069,
      "step": 49526
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0489095449447632,
      "learning_rate": 5.669935264881555e-06,
      "loss": 2.3999,
      "step": 49527
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9782652854919434,
      "learning_rate": 5.669564132819891e-06,
      "loss": 2.5397,
      "step": 49528
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1307886838912964,
      "learning_rate": 5.66919300809949e-06,
      "loss": 2.3411,
      "step": 49529
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0543508529663086,
      "learning_rate": 5.668821890720985e-06,
      "loss": 2.3591,
      "step": 49530
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0875533819198608,
      "learning_rate": 5.668450780685004e-06,
      "loss": 2.3218,
      "step": 49531
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9824172258377075,
      "learning_rate": 5.668079677992174e-06,
      "loss": 2.5022,
      "step": 49532
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.051473617553711,
      "learning_rate": 5.667708582643126e-06,
      "loss": 2.435,
      "step": 49533
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.0279566049575806,
      "learning_rate": 5.667337494638485e-06,
      "loss": 2.15,
      "step": 49534
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1363080739974976,
      "learning_rate": 5.666966413978888e-06,
      "loss": 2.4421,
      "step": 49535
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.140142560005188,
      "learning_rate": 5.666595340664956e-06,
      "loss": 2.4572,
      "step": 49536
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2704678773880005,
      "learning_rate": 5.6662242746973265e-06,
      "loss": 1.9823,
      "step": 49537
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.084648847579956,
      "learning_rate": 5.6658532160766225e-06,
      "loss": 2.4449,
      "step": 49538
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0711642503738403,
      "learning_rate": 5.6654821648034754e-06,
      "loss": 2.3057,
      "step": 49539
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1301848888397217,
      "learning_rate": 5.665111120878509e-06,
      "loss": 2.2166,
      "step": 49540
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9997678399085999,
      "learning_rate": 5.6647400843023606e-06,
      "loss": 2.1606,
      "step": 49541
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1191213130950928,
      "learning_rate": 5.6643690550756515e-06,
      "loss": 2.3336,
      "step": 49542
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.610011100769043,
      "learning_rate": 5.663998033199018e-06,
      "loss": 2.3814,
      "step": 49543
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1009478569030762,
      "learning_rate": 5.663627018673081e-06,
      "loss": 2.2338,
      "step": 49544
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1188793182373047,
      "learning_rate": 5.663256011498479e-06,
      "loss": 2.4584,
      "step": 49545
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0936293601989746,
      "learning_rate": 5.662885011675832e-06,
      "loss": 2.464,
      "step": 49546
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0654114484786987,
      "learning_rate": 5.662514019205776e-06,
      "loss": 2.3802,
      "step": 49547
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2339636087417603,
      "learning_rate": 5.662143034088933e-06,
      "loss": 2.406,
      "step": 49548
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9798035025596619,
      "learning_rate": 5.66177205632594e-06,
      "loss": 2.4142,
      "step": 49549
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1095253229141235,
      "learning_rate": 5.661401085917417e-06,
      "loss": 2.2289,
      "step": 49550
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0781348943710327,
      "learning_rate": 5.661030122864006e-06,
      "loss": 2.3037,
      "step": 49551
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1113985776901245,
      "learning_rate": 5.660659167166319e-06,
      "loss": 2.6464,
      "step": 49552
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0654897689819336,
      "learning_rate": 5.660288218824998e-06,
      "loss": 2.4161,
      "step": 49553
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2322964668273926,
      "learning_rate": 5.659917277840663e-06,
      "loss": 2.3604,
      "step": 49554
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4185917377471924,
      "learning_rate": 5.6595463442139505e-06,
      "loss": 2.3709,
      "step": 49555
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0280706882476807,
      "learning_rate": 5.659175417945482e-06,
      "loss": 2.391,
      "step": 49556
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9507601261138916,
      "learning_rate": 5.658804499035893e-06,
      "loss": 2.3741,
      "step": 49557
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0449568033218384,
      "learning_rate": 5.6584335874858105e-06,
      "loss": 2.473,
      "step": 49558
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0710402727127075,
      "learning_rate": 5.658062683295859e-06,
      "loss": 2.3727,
      "step": 49559
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2320128679275513,
      "learning_rate": 5.657691786466673e-06,
      "loss": 2.5137,
      "step": 49560
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9495640993118286,
      "learning_rate": 5.657320896998876e-06,
      "loss": 2.1384,
      "step": 49561
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.147692322731018,
      "learning_rate": 5.6569500148931034e-06,
      "loss": 2.3758,
      "step": 49562
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1814031600952148,
      "learning_rate": 5.656579140149976e-06,
      "loss": 2.283,
      "step": 49563
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1310173273086548,
      "learning_rate": 5.656208272770134e-06,
      "loss": 2.2539,
      "step": 49564
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1102279424667358,
      "learning_rate": 5.65583741275419e-06,
      "loss": 2.0925,
      "step": 49565
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0521835088729858,
      "learning_rate": 5.655466560102787e-06,
      "loss": 2.3671,
      "step": 49566
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0365374088287354,
      "learning_rate": 5.655095714816543e-06,
      "loss": 2.1605,
      "step": 49567
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1966602802276611,
      "learning_rate": 5.654724876896097e-06,
      "loss": 2.3014,
      "step": 49568
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.059056282043457,
      "learning_rate": 5.6543540463420675e-06,
      "loss": 1.9956,
      "step": 49569
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.043564796447754,
      "learning_rate": 5.653983223155093e-06,
      "loss": 2.4425,
      "step": 49570
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0689542293548584,
      "learning_rate": 5.653612407335794e-06,
      "loss": 2.4778,
      "step": 49571
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0597479343414307,
      "learning_rate": 5.6532415988848045e-06,
      "loss": 2.3354,
      "step": 49572
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1078637838363647,
      "learning_rate": 5.652870797802748e-06,
      "loss": 2.3327,
      "step": 49573
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0537431240081787,
      "learning_rate": 5.65250000409026e-06,
      "loss": 2.1976,
      "step": 49574
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1574922800064087,
      "learning_rate": 5.6521292177479615e-06,
      "loss": 2.2454,
      "step": 49575
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.063876986503601,
      "learning_rate": 5.651758438776489e-06,
      "loss": 2.1636,
      "step": 49576
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0272842645645142,
      "learning_rate": 5.6513876671764624e-06,
      "loss": 2.2201,
      "step": 49577
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0307313203811646,
      "learning_rate": 5.651016902948523e-06,
      "loss": 2.3472,
      "step": 49578
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0828357934951782,
      "learning_rate": 5.650646146093283e-06,
      "loss": 2.3438,
      "step": 49579
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0629349946975708,
      "learning_rate": 5.6502753966113825e-06,
      "loss": 2.0704,
      "step": 49580
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2453932762145996,
      "learning_rate": 5.649904654503443e-06,
      "loss": 2.3141,
      "step": 49581
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1121083498001099,
      "learning_rate": 5.6495339197701e-06,
      "loss": 2.2635,
      "step": 49582
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.198459506034851,
      "learning_rate": 5.649163192411976e-06,
      "loss": 2.1398,
      "step": 49583
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0039206743240356,
      "learning_rate": 5.648792472429705e-06,
      "loss": 2.3415,
      "step": 49584
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1892683506011963,
      "learning_rate": 5.648421759823913e-06,
      "loss": 2.3618,
      "step": 49585
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3537890911102295,
      "learning_rate": 5.648051054595225e-06,
      "loss": 2.1303,
      "step": 49586
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0526583194732666,
      "learning_rate": 5.647680356744274e-06,
      "loss": 2.5833,
      "step": 49587
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1832767724990845,
      "learning_rate": 5.647309666271685e-06,
      "loss": 2.4511,
      "step": 49588
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1401921510696411,
      "learning_rate": 5.646938983178092e-06,
      "loss": 2.2503,
      "step": 49589
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0893460512161255,
      "learning_rate": 5.646568307464115e-06,
      "loss": 2.3897,
      "step": 49590
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0369824171066284,
      "learning_rate": 5.64619763913039e-06,
      "loss": 2.2887,
      "step": 49591
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0183707475662231,
      "learning_rate": 5.6458269781775445e-06,
      "loss": 2.429,
      "step": 49592
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9707210659980774,
      "learning_rate": 5.6454563246062045e-06,
      "loss": 2.1944,
      "step": 49593
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0806574821472168,
      "learning_rate": 5.645085678416995e-06,
      "loss": 2.1558,
      "step": 49594
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9568116068840027,
      "learning_rate": 5.644715039610551e-06,
      "loss": 2.3238,
      "step": 49595
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1227753162384033,
      "learning_rate": 5.644344408187495e-06,
      "loss": 2.299,
      "step": 49596
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2595174312591553,
      "learning_rate": 5.64397378414846e-06,
      "loss": 2.3071,
      "step": 49597
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0156270265579224,
      "learning_rate": 5.643603167494071e-06,
      "loss": 2.5101,
      "step": 49598
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1216875314712524,
      "learning_rate": 5.64323255822496e-06,
      "loss": 2.4533,
      "step": 49599
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1126662492752075,
      "learning_rate": 5.64286195634175e-06,
      "loss": 2.4725,
      "step": 49600
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9609558582305908,
      "learning_rate": 5.642491361845076e-06,
      "loss": 2.3491,
      "step": 49601
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0597050189971924,
      "learning_rate": 5.64212077473556e-06,
      "loss": 2.4164,
      "step": 49602
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1848597526550293,
      "learning_rate": 5.641750195013835e-06,
      "loss": 2.4854,
      "step": 49603
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0951305627822876,
      "learning_rate": 5.641379622680527e-06,
      "loss": 2.1998,
      "step": 49604
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.207074761390686,
      "learning_rate": 5.6410090577362664e-06,
      "loss": 2.271,
      "step": 49605
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.007553219795227,
      "learning_rate": 5.640638500181674e-06,
      "loss": 2.4782,
      "step": 49606
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1190006732940674,
      "learning_rate": 5.640267950017387e-06,
      "loss": 2.2966,
      "step": 49607
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.091424822807312,
      "learning_rate": 5.639897407244026e-06,
      "loss": 2.3415,
      "step": 49608
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1124293804168701,
      "learning_rate": 5.6395268718622284e-06,
      "loss": 2.2457,
      "step": 49609
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0811362266540527,
      "learning_rate": 5.6391563438726115e-06,
      "loss": 2.5107,
      "step": 49610
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1736056804656982,
      "learning_rate": 5.638785823275814e-06,
      "loss": 2.2235,
      "step": 49611
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0801388025283813,
      "learning_rate": 5.6384153100724534e-06,
      "loss": 2.2537,
      "step": 49612
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9962841868400574,
      "learning_rate": 5.638044804263169e-06,
      "loss": 2.4748,
      "step": 49613
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.107073426246643,
      "learning_rate": 5.6376743058485835e-06,
      "loss": 2.1488,
      "step": 49614
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.03663969039917,
      "learning_rate": 5.6373038148293205e-06,
      "loss": 2.1545,
      "step": 49615
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9592756628990173,
      "learning_rate": 5.636933331206016e-06,
      "loss": 2.4009,
      "step": 49616
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.108350157737732,
      "learning_rate": 5.636562854979295e-06,
      "loss": 2.3302,
      "step": 49617
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0938647985458374,
      "learning_rate": 5.636192386149781e-06,
      "loss": 2.4821,
      "step": 49618
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3830978870391846,
      "learning_rate": 5.63582192471811e-06,
      "loss": 2.2481,
      "step": 49619
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2605717182159424,
      "learning_rate": 5.635451470684905e-06,
      "loss": 2.3802,
      "step": 49620
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2292627096176147,
      "learning_rate": 5.635081024050793e-06,
      "loss": 2.2572,
      "step": 49621
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0500904321670532,
      "learning_rate": 5.6347105848164075e-06,
      "loss": 2.2798,
      "step": 49622
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3452205657958984,
      "learning_rate": 5.63434015298237e-06,
      "loss": 2.4043,
      "step": 49623
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1055089235305786,
      "learning_rate": 5.633969728549314e-06,
      "loss": 2.4911,
      "step": 49624
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0671257972717285,
      "learning_rate": 5.633599311517861e-06,
      "loss": 2.2557,
      "step": 49625
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9886766076087952,
      "learning_rate": 5.633228901888648e-06,
      "loss": 2.275,
      "step": 49626
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0253268480300903,
      "learning_rate": 5.632858499662294e-06,
      "loss": 2.2731,
      "step": 49627
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9922598004341125,
      "learning_rate": 5.632488104839435e-06,
      "loss": 2.4097,
      "step": 49628
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0648887157440186,
      "learning_rate": 5.632117717420691e-06,
      "loss": 2.23,
      "step": 49629
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1101435422897339,
      "learning_rate": 5.631747337406701e-06,
      "loss": 2.4406,
      "step": 49630
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4528337717056274,
      "learning_rate": 5.631376964798078e-06,
      "loss": 2.2624,
      "step": 49631
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.007777452468872,
      "learning_rate": 5.631006599595461e-06,
      "loss": 2.2317,
      "step": 49632
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.030985951423645,
      "learning_rate": 5.630636241799472e-06,
      "loss": 2.5454,
      "step": 49633
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.107565999031067,
      "learning_rate": 5.630265891410744e-06,
      "loss": 2.3867,
      "step": 49634
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.063637137413025,
      "learning_rate": 5.629895548429898e-06,
      "loss": 2.4114,
      "step": 49635
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.144399642944336,
      "learning_rate": 5.6295252128575705e-06,
      "loss": 2.4012,
      "step": 49636
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1072092056274414,
      "learning_rate": 5.629154884694381e-06,
      "loss": 2.3494,
      "step": 49637
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1502561569213867,
      "learning_rate": 5.628784563940964e-06,
      "loss": 2.4225,
      "step": 49638
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1158292293548584,
      "learning_rate": 5.628414250597941e-06,
      "loss": 2.1699,
      "step": 49639
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2020511627197266,
      "learning_rate": 5.628043944665948e-06,
      "loss": 2.3105,
      "step": 49640
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0659202337265015,
      "learning_rate": 5.6276736461456065e-06,
      "loss": 2.2843,
      "step": 49641
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1098382472991943,
      "learning_rate": 5.627303355037543e-06,
      "loss": 2.32,
      "step": 49642
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1153064966201782,
      "learning_rate": 5.626933071342391e-06,
      "loss": 2.3402,
      "step": 49643
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1885923147201538,
      "learning_rate": 5.626562795060775e-06,
      "loss": 2.4253,
      "step": 49644
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0926110744476318,
      "learning_rate": 5.626192526193319e-06,
      "loss": 2.4578,
      "step": 49645
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0615769624710083,
      "learning_rate": 5.625822264740659e-06,
      "loss": 2.1555,
      "step": 49646
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1272163391113281,
      "learning_rate": 5.625452010703414e-06,
      "loss": 2.2227,
      "step": 49647
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0111569166183472,
      "learning_rate": 5.625081764082221e-06,
      "loss": 2.352,
      "step": 49648
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.014049768447876,
      "learning_rate": 5.624711524877702e-06,
      "loss": 2.4714,
      "step": 49649
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1068806648254395,
      "learning_rate": 5.62434129309048e-06,
      "loss": 2.4415,
      "step": 49650
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1435202360153198,
      "learning_rate": 5.623971068721192e-06,
      "loss": 2.507,
      "step": 49651
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1696007251739502,
      "learning_rate": 5.62360085177046e-06,
      "loss": 2.2583,
      "step": 49652
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1493138074874878,
      "learning_rate": 5.623230642238915e-06,
      "loss": 2.2176,
      "step": 49653
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0553103685379028,
      "learning_rate": 5.62286044012718e-06,
      "loss": 2.2977,
      "step": 49654
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1609387397766113,
      "learning_rate": 5.6224902454358874e-06,
      "loss": 2.2979,
      "step": 49655
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1811467409133911,
      "learning_rate": 5.622120058165661e-06,
      "loss": 2.2669,
      "step": 49656
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2902536392211914,
      "learning_rate": 5.621749878317137e-06,
      "loss": 2.0887,
      "step": 49657
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4270634651184082,
      "learning_rate": 5.621379705890928e-06,
      "loss": 2.3956,
      "step": 49658
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0743731260299683,
      "learning_rate": 5.621009540887674e-06,
      "loss": 2.5499,
      "step": 49659
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0813651084899902,
      "learning_rate": 5.620639383307995e-06,
      "loss": 2.4266,
      "step": 49660
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9867164492607117,
      "learning_rate": 5.620269233152524e-06,
      "loss": 2.0495,
      "step": 49661
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9908005595207214,
      "learning_rate": 5.6198990904218835e-06,
      "loss": 2.4253,
      "step": 49662
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0239328145980835,
      "learning_rate": 5.619528955116706e-06,
      "loss": 2.3107,
      "step": 49663
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1035462617874146,
      "learning_rate": 5.619158827237614e-06,
      "loss": 2.2642,
      "step": 49664
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0517127513885498,
      "learning_rate": 5.618788706785242e-06,
      "loss": 2.3889,
      "step": 49665
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0317922830581665,
      "learning_rate": 5.618418593760209e-06,
      "loss": 2.2405,
      "step": 49666
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.067196249961853,
      "learning_rate": 5.6180484881631495e-06,
      "loss": 2.2551,
      "step": 49667
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0297802686691284,
      "learning_rate": 5.617678389994684e-06,
      "loss": 2.1814,
      "step": 49668
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1721158027648926,
      "learning_rate": 5.617308299255448e-06,
      "loss": 2.1819,
      "step": 49669
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0968674421310425,
      "learning_rate": 5.616938215946065e-06,
      "loss": 2.2223,
      "step": 49670
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0890735387802124,
      "learning_rate": 5.616568140067163e-06,
      "loss": 2.3654,
      "step": 49671
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2138055562973022,
      "learning_rate": 5.616198071619363e-06,
      "loss": 2.2808,
      "step": 49672
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0228855609893799,
      "learning_rate": 5.615828010603301e-06,
      "loss": 2.2369,
      "step": 49673
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1073801517486572,
      "learning_rate": 5.6154579570196e-06,
      "loss": 2.343,
      "step": 49674
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0160998106002808,
      "learning_rate": 5.615087910868891e-06,
      "loss": 2.0129,
      "step": 49675
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0828453302383423,
      "learning_rate": 5.6147178721518e-06,
      "loss": 2.2659,
      "step": 49676
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1052361726760864,
      "learning_rate": 5.614347840868948e-06,
      "loss": 2.471,
      "step": 49677
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0091005563735962,
      "learning_rate": 5.613977817020973e-06,
      "loss": 2.2975,
      "step": 49678
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0543354749679565,
      "learning_rate": 5.613607800608492e-06,
      "loss": 2.4176,
      "step": 49679
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0404508113861084,
      "learning_rate": 5.61323779163214e-06,
      "loss": 2.2637,
      "step": 49680
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1993460655212402,
      "learning_rate": 5.612867790092538e-06,
      "loss": 2.3042,
      "step": 49681
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0063077211380005,
      "learning_rate": 5.612497795990322e-06,
      "loss": 2.2237,
      "step": 49682
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2342909574508667,
      "learning_rate": 5.6121278093261115e-06,
      "loss": 2.4448,
      "step": 49683
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.031113624572754,
      "learning_rate": 5.611757830100537e-06,
      "loss": 2.3965,
      "step": 49684
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1297743320465088,
      "learning_rate": 5.611387858314221e-06,
      "loss": 2.4647,
      "step": 49685
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1373668909072876,
      "learning_rate": 5.611017893967799e-06,
      "loss": 2.4423,
      "step": 49686
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1152740716934204,
      "learning_rate": 5.610647937061891e-06,
      "loss": 2.1855,
      "step": 49687
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.680909514427185,
      "learning_rate": 5.610277987597128e-06,
      "loss": 2.4476,
      "step": 49688
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0405938625335693,
      "learning_rate": 5.609908045574134e-06,
      "loss": 2.2336,
      "step": 49689
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1068159341812134,
      "learning_rate": 5.609538110993542e-06,
      "loss": 2.2678,
      "step": 49690
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.058571219444275,
      "learning_rate": 5.609168183855971e-06,
      "loss": 2.4262,
      "step": 49691
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0936063528060913,
      "learning_rate": 5.608798264162055e-06,
      "loss": 2.4308,
      "step": 49692
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1557680368423462,
      "learning_rate": 5.608428351912416e-06,
      "loss": 2.3305,
      "step": 49693
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0536078214645386,
      "learning_rate": 5.608058447107688e-06,
      "loss": 2.3798,
      "step": 49694
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1055983304977417,
      "learning_rate": 5.607688549748489e-06,
      "loss": 2.1863,
      "step": 49695
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.6712646484375,
      "learning_rate": 5.607318659835459e-06,
      "loss": 2.4252,
      "step": 49696
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0830950736999512,
      "learning_rate": 5.60694877736921e-06,
      "loss": 2.3602,
      "step": 49697
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1051104068756104,
      "learning_rate": 5.606578902350379e-06,
      "loss": 2.344,
      "step": 49698
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0817501544952393,
      "learning_rate": 5.606209034779586e-06,
      "loss": 2.3162,
      "step": 49699
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0795931816101074,
      "learning_rate": 5.605839174657466e-06,
      "loss": 2.5001,
      "step": 49700
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1740086078643799,
      "learning_rate": 5.605469321984638e-06,
      "loss": 2.4811,
      "step": 49701
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1495957374572754,
      "learning_rate": 5.605099476761737e-06,
      "loss": 2.3007,
      "step": 49702
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0660948753356934,
      "learning_rate": 5.604729638989385e-06,
      "loss": 2.4176,
      "step": 49703
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9883172512054443,
      "learning_rate": 5.604359808668209e-06,
      "loss": 2.4641,
      "step": 49704
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0455445051193237,
      "learning_rate": 5.603989985798839e-06,
      "loss": 2.4491,
      "step": 49705
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0856318473815918,
      "learning_rate": 5.603620170381896e-06,
      "loss": 2.1725,
      "step": 49706
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.958240270614624,
      "learning_rate": 5.6032503624180145e-06,
      "loss": 2.2417,
      "step": 49707
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1196705102920532,
      "learning_rate": 5.602880561907814e-06,
      "loss": 2.3385,
      "step": 49708
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.111371397972107,
      "learning_rate": 5.60251076885193e-06,
      "loss": 2.3215,
      "step": 49709
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1910557746887207,
      "learning_rate": 5.602140983250983e-06,
      "loss": 2.3645,
      "step": 49710
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0260189771652222,
      "learning_rate": 5.601771205105604e-06,
      "loss": 2.3736,
      "step": 49711
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0942435264587402,
      "learning_rate": 5.601401434416412e-06,
      "loss": 2.2626,
      "step": 49712
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0246102809906006,
      "learning_rate": 5.601031671184042e-06,
      "loss": 2.4004,
      "step": 49713
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0191668272018433,
      "learning_rate": 5.600661915409116e-06,
      "loss": 2.5591,
      "step": 49714
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.093069314956665,
      "learning_rate": 5.600292167092265e-06,
      "loss": 2.3502,
      "step": 49715
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.140610694885254,
      "learning_rate": 5.59992242623411e-06,
      "loss": 2.403,
      "step": 49716
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.283844232559204,
      "learning_rate": 5.599552692835288e-06,
      "loss": 2.369,
      "step": 49717
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2041878700256348,
      "learning_rate": 5.599182966896412e-06,
      "loss": 2.2972,
      "step": 49718
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.031131386756897,
      "learning_rate": 5.598813248418122e-06,
      "loss": 2.46,
      "step": 49719
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.051518201828003,
      "learning_rate": 5.598443537401035e-06,
      "loss": 2.3396,
      "step": 49720
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0405805110931396,
      "learning_rate": 5.598073833845785e-06,
      "loss": 2.4267,
      "step": 49721
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1227179765701294,
      "learning_rate": 5.597704137752995e-06,
      "loss": 2.3546,
      "step": 49722
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9662249088287354,
      "learning_rate": 5.597334449123293e-06,
      "loss": 2.4953,
      "step": 49723
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3516584634780884,
      "learning_rate": 5.5969647679573e-06,
      "loss": 2.0679,
      "step": 49724
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0709435939788818,
      "learning_rate": 5.5965950942556524e-06,
      "loss": 2.247,
      "step": 49725
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0321366786956787,
      "learning_rate": 5.596225428018969e-06,
      "loss": 2.2887,
      "step": 49726
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1022465229034424,
      "learning_rate": 5.595855769247882e-06,
      "loss": 2.4179,
      "step": 49727
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2014665603637695,
      "learning_rate": 5.595486117943012e-06,
      "loss": 2.1433,
      "step": 49728
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1143063306808472,
      "learning_rate": 5.595116474104993e-06,
      "loss": 2.2909,
      "step": 49729
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.122483253479004,
      "learning_rate": 5.594746837734444e-06,
      "loss": 2.4518,
      "step": 49730
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.005595326423645,
      "learning_rate": 5.594377208832e-06,
      "loss": 2.3633,
      "step": 49731
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2580299377441406,
      "learning_rate": 5.594007587398282e-06,
      "loss": 2.455,
      "step": 49732
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.105269432067871,
      "learning_rate": 5.593637973433916e-06,
      "loss": 2.5051,
      "step": 49733
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0568569898605347,
      "learning_rate": 5.5932683669395324e-06,
      "loss": 2.2005,
      "step": 49734
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0644152164459229,
      "learning_rate": 5.592898767915753e-06,
      "loss": 2.4534,
      "step": 49735
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1989527940750122,
      "learning_rate": 5.5925291763632105e-06,
      "loss": 2.304,
      "step": 49736
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1733824014663696,
      "learning_rate": 5.592159592282529e-06,
      "loss": 2.2332,
      "step": 49737
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1493724584579468,
      "learning_rate": 5.591790015674332e-06,
      "loss": 2.332,
      "step": 49738
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0213427543640137,
      "learning_rate": 5.591420446539246e-06,
      "loss": 2.1803,
      "step": 49739
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.067403793334961,
      "learning_rate": 5.591050884877905e-06,
      "loss": 2.3912,
      "step": 49740
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1605554819107056,
      "learning_rate": 5.5906813306909234e-06,
      "loss": 2.4777,
      "step": 49741
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0803041458129883,
      "learning_rate": 5.5903117839789386e-06,
      "loss": 2.4162,
      "step": 49742
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1414239406585693,
      "learning_rate": 5.58994224474257e-06,
      "loss": 2.3907,
      "step": 49743
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1700377464294434,
      "learning_rate": 5.589572712982451e-06,
      "loss": 2.0924,
      "step": 49744
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0979403257369995,
      "learning_rate": 5.589203188699202e-06,
      "loss": 2.3971,
      "step": 49745
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0214406251907349,
      "learning_rate": 5.5888336718934525e-06,
      "loss": 2.0935,
      "step": 49746
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0448309183120728,
      "learning_rate": 5.588464162565825e-06,
      "loss": 2.2876,
      "step": 49747
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1792465448379517,
      "learning_rate": 5.588094660716953e-06,
      "loss": 2.2992,
      "step": 49748
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0739657878875732,
      "learning_rate": 5.58772516634746e-06,
      "loss": 2.1693,
      "step": 49749
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2898787260055542,
      "learning_rate": 5.587355679457969e-06,
      "loss": 2.4621,
      "step": 49750
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.226369023323059,
      "learning_rate": 5.586986200049105e-06,
      "loss": 2.3204,
      "step": 49751
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0812122821807861,
      "learning_rate": 5.586616728121503e-06,
      "loss": 2.2977,
      "step": 49752
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2189512252807617,
      "learning_rate": 5.586247263675779e-06,
      "loss": 2.5036,
      "step": 49753
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.03659987449646,
      "learning_rate": 5.585877806712571e-06,
      "loss": 2.1526,
      "step": 49754
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0271440744400024,
      "learning_rate": 5.5855083572324935e-06,
      "loss": 2.5017,
      "step": 49755
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0724079608917236,
      "learning_rate": 5.5851389152361815e-06,
      "loss": 2.2155,
      "step": 49756
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0770379304885864,
      "learning_rate": 5.584769480724254e-06,
      "loss": 2.3219,
      "step": 49757
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1100845336914062,
      "learning_rate": 5.584400053697347e-06,
      "loss": 2.2057,
      "step": 49758
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1766693592071533,
      "learning_rate": 5.584030634156081e-06,
      "loss": 2.2355,
      "step": 49759
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.969584584236145,
      "learning_rate": 5.583661222101078e-06,
      "loss": 2.3468,
      "step": 49760
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0551403760910034,
      "learning_rate": 5.583291817532972e-06,
      "loss": 2.266,
      "step": 49761
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.042246699333191,
      "learning_rate": 5.582922420452387e-06,
      "loss": 2.2781,
      "step": 49762
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0525330305099487,
      "learning_rate": 5.582553030859943e-06,
      "loss": 2.3049,
      "step": 49763
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1111998558044434,
      "learning_rate": 5.582183648756276e-06,
      "loss": 2.3353,
      "step": 49764
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0994962453842163,
      "learning_rate": 5.581814274142005e-06,
      "loss": 2.1401,
      "step": 49765
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1981862783432007,
      "learning_rate": 5.5814449070177614e-06,
      "loss": 2.4279,
      "step": 49766
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1600326299667358,
      "learning_rate": 5.581075547384167e-06,
      "loss": 2.3599,
      "step": 49767
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1396454572677612,
      "learning_rate": 5.5807061952418486e-06,
      "loss": 2.3407,
      "step": 49768
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.136909008026123,
      "learning_rate": 5.580336850591437e-06,
      "loss": 2.4178,
      "step": 49769
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4014358520507812,
      "learning_rate": 5.579967513433549e-06,
      "loss": 2.1038,
      "step": 49770
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9936587810516357,
      "learning_rate": 5.579598183768822e-06,
      "loss": 2.2856,
      "step": 49771
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9964843392372131,
      "learning_rate": 5.579228861597872e-06,
      "loss": 2.292,
      "step": 49772
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1286814212799072,
      "learning_rate": 5.5788595469213335e-06,
      "loss": 2.2647,
      "step": 49773
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1078976392745972,
      "learning_rate": 5.578490239739826e-06,
      "loss": 2.2053,
      "step": 49774
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1129108667373657,
      "learning_rate": 5.578120940053986e-06,
      "loss": 2.3134,
      "step": 49775
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9833784103393555,
      "learning_rate": 5.577751647864423e-06,
      "loss": 2.2641,
      "step": 49776
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1183738708496094,
      "learning_rate": 5.577382363171777e-06,
      "loss": 2.1797,
      "step": 49777
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1001827716827393,
      "learning_rate": 5.5770130859766655e-06,
      "loss": 2.515,
      "step": 49778
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9976291060447693,
      "learning_rate": 5.576643816279721e-06,
      "loss": 2.5572,
      "step": 49779
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0294040441513062,
      "learning_rate": 5.5762745540815634e-06,
      "loss": 2.0795,
      "step": 49780
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.156855821609497,
      "learning_rate": 5.575905299382824e-06,
      "loss": 2.3126,
      "step": 49781
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1752523183822632,
      "learning_rate": 5.575536052184125e-06,
      "loss": 2.3001,
      "step": 49782
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0879802703857422,
      "learning_rate": 5.575166812486097e-06,
      "loss": 2.2435,
      "step": 49783
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0475116968154907,
      "learning_rate": 5.57479758028936e-06,
      "loss": 2.3396,
      "step": 49784
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1075788736343384,
      "learning_rate": 5.574428355594545e-06,
      "loss": 2.3025,
      "step": 49785
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.120458722114563,
      "learning_rate": 5.574059138402275e-06,
      "loss": 2.5499,
      "step": 49786
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1613726615905762,
      "learning_rate": 5.5736899287131784e-06,
      "loss": 2.6015,
      "step": 49787
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9714049696922302,
      "learning_rate": 5.57332072652788e-06,
      "loss": 2.1255,
      "step": 49788
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.20210862159729,
      "learning_rate": 5.572951531847006e-06,
      "loss": 2.2836,
      "step": 49789
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0734719038009644,
      "learning_rate": 5.5725823446711755e-06,
      "loss": 2.2364,
      "step": 49790
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.041823148727417,
      "learning_rate": 5.572213165001026e-06,
      "loss": 2.3758,
      "step": 49791
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0100643634796143,
      "learning_rate": 5.5718439928371736e-06,
      "loss": 2.2864,
      "step": 49792
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0386090278625488,
      "learning_rate": 5.5714748281802525e-06,
      "loss": 2.4208,
      "step": 49793
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1000957489013672,
      "learning_rate": 5.571105671030884e-06,
      "loss": 2.4371,
      "step": 49794
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9612117409706116,
      "learning_rate": 5.57073652138969e-06,
      "loss": 2.1429,
      "step": 49795
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0437482595443726,
      "learning_rate": 5.570367379257305e-06,
      "loss": 2.4655,
      "step": 49796
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0858935117721558,
      "learning_rate": 5.569998244634347e-06,
      "loss": 2.4574,
      "step": 49797
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0663776397705078,
      "learning_rate": 5.569629117521449e-06,
      "loss": 2.2007,
      "step": 49798
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0620466470718384,
      "learning_rate": 5.569259997919227e-06,
      "loss": 2.2432,
      "step": 49799
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0046029090881348,
      "learning_rate": 5.568890885828319e-06,
      "loss": 2.4711,
      "step": 49800
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4055390357971191,
      "learning_rate": 5.568521781249339e-06,
      "loss": 2.2525,
      "step": 49801
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1098521947860718,
      "learning_rate": 5.568152684182926e-06,
      "loss": 2.284,
      "step": 49802
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.08958899974823,
      "learning_rate": 5.567783594629691e-06,
      "loss": 2.3148,
      "step": 49803
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0815938711166382,
      "learning_rate": 5.56741451259027e-06,
      "loss": 2.291,
      "step": 49804
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1289887428283691,
      "learning_rate": 5.567045438065282e-06,
      "loss": 2.2617,
      "step": 49805
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0268785953521729,
      "learning_rate": 5.56667637105536e-06,
      "loss": 2.3743,
      "step": 49806
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1667299270629883,
      "learning_rate": 5.566307311561119e-06,
      "loss": 2.4096,
      "step": 49807
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1794219017028809,
      "learning_rate": 5.565938259583198e-06,
      "loss": 2.6116,
      "step": 49808
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0839725732803345,
      "learning_rate": 5.565569215122211e-06,
      "loss": 2.4483,
      "step": 49809
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.182433009147644,
      "learning_rate": 5.565200178178793e-06,
      "loss": 2.5063,
      "step": 49810
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1276766061782837,
      "learning_rate": 5.56483114875356e-06,
      "loss": 2.2837,
      "step": 49811
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0971747636795044,
      "learning_rate": 5.564462126847148e-06,
      "loss": 2.3813,
      "step": 49812
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.10292649269104,
      "learning_rate": 5.564093112460174e-06,
      "loss": 2.3263,
      "step": 49813
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0399729013442993,
      "learning_rate": 5.563724105593269e-06,
      "loss": 2.2778,
      "step": 49814
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0918940305709839,
      "learning_rate": 5.563355106247057e-06,
      "loss": 2.1476,
      "step": 49815
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.079233169555664,
      "learning_rate": 5.562986114422164e-06,
      "loss": 2.3009,
      "step": 49816
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0559868812561035,
      "learning_rate": 5.56261713011921e-06,
      "loss": 2.4457,
      "step": 49817
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0549372434616089,
      "learning_rate": 5.56224815333883e-06,
      "loss": 2.4414,
      "step": 49818
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1463062763214111,
      "learning_rate": 5.56187918408164e-06,
      "loss": 2.3746,
      "step": 49819
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0265629291534424,
      "learning_rate": 5.561510222348273e-06,
      "loss": 2.2159,
      "step": 49820
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0044423341751099,
      "learning_rate": 5.56114126813935e-06,
      "loss": 2.3383,
      "step": 49821
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0144284963607788,
      "learning_rate": 5.5607723214555e-06,
      "loss": 2.1706,
      "step": 49822
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0848157405853271,
      "learning_rate": 5.560403382297348e-06,
      "loss": 2.5127,
      "step": 49823
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0566060543060303,
      "learning_rate": 5.5600344506655125e-06,
      "loss": 2.4076,
      "step": 49824
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0972262620925903,
      "learning_rate": 5.559665526560629e-06,
      "loss": 2.2921,
      "step": 49825
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.142453908920288,
      "learning_rate": 5.559296609983316e-06,
      "loss": 2.3817,
      "step": 49826
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0347040891647339,
      "learning_rate": 5.558927700934205e-06,
      "loss": 2.3403,
      "step": 49827
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0961575508117676,
      "learning_rate": 5.558558799413916e-06,
      "loss": 2.2929,
      "step": 49828
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0767414569854736,
      "learning_rate": 5.558189905423077e-06,
      "loss": 2.2046,
      "step": 49829
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1186617612838745,
      "learning_rate": 5.557821018962308e-06,
      "loss": 2.3391,
      "step": 49830
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.157509684562683,
      "learning_rate": 5.557452140032244e-06,
      "loss": 2.2468,
      "step": 49831
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.207905888557434,
      "learning_rate": 5.557083268633501e-06,
      "loss": 2.318,
      "step": 49832
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1539539098739624,
      "learning_rate": 5.556714404766712e-06,
      "loss": 2.3964,
      "step": 49833
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0853296518325806,
      "learning_rate": 5.556345548432494e-06,
      "loss": 2.4728,
      "step": 49834
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0277107954025269,
      "learning_rate": 5.555976699631483e-06,
      "loss": 2.6412,
      "step": 49835
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0658708810806274,
      "learning_rate": 5.555607858364294e-06,
      "loss": 2.1812,
      "step": 49836
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9923722743988037,
      "learning_rate": 5.55523902463156e-06,
      "loss": 2.4796,
      "step": 49837
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0907361507415771,
      "learning_rate": 5.554870198433899e-06,
      "loss": 2.1542,
      "step": 49838
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.049715280532837,
      "learning_rate": 5.554501379771946e-06,
      "loss": 2.4956,
      "step": 49839
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9988310933113098,
      "learning_rate": 5.554132568646315e-06,
      "loss": 2.2746,
      "step": 49840
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0188031196594238,
      "learning_rate": 5.553763765057645e-06,
      "loss": 2.2605,
      "step": 49841
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1147055625915527,
      "learning_rate": 5.553394969006545e-06,
      "loss": 2.1634,
      "step": 49842
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0465507507324219,
      "learning_rate": 5.553026180493652e-06,
      "loss": 2.243,
      "step": 49843
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1001490354537964,
      "learning_rate": 5.552657399519583e-06,
      "loss": 2.374,
      "step": 49844
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0917479991912842,
      "learning_rate": 5.552288626084973e-06,
      "loss": 2.2835,
      "step": 49845
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0656557083129883,
      "learning_rate": 5.551919860190438e-06,
      "loss": 2.2854,
      "step": 49846
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.95506751537323,
      "learning_rate": 5.55155110183661e-06,
      "loss": 2.2068,
      "step": 49847
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1429592370986938,
      "learning_rate": 5.551182351024106e-06,
      "loss": 2.383,
      "step": 49848
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0269980430603027,
      "learning_rate": 5.550813607753561e-06,
      "loss": 2.3769,
      "step": 49849
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0668249130249023,
      "learning_rate": 5.550444872025595e-06,
      "loss": 2.38,
      "step": 49850
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2249881029129028,
      "learning_rate": 5.55007614384083e-06,
      "loss": 2.2522,
      "step": 49851
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1606559753417969,
      "learning_rate": 5.5497074231999e-06,
      "loss": 2.2808,
      "step": 49852
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0957648754119873,
      "learning_rate": 5.549338710103418e-06,
      "loss": 2.2877,
      "step": 49853
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0812993049621582,
      "learning_rate": 5.548970004552021e-06,
      "loss": 2.3434,
      "step": 49854
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1525232791900635,
      "learning_rate": 5.548601306546329e-06,
      "loss": 2.2911,
      "step": 49855
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.042389154434204,
      "learning_rate": 5.548232616086966e-06,
      "loss": 2.2943,
      "step": 49856
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0766562223434448,
      "learning_rate": 5.547863933174553e-06,
      "loss": 2.2513,
      "step": 49857
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0907073020935059,
      "learning_rate": 5.547495257809725e-06,
      "loss": 2.1629,
      "step": 49858
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0184260606765747,
      "learning_rate": 5.547126589993099e-06,
      "loss": 2.2306,
      "step": 49859
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2895116806030273,
      "learning_rate": 5.546757929725305e-06,
      "loss": 2.2861,
      "step": 49860
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.017361044883728,
      "learning_rate": 5.5463892770069625e-06,
      "loss": 2.3877,
      "step": 49861
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0178431272506714,
      "learning_rate": 5.546020631838703e-06,
      "loss": 2.2758,
      "step": 49862
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1606272459030151,
      "learning_rate": 5.545651994221145e-06,
      "loss": 2.1342,
      "step": 49863
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1186167001724243,
      "learning_rate": 5.545283364154921e-06,
      "loss": 2.2107,
      "step": 49864
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0840100049972534,
      "learning_rate": 5.544914741640647e-06,
      "loss": 2.2858,
      "step": 49865
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3299109935760498,
      "learning_rate": 5.544546126678956e-06,
      "loss": 2.121,
      "step": 49866
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0729824304580688,
      "learning_rate": 5.544177519270471e-06,
      "loss": 2.3563,
      "step": 49867
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.038763403892517,
      "learning_rate": 5.543808919415813e-06,
      "loss": 2.2862,
      "step": 49868
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5032070875167847,
      "learning_rate": 5.543440327115607e-06,
      "loss": 2.4113,
      "step": 49869
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9798635840415955,
      "learning_rate": 5.543071742370483e-06,
      "loss": 2.2652,
      "step": 49870
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1370079517364502,
      "learning_rate": 5.542703165181059e-06,
      "loss": 2.3762,
      "step": 49871
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0425604581832886,
      "learning_rate": 5.542334595547968e-06,
      "loss": 2.2009,
      "step": 49872
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2959415912628174,
      "learning_rate": 5.5419660334718265e-06,
      "loss": 2.2868,
      "step": 49873
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8591597080230713,
      "learning_rate": 5.541597478953268e-06,
      "loss": 2.5818,
      "step": 49874
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0586401224136353,
      "learning_rate": 5.541228931992908e-06,
      "loss": 2.3983,
      "step": 49875
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1594377756118774,
      "learning_rate": 5.54086039259138e-06,
      "loss": 2.2863,
      "step": 49876
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1436303853988647,
      "learning_rate": 5.540491860749304e-06,
      "loss": 2.3338,
      "step": 49877
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0438590049743652,
      "learning_rate": 5.540123336467303e-06,
      "loss": 2.3635,
      "step": 49878
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.043697476387024,
      "learning_rate": 5.539754819746007e-06,
      "loss": 2.3203,
      "step": 49879
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1583058834075928,
      "learning_rate": 5.5393863105860345e-06,
      "loss": 2.3492,
      "step": 49880
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1972756385803223,
      "learning_rate": 5.539017808988017e-06,
      "loss": 2.5174,
      "step": 49881
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0580734014511108,
      "learning_rate": 5.538649314952578e-06,
      "loss": 2.6708,
      "step": 49882
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0833158493041992,
      "learning_rate": 5.538280828480333e-06,
      "loss": 2.3824,
      "step": 49883
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1800134181976318,
      "learning_rate": 5.53791234957192e-06,
      "loss": 2.5429,
      "step": 49884
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0052475929260254,
      "learning_rate": 5.537543878227957e-06,
      "loss": 2.0523,
      "step": 49885
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1573330163955688,
      "learning_rate": 5.537175414449066e-06,
      "loss": 2.1528,
      "step": 49886
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0566442012786865,
      "learning_rate": 5.536806958235876e-06,
      "loss": 2.3976,
      "step": 49887
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1233410835266113,
      "learning_rate": 5.536438509589008e-06,
      "loss": 2.251,
      "step": 49888
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1966736316680908,
      "learning_rate": 5.536070068509094e-06,
      "loss": 2.288,
      "step": 49889
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1660568714141846,
      "learning_rate": 5.5357016349967486e-06,
      "loss": 2.6354,
      "step": 49890
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0244548320770264,
      "learning_rate": 5.535333209052605e-06,
      "loss": 2.3302,
      "step": 49891
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0069167613983154,
      "learning_rate": 5.534964790677281e-06,
      "loss": 2.1877,
      "step": 49892
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1467134952545166,
      "learning_rate": 5.534596379871409e-06,
      "loss": 2.2781,
      "step": 49893
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.07536780834198,
      "learning_rate": 5.534227976635607e-06,
      "loss": 2.3722,
      "step": 49894
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3680198192596436,
      "learning_rate": 5.533859580970502e-06,
      "loss": 2.2632,
      "step": 49895
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.024414300918579,
      "learning_rate": 5.5334911928767145e-06,
      "loss": 2.4715,
      "step": 49896
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1627027988433838,
      "learning_rate": 5.533122812354876e-06,
      "loss": 2.177,
      "step": 49897
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0717829465866089,
      "learning_rate": 5.532754439405605e-06,
      "loss": 2.3166,
      "step": 49898
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0579800605773926,
      "learning_rate": 5.5323860740295325e-06,
      "loss": 2.4024,
      "step": 49899
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0506439208984375,
      "learning_rate": 5.532017716227273e-06,
      "loss": 2.3976,
      "step": 49900
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1723650693893433,
      "learning_rate": 5.531649365999463e-06,
      "loss": 2.2747,
      "step": 49901
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0489423274993896,
      "learning_rate": 5.531281023346715e-06,
      "loss": 2.2143,
      "step": 49902
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9890508651733398,
      "learning_rate": 5.530912688269665e-06,
      "loss": 2.1897,
      "step": 49903
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1630853414535522,
      "learning_rate": 5.530544360768926e-06,
      "loss": 2.2819,
      "step": 49904
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0200157165527344,
      "learning_rate": 5.530176040845134e-06,
      "loss": 2.4371,
      "step": 49905
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.026319146156311,
      "learning_rate": 5.529807728498906e-06,
      "loss": 2.1975,
      "step": 49906
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0321388244628906,
      "learning_rate": 5.529439423730869e-06,
      "loss": 2.3901,
      "step": 49907
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1345486640930176,
      "learning_rate": 5.529071126541643e-06,
      "loss": 2.2888,
      "step": 49908
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9873083233833313,
      "learning_rate": 5.528702836931859e-06,
      "loss": 2.3855,
      "step": 49909
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2083735466003418,
      "learning_rate": 5.528334554902133e-06,
      "loss": 2.5043,
      "step": 49910
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9827686548233032,
      "learning_rate": 5.5279662804531e-06,
      "loss": 2.2191,
      "step": 49911
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1989070177078247,
      "learning_rate": 5.527598013585378e-06,
      "loss": 2.2624,
      "step": 49912
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.142064094543457,
      "learning_rate": 5.5272297542995874e-06,
      "loss": 2.1049,
      "step": 49913
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9989428520202637,
      "learning_rate": 5.526861502596364e-06,
      "loss": 2.5291,
      "step": 49914
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5794572830200195,
      "learning_rate": 5.526493258476317e-06,
      "loss": 2.3128,
      "step": 49915
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1191449165344238,
      "learning_rate": 5.526125021940085e-06,
      "loss": 2.163,
      "step": 49916
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3048311471939087,
      "learning_rate": 5.525756792988284e-06,
      "loss": 2.2959,
      "step": 49917
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2250707149505615,
      "learning_rate": 5.5253885716215416e-06,
      "loss": 2.4462,
      "step": 49918
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0843764543533325,
      "learning_rate": 5.525020357840478e-06,
      "loss": 2.3293,
      "step": 49919
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1634008884429932,
      "learning_rate": 5.524652151645728e-06,
      "loss": 2.4327,
      "step": 49920
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1248401403427124,
      "learning_rate": 5.524283953037901e-06,
      "loss": 2.5432,
      "step": 49921
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.032860279083252,
      "learning_rate": 5.523915762017631e-06,
      "loss": 2.2843,
      "step": 49922
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0359888076782227,
      "learning_rate": 5.523547578585537e-06,
      "loss": 2.3772,
      "step": 49923
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1108444929122925,
      "learning_rate": 5.523179402742249e-06,
      "loss": 2.3488,
      "step": 49924
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1188902854919434,
      "learning_rate": 5.522811234488384e-06,
      "loss": 2.1217,
      "step": 49925
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0812700986862183,
      "learning_rate": 5.522443073824573e-06,
      "loss": 2.2293,
      "step": 49926
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0056837797164917,
      "learning_rate": 5.522074920751432e-06,
      "loss": 2.2912,
      "step": 49927
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0244206190109253,
      "learning_rate": 5.521706775269598e-06,
      "loss": 2.214,
      "step": 49928
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3216664791107178,
      "learning_rate": 5.521338637379681e-06,
      "loss": 2.2881,
      "step": 49929
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.113471269607544,
      "learning_rate": 5.520970507082315e-06,
      "loss": 2.3767,
      "step": 49930
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.032450556755066,
      "learning_rate": 5.520602384378117e-06,
      "loss": 2.1153,
      "step": 49931
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0190072059631348,
      "learning_rate": 5.520234269267718e-06,
      "loss": 2.2273,
      "step": 49932
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1648191213607788,
      "learning_rate": 5.519866161751739e-06,
      "loss": 2.4129,
      "step": 49933
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.305464744567871,
      "learning_rate": 5.519498061830804e-06,
      "loss": 1.913,
      "step": 49934
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.049749493598938,
      "learning_rate": 5.519129969505532e-06,
      "loss": 2.288,
      "step": 49935
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0447710752487183,
      "learning_rate": 5.518761884776555e-06,
      "loss": 2.2741,
      "step": 49936
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0736348628997803,
      "learning_rate": 5.518393807644492e-06,
      "loss": 2.2606,
      "step": 49937
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.065834879875183,
      "learning_rate": 5.5180257381099715e-06,
      "loss": 2.1438,
      "step": 49938
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2194360494613647,
      "learning_rate": 5.517657676173611e-06,
      "loss": 2.3695,
      "step": 49939
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0495244264602661,
      "learning_rate": 5.517289621836042e-06,
      "loss": 2.2915,
      "step": 49940
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0508041381835938,
      "learning_rate": 5.5169215750978845e-06,
      "loss": 2.5185,
      "step": 49941
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9868234395980835,
      "learning_rate": 5.516553535959757e-06,
      "loss": 2.0928,
      "step": 49942
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0646165609359741,
      "learning_rate": 5.516185504422296e-06,
      "loss": 2.3564,
      "step": 49943
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0599569082260132,
      "learning_rate": 5.515817480486112e-06,
      "loss": 2.2608,
      "step": 49944
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.057641863822937,
      "learning_rate": 5.5154494641518396e-06,
      "loss": 2.2561,
      "step": 49945
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1015006303787231,
      "learning_rate": 5.5150814554200946e-06,
      "loss": 2.3892,
      "step": 49946
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0591702461242676,
      "learning_rate": 5.514713454291513e-06,
      "loss": 2.2694,
      "step": 49947
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0673649311065674,
      "learning_rate": 5.514345460766701e-06,
      "loss": 2.3046,
      "step": 49948
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1392347812652588,
      "learning_rate": 5.513977474846297e-06,
      "loss": 2.5113,
      "step": 49949
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.171558141708374,
      "learning_rate": 5.513609496530916e-06,
      "loss": 2.4355,
      "step": 49950
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0756670236587524,
      "learning_rate": 5.513241525821189e-06,
      "loss": 2.2886,
      "step": 49951
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0493392944335938,
      "learning_rate": 5.512873562717731e-06,
      "loss": 2.3546,
      "step": 49952
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0525329113006592,
      "learning_rate": 5.512505607221177e-06,
      "loss": 2.2892,
      "step": 49953
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0419503450393677,
      "learning_rate": 5.512137659332138e-06,
      "loss": 2.5513,
      "step": 49954
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1426656246185303,
      "learning_rate": 5.511769719051251e-06,
      "loss": 2.2807,
      "step": 49955
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0851956605911255,
      "learning_rate": 5.511401786379129e-06,
      "loss": 2.49,
      "step": 49956
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1713497638702393,
      "learning_rate": 5.511033861316404e-06,
      "loss": 2.4168,
      "step": 49957
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9694262146949768,
      "learning_rate": 5.510665943863691e-06,
      "loss": 2.3679,
      "step": 49958
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1277799606323242,
      "learning_rate": 5.510298034021622e-06,
      "loss": 2.2834,
      "step": 49959
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9915353655815125,
      "learning_rate": 5.5099301317908184e-06,
      "loss": 2.186,
      "step": 49960
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1401958465576172,
      "learning_rate": 5.509562237171902e-06,
      "loss": 2.2752,
      "step": 49961
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1135514974594116,
      "learning_rate": 5.509194350165494e-06,
      "loss": 2.3268,
      "step": 49962
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9910590052604675,
      "learning_rate": 5.508826470772225e-06,
      "loss": 2.4378,
      "step": 49963
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9623297452926636,
      "learning_rate": 5.5084585989927106e-06,
      "loss": 2.4232,
      "step": 49964
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1350592374801636,
      "learning_rate": 5.508090734827583e-06,
      "loss": 2.0672,
      "step": 49965
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.055040717124939,
      "learning_rate": 5.5077228782774575e-06,
      "loss": 2.3511,
      "step": 49966
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0876556634902954,
      "learning_rate": 5.5073550293429656e-06,
      "loss": 2.2619,
      "step": 49967
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1130965948104858,
      "learning_rate": 5.506987188024728e-06,
      "loss": 2.1079,
      "step": 49968
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9766919016838074,
      "learning_rate": 5.506619354323363e-06,
      "loss": 2.1839,
      "step": 49969
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1395024061203003,
      "learning_rate": 5.5062515282395016e-06,
      "loss": 2.366,
      "step": 49970
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0011863708496094,
      "learning_rate": 5.5058837097737605e-06,
      "loss": 2.3416,
      "step": 49971
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1469902992248535,
      "learning_rate": 5.505515898926772e-06,
      "loss": 2.4008,
      "step": 49972
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0123939514160156,
      "learning_rate": 5.505148095699155e-06,
      "loss": 2.3176,
      "step": 49973
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1002000570297241,
      "learning_rate": 5.504780300091532e-06,
      "loss": 2.2976,
      "step": 49974
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0538456439971924,
      "learning_rate": 5.504412512104524e-06,
      "loss": 2.3158,
      "step": 49975
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.130759596824646,
      "learning_rate": 5.50404473173876e-06,
      "loss": 2.4527,
      "step": 49976
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.997490406036377,
      "learning_rate": 5.503676958994859e-06,
      "loss": 2.2189,
      "step": 49977
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1384941339492798,
      "learning_rate": 5.5033091938734495e-06,
      "loss": 2.5509,
      "step": 49978
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2131329774856567,
      "learning_rate": 5.50294143637515e-06,
      "loss": 2.2498,
      "step": 49979
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.163784384727478,
      "learning_rate": 5.502573686500589e-06,
      "loss": 2.3136,
      "step": 49980
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0639376640319824,
      "learning_rate": 5.502205944250383e-06,
      "loss": 2.4353,
      "step": 49981
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.032385230064392,
      "learning_rate": 5.501838209625163e-06,
      "loss": 2.3324,
      "step": 49982
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1021960973739624,
      "learning_rate": 5.501470482625546e-06,
      "loss": 2.557,
      "step": 49983
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0171633958816528,
      "learning_rate": 5.501102763252162e-06,
      "loss": 2.3496,
      "step": 49984
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0258700847625732,
      "learning_rate": 5.500735051505627e-06,
      "loss": 2.2323,
      "step": 49985
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9852165579795837,
      "learning_rate": 5.500367347386575e-06,
      "loss": 2.1679,
      "step": 49986
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.083290934562683,
      "learning_rate": 5.4999996508956154e-06,
      "loss": 2.2682,
      "step": 49987
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0718305110931396,
      "learning_rate": 5.499631962033383e-06,
      "loss": 2.5667,
      "step": 49988
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0379701852798462,
      "learning_rate": 5.499264280800491e-06,
      "loss": 2.2281,
      "step": 49989
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0885899066925049,
      "learning_rate": 5.498896607197575e-06,
      "loss": 2.3759,
      "step": 49990
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1053004264831543,
      "learning_rate": 5.498528941225245e-06,
      "loss": 2.3964,
      "step": 49991
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.087640404701233,
      "learning_rate": 5.498161282884136e-06,
      "loss": 2.4389,
      "step": 49992
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0526877641677856,
      "learning_rate": 5.497793632174862e-06,
      "loss": 2.3786,
      "step": 49993
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1313190460205078,
      "learning_rate": 5.497425989098055e-06,
      "loss": 2.4148,
      "step": 49994
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1040802001953125,
      "learning_rate": 5.497058353654334e-06,
      "loss": 2.4553,
      "step": 49995
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.100651502609253,
      "learning_rate": 5.496690725844318e-06,
      "loss": 2.4576,
      "step": 49996
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.052789330482483,
      "learning_rate": 5.49632310566864e-06,
      "loss": 2.3818,
      "step": 49997
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.187126636505127,
      "learning_rate": 5.495955493127911e-06,
      "loss": 2.5844,
      "step": 49998
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.088957667350769,
      "learning_rate": 5.495587888222765e-06,
      "loss": 2.421,
      "step": 49999
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1970347166061401,
      "learning_rate": 5.495220290953822e-06,
      "loss": 2.2757,
      "step": 50000
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.073329210281372,
      "learning_rate": 5.494852701321699e-06,
      "loss": 2.1774,
      "step": 50001
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0280227661132812,
      "learning_rate": 5.494485119327029e-06,
      "loss": 2.4818,
      "step": 50002
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.327232837677002,
      "learning_rate": 5.49411754497043e-06,
      "loss": 2.2613,
      "step": 50003
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0378109216690063,
      "learning_rate": 5.493749978252522e-06,
      "loss": 2.5696,
      "step": 50004
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0829648971557617,
      "learning_rate": 5.493382419173936e-06,
      "loss": 2.3618,
      "step": 50005
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2113287448883057,
      "learning_rate": 5.493014867735284e-06,
      "loss": 2.3373,
      "step": 50006
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0091036558151245,
      "learning_rate": 5.492647323937203e-06,
      "loss": 2.3785,
      "step": 50007
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2075212001800537,
      "learning_rate": 5.492279787780305e-06,
      "loss": 2.2968,
      "step": 50008
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9640800952911377,
      "learning_rate": 5.49191225926522e-06,
      "loss": 2.1314,
      "step": 50009
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4581862688064575,
      "learning_rate": 5.4915447383925635e-06,
      "loss": 2.2837,
      "step": 50010
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.247515320777893,
      "learning_rate": 5.491177225162968e-06,
      "loss": 2.2504,
      "step": 50011
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.158665657043457,
      "learning_rate": 5.490809719577052e-06,
      "loss": 2.2998,
      "step": 50012
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.134547233581543,
      "learning_rate": 5.490442221635437e-06,
      "loss": 2.294,
      "step": 50013
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2059369087219238,
      "learning_rate": 5.490074731338744e-06,
      "loss": 2.336,
      "step": 50014
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0499951839447021,
      "learning_rate": 5.489707248687604e-06,
      "loss": 2.3673,
      "step": 50015
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1964080333709717,
      "learning_rate": 5.489339773682629e-06,
      "loss": 2.3479,
      "step": 50016
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0219264030456543,
      "learning_rate": 5.488972306324454e-06,
      "loss": 2.0981,
      "step": 50017
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9943774342536926,
      "learning_rate": 5.488604846613692e-06,
      "loss": 2.2369,
      "step": 50018
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.036206603050232,
      "learning_rate": 5.488237394550972e-06,
      "loss": 2.1331,
      "step": 50019
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0834206342697144,
      "learning_rate": 5.487869950136914e-06,
      "loss": 2.545,
      "step": 50020
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0885021686553955,
      "learning_rate": 5.487502513372145e-06,
      "loss": 2.1775,
      "step": 50021
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0589780807495117,
      "learning_rate": 5.4871350842572805e-06,
      "loss": 2.1634,
      "step": 50022
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1290127038955688,
      "learning_rate": 5.486767662792951e-06,
      "loss": 2.4377,
      "step": 50023
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.216498613357544,
      "learning_rate": 5.486400248979776e-06,
      "loss": 2.4409,
      "step": 50024
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1324801445007324,
      "learning_rate": 5.486032842818376e-06,
      "loss": 2.3683,
      "step": 50025
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2012513875961304,
      "learning_rate": 5.485665444309378e-06,
      "loss": 2.3619,
      "step": 50026
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.164554476737976,
      "learning_rate": 5.485298053453405e-06,
      "loss": 2.4593,
      "step": 50027
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9963782429695129,
      "learning_rate": 5.484930670251074e-06,
      "loss": 2.3756,
      "step": 50028
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.05940842628479,
      "learning_rate": 5.484563294703016e-06,
      "loss": 2.1783,
      "step": 50029
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.048647165298462,
      "learning_rate": 5.484195926809848e-06,
      "loss": 2.3706,
      "step": 50030
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9602325558662415,
      "learning_rate": 5.4838285665721915e-06,
      "loss": 2.4507,
      "step": 50031
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.266977310180664,
      "learning_rate": 5.483461213990676e-06,
      "loss": 2.1968,
      "step": 50032
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0360004901885986,
      "learning_rate": 5.483093869065917e-06,
      "loss": 2.4767,
      "step": 50033
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2156529426574707,
      "learning_rate": 5.4827265317985435e-06,
      "loss": 2.2767,
      "step": 50034
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2148133516311646,
      "learning_rate": 5.4823592021891715e-06,
      "loss": 2.2045,
      "step": 50035
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0401880741119385,
      "learning_rate": 5.481991880238433e-06,
      "loss": 2.2819,
      "step": 50036
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0050840377807617,
      "learning_rate": 5.48162456594694e-06,
      "loss": 2.3793,
      "step": 50037
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1572041511535645,
      "learning_rate": 5.4812572593153245e-06,
      "loss": 2.2998,
      "step": 50038
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.087034821510315,
      "learning_rate": 5.4808899603442066e-06,
      "loss": 2.2649,
      "step": 50039
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4151493310928345,
      "learning_rate": 5.480522669034206e-06,
      "loss": 1.9712,
      "step": 50040
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0127532482147217,
      "learning_rate": 5.480155385385944e-06,
      "loss": 2.2657,
      "step": 50041
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0591384172439575,
      "learning_rate": 5.479788109400051e-06,
      "loss": 2.2197,
      "step": 50042
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0433034896850586,
      "learning_rate": 5.4794208410771386e-06,
      "loss": 2.4076,
      "step": 50043
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0578382015228271,
      "learning_rate": 5.47905358041784e-06,
      "loss": 2.2198,
      "step": 50044
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1389340162277222,
      "learning_rate": 5.478686327422771e-06,
      "loss": 2.3048,
      "step": 50045
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0505619049072266,
      "learning_rate": 5.478319082092559e-06,
      "loss": 2.2413,
      "step": 50046
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1714012622833252,
      "learning_rate": 5.477951844427821e-06,
      "loss": 2.4166,
      "step": 50047
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1475720405578613,
      "learning_rate": 5.4775846144291875e-06,
      "loss": 2.3418,
      "step": 50048
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0993037223815918,
      "learning_rate": 5.477217392097272e-06,
      "loss": 2.2904,
      "step": 50049
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0363689661026,
      "learning_rate": 5.476850177432704e-06,
      "loss": 2.3533,
      "step": 50050
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1587165594100952,
      "learning_rate": 5.476482970436106e-06,
      "loss": 2.4651,
      "step": 50051
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0793673992156982,
      "learning_rate": 5.476115771108095e-06,
      "loss": 2.4319,
      "step": 50052
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2071375846862793,
      "learning_rate": 5.4757485794492935e-06,
      "loss": 2.6074,
      "step": 50053
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0735318660736084,
      "learning_rate": 5.4753813954603316e-06,
      "loss": 2.5839,
      "step": 50054
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1183788776397705,
      "learning_rate": 5.475014219141823e-06,
      "loss": 2.2551,
      "step": 50055
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.148672103881836,
      "learning_rate": 5.474647050494398e-06,
      "loss": 2.2074,
      "step": 50056
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9837347865104675,
      "learning_rate": 5.474279889518672e-06,
      "loss": 2.2864,
      "step": 50057
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0114566087722778,
      "learning_rate": 5.473912736215274e-06,
      "loss": 2.5322,
      "step": 50058
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.16433584690094,
      "learning_rate": 5.4735455905848235e-06,
      "loss": 2.3366,
      "step": 50059
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.076993703842163,
      "learning_rate": 5.473178452627938e-06,
      "loss": 2.343,
      "step": 50060
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1234102249145508,
      "learning_rate": 5.472811322345249e-06,
      "loss": 2.3816,
      "step": 50061
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1221075057983398,
      "learning_rate": 5.47244419973737e-06,
      "loss": 2.0708,
      "step": 50062
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.029706358909607,
      "learning_rate": 5.472077084804932e-06,
      "loss": 2.1795,
      "step": 50063
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1957215070724487,
      "learning_rate": 5.471709977548549e-06,
      "loss": 2.0177,
      "step": 50064
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9901069402694702,
      "learning_rate": 5.471342877968855e-06,
      "loss": 2.256,
      "step": 50065
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.141723871231079,
      "learning_rate": 5.470975786066458e-06,
      "loss": 2.4033,
      "step": 50066
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0392029285430908,
      "learning_rate": 5.4706087018419904e-06,
      "loss": 2.2914,
      "step": 50067
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1140741109848022,
      "learning_rate": 5.470241625296066e-06,
      "loss": 2.4136,
      "step": 50068
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3709238767623901,
      "learning_rate": 5.469874556429319e-06,
      "loss": 2.3203,
      "step": 50069
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1567881107330322,
      "learning_rate": 5.469507495242359e-06,
      "loss": 2.1366,
      "step": 50070
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9989087581634521,
      "learning_rate": 5.4691404417358196e-06,
      "loss": 2.4197,
      "step": 50071
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1108607053756714,
      "learning_rate": 5.4687733959103124e-06,
      "loss": 2.3326,
      "step": 50072
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0780786275863647,
      "learning_rate": 5.468406357766469e-06,
      "loss": 2.2766,
      "step": 50073
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2446538209915161,
      "learning_rate": 5.468039327304905e-06,
      "loss": 2.4168,
      "step": 50074
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0755704641342163,
      "learning_rate": 5.467672304526248e-06,
      "loss": 2.7446,
      "step": 50075
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.143549919128418,
      "learning_rate": 5.467305289431114e-06,
      "loss": 2.1214,
      "step": 50076
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.074440360069275,
      "learning_rate": 5.466938282020132e-06,
      "loss": 2.2963,
      "step": 50077
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1793997287750244,
      "learning_rate": 5.466571282293922e-06,
      "loss": 2.4361,
      "step": 50078
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.117798089981079,
      "learning_rate": 5.466204290253103e-06,
      "loss": 2.3561,
      "step": 50079
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.045273780822754,
      "learning_rate": 5.465837305898296e-06,
      "loss": 2.2734,
      "step": 50080
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0686653852462769,
      "learning_rate": 5.465470329230131e-06,
      "loss": 2.0646,
      "step": 50081
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0789567232131958,
      "learning_rate": 5.465103360249221e-06,
      "loss": 2.2798,
      "step": 50082
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0414282083511353,
      "learning_rate": 5.4647363989561965e-06,
      "loss": 2.4528,
      "step": 50083
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9959335327148438,
      "learning_rate": 5.464369445351671e-06,
      "loss": 2.1777,
      "step": 50084
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2300817966461182,
      "learning_rate": 5.464002499436275e-06,
      "loss": 2.3117,
      "step": 50085
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0200804471969604,
      "learning_rate": 5.463635561210628e-06,
      "loss": 2.18,
      "step": 50086
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0913928747177124,
      "learning_rate": 5.463268630675344e-06,
      "loss": 2.4274,
      "step": 50087
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1219831705093384,
      "learning_rate": 5.462901707831059e-06,
      "loss": 2.296,
      "step": 50088
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.286910057067871,
      "learning_rate": 5.4625347926783826e-06,
      "loss": 2.3279,
      "step": 50089
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.041059970855713,
      "learning_rate": 5.462167885217946e-06,
      "loss": 2.1463,
      "step": 50090
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0651847124099731,
      "learning_rate": 5.461800985450363e-06,
      "loss": 2.4484,
      "step": 50091
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1296969652175903,
      "learning_rate": 5.461434093376268e-06,
      "loss": 2.5645,
      "step": 50092
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1090162992477417,
      "learning_rate": 5.461067208996267e-06,
      "loss": 2.3877,
      "step": 50093
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0854743719100952,
      "learning_rate": 5.460700332310995e-06,
      "loss": 2.2875,
      "step": 50094
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.014318585395813,
      "learning_rate": 5.460333463321064e-06,
      "loss": 2.3904,
      "step": 50095
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0265380144119263,
      "learning_rate": 5.459966602027103e-06,
      "loss": 2.323,
      "step": 50096
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2920209169387817,
      "learning_rate": 5.45959974842973e-06,
      "loss": 2.3616,
      "step": 50097
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0574620962142944,
      "learning_rate": 5.459232902529572e-06,
      "loss": 2.3367,
      "step": 50098
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0498645305633545,
      "learning_rate": 5.458866064327243e-06,
      "loss": 2.3574,
      "step": 50099
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3035237789154053,
      "learning_rate": 5.458499233823373e-06,
      "loss": 2.3162,
      "step": 50100
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1409597396850586,
      "learning_rate": 5.458132411018577e-06,
      "loss": 2.2329,
      "step": 50101
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1446073055267334,
      "learning_rate": 5.457765595913485e-06,
      "loss": 2.4275,
      "step": 50102
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0566785335540771,
      "learning_rate": 5.4573987885087075e-06,
      "loss": 2.4522,
      "step": 50103
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.091407060623169,
      "learning_rate": 5.4570319888048795e-06,
      "loss": 2.389,
      "step": 50104
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1625396013259888,
      "learning_rate": 5.4566651968026155e-06,
      "loss": 2.3903,
      "step": 50105
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1128580570220947,
      "learning_rate": 5.4562984125025365e-06,
      "loss": 2.2094,
      "step": 50106
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0083154439926147,
      "learning_rate": 5.4559316359052625e-06,
      "loss": 2.2071,
      "step": 50107
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0984138250350952,
      "learning_rate": 5.455564867011423e-06,
      "loss": 2.3287,
      "step": 50108
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1057729721069336,
      "learning_rate": 5.4551981058216305e-06,
      "loss": 2.5492,
      "step": 50109
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0512504577636719,
      "learning_rate": 5.454831352336516e-06,
      "loss": 2.2925,
      "step": 50110
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0274039506912231,
      "learning_rate": 5.454464606556693e-06,
      "loss": 2.2422,
      "step": 50111
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1190630197525024,
      "learning_rate": 5.4540978684827905e-06,
      "loss": 2.4052,
      "step": 50112
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0076799392700195,
      "learning_rate": 5.453731138115428e-06,
      "loss": 2.2698,
      "step": 50113
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1670887470245361,
      "learning_rate": 5.4533644154552204e-06,
      "loss": 2.1948,
      "step": 50114
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1498347520828247,
      "learning_rate": 5.4529977005028e-06,
      "loss": 2.2625,
      "step": 50115
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0667787790298462,
      "learning_rate": 5.452630993258778e-06,
      "loss": 2.4366,
      "step": 50116
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1504493951797485,
      "learning_rate": 5.4522642937237865e-06,
      "loss": 2.4576,
      "step": 50117
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1122764348983765,
      "learning_rate": 5.451897601898442e-06,
      "loss": 2.2983,
      "step": 50118
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9975764751434326,
      "learning_rate": 5.451530917783363e-06,
      "loss": 2.203,
      "step": 50119
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0834366083145142,
      "learning_rate": 5.451164241379179e-06,
      "loss": 2.5208,
      "step": 50120
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2343462705612183,
      "learning_rate": 5.450797572686506e-06,
      "loss": 2.2725,
      "step": 50121
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1311897039413452,
      "learning_rate": 5.450430911705962e-06,
      "loss": 2.1892,
      "step": 50122
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.032787799835205,
      "learning_rate": 5.450064258438178e-06,
      "loss": 2.2558,
      "step": 50123
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.943340003490448,
      "learning_rate": 5.449697612883766e-06,
      "loss": 1.9886,
      "step": 50124
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0842101573944092,
      "learning_rate": 5.449330975043359e-06,
      "loss": 2.2685,
      "step": 50125
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0704152584075928,
      "learning_rate": 5.448964344917565e-06,
      "loss": 2.2217,
      "step": 50126
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1763052940368652,
      "learning_rate": 5.4485977225070185e-06,
      "loss": 2.3675,
      "step": 50127
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.184080719947815,
      "learning_rate": 5.448231107812329e-06,
      "loss": 2.2995,
      "step": 50128
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0155096054077148,
      "learning_rate": 5.4478645008341305e-06,
      "loss": 2.3774,
      "step": 50129
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1668143272399902,
      "learning_rate": 5.447497901573031e-06,
      "loss": 2.4909,
      "step": 50130
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.429237961769104,
      "learning_rate": 5.44713131002967e-06,
      "loss": 2.3416,
      "step": 50131
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0693855285644531,
      "learning_rate": 5.446764726204648e-06,
      "loss": 2.3184,
      "step": 50132
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9678652882575989,
      "learning_rate": 5.446398150098602e-06,
      "loss": 2.2578,
      "step": 50133
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0626314878463745,
      "learning_rate": 5.446031581712141e-06,
      "loss": 2.3749,
      "step": 50134
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0870018005371094,
      "learning_rate": 5.4456650210459e-06,
      "loss": 2.3346,
      "step": 50135
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1389453411102295,
      "learning_rate": 5.44529846810049e-06,
      "loss": 2.3745,
      "step": 50136
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.135136365890503,
      "learning_rate": 5.444931922876538e-06,
      "loss": 2.2525,
      "step": 50137
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0307978391647339,
      "learning_rate": 5.444565385374661e-06,
      "loss": 2.0225,
      "step": 50138
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.151308536529541,
      "learning_rate": 5.444198855595486e-06,
      "loss": 2.6569,
      "step": 50139
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0888251066207886,
      "learning_rate": 5.443832333539628e-06,
      "loss": 2.3867,
      "step": 50140
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0407575368881226,
      "learning_rate": 5.4434658192077165e-06,
      "loss": 2.4057,
      "step": 50141
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1056127548217773,
      "learning_rate": 5.4430993126003664e-06,
      "loss": 2.4082,
      "step": 50142
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9518170952796936,
      "learning_rate": 5.442732813718197e-06,
      "loss": 2.2824,
      "step": 50143
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9579430818557739,
      "learning_rate": 5.442366322561837e-06,
      "loss": 2.344,
      "step": 50144
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0722507238388062,
      "learning_rate": 5.441999839131904e-06,
      "loss": 2.4293,
      "step": 50145
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.043941855430603,
      "learning_rate": 5.441633363429015e-06,
      "loss": 2.3876,
      "step": 50146
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.210376262664795,
      "learning_rate": 5.4412668954538e-06,
      "loss": 2.275,
      "step": 50147
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1109025478363037,
      "learning_rate": 5.4409004352068775e-06,
      "loss": 2.361,
      "step": 50148
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7265563011169434,
      "learning_rate": 5.44053398268886e-06,
      "loss": 2.4486,
      "step": 50149
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.069653868675232,
      "learning_rate": 5.440167537900382e-06,
      "loss": 2.2127,
      "step": 50150
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1192035675048828,
      "learning_rate": 5.4398011008420526e-06,
      "loss": 2.3479,
      "step": 50151
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2223458290100098,
      "learning_rate": 5.439434671514504e-06,
      "loss": 2.4994,
      "step": 50152
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0181351900100708,
      "learning_rate": 5.439068249918348e-06,
      "loss": 2.2959,
      "step": 50153
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0546544790267944,
      "learning_rate": 5.438701836054214e-06,
      "loss": 2.4899,
      "step": 50154
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9986059665679932,
      "learning_rate": 5.438335429922714e-06,
      "loss": 2.1782,
      "step": 50155
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2029849290847778,
      "learning_rate": 5.4379690315244805e-06,
      "loss": 2.4146,
      "step": 50156
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0452919006347656,
      "learning_rate": 5.437602640860128e-06,
      "loss": 2.3323,
      "step": 50157
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.047197937965393,
      "learning_rate": 5.4372362579302776e-06,
      "loss": 2.2565,
      "step": 50158
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1449030637741089,
      "learning_rate": 5.436869882735547e-06,
      "loss": 2.2678,
      "step": 50159
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0600054264068604,
      "learning_rate": 5.436503515276565e-06,
      "loss": 2.5175,
      "step": 50160
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0425249338150024,
      "learning_rate": 5.436137155553946e-06,
      "loss": 2.4929,
      "step": 50161
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1074460744857788,
      "learning_rate": 5.4357708035683165e-06,
      "loss": 2.5002,
      "step": 50162
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9946164488792419,
      "learning_rate": 5.435404459320293e-06,
      "loss": 2.2077,
      "step": 50163
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.169791340827942,
      "learning_rate": 5.435038122810501e-06,
      "loss": 2.4785,
      "step": 50164
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2514854669570923,
      "learning_rate": 5.434671794039556e-06,
      "loss": 2.2724,
      "step": 50165
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0735383033752441,
      "learning_rate": 5.434305473008085e-06,
      "loss": 2.2982,
      "step": 50166
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1811703443527222,
      "learning_rate": 5.433939159716706e-06,
      "loss": 2.3906,
      "step": 50167
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.994427502155304,
      "learning_rate": 5.4335728541660404e-06,
      "loss": 2.1446,
      "step": 50168
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1564159393310547,
      "learning_rate": 5.4332065563567105e-06,
      "loss": 2.1961,
      "step": 50169
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1198147535324097,
      "learning_rate": 5.432840266289332e-06,
      "loss": 2.5518,
      "step": 50170
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1442662477493286,
      "learning_rate": 5.432473983964533e-06,
      "loss": 2.3136,
      "step": 50171
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1230946779251099,
      "learning_rate": 5.432107709382931e-06,
      "loss": 2.227,
      "step": 50172
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1223595142364502,
      "learning_rate": 5.431741442545144e-06,
      "loss": 2.173,
      "step": 50173
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1447830200195312,
      "learning_rate": 5.431375183451799e-06,
      "loss": 2.3995,
      "step": 50174
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0169795751571655,
      "learning_rate": 5.43100893210351e-06,
      "loss": 2.1908,
      "step": 50175
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.096627116203308,
      "learning_rate": 5.4306426885009065e-06,
      "loss": 2.5084,
      "step": 50176
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1972885131835938,
      "learning_rate": 5.430276452644605e-06,
      "loss": 2.3772,
      "step": 50177
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1562120914459229,
      "learning_rate": 5.429910224535221e-06,
      "loss": 2.3356,
      "step": 50178
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0991650819778442,
      "learning_rate": 5.429544004173384e-06,
      "loss": 2.2679,
      "step": 50179
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0014591217041016,
      "learning_rate": 5.4291777915597076e-06,
      "loss": 2.4857,
      "step": 50180
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0769957304000854,
      "learning_rate": 5.428811586694821e-06,
      "loss": 2.3333,
      "step": 50181
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1420689821243286,
      "learning_rate": 5.428445389579336e-06,
      "loss": 2.3683,
      "step": 50182
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0601441860198975,
      "learning_rate": 5.4280792002138814e-06,
      "loss": 2.0183,
      "step": 50183
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0390598773956299,
      "learning_rate": 5.427713018599074e-06,
      "loss": 2.2831,
      "step": 50184
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2245547771453857,
      "learning_rate": 5.427346844735536e-06,
      "loss": 2.6005,
      "step": 50185
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1093521118164062,
      "learning_rate": 5.426980678623882e-06,
      "loss": 2.4246,
      "step": 50186
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1224770545959473,
      "learning_rate": 5.426614520264741e-06,
      "loss": 2.259,
      "step": 50187
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0992385149002075,
      "learning_rate": 5.4262483696587285e-06,
      "loss": 2.4985,
      "step": 50188
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0584018230438232,
      "learning_rate": 5.425882226806471e-06,
      "loss": 2.2562,
      "step": 50189
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0369563102722168,
      "learning_rate": 5.42551609170858e-06,
      "loss": 2.3973,
      "step": 50190
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0308880805969238,
      "learning_rate": 5.425149964365686e-06,
      "loss": 2.4139,
      "step": 50191
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0715105533599854,
      "learning_rate": 5.4247838447784025e-06,
      "loss": 2.2693,
      "step": 50192
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.058314323425293,
      "learning_rate": 5.424417732947356e-06,
      "loss": 2.3655,
      "step": 50193
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1153628826141357,
      "learning_rate": 5.42405162887316e-06,
      "loss": 2.377,
      "step": 50194
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.024086356163025,
      "learning_rate": 5.423685532556444e-06,
      "loss": 2.1295,
      "step": 50195
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0302051305770874,
      "learning_rate": 5.423319443997819e-06,
      "loss": 2.4129,
      "step": 50196
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2921042442321777,
      "learning_rate": 5.422953363197919e-06,
      "loss": 2.3021,
      "step": 50197
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.084674596786499,
      "learning_rate": 5.422587290157348e-06,
      "loss": 2.2158,
      "step": 50198
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1568784713745117,
      "learning_rate": 5.42222122487674e-06,
      "loss": 2.2028,
      "step": 50199
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0583471059799194,
      "learning_rate": 5.4218551673567044e-06,
      "loss": 2.192,
      "step": 50200
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1084357500076294,
      "learning_rate": 5.421489117597873e-06,
      "loss": 2.3903,
      "step": 50201
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0867269039154053,
      "learning_rate": 5.421123075600857e-06,
      "loss": 2.2128,
      "step": 50202
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0074183940887451,
      "learning_rate": 5.420757041366284e-06,
      "loss": 2.4728,
      "step": 50203
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1070739030838013,
      "learning_rate": 5.420391014894772e-06,
      "loss": 2.2197,
      "step": 50204
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.198461890220642,
      "learning_rate": 5.420024996186939e-06,
      "loss": 2.4068,
      "step": 50205
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0156652927398682,
      "learning_rate": 5.419658985243409e-06,
      "loss": 2.4493,
      "step": 50206
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9804617166519165,
      "learning_rate": 5.419292982064798e-06,
      "loss": 2.1751,
      "step": 50207
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1026582717895508,
      "learning_rate": 5.418926986651734e-06,
      "loss": 2.3885,
      "step": 50208
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1040916442871094,
      "learning_rate": 5.418560999004827e-06,
      "loss": 2.3031,
      "step": 50209
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1547242403030396,
      "learning_rate": 5.418195019124712e-06,
      "loss": 2.4553,
      "step": 50210
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0956863164901733,
      "learning_rate": 5.417829047011994e-06,
      "loss": 2.2437,
      "step": 50211
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1967698335647583,
      "learning_rate": 5.417463082667304e-06,
      "loss": 2.2649,
      "step": 50212
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1347311735153198,
      "learning_rate": 5.417097126091255e-06,
      "loss": 2.3635,
      "step": 50213
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.082822561264038,
      "learning_rate": 5.4167311772844735e-06,
      "loss": 2.3524,
      "step": 50214
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1628568172454834,
      "learning_rate": 5.416365236247575e-06,
      "loss": 2.5944,
      "step": 50215
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9395081996917725,
      "learning_rate": 5.415999302981185e-06,
      "loss": 2.3486,
      "step": 50216
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0453169345855713,
      "learning_rate": 5.415633377485918e-06,
      "loss": 2.4727,
      "step": 50217
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0947253704071045,
      "learning_rate": 5.415267459762402e-06,
      "loss": 2.4253,
      "step": 50218
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0906407833099365,
      "learning_rate": 5.414901549811249e-06,
      "loss": 2.3949,
      "step": 50219
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1640393733978271,
      "learning_rate": 5.414535647633086e-06,
      "loss": 2.0817,
      "step": 50220
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1723061800003052,
      "learning_rate": 5.4141697532285266e-06,
      "loss": 2.314,
      "step": 50221
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0143245458602905,
      "learning_rate": 5.413803866598199e-06,
      "loss": 2.0576,
      "step": 50222
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1155155897140503,
      "learning_rate": 5.41343798774272e-06,
      "loss": 2.3529,
      "step": 50223
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1419570446014404,
      "learning_rate": 5.41307211666271e-06,
      "loss": 2.4621,
      "step": 50224
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.084375262260437,
      "learning_rate": 5.412706253358784e-06,
      "loss": 2.2799,
      "step": 50225
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0353022813796997,
      "learning_rate": 5.41234039783157e-06,
      "loss": 2.5703,
      "step": 50226
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1368868350982666,
      "learning_rate": 5.411974550081682e-06,
      "loss": 2.331,
      "step": 50227
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2180088758468628,
      "learning_rate": 5.411608710109746e-06,
      "loss": 2.3844,
      "step": 50228
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0224729776382446,
      "learning_rate": 5.4112428779163765e-06,
      "loss": 2.3237,
      "step": 50229
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0719661712646484,
      "learning_rate": 5.410877053502199e-06,
      "loss": 2.2725,
      "step": 50230
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1086856126785278,
      "learning_rate": 5.410511236867833e-06,
      "loss": 2.3985,
      "step": 50231
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1342873573303223,
      "learning_rate": 5.410145428013893e-06,
      "loss": 2.4418,
      "step": 50232
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2181304693222046,
      "learning_rate": 5.409779626941006e-06,
      "loss": 2.2211,
      "step": 50233
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.132193922996521,
      "learning_rate": 5.409413833649786e-06,
      "loss": 2.3218,
      "step": 50234
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.051643967628479,
      "learning_rate": 5.40904804814086e-06,
      "loss": 2.329,
      "step": 50235
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.95577073097229,
      "learning_rate": 5.408682270414841e-06,
      "loss": 2.3153,
      "step": 50236
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0640650987625122,
      "learning_rate": 5.408316500472356e-06,
      "loss": 2.3905,
      "step": 50237
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0344537496566772,
      "learning_rate": 5.407950738314022e-06,
      "loss": 2.4465,
      "step": 50238
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.10930335521698,
      "learning_rate": 5.407584983940458e-06,
      "loss": 2.1821,
      "step": 50239
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0919246673583984,
      "learning_rate": 5.407219237352282e-06,
      "loss": 2.4551,
      "step": 50240
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1937285661697388,
      "learning_rate": 5.4068534985501195e-06,
      "loss": 2.3213,
      "step": 50241
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0597808361053467,
      "learning_rate": 5.406487767534584e-06,
      "loss": 2.2591,
      "step": 50242
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.001283049583435,
      "learning_rate": 5.406122044306304e-06,
      "loss": 2.4276,
      "step": 50243
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0021157264709473,
      "learning_rate": 5.4057563288658895e-06,
      "loss": 2.3581,
      "step": 50244
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1338428258895874,
      "learning_rate": 5.40539062121397e-06,
      "loss": 2.3332,
      "step": 50245
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.219670057296753,
      "learning_rate": 5.405024921351159e-06,
      "loss": 2.578,
      "step": 50246
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.087827205657959,
      "learning_rate": 5.404659229278082e-06,
      "loss": 2.2592,
      "step": 50247
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.090254306793213,
      "learning_rate": 5.4042935449953505e-06,
      "loss": 2.4888,
      "step": 50248
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0222519636154175,
      "learning_rate": 5.403927868503595e-06,
      "loss": 2.2804,
      "step": 50249
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0616369247436523,
      "learning_rate": 5.403562199803429e-06,
      "loss": 2.139,
      "step": 50250
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0997086763381958,
      "learning_rate": 5.403196538895474e-06,
      "loss": 2.27,
      "step": 50251
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0294114351272583,
      "learning_rate": 5.4028308857803455e-06,
      "loss": 2.3372,
      "step": 50252
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1202512979507446,
      "learning_rate": 5.4024652404586705e-06,
      "loss": 2.2927,
      "step": 50253
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2891693115234375,
      "learning_rate": 5.402099602931062e-06,
      "loss": 2.4188,
      "step": 50254
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1339466571807861,
      "learning_rate": 5.401733973198148e-06,
      "loss": 2.1622,
      "step": 50255
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.068757176399231,
      "learning_rate": 5.401368351260539e-06,
      "loss": 2.1237,
      "step": 50256
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.255289912223816,
      "learning_rate": 5.401002737118863e-06,
      "loss": 2.3006,
      "step": 50257
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3249431848526,
      "learning_rate": 5.4006371307737335e-06,
      "loss": 2.3625,
      "step": 50258
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0494461059570312,
      "learning_rate": 5.400271532225778e-06,
      "loss": 2.2724,
      "step": 50259
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.055987000465393,
      "learning_rate": 5.399905941475611e-06,
      "loss": 2.1318,
      "step": 50260
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1719977855682373,
      "learning_rate": 5.399540358523848e-06,
      "loss": 2.2831,
      "step": 50261
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1005305051803589,
      "learning_rate": 5.399174783371117e-06,
      "loss": 2.3041,
      "step": 50262
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0307185649871826,
      "learning_rate": 5.398809216018036e-06,
      "loss": 2.2676,
      "step": 50263
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.062397837638855,
      "learning_rate": 5.398443656465219e-06,
      "loss": 2.4301,
      "step": 50264
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9793391227722168,
      "learning_rate": 5.3980781047132935e-06,
      "loss": 2.0561,
      "step": 50265
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1939693689346313,
      "learning_rate": 5.397712560762874e-06,
      "loss": 2.3806,
      "step": 50266
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0376979112625122,
      "learning_rate": 5.397347024614579e-06,
      "loss": 2.2476,
      "step": 50267
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0882505178451538,
      "learning_rate": 5.3969814962690346e-06,
      "loss": 2.2856,
      "step": 50268
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0863691568374634,
      "learning_rate": 5.3966159757268535e-06,
      "loss": 2.5533,
      "step": 50269
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0356203317642212,
      "learning_rate": 5.3962504629886615e-06,
      "loss": 2.3055,
      "step": 50270
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0823357105255127,
      "learning_rate": 5.395884958055071e-06,
      "loss": 2.3229,
      "step": 50271
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0855101346969604,
      "learning_rate": 5.39551946092671e-06,
      "loss": 2.4231,
      "step": 50272
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.079707384109497,
      "learning_rate": 5.3951539716041924e-06,
      "loss": 2.334,
      "step": 50273
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1064362525939941,
      "learning_rate": 5.3947884900881405e-06,
      "loss": 2.3558,
      "step": 50274
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1760598421096802,
      "learning_rate": 5.394423016379171e-06,
      "loss": 2.2038,
      "step": 50275
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0218666791915894,
      "learning_rate": 5.394057550477911e-06,
      "loss": 2.4363,
      "step": 50276
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2032129764556885,
      "learning_rate": 5.393692092384969e-06,
      "loss": 2.5914,
      "step": 50277
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1702286005020142,
      "learning_rate": 5.3933266421009735e-06,
      "loss": 2.4693,
      "step": 50278
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9464188814163208,
      "learning_rate": 5.392961199626535e-06,
      "loss": 2.2244,
      "step": 50279
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0707199573516846,
      "learning_rate": 5.392595764962283e-06,
      "loss": 2.2663,
      "step": 50280
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2855010032653809,
      "learning_rate": 5.392230338108829e-06,
      "loss": 2.3403,
      "step": 50281
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2978391647338867,
      "learning_rate": 5.391864919066801e-06,
      "loss": 2.5428,
      "step": 50282
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1334751844406128,
      "learning_rate": 5.391499507836808e-06,
      "loss": 2.3811,
      "step": 50283
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0494675636291504,
      "learning_rate": 5.391134104419479e-06,
      "loss": 2.5396,
      "step": 50284
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.036560297012329,
      "learning_rate": 5.390768708815426e-06,
      "loss": 2.2433,
      "step": 50285
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0204488039016724,
      "learning_rate": 5.390403321025275e-06,
      "loss": 2.402,
      "step": 50286
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9267227649688721,
      "learning_rate": 5.390037941049644e-06,
      "loss": 2.0753,
      "step": 50287
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.120935320854187,
      "learning_rate": 5.3896725688891464e-06,
      "loss": 2.2962,
      "step": 50288
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0479633808135986,
      "learning_rate": 5.389307204544408e-06,
      "loss": 2.2655,
      "step": 50289
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.9835268259048462,
      "learning_rate": 5.388941848016047e-06,
      "loss": 2.4012,
      "step": 50290
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2047184705734253,
      "learning_rate": 5.3885764993046785e-06,
      "loss": 2.2273,
      "step": 50291
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0823378562927246,
      "learning_rate": 5.38821115841093e-06,
      "loss": 2.5418,
      "step": 50292
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1376383304595947,
      "learning_rate": 5.3878458253354115e-06,
      "loss": 2.4045,
      "step": 50293
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.081166386604309,
      "learning_rate": 5.38748050007875e-06,
      "loss": 2.3282,
      "step": 50294
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1878305673599243,
      "learning_rate": 5.387115182641562e-06,
      "loss": 2.1535,
      "step": 50295
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0790437459945679,
      "learning_rate": 5.3867498730244636e-06,
      "loss": 2.2354,
      "step": 50296
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1185717582702637,
      "learning_rate": 5.386384571228081e-06,
      "loss": 2.3753,
      "step": 50297
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.149722933769226,
      "learning_rate": 5.386019277253025e-06,
      "loss": 2.1372,
      "step": 50298
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0175491571426392,
      "learning_rate": 5.385653991099924e-06,
      "loss": 2.3553,
      "step": 50299
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0703809261322021,
      "learning_rate": 5.3852887127693885e-06,
      "loss": 2.4413,
      "step": 50300
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2630045413970947,
      "learning_rate": 5.384923442262047e-06,
      "loss": 2.2628,
      "step": 50301
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1603000164031982,
      "learning_rate": 5.384558179578512e-06,
      "loss": 2.1703,
      "step": 50302
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1873315572738647,
      "learning_rate": 5.384192924719405e-06,
      "loss": 2.3054,
      "step": 50303
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.0581940412521362,
      "learning_rate": 5.383827677685341e-06,
      "loss": 2.4449,
      "step": 50304
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0589478015899658,
      "learning_rate": 5.383462438476946e-06,
      "loss": 2.4817,
      "step": 50305
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0319914817810059,
      "learning_rate": 5.383097207094833e-06,
      "loss": 2.3949,
      "step": 50306
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9930051565170288,
      "learning_rate": 5.3827319835396276e-06,
      "loss": 2.2771,
      "step": 50307
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0472859144210815,
      "learning_rate": 5.382366767811942e-06,
      "loss": 2.2874,
      "step": 50308
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1674319505691528,
      "learning_rate": 5.3820015599124024e-06,
      "loss": 2.5103,
      "step": 50309
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.20452082157135,
      "learning_rate": 5.38163635984162e-06,
      "loss": 2.3837,
      "step": 50310
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1399270296096802,
      "learning_rate": 5.3812711676002225e-06,
      "loss": 2.4836,
      "step": 50311
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0323001146316528,
      "learning_rate": 5.380905983188822e-06,
      "loss": 2.226,
      "step": 50312
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.262717604637146,
      "learning_rate": 5.380540806608043e-06,
      "loss": 2.3638,
      "step": 50313
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0202813148498535,
      "learning_rate": 5.380175637858498e-06,
      "loss": 2.4157,
      "step": 50314
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1130906343460083,
      "learning_rate": 5.379810476940815e-06,
      "loss": 2.2257,
      "step": 50315
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0495892763137817,
      "learning_rate": 5.379445323855607e-06,
      "loss": 2.3869,
      "step": 50316
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0071585178375244,
      "learning_rate": 5.3790801786034955e-06,
      "loss": 2.2929,
      "step": 50317
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5827068090438843,
      "learning_rate": 5.378715041185092e-06,
      "loss": 2.2128,
      "step": 50318
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0685501098632812,
      "learning_rate": 5.378349911601028e-06,
      "loss": 2.3979,
      "step": 50319
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.050352931022644,
      "learning_rate": 5.377984789851911e-06,
      "loss": 2.3487,
      "step": 50320
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0375146865844727,
      "learning_rate": 5.377619675938368e-06,
      "loss": 2.1737,
      "step": 50321
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4050185680389404,
      "learning_rate": 5.377254569861017e-06,
      "loss": 2.5879,
      "step": 50322
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2713240385055542,
      "learning_rate": 5.37688947162047e-06,
      "loss": 2.32,
      "step": 50323
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.145675539970398,
      "learning_rate": 5.376524381217354e-06,
      "loss": 2.2607,
      "step": 50324
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0433441400527954,
      "learning_rate": 5.376159298652283e-06,
      "loss": 2.3596,
      "step": 50325
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.091726541519165,
      "learning_rate": 5.375794223925881e-06,
      "loss": 2.4643,
      "step": 50326
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1469496488571167,
      "learning_rate": 5.3754291570387586e-06,
      "loss": 2.1141,
      "step": 50327
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3459339141845703,
      "learning_rate": 5.3750640979915445e-06,
      "loss": 2.2394,
      "step": 50328
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1561404466629028,
      "learning_rate": 5.374699046784854e-06,
      "loss": 2.2765,
      "step": 50329
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.090372920036316,
      "learning_rate": 5.3743340034193035e-06,
      "loss": 2.5247,
      "step": 50330
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2054129838943481,
      "learning_rate": 5.37396896789551e-06,
      "loss": 2.3909,
      "step": 50331
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1556353569030762,
      "learning_rate": 5.373603940214099e-06,
      "loss": 2.1176,
      "step": 50332
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0955573320388794,
      "learning_rate": 5.373238920375682e-06,
      "loss": 2.2717,
      "step": 50333
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0527976751327515,
      "learning_rate": 5.372873908380886e-06,
      "loss": 2.3126,
      "step": 50334
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0726287364959717,
      "learning_rate": 5.37250890423032e-06,
      "loss": 2.4935,
      "step": 50335
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1100534200668335,
      "learning_rate": 5.372143907924614e-06,
      "loss": 2.459,
      "step": 50336
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.137020468711853,
      "learning_rate": 5.371778919464377e-06,
      "loss": 2.4424,
      "step": 50337
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9817813634872437,
      "learning_rate": 5.371413938850234e-06,
      "loss": 2.4682,
      "step": 50338
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1465984582901,
      "learning_rate": 5.371048966082799e-06,
      "loss": 2.4242,
      "step": 50339
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.190925121307373,
      "learning_rate": 5.370684001162696e-06,
      "loss": 2.5719,
      "step": 50340
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.050636649131775,
      "learning_rate": 5.370319044090537e-06,
      "loss": 2.4163,
      "step": 50341
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1125030517578125,
      "learning_rate": 5.3699540948669534e-06,
      "loss": 2.4385,
      "step": 50342
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0654252767562866,
      "learning_rate": 5.369589153492547e-06,
      "loss": 2.1708,
      "step": 50343
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9798770546913147,
      "learning_rate": 5.369224219967949e-06,
      "loss": 2.3897,
      "step": 50344
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0801554918289185,
      "learning_rate": 5.368859294293769e-06,
      "loss": 2.4545,
      "step": 50345
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0376818180084229,
      "learning_rate": 5.368494376470635e-06,
      "loss": 2.3377,
      "step": 50346
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.968467116355896,
      "learning_rate": 5.368129466499157e-06,
      "loss": 2.2171,
      "step": 50347
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1189234256744385,
      "learning_rate": 5.367764564379961e-06,
      "loss": 2.3468,
      "step": 50348
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0896458625793457,
      "learning_rate": 5.367399670113661e-06,
      "loss": 2.2127,
      "step": 50349
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9775950312614441,
      "learning_rate": 5.367034783700874e-06,
      "loss": 2.1469,
      "step": 50350
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9709630608558655,
      "learning_rate": 5.3666699051422255e-06,
      "loss": 2.1994,
      "step": 50351
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1086724996566772,
      "learning_rate": 5.366305034438326e-06,
      "loss": 2.3834,
      "step": 50352
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0459493398666382,
      "learning_rate": 5.365940171589801e-06,
      "loss": 2.1608,
      "step": 50353
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.10216224193573,
      "learning_rate": 5.365575316597262e-06,
      "loss": 2.3695,
      "step": 50354
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9625497460365295,
      "learning_rate": 5.365210469461337e-06,
      "loss": 2.4233,
      "step": 50355
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1947576999664307,
      "learning_rate": 5.364845630182638e-06,
      "loss": 2.2615,
      "step": 50356
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0507951974868774,
      "learning_rate": 5.364480798761784e-06,
      "loss": 2.433,
      "step": 50357
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1772339344024658,
      "learning_rate": 5.364115975199391e-06,
      "loss": 2.4404,
      "step": 50358
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.09978449344635,
      "learning_rate": 5.363751159496084e-06,
      "loss": 2.269,
      "step": 50359
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0458163022994995,
      "learning_rate": 5.363386351652476e-06,
      "loss": 2.3276,
      "step": 50360
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0788599252700806,
      "learning_rate": 5.363021551669188e-06,
      "loss": 2.492,
      "step": 50361
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0565321445465088,
      "learning_rate": 5.362656759546836e-06,
      "loss": 2.2708,
      "step": 50362
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0803656578063965,
      "learning_rate": 5.3622919752860436e-06,
      "loss": 2.3398,
      "step": 50363
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0909228324890137,
      "learning_rate": 5.361927198887422e-06,
      "loss": 2.3658,
      "step": 50364
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3653929233551025,
      "learning_rate": 5.361562430351598e-06,
      "loss": 2.5659,
      "step": 50365
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1673476696014404,
      "learning_rate": 5.361197669679181e-06,
      "loss": 2.3628,
      "step": 50366
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0964213609695435,
      "learning_rate": 5.360832916870797e-06,
      "loss": 2.2448,
      "step": 50367
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0064265727996826,
      "learning_rate": 5.360468171927063e-06,
      "loss": 2.1058,
      "step": 50368
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.191358208656311,
      "learning_rate": 5.360103434848593e-06,
      "loss": 2.4096,
      "step": 50369
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0767498016357422,
      "learning_rate": 5.359738705636006e-06,
      "loss": 2.4036,
      "step": 50370
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.148525357246399,
      "learning_rate": 5.359373984289926e-06,
      "loss": 2.6139,
      "step": 50371
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0987297296524048,
      "learning_rate": 5.359009270810963e-06,
      "loss": 2.1053,
      "step": 50372
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1580983400344849,
      "learning_rate": 5.358644565199744e-06,
      "loss": 2.5486,
      "step": 50373
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0874987840652466,
      "learning_rate": 5.358279867456879e-06,
      "loss": 2.3024,
      "step": 50374
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0487918853759766,
      "learning_rate": 5.3579151775829944e-06,
      "loss": 2.2715,
      "step": 50375
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.310483455657959,
      "learning_rate": 5.3575504955786995e-06,
      "loss": 2.3557,
      "step": 50376
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1950584650039673,
      "learning_rate": 5.357185821444621e-06,
      "loss": 2.5079,
      "step": 50377
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0773295164108276,
      "learning_rate": 5.356821155181375e-06,
      "loss": 2.458,
      "step": 50378
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0820285081863403,
      "learning_rate": 5.3564564967895745e-06,
      "loss": 2.331,
      "step": 50379
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0120611190795898,
      "learning_rate": 5.356091846269844e-06,
      "loss": 2.4079,
      "step": 50380
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0334815979003906,
      "learning_rate": 5.355727203622796e-06,
      "loss": 2.6136,
      "step": 50381
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.091635823249817,
      "learning_rate": 5.355362568849056e-06,
      "loss": 2.5083,
      "step": 50382
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.104636549949646,
      "learning_rate": 5.354997941949237e-06,
      "loss": 2.4874,
      "step": 50383
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2288552522659302,
      "learning_rate": 5.354633322923959e-06,
      "loss": 2.2619,
      "step": 50384
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0100692510604858,
      "learning_rate": 5.3542687117738334e-06,
      "loss": 2.3518,
      "step": 50385
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0332553386688232,
      "learning_rate": 5.35390410849949e-06,
      "loss": 2.432,
      "step": 50386
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0923218727111816,
      "learning_rate": 5.353539513101536e-06,
      "loss": 2.4942,
      "step": 50387
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0413779020309448,
      "learning_rate": 5.353174925580599e-06,
      "loss": 2.0927,
      "step": 50388
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0449855327606201,
      "learning_rate": 5.352810345937288e-06,
      "loss": 2.318,
      "step": 50389
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1065974235534668,
      "learning_rate": 5.352445774172231e-06,
      "loss": 2.393,
      "step": 50390
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1129494905471802,
      "learning_rate": 5.352081210286036e-06,
      "loss": 2.3105,
      "step": 50391
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0223206281661987,
      "learning_rate": 5.351716654279329e-06,
      "loss": 2.1706,
      "step": 50392
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.00677490234375,
      "learning_rate": 5.351352106152721e-06,
      "loss": 2.3984,
      "step": 50393
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.109429955482483,
      "learning_rate": 5.3509875659068375e-06,
      "loss": 2.2706,
      "step": 50394
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0099167823791504,
      "learning_rate": 5.350623033542293e-06,
      "loss": 2.4031,
      "step": 50395
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1728193759918213,
      "learning_rate": 5.350258509059704e-06,
      "loss": 2.5061,
      "step": 50396
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0807584524154663,
      "learning_rate": 5.349893992459687e-06,
      "loss": 2.246,
      "step": 50397
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.057021141052246,
      "learning_rate": 5.349529483742868e-06,
      "loss": 2.3003,
      "step": 50398
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9916404485702515,
      "learning_rate": 5.349164982909854e-06,
      "loss": 2.2854,
      "step": 50399
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9747064113616943,
      "learning_rate": 5.348800489961272e-06,
      "loss": 2.3923,
      "step": 50400
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1976776123046875,
      "learning_rate": 5.348436004897732e-06,
      "loss": 2.3901,
      "step": 50401
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.077646255493164,
      "learning_rate": 5.348071527719862e-06,
      "loss": 2.5463,
      "step": 50402
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0612891912460327,
      "learning_rate": 5.3477070584282705e-06,
      "loss": 2.3601,
      "step": 50403
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0321229696273804,
      "learning_rate": 5.347342597023581e-06,
      "loss": 2.4291,
      "step": 50404
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0614429712295532,
      "learning_rate": 5.346978143506412e-06,
      "loss": 2.2871,
      "step": 50405
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9746492505073547,
      "learning_rate": 5.3466136978773735e-06,
      "loss": 2.3617,
      "step": 50406
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0656373500823975,
      "learning_rate": 5.3462492601370916e-06,
      "loss": 2.2391,
      "step": 50407
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9663931131362915,
      "learning_rate": 5.345884830286182e-06,
      "loss": 2.2159,
      "step": 50408
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0538450479507446,
      "learning_rate": 5.3455204083252575e-06,
      "loss": 2.2227,
      "step": 50409
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1138842105865479,
      "learning_rate": 5.3451559942549445e-06,
      "loss": 2.2956,
      "step": 50410
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.123693585395813,
      "learning_rate": 5.344791588075853e-06,
      "loss": 2.344,
      "step": 50411
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0948994159698486,
      "learning_rate": 5.344427189788606e-06,
      "loss": 2.5377,
      "step": 50412
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0003665685653687,
      "learning_rate": 5.344062799393821e-06,
      "loss": 2.3644,
      "step": 50413
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0876178741455078,
      "learning_rate": 5.343698416892109e-06,
      "loss": 2.5324,
      "step": 50414
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0919160842895508,
      "learning_rate": 5.343334042284099e-06,
      "loss": 2.3123,
      "step": 50415
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0464340448379517,
      "learning_rate": 5.342969675570397e-06,
      "loss": 2.2375,
      "step": 50416
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1074148416519165,
      "learning_rate": 5.3426053167516305e-06,
      "loss": 2.4101,
      "step": 50417
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0163007974624634,
      "learning_rate": 5.342240965828407e-06,
      "loss": 2.2799,
      "step": 50418
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0354832410812378,
      "learning_rate": 5.3418766228013564e-06,
      "loss": 2.1824,
      "step": 50419
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9945663809776306,
      "learning_rate": 5.3415122876710845e-06,
      "loss": 2.4687,
      "step": 50420
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2956727743148804,
      "learning_rate": 5.341147960438224e-06,
      "loss": 2.5545,
      "step": 50421
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0472640991210938,
      "learning_rate": 5.340783641103375e-06,
      "loss": 2.1372,
      "step": 50422
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0337467193603516,
      "learning_rate": 5.340419329667166e-06,
      "loss": 2.1728,
      "step": 50423
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1015300750732422,
      "learning_rate": 5.3400550261302085e-06,
      "loss": 2.241,
      "step": 50424
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1999380588531494,
      "learning_rate": 5.339690730493128e-06,
      "loss": 2.5608,
      "step": 50425
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.062816858291626,
      "learning_rate": 5.3393264427565316e-06,
      "loss": 2.459,
      "step": 50426
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.044996976852417,
      "learning_rate": 5.338962162921049e-06,
      "loss": 2.2801,
      "step": 50427
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1682788133621216,
      "learning_rate": 5.338597890987285e-06,
      "loss": 2.344,
      "step": 50428
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1992090940475464,
      "learning_rate": 5.338233626955869e-06,
      "loss": 2.0724,
      "step": 50429
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.022948145866394,
      "learning_rate": 5.337869370827409e-06,
      "loss": 2.3522,
      "step": 50430
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0243473052978516,
      "learning_rate": 5.337505122602531e-06,
      "loss": 2.3218,
      "step": 50431
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6256059408187866,
      "learning_rate": 5.337140882281843e-06,
      "loss": 2.4097,
      "step": 50432
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0270854234695435,
      "learning_rate": 5.336776649865972e-06,
      "loss": 2.331,
      "step": 50433
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.089425802230835,
      "learning_rate": 5.336412425355532e-06,
      "loss": 2.5168,
      "step": 50434
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0243456363677979,
      "learning_rate": 5.336048208751139e-06,
      "loss": 2.2653,
      "step": 50435
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1155261993408203,
      "learning_rate": 5.335684000053407e-06,
      "loss": 2.0092,
      "step": 50436
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.307350516319275,
      "learning_rate": 5.335319799262961e-06,
      "loss": 2.3451,
      "step": 50437
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1152210235595703,
      "learning_rate": 5.334955606380411e-06,
      "loss": 2.3719,
      "step": 50438
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0485938787460327,
      "learning_rate": 5.334591421406383e-06,
      "loss": 2.6604,
      "step": 50439
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.095373511314392,
      "learning_rate": 5.334227244341488e-06,
      "loss": 2.6222,
      "step": 50440
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.155097246170044,
      "learning_rate": 5.333863075186343e-06,
      "loss": 2.4308,
      "step": 50441
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1371688842773438,
      "learning_rate": 5.333498913941571e-06,
      "loss": 2.2606,
      "step": 50442
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1382441520690918,
      "learning_rate": 5.333134760607781e-06,
      "loss": 2.1939,
      "step": 50443
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1981632709503174,
      "learning_rate": 5.332770615185598e-06,
      "loss": 2.3984,
      "step": 50444
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1143890619277954,
      "learning_rate": 5.332406477675635e-06,
      "loss": 2.3878,
      "step": 50445
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1791622638702393,
      "learning_rate": 5.332042348078513e-06,
      "loss": 2.1928,
      "step": 50446
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0494647026062012,
      "learning_rate": 5.3316782263948475e-06,
      "loss": 2.4059,
      "step": 50447
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1863433122634888,
      "learning_rate": 5.331314112625256e-06,
      "loss": 2.3989,
      "step": 50448
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0426312685012817,
      "learning_rate": 5.33095000677035e-06,
      "loss": 2.1951,
      "step": 50449
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.030367136001587,
      "learning_rate": 5.330585908830757e-06,
      "loss": 2.3866,
      "step": 50450
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.091437816619873,
      "learning_rate": 5.330221818807083e-06,
      "loss": 2.5145,
      "step": 50451
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1762018203735352,
      "learning_rate": 5.329857736699957e-06,
      "loss": 2.4085,
      "step": 50452
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9693743586540222,
      "learning_rate": 5.3294936625099855e-06,
      "loss": 2.2162,
      "step": 50453
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1961432695388794,
      "learning_rate": 5.329129596237797e-06,
      "loss": 2.1746,
      "step": 50454
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0320343971252441,
      "learning_rate": 5.328765537883995e-06,
      "loss": 2.3123,
      "step": 50455
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0843292474746704,
      "learning_rate": 5.328401487449211e-06,
      "loss": 2.2838,
      "step": 50456
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.15126633644104,
      "learning_rate": 5.32803744493405e-06,
      "loss": 2.3238,
      "step": 50457
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0356512069702148,
      "learning_rate": 5.327673410339138e-06,
      "loss": 2.15,
      "step": 50458
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0635309219360352,
      "learning_rate": 5.327309383665086e-06,
      "loss": 2.2429,
      "step": 50459
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9403479695320129,
      "learning_rate": 5.326945364912517e-06,
      "loss": 2.344,
      "step": 50460
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9925052523612976,
      "learning_rate": 5.326581354082044e-06,
      "loss": 2.3611,
      "step": 50461
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1473608016967773,
      "learning_rate": 5.326217351174285e-06,
      "loss": 2.2579,
      "step": 50462
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1623179912567139,
      "learning_rate": 5.325853356189854e-06,
      "loss": 2.6209,
      "step": 50463
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.112309455871582,
      "learning_rate": 5.325489369129375e-06,
      "loss": 2.4362,
      "step": 50464
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.132680892944336,
      "learning_rate": 5.325125389993457e-06,
      "loss": 2.3576,
      "step": 50465
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.119632363319397,
      "learning_rate": 5.324761418782724e-06,
      "loss": 2.4081,
      "step": 50466
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0514578819274902,
      "learning_rate": 5.324397455497792e-06,
      "loss": 2.4284,
      "step": 50467
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2116281986236572,
      "learning_rate": 5.324033500139269e-06,
      "loss": 2.3156,
      "step": 50468
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9784654378890991,
      "learning_rate": 5.323669552707787e-06,
      "loss": 2.2753,
      "step": 50469
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.033352017402649,
      "learning_rate": 5.323305613203949e-06,
      "loss": 2.3877,
      "step": 50470
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0470541715621948,
      "learning_rate": 5.322941681628382e-06,
      "loss": 2.3572,
      "step": 50471
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1940699815750122,
      "learning_rate": 5.322577757981697e-06,
      "loss": 2.3944,
      "step": 50472
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1742054224014282,
      "learning_rate": 5.322213842264515e-06,
      "loss": 2.1258,
      "step": 50473
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1982002258300781,
      "learning_rate": 5.321849934477452e-06,
      "loss": 2.4457,
      "step": 50474
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.018332600593567,
      "learning_rate": 5.321486034621124e-06,
      "loss": 2.2975,
      "step": 50475
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3369734287261963,
      "learning_rate": 5.321122142696143e-06,
      "loss": 2.3588,
      "step": 50476
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0679129362106323,
      "learning_rate": 5.3207582587031355e-06,
      "loss": 2.2615,
      "step": 50477
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.024829387664795,
      "learning_rate": 5.320394382642711e-06,
      "loss": 2.3091,
      "step": 50478
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2068357467651367,
      "learning_rate": 5.320030514515491e-06,
      "loss": 2.3425,
      "step": 50479
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0858479738235474,
      "learning_rate": 5.319666654322086e-06,
      "loss": 2.3949,
      "step": 50480
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1463046073913574,
      "learning_rate": 5.319302802063122e-06,
      "loss": 2.275,
      "step": 50481
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0541661977767944,
      "learning_rate": 5.318938957739207e-06,
      "loss": 2.2307,
      "step": 50482
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0490871667861938,
      "learning_rate": 5.3185751213509665e-06,
      "loss": 2.2984,
      "step": 50483
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.035197377204895,
      "learning_rate": 5.318211292899007e-06,
      "loss": 2.1929,
      "step": 50484
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1038298606872559,
      "learning_rate": 5.317847472383957e-06,
      "loss": 2.0651,
      "step": 50485
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0707592964172363,
      "learning_rate": 5.3174836598064216e-06,
      "loss": 2.2773,
      "step": 50486
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.012119174003601,
      "learning_rate": 5.317119855167032e-06,
      "loss": 2.3743,
      "step": 50487
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1377078294754028,
      "learning_rate": 5.316756058466388e-06,
      "loss": 2.3464,
      "step": 50488
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.131134271621704,
      "learning_rate": 5.316392269705117e-06,
      "loss": 2.1019,
      "step": 50489
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1428501605987549,
      "learning_rate": 5.316028488883831e-06,
      "loss": 2.2058,
      "step": 50490
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1360024213790894,
      "learning_rate": 5.315664716003152e-06,
      "loss": 2.1936,
      "step": 50491
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1192175149917603,
      "learning_rate": 5.315300951063689e-06,
      "loss": 2.3703,
      "step": 50492
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0716136693954468,
      "learning_rate": 5.3149371940660675e-06,
      "loss": 2.0098,
      "step": 50493
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9923566579818726,
      "learning_rate": 5.314573445010898e-06,
      "loss": 2.3441,
      "step": 50494
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9595856070518494,
      "learning_rate": 5.3142097038988e-06,
      "loss": 2.2484,
      "step": 50495
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.157020926475525,
      "learning_rate": 5.313845970730391e-06,
      "loss": 2.1945,
      "step": 50496
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1009103059768677,
      "learning_rate": 5.313482245506282e-06,
      "loss": 2.3017,
      "step": 50497
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0702736377716064,
      "learning_rate": 5.313118528227097e-06,
      "loss": 2.38,
      "step": 50498
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0376613140106201,
      "learning_rate": 5.312754818893444e-06,
      "loss": 2.4635,
      "step": 50499
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0882571935653687,
      "learning_rate": 5.312391117505951e-06,
      "loss": 2.3,
      "step": 50500
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2014240026474,
      "learning_rate": 5.312027424065226e-06,
      "loss": 2.3857,
      "step": 50501
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0249755382537842,
      "learning_rate": 5.3116637385718886e-06,
      "loss": 2.0333,
      "step": 50502
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0667545795440674,
      "learning_rate": 5.31130006102655e-06,
      "loss": 2.2235,
      "step": 50503
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0947784185409546,
      "learning_rate": 5.310936391429835e-06,
      "loss": 2.5253,
      "step": 50504
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9544692635536194,
      "learning_rate": 5.310572729782353e-06,
      "loss": 2.1715,
      "step": 50505
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0331398248672485,
      "learning_rate": 5.310209076084728e-06,
      "loss": 2.194,
      "step": 50506
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.012370228767395,
      "learning_rate": 5.309845430337568e-06,
      "loss": 2.2017,
      "step": 50507
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.00413978099823,
      "learning_rate": 5.309481792541498e-06,
      "loss": 2.2614,
      "step": 50508
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0755274295806885,
      "learning_rate": 5.309118162697127e-06,
      "loss": 2.5114,
      "step": 50509
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0937141180038452,
      "learning_rate": 5.308754540805077e-06,
      "loss": 2.4202,
      "step": 50510
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.092435359954834,
      "learning_rate": 5.308390926865959e-06,
      "loss": 2.3131,
      "step": 50511
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.074952483177185,
      "learning_rate": 5.308027320880398e-06,
      "loss": 2.2324,
      "step": 50512
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0774354934692383,
      "learning_rate": 5.3076637228490035e-06,
      "loss": 2.2492,
      "step": 50513
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1085909605026245,
      "learning_rate": 5.3073001327723925e-06,
      "loss": 2.3535,
      "step": 50514
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1606779098510742,
      "learning_rate": 5.306936550651181e-06,
      "loss": 2.2698,
      "step": 50515
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0266181230545044,
      "learning_rate": 5.306572976485989e-06,
      "loss": 2.3748,
      "step": 50516
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1266446113586426,
      "learning_rate": 5.306209410277426e-06,
      "loss": 2.4065,
      "step": 50517
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0513943433761597,
      "learning_rate": 5.3058458520261165e-06,
      "loss": 2.5311,
      "step": 50518
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9357712268829346,
      "learning_rate": 5.305482301732672e-06,
      "loss": 2.2011,
      "step": 50519
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.122990608215332,
      "learning_rate": 5.305118759397712e-06,
      "loss": 2.2079,
      "step": 50520
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0706849098205566,
      "learning_rate": 5.304755225021847e-06,
      "loss": 2.2426,
      "step": 50521
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.037795066833496,
      "learning_rate": 5.304391698605701e-06,
      "loss": 2.3813,
      "step": 50522
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9718037843704224,
      "learning_rate": 5.304028180149887e-06,
      "loss": 2.34,
      "step": 50523
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.146714448928833,
      "learning_rate": 5.303664669655015e-06,
      "loss": 2.3346,
      "step": 50524
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0277717113494873,
      "learning_rate": 5.303301167121713e-06,
      "loss": 2.2764,
      "step": 50525
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1061458587646484,
      "learning_rate": 5.30293767255059e-06,
      "loss": 2.413,
      "step": 50526
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1020104885101318,
      "learning_rate": 5.3025741859422595e-06,
      "loss": 2.2778,
      "step": 50527
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9810087084770203,
      "learning_rate": 5.3022107072973465e-06,
      "loss": 2.3341,
      "step": 50528
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0390732288360596,
      "learning_rate": 5.301847236616457e-06,
      "loss": 2.4053,
      "step": 50529
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2118167877197266,
      "learning_rate": 5.301483773900216e-06,
      "loss": 2.597,
      "step": 50530
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0304632186889648,
      "learning_rate": 5.3011203191492375e-06,
      "loss": 2.3282,
      "step": 50531
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1392769813537598,
      "learning_rate": 5.300756872364132e-06,
      "loss": 2.199,
      "step": 50532
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0463104248046875,
      "learning_rate": 5.300393433545523e-06,
      "loss": 2.5442,
      "step": 50533
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1261281967163086,
      "learning_rate": 5.30003000269402e-06,
      "loss": 2.4325,
      "step": 50534
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1780807971954346,
      "learning_rate": 5.299666579810247e-06,
      "loss": 2.4011,
      "step": 50535
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0994994640350342,
      "learning_rate": 5.299303164894811e-06,
      "loss": 2.2981,
      "step": 50536
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1833173036575317,
      "learning_rate": 5.298939757948338e-06,
      "loss": 2.4459,
      "step": 50537
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1608315706253052,
      "learning_rate": 5.2985763589714345e-06,
      "loss": 2.4243,
      "step": 50538
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1277927160263062,
      "learning_rate": 5.2982129679647244e-06,
      "loss": 2.6246,
      "step": 50539
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0110362768173218,
      "learning_rate": 5.2978495849288205e-06,
      "loss": 2.3052,
      "step": 50540
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.158287763595581,
      "learning_rate": 5.297486209864338e-06,
      "loss": 2.2909,
      "step": 50541
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.629367709159851,
      "learning_rate": 5.297122842771891e-06,
      "loss": 2.3266,
      "step": 50542
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0397288799285889,
      "learning_rate": 5.2967594836521005e-06,
      "loss": 2.2172,
      "step": 50543
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0987578630447388,
      "learning_rate": 5.296396132505577e-06,
      "loss": 2.2858,
      "step": 50544
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0987749099731445,
      "learning_rate": 5.296032789332943e-06,
      "loss": 2.3917,
      "step": 50545
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1906671524047852,
      "learning_rate": 5.295669454134809e-06,
      "loss": 2.4598,
      "step": 50546
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1379773616790771,
      "learning_rate": 5.295306126911794e-06,
      "loss": 2.4559,
      "step": 50547
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1020827293395996,
      "learning_rate": 5.294942807664512e-06,
      "loss": 2.5869,
      "step": 50548
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2875691652297974,
      "learning_rate": 5.294579496393582e-06,
      "loss": 2.1614,
      "step": 50549
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.154813289642334,
      "learning_rate": 5.294216193099613e-06,
      "loss": 2.4707,
      "step": 50550
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0437331199645996,
      "learning_rate": 5.29385289778323e-06,
      "loss": 2.3956,
      "step": 50551
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1307884454727173,
      "learning_rate": 5.293489610445045e-06,
      "loss": 2.3533,
      "step": 50552
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.04914391040802,
      "learning_rate": 5.293126331085674e-06,
      "loss": 2.396,
      "step": 50553
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0593432188034058,
      "learning_rate": 5.292763059705728e-06,
      "loss": 2.393,
      "step": 50554
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0856369733810425,
      "learning_rate": 5.29239979630583e-06,
      "loss": 2.456,
      "step": 50555
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0798219442367554,
      "learning_rate": 5.292036540886589e-06,
      "loss": 2.5843,
      "step": 50556
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0482497215270996,
      "learning_rate": 5.291673293448629e-06,
      "loss": 2.5516,
      "step": 50557
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0469895601272583,
      "learning_rate": 5.291310053992562e-06,
      "loss": 2.5213,
      "step": 50558
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0369274616241455,
      "learning_rate": 5.290946822518999e-06,
      "loss": 2.2383,
      "step": 50559
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1879955530166626,
      "learning_rate": 5.290583599028565e-06,
      "loss": 2.4977,
      "step": 50560
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.194350242614746,
      "learning_rate": 5.290220383521867e-06,
      "loss": 2.2197,
      "step": 50561
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0782302618026733,
      "learning_rate": 5.289857175999526e-06,
      "loss": 2.1111,
      "step": 50562
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2370290756225586,
      "learning_rate": 5.289493976462155e-06,
      "loss": 2.3325,
      "step": 50563
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1202740669250488,
      "learning_rate": 5.289130784910375e-06,
      "loss": 2.4217,
      "step": 50564
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0543479919433594,
      "learning_rate": 5.288767601344792e-06,
      "loss": 2.1282,
      "step": 50565
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.06354558467865,
      "learning_rate": 5.288404425766036e-06,
      "loss": 2.2402,
      "step": 50566
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1561049222946167,
      "learning_rate": 5.288041258174708e-06,
      "loss": 2.3872,
      "step": 50567
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0822250843048096,
      "learning_rate": 5.2876780985714315e-06,
      "loss": 2.092,
      "step": 50568
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1189237833023071,
      "learning_rate": 5.287314946956816e-06,
      "loss": 2.1998,
      "step": 50569
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0541502237319946,
      "learning_rate": 5.286951803331488e-06,
      "loss": 2.2866,
      "step": 50570
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0934194326400757,
      "learning_rate": 5.286588667696051e-06,
      "loss": 2.6115,
      "step": 50571
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1194297075271606,
      "learning_rate": 5.2862255400511305e-06,
      "loss": 2.3216,
      "step": 50572
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0659817457199097,
      "learning_rate": 5.2858624203973365e-06,
      "loss": 2.2323,
      "step": 50573
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0213403701782227,
      "learning_rate": 5.2854993087352865e-06,
      "loss": 2.3324,
      "step": 50574
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.207918643951416,
      "learning_rate": 5.285136205065593e-06,
      "loss": 2.3272,
      "step": 50575
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2208887338638306,
      "learning_rate": 5.284773109388879e-06,
      "loss": 2.4696,
      "step": 50576
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0451610088348389,
      "learning_rate": 5.28441002170575e-06,
      "loss": 2.5371,
      "step": 50577
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1478701829910278,
      "learning_rate": 5.284046942016831e-06,
      "loss": 2.4321,
      "step": 50578
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0729185342788696,
      "learning_rate": 5.283683870322735e-06,
      "loss": 2.3933,
      "step": 50579
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.048824429512024,
      "learning_rate": 5.283320806624074e-06,
      "loss": 2.2826,
      "step": 50580
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0549988746643066,
      "learning_rate": 5.282957750921461e-06,
      "loss": 2.4029,
      "step": 50581
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1017725467681885,
      "learning_rate": 5.28259470321552e-06,
      "loss": 2.4277,
      "step": 50582
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.17227041721344,
      "learning_rate": 5.28223166350686e-06,
      "loss": 2.2018,
      "step": 50583
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0950210094451904,
      "learning_rate": 5.281868631796101e-06,
      "loss": 2.3038,
      "step": 50584
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1512832641601562,
      "learning_rate": 5.281505608083856e-06,
      "loss": 2.3742,
      "step": 50585
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1194334030151367,
      "learning_rate": 5.281142592370736e-06,
      "loss": 2.3242,
      "step": 50586
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0053203105926514,
      "learning_rate": 5.280779584657366e-06,
      "loss": 2.4645,
      "step": 50587
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2269212007522583,
      "learning_rate": 5.280416584944352e-06,
      "loss": 2.3309,
      "step": 50588
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0488059520721436,
      "learning_rate": 5.280053593232317e-06,
      "loss": 2.227,
      "step": 50589
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0213818550109863,
      "learning_rate": 5.27969060952187e-06,
      "loss": 2.2526,
      "step": 50590
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.128760576248169,
      "learning_rate": 5.279327633813634e-06,
      "loss": 2.3068,
      "step": 50591
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1676416397094727,
      "learning_rate": 5.27896466610822e-06,
      "loss": 2.2466,
      "step": 50592
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.081842064857483,
      "learning_rate": 5.278601706406241e-06,
      "loss": 2.1502,
      "step": 50593
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9768202304840088,
      "learning_rate": 5.278238754708311e-06,
      "loss": 2.1544,
      "step": 50594
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1113297939300537,
      "learning_rate": 5.277875811015054e-06,
      "loss": 2.2739,
      "step": 50595
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0025856494903564,
      "learning_rate": 5.277512875327075e-06,
      "loss": 2.3737,
      "step": 50596
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0142590999603271,
      "learning_rate": 5.277149947644999e-06,
      "loss": 2.3417,
      "step": 50597
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1115487813949585,
      "learning_rate": 5.276787027969432e-06,
      "loss": 2.5539,
      "step": 50598
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0759117603302002,
      "learning_rate": 5.276424116300998e-06,
      "loss": 2.223,
      "step": 50599
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1222138404846191,
      "learning_rate": 5.276061212640304e-06,
      "loss": 2.3803,
      "step": 50600
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0942445993423462,
      "learning_rate": 5.275698316987974e-06,
      "loss": 2.3523,
      "step": 50601
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9210569858551025,
      "learning_rate": 5.2753354293446135e-06,
      "loss": 2.0738,
      "step": 50602
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0447630882263184,
      "learning_rate": 5.274972549710846e-06,
      "loss": 2.1976,
      "step": 50603
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0116297006607056,
      "learning_rate": 5.274609678087282e-06,
      "loss": 2.4152,
      "step": 50604
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1361429691314697,
      "learning_rate": 5.274246814474539e-06,
      "loss": 2.214,
      "step": 50605
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0519145727157593,
      "learning_rate": 5.273883958873232e-06,
      "loss": 2.5056,
      "step": 50606
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.032676100730896,
      "learning_rate": 5.273521111283976e-06,
      "loss": 2.3328,
      "step": 50607
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1176971197128296,
      "learning_rate": 5.2731582717073815e-06,
      "loss": 2.3559,
      "step": 50608
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1938270330429077,
      "learning_rate": 5.272795440144069e-06,
      "loss": 2.3194,
      "step": 50609
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0404263734817505,
      "learning_rate": 5.272432616594651e-06,
      "loss": 2.4714,
      "step": 50610
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0375564098358154,
      "learning_rate": 5.272069801059747e-06,
      "loss": 2.4232,
      "step": 50611
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1432738304138184,
      "learning_rate": 5.271706993539964e-06,
      "loss": 2.3537,
      "step": 50612
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0669987201690674,
      "learning_rate": 5.271344194035928e-06,
      "loss": 2.2126,
      "step": 50613
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2157999277114868,
      "learning_rate": 5.2709814025482456e-06,
      "loss": 2.2798,
      "step": 50614
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.164942979812622,
      "learning_rate": 5.270618619077531e-06,
      "loss": 2.5283,
      "step": 50615
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3564635515213013,
      "learning_rate": 5.270255843624406e-06,
      "loss": 2.3403,
      "step": 50616
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0174951553344727,
      "learning_rate": 5.269893076189479e-06,
      "loss": 2.4023,
      "step": 50617
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2471539974212646,
      "learning_rate": 5.26953031677337e-06,
      "loss": 2.1214,
      "step": 50618
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.244924783706665,
      "learning_rate": 5.269167565376694e-06,
      "loss": 2.29,
      "step": 50619
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0238771438598633,
      "learning_rate": 5.268804822000062e-06,
      "loss": 2.193,
      "step": 50620
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3521788120269775,
      "learning_rate": 5.268442086644088e-06,
      "loss": 2.4901,
      "step": 50621
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1885790824890137,
      "learning_rate": 5.268079359309393e-06,
      "loss": 2.4619,
      "step": 50622
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1296744346618652,
      "learning_rate": 5.267716639996585e-06,
      "loss": 2.3408,
      "step": 50623
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1245783567428589,
      "learning_rate": 5.267353928706287e-06,
      "loss": 2.3451,
      "step": 50624
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0518609285354614,
      "learning_rate": 5.266991225439104e-06,
      "loss": 2.0992,
      "step": 50625
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.130219578742981,
      "learning_rate": 5.266628530195661e-06,
      "loss": 2.2088,
      "step": 50626
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0491888523101807,
      "learning_rate": 5.266265842976566e-06,
      "loss": 2.4606,
      "step": 50627
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1361888647079468,
      "learning_rate": 5.265903163782438e-06,
      "loss": 2.4029,
      "step": 50628
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.190428376197815,
      "learning_rate": 5.265540492613887e-06,
      "loss": 2.489,
      "step": 50629
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0109773874282837,
      "learning_rate": 5.265177829471533e-06,
      "loss": 2.4781,
      "step": 50630
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2236371040344238,
      "learning_rate": 5.264815174355986e-06,
      "loss": 2.4213,
      "step": 50631
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9478358626365662,
      "learning_rate": 5.264452527267871e-06,
      "loss": 2.4863,
      "step": 50632
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1964255571365356,
      "learning_rate": 5.264089888207787e-06,
      "loss": 2.4558,
      "step": 50633
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0012812614440918,
      "learning_rate": 5.263727257176361e-06,
      "loss": 2.1651,
      "step": 50634
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.01030695438385,
      "learning_rate": 5.2633646341742e-06,
      "loss": 2.2893,
      "step": 50635
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1386770009994507,
      "learning_rate": 5.263002019201926e-06,
      "loss": 2.3449,
      "step": 50636
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0262426137924194,
      "learning_rate": 5.262639412260146e-06,
      "loss": 2.177,
      "step": 50637
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.149552583694458,
      "learning_rate": 5.262276813349484e-06,
      "loss": 2.2185,
      "step": 50638
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0944565534591675,
      "learning_rate": 5.261914222470544e-06,
      "loss": 2.3636,
      "step": 50639
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1537431478500366,
      "learning_rate": 5.261551639623952e-06,
      "loss": 2.4245,
      "step": 50640
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.109847903251648,
      "learning_rate": 5.261189064810315e-06,
      "loss": 2.2584,
      "step": 50641
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0837185382843018,
      "learning_rate": 5.260826498030247e-06,
      "loss": 2.3476,
      "step": 50642
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0281599760055542,
      "learning_rate": 5.260463939284368e-06,
      "loss": 2.3912,
      "step": 50643
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0899077653884888,
      "learning_rate": 5.2601013885732865e-06,
      "loss": 2.2676,
      "step": 50644
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.032811164855957,
      "learning_rate": 5.259738845897625e-06,
      "loss": 2.3297,
      "step": 50645
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2097798585891724,
      "learning_rate": 5.259376311257993e-06,
      "loss": 2.4092,
      "step": 50646
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0744035243988037,
      "learning_rate": 5.259013784655001e-06,
      "loss": 2.2436,
      "step": 50647
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2002569437026978,
      "learning_rate": 5.258651266089274e-06,
      "loss": 2.4836,
      "step": 50648
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1490579843521118,
      "learning_rate": 5.25828875556142e-06,
      "loss": 2.2766,
      "step": 50649
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.045770525932312,
      "learning_rate": 5.25792625307205e-06,
      "loss": 2.2855,
      "step": 50650
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1944313049316406,
      "learning_rate": 5.257563758621787e-06,
      "loss": 2.1831,
      "step": 50651
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4292960166931152,
      "learning_rate": 5.257201272211238e-06,
      "loss": 2.2733,
      "step": 50652
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0741831064224243,
      "learning_rate": 5.256838793841024e-06,
      "loss": 2.1915,
      "step": 50653
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0795494318008423,
      "learning_rate": 5.256476323511755e-06,
      "loss": 2.4058,
      "step": 50654
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.11684250831604,
      "learning_rate": 5.256113861224048e-06,
      "loss": 2.3645,
      "step": 50655
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9970862865447998,
      "learning_rate": 5.2557514069785135e-06,
      "loss": 2.2087,
      "step": 50656
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.093045711517334,
      "learning_rate": 5.255388960775772e-06,
      "loss": 2.5076,
      "step": 50657
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2048614025115967,
      "learning_rate": 5.255026522616436e-06,
      "loss": 2.24,
      "step": 50658
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1647390127182007,
      "learning_rate": 5.254664092501119e-06,
      "loss": 2.5014,
      "step": 50659
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.157346248626709,
      "learning_rate": 5.254301670430431e-06,
      "loss": 2.1078,
      "step": 50660
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.190250277519226,
      "learning_rate": 5.253939256404994e-06,
      "loss": 2.3274,
      "step": 50661
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.032122254371643,
      "learning_rate": 5.253576850425415e-06,
      "loss": 2.3055,
      "step": 50662
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1315253973007202,
      "learning_rate": 5.253214452492317e-06,
      "loss": 2.5779,
      "step": 50663
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1573078632354736,
      "learning_rate": 5.252852062606304e-06,
      "loss": 2.428,
      "step": 50664
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9968995451927185,
      "learning_rate": 5.252489680768002e-06,
      "loss": 2.3322,
      "step": 50665
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1763269901275635,
      "learning_rate": 5.2521273069780146e-06,
      "loss": 2.1969,
      "step": 50666
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1040281057357788,
      "learning_rate": 5.251764941236965e-06,
      "loss": 2.2675,
      "step": 50667
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.097913146018982,
      "learning_rate": 5.2514025835454595e-06,
      "loss": 2.3625,
      "step": 50668
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0359505414962769,
      "learning_rate": 5.25104023390412e-06,
      "loss": 2.3273,
      "step": 50669
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0806152820587158,
      "learning_rate": 5.250677892313558e-06,
      "loss": 2.139,
      "step": 50670
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1942710876464844,
      "learning_rate": 5.250315558774386e-06,
      "loss": 2.3245,
      "step": 50671
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0394489765167236,
      "learning_rate": 5.249953233287215e-06,
      "loss": 2.3245,
      "step": 50672
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3137803077697754,
      "learning_rate": 5.249590915852668e-06,
      "loss": 2.2448,
      "step": 50673
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.928947925567627,
      "learning_rate": 5.24922860647135e-06,
      "loss": 2.2133,
      "step": 50674
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.184159517288208,
      "learning_rate": 5.2488663051438845e-06,
      "loss": 2.1796,
      "step": 50675
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.094082236289978,
      "learning_rate": 5.24850401187088e-06,
      "loss": 2.4241,
      "step": 50676
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.235334873199463,
      "learning_rate": 5.248141726652949e-06,
      "loss": 2.5809,
      "step": 50677
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2415610551834106,
      "learning_rate": 5.247779449490711e-06,
      "loss": 2.3933,
      "step": 50678
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0367364883422852,
      "learning_rate": 5.247417180384775e-06,
      "loss": 2.3013,
      "step": 50679
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0754530429840088,
      "learning_rate": 5.247054919335761e-06,
      "loss": 2.3516,
      "step": 50680
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.017479419708252,
      "learning_rate": 5.246692666344275e-06,
      "loss": 2.3262,
      "step": 50681
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0067459344863892,
      "learning_rate": 5.246330421410941e-06,
      "loss": 2.2923,
      "step": 50682
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0400476455688477,
      "learning_rate": 5.245968184536363e-06,
      "loss": 2.1233,
      "step": 50683
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0637608766555786,
      "learning_rate": 5.245605955721165e-06,
      "loss": 2.39,
      "step": 50684
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1800754070281982,
      "learning_rate": 5.245243734965957e-06,
      "loss": 2.4663,
      "step": 50685
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0154969692230225,
      "learning_rate": 5.2448815222713505e-06,
      "loss": 2.3968,
      "step": 50686
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.116080641746521,
      "learning_rate": 5.244519317637957e-06,
      "loss": 2.457,
      "step": 50687
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0461666584014893,
      "learning_rate": 5.2441571210664e-06,
      "loss": 2.3338,
      "step": 50688
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0283551216125488,
      "learning_rate": 5.2437949325572845e-06,
      "loss": 2.3506,
      "step": 50689
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1602758169174194,
      "learning_rate": 5.243432752111233e-06,
      "loss": 2.1963,
      "step": 50690
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.981488049030304,
      "learning_rate": 5.243070579728848e-06,
      "loss": 2.3523,
      "step": 50691
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2020292282104492,
      "learning_rate": 5.242708415410758e-06,
      "loss": 2.2738,
      "step": 50692
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1273479461669922,
      "learning_rate": 5.2423462591575625e-06,
      "loss": 2.2334,
      "step": 50693
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0688140392303467,
      "learning_rate": 5.241984110969887e-06,
      "loss": 2.2205,
      "step": 50694
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0871076583862305,
      "learning_rate": 5.241621970848337e-06,
      "loss": 2.3785,
      "step": 50695
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0221067667007446,
      "learning_rate": 5.241259838793535e-06,
      "loss": 2.4748,
      "step": 50696
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9980449676513672,
      "learning_rate": 5.240897714806088e-06,
      "loss": 2.2394,
      "step": 50697
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1478971242904663,
      "learning_rate": 5.240535598886614e-06,
      "loss": 2.4033,
      "step": 50698
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.054106593132019,
      "learning_rate": 5.2401734910357184e-06,
      "loss": 2.5538,
      "step": 50699
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0342119932174683,
      "learning_rate": 5.2398113912540274e-06,
      "loss": 2.3286,
      "step": 50700
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1586304903030396,
      "learning_rate": 5.239449299542144e-06,
      "loss": 2.1346,
      "step": 50701
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1383769512176514,
      "learning_rate": 5.239087215900691e-06,
      "loss": 2.1681,
      "step": 50702
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0586744546890259,
      "learning_rate": 5.238725140330279e-06,
      "loss": 2.3399,
      "step": 50703
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.038950800895691,
      "learning_rate": 5.238363072831517e-06,
      "loss": 2.3424,
      "step": 50704
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0905085802078247,
      "learning_rate": 5.2380010134050264e-06,
      "loss": 2.3364,
      "step": 50705
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.011064052581787,
      "learning_rate": 5.237638962051413e-06,
      "loss": 2.3416,
      "step": 50706
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1971027851104736,
      "learning_rate": 5.237276918771299e-06,
      "loss": 2.638,
      "step": 50707
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3000866174697876,
      "learning_rate": 5.236914883565292e-06,
      "loss": 2.2869,
      "step": 50708
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0640006065368652,
      "learning_rate": 5.23655285643401e-06,
      "loss": 2.1149,
      "step": 50709
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.191440463066101,
      "learning_rate": 5.236190837378061e-06,
      "loss": 2.3292,
      "step": 50710
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2129031419754028,
      "learning_rate": 5.235828826398071e-06,
      "loss": 2.5011,
      "step": 50711
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0228935480117798,
      "learning_rate": 5.235466823494637e-06,
      "loss": 2.465,
      "step": 50712
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0661346912384033,
      "learning_rate": 5.235104828668384e-06,
      "loss": 2.2936,
      "step": 50713
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0266562700271606,
      "learning_rate": 5.234742841919918e-06,
      "loss": 2.2851,
      "step": 50714
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1311278343200684,
      "learning_rate": 5.234380863249863e-06,
      "loss": 2.4435,
      "step": 50715
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1008864641189575,
      "learning_rate": 5.234018892658821e-06,
      "loss": 2.1754,
      "step": 50716
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1867231130599976,
      "learning_rate": 5.233656930147417e-06,
      "loss": 2.3726,
      "step": 50717
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2063238620758057,
      "learning_rate": 5.233294975716254e-06,
      "loss": 2.4293,
      "step": 50718
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.067603349685669,
      "learning_rate": 5.232933029365954e-06,
      "loss": 1.9984,
      "step": 50719
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.071510910987854,
      "learning_rate": 5.232571091097125e-06,
      "loss": 2.136,
      "step": 50720
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.058135986328125,
      "learning_rate": 5.232209160910385e-06,
      "loss": 2.1507,
      "step": 50721
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2823764085769653,
      "learning_rate": 5.231847238806342e-06,
      "loss": 2.1846,
      "step": 50722
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0730293989181519,
      "learning_rate": 5.2314853247856175e-06,
      "loss": 2.357,
      "step": 50723
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.132275104522705,
      "learning_rate": 5.23112341884882e-06,
      "loss": 2.4515,
      "step": 50724
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9865778088569641,
      "learning_rate": 5.230761520996563e-06,
      "loss": 2.3493,
      "step": 50725
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0958830118179321,
      "learning_rate": 5.230399631229458e-06,
      "loss": 2.3818,
      "step": 50726
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.140708565711975,
      "learning_rate": 5.230037749548125e-06,
      "loss": 2.0827,
      "step": 50727
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9303495287895203,
      "learning_rate": 5.229675875953169e-06,
      "loss": 2.4616,
      "step": 50728
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9875922203063965,
      "learning_rate": 5.22931401044521e-06,
      "loss": 2.2039,
      "step": 50729
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1246856451034546,
      "learning_rate": 5.228952153024858e-06,
      "loss": 2.3471,
      "step": 50730
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4903942346572876,
      "learning_rate": 5.228590303692731e-06,
      "loss": 2.5717,
      "step": 50731
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3279998302459717,
      "learning_rate": 5.228228462449439e-06,
      "loss": 2.4435,
      "step": 50732
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.086167812347412,
      "learning_rate": 5.227866629295593e-06,
      "loss": 2.2375,
      "step": 50733
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2281334400177002,
      "learning_rate": 5.227504804231811e-06,
      "loss": 2.5909,
      "step": 50734
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0804970264434814,
      "learning_rate": 5.227142987258702e-06,
      "loss": 2.3924,
      "step": 50735
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.010870337486267,
      "learning_rate": 5.226781178376886e-06,
      "loss": 2.3301,
      "step": 50736
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1294118165969849,
      "learning_rate": 5.2264193775869734e-06,
      "loss": 2.2173,
      "step": 50737
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0549827814102173,
      "learning_rate": 5.2260575848895744e-06,
      "loss": 2.3614,
      "step": 50738
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0552223920822144,
      "learning_rate": 5.225695800285301e-06,
      "loss": 2.279,
      "step": 50739
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0946919918060303,
      "learning_rate": 5.225334023774774e-06,
      "loss": 2.313,
      "step": 50740
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0635662078857422,
      "learning_rate": 5.224972255358598e-06,
      "loss": 2.128,
      "step": 50741
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.078068733215332,
      "learning_rate": 5.224610495037397e-06,
      "loss": 2.4975,
      "step": 50742
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0898470878601074,
      "learning_rate": 5.224248742811772e-06,
      "loss": 2.0419,
      "step": 50743
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.046842098236084,
      "learning_rate": 5.2238869986823474e-06,
      "loss": 2.327,
      "step": 50744
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1202356815338135,
      "learning_rate": 5.223525262649727e-06,
      "loss": 2.374,
      "step": 50745
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1542041301727295,
      "learning_rate": 5.223163534714533e-06,
      "loss": 2.1676,
      "step": 50746
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0861337184906006,
      "learning_rate": 5.22280181487737e-06,
      "loss": 2.4797,
      "step": 50747
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2315183877944946,
      "learning_rate": 5.22244010313886e-06,
      "loss": 2.3924,
      "step": 50748
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0174973011016846,
      "learning_rate": 5.222078399499606e-06,
      "loss": 2.2729,
      "step": 50749
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9804604649543762,
      "learning_rate": 5.221716703960231e-06,
      "loss": 2.2685,
      "step": 50750
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0871771574020386,
      "learning_rate": 5.221355016521345e-06,
      "loss": 2.313,
      "step": 50751
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.04743230342865,
      "learning_rate": 5.22099333718356e-06,
      "loss": 2.187,
      "step": 50752
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2103701829910278,
      "learning_rate": 5.220631665947486e-06,
      "loss": 2.44,
      "step": 50753
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1780364513397217,
      "learning_rate": 5.220270002813742e-06,
      "loss": 2.3766,
      "step": 50754
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.091385841369629,
      "learning_rate": 5.219908347782935e-06,
      "loss": 2.439,
      "step": 50755
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1123853921890259,
      "learning_rate": 5.2195467008556856e-06,
      "loss": 2.229,
      "step": 50756
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.033379077911377,
      "learning_rate": 5.219185062032599e-06,
      "loss": 2.3263,
      "step": 50757
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0722544193267822,
      "learning_rate": 5.218823431314296e-06,
      "loss": 2.1488,
      "step": 50758
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0074515342712402,
      "learning_rate": 5.218461808701386e-06,
      "loss": 2.344,
      "step": 50759
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9696322083473206,
      "learning_rate": 5.218100194194478e-06,
      "loss": 2.2399,
      "step": 50760
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0824148654937744,
      "learning_rate": 5.217738587794192e-06,
      "loss": 2.4377,
      "step": 50761
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.261672019958496,
      "learning_rate": 5.217376989501135e-06,
      "loss": 2.0595,
      "step": 50762
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2452776432037354,
      "learning_rate": 5.217015399315927e-06,
      "loss": 2.2764,
      "step": 50763
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0361642837524414,
      "learning_rate": 5.216653817239177e-06,
      "loss": 2.3734,
      "step": 50764
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9864150881767273,
      "learning_rate": 5.216292243271492e-06,
      "loss": 2.5085,
      "step": 50765
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3182357549667358,
      "learning_rate": 5.215930677413497e-06,
      "loss": 2.3879,
      "step": 50766
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0087882280349731,
      "learning_rate": 5.2155691196657975e-06,
      "loss": 2.2938,
      "step": 50767
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.10606849193573,
      "learning_rate": 5.215207570029005e-06,
      "loss": 2.3203,
      "step": 50768
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0851984024047852,
      "learning_rate": 5.214846028503738e-06,
      "loss": 2.4047,
      "step": 50769
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2454020977020264,
      "learning_rate": 5.214484495090604e-06,
      "loss": 2.3545,
      "step": 50770
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.025288462638855,
      "learning_rate": 5.21412296979022e-06,
      "loss": 1.9899,
      "step": 50771
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0132166147232056,
      "learning_rate": 5.213761452603196e-06,
      "loss": 2.1039,
      "step": 50772
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1480463743209839,
      "learning_rate": 5.2133999435301505e-06,
      "loss": 2.5858,
      "step": 50773
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0839322805404663,
      "learning_rate": 5.213038442571687e-06,
      "loss": 2.2866,
      "step": 50774
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2156018018722534,
      "learning_rate": 5.2126769497284265e-06,
      "loss": 2.2586,
      "step": 50775
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0527068376541138,
      "learning_rate": 5.212315465000976e-06,
      "loss": 2.0724,
      "step": 50776
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.14165461063385,
      "learning_rate": 5.211953988389959e-06,
      "loss": 2.3983,
      "step": 50777
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.086869239807129,
      "learning_rate": 5.211592519895973e-06,
      "loss": 2.4811,
      "step": 50778
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1291323900222778,
      "learning_rate": 5.2112310595196415e-06,
      "loss": 2.1867,
      "step": 50779
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9898945689201355,
      "learning_rate": 5.21086960726157e-06,
      "loss": 2.1596,
      "step": 50780
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1700379848480225,
      "learning_rate": 5.210508163122379e-06,
      "loss": 2.3142,
      "step": 50781
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.038280963897705,
      "learning_rate": 5.210146727102674e-06,
      "loss": 2.2553,
      "step": 50782
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0502538681030273,
      "learning_rate": 5.209785299203074e-06,
      "loss": 2.313,
      "step": 50783
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0495035648345947,
      "learning_rate": 5.209423879424186e-06,
      "loss": 2.2605,
      "step": 50784
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0752918720245361,
      "learning_rate": 5.209062467766629e-06,
      "loss": 2.1998,
      "step": 50785
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9990619421005249,
      "learning_rate": 5.208701064231009e-06,
      "loss": 2.2227,
      "step": 50786
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.138933539390564,
      "learning_rate": 5.208339668817946e-06,
      "loss": 2.2041,
      "step": 50787
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0085822343826294,
      "learning_rate": 5.207978281528047e-06,
      "loss": 2.3176,
      "step": 50788
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.070613980293274,
      "learning_rate": 5.207616902361925e-06,
      "loss": 2.2657,
      "step": 50789
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1211434602737427,
      "learning_rate": 5.207255531320197e-06,
      "loss": 2.3565,
      "step": 50790
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2417374849319458,
      "learning_rate": 5.2068941684034715e-06,
      "loss": 2.2382,
      "step": 50791
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.095541000366211,
      "learning_rate": 5.2065328136123595e-06,
      "loss": 2.2773,
      "step": 50792
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1401251554489136,
      "learning_rate": 5.206171466947478e-06,
      "loss": 2.2399,
      "step": 50793
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2028099298477173,
      "learning_rate": 5.205810128409441e-06,
      "loss": 2.1941,
      "step": 50794
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.054550051689148,
      "learning_rate": 5.205448797998851e-06,
      "loss": 2.2714,
      "step": 50795
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0438774824142456,
      "learning_rate": 5.205087475716334e-06,
      "loss": 2.4157,
      "step": 50796
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1756792068481445,
      "learning_rate": 5.20472616156249e-06,
      "loss": 2.584,
      "step": 50797
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0389970541000366,
      "learning_rate": 5.204364855537942e-06,
      "loss": 2.2324,
      "step": 50798
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0059279203414917,
      "learning_rate": 5.204003557643296e-06,
      "loss": 2.2267,
      "step": 50799
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.077547311782837,
      "learning_rate": 5.203642267879169e-06,
      "loss": 2.1353,
      "step": 50800
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0532814264297485,
      "learning_rate": 5.2032809862461665e-06,
      "loss": 2.3287,
      "step": 50801
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0088229179382324,
      "learning_rate": 5.20291971274491e-06,
      "loss": 2.2189,
      "step": 50802
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0757567882537842,
      "learning_rate": 5.202558447376007e-06,
      "loss": 2.1041,
      "step": 50803
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.008992075920105,
      "learning_rate": 5.202197190140071e-06,
      "loss": 2.4147,
      "step": 50804
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1860008239746094,
      "learning_rate": 5.20183594103771e-06,
      "loss": 2.506,
      "step": 50805
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1081299781799316,
      "learning_rate": 5.201474700069544e-06,
      "loss": 2.3411,
      "step": 50806
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1338868141174316,
      "learning_rate": 5.201113467236177e-06,
      "loss": 2.5019,
      "step": 50807
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4168214797973633,
      "learning_rate": 5.2007522425382314e-06,
      "loss": 2.1904,
      "step": 50808
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0534858703613281,
      "learning_rate": 5.20039102597631e-06,
      "loss": 2.2889,
      "step": 50809
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1023955345153809,
      "learning_rate": 5.2000298175510324e-06,
      "loss": 2.5111,
      "step": 50810
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.155649185180664,
      "learning_rate": 5.199668617263004e-06,
      "loss": 2.2937,
      "step": 50811
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1419024467468262,
      "learning_rate": 5.199307425112845e-06,
      "loss": 2.4662,
      "step": 50812
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9762154817581177,
      "learning_rate": 5.19894624110116e-06,
      "loss": 2.2554,
      "step": 50813
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2465533018112183,
      "learning_rate": 5.198585065228569e-06,
      "loss": 2.2822,
      "step": 50814
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0779398679733276,
      "learning_rate": 5.198223897495681e-06,
      "loss": 2.387,
      "step": 50815
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.091902256011963,
      "learning_rate": 5.197862737903108e-06,
      "loss": 2.3021,
      "step": 50816
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1234679222106934,
      "learning_rate": 5.197501586451457e-06,
      "loss": 2.4825,
      "step": 50817
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4712780714035034,
      "learning_rate": 5.197140443141349e-06,
      "loss": 2.2156,
      "step": 50818
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2442755699157715,
      "learning_rate": 5.196779307973389e-06,
      "loss": 2.41,
      "step": 50819
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.125111699104309,
      "learning_rate": 5.196418180948195e-06,
      "loss": 2.3637,
      "step": 50820
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.151776671409607,
      "learning_rate": 5.196057062066379e-06,
      "loss": 2.2904,
      "step": 50821
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.051661491394043,
      "learning_rate": 5.195695951328546e-06,
      "loss": 2.2749,
      "step": 50822
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0898021459579468,
      "learning_rate": 5.195334848735317e-06,
      "loss": 2.2024,
      "step": 50823
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1611034870147705,
      "learning_rate": 5.194973754287297e-06,
      "loss": 2.4362,
      "step": 50824
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0403196811676025,
      "learning_rate": 5.194612667985105e-06,
      "loss": 2.3029,
      "step": 50825
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0276378393173218,
      "learning_rate": 5.194251589829347e-06,
      "loss": 2.3934,
      "step": 50826
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2155237197875977,
      "learning_rate": 5.193890519820639e-06,
      "loss": 2.2084,
      "step": 50827
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.120907187461853,
      "learning_rate": 5.19352945795959e-06,
      "loss": 2.1218,
      "step": 50828
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0186866521835327,
      "learning_rate": 5.193168404246818e-06,
      "loss": 2.5047,
      "step": 50829
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.382863163948059,
      "learning_rate": 5.19280735868293e-06,
      "loss": 2.2146,
      "step": 50830
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4988811016082764,
      "learning_rate": 5.192446321268539e-06,
      "loss": 2.1226,
      "step": 50831
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0385128259658813,
      "learning_rate": 5.192085292004254e-06,
      "loss": 2.3024,
      "step": 50832
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1173360347747803,
      "learning_rate": 5.191724270890695e-06,
      "loss": 2.285,
      "step": 50833
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9641777873039246,
      "learning_rate": 5.191363257928463e-06,
      "loss": 2.4458,
      "step": 50834
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2054225206375122,
      "learning_rate": 5.191002253118183e-06,
      "loss": 2.235,
      "step": 50835
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0707083940505981,
      "learning_rate": 5.190641256460454e-06,
      "loss": 2.295,
      "step": 50836
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0679081678390503,
      "learning_rate": 5.1902802679559e-06,
      "loss": 2.3207,
      "step": 50837
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.174088478088379,
      "learning_rate": 5.189919287605123e-06,
      "loss": 2.292,
      "step": 50838
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0894445180892944,
      "learning_rate": 5.1895583154087424e-06,
      "loss": 2.3822,
      "step": 50839
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2513118982315063,
      "learning_rate": 5.189197351367363e-06,
      "loss": 2.6204,
      "step": 50840
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.147657036781311,
      "learning_rate": 5.1888363954816045e-06,
      "loss": 2.4214,
      "step": 50841
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1615692377090454,
      "learning_rate": 5.188475447752071e-06,
      "loss": 2.4586,
      "step": 50842
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0453389883041382,
      "learning_rate": 5.188114508179387e-06,
      "loss": 2.2386,
      "step": 50843
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0438412427902222,
      "learning_rate": 5.187753576764148e-06,
      "loss": 2.2104,
      "step": 50844
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0698835849761963,
      "learning_rate": 5.187392653506977e-06,
      "loss": 2.2421,
      "step": 50845
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0989758968353271,
      "learning_rate": 5.187031738408479e-06,
      "loss": 2.1133,
      "step": 50846
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1990396976470947,
      "learning_rate": 5.186670831469274e-06,
      "loss": 2.2664,
      "step": 50847
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.106328010559082,
      "learning_rate": 5.186309932689963e-06,
      "loss": 2.2257,
      "step": 50848
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1480205059051514,
      "learning_rate": 5.18594904207117e-06,
      "loss": 2.4217,
      "step": 50849
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2325888872146606,
      "learning_rate": 5.185588159613502e-06,
      "loss": 2.4118,
      "step": 50850
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1325353384017944,
      "learning_rate": 5.1852272853175625e-06,
      "loss": 2.2263,
      "step": 50851
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0122945308685303,
      "learning_rate": 5.184866419183976e-06,
      "loss": 2.2007,
      "step": 50852
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0372565984725952,
      "learning_rate": 5.184505561213344e-06,
      "loss": 2.2142,
      "step": 50853
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.067254900932312,
      "learning_rate": 5.18414471140629e-06,
      "loss": 2.466,
      "step": 50854
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0907295942306519,
      "learning_rate": 5.1837838697634105e-06,
      "loss": 2.4607,
      "step": 50855
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1694256067276,
      "learning_rate": 5.183423036285335e-06,
      "loss": 2.3773,
      "step": 50856
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0955384969711304,
      "learning_rate": 5.183062210972658e-06,
      "loss": 2.2029,
      "step": 50857
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0849626064300537,
      "learning_rate": 5.182701393826003e-06,
      "loss": 2.2839,
      "step": 50858
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1784385442733765,
      "learning_rate": 5.182340584845973e-06,
      "loss": 2.3007,
      "step": 50859
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0907931327819824,
      "learning_rate": 5.1819797840331885e-06,
      "loss": 2.3151,
      "step": 50860
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0581878423690796,
      "learning_rate": 5.181618991388253e-06,
      "loss": 2.3876,
      "step": 50861
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9737477898597717,
      "learning_rate": 5.181258206911785e-06,
      "loss": 2.4613,
      "step": 50862
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1574506759643555,
      "learning_rate": 5.18089743060439e-06,
      "loss": 2.4143,
      "step": 50863
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1348544359207153,
      "learning_rate": 5.180536662466686e-06,
      "loss": 2.1055,
      "step": 50864
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.115918517112732,
      "learning_rate": 5.1801759024992785e-06,
      "loss": 2.413,
      "step": 50865
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1395660638809204,
      "learning_rate": 5.179815150702785e-06,
      "loss": 2.3392,
      "step": 50866
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0644971132278442,
      "learning_rate": 5.179454407077809e-06,
      "loss": 2.2168,
      "step": 50867
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.21497642993927,
      "learning_rate": 5.179093671624973e-06,
      "loss": 2.3013,
      "step": 50868
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1209619045257568,
      "learning_rate": 5.178732944344881e-06,
      "loss": 2.2575,
      "step": 50869
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0844486951828003,
      "learning_rate": 5.1783722252381465e-06,
      "loss": 2.4433,
      "step": 50870
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3165359497070312,
      "learning_rate": 5.178011514305377e-06,
      "loss": 2.2573,
      "step": 50871
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1710389852523804,
      "learning_rate": 5.177650811547191e-06,
      "loss": 2.4829,
      "step": 50872
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0633713006973267,
      "learning_rate": 5.177290116964193e-06,
      "loss": 2.4663,
      "step": 50873
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0684561729431152,
      "learning_rate": 5.176929430557003e-06,
      "loss": 2.3933,
      "step": 50874
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0195943117141724,
      "learning_rate": 5.176568752326222e-06,
      "loss": 2.2315,
      "step": 50875
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0540269613265991,
      "learning_rate": 5.176208082272471e-06,
      "loss": 2.4552,
      "step": 50876
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0115500688552856,
      "learning_rate": 5.175847420396358e-06,
      "loss": 2.3329,
      "step": 50877
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0593938827514648,
      "learning_rate": 5.175486766698491e-06,
      "loss": 2.2669,
      "step": 50878
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0404300689697266,
      "learning_rate": 5.175126121179487e-06,
      "loss": 2.2755,
      "step": 50879
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0995938777923584,
      "learning_rate": 5.174765483839951e-06,
      "loss": 2.1922,
      "step": 50880
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1261838674545288,
      "learning_rate": 5.174404854680501e-06,
      "loss": 2.3672,
      "step": 50881
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.094492793083191,
      "learning_rate": 5.174044233701747e-06,
      "loss": 2.2905,
      "step": 50882
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0990761518478394,
      "learning_rate": 5.173683620904293e-06,
      "loss": 2.4953,
      "step": 50883
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.205230951309204,
      "learning_rate": 5.173323016288762e-06,
      "loss": 2.3017,
      "step": 50884
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0231069326400757,
      "learning_rate": 5.172962419855758e-06,
      "loss": 2.1865,
      "step": 50885
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9661028981208801,
      "learning_rate": 5.17260183160589e-06,
      "loss": 2.1109,
      "step": 50886
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2204188108444214,
      "learning_rate": 5.172241251539778e-06,
      "loss": 2.2843,
      "step": 50887
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0854265689849854,
      "learning_rate": 5.171880679658024e-06,
      "loss": 2.4099,
      "step": 50888
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0462144613265991,
      "learning_rate": 5.171520115961246e-06,
      "loss": 2.3981,
      "step": 50889
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2925055027008057,
      "learning_rate": 5.171159560450049e-06,
      "loss": 2.4317,
      "step": 50890
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0957379341125488,
      "learning_rate": 5.170799013125054e-06,
      "loss": 2.3051,
      "step": 50891
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.34409499168396,
      "learning_rate": 5.170438473986861e-06,
      "loss": 2.3812,
      "step": 50892
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1009135246276855,
      "learning_rate": 5.1700779430360895e-06,
      "loss": 2.403,
      "step": 50893
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1992899179458618,
      "learning_rate": 5.169717420273346e-06,
      "loss": 2.3279,
      "step": 50894
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0804500579833984,
      "learning_rate": 5.169356905699245e-06,
      "loss": 2.5375,
      "step": 50895
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1525191068649292,
      "learning_rate": 5.168996399314397e-06,
      "loss": 2.1505,
      "step": 50896
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0016268491744995,
      "learning_rate": 5.168635901119412e-06,
      "loss": 2.2355,
      "step": 50897
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0593737363815308,
      "learning_rate": 5.168275411114898e-06,
      "loss": 2.1583,
      "step": 50898
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3466802835464478,
      "learning_rate": 5.167914929301473e-06,
      "loss": 2.4577,
      "step": 50899
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.104368805885315,
      "learning_rate": 5.1675544556797405e-06,
      "loss": 2.2904,
      "step": 50900
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.088304042816162,
      "learning_rate": 5.167193990250321e-06,
      "loss": 2.2415,
      "step": 50901
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0797648429870605,
      "learning_rate": 5.166833533013814e-06,
      "loss": 2.3517,
      "step": 50902
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1023445129394531,
      "learning_rate": 5.166473083970842e-06,
      "loss": 2.2501,
      "step": 50903
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1861876249313354,
      "learning_rate": 5.166112643122007e-06,
      "loss": 2.15,
      "step": 50904
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0637799501419067,
      "learning_rate": 5.165752210467927e-06,
      "loss": 2.1166,
      "step": 50905
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1370314359664917,
      "learning_rate": 5.165391786009211e-06,
      "loss": 2.4674,
      "step": 50906
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1347531080245972,
      "learning_rate": 5.165031369746465e-06,
      "loss": 2.4301,
      "step": 50907
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.031297206878662,
      "learning_rate": 5.164670961680307e-06,
      "loss": 2.2783,
      "step": 50908
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.051629900932312,
      "learning_rate": 5.164310561811346e-06,
      "loss": 2.2191,
      "step": 50909
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1045485734939575,
      "learning_rate": 5.1639501701401885e-06,
      "loss": 2.3947,
      "step": 50910
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0477979183197021,
      "learning_rate": 5.163589786667451e-06,
      "loss": 2.3741,
      "step": 50911
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1206320524215698,
      "learning_rate": 5.163229411393744e-06,
      "loss": 2.2519,
      "step": 50912
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2805120944976807,
      "learning_rate": 5.162869044319672e-06,
      "loss": 2.4094,
      "step": 50913
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0575623512268066,
      "learning_rate": 5.1625086854458555e-06,
      "loss": 2.4143,
      "step": 50914
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.390821933746338,
      "learning_rate": 5.162148334772896e-06,
      "loss": 2.4036,
      "step": 50915
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0324984788894653,
      "learning_rate": 5.161787992301412e-06,
      "loss": 2.3504,
      "step": 50916
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1019587516784668,
      "learning_rate": 5.161427658032009e-06,
      "loss": 2.1742,
      "step": 50917
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0742567777633667,
      "learning_rate": 5.161067331965304e-06,
      "loss": 2.3229,
      "step": 50918
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.412382960319519,
      "learning_rate": 5.1607070141019e-06,
      "loss": 2.1725,
      "step": 50919
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.092488408088684,
      "learning_rate": 5.160346704442416e-06,
      "loss": 2.3021,
      "step": 50920
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0008848905563354,
      "learning_rate": 5.159986402987453e-06,
      "loss": 2.2069,
      "step": 50921
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3576841354370117,
      "learning_rate": 5.159626109737638e-06,
      "loss": 2.2853,
      "step": 50922
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0309313535690308,
      "learning_rate": 5.159265824693562e-06,
      "loss": 2.2205,
      "step": 50923
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1415977478027344,
      "learning_rate": 5.15890554785585e-06,
      "loss": 2.2313,
      "step": 50924
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1649184226989746,
      "learning_rate": 5.1585452792251035e-06,
      "loss": 2.171,
      "step": 50925
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1750717163085938,
      "learning_rate": 5.1581850188019425e-06,
      "loss": 2.4663,
      "step": 50926
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.082948088645935,
      "learning_rate": 5.157824766586968e-06,
      "loss": 2.2339,
      "step": 50927
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.088737964630127,
      "learning_rate": 5.1574645225807995e-06,
      "loss": 2.1998,
      "step": 50928
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2518452405929565,
      "learning_rate": 5.157104286784041e-06,
      "loss": 2.4216,
      "step": 50929
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1465400457382202,
      "learning_rate": 5.156744059197307e-06,
      "loss": 2.4278,
      "step": 50930
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1195318698883057,
      "learning_rate": 5.156383839821206e-06,
      "loss": 2.3868,
      "step": 50931
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0478757619857788,
      "learning_rate": 5.156023628656354e-06,
      "loss": 2.4621,
      "step": 50932
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1889328956604004,
      "learning_rate": 5.155663425703357e-06,
      "loss": 2.446,
      "step": 50933
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0766959190368652,
      "learning_rate": 5.155303230962821e-06,
      "loss": 2.223,
      "step": 50934
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0444958209991455,
      "learning_rate": 5.1549430444353676e-06,
      "loss": 2.2132,
      "step": 50935
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.098344087600708,
      "learning_rate": 5.1545828661216e-06,
      "loss": 2.4274,
      "step": 50936
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.095587968826294,
      "learning_rate": 5.154222696022128e-06,
      "loss": 2.2173,
      "step": 50937
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1823891401290894,
      "learning_rate": 5.153862534137568e-06,
      "loss": 2.3472,
      "step": 50938
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0198456048965454,
      "learning_rate": 5.1535023804685266e-06,
      "loss": 2.2571,
      "step": 50939
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0260239839553833,
      "learning_rate": 5.1531422350156115e-06,
      "loss": 2.2947,
      "step": 50940
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1925071477890015,
      "learning_rate": 5.15278209777944e-06,
      "loss": 2.5598,
      "step": 50941
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.026956558227539,
      "learning_rate": 5.152421968760617e-06,
      "loss": 2.2642,
      "step": 50942
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9933991432189941,
      "learning_rate": 5.152061847959758e-06,
      "loss": 2.2498,
      "step": 50943
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0811727046966553,
      "learning_rate": 5.151701735377468e-06,
      "loss": 2.3316,
      "step": 50944
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1032978296279907,
      "learning_rate": 5.151341631014364e-06,
      "loss": 2.2284,
      "step": 50945
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1555125713348389,
      "learning_rate": 5.150981534871048e-06,
      "loss": 2.3194,
      "step": 50946
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0435785055160522,
      "learning_rate": 5.1506214469481386e-06,
      "loss": 2.4073,
      "step": 50947
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2197576761245728,
      "learning_rate": 5.150261367246245e-06,
      "loss": 2.5059,
      "step": 50948
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1289423704147339,
      "learning_rate": 5.149901295765974e-06,
      "loss": 2.4771,
      "step": 50949
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.056534767150879,
      "learning_rate": 5.149541232507934e-06,
      "loss": 2.4746,
      "step": 50950
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0716254711151123,
      "learning_rate": 5.149181177472744e-06,
      "loss": 2.1857,
      "step": 50951
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0059428215026855,
      "learning_rate": 5.148821130661003e-06,
      "loss": 2.1975,
      "step": 50952
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9727739691734314,
      "learning_rate": 5.148461092073333e-06,
      "loss": 2.298,
      "step": 50953
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1822282075881958,
      "learning_rate": 5.1481010617103365e-06,
      "loss": 2.318,
      "step": 50954
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0304877758026123,
      "learning_rate": 5.147741039572629e-06,
      "loss": 2.6489,
      "step": 50955
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0501270294189453,
      "learning_rate": 5.147381025660815e-06,
      "loss": 2.2925,
      "step": 50956
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.102966070175171,
      "learning_rate": 5.147021019975511e-06,
      "loss": 2.4315,
      "step": 50957
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.023977518081665,
      "learning_rate": 5.146661022517322e-06,
      "loss": 2.5959,
      "step": 50958
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0883071422576904,
      "learning_rate": 5.146301033286864e-06,
      "loss": 2.3246,
      "step": 50959
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0537117719650269,
      "learning_rate": 5.14594105228474e-06,
      "loss": 2.3419,
      "step": 50960
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0867527723312378,
      "learning_rate": 5.1455810795115715e-06,
      "loss": 2.3019,
      "step": 50961
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.336969256401062,
      "learning_rate": 5.145221114967953e-06,
      "loss": 2.2897,
      "step": 50962
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0536532402038574,
      "learning_rate": 5.1448611586545096e-06,
      "loss": 2.1093,
      "step": 50963
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1896213293075562,
      "learning_rate": 5.144501210571839e-06,
      "loss": 2.5611,
      "step": 50964
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0733413696289062,
      "learning_rate": 5.144141270720563e-06,
      "loss": 2.1866,
      "step": 50965
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0632375478744507,
      "learning_rate": 5.143781339101281e-06,
      "loss": 2.4465,
      "step": 50966
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0003468990325928,
      "learning_rate": 5.143421415714615e-06,
      "loss": 2.4657,
      "step": 50967
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0213736295700073,
      "learning_rate": 5.1430615005611666e-06,
      "loss": 2.3108,
      "step": 50968
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0724530220031738,
      "learning_rate": 5.1427015936415445e-06,
      "loss": 2.3814,
      "step": 50969
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1068929433822632,
      "learning_rate": 5.142341694956367e-06,
      "loss": 2.3614,
      "step": 50970
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2102690935134888,
      "learning_rate": 5.141981804506235e-06,
      "loss": 2.1825,
      "step": 50971
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0943111181259155,
      "learning_rate": 5.141621922291767e-06,
      "loss": 2.5093,
      "step": 50972
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0574449300765991,
      "learning_rate": 5.141262048313566e-06,
      "loss": 2.4801,
      "step": 50973
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.094766616821289,
      "learning_rate": 5.140902182572249e-06,
      "loss": 2.327,
      "step": 50974
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0060290098190308,
      "learning_rate": 5.140542325068423e-06,
      "loss": 2.2106,
      "step": 50975
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1292386054992676,
      "learning_rate": 5.140182475802697e-06,
      "loss": 2.517,
      "step": 50976
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.088201880455017,
      "learning_rate": 5.139822634775679e-06,
      "loss": 2.1273,
      "step": 50977
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1577924489974976,
      "learning_rate": 5.139462801987984e-06,
      "loss": 2.3833,
      "step": 50978
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1745976209640503,
      "learning_rate": 5.139102977440217e-06,
      "loss": 2.0655,
      "step": 50979
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0494410991668701,
      "learning_rate": 5.138743161132993e-06,
      "loss": 2.4307,
      "step": 50980
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0637494325637817,
      "learning_rate": 5.138383353066917e-06,
      "loss": 2.4101,
      "step": 50981
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0051552057266235,
      "learning_rate": 5.138023553242606e-06,
      "loss": 2.3507,
      "step": 50982
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.139328956604004,
      "learning_rate": 5.13766376166066e-06,
      "loss": 2.5305,
      "step": 50983
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.130228877067566,
      "learning_rate": 5.1373039783216995e-06,
      "loss": 2.3333,
      "step": 50984
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1805493831634521,
      "learning_rate": 5.136944203226325e-06,
      "loss": 2.2607,
      "step": 50985
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.034041404724121,
      "learning_rate": 5.136584436375155e-06,
      "loss": 2.395,
      "step": 50986
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0746937990188599,
      "learning_rate": 5.1362246777687905e-06,
      "loss": 2.4774,
      "step": 50987
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0637327432632446,
      "learning_rate": 5.135864927407856e-06,
      "loss": 2.4783,
      "step": 50988
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1379979848861694,
      "learning_rate": 5.135505185292943e-06,
      "loss": 2.2787,
      "step": 50989
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2288833856582642,
      "learning_rate": 5.135145451424673e-06,
      "loss": 2.3243,
      "step": 50990
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1954244375228882,
      "learning_rate": 5.134785725803649e-06,
      "loss": 2.3085,
      "step": 50991
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1687617301940918,
      "learning_rate": 5.134426008430489e-06,
      "loss": 2.4616,
      "step": 50992
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1522985696792603,
      "learning_rate": 5.134066299305794e-06,
      "loss": 2.0777,
      "step": 50993
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1494392156600952,
      "learning_rate": 5.133706598430182e-06,
      "loss": 2.5754,
      "step": 50994
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1023054122924805,
      "learning_rate": 5.133346905804259e-06,
      "loss": 2.546,
      "step": 50995
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1354596614837646,
      "learning_rate": 5.132987221428632e-06,
      "loss": 2.0658,
      "step": 50996
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0370256900787354,
      "learning_rate": 5.132627545303916e-06,
      "loss": 2.2173,
      "step": 50997
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1019648313522339,
      "learning_rate": 5.132267877430715e-06,
      "loss": 2.4141,
      "step": 50998
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9864311814308167,
      "learning_rate": 5.131908217809646e-06,
      "loss": 2.4694,
      "step": 50999
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.103567361831665,
      "learning_rate": 5.131548566441311e-06,
      "loss": 2.2519,
      "step": 51000
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.964666485786438,
      "learning_rate": 5.131188923326327e-06,
      "loss": 2.1889,
      "step": 51001
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1066462993621826,
      "learning_rate": 5.1308292884653e-06,
      "loss": 2.2845,
      "step": 51002
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0426274538040161,
      "learning_rate": 5.130469661858839e-06,
      "loss": 2.4199,
      "step": 51003
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0693111419677734,
      "learning_rate": 5.130110043507553e-06,
      "loss": 2.4736,
      "step": 51004
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9936769008636475,
      "learning_rate": 5.129750433412055e-06,
      "loss": 2.1936,
      "step": 51005
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0668871402740479,
      "learning_rate": 5.129390831572949e-06,
      "loss": 2.3446,
      "step": 51006
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1287153959274292,
      "learning_rate": 5.1290312379908524e-06,
      "loss": 2.2688,
      "step": 51007
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9719114303588867,
      "learning_rate": 5.128671652666367e-06,
      "loss": 2.2328,
      "step": 51008
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1228126287460327,
      "learning_rate": 5.1283120756001105e-06,
      "loss": 2.27,
      "step": 51009
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0759875774383545,
      "learning_rate": 5.127952506792684e-06,
      "loss": 2.5284,
      "step": 51010
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1081109046936035,
      "learning_rate": 5.127592946244705e-06,
      "loss": 2.2688,
      "step": 51011
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1635662317276,
      "learning_rate": 5.1272333939567755e-06,
      "loss": 2.2369,
      "step": 51012
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1103315353393555,
      "learning_rate": 5.126873849929512e-06,
      "loss": 2.448,
      "step": 51013
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3326311111450195,
      "learning_rate": 5.126514314163522e-06,
      "loss": 2.3421,
      "step": 51014
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0146982669830322,
      "learning_rate": 5.1261547866594146e-06,
      "loss": 2.3754,
      "step": 51015
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1489263772964478,
      "learning_rate": 5.125795267417792e-06,
      "loss": 2.4549,
      "step": 51016
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1531202793121338,
      "learning_rate": 5.125435756439276e-06,
      "loss": 2.1725,
      "step": 51017
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1680086851119995,
      "learning_rate": 5.125076253724466e-06,
      "loss": 2.404,
      "step": 51018
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1579111814498901,
      "learning_rate": 5.12471675927398e-06,
      "loss": 2.3112,
      "step": 51019
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1099497079849243,
      "learning_rate": 5.124357273088418e-06,
      "loss": 2.3601,
      "step": 51020
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1140836477279663,
      "learning_rate": 5.1239977951684005e-06,
      "loss": 2.2559,
      "step": 51021
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.105209231376648,
      "learning_rate": 5.123638325514526e-06,
      "loss": 2.5539,
      "step": 51022
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0234565734863281,
      "learning_rate": 5.1232788641274125e-06,
      "loss": 2.4253,
      "step": 51023
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9855530858039856,
      "learning_rate": 5.122919411007666e-06,
      "loss": 2.4502,
      "step": 51024
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4061899185180664,
      "learning_rate": 5.122559966155894e-06,
      "loss": 2.2426,
      "step": 51025
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0752204656600952,
      "learning_rate": 5.122200529572708e-06,
      "loss": 2.1929,
      "step": 51026
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0796992778778076,
      "learning_rate": 5.121841101258719e-06,
      "loss": 2.2128,
      "step": 51027
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1304277181625366,
      "learning_rate": 5.12148168121453e-06,
      "loss": 2.4747,
      "step": 51028
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.22346031665802,
      "learning_rate": 5.121122269440757e-06,
      "loss": 2.249,
      "step": 51029
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0803261995315552,
      "learning_rate": 5.120762865938009e-06,
      "loss": 2.3297,
      "step": 51030
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0571739673614502,
      "learning_rate": 5.120403470706887e-06,
      "loss": 2.4551,
      "step": 51031
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.97747802734375,
      "learning_rate": 5.120044083748012e-06,
      "loss": 2.3643,
      "step": 51032
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.091538429260254,
      "learning_rate": 5.119684705061984e-06,
      "loss": 2.3305,
      "step": 51033
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0569051504135132,
      "learning_rate": 5.1193253346494185e-06,
      "loss": 2.2968,
      "step": 51034
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1505533456802368,
      "learning_rate": 5.118965972510919e-06,
      "loss": 2.4969,
      "step": 51035
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.056434154510498,
      "learning_rate": 5.1186066186471015e-06,
      "loss": 2.2948,
      "step": 51036
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.967536211013794,
      "learning_rate": 5.1182472730585675e-06,
      "loss": 2.2762,
      "step": 51037
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.379813313484192,
      "learning_rate": 5.117887935745936e-06,
      "loss": 2.3193,
      "step": 51038
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.345562219619751,
      "learning_rate": 5.117528606709804e-06,
      "loss": 2.3077,
      "step": 51039
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0501995086669922,
      "learning_rate": 5.117169285950791e-06,
      "loss": 2.042,
      "step": 51040
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0506457090377808,
      "learning_rate": 5.116809973469504e-06,
      "loss": 2.5033,
      "step": 51041
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.099077820777893,
      "learning_rate": 5.116450669266549e-06,
      "loss": 2.4171,
      "step": 51042
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0928971767425537,
      "learning_rate": 5.116091373342533e-06,
      "loss": 2.3413,
      "step": 51043
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2138835191726685,
      "learning_rate": 5.115732085698073e-06,
      "loss": 2.546,
      "step": 51044
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0290842056274414,
      "learning_rate": 5.115372806333769e-06,
      "loss": 2.2688,
      "step": 51045
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0138776302337646,
      "learning_rate": 5.11501353525024e-06,
      "loss": 2.3234,
      "step": 51046
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.190409541130066,
      "learning_rate": 5.114654272448084e-06,
      "loss": 2.4344,
      "step": 51047
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1185526847839355,
      "learning_rate": 5.114295017927921e-06,
      "loss": 2.4291,
      "step": 51048
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1811459064483643,
      "learning_rate": 5.11393577169035e-06,
      "loss": 2.4532,
      "step": 51049
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0268396139144897,
      "learning_rate": 5.1135765337359886e-06,
      "loss": 2.5678,
      "step": 51050
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9854137897491455,
      "learning_rate": 5.113217304065443e-06,
      "loss": 2.2013,
      "step": 51051
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0990082025527954,
      "learning_rate": 5.112858082679317e-06,
      "loss": 2.329,
      "step": 51052
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1107157468795776,
      "learning_rate": 5.1124988695782285e-06,
      "loss": 2.4772,
      "step": 51053
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0682306289672852,
      "learning_rate": 5.112139664762782e-06,
      "loss": 2.4363,
      "step": 51054
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1197031736373901,
      "learning_rate": 5.111780468233582e-06,
      "loss": 2.4675,
      "step": 51055
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0841103792190552,
      "learning_rate": 5.111421279991244e-06,
      "loss": 2.3627,
      "step": 51056
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0028388500213623,
      "learning_rate": 5.111062100036377e-06,
      "loss": 2.331,
      "step": 51057
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0845094919204712,
      "learning_rate": 5.110702928369583e-06,
      "loss": 2.3919,
      "step": 51058
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0583750009536743,
      "learning_rate": 5.110343764991479e-06,
      "loss": 2.3105,
      "step": 51059
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1135731935501099,
      "learning_rate": 5.109984609902666e-06,
      "loss": 2.139,
      "step": 51060
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1529451608657837,
      "learning_rate": 5.109625463103762e-06,
      "loss": 2.4112,
      "step": 51061
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0920060873031616,
      "learning_rate": 5.1092663245953665e-06,
      "loss": 2.3568,
      "step": 51062
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.9389047622680664,
      "learning_rate": 5.108907194378097e-06,
      "loss": 2.2865,
      "step": 51063
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1649342775344849,
      "learning_rate": 5.108548072452555e-06,
      "loss": 2.3356,
      "step": 51064
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0562996864318848,
      "learning_rate": 5.108188958819356e-06,
      "loss": 2.4314,
      "step": 51065
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0796281099319458,
      "learning_rate": 5.107829853479101e-06,
      "loss": 2.4243,
      "step": 51066
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0713624954223633,
      "learning_rate": 5.107470756432411e-06,
      "loss": 2.4369,
      "step": 51067
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.1748427152633667,
      "learning_rate": 5.1071116676798785e-06,
      "loss": 2.3189,
      "step": 51068
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0607985258102417,
      "learning_rate": 5.106752587222126e-06,
      "loss": 2.3616,
      "step": 51069
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0952547788619995,
      "learning_rate": 5.106393515059751e-06,
      "loss": 2.2692,
      "step": 51070
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3580454587936401,
      "learning_rate": 5.106034451193374e-06,
      "loss": 2.3275,
      "step": 51071
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2653372287750244,
      "learning_rate": 5.105675395623592e-06,
      "loss": 2.3718,
      "step": 51072
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.077708125114441,
      "learning_rate": 5.105316348351024e-06,
      "loss": 2.3629,
      "step": 51073
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0211437940597534,
      "learning_rate": 5.104957309376271e-06,
      "loss": 2.5441,
      "step": 51074
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1782444715499878,
      "learning_rate": 5.104598278699948e-06,
      "loss": 2.2837,
      "step": 51075
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0777539014816284,
      "learning_rate": 5.104239256322657e-06,
      "loss": 2.4024,
      "step": 51076
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9724181294441223,
      "learning_rate": 5.103880242245014e-06,
      "loss": 2.1729,
      "step": 51077
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1123342514038086,
      "learning_rate": 5.1035212364676205e-06,
      "loss": 2.5454,
      "step": 51078
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.159237027168274,
      "learning_rate": 5.1031622389910905e-06,
      "loss": 2.4631,
      "step": 51079
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1093940734863281,
      "learning_rate": 5.102803249816033e-06,
      "loss": 2.4043,
      "step": 51080
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1066967248916626,
      "learning_rate": 5.102444268943052e-06,
      "loss": 2.3537,
      "step": 51081
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0715705156326294,
      "learning_rate": 5.1020852963727545e-06,
      "loss": 2.1351,
      "step": 51082
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1109561920166016,
      "learning_rate": 5.101726332105756e-06,
      "loss": 2.3871,
      "step": 51083
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9446277022361755,
      "learning_rate": 5.10136737614266e-06,
      "loss": 2.4108,
      "step": 51084
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.136447548866272,
      "learning_rate": 5.10100842848408e-06,
      "loss": 2.426,
      "step": 51085
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1493669748306274,
      "learning_rate": 5.10064948913062e-06,
      "loss": 2.1179,
      "step": 51086
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1743924617767334,
      "learning_rate": 5.100290558082886e-06,
      "loss": 2.5749,
      "step": 51087
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0337271690368652,
      "learning_rate": 5.0999316353414955e-06,
      "loss": 2.4056,
      "step": 51088
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1017911434173584,
      "learning_rate": 5.0995727209070465e-06,
      "loss": 2.4701,
      "step": 51089
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0363271236419678,
      "learning_rate": 5.099213814780157e-06,
      "loss": 2.1322,
      "step": 51090
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0447916984558105,
      "learning_rate": 5.098854916961426e-06,
      "loss": 2.4148,
      "step": 51091
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.123092532157898,
      "learning_rate": 5.098496027451474e-06,
      "loss": 2.156,
      "step": 51092
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0989983081817627,
      "learning_rate": 5.0981371462509e-06,
      "loss": 2.2998,
      "step": 51093
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0693436861038208,
      "learning_rate": 5.097778273360315e-06,
      "loss": 2.5132,
      "step": 51094
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2274290323257446,
      "learning_rate": 5.097419408780324e-06,
      "loss": 2.3769,
      "step": 51095
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0469876527786255,
      "learning_rate": 5.0970605525115434e-06,
      "loss": 2.2117,
      "step": 51096
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0322015285491943,
      "learning_rate": 5.096701704554572e-06,
      "loss": 2.3144,
      "step": 51097
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0293183326721191,
      "learning_rate": 5.096342864910026e-06,
      "loss": 2.1955,
      "step": 51098
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.103783130645752,
      "learning_rate": 5.095984033578507e-06,
      "loss": 2.6257,
      "step": 51099
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0811973810195923,
      "learning_rate": 5.095625210560632e-06,
      "loss": 2.4082,
      "step": 51100
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1508703231811523,
      "learning_rate": 5.0952663958569995e-06,
      "loss": 2.543,
      "step": 51101
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0350325107574463,
      "learning_rate": 5.0949075894682276e-06,
      "loss": 2.0522,
      "step": 51102
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.026900053024292,
      "learning_rate": 5.094548791394914e-06,
      "loss": 2.258,
      "step": 51103
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4266506433486938,
      "learning_rate": 5.094190001637678e-06,
      "loss": 2.2927,
      "step": 51104
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1026369333267212,
      "learning_rate": 5.093831220197116e-06,
      "loss": 2.1592,
      "step": 51105
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0012223720550537,
      "learning_rate": 5.093472447073853e-06,
      "loss": 2.3191,
      "step": 51106
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0882922410964966,
      "learning_rate": 5.093113682268478e-06,
      "loss": 2.3635,
      "step": 51107
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1547493934631348,
      "learning_rate": 5.092754925781611e-06,
      "loss": 2.4157,
      "step": 51108
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1029011011123657,
      "learning_rate": 5.092396177613854e-06,
      "loss": 2.3527,
      "step": 51109
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.114256501197815,
      "learning_rate": 5.0920374377658235e-06,
      "loss": 2.3009,
      "step": 51110
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0304579734802246,
      "learning_rate": 5.0916787062381166e-06,
      "loss": 2.2239,
      "step": 51111
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1369117498397827,
      "learning_rate": 5.091319983031353e-06,
      "loss": 2.58,
      "step": 51112
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0611040592193604,
      "learning_rate": 5.090961268146134e-06,
      "loss": 2.3787,
      "step": 51113
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0773340463638306,
      "learning_rate": 5.090602561583066e-06,
      "loss": 2.2257,
      "step": 51114
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.99309241771698,
      "learning_rate": 5.090243863342763e-06,
      "loss": 2.1882,
      "step": 51115
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9819972515106201,
      "learning_rate": 5.089885173425826e-06,
      "loss": 2.5812,
      "step": 51116
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0860037803649902,
      "learning_rate": 5.089526491832872e-06,
      "loss": 2.3356,
      "step": 51117
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2979079484939575,
      "learning_rate": 5.0891678185645e-06,
      "loss": 2.4342,
      "step": 51118
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0396437644958496,
      "learning_rate": 5.0888091536213266e-06,
      "loss": 2.3297,
      "step": 51119
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.045024037361145,
      "learning_rate": 5.0884504970039565e-06,
      "loss": 2.2251,
      "step": 51120
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1055909395217896,
      "learning_rate": 5.088091848712995e-06,
      "loss": 2.081,
      "step": 51121
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.096582055091858,
      "learning_rate": 5.087733208749048e-06,
      "loss": 2.2969,
      "step": 51122
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0793302059173584,
      "learning_rate": 5.087374577112732e-06,
      "loss": 2.4918,
      "step": 51123
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1205636262893677,
      "learning_rate": 5.087015953804646e-06,
      "loss": 2.2631,
      "step": 51124
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.104467511177063,
      "learning_rate": 5.086657338825407e-06,
      "loss": 2.1569,
      "step": 51125
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0884475708007812,
      "learning_rate": 5.086298732175613e-06,
      "loss": 2.3768,
      "step": 51126
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0601328611373901,
      "learning_rate": 5.085940133855882e-06,
      "loss": 2.2474,
      "step": 51127
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0883712768554688,
      "learning_rate": 5.085581543866813e-06,
      "loss": 2.2604,
      "step": 51128
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9580998420715332,
      "learning_rate": 5.0852229622090224e-06,
      "loss": 2.3424,
      "step": 51129
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0948213338851929,
      "learning_rate": 5.084864388883108e-06,
      "loss": 2.1601,
      "step": 51130
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0378695726394653,
      "learning_rate": 5.084505823889689e-06,
      "loss": 2.032,
      "step": 51131
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4340918064117432,
      "learning_rate": 5.0841472672293644e-06,
      "loss": 2.3326,
      "step": 51132
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.086470603942871,
      "learning_rate": 5.083788718902751e-06,
      "loss": 2.3408,
      "step": 51133
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9451881051063538,
      "learning_rate": 5.083430178910445e-06,
      "loss": 2.3722,
      "step": 51134
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0802186727523804,
      "learning_rate": 5.083071647253064e-06,
      "loss": 2.2555,
      "step": 51135
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.049589991569519,
      "learning_rate": 5.082713123931207e-06,
      "loss": 2.2913,
      "step": 51136
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.097285270690918,
      "learning_rate": 5.08235460894549e-06,
      "loss": 2.1985,
      "step": 51137
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0314428806304932,
      "learning_rate": 5.081996102296516e-06,
      "loss": 2.2728,
      "step": 51138
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1452293395996094,
      "learning_rate": 5.081637603984897e-06,
      "loss": 2.2709,
      "step": 51139
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0942403078079224,
      "learning_rate": 5.0812791140112336e-06,
      "loss": 2.1549,
      "step": 51140
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9529996514320374,
      "learning_rate": 5.080920632376143e-06,
      "loss": 2.4965,
      "step": 51141
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0952500104904175,
      "learning_rate": 5.0805621590802265e-06,
      "loss": 2.2979,
      "step": 51142
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0386829376220703,
      "learning_rate": 5.0802036941240905e-06,
      "loss": 2.3347,
      "step": 51143
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.183211326599121,
      "learning_rate": 5.07984523750835e-06,
      "loss": 2.1712,
      "step": 51144
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.091023325920105,
      "learning_rate": 5.079486789233603e-06,
      "loss": 2.2947,
      "step": 51145
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0797187089920044,
      "learning_rate": 5.079128349300466e-06,
      "loss": 2.4521,
      "step": 51146
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0763221979141235,
      "learning_rate": 5.078769917709543e-06,
      "loss": 2.1745,
      "step": 51147
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0724080801010132,
      "learning_rate": 5.078411494461442e-06,
      "loss": 2.3508,
      "step": 51148
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.175140380859375,
      "learning_rate": 5.078053079556766e-06,
      "loss": 2.2385,
      "step": 51149
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0801928043365479,
      "learning_rate": 5.077694672996132e-06,
      "loss": 2.2064,
      "step": 51150
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.038802981376648,
      "learning_rate": 5.077336274780138e-06,
      "loss": 2.3222,
      "step": 51151
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1061389446258545,
      "learning_rate": 5.076977884909399e-06,
      "loss": 2.2052,
      "step": 51152
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.783813714981079,
      "learning_rate": 5.076619503384516e-06,
      "loss": 2.3209,
      "step": 51153
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1572113037109375,
      "learning_rate": 5.076261130206104e-06,
      "loss": 2.107,
      "step": 51154
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0359221696853638,
      "learning_rate": 5.075902765374763e-06,
      "loss": 2.3979,
      "step": 51155
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.098845362663269,
      "learning_rate": 5.0755444088911076e-06,
      "loss": 2.3993,
      "step": 51156
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1098119020462036,
      "learning_rate": 5.075186060755739e-06,
      "loss": 2.271,
      "step": 51157
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0832716226577759,
      "learning_rate": 5.074827720969271e-06,
      "loss": 2.2967,
      "step": 51158
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0772781372070312,
      "learning_rate": 5.074469389532306e-06,
      "loss": 2.2595,
      "step": 51159
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9669939279556274,
      "learning_rate": 5.074111066445455e-06,
      "loss": 2.2841,
      "step": 51160
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1231372356414795,
      "learning_rate": 5.07375275170932e-06,
      "loss": 2.3738,
      "step": 51161
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0259228944778442,
      "learning_rate": 5.073394445324516e-06,
      "loss": 2.3212,
      "step": 51162
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.070446491241455,
      "learning_rate": 5.073036147291641e-06,
      "loss": 2.3559,
      "step": 51163
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0758496522903442,
      "learning_rate": 5.072677857611312e-06,
      "loss": 2.2853,
      "step": 51164
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9710341095924377,
      "learning_rate": 5.072319576284129e-06,
      "loss": 2.4661,
      "step": 51165
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0017117261886597,
      "learning_rate": 5.071961303310706e-06,
      "loss": 2.8188,
      "step": 51166
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0683815479278564,
      "learning_rate": 5.071603038691644e-06,
      "loss": 2.2878,
      "step": 51167
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.048539638519287,
      "learning_rate": 5.0712447824275575e-06,
      "loss": 2.4736,
      "step": 51168
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.15293288230896,
      "learning_rate": 5.070886534519049e-06,
      "loss": 2.3489,
      "step": 51169
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0231595039367676,
      "learning_rate": 5.070528294966723e-06,
      "loss": 2.5048,
      "step": 51170
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0550154447555542,
      "learning_rate": 5.0701700637711935e-06,
      "loss": 2.438,
      "step": 51171
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9774651527404785,
      "learning_rate": 5.069811840933065e-06,
      "loss": 2.2906,
      "step": 51172
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1310077905654907,
      "learning_rate": 5.069453626452941e-06,
      "loss": 2.4282,
      "step": 51173
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1835869550704956,
      "learning_rate": 5.069095420331436e-06,
      "loss": 2.1333,
      "step": 51174
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0908594131469727,
      "learning_rate": 5.068737222569154e-06,
      "loss": 2.2758,
      "step": 51175
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.413226842880249,
      "learning_rate": 5.068379033166697e-06,
      "loss": 2.2675,
      "step": 51176
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1826958656311035,
      "learning_rate": 5.068020852124681e-06,
      "loss": 2.4819,
      "step": 51177
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.146824836730957,
      "learning_rate": 5.067662679443706e-06,
      "loss": 2.3751,
      "step": 51178
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.188010811805725,
      "learning_rate": 5.067304515124385e-06,
      "loss": 2.3524,
      "step": 51179
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2431823015213013,
      "learning_rate": 5.066946359167319e-06,
      "loss": 2.2253,
      "step": 51180
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1538348197937012,
      "learning_rate": 5.066588211573122e-06,
      "loss": 2.4288,
      "step": 51181
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0670273303985596,
      "learning_rate": 5.066230072342395e-06,
      "loss": 2.5937,
      "step": 51182
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1212157011032104,
      "learning_rate": 5.065871941475753e-06,
      "loss": 2.319,
      "step": 51183
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0453113317489624,
      "learning_rate": 5.065513818973793e-06,
      "loss": 2.2365,
      "step": 51184
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1952872276306152,
      "learning_rate": 5.06515570483713e-06,
      "loss": 2.3208,
      "step": 51185
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1435511112213135,
      "learning_rate": 5.06479759906637e-06,
      "loss": 2.5719,
      "step": 51186
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0973817110061646,
      "learning_rate": 5.064439501662117e-06,
      "loss": 2.3747,
      "step": 51187
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0604257583618164,
      "learning_rate": 5.064081412624977e-06,
      "loss": 2.5485,
      "step": 51188
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0459040403366089,
      "learning_rate": 5.063723331955564e-06,
      "loss": 2.3621,
      "step": 51189
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0512524843215942,
      "learning_rate": 5.063365259654476e-06,
      "loss": 2.4366,
      "step": 51190
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0186054706573486,
      "learning_rate": 5.063007195722327e-06,
      "loss": 2.2845,
      "step": 51191
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9322623014450073,
      "learning_rate": 5.06264914015972e-06,
      "loss": 2.392,
      "step": 51192
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0493232011795044,
      "learning_rate": 5.062291092967266e-06,
      "loss": 2.4511,
      "step": 51193
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0654749870300293,
      "learning_rate": 5.061933054145567e-06,
      "loss": 2.4537,
      "step": 51194
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0917272567749023,
      "learning_rate": 5.061575023695236e-06,
      "loss": 2.3533,
      "step": 51195
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.104433298110962,
      "learning_rate": 5.061217001616873e-06,
      "loss": 2.2189,
      "step": 51196
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2415865659713745,
      "learning_rate": 5.060858987911093e-06,
      "loss": 2.3689,
      "step": 51197
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1144438982009888,
      "learning_rate": 5.060500982578497e-06,
      "loss": 2.5341,
      "step": 51198
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1411161422729492,
      "learning_rate": 5.060142985619694e-06,
      "loss": 2.2712,
      "step": 51199
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1554886102676392,
      "learning_rate": 5.0597849970352865e-06,
      "loss": 2.4616,
      "step": 51200
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9918678402900696,
      "learning_rate": 5.059427016825888e-06,
      "loss": 2.1203,
      "step": 51201
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1113934516906738,
      "learning_rate": 5.0590690449920995e-06,
      "loss": 2.33,
      "step": 51202
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1379746198654175,
      "learning_rate": 5.058711081534536e-06,
      "loss": 2.3699,
      "step": 51203
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1895860433578491,
      "learning_rate": 5.058353126453798e-06,
      "loss": 2.2826,
      "step": 51204
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1159641742706299,
      "learning_rate": 5.05799517975049e-06,
      "loss": 2.534,
      "step": 51205
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6559734344482422,
      "learning_rate": 5.057637241425226e-06,
      "loss": 2.3045,
      "step": 51206
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1055150032043457,
      "learning_rate": 5.057279311478605e-06,
      "loss": 2.5597,
      "step": 51207
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1491107940673828,
      "learning_rate": 5.0569213899112425e-06,
      "loss": 2.3088,
      "step": 51208
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2885452508926392,
      "learning_rate": 5.056563476723737e-06,
      "loss": 2.3439,
      "step": 51209
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0327643156051636,
      "learning_rate": 5.056205571916702e-06,
      "loss": 2.5405,
      "step": 51210
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0555930137634277,
      "learning_rate": 5.055847675490738e-06,
      "loss": 2.3357,
      "step": 51211
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1332354545593262,
      "learning_rate": 5.055489787446464e-06,
      "loss": 2.5076,
      "step": 51212
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9969822764396667,
      "learning_rate": 5.055131907784468e-06,
      "loss": 2.0858,
      "step": 51213
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9850546717643738,
      "learning_rate": 5.054774036505371e-06,
      "loss": 2.1895,
      "step": 51214
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1175336837768555,
      "learning_rate": 5.054416173609771e-06,
      "loss": 2.2887,
      "step": 51215
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.272324562072754,
      "learning_rate": 5.054058319098283e-06,
      "loss": 2.1508,
      "step": 51216
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9924213886260986,
      "learning_rate": 5.053700472971506e-06,
      "loss": 2.245,
      "step": 51217
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3773545026779175,
      "learning_rate": 5.053342635230053e-06,
      "loss": 2.3518,
      "step": 51218
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0364580154418945,
      "learning_rate": 5.052984805874525e-06,
      "loss": 2.4164,
      "step": 51219
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0514392852783203,
      "learning_rate": 5.0526269849055335e-06,
      "loss": 2.3931,
      "step": 51220
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0064469575881958,
      "learning_rate": 5.05226917232368e-06,
      "loss": 2.3041,
      "step": 51221
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1687088012695312,
      "learning_rate": 5.051911368129579e-06,
      "loss": 2.3108,
      "step": 51222
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1168572902679443,
      "learning_rate": 5.051553572323826e-06,
      "loss": 2.4178,
      "step": 51223
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.12020742893219,
      "learning_rate": 5.051195784907039e-06,
      "loss": 2.3076,
      "step": 51224
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9794755578041077,
      "learning_rate": 5.0508380058798204e-06,
      "loss": 2.2719,
      "step": 51225
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2165331840515137,
      "learning_rate": 5.050480235242774e-06,
      "loss": 2.216,
      "step": 51226
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0404361486434937,
      "learning_rate": 5.050122472996504e-06,
      "loss": 2.3855,
      "step": 51227
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0722594261169434,
      "learning_rate": 5.0497647191416255e-06,
      "loss": 2.5115,
      "step": 51228
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1445351839065552,
      "learning_rate": 5.049406973678734e-06,
      "loss": 2.2716,
      "step": 51229
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1875451803207397,
      "learning_rate": 5.049049236608449e-06,
      "loss": 2.3707,
      "step": 51230
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0884779691696167,
      "learning_rate": 5.048691507931369e-06,
      "loss": 2.3594,
      "step": 51231
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1283239126205444,
      "learning_rate": 5.048333787648097e-06,
      "loss": 2.3258,
      "step": 51232
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0884373188018799,
      "learning_rate": 5.0479760757592485e-06,
      "loss": 2.1429,
      "step": 51233
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1540836095809937,
      "learning_rate": 5.047618372265424e-06,
      "loss": 2.2655,
      "step": 51234
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1025283336639404,
      "learning_rate": 5.047260677167232e-06,
      "loss": 2.4928,
      "step": 51235
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0519025325775146,
      "learning_rate": 5.046902990465277e-06,
      "loss": 2.4618,
      "step": 51236
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0383563041687012,
      "learning_rate": 5.046545312160168e-06,
      "loss": 2.3182,
      "step": 51237
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0419527292251587,
      "learning_rate": 5.0461876422525115e-06,
      "loss": 2.225,
      "step": 51238
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0844359397888184,
      "learning_rate": 5.045829980742911e-06,
      "loss": 2.3018,
      "step": 51239
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.131030797958374,
      "learning_rate": 5.0454723276319725e-06,
      "loss": 2.2599,
      "step": 51240
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0792337656021118,
      "learning_rate": 5.045114682920307e-06,
      "loss": 2.2129,
      "step": 51241
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9853088855743408,
      "learning_rate": 5.044757046608514e-06,
      "loss": 2.3548,
      "step": 51242
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1425096988677979,
      "learning_rate": 5.044399418697207e-06,
      "loss": 2.2974,
      "step": 51243
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0542949438095093,
      "learning_rate": 5.044041799186985e-06,
      "loss": 2.3108,
      "step": 51244
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0454109907150269,
      "learning_rate": 5.043684188078463e-06,
      "loss": 2.3775,
      "step": 51245
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0609405040740967,
      "learning_rate": 5.043326585372238e-06,
      "loss": 2.3607,
      "step": 51246
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0603200197219849,
      "learning_rate": 5.042968991068924e-06,
      "loss": 2.3822,
      "step": 51247
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0805699825286865,
      "learning_rate": 5.04261140516912e-06,
      "loss": 2.3183,
      "step": 51248
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1831214427947998,
      "learning_rate": 5.0422538276734415e-06,
      "loss": 2.4892,
      "step": 51249
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0143815279006958,
      "learning_rate": 5.041896258582484e-06,
      "loss": 2.3397,
      "step": 51250
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3535130023956299,
      "learning_rate": 5.0415386978968664e-06,
      "loss": 2.3213,
      "step": 51251
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1631137132644653,
      "learning_rate": 5.041181145617181e-06,
      "loss": 2.2975,
      "step": 51252
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1536880731582642,
      "learning_rate": 5.040823601744043e-06,
      "loss": 2.4284,
      "step": 51253
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1969891786575317,
      "learning_rate": 5.040466066278054e-06,
      "loss": 2.3941,
      "step": 51254
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1662241220474243,
      "learning_rate": 5.040108539219824e-06,
      "loss": 2.3583,
      "step": 51255
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1527680158615112,
      "learning_rate": 5.039751020569953e-06,
      "loss": 2.5999,
      "step": 51256
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0776821374893188,
      "learning_rate": 5.039393510329056e-06,
      "loss": 2.531,
      "step": 51257
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1772363185882568,
      "learning_rate": 5.03903600849773e-06,
      "loss": 2.2429,
      "step": 51258
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1239104270935059,
      "learning_rate": 5.038678515076589e-06,
      "loss": 2.139,
      "step": 51259
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0721917152404785,
      "learning_rate": 5.038321030066235e-06,
      "loss": 2.448,
      "step": 51260
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3251144886016846,
      "learning_rate": 5.037963553467272e-06,
      "loss": 2.4077,
      "step": 51261
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0338501930236816,
      "learning_rate": 5.037606085280311e-06,
      "loss": 2.3166,
      "step": 51262
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3815821409225464,
      "learning_rate": 5.037248625505953e-06,
      "loss": 2.2801,
      "step": 51263
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1458617448806763,
      "learning_rate": 5.036891174144809e-06,
      "loss": 2.433,
      "step": 51264
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1445307731628418,
      "learning_rate": 5.036533731197484e-06,
      "loss": 2.5299,
      "step": 51265
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1079416275024414,
      "learning_rate": 5.036176296664581e-06,
      "loss": 2.3533,
      "step": 51266
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.131926417350769,
      "learning_rate": 5.035818870546704e-06,
      "loss": 2.1993,
      "step": 51267
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1375608444213867,
      "learning_rate": 5.035461452844465e-06,
      "loss": 2.3836,
      "step": 51268
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.086122989654541,
      "learning_rate": 5.035104043558466e-06,
      "loss": 2.5291,
      "step": 51269
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9559184312820435,
      "learning_rate": 5.034746642689314e-06,
      "loss": 2.2401,
      "step": 51270
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0073363780975342,
      "learning_rate": 5.034389250237615e-06,
      "loss": 2.2174,
      "step": 51271
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9623116850852966,
      "learning_rate": 5.034031866203976e-06,
      "loss": 2.1957,
      "step": 51272
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0866668224334717,
      "learning_rate": 5.033674490589e-06,
      "loss": 2.1895,
      "step": 51273
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2267251014709473,
      "learning_rate": 5.033317123393296e-06,
      "loss": 2.4021,
      "step": 51274
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.075756549835205,
      "learning_rate": 5.032959764617467e-06,
      "loss": 2.2883,
      "step": 51275
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1655157804489136,
      "learning_rate": 5.032602414262122e-06,
      "loss": 2.4954,
      "step": 51276
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0657833814620972,
      "learning_rate": 5.032245072327863e-06,
      "loss": 2.3569,
      "step": 51277
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.165228009223938,
      "learning_rate": 5.031887738815304e-06,
      "loss": 2.4274,
      "step": 51278
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0776212215423584,
      "learning_rate": 5.031530413725038e-06,
      "loss": 2.4812,
      "step": 51279
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0855984687805176,
      "learning_rate": 5.031173097057681e-06,
      "loss": 2.4057,
      "step": 51280
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.093697428703308,
      "learning_rate": 5.030815788813831e-06,
      "loss": 2.4019,
      "step": 51281
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9522297382354736,
      "learning_rate": 5.030458488994101e-06,
      "loss": 2.4776,
      "step": 51282
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1112054586410522,
      "learning_rate": 5.030101197599091e-06,
      "loss": 2.581,
      "step": 51283
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.001135230064392,
      "learning_rate": 5.0297439146294126e-06,
      "loss": 2.2328,
      "step": 51284
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.604974627494812,
      "learning_rate": 5.029386640085665e-06,
      "loss": 2.2438,
      "step": 51285
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0569887161254883,
      "learning_rate": 5.0290293739684595e-06,
      "loss": 2.4601,
      "step": 51286
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0398991107940674,
      "learning_rate": 5.0286721162784e-06,
      "loss": 2.306,
      "step": 51287
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0402647256851196,
      "learning_rate": 5.028314867016089e-06,
      "loss": 2.1413,
      "step": 51288
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.255228042602539,
      "learning_rate": 5.027957626182136e-06,
      "loss": 2.1741,
      "step": 51289
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1005741357803345,
      "learning_rate": 5.027600393777143e-06,
      "loss": 2.3213,
      "step": 51290
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.067217469215393,
      "learning_rate": 5.027243169801721e-06,
      "loss": 2.3019,
      "step": 51291
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0843544006347656,
      "learning_rate": 5.026885954256473e-06,
      "loss": 2.3968,
      "step": 51292
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.030566930770874,
      "learning_rate": 5.026528747142004e-06,
      "loss": 2.487,
      "step": 51293
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2339448928833008,
      "learning_rate": 5.026171548458914e-06,
      "loss": 2.5296,
      "step": 51294
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2320982217788696,
      "learning_rate": 5.025814358207819e-06,
      "loss": 2.3027,
      "step": 51295
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1083345413208008,
      "learning_rate": 5.025457176389317e-06,
      "loss": 2.2114,
      "step": 51296
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0262010097503662,
      "learning_rate": 5.025100003004018e-06,
      "loss": 2.126,
      "step": 51297
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0010908842086792,
      "learning_rate": 5.024742838052523e-06,
      "loss": 2.2021,
      "step": 51298
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9978140592575073,
      "learning_rate": 5.024385681535444e-06,
      "loss": 2.2489,
      "step": 51299
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2832599878311157,
      "learning_rate": 5.0240285334533775e-06,
      "loss": 2.3588,
      "step": 51300
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0955551862716675,
      "learning_rate": 5.023671393806939e-06,
      "loss": 2.42,
      "step": 51301
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0673933029174805,
      "learning_rate": 5.0233142625967255e-06,
      "loss": 2.3555,
      "step": 51302
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0346980094909668,
      "learning_rate": 5.02295713982335e-06,
      "loss": 2.2058,
      "step": 51303
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9516595005989075,
      "learning_rate": 5.0226000254874145e-06,
      "loss": 2.1896,
      "step": 51304
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0423290729522705,
      "learning_rate": 5.022242919589522e-06,
      "loss": 2.4683,
      "step": 51305
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1118048429489136,
      "learning_rate": 5.021885822130277e-06,
      "loss": 2.2337,
      "step": 51306
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2457505464553833,
      "learning_rate": 5.0215287331102905e-06,
      "loss": 2.2998,
      "step": 51307
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9710497856140137,
      "learning_rate": 5.021171652530161e-06,
      "loss": 2.2127,
      "step": 51308
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0944888591766357,
      "learning_rate": 5.020814580390503e-06,
      "loss": 2.3466,
      "step": 51309
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2722898721694946,
      "learning_rate": 5.020457516691911e-06,
      "loss": 2.5382,
      "step": 51310
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0845168828964233,
      "learning_rate": 5.020100461435e-06,
      "loss": 2.3559,
      "step": 51311
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1060582399368286,
      "learning_rate": 5.019743414620368e-06,
      "loss": 2.4823,
      "step": 51312
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2253272533416748,
      "learning_rate": 5.019386376248628e-06,
      "loss": 2.3167,
      "step": 51313
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.291617751121521,
      "learning_rate": 5.019029346320377e-06,
      "loss": 2.3778,
      "step": 51314
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.123940110206604,
      "learning_rate": 5.018672324836226e-06,
      "loss": 2.2653,
      "step": 51315
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.133022665977478,
      "learning_rate": 5.018315311796781e-06,
      "loss": 2.374,
      "step": 51316
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1908332109451294,
      "learning_rate": 5.017958307202644e-06,
      "loss": 2.2013,
      "step": 51317
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0940877199172974,
      "learning_rate": 5.017601311054415e-06,
      "loss": 2.2801,
      "step": 51318
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0710759162902832,
      "learning_rate": 5.01724432335271e-06,
      "loss": 2.3993,
      "step": 51319
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0488073825836182,
      "learning_rate": 5.016887344098126e-06,
      "loss": 2.3592,
      "step": 51320
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2100411653518677,
      "learning_rate": 5.016530373291275e-06,
      "loss": 2.4771,
      "step": 51321
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.315813422203064,
      "learning_rate": 5.016173410932757e-06,
      "loss": 2.4044,
      "step": 51322
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.289380669593811,
      "learning_rate": 5.015816457023176e-06,
      "loss": 2.4718,
      "step": 51323
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1262006759643555,
      "learning_rate": 5.015459511563143e-06,
      "loss": 2.1957,
      "step": 51324
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0590368509292603,
      "learning_rate": 5.015102574553256e-06,
      "loss": 2.4367,
      "step": 51325
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0680818557739258,
      "learning_rate": 5.01474564599413e-06,
      "loss": 2.2754,
      "step": 51326
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2072410583496094,
      "learning_rate": 5.0143887258863584e-06,
      "loss": 2.4686,
      "step": 51327
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.111627459526062,
      "learning_rate": 5.014031814230556e-06,
      "loss": 2.3172,
      "step": 51328
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1805094480514526,
      "learning_rate": 5.0136749110273195e-06,
      "loss": 2.439,
      "step": 51329
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1840723752975464,
      "learning_rate": 5.013318016277267e-06,
      "loss": 2.3711,
      "step": 51330
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0739798545837402,
      "learning_rate": 5.012961129980987e-06,
      "loss": 2.2933,
      "step": 51331
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0328267812728882,
      "learning_rate": 5.012604252139095e-06,
      "loss": 2.3881,
      "step": 51332
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.215879201889038,
      "learning_rate": 5.012247382752191e-06,
      "loss": 2.4301,
      "step": 51333
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.02290678024292,
      "learning_rate": 5.011890521820886e-06,
      "loss": 2.39,
      "step": 51334
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1006933450698853,
      "learning_rate": 5.011533669345776e-06,
      "loss": 2.2827,
      "step": 51335
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.06062650680542,
      "learning_rate": 5.011176825327477e-06,
      "loss": 2.1106,
      "step": 51336
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.053043246269226,
      "learning_rate": 5.010819989766584e-06,
      "loss": 2.4274,
      "step": 51337
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0598002672195435,
      "learning_rate": 5.010463162663709e-06,
      "loss": 2.3727,
      "step": 51338
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.292121410369873,
      "learning_rate": 5.010106344019451e-06,
      "loss": 2.2755,
      "step": 51339
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0097283124923706,
      "learning_rate": 5.009749533834421e-06,
      "loss": 2.32,
      "step": 51340
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1364030838012695,
      "learning_rate": 5.009392732109218e-06,
      "loss": 2.3727,
      "step": 51341
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0846153497695923,
      "learning_rate": 5.009035938844453e-06,
      "loss": 2.4295,
      "step": 51342
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0009733438491821,
      "learning_rate": 5.008679154040728e-06,
      "loss": 2.0689,
      "step": 51343
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3384572267532349,
      "learning_rate": 5.008322377698648e-06,
      "loss": 2.448,
      "step": 51344
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1017400026321411,
      "learning_rate": 5.007965609818813e-06,
      "loss": 2.0851,
      "step": 51345
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3066277503967285,
      "learning_rate": 5.007608850401835e-06,
      "loss": 2.2296,
      "step": 51346
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.05929696559906,
      "learning_rate": 5.007252099448313e-06,
      "loss": 2.1751,
      "step": 51347
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.992534339427948,
      "learning_rate": 5.006895356958858e-06,
      "loss": 2.3623,
      "step": 51348
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0189133882522583,
      "learning_rate": 5.0065386229340715e-06,
      "loss": 2.613,
      "step": 51349
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1577060222625732,
      "learning_rate": 5.006181897374556e-06,
      "loss": 2.3153,
      "step": 51350
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9882256984710693,
      "learning_rate": 5.005825180280921e-06,
      "loss": 2.4278,
      "step": 51351
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0665841102600098,
      "learning_rate": 5.005468471653765e-06,
      "loss": 2.292,
      "step": 51352
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9590708017349243,
      "learning_rate": 5.0051117714937e-06,
      "loss": 2.1607,
      "step": 51353
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0561714172363281,
      "learning_rate": 5.004755079801323e-06,
      "loss": 2.3521,
      "step": 51354
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0795665979385376,
      "learning_rate": 5.004398396577248e-06,
      "loss": 2.346,
      "step": 51355
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1356538534164429,
      "learning_rate": 5.004041721822069e-06,
      "loss": 2.3365,
      "step": 51356
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0044949054718018,
      "learning_rate": 5.0036850555364045e-06,
      "loss": 2.3323,
      "step": 51357
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.157548427581787,
      "learning_rate": 5.003328397720843e-06,
      "loss": 2.184,
      "step": 51358
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0900019407272339,
      "learning_rate": 5.002971748376002e-06,
      "loss": 2.42,
      "step": 51359
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1078397035598755,
      "learning_rate": 5.002615107502475e-06,
      "loss": 2.3099,
      "step": 51360
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0982894897460938,
      "learning_rate": 5.002258475100879e-06,
      "loss": 2.4864,
      "step": 51361
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4276024103164673,
      "learning_rate": 5.001901851171807e-06,
      "loss": 2.2488,
      "step": 51362
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0072005987167358,
      "learning_rate": 5.0015452357158735e-06,
      "loss": 2.2981,
      "step": 51363
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2104166746139526,
      "learning_rate": 5.001188628733673e-06,
      "loss": 2.157,
      "step": 51364
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0446791648864746,
      "learning_rate": 5.000832030225821e-06,
      "loss": 2.2549,
      "step": 51365
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0925647020339966,
      "learning_rate": 5.000475440192911e-06,
      "loss": 2.4437,
      "step": 51366
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0508222579956055,
      "learning_rate": 5.000118858635559e-06,
      "loss": 2.2427,
      "step": 51367
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2763468027114868,
      "learning_rate": 4.999762285554358e-06,
      "loss": 2.1948,
      "step": 51368
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.095374345779419,
      "learning_rate": 4.9994057209499216e-06,
      "loss": 2.3475,
      "step": 51369
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1607608795166016,
      "learning_rate": 4.999049164822851e-06,
      "loss": 2.333,
      "step": 51370
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1874889135360718,
      "learning_rate": 4.99869261717375e-06,
      "loss": 2.31,
      "step": 51371
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1601330041885376,
      "learning_rate": 4.998336078003221e-06,
      "loss": 2.5595,
      "step": 51372
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2347222566604614,
      "learning_rate": 4.997979547311873e-06,
      "loss": 2.363,
      "step": 51373
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0028024911880493,
      "learning_rate": 4.997623025100306e-06,
      "loss": 2.2606,
      "step": 51374
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.157938003540039,
      "learning_rate": 4.997266511369129e-06,
      "loss": 2.1487,
      "step": 51375
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.100426197052002,
      "learning_rate": 4.99691000611894e-06,
      "loss": 2.2051,
      "step": 51376
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.002286672592163,
      "learning_rate": 4.996553509350353e-06,
      "loss": 2.1797,
      "step": 51377
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.031951665878296,
      "learning_rate": 4.996197021063964e-06,
      "loss": 2.5017,
      "step": 51378
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3869017362594604,
      "learning_rate": 4.995840541260378e-06,
      "loss": 2.1979,
      "step": 51379
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0354905128479004,
      "learning_rate": 4.995484069940205e-06,
      "loss": 2.3828,
      "step": 51380
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1621509790420532,
      "learning_rate": 4.9951276071040425e-06,
      "loss": 2.4418,
      "step": 51381
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3523597717285156,
      "learning_rate": 4.994771152752501e-06,
      "loss": 2.3419,
      "step": 51382
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1301708221435547,
      "learning_rate": 4.994414706886182e-06,
      "loss": 2.4262,
      "step": 51383
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1209138631820679,
      "learning_rate": 4.994058269505689e-06,
      "loss": 2.2217,
      "step": 51384
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.082763433456421,
      "learning_rate": 4.993701840611623e-06,
      "loss": 2.4997,
      "step": 51385
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2139071226119995,
      "learning_rate": 4.9933454202045976e-06,
      "loss": 2.1916,
      "step": 51386
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1095374822616577,
      "learning_rate": 4.992989008285206e-06,
      "loss": 2.4039,
      "step": 51387
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1126036643981934,
      "learning_rate": 4.992632604854063e-06,
      "loss": 2.1481,
      "step": 51388
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1027863025665283,
      "learning_rate": 4.992276209911762e-06,
      "loss": 2.2843,
      "step": 51389
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1831399202346802,
      "learning_rate": 4.991919823458918e-06,
      "loss": 2.5083,
      "step": 51390
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.13834547996521,
      "learning_rate": 4.991563445496127e-06,
      "loss": 2.6529,
      "step": 51391
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1727268695831299,
      "learning_rate": 4.9912070760239986e-06,
      "loss": 2.4943,
      "step": 51392
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0737035274505615,
      "learning_rate": 4.990850715043132e-06,
      "loss": 2.3281,
      "step": 51393
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.036076545715332,
      "learning_rate": 4.990494362554137e-06,
      "loss": 2.3346,
      "step": 51394
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0575302839279175,
      "learning_rate": 4.99013801855761e-06,
      "loss": 2.0872,
      "step": 51395
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.133042812347412,
      "learning_rate": 4.989781683054169e-06,
      "loss": 2.2568,
      "step": 51396
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0561810731887817,
      "learning_rate": 4.9894253560444e-06,
      "loss": 2.2972,
      "step": 51397
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0925239324569702,
      "learning_rate": 4.98906903752892e-06,
      "loss": 2.3076,
      "step": 51398
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2895137071609497,
      "learning_rate": 4.988712727508325e-06,
      "loss": 2.2397,
      "step": 51399
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.107927918434143,
      "learning_rate": 4.9883564259832276e-06,
      "loss": 2.2434,
      "step": 51400
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.030552625656128,
      "learning_rate": 4.988000132954223e-06,
      "loss": 2.4923,
      "step": 51401
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.058213710784912,
      "learning_rate": 4.9876438484219225e-06,
      "loss": 2.3803,
      "step": 51402
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.160222053527832,
      "learning_rate": 4.987287572386923e-06,
      "loss": 2.2255,
      "step": 51403
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0048828125,
      "learning_rate": 4.986931304849838e-06,
      "loss": 2.1319,
      "step": 51404
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.087612271308899,
      "learning_rate": 4.986575045811265e-06,
      "loss": 2.3603,
      "step": 51405
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0189673900604248,
      "learning_rate": 4.986218795271805e-06,
      "loss": 2.3339,
      "step": 51406
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9784427881240845,
      "learning_rate": 4.985862553232069e-06,
      "loss": 2.4052,
      "step": 51407
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0830612182617188,
      "learning_rate": 4.985506319692654e-06,
      "loss": 2.4072,
      "step": 51408
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0945268869400024,
      "learning_rate": 4.9851500946541734e-06,
      "loss": 2.4371,
      "step": 51409
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0466852188110352,
      "learning_rate": 4.984793878117224e-06,
      "loss": 2.3551,
      "step": 51410
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1113324165344238,
      "learning_rate": 4.984437670082412e-06,
      "loss": 2.3424,
      "step": 51411
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0659525394439697,
      "learning_rate": 4.984081470550335e-06,
      "loss": 2.5129,
      "step": 51412
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9557170867919922,
      "learning_rate": 4.983725279521607e-06,
      "loss": 2.2159,
      "step": 51413
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9964559078216553,
      "learning_rate": 4.983369096996823e-06,
      "loss": 2.4373,
      "step": 51414
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3902627229690552,
      "learning_rate": 4.983012922976596e-06,
      "loss": 2.3389,
      "step": 51415
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.028693437576294,
      "learning_rate": 4.982656757461519e-06,
      "loss": 2.3647,
      "step": 51416
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1277666091918945,
      "learning_rate": 4.982300600452206e-06,
      "loss": 2.3033,
      "step": 51417
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1716172695159912,
      "learning_rate": 4.981944451949252e-06,
      "loss": 2.1849,
      "step": 51418
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1458560228347778,
      "learning_rate": 4.981588311953271e-06,
      "loss": 2.3679,
      "step": 51419
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.028560757637024,
      "learning_rate": 4.981232180464854e-06,
      "loss": 2.1572,
      "step": 51420
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.444628119468689,
      "learning_rate": 4.980876057484617e-06,
      "loss": 2.3972,
      "step": 51421
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9293978214263916,
      "learning_rate": 4.980519943013154e-06,
      "loss": 2.2404,
      "step": 51422
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0347846746444702,
      "learning_rate": 4.9801638370510805e-06,
      "loss": 2.126,
      "step": 51423
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1903375387191772,
      "learning_rate": 4.979807739598985e-06,
      "loss": 2.5768,
      "step": 51424
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1169427633285522,
      "learning_rate": 4.979451650657483e-06,
      "loss": 2.3342,
      "step": 51425
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8319483995437622,
      "learning_rate": 4.97909557022717e-06,
      "loss": 2.3573,
      "step": 51426
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5503677129745483,
      "learning_rate": 4.978739498308658e-06,
      "loss": 2.4701,
      "step": 51427
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.06902277469635,
      "learning_rate": 4.978383434902542e-06,
      "loss": 2.3029,
      "step": 51428
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1179330348968506,
      "learning_rate": 4.978027380009434e-06,
      "loss": 2.2809,
      "step": 51429
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0358110666275024,
      "learning_rate": 4.97767133362993e-06,
      "loss": 2.4078,
      "step": 51430
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0170217752456665,
      "learning_rate": 4.977315295764639e-06,
      "loss": 2.4345,
      "step": 51431
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1263768672943115,
      "learning_rate": 4.976959266414162e-06,
      "loss": 2.2373,
      "step": 51432
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0719374418258667,
      "learning_rate": 4.976603245579104e-06,
      "loss": 2.3449,
      "step": 51433
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0319136381149292,
      "learning_rate": 4.976247233260071e-06,
      "loss": 2.2633,
      "step": 51434
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1549525260925293,
      "learning_rate": 4.9758912294576565e-06,
      "loss": 2.3836,
      "step": 51435
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2634074687957764,
      "learning_rate": 4.975535234172477e-06,
      "loss": 2.5208,
      "step": 51436
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2319374084472656,
      "learning_rate": 4.975179247405128e-06,
      "loss": 2.2886,
      "step": 51437
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0596319437026978,
      "learning_rate": 4.974823269156213e-06,
      "loss": 2.401,
      "step": 51438
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0381922721862793,
      "learning_rate": 4.97446729942634e-06,
      "loss": 2.2591,
      "step": 51439
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1545971632003784,
      "learning_rate": 4.97411133821611e-06,
      "loss": 2.1665,
      "step": 51440
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0304059982299805,
      "learning_rate": 4.9737553855261235e-06,
      "loss": 2.4015,
      "step": 51441
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0623005628585815,
      "learning_rate": 4.9733994413569885e-06,
      "loss": 2.551,
      "step": 51442
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0302108526229858,
      "learning_rate": 4.973043505709304e-06,
      "loss": 2.2939,
      "step": 51443
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0622739791870117,
      "learning_rate": 4.9726875785836805e-06,
      "loss": 2.3709,
      "step": 51444
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0855506658554077,
      "learning_rate": 4.972331659980713e-06,
      "loss": 2.3094,
      "step": 51445
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1436779499053955,
      "learning_rate": 4.971975749901011e-06,
      "loss": 2.3157,
      "step": 51446
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9958188533782959,
      "learning_rate": 4.9716198483451735e-06,
      "loss": 2.5816,
      "step": 51447
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0793582201004028,
      "learning_rate": 4.9712639553138096e-06,
      "loss": 2.3347,
      "step": 51448
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1283900737762451,
      "learning_rate": 4.970908070807519e-06,
      "loss": 2.3326,
      "step": 51449
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.107019305229187,
      "learning_rate": 4.970552194826905e-06,
      "loss": 2.1785,
      "step": 51450
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2534339427947998,
      "learning_rate": 4.970196327372568e-06,
      "loss": 2.2423,
      "step": 51451
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1766854524612427,
      "learning_rate": 4.969840468445116e-06,
      "loss": 2.3466,
      "step": 51452
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2732365131378174,
      "learning_rate": 4.969484618045148e-06,
      "loss": 2.3175,
      "step": 51453
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2202202081680298,
      "learning_rate": 4.969128776173274e-06,
      "loss": 2.4851,
      "step": 51454
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1061054468154907,
      "learning_rate": 4.968772942830089e-06,
      "loss": 2.2838,
      "step": 51455
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1170592308044434,
      "learning_rate": 4.968417118016205e-06,
      "loss": 2.4077,
      "step": 51456
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1189161539077759,
      "learning_rate": 4.968061301732215e-06,
      "loss": 2.2968,
      "step": 51457
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0968495607376099,
      "learning_rate": 4.967705493978732e-06,
      "loss": 2.0987,
      "step": 51458
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4150151014328003,
      "learning_rate": 4.967349694756351e-06,
      "loss": 2.3841,
      "step": 51459
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.024195909500122,
      "learning_rate": 4.966993904065685e-06,
      "loss": 2.4323,
      "step": 51460
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1542627811431885,
      "learning_rate": 4.966638121907329e-06,
      "loss": 2.2514,
      "step": 51461
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1346004009246826,
      "learning_rate": 4.966282348281889e-06,
      "loss": 2.2382,
      "step": 51462
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.127977728843689,
      "learning_rate": 4.965926583189964e-06,
      "loss": 2.3025,
      "step": 51463
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0509562492370605,
      "learning_rate": 4.965570826632165e-06,
      "loss": 2.1881,
      "step": 51464
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.069719910621643,
      "learning_rate": 4.9652150786090855e-06,
      "loss": 2.0792,
      "step": 51465
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0651112794876099,
      "learning_rate": 4.96485933912134e-06,
      "loss": 2.0967,
      "step": 51466
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.136999487876892,
      "learning_rate": 4.964503608169523e-06,
      "loss": 2.2853,
      "step": 51467
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0542625188827515,
      "learning_rate": 4.964147885754238e-06,
      "loss": 2.2811,
      "step": 51468
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1331120729446411,
      "learning_rate": 4.963792171876093e-06,
      "loss": 2.3224,
      "step": 51469
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0860154628753662,
      "learning_rate": 4.963436466535684e-06,
      "loss": 2.3738,
      "step": 51470
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1461951732635498,
      "learning_rate": 4.963080769733623e-06,
      "loss": 2.4612,
      "step": 51471
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1120612621307373,
      "learning_rate": 4.962725081470503e-06,
      "loss": 2.3239,
      "step": 51472
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1091448068618774,
      "learning_rate": 4.962369401746936e-06,
      "loss": 2.1784,
      "step": 51473
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.083614468574524,
      "learning_rate": 4.962013730563517e-06,
      "loss": 2.3275,
      "step": 51474
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0395095348358154,
      "learning_rate": 4.961658067920863e-06,
      "loss": 2.2847,
      "step": 51475
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9597957134246826,
      "learning_rate": 4.9613024138195565e-06,
      "loss": 2.2366,
      "step": 51476
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9631038904190063,
      "learning_rate": 4.960946768260217e-06,
      "loss": 2.3561,
      "step": 51477
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0618382692337036,
      "learning_rate": 4.960591131243436e-06,
      "loss": 2.4898,
      "step": 51478
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1651474237442017,
      "learning_rate": 4.960235502769826e-06,
      "loss": 2.2782,
      "step": 51479
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0494436025619507,
      "learning_rate": 4.959879882839982e-06,
      "loss": 2.2756,
      "step": 51480
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2656742334365845,
      "learning_rate": 4.959524271454513e-06,
      "loss": 2.3957,
      "step": 51481
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9931043386459351,
      "learning_rate": 4.959168668614017e-06,
      "loss": 2.4192,
      "step": 51482
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1295628547668457,
      "learning_rate": 4.958813074319103e-06,
      "loss": 2.0377,
      "step": 51483
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0205752849578857,
      "learning_rate": 4.958457488570365e-06,
      "loss": 2.2916,
      "step": 51484
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1081931591033936,
      "learning_rate": 4.958101911368416e-06,
      "loss": 2.0984,
      "step": 51485
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1228569746017456,
      "learning_rate": 4.9577463427138495e-06,
      "loss": 2.3668,
      "step": 51486
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9889394044876099,
      "learning_rate": 4.957390782607275e-06,
      "loss": 2.303,
      "step": 51487
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0713495016098022,
      "learning_rate": 4.957035231049289e-06,
      "loss": 2.3602,
      "step": 51488
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1833031177520752,
      "learning_rate": 4.956679688040508e-06,
      "loss": 2.1273,
      "step": 51489
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0363478660583496,
      "learning_rate": 4.9563241535815145e-06,
      "loss": 2.3672,
      "step": 51490
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0628745555877686,
      "learning_rate": 4.9559686276729265e-06,
      "loss": 2.5025,
      "step": 51491
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0036466121673584,
      "learning_rate": 4.955613110315337e-06,
      "loss": 2.2194,
      "step": 51492
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0962095260620117,
      "learning_rate": 4.955257601509358e-06,
      "loss": 2.4033,
      "step": 51493
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2172796726226807,
      "learning_rate": 4.954902101255583e-06,
      "loss": 2.4116,
      "step": 51494
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9442577362060547,
      "learning_rate": 4.954546609554623e-06,
      "loss": 2.2086,
      "step": 51495
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0528461933135986,
      "learning_rate": 4.954191126407077e-06,
      "loss": 2.2303,
      "step": 51496
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9941734671592712,
      "learning_rate": 4.953835651813544e-06,
      "loss": 2.4236,
      "step": 51497
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9939619898796082,
      "learning_rate": 4.953480185774634e-06,
      "loss": 2.2573,
      "step": 51498
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1868247985839844,
      "learning_rate": 4.953124728290941e-06,
      "loss": 2.2196,
      "step": 51499
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2359611988067627,
      "learning_rate": 4.952769279363077e-06,
      "loss": 2.457,
      "step": 51500
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1308261156082153,
      "learning_rate": 4.952413838991636e-06,
      "loss": 2.3968,
      "step": 51501
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0625756978988647,
      "learning_rate": 4.952058407177231e-06,
      "loss": 2.4089,
      "step": 51502
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.168768286705017,
      "learning_rate": 4.951702983920451e-06,
      "loss": 2.2551,
      "step": 51503
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.087575912475586,
      "learning_rate": 4.9513475692219095e-06,
      "loss": 2.448,
      "step": 51504
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1495062112808228,
      "learning_rate": 4.950992163082201e-06,
      "loss": 2.1058,
      "step": 51505
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0513689517974854,
      "learning_rate": 4.950636765501936e-06,
      "loss": 2.4346,
      "step": 51506
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0520437955856323,
      "learning_rate": 4.950281376481709e-06,
      "loss": 2.2551,
      "step": 51507
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0570787191390991,
      "learning_rate": 4.949925996022131e-06,
      "loss": 2.3249,
      "step": 51508
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1991896629333496,
      "learning_rate": 4.949570624123797e-06,
      "loss": 2.2421,
      "step": 51509
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.126158356666565,
      "learning_rate": 4.9492152607873144e-06,
      "loss": 2.3478,
      "step": 51510
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1030209064483643,
      "learning_rate": 4.94885990601328e-06,
      "loss": 2.3745,
      "step": 51511
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.084994912147522,
      "learning_rate": 4.948504559802305e-06,
      "loss": 2.2011,
      "step": 51512
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9903175830841064,
      "learning_rate": 4.948149222154982e-06,
      "loss": 2.175,
      "step": 51513
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1292189359664917,
      "learning_rate": 4.9477938930719225e-06,
      "loss": 2.383,
      "step": 51514
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.960615873336792,
      "learning_rate": 4.947438572553724e-06,
      "loss": 2.3068,
      "step": 51515
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0704680681228638,
      "learning_rate": 4.94708326060099e-06,
      "loss": 2.1567,
      "step": 51516
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.176439642906189,
      "learning_rate": 4.946727957214318e-06,
      "loss": 2.301,
      "step": 51517
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0093467235565186,
      "learning_rate": 4.946372662394318e-06,
      "loss": 2.3702,
      "step": 51518
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1553210020065308,
      "learning_rate": 4.946017376141587e-06,
      "loss": 2.4247,
      "step": 51519
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1494371891021729,
      "learning_rate": 4.945662098456731e-06,
      "loss": 2.2754,
      "step": 51520
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.09413743019104,
      "learning_rate": 4.945306829340347e-06,
      "loss": 2.4762,
      "step": 51521
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4013111591339111,
      "learning_rate": 4.944951568793045e-06,
      "loss": 2.3534,
      "step": 51522
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2199640274047852,
      "learning_rate": 4.944596316815423e-06,
      "loss": 2.3003,
      "step": 51523
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2885794639587402,
      "learning_rate": 4.9442410734080796e-06,
      "loss": 2.4903,
      "step": 51524
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0708661079406738,
      "learning_rate": 4.9438858385716235e-06,
      "loss": 2.3319,
      "step": 51525
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1590957641601562,
      "learning_rate": 4.94353061230665e-06,
      "loss": 2.4211,
      "step": 51526
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0858619213104248,
      "learning_rate": 4.943175394613772e-06,
      "loss": 2.3251,
      "step": 51527
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1443876028060913,
      "learning_rate": 4.942820185493583e-06,
      "loss": 2.5041,
      "step": 51528
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1439990997314453,
      "learning_rate": 4.942464984946688e-06,
      "loss": 2.35,
      "step": 51529
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1237460374832153,
      "learning_rate": 4.9421097929736835e-06,
      "loss": 2.4166,
      "step": 51530
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1498626470565796,
      "learning_rate": 4.941754609575181e-06,
      "loss": 2.2339,
      "step": 51531
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1914345026016235,
      "learning_rate": 4.941399434751776e-06,
      "loss": 2.3385,
      "step": 51532
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1334798336029053,
      "learning_rate": 4.941044268504075e-06,
      "loss": 2.27,
      "step": 51533
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1546618938446045,
      "learning_rate": 4.940689110832674e-06,
      "loss": 2.459,
      "step": 51534
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1318556070327759,
      "learning_rate": 4.940333961738183e-06,
      "loss": 1.9399,
      "step": 51535
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0591285228729248,
      "learning_rate": 4.939978821221197e-06,
      "loss": 2.2777,
      "step": 51536
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.096078872680664,
      "learning_rate": 4.9396236892823245e-06,
      "loss": 2.4806,
      "step": 51537
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1156936883926392,
      "learning_rate": 4.939268565922161e-06,
      "loss": 2.3033,
      "step": 51538
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0776311159133911,
      "learning_rate": 4.938913451141315e-06,
      "loss": 2.4489,
      "step": 51539
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2003800868988037,
      "learning_rate": 4.938558344940382e-06,
      "loss": 2.2755,
      "step": 51540
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1055026054382324,
      "learning_rate": 4.9382032473199735e-06,
      "loss": 2.3028,
      "step": 51541
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.073184847831726,
      "learning_rate": 4.93784815828068e-06,
      "loss": 2.229,
      "step": 51542
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1115174293518066,
      "learning_rate": 4.93749307782311e-06,
      "loss": 2.3411,
      "step": 51543
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0807507038116455,
      "learning_rate": 4.9371380059478615e-06,
      "loss": 2.4851,
      "step": 51544
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0935264825820923,
      "learning_rate": 4.9367829426555434e-06,
      "loss": 2.3234,
      "step": 51545
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9817225337028503,
      "learning_rate": 4.936427887946749e-06,
      "loss": 2.2989,
      "step": 51546
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1616023778915405,
      "learning_rate": 4.936072841822088e-06,
      "loss": 2.135,
      "step": 51547
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9825506210327148,
      "learning_rate": 4.935717804282157e-06,
      "loss": 2.4805,
      "step": 51548
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0361844301223755,
      "learning_rate": 4.935362775327562e-06,
      "loss": 2.4245,
      "step": 51549
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2820175886154175,
      "learning_rate": 4.935007754958898e-06,
      "loss": 2.3818,
      "step": 51550
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0585697889328003,
      "learning_rate": 4.934652743176776e-06,
      "loss": 2.318,
      "step": 51551
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0823092460632324,
      "learning_rate": 4.934297739981794e-06,
      "loss": 2.2512,
      "step": 51552
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1020402908325195,
      "learning_rate": 4.933942745374549e-06,
      "loss": 2.6627,
      "step": 51553
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1213544607162476,
      "learning_rate": 4.933587759355651e-06,
      "loss": 2.3055,
      "step": 51554
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0835585594177246,
      "learning_rate": 4.933232781925698e-06,
      "loss": 2.4181,
      "step": 51555
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1750551462173462,
      "learning_rate": 4.932877813085286e-06,
      "loss": 2.59,
      "step": 51556
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0188924074172974,
      "learning_rate": 4.932522852835028e-06,
      "loss": 2.3312,
      "step": 51557
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0706627368927002,
      "learning_rate": 4.93216790117552e-06,
      "loss": 2.085,
      "step": 51558
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0224390029907227,
      "learning_rate": 4.9318129581073595e-06,
      "loss": 2.3071,
      "step": 51559
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1150875091552734,
      "learning_rate": 4.931458023631156e-06,
      "loss": 2.4895,
      "step": 51560
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9514086246490479,
      "learning_rate": 4.931103097747504e-06,
      "loss": 2.2573,
      "step": 51561
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2721525430679321,
      "learning_rate": 4.930748180457013e-06,
      "loss": 2.1992,
      "step": 51562
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0977839231491089,
      "learning_rate": 4.9303932717602765e-06,
      "loss": 2.5496,
      "step": 51563
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9643242359161377,
      "learning_rate": 4.930038371657905e-06,
      "loss": 2.2702,
      "step": 51564
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.022769570350647,
      "learning_rate": 4.929683480150491e-06,
      "loss": 2.4772,
      "step": 51565
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0579010248184204,
      "learning_rate": 4.929328597238644e-06,
      "loss": 2.2586,
      "step": 51566
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0058887004852295,
      "learning_rate": 4.928973722922959e-06,
      "loss": 2.2516,
      "step": 51567
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0706313848495483,
      "learning_rate": 4.928618857204048e-06,
      "loss": 2.2855,
      "step": 51568
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1350014209747314,
      "learning_rate": 4.928264000082499e-06,
      "loss": 2.4263,
      "step": 51569
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.085504174232483,
      "learning_rate": 4.927909151558922e-06,
      "loss": 2.3433,
      "step": 51570
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1111615896224976,
      "learning_rate": 4.927554311633914e-06,
      "loss": 2.3153,
      "step": 51571
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0628756284713745,
      "learning_rate": 4.927199480308083e-06,
      "loss": 2.2503,
      "step": 51572
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0849664211273193,
      "learning_rate": 4.926844657582021e-06,
      "loss": 2.4776,
      "step": 51573
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1073888540267944,
      "learning_rate": 4.926489843456341e-06,
      "loss": 2.346,
      "step": 51574
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1383819580078125,
      "learning_rate": 4.926135037931634e-06,
      "loss": 2.2447,
      "step": 51575
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1987324953079224,
      "learning_rate": 4.9257802410085094e-06,
      "loss": 2.2813,
      "step": 51576
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9789242148399353,
      "learning_rate": 4.925425452687564e-06,
      "loss": 2.4545,
      "step": 51577
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2510299682617188,
      "learning_rate": 4.925070672969402e-06,
      "loss": 2.3452,
      "step": 51578
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1916769742965698,
      "learning_rate": 4.9247159018546245e-06,
      "loss": 2.1441,
      "step": 51579
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0953176021575928,
      "learning_rate": 4.924361139343827e-06,
      "loss": 1.995,
      "step": 51580
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9745550155639648,
      "learning_rate": 4.924006385437622e-06,
      "loss": 2.3919,
      "step": 51581
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.018847107887268,
      "learning_rate": 4.9236516401366034e-06,
      "loss": 2.3579,
      "step": 51582
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.011685848236084,
      "learning_rate": 4.9232969034413705e-06,
      "loss": 2.1267,
      "step": 51583
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0833442211151123,
      "learning_rate": 4.922942175352531e-06,
      "loss": 2.1768,
      "step": 51584
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1408607959747314,
      "learning_rate": 4.922587455870685e-06,
      "loss": 2.3479,
      "step": 51585
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.162858247756958,
      "learning_rate": 4.922232744996427e-06,
      "loss": 2.5205,
      "step": 51586
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0489519834518433,
      "learning_rate": 4.921878042730368e-06,
      "loss": 2.3324,
      "step": 51587
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1288796663284302,
      "learning_rate": 4.921523349073101e-06,
      "loss": 2.505,
      "step": 51588
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0441991090774536,
      "learning_rate": 4.921168664025235e-06,
      "loss": 2.3968,
      "step": 51589
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0975353717803955,
      "learning_rate": 4.9208139875873635e-06,
      "loss": 2.2232,
      "step": 51590
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1515679359436035,
      "learning_rate": 4.920459319760097e-06,
      "loss": 2.3045,
      "step": 51591
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1410346031188965,
      "learning_rate": 4.9201046605440265e-06,
      "loss": 2.2666,
      "step": 51592
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.004416823387146,
      "learning_rate": 4.919750009939762e-06,
      "loss": 2.1951,
      "step": 51593
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.026984453201294,
      "learning_rate": 4.9193953679479015e-06,
      "loss": 2.4102,
      "step": 51594
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1772164106369019,
      "learning_rate": 4.9190407345690465e-06,
      "loss": 2.3413,
      "step": 51595
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.088879108428955,
      "learning_rate": 4.918686109803792e-06,
      "loss": 2.3952,
      "step": 51596
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.044144630432129,
      "learning_rate": 4.91833149365275e-06,
      "loss": 2.3246,
      "step": 51597
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0743874311447144,
      "learning_rate": 4.91797688611651e-06,
      "loss": 2.2066,
      "step": 51598
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.041495442390442,
      "learning_rate": 4.917622287195684e-06,
      "loss": 2.294,
      "step": 51599
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9831804633140564,
      "learning_rate": 4.917267696890867e-06,
      "loss": 2.3909,
      "step": 51600
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.021642804145813,
      "learning_rate": 4.916913115202664e-06,
      "loss": 2.5475,
      "step": 51601
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0478419065475464,
      "learning_rate": 4.91655854213167e-06,
      "loss": 2.3498,
      "step": 51602
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1484439373016357,
      "learning_rate": 4.9162039776784945e-06,
      "loss": 2.3481,
      "step": 51603
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.405367136001587,
      "learning_rate": 4.9158494218437296e-06,
      "loss": 2.3622,
      "step": 51604
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0995421409606934,
      "learning_rate": 4.915494874627984e-06,
      "loss": 2.212,
      "step": 51605
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5180586576461792,
      "learning_rate": 4.915140336031851e-06,
      "loss": 2.314,
      "step": 51606
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1204588413238525,
      "learning_rate": 4.914785806055946e-06,
      "loss": 2.3978,
      "step": 51607
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1354316473007202,
      "learning_rate": 4.9144312847008515e-06,
      "loss": 2.1957,
      "step": 51608
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1300969123840332,
      "learning_rate": 4.914076771967181e-06,
      "loss": 2.113,
      "step": 51609
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6377084255218506,
      "learning_rate": 4.9137222678555275e-06,
      "loss": 2.329,
      "step": 51610
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.180977463722229,
      "learning_rate": 4.913367772366501e-06,
      "loss": 2.2902,
      "step": 51611
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0875029563903809,
      "learning_rate": 4.913013285500693e-06,
      "loss": 2.1824,
      "step": 51612
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2111992835998535,
      "learning_rate": 4.912658807258713e-06,
      "loss": 2.6115,
      "step": 51613
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2529593706130981,
      "learning_rate": 4.9123043376411585e-06,
      "loss": 2.4548,
      "step": 51614
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1887868642807007,
      "learning_rate": 4.911949876648625e-06,
      "loss": 2.6457,
      "step": 51615
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1368744373321533,
      "learning_rate": 4.911595424281723e-06,
      "loss": 2.269,
      "step": 51616
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0867109298706055,
      "learning_rate": 4.911240980541045e-06,
      "loss": 2.5197,
      "step": 51617
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3021796941757202,
      "learning_rate": 4.910886545427199e-06,
      "loss": 2.2612,
      "step": 51618
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0225510597229004,
      "learning_rate": 4.91053211894078e-06,
      "loss": 2.1369,
      "step": 51619
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0322121381759644,
      "learning_rate": 4.910177701082397e-06,
      "loss": 2.2374,
      "step": 51620
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.052183747291565,
      "learning_rate": 4.909823291852639e-06,
      "loss": 2.1581,
      "step": 51621
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9919775128364563,
      "learning_rate": 4.909468891252116e-06,
      "loss": 2.2287,
      "step": 51622
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0855647325515747,
      "learning_rate": 4.909114499281422e-06,
      "loss": 2.0725,
      "step": 51623
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0895572900772095,
      "learning_rate": 4.908760115941166e-06,
      "loss": 2.2155,
      "step": 51624
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1719409227371216,
      "learning_rate": 4.908405741231941e-06,
      "loss": 2.4724,
      "step": 51625
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.095190405845642,
      "learning_rate": 4.908051375154353e-06,
      "loss": 2.2491,
      "step": 51626
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0228173732757568,
      "learning_rate": 4.907697017708999e-06,
      "loss": 2.4396,
      "step": 51627
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.17363703250885,
      "learning_rate": 4.907342668896484e-06,
      "loss": 2.5077,
      "step": 51628
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0479800701141357,
      "learning_rate": 4.906988328717404e-06,
      "loss": 2.4074,
      "step": 51629
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9698100686073303,
      "learning_rate": 4.906633997172365e-06,
      "loss": 2.4376,
      "step": 51630
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1058659553527832,
      "learning_rate": 4.9062796742619616e-06,
      "loss": 2.2452,
      "step": 51631
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.035530686378479,
      "learning_rate": 4.905925359986799e-06,
      "loss": 2.2269,
      "step": 51632
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0433765649795532,
      "learning_rate": 4.905571054347475e-06,
      "loss": 2.5684,
      "step": 51633
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0996991395950317,
      "learning_rate": 4.905216757344598e-06,
      "loss": 2.6424,
      "step": 51634
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0633399486541748,
      "learning_rate": 4.904862468978756e-06,
      "loss": 2.1437,
      "step": 51635
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0059187412261963,
      "learning_rate": 4.904508189250559e-06,
      "loss": 2.1731,
      "step": 51636
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0373878479003906,
      "learning_rate": 4.904153918160601e-06,
      "loss": 2.2472,
      "step": 51637
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0884543657302856,
      "learning_rate": 4.9037996557094905e-06,
      "loss": 2.3787,
      "step": 51638
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0540953874588013,
      "learning_rate": 4.903445401897819e-06,
      "loss": 2.451,
      "step": 51639
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0574572086334229,
      "learning_rate": 4.9030911567261954e-06,
      "loss": 2.1624,
      "step": 51640
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0627362728118896,
      "learning_rate": 4.902736920195217e-06,
      "loss": 2.0577,
      "step": 51641
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1337497234344482,
      "learning_rate": 4.902382692305479e-06,
      "loss": 2.3265,
      "step": 51642
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.064606785774231,
      "learning_rate": 4.902028473057593e-06,
      "loss": 2.3023,
      "step": 51643
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.186999797821045,
      "learning_rate": 4.901674262452147e-06,
      "loss": 2.6406,
      "step": 51644
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2476558685302734,
      "learning_rate": 4.901320060489754e-06,
      "loss": 2.4287,
      "step": 51645
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9989878535270691,
      "learning_rate": 4.9009658671710025e-06,
      "loss": 2.1982,
      "step": 51646
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.000919222831726,
      "learning_rate": 4.900611682496508e-06,
      "loss": 2.1179,
      "step": 51647
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0620654821395874,
      "learning_rate": 4.9002575064668525e-06,
      "loss": 2.3164,
      "step": 51648
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1772518157958984,
      "learning_rate": 4.89990333908265e-06,
      "loss": 2.2166,
      "step": 51649
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0317809581756592,
      "learning_rate": 4.899549180344493e-06,
      "loss": 2.1826,
      "step": 51650
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1396700143814087,
      "learning_rate": 4.899195030252989e-06,
      "loss": 2.3763,
      "step": 51651
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0167385339736938,
      "learning_rate": 4.8988408888087315e-06,
      "loss": 2.0813,
      "step": 51652
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0433158874511719,
      "learning_rate": 4.898486756012327e-06,
      "loss": 2.4248,
      "step": 51653
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.052966833114624,
      "learning_rate": 4.898132631864369e-06,
      "loss": 2.3266,
      "step": 51654
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.229242205619812,
      "learning_rate": 4.897778516365467e-06,
      "loss": 2.2236,
      "step": 51655
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0803334712982178,
      "learning_rate": 4.897424409516211e-06,
      "loss": 2.414,
      "step": 51656
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0309321880340576,
      "learning_rate": 4.897070311317211e-06,
      "loss": 2.4497,
      "step": 51657
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9559579491615295,
      "learning_rate": 4.896716221769059e-06,
      "loss": 2.1387,
      "step": 51658
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0307527780532837,
      "learning_rate": 4.896362140872362e-06,
      "loss": 2.2637,
      "step": 51659
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.100921630859375,
      "learning_rate": 4.896008068627719e-06,
      "loss": 2.5725,
      "step": 51660
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0476586818695068,
      "learning_rate": 4.895654005035727e-06,
      "loss": 2.4459,
      "step": 51661
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1187114715576172,
      "learning_rate": 4.895299950096985e-06,
      "loss": 2.2525,
      "step": 51662
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9761129021644592,
      "learning_rate": 4.894945903812099e-06,
      "loss": 2.4185,
      "step": 51663
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.220415711402893,
      "learning_rate": 4.894591866181663e-06,
      "loss": 2.3808,
      "step": 51664
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0870585441589355,
      "learning_rate": 4.8942378372062835e-06,
      "loss": 2.2678,
      "step": 51665
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2451903820037842,
      "learning_rate": 4.893883816886554e-06,
      "loss": 2.3093,
      "step": 51666
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0639044046401978,
      "learning_rate": 4.8935298052230825e-06,
      "loss": 2.2144,
      "step": 51667
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.122588038444519,
      "learning_rate": 4.893175802216461e-06,
      "loss": 2.4297,
      "step": 51668
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9689295887947083,
      "learning_rate": 4.892821807867297e-06,
      "loss": 2.3111,
      "step": 51669
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0193955898284912,
      "learning_rate": 4.892467822176188e-06,
      "loss": 2.1357,
      "step": 51670
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9793574213981628,
      "learning_rate": 4.892113845143727e-06,
      "loss": 2.2452,
      "step": 51671
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.996738612651825,
      "learning_rate": 4.8917598767705264e-06,
      "loss": 2.1644,
      "step": 51672
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1494226455688477,
      "learning_rate": 4.89140591705718e-06,
      "loss": 2.5431,
      "step": 51673
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.01907217502594,
      "learning_rate": 4.891051966004284e-06,
      "loss": 2.2166,
      "step": 51674
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2398613691329956,
      "learning_rate": 4.890698023612446e-06,
      "loss": 2.4075,
      "step": 51675
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.262485384941101,
      "learning_rate": 4.890344089882263e-06,
      "loss": 2.4243,
      "step": 51676
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2906488180160522,
      "learning_rate": 4.88999016481433e-06,
      "loss": 2.3292,
      "step": 51677
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2318403720855713,
      "learning_rate": 4.889636248409255e-06,
      "loss": 2.3956,
      "step": 51678
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0741742849349976,
      "learning_rate": 4.889282340667631e-06,
      "loss": 2.1323,
      "step": 51679
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2857204675674438,
      "learning_rate": 4.888928441590066e-06,
      "loss": 2.3678,
      "step": 51680
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2130436897277832,
      "learning_rate": 4.8885745511771525e-06,
      "loss": 2.5032,
      "step": 51681
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1196894645690918,
      "learning_rate": 4.888220669429495e-06,
      "loss": 2.2434,
      "step": 51682
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1031219959259033,
      "learning_rate": 4.88786679634769e-06,
      "loss": 2.367,
      "step": 51683
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0674493312835693,
      "learning_rate": 4.887512931932341e-06,
      "loss": 2.3894,
      "step": 51684
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0969738960266113,
      "learning_rate": 4.887159076184044e-06,
      "loss": 2.3763,
      "step": 51685
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1837962865829468,
      "learning_rate": 4.886805229103407e-06,
      "loss": 2.185,
      "step": 51686
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.250546932220459,
      "learning_rate": 4.8864513906910186e-06,
      "loss": 2.3134,
      "step": 51687
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0159837007522583,
      "learning_rate": 4.886097560947486e-06,
      "loss": 2.3177,
      "step": 51688
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0511835813522339,
      "learning_rate": 4.885743739873403e-06,
      "loss": 2.4595,
      "step": 51689
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.007980227470398,
      "learning_rate": 4.8853899274693785e-06,
      "loss": 2.4087,
      "step": 51690
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.12978994846344,
      "learning_rate": 4.885036123736002e-06,
      "loss": 2.1413,
      "step": 51691
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9781410098075867,
      "learning_rate": 4.884682328673883e-06,
      "loss": 2.1215,
      "step": 51692
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.086997389793396,
      "learning_rate": 4.884328542283613e-06,
      "loss": 2.4271,
      "step": 51693
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.04832923412323,
      "learning_rate": 4.883974764565799e-06,
      "loss": 2.2877,
      "step": 51694
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1732574701309204,
      "learning_rate": 4.883620995521033e-06,
      "loss": 2.3164,
      "step": 51695
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0308743715286255,
      "learning_rate": 4.883267235149923e-06,
      "loss": 2.4977,
      "step": 51696
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.093341588973999,
      "learning_rate": 4.882913483453065e-06,
      "loss": 2.4219,
      "step": 51697
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.183485746383667,
      "learning_rate": 4.882559740431056e-06,
      "loss": 2.2195,
      "step": 51698
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.187725305557251,
      "learning_rate": 4.8822060060845004e-06,
      "loss": 2.2204,
      "step": 51699
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2622393369674683,
      "learning_rate": 4.881852280413995e-06,
      "loss": 2.3755,
      "step": 51700
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1643825769424438,
      "learning_rate": 4.881498563420137e-06,
      "loss": 2.3675,
      "step": 51701
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.060874342918396,
      "learning_rate": 4.881144855103533e-06,
      "loss": 2.2777,
      "step": 51702
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0718071460723877,
      "learning_rate": 4.880791155464779e-06,
      "loss": 2.3001,
      "step": 51703
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1624637842178345,
      "learning_rate": 4.88043746450447e-06,
      "loss": 2.3641,
      "step": 51704
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0446799993515015,
      "learning_rate": 4.880083782223214e-06,
      "loss": 2.5209,
      "step": 51705
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0485715866088867,
      "learning_rate": 4.879730108621603e-06,
      "loss": 2.2847,
      "step": 51706
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0769667625427246,
      "learning_rate": 4.8793764437002434e-06,
      "loss": 2.5772,
      "step": 51707
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1963461637496948,
      "learning_rate": 4.879022787459729e-06,
      "loss": 2.3064,
      "step": 51708
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1095294952392578,
      "learning_rate": 4.878669139900663e-06,
      "loss": 2.2908,
      "step": 51709
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9617618918418884,
      "learning_rate": 4.878315501023641e-06,
      "loss": 2.3937,
      "step": 51710
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0931291580200195,
      "learning_rate": 4.877961870829271e-06,
      "loss": 2.3243,
      "step": 51711
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2461603879928589,
      "learning_rate": 4.877608249318141e-06,
      "loss": 2.5113,
      "step": 51712
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1058461666107178,
      "learning_rate": 4.877254636490865e-06,
      "loss": 2.2103,
      "step": 51713
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1513381004333496,
      "learning_rate": 4.876901032348025e-06,
      "loss": 2.4304,
      "step": 51714
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0447933673858643,
      "learning_rate": 4.876547436890233e-06,
      "loss": 2.0081,
      "step": 51715
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.001562476158142,
      "learning_rate": 4.876193850118082e-06,
      "loss": 2.3169,
      "step": 51716
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.080053687095642,
      "learning_rate": 4.875840272032177e-06,
      "loss": 2.1388,
      "step": 51717
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.080353856086731,
      "learning_rate": 4.87548670263311e-06,
      "loss": 2.3121,
      "step": 51718
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1183924674987793,
      "learning_rate": 4.87513314192149e-06,
      "loss": 2.1708,
      "step": 51719
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.162797451019287,
      "learning_rate": 4.874779589897906e-06,
      "loss": 2.5824,
      "step": 51720
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0230497121810913,
      "learning_rate": 4.874426046562967e-06,
      "loss": 2.1979,
      "step": 51721
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9916908144950867,
      "learning_rate": 4.874072511917265e-06,
      "loss": 2.2055,
      "step": 51722
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0689187049865723,
      "learning_rate": 4.873718985961405e-06,
      "loss": 2.4758,
      "step": 51723
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0868738889694214,
      "learning_rate": 4.873365468695979e-06,
      "loss": 2.1623,
      "step": 51724
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0399444103240967,
      "learning_rate": 4.873011960121597e-06,
      "loss": 2.4803,
      "step": 51725
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1171351671218872,
      "learning_rate": 4.87265846023885e-06,
      "loss": 2.2483,
      "step": 51726
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.130473256111145,
      "learning_rate": 4.8723049690483415e-06,
      "loss": 2.4562,
      "step": 51727
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.166847586631775,
      "learning_rate": 4.871951486550663e-06,
      "loss": 2.477,
      "step": 51728
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2944436073303223,
      "learning_rate": 4.871598012746425e-06,
      "loss": 2.2373,
      "step": 51729
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1709336042404175,
      "learning_rate": 4.871244547636217e-06,
      "loss": 2.521,
      "step": 51730
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1040595769882202,
      "learning_rate": 4.8708910912206455e-06,
      "loss": 2.23,
      "step": 51731
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0860333442687988,
      "learning_rate": 4.870537643500307e-06,
      "loss": 2.3514,
      "step": 51732
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0796617269515991,
      "learning_rate": 4.8701842044757965e-06,
      "loss": 2.2662,
      "step": 51733
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0883046388626099,
      "learning_rate": 4.869830774147721e-06,
      "loss": 2.2671,
      "step": 51734
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0759474039077759,
      "learning_rate": 4.869477352516672e-06,
      "loss": 2.1286,
      "step": 51735
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.09687077999115,
      "learning_rate": 4.869123939583256e-06,
      "loss": 2.1856,
      "step": 51736
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.118149757385254,
      "learning_rate": 4.868770535348064e-06,
      "loss": 2.3369,
      "step": 51737
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1166014671325684,
      "learning_rate": 4.868417139811704e-06,
      "loss": 2.2653,
      "step": 51738
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0904881954193115,
      "learning_rate": 4.868063752974771e-06,
      "loss": 2.416,
      "step": 51739
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1290342807769775,
      "learning_rate": 4.867710374837863e-06,
      "loss": 2.1221,
      "step": 51740
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1590298414230347,
      "learning_rate": 4.867357005401577e-06,
      "loss": 2.2346,
      "step": 51741
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1122541427612305,
      "learning_rate": 4.867003644666517e-06,
      "loss": 2.2385,
      "step": 51742
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.214293122291565,
      "learning_rate": 4.866650292633278e-06,
      "loss": 2.5747,
      "step": 51743
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1393437385559082,
      "learning_rate": 4.866296949302464e-06,
      "loss": 2.277,
      "step": 51744
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2546359300613403,
      "learning_rate": 4.865943614674666e-06,
      "loss": 2.1613,
      "step": 51745
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.122026801109314,
      "learning_rate": 4.865590288750493e-06,
      "loss": 2.3524,
      "step": 51746
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9941620826721191,
      "learning_rate": 4.865236971530536e-06,
      "loss": 2.4357,
      "step": 51747
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.114899754524231,
      "learning_rate": 4.864883663015399e-06,
      "loss": 2.3892,
      "step": 51748
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1038457155227661,
      "learning_rate": 4.8645303632056764e-06,
      "loss": 2.3171,
      "step": 51749
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.013238787651062,
      "learning_rate": 4.864177072101972e-06,
      "loss": 2.2926,
      "step": 51750
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0988818407058716,
      "learning_rate": 4.86382378970488e-06,
      "loss": 2.4634,
      "step": 51751
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1846868991851807,
      "learning_rate": 4.863470516015009e-06,
      "loss": 2.2699,
      "step": 51752
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1801238059997559,
      "learning_rate": 4.863117251032942e-06,
      "loss": 2.1771,
      "step": 51753
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1600335836410522,
      "learning_rate": 4.862763994759291e-06,
      "loss": 2.2966,
      "step": 51754
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1247128248214722,
      "learning_rate": 4.862410747194646e-06,
      "loss": 2.3543,
      "step": 51755
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0267722606658936,
      "learning_rate": 4.862057508339614e-06,
      "loss": 2.209,
      "step": 51756
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0375232696533203,
      "learning_rate": 4.861704278194786e-06,
      "loss": 2.1533,
      "step": 51757
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0346546173095703,
      "learning_rate": 4.8613510567607695e-06,
      "loss": 2.5738,
      "step": 51758
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.034395456314087,
      "learning_rate": 4.860997844038158e-06,
      "loss": 2.5024,
      "step": 51759
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9956345558166504,
      "learning_rate": 4.860644640027546e-06,
      "loss": 2.3613,
      "step": 51760
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1090376377105713,
      "learning_rate": 4.8602914447295424e-06,
      "loss": 2.374,
      "step": 51761
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0796259641647339,
      "learning_rate": 4.859938258144736e-06,
      "loss": 2.3529,
      "step": 51762
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.029076099395752,
      "learning_rate": 4.859585080273736e-06,
      "loss": 2.2342,
      "step": 51763
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2173819541931152,
      "learning_rate": 4.85923191111713e-06,
      "loss": 2.3808,
      "step": 51764
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.032630443572998,
      "learning_rate": 4.858878750675526e-06,
      "loss": 2.3127,
      "step": 51765
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0526031255722046,
      "learning_rate": 4.85852559894952e-06,
      "loss": 2.2912,
      "step": 51766
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0393569469451904,
      "learning_rate": 4.858172455939707e-06,
      "loss": 2.2496,
      "step": 51767
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0120272636413574,
      "learning_rate": 4.857819321646686e-06,
      "loss": 2.3597,
      "step": 51768
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1295804977416992,
      "learning_rate": 4.857466196071062e-06,
      "loss": 2.1712,
      "step": 51769
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2928515672683716,
      "learning_rate": 4.8571130792134245e-06,
      "loss": 2.6126,
      "step": 51770
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0453227758407593,
      "learning_rate": 4.856759971074383e-06,
      "loss": 2.2732,
      "step": 51771
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0747183561325073,
      "learning_rate": 4.856406871654524e-06,
      "loss": 2.3266,
      "step": 51772
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.121113657951355,
      "learning_rate": 4.8560537809544575e-06,
      "loss": 2.4864,
      "step": 51773
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.189233660697937,
      "learning_rate": 4.855700698974774e-06,
      "loss": 2.3026,
      "step": 51774
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.284449815750122,
      "learning_rate": 4.855347625716077e-06,
      "loss": 2.3317,
      "step": 51775
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0801736116409302,
      "learning_rate": 4.85499456117896e-06,
      "loss": 2.2163,
      "step": 51776
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0989844799041748,
      "learning_rate": 4.854641505364029e-06,
      "loss": 2.3387,
      "step": 51777
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9924110174179077,
      "learning_rate": 4.854288458271872e-06,
      "loss": 2.3677,
      "step": 51778
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0216560363769531,
      "learning_rate": 4.853935419903103e-06,
      "loss": 2.3749,
      "step": 51779
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0610352754592896,
      "learning_rate": 4.853582390258304e-06,
      "loss": 2.6832,
      "step": 51780
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0061880350112915,
      "learning_rate": 4.853229369338083e-06,
      "loss": 2.3526,
      "step": 51781
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0619914531707764,
      "learning_rate": 4.852876357143034e-06,
      "loss": 2.4957,
      "step": 51782
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0041903257369995,
      "learning_rate": 4.85252335367376e-06,
      "loss": 2.2861,
      "step": 51783
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0811207294464111,
      "learning_rate": 4.852170358930853e-06,
      "loss": 2.4246,
      "step": 51784
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0994453430175781,
      "learning_rate": 4.85181737291492e-06,
      "loss": 2.2707,
      "step": 51785
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1821563243865967,
      "learning_rate": 4.851464395626551e-06,
      "loss": 2.5259,
      "step": 51786
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0178906917572021,
      "learning_rate": 4.851111427066351e-06,
      "loss": 2.3871,
      "step": 51787
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0883349180221558,
      "learning_rate": 4.8507584672349165e-06,
      "loss": 2.3617,
      "step": 51788
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1529768705368042,
      "learning_rate": 4.8504055161328405e-06,
      "loss": 2.2444,
      "step": 51789
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1369004249572754,
      "learning_rate": 4.85005257376073e-06,
      "loss": 2.4043,
      "step": 51790
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0895116329193115,
      "learning_rate": 4.849699640119174e-06,
      "loss": 2.4453,
      "step": 51791
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0320817232131958,
      "learning_rate": 4.849346715208781e-06,
      "loss": 2.3728,
      "step": 51792
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9915345907211304,
      "learning_rate": 4.848993799030144e-06,
      "loss": 2.2626,
      "step": 51793
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0776478052139282,
      "learning_rate": 4.848640891583862e-06,
      "loss": 2.575,
      "step": 51794
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0683155059814453,
      "learning_rate": 4.848287992870528e-06,
      "loss": 2.19,
      "step": 51795
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0884010791778564,
      "learning_rate": 4.8479351028907494e-06,
      "loss": 2.3806,
      "step": 51796
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1709699630737305,
      "learning_rate": 4.847582221645116e-06,
      "loss": 2.3082,
      "step": 51797
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9524481296539307,
      "learning_rate": 4.847229349134235e-06,
      "loss": 2.3609,
      "step": 51798
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0362738370895386,
      "learning_rate": 4.846876485358695e-06,
      "loss": 2.3624,
      "step": 51799
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0953819751739502,
      "learning_rate": 4.846523630319102e-06,
      "loss": 2.2374,
      "step": 51800
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.016383171081543,
      "learning_rate": 4.846170784016049e-06,
      "loss": 2.1494,
      "step": 51801
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0728247165679932,
      "learning_rate": 4.845817946450139e-06,
      "loss": 2.3127,
      "step": 51802
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.426888346672058,
      "learning_rate": 4.845465117621964e-06,
      "loss": 2.5005,
      "step": 51803
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3028528690338135,
      "learning_rate": 4.8451122975321286e-06,
      "loss": 2.3499,
      "step": 51804
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0719211101531982,
      "learning_rate": 4.844759486181229e-06,
      "loss": 2.2525,
      "step": 51805
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1769003868103027,
      "learning_rate": 4.844406683569861e-06,
      "loss": 2.4168,
      "step": 51806
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0509926080703735,
      "learning_rate": 4.844053889698621e-06,
      "loss": 2.2079,
      "step": 51807
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9626803994178772,
      "learning_rate": 4.843701104568114e-06,
      "loss": 2.322,
      "step": 51808
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1590577363967896,
      "learning_rate": 4.84334832817893e-06,
      "loss": 2.3003,
      "step": 51809
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2321733236312866,
      "learning_rate": 4.842995560531675e-06,
      "loss": 2.4575,
      "step": 51810
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1024816036224365,
      "learning_rate": 4.842642801626939e-06,
      "loss": 2.04,
      "step": 51811
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.404065728187561,
      "learning_rate": 4.842290051465328e-06,
      "loss": 2.2635,
      "step": 51812
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0874778032302856,
      "learning_rate": 4.841937310047432e-06,
      "loss": 2.3659,
      "step": 51813
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1169556379318237,
      "learning_rate": 4.841584577373858e-06,
      "loss": 2.3864,
      "step": 51814
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2102720737457275,
      "learning_rate": 4.8412318534451986e-06,
      "loss": 2.4376,
      "step": 51815
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9790189862251282,
      "learning_rate": 4.840879138262048e-06,
      "loss": 2.218,
      "step": 51816
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0844508409500122,
      "learning_rate": 4.840526431825013e-06,
      "loss": 2.2687,
      "step": 51817
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.097443699836731,
      "learning_rate": 4.840173734134686e-06,
      "loss": 2.3393,
      "step": 51818
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0548768043518066,
      "learning_rate": 4.839821045191664e-06,
      "loss": 2.1786,
      "step": 51819
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.054351568222046,
      "learning_rate": 4.839468364996549e-06,
      "loss": 2.2603,
      "step": 51820
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1207475662231445,
      "learning_rate": 4.839115693549937e-06,
      "loss": 2.5018,
      "step": 51821
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0894551277160645,
      "learning_rate": 4.838763030852423e-06,
      "loss": 2.5438,
      "step": 51822
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.051110863685608,
      "learning_rate": 4.83841037690461e-06,
      "loss": 2.2256,
      "step": 51823
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1862599849700928,
      "learning_rate": 4.838057731707089e-06,
      "loss": 2.5351,
      "step": 51824
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0908106565475464,
      "learning_rate": 4.837705095260467e-06,
      "loss": 2.3235,
      "step": 51825
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0858275890350342,
      "learning_rate": 4.837352467565333e-06,
      "loss": 2.2921,
      "step": 51826
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1090550422668457,
      "learning_rate": 4.836999848622291e-06,
      "loss": 2.3251,
      "step": 51827
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1664869785308838,
      "learning_rate": 4.836647238431934e-06,
      "loss": 2.154,
      "step": 51828
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1001393795013428,
      "learning_rate": 4.836294636994866e-06,
      "loss": 2.2484,
      "step": 51829
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0121283531188965,
      "learning_rate": 4.835942044311678e-06,
      "loss": 2.3979,
      "step": 51830
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.967576801776886,
      "learning_rate": 4.835589460382978e-06,
      "loss": 2.1862,
      "step": 51831
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1195132732391357,
      "learning_rate": 4.835236885209348e-06,
      "loss": 2.2204,
      "step": 51832
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9563398957252502,
      "learning_rate": 4.834884318791398e-06,
      "loss": 2.2232,
      "step": 51833
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0985894203186035,
      "learning_rate": 4.834531761129719e-06,
      "loss": 2.3588,
      "step": 51834
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1444683074951172,
      "learning_rate": 4.834179212224915e-06,
      "loss": 2.3466,
      "step": 51835
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.9978653192520142,
      "learning_rate": 4.833826672077576e-06,
      "loss": 2.2752,
      "step": 51836
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.0266700983047485,
      "learning_rate": 4.833474140688309e-06,
      "loss": 2.2993,
      "step": 51837
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1177973747253418,
      "learning_rate": 4.833121618057702e-06,
      "loss": 2.3777,
      "step": 51838
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1252129077911377,
      "learning_rate": 4.832769104186361e-06,
      "loss": 2.4353,
      "step": 51839
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.071629285812378,
      "learning_rate": 4.8324165990748755e-06,
      "loss": 2.1163,
      "step": 51840
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0865669250488281,
      "learning_rate": 4.832064102723851e-06,
      "loss": 2.4192,
      "step": 51841
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2145034074783325,
      "learning_rate": 4.831711615133878e-06,
      "loss": 2.3257,
      "step": 51842
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.311318039894104,
      "learning_rate": 4.831359136305562e-06,
      "loss": 2.2675,
      "step": 51843
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1545770168304443,
      "learning_rate": 4.831006666239496e-06,
      "loss": 2.2998,
      "step": 51844
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0703169107437134,
      "learning_rate": 4.830654204936277e-06,
      "loss": 2.583,
      "step": 51845
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0792067050933838,
      "learning_rate": 4.830301752396499e-06,
      "loss": 2.2271,
      "step": 51846
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0272085666656494,
      "learning_rate": 4.829949308620768e-06,
      "loss": 2.5232,
      "step": 51847
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0741928815841675,
      "learning_rate": 4.8295968736096734e-06,
      "loss": 2.404,
      "step": 51848
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9888050556182861,
      "learning_rate": 4.82924444736382e-06,
      "loss": 2.4142,
      "step": 51849
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0138438940048218,
      "learning_rate": 4.828892029883801e-06,
      "loss": 2.1604,
      "step": 51850
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0772444009780884,
      "learning_rate": 4.8285396211702105e-06,
      "loss": 2.2827,
      "step": 51851
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1231400966644287,
      "learning_rate": 4.828187221223655e-06,
      "loss": 2.3595,
      "step": 51852
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.155055284500122,
      "learning_rate": 4.827834830044722e-06,
      "loss": 2.3594,
      "step": 51853
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1354961395263672,
      "learning_rate": 4.827482447634018e-06,
      "loss": 2.2656,
      "step": 51854
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1900430917739868,
      "learning_rate": 4.827130073992132e-06,
      "loss": 2.2403,
      "step": 51855
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2517844438552856,
      "learning_rate": 4.82677770911967e-06,
      "loss": 2.2789,
      "step": 51856
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1163251399993896,
      "learning_rate": 4.82642535301722e-06,
      "loss": 2.4421,
      "step": 51857
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1051219701766968,
      "learning_rate": 4.8260730056853924e-06,
      "loss": 2.4273,
      "step": 51858
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0322473049163818,
      "learning_rate": 4.825720667124768e-06,
      "loss": 2.4028,
      "step": 51859
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.010114073753357,
      "learning_rate": 4.825368337335957e-06,
      "loss": 2.198,
      "step": 51860
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.072413682937622,
      "learning_rate": 4.825016016319548e-06,
      "loss": 2.3769,
      "step": 51861
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0643364191055298,
      "learning_rate": 4.824663704076146e-06,
      "loss": 2.3454,
      "step": 51862
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1419475078582764,
      "learning_rate": 4.8243114006063405e-06,
      "loss": 2.353,
      "step": 51863
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0018534660339355,
      "learning_rate": 4.8239591059107375e-06,
      "loss": 2.3531,
      "step": 51864
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.058217167854309,
      "learning_rate": 4.823606819989924e-06,
      "loss": 2.3151,
      "step": 51865
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.174076795578003,
      "learning_rate": 4.8232545428445085e-06,
      "loss": 2.2504,
      "step": 51866
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0166913270950317,
      "learning_rate": 4.822902274475078e-06,
      "loss": 2.3444,
      "step": 51867
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0362348556518555,
      "learning_rate": 4.822550014882239e-06,
      "loss": 2.1872,
      "step": 51868
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.12935471534729,
      "learning_rate": 4.8221977640665794e-06,
      "loss": 2.3079,
      "step": 51869
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.126119613647461,
      "learning_rate": 4.821845522028704e-06,
      "loss": 2.1745,
      "step": 51870
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1238477230072021,
      "learning_rate": 4.821493288769208e-06,
      "loss": 2.0624,
      "step": 51871
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.064488410949707,
      "learning_rate": 4.821141064288687e-06,
      "loss": 2.3188,
      "step": 51872
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0886528491973877,
      "learning_rate": 4.820788848587734e-06,
      "loss": 2.2104,
      "step": 51873
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0538265705108643,
      "learning_rate": 4.8204366416669545e-06,
      "loss": 2.2991,
      "step": 51874
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0593332052230835,
      "learning_rate": 4.820084443526938e-06,
      "loss": 2.266,
      "step": 51875
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.171968936920166,
      "learning_rate": 4.819732254168289e-06,
      "loss": 2.3588,
      "step": 51876
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0121594667434692,
      "learning_rate": 4.819380073591601e-06,
      "loss": 2.2621,
      "step": 51877
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0586986541748047,
      "learning_rate": 4.819027901797467e-06,
      "loss": 2.3615,
      "step": 51878
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1583763360977173,
      "learning_rate": 4.818675738786491e-06,
      "loss": 2.2544,
      "step": 51879
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1235060691833496,
      "learning_rate": 4.818323584559265e-06,
      "loss": 2.3508,
      "step": 51880
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1192131042480469,
      "learning_rate": 4.817971439116389e-06,
      "loss": 2.3976,
      "step": 51881
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1624423265457153,
      "learning_rate": 4.817619302458456e-06,
      "loss": 2.267,
      "step": 51882
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.086214542388916,
      "learning_rate": 4.81726717458607e-06,
      "loss": 2.4446,
      "step": 51883
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.007118582725525,
      "learning_rate": 4.8169150554998245e-06,
      "loss": 2.3062,
      "step": 51884
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0855399370193481,
      "learning_rate": 4.816562945200315e-06,
      "loss": 2.1548,
      "step": 51885
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3023072481155396,
      "learning_rate": 4.816210843688135e-06,
      "loss": 2.421,
      "step": 51886
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0559672117233276,
      "learning_rate": 4.815858750963889e-06,
      "loss": 2.1185,
      "step": 51887
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1453728675842285,
      "learning_rate": 4.815506667028167e-06,
      "loss": 2.1575,
      "step": 51888
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.020027756690979,
      "learning_rate": 4.8151545918815735e-06,
      "loss": 2.3598,
      "step": 51889
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.050506830215454,
      "learning_rate": 4.814802525524697e-06,
      "loss": 2.5131,
      "step": 51890
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.085012435913086,
      "learning_rate": 4.814450467958142e-06,
      "loss": 2.2513,
      "step": 51891
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0420979261398315,
      "learning_rate": 4.814098419182498e-06,
      "loss": 2.1812,
      "step": 51892
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.949336588382721,
      "learning_rate": 4.81374637919837e-06,
      "loss": 2.4963,
      "step": 51893
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.135591983795166,
      "learning_rate": 4.813394348006346e-06,
      "loss": 2.4415,
      "step": 51894
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.134976863861084,
      "learning_rate": 4.813042325607032e-06,
      "loss": 2.515,
      "step": 51895
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2125632762908936,
      "learning_rate": 4.812690312001016e-06,
      "loss": 2.3082,
      "step": 51896
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3963758945465088,
      "learning_rate": 4.812338307188905e-06,
      "loss": 2.376,
      "step": 51897
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0278042554855347,
      "learning_rate": 4.811986311171284e-06,
      "loss": 2.1926,
      "step": 51898
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1319620609283447,
      "learning_rate": 4.811634323948757e-06,
      "loss": 2.201,
      "step": 51899
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1442583799362183,
      "learning_rate": 4.811282345521917e-06,
      "loss": 2.3341,
      "step": 51900
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1192617416381836,
      "learning_rate": 4.810930375891365e-06,
      "loss": 2.2927,
      "step": 51901
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1662052869796753,
      "learning_rate": 4.8105784150576926e-06,
      "loss": 2.3269,
      "step": 51902
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1613889932632446,
      "learning_rate": 4.810226463021502e-06,
      "loss": 2.3117,
      "step": 51903
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2190685272216797,
      "learning_rate": 4.809874519783384e-06,
      "loss": 2.4461,
      "step": 51904
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0155185461044312,
      "learning_rate": 4.809522585343942e-06,
      "loss": 2.1773,
      "step": 51905
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9789732694625854,
      "learning_rate": 4.809170659703769e-06,
      "loss": 2.4259,
      "step": 51906
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0944839715957642,
      "learning_rate": 4.808818742863458e-06,
      "loss": 2.3727,
      "step": 51907
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1326584815979004,
      "learning_rate": 4.808466834823613e-06,
      "loss": 2.4438,
      "step": 51908
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0749173164367676,
      "learning_rate": 4.808114935584822e-06,
      "loss": 2.3014,
      "step": 51909
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1224844455718994,
      "learning_rate": 4.807763045147691e-06,
      "loss": 2.3648,
      "step": 51910
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0720916986465454,
      "learning_rate": 4.8074111635128105e-06,
      "loss": 2.2401,
      "step": 51911
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0589227676391602,
      "learning_rate": 4.80705929068078e-06,
      "loss": 2.5243,
      "step": 51912
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.084967017173767,
      "learning_rate": 4.80670742665219e-06,
      "loss": 2.1221,
      "step": 51913
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0122536420822144,
      "learning_rate": 4.806355571427644e-06,
      "loss": 2.249,
      "step": 51914
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.062727928161621,
      "learning_rate": 4.806003725007734e-06,
      "loss": 2.5465,
      "step": 51915
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1026721000671387,
      "learning_rate": 4.8056518873930615e-06,
      "loss": 2.5386,
      "step": 51916
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0814218521118164,
      "learning_rate": 4.805300058584216e-06,
      "loss": 2.3201,
      "step": 51917
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2843377590179443,
      "learning_rate": 4.8049482385818e-06,
      "loss": 2.2736,
      "step": 51918
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.202483892440796,
      "learning_rate": 4.804596427386407e-06,
      "loss": 2.2121,
      "step": 51919
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1132577657699585,
      "learning_rate": 4.8042446249986355e-06,
      "loss": 2.4171,
      "step": 51920
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9812943339347839,
      "learning_rate": 4.803892831419077e-06,
      "loss": 2.1119,
      "step": 51921
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0283690690994263,
      "learning_rate": 4.803541046648336e-06,
      "loss": 2.2576,
      "step": 51922
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.112287163734436,
      "learning_rate": 4.803189270687e-06,
      "loss": 2.2469,
      "step": 51923
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1486366987228394,
      "learning_rate": 4.8028375035356765e-06,
      "loss": 2.3028,
      "step": 51924
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.079047679901123,
      "learning_rate": 4.802485745194948e-06,
      "loss": 1.9009,
      "step": 51925
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0602161884307861,
      "learning_rate": 4.802133995665422e-06,
      "loss": 2.4207,
      "step": 51926
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1217138767242432,
      "learning_rate": 4.8017822549476864e-06,
      "loss": 2.391,
      "step": 51927
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1379939317703247,
      "learning_rate": 4.801430523042347e-06,
      "loss": 2.3906,
      "step": 51928
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1953085660934448,
      "learning_rate": 4.8010787999499885e-06,
      "loss": 2.4314,
      "step": 51929
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2573238611221313,
      "learning_rate": 4.800727085671218e-06,
      "loss": 2.2678,
      "step": 51930
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.02301025390625,
      "learning_rate": 4.800375380206624e-06,
      "loss": 2.2118,
      "step": 51931
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1714733839035034,
      "learning_rate": 4.800023683556809e-06,
      "loss": 2.3517,
      "step": 51932
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.034735083580017,
      "learning_rate": 4.799671995722367e-06,
      "loss": 2.3413,
      "step": 51933
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.112068772315979,
      "learning_rate": 4.799320316703889e-06,
      "loss": 2.1203,
      "step": 51934
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0562647581100464,
      "learning_rate": 4.798968646501981e-06,
      "loss": 2.3727,
      "step": 51935
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0767298936843872,
      "learning_rate": 4.798616985117227e-06,
      "loss": 2.3054,
      "step": 51936
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0543630123138428,
      "learning_rate": 4.798265332550235e-06,
      "loss": 2.3439,
      "step": 51937
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0210657119750977,
      "learning_rate": 4.797913688801596e-06,
      "loss": 2.3137,
      "step": 51938
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0308433771133423,
      "learning_rate": 4.797562053871907e-06,
      "loss": 2.3936,
      "step": 51939
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.8652875423431396,
      "learning_rate": 4.797210427761758e-06,
      "loss": 2.3317,
      "step": 51940
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1826568841934204,
      "learning_rate": 4.796858810471755e-06,
      "loss": 2.3893,
      "step": 51941
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0407295227050781,
      "learning_rate": 4.796507202002486e-06,
      "loss": 2.3925,
      "step": 51942
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1450623273849487,
      "learning_rate": 4.796155602354554e-06,
      "loss": 2.2141,
      "step": 51943
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1338359117507935,
      "learning_rate": 4.795804011528548e-06,
      "loss": 2.5042,
      "step": 51944
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2054184675216675,
      "learning_rate": 4.795452429525072e-06,
      "loss": 2.3079,
      "step": 51945
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.07505464553833,
      "learning_rate": 4.7951008563447125e-06,
      "loss": 2.3679,
      "step": 51946
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.011598825454712,
      "learning_rate": 4.794749291988074e-06,
      "loss": 2.2489,
      "step": 51947
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2050769329071045,
      "learning_rate": 4.794397736455747e-06,
      "loss": 2.7142,
      "step": 51948
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1060070991516113,
      "learning_rate": 4.794046189748334e-06,
      "loss": 2.4293,
      "step": 51949
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.242255687713623,
      "learning_rate": 4.7936946518664255e-06,
      "loss": 2.3812,
      "step": 51950
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1435036659240723,
      "learning_rate": 4.793343122810617e-06,
      "loss": 2.1581,
      "step": 51951
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1484681367874146,
      "learning_rate": 4.7929916025815046e-06,
      "loss": 2.3457,
      "step": 51952
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2492809295654297,
      "learning_rate": 4.792640091179689e-06,
      "loss": 2.372,
      "step": 51953
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5721200704574585,
      "learning_rate": 4.792288588605758e-06,
      "loss": 2.1688,
      "step": 51954
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1247875690460205,
      "learning_rate": 4.791937094860317e-06,
      "loss": 2.1498,
      "step": 51955
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.050826072692871,
      "learning_rate": 4.791585609943953e-06,
      "loss": 2.4515,
      "step": 51956
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.850185513496399,
      "learning_rate": 4.79123413385727e-06,
      "loss": 2.4703,
      "step": 51957
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.159765362739563,
      "learning_rate": 4.790882666600855e-06,
      "loss": 2.3914,
      "step": 51958
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0795212984085083,
      "learning_rate": 4.7905312081753145e-06,
      "loss": 2.457,
      "step": 51959
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0642811059951782,
      "learning_rate": 4.790179758581232e-06,
      "loss": 2.2641,
      "step": 51960
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1360158920288086,
      "learning_rate": 4.789828317819216e-06,
      "loss": 2.4226,
      "step": 51961
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0201570987701416,
      "learning_rate": 4.789476885889854e-06,
      "loss": 2.2869,
      "step": 51962
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.198052167892456,
      "learning_rate": 4.789125462793746e-06,
      "loss": 2.3109,
      "step": 51963
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0197036266326904,
      "learning_rate": 4.788774048531481e-06,
      "loss": 2.3788,
      "step": 51964
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0555338859558105,
      "learning_rate": 4.7884226431036614e-06,
      "loss": 2.3151,
      "step": 51965
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0721124410629272,
      "learning_rate": 4.7880712465108795e-06,
      "loss": 2.382,
      "step": 51966
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0399620532989502,
      "learning_rate": 4.7877198587537346e-06,
      "loss": 2.371,
      "step": 51967
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2026783227920532,
      "learning_rate": 4.787368479832821e-06,
      "loss": 2.17,
      "step": 51968
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0602856874465942,
      "learning_rate": 4.787017109748729e-06,
      "loss": 2.0966,
      "step": 51969
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2282217741012573,
      "learning_rate": 4.786665748502064e-06,
      "loss": 2.4109,
      "step": 51970
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0343576669692993,
      "learning_rate": 4.786314396093411e-06,
      "loss": 2.3958,
      "step": 51971
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2739431858062744,
      "learning_rate": 4.7859630525233755e-06,
      "loss": 2.395,
      "step": 51972
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1558361053466797,
      "learning_rate": 4.7856117177925455e-06,
      "loss": 2.3548,
      "step": 51973
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.030048131942749,
      "learning_rate": 4.785260391901524e-06,
      "loss": 2.5243,
      "step": 51974
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0559601783752441,
      "learning_rate": 4.784909074850897e-06,
      "loss": 2.2533,
      "step": 51975
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9882732629776001,
      "learning_rate": 4.7845577666412745e-06,
      "loss": 2.5275,
      "step": 51976
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2599273920059204,
      "learning_rate": 4.784206467273235e-06,
      "loss": 2.293,
      "step": 51977
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0711925029754639,
      "learning_rate": 4.783855176747386e-06,
      "loss": 2.3527,
      "step": 51978
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.122002124786377,
      "learning_rate": 4.7835038950643155e-06,
      "loss": 2.2955,
      "step": 51979
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1146904230117798,
      "learning_rate": 4.783152622224626e-06,
      "loss": 2.4836,
      "step": 51980
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1085612773895264,
      "learning_rate": 4.782801358228907e-06,
      "loss": 2.6734,
      "step": 51981
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.049124836921692,
      "learning_rate": 4.78245010307776e-06,
      "loss": 2.2804,
      "step": 51982
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1319668292999268,
      "learning_rate": 4.782098856771772e-06,
      "loss": 2.174,
      "step": 51983
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0842690467834473,
      "learning_rate": 4.781747619311548e-06,
      "loss": 2.4941,
      "step": 51984
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1069118976593018,
      "learning_rate": 4.781396390697676e-06,
      "loss": 2.1546,
      "step": 51985
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.054992914199829,
      "learning_rate": 4.7810451709307586e-06,
      "loss": 2.3258,
      "step": 51986
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0395761728286743,
      "learning_rate": 4.780693960011382e-06,
      "loss": 2.3158,
      "step": 51987
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1589208841323853,
      "learning_rate": 4.7803427579401504e-06,
      "loss": 2.3268,
      "step": 51988
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.334803581237793,
      "learning_rate": 4.779991564717657e-06,
      "loss": 2.3886,
      "step": 51989
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.119401454925537,
      "learning_rate": 4.7796403803444945e-06,
      "loss": 2.2301,
      "step": 51990
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.093964695930481,
      "learning_rate": 4.7792892048212556e-06,
      "loss": 2.258,
      "step": 51991
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9621367454528809,
      "learning_rate": 4.7789380381485436e-06,
      "loss": 2.2843,
      "step": 51992
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.155123233795166,
      "learning_rate": 4.778586880326946e-06,
      "loss": 2.1543,
      "step": 51993
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0679280757904053,
      "learning_rate": 4.778235731357065e-06,
      "loss": 2.3185,
      "step": 51994
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0744028091430664,
      "learning_rate": 4.777884591239494e-06,
      "loss": 2.2835,
      "step": 51995
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1291402578353882,
      "learning_rate": 4.777533459974821e-06,
      "loss": 2.4204,
      "step": 51996
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0442131757736206,
      "learning_rate": 4.7771823375636525e-06,
      "loss": 2.2644,
      "step": 51997
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0883312225341797,
      "learning_rate": 4.776831224006574e-06,
      "loss": 2.3727,
      "step": 51998
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0892640352249146,
      "learning_rate": 4.7764801193041885e-06,
      "loss": 2.3561,
      "step": 51999
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0413312911987305,
      "learning_rate": 4.776129023457086e-06,
      "loss": 2.319,
      "step": 52000
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.04693603515625,
      "learning_rate": 4.775777936465865e-06,
      "loss": 2.0392,
      "step": 52001
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3064868450164795,
      "learning_rate": 4.775426858331117e-06,
      "loss": 2.2072,
      "step": 52002
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.038116455078125,
      "learning_rate": 4.775075789053447e-06,
      "loss": 2.4214,
      "step": 52003
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0161267518997192,
      "learning_rate": 4.774724728633434e-06,
      "loss": 2.2654,
      "step": 52004
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3255865573883057,
      "learning_rate": 4.774373677071687e-06,
      "loss": 2.1878,
      "step": 52005
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0107332468032837,
      "learning_rate": 4.7740226343687915e-06,
      "loss": 2.223,
      "step": 52006
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0138972997665405,
      "learning_rate": 4.77367160052535e-06,
      "loss": 2.6383,
      "step": 52007
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.064452052116394,
      "learning_rate": 4.773320575541951e-06,
      "loss": 2.0855,
      "step": 52008
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0639368295669556,
      "learning_rate": 4.7729695594191975e-06,
      "loss": 2.3724,
      "step": 52009
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0696648359298706,
      "learning_rate": 4.772618552157676e-06,
      "loss": 2.3767,
      "step": 52010
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0779670476913452,
      "learning_rate": 4.77226755375799e-06,
      "loss": 2.2985,
      "step": 52011
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2630761861801147,
      "learning_rate": 4.771916564220727e-06,
      "loss": 2.2744,
      "step": 52012
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.034253478050232,
      "learning_rate": 4.771565583546488e-06,
      "loss": 2.0885,
      "step": 52013
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0063985586166382,
      "learning_rate": 4.771214611735862e-06,
      "loss": 2.3275,
      "step": 52014
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0562666654586792,
      "learning_rate": 4.770863648789452e-06,
      "loss": 2.2681,
      "step": 52015
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.114624261856079,
      "learning_rate": 4.770512694707848e-06,
      "loss": 2.2483,
      "step": 52016
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0326893329620361,
      "learning_rate": 4.770161749491644e-06,
      "loss": 2.3349,
      "step": 52017
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.17998206615448,
      "learning_rate": 4.769810813141433e-06,
      "loss": 2.3835,
      "step": 52018
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.036179780960083,
      "learning_rate": 4.7694598856578175e-06,
      "loss": 2.1692,
      "step": 52019
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0905898809432983,
      "learning_rate": 4.769108967041385e-06,
      "loss": 2.4445,
      "step": 52020
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2102917432785034,
      "learning_rate": 4.768758057292736e-06,
      "loss": 2.1992,
      "step": 52021
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.037684440612793,
      "learning_rate": 4.768407156412459e-06,
      "loss": 2.211,
      "step": 52022
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.181038498878479,
      "learning_rate": 4.768056264401157e-06,
      "loss": 2.4478,
      "step": 52023
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0724740028381348,
      "learning_rate": 4.767705381259421e-06,
      "loss": 2.5322,
      "step": 52024
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0688655376434326,
      "learning_rate": 4.767354506987841e-06,
      "loss": 2.2179,
      "step": 52025
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0293792486190796,
      "learning_rate": 4.76700364158702e-06,
      "loss": 2.4216,
      "step": 52026
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0236825942993164,
      "learning_rate": 4.766652785057546e-06,
      "loss": 2.5748,
      "step": 52027
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0498765707015991,
      "learning_rate": 4.766301937400021e-06,
      "loss": 2.1703,
      "step": 52028
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9625265598297119,
      "learning_rate": 4.765951098615035e-06,
      "loss": 2.3323,
      "step": 52029
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1352335214614868,
      "learning_rate": 4.765600268703183e-06,
      "loss": 2.549,
      "step": 52030
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1819051504135132,
      "learning_rate": 4.7652494476650564e-06,
      "loss": 2.25,
      "step": 52031
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1691094636917114,
      "learning_rate": 4.764898635501258e-06,
      "loss": 2.1268,
      "step": 52032
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0869216918945312,
      "learning_rate": 4.7645478322123755e-06,
      "loss": 2.1743,
      "step": 52033
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6335277557373047,
      "learning_rate": 4.764197037799007e-06,
      "loss": 2.1613,
      "step": 52034
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.7481017112731934,
      "learning_rate": 4.7638462522617456e-06,
      "loss": 2.1194,
      "step": 52035
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0347737073898315,
      "learning_rate": 4.76349547560119e-06,
      "loss": 2.423,
      "step": 52036
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1855241060256958,
      "learning_rate": 4.763144707817928e-06,
      "loss": 2.221,
      "step": 52037
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1611887216567993,
      "learning_rate": 4.762793948912562e-06,
      "loss": 2.4504,
      "step": 52038
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0179741382598877,
      "learning_rate": 4.762443198885679e-06,
      "loss": 2.3201,
      "step": 52039
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0451250076293945,
      "learning_rate": 4.76209245773788e-06,
      "loss": 2.3973,
      "step": 52040
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.090707778930664,
      "learning_rate": 4.761741725469754e-06,
      "loss": 2.274,
      "step": 52041
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.190417766571045,
      "learning_rate": 4.7613910020819056e-06,
      "loss": 2.3262,
      "step": 52042
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0882459878921509,
      "learning_rate": 4.761040287574915e-06,
      "loss": 2.3806,
      "step": 52043
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1138036251068115,
      "learning_rate": 4.760689581949388e-06,
      "loss": 2.435,
      "step": 52044
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0942015647888184,
      "learning_rate": 4.760338885205911e-06,
      "loss": 2.091,
      "step": 52045
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1770048141479492,
      "learning_rate": 4.759988197345086e-06,
      "loss": 2.3366,
      "step": 52046
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0953449010849,
      "learning_rate": 4.7596375183675006e-06,
      "loss": 2.6101,
      "step": 52047
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1175687313079834,
      "learning_rate": 4.759286848273758e-06,
      "loss": 2.2708,
      "step": 52048
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2388110160827637,
      "learning_rate": 4.758936187064441e-06,
      "loss": 2.35,
      "step": 52049
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0756542682647705,
      "learning_rate": 4.758585534740157e-06,
      "loss": 2.4452,
      "step": 52050
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.100058913230896,
      "learning_rate": 4.758234891301493e-06,
      "loss": 2.5361,
      "step": 52051
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0465869903564453,
      "learning_rate": 4.757884256749042e-06,
      "loss": 2.3299,
      "step": 52052
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0866482257843018,
      "learning_rate": 4.757533631083402e-06,
      "loss": 2.3495,
      "step": 52053
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0007597208023071,
      "learning_rate": 4.757183014305164e-06,
      "loss": 2.2569,
      "step": 52054
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0425910949707031,
      "learning_rate": 4.756832406414928e-06,
      "loss": 2.4738,
      "step": 52055
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9732210636138916,
      "learning_rate": 4.756481807413287e-06,
      "loss": 2.2465,
      "step": 52056
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0825496912002563,
      "learning_rate": 4.756131217300832e-06,
      "loss": 2.2591,
      "step": 52057
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1644500494003296,
      "learning_rate": 4.755780636078156e-06,
      "loss": 2.2415,
      "step": 52058
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2769525051116943,
      "learning_rate": 4.755430063745858e-06,
      "loss": 2.2422,
      "step": 52059
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1133924722671509,
      "learning_rate": 4.755079500304528e-06,
      "loss": 2.3055,
      "step": 52060
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.11734938621521,
      "learning_rate": 4.754728945754766e-06,
      "loss": 2.3074,
      "step": 52061
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0911266803741455,
      "learning_rate": 4.754378400097159e-06,
      "loss": 2.1723,
      "step": 52062
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1026500463485718,
      "learning_rate": 4.75402786333231e-06,
      "loss": 2.2989,
      "step": 52063
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0086597204208374,
      "learning_rate": 4.753677335460805e-06,
      "loss": 2.3543,
      "step": 52064
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2721108198165894,
      "learning_rate": 4.753326816483245e-06,
      "loss": 2.4243,
      "step": 52065
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1041971445083618,
      "learning_rate": 4.752976306400217e-06,
      "loss": 2.1276,
      "step": 52066
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0303096771240234,
      "learning_rate": 4.752625805212323e-06,
      "loss": 2.1652,
      "step": 52067
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1843193769454956,
      "learning_rate": 4.75227531292015e-06,
      "loss": 2.3022,
      "step": 52068
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1093984842300415,
      "learning_rate": 4.751924829524303e-06,
      "loss": 2.2677,
      "step": 52069
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0427193641662598,
      "learning_rate": 4.751574355025362e-06,
      "loss": 2.3697,
      "step": 52070
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2146084308624268,
      "learning_rate": 4.751223889423931e-06,
      "loss": 2.3548,
      "step": 52071
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.084635853767395,
      "learning_rate": 4.750873432720596e-06,
      "loss": 2.1041,
      "step": 52072
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1661394834518433,
      "learning_rate": 4.750522984915962e-06,
      "loss": 2.2075,
      "step": 52073
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0091692209243774,
      "learning_rate": 4.750172546010612e-06,
      "loss": 2.2302,
      "step": 52074
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1499673128128052,
      "learning_rate": 4.749822116005151e-06,
      "loss": 2.4934,
      "step": 52075
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1587777137756348,
      "learning_rate": 4.7494716949001614e-06,
      "loss": 2.4108,
      "step": 52076
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0795435905456543,
      "learning_rate": 4.749121282696248e-06,
      "loss": 2.2572,
      "step": 52077
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0614351034164429,
      "learning_rate": 4.748770879393997e-06,
      "loss": 2.2004,
      "step": 52078
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9996375441551208,
      "learning_rate": 4.748420484994009e-06,
      "loss": 2.3101,
      "step": 52079
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0303730964660645,
      "learning_rate": 4.748070099496874e-06,
      "loss": 2.4567,
      "step": 52080
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1945518255233765,
      "learning_rate": 4.747719722903183e-06,
      "loss": 2.0841,
      "step": 52081
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0823862552642822,
      "learning_rate": 4.747369355213538e-06,
      "loss": 2.2365,
      "step": 52082
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1191061735153198,
      "learning_rate": 4.747018996428529e-06,
      "loss": 2.1723,
      "step": 52083
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0337239503860474,
      "learning_rate": 4.746668646548744e-06,
      "loss": 2.3416,
      "step": 52084
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.01779043674469,
      "learning_rate": 4.746318305574788e-06,
      "loss": 2.2408,
      "step": 52085
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1035772562026978,
      "learning_rate": 4.745967973507248e-06,
      "loss": 2.5494,
      "step": 52086
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0788549184799194,
      "learning_rate": 4.745617650346715e-06,
      "loss": 2.352,
      "step": 52087
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1123285293579102,
      "learning_rate": 4.745267336093793e-06,
      "loss": 2.3486,
      "step": 52088
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0169764757156372,
      "learning_rate": 4.7449170307490645e-06,
      "loss": 2.5677,
      "step": 52089
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0344117879867554,
      "learning_rate": 4.744566734313134e-06,
      "loss": 2.5036,
      "step": 52090
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.062229871749878,
      "learning_rate": 4.744216446786586e-06,
      "loss": 2.4405,
      "step": 52091
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.105069875717163,
      "learning_rate": 4.743866168170021e-06,
      "loss": 2.4461,
      "step": 52092
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0123568773269653,
      "learning_rate": 4.743515898464029e-06,
      "loss": 2.3368,
      "step": 52093
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2827244997024536,
      "learning_rate": 4.743165637669207e-06,
      "loss": 2.2555,
      "step": 52094
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.07161283493042,
      "learning_rate": 4.742815385786148e-06,
      "loss": 2.3632,
      "step": 52095
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2116055488586426,
      "learning_rate": 4.7424651428154444e-06,
      "loss": 2.2757,
      "step": 52096
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1194616556167603,
      "learning_rate": 4.7421149087576855e-06,
      "loss": 2.3922,
      "step": 52097
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1308467388153076,
      "learning_rate": 4.7417646836134754e-06,
      "loss": 2.2786,
      "step": 52098
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.213671326637268,
      "learning_rate": 4.741414467383397e-06,
      "loss": 2.2648,
      "step": 52099
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0190216302871704,
      "learning_rate": 4.741064260068055e-06,
      "loss": 2.242,
      "step": 52100
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.759631633758545,
      "learning_rate": 4.740714061668032e-06,
      "loss": 2.5763,
      "step": 52101
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0394703149795532,
      "learning_rate": 4.740363872183932e-06,
      "loss": 2.2497,
      "step": 52102
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0731265544891357,
      "learning_rate": 4.740013691616339e-06,
      "loss": 2.5741,
      "step": 52103
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.074112057685852,
      "learning_rate": 4.739663519965855e-06,
      "loss": 2.1632,
      "step": 52104
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0336235761642456,
      "learning_rate": 4.739313357233066e-06,
      "loss": 2.3636,
      "step": 52105
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.088486671447754,
      "learning_rate": 4.738963203418575e-06,
      "loss": 2.2312,
      "step": 52106
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0957213640213013,
      "learning_rate": 4.7386130585229696e-06,
      "loss": 2.1847,
      "step": 52107
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1211425065994263,
      "learning_rate": 4.738262922546844e-06,
      "loss": 2.3967,
      "step": 52108
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0969212055206299,
      "learning_rate": 4.737912795490787e-06,
      "loss": 2.4939,
      "step": 52109
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9528806805610657,
      "learning_rate": 4.737562677355402e-06,
      "loss": 2.2806,
      "step": 52110
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0061073303222656,
      "learning_rate": 4.737212568141273e-06,
      "loss": 2.2941,
      "step": 52111
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1137830018997192,
      "learning_rate": 4.736862467849003e-06,
      "loss": 2.367,
      "step": 52112
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0030497312545776,
      "learning_rate": 4.736512376479179e-06,
      "loss": 2.2376,
      "step": 52113
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0452059507369995,
      "learning_rate": 4.7361622940323925e-06,
      "loss": 2.2011,
      "step": 52114
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0021452903747559,
      "learning_rate": 4.735812220509245e-06,
      "loss": 2.3804,
      "step": 52115
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0136849880218506,
      "learning_rate": 4.735462155910322e-06,
      "loss": 2.4816,
      "step": 52116
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0578066110610962,
      "learning_rate": 4.735112100236223e-06,
      "loss": 2.3297,
      "step": 52117
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9923486113548279,
      "learning_rate": 4.734762053487536e-06,
      "loss": 2.3392,
      "step": 52118
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.399692416191101,
      "learning_rate": 4.734412015664862e-06,
      "loss": 2.3022,
      "step": 52119
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0134403705596924,
      "learning_rate": 4.734061986768784e-06,
      "loss": 2.4918,
      "step": 52120
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.129984974861145,
      "learning_rate": 4.733711966799911e-06,
      "loss": 2.4747,
      "step": 52121
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9429689645767212,
      "learning_rate": 4.7333619557588175e-06,
      "loss": 2.2949,
      "step": 52122
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1761424541473389,
      "learning_rate": 4.73301195364611e-06,
      "loss": 2.288,
      "step": 52123
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1330814361572266,
      "learning_rate": 4.732661960462374e-06,
      "loss": 2.4104,
      "step": 52124
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0937631130218506,
      "learning_rate": 4.732311976208209e-06,
      "loss": 2.1423,
      "step": 52125
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1360386610031128,
      "learning_rate": 4.731962000884204e-06,
      "loss": 2.5647,
      "step": 52126
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0432943105697632,
      "learning_rate": 4.731612034490956e-06,
      "loss": 2.5177,
      "step": 52127
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1946605443954468,
      "learning_rate": 4.731262077029055e-06,
      "loss": 2.2699,
      "step": 52128
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1499346494674683,
      "learning_rate": 4.730912128499099e-06,
      "loss": 2.2989,
      "step": 52129
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1903905868530273,
      "learning_rate": 4.730562188901673e-06,
      "loss": 2.2553,
      "step": 52130
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4362331628799438,
      "learning_rate": 4.7302122582373795e-06,
      "loss": 2.2711,
      "step": 52131
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1867238283157349,
      "learning_rate": 4.729862336506804e-06,
      "loss": 2.4249,
      "step": 52132
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0329030752182007,
      "learning_rate": 4.729512423710547e-06,
      "loss": 2.4197,
      "step": 52133
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1957721710205078,
      "learning_rate": 4.729162519849198e-06,
      "loss": 2.0899,
      "step": 52134
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0995392799377441,
      "learning_rate": 4.72881262492335e-06,
      "loss": 2.2173,
      "step": 52135
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.113341212272644,
      "learning_rate": 4.728462738933593e-06,
      "loss": 2.5069,
      "step": 52136
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1918628215789795,
      "learning_rate": 4.728112861880525e-06,
      "loss": 2.4392,
      "step": 52137
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0792298316955566,
      "learning_rate": 4.727762993764736e-06,
      "loss": 2.5207,
      "step": 52138
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.12641441822052,
      "learning_rate": 4.727413134586824e-06,
      "loss": 2.0263,
      "step": 52139
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0109432935714722,
      "learning_rate": 4.727063284347374e-06,
      "loss": 2.2866,
      "step": 52140
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0908995866775513,
      "learning_rate": 4.726713443046989e-06,
      "loss": 2.1935,
      "step": 52141
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1226571798324585,
      "learning_rate": 4.726363610686255e-06,
      "loss": 2.3126,
      "step": 52142
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0687220096588135,
      "learning_rate": 4.726013787265765e-06,
      "loss": 2.2553,
      "step": 52143
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1002874374389648,
      "learning_rate": 4.7256639727861165e-06,
      "loss": 2.2768,
      "step": 52144
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.095047116279602,
      "learning_rate": 4.7253141672478975e-06,
      "loss": 2.2258,
      "step": 52145
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9946572780609131,
      "learning_rate": 4.724964370651707e-06,
      "loss": 2.458,
      "step": 52146
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0414336919784546,
      "learning_rate": 4.72461458299813e-06,
      "loss": 2.4418,
      "step": 52147
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.067562222480774,
      "learning_rate": 4.724264804287772e-06,
      "loss": 2.4767,
      "step": 52148
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1201890707015991,
      "learning_rate": 4.7239150345212115e-06,
      "loss": 2.2531,
      "step": 52149
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1996402740478516,
      "learning_rate": 4.72356527369905e-06,
      "loss": 2.1737,
      "step": 52150
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1452503204345703,
      "learning_rate": 4.723215521821876e-06,
      "loss": 2.3912,
      "step": 52151
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0437862873077393,
      "learning_rate": 4.722865778890289e-06,
      "loss": 2.5392,
      "step": 52152
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0528874397277832,
      "learning_rate": 4.722516044904872e-06,
      "loss": 2.258,
      "step": 52153
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1435825824737549,
      "learning_rate": 4.722166319866229e-06,
      "loss": 2.4848,
      "step": 52154
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.040225625038147,
      "learning_rate": 4.721816603774943e-06,
      "loss": 2.1137,
      "step": 52155
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0562325716018677,
      "learning_rate": 4.721466896631615e-06,
      "loss": 2.3851,
      "step": 52156
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.24784517288208,
      "learning_rate": 4.721117198436832e-06,
      "loss": 2.1774,
      "step": 52157
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1000022888183594,
      "learning_rate": 4.720767509191192e-06,
      "loss": 2.2812,
      "step": 52158
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1540582180023193,
      "learning_rate": 4.72041782889528e-06,
      "loss": 2.3422,
      "step": 52159
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2313791513442993,
      "learning_rate": 4.7200681575496985e-06,
      "loss": 2.3828,
      "step": 52160
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2137541770935059,
      "learning_rate": 4.719718495155036e-06,
      "loss": 2.3972,
      "step": 52161
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1133941411972046,
      "learning_rate": 4.719368841711885e-06,
      "loss": 2.2531,
      "step": 52162
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0933513641357422,
      "learning_rate": 4.719019197220832e-06,
      "loss": 2.4416,
      "step": 52163
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1420314311981201,
      "learning_rate": 4.718669561682482e-06,
      "loss": 2.513,
      "step": 52164
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1558948755264282,
      "learning_rate": 4.718319935097417e-06,
      "loss": 2.2451,
      "step": 52165
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0187338590621948,
      "learning_rate": 4.717970317466236e-06,
      "loss": 2.1439,
      "step": 52166
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.08396315574646,
      "learning_rate": 4.717620708789528e-06,
      "loss": 2.1463,
      "step": 52167
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.006964921951294,
      "learning_rate": 4.717271109067891e-06,
      "loss": 2.5017,
      "step": 52168
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1825796365737915,
      "learning_rate": 4.716921518301915e-06,
      "loss": 2.2178,
      "step": 52169
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2763183116912842,
      "learning_rate": 4.716571936492187e-06,
      "loss": 2.2187,
      "step": 52170
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1181697845458984,
      "learning_rate": 4.716222363639309e-06,
      "loss": 2.4483,
      "step": 52171
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.124691367149353,
      "learning_rate": 4.7158727997438645e-06,
      "loss": 2.2538,
      "step": 52172
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0330599546432495,
      "learning_rate": 4.715523244806456e-06,
      "loss": 2.4443,
      "step": 52173
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1250650882720947,
      "learning_rate": 4.71517369882767e-06,
      "loss": 2.3514,
      "step": 52174
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1239311695098877,
      "learning_rate": 4.714824161808099e-06,
      "loss": 2.2532,
      "step": 52175
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1171398162841797,
      "learning_rate": 4.714474633748333e-06,
      "loss": 2.3864,
      "step": 52176
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0798823833465576,
      "learning_rate": 4.714125114648973e-06,
      "loss": 2.5153,
      "step": 52177
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.202232837677002,
      "learning_rate": 4.713775604510603e-06,
      "loss": 2.4631,
      "step": 52178
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0449520349502563,
      "learning_rate": 4.713426103333822e-06,
      "loss": 2.4236,
      "step": 52179
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2240591049194336,
      "learning_rate": 4.713076611119215e-06,
      "loss": 2.2028,
      "step": 52180
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3695627450942993,
      "learning_rate": 4.712727127867383e-06,
      "loss": 2.2297,
      "step": 52181
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0644558668136597,
      "learning_rate": 4.712377653578912e-06,
      "loss": 2.4286,
      "step": 52182
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2657071352005005,
      "learning_rate": 4.7120281882544005e-06,
      "loss": 2.2624,
      "step": 52183
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1263177394866943,
      "learning_rate": 4.711678731894433e-06,
      "loss": 2.2147,
      "step": 52184
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1495249271392822,
      "learning_rate": 4.7113292844996106e-06,
      "loss": 2.3462,
      "step": 52185
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.237014889717102,
      "learning_rate": 4.710979846070518e-06,
      "loss": 2.1635,
      "step": 52186
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0998356342315674,
      "learning_rate": 4.710630416607758e-06,
      "loss": 2.104,
      "step": 52187
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9999145269393921,
      "learning_rate": 4.710280996111908e-06,
      "loss": 2.2917,
      "step": 52188
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1161876916885376,
      "learning_rate": 4.7099315845835725e-06,
      "loss": 2.3049,
      "step": 52189
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0043244361877441,
      "learning_rate": 4.709582182023336e-06,
      "loss": 2.4465,
      "step": 52190
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.058037519454956,
      "learning_rate": 4.709232788431799e-06,
      "loss": 2.2974,
      "step": 52191
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0363261699676514,
      "learning_rate": 4.708883403809546e-06,
      "loss": 2.1527,
      "step": 52192
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0284491777420044,
      "learning_rate": 4.708534028157175e-06,
      "loss": 2.3008,
      "step": 52193
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1008943319320679,
      "learning_rate": 4.708184661475272e-06,
      "loss": 2.249,
      "step": 52194
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.138259768486023,
      "learning_rate": 4.707835303764438e-06,
      "loss": 2.3296,
      "step": 52195
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1628605127334595,
      "learning_rate": 4.707485955025256e-06,
      "loss": 2.5929,
      "step": 52196
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0121245384216309,
      "learning_rate": 4.707136615258328e-06,
      "loss": 2.482,
      "step": 52197
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0555260181427002,
      "learning_rate": 4.706787284464239e-06,
      "loss": 2.353,
      "step": 52198
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3532624244689941,
      "learning_rate": 4.7064379626435805e-06,
      "loss": 2.3513,
      "step": 52199
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0385068655014038,
      "learning_rate": 4.706088649796952e-06,
      "loss": 2.2379,
      "step": 52200
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.025649905204773,
      "learning_rate": 4.705739345924939e-06,
      "loss": 2.2303,
      "step": 52201
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9838306307792664,
      "learning_rate": 4.705390051028133e-06,
      "loss": 2.2015,
      "step": 52202
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2793370485305786,
      "learning_rate": 4.705040765107133e-06,
      "loss": 2.3174,
      "step": 52203
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1374080181121826,
      "learning_rate": 4.704691488162526e-06,
      "loss": 2.4256,
      "step": 52204
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0392018556594849,
      "learning_rate": 4.704342220194902e-06,
      "loss": 2.2653,
      "step": 52205
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1327298879623413,
      "learning_rate": 4.7039929612048595e-06,
      "loss": 2.2609,
      "step": 52206
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0222171545028687,
      "learning_rate": 4.703643711192985e-06,
      "loss": 2.3897,
      "step": 52207
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.215836524963379,
      "learning_rate": 4.703294470159875e-06,
      "loss": 2.2872,
      "step": 52208
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1025879383087158,
      "learning_rate": 4.702945238106116e-06,
      "loss": 2.3889,
      "step": 52209
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.343212604522705,
      "learning_rate": 4.702596015032308e-06,
      "loss": 2.3897,
      "step": 52210
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.078714370727539,
      "learning_rate": 4.702246800939034e-06,
      "loss": 2.2946,
      "step": 52211
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1295676231384277,
      "learning_rate": 4.701897595826894e-06,
      "loss": 2.2577,
      "step": 52212
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2122095823287964,
      "learning_rate": 4.701548399696474e-06,
      "loss": 2.345,
      "step": 52213
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0270432233810425,
      "learning_rate": 4.701199212548375e-06,
      "loss": 2.3421,
      "step": 52214
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2307015657424927,
      "learning_rate": 4.700850034383176e-06,
      "loss": 2.2103,
      "step": 52215
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1395354270935059,
      "learning_rate": 4.700500865201477e-06,
      "loss": 2.2628,
      "step": 52216
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0531738996505737,
      "learning_rate": 4.700151705003867e-06,
      "loss": 2.4131,
      "step": 52217
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2185022830963135,
      "learning_rate": 4.699802553790942e-06,
      "loss": 2.3866,
      "step": 52218
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0196094512939453,
      "learning_rate": 4.699453411563287e-06,
      "loss": 2.3324,
      "step": 52219
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1859664916992188,
      "learning_rate": 4.6991042783215025e-06,
      "loss": 2.4575,
      "step": 52220
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0313037633895874,
      "learning_rate": 4.698755154066172e-06,
      "loss": 2.3947,
      "step": 52221
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0435914993286133,
      "learning_rate": 4.698406038797895e-06,
      "loss": 2.3731,
      "step": 52222
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0597466230392456,
      "learning_rate": 4.698056932517257e-06,
      "loss": 2.2492,
      "step": 52223
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0073678493499756,
      "learning_rate": 4.697707835224854e-06,
      "loss": 2.006,
      "step": 52224
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1298060417175293,
      "learning_rate": 4.697358746921278e-06,
      "loss": 2.1542,
      "step": 52225
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1472200155258179,
      "learning_rate": 4.697009667607114e-06,
      "loss": 2.2266,
      "step": 52226
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1365771293640137,
      "learning_rate": 4.696660597282964e-06,
      "loss": 2.3147,
      "step": 52227
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0643770694732666,
      "learning_rate": 4.696311535949415e-06,
      "loss": 2.2974,
      "step": 52228
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.084351897239685,
      "learning_rate": 4.695962483607052e-06,
      "loss": 2.2906,
      "step": 52229
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9556892514228821,
      "learning_rate": 4.69561344025648e-06,
      "loss": 2.2473,
      "step": 52230
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1652891635894775,
      "learning_rate": 4.695264405898282e-06,
      "loss": 2.3376,
      "step": 52231
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0860140323638916,
      "learning_rate": 4.69491538053305e-06,
      "loss": 2.4604,
      "step": 52232
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.105916142463684,
      "learning_rate": 4.694566364161378e-06,
      "loss": 2.3826,
      "step": 52233
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.079162359237671,
      "learning_rate": 4.694217356783856e-06,
      "loss": 2.0048,
      "step": 52234
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9891235828399658,
      "learning_rate": 4.693868358401078e-06,
      "loss": 2.5209,
      "step": 52235
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9884274005889893,
      "learning_rate": 4.693519369013632e-06,
      "loss": 2.5292,
      "step": 52236
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1257989406585693,
      "learning_rate": 4.693170388622114e-06,
      "loss": 2.2893,
      "step": 52237
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1135673522949219,
      "learning_rate": 4.692821417227111e-06,
      "loss": 2.4493,
      "step": 52238
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.076941728591919,
      "learning_rate": 4.692472454829221e-06,
      "loss": 2.2437,
      "step": 52239
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0737143754959106,
      "learning_rate": 4.692123501429032e-06,
      "loss": 2.1812,
      "step": 52240
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9955441951751709,
      "learning_rate": 4.691774557027134e-06,
      "loss": 2.3614,
      "step": 52241
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1098170280456543,
      "learning_rate": 4.691425621624117e-06,
      "loss": 2.2626,
      "step": 52242
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1442469358444214,
      "learning_rate": 4.69107669522058e-06,
      "loss": 2.3289,
      "step": 52243
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.257602572441101,
      "learning_rate": 4.690727777817105e-06,
      "loss": 2.3378,
      "step": 52244
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0532413721084595,
      "learning_rate": 4.690378869414292e-06,
      "loss": 2.4478,
      "step": 52245
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1618341207504272,
      "learning_rate": 4.690029970012725e-06,
      "loss": 2.3888,
      "step": 52246
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0340304374694824,
      "learning_rate": 4.6896810796130035e-06,
      "loss": 2.4578,
      "step": 52247
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.141784906387329,
      "learning_rate": 4.689332198215712e-06,
      "loss": 2.2464,
      "step": 52248
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0809355974197388,
      "learning_rate": 4.6889833258214465e-06,
      "loss": 2.3205,
      "step": 52249
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1096242666244507,
      "learning_rate": 4.688634462430794e-06,
      "loss": 2.3301,
      "step": 52250
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1949940919876099,
      "learning_rate": 4.688285608044353e-06,
      "loss": 2.3076,
      "step": 52251
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0959713459014893,
      "learning_rate": 4.687936762662706e-06,
      "loss": 2.2619,
      "step": 52252
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1013866662979126,
      "learning_rate": 4.687587926286457e-06,
      "loss": 2.2651,
      "step": 52253
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0565612316131592,
      "learning_rate": 4.687239098916181e-06,
      "loss": 2.2407,
      "step": 52254
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0842680931091309,
      "learning_rate": 4.686890280552482e-06,
      "loss": 2.297,
      "step": 52255
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1954542398452759,
      "learning_rate": 4.6865414711959426e-06,
      "loss": 2.3698,
      "step": 52256
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.004942774772644,
      "learning_rate": 4.6861926708471626e-06,
      "loss": 2.227,
      "step": 52257
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0795063972473145,
      "learning_rate": 4.685843879506726e-06,
      "loss": 2.3101,
      "step": 52258
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.234450101852417,
      "learning_rate": 4.685495097175231e-06,
      "loss": 2.2883,
      "step": 52259
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.160617470741272,
      "learning_rate": 4.685146323853264e-06,
      "loss": 2.3788,
      "step": 52260
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0884313583374023,
      "learning_rate": 4.684797559541416e-06,
      "loss": 2.3452,
      "step": 52261
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.126239538192749,
      "learning_rate": 4.684448804240281e-06,
      "loss": 2.1542,
      "step": 52262
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0421103239059448,
      "learning_rate": 4.684100057950447e-06,
      "loss": 2.3081,
      "step": 52263
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1974347829818726,
      "learning_rate": 4.683751320672511e-06,
      "loss": 2.3911,
      "step": 52264
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1039906740188599,
      "learning_rate": 4.683402592407054e-06,
      "loss": 2.3601,
      "step": 52265
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2258695363998413,
      "learning_rate": 4.6830538731546834e-06,
      "loss": 2.3262,
      "step": 52266
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0502125024795532,
      "learning_rate": 4.682705162915973e-06,
      "loss": 2.3494,
      "step": 52267
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1312566995620728,
      "learning_rate": 4.682356461691525e-06,
      "loss": 2.2024,
      "step": 52268
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0738322734832764,
      "learning_rate": 4.6820077694819236e-06,
      "loss": 2.2981,
      "step": 52269
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0981005430221558,
      "learning_rate": 4.681659086287766e-06,
      "loss": 2.35,
      "step": 52270
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1329048871994019,
      "learning_rate": 4.681310412109638e-06,
      "loss": 2.3246,
      "step": 52271
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0499378442764282,
      "learning_rate": 4.680961746948137e-06,
      "loss": 2.5176,
      "step": 52272
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.111218810081482,
      "learning_rate": 4.680613090803846e-06,
      "loss": 2.3459,
      "step": 52273
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2495191097259521,
      "learning_rate": 4.680264443677364e-06,
      "loss": 2.517,
      "step": 52274
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.124292254447937,
      "learning_rate": 4.679915805569276e-06,
      "loss": 2.2177,
      "step": 52275
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.025905966758728,
      "learning_rate": 4.67956717648018e-06,
      "loss": 2.4167,
      "step": 52276
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4516913890838623,
      "learning_rate": 4.679218556410658e-06,
      "loss": 2.4129,
      "step": 52277
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1424283981323242,
      "learning_rate": 4.678869945361309e-06,
      "loss": 2.331,
      "step": 52278
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0512458086013794,
      "learning_rate": 4.678521343332721e-06,
      "loss": 2.2208,
      "step": 52279
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0557441711425781,
      "learning_rate": 4.678172750325485e-06,
      "loss": 2.322,
      "step": 52280
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1446044445037842,
      "learning_rate": 4.677824166340187e-06,
      "loss": 2.2662,
      "step": 52281
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0808448791503906,
      "learning_rate": 4.677475591377426e-06,
      "loss": 2.0338,
      "step": 52282
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.028567910194397,
      "learning_rate": 4.677127025437786e-06,
      "loss": 2.226,
      "step": 52283
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0206892490386963,
      "learning_rate": 4.676778468521866e-06,
      "loss": 2.3871,
      "step": 52284
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1288552284240723,
      "learning_rate": 4.6764299206302475e-06,
      "loss": 2.413,
      "step": 52285
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1268221139907837,
      "learning_rate": 4.676081381763531e-06,
      "loss": 2.4783,
      "step": 52286
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2245452404022217,
      "learning_rate": 4.675732851922302e-06,
      "loss": 2.0776,
      "step": 52287
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1835211515426636,
      "learning_rate": 4.675384331107148e-06,
      "loss": 2.2477,
      "step": 52288
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0935413837432861,
      "learning_rate": 4.675035819318667e-06,
      "loss": 2.4929,
      "step": 52289
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1133121252059937,
      "learning_rate": 4.674687316557444e-06,
      "loss": 2.3486,
      "step": 52290
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1755177974700928,
      "learning_rate": 4.674338822824075e-06,
      "loss": 2.4357,
      "step": 52291
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1136219501495361,
      "learning_rate": 4.6739903381191445e-06,
      "loss": 2.3721,
      "step": 52292
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0904242992401123,
      "learning_rate": 4.673641862443254e-06,
      "loss": 2.3641,
      "step": 52293
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1572387218475342,
      "learning_rate": 4.6732933957969814e-06,
      "loss": 2.4886,
      "step": 52294
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0711194276809692,
      "learning_rate": 4.672944938180925e-06,
      "loss": 2.6324,
      "step": 52295
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0761351585388184,
      "learning_rate": 4.672596489595671e-06,
      "loss": 2.2779,
      "step": 52296
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1626818180084229,
      "learning_rate": 4.672248050041817e-06,
      "loss": 2.3093,
      "step": 52297
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0778491497039795,
      "learning_rate": 4.671899619519945e-06,
      "loss": 2.32,
      "step": 52298
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0529941320419312,
      "learning_rate": 4.671551198030654e-06,
      "loss": 2.2815,
      "step": 52299
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0073317289352417,
      "learning_rate": 4.671202785574528e-06,
      "loss": 2.4795,
      "step": 52300
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0871107578277588,
      "learning_rate": 4.670854382152163e-06,
      "loss": 2.3824,
      "step": 52301
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0434763431549072,
      "learning_rate": 4.670505987764144e-06,
      "loss": 2.5134,
      "step": 52302
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1280481815338135,
      "learning_rate": 4.670157602411069e-06,
      "loss": 2.3241,
      "step": 52303
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0853945016860962,
      "learning_rate": 4.66980922609352e-06,
      "loss": 2.4514,
      "step": 52304
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1241718530654907,
      "learning_rate": 4.669460858812097e-06,
      "loss": 2.3592,
      "step": 52305
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2340736389160156,
      "learning_rate": 4.669112500567384e-06,
      "loss": 2.398,
      "step": 52306
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.15188729763031,
      "learning_rate": 4.668764151359975e-06,
      "loss": 2.2872,
      "step": 52307
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1795077323913574,
      "learning_rate": 4.668415811190454e-06,
      "loss": 2.4009,
      "step": 52308
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0734754800796509,
      "learning_rate": 4.668067480059419e-06,
      "loss": 2.5564,
      "step": 52309
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1355767250061035,
      "learning_rate": 4.667719157967455e-06,
      "loss": 2.4146,
      "step": 52310
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1159275770187378,
      "learning_rate": 4.667370844915159e-06,
      "loss": 2.3448,
      "step": 52311
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.089215636253357,
      "learning_rate": 4.667022540903114e-06,
      "loss": 2.3392,
      "step": 52312
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0882142782211304,
      "learning_rate": 4.666674245931917e-06,
      "loss": 2.3269,
      "step": 52313
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.278356671333313,
      "learning_rate": 4.666325960002154e-06,
      "loss": 2.2442,
      "step": 52314
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.084601879119873,
      "learning_rate": 4.665977683114418e-06,
      "loss": 2.1361,
      "step": 52315
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0422848463058472,
      "learning_rate": 4.6656294152693e-06,
      "loss": 2.1239,
      "step": 52316
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1637088060379028,
      "learning_rate": 4.665281156467386e-06,
      "loss": 2.3085,
      "step": 52317
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9939671158790588,
      "learning_rate": 4.664932906709273e-06,
      "loss": 2.4562,
      "step": 52318
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.111034870147705,
      "learning_rate": 4.6645846659955466e-06,
      "loss": 2.434,
      "step": 52319
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.032367467880249,
      "learning_rate": 4.6642364343267955e-06,
      "loss": 2.5006,
      "step": 52320
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2760426998138428,
      "learning_rate": 4.663888211703616e-06,
      "loss": 2.1387,
      "step": 52321
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1130821704864502,
      "learning_rate": 4.663539998126595e-06,
      "loss": 2.285,
      "step": 52322
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0978714227676392,
      "learning_rate": 4.66319179359632e-06,
      "loss": 2.3616,
      "step": 52323
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.065487027168274,
      "learning_rate": 4.662843598113388e-06,
      "loss": 1.9934,
      "step": 52324
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0781147480010986,
      "learning_rate": 4.662495411678381e-06,
      "loss": 2.4292,
      "step": 52325
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0269122123718262,
      "learning_rate": 4.662147234291898e-06,
      "loss": 2.3621,
      "step": 52326
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.028124213218689,
      "learning_rate": 4.661799065954522e-06,
      "loss": 2.0324,
      "step": 52327
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1168674230575562,
      "learning_rate": 4.66145090666685e-06,
      "loss": 2.2742,
      "step": 52328
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0686821937561035,
      "learning_rate": 4.661102756429466e-06,
      "loss": 2.3518,
      "step": 52329
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0999829769134521,
      "learning_rate": 4.6607546152429635e-06,
      "loss": 2.3329,
      "step": 52330
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0484598875045776,
      "learning_rate": 4.660406483107931e-06,
      "loss": 2.2128,
      "step": 52331
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9913228750228882,
      "learning_rate": 4.660058360024966e-06,
      "loss": 2.2913,
      "step": 52332
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0449135303497314,
      "learning_rate": 4.6597102459946465e-06,
      "loss": 2.4102,
      "step": 52333
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2983518838882446,
      "learning_rate": 4.6593621410175704e-06,
      "loss": 2.2534,
      "step": 52334
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0924010276794434,
      "learning_rate": 4.659014045094324e-06,
      "loss": 2.3613,
      "step": 52335
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0401638746261597,
      "learning_rate": 4.658665958225501e-06,
      "loss": 2.1496,
      "step": 52336
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0789846181869507,
      "learning_rate": 4.658317880411687e-06,
      "loss": 2.2654,
      "step": 52337
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.008506178855896,
      "learning_rate": 4.657969811653479e-06,
      "loss": 2.4012,
      "step": 52338
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1331493854522705,
      "learning_rate": 4.65762175195146e-06,
      "loss": 2.1344,
      "step": 52339
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.197242259979248,
      "learning_rate": 4.657273701306225e-06,
      "loss": 2.3145,
      "step": 52340
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1958820819854736,
      "learning_rate": 4.65692565971836e-06,
      "loss": 2.3366,
      "step": 52341
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.062491536140442,
      "learning_rate": 4.65657762718846e-06,
      "loss": 2.363,
      "step": 52342
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0324814319610596,
      "learning_rate": 4.656229603717113e-06,
      "loss": 2.1643,
      "step": 52343
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0823053121566772,
      "learning_rate": 4.655881589304904e-06,
      "loss": 2.3466,
      "step": 52344
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1268445253372192,
      "learning_rate": 4.655533583952431e-06,
      "loss": 2.4181,
      "step": 52345
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0613949298858643,
      "learning_rate": 4.65518558766028e-06,
      "loss": 2.2319,
      "step": 52346
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0382493734359741,
      "learning_rate": 4.654837600429037e-06,
      "loss": 2.3487,
      "step": 52347
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0419752597808838,
      "learning_rate": 4.6544896222593e-06,
      "loss": 2.4553,
      "step": 52348
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1289926767349243,
      "learning_rate": 4.654141653151655e-06,
      "loss": 2.2818,
      "step": 52349
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0769485235214233,
      "learning_rate": 4.653793693106688e-06,
      "loss": 2.226,
      "step": 52350
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1709561347961426,
      "learning_rate": 4.653445742124997e-06,
      "loss": 2.3945,
      "step": 52351
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1626882553100586,
      "learning_rate": 4.653097800207164e-06,
      "loss": 2.6057,
      "step": 52352
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.055765151977539,
      "learning_rate": 4.652749867353784e-06,
      "loss": 2.5395,
      "step": 52353
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0503380298614502,
      "learning_rate": 4.652401943565443e-06,
      "loss": 2.383,
      "step": 52354
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0678822994232178,
      "learning_rate": 4.652054028842737e-06,
      "loss": 2.3393,
      "step": 52355
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9719595909118652,
      "learning_rate": 4.651706123186248e-06,
      "loss": 2.1942,
      "step": 52356
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0818760395050049,
      "learning_rate": 4.651358226596574e-06,
      "loss": 2.2185,
      "step": 52357
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.517451286315918,
      "learning_rate": 4.651010339074295e-06,
      "loss": 2.5999,
      "step": 52358
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1305606365203857,
      "learning_rate": 4.650662460620015e-06,
      "loss": 2.3631,
      "step": 52359
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1064330339431763,
      "learning_rate": 4.650314591234308e-06,
      "loss": 2.3445,
      "step": 52360
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.013358235359192,
      "learning_rate": 4.649966730917774e-06,
      "loss": 2.348,
      "step": 52361
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1236625909805298,
      "learning_rate": 4.6496188796709965e-06,
      "loss": 2.6313,
      "step": 52362
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9953751564025879,
      "learning_rate": 4.64927103749457e-06,
      "loss": 2.2392,
      "step": 52363
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1372190713882446,
      "learning_rate": 4.64892320438908e-06,
      "loss": 2.5402,
      "step": 52364
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.025986909866333,
      "learning_rate": 4.648575380355122e-06,
      "loss": 2.4119,
      "step": 52365
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1103206872940063,
      "learning_rate": 4.648227565393277e-06,
      "loss": 2.3711,
      "step": 52366
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9969614148139954,
      "learning_rate": 4.6478797595041455e-06,
      "loss": 2.4558,
      "step": 52367
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7190420627593994,
      "learning_rate": 4.647531962688306e-06,
      "loss": 2.3994,
      "step": 52368
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1104246377944946,
      "learning_rate": 4.647184174946358e-06,
      "loss": 2.1423,
      "step": 52369
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.209326982498169,
      "learning_rate": 4.646836396278884e-06,
      "loss": 2.1006,
      "step": 52370
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0961570739746094,
      "learning_rate": 4.64648862668648e-06,
      "loss": 2.2655,
      "step": 52371
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.192637324333191,
      "learning_rate": 4.64614086616973e-06,
      "loss": 2.25,
      "step": 52372
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1485373973846436,
      "learning_rate": 4.645793114729226e-06,
      "loss": 2.3048,
      "step": 52373
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0966823101043701,
      "learning_rate": 4.6454453723655535e-06,
      "loss": 2.4893,
      "step": 52374
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0638681650161743,
      "learning_rate": 4.645097639079309e-06,
      "loss": 2.3018,
      "step": 52375
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0747265815734863,
      "learning_rate": 4.644749914871074e-06,
      "loss": 2.3694,
      "step": 52376
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.090429425239563,
      "learning_rate": 4.644402199741447e-06,
      "loss": 2.3331,
      "step": 52377
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0520371198654175,
      "learning_rate": 4.644054493691013e-06,
      "loss": 2.4347,
      "step": 52378
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9994553923606873,
      "learning_rate": 4.643706796720356e-06,
      "loss": 2.5351,
      "step": 52379
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1483783721923828,
      "learning_rate": 4.643359108830076e-06,
      "loss": 2.4488,
      "step": 52380
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0840874910354614,
      "learning_rate": 4.643011430020753e-06,
      "loss": 2.2737,
      "step": 52381
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.156928300857544,
      "learning_rate": 4.642663760292984e-06,
      "loss": 2.2101,
      "step": 52382
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9709038138389587,
      "learning_rate": 4.642316099647351e-06,
      "loss": 2.1315,
      "step": 52383
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0657641887664795,
      "learning_rate": 4.641968448084451e-06,
      "loss": 2.2872,
      "step": 52384
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.22213876247406,
      "learning_rate": 4.641620805604871e-06,
      "loss": 2.5683,
      "step": 52385
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1379116773605347,
      "learning_rate": 4.641273172209197e-06,
      "loss": 2.2976,
      "step": 52386
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2150828838348389,
      "learning_rate": 4.640925547898019e-06,
      "loss": 2.3845,
      "step": 52387
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.206073522567749,
      "learning_rate": 4.64057793267193e-06,
      "loss": 2.3067,
      "step": 52388
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0176092386245728,
      "learning_rate": 4.640230326531513e-06,
      "loss": 2.3368,
      "step": 52389
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2264434099197388,
      "learning_rate": 4.639882729477366e-06,
      "loss": 2.2627,
      "step": 52390
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.062965989112854,
      "learning_rate": 4.639535141510069e-06,
      "loss": 2.2811,
      "step": 52391
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0082387924194336,
      "learning_rate": 4.6391875626302204e-06,
      "loss": 2.4763,
      "step": 52392
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2297954559326172,
      "learning_rate": 4.638839992838401e-06,
      "loss": 2.3892,
      "step": 52393
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.129298210144043,
      "learning_rate": 4.638492432135206e-06,
      "loss": 2.3194,
      "step": 52394
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1046308279037476,
      "learning_rate": 4.638144880521222e-06,
      "loss": 2.3928,
      "step": 52395
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1644437313079834,
      "learning_rate": 4.63779733799704e-06,
      "loss": 2.1779,
      "step": 52396
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2183785438537598,
      "learning_rate": 4.637449804563244e-06,
      "loss": 2.4826,
      "step": 52397
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0799379348754883,
      "learning_rate": 4.637102280220436e-06,
      "loss": 2.3243,
      "step": 52398
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.103455662727356,
      "learning_rate": 4.636754764969187e-06,
      "loss": 2.1266,
      "step": 52399
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.051655888557434,
      "learning_rate": 4.6364072588101e-06,
      "loss": 2.3083,
      "step": 52400
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.397783637046814,
      "learning_rate": 4.6360597617437555e-06,
      "loss": 2.2599,
      "step": 52401
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.100206732749939,
      "learning_rate": 4.63571227377075e-06,
      "loss": 2.1319,
      "step": 52402
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0360381603240967,
      "learning_rate": 4.635364794891666e-06,
      "loss": 2.2011,
      "step": 52403
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0837254524230957,
      "learning_rate": 4.635017325107099e-06,
      "loss": 2.4626,
      "step": 52404
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9927689433097839,
      "learning_rate": 4.6346698644176345e-06,
      "loss": 2.2821,
      "step": 52405
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.111765742301941,
      "learning_rate": 4.634322412823859e-06,
      "loss": 2.4205,
      "step": 52406
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1723040342330933,
      "learning_rate": 4.633974970326367e-06,
      "loss": 2.2404,
      "step": 52407
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9928238987922668,
      "learning_rate": 4.6336275369257426e-06,
      "loss": 2.2696,
      "step": 52408
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1340025663375854,
      "learning_rate": 4.633280112622579e-06,
      "loss": 2.5776,
      "step": 52409
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0145704746246338,
      "learning_rate": 4.632932697417461e-06,
      "loss": 1.9747,
      "step": 52410
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0569924116134644,
      "learning_rate": 4.632585291310986e-06,
      "loss": 2.2485,
      "step": 52411
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2242662906646729,
      "learning_rate": 4.632237894303729e-06,
      "loss": 2.2664,
      "step": 52412
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.267426609992981,
      "learning_rate": 4.631890506396292e-06,
      "loss": 2.5302,
      "step": 52413
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0701435804367065,
      "learning_rate": 4.631543127589253e-06,
      "loss": 2.2674,
      "step": 52414
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2003397941589355,
      "learning_rate": 4.631195757883211e-06,
      "loss": 2.5061,
      "step": 52415
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1600792407989502,
      "learning_rate": 4.630848397278747e-06,
      "loss": 2.2299,
      "step": 52416
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0549968481063843,
      "learning_rate": 4.630501045776457e-06,
      "loss": 2.329,
      "step": 52417
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0170831680297852,
      "learning_rate": 4.630153703376921e-06,
      "loss": 2.1461,
      "step": 52418
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.052819848060608,
      "learning_rate": 4.629806370080737e-06,
      "loss": 2.0535,
      "step": 52419
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0252656936645508,
      "learning_rate": 4.629459045888487e-06,
      "loss": 2.3065,
      "step": 52420
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1160221099853516,
      "learning_rate": 4.629111730800766e-06,
      "loss": 2.2909,
      "step": 52421
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4280213117599487,
      "learning_rate": 4.628764424818155e-06,
      "loss": 2.4827,
      "step": 52422
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3441187143325806,
      "learning_rate": 4.628417127941251e-06,
      "loss": 2.4893,
      "step": 52423
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0784649848937988,
      "learning_rate": 4.6280698401706395e-06,
      "loss": 2.4948,
      "step": 52424
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.088691234588623,
      "learning_rate": 4.6277225615069075e-06,
      "loss": 2.1474,
      "step": 52425
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.964126706123352,
      "learning_rate": 4.627375291950642e-06,
      "loss": 2.2186,
      "step": 52426
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1113815307617188,
      "learning_rate": 4.627028031502438e-06,
      "loss": 2.4083,
      "step": 52427
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0100942850112915,
      "learning_rate": 4.626680780162878e-06,
      "loss": 2.3578,
      "step": 52428
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1471487283706665,
      "learning_rate": 4.626333537932555e-06,
      "loss": 2.3294,
      "step": 52429
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0708280801773071,
      "learning_rate": 4.6259863048120544e-06,
      "loss": 2.2095,
      "step": 52430
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1039420366287231,
      "learning_rate": 4.6256390808019705e-06,
      "loss": 2.3281,
      "step": 52431
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9841208457946777,
      "learning_rate": 4.625291865902882e-06,
      "loss": 2.3484,
      "step": 52432
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1793560981750488,
      "learning_rate": 4.624944660115389e-06,
      "loss": 2.3345,
      "step": 52433
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0295696258544922,
      "learning_rate": 4.624597463440075e-06,
      "loss": 2.4282,
      "step": 52434
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.089376449584961,
      "learning_rate": 4.624250275877523e-06,
      "loss": 2.3116,
      "step": 52435
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0680118799209595,
      "learning_rate": 4.6239030974283314e-06,
      "loss": 2.3942,
      "step": 52436
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0488296747207642,
      "learning_rate": 4.623555928093081e-06,
      "loss": 2.1651,
      "step": 52437
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0743802785873413,
      "learning_rate": 4.623208767872366e-06,
      "loss": 2.4194,
      "step": 52438
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0753138065338135,
      "learning_rate": 4.622861616766774e-06,
      "loss": 2.417,
      "step": 52439
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9739909768104553,
      "learning_rate": 4.62251447477689e-06,
      "loss": 2.4136,
      "step": 52440
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1490049362182617,
      "learning_rate": 4.622167341903302e-06,
      "loss": 2.3428,
      "step": 52441
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2060763835906982,
      "learning_rate": 4.621820218146605e-06,
      "loss": 2.2778,
      "step": 52442
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0414531230926514,
      "learning_rate": 4.621473103507379e-06,
      "loss": 2.3577,
      "step": 52443
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1136051416397095,
      "learning_rate": 4.6211259979862215e-06,
      "loss": 2.2763,
      "step": 52444
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2274316549301147,
      "learning_rate": 4.620778901583711e-06,
      "loss": 2.2638,
      "step": 52445
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.204325795173645,
      "learning_rate": 4.620431814300447e-06,
      "loss": 2.2751,
      "step": 52446
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9943854808807373,
      "learning_rate": 4.6200847361370065e-06,
      "loss": 2.4534,
      "step": 52447
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4840794801712036,
      "learning_rate": 4.619737667093987e-06,
      "loss": 2.5021,
      "step": 52448
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2198458909988403,
      "learning_rate": 4.619390607171972e-06,
      "loss": 2.085,
      "step": 52449
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1538622379302979,
      "learning_rate": 4.619043556371553e-06,
      "loss": 2.3977,
      "step": 52450
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1164591312408447,
      "learning_rate": 4.618696514693318e-06,
      "loss": 2.0628,
      "step": 52451
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0605021715164185,
      "learning_rate": 4.618349482137853e-06,
      "loss": 2.1236,
      "step": 52452
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1629893779754639,
      "learning_rate": 4.6180024587057435e-06,
      "loss": 2.4706,
      "step": 52453
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.232608675956726,
      "learning_rate": 4.617655444397587e-06,
      "loss": 2.3583,
      "step": 52454
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1876085996627808,
      "learning_rate": 4.61730843921396e-06,
      "loss": 2.4998,
      "step": 52455
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0241265296936035,
      "learning_rate": 4.616961443155462e-06,
      "loss": 2.0995,
      "step": 52456
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1774014234542847,
      "learning_rate": 4.616614456222673e-06,
      "loss": 2.3867,
      "step": 52457
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.207340121269226,
      "learning_rate": 4.616267478416189e-06,
      "loss": 2.4453,
      "step": 52458
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9281719326972961,
      "learning_rate": 4.615920509736589e-06,
      "loss": 1.9326,
      "step": 52459
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0439270734786987,
      "learning_rate": 4.615573550184469e-06,
      "loss": 2.4075,
      "step": 52460
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.098221778869629,
      "learning_rate": 4.615226599760415e-06,
      "loss": 2.4355,
      "step": 52461
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0695784091949463,
      "learning_rate": 4.614879658465011e-06,
      "loss": 2.362,
      "step": 52462
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1800169944763184,
      "learning_rate": 4.614532726298853e-06,
      "loss": 2.1675,
      "step": 52463
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1023131608963013,
      "learning_rate": 4.614185803262523e-06,
      "loss": 2.2494,
      "step": 52464
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1234878301620483,
      "learning_rate": 4.613838889356607e-06,
      "loss": 2.289,
      "step": 52465
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0462740659713745,
      "learning_rate": 4.613491984581703e-06,
      "loss": 2.4902,
      "step": 52466
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.071782112121582,
      "learning_rate": 4.613145088938391e-06,
      "loss": 2.3557,
      "step": 52467
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1455971002578735,
      "learning_rate": 4.612798202427259e-06,
      "loss": 2.5279,
      "step": 52468
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0421041250228882,
      "learning_rate": 4.612451325048899e-06,
      "loss": 2.2826,
      "step": 52469
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.141711950302124,
      "learning_rate": 4.612104456803896e-06,
      "loss": 2.4899,
      "step": 52470
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.103975534439087,
      "learning_rate": 4.611757597692841e-06,
      "loss": 2.3947,
      "step": 52471
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.238588809967041,
      "learning_rate": 4.611410747716316e-06,
      "loss": 2.3058,
      "step": 52472
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.103804349899292,
      "learning_rate": 4.611063906874919e-06,
      "loss": 2.2997,
      "step": 52473
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1048096418380737,
      "learning_rate": 4.610717075169226e-06,
      "loss": 2.1943,
      "step": 52474
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2090551853179932,
      "learning_rate": 4.610370252599837e-06,
      "loss": 2.2588,
      "step": 52475
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0969436168670654,
      "learning_rate": 4.61002343916733e-06,
      "loss": 2.6152,
      "step": 52476
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.078971266746521,
      "learning_rate": 4.609676634872306e-06,
      "loss": 2.3223,
      "step": 52477
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.127181053161621,
      "learning_rate": 4.609329839715335e-06,
      "loss": 2.0392,
      "step": 52478
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0785490274429321,
      "learning_rate": 4.608983053697018e-06,
      "loss": 2.5249,
      "step": 52479
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.169547200202942,
      "learning_rate": 4.608636276817935e-06,
      "loss": 2.4293,
      "step": 52480
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1066548824310303,
      "learning_rate": 4.608289509078681e-06,
      "loss": 2.4328,
      "step": 52481
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.964916467666626,
      "learning_rate": 4.607942750479839e-06,
      "loss": 2.4058,
      "step": 52482
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1042547225952148,
      "learning_rate": 4.607596001022001e-06,
      "loss": 2.3841,
      "step": 52483
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2386870384216309,
      "learning_rate": 4.607249260705749e-06,
      "loss": 2.4654,
      "step": 52484
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1144200563430786,
      "learning_rate": 4.6069025295316776e-06,
      "loss": 2.4131,
      "step": 52485
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1991071701049805,
      "learning_rate": 4.606555807500367e-06,
      "loss": 2.3628,
      "step": 52486
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1778087615966797,
      "learning_rate": 4.606209094612413e-06,
      "loss": 2.1973,
      "step": 52487
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1688708066940308,
      "learning_rate": 4.6058623908683955e-06,
      "loss": 2.3034,
      "step": 52488
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1409801244735718,
      "learning_rate": 4.6055156962689105e-06,
      "loss": 2.2403,
      "step": 52489
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.17977774143219,
      "learning_rate": 4.605169010814542e-06,
      "loss": 2.269,
      "step": 52490
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0937128067016602,
      "learning_rate": 4.604822334505876e-06,
      "loss": 2.297,
      "step": 52491
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0872560739517212,
      "learning_rate": 4.604475667343499e-06,
      "loss": 2.2918,
      "step": 52492
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.061614751815796,
      "learning_rate": 4.604129009328004e-06,
      "loss": 2.0169,
      "step": 52493
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1313458681106567,
      "learning_rate": 4.603782360459973e-06,
      "loss": 2.2194,
      "step": 52494
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.028056025505066,
      "learning_rate": 4.603435720739999e-06,
      "loss": 2.3535,
      "step": 52495
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.053413987159729,
      "learning_rate": 4.603089090168667e-06,
      "loss": 2.4212,
      "step": 52496
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0467877388000488,
      "learning_rate": 4.6027424687465624e-06,
      "loss": 2.4689,
      "step": 52497
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.321692943572998,
      "learning_rate": 4.602395856474279e-06,
      "loss": 2.3836,
      "step": 52498
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0450154542922974,
      "learning_rate": 4.602049253352395e-06,
      "loss": 2.3088,
      "step": 52499
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0687748193740845,
      "learning_rate": 4.6017026593815095e-06,
      "loss": 2.3103,
      "step": 52500
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.053446888923645,
      "learning_rate": 4.601356074562199e-06,
      "loss": 2.2305,
      "step": 52501
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.071662187576294,
      "learning_rate": 4.601009498895061e-06,
      "loss": 2.6515,
      "step": 52502
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1225239038467407,
      "learning_rate": 4.600662932380674e-06,
      "loss": 2.3942,
      "step": 52503
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9395812749862671,
      "learning_rate": 4.600316375019636e-06,
      "loss": 2.0784,
      "step": 52504
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0102254152297974,
      "learning_rate": 4.599969826812523e-06,
      "loss": 2.2478,
      "step": 52505
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.040010690689087,
      "learning_rate": 4.59962328775993e-06,
      "loss": 2.2295,
      "step": 52506
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9959129095077515,
      "learning_rate": 4.599276757862438e-06,
      "loss": 2.2749,
      "step": 52507
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1907213926315308,
      "learning_rate": 4.598930237120644e-06,
      "loss": 2.1617,
      "step": 52508
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1159586906433105,
      "learning_rate": 4.598583725535126e-06,
      "loss": 2.2211,
      "step": 52509
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0108342170715332,
      "learning_rate": 4.598237223106478e-06,
      "loss": 2.0763,
      "step": 52510
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0738505125045776,
      "learning_rate": 4.597890729835284e-06,
      "loss": 2.2764,
      "step": 52511
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.123230218887329,
      "learning_rate": 4.5975442457221345e-06,
      "loss": 2.4162,
      "step": 52512
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0253419876098633,
      "learning_rate": 4.597197770767611e-06,
      "loss": 2.3114,
      "step": 52513
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1036771535873413,
      "learning_rate": 4.596851304972309e-06,
      "loss": 2.4332,
      "step": 52514
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0498193502426147,
      "learning_rate": 4.596504848336807e-06,
      "loss": 2.3657,
      "step": 52515
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1080992221832275,
      "learning_rate": 4.5961584008617e-06,
      "loss": 2.2399,
      "step": 52516
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0993537902832031,
      "learning_rate": 4.595811962547574e-06,
      "loss": 2.1953,
      "step": 52517
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.136337161064148,
      "learning_rate": 4.595465533395013e-06,
      "loss": 2.3149,
      "step": 52518
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.008443832397461,
      "learning_rate": 4.595119113404603e-06,
      "loss": 2.1438,
      "step": 52519
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1170337200164795,
      "learning_rate": 4.594772702576937e-06,
      "loss": 2.3302,
      "step": 52520
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0941016674041748,
      "learning_rate": 4.5944263009125955e-06,
      "loss": 2.432,
      "step": 52521
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.298545002937317,
      "learning_rate": 4.594079908412175e-06,
      "loss": 2.2388,
      "step": 52522
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.715498685836792,
      "learning_rate": 4.593733525076255e-06,
      "loss": 2.3927,
      "step": 52523
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0551449060440063,
      "learning_rate": 4.593387150905422e-06,
      "loss": 2.1745,
      "step": 52524
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0050610303878784,
      "learning_rate": 4.59304078590027e-06,
      "loss": 2.2619,
      "step": 52525
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0717025995254517,
      "learning_rate": 4.59269443006138e-06,
      "loss": 2.1784,
      "step": 52526
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1083050966262817,
      "learning_rate": 4.592348083389343e-06,
      "loss": 2.5436,
      "step": 52527
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0866382122039795,
      "learning_rate": 4.592001745884742e-06,
      "loss": 2.3788,
      "step": 52528
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0496430397033691,
      "learning_rate": 4.591655417548171e-06,
      "loss": 2.384,
      "step": 52529
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1067531108856201,
      "learning_rate": 4.591309098380212e-06,
      "loss": 2.243,
      "step": 52530
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.148870587348938,
      "learning_rate": 4.590962788381454e-06,
      "loss": 2.3929,
      "step": 52531
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.072605848312378,
      "learning_rate": 4.59061648755248e-06,
      "loss": 2.0934,
      "step": 52532
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.183638095855713,
      "learning_rate": 4.590270195893882e-06,
      "loss": 2.484,
      "step": 52533
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1205570697784424,
      "learning_rate": 4.5899239134062435e-06,
      "loss": 2.0232,
      "step": 52534
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1201540231704712,
      "learning_rate": 4.589577640090156e-06,
      "loss": 2.2141,
      "step": 52535
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1215159893035889,
      "learning_rate": 4.589231375946199e-06,
      "loss": 2.2671,
      "step": 52536
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0994956493377686,
      "learning_rate": 4.588885120974969e-06,
      "loss": 2.2851,
      "step": 52537
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0456000566482544,
      "learning_rate": 4.588538875177047e-06,
      "loss": 2.3041,
      "step": 52538
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.076914668083191,
      "learning_rate": 4.588192638553022e-06,
      "loss": 2.2384,
      "step": 52539
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3165602684020996,
      "learning_rate": 4.587846411103478e-06,
      "loss": 2.4292,
      "step": 52540
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9834133386611938,
      "learning_rate": 4.587500192829008e-06,
      "loss": 2.4262,
      "step": 52541
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.077479600906372,
      "learning_rate": 4.58715398373019e-06,
      "loss": 2.5412,
      "step": 52542
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1423078775405884,
      "learning_rate": 4.586807783807625e-06,
      "loss": 2.2269,
      "step": 52543
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.214429259300232,
      "learning_rate": 4.5864615930618825e-06,
      "loss": 2.1666,
      "step": 52544
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.162575364112854,
      "learning_rate": 4.586115411493563e-06,
      "loss": 2.2821,
      "step": 52545
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1169703006744385,
      "learning_rate": 4.585769239103244e-06,
      "loss": 2.1698,
      "step": 52546
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1708818674087524,
      "learning_rate": 4.58542307589152e-06,
      "loss": 2.3357,
      "step": 52547
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1254675388336182,
      "learning_rate": 4.585076921858971e-06,
      "loss": 2.3526,
      "step": 52548
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0739352703094482,
      "learning_rate": 4.5847307770061915e-06,
      "loss": 2.2025,
      "step": 52549
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.080623745918274,
      "learning_rate": 4.584384641333759e-06,
      "loss": 2.2646,
      "step": 52550
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0428329706192017,
      "learning_rate": 4.58403851484227e-06,
      "loss": 2.3303,
      "step": 52551
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0583717823028564,
      "learning_rate": 4.583692397532307e-06,
      "loss": 2.2502,
      "step": 52552
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0905474424362183,
      "learning_rate": 4.583346289404453e-06,
      "loss": 2.2973,
      "step": 52553
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2718960046768188,
      "learning_rate": 4.5830001904593e-06,
      "loss": 2.2139,
      "step": 52554
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.061529517173767,
      "learning_rate": 4.58265410069743e-06,
      "loss": 2.4001,
      "step": 52555
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1502089500427246,
      "learning_rate": 4.582308020119437e-06,
      "loss": 2.2868,
      "step": 52556
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0915528535842896,
      "learning_rate": 4.581961948725904e-06,
      "loss": 2.3491,
      "step": 52557
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2164602279663086,
      "learning_rate": 4.581615886517416e-06,
      "loss": 2.2634,
      "step": 52558
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1981675624847412,
      "learning_rate": 4.581269833494557e-06,
      "loss": 2.178,
      "step": 52559
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9454931020736694,
      "learning_rate": 4.58092378965792e-06,
      "loss": 2.3189,
      "step": 52560
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.23472261428833,
      "learning_rate": 4.580577755008088e-06,
      "loss": 2.382,
      "step": 52561
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0587037801742554,
      "learning_rate": 4.58023172954565e-06,
      "loss": 2.4237,
      "step": 52562
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.097205638885498,
      "learning_rate": 4.579885713271188e-06,
      "loss": 2.4115,
      "step": 52563
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0676952600479126,
      "learning_rate": 4.579539706185297e-06,
      "loss": 2.293,
      "step": 52564
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9876351952552795,
      "learning_rate": 4.579193708288552e-06,
      "loss": 2.5435,
      "step": 52565
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.492385745048523,
      "learning_rate": 4.578847719581552e-06,
      "loss": 2.5028,
      "step": 52566
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2464635372161865,
      "learning_rate": 4.578501740064872e-06,
      "loss": 2.4682,
      "step": 52567
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0472207069396973,
      "learning_rate": 4.578155769739109e-06,
      "loss": 2.495,
      "step": 52568
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.125928282737732,
      "learning_rate": 4.577809808604844e-06,
      "loss": 2.3886,
      "step": 52569
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0553672313690186,
      "learning_rate": 4.5774638566626635e-06,
      "loss": 2.4389,
      "step": 52570
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9764336347579956,
      "learning_rate": 4.577117913913151e-06,
      "loss": 2.0596,
      "step": 52571
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0340672731399536,
      "learning_rate": 4.576771980356901e-06,
      "loss": 2.2744,
      "step": 52572
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1096363067626953,
      "learning_rate": 4.576426055994492e-06,
      "loss": 2.318,
      "step": 52573
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1159099340438843,
      "learning_rate": 4.576080140826517e-06,
      "loss": 2.3239,
      "step": 52574
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0232081413269043,
      "learning_rate": 4.575734234853555e-06,
      "loss": 2.4224,
      "step": 52575
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1421787738800049,
      "learning_rate": 4.5753883380762e-06,
      "loss": 2.3993,
      "step": 52576
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.169477105140686,
      "learning_rate": 4.575042450495033e-06,
      "loss": 2.4515,
      "step": 52577
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0789908170700073,
      "learning_rate": 4.574696572110645e-06,
      "loss": 2.4036,
      "step": 52578
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0722792148590088,
      "learning_rate": 4.57435070292362e-06,
      "loss": 2.338,
      "step": 52579
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0625016689300537,
      "learning_rate": 4.574004842934541e-06,
      "loss": 2.3024,
      "step": 52580
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0777249336242676,
      "learning_rate": 4.573658992144e-06,
      "loss": 2.2787,
      "step": 52581
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2014358043670654,
      "learning_rate": 4.5733131505525776e-06,
      "loss": 2.2894,
      "step": 52582
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0481916666030884,
      "learning_rate": 4.572967318160868e-06,
      "loss": 2.3883,
      "step": 52583
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2458418607711792,
      "learning_rate": 4.5726214949694515e-06,
      "loss": 2.1846,
      "step": 52584
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0969572067260742,
      "learning_rate": 4.572275680978916e-06,
      "loss": 2.4054,
      "step": 52585
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.096321940422058,
      "learning_rate": 4.5719298761898436e-06,
      "loss": 2.2876,
      "step": 52586
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.073264479637146,
      "learning_rate": 4.571584080602828e-06,
      "loss": 2.4416,
      "step": 52587
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1918548345565796,
      "learning_rate": 4.571238294218449e-06,
      "loss": 2.3931,
      "step": 52588
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0147840976715088,
      "learning_rate": 4.570892517037298e-06,
      "loss": 2.3658,
      "step": 52589
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0919733047485352,
      "learning_rate": 4.570546749059955e-06,
      "loss": 2.5101,
      "step": 52590
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0514384508132935,
      "learning_rate": 4.570200990287014e-06,
      "loss": 2.5064,
      "step": 52591
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1794005632400513,
      "learning_rate": 4.569855240719053e-06,
      "loss": 2.3633,
      "step": 52592
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0775425434112549,
      "learning_rate": 4.569509500356667e-06,
      "loss": 2.5473,
      "step": 52593
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0692933797836304,
      "learning_rate": 4.569163769200432e-06,
      "loss": 2.5341,
      "step": 52594
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0618759393692017,
      "learning_rate": 4.568818047250945e-06,
      "loss": 2.3927,
      "step": 52595
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1696851253509521,
      "learning_rate": 4.568472334508784e-06,
      "loss": 2.2735,
      "step": 52596
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0790349245071411,
      "learning_rate": 4.56812663097454e-06,
      "loss": 2.481,
      "step": 52597
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.9940716028213501,
      "learning_rate": 4.567780936648792e-06,
      "loss": 2.3748,
      "step": 52598
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1310762166976929,
      "learning_rate": 4.567435251532134e-06,
      "loss": 2.4023,
      "step": 52599
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0054779052734375,
      "learning_rate": 4.567089575625146e-06,
      "loss": 2.436,
      "step": 52600
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0192736387252808,
      "learning_rate": 4.566743908928418e-06,
      "loss": 2.2795,
      "step": 52601
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.158905029296875,
      "learning_rate": 4.566398251442533e-06,
      "loss": 2.4692,
      "step": 52602
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0832182168960571,
      "learning_rate": 4.566052603168083e-06,
      "loss": 2.5187,
      "step": 52603
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0657691955566406,
      "learning_rate": 4.565706964105645e-06,
      "loss": 2.2764,
      "step": 52604
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1481640338897705,
      "learning_rate": 4.565361334255813e-06,
      "loss": 2.3157,
      "step": 52605
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0879731178283691,
      "learning_rate": 4.565015713619167e-06,
      "loss": 2.2341,
      "step": 52606
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0990850925445557,
      "learning_rate": 4.564670102196298e-06,
      "loss": 2.4365,
      "step": 52607
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.0527170896530151,
      "learning_rate": 4.56432449998779e-06,
      "loss": 2.249,
      "step": 52608
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1811217069625854,
      "learning_rate": 4.563978906994227e-06,
      "loss": 2.3859,
      "step": 52609
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1502529382705688,
      "learning_rate": 4.563633323216195e-06,
      "loss": 2.3806,
      "step": 52610
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1827080249786377,
      "learning_rate": 4.563287748654283e-06,
      "loss": 2.0324,
      "step": 52611
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0395476818084717,
      "learning_rate": 4.5629421833090715e-06,
      "loss": 2.3613,
      "step": 52612
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2098782062530518,
      "learning_rate": 4.562596627181154e-06,
      "loss": 2.1583,
      "step": 52613
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.032379150390625,
      "learning_rate": 4.562251080271111e-06,
      "loss": 2.4005,
      "step": 52614
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0350629091262817,
      "learning_rate": 4.561905542579526e-06,
      "loss": 2.3946,
      "step": 52615
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1146856546401978,
      "learning_rate": 4.561560014106992e-06,
      "loss": 2.364,
      "step": 52616
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.098206639289856,
      "learning_rate": 4.561214494854086e-06,
      "loss": 2.3881,
      "step": 52617
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2039203643798828,
      "learning_rate": 4.560868984821404e-06,
      "loss": 2.2491,
      "step": 52618
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2232083082199097,
      "learning_rate": 4.560523484009521e-06,
      "loss": 2.3647,
      "step": 52619
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0264298915863037,
      "learning_rate": 4.560177992419033e-06,
      "loss": 2.2832,
      "step": 52620
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1216354370117188,
      "learning_rate": 4.5598325100505165e-06,
      "loss": 2.3487,
      "step": 52621
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.114944577217102,
      "learning_rate": 4.559487036904569e-06,
      "loss": 2.4426,
      "step": 52622
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1335035562515259,
      "learning_rate": 4.559141572981761e-06,
      "loss": 2.4473,
      "step": 52623
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0669894218444824,
      "learning_rate": 4.558796118282689e-06,
      "loss": 2.5214,
      "step": 52624
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0807095766067505,
      "learning_rate": 4.5584506728079325e-06,
      "loss": 2.4178,
      "step": 52625
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0505858659744263,
      "learning_rate": 4.558105236558083e-06,
      "loss": 2.359,
      "step": 52626
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1074730157852173,
      "learning_rate": 4.5577598095337206e-06,
      "loss": 2.3389,
      "step": 52627
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0165127515792847,
      "learning_rate": 4.557414391735436e-06,
      "loss": 2.2246,
      "step": 52628
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0778242349624634,
      "learning_rate": 4.557068983163809e-06,
      "loss": 2.2721,
      "step": 52629
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.102291464805603,
      "learning_rate": 4.556723583819432e-06,
      "loss": 2.4621,
      "step": 52630
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1993916034698486,
      "learning_rate": 4.5563781937028824e-06,
      "loss": 2.3581,
      "step": 52631
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9902945756912231,
      "learning_rate": 4.556032812814756e-06,
      "loss": 2.3347,
      "step": 52632
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0638991594314575,
      "learning_rate": 4.555687441155627e-06,
      "loss": 2.4883,
      "step": 52633
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1262403726577759,
      "learning_rate": 4.555342078726091e-06,
      "loss": 2.4251,
      "step": 52634
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0863051414489746,
      "learning_rate": 4.554996725526728e-06,
      "loss": 2.3513,
      "step": 52635
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.112183928489685,
      "learning_rate": 4.554651381558125e-06,
      "loss": 2.2494,
      "step": 52636
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2909830808639526,
      "learning_rate": 4.554306046820863e-06,
      "loss": 2.4473,
      "step": 52637
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6932543516159058,
      "learning_rate": 4.553960721315536e-06,
      "loss": 2.4133,
      "step": 52638
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1287633180618286,
      "learning_rate": 4.55361540504272e-06,
      "loss": 2.248,
      "step": 52639
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1352568864822388,
      "learning_rate": 4.5532700980030085e-06,
      "loss": 2.4605,
      "step": 52640
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0916837453842163,
      "learning_rate": 4.552924800196984e-06,
      "loss": 2.3217,
      "step": 52641
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0257395505905151,
      "learning_rate": 4.5525795116252265e-06,
      "loss": 2.2903,
      "step": 52642
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0005500316619873,
      "learning_rate": 4.552234232288332e-06,
      "loss": 2.3024,
      "step": 52643
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1859369277954102,
      "learning_rate": 4.551888962186876e-06,
      "loss": 2.3608,
      "step": 52644
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0845422744750977,
      "learning_rate": 4.55154370132145e-06,
      "loss": 2.5097,
      "step": 52645
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.72849178314209,
      "learning_rate": 4.5511984496926344e-06,
      "loss": 2.3076,
      "step": 52646
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1159833669662476,
      "learning_rate": 4.550853207301021e-06,
      "loss": 2.1622,
      "step": 52647
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0648115873336792,
      "learning_rate": 4.550507974147188e-06,
      "loss": 2.28,
      "step": 52648
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1476083993911743,
      "learning_rate": 4.550162750231731e-06,
      "loss": 2.2244,
      "step": 52649
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0072922706604004,
      "learning_rate": 4.549817535555221e-06,
      "loss": 2.3758,
      "step": 52650
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0605947971343994,
      "learning_rate": 4.549472330118254e-06,
      "loss": 2.2464,
      "step": 52651
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2006539106369019,
      "learning_rate": 4.5491271339214095e-06,
      "loss": 2.2692,
      "step": 52652
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0291107892990112,
      "learning_rate": 4.548781946965277e-06,
      "loss": 2.1987,
      "step": 52653
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1420621871948242,
      "learning_rate": 4.548436769250436e-06,
      "loss": 2.416,
      "step": 52654
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0895991325378418,
      "learning_rate": 4.54809160077748e-06,
      "loss": 2.3809,
      "step": 52655
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.072011947631836,
      "learning_rate": 4.547746441546985e-06,
      "loss": 2.3265,
      "step": 52656
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.176450252532959,
      "learning_rate": 4.5474012915595445e-06,
      "loss": 2.4069,
      "step": 52657
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.064786672592163,
      "learning_rate": 4.547056150815736e-06,
      "loss": 2.2391,
      "step": 52658
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.999600350856781,
      "learning_rate": 4.546711019316152e-06,
      "loss": 2.398,
      "step": 52659
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1132673025131226,
      "learning_rate": 4.54636589706137e-06,
      "loss": 2.3364,
      "step": 52660
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1273707151412964,
      "learning_rate": 4.5460207840519834e-06,
      "loss": 2.3345,
      "step": 52661
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0329382419586182,
      "learning_rate": 4.545675680288572e-06,
      "loss": 2.5132,
      "step": 52662
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.019253134727478,
      "learning_rate": 4.545330585771722e-06,
      "loss": 2.3335,
      "step": 52663
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0457470417022705,
      "learning_rate": 4.544985500502015e-06,
      "loss": 2.486,
      "step": 52664
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0072107315063477,
      "learning_rate": 4.544640424480042e-06,
      "loss": 2.0604,
      "step": 52665
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.971634566783905,
      "learning_rate": 4.544295357706382e-06,
      "loss": 2.4268,
      "step": 52666
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0437432527542114,
      "learning_rate": 4.5439503001816265e-06,
      "loss": 2.3284,
      "step": 52667
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1809440851211548,
      "learning_rate": 4.543605251906354e-06,
      "loss": 2.3171,
      "step": 52668
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0974723100662231,
      "learning_rate": 4.543260212881155e-06,
      "loss": 2.1403,
      "step": 52669
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0838422775268555,
      "learning_rate": 4.542915183106613e-06,
      "loss": 2.3158,
      "step": 52670
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0661392211914062,
      "learning_rate": 4.542570162583307e-06,
      "loss": 2.424,
      "step": 52671
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1314184665679932,
      "learning_rate": 4.542225151311832e-06,
      "loss": 2.5079,
      "step": 52672
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0829497575759888,
      "learning_rate": 4.541880149292763e-06,
      "loss": 2.4531,
      "step": 52673
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1940486431121826,
      "learning_rate": 4.541535156526694e-06,
      "loss": 2.4958,
      "step": 52674
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0162043571472168,
      "learning_rate": 4.541190173014205e-06,
      "loss": 2.2196,
      "step": 52675
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0028622150421143,
      "learning_rate": 4.540845198755881e-06,
      "loss": 2.4861,
      "step": 52676
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1798145771026611,
      "learning_rate": 4.5405002337523045e-06,
      "loss": 2.2985,
      "step": 52677
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0380864143371582,
      "learning_rate": 4.540155278004065e-06,
      "loss": 2.2572,
      "step": 52678
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0685226917266846,
      "learning_rate": 4.539810331511742e-06,
      "loss": 2.236,
      "step": 52679
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9838533401489258,
      "learning_rate": 4.539465394275927e-06,
      "loss": 2.4068,
      "step": 52680
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1130934953689575,
      "learning_rate": 4.539120466297197e-06,
      "loss": 2.3357,
      "step": 52681
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1409211158752441,
      "learning_rate": 4.538775547576147e-06,
      "loss": 2.2784,
      "step": 52682
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0832360982894897,
      "learning_rate": 4.53843063811335e-06,
      "loss": 2.3887,
      "step": 52683
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.085952877998352,
      "learning_rate": 4.538085737909401e-06,
      "loss": 2.3152,
      "step": 52684
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0214003324508667,
      "learning_rate": 4.537740846964876e-06,
      "loss": 2.0331,
      "step": 52685
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.153913974761963,
      "learning_rate": 4.5373959652803675e-06,
      "loss": 2.258,
      "step": 52686
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.068880558013916,
      "learning_rate": 4.537051092856453e-06,
      "loss": 2.3802,
      "step": 52687
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0608731508255005,
      "learning_rate": 4.536706229693728e-06,
      "loss": 2.2676,
      "step": 52688
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1895465850830078,
      "learning_rate": 4.5363613757927615e-06,
      "loss": 2.3726,
      "step": 52689
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9310213923454285,
      "learning_rate": 4.536016531154152e-06,
      "loss": 2.2896,
      "step": 52690
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1871169805526733,
      "learning_rate": 4.535671695778473e-06,
      "loss": 2.2537,
      "step": 52691
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.155524730682373,
      "learning_rate": 4.535326869666318e-06,
      "loss": 2.3694,
      "step": 52692
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0507184267044067,
      "learning_rate": 4.5349820528182655e-06,
      "loss": 2.4711,
      "step": 52693
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0832401514053345,
      "learning_rate": 4.534637245234906e-06,
      "loss": 2.4128,
      "step": 52694
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.149578332901001,
      "learning_rate": 4.534292446916817e-06,
      "loss": 2.1915,
      "step": 52695
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.148760437965393,
      "learning_rate": 4.533947657864591e-06,
      "loss": 2.219,
      "step": 52696
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.048235535621643,
      "learning_rate": 4.533602878078808e-06,
      "loss": 2.208,
      "step": 52697
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1103541851043701,
      "learning_rate": 4.533258107560049e-06,
      "loss": 2.4336,
      "step": 52698
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0853673219680786,
      "learning_rate": 4.532913346308906e-06,
      "loss": 2.5851,
      "step": 52699
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.108810544013977,
      "learning_rate": 4.532568594325956e-06,
      "loss": 2.3632,
      "step": 52700
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1025457382202148,
      "learning_rate": 4.53222385161179e-06,
      "loss": 2.3948,
      "step": 52701
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0882668495178223,
      "learning_rate": 4.5318791181669905e-06,
      "loss": 2.4382,
      "step": 52702
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1534353494644165,
      "learning_rate": 4.531534393992142e-06,
      "loss": 2.2673,
      "step": 52703
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0698013305664062,
      "learning_rate": 4.531189679087823e-06,
      "loss": 2.2279,
      "step": 52704
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.039725422859192,
      "learning_rate": 4.530844973454627e-06,
      "loss": 2.2237,
      "step": 52705
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0592041015625,
      "learning_rate": 4.53050027709313e-06,
      "loss": 2.2168,
      "step": 52706
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3162044286727905,
      "learning_rate": 4.530155590003924e-06,
      "loss": 2.4726,
      "step": 52707
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.11067795753479,
      "learning_rate": 4.5298109121875865e-06,
      "loss": 2.1178,
      "step": 52708
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2697937488555908,
      "learning_rate": 4.529466243644709e-06,
      "loss": 2.3784,
      "step": 52709
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.155250906944275,
      "learning_rate": 4.529121584375867e-06,
      "loss": 2.3044,
      "step": 52710
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0720449686050415,
      "learning_rate": 4.528776934381655e-06,
      "loss": 2.4043,
      "step": 52711
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.168943166732788,
      "learning_rate": 4.528432293662649e-06,
      "loss": 2.2888,
      "step": 52712
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1340382099151611,
      "learning_rate": 4.528087662219438e-06,
      "loss": 2.4585,
      "step": 52713
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0157920122146606,
      "learning_rate": 4.527743040052606e-06,
      "loss": 2.2028,
      "step": 52714
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0267528295516968,
      "learning_rate": 4.527398427162736e-06,
      "loss": 2.0283,
      "step": 52715
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0020660161972046,
      "learning_rate": 4.527053823550407e-06,
      "loss": 2.2549,
      "step": 52716
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0552104711532593,
      "learning_rate": 4.526709229216213e-06,
      "loss": 2.2839,
      "step": 52717
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1577688455581665,
      "learning_rate": 4.526364644160729e-06,
      "loss": 2.3571,
      "step": 52718
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0266419649124146,
      "learning_rate": 4.526020068384548e-06,
      "loss": 2.3621,
      "step": 52719
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.060551404953003,
      "learning_rate": 4.525675501888246e-06,
      "loss": 2.3615,
      "step": 52720
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0854560136795044,
      "learning_rate": 4.525330944672414e-06,
      "loss": 2.4126,
      "step": 52721
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0384249687194824,
      "learning_rate": 4.5249863967376286e-06,
      "loss": 2.2604,
      "step": 52722
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1982920169830322,
      "learning_rate": 4.524641858084483e-06,
      "loss": 2.2837,
      "step": 52723
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.074979305267334,
      "learning_rate": 4.524297328713553e-06,
      "loss": 2.4011,
      "step": 52724
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0969758033752441,
      "learning_rate": 4.523952808625429e-06,
      "loss": 2.1597,
      "step": 52725
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0616176128387451,
      "learning_rate": 4.523608297820693e-06,
      "loss": 2.311,
      "step": 52726
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9557175040245056,
      "learning_rate": 4.523263796299924e-06,
      "loss": 2.2657,
      "step": 52727
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9893430471420288,
      "learning_rate": 4.522919304063714e-06,
      "loss": 2.1837,
      "step": 52728
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0826506614685059,
      "learning_rate": 4.522574821112644e-06,
      "loss": 2.2407,
      "step": 52729
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1820685863494873,
      "learning_rate": 4.5222303474472924e-06,
      "loss": 2.0844,
      "step": 52730
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0683168172836304,
      "learning_rate": 4.5218858830682535e-06,
      "loss": 2.2192,
      "step": 52731
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.147638201713562,
      "learning_rate": 4.521541427976105e-06,
      "loss": 2.2853,
      "step": 52732
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0542774200439453,
      "learning_rate": 4.521196982171429e-06,
      "loss": 2.3843,
      "step": 52733
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1217248439788818,
      "learning_rate": 4.5208525456548135e-06,
      "loss": 2.3168,
      "step": 52734
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4324527978897095,
      "learning_rate": 4.5205081184268396e-06,
      "loss": 2.292,
      "step": 52735
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.076126217842102,
      "learning_rate": 4.520163700488095e-06,
      "loss": 2.2788,
      "step": 52736
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2013287544250488,
      "learning_rate": 4.519819291839158e-06,
      "loss": 2.2843,
      "step": 52737
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1030579805374146,
      "learning_rate": 4.51947489248062e-06,
      "loss": 2.4496,
      "step": 52738
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.077045202255249,
      "learning_rate": 4.519130502413056e-06,
      "loss": 2.4299,
      "step": 52739
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0321924686431885,
      "learning_rate": 4.5187861216370575e-06,
      "loss": 2.2505,
      "step": 52740
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.08979332447052,
      "learning_rate": 4.518441750153206e-06,
      "loss": 2.2627,
      "step": 52741
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.603487253189087,
      "learning_rate": 4.5180973879620845e-06,
      "loss": 2.3427,
      "step": 52742
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1030422449111938,
      "learning_rate": 4.517753035064274e-06,
      "loss": 2.1506,
      "step": 52743
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.019219994544983,
      "learning_rate": 4.5174086914603645e-06,
      "loss": 2.1357,
      "step": 52744
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0687263011932373,
      "learning_rate": 4.517064357150931e-06,
      "loss": 2.1387,
      "step": 52745
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0242350101470947,
      "learning_rate": 4.516720032136568e-06,
      "loss": 2.3621,
      "step": 52746
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.083514928817749,
      "learning_rate": 4.516375716417849e-06,
      "loss": 2.139,
      "step": 52747
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0871806144714355,
      "learning_rate": 4.516031409995367e-06,
      "loss": 2.4825,
      "step": 52748
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1564143896102905,
      "learning_rate": 4.515687112869697e-06,
      "loss": 2.3961,
      "step": 52749
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9913436770439148,
      "learning_rate": 4.51534282504143e-06,
      "loss": 2.275,
      "step": 52750
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0707390308380127,
      "learning_rate": 4.514998546511143e-06,
      "loss": 2.2789,
      "step": 52751
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2087953090667725,
      "learning_rate": 4.514654277279428e-06,
      "loss": 2.2909,
      "step": 52752
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0493861436843872,
      "learning_rate": 4.514310017346864e-06,
      "loss": 2.3775,
      "step": 52753
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9923823475837708,
      "learning_rate": 4.513965766714033e-06,
      "loss": 2.5699,
      "step": 52754
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0469510555267334,
      "learning_rate": 4.513621525381517e-06,
      "loss": 2.17,
      "step": 52755
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1234540939331055,
      "learning_rate": 4.513277293349906e-06,
      "loss": 2.498,
      "step": 52756
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0792492628097534,
      "learning_rate": 4.512933070619776e-06,
      "loss": 2.5876,
      "step": 52757
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.066348910331726,
      "learning_rate": 4.5125888571917195e-06,
      "loss": 2.4151,
      "step": 52758
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1681097745895386,
      "learning_rate": 4.512244653066314e-06,
      "loss": 2.2766,
      "step": 52759
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0960241556167603,
      "learning_rate": 4.511900458244142e-06,
      "loss": 2.3037,
      "step": 52760
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1558855772018433,
      "learning_rate": 4.511556272725792e-06,
      "loss": 2.5822,
      "step": 52761
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1257894039154053,
      "learning_rate": 4.511212096511841e-06,
      "loss": 2.5307,
      "step": 52762
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0227309465408325,
      "learning_rate": 4.510867929602881e-06,
      "loss": 2.2687,
      "step": 52763
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2086248397827148,
      "learning_rate": 4.510523771999486e-06,
      "loss": 2.4446,
      "step": 52764
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0606633424758911,
      "learning_rate": 4.510179623702248e-06,
      "loss": 2.1453,
      "step": 52765
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0208245515823364,
      "learning_rate": 4.509835484711743e-06,
      "loss": 2.3812,
      "step": 52766
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0027060508728027,
      "learning_rate": 4.509491355028566e-06,
      "loss": 2.212,
      "step": 52767
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3341435194015503,
      "learning_rate": 4.509147234653284e-06,
      "loss": 2.3508,
      "step": 52768
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.045554280281067,
      "learning_rate": 4.508803123586493e-06,
      "loss": 2.2166,
      "step": 52769
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2420446872711182,
      "learning_rate": 4.508459021828767e-06,
      "loss": 2.3175,
      "step": 52770
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2058755159378052,
      "learning_rate": 4.5081149293807e-06,
      "loss": 2.2988,
      "step": 52771
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.045559048652649,
      "learning_rate": 4.507770846242865e-06,
      "loss": 2.2586,
      "step": 52772
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0928740501403809,
      "learning_rate": 4.507426772415854e-06,
      "loss": 2.5512,
      "step": 52773
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.131675124168396,
      "learning_rate": 4.507082707900243e-06,
      "loss": 2.3895,
      "step": 52774
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1448649168014526,
      "learning_rate": 4.5067386526966215e-06,
      "loss": 2.2695,
      "step": 52775
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4768728017807007,
      "learning_rate": 4.506394606805567e-06,
      "loss": 2.0793,
      "step": 52776
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1359585523605347,
      "learning_rate": 4.506050570227671e-06,
      "loss": 2.4137,
      "step": 52777
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.08119797706604,
      "learning_rate": 4.505706542963505e-06,
      "loss": 2.3373,
      "step": 52778
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.098403811454773,
      "learning_rate": 4.505362525013663e-06,
      "loss": 2.4356,
      "step": 52779
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0041460990905762,
      "learning_rate": 4.505018516378724e-06,
      "loss": 2.292,
      "step": 52780
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2155170440673828,
      "learning_rate": 4.504674517059271e-06,
      "loss": 2.1009,
      "step": 52781
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.065136432647705,
      "learning_rate": 4.504330527055884e-06,
      "loss": 2.3118,
      "step": 52782
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0366530418395996,
      "learning_rate": 4.503986546369154e-06,
      "loss": 2.4781,
      "step": 52783
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9409377574920654,
      "learning_rate": 4.503642574999654e-06,
      "loss": 2.2808,
      "step": 52784
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1177390813827515,
      "learning_rate": 4.503298612947976e-06,
      "loss": 2.2737,
      "step": 52785
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1143379211425781,
      "learning_rate": 4.502954660214697e-06,
      "loss": 2.2877,
      "step": 52786
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.281754970550537,
      "learning_rate": 4.5026107168004065e-06,
      "loss": 2.3802,
      "step": 52787
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0906500816345215,
      "learning_rate": 4.502266782705684e-06,
      "loss": 2.4697,
      "step": 52788
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.008423924446106,
      "learning_rate": 4.501922857931109e-06,
      "loss": 2.3218,
      "step": 52789
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1584845781326294,
      "learning_rate": 4.501578942477271e-06,
      "loss": 2.4134,
      "step": 52790
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0692793130874634,
      "learning_rate": 4.5012350363447465e-06,
      "loss": 2.7031,
      "step": 52791
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4570988416671753,
      "learning_rate": 4.500891139534126e-06,
      "loss": 2.4,
      "step": 52792
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1007215976715088,
      "learning_rate": 4.500547252045985e-06,
      "loss": 2.1807,
      "step": 52793
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1409672498703003,
      "learning_rate": 4.500203373880917e-06,
      "loss": 2.2218,
      "step": 52794
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1139191389083862,
      "learning_rate": 4.499859505039491e-06,
      "loss": 2.6693,
      "step": 52795
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0012377500534058,
      "learning_rate": 4.499515645522301e-06,
      "loss": 2.4558,
      "step": 52796
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1005096435546875,
      "learning_rate": 4.4991717953299205e-06,
      "loss": 2.2466,
      "step": 52797
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0792427062988281,
      "learning_rate": 4.498827954462943e-06,
      "loss": 2.4008,
      "step": 52798
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9976975321769714,
      "learning_rate": 4.498484122921942e-06,
      "loss": 2.3867,
      "step": 52799
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0900176763534546,
      "learning_rate": 4.498140300707508e-06,
      "loss": 2.2398,
      "step": 52800
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.061842441558838,
      "learning_rate": 4.4977964878202184e-06,
      "loss": 2.3987,
      "step": 52801
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0728625059127808,
      "learning_rate": 4.4974526842606604e-06,
      "loss": 2.6024,
      "step": 52802
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0690691471099854,
      "learning_rate": 4.497108890029411e-06,
      "loss": 2.1403,
      "step": 52803
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2683721780776978,
      "learning_rate": 4.496765105127061e-06,
      "loss": 2.5936,
      "step": 52804
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0824180841445923,
      "learning_rate": 4.4964213295541845e-06,
      "loss": 2.2656,
      "step": 52805
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0713608264923096,
      "learning_rate": 4.496077563311373e-06,
      "loss": 2.0042,
      "step": 52806
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1597926616668701,
      "learning_rate": 4.495733806399204e-06,
      "loss": 2.3927,
      "step": 52807
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1105166673660278,
      "learning_rate": 4.495390058818261e-06,
      "loss": 2.4219,
      "step": 52808
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1207478046417236,
      "learning_rate": 4.4950463205691245e-06,
      "loss": 2.2427,
      "step": 52809
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2422770261764526,
      "learning_rate": 4.4947025916523815e-06,
      "loss": 2.2804,
      "step": 52810
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0399004220962524,
      "learning_rate": 4.49435887206861e-06,
      "loss": 2.5336,
      "step": 52811
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1850216388702393,
      "learning_rate": 4.494015161818399e-06,
      "loss": 2.3817,
      "step": 52812
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1870484352111816,
      "learning_rate": 4.493671460902325e-06,
      "loss": 2.3101,
      "step": 52813
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1190595626831055,
      "learning_rate": 4.4933277693209764e-06,
      "loss": 2.4161,
      "step": 52814
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3117483854293823,
      "learning_rate": 4.492984087074932e-06,
      "loss": 2.1887,
      "step": 52815
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0775741338729858,
      "learning_rate": 4.492640414164773e-06,
      "loss": 2.4931,
      "step": 52816
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0167360305786133,
      "learning_rate": 4.492296750591087e-06,
      "loss": 2.4197,
      "step": 52817
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.195253610610962,
      "learning_rate": 4.49195309635445e-06,
      "loss": 2.2757,
      "step": 52818
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.148762822151184,
      "learning_rate": 4.491609451455453e-06,
      "loss": 2.4275,
      "step": 52819
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1457817554473877,
      "learning_rate": 4.491265815894674e-06,
      "loss": 2.2508,
      "step": 52820
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0641324520111084,
      "learning_rate": 4.490922189672695e-06,
      "loss": 2.22,
      "step": 52821
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2031153440475464,
      "learning_rate": 4.490578572790095e-06,
      "loss": 2.3102,
      "step": 52822
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0397123098373413,
      "learning_rate": 4.490234965247465e-06,
      "loss": 2.4776,
      "step": 52823
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.099098563194275,
      "learning_rate": 4.48989136704538e-06,
      "loss": 2.3327,
      "step": 52824
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9958680272102356,
      "learning_rate": 4.489547778184429e-06,
      "loss": 2.62,
      "step": 52825
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1297883987426758,
      "learning_rate": 4.489204198665188e-06,
      "loss": 2.4241,
      "step": 52826
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1130210161209106,
      "learning_rate": 4.488860628488245e-06,
      "loss": 2.2572,
      "step": 52827
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0612565279006958,
      "learning_rate": 4.488517067654177e-06,
      "loss": 2.3504,
      "step": 52828
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2277194261550903,
      "learning_rate": 4.488173516163573e-06,
      "loss": 2.374,
      "step": 52829
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.018091082572937,
      "learning_rate": 4.4878299740170086e-06,
      "loss": 2.3572,
      "step": 52830
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9992272853851318,
      "learning_rate": 4.487486441215073e-06,
      "loss": 2.3409,
      "step": 52831
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1620042324066162,
      "learning_rate": 4.487142917758341e-06,
      "loss": 2.3299,
      "step": 52832
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1354044675827026,
      "learning_rate": 4.486799403647407e-06,
      "loss": 2.2599,
      "step": 52833
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2924238443374634,
      "learning_rate": 4.486455898882838e-06,
      "loss": 2.314,
      "step": 52834
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1612251996994019,
      "learning_rate": 4.4861124034652256e-06,
      "loss": 2.3541,
      "step": 52835
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.151806354522705,
      "learning_rate": 4.4857689173951495e-06,
      "loss": 2.3549,
      "step": 52836
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0379269123077393,
      "learning_rate": 4.485425440673195e-06,
      "loss": 2.0741,
      "step": 52837
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0367546081542969,
      "learning_rate": 4.485081973299937e-06,
      "loss": 2.2468,
      "step": 52838
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3033466339111328,
      "learning_rate": 4.484738515275968e-06,
      "loss": 2.2399,
      "step": 52839
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.125500202178955,
      "learning_rate": 4.484395066601862e-06,
      "loss": 2.4091,
      "step": 52840
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0848448276519775,
      "learning_rate": 4.484051627278208e-06,
      "loss": 2.2234,
      "step": 52841
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.16892671585083,
      "learning_rate": 4.48370819730558e-06,
      "loss": 2.4848,
      "step": 52842
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9936115145683289,
      "learning_rate": 4.483364776684569e-06,
      "loss": 2.1732,
      "step": 52843
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0217838287353516,
      "learning_rate": 4.483021365415752e-06,
      "loss": 2.4647,
      "step": 52844
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0426641702651978,
      "learning_rate": 4.48267796349971e-06,
      "loss": 2.5081,
      "step": 52845
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1879204511642456,
      "learning_rate": 4.48233457093703e-06,
      "loss": 2.5272,
      "step": 52846
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.082024335861206,
      "learning_rate": 4.4819911877282916e-06,
      "loss": 2.2189,
      "step": 52847
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0099380016326904,
      "learning_rate": 4.481647813874074e-06,
      "loss": 2.2772,
      "step": 52848
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9678599834442139,
      "learning_rate": 4.481304449374965e-06,
      "loss": 2.394,
      "step": 52849
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0903650522232056,
      "learning_rate": 4.480961094231544e-06,
      "loss": 2.2125,
      "step": 52850
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0273205041885376,
      "learning_rate": 4.480617748444389e-06,
      "loss": 2.4058,
      "step": 52851
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0543930530548096,
      "learning_rate": 4.48027441201409e-06,
      "loss": 2.2283,
      "step": 52852
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0518332719802856,
      "learning_rate": 4.47993108494122e-06,
      "loss": 2.3583,
      "step": 52853
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1499416828155518,
      "learning_rate": 4.479587767226372e-06,
      "loss": 2.1972,
      "step": 52854
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7592761516571045,
      "learning_rate": 4.479244458870117e-06,
      "loss": 2.4886,
      "step": 52855
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.088548183441162,
      "learning_rate": 4.478901159873046e-06,
      "loss": 2.4461,
      "step": 52856
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0852833986282349,
      "learning_rate": 4.478557870235733e-06,
      "loss": 2.1937,
      "step": 52857
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1243739128112793,
      "learning_rate": 4.4782145899587685e-06,
      "loss": 2.1929,
      "step": 52858
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0956493616104126,
      "learning_rate": 4.477871319042729e-06,
      "loss": 2.3087,
      "step": 52859
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0729258060455322,
      "learning_rate": 4.477528057488199e-06,
      "loss": 2.58,
      "step": 52860
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0923069715499878,
      "learning_rate": 4.477184805295753e-06,
      "loss": 2.4153,
      "step": 52861
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0578792095184326,
      "learning_rate": 4.476841562465985e-06,
      "loss": 2.3636,
      "step": 52862
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1029237508773804,
      "learning_rate": 4.476498328999464e-06,
      "loss": 2.3962,
      "step": 52863
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0450761318206787,
      "learning_rate": 4.4761551048967845e-06,
      "loss": 2.2822,
      "step": 52864
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0948331356048584,
      "learning_rate": 4.475811890158516e-06,
      "loss": 2.2792,
      "step": 52865
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2274965047836304,
      "learning_rate": 4.475468684785254e-06,
      "loss": 2.5499,
      "step": 52866
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0899635553359985,
      "learning_rate": 4.475125488777567e-06,
      "loss": 2.3243,
      "step": 52867
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.177065372467041,
      "learning_rate": 4.474782302136047e-06,
      "loss": 2.4444,
      "step": 52868
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2226516008377075,
      "learning_rate": 4.474439124861267e-06,
      "loss": 2.4501,
      "step": 52869
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0831173658370972,
      "learning_rate": 4.474095956953819e-06,
      "loss": 2.3459,
      "step": 52870
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.045036792755127,
      "learning_rate": 4.473752798414278e-06,
      "loss": 2.4466,
      "step": 52871
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0612094402313232,
      "learning_rate": 4.473409649243223e-06,
      "loss": 2.6007,
      "step": 52872
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1769589185714722,
      "learning_rate": 4.473066509441244e-06,
      "loss": 2.1967,
      "step": 52873
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2679173946380615,
      "learning_rate": 4.472723379008919e-06,
      "loss": 2.3134,
      "step": 52874
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1514779329299927,
      "learning_rate": 4.472380257946823e-06,
      "loss": 2.317,
      "step": 52875
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1211525201797485,
      "learning_rate": 4.47203714625555e-06,
      "loss": 2.381,
      "step": 52876
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0356539487838745,
      "learning_rate": 4.471694043935674e-06,
      "loss": 2.292,
      "step": 52877
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.01334810256958,
      "learning_rate": 4.471350950987775e-06,
      "loss": 2.2997,
      "step": 52878
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.200901746749878,
      "learning_rate": 4.471007867412441e-06,
      "loss": 2.4265,
      "step": 52879
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.171854019165039,
      "learning_rate": 4.470664793210246e-06,
      "loss": 2.4425,
      "step": 52880
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0926755666732788,
      "learning_rate": 4.470321728381781e-06,
      "loss": 2.3467,
      "step": 52881
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.085994005203247,
      "learning_rate": 4.469978672927618e-06,
      "loss": 2.326,
      "step": 52882
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2142959833145142,
      "learning_rate": 4.469635626848347e-06,
      "loss": 2.2457,
      "step": 52883
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.133284568786621,
      "learning_rate": 4.469292590144541e-06,
      "loss": 2.1787,
      "step": 52884
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0997682809829712,
      "learning_rate": 4.468949562816792e-06,
      "loss": 2.2859,
      "step": 52885
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0230244398117065,
      "learning_rate": 4.468606544865674e-06,
      "loss": 2.5871,
      "step": 52886
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.075867772102356,
      "learning_rate": 4.4682635362917714e-06,
      "loss": 2.4017,
      "step": 52887
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1010197401046753,
      "learning_rate": 4.467920537095659e-06,
      "loss": 2.481,
      "step": 52888
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.166886568069458,
      "learning_rate": 4.4675775472779295e-06,
      "loss": 2.4261,
      "step": 52889
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0094326734542847,
      "learning_rate": 4.4672345668391534e-06,
      "loss": 2.0626,
      "step": 52890
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1912187337875366,
      "learning_rate": 4.466891595779922e-06,
      "loss": 2.477,
      "step": 52891
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9653318524360657,
      "learning_rate": 4.466548634100808e-06,
      "loss": 2.4642,
      "step": 52892
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1717419624328613,
      "learning_rate": 4.466205681802401e-06,
      "loss": 2.7332,
      "step": 52893
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9205643534660339,
      "learning_rate": 4.465862738885274e-06,
      "loss": 2.3913,
      "step": 52894
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.187259554862976,
      "learning_rate": 4.465519805350017e-06,
      "loss": 2.2741,
      "step": 52895
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.09665846824646,
      "learning_rate": 4.465176881197203e-06,
      "loss": 2.3133,
      "step": 52896
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0229358673095703,
      "learning_rate": 4.464833966427421e-06,
      "loss": 2.1926,
      "step": 52897
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0821833610534668,
      "learning_rate": 4.4644910610412495e-06,
      "loss": 2.2183,
      "step": 52898
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0616097450256348,
      "learning_rate": 4.464148165039268e-06,
      "loss": 2.3827,
      "step": 52899
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.061036229133606,
      "learning_rate": 4.4638052784220564e-06,
      "loss": 2.1828,
      "step": 52900
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0676823854446411,
      "learning_rate": 4.463462401190202e-06,
      "loss": 2.5725,
      "step": 52901
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.997291624546051,
      "learning_rate": 4.4631195333442775e-06,
      "loss": 2.2776,
      "step": 52902
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0593498945236206,
      "learning_rate": 4.462776674884875e-06,
      "loss": 2.2629,
      "step": 52903
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2246400117874146,
      "learning_rate": 4.462433825812565e-06,
      "loss": 2.0773,
      "step": 52904
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0132496356964111,
      "learning_rate": 4.462090986127936e-06,
      "loss": 2.2349,
      "step": 52905
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0699490308761597,
      "learning_rate": 4.461748155831569e-06,
      "loss": 2.5307,
      "step": 52906
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.115317940711975,
      "learning_rate": 4.461405334924039e-06,
      "loss": 2.3851,
      "step": 52907
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0566860437393188,
      "learning_rate": 4.461062523405934e-06,
      "loss": 2.4463,
      "step": 52908
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9378740787506104,
      "learning_rate": 4.460719721277829e-06,
      "loss": 2.1654,
      "step": 52909
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0720534324645996,
      "learning_rate": 4.460376928540312e-06,
      "loss": 2.2404,
      "step": 52910
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0053753852844238,
      "learning_rate": 4.460034145193958e-06,
      "loss": 2.3736,
      "step": 52911
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.115464448928833,
      "learning_rate": 4.459691371239357e-06,
      "loss": 2.4187,
      "step": 52912
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2568204402923584,
      "learning_rate": 4.459348606677078e-06,
      "loss": 2.2069,
      "step": 52913
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.028177261352539,
      "learning_rate": 4.459005851507709e-06,
      "loss": 2.3127,
      "step": 52914
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6729130744934082,
      "learning_rate": 4.4586631057318285e-06,
      "loss": 2.3787,
      "step": 52915
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1236158609390259,
      "learning_rate": 4.458320369350022e-06,
      "loss": 2.399,
      "step": 52916
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1238985061645508,
      "learning_rate": 4.457977642362864e-06,
      "loss": 2.3706,
      "step": 52917
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0803219079971313,
      "learning_rate": 4.457634924770943e-06,
      "loss": 2.1704,
      "step": 52918
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0041536092758179,
      "learning_rate": 4.457292216574832e-06,
      "loss": 2.439,
      "step": 52919
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9895153045654297,
      "learning_rate": 4.45694951777512e-06,
      "loss": 1.9063,
      "step": 52920
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1369976997375488,
      "learning_rate": 4.45660682837238e-06,
      "loss": 2.307,
      "step": 52921
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.058817982673645,
      "learning_rate": 4.456264148367201e-06,
      "loss": 2.1893,
      "step": 52922
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0162389278411865,
      "learning_rate": 4.455921477760156e-06,
      "loss": 2.1842,
      "step": 52923
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1707675457000732,
      "learning_rate": 4.455578816551835e-06,
      "loss": 2.3278,
      "step": 52924
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1194534301757812,
      "learning_rate": 4.455236164742811e-06,
      "loss": 2.4133,
      "step": 52925
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.173577070236206,
      "learning_rate": 4.4548935223336695e-06,
      "loss": 2.3928,
      "step": 52926
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0305581092834473,
      "learning_rate": 4.454550889324986e-06,
      "loss": 2.5731,
      "step": 52927
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0915344953536987,
      "learning_rate": 4.4542082657173466e-06,
      "loss": 2.1479,
      "step": 52928
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0317909717559814,
      "learning_rate": 4.453865651511329e-06,
      "loss": 2.4803,
      "step": 52929
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2787832021713257,
      "learning_rate": 4.453523046707519e-06,
      "loss": 2.2905,
      "step": 52930
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.090919017791748,
      "learning_rate": 4.453180451306488e-06,
      "loss": 2.2989,
      "step": 52931
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1581348180770874,
      "learning_rate": 4.452837865308828e-06,
      "loss": 2.6346,
      "step": 52932
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1814618110656738,
      "learning_rate": 4.452495288715114e-06,
      "loss": 2.4224,
      "step": 52933
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0660696029663086,
      "learning_rate": 4.452152721525924e-06,
      "loss": 2.4928,
      "step": 52934
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1989235877990723,
      "learning_rate": 4.451810163741845e-06,
      "loss": 2.4128,
      "step": 52935
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1707466840744019,
      "learning_rate": 4.45146761536345e-06,
      "loss": 2.2899,
      "step": 52936
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0675427913665771,
      "learning_rate": 4.4511250763913304e-06,
      "loss": 2.2698,
      "step": 52937
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.10154128074646,
      "learning_rate": 4.450782546826059e-06,
      "loss": 2.3408,
      "step": 52938
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.053837776184082,
      "learning_rate": 4.450440026668219e-06,
      "loss": 2.1447,
      "step": 52939
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9863823056221008,
      "learning_rate": 4.4500975159183865e-06,
      "loss": 2.2141,
      "step": 52940
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0073692798614502,
      "learning_rate": 4.44975501457715e-06,
      "loss": 2.2225,
      "step": 52941
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2292317152023315,
      "learning_rate": 4.449412522645082e-06,
      "loss": 2.552,
      "step": 52942
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.066625714302063,
      "learning_rate": 4.449070040122772e-06,
      "loss": 2.3906,
      "step": 52943
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1295497417449951,
      "learning_rate": 4.448727567010792e-06,
      "loss": 2.337,
      "step": 52944
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0385730266571045,
      "learning_rate": 4.448385103309729e-06,
      "loss": 2.1946,
      "step": 52945
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.147937297821045,
      "learning_rate": 4.448042649020159e-06,
      "loss": 2.2945,
      "step": 52946
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0774319171905518,
      "learning_rate": 4.447700204142667e-06,
      "loss": 2.1926,
      "step": 52947
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.983534574508667,
      "learning_rate": 4.447357768677828e-06,
      "loss": 1.9892,
      "step": 52948
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.122193455696106,
      "learning_rate": 4.447015342626228e-06,
      "loss": 2.5766,
      "step": 52949
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0564097166061401,
      "learning_rate": 4.4466729259884435e-06,
      "loss": 2.3801,
      "step": 52950
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9995622038841248,
      "learning_rate": 4.44633051876506e-06,
      "loss": 2.1491,
      "step": 52951
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0656111240386963,
      "learning_rate": 4.445988120956654e-06,
      "loss": 2.2038,
      "step": 52952
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0376245975494385,
      "learning_rate": 4.445645732563806e-06,
      "loss": 2.3189,
      "step": 52953
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0735303163528442,
      "learning_rate": 4.445303353587095e-06,
      "loss": 2.3308,
      "step": 52954
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.173356533050537,
      "learning_rate": 4.444960984027106e-06,
      "loss": 2.3717,
      "step": 52955
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3199838399887085,
      "learning_rate": 4.444618623884413e-06,
      "loss": 2.1625,
      "step": 52956
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1034492254257202,
      "learning_rate": 4.4442762731596045e-06,
      "loss": 2.1722,
      "step": 52957
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0593862533569336,
      "learning_rate": 4.443933931853253e-06,
      "loss": 2.414,
      "step": 52958
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2023046016693115,
      "learning_rate": 4.443591599965945e-06,
      "loss": 2.2243,
      "step": 52959
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0724471807479858,
      "learning_rate": 4.443249277498256e-06,
      "loss": 2.3649,
      "step": 52960
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1608937978744507,
      "learning_rate": 4.442906964450772e-06,
      "loss": 2.4505,
      "step": 52961
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0796034336090088,
      "learning_rate": 4.4425646608240705e-06,
      "loss": 2.23,
      "step": 52962
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1173185110092163,
      "learning_rate": 4.442222366618726e-06,
      "loss": 2.5144,
      "step": 52963
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3094134330749512,
      "learning_rate": 4.441880081835329e-06,
      "loss": 2.3618,
      "step": 52964
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1733993291854858,
      "learning_rate": 4.441537806474454e-06,
      "loss": 2.346,
      "step": 52965
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0890454053878784,
      "learning_rate": 4.4411955405366795e-06,
      "loss": 2.4254,
      "step": 52966
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2106693983078003,
      "learning_rate": 4.440853284022592e-06,
      "loss": 2.3698,
      "step": 52967
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.225448727607727,
      "learning_rate": 4.440511036932766e-06,
      "loss": 2.3447,
      "step": 52968
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1495417356491089,
      "learning_rate": 4.440168799267782e-06,
      "loss": 2.4477,
      "step": 52969
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.751959800720215,
      "learning_rate": 4.439826571028225e-06,
      "loss": 2.2534,
      "step": 52970
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0561078786849976,
      "learning_rate": 4.439484352214669e-06,
      "loss": 2.5469,
      "step": 52971
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9843209385871887,
      "learning_rate": 4.4391421428277004e-06,
      "loss": 2.4348,
      "step": 52972
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9963007569313049,
      "learning_rate": 4.438799942867893e-06,
      "loss": 2.1454,
      "step": 52973
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2369452714920044,
      "learning_rate": 4.438457752335833e-06,
      "loss": 2.4043,
      "step": 52974
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2051494121551514,
      "learning_rate": 4.438115571232094e-06,
      "loss": 2.1487,
      "step": 52975
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1607446670532227,
      "learning_rate": 4.437773399557264e-06,
      "loss": 2.0613,
      "step": 52976
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1121352910995483,
      "learning_rate": 4.4374312373119146e-06,
      "loss": 2.1525,
      "step": 52977
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1034809350967407,
      "learning_rate": 4.4370890844966375e-06,
      "loss": 2.1455,
      "step": 52978
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0666284561157227,
      "learning_rate": 4.436746941111998e-06,
      "loss": 2.1816,
      "step": 52979
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0806527137756348,
      "learning_rate": 4.436404807158586e-06,
      "loss": 2.2808,
      "step": 52980
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0816056728363037,
      "learning_rate": 4.436062682636977e-06,
      "loss": 2.2732,
      "step": 52981
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.042871356010437,
      "learning_rate": 4.435720567547755e-06,
      "loss": 2.1888,
      "step": 52982
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0889911651611328,
      "learning_rate": 4.435378461891495e-06,
      "loss": 2.3324,
      "step": 52983
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.117903709411621,
      "learning_rate": 4.435036365668785e-06,
      "loss": 2.1801,
      "step": 52984
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0397592782974243,
      "learning_rate": 4.434694278880194e-06,
      "loss": 2.1797,
      "step": 52985
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2189357280731201,
      "learning_rate": 4.4343522015263115e-06,
      "loss": 2.2561,
      "step": 52986
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1206434965133667,
      "learning_rate": 4.434010133607711e-06,
      "loss": 2.4891,
      "step": 52987
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.243430733680725,
      "learning_rate": 4.433668075124978e-06,
      "loss": 2.0807,
      "step": 52988
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9435131549835205,
      "learning_rate": 4.433326026078689e-06,
      "loss": 2.3051,
      "step": 52989
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1097469329833984,
      "learning_rate": 4.432983986469421e-06,
      "loss": 2.1133,
      "step": 52990
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1241744756698608,
      "learning_rate": 4.432641956297762e-06,
      "loss": 2.4481,
      "step": 52991
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.138157844543457,
      "learning_rate": 4.432299935564286e-06,
      "loss": 2.3426,
      "step": 52992
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.6682639122009277,
      "learning_rate": 4.4319579242695694e-06,
      "loss": 2.4502,
      "step": 52993
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9918091297149658,
      "learning_rate": 4.431615922414201e-06,
      "loss": 2.1712,
      "step": 52994
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1740061044692993,
      "learning_rate": 4.431273929998755e-06,
      "loss": 2.3738,
      "step": 52995
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2351765632629395,
      "learning_rate": 4.43093194702381e-06,
      "loss": 2.562,
      "step": 52996
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.093969464302063,
      "learning_rate": 4.4305899734899505e-06,
      "loss": 2.3315,
      "step": 52997
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1111574172973633,
      "learning_rate": 4.430248009397749e-06,
      "loss": 2.0737,
      "step": 52998
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1678515672683716,
      "learning_rate": 4.429906054747795e-06,
      "loss": 2.3483,
      "step": 52999
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.22777259349823,
      "learning_rate": 4.4295641095406595e-06,
      "loss": 2.1657,
      "step": 53000
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.206413984298706,
      "learning_rate": 4.429222173776928e-06,
      "loss": 2.1776,
      "step": 53001
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0682522058486938,
      "learning_rate": 4.428880247457176e-06,
      "loss": 2.3395,
      "step": 53002
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1296240091323853,
      "learning_rate": 4.428538330581987e-06,
      "loss": 2.2366,
      "step": 53003
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0907690525054932,
      "learning_rate": 4.42819642315194e-06,
      "loss": 2.3089,
      "step": 53004
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0548689365386963,
      "learning_rate": 4.427854525167613e-06,
      "loss": 2.4941,
      "step": 53005
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2174930572509766,
      "learning_rate": 4.427512636629583e-06,
      "loss": 2.3332,
      "step": 53006
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1090624332427979,
      "learning_rate": 4.4271707575384345e-06,
      "loss": 2.521,
      "step": 53007
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.061837077140808,
      "learning_rate": 4.426828887894743e-06,
      "loss": 2.5002,
      "step": 53008
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.044970989227295,
      "learning_rate": 4.426487027699093e-06,
      "loss": 2.4231,
      "step": 53009
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1632041931152344,
      "learning_rate": 4.426145176952058e-06,
      "loss": 2.2991,
      "step": 53010
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1400293111801147,
      "learning_rate": 4.425803335654224e-06,
      "loss": 2.4665,
      "step": 53011
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1402153968811035,
      "learning_rate": 4.4254615038061645e-06,
      "loss": 2.1904,
      "step": 53012
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9960340261459351,
      "learning_rate": 4.425119681408464e-06,
      "loss": 2.4803,
      "step": 53013
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1039968729019165,
      "learning_rate": 4.4247778684616975e-06,
      "loss": 2.2806,
      "step": 53014
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.145050048828125,
      "learning_rate": 4.42443606496645e-06,
      "loss": 2.6398,
      "step": 53015
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1621761322021484,
      "learning_rate": 4.424094270923297e-06,
      "loss": 2.3649,
      "step": 53016
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1334935426712036,
      "learning_rate": 4.423752486332815e-06,
      "loss": 2.237,
      "step": 53017
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0162434577941895,
      "learning_rate": 4.423410711195591e-06,
      "loss": 1.9301,
      "step": 53018
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1491717100143433,
      "learning_rate": 4.423068945512201e-06,
      "loss": 2.238,
      "step": 53019
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1123015880584717,
      "learning_rate": 4.42272718928322e-06,
      "loss": 2.2915,
      "step": 53020
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1088736057281494,
      "learning_rate": 4.422385442509235e-06,
      "loss": 2.2118,
      "step": 53021
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.057939887046814,
      "learning_rate": 4.422043705190817e-06,
      "loss": 2.0055,
      "step": 53022
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0661827325820923,
      "learning_rate": 4.421701977328554e-06,
      "loss": 2.1738,
      "step": 53023
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.211444616317749,
      "learning_rate": 4.421360258923021e-06,
      "loss": 2.3979,
      "step": 53024
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1637914180755615,
      "learning_rate": 4.421018549974793e-06,
      "loss": 2.2773,
      "step": 53025
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.065504789352417,
      "learning_rate": 4.42067685048446e-06,
      "loss": 2.4364,
      "step": 53026
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0640994310379028,
      "learning_rate": 4.420335160452589e-06,
      "loss": 2.1583,
      "step": 53027
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0176985263824463,
      "learning_rate": 4.4199934798797705e-06,
      "loss": 2.2779,
      "step": 53028
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.066423773765564,
      "learning_rate": 4.419651808766574e-06,
      "loss": 2.3126,
      "step": 53029
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0722907781600952,
      "learning_rate": 4.419310147113587e-06,
      "loss": 2.2747,
      "step": 53030
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2408238649368286,
      "learning_rate": 4.418968494921385e-06,
      "loss": 2.3874,
      "step": 53031
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.078141689300537,
      "learning_rate": 4.418626852190548e-06,
      "loss": 2.361,
      "step": 53032
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1269015073776245,
      "learning_rate": 4.41828521892165e-06,
      "loss": 2.2111,
      "step": 53033
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1043254137039185,
      "learning_rate": 4.417943595115277e-06,
      "loss": 2.4237,
      "step": 53034
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.032108187675476,
      "learning_rate": 4.4176019807720035e-06,
      "loss": 2.3644,
      "step": 53035
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0325098037719727,
      "learning_rate": 4.417260375892415e-06,
      "loss": 2.3954,
      "step": 53036
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1019092798233032,
      "learning_rate": 4.416918780477082e-06,
      "loss": 2.2088,
      "step": 53037
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0883471965789795,
      "learning_rate": 4.416577194526591e-06,
      "loss": 2.2513,
      "step": 53038
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0849885940551758,
      "learning_rate": 4.416235618041515e-06,
      "loss": 2.2339,
      "step": 53039
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0414297580718994,
      "learning_rate": 4.4158940510224405e-06,
      "loss": 2.4563,
      "step": 53040
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.015113353729248,
      "learning_rate": 4.415552493469938e-06,
      "loss": 2.4368,
      "step": 53041
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.120070457458496,
      "learning_rate": 4.4152109453845935e-06,
      "loss": 2.5055,
      "step": 53042
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1887134313583374,
      "learning_rate": 4.414869406766981e-06,
      "loss": 2.4694,
      "step": 53043
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.050533413887024,
      "learning_rate": 4.414527877617688e-06,
      "loss": 2.2449,
      "step": 53044
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0598138570785522,
      "learning_rate": 4.4141863579372805e-06,
      "loss": 2.463,
      "step": 53045
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1165649890899658,
      "learning_rate": 4.413844847726348e-06,
      "loss": 2.194,
      "step": 53046
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1189967393875122,
      "learning_rate": 4.413503346985461e-06,
      "loss": 2.2917,
      "step": 53047
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1365420818328857,
      "learning_rate": 4.413161855715208e-06,
      "loss": 2.3823,
      "step": 53048
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1959575414657593,
      "learning_rate": 4.412820373916159e-06,
      "loss": 2.2502,
      "step": 53049
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1576316356658936,
      "learning_rate": 4.412478901588901e-06,
      "loss": 2.1864,
      "step": 53050
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0070717334747314,
      "learning_rate": 4.4121374387340076e-06,
      "loss": 2.1379,
      "step": 53051
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0552499294281006,
      "learning_rate": 4.4117959853520565e-06,
      "loss": 2.1571,
      "step": 53052
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.130831241607666,
      "learning_rate": 4.411454541443632e-06,
      "loss": 2.2121,
      "step": 53053
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1533308029174805,
      "learning_rate": 4.411113107009306e-06,
      "loss": 2.2258,
      "step": 53054
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1162594556808472,
      "learning_rate": 4.410771682049665e-06,
      "loss": 2.284,
      "step": 53055
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1347590684890747,
      "learning_rate": 4.410430266565281e-06,
      "loss": 2.277,
      "step": 53056
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2179397344589233,
      "learning_rate": 4.410088860556743e-06,
      "loss": 2.2051,
      "step": 53057
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0168300867080688,
      "learning_rate": 4.4097474640246145e-06,
      "loss": 2.3757,
      "step": 53058
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1178840398788452,
      "learning_rate": 4.409406076969486e-06,
      "loss": 2.5272,
      "step": 53059
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1794623136520386,
      "learning_rate": 4.4090646993919296e-06,
      "loss": 2.4953,
      "step": 53060
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0285534858703613,
      "learning_rate": 4.40872333129253e-06,
      "loss": 2.4576,
      "step": 53061
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9766756296157837,
      "learning_rate": 4.408381972671859e-06,
      "loss": 2.4177,
      "step": 53062
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9868726134300232,
      "learning_rate": 4.408040623530504e-06,
      "loss": 2.3532,
      "step": 53063
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9528583288192749,
      "learning_rate": 4.407699283869035e-06,
      "loss": 2.3554,
      "step": 53064
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9897772669792175,
      "learning_rate": 4.407357953688037e-06,
      "loss": 2.2143,
      "step": 53065
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1044543981552124,
      "learning_rate": 4.407016632988084e-06,
      "loss": 2.3565,
      "step": 53066
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1408209800720215,
      "learning_rate": 4.40667532176976e-06,
      "loss": 2.2156,
      "step": 53067
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1394370794296265,
      "learning_rate": 4.406334020033637e-06,
      "loss": 2.0611,
      "step": 53068
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.155285358428955,
      "learning_rate": 4.4059927277803005e-06,
      "loss": 2.305,
      "step": 53069
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.188794493675232,
      "learning_rate": 4.405651445010326e-06,
      "loss": 2.287,
      "step": 53070
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1329193115234375,
      "learning_rate": 4.405310171724291e-06,
      "loss": 2.3261,
      "step": 53071
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1173304319381714,
      "learning_rate": 4.4049689079227705e-06,
      "loss": 2.3957,
      "step": 53072
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0227667093276978,
      "learning_rate": 4.404627653606352e-06,
      "loss": 2.3629,
      "step": 53073
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3426978588104248,
      "learning_rate": 4.404286408775606e-06,
      "loss": 2.3955,
      "step": 53074
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0996952056884766,
      "learning_rate": 4.4039451734311175e-06,
      "loss": 2.2801,
      "step": 53075
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0422242879867554,
      "learning_rate": 4.403603947573458e-06,
      "loss": 2.3475,
      "step": 53076
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0637834072113037,
      "learning_rate": 4.403262731203214e-06,
      "loss": 2.253,
      "step": 53077
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1306307315826416,
      "learning_rate": 4.4029215243209556e-06,
      "loss": 2.1435,
      "step": 53078
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.055038332939148,
      "learning_rate": 4.402580326927269e-06,
      "loss": 2.3917,
      "step": 53079
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.04111647605896,
      "learning_rate": 4.402239139022729e-06,
      "loss": 2.26,
      "step": 53080
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0195581912994385,
      "learning_rate": 4.40189796060791e-06,
      "loss": 2.5968,
      "step": 53081
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.324674129486084,
      "learning_rate": 4.401556791683399e-06,
      "loss": 2.2354,
      "step": 53082
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9797044992446899,
      "learning_rate": 4.401215632249769e-06,
      "loss": 2.3296,
      "step": 53083
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1175155639648438,
      "learning_rate": 4.400874482307596e-06,
      "loss": 2.3129,
      "step": 53084
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.14589524269104,
      "learning_rate": 4.400533341857464e-06,
      "loss": 2.3408,
      "step": 53085
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0353516340255737,
      "learning_rate": 4.400192210899951e-06,
      "loss": 2.2518,
      "step": 53086
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1212235689163208,
      "learning_rate": 4.399851089435627e-06,
      "loss": 2.4371,
      "step": 53087
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0207651853561401,
      "learning_rate": 4.3995099774650804e-06,
      "loss": 2.3093,
      "step": 53088
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0226304531097412,
      "learning_rate": 4.399168874988883e-06,
      "loss": 2.4658,
      "step": 53089
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.003321886062622,
      "learning_rate": 4.3988277820076185e-06,
      "loss": 2.4764,
      "step": 53090
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1967582702636719,
      "learning_rate": 4.398486698521859e-06,
      "loss": 2.0682,
      "step": 53091
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0948097705841064,
      "learning_rate": 4.398145624532189e-06,
      "loss": 2.3215,
      "step": 53092
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1403543949127197,
      "learning_rate": 4.3978045600391804e-06,
      "loss": 2.4213,
      "step": 53093
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1805812120437622,
      "learning_rate": 4.397463505043418e-06,
      "loss": 2.1629,
      "step": 53094
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0532886981964111,
      "learning_rate": 4.397122459545472e-06,
      "loss": 2.1416,
      "step": 53095
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0690075159072876,
      "learning_rate": 4.3967814235459306e-06,
      "loss": 2.3414,
      "step": 53096
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.168384075164795,
      "learning_rate": 4.396440397045365e-06,
      "loss": 2.2826,
      "step": 53097
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1683346033096313,
      "learning_rate": 4.396099380044355e-06,
      "loss": 2.4671,
      "step": 53098
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2054393291473389,
      "learning_rate": 4.395758372543476e-06,
      "loss": 2.2208,
      "step": 53099
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0500998497009277,
      "learning_rate": 4.3954173745433115e-06,
      "loss": 2.4148,
      "step": 53100
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1107852458953857,
      "learning_rate": 4.395076386044434e-06,
      "loss": 2.2875,
      "step": 53101
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.080886721611023,
      "learning_rate": 4.394735407047427e-06,
      "loss": 2.1527,
      "step": 53102
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0468899011611938,
      "learning_rate": 4.3943944375528625e-06,
      "loss": 2.3278,
      "step": 53103
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.200281023979187,
      "learning_rate": 4.394053477561325e-06,
      "loss": 2.3757,
      "step": 53104
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2156227827072144,
      "learning_rate": 4.393712527073386e-06,
      "loss": 2.3342,
      "step": 53105
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0709962844848633,
      "learning_rate": 4.393371586089631e-06,
      "loss": 2.2787,
      "step": 53106
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.028389573097229,
      "learning_rate": 4.393030654610634e-06,
      "loss": 2.2981,
      "step": 53107
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.079738736152649,
      "learning_rate": 4.392689732636968e-06,
      "loss": 2.5412,
      "step": 53108
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1833199262619019,
      "learning_rate": 4.392348820169221e-06,
      "loss": 2.4041,
      "step": 53109
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1695338487625122,
      "learning_rate": 4.392007917207965e-06,
      "loss": 2.2231,
      "step": 53110
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.155940294265747,
      "learning_rate": 4.391667023753775e-06,
      "loss": 2.3067,
      "step": 53111
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1459355354309082,
      "learning_rate": 4.391326139807236e-06,
      "loss": 2.305,
      "step": 53112
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1346690654754639,
      "learning_rate": 4.390985265368924e-06,
      "loss": 2.402,
      "step": 53113
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9659633636474609,
      "learning_rate": 4.39064440043941e-06,
      "loss": 2.4585,
      "step": 53114
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9691364169120789,
      "learning_rate": 4.390303545019281e-06,
      "loss": 2.2615,
      "step": 53115
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1208726167678833,
      "learning_rate": 4.389962699109108e-06,
      "loss": 2.3542,
      "step": 53116
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2258732318878174,
      "learning_rate": 4.389621862709476e-06,
      "loss": 2.2783,
      "step": 53117
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0771450996398926,
      "learning_rate": 4.389281035820954e-06,
      "loss": 2.5542,
      "step": 53118
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0324139595031738,
      "learning_rate": 4.388940218444129e-06,
      "loss": 2.1775,
      "step": 53119
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.086006999015808,
      "learning_rate": 4.3885994105795696e-06,
      "loss": 2.3421,
      "step": 53120
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2767068147659302,
      "learning_rate": 4.388258612227862e-06,
      "loss": 2.3094,
      "step": 53121
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.05705988407135,
      "learning_rate": 4.387917823389578e-06,
      "loss": 2.4382,
      "step": 53122
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.065064549446106,
      "learning_rate": 4.387577044065303e-06,
      "loss": 2.3425,
      "step": 53123
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.058387279510498,
      "learning_rate": 4.3872362742556025e-06,
      "loss": 2.418,
      "step": 53124
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1602931022644043,
      "learning_rate": 4.386895513961066e-06,
      "loss": 2.4727,
      "step": 53125
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9485217928886414,
      "learning_rate": 4.386554763182261e-06,
      "loss": 2.2421,
      "step": 53126
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0889992713928223,
      "learning_rate": 4.386214021919775e-06,
      "loss": 2.2959,
      "step": 53127
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3119086027145386,
      "learning_rate": 4.385873290174176e-06,
      "loss": 2.3468,
      "step": 53128
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0319457054138184,
      "learning_rate": 4.38553256794605e-06,
      "loss": 2.4749,
      "step": 53129
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1249570846557617,
      "learning_rate": 4.385191855235969e-06,
      "loss": 2.3256,
      "step": 53130
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.032174825668335,
      "learning_rate": 4.384851152044516e-06,
      "loss": 2.2145,
      "step": 53131
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.041247844696045,
      "learning_rate": 4.3845104583722605e-06,
      "loss": 2.3545,
      "step": 53132
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1307886838912964,
      "learning_rate": 4.38416977421979e-06,
      "loss": 2.2219,
      "step": 53133
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1514196395874023,
      "learning_rate": 4.383829099587677e-06,
      "loss": 2.5199,
      "step": 53134
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.072258472442627,
      "learning_rate": 4.383488434476494e-06,
      "loss": 2.4768,
      "step": 53135
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0812827348709106,
      "learning_rate": 4.383147778886827e-06,
      "loss": 2.2009,
      "step": 53136
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1855663061141968,
      "learning_rate": 4.382807132819251e-06,
      "loss": 2.4911,
      "step": 53137
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0934990644454956,
      "learning_rate": 4.382466496274339e-06,
      "loss": 2.2793,
      "step": 53138
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9938748478889465,
      "learning_rate": 4.3821258692526755e-06,
      "loss": 2.2529,
      "step": 53139
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2000066041946411,
      "learning_rate": 4.381785251754831e-06,
      "loss": 2.2403,
      "step": 53140
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0543478727340698,
      "learning_rate": 4.38144464378139e-06,
      "loss": 2.442,
      "step": 53141
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1777091026306152,
      "learning_rate": 4.381104045332926e-06,
      "loss": 2.3899,
      "step": 53142
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2258491516113281,
      "learning_rate": 4.380763456410013e-06,
      "loss": 2.3004,
      "step": 53143
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1573286056518555,
      "learning_rate": 4.380422877013236e-06,
      "loss": 2.3995,
      "step": 53144
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1354529857635498,
      "learning_rate": 4.380082307143164e-06,
      "loss": 2.4201,
      "step": 53145
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.030146837234497,
      "learning_rate": 4.3797417468003845e-06,
      "loss": 2.5348,
      "step": 53146
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3768819570541382,
      "learning_rate": 4.379401195985463e-06,
      "loss": 2.2138,
      "step": 53147
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0815836191177368,
      "learning_rate": 4.379060654698989e-06,
      "loss": 2.2511,
      "step": 53148
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1074832677841187,
      "learning_rate": 4.378720122941532e-06,
      "loss": 2.4592,
      "step": 53149
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.023486614227295,
      "learning_rate": 4.378379600713673e-06,
      "loss": 2.2626,
      "step": 53150
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.00011146068573,
      "learning_rate": 4.3780390880159816e-06,
      "loss": 2.2396,
      "step": 53151
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.090022325515747,
      "learning_rate": 4.377698584849046e-06,
      "loss": 2.3526,
      "step": 53152
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1673448085784912,
      "learning_rate": 4.377358091213434e-06,
      "loss": 2.5332,
      "step": 53153
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1718593835830688,
      "learning_rate": 4.37701760710973e-06,
      "loss": 2.4495,
      "step": 53154
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1212866306304932,
      "learning_rate": 4.376677132538506e-06,
      "loss": 2.407,
      "step": 53155
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0944809913635254,
      "learning_rate": 4.376336667500345e-06,
      "loss": 2.294,
      "step": 53156
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0265916585922241,
      "learning_rate": 4.375996211995817e-06,
      "loss": 2.3362,
      "step": 53157
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1130588054656982,
      "learning_rate": 4.375655766025506e-06,
      "loss": 2.2966,
      "step": 53158
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.145188570022583,
      "learning_rate": 4.375315329589983e-06,
      "loss": 2.4726,
      "step": 53159
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.129799246788025,
      "learning_rate": 4.3749749026898315e-06,
      "loss": 2.1072,
      "step": 53160
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3649370670318604,
      "learning_rate": 4.3746344853256215e-06,
      "loss": 2.2575,
      "step": 53161
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1955410242080688,
      "learning_rate": 4.374294077497938e-06,
      "loss": 2.4101,
      "step": 53162
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0491946935653687,
      "learning_rate": 4.373953679207353e-06,
      "loss": 2.2825,
      "step": 53163
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1310360431671143,
      "learning_rate": 4.3736132904544455e-06,
      "loss": 2.4079,
      "step": 53164
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1055668592453003,
      "learning_rate": 4.373272911239787e-06,
      "loss": 2.433,
      "step": 53165
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9521872997283936,
      "learning_rate": 4.372932541563964e-06,
      "loss": 2.2041,
      "step": 53166
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2013431787490845,
      "learning_rate": 4.372592181427544e-06,
      "loss": 2.3136,
      "step": 53167
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.186403751373291,
      "learning_rate": 4.372251830831113e-06,
      "loss": 2.229,
      "step": 53168
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.102448582649231,
      "learning_rate": 4.371911489775244e-06,
      "loss": 2.3734,
      "step": 53169
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2283507585525513,
      "learning_rate": 4.371571158260509e-06,
      "loss": 2.1978,
      "step": 53170
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0295629501342773,
      "learning_rate": 4.371230836287493e-06,
      "loss": 2.4485,
      "step": 53171
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1377726793289185,
      "learning_rate": 4.370890523856767e-06,
      "loss": 2.1092,
      "step": 53172
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0402182340621948,
      "learning_rate": 4.370550220968912e-06,
      "loss": 2.362,
      "step": 53173
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0083321332931519,
      "learning_rate": 4.3702099276245015e-06,
      "loss": 2.2074,
      "step": 53174
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9813117384910583,
      "learning_rate": 4.369869643824117e-06,
      "loss": 2.1357,
      "step": 53175
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6918330192565918,
      "learning_rate": 4.369529369568332e-06,
      "loss": 2.6602,
      "step": 53176
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1075847148895264,
      "learning_rate": 4.369189104857726e-06,
      "loss": 2.1267,
      "step": 53177
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1303088665008545,
      "learning_rate": 4.36884884969287e-06,
      "loss": 2.2379,
      "step": 53178
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.074510097503662,
      "learning_rate": 4.368508604074347e-06,
      "loss": 2.3575,
      "step": 53179
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0100560188293457,
      "learning_rate": 4.368168368002728e-06,
      "loss": 2.295,
      "step": 53180
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2084964513778687,
      "learning_rate": 4.367828141478596e-06,
      "loss": 2.4955,
      "step": 53181
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1133016347885132,
      "learning_rate": 4.3674879245025226e-06,
      "loss": 2.3504,
      "step": 53182
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1701072454452515,
      "learning_rate": 4.36714771707509e-06,
      "loss": 2.3503,
      "step": 53183
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0480557680130005,
      "learning_rate": 4.366807519196868e-06,
      "loss": 2.3357,
      "step": 53184
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1103378534317017,
      "learning_rate": 4.366467330868441e-06,
      "loss": 2.4311,
      "step": 53185
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0749469995498657,
      "learning_rate": 4.366127152090379e-06,
      "loss": 2.22,
      "step": 53186
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1393448114395142,
      "learning_rate": 4.365786982863265e-06,
      "loss": 2.3479,
      "step": 53187
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1165881156921387,
      "learning_rate": 4.365446823187668e-06,
      "loss": 2.2161,
      "step": 53188
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0508356094360352,
      "learning_rate": 4.365106673064177e-06,
      "loss": 2.2976,
      "step": 53189
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.034160852432251,
      "learning_rate": 4.364766532493353e-06,
      "loss": 2.1426,
      "step": 53190
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0042986869812012,
      "learning_rate": 4.364426401475783e-06,
      "loss": 2.315,
      "step": 53191
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2290983200073242,
      "learning_rate": 4.3640862800120385e-06,
      "loss": 2.3251,
      "step": 53192
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0656020641326904,
      "learning_rate": 4.363746168102702e-06,
      "loss": 2.4979,
      "step": 53193
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1139296293258667,
      "learning_rate": 4.3634060657483425e-06,
      "loss": 2.2639,
      "step": 53194
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0530275106430054,
      "learning_rate": 4.363065972949544e-06,
      "loss": 2.2957,
      "step": 53195
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.02783203125,
      "learning_rate": 4.362725889706876e-06,
      "loss": 2.3908,
      "step": 53196
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0832895040512085,
      "learning_rate": 4.3623858160209236e-06,
      "loss": 2.3229,
      "step": 53197
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2384734153747559,
      "learning_rate": 4.362045751892256e-06,
      "loss": 2.5601,
      "step": 53198
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1308550834655762,
      "learning_rate": 4.36170569732145e-06,
      "loss": 2.484,
      "step": 53199
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1371979713439941,
      "learning_rate": 4.361365652309089e-06,
      "loss": 2.4471,
      "step": 53200
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1221513748168945,
      "learning_rate": 4.361025616855739e-06,
      "loss": 2.065,
      "step": 53201
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1616417169570923,
      "learning_rate": 4.360685590961986e-06,
      "loss": 2.2272,
      "step": 53202
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.097975254058838,
      "learning_rate": 4.360345574628403e-06,
      "loss": 2.3367,
      "step": 53203
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1333755254745483,
      "learning_rate": 4.360005567855566e-06,
      "loss": 2.5724,
      "step": 53204
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1348825693130493,
      "learning_rate": 4.3596655706440485e-06,
      "loss": 2.1068,
      "step": 53205
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3196378946304321,
      "learning_rate": 4.359325582994432e-06,
      "loss": 2.4678,
      "step": 53206
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1237059831619263,
      "learning_rate": 4.3589856049072875e-06,
      "loss": 2.3849,
      "step": 53207
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.010236382484436,
      "learning_rate": 4.358645636383198e-06,
      "loss": 2.2425,
      "step": 53208
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1060943603515625,
      "learning_rate": 4.3583056774227325e-06,
      "loss": 2.2642,
      "step": 53209
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.183637261390686,
      "learning_rate": 4.3579657280264755e-06,
      "loss": 2.3026,
      "step": 53210
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1456655263900757,
      "learning_rate": 4.357625788194995e-06,
      "loss": 2.4308,
      "step": 53211
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0916568040847778,
      "learning_rate": 4.3572858579288746e-06,
      "loss": 2.2675,
      "step": 53212
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.134596586227417,
      "learning_rate": 4.3569459372286835e-06,
      "loss": 2.4154,
      "step": 53213
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0754764080047607,
      "learning_rate": 4.356606026095006e-06,
      "loss": 2.3699,
      "step": 53214
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.217912197113037,
      "learning_rate": 4.3562661245284135e-06,
      "loss": 2.4287,
      "step": 53215
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1132956743240356,
      "learning_rate": 4.355926232529482e-06,
      "loss": 2.2544,
      "step": 53216
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9064074158668518,
      "learning_rate": 4.355586350098786e-06,
      "loss": 2.3487,
      "step": 53217
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0814992189407349,
      "learning_rate": 4.355246477236908e-06,
      "loss": 2.3318,
      "step": 53218
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0439600944519043,
      "learning_rate": 4.354906613944416e-06,
      "loss": 2.3376,
      "step": 53219
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9323151707649231,
      "learning_rate": 4.354566760221894e-06,
      "loss": 2.2146,
      "step": 53220
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9519717693328857,
      "learning_rate": 4.354226916069911e-06,
      "loss": 2.4918,
      "step": 53221
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.970757782459259,
      "learning_rate": 4.353887081489051e-06,
      "loss": 2.5205,
      "step": 53222
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0554016828536987,
      "learning_rate": 4.353547256479882e-06,
      "loss": 2.311,
      "step": 53223
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1439151763916016,
      "learning_rate": 4.353207441042987e-06,
      "loss": 2.2274,
      "step": 53224
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1095259189605713,
      "learning_rate": 4.352867635178939e-06,
      "loss": 2.5324,
      "step": 53225
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.431315541267395,
      "learning_rate": 4.35252783888831e-06,
      "loss": 2.6431,
      "step": 53226
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0895920991897583,
      "learning_rate": 4.352188052171687e-06,
      "loss": 2.4091,
      "step": 53227
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1023521423339844,
      "learning_rate": 4.3518482750296354e-06,
      "loss": 2.4467,
      "step": 53228
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0521882772445679,
      "learning_rate": 4.351508507462733e-06,
      "loss": 2.3161,
      "step": 53229
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0058603286743164,
      "learning_rate": 4.351168749471562e-06,
      "loss": 2.4455,
      "step": 53230
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0541067123413086,
      "learning_rate": 4.350829001056693e-06,
      "loss": 2.2122,
      "step": 53231
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1056523323059082,
      "learning_rate": 4.3504892622187e-06,
      "loss": 2.4597,
      "step": 53232
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.205059289932251,
      "learning_rate": 4.350149532958166e-06,
      "loss": 2.4318,
      "step": 53233
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1886017322540283,
      "learning_rate": 4.349809813275659e-06,
      "loss": 2.4469,
      "step": 53234
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.034805417060852,
      "learning_rate": 4.349470103171762e-06,
      "loss": 2.5719,
      "step": 53235
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.028852939605713,
      "learning_rate": 4.349130402647045e-06,
      "loss": 2.3626,
      "step": 53236
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.366933822631836,
      "learning_rate": 4.34879071170209e-06,
      "loss": 2.2804,
      "step": 53237
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9374738931655884,
      "learning_rate": 4.348451030337466e-06,
      "loss": 2.3184,
      "step": 53238
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4049752950668335,
      "learning_rate": 4.348111358553756e-06,
      "loss": 2.5128,
      "step": 53239
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1717921495437622,
      "learning_rate": 4.34777169635153e-06,
      "loss": 2.3827,
      "step": 53240
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0873310565948486,
      "learning_rate": 4.347432043731368e-06,
      "loss": 2.4136,
      "step": 53241
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.093658208847046,
      "learning_rate": 4.347092400693844e-06,
      "loss": 2.1602,
      "step": 53242
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2464165687561035,
      "learning_rate": 4.346752767239534e-06,
      "loss": 2.3319,
      "step": 53243
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0663623809814453,
      "learning_rate": 4.34641314336901e-06,
      "loss": 2.1871,
      "step": 53244
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0054384469985962,
      "learning_rate": 4.346073529082855e-06,
      "loss": 2.4884,
      "step": 53245
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.284712553024292,
      "learning_rate": 4.345733924381636e-06,
      "loss": 2.3278,
      "step": 53246
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9568608403205872,
      "learning_rate": 4.3453943292659396e-06,
      "loss": 2.479,
      "step": 53247
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0836106538772583,
      "learning_rate": 4.34505474373633e-06,
      "loss": 2.4211,
      "step": 53248
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0154733657836914,
      "learning_rate": 4.344715167793393e-06,
      "loss": 2.4627,
      "step": 53249
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1705067157745361,
      "learning_rate": 4.344375601437696e-06,
      "loss": 2.3323,
      "step": 53250
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.070554256439209,
      "learning_rate": 4.344036044669823e-06,
      "loss": 2.305,
      "step": 53251
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1360177993774414,
      "learning_rate": 4.343696497490344e-06,
      "loss": 2.5301,
      "step": 53252
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0560952425003052,
      "learning_rate": 4.343356959899832e-06,
      "loss": 2.224,
      "step": 53253
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2191252708435059,
      "learning_rate": 4.34301743189887e-06,
      "loss": 2.2408,
      "step": 53254
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0632350444793701,
      "learning_rate": 4.34267791348803e-06,
      "loss": 2.3998,
      "step": 53255
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1901373863220215,
      "learning_rate": 4.342338404667883e-06,
      "loss": 2.4119,
      "step": 53256
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.154667854309082,
      "learning_rate": 4.341998905439014e-06,
      "loss": 2.4937,
      "step": 53257
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2749112844467163,
      "learning_rate": 4.341659415801989e-06,
      "loss": 2.2601,
      "step": 53258
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.306878685951233,
      "learning_rate": 4.341319935757392e-06,
      "loss": 2.2971,
      "step": 53259
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1424108743667603,
      "learning_rate": 4.340980465305794e-06,
      "loss": 2.4164,
      "step": 53260
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1131105422973633,
      "learning_rate": 4.340641004447768e-06,
      "loss": 2.501,
      "step": 53261
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0449920892715454,
      "learning_rate": 4.3403015531838966e-06,
      "loss": 2.3012,
      "step": 53262
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.08319091796875,
      "learning_rate": 4.339962111514747e-06,
      "loss": 2.5041,
      "step": 53263
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0979814529418945,
      "learning_rate": 4.339622679440902e-06,
      "loss": 2.1903,
      "step": 53264
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0871800184249878,
      "learning_rate": 4.339283256962932e-06,
      "loss": 2.5156,
      "step": 53265
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9950474500656128,
      "learning_rate": 4.338943844081417e-06,
      "loss": 2.1397,
      "step": 53266
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0446096658706665,
      "learning_rate": 4.338604440796925e-06,
      "loss": 2.2744,
      "step": 53267
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9699731469154358,
      "learning_rate": 4.338265047110045e-06,
      "loss": 2.3673,
      "step": 53268
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0233795642852783,
      "learning_rate": 4.337925663021335e-06,
      "loss": 2.2717,
      "step": 53269
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1204166412353516,
      "learning_rate": 4.337586288531384e-06,
      "loss": 2.4218,
      "step": 53270
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1115977764129639,
      "learning_rate": 4.3372469236407565e-06,
      "loss": 2.172,
      "step": 53271
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.094730257987976,
      "learning_rate": 4.336907568350038e-06,
      "loss": 2.3349,
      "step": 53272
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0155673027038574,
      "learning_rate": 4.336568222659796e-06,
      "loss": 2.4654,
      "step": 53273
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4096251726150513,
      "learning_rate": 4.336228886570612e-06,
      "loss": 2.2753,
      "step": 53274
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.943735659122467,
      "learning_rate": 4.3358895600830555e-06,
      "loss": 2.3575,
      "step": 53275
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0701838731765747,
      "learning_rate": 4.335550243197707e-06,
      "loss": 2.3725,
      "step": 53276
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9712802767753601,
      "learning_rate": 4.335210935915136e-06,
      "loss": 2.0554,
      "step": 53277
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0368541479110718,
      "learning_rate": 4.334871638235925e-06,
      "loss": 2.2495,
      "step": 53278
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0972965955734253,
      "learning_rate": 4.334532350160642e-06,
      "loss": 2.2469,
      "step": 53279
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0041310787200928,
      "learning_rate": 4.334193071689868e-06,
      "loss": 2.263,
      "step": 53280
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0255775451660156,
      "learning_rate": 4.333853802824176e-06,
      "loss": 2.0743,
      "step": 53281
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.275437355041504,
      "learning_rate": 4.333514543564141e-06,
      "loss": 2.5006,
      "step": 53282
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1603617668151855,
      "learning_rate": 4.333175293910335e-06,
      "loss": 2.4552,
      "step": 53283
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1305235624313354,
      "learning_rate": 4.332836053863337e-06,
      "loss": 2.362,
      "step": 53284
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1278637647628784,
      "learning_rate": 4.332496823423719e-06,
      "loss": 2.4468,
      "step": 53285
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0664526224136353,
      "learning_rate": 4.332157602592063e-06,
      "loss": 2.401,
      "step": 53286
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0391457080841064,
      "learning_rate": 4.331818391368937e-06,
      "loss": 2.3659,
      "step": 53287
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5062644481658936,
      "learning_rate": 4.331479189754916e-06,
      "loss": 2.254,
      "step": 53288
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2255116701126099,
      "learning_rate": 4.3311399977505805e-06,
      "loss": 2.3797,
      "step": 53289
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1587245464324951,
      "learning_rate": 4.3308008153564994e-06,
      "loss": 2.4713,
      "step": 53290
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.137111783027649,
      "learning_rate": 4.330461642573254e-06,
      "loss": 2.4432,
      "step": 53291
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0104163885116577,
      "learning_rate": 4.330122479401412e-06,
      "loss": 2.3043,
      "step": 53292
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0376254320144653,
      "learning_rate": 4.329783325841555e-06,
      "loss": 2.2035,
      "step": 53293
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0538145303726196,
      "learning_rate": 4.329444181894256e-06,
      "loss": 2.3222,
      "step": 53294
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0139247179031372,
      "learning_rate": 4.32910504756009e-06,
      "loss": 2.1505,
      "step": 53295
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1877694129943848,
      "learning_rate": 4.328765922839627e-06,
      "loss": 2.3886,
      "step": 53296
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.044108510017395,
      "learning_rate": 4.32842680773345e-06,
      "loss": 2.2666,
      "step": 53297
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0508744716644287,
      "learning_rate": 4.328087702242127e-06,
      "loss": 2.4503,
      "step": 53298
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1466161012649536,
      "learning_rate": 4.327748606366238e-06,
      "loss": 2.387,
      "step": 53299
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1249072551727295,
      "learning_rate": 4.327409520106352e-06,
      "loss": 2.402,
      "step": 53300
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0422159433364868,
      "learning_rate": 4.327070443463052e-06,
      "loss": 2.3638,
      "step": 53301
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9863196015357971,
      "learning_rate": 4.326731376436904e-06,
      "loss": 2.449,
      "step": 53302
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0556551218032837,
      "learning_rate": 4.32639231902849e-06,
      "loss": 2.3743,
      "step": 53303
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.140936017036438,
      "learning_rate": 4.326053271238381e-06,
      "loss": 2.3715,
      "step": 53304
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1268872022628784,
      "learning_rate": 4.325714233067154e-06,
      "loss": 2.3801,
      "step": 53305
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0969690084457397,
      "learning_rate": 4.325375204515379e-06,
      "loss": 2.0719,
      "step": 53306
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.080297589302063,
      "learning_rate": 4.32503618558364e-06,
      "loss": 2.4863,
      "step": 53307
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1431670188903809,
      "learning_rate": 4.324697176272504e-06,
      "loss": 2.3598,
      "step": 53308
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.939947783946991,
      "learning_rate": 4.324358176582548e-06,
      "loss": 2.2554,
      "step": 53309
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0327483415603638,
      "learning_rate": 4.324019186514343e-06,
      "loss": 2.1314,
      "step": 53310
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1984704732894897,
      "learning_rate": 4.32368020606847e-06,
      "loss": 2.5696,
      "step": 53311
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0353728532791138,
      "learning_rate": 4.3233412352454985e-06,
      "loss": 2.154,
      "step": 53312
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0180386304855347,
      "learning_rate": 4.323002274046008e-06,
      "loss": 2.2239,
      "step": 53313
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1169172525405884,
      "learning_rate": 4.322663322470567e-06,
      "loss": 2.0022,
      "step": 53314
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9993828535079956,
      "learning_rate": 4.322324380519756e-06,
      "loss": 2.3277,
      "step": 53315
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0420019626617432,
      "learning_rate": 4.321985448194149e-06,
      "loss": 2.2047,
      "step": 53316
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0252679586410522,
      "learning_rate": 4.321646525494315e-06,
      "loss": 2.586,
      "step": 53317
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1550997495651245,
      "learning_rate": 4.321307612420835e-06,
      "loss": 2.4503,
      "step": 53318
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1693722009658813,
      "learning_rate": 4.3209687089742795e-06,
      "loss": 2.4361,
      "step": 53319
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1394344568252563,
      "learning_rate": 4.3206298151552264e-06,
      "loss": 2.3198,
      "step": 53320
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0330454111099243,
      "learning_rate": 4.320290930964247e-06,
      "loss": 2.2926,
      "step": 53321
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.331106185913086,
      "learning_rate": 4.3199520564019195e-06,
      "loss": 2.209,
      "step": 53322
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9679495096206665,
      "learning_rate": 4.319613191468811e-06,
      "loss": 2.3921,
      "step": 53323
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.137915015220642,
      "learning_rate": 4.319274336165505e-06,
      "loss": 2.4693,
      "step": 53324
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9789819121360779,
      "learning_rate": 4.3189354904925675e-06,
      "loss": 2.1121,
      "step": 53325
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.008315086364746,
      "learning_rate": 4.318596654450582e-06,
      "loss": 2.3432,
      "step": 53326
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.217794418334961,
      "learning_rate": 4.3182578280401135e-06,
      "loss": 2.3094,
      "step": 53327
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0497865676879883,
      "learning_rate": 4.317919011261745e-06,
      "loss": 2.2059,
      "step": 53328
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2077463865280151,
      "learning_rate": 4.317580204116044e-06,
      "loss": 2.2535,
      "step": 53329
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1011576652526855,
      "learning_rate": 4.31724140660359e-06,
      "loss": 2.3656,
      "step": 53330
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.212129831314087,
      "learning_rate": 4.316902618724954e-06,
      "loss": 2.7462,
      "step": 53331
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0171912908554077,
      "learning_rate": 4.3165638404807145e-06,
      "loss": 2.3764,
      "step": 53332
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0268359184265137,
      "learning_rate": 4.316225071871438e-06,
      "loss": 2.1693,
      "step": 53333
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0709254741668701,
      "learning_rate": 4.315886312897712e-06,
      "loss": 2.4126,
      "step": 53334
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1983462572097778,
      "learning_rate": 4.315547563560094e-06,
      "loss": 2.25,
      "step": 53335
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0792464017868042,
      "learning_rate": 4.315208823859172e-06,
      "loss": 2.2118,
      "step": 53336
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1031521558761597,
      "learning_rate": 4.31487009379551e-06,
      "loss": 2.1909,
      "step": 53337
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.196655511856079,
      "learning_rate": 4.314531373369693e-06,
      "loss": 2.1119,
      "step": 53338
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1546423435211182,
      "learning_rate": 4.3141926625822835e-06,
      "loss": 2.4536,
      "step": 53339
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0122120380401611,
      "learning_rate": 4.313853961433866e-06,
      "loss": 2.4852,
      "step": 53340
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0758419036865234,
      "learning_rate": 4.313515269925007e-06,
      "loss": 2.2651,
      "step": 53341
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0249230861663818,
      "learning_rate": 4.3131765880562886e-06,
      "loss": 2.4889,
      "step": 53342
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.073675513267517,
      "learning_rate": 4.312837915828279e-06,
      "loss": 2.2859,
      "step": 53343
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.167602300643921,
      "learning_rate": 4.31249925324155e-06,
      "loss": 2.4147,
      "step": 53344
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9717909097671509,
      "learning_rate": 4.3121606002966845e-06,
      "loss": 2.0652,
      "step": 53345
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.18085515499115,
      "learning_rate": 4.311821956994247e-06,
      "loss": 2.3679,
      "step": 53346
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.056185245513916,
      "learning_rate": 4.311483323334819e-06,
      "loss": 2.2741,
      "step": 53347
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0505443811416626,
      "learning_rate": 4.311144699318973e-06,
      "loss": 2.2641,
      "step": 53348
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0546720027923584,
      "learning_rate": 4.310806084947281e-06,
      "loss": 2.3791,
      "step": 53349
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0358022451400757,
      "learning_rate": 4.310467480220315e-06,
      "loss": 2.156,
      "step": 53350
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0364991426467896,
      "learning_rate": 4.310128885138654e-06,
      "loss": 2.2313,
      "step": 53351
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1451482772827148,
      "learning_rate": 4.309790299702868e-06,
      "loss": 2.2304,
      "step": 53352
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1554492712020874,
      "learning_rate": 4.309451723913537e-06,
      "loss": 2.148,
      "step": 53353
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1122980117797852,
      "learning_rate": 4.309113157771225e-06,
      "loss": 2.2661,
      "step": 53354
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2970772981643677,
      "learning_rate": 4.308774601276517e-06,
      "loss": 2.3694,
      "step": 53355
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0417815446853638,
      "learning_rate": 4.308436054429978e-06,
      "loss": 2.2931,
      "step": 53356
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1243594884872437,
      "learning_rate": 4.308097517232189e-06,
      "loss": 2.0869,
      "step": 53357
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0425572395324707,
      "learning_rate": 4.307758989683717e-06,
      "loss": 2.3208,
      "step": 53358
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0471320152282715,
      "learning_rate": 4.3074204717851435e-06,
      "loss": 2.1891,
      "step": 53359
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9408071041107178,
      "learning_rate": 4.307081963537037e-06,
      "loss": 2.2225,
      "step": 53360
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1226390600204468,
      "learning_rate": 4.306743464939975e-06,
      "loss": 2.3607,
      "step": 53361
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1268272399902344,
      "learning_rate": 4.306404975994523e-06,
      "loss": 2.2005,
      "step": 53362
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9530336856842041,
      "learning_rate": 4.306066496701267e-06,
      "loss": 2.4203,
      "step": 53363
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0608819723129272,
      "learning_rate": 4.305728027060771e-06,
      "loss": 2.3676,
      "step": 53364
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3790359497070312,
      "learning_rate": 4.305389567073614e-06,
      "loss": 2.2149,
      "step": 53365
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1518367528915405,
      "learning_rate": 4.305051116740367e-06,
      "loss": 2.1936,
      "step": 53366
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1846023797988892,
      "learning_rate": 4.304712676061607e-06,
      "loss": 2.5338,
      "step": 53367
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0151329040527344,
      "learning_rate": 4.3043742450379025e-06,
      "loss": 2.3561,
      "step": 53368
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1307584047317505,
      "learning_rate": 4.304035823669834e-06,
      "loss": 2.4219,
      "step": 53369
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0743827819824219,
      "learning_rate": 4.303697411957972e-06,
      "loss": 2.2818,
      "step": 53370
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1450130939483643,
      "learning_rate": 4.303359009902886e-06,
      "loss": 2.3399,
      "step": 53371
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0363154411315918,
      "learning_rate": 4.303020617505159e-06,
      "loss": 2.2415,
      "step": 53372
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.0482256412506104,
      "learning_rate": 4.302682234765357e-06,
      "loss": 2.4451,
      "step": 53373
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2144700288772583,
      "learning_rate": 4.302343861684054e-06,
      "loss": 2.3453,
      "step": 53374
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1021100282669067,
      "learning_rate": 4.302005498261829e-06,
      "loss": 2.3224,
      "step": 53375
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.9656177163124084,
      "learning_rate": 4.301667144499249e-06,
      "loss": 2.353,
      "step": 53376
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0606459379196167,
      "learning_rate": 4.3013288003968935e-06,
      "loss": 2.3267,
      "step": 53377
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1010953187942505,
      "learning_rate": 4.300990465955333e-06,
      "loss": 2.2553,
      "step": 53378
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.069347858428955,
      "learning_rate": 4.3006521411751386e-06,
      "loss": 2.1949,
      "step": 53379
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1124452352523804,
      "learning_rate": 4.300313826056891e-06,
      "loss": 2.6009,
      "step": 53380
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1000899076461792,
      "learning_rate": 4.299975520601155e-06,
      "loss": 2.3439,
      "step": 53381
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.061323642730713,
      "learning_rate": 4.299637224808513e-06,
      "loss": 2.255,
      "step": 53382
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9860333800315857,
      "learning_rate": 4.29929893867953e-06,
      "loss": 2.1393,
      "step": 53383
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0399436950683594,
      "learning_rate": 4.298960662214787e-06,
      "loss": 2.123,
      "step": 53384
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0198431015014648,
      "learning_rate": 4.298622395414851e-06,
      "loss": 2.1671,
      "step": 53385
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0361478328704834,
      "learning_rate": 4.298284138280301e-06,
      "loss": 2.5982,
      "step": 53386
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1578313112258911,
      "learning_rate": 4.2979458908117085e-06,
      "loss": 2.1948,
      "step": 53387
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9188708066940308,
      "learning_rate": 4.297607653009647e-06,
      "loss": 2.4719,
      "step": 53388
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9650001525878906,
      "learning_rate": 4.297269424874685e-06,
      "loss": 2.2988,
      "step": 53389
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0441815853118896,
      "learning_rate": 4.2969312064074044e-06,
      "loss": 2.4943,
      "step": 53390
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.161263346672058,
      "learning_rate": 4.29659299760837e-06,
      "loss": 2.4582,
      "step": 53391
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1736997365951538,
      "learning_rate": 4.296254798478163e-06,
      "loss": 2.1205,
      "step": 53392
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1228787899017334,
      "learning_rate": 4.295916609017351e-06,
      "loss": 2.25,
      "step": 53393
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.111519694328308,
      "learning_rate": 4.295578429226512e-06,
      "loss": 2.586,
      "step": 53394
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1193915605545044,
      "learning_rate": 4.295240259106213e-06,
      "loss": 2.2919,
      "step": 53395
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1397920846939087,
      "learning_rate": 4.294902098657036e-06,
      "loss": 2.3374,
      "step": 53396
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0875334739685059,
      "learning_rate": 4.294563947879546e-06,
      "loss": 2.2078,
      "step": 53397
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1827898025512695,
      "learning_rate": 4.294225806774321e-06,
      "loss": 2.1938,
      "step": 53398
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.054439902305603,
      "learning_rate": 4.2938876753419344e-06,
      "loss": 2.2463,
      "step": 53399
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.005636215209961,
      "learning_rate": 4.293549553582958e-06,
      "loss": 2.3967,
      "step": 53400
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1492170095443726,
      "learning_rate": 4.293211441497961e-06,
      "loss": 2.3729,
      "step": 53401
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9962366223335266,
      "learning_rate": 4.292873339087525e-06,
      "loss": 2.2565,
      "step": 53402
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0904635190963745,
      "learning_rate": 4.292535246352214e-06,
      "loss": 2.3249,
      "step": 53403
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0200635194778442,
      "learning_rate": 4.292197163292609e-06,
      "loss": 2.1795,
      "step": 53404
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1961721181869507,
      "learning_rate": 4.291859089909282e-06,
      "loss": 2.1174,
      "step": 53405
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0898888111114502,
      "learning_rate": 4.291521026202799e-06,
      "loss": 2.7097,
      "step": 53406
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3278416395187378,
      "learning_rate": 4.291182972173743e-06,
      "loss": 2.4322,
      "step": 53407
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1640454530715942,
      "learning_rate": 4.290844927822678e-06,
      "loss": 2.301,
      "step": 53408
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0941071510314941,
      "learning_rate": 4.290506893150185e-06,
      "loss": 2.3174,
      "step": 53409
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1075881719589233,
      "learning_rate": 4.29016886815683e-06,
      "loss": 2.3886,
      "step": 53410
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.185608983039856,
      "learning_rate": 4.289830852843193e-06,
      "loss": 2.3698,
      "step": 53411
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1067240238189697,
      "learning_rate": 4.289492847209841e-06,
      "loss": 2.3404,
      "step": 53412
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0730031728744507,
      "learning_rate": 4.289154851257356e-06,
      "loss": 2.3081,
      "step": 53413
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0509309768676758,
      "learning_rate": 4.288816864986297e-06,
      "loss": 2.4537,
      "step": 53414
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0575096607208252,
      "learning_rate": 4.2884788883972485e-06,
      "loss": 2.2292,
      "step": 53415
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0541008710861206,
      "learning_rate": 4.288140921490775e-06,
      "loss": 2.3221,
      "step": 53416
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0526880025863647,
      "learning_rate": 4.287802964267459e-06,
      "loss": 2.2514,
      "step": 53417
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0159106254577637,
      "learning_rate": 4.287465016727864e-06,
      "loss": 2.2964,
      "step": 53418
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1229069232940674,
      "learning_rate": 4.287127078872572e-06,
      "loss": 2.5447,
      "step": 53419
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.042769432067871,
      "learning_rate": 4.286789150702146e-06,
      "loss": 2.2587,
      "step": 53420
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0999724864959717,
      "learning_rate": 4.286451232217168e-06,
      "loss": 2.4803,
      "step": 53421
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0329484939575195,
      "learning_rate": 4.286113323418204e-06,
      "loss": 2.3293,
      "step": 53422
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1424599885940552,
      "learning_rate": 4.2857754243058334e-06,
      "loss": 2.354,
      "step": 53423
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0639351606369019,
      "learning_rate": 4.285437534880621e-06,
      "loss": 2.1576,
      "step": 53424
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.149457573890686,
      "learning_rate": 4.2850996551431485e-06,
      "loss": 2.254,
      "step": 53425
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0272754430770874,
      "learning_rate": 4.284761785093984e-06,
      "loss": 2.2169,
      "step": 53426
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9901127219200134,
      "learning_rate": 4.284423924733701e-06,
      "loss": 2.2456,
      "step": 53427
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0003031492233276,
      "learning_rate": 4.284086074062866e-06,
      "loss": 2.4852,
      "step": 53428
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.15180504322052,
      "learning_rate": 4.283748233082063e-06,
      "loss": 2.1774,
      "step": 53429
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1729071140289307,
      "learning_rate": 4.283410401791856e-06,
      "loss": 2.509,
      "step": 53430
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1529836654663086,
      "learning_rate": 4.283072580192824e-06,
      "loss": 2.4048,
      "step": 53431
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.083141565322876,
      "learning_rate": 4.282734768285533e-06,
      "loss": 2.0794,
      "step": 53432
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1649657487869263,
      "learning_rate": 4.282396966070563e-06,
      "loss": 2.3565,
      "step": 53433
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1905930042266846,
      "learning_rate": 4.282059173548483e-06,
      "loss": 2.2504,
      "step": 53434
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2188011407852173,
      "learning_rate": 4.281721390719862e-06,
      "loss": 2.245,
      "step": 53435
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1000843048095703,
      "learning_rate": 4.28138361758528e-06,
      "loss": 2.4641,
      "step": 53436
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1272509098052979,
      "learning_rate": 4.2810458541453024e-06,
      "loss": 2.0834,
      "step": 53437
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1576480865478516,
      "learning_rate": 4.280708100400509e-06,
      "loss": 2.2217,
      "step": 53438
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.028426170349121,
      "learning_rate": 4.280370356351469e-06,
      "loss": 2.23,
      "step": 53439
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.015977144241333,
      "learning_rate": 4.280032621998753e-06,
      "loss": 2.3783,
      "step": 53440
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.206087589263916,
      "learning_rate": 4.2796948973429335e-06,
      "loss": 2.2407,
      "step": 53441
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.125291347503662,
      "learning_rate": 4.279357182384587e-06,
      "loss": 2.4623,
      "step": 53442
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0143256187438965,
      "learning_rate": 4.279019477124281e-06,
      "loss": 2.3629,
      "step": 53443
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1223242282867432,
      "learning_rate": 4.278681781562595e-06,
      "loss": 2.4817,
      "step": 53444
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.040250539779663,
      "learning_rate": 4.278344095700092e-06,
      "loss": 2.2649,
      "step": 53445
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0960886478424072,
      "learning_rate": 4.278006419537355e-06,
      "loss": 2.2609,
      "step": 53446
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2163054943084717,
      "learning_rate": 4.2776687530749465e-06,
      "loss": 2.168,
      "step": 53447
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2169554233551025,
      "learning_rate": 4.277331096313447e-06,
      "loss": 2.0904,
      "step": 53448
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0736241340637207,
      "learning_rate": 4.276993449253424e-06,
      "loss": 2.4112,
      "step": 53449
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3425363302230835,
      "learning_rate": 4.276655811895453e-06,
      "loss": 2.2104,
      "step": 53450
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1209696531295776,
      "learning_rate": 4.276318184240102e-06,
      "loss": 2.3745,
      "step": 53451
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0013277530670166,
      "learning_rate": 4.275980566287949e-06,
      "loss": 2.1138,
      "step": 53452
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1856569051742554,
      "learning_rate": 4.2756429580395644e-06,
      "loss": 2.5515,
      "step": 53453
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0528002977371216,
      "learning_rate": 4.275305359495519e-06,
      "loss": 2.3213,
      "step": 53454
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1242395639419556,
      "learning_rate": 4.274967770656383e-06,
      "loss": 2.278,
      "step": 53455
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1748732328414917,
      "learning_rate": 4.274630191522735e-06,
      "loss": 2.4052,
      "step": 53456
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0489026308059692,
      "learning_rate": 4.27429262209514e-06,
      "loss": 2.381,
      "step": 53457
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1187702417373657,
      "learning_rate": 4.2739550623741776e-06,
      "loss": 2.3727,
      "step": 53458
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1185531616210938,
      "learning_rate": 4.273617512360414e-06,
      "loss": 2.5287,
      "step": 53459
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.150004506111145,
      "learning_rate": 4.273279972054426e-06,
      "loss": 2.3805,
      "step": 53460
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1493353843688965,
      "learning_rate": 4.272942441456784e-06,
      "loss": 2.498,
      "step": 53461
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1913130283355713,
      "learning_rate": 4.272604920568057e-06,
      "loss": 2.446,
      "step": 53462
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0341734886169434,
      "learning_rate": 4.272267409388824e-06,
      "loss": 2.3484,
      "step": 53463
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.147498369216919,
      "learning_rate": 4.27192990791965e-06,
      "loss": 2.3574,
      "step": 53464
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1969084739685059,
      "learning_rate": 4.271592416161114e-06,
      "loss": 2.2724,
      "step": 53465
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1967910528182983,
      "learning_rate": 4.271254934113783e-06,
      "loss": 2.4928,
      "step": 53466
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.04683256149292,
      "learning_rate": 4.2709174617782325e-06,
      "loss": 2.3549,
      "step": 53467
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0061287879943848,
      "learning_rate": 4.270579999155029e-06,
      "loss": 2.3728,
      "step": 53468
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1184074878692627,
      "learning_rate": 4.270242546244752e-06,
      "loss": 2.3084,
      "step": 53469
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4395136833190918,
      "learning_rate": 4.269905103047967e-06,
      "loss": 2.4379,
      "step": 53470
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0643435716629028,
      "learning_rate": 4.2695676695652514e-06,
      "loss": 2.3098,
      "step": 53471
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1424708366394043,
      "learning_rate": 4.269230245797172e-06,
      "loss": 2.3016,
      "step": 53472
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1741913557052612,
      "learning_rate": 4.268892831744308e-06,
      "loss": 2.3213,
      "step": 53473
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.035513162612915,
      "learning_rate": 4.268555427407222e-06,
      "loss": 2.2703,
      "step": 53474
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.342929720878601,
      "learning_rate": 4.268218032786497e-06,
      "loss": 2.2631,
      "step": 53475
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.039404273033142,
      "learning_rate": 4.267880647882694e-06,
      "loss": 2.145,
      "step": 53476
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0570424795150757,
      "learning_rate": 4.267543272696394e-06,
      "loss": 2.2596,
      "step": 53477
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1133227348327637,
      "learning_rate": 4.267205907228161e-06,
      "loss": 2.2544,
      "step": 53478
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1375880241394043,
      "learning_rate": 4.266868551478579e-06,
      "loss": 2.3028,
      "step": 53479
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0601645708084106,
      "learning_rate": 4.266531205448206e-06,
      "loss": 2.2435,
      "step": 53480
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0350168943405151,
      "learning_rate": 4.266193869137621e-06,
      "loss": 1.9607,
      "step": 53481
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1203995943069458,
      "learning_rate": 4.265856542547392e-06,
      "loss": 2.333,
      "step": 53482
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3475744724273682,
      "learning_rate": 4.265519225678097e-06,
      "loss": 2.344,
      "step": 53483
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1680173873901367,
      "learning_rate": 4.265181918530301e-06,
      "loss": 2.1789,
      "step": 53484
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1388520002365112,
      "learning_rate": 4.2648446211045825e-06,
      "loss": 2.4298,
      "step": 53485
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.071134090423584,
      "learning_rate": 4.264507333401506e-06,
      "loss": 2.4544,
      "step": 53486
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.078344702720642,
      "learning_rate": 4.264170055421652e-06,
      "loss": 2.5019,
      "step": 53487
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0623056888580322,
      "learning_rate": 4.2638327871655875e-06,
      "loss": 2.1046,
      "step": 53488
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0189318656921387,
      "learning_rate": 4.2634955286338794e-06,
      "loss": 2.2856,
      "step": 53489
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1822609901428223,
      "learning_rate": 4.26315827982711e-06,
      "loss": 2.2418,
      "step": 53490
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1975760459899902,
      "learning_rate": 4.262821040745841e-06,
      "loss": 2.2762,
      "step": 53491
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.293923020362854,
      "learning_rate": 4.262483811390652e-06,
      "loss": 2.3115,
      "step": 53492
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0586023330688477,
      "learning_rate": 4.262146591762111e-06,
      "loss": 2.4099,
      "step": 53493
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0905441045761108,
      "learning_rate": 4.261809381860787e-06,
      "loss": 2.444,
      "step": 53494
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.128430724143982,
      "learning_rate": 4.261472181687256e-06,
      "loss": 2.41,
      "step": 53495
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.015198826789856,
      "learning_rate": 4.261134991242091e-06,
      "loss": 2.2593,
      "step": 53496
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.964433491230011,
      "learning_rate": 4.260797810525855e-06,
      "loss": 2.1993,
      "step": 53497
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0384312868118286,
      "learning_rate": 4.26046063953913e-06,
      "loss": 2.3428,
      "step": 53498
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2092621326446533,
      "learning_rate": 4.26012347828248e-06,
      "loss": 2.1463,
      "step": 53499
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1104475259780884,
      "learning_rate": 4.2597863267564834e-06,
      "loss": 2.3049,
      "step": 53500
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.160264015197754,
      "learning_rate": 4.259449184961703e-06,
      "loss": 2.4536,
      "step": 53501
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1216375827789307,
      "learning_rate": 4.25911205289872e-06,
      "loss": 2.21,
      "step": 53502
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.120469331741333,
      "learning_rate": 4.258774930568098e-06,
      "loss": 2.3282,
      "step": 53503
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.026991605758667,
      "learning_rate": 4.258437817970414e-06,
      "loss": 2.2467,
      "step": 53504
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0467039346694946,
      "learning_rate": 4.258100715106238e-06,
      "loss": 2.1271,
      "step": 53505
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0628975629806519,
      "learning_rate": 4.257763621976141e-06,
      "loss": 2.3326,
      "step": 53506
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.128931999206543,
      "learning_rate": 4.2574265385806915e-06,
      "loss": 2.3122,
      "step": 53507
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4271496534347534,
      "learning_rate": 4.257089464920465e-06,
      "loss": 2.5125,
      "step": 53508
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0771530866622925,
      "learning_rate": 4.25675240099603e-06,
      "loss": 2.3917,
      "step": 53509
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1581848859786987,
      "learning_rate": 4.256415346807963e-06,
      "loss": 2.3707,
      "step": 53510
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2111567258834839,
      "learning_rate": 4.256078302356827e-06,
      "loss": 2.1627,
      "step": 53511
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0924245119094849,
      "learning_rate": 4.255741267643203e-06,
      "loss": 2.3663,
      "step": 53512
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0869220495224,
      "learning_rate": 4.255404242667653e-06,
      "loss": 2.1708,
      "step": 53513
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.05026376247406,
      "learning_rate": 4.255067227430758e-06,
      "loss": 2.3248,
      "step": 53514
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.073349952697754,
      "learning_rate": 4.25473022193308e-06,
      "loss": 2.405,
      "step": 53515
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9568145871162415,
      "learning_rate": 4.254393226175199e-06,
      "loss": 2.21,
      "step": 53516
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1711701154708862,
      "learning_rate": 4.25405624015768e-06,
      "loss": 2.3323,
      "step": 53517
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0705060958862305,
      "learning_rate": 4.253719263881099e-06,
      "loss": 2.4886,
      "step": 53518
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1922098398208618,
      "learning_rate": 4.253382297346018e-06,
      "loss": 2.3417,
      "step": 53519
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1475398540496826,
      "learning_rate": 4.2530453405530205e-06,
      "loss": 2.3682,
      "step": 53520
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0586169958114624,
      "learning_rate": 4.252708393502668e-06,
      "loss": 2.169,
      "step": 53521
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0951001644134521,
      "learning_rate": 4.252371456195538e-06,
      "loss": 2.5353,
      "step": 53522
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0443110466003418,
      "learning_rate": 4.252034528632202e-06,
      "loss": 2.4645,
      "step": 53523
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0871938467025757,
      "learning_rate": 4.251697610813223e-06,
      "loss": 2.4571,
      "step": 53524
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3092037439346313,
      "learning_rate": 4.2513607027391815e-06,
      "loss": 2.4168,
      "step": 53525
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1084033250808716,
      "learning_rate": 4.251023804410642e-06,
      "loss": 2.2271,
      "step": 53526
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9535787105560303,
      "learning_rate": 4.250686915828181e-06,
      "loss": 2.1879,
      "step": 53527
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0514835119247437,
      "learning_rate": 4.250350036992365e-06,
      "loss": 2.4948,
      "step": 53528
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0706888437271118,
      "learning_rate": 4.250013167903771e-06,
      "loss": 2.3027,
      "step": 53529
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.105709433555603,
      "learning_rate": 4.249676308562961e-06,
      "loss": 2.2814,
      "step": 53530
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2229723930358887,
      "learning_rate": 4.249339458970516e-06,
      "loss": 2.3228,
      "step": 53531
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0549126863479614,
      "learning_rate": 4.249002619127003e-06,
      "loss": 2.1095,
      "step": 53532
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1572710275650024,
      "learning_rate": 4.248665789032992e-06,
      "loss": 2.1646,
      "step": 53533
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1633788347244263,
      "learning_rate": 4.248328968689051e-06,
      "loss": 2.4358,
      "step": 53534
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2239664793014526,
      "learning_rate": 4.247992158095758e-06,
      "loss": 2.3291,
      "step": 53535
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0923490524291992,
      "learning_rate": 4.247655357253677e-06,
      "loss": 2.3097,
      "step": 53536
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.085080862045288,
      "learning_rate": 4.2473185661633865e-06,
      "loss": 2.1364,
      "step": 53537
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1949756145477295,
      "learning_rate": 4.246981784825449e-06,
      "loss": 2.04,
      "step": 53538
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1944987773895264,
      "learning_rate": 4.246645013240444e-06,
      "loss": 2.3051,
      "step": 53539
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0659154653549194,
      "learning_rate": 4.246308251408935e-06,
      "loss": 2.2747,
      "step": 53540
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0716639757156372,
      "learning_rate": 4.245971499331498e-06,
      "loss": 2.2229,
      "step": 53541
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1874315738677979,
      "learning_rate": 4.245634757008701e-06,
      "loss": 2.363,
      "step": 53542
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1429836750030518,
      "learning_rate": 4.245298024441118e-06,
      "loss": 2.5451,
      "step": 53543
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2284845113754272,
      "learning_rate": 4.244961301629318e-06,
      "loss": 2.278,
      "step": 53544
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.004080891609192,
      "learning_rate": 4.244624588573872e-06,
      "loss": 2.3791,
      "step": 53545
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.028778076171875,
      "learning_rate": 4.244287885275347e-06,
      "loss": 2.3941,
      "step": 53546
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1552544832229614,
      "learning_rate": 4.24395119173432e-06,
      "loss": 2.1239,
      "step": 53547
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0546165704727173,
      "learning_rate": 4.243614507951357e-06,
      "loss": 2.4437,
      "step": 53548
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.070130467414856,
      "learning_rate": 4.243277833927033e-06,
      "loss": 2.3391,
      "step": 53549
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0711525678634644,
      "learning_rate": 4.242941169661914e-06,
      "loss": 1.9425,
      "step": 53550
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.270962119102478,
      "learning_rate": 4.242604515156575e-06,
      "loss": 2.2616,
      "step": 53551
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0513235330581665,
      "learning_rate": 4.2422678704115874e-06,
      "loss": 2.2404,
      "step": 53552
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1911615133285522,
      "learning_rate": 4.241931235427514e-06,
      "loss": 2.4518,
      "step": 53553
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3260782957077026,
      "learning_rate": 4.241594610204937e-06,
      "loss": 2.2509,
      "step": 53554
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0740083456039429,
      "learning_rate": 4.241257994744415e-06,
      "loss": 2.3712,
      "step": 53555
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0227512121200562,
      "learning_rate": 4.240921389046529e-06,
      "loss": 2.4155,
      "step": 53556
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9573888182640076,
      "learning_rate": 4.240584793111843e-06,
      "loss": 2.327,
      "step": 53557
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.249924898147583,
      "learning_rate": 4.240248206940937e-06,
      "loss": 2.2429,
      "step": 53558
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0275530815124512,
      "learning_rate": 4.239911630534366e-06,
      "loss": 2.3631,
      "step": 53559
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2040679454803467,
      "learning_rate": 4.239575063892713e-06,
      "loss": 2.5018,
      "step": 53560
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0584560632705688,
      "learning_rate": 4.239238507016542e-06,
      "loss": 2.3943,
      "step": 53561
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1602294445037842,
      "learning_rate": 4.2389019599064305e-06,
      "loss": 2.5258,
      "step": 53562
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.26828134059906,
      "learning_rate": 4.23856542256294e-06,
      "loss": 2.2406,
      "step": 53563
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1206624507904053,
      "learning_rate": 4.23822889498665e-06,
      "loss": 2.3303,
      "step": 53564
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2811633348464966,
      "learning_rate": 4.237892377178124e-06,
      "loss": 2.4652,
      "step": 53565
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.011161208152771,
      "learning_rate": 4.237555869137938e-06,
      "loss": 2.2533,
      "step": 53566
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.047624111175537,
      "learning_rate": 4.237219370866657e-06,
      "loss": 2.276,
      "step": 53567
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1245355606079102,
      "learning_rate": 4.2368828823648565e-06,
      "loss": 2.2469,
      "step": 53568
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2265251874923706,
      "learning_rate": 4.236546403633103e-06,
      "loss": 2.1329,
      "step": 53569
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0866237878799438,
      "learning_rate": 4.236209934671971e-06,
      "loss": 2.5419,
      "step": 53570
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1276973485946655,
      "learning_rate": 4.235873475482029e-06,
      "loss": 2.1602,
      "step": 53571
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.103203535079956,
      "learning_rate": 4.2355370260638474e-06,
      "loss": 2.3451,
      "step": 53572
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1549694538116455,
      "learning_rate": 4.235200586417991e-06,
      "loss": 2.2226,
      "step": 53573
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.059950828552246,
      "learning_rate": 4.234864156545041e-06,
      "loss": 2.3481,
      "step": 53574
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0612084865570068,
      "learning_rate": 4.234527736445558e-06,
      "loss": 2.3794,
      "step": 53575
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.088841199874878,
      "learning_rate": 4.234191326120119e-06,
      "loss": 2.5257,
      "step": 53576
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.047114372253418,
      "learning_rate": 4.2338549255692876e-06,
      "loss": 2.0473,
      "step": 53577
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.126856803894043,
      "learning_rate": 4.233518534793643e-06,
      "loss": 2.2197,
      "step": 53578
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0736501216888428,
      "learning_rate": 4.23318215379375e-06,
      "loss": 2.1602,
      "step": 53579
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2014771699905396,
      "learning_rate": 4.232845782570177e-06,
      "loss": 2.1976,
      "step": 53580
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1785305738449097,
      "learning_rate": 4.232509421123498e-06,
      "loss": 2.3589,
      "step": 53581
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1353940963745117,
      "learning_rate": 4.23217306945428e-06,
      "loss": 2.1879,
      "step": 53582
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0858172178268433,
      "learning_rate": 4.2318367275631e-06,
      "loss": 2.5476,
      "step": 53583
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.041953682899475,
      "learning_rate": 4.2315003954505215e-06,
      "loss": 2.4845,
      "step": 53584
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9646238684654236,
      "learning_rate": 4.2311640731171165e-06,
      "loss": 2.2231,
      "step": 53585
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9918742179870605,
      "learning_rate": 4.230827760563453e-06,
      "loss": 2.2798,
      "step": 53586
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.124979853630066,
      "learning_rate": 4.230491457790106e-06,
      "loss": 2.3751,
      "step": 53587
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9765089154243469,
      "learning_rate": 4.230155164797639e-06,
      "loss": 2.404,
      "step": 53588
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0894166231155396,
      "learning_rate": 4.22981888158663e-06,
      "loss": 2.303,
      "step": 53589
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0480175018310547,
      "learning_rate": 4.2294826081576425e-06,
      "loss": 2.2537,
      "step": 53590
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.155801773071289,
      "learning_rate": 4.229146344511252e-06,
      "loss": 2.3078,
      "step": 53591
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1860737800598145,
      "learning_rate": 4.228810090648021e-06,
      "loss": 2.3961,
      "step": 53592
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0589075088500977,
      "learning_rate": 4.228473846568529e-06,
      "loss": 2.3403,
      "step": 53593
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.098073959350586,
      "learning_rate": 4.228137612273338e-06,
      "loss": 2.4592,
      "step": 53594
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1345367431640625,
      "learning_rate": 4.2278013877630255e-06,
      "loss": 2.4326,
      "step": 53595
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0860645771026611,
      "learning_rate": 4.227465173038153e-06,
      "loss": 2.411,
      "step": 53596
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2665151357650757,
      "learning_rate": 4.227128968099299e-06,
      "loss": 2.2856,
      "step": 53597
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1801443099975586,
      "learning_rate": 4.226792772947028e-06,
      "loss": 2.2409,
      "step": 53598
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0223288536071777,
      "learning_rate": 4.226456587581912e-06,
      "loss": 2.0975,
      "step": 53599
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3478562831878662,
      "learning_rate": 4.226120412004517e-06,
      "loss": 2.4156,
      "step": 53600
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.080145001411438,
      "learning_rate": 4.225784246215418e-06,
      "loss": 2.2825,
      "step": 53601
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0961313247680664,
      "learning_rate": 4.225448090215182e-06,
      "loss": 2.2985,
      "step": 53602
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.059212327003479,
      "learning_rate": 4.225111944004382e-06,
      "loss": 2.1828,
      "step": 53603
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1187200546264648,
      "learning_rate": 4.224775807583582e-06,
      "loss": 2.2346,
      "step": 53604
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0312823057174683,
      "learning_rate": 4.22443968095336e-06,
      "loss": 2.6102,
      "step": 53605
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.297142505645752,
      "learning_rate": 4.2241035641142755e-06,
      "loss": 2.2776,
      "step": 53606
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.090863823890686,
      "learning_rate": 4.22376745706691e-06,
      "loss": 2.3867,
      "step": 53607
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.000311255455017,
      "learning_rate": 4.223431359811827e-06,
      "loss": 2.1223,
      "step": 53608
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1438454389572144,
      "learning_rate": 4.223095272349592e-06,
      "loss": 2.312,
      "step": 53609
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.048291563987732,
      "learning_rate": 4.2227591946807846e-06,
      "loss": 2.3019,
      "step": 53610
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0671920776367188,
      "learning_rate": 4.222423126805969e-06,
      "loss": 2.4014,
      "step": 53611
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1710152626037598,
      "learning_rate": 4.222087068725712e-06,
      "loss": 2.3065,
      "step": 53612
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.071977138519287,
      "learning_rate": 4.22175102044059e-06,
      "loss": 2.276,
      "step": 53613
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1966768503189087,
      "learning_rate": 4.221414981951169e-06,
      "loss": 2.3021,
      "step": 53614
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3284798860549927,
      "learning_rate": 4.221078953258015e-06,
      "loss": 2.2076,
      "step": 53615
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0696033239364624,
      "learning_rate": 4.220742934361707e-06,
      "loss": 2.2222,
      "step": 53616
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.097609043121338,
      "learning_rate": 4.2204069252628046e-06,
      "loss": 2.3536,
      "step": 53617
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0779037475585938,
      "learning_rate": 4.2200709259618855e-06,
      "loss": 2.0797,
      "step": 53618
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1081538200378418,
      "learning_rate": 4.219734936459513e-06,
      "loss": 2.4705,
      "step": 53619
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.007079839706421,
      "learning_rate": 4.219398956756263e-06,
      "loss": 2.6492,
      "step": 53620
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0301121473312378,
      "learning_rate": 4.219062986852698e-06,
      "loss": 2.2242,
      "step": 53621
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1319782733917236,
      "learning_rate": 4.218727026749395e-06,
      "loss": 2.2943,
      "step": 53622
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0870167016983032,
      "learning_rate": 4.218391076446917e-06,
      "loss": 2.3189,
      "step": 53623
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0695520639419556,
      "learning_rate": 4.2180551359458435e-06,
      "loss": 2.2216,
      "step": 53624
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0774255990982056,
      "learning_rate": 4.217719205246728e-06,
      "loss": 2.3549,
      "step": 53625
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0499647855758667,
      "learning_rate": 4.217383284350155e-06,
      "loss": 2.3476,
      "step": 53626
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1215115785598755,
      "learning_rate": 4.217047373256683e-06,
      "loss": 2.2373,
      "step": 53627
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0885483026504517,
      "learning_rate": 4.2167114719668885e-06,
      "loss": 2.3106,
      "step": 53628
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0176035165786743,
      "learning_rate": 4.216375580481337e-06,
      "loss": 2.1382,
      "step": 53629
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0594738721847534,
      "learning_rate": 4.2160396988006034e-06,
      "loss": 2.0781,
      "step": 53630
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.141160488128662,
      "learning_rate": 4.215703826925249e-06,
      "loss": 2.212,
      "step": 53631
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0626119375228882,
      "learning_rate": 4.215367964855852e-06,
      "loss": 2.1839,
      "step": 53632
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1310129165649414,
      "learning_rate": 4.215032112592973e-06,
      "loss": 2.5138,
      "step": 53633
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0226894617080688,
      "learning_rate": 4.2146962701371896e-06,
      "loss": 2.1242,
      "step": 53634
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0350252389907837,
      "learning_rate": 4.214360437489068e-06,
      "loss": 2.0727,
      "step": 53635
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.099996566772461,
      "learning_rate": 4.214024614649172e-06,
      "loss": 2.1368,
      "step": 53636
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0107816457748413,
      "learning_rate": 4.21368880161808e-06,
      "loss": 2.4476,
      "step": 53637
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1282416582107544,
      "learning_rate": 4.213352998396357e-06,
      "loss": 2.2415,
      "step": 53638
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0752822160720825,
      "learning_rate": 4.2130172049845685e-06,
      "loss": 2.2355,
      "step": 53639
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0317195653915405,
      "learning_rate": 4.2126814213832915e-06,
      "loss": 2.3505,
      "step": 53640
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0819191932678223,
      "learning_rate": 4.212345647593091e-06,
      "loss": 2.3918,
      "step": 53641
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0999966859817505,
      "learning_rate": 4.212009883614533e-06,
      "loss": 2.2671,
      "step": 53642
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0360362529754639,
      "learning_rate": 4.211674129448194e-06,
      "loss": 2.3443,
      "step": 53643
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.086408257484436,
      "learning_rate": 4.211338385094635e-06,
      "loss": 2.2501,
      "step": 53644
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0690468549728394,
      "learning_rate": 4.211002650554434e-06,
      "loss": 2.4903,
      "step": 53645
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.058801531791687,
      "learning_rate": 4.210666925828153e-06,
      "loss": 2.1745,
      "step": 53646
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.069101095199585,
      "learning_rate": 4.2103312109163665e-06,
      "loss": 2.32,
      "step": 53647
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0973753929138184,
      "learning_rate": 4.209995505819637e-06,
      "loss": 2.555,
      "step": 53648
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0371887683868408,
      "learning_rate": 4.209659810538542e-06,
      "loss": 2.2089,
      "step": 53649
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0214471817016602,
      "learning_rate": 4.209324125073646e-06,
      "loss": 2.3328,
      "step": 53650
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9896013140678406,
      "learning_rate": 4.208988449425519e-06,
      "loss": 2.3704,
      "step": 53651
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0697935819625854,
      "learning_rate": 4.208652783594724e-06,
      "loss": 2.0986,
      "step": 53652
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0768529176712036,
      "learning_rate": 4.208317127581841e-06,
      "loss": 2.2814,
      "step": 53653
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1835664510726929,
      "learning_rate": 4.20798148138743e-06,
      "loss": 2.2943,
      "step": 53654
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2378549575805664,
      "learning_rate": 4.207645845012066e-06,
      "loss": 2.3858,
      "step": 53655
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.045274019241333,
      "learning_rate": 4.207310218456312e-06,
      "loss": 2.3619,
      "step": 53656
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0248931646347046,
      "learning_rate": 4.206974601720744e-06,
      "loss": 2.1319,
      "step": 53657
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0183814764022827,
      "learning_rate": 4.206638994805924e-06,
      "loss": 2.264,
      "step": 53658
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0951310396194458,
      "learning_rate": 4.206303397712428e-06,
      "loss": 2.225,
      "step": 53659
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0605332851409912,
      "learning_rate": 4.205967810440817e-06,
      "loss": 2.5866,
      "step": 53660
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0472368001937866,
      "learning_rate": 4.205632232991669e-06,
      "loss": 2.2841,
      "step": 53661
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1125208139419556,
      "learning_rate": 4.205296665365547e-06,
      "loss": 2.2775,
      "step": 53662
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.10335111618042,
      "learning_rate": 4.204961107563021e-06,
      "loss": 2.3435,
      "step": 53663
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0388262271881104,
      "learning_rate": 4.204625559584657e-06,
      "loss": 2.2734,
      "step": 53664
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0893601179122925,
      "learning_rate": 4.20429002143103e-06,
      "loss": 2.286,
      "step": 53665
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.003550410270691,
      "learning_rate": 4.203954493102702e-06,
      "loss": 2.2855,
      "step": 53666
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9864551424980164,
      "learning_rate": 4.2036189746002485e-06,
      "loss": 2.4059,
      "step": 53667
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1102776527404785,
      "learning_rate": 4.203283465924233e-06,
      "loss": 2.3001,
      "step": 53668
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0642528533935547,
      "learning_rate": 4.2029479670752285e-06,
      "loss": 2.425,
      "step": 53669
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.169273853302002,
      "learning_rate": 4.202612478053801e-06,
      "loss": 2.5464,
      "step": 53670
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.249006986618042,
      "learning_rate": 4.202276998860518e-06,
      "loss": 2.2724,
      "step": 53671
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0808591842651367,
      "learning_rate": 4.201941529495953e-06,
      "loss": 2.1684,
      "step": 53672
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1094541549682617,
      "learning_rate": 4.201606069960668e-06,
      "loss": 2.285,
      "step": 53673
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.156319260597229,
      "learning_rate": 4.201270620255239e-06,
      "loss": 2.1523,
      "step": 53674
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1480169296264648,
      "learning_rate": 4.200935180380228e-06,
      "loss": 2.3708,
      "step": 53675
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.090567946434021,
      "learning_rate": 4.2005997503362105e-06,
      "loss": 2.3876,
      "step": 53676
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0118696689605713,
      "learning_rate": 4.200264330123753e-06,
      "loss": 2.3321,
      "step": 53677
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9448866844177246,
      "learning_rate": 4.19992891974342e-06,
      "loss": 2.3905,
      "step": 53678
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9912430047988892,
      "learning_rate": 4.19959351919578e-06,
      "loss": 2.4369,
      "step": 53679
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9425942897796631,
      "learning_rate": 4.199258128481408e-06,
      "loss": 2.2233,
      "step": 53680
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0284725427627563,
      "learning_rate": 4.198922747600865e-06,
      "loss": 2.304,
      "step": 53681
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0670214891433716,
      "learning_rate": 4.198587376554728e-06,
      "loss": 2.4173,
      "step": 53682
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1674989461898804,
      "learning_rate": 4.198252015343558e-06,
      "loss": 2.3194,
      "step": 53683
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.115931749343872,
      "learning_rate": 4.197916663967929e-06,
      "loss": 2.4395,
      "step": 53684
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0583115816116333,
      "learning_rate": 4.197581322428403e-06,
      "loss": 2.1984,
      "step": 53685
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1357353925704956,
      "learning_rate": 4.197245990725558e-06,
      "loss": 2.3019,
      "step": 53686
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1975830793380737,
      "learning_rate": 4.196910668859952e-06,
      "loss": 2.2365,
      "step": 53687
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2041878700256348,
      "learning_rate": 4.196575356832162e-06,
      "loss": 2.1633,
      "step": 53688
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0988537073135376,
      "learning_rate": 4.196240054642751e-06,
      "loss": 2.3833,
      "step": 53689
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2037516832351685,
      "learning_rate": 4.195904762292294e-06,
      "loss": 2.3789,
      "step": 53690
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0834884643554688,
      "learning_rate": 4.19556947978135e-06,
      "loss": 2.2994,
      "step": 53691
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.094077229499817,
      "learning_rate": 4.195234207110495e-06,
      "loss": 2.2279,
      "step": 53692
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8188856840133667,
      "learning_rate": 4.19489894428029e-06,
      "loss": 2.2029,
      "step": 53693
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1634656190872192,
      "learning_rate": 4.1945636912913126e-06,
      "loss": 2.2795,
      "step": 53694
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0279954671859741,
      "learning_rate": 4.194228448144123e-06,
      "loss": 2.2181,
      "step": 53695
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0749281644821167,
      "learning_rate": 4.193893214839296e-06,
      "loss": 2.2858,
      "step": 53696
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2846815586090088,
      "learning_rate": 4.193557991377397e-06,
      "loss": 2.3012,
      "step": 53697
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0659912824630737,
      "learning_rate": 4.1932227777589905e-06,
      "loss": 2.2349,
      "step": 53698
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0851718187332153,
      "learning_rate": 4.192887573984652e-06,
      "loss": 2.1481,
      "step": 53699
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.026713252067566,
      "learning_rate": 4.192552380054944e-06,
      "loss": 2.2797,
      "step": 53700
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.030020833015442,
      "learning_rate": 4.19221719597044e-06,
      "loss": 2.3879,
      "step": 53701
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1379621028900146,
      "learning_rate": 4.1918820217317005e-06,
      "loss": 2.2347,
      "step": 53702
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0609596967697144,
      "learning_rate": 4.191546857339306e-06,
      "loss": 2.422,
      "step": 53703
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1633824110031128,
      "learning_rate": 4.1912117027938106e-06,
      "loss": 2.2038,
      "step": 53704
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0078060626983643,
      "learning_rate": 4.1908765580957925e-06,
      "loss": 2.3486,
      "step": 53705
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1606462001800537,
      "learning_rate": 4.190541423245812e-06,
      "loss": 2.2076,
      "step": 53706
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.199674367904663,
      "learning_rate": 4.190206298244446e-06,
      "loss": 2.4408,
      "step": 53707
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.084646463394165,
      "learning_rate": 4.189871183092254e-06,
      "loss": 2.2944,
      "step": 53708
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0042766332626343,
      "learning_rate": 4.189536077789813e-06,
      "loss": 2.1657,
      "step": 53709
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0888546705245972,
      "learning_rate": 4.189200982337682e-06,
      "loss": 2.2877,
      "step": 53710
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0428353548049927,
      "learning_rate": 4.188865896736437e-06,
      "loss": 2.4094,
      "step": 53711
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.50760817527771,
      "learning_rate": 4.1885308209866394e-06,
      "loss": 2.3011,
      "step": 53712
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1331504583358765,
      "learning_rate": 4.188195755088864e-06,
      "loss": 2.5898,
      "step": 53713
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1558762788772583,
      "learning_rate": 4.187860699043672e-06,
      "loss": 2.3768,
      "step": 53714
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0972403287887573,
      "learning_rate": 4.187525652851637e-06,
      "loss": 2.1384,
      "step": 53715
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0847066640853882,
      "learning_rate": 4.187190616513326e-06,
      "loss": 2.2732,
      "step": 53716
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.200657844543457,
      "learning_rate": 4.186855590029305e-06,
      "loss": 2.1423,
      "step": 53717
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0465353727340698,
      "learning_rate": 4.186520573400139e-06,
      "loss": 2.3361,
      "step": 53718
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.035274624824524,
      "learning_rate": 4.186185566626404e-06,
      "loss": 2.0997,
      "step": 53719
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1393972635269165,
      "learning_rate": 4.18585056970866e-06,
      "loss": 2.2334,
      "step": 53720
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.102358102798462,
      "learning_rate": 4.1855155826474804e-06,
      "loss": 2.4912,
      "step": 53721
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8100788593292236,
      "learning_rate": 4.1851806054434285e-06,
      "loss": 2.2416,
      "step": 53722
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0882775783538818,
      "learning_rate": 4.184845638097078e-06,
      "loss": 2.2777,
      "step": 53723
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.096919298171997,
      "learning_rate": 4.18451068060899e-06,
      "loss": 2.431,
      "step": 53724
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9716601371765137,
      "learning_rate": 4.1841757329797395e-06,
      "loss": 2.4128,
      "step": 53725
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0172853469848633,
      "learning_rate": 4.18384079520989e-06,
      "loss": 2.3367,
      "step": 53726
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0953173637390137,
      "learning_rate": 4.183505867300007e-06,
      "loss": 2.1939,
      "step": 53727
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.096983551979065,
      "learning_rate": 4.183170949250665e-06,
      "loss": 2.2531,
      "step": 53728
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4112547636032104,
      "learning_rate": 4.1828360410624295e-06,
      "loss": 2.4906,
      "step": 53729
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.112176537513733,
      "learning_rate": 4.182501142735861e-06,
      "loss": 2.3351,
      "step": 53730
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1244397163391113,
      "learning_rate": 4.182166254271538e-06,
      "loss": 2.3513,
      "step": 53731
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0708231925964355,
      "learning_rate": 4.181831375670022e-06,
      "loss": 2.3517,
      "step": 53732
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0641510486602783,
      "learning_rate": 4.181496506931879e-06,
      "loss": 2.3187,
      "step": 53733
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0927740335464478,
      "learning_rate": 4.181161648057684e-06,
      "loss": 2.3792,
      "step": 53734
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.090104103088379,
      "learning_rate": 4.1808267990479965e-06,
      "loss": 2.217,
      "step": 53735
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0076922178268433,
      "learning_rate": 4.180491959903391e-06,
      "loss": 2.4028,
      "step": 53736
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.114219069480896,
      "learning_rate": 4.18015713062443e-06,
      "loss": 2.0995,
      "step": 53737
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1962354183197021,
      "learning_rate": 4.179822311211686e-06,
      "loss": 2.2914,
      "step": 53738
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1470822095870972,
      "learning_rate": 4.17948750166572e-06,
      "loss": 2.2483,
      "step": 53739
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.020940899848938,
      "learning_rate": 4.179152701987107e-06,
      "loss": 2.4885,
      "step": 53740
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.04714035987854,
      "learning_rate": 4.178817912176408e-06,
      "loss": 2.1047,
      "step": 53741
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.077984094619751,
      "learning_rate": 4.1784831322342005e-06,
      "loss": 2.3353,
      "step": 53742
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0613547563552856,
      "learning_rate": 4.1781483621610385e-06,
      "loss": 2.2383,
      "step": 53743
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1014224290847778,
      "learning_rate": 4.1778136019574985e-06,
      "loss": 2.2788,
      "step": 53744
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0736496448516846,
      "learning_rate": 4.177478851624144e-06,
      "loss": 2.2529,
      "step": 53745
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0417851209640503,
      "learning_rate": 4.177144111161546e-06,
      "loss": 2.201,
      "step": 53746
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1724114418029785,
      "learning_rate": 4.1768093805702684e-06,
      "loss": 2.3475,
      "step": 53747
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1262813806533813,
      "learning_rate": 4.176474659850883e-06,
      "loss": 2.2558,
      "step": 53748
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.001046895980835,
      "learning_rate": 4.176139949003951e-06,
      "loss": 2.2152,
      "step": 53749
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0538979768753052,
      "learning_rate": 4.175805248030048e-06,
      "loss": 2.4495,
      "step": 53750
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.402945637702942,
      "learning_rate": 4.175470556929733e-06,
      "loss": 2.2842,
      "step": 53751
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0388411283493042,
      "learning_rate": 4.17513587570358e-06,
      "loss": 2.2061,
      "step": 53752
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0751229524612427,
      "learning_rate": 4.174801204352155e-06,
      "loss": 2.1321,
      "step": 53753
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0812287330627441,
      "learning_rate": 4.17446654287602e-06,
      "loss": 2.1362,
      "step": 53754
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2615547180175781,
      "learning_rate": 4.174131891275749e-06,
      "loss": 2.4646,
      "step": 53755
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.120249629020691,
      "learning_rate": 4.173797249551908e-06,
      "loss": 2.2174,
      "step": 53756
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1446589231491089,
      "learning_rate": 4.1734626177050585e-06,
      "loss": 2.03,
      "step": 53757
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0100486278533936,
      "learning_rate": 4.173127995735778e-06,
      "loss": 2.0941,
      "step": 53758
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9655706882476807,
      "learning_rate": 4.1727933836446254e-06,
      "loss": 2.3282,
      "step": 53759
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9877167344093323,
      "learning_rate": 4.172458781432168e-06,
      "loss": 2.2457,
      "step": 53760
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.125362515449524,
      "learning_rate": 4.17212418909898e-06,
      "loss": 2.2684,
      "step": 53761
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1061756610870361,
      "learning_rate": 4.17178960664562e-06,
      "loss": 2.3843,
      "step": 53762
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0590546131134033,
      "learning_rate": 4.1714550340726635e-06,
      "loss": 2.2187,
      "step": 53763
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0480703115463257,
      "learning_rate": 4.171120471380669e-06,
      "loss": 2.2402,
      "step": 53764
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.159727931022644,
      "learning_rate": 4.170785918570214e-06,
      "loss": 2.4606,
      "step": 53765
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.033587098121643,
      "learning_rate": 4.170451375641855e-06,
      "loss": 2.2622,
      "step": 53766
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0086417198181152,
      "learning_rate": 4.170116842596169e-06,
      "loss": 2.3085,
      "step": 53767
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.095688819885254,
      "learning_rate": 4.169782319433714e-06,
      "loss": 2.2277,
      "step": 53768
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1137975454330444,
      "learning_rate": 4.169447806155069e-06,
      "loss": 2.4207,
      "step": 53769
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2687785625457764,
      "learning_rate": 4.169113302760785e-06,
      "loss": 2.4054,
      "step": 53770
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.999320924282074,
      "learning_rate": 4.168778809251443e-06,
      "loss": 2.1735,
      "step": 53771
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0992600917816162,
      "learning_rate": 4.1684443256276e-06,
      "loss": 2.2446,
      "step": 53772
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9555371999740601,
      "learning_rate": 4.168109851889831e-06,
      "loss": 2.3456,
      "step": 53773
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1801702976226807,
      "learning_rate": 4.167775388038697e-06,
      "loss": 2.3259,
      "step": 53774
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1047550439834595,
      "learning_rate": 4.1674409340747716e-06,
      "loss": 2.3492,
      "step": 53775
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9652165174484253,
      "learning_rate": 4.167106489998613e-06,
      "loss": 2.3114,
      "step": 53776
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1115378141403198,
      "learning_rate": 4.166772055810798e-06,
      "loss": 2.5906,
      "step": 53777
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0933794975280762,
      "learning_rate": 4.1664376315118835e-06,
      "loss": 2.2702,
      "step": 53778
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.270685076713562,
      "learning_rate": 4.166103217102446e-06,
      "loss": 2.3152,
      "step": 53779
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1245054006576538,
      "learning_rate": 4.165768812583048e-06,
      "loss": 2.2327,
      "step": 53780
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1446503400802612,
      "learning_rate": 4.1654344179542535e-06,
      "loss": 2.3778,
      "step": 53781
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1779311895370483,
      "learning_rate": 4.165100033216636e-06,
      "loss": 2.1867,
      "step": 53782
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.118285894393921,
      "learning_rate": 4.164765658370757e-06,
      "loss": 2.3691,
      "step": 53783
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.140363335609436,
      "learning_rate": 4.1644312934171834e-06,
      "loss": 2.2316,
      "step": 53784
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1229803562164307,
      "learning_rate": 4.164096938356487e-06,
      "loss": 2.2496,
      "step": 53785
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4200972318649292,
      "learning_rate": 4.163762593189226e-06,
      "loss": 2.2948,
      "step": 53786
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0645617246627808,
      "learning_rate": 4.163428257915978e-06,
      "loss": 2.3564,
      "step": 53787
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9827156662940979,
      "learning_rate": 4.163093932537303e-06,
      "loss": 2.4756,
      "step": 53788
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1305030584335327,
      "learning_rate": 4.162759617053765e-06,
      "loss": 2.3139,
      "step": 53789
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9216329455375671,
      "learning_rate": 4.162425311465941e-06,
      "loss": 2.2543,
      "step": 53790
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9985395669937134,
      "learning_rate": 4.162091015774385e-06,
      "loss": 2.1766,
      "step": 53791
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4269218444824219,
      "learning_rate": 4.1617567299796755e-06,
      "loss": 2.2122,
      "step": 53792
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0368869304656982,
      "learning_rate": 4.16142245408237e-06,
      "loss": 2.4442,
      "step": 53793
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.092157244682312,
      "learning_rate": 4.161088188083044e-06,
      "loss": 2.5196,
      "step": 53794
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9906196594238281,
      "learning_rate": 4.160753931982257e-06,
      "loss": 2.5239,
      "step": 53795
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.262894630432129,
      "learning_rate": 4.16041968578058e-06,
      "loss": 2.4637,
      "step": 53796
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0755716562271118,
      "learning_rate": 4.160085449478572e-06,
      "loss": 2.283,
      "step": 53797
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0376765727996826,
      "learning_rate": 4.159751223076811e-06,
      "loss": 2.2587,
      "step": 53798
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9525024890899658,
      "learning_rate": 4.159417006575853e-06,
      "loss": 2.1539,
      "step": 53799
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0833854675292969,
      "learning_rate": 4.159082799976274e-06,
      "loss": 2.0881,
      "step": 53800
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.062559962272644,
      "learning_rate": 4.158748603278631e-06,
      "loss": 2.3049,
      "step": 53801
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0277976989746094,
      "learning_rate": 4.1584144164835004e-06,
      "loss": 2.1988,
      "step": 53802
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.154895544052124,
      "learning_rate": 4.15808023959144e-06,
      "loss": 2.5618,
      "step": 53803
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0190708637237549,
      "learning_rate": 4.157746072603024e-06,
      "loss": 2.386,
      "step": 53804
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.043066382408142,
      "learning_rate": 4.1574119155188106e-06,
      "loss": 2.2195,
      "step": 53805
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.98467618227005,
      "learning_rate": 4.157077768339376e-06,
      "loss": 2.2909,
      "step": 53806
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1759699583053589,
      "learning_rate": 4.1567436310652765e-06,
      "loss": 2.4962,
      "step": 53807
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0647691488265991,
      "learning_rate": 4.156409503697092e-06,
      "loss": 2.2364,
      "step": 53808
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1683284044265747,
      "learning_rate": 4.156075386235372e-06,
      "loss": 2.3198,
      "step": 53809
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1879308223724365,
      "learning_rate": 4.155741278680696e-06,
      "loss": 2.2672,
      "step": 53810
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3868610858917236,
      "learning_rate": 4.155407181033622e-06,
      "loss": 2.2592,
      "step": 53811
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1405247449874878,
      "learning_rate": 4.155073093294723e-06,
      "loss": 2.355,
      "step": 53812
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1471238136291504,
      "learning_rate": 4.15473901546456e-06,
      "loss": 2.2419,
      "step": 53813
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.151437759399414,
      "learning_rate": 4.154404947543705e-06,
      "loss": 2.4789,
      "step": 53814
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0212252140045166,
      "learning_rate": 4.154070889532722e-06,
      "loss": 2.2511,
      "step": 53815
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.150550365447998,
      "learning_rate": 4.153736841432172e-06,
      "loss": 2.5866,
      "step": 53816
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1092616319656372,
      "learning_rate": 4.15340280324263e-06,
      "loss": 2.2354,
      "step": 53817
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1413277387619019,
      "learning_rate": 4.153068774964653e-06,
      "loss": 2.2985,
      "step": 53818
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0860376358032227,
      "learning_rate": 4.152734756598819e-06,
      "loss": 2.3636,
      "step": 53819
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1225792169570923,
      "learning_rate": 4.152400748145682e-06,
      "loss": 2.3787,
      "step": 53820
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0816545486450195,
      "learning_rate": 4.1520667496058174e-06,
      "loss": 2.2587,
      "step": 53821
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0737857818603516,
      "learning_rate": 4.15173276097979e-06,
      "loss": 2.4411,
      "step": 53822
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0810415744781494,
      "learning_rate": 4.151398782268161e-06,
      "loss": 2.4127,
      "step": 53823
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1148030757904053,
      "learning_rate": 4.1510648134714985e-06,
      "loss": 2.5129,
      "step": 53824
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2355358600616455,
      "learning_rate": 4.150730854590371e-06,
      "loss": 2.4866,
      "step": 53825
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1193336248397827,
      "learning_rate": 4.150396905625341e-06,
      "loss": 2.3459,
      "step": 53826
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2650251388549805,
      "learning_rate": 4.150062966576981e-06,
      "loss": 2.1217,
      "step": 53827
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3039976358413696,
      "learning_rate": 4.149729037445849e-06,
      "loss": 2.3598,
      "step": 53828
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0004500150680542,
      "learning_rate": 4.149395118232519e-06,
      "loss": 2.2165,
      "step": 53829
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0796825885772705,
      "learning_rate": 4.149061208937549e-06,
      "loss": 2.4978,
      "step": 53830
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.097320318222046,
      "learning_rate": 4.148727309561515e-06,
      "loss": 2.5137,
      "step": 53831
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0388261079788208,
      "learning_rate": 4.148393420104971e-06,
      "loss": 2.324,
      "step": 53832
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0114368200302124,
      "learning_rate": 4.148059540568494e-06,
      "loss": 2.326,
      "step": 53833
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.081213355064392,
      "learning_rate": 4.147725670952643e-06,
      "loss": 2.1509,
      "step": 53834
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0694295167922974,
      "learning_rate": 4.147391811257992e-06,
      "loss": 2.3337,
      "step": 53835
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0863202810287476,
      "learning_rate": 4.147057961485095e-06,
      "loss": 2.3355,
      "step": 53836
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0045392513275146,
      "learning_rate": 4.146724121634528e-06,
      "loss": 2.3303,
      "step": 53837
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9733859896659851,
      "learning_rate": 4.14639029170685e-06,
      "loss": 2.2894,
      "step": 53838
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1685395240783691,
      "learning_rate": 4.146056471702633e-06,
      "loss": 2.4193,
      "step": 53839
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0606509447097778,
      "learning_rate": 4.145722661622438e-06,
      "loss": 2.2198,
      "step": 53840
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0441206693649292,
      "learning_rate": 4.145388861466836e-06,
      "loss": 2.457,
      "step": 53841
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0088740587234497,
      "learning_rate": 4.145055071236385e-06,
      "loss": 2.3454,
      "step": 53842
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0225704908370972,
      "learning_rate": 4.144721290931661e-06,
      "loss": 2.3648,
      "step": 53843
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0525884628295898,
      "learning_rate": 4.1443875205532244e-06,
      "loss": 2.2773,
      "step": 53844
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.106299638748169,
      "learning_rate": 4.144053760101639e-06,
      "loss": 2.1351,
      "step": 53845
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1196575164794922,
      "learning_rate": 4.143720009577475e-06,
      "loss": 2.2375,
      "step": 53846
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.037477731704712,
      "learning_rate": 4.143386268981292e-06,
      "loss": 2.2964,
      "step": 53847
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0331740379333496,
      "learning_rate": 4.1430525383136645e-06,
      "loss": 2.5457,
      "step": 53848
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1601390838623047,
      "learning_rate": 4.142718817575153e-06,
      "loss": 2.5502,
      "step": 53849
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0937021970748901,
      "learning_rate": 4.142385106766325e-06,
      "loss": 2.4884,
      "step": 53850
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1041134595870972,
      "learning_rate": 4.14205140588774e-06,
      "loss": 2.1518,
      "step": 53851
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1943432092666626,
      "learning_rate": 4.141717714939974e-06,
      "loss": 2.3604,
      "step": 53852
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0970580577850342,
      "learning_rate": 4.141384033923583e-06,
      "loss": 2.4707,
      "step": 53853
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0039880275726318,
      "learning_rate": 4.1410503628391405e-06,
      "loss": 2.5761,
      "step": 53854
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0205397605895996,
      "learning_rate": 4.140716701687207e-06,
      "loss": 2.329,
      "step": 53855
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.130505919456482,
      "learning_rate": 4.140383050468352e-06,
      "loss": 2.3663,
      "step": 53856
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.288998007774353,
      "learning_rate": 4.140049409183136e-06,
      "loss": 2.3263,
      "step": 53857
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0934895277023315,
      "learning_rate": 4.139715777832131e-06,
      "loss": 2.3899,
      "step": 53858
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1854922771453857,
      "learning_rate": 4.139382156415897e-06,
      "loss": 2.4435,
      "step": 53859
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.100866436958313,
      "learning_rate": 4.1390485449350045e-06,
      "loss": 2.1496,
      "step": 53860
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9974448680877686,
      "learning_rate": 4.1387149433900176e-06,
      "loss": 2.3616,
      "step": 53861
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0322319269180298,
      "learning_rate": 4.1383813517814996e-06,
      "loss": 2.5117,
      "step": 53862
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0654640197753906,
      "learning_rate": 4.138047770110014e-06,
      "loss": 2.3507,
      "step": 53863
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1327378749847412,
      "learning_rate": 4.137714198376134e-06,
      "loss": 2.3212,
      "step": 53864
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.025044560432434,
      "learning_rate": 4.137380636580416e-06,
      "loss": 2.5987,
      "step": 53865
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1510173082351685,
      "learning_rate": 4.1370470847234335e-06,
      "loss": 2.2572,
      "step": 53866
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0775268077850342,
      "learning_rate": 4.136713542805746e-06,
      "loss": 2.3078,
      "step": 53867
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.101730227470398,
      "learning_rate": 4.136380010827924e-06,
      "loss": 2.2578,
      "step": 53868
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.144106388092041,
      "learning_rate": 4.136046488790527e-06,
      "loss": 2.3736,
      "step": 53869
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1094062328338623,
      "learning_rate": 4.135712976694129e-06,
      "loss": 2.4932,
      "step": 53870
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0477267503738403,
      "learning_rate": 4.135379474539288e-06,
      "loss": 2.2016,
      "step": 53871
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0617727041244507,
      "learning_rate": 4.135045982326569e-06,
      "loss": 2.2958,
      "step": 53872
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0525662899017334,
      "learning_rate": 4.134712500056543e-06,
      "loss": 2.3052,
      "step": 53873
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2207138538360596,
      "learning_rate": 4.134379027729773e-06,
      "loss": 2.5557,
      "step": 53874
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1341174840927124,
      "learning_rate": 4.13404556534682e-06,
      "loss": 2.2995,
      "step": 53875
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1697056293487549,
      "learning_rate": 4.133712112908257e-06,
      "loss": 2.4309,
      "step": 53876
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0837701559066772,
      "learning_rate": 4.133378670414644e-06,
      "loss": 2.3476,
      "step": 53877
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0317436456680298,
      "learning_rate": 4.133045237866545e-06,
      "loss": 2.4462,
      "step": 53878
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0235419273376465,
      "learning_rate": 4.1327118152645305e-06,
      "loss": 2.3079,
      "step": 53879
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0183411836624146,
      "learning_rate": 4.13237840260916e-06,
      "loss": 2.3295,
      "step": 53880
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.109114408493042,
      "learning_rate": 4.132044999901005e-06,
      "loss": 2.2808,
      "step": 53881
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1332064867019653,
      "learning_rate": 4.131711607140624e-06,
      "loss": 2.2374,
      "step": 53882
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.140023946762085,
      "learning_rate": 4.131378224328589e-06,
      "loss": 2.2734,
      "step": 53883
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9929975271224976,
      "learning_rate": 4.131044851465459e-06,
      "loss": 2.2324,
      "step": 53884
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1152615547180176,
      "learning_rate": 4.130711488551805e-06,
      "loss": 2.3181,
      "step": 53885
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0769046545028687,
      "learning_rate": 4.130378135588186e-06,
      "loss": 2.5172,
      "step": 53886
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1988121271133423,
      "learning_rate": 4.130044792575176e-06,
      "loss": 2.1959,
      "step": 53887
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2552858591079712,
      "learning_rate": 4.129711459513328e-06,
      "loss": 2.3325,
      "step": 53888
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0831893682479858,
      "learning_rate": 4.1293781364032174e-06,
      "loss": 2.294,
      "step": 53889
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0815112590789795,
      "learning_rate": 4.129044823245401e-06,
      "loss": 2.3015,
      "step": 53890
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.992487907409668,
      "learning_rate": 4.128711520040453e-06,
      "loss": 2.1096,
      "step": 53891
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0793527364730835,
      "learning_rate": 4.128378226788928e-06,
      "loss": 2.3937,
      "step": 53892
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0282033681869507,
      "learning_rate": 4.128044943491401e-06,
      "loss": 2.1634,
      "step": 53893
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.056410789489746,
      "learning_rate": 4.1277116701484295e-06,
      "loss": 2.4038,
      "step": 53894
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1143884658813477,
      "learning_rate": 4.127378406760585e-06,
      "loss": 2.3458,
      "step": 53895
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1755623817443848,
      "learning_rate": 4.127045153328426e-06,
      "loss": 2.1889,
      "step": 53896
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.034204125404358,
      "learning_rate": 4.126711909852523e-06,
      "loss": 2.2501,
      "step": 53897
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0695744752883911,
      "learning_rate": 4.12637867633344e-06,
      "loss": 2.2456,
      "step": 53898
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.452264666557312,
      "learning_rate": 4.126045452771735e-06,
      "loss": 2.5846,
      "step": 53899
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0865708589553833,
      "learning_rate": 4.125712239167984e-06,
      "loss": 2.3297,
      "step": 53900
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9883089661598206,
      "learning_rate": 4.1253790355227445e-06,
      "loss": 2.3962,
      "step": 53901
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0844841003417969,
      "learning_rate": 4.12504584183658e-06,
      "loss": 2.1452,
      "step": 53902
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1808894872665405,
      "learning_rate": 4.124712658110061e-06,
      "loss": 2.482,
      "step": 53903
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1211258172988892,
      "learning_rate": 4.1243794843437475e-06,
      "loss": 2.3966,
      "step": 53904
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1365654468536377,
      "learning_rate": 4.124046320538211e-06,
      "loss": 2.2851,
      "step": 53905
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8359218835830688,
      "learning_rate": 4.12371316669401e-06,
      "loss": 2.3152,
      "step": 53906
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0864654779434204,
      "learning_rate": 4.123380022811708e-06,
      "loss": 2.3799,
      "step": 53907
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1224720478057861,
      "learning_rate": 4.123046888891877e-06,
      "loss": 2.2332,
      "step": 53908
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1536306142807007,
      "learning_rate": 4.122713764935074e-06,
      "loss": 2.1644,
      "step": 53909
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0564016103744507,
      "learning_rate": 4.122380650941871e-06,
      "loss": 2.2329,
      "step": 53910
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0513081550598145,
      "learning_rate": 4.122047546912825e-06,
      "loss": 2.2007,
      "step": 53911
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1128437519073486,
      "learning_rate": 4.1217144528485095e-06,
      "loss": 2.3035,
      "step": 53912
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1637303829193115,
      "learning_rate": 4.1213813687494795e-06,
      "loss": 2.0688,
      "step": 53913
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2070317268371582,
      "learning_rate": 4.121048294616313e-06,
      "loss": 2.2957,
      "step": 53914
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6156086921691895,
      "learning_rate": 4.120715230449559e-06,
      "loss": 2.4665,
      "step": 53915
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1274322271347046,
      "learning_rate": 4.120382176249793e-06,
      "loss": 2.6402,
      "step": 53916
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0886467695236206,
      "learning_rate": 4.120049132017573e-06,
      "loss": 2.23,
      "step": 53917
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1035284996032715,
      "learning_rate": 4.119716097753469e-06,
      "loss": 2.2387,
      "step": 53918
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0699081420898438,
      "learning_rate": 4.1193830734580405e-06,
      "loss": 2.5853,
      "step": 53919
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1079034805297852,
      "learning_rate": 4.119050059131858e-06,
      "loss": 2.2323,
      "step": 53920
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1654102802276611,
      "learning_rate": 4.118717054775479e-06,
      "loss": 2.1015,
      "step": 53921
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0939717292785645,
      "learning_rate": 4.118384060389477e-06,
      "loss": 2.4427,
      "step": 53922
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1596510410308838,
      "learning_rate": 4.118051075974407e-06,
      "loss": 2.5745,
      "step": 53923
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1602678298950195,
      "learning_rate": 4.117718101530842e-06,
      "loss": 2.1339,
      "step": 53924
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.147218942642212,
      "learning_rate": 4.1173851370593376e-06,
      "loss": 2.2351,
      "step": 53925
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0982460975646973,
      "learning_rate": 4.1170521825604685e-06,
      "loss": 2.3816,
      "step": 53926
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0876997709274292,
      "learning_rate": 4.116719238034792e-06,
      "loss": 2.3048,
      "step": 53927
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0744596719741821,
      "learning_rate": 4.116386303482874e-06,
      "loss": 2.0717,
      "step": 53928
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.062673807144165,
      "learning_rate": 4.116053378905277e-06,
      "loss": 2.4692,
      "step": 53929
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.324109673500061,
      "learning_rate": 4.11572046430257e-06,
      "loss": 2.2585,
      "step": 53930
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1034417152404785,
      "learning_rate": 4.115387559675314e-06,
      "loss": 2.5205,
      "step": 53931
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3317676782608032,
      "learning_rate": 4.115054665024075e-06,
      "loss": 2.2025,
      "step": 53932
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0233074426651,
      "learning_rate": 4.114721780349418e-06,
      "loss": 2.1699,
      "step": 53933
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0385961532592773,
      "learning_rate": 4.114388905651902e-06,
      "loss": 2.2272,
      "step": 53934
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.239804983139038,
      "learning_rate": 4.114056040932099e-06,
      "loss": 2.395,
      "step": 53935
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0917105674743652,
      "learning_rate": 4.113723186190566e-06,
      "loss": 2.2807,
      "step": 53936
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0442123413085938,
      "learning_rate": 4.113390341427874e-06,
      "loss": 2.1438,
      "step": 53937
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0188945531845093,
      "learning_rate": 4.113057506644581e-06,
      "loss": 2.5871,
      "step": 53938
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0122936964035034,
      "learning_rate": 4.112724681841258e-06,
      "loss": 2.2196,
      "step": 53939
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0861916542053223,
      "learning_rate": 4.112391867018466e-06,
      "loss": 2.1988,
      "step": 53940
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.142398715019226,
      "learning_rate": 4.112059062176769e-06,
      "loss": 2.5344,
      "step": 53941
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.134756088256836,
      "learning_rate": 4.111726267316727e-06,
      "loss": 2.4026,
      "step": 53942
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0359883308410645,
      "learning_rate": 4.111393482438911e-06,
      "loss": 2.2147,
      "step": 53943
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1107878684997559,
      "learning_rate": 4.111060707543879e-06,
      "loss": 2.2879,
      "step": 53944
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9765588641166687,
      "learning_rate": 4.110727942632204e-06,
      "loss": 2.3099,
      "step": 53945
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0553001165390015,
      "learning_rate": 4.110395187704439e-06,
      "loss": 2.3091,
      "step": 53946
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1232333183288574,
      "learning_rate": 4.110062442761158e-06,
      "loss": 2.4659,
      "step": 53947
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.096986174583435,
      "learning_rate": 4.109729707802918e-06,
      "loss": 2.3902,
      "step": 53948
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1113131046295166,
      "learning_rate": 4.109396982830288e-06,
      "loss": 2.1874,
      "step": 53949
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1928743124008179,
      "learning_rate": 4.109064267843827e-06,
      "loss": 2.2254,
      "step": 53950
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2235631942749023,
      "learning_rate": 4.108731562844106e-06,
      "loss": 2.435,
      "step": 53951
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0930581092834473,
      "learning_rate": 4.108398867831681e-06,
      "loss": 2.0703,
      "step": 53952
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0520703792572021,
      "learning_rate": 4.1080661828071276e-06,
      "loss": 2.3832,
      "step": 53953
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0714786052703857,
      "learning_rate": 4.107733507770995e-06,
      "loss": 2.4351,
      "step": 53954
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0774633884429932,
      "learning_rate": 4.1074008427238576e-06,
      "loss": 2.3672,
      "step": 53955
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1029337644577026,
      "learning_rate": 4.1070681876662735e-06,
      "loss": 2.2792,
      "step": 53956
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1941192150115967,
      "learning_rate": 4.106735542598812e-06,
      "loss": 2.4249,
      "step": 53957
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0612515211105347,
      "learning_rate": 4.106402907522031e-06,
      "loss": 2.2357,
      "step": 53958
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0811773538589478,
      "learning_rate": 4.1060702824365014e-06,
      "loss": 2.4753,
      "step": 53959
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1146337985992432,
      "learning_rate": 4.1057376673427806e-06,
      "loss": 2.3252,
      "step": 53960
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1296436786651611,
      "learning_rate": 4.105405062241438e-06,
      "loss": 2.3592,
      "step": 53961
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0901696681976318,
      "learning_rate": 4.105072467133034e-06,
      "loss": 2.3642,
      "step": 53962
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0381507873535156,
      "learning_rate": 4.104739882018132e-06,
      "loss": 2.2604,
      "step": 53963
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0859127044677734,
      "learning_rate": 4.104407306897299e-06,
      "loss": 2.2723,
      "step": 53964
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.222357153892517,
      "learning_rate": 4.104074741771094e-06,
      "loss": 2.3421,
      "step": 53965
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1902492046356201,
      "learning_rate": 4.103742186640086e-06,
      "loss": 2.4951,
      "step": 53966
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0899184942245483,
      "learning_rate": 4.103409641504836e-06,
      "loss": 2.1933,
      "step": 53967
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1942658424377441,
      "learning_rate": 4.10307710636591e-06,
      "loss": 2.4055,
      "step": 53968
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0940227508544922,
      "learning_rate": 4.102744581223865e-06,
      "loss": 2.2991,
      "step": 53969
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.116565465927124,
      "learning_rate": 4.102412066079274e-06,
      "loss": 2.072,
      "step": 53970
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1239101886749268,
      "learning_rate": 4.102079560932692e-06,
      "loss": 2.3426,
      "step": 53971
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1175422668457031,
      "learning_rate": 4.101747065784691e-06,
      "loss": 2.2561,
      "step": 53972
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.119939923286438,
      "learning_rate": 4.101414580635827e-06,
      "loss": 2.3257,
      "step": 53973
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.069994330406189,
      "learning_rate": 4.101082105486672e-06,
      "loss": 2.2359,
      "step": 53974
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0641107559204102,
      "learning_rate": 4.10074964033778e-06,
      "loss": 2.4506,
      "step": 53975
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2714381217956543,
      "learning_rate": 4.100417185189723e-06,
      "loss": 2.2834,
      "step": 53976
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0320615768432617,
      "learning_rate": 4.100084740043059e-06,
      "loss": 2.5294,
      "step": 53977
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0594555139541626,
      "learning_rate": 4.099752304898357e-06,
      "loss": 2.3421,
      "step": 53978
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3655110597610474,
      "learning_rate": 4.0994198797561725e-06,
      "loss": 2.3404,
      "step": 53979
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0204205513000488,
      "learning_rate": 4.099087464617082e-06,
      "loss": 2.1943,
      "step": 53980
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2035764455795288,
      "learning_rate": 4.0987550594816324e-06,
      "loss": 2.1723,
      "step": 53981
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0793092250823975,
      "learning_rate": 4.098422664350401e-06,
      "loss": 2.6042,
      "step": 53982
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0874338150024414,
      "learning_rate": 4.098090279223942e-06,
      "loss": 2.3857,
      "step": 53983
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1531955003738403,
      "learning_rate": 4.097757904102827e-06,
      "loss": 2.5098,
      "step": 53984
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1359480619430542,
      "learning_rate": 4.097425538987611e-06,
      "loss": 2.1531,
      "step": 53985
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1321316957473755,
      "learning_rate": 4.097093183878865e-06,
      "loss": 2.2803,
      "step": 53986
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1944302320480347,
      "learning_rate": 4.096760838777146e-06,
      "loss": 2.5276,
      "step": 53987
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1370911598205566,
      "learning_rate": 4.096428503683025e-06,
      "loss": 2.2951,
      "step": 53988
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9582489728927612,
      "learning_rate": 4.096096178597061e-06,
      "loss": 2.296,
      "step": 53989
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0157368183135986,
      "learning_rate": 4.095763863519813e-06,
      "loss": 2.2493,
      "step": 53990
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.607285976409912,
      "learning_rate": 4.095431558451853e-06,
      "loss": 2.4763,
      "step": 53991
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0064574480056763,
      "learning_rate": 4.095099263393737e-06,
      "loss": 2.3772,
      "step": 53992
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1375999450683594,
      "learning_rate": 4.094766978346035e-06,
      "loss": 2.3515,
      "step": 53993
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0695313215255737,
      "learning_rate": 4.0944347033093054e-06,
      "loss": 2.1942,
      "step": 53994
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1017184257507324,
      "learning_rate": 4.094102438284114e-06,
      "loss": 2.197,
      "step": 53995
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1288727521896362,
      "learning_rate": 4.093770183271019e-06,
      "loss": 2.4426,
      "step": 53996
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0110633373260498,
      "learning_rate": 4.093437938270591e-06,
      "loss": 2.2922,
      "step": 53997
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0930759906768799,
      "learning_rate": 4.0931057032833874e-06,
      "loss": 2.4957,
      "step": 53998
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0894629955291748,
      "learning_rate": 4.092773478309977e-06,
      "loss": 2.2916,
      "step": 53999
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0541185140609741,
      "learning_rate": 4.092441263350916e-06,
      "loss": 2.3993,
      "step": 54000
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1022112369537354,
      "learning_rate": 4.0921090584067755e-06,
      "loss": 2.4488,
      "step": 54001
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0453484058380127,
      "learning_rate": 4.091776863478111e-06,
      "loss": 2.4287,
      "step": 54002
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.051856279373169,
      "learning_rate": 4.091444678565493e-06,
      "loss": 2.2667,
      "step": 54003
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.820849061012268,
      "learning_rate": 4.091112503669478e-06,
      "loss": 2.3367,
      "step": 54004
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3668038845062256,
      "learning_rate": 4.0907803387906344e-06,
      "loss": 2.1339,
      "step": 54005
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0423717498779297,
      "learning_rate": 4.0904481839295245e-06,
      "loss": 2.3493,
      "step": 54006
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0313427448272705,
      "learning_rate": 4.090116039086709e-06,
      "loss": 2.2155,
      "step": 54007
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0734940767288208,
      "learning_rate": 4.089783904262748e-06,
      "loss": 2.4997,
      "step": 54008
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0040597915649414,
      "learning_rate": 4.089451779458212e-06,
      "loss": 2.3174,
      "step": 54009
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1127358675003052,
      "learning_rate": 4.0891196646736586e-06,
      "loss": 2.2052,
      "step": 54010
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0278640985488892,
      "learning_rate": 4.088787559909655e-06,
      "loss": 2.275,
      "step": 54011
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2498321533203125,
      "learning_rate": 4.088455465166758e-06,
      "loss": 2.334,
      "step": 54012
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.034942626953125,
      "learning_rate": 4.088123380445539e-06,
      "loss": 2.1616,
      "step": 54013
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.274553894996643,
      "learning_rate": 4.087791305746552e-06,
      "loss": 2.4479,
      "step": 54014
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1210198402404785,
      "learning_rate": 4.08745924107037e-06,
      "loss": 2.2544,
      "step": 54015
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2286123037338257,
      "learning_rate": 4.087127186417549e-06,
      "loss": 2.4122,
      "step": 54016
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.126729965209961,
      "learning_rate": 4.08679514178865e-06,
      "loss": 2.4242,
      "step": 54017
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.253105640411377,
      "learning_rate": 4.086463107184242e-06,
      "loss": 2.2281,
      "step": 54018
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.017953872680664,
      "learning_rate": 4.086131082604886e-06,
      "loss": 2.3588,
      "step": 54019
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0435377359390259,
      "learning_rate": 4.085799068051141e-06,
      "loss": 2.5133,
      "step": 54020
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0181407928466797,
      "learning_rate": 4.0854670635235745e-06,
      "loss": 2.4744,
      "step": 54021
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0193018913269043,
      "learning_rate": 4.0851350690227455e-06,
      "loss": 2.3254,
      "step": 54022
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.007696270942688,
      "learning_rate": 4.084803084549223e-06,
      "loss": 2.3091,
      "step": 54023
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.217317819595337,
      "learning_rate": 4.084471110103565e-06,
      "loss": 2.2664,
      "step": 54024
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0733999013900757,
      "learning_rate": 4.084139145686331e-06,
      "loss": 2.1382,
      "step": 54025
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1088835000991821,
      "learning_rate": 4.083807191298092e-06,
      "loss": 2.1637,
      "step": 54026
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.034029483795166,
      "learning_rate": 4.083475246939404e-06,
      "loss": 2.2617,
      "step": 54027
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0628727674484253,
      "learning_rate": 4.083143312610836e-06,
      "loss": 2.3609,
      "step": 54028
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.194804310798645,
      "learning_rate": 4.082811388312942e-06,
      "loss": 2.4238,
      "step": 54029
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0028164386749268,
      "learning_rate": 4.082479474046294e-06,
      "loss": 2.2213,
      "step": 54030
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0958876609802246,
      "learning_rate": 4.082147569811447e-06,
      "loss": 2.2499,
      "step": 54031
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9731107950210571,
      "learning_rate": 4.081815675608974e-06,
      "loss": 2.0698,
      "step": 54032
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2981059551239014,
      "learning_rate": 4.081483791439424e-06,
      "loss": 2.2253,
      "step": 54033
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1326199769973755,
      "learning_rate": 4.08115191730337e-06,
      "loss": 2.3032,
      "step": 54034
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1173019409179688,
      "learning_rate": 4.080820053201367e-06,
      "loss": 2.4693,
      "step": 54035
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0857853889465332,
      "learning_rate": 4.080488199133985e-06,
      "loss": 2.1513,
      "step": 54036
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.032022476196289,
      "learning_rate": 4.080156355101781e-06,
      "loss": 2.6177,
      "step": 54037
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0806548595428467,
      "learning_rate": 4.079824521105322e-06,
      "loss": 2.3075,
      "step": 54038
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.094300627708435,
      "learning_rate": 4.079492697145165e-06,
      "loss": 2.4768,
      "step": 54039
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1758378744125366,
      "learning_rate": 4.07916088322188e-06,
      "loss": 2.3518,
      "step": 54040
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1542996168136597,
      "learning_rate": 4.078829079336022e-06,
      "loss": 2.2743,
      "step": 54041
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.05934739112854,
      "learning_rate": 4.07849728548816e-06,
      "loss": 2.5894,
      "step": 54042
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1989763975143433,
      "learning_rate": 4.078165501678849e-06,
      "loss": 2.1144,
      "step": 54043
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0717618465423584,
      "learning_rate": 4.077833727908661e-06,
      "loss": 2.2068,
      "step": 54044
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.086525559425354,
      "learning_rate": 4.0775019641781525e-06,
      "loss": 2.3664,
      "step": 54045
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2023223638534546,
      "learning_rate": 4.077170210487887e-06,
      "loss": 2.207,
      "step": 54046
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0958551168441772,
      "learning_rate": 4.076838466838422e-06,
      "loss": 2.4251,
      "step": 54047
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0109783411026,
      "learning_rate": 4.076506733230329e-06,
      "loss": 2.1781,
      "step": 54048
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1769953966140747,
      "learning_rate": 4.076175009664161e-06,
      "loss": 2.3947,
      "step": 54049
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.00006103515625,
      "learning_rate": 4.07584329614049e-06,
      "loss": 2.6404,
      "step": 54050
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0304011106491089,
      "learning_rate": 4.075511592659874e-06,
      "loss": 2.3392,
      "step": 54051
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.157395362854004,
      "learning_rate": 4.075179899222871e-06,
      "loss": 2.2011,
      "step": 54052
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0582329034805298,
      "learning_rate": 4.07484821583005e-06,
      "loss": 2.3494,
      "step": 54053
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3104467391967773,
      "learning_rate": 4.0745165424819675e-06,
      "loss": 2.0578,
      "step": 54054
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9908941984176636,
      "learning_rate": 4.074184879179193e-06,
      "loss": 2.2633,
      "step": 54055
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.482363224029541,
      "learning_rate": 4.073853225922281e-06,
      "loss": 1.9958,
      "step": 54056
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1050450801849365,
      "learning_rate": 4.0735215827118005e-06,
      "loss": 2.2578,
      "step": 54057
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.079052448272705,
      "learning_rate": 4.0731899495483065e-06,
      "loss": 2.401,
      "step": 54058
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0410577058792114,
      "learning_rate": 4.072858326432372e-06,
      "loss": 2.4243,
      "step": 54059
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1413344144821167,
      "learning_rate": 4.072526713364546e-06,
      "loss": 2.2062,
      "step": 54060
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0977935791015625,
      "learning_rate": 4.0721951103454006e-06,
      "loss": 2.2411,
      "step": 54061
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0760676860809326,
      "learning_rate": 4.07186351737549e-06,
      "loss": 2.3114,
      "step": 54062
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0824543237686157,
      "learning_rate": 4.071531934455386e-06,
      "loss": 2.2978,
      "step": 54063
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1910290718078613,
      "learning_rate": 4.071200361585641e-06,
      "loss": 2.3832,
      "step": 54064
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.062441349029541,
      "learning_rate": 4.070868798766826e-06,
      "loss": 2.3248,
      "step": 54065
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0377652645111084,
      "learning_rate": 4.070537245999494e-06,
      "loss": 2.3078,
      "step": 54066
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1324617862701416,
      "learning_rate": 4.0702057032842165e-06,
      "loss": 2.3746,
      "step": 54067
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.101667046546936,
      "learning_rate": 4.069874170621546e-06,
      "loss": 2.1685,
      "step": 54068
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.073330044746399,
      "learning_rate": 4.0695426480120536e-06,
      "loss": 2.2955,
      "step": 54069
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0607504844665527,
      "learning_rate": 4.0692111354562935e-06,
      "loss": 2.5187,
      "step": 54070
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.145674467086792,
      "learning_rate": 4.068879632954836e-06,
      "loss": 2.1848,
      "step": 54071
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.083683729171753,
      "learning_rate": 4.068548140508237e-06,
      "loss": 2.4901,
      "step": 54072
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0960030555725098,
      "learning_rate": 4.06821665811706e-06,
      "loss": 2.3267,
      "step": 54073
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.038622498512268,
      "learning_rate": 4.0678851857818635e-06,
      "loss": 2.3198,
      "step": 54074
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0068002939224243,
      "learning_rate": 4.067553723503216e-06,
      "loss": 2.0574,
      "step": 54075
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.006232738494873,
      "learning_rate": 4.067222271281673e-06,
      "loss": 2.2984,
      "step": 54076
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0649759769439697,
      "learning_rate": 4.066890829117803e-06,
      "loss": 2.2239,
      "step": 54077
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1009395122528076,
      "learning_rate": 4.066559397012161e-06,
      "loss": 2.2763,
      "step": 54078
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1769039630889893,
      "learning_rate": 4.066227974965315e-06,
      "loss": 2.3533,
      "step": 54079
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2214418649673462,
      "learning_rate": 4.065896562977825e-06,
      "loss": 2.5057,
      "step": 54080
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1802713871002197,
      "learning_rate": 4.065565161050249e-06,
      "loss": 2.4656,
      "step": 54081
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.114424705505371,
      "learning_rate": 4.065233769183154e-06,
      "loss": 2.2452,
      "step": 54082
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2318050861358643,
      "learning_rate": 4.0649023873770975e-06,
      "loss": 2.2998,
      "step": 54083
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.029597282409668,
      "learning_rate": 4.064571015632647e-06,
      "loss": 2.4437,
      "step": 54084
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.222912311553955,
      "learning_rate": 4.064239653950359e-06,
      "loss": 2.4409,
      "step": 54085
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1152982711791992,
      "learning_rate": 4.063908302330799e-06,
      "loss": 2.2209,
      "step": 54086
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2986894845962524,
      "learning_rate": 4.063576960774522e-06,
      "loss": 2.284,
      "step": 54087
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1290727853775024,
      "learning_rate": 4.063245629282097e-06,
      "loss": 2.3794,
      "step": 54088
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3675663471221924,
      "learning_rate": 4.062914307854081e-06,
      "loss": 2.2737,
      "step": 54089
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.063395619392395,
      "learning_rate": 4.06258299649104e-06,
      "loss": 2.2936,
      "step": 54090
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.06216299533844,
      "learning_rate": 4.062251695193531e-06,
      "loss": 2.39,
      "step": 54091
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0780327320098877,
      "learning_rate": 4.061920403962121e-06,
      "loss": 2.3308,
      "step": 54092
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2288306951522827,
      "learning_rate": 4.061589122797366e-06,
      "loss": 2.4096,
      "step": 54093
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0671170949935913,
      "learning_rate": 4.061257851699832e-06,
      "loss": 2.3767,
      "step": 54094
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2362900972366333,
      "learning_rate": 4.060926590670076e-06,
      "loss": 2.3927,
      "step": 54095
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.035158395767212,
      "learning_rate": 4.0605953397086674e-06,
      "loss": 2.2305,
      "step": 54096
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0373891592025757,
      "learning_rate": 4.060264098816158e-06,
      "loss": 2.4478,
      "step": 54097
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2442176342010498,
      "learning_rate": 4.059932867993121e-06,
      "loss": 2.2304,
      "step": 54098
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.095741868019104,
      "learning_rate": 4.059601647240106e-06,
      "loss": 2.3025,
      "step": 54099
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.01426362991333,
      "learning_rate": 4.059270436557681e-06,
      "loss": 2.3628,
      "step": 54100
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0092216730117798,
      "learning_rate": 4.058939235946403e-06,
      "loss": 2.1886,
      "step": 54101
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.10319983959198,
      "learning_rate": 4.058608045406841e-06,
      "loss": 2.3531,
      "step": 54102
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0717401504516602,
      "learning_rate": 4.058276864939547e-06,
      "loss": 2.3439,
      "step": 54103
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.084477424621582,
      "learning_rate": 4.0579456945450925e-06,
      "loss": 2.4811,
      "step": 54104
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9571237564086914,
      "learning_rate": 4.05761453422403e-06,
      "loss": 2.3187,
      "step": 54105
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0899702310562134,
      "learning_rate": 4.057283383976928e-06,
      "loss": 2.0996,
      "step": 54106
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0581268072128296,
      "learning_rate": 4.056952243804344e-06,
      "loss": 2.382,
      "step": 54107
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0933045148849487,
      "learning_rate": 4.056621113706838e-06,
      "loss": 2.3465,
      "step": 54108
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9694174528121948,
      "learning_rate": 4.056289993684976e-06,
      "loss": 2.3325,
      "step": 54109
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.078800916671753,
      "learning_rate": 4.055958883739314e-06,
      "loss": 2.2224,
      "step": 54110
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.448824405670166,
      "learning_rate": 4.055627783870418e-06,
      "loss": 2.1153,
      "step": 54111
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0485877990722656,
      "learning_rate": 4.055296694078849e-06,
      "loss": 2.2628,
      "step": 54112
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.071134090423584,
      "learning_rate": 4.054965614365166e-06,
      "loss": 2.2883,
      "step": 54113
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.264424204826355,
      "learning_rate": 4.054634544729927e-06,
      "loss": 2.2582,
      "step": 54114
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1085727214813232,
      "learning_rate": 4.054303485173702e-06,
      "loss": 2.2526,
      "step": 54115
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.072237491607666,
      "learning_rate": 4.053972435697043e-06,
      "loss": 2.2088,
      "step": 54116
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9313855767250061,
      "learning_rate": 4.053641396300518e-06,
      "loss": 2.1986,
      "step": 54117
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1658895015716553,
      "learning_rate": 4.053310366984684e-06,
      "loss": 2.3338,
      "step": 54118
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.017435073852539,
      "learning_rate": 4.0529793477501075e-06,
      "loss": 2.2612,
      "step": 54119
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1678886413574219,
      "learning_rate": 4.052648338597343e-06,
      "loss": 2.072,
      "step": 54120
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1828622817993164,
      "learning_rate": 4.052317339526957e-06,
      "loss": 2.2247,
      "step": 54121
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.110215663909912,
      "learning_rate": 4.051986350539505e-06,
      "loss": 2.616,
      "step": 54122
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1248279809951782,
      "learning_rate": 4.0516553716355564e-06,
      "loss": 2.2443,
      "step": 54123
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2244070768356323,
      "learning_rate": 4.051324402815663e-06,
      "loss": 2.2827,
      "step": 54124
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.226191759109497,
      "learning_rate": 4.050993444080398e-06,
      "loss": 2.1131,
      "step": 54125
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0256221294403076,
      "learning_rate": 4.050662495430307e-06,
      "loss": 2.2092,
      "step": 54126
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0672465562820435,
      "learning_rate": 4.050331556865963e-06,
      "loss": 2.1543,
      "step": 54127
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.145208716392517,
      "learning_rate": 4.0500006283879186e-06,
      "loss": 2.3182,
      "step": 54128
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0940109491348267,
      "learning_rate": 4.049669709996744e-06,
      "loss": 2.3154,
      "step": 54129
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.033868670463562,
      "learning_rate": 4.0493388016929915e-06,
      "loss": 2.3193,
      "step": 54130
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3277528285980225,
      "learning_rate": 4.049007903477229e-06,
      "loss": 2.2914,
      "step": 54131
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.215226650238037,
      "learning_rate": 4.048677015350011e-06,
      "loss": 2.3016,
      "step": 54132
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0919543504714966,
      "learning_rate": 4.0483461373119045e-06,
      "loss": 2.2334,
      "step": 54133
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1330398321151733,
      "learning_rate": 4.048015269363469e-06,
      "loss": 2.4821,
      "step": 54134
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1904284954071045,
      "learning_rate": 4.047684411505261e-06,
      "loss": 2.2162,
      "step": 54135
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0814424753189087,
      "learning_rate": 4.047353563737847e-06,
      "loss": 2.3672,
      "step": 54136
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.099895715713501,
      "learning_rate": 4.047022726061783e-06,
      "loss": 2.2336,
      "step": 54137
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0564076900482178,
      "learning_rate": 4.046691898477634e-06,
      "loss": 2.1831,
      "step": 54138
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5445528030395508,
      "learning_rate": 4.04636108098596e-06,
      "loss": 2.2157,
      "step": 54139
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0370488166809082,
      "learning_rate": 4.046030273587317e-06,
      "loss": 2.4459,
      "step": 54140
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1179699897766113,
      "learning_rate": 4.045699476282274e-06,
      "loss": 2.5123,
      "step": 54141
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0783923864364624,
      "learning_rate": 4.045368689071388e-06,
      "loss": 2.1973,
      "step": 54142
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.9906846880912781,
      "learning_rate": 4.0450379119552145e-06,
      "loss": 2.2451,
      "step": 54143
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2464570999145508,
      "learning_rate": 4.044707144934323e-06,
      "loss": 2.2422,
      "step": 54144
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1331157684326172,
      "learning_rate": 4.044376388009268e-06,
      "loss": 2.2213,
      "step": 54145
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0744609832763672,
      "learning_rate": 4.044045641180615e-06,
      "loss": 2.4537,
      "step": 54146
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1812158823013306,
      "learning_rate": 4.043714904448919e-06,
      "loss": 2.131,
      "step": 54147
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.037341833114624,
      "learning_rate": 4.043384177814746e-06,
      "loss": 2.2876,
      "step": 54148
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1225229501724243,
      "learning_rate": 4.0430534612786534e-06,
      "loss": 2.1939,
      "step": 54149
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3692150115966797,
      "learning_rate": 4.042722754841205e-06,
      "loss": 2.5998,
      "step": 54150
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0666974782943726,
      "learning_rate": 4.042392058502961e-06,
      "loss": 2.415,
      "step": 54151
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0687382221221924,
      "learning_rate": 4.04206137226448e-06,
      "loss": 2.4559,
      "step": 54152
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0255964994430542,
      "learning_rate": 4.041730696126318e-06,
      "loss": 2.4785,
      "step": 54153
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0698254108428955,
      "learning_rate": 4.0414000300890455e-06,
      "loss": 2.3236,
      "step": 54154
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0811536312103271,
      "learning_rate": 4.041069374153215e-06,
      "loss": 2.4666,
      "step": 54155
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0468590259552002,
      "learning_rate": 4.040738728319393e-06,
      "loss": 2.3693,
      "step": 54156
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1289329528808594,
      "learning_rate": 4.040408092588135e-06,
      "loss": 2.3009,
      "step": 54157
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1929705142974854,
      "learning_rate": 4.0400774669600075e-06,
      "loss": 2.1596,
      "step": 54158
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1188840866088867,
      "learning_rate": 4.039746851435563e-06,
      "loss": 1.9932,
      "step": 54159
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2168465852737427,
      "learning_rate": 4.03941624601537e-06,
      "loss": 2.4724,
      "step": 54160
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.071312665939331,
      "learning_rate": 4.039085650699982e-06,
      "loss": 2.1779,
      "step": 54161
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.05189847946167,
      "learning_rate": 4.038755065489966e-06,
      "loss": 2.389,
      "step": 54162
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0449599027633667,
      "learning_rate": 4.038424490385879e-06,
      "loss": 2.718,
      "step": 54163
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.122711181640625,
      "learning_rate": 4.038093925388283e-06,
      "loss": 2.185,
      "step": 54164
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.089267611503601,
      "learning_rate": 4.037763370497732e-06,
      "loss": 2.3452,
      "step": 54165
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0534547567367554,
      "learning_rate": 4.0374328257147955e-06,
      "loss": 2.2972,
      "step": 54166
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0640156269073486,
      "learning_rate": 4.037102291040026e-06,
      "loss": 2.4284,
      "step": 54167
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1607799530029297,
      "learning_rate": 4.0367717664739915e-06,
      "loss": 2.2003,
      "step": 54168
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4236139059066772,
      "learning_rate": 4.036441252017249e-06,
      "loss": 2.3318,
      "step": 54169
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0276278257369995,
      "learning_rate": 4.036110747670354e-06,
      "loss": 2.3915,
      "step": 54170
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.082858920097351,
      "learning_rate": 4.035780253433874e-06,
      "loss": 2.1564,
      "step": 54171
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1615839004516602,
      "learning_rate": 4.035449769308364e-06,
      "loss": 2.4713,
      "step": 54172
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.088675856590271,
      "learning_rate": 4.03511929529439e-06,
      "loss": 2.2964,
      "step": 54173
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0212277173995972,
      "learning_rate": 4.034788831392505e-06,
      "loss": 2.4895,
      "step": 54174
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0583202838897705,
      "learning_rate": 4.034458377603276e-06,
      "loss": 2.17,
      "step": 54175
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.071058988571167,
      "learning_rate": 4.034127933927257e-06,
      "loss": 2.3243,
      "step": 54176
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0980641841888428,
      "learning_rate": 4.033797500365019e-06,
      "loss": 2.2625,
      "step": 54177
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0945005416870117,
      "learning_rate": 4.033467076917107e-06,
      "loss": 2.3949,
      "step": 54178
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1464239358901978,
      "learning_rate": 4.033136663584093e-06,
      "loss": 2.4291,
      "step": 54179
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.021578311920166,
      "learning_rate": 4.032806260366529e-06,
      "loss": 2.2981,
      "step": 54180
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0991532802581787,
      "learning_rate": 4.032475867264982e-06,
      "loss": 2.1668,
      "step": 54181
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0476016998291016,
      "learning_rate": 4.032145484280006e-06,
      "loss": 2.358,
      "step": 54182
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0639227628707886,
      "learning_rate": 4.031815111412168e-06,
      "loss": 2.4142,
      "step": 54183
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.143984079360962,
      "learning_rate": 4.031484748662021e-06,
      "loss": 2.648,
      "step": 54184
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3154845237731934,
      "learning_rate": 4.031154396030132e-06,
      "loss": 2.2539,
      "step": 54185
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1143827438354492,
      "learning_rate": 4.030824053517054e-06,
      "loss": 2.5526,
      "step": 54186
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2282737493515015,
      "learning_rate": 4.030493721123353e-06,
      "loss": 2.3888,
      "step": 54187
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3106967210769653,
      "learning_rate": 4.030163398849584e-06,
      "loss": 2.3199,
      "step": 54188
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2318962812423706,
      "learning_rate": 4.029833086696312e-06,
      "loss": 2.3875,
      "step": 54189
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9543739557266235,
      "learning_rate": 4.029502784664096e-06,
      "loss": 2.266,
      "step": 54190
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1201297044754028,
      "learning_rate": 4.029172492753493e-06,
      "loss": 2.484,
      "step": 54191
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0647468566894531,
      "learning_rate": 4.02884221096506e-06,
      "loss": 2.3093,
      "step": 54192
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1530189514160156,
      "learning_rate": 4.028511939299367e-06,
      "loss": 2.4898,
      "step": 54193
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2917393445968628,
      "learning_rate": 4.028181677756963e-06,
      "loss": 2.349,
      "step": 54194
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0357128381729126,
      "learning_rate": 4.027851426338416e-06,
      "loss": 2.3573,
      "step": 54195
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0772606134414673,
      "learning_rate": 4.0275211850442796e-06,
      "loss": 2.3766,
      "step": 54196
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1467915773391724,
      "learning_rate": 4.027190953875122e-06,
      "loss": 2.2764,
      "step": 54197
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0871974229812622,
      "learning_rate": 4.0268607328314965e-06,
      "loss": 2.4324,
      "step": 54198
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.085800051689148,
      "learning_rate": 4.026530521913961e-06,
      "loss": 2.2098,
      "step": 54199
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0649378299713135,
      "learning_rate": 4.026200321123083e-06,
      "loss": 2.5433,
      "step": 54200
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.157329797744751,
      "learning_rate": 4.025870130459413e-06,
      "loss": 1.9999,
      "step": 54201
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0519646406173706,
      "learning_rate": 4.025539949923521e-06,
      "loss": 2.4124,
      "step": 54202
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2981799840927124,
      "learning_rate": 4.025209779515956e-06,
      "loss": 2.0647,
      "step": 54203
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1593257188796997,
      "learning_rate": 4.0248796192372904e-06,
      "loss": 2.2929,
      "step": 54204
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0941097736358643,
      "learning_rate": 4.02454946908807e-06,
      "loss": 2.292,
      "step": 54205
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0010685920715332,
      "learning_rate": 4.024219329068866e-06,
      "loss": 2.3028,
      "step": 54206
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0977381467819214,
      "learning_rate": 4.0238891991802275e-06,
      "loss": 2.2984,
      "step": 54207
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0235788822174072,
      "learning_rate": 4.0235590794227256e-06,
      "loss": 2.1403,
      "step": 54208
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0829012393951416,
      "learning_rate": 4.023228969796908e-06,
      "loss": 2.3938,
      "step": 54209
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1625672578811646,
      "learning_rate": 4.022898870303347e-06,
      "loss": 2.376,
      "step": 54210
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0168120861053467,
      "learning_rate": 4.022568780942591e-06,
      "loss": 2.4612,
      "step": 54211
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9522780179977417,
      "learning_rate": 4.022238701715207e-06,
      "loss": 2.3817,
      "step": 54212
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.133810043334961,
      "learning_rate": 4.02190863262175e-06,
      "loss": 2.3202,
      "step": 54213
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.99387526512146,
      "learning_rate": 4.0215785736627845e-06,
      "loss": 2.1946,
      "step": 54214
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0332368612289429,
      "learning_rate": 4.021248524838863e-06,
      "loss": 2.3615,
      "step": 54215
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.13552725315094,
      "learning_rate": 4.020918486150554e-06,
      "loss": 2.4094,
      "step": 54216
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0086727142333984,
      "learning_rate": 4.02058845759841e-06,
      "loss": 2.0977,
      "step": 54217
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0319780111312866,
      "learning_rate": 4.020258439182994e-06,
      "loss": 2.3164,
      "step": 54218
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2346482276916504,
      "learning_rate": 4.019928430904859e-06,
      "loss": 2.2812,
      "step": 54219
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0902574062347412,
      "learning_rate": 4.019598432764574e-06,
      "loss": 2.381,
      "step": 54220
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0688157081604004,
      "learning_rate": 4.019268444762691e-06,
      "loss": 2.5369,
      "step": 54221
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.194203495979309,
      "learning_rate": 4.018938466899775e-06,
      "loss": 2.4073,
      "step": 54222
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0407294034957886,
      "learning_rate": 4.01860849917638e-06,
      "loss": 2.2315,
      "step": 54223
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.151584267616272,
      "learning_rate": 4.0182785415930706e-06,
      "loss": 2.4026,
      "step": 54224
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1793800592422485,
      "learning_rate": 4.017948594150403e-06,
      "loss": 2.6436,
      "step": 54225
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2192682027816772,
      "learning_rate": 4.0176186568489364e-06,
      "loss": 2.1151,
      "step": 54226
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.091522216796875,
      "learning_rate": 4.017288729689233e-06,
      "loss": 2.0725,
      "step": 54227
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0421198606491089,
      "learning_rate": 4.016958812671846e-06,
      "loss": 2.0759,
      "step": 54228
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0328055620193481,
      "learning_rate": 4.0166289057973425e-06,
      "loss": 2.3175,
      "step": 54229
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2051643133163452,
      "learning_rate": 4.016299009066279e-06,
      "loss": 2.1773,
      "step": 54230
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1702340841293335,
      "learning_rate": 4.015969122479213e-06,
      "loss": 2.3438,
      "step": 54231
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0346349477767944,
      "learning_rate": 4.015639246036702e-06,
      "loss": 2.357,
      "step": 54232
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1609728336334229,
      "learning_rate": 4.01530937973931e-06,
      "loss": 2.3265,
      "step": 54233
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2209144830703735,
      "learning_rate": 4.014979523587591e-06,
      "loss": 2.4461,
      "step": 54234
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.173896312713623,
      "learning_rate": 4.014649677582111e-06,
      "loss": 2.413,
      "step": 54235
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0434350967407227,
      "learning_rate": 4.014319841723422e-06,
      "loss": 2.2474,
      "step": 54236
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.105455994606018,
      "learning_rate": 4.013990016012091e-06,
      "loss": 2.391,
      "step": 54237
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0609345436096191,
      "learning_rate": 4.013660200448667e-06,
      "loss": 2.329,
      "step": 54238
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0731219053268433,
      "learning_rate": 4.013330395033719e-06,
      "loss": 2.3757,
      "step": 54239
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0864723920822144,
      "learning_rate": 4.013000599767799e-06,
      "loss": 2.5156,
      "step": 54240
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0094560384750366,
      "learning_rate": 4.0126708146514725e-06,
      "loss": 2.1965,
      "step": 54241
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.988747239112854,
      "learning_rate": 4.012341039685291e-06,
      "loss": 2.2171,
      "step": 54242
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0988402366638184,
      "learning_rate": 4.012011274869826e-06,
      "loss": 2.3265,
      "step": 54243
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0815848112106323,
      "learning_rate": 4.011681520205621e-06,
      "loss": 2.394,
      "step": 54244
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1653127670288086,
      "learning_rate": 4.011351775693244e-06,
      "loss": 2.3083,
      "step": 54245
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0558671951293945,
      "learning_rate": 4.011022041333249e-06,
      "loss": 2.3802,
      "step": 54246
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1386032104492188,
      "learning_rate": 4.010692317126203e-06,
      "loss": 2.29,
      "step": 54247
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0445164442062378,
      "learning_rate": 4.0103626030726565e-06,
      "loss": 2.2711,
      "step": 54248
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.181356430053711,
      "learning_rate": 4.010032899173175e-06,
      "loss": 2.3558,
      "step": 54249
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0708922147750854,
      "learning_rate": 4.009703205428311e-06,
      "loss": 2.1461,
      "step": 54250
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0475910902023315,
      "learning_rate": 4.009373521838631e-06,
      "loss": 2.0718,
      "step": 54251
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1991121768951416,
      "learning_rate": 4.00904384840469e-06,
      "loss": 2.3695,
      "step": 54252
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9994386434555054,
      "learning_rate": 4.008714185127044e-06,
      "loss": 2.2503,
      "step": 54253
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1029906272888184,
      "learning_rate": 4.008384532006257e-06,
      "loss": 2.2313,
      "step": 54254
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0895917415618896,
      "learning_rate": 4.008054889042882e-06,
      "loss": 2.2943,
      "step": 54255
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.023513913154602,
      "learning_rate": 4.007725256237486e-06,
      "loss": 2.1207,
      "step": 54256
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1779049634933472,
      "learning_rate": 4.007395633590621e-06,
      "loss": 2.2394,
      "step": 54257
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1711621284484863,
      "learning_rate": 4.007066021102846e-06,
      "loss": 2.4339,
      "step": 54258
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.217566728591919,
      "learning_rate": 4.006736418774726e-06,
      "loss": 2.3626,
      "step": 54259
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2132295370101929,
      "learning_rate": 4.006406826606815e-06,
      "loss": 2.3062,
      "step": 54260
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0982637405395508,
      "learning_rate": 4.006077244599668e-06,
      "loss": 2.381,
      "step": 54261
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0719820261001587,
      "learning_rate": 4.0057476727538514e-06,
      "loss": 2.2631,
      "step": 54262
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1795438528060913,
      "learning_rate": 4.0054181110699174e-06,
      "loss": 2.2855,
      "step": 54263
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.050957441329956,
      "learning_rate": 4.0050885595484315e-06,
      "loss": 2.0853,
      "step": 54264
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2060784101486206,
      "learning_rate": 4.004759018189945e-06,
      "loss": 2.4125,
      "step": 54265
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9795947670936584,
      "learning_rate": 4.004429486995024e-06,
      "loss": 2.2559,
      "step": 54266
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0901681184768677,
      "learning_rate": 4.00409996596422e-06,
      "loss": 2.2676,
      "step": 54267
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1057645082473755,
      "learning_rate": 4.003770455098098e-06,
      "loss": 2.2693,
      "step": 54268
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0260863304138184,
      "learning_rate": 4.00344095439721e-06,
      "loss": 2.1341,
      "step": 54269
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.074892520904541,
      "learning_rate": 4.003111463862126e-06,
      "loss": 2.1076,
      "step": 54270
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0478215217590332,
      "learning_rate": 4.002781983493389e-06,
      "loss": 2.524,
      "step": 54271
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1382163763046265,
      "learning_rate": 4.002452513291569e-06,
      "loss": 2.2318,
      "step": 54272
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.123672604560852,
      "learning_rate": 4.0021230532572175e-06,
      "loss": 2.3101,
      "step": 54273
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.146685242652893,
      "learning_rate": 4.001793603390901e-06,
      "loss": 2.2942,
      "step": 54274
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1230230331420898,
      "learning_rate": 4.001464163693168e-06,
      "loss": 2.3102,
      "step": 54275
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0865968465805054,
      "learning_rate": 4.0011347341645875e-06,
      "loss": 2.2264,
      "step": 54276
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0430548191070557,
      "learning_rate": 4.000805314805709e-06,
      "loss": 2.3789,
      "step": 54277
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1806995868682861,
      "learning_rate": 4.0004759056170985e-06,
      "loss": 2.2352,
      "step": 54278
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.146816372871399,
      "learning_rate": 4.000146506599306e-06,
      "loss": 2.4421,
      "step": 54279
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0890522003173828,
      "learning_rate": 3.9998171177529e-06,
      "loss": 2.4495,
      "step": 54280
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.034906029701233,
      "learning_rate": 3.999487739078433e-06,
      "loss": 2.3708,
      "step": 54281
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1294138431549072,
      "learning_rate": 3.99915837057646e-06,
      "loss": 2.3298,
      "step": 54282
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.128667950630188,
      "learning_rate": 3.9988290122475474e-06,
      "loss": 2.1703,
      "step": 54283
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1416151523590088,
      "learning_rate": 3.998499664092249e-06,
      "loss": 2.4024,
      "step": 54284
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1701886653900146,
      "learning_rate": 3.998170326111121e-06,
      "loss": 2.2905,
      "step": 54285
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.141905426979065,
      "learning_rate": 3.997840998304726e-06,
      "loss": 2.2881,
      "step": 54286
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.225677251815796,
      "learning_rate": 3.997511680673623e-06,
      "loss": 2.3382,
      "step": 54287
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.139204740524292,
      "learning_rate": 3.997182373218363e-06,
      "loss": 2.3858,
      "step": 54288
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1663480997085571,
      "learning_rate": 3.996853075939513e-06,
      "loss": 2.2663,
      "step": 54289
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.13605797290802,
      "learning_rate": 3.996523788837624e-06,
      "loss": 2.2423,
      "step": 54290
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.126234531402588,
      "learning_rate": 3.996194511913262e-06,
      "loss": 2.4657,
      "step": 54291
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9963400363922119,
      "learning_rate": 3.995865245166977e-06,
      "loss": 2.2256,
      "step": 54292
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9558942914009094,
      "learning_rate": 3.995535988599334e-06,
      "loss": 2.3232,
      "step": 54293
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.042704701423645,
      "learning_rate": 3.995206742210885e-06,
      "loss": 2.2335,
      "step": 54294
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0444953441619873,
      "learning_rate": 3.994877506002195e-06,
      "loss": 2.4133,
      "step": 54295
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1058430671691895,
      "learning_rate": 3.9945482799738185e-06,
      "loss": 2.2617,
      "step": 54296
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2016643285751343,
      "learning_rate": 3.994219064126314e-06,
      "loss": 2.381,
      "step": 54297
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0316002368927002,
      "learning_rate": 3.993889858460236e-06,
      "loss": 2.2999,
      "step": 54298
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0254255533218384,
      "learning_rate": 3.9935606629761496e-06,
      "loss": 2.6058,
      "step": 54299
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0274081230163574,
      "learning_rate": 3.993231477674605e-06,
      "loss": 2.3343,
      "step": 54300
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0945518016815186,
      "learning_rate": 3.992902302556167e-06,
      "loss": 2.2801,
      "step": 54301
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0637847185134888,
      "learning_rate": 3.992573137621389e-06,
      "loss": 2.4147,
      "step": 54302
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1433606147766113,
      "learning_rate": 3.992243982870835e-06,
      "loss": 2.267,
      "step": 54303
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1143710613250732,
      "learning_rate": 3.991914838305055e-06,
      "loss": 2.2484,
      "step": 54304
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1006748676300049,
      "learning_rate": 3.991585703924616e-06,
      "loss": 2.5082,
      "step": 54305
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0927485227584839,
      "learning_rate": 3.991256579730065e-06,
      "loss": 2.3816,
      "step": 54306
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1732473373413086,
      "learning_rate": 3.990927465721971e-06,
      "loss": 2.2634,
      "step": 54307
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1005446910858154,
      "learning_rate": 3.990598361900887e-06,
      "loss": 2.3424,
      "step": 54308
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1594575643539429,
      "learning_rate": 3.990269268267371e-06,
      "loss": 2.2836,
      "step": 54309
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1776405572891235,
      "learning_rate": 3.989940184821977e-06,
      "loss": 2.2495,
      "step": 54310
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1325763463974,
      "learning_rate": 3.98961111156527e-06,
      "loss": 2.5018,
      "step": 54311
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.042112112045288,
      "learning_rate": 3.9892820484978014e-06,
      "loss": 2.2959,
      "step": 54312
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1325972080230713,
      "learning_rate": 3.988952995620136e-06,
      "loss": 2.3234,
      "step": 54313
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0404932498931885,
      "learning_rate": 3.988623952932824e-06,
      "loss": 2.4129,
      "step": 54314
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0561957359313965,
      "learning_rate": 3.988294920436431e-06,
      "loss": 2.2568,
      "step": 54315
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2411752939224243,
      "learning_rate": 3.98796589813151e-06,
      "loss": 2.4969,
      "step": 54316
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.299290657043457,
      "learning_rate": 3.987636886018617e-06,
      "loss": 2.3753,
      "step": 54317
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0028494596481323,
      "learning_rate": 3.987307884098314e-06,
      "loss": 2.3715,
      "step": 54318
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0199798345565796,
      "learning_rate": 3.9869788923711565e-06,
      "loss": 2.0648,
      "step": 54319
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.213322401046753,
      "learning_rate": 3.986649910837705e-06,
      "loss": 2.4342,
      "step": 54320
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0035566091537476,
      "learning_rate": 3.986320939498511e-06,
      "loss": 2.2099,
      "step": 54321
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9636157751083374,
      "learning_rate": 3.9859919783541444e-06,
      "loss": 2.3085,
      "step": 54322
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9694779515266418,
      "learning_rate": 3.9856630274051465e-06,
      "loss": 2.1758,
      "step": 54323
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0008591413497925,
      "learning_rate": 3.985334086652088e-06,
      "loss": 2.2375,
      "step": 54324
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1460490226745605,
      "learning_rate": 3.985005156095519e-06,
      "loss": 2.2143,
      "step": 54325
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0575443506240845,
      "learning_rate": 3.984676235736002e-06,
      "loss": 2.3708,
      "step": 54326
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3603180646896362,
      "learning_rate": 3.984347325574088e-06,
      "loss": 2.1997,
      "step": 54327
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1406488418579102,
      "learning_rate": 3.9840184256103445e-06,
      "loss": 2.4803,
      "step": 54328
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1754077672958374,
      "learning_rate": 3.98368953584532e-06,
      "loss": 2.3543,
      "step": 54329
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0396480560302734,
      "learning_rate": 3.983360656279579e-06,
      "loss": 2.362,
      "step": 54330
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9786800742149353,
      "learning_rate": 3.983031786913671e-06,
      "loss": 2.3545,
      "step": 54331
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1096547842025757,
      "learning_rate": 3.982702927748163e-06,
      "loss": 2.3354,
      "step": 54332
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0264194011688232,
      "learning_rate": 3.982374078783605e-06,
      "loss": 2.3691,
      "step": 54333
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0731244087219238,
      "learning_rate": 3.98204524002056e-06,
      "loss": 2.209,
      "step": 54334
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0392543077468872,
      "learning_rate": 3.98171641145958e-06,
      "loss": 2.5238,
      "step": 54335
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9838812947273254,
      "learning_rate": 3.9813875931012305e-06,
      "loss": 2.1528,
      "step": 54336
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1156084537506104,
      "learning_rate": 3.981058784946058e-06,
      "loss": 2.3514,
      "step": 54337
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.160477876663208,
      "learning_rate": 3.980729986994629e-06,
      "loss": 2.31,
      "step": 54338
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9964483976364136,
      "learning_rate": 3.9804011992474935e-06,
      "loss": 2.2445,
      "step": 54339
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0967358350753784,
      "learning_rate": 3.980072421705217e-06,
      "loss": 2.4308,
      "step": 54340
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.051161527633667,
      "learning_rate": 3.979743654368349e-06,
      "loss": 2.3255,
      "step": 54341
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4286640882492065,
      "learning_rate": 3.9794148972374535e-06,
      "loss": 2.4107,
      "step": 54342
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.163688063621521,
      "learning_rate": 3.979086150313085e-06,
      "loss": 2.271,
      "step": 54343
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0572872161865234,
      "learning_rate": 3.978757413595798e-06,
      "loss": 2.3475,
      "step": 54344
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0831340551376343,
      "learning_rate": 3.978428687086155e-06,
      "loss": 2.3325,
      "step": 54345
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2822109460830688,
      "learning_rate": 3.978099970784709e-06,
      "loss": 2.4591,
      "step": 54346
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.023667335510254,
      "learning_rate": 3.977771264692021e-06,
      "loss": 2.5098,
      "step": 54347
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1606286764144897,
      "learning_rate": 3.977442568808643e-06,
      "loss": 2.5045,
      "step": 54348
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0232677459716797,
      "learning_rate": 3.977113883135143e-06,
      "loss": 2.1228,
      "step": 54349
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1046500205993652,
      "learning_rate": 3.976785207672064e-06,
      "loss": 2.1318,
      "step": 54350
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.183823585510254,
      "learning_rate": 3.976456542419973e-06,
      "loss": 2.5363,
      "step": 54351
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0505919456481934,
      "learning_rate": 3.976127887379421e-06,
      "loss": 2.2047,
      "step": 54352
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1753380298614502,
      "learning_rate": 3.975799242550972e-06,
      "loss": 2.3556,
      "step": 54353
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0643032789230347,
      "learning_rate": 3.9754706079351744e-06,
      "loss": 2.2853,
      "step": 54354
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1233562231063843,
      "learning_rate": 3.9751419835325955e-06,
      "loss": 2.2449,
      "step": 54355
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.207156777381897,
      "learning_rate": 3.974813369343783e-06,
      "loss": 2.1483,
      "step": 54356
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4374761581420898,
      "learning_rate": 3.974484765369302e-06,
      "loss": 2.1387,
      "step": 54357
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1846275329589844,
      "learning_rate": 3.974156171609702e-06,
      "loss": 2.3861,
      "step": 54358
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0899591445922852,
      "learning_rate": 3.973827588065548e-06,
      "loss": 2.2369,
      "step": 54359
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0877575874328613,
      "learning_rate": 3.97349901473739e-06,
      "loss": 2.3075,
      "step": 54360
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0611032247543335,
      "learning_rate": 3.97317045162579e-06,
      "loss": 2.0648,
      "step": 54361
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0798791646957397,
      "learning_rate": 3.972841898731303e-06,
      "loss": 2.4767,
      "step": 54362
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0947012901306152,
      "learning_rate": 3.972513356054487e-06,
      "loss": 2.4147,
      "step": 54363
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0300847291946411,
      "learning_rate": 3.972184823595894e-06,
      "loss": 2.3155,
      "step": 54364
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0889230966567993,
      "learning_rate": 3.971856301356087e-06,
      "loss": 2.2093,
      "step": 54365
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0481497049331665,
      "learning_rate": 3.97152778933562e-06,
      "loss": 2.2934,
      "step": 54366
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1607290506362915,
      "learning_rate": 3.971199287535053e-06,
      "loss": 2.2953,
      "step": 54367
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1461259126663208,
      "learning_rate": 3.970870795954935e-06,
      "loss": 2.3568,
      "step": 54368
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1953308582305908,
      "learning_rate": 3.970542314595835e-06,
      "loss": 2.2543,
      "step": 54369
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.947519063949585,
      "learning_rate": 3.970213843458302e-06,
      "loss": 2.2796,
      "step": 54370
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1205123662948608,
      "learning_rate": 3.969885382542891e-06,
      "loss": 2.2201,
      "step": 54371
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0062617063522339,
      "learning_rate": 3.969556931850165e-06,
      "loss": 2.3334,
      "step": 54372
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.207969307899475,
      "learning_rate": 3.9692284913806746e-06,
      "loss": 2.2628,
      "step": 54373
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0374714136123657,
      "learning_rate": 3.9689000611349835e-06,
      "loss": 2.3482,
      "step": 54374
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1842678785324097,
      "learning_rate": 3.968571641113644e-06,
      "loss": 2.4696,
      "step": 54375
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1902518272399902,
      "learning_rate": 3.9682432313172115e-06,
      "loss": 2.4221,
      "step": 54376
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.121520757675171,
      "learning_rate": 3.967914831746248e-06,
      "loss": 2.3086,
      "step": 54377
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0260177850723267,
      "learning_rate": 3.967586442401308e-06,
      "loss": 2.2811,
      "step": 54378
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0978375673294067,
      "learning_rate": 3.967258063282943e-06,
      "loss": 2.2891,
      "step": 54379
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0514336824417114,
      "learning_rate": 3.966929694391717e-06,
      "loss": 2.2817,
      "step": 54380
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.139814853668213,
      "learning_rate": 3.96660133572818e-06,
      "loss": 2.4426,
      "step": 54381
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0598793029785156,
      "learning_rate": 3.966272987292897e-06,
      "loss": 2.437,
      "step": 54382
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2452222108840942,
      "learning_rate": 3.965944649086416e-06,
      "loss": 2.3477,
      "step": 54383
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9890958666801453,
      "learning_rate": 3.965616321109301e-06,
      "loss": 2.3488,
      "step": 54384
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0418959856033325,
      "learning_rate": 3.965288003362102e-06,
      "loss": 2.2768,
      "step": 54385
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.039308786392212,
      "learning_rate": 3.964959695845383e-06,
      "loss": 2.3978,
      "step": 54386
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1586666107177734,
      "learning_rate": 3.964631398559692e-06,
      "loss": 2.584,
      "step": 54387
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0955289602279663,
      "learning_rate": 3.964303111505598e-06,
      "loss": 2.4601,
      "step": 54388
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.143955111503601,
      "learning_rate": 3.963974834683641e-06,
      "loss": 2.3272,
      "step": 54389
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1157652139663696,
      "learning_rate": 3.96364656809439e-06,
      "loss": 2.3935,
      "step": 54390
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2496814727783203,
      "learning_rate": 3.963318311738394e-06,
      "loss": 2.2677,
      "step": 54391
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0501890182495117,
      "learning_rate": 3.962990065616217e-06,
      "loss": 2.2683,
      "step": 54392
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1434425115585327,
      "learning_rate": 3.962661829728408e-06,
      "loss": 2.1007,
      "step": 54393
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3276365995407104,
      "learning_rate": 3.9623336040755304e-06,
      "loss": 2.1525,
      "step": 54394
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0587646961212158,
      "learning_rate": 3.962005388658134e-06,
      "loss": 2.2492,
      "step": 54395
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0706212520599365,
      "learning_rate": 3.96167718347678e-06,
      "loss": 2.255,
      "step": 54396
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2425172328948975,
      "learning_rate": 3.961348988532021e-06,
      "loss": 2.1443,
      "step": 54397
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1204050779342651,
      "learning_rate": 3.961020803824419e-06,
      "loss": 2.2876,
      "step": 54398
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.122757077217102,
      "learning_rate": 3.960692629354527e-06,
      "loss": 2.2888,
      "step": 54399
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0635277032852173,
      "learning_rate": 3.960364465122898e-06,
      "loss": 2.3591,
      "step": 54400
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1482371091842651,
      "learning_rate": 3.960036311130094e-06,
      "loss": 2.2867,
      "step": 54401
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3106443881988525,
      "learning_rate": 3.9597081673766705e-06,
      "loss": 2.3458,
      "step": 54402
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2620835304260254,
      "learning_rate": 3.959380033863177e-06,
      "loss": 2.2294,
      "step": 54403
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9233466982841492,
      "learning_rate": 3.959051910590179e-06,
      "loss": 2.4218,
      "step": 54404
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.173396348953247,
      "learning_rate": 3.958723797558229e-06,
      "loss": 2.281,
      "step": 54405
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0420739650726318,
      "learning_rate": 3.958395694767879e-06,
      "loss": 2.4461,
      "step": 54406
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0947498083114624,
      "learning_rate": 3.958067602219694e-06,
      "loss": 2.1766,
      "step": 54407
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.066528558731079,
      "learning_rate": 3.95773951991422e-06,
      "loss": 2.2559,
      "step": 54408
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.122127890586853,
      "learning_rate": 3.957411447852023e-06,
      "loss": 2.3452,
      "step": 54409
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0495887994766235,
      "learning_rate": 3.957083386033651e-06,
      "loss": 2.3339,
      "step": 54410
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.016764760017395,
      "learning_rate": 3.956755334459669e-06,
      "loss": 2.3929,
      "step": 54411
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9837324023246765,
      "learning_rate": 3.956427293130622e-06,
      "loss": 2.4143,
      "step": 54412
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9960616230964661,
      "learning_rate": 3.9560992620470774e-06,
      "loss": 2.167,
      "step": 54413
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0532242059707642,
      "learning_rate": 3.955771241209582e-06,
      "loss": 2.2366,
      "step": 54414
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0866559743881226,
      "learning_rate": 3.955443230618703e-06,
      "loss": 2.3107,
      "step": 54415
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0811797380447388,
      "learning_rate": 3.955115230274982e-06,
      "loss": 2.3161,
      "step": 54416
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1360267400741577,
      "learning_rate": 3.954787240178987e-06,
      "loss": 2.324,
      "step": 54417
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0767561197280884,
      "learning_rate": 3.9544592603312644e-06,
      "loss": 2.3636,
      "step": 54418
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1246916055679321,
      "learning_rate": 3.954131290732379e-06,
      "loss": 2.2736,
      "step": 54419
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.945223331451416,
      "learning_rate": 3.9538033313828806e-06,
      "loss": 2.2015,
      "step": 54420
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.177634358406067,
      "learning_rate": 3.95347538228333e-06,
      "loss": 2.3648,
      "step": 54421
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0380754470825195,
      "learning_rate": 3.953147443434279e-06,
      "loss": 2.3192,
      "step": 54422
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0284227132797241,
      "learning_rate": 3.952819514836288e-06,
      "loss": 2.135,
      "step": 54423
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0189573764801025,
      "learning_rate": 3.952491596489907e-06,
      "loss": 2.1689,
      "step": 54424
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.116593837738037,
      "learning_rate": 3.952163688395698e-06,
      "loss": 2.1234,
      "step": 54425
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.019371509552002,
      "learning_rate": 3.951835790554215e-06,
      "loss": 2.3165,
      "step": 54426
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1044644117355347,
      "learning_rate": 3.95150790296601e-06,
      "loss": 2.2613,
      "step": 54427
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.157180666923523,
      "learning_rate": 3.951180025631644e-06,
      "loss": 2.4479,
      "step": 54428
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0151855945587158,
      "learning_rate": 3.950852158551671e-06,
      "loss": 2.1805,
      "step": 54429
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0939844846725464,
      "learning_rate": 3.950524301726644e-06,
      "loss": 2.5017,
      "step": 54430
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.124140977859497,
      "learning_rate": 3.950196455157124e-06,
      "loss": 2.1255,
      "step": 54431
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3927487134933472,
      "learning_rate": 3.94986861884366e-06,
      "loss": 2.2416,
      "step": 54432
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9625627994537354,
      "learning_rate": 3.949540792786817e-06,
      "loss": 2.3302,
      "step": 54433
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0021953582763672,
      "learning_rate": 3.949212976987145e-06,
      "loss": 2.2771,
      "step": 54434
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1836998462677002,
      "learning_rate": 3.9488851714451955e-06,
      "loss": 2.466,
      "step": 54435
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1106138229370117,
      "learning_rate": 3.9485573761615336e-06,
      "loss": 2.1864,
      "step": 54436
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.101712703704834,
      "learning_rate": 3.948229591136708e-06,
      "loss": 2.1934,
      "step": 54437
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1034225225448608,
      "learning_rate": 3.947901816371279e-06,
      "loss": 2.3979,
      "step": 54438
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1583960056304932,
      "learning_rate": 3.9475740518657965e-06,
      "loss": 2.4198,
      "step": 54439
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1925976276397705,
      "learning_rate": 3.947246297620823e-06,
      "loss": 2.2678,
      "step": 54440
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.041431188583374,
      "learning_rate": 3.946918553636912e-06,
      "loss": 2.4504,
      "step": 54441
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1067438125610352,
      "learning_rate": 3.946590819914619e-06,
      "loss": 2.3774,
      "step": 54442
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0805280208587646,
      "learning_rate": 3.946263096454493e-06,
      "loss": 2.4605,
      "step": 54443
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.059220790863037,
      "learning_rate": 3.9459353832571015e-06,
      "loss": 2.4786,
      "step": 54444
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2426024675369263,
      "learning_rate": 3.945607680322988e-06,
      "loss": 2.4412,
      "step": 54445
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.202462911605835,
      "learning_rate": 3.945279987652718e-06,
      "loss": 2.2868,
      "step": 54446
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.098184585571289,
      "learning_rate": 3.9449523052468386e-06,
      "loss": 2.2215,
      "step": 54447
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0291433334350586,
      "learning_rate": 3.944624633105913e-06,
      "loss": 2.3726,
      "step": 54448
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1671373844146729,
      "learning_rate": 3.944296971230491e-06,
      "loss": 2.2981,
      "step": 54449
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9987071752548218,
      "learning_rate": 3.9439693196211335e-06,
      "loss": 2.3578,
      "step": 54450
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9941328763961792,
      "learning_rate": 3.94364167827839e-06,
      "loss": 2.5469,
      "step": 54451
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9711572527885437,
      "learning_rate": 3.943314047202821e-06,
      "loss": 2.098,
      "step": 54452
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2995612621307373,
      "learning_rate": 3.9429864263949756e-06,
      "loss": 2.1627,
      "step": 54453
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0570577383041382,
      "learning_rate": 3.942658815855422e-06,
      "loss": 2.5089,
      "step": 54454
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1292412281036377,
      "learning_rate": 3.942331215584698e-06,
      "loss": 2.2186,
      "step": 54455
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1080535650253296,
      "learning_rate": 3.942003625583372e-06,
      "loss": 2.3732,
      "step": 54456
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0034223794937134,
      "learning_rate": 3.941676045851993e-06,
      "loss": 2.2535,
      "step": 54457
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0366575717926025,
      "learning_rate": 3.941348476391122e-06,
      "loss": 2.2121,
      "step": 54458
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3139395713806152,
      "learning_rate": 3.941020917201306e-06,
      "loss": 2.1854,
      "step": 54459
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2912988662719727,
      "learning_rate": 3.940693368283108e-06,
      "loss": 2.2516,
      "step": 54460
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.280316710472107,
      "learning_rate": 3.9403658296370815e-06,
      "loss": 2.234,
      "step": 54461
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0668774843215942,
      "learning_rate": 3.940038301263777e-06,
      "loss": 2.4128,
      "step": 54462
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1187764406204224,
      "learning_rate": 3.939710783163758e-06,
      "loss": 2.3665,
      "step": 54463
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1210570335388184,
      "learning_rate": 3.93938327533757e-06,
      "loss": 2.3112,
      "step": 54464
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.247681975364685,
      "learning_rate": 3.939055777785778e-06,
      "loss": 2.4087,
      "step": 54465
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0314393043518066,
      "learning_rate": 3.93872829050893e-06,
      "loss": 2.4069,
      "step": 54466
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0806090831756592,
      "learning_rate": 3.938400813507589e-06,
      "loss": 2.382,
      "step": 54467
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2281737327575684,
      "learning_rate": 3.938073346782299e-06,
      "loss": 2.2997,
      "step": 54468
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0955684185028076,
      "learning_rate": 3.937745890333623e-06,
      "loss": 2.5181,
      "step": 54469
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0779796838760376,
      "learning_rate": 3.937418444162112e-06,
      "loss": 2.3922,
      "step": 54470
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0137827396392822,
      "learning_rate": 3.937091008268327e-06,
      "loss": 2.2412,
      "step": 54471
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9524229764938354,
      "learning_rate": 3.936763582652815e-06,
      "loss": 2.3053,
      "step": 54472
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0793498754501343,
      "learning_rate": 3.936436167316139e-06,
      "loss": 2.4066,
      "step": 54473
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0318958759307861,
      "learning_rate": 3.936108762258848e-06,
      "loss": 2.4188,
      "step": 54474
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9850707650184631,
      "learning_rate": 3.935781367481503e-06,
      "loss": 2.3543,
      "step": 54475
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0118662118911743,
      "learning_rate": 3.93545398298465e-06,
      "loss": 2.1925,
      "step": 54476
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1637647151947021,
      "learning_rate": 3.935126608768856e-06,
      "loss": 2.3118,
      "step": 54477
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.15175199508667,
      "learning_rate": 3.934799244834665e-06,
      "loss": 2.5105,
      "step": 54478
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1012383699417114,
      "learning_rate": 3.93447189118264e-06,
      "loss": 2.3546,
      "step": 54479
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0645503997802734,
      "learning_rate": 3.934144547813329e-06,
      "loss": 2.3366,
      "step": 54480
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0289729833602905,
      "learning_rate": 3.933817214727297e-06,
      "loss": 2.2156,
      "step": 54481
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1390464305877686,
      "learning_rate": 3.933489891925085e-06,
      "loss": 2.373,
      "step": 54482
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0690206289291382,
      "learning_rate": 3.93316257940726e-06,
      "loss": 2.2902,
      "step": 54483
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1852296590805054,
      "learning_rate": 3.932835277174368e-06,
      "loss": 2.6152,
      "step": 54484
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.978272557258606,
      "learning_rate": 3.932507985226972e-06,
      "loss": 2.4522,
      "step": 54485
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0899525880813599,
      "learning_rate": 3.932180703565619e-06,
      "loss": 2.4213,
      "step": 54486
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2072694301605225,
      "learning_rate": 3.931853432190872e-06,
      "loss": 2.2324,
      "step": 54487
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0759685039520264,
      "learning_rate": 3.93152617110328e-06,
      "loss": 2.4211,
      "step": 54488
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0342172384262085,
      "learning_rate": 3.931198920303397e-06,
      "loss": 2.264,
      "step": 54489
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4219831228256226,
      "learning_rate": 3.930871679791784e-06,
      "loss": 2.3543,
      "step": 54490
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0712904930114746,
      "learning_rate": 3.930544449568987e-06,
      "loss": 2.3461,
      "step": 54491
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1405662298202515,
      "learning_rate": 3.93021722963557e-06,
      "loss": 2.2923,
      "step": 54492
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0360544919967651,
      "learning_rate": 3.929890019992079e-06,
      "loss": 2.1108,
      "step": 54493
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.326669454574585,
      "learning_rate": 3.929562820639077e-06,
      "loss": 2.3304,
      "step": 54494
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0961037874221802,
      "learning_rate": 3.9292356315771155e-06,
      "loss": 2.2084,
      "step": 54495
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2480082511901855,
      "learning_rate": 3.928908452806747e-06,
      "loss": 2.1327,
      "step": 54496
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1561980247497559,
      "learning_rate": 3.9285812843285245e-06,
      "loss": 2.2514,
      "step": 54497
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1703402996063232,
      "learning_rate": 3.92825412614301e-06,
      "loss": 2.4334,
      "step": 54498
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1626520156860352,
      "learning_rate": 3.927926978250749e-06,
      "loss": 2.4092,
      "step": 54499
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1387048959732056,
      "learning_rate": 3.927599840652305e-06,
      "loss": 2.3625,
      "step": 54500
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.159234642982483,
      "learning_rate": 3.9272727133482255e-06,
      "loss": 2.3731,
      "step": 54501
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0575238466262817,
      "learning_rate": 3.926945596339071e-06,
      "loss": 2.3138,
      "step": 54502
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0754235982894897,
      "learning_rate": 3.92661848962539e-06,
      "loss": 2.6016,
      "step": 54503
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2267496585845947,
      "learning_rate": 3.926291393207744e-06,
      "loss": 2.247,
      "step": 54504
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9974063038825989,
      "learning_rate": 3.925964307086681e-06,
      "loss": 2.2176,
      "step": 54505
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1976244449615479,
      "learning_rate": 3.925637231262759e-06,
      "loss": 2.2996,
      "step": 54506
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1270732879638672,
      "learning_rate": 3.925310165736533e-06,
      "loss": 2.3418,
      "step": 54507
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9989394545555115,
      "learning_rate": 3.9249831105085565e-06,
      "loss": 2.6463,
      "step": 54508
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2283294200897217,
      "learning_rate": 3.92465606557938e-06,
      "loss": 2.418,
      "step": 54509
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.020090103149414,
      "learning_rate": 3.924329030949565e-06,
      "loss": 2.1906,
      "step": 54510
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.097097635269165,
      "learning_rate": 3.924002006619658e-06,
      "loss": 2.34,
      "step": 54511
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1527026891708374,
      "learning_rate": 3.923674992590223e-06,
      "loss": 2.4475,
      "step": 54512
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1031267642974854,
      "learning_rate": 3.9233479888618046e-06,
      "loss": 2.352,
      "step": 54513
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0645060539245605,
      "learning_rate": 3.923020995434965e-06,
      "loss": 2.4797,
      "step": 54514
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0044888257980347,
      "learning_rate": 3.922694012310253e-06,
      "loss": 2.2494,
      "step": 54515
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.013884425163269,
      "learning_rate": 3.9223670394882276e-06,
      "loss": 2.2591,
      "step": 54516
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1968902349472046,
      "learning_rate": 3.922040076969441e-06,
      "loss": 2.3549,
      "step": 54517
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.060520887374878,
      "learning_rate": 3.921713124754444e-06,
      "loss": 2.1323,
      "step": 54518
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1888351440429688,
      "learning_rate": 3.921386182843797e-06,
      "loss": 1.9799,
      "step": 54519
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1843196153640747,
      "learning_rate": 3.921059251238052e-06,
      "loss": 2.5549,
      "step": 54520
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.139568567276001,
      "learning_rate": 3.920732329937759e-06,
      "loss": 2.5675,
      "step": 54521
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0037860870361328,
      "learning_rate": 3.920405418943479e-06,
      "loss": 2.3239,
      "step": 54522
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.963854193687439,
      "learning_rate": 3.9200785182557635e-06,
      "loss": 2.1121,
      "step": 54523
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0762258768081665,
      "learning_rate": 3.919751627875161e-06,
      "loss": 2.2601,
      "step": 54524
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.221311330795288,
      "learning_rate": 3.919424747802236e-06,
      "loss": 2.1913,
      "step": 54525
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0758873224258423,
      "learning_rate": 3.919097878037534e-06,
      "loss": 2.2673,
      "step": 54526
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0416306257247925,
      "learning_rate": 3.918771018581616e-06,
      "loss": 2.267,
      "step": 54527
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1206365823745728,
      "learning_rate": 3.918444169435029e-06,
      "loss": 2.3493,
      "step": 54528
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0791693925857544,
      "learning_rate": 3.9181173305983345e-06,
      "loss": 2.4333,
      "step": 54529
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1274616718292236,
      "learning_rate": 3.91779050207208e-06,
      "loss": 2.5068,
      "step": 54530
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0301328897476196,
      "learning_rate": 3.9174636838568245e-06,
      "loss": 2.43,
      "step": 54531
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.055535078048706,
      "learning_rate": 3.917136875953118e-06,
      "loss": 2.2873,
      "step": 54532
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0349653959274292,
      "learning_rate": 3.916810078361524e-06,
      "loss": 2.5598,
      "step": 54533
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0991829633712769,
      "learning_rate": 3.91648329108258e-06,
      "loss": 2.3669,
      "step": 54534
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3257983922958374,
      "learning_rate": 3.9161565141168544e-06,
      "loss": 2.4423,
      "step": 54535
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0093181133270264,
      "learning_rate": 3.915829747464891e-06,
      "loss": 2.3214,
      "step": 54536
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1794995069503784,
      "learning_rate": 3.9155029911272536e-06,
      "loss": 2.3751,
      "step": 54537
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0825214385986328,
      "learning_rate": 3.915176245104486e-06,
      "loss": 2.3415,
      "step": 54538
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0661242008209229,
      "learning_rate": 3.914849509397152e-06,
      "loss": 2.3316,
      "step": 54539
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.073469877243042,
      "learning_rate": 3.914522784005798e-06,
      "loss": 2.2166,
      "step": 54540
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0395170450210571,
      "learning_rate": 3.914196068930982e-06,
      "loss": 2.3039,
      "step": 54541
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.152904987335205,
      "learning_rate": 3.913869364173254e-06,
      "loss": 2.4956,
      "step": 54542
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.969113290309906,
      "learning_rate": 3.913542669733174e-06,
      "loss": 2.1521,
      "step": 54543
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.076791763305664,
      "learning_rate": 3.913215985611293e-06,
      "loss": 2.1981,
      "step": 54544
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0679173469543457,
      "learning_rate": 3.912889311808159e-06,
      "loss": 2.282,
      "step": 54545
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1224884986877441,
      "learning_rate": 3.912562648324336e-06,
      "loss": 2.1995,
      "step": 54546
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9966646432876587,
      "learning_rate": 3.912235995160372e-06,
      "loss": 2.158,
      "step": 54547
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1152139902114868,
      "learning_rate": 3.911909352316817e-06,
      "loss": 2.204,
      "step": 54548
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1317167282104492,
      "learning_rate": 3.911582719794234e-06,
      "loss": 2.1937,
      "step": 54549
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1861495971679688,
      "learning_rate": 3.911256097593167e-06,
      "loss": 2.2975,
      "step": 54550
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1589950323104858,
      "learning_rate": 3.910929485714179e-06,
      "loss": 2.3215,
      "step": 54551
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4050548076629639,
      "learning_rate": 3.910602884157819e-06,
      "loss": 2.1999,
      "step": 54552
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0944604873657227,
      "learning_rate": 3.9102762929246386e-06,
      "loss": 2.4894,
      "step": 54553
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.988018810749054,
      "learning_rate": 3.909949712015195e-06,
      "loss": 2.5196,
      "step": 54554
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0131793022155762,
      "learning_rate": 3.909623141430039e-06,
      "loss": 2.5622,
      "step": 54555
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0531221628189087,
      "learning_rate": 3.9092965811697295e-06,
      "loss": 2.3238,
      "step": 54556
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0536391735076904,
      "learning_rate": 3.908970031234812e-06,
      "loss": 2.4325,
      "step": 54557
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0626294612884521,
      "learning_rate": 3.9086434916258486e-06,
      "loss": 2.479,
      "step": 54558
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.032301664352417,
      "learning_rate": 3.908316962343386e-06,
      "loss": 2.4114,
      "step": 54559
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0776288509368896,
      "learning_rate": 3.907990443387987e-06,
      "loss": 2.4487,
      "step": 54560
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1331379413604736,
      "learning_rate": 3.907663934760191e-06,
      "loss": 2.2647,
      "step": 54561
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1201248168945312,
      "learning_rate": 3.907337436460563e-06,
      "loss": 2.4385,
      "step": 54562
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.054253339767456,
      "learning_rate": 3.90701094848965e-06,
      "loss": 2.1439,
      "step": 54563
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.057778000831604,
      "learning_rate": 3.906684470848012e-06,
      "loss": 2.5383,
      "step": 54564
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0597517490386963,
      "learning_rate": 3.9063580035361945e-06,
      "loss": 2.4024,
      "step": 54565
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0634104013442993,
      "learning_rate": 3.906031546554759e-06,
      "loss": 2.3354,
      "step": 54566
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0846890211105347,
      "learning_rate": 3.905705099904251e-06,
      "loss": 2.0029,
      "step": 54567
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1705948114395142,
      "learning_rate": 3.905378663585233e-06,
      "loss": 2.2556,
      "step": 54568
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1369564533233643,
      "learning_rate": 3.905052237598249e-06,
      "loss": 2.6081,
      "step": 54569
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1126985549926758,
      "learning_rate": 3.90472582194386e-06,
      "loss": 2.2121,
      "step": 54570
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0675753355026245,
      "learning_rate": 3.904399416622613e-06,
      "loss": 2.4895,
      "step": 54571
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2331111431121826,
      "learning_rate": 3.904073021635069e-06,
      "loss": 2.4781,
      "step": 54572
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0809450149536133,
      "learning_rate": 3.903746636981775e-06,
      "loss": 2.2099,
      "step": 54573
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.156277060508728,
      "learning_rate": 3.9034202626632876e-06,
      "loss": 2.4373,
      "step": 54574
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0999239683151245,
      "learning_rate": 3.903093898680154e-06,
      "loss": 2.1294,
      "step": 54575
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0785880088806152,
      "learning_rate": 3.902767545032937e-06,
      "loss": 2.3156,
      "step": 54576
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0579679012298584,
      "learning_rate": 3.902441201722181e-06,
      "loss": 2.4327,
      "step": 54577
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.069303274154663,
      "learning_rate": 3.902114868748446e-06,
      "loss": 2.3754,
      "step": 54578
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1075465679168701,
      "learning_rate": 3.901788546112284e-06,
      "loss": 2.2865,
      "step": 54579
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0216093063354492,
      "learning_rate": 3.901462233814241e-06,
      "loss": 2.1081,
      "step": 54580
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1679805517196655,
      "learning_rate": 3.901135931854881e-06,
      "loss": 2.2719,
      "step": 54581
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1255900859832764,
      "learning_rate": 3.900809640234749e-06,
      "loss": 2.297,
      "step": 54582
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.061447024345398,
      "learning_rate": 3.900483358954403e-06,
      "loss": 2.2875,
      "step": 54583
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1111341714859009,
      "learning_rate": 3.9001570880143925e-06,
      "loss": 2.3308,
      "step": 54584
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1690478324890137,
      "learning_rate": 3.8998308274152745e-06,
      "loss": 2.285,
      "step": 54585
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0369184017181396,
      "learning_rate": 3.899504577157601e-06,
      "loss": 2.3724,
      "step": 54586
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2012954950332642,
      "learning_rate": 3.899178337241923e-06,
      "loss": 2.3597,
      "step": 54587
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6986175775527954,
      "learning_rate": 3.898852107668793e-06,
      "loss": 2.196,
      "step": 54588
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0823853015899658,
      "learning_rate": 3.898525888438768e-06,
      "loss": 2.3406,
      "step": 54589
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0672640800476074,
      "learning_rate": 3.898199679552395e-06,
      "loss": 2.2405,
      "step": 54590
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0652737617492676,
      "learning_rate": 3.897873481010235e-06,
      "loss": 2.4268,
      "step": 54591
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.111510992050171,
      "learning_rate": 3.897547292812832e-06,
      "loss": 2.5643,
      "step": 54592
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9824612736701965,
      "learning_rate": 3.897221114960749e-06,
      "loss": 2.4849,
      "step": 54593
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0425623655319214,
      "learning_rate": 3.896894947454528e-06,
      "loss": 2.3415,
      "step": 54594
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1597657203674316,
      "learning_rate": 3.8965687902947325e-06,
      "loss": 2.5081,
      "step": 54595
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1097251176834106,
      "learning_rate": 3.896242643481908e-06,
      "loss": 2.5252,
      "step": 54596
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.17466139793396,
      "learning_rate": 3.895916507016611e-06,
      "loss": 2.3197,
      "step": 54597
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2113677263259888,
      "learning_rate": 3.895590380899392e-06,
      "loss": 2.1319,
      "step": 54598
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.050108551979065,
      "learning_rate": 3.8952642651308125e-06,
      "loss": 2.4086,
      "step": 54599
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9972080588340759,
      "learning_rate": 3.89493815971141e-06,
      "loss": 2.1967,
      "step": 54600
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8353246450424194,
      "learning_rate": 3.894612064641748e-06,
      "loss": 2.2111,
      "step": 54601
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1074655055999756,
      "learning_rate": 3.894285979922375e-06,
      "loss": 2.2902,
      "step": 54602
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0637580156326294,
      "learning_rate": 3.8939599055538495e-06,
      "loss": 2.3556,
      "step": 54603
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.056972622871399,
      "learning_rate": 3.8936338415367145e-06,
      "loss": 2.2981,
      "step": 54604
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1224055290222168,
      "learning_rate": 3.893307787871534e-06,
      "loss": 2.3113,
      "step": 54605
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6133968830108643,
      "learning_rate": 3.892981744558855e-06,
      "loss": 2.2886,
      "step": 54606
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0577939748764038,
      "learning_rate": 3.892655711599227e-06,
      "loss": 1.9495,
      "step": 54607
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.268140196800232,
      "learning_rate": 3.89232968899321e-06,
      "loss": 2.3378,
      "step": 54608
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.242403268814087,
      "learning_rate": 3.892003676741348e-06,
      "loss": 2.2174,
      "step": 54609
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9967531561851501,
      "learning_rate": 3.8916776748442035e-06,
      "loss": 2.28,
      "step": 54610
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9945631623268127,
      "learning_rate": 3.89135168330232e-06,
      "loss": 2.2586,
      "step": 54611
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1330621242523193,
      "learning_rate": 3.891025702116259e-06,
      "loss": 2.3028,
      "step": 54612
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0305757522583008,
      "learning_rate": 3.890699731286569e-06,
      "loss": 2.2542,
      "step": 54613
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0245554447174072,
      "learning_rate": 3.890373770813801e-06,
      "loss": 2.2695,
      "step": 54614
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1199737787246704,
      "learning_rate": 3.890047820698506e-06,
      "loss": 2.3273,
      "step": 54615
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1062678098678589,
      "learning_rate": 3.889721880941243e-06,
      "loss": 2.2748,
      "step": 54616
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0257599353790283,
      "learning_rate": 3.889395951542556e-06,
      "loss": 2.2161,
      "step": 54617
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.044562816619873,
      "learning_rate": 3.8890700325030074e-06,
      "loss": 2.4648,
      "step": 54618
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0469030141830444,
      "learning_rate": 3.888744123823141e-06,
      "loss": 2.2286,
      "step": 54619
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.094152569770813,
      "learning_rate": 3.8884182255035165e-06,
      "loss": 2.2249,
      "step": 54620
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0644131898880005,
      "learning_rate": 3.88809233754468e-06,
      "loss": 2.3397,
      "step": 54621
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3746294975280762,
      "learning_rate": 3.88776645994719e-06,
      "loss": 2.4668,
      "step": 54622
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9879816770553589,
      "learning_rate": 3.8874405927115925e-06,
      "loss": 2.3779,
      "step": 54623
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1288830041885376,
      "learning_rate": 3.887114735838447e-06,
      "loss": 2.4392,
      "step": 54624
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1294584274291992,
      "learning_rate": 3.886788889328298e-06,
      "loss": 2.5525,
      "step": 54625
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2913841009140015,
      "learning_rate": 3.886463053181709e-06,
      "loss": 2.4469,
      "step": 54626
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9732531309127808,
      "learning_rate": 3.886137227399221e-06,
      "loss": 2.2866,
      "step": 54627
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.146060585975647,
      "learning_rate": 3.885811411981392e-06,
      "loss": 2.5744,
      "step": 54628
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0821250677108765,
      "learning_rate": 3.885485606928771e-06,
      "loss": 2.3942,
      "step": 54629
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0646064281463623,
      "learning_rate": 3.885159812241914e-06,
      "loss": 2.3191,
      "step": 54630
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1693449020385742,
      "learning_rate": 3.884834027921371e-06,
      "loss": 2.1335,
      "step": 54631
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2506647109985352,
      "learning_rate": 3.884508253967697e-06,
      "loss": 2.477,
      "step": 54632
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0745466947555542,
      "learning_rate": 3.88418249038144e-06,
      "loss": 2.3516,
      "step": 54633
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.129651427268982,
      "learning_rate": 3.8838567371631575e-06,
      "loss": 2.3365,
      "step": 54634
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.158024787902832,
      "learning_rate": 3.883530994313399e-06,
      "loss": 2.1277,
      "step": 54635
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.05343496799469,
      "learning_rate": 3.883205261832713e-06,
      "loss": 2.5501,
      "step": 54636
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1008634567260742,
      "learning_rate": 3.882879539721659e-06,
      "loss": 2.351,
      "step": 54637
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0326679944992065,
      "learning_rate": 3.882553827980783e-06,
      "loss": 2.4638,
      "step": 54638
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9958159923553467,
      "learning_rate": 3.882228126610642e-06,
      "loss": 2.0604,
      "step": 54639
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7031301259994507,
      "learning_rate": 3.881902435611786e-06,
      "loss": 2.4512,
      "step": 54640
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2728971242904663,
      "learning_rate": 3.8815767549847685e-06,
      "loss": 2.3264,
      "step": 54641
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0169782638549805,
      "learning_rate": 3.881251084730135e-06,
      "loss": 2.4962,
      "step": 54642
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9597529172897339,
      "learning_rate": 3.880925424848447e-06,
      "loss": 2.2117,
      "step": 54643
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1899052858352661,
      "learning_rate": 3.880599775340248e-06,
      "loss": 2.3947,
      "step": 54644
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0564582347869873,
      "learning_rate": 3.880274136206099e-06,
      "loss": 2.328,
      "step": 54645
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1822589635849,
      "learning_rate": 3.879948507446544e-06,
      "loss": 2.2949,
      "step": 54646
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1703226566314697,
      "learning_rate": 3.879622889062141e-06,
      "loss": 2.3422,
      "step": 54647
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.038962960243225,
      "learning_rate": 3.879297281053436e-06,
      "loss": 2.5371,
      "step": 54648
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1573526859283447,
      "learning_rate": 3.878971683420989e-06,
      "loss": 2.387,
      "step": 54649
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0663676261901855,
      "learning_rate": 3.8786460961653425e-06,
      "loss": 2.3327,
      "step": 54650
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1072323322296143,
      "learning_rate": 3.878320519287058e-06,
      "loss": 2.3905,
      "step": 54651
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.047060489654541,
      "learning_rate": 3.877994952786682e-06,
      "loss": 2.2334,
      "step": 54652
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0783376693725586,
      "learning_rate": 3.877669396664768e-06,
      "loss": 2.3707,
      "step": 54653
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.031437873840332,
      "learning_rate": 3.877343850921864e-06,
      "loss": 2.3638,
      "step": 54654
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0470860004425049,
      "learning_rate": 3.877018315558528e-06,
      "loss": 2.3003,
      "step": 54655
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0007824897766113,
      "learning_rate": 3.876692790575306e-06,
      "loss": 2.3166,
      "step": 54656
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0529158115386963,
      "learning_rate": 3.876367275972755e-06,
      "loss": 2.3103,
      "step": 54657
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9891551733016968,
      "learning_rate": 3.876041771751422e-06,
      "loss": 2.2919,
      "step": 54658
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.03561532497406,
      "learning_rate": 3.875716277911865e-06,
      "loss": 2.3572,
      "step": 54659
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1243327856063843,
      "learning_rate": 3.8753907944546285e-06,
      "loss": 2.3783,
      "step": 54660
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0552012920379639,
      "learning_rate": 3.8750653213802715e-06,
      "loss": 2.4037,
      "step": 54661
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0728923082351685,
      "learning_rate": 3.874739858689342e-06,
      "loss": 2.2809,
      "step": 54662
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1704750061035156,
      "learning_rate": 3.874414406382389e-06,
      "loss": 2.1075,
      "step": 54663
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1233439445495605,
      "learning_rate": 3.874088964459971e-06,
      "loss": 2.135,
      "step": 54664
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.001876711845398,
      "learning_rate": 3.873763532922636e-06,
      "loss": 2.325,
      "step": 54665
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0604418516159058,
      "learning_rate": 3.873438111770932e-06,
      "loss": 2.2361,
      "step": 54666
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9702507853507996,
      "learning_rate": 3.873112701005418e-06,
      "loss": 2.3515,
      "step": 54667
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3155986070632935,
      "learning_rate": 3.872787300626638e-06,
      "loss": 2.2558,
      "step": 54668
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0943266153335571,
      "learning_rate": 3.872461910635151e-06,
      "loss": 2.2737,
      "step": 54669
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1332619190216064,
      "learning_rate": 3.8721365310315065e-06,
      "loss": 2.0677,
      "step": 54670
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9970875978469849,
      "learning_rate": 3.87181116181625e-06,
      "loss": 2.3438,
      "step": 54671
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1354565620422363,
      "learning_rate": 3.871485802989942e-06,
      "loss": 2.3324,
      "step": 54672
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1507960557937622,
      "learning_rate": 3.871160454553127e-06,
      "loss": 2.2626,
      "step": 54673
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0370330810546875,
      "learning_rate": 3.8708351165063616e-06,
      "loss": 2.1153,
      "step": 54674
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.102895975112915,
      "learning_rate": 3.870509788850193e-06,
      "loss": 2.1931,
      "step": 54675
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1592512130737305,
      "learning_rate": 3.8701844715851786e-06,
      "loss": 2.2959,
      "step": 54676
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0544012784957886,
      "learning_rate": 3.869859164711863e-06,
      "loss": 2.4632,
      "step": 54677
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7493568658828735,
      "learning_rate": 3.8695338682308084e-06,
      "loss": 2.3556,
      "step": 54678
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.057864785194397,
      "learning_rate": 3.869208582142551e-06,
      "loss": 2.2208,
      "step": 54679
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1022202968597412,
      "learning_rate": 3.868883306447654e-06,
      "loss": 2.4934,
      "step": 54680
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9619019627571106,
      "learning_rate": 3.868558041146661e-06,
      "loss": 2.5297,
      "step": 54681
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0075968503952026,
      "learning_rate": 3.868232786240131e-06,
      "loss": 2.2071,
      "step": 54682
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.865764856338501,
      "learning_rate": 3.867907541728609e-06,
      "loss": 2.1783,
      "step": 54683
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1873399019241333,
      "learning_rate": 3.867582307612652e-06,
      "loss": 2.1419,
      "step": 54684
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0605913400650024,
      "learning_rate": 3.867257083892806e-06,
      "loss": 2.3185,
      "step": 54685
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0895642042160034,
      "learning_rate": 3.866931870569627e-06,
      "loss": 2.3183,
      "step": 54686
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0839147567749023,
      "learning_rate": 3.866606667643662e-06,
      "loss": 2.4257,
      "step": 54687
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6989637613296509,
      "learning_rate": 3.866281475115466e-06,
      "loss": 2.1866,
      "step": 54688
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.102736473083496,
      "learning_rate": 3.865956292985587e-06,
      "loss": 2.3127,
      "step": 54689
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0504906177520752,
      "learning_rate": 3.865631121254581e-06,
      "loss": 2.588,
      "step": 54690
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0663039684295654,
      "learning_rate": 3.865305959922997e-06,
      "loss": 2.2039,
      "step": 54691
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.938062846660614,
      "learning_rate": 3.864980808991385e-06,
      "loss": 2.2892,
      "step": 54692
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0694602727890015,
      "learning_rate": 3.864655668460294e-06,
      "loss": 2.287,
      "step": 54693
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9784290790557861,
      "learning_rate": 3.864330538330281e-06,
      "loss": 2.5511,
      "step": 54694
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.000600814819336,
      "learning_rate": 3.86400541860189e-06,
      "loss": 2.3484,
      "step": 54695
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9253818392753601,
      "learning_rate": 3.86368030927568e-06,
      "loss": 2.2127,
      "step": 54696
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1039071083068848,
      "learning_rate": 3.8633552103522e-06,
      "loss": 2.0618,
      "step": 54697
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9891713857650757,
      "learning_rate": 3.863030121831994e-06,
      "loss": 2.4974,
      "step": 54698
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1508389711380005,
      "learning_rate": 3.8627050437156235e-06,
      "loss": 2.3385,
      "step": 54699
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0497463941574097,
      "learning_rate": 3.862379976003631e-06,
      "loss": 2.3155,
      "step": 54700
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0767238140106201,
      "learning_rate": 3.862054918696575e-06,
      "loss": 2.4583,
      "step": 54701
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.075863242149353,
      "learning_rate": 3.861729871794999e-06,
      "loss": 2.5465,
      "step": 54702
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1065590381622314,
      "learning_rate": 3.8614048352994616e-06,
      "loss": 2.4185,
      "step": 54703
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0132622718811035,
      "learning_rate": 3.861079809210507e-06,
      "loss": 2.5462,
      "step": 54704
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1635301113128662,
      "learning_rate": 3.860754793528697e-06,
      "loss": 2.2518,
      "step": 54705
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1159367561340332,
      "learning_rate": 3.860429788254567e-06,
      "loss": 2.0435,
      "step": 54706
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.186057448387146,
      "learning_rate": 3.86010479338868e-06,
      "loss": 2.2611,
      "step": 54707
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0217576026916504,
      "learning_rate": 3.85977980893158e-06,
      "loss": 2.5177,
      "step": 54708
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0896233320236206,
      "learning_rate": 3.859454834883823e-06,
      "loss": 2.3548,
      "step": 54709
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1271342039108276,
      "learning_rate": 3.859129871245954e-06,
      "loss": 2.1911,
      "step": 54710
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.680305004119873,
      "learning_rate": 3.8588049180185336e-06,
      "loss": 2.3871,
      "step": 54711
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.049726963043213,
      "learning_rate": 3.8584799752021015e-06,
      "loss": 2.2106,
      "step": 54712
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0116562843322754,
      "learning_rate": 3.858155042797218e-06,
      "loss": 2.2883,
      "step": 54713
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1083835363388062,
      "learning_rate": 3.857830120804427e-06,
      "loss": 2.5065,
      "step": 54714
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0591715574264526,
      "learning_rate": 3.857505209224284e-06,
      "loss": 2.4169,
      "step": 54715
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0900211334228516,
      "learning_rate": 3.857180308057336e-06,
      "loss": 2.5129,
      "step": 54716
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1854181289672852,
      "learning_rate": 3.856855417304137e-06,
      "loss": 2.3343,
      "step": 54717
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1082319021224976,
      "learning_rate": 3.856530536965238e-06,
      "loss": 2.5037,
      "step": 54718
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0276548862457275,
      "learning_rate": 3.856205667041189e-06,
      "loss": 2.1962,
      "step": 54719
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1377936601638794,
      "learning_rate": 3.855880807532535e-06,
      "loss": 2.4634,
      "step": 54720
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.008355736732483,
      "learning_rate": 3.855555958439836e-06,
      "loss": 2.1278,
      "step": 54721
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1643644571304321,
      "learning_rate": 3.855231119763635e-06,
      "loss": 2.3228,
      "step": 54722
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0682517290115356,
      "learning_rate": 3.85490629150449e-06,
      "loss": 2.2262,
      "step": 54723
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0702325105667114,
      "learning_rate": 3.854581473662948e-06,
      "loss": 2.4187,
      "step": 54724
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0678852796554565,
      "learning_rate": 3.854256666239555e-06,
      "loss": 2.3621,
      "step": 54725
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1710399389266968,
      "learning_rate": 3.85393186923487e-06,
      "loss": 2.532,
      "step": 54726
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.025128722190857,
      "learning_rate": 3.853607082649436e-06,
      "loss": 2.2878,
      "step": 54727
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9696221351623535,
      "learning_rate": 3.853282306483811e-06,
      "loss": 2.2003,
      "step": 54728
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0977654457092285,
      "learning_rate": 3.852957540738539e-06,
      "loss": 2.3682,
      "step": 54729
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3107832670211792,
      "learning_rate": 3.852632785414176e-06,
      "loss": 2.371,
      "step": 54730
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2022333145141602,
      "learning_rate": 3.8523080405112715e-06,
      "loss": 2.2917,
      "step": 54731
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.037005066871643,
      "learning_rate": 3.851983306030372e-06,
      "loss": 2.1919,
      "step": 54732
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2937488555908203,
      "learning_rate": 3.85165858197203e-06,
      "loss": 2.2665,
      "step": 54733
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2323859930038452,
      "learning_rate": 3.851333868336798e-06,
      "loss": 2.4952,
      "step": 54734
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1541078090667725,
      "learning_rate": 3.851009165125222e-06,
      "loss": 2.2512,
      "step": 54735
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0686277151107788,
      "learning_rate": 3.85068447233786e-06,
      "loss": 2.3423,
      "step": 54736
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9828356504440308,
      "learning_rate": 3.8503597899752536e-06,
      "loss": 2.3763,
      "step": 54737
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0833145380020142,
      "learning_rate": 3.850035118037962e-06,
      "loss": 2.3464,
      "step": 54738
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2250900268554688,
      "learning_rate": 3.849710456526526e-06,
      "loss": 2.1932,
      "step": 54739
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0146318674087524,
      "learning_rate": 3.849385805441506e-06,
      "loss": 2.3855,
      "step": 54740
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.008009672164917,
      "learning_rate": 3.849061164783443e-06,
      "loss": 2.3143,
      "step": 54741
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1626980304718018,
      "learning_rate": 3.848736534552896e-06,
      "loss": 2.4083,
      "step": 54742
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1608352661132812,
      "learning_rate": 3.848411914750409e-06,
      "loss": 2.2343,
      "step": 54743
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.057143211364746,
      "learning_rate": 3.848087305376539e-06,
      "loss": 2.3335,
      "step": 54744
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1932165622711182,
      "learning_rate": 3.8477627064318266e-06,
      "loss": 2.1879,
      "step": 54745
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.222985029220581,
      "learning_rate": 3.84743811791683e-06,
      "loss": 2.3843,
      "step": 54746
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1126115322113037,
      "learning_rate": 3.847113539832094e-06,
      "loss": 2.3317,
      "step": 54747
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1091954708099365,
      "learning_rate": 3.846788972178175e-06,
      "loss": 2.3209,
      "step": 54748
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1445236206054688,
      "learning_rate": 3.846464414955616e-06,
      "loss": 2.1286,
      "step": 54749
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0675305128097534,
      "learning_rate": 3.846139868164975e-06,
      "loss": 2.4697,
      "step": 54750
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0522381067276,
      "learning_rate": 3.845815331806795e-06,
      "loss": 2.2943,
      "step": 54751
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0296088457107544,
      "learning_rate": 3.845490805881632e-06,
      "loss": 2.154,
      "step": 54752
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2700600624084473,
      "learning_rate": 3.8451662903900345e-06,
      "loss": 2.2715,
      "step": 54753
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0548105239868164,
      "learning_rate": 3.844841785332547e-06,
      "loss": 2.4695,
      "step": 54754
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9973828792572021,
      "learning_rate": 3.84451729070973e-06,
      "loss": 2.2964,
      "step": 54755
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1373732089996338,
      "learning_rate": 3.844192806522122e-06,
      "loss": 2.2584,
      "step": 54756
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0862864255905151,
      "learning_rate": 3.8438683327702855e-06,
      "loss": 2.3141,
      "step": 54757
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.185783863067627,
      "learning_rate": 3.843543869454762e-06,
      "loss": 2.2903,
      "step": 54758
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.003275990486145,
      "learning_rate": 3.8432194165761036e-06,
      "loss": 2.1607,
      "step": 54759
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0378553867340088,
      "learning_rate": 3.8428949741348585e-06,
      "loss": 2.1905,
      "step": 54760
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1729680299758911,
      "learning_rate": 3.842570542131581e-06,
      "loss": 2.3252,
      "step": 54761
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0821664333343506,
      "learning_rate": 3.842246120566816e-06,
      "loss": 2.3105,
      "step": 54762
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2278060913085938,
      "learning_rate": 3.841921709441119e-06,
      "loss": 2.3941,
      "step": 54763
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1040338277816772,
      "learning_rate": 3.841597308755034e-06,
      "loss": 2.3102,
      "step": 54764
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1247807741165161,
      "learning_rate": 3.841272918509118e-06,
      "loss": 2.0002,
      "step": 54765
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0668009519577026,
      "learning_rate": 3.840948538703914e-06,
      "loss": 2.3668,
      "step": 54766
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.107421875,
      "learning_rate": 3.840624169339977e-06,
      "loss": 2.4133,
      "step": 54767
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0458067655563354,
      "learning_rate": 3.840299810417853e-06,
      "loss": 2.3326,
      "step": 54768
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4727362394332886,
      "learning_rate": 3.839975461938096e-06,
      "loss": 2.4071,
      "step": 54769
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0198733806610107,
      "learning_rate": 3.83965112390125e-06,
      "loss": 2.2589,
      "step": 54770
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0897905826568604,
      "learning_rate": 3.839326796307876e-06,
      "loss": 2.2963,
      "step": 54771
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.075548768043518,
      "learning_rate": 3.83900247915851e-06,
      "loss": 2.3372,
      "step": 54772
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6610013246536255,
      "learning_rate": 3.83867817245371e-06,
      "loss": 2.2658,
      "step": 54773
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1758604049682617,
      "learning_rate": 3.838353876194022e-06,
      "loss": 2.4573,
      "step": 54774
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0657050609588623,
      "learning_rate": 3.8380295903800005e-06,
      "loss": 2.117,
      "step": 54775
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9782980680465698,
      "learning_rate": 3.837705315012189e-06,
      "loss": 2.1052,
      "step": 54776
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9964313507080078,
      "learning_rate": 3.8373810500911445e-06,
      "loss": 2.4163,
      "step": 54777
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1147842407226562,
      "learning_rate": 3.837056795617408e-06,
      "loss": 2.1941,
      "step": 54778
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1465929746627808,
      "learning_rate": 3.836732551591539e-06,
      "loss": 2.0921,
      "step": 54779
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1409223079681396,
      "learning_rate": 3.836408318014082e-06,
      "loss": 2.4665,
      "step": 54780
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2250475883483887,
      "learning_rate": 3.836084094885583e-06,
      "loss": 2.3932,
      "step": 54781
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0593154430389404,
      "learning_rate": 3.835759882206599e-06,
      "loss": 2.2659,
      "step": 54782
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0450118780136108,
      "learning_rate": 3.8354356799776735e-06,
      "loss": 2.0251,
      "step": 54783
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0830368995666504,
      "learning_rate": 3.835111488199361e-06,
      "loss": 2.2603,
      "step": 54784
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0973057746887207,
      "learning_rate": 3.83478730687221e-06,
      "loss": 2.3836,
      "step": 54785
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9898196458816528,
      "learning_rate": 3.834463135996766e-06,
      "loss": 2.2515,
      "step": 54786
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9873088002204895,
      "learning_rate": 3.8341389755735835e-06,
      "loss": 2.4509,
      "step": 54787
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0280073881149292,
      "learning_rate": 3.833814825603211e-06,
      "loss": 2.3799,
      "step": 54788
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2451335191726685,
      "learning_rate": 3.833490686086192e-06,
      "loss": 2.3195,
      "step": 54789
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2759124040603638,
      "learning_rate": 3.833166557023087e-06,
      "loss": 2.3512,
      "step": 54790
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.05351984500885,
      "learning_rate": 3.8328424384144345e-06,
      "loss": 2.2833,
      "step": 54791
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.038048267364502,
      "learning_rate": 3.8325183302607925e-06,
      "loss": 2.2977,
      "step": 54792
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.998299777507782,
      "learning_rate": 3.832194232562704e-06,
      "loss": 2.1607,
      "step": 54793
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1977310180664062,
      "learning_rate": 3.831870145320725e-06,
      "loss": 2.321,
      "step": 54794
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0449169874191284,
      "learning_rate": 3.831546068535398e-06,
      "loss": 2.5135,
      "step": 54795
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1902856826782227,
      "learning_rate": 3.831222002207279e-06,
      "loss": 2.2653,
      "step": 54796
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1584291458129883,
      "learning_rate": 3.830897946336913e-06,
      "loss": 2.3909,
      "step": 54797
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1892544031143188,
      "learning_rate": 3.830573900924852e-06,
      "loss": 2.4638,
      "step": 54798
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0819807052612305,
      "learning_rate": 3.83024986597164e-06,
      "loss": 2.4167,
      "step": 54799
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0622771978378296,
      "learning_rate": 3.829925841477834e-06,
      "loss": 2.3066,
      "step": 54800
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1521492004394531,
      "learning_rate": 3.829601827443975e-06,
      "loss": 2.4213,
      "step": 54801
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1062803268432617,
      "learning_rate": 3.829277823870621e-06,
      "loss": 2.4805,
      "step": 54802
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1836484670639038,
      "learning_rate": 3.828953830758313e-06,
      "loss": 2.5174,
      "step": 54803
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.185154676437378,
      "learning_rate": 3.828629848107609e-06,
      "loss": 2.1228,
      "step": 54804
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2856024503707886,
      "learning_rate": 3.828305875919048e-06,
      "loss": 2.2006,
      "step": 54805
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0698528289794922,
      "learning_rate": 3.8279819141931894e-06,
      "loss": 2.4803,
      "step": 54806
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0836259126663208,
      "learning_rate": 3.827657962930575e-06,
      "loss": 2.2264,
      "step": 54807
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0741667747497559,
      "learning_rate": 3.827334022131759e-06,
      "loss": 2.3855,
      "step": 54808
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2082546949386597,
      "learning_rate": 3.8270100917972895e-06,
      "loss": 2.2653,
      "step": 54809
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0599852800369263,
      "learning_rate": 3.826686171927714e-06,
      "loss": 2.3495,
      "step": 54810
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1293182373046875,
      "learning_rate": 3.826362262523577e-06,
      "loss": 2.3415,
      "step": 54811
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0989149808883667,
      "learning_rate": 3.8260383635854385e-06,
      "loss": 2.2846,
      "step": 54812
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0815889835357666,
      "learning_rate": 3.825714475113838e-06,
      "loss": 2.3638,
      "step": 54813
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.195530891418457,
      "learning_rate": 3.825390597109331e-06,
      "loss": 2.3156,
      "step": 54814
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.229098916053772,
      "learning_rate": 3.825066729572463e-06,
      "loss": 2.1368,
      "step": 54815
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1113296747207642,
      "learning_rate": 3.824742872503782e-06,
      "loss": 2.3754,
      "step": 54816
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0832774639129639,
      "learning_rate": 3.824419025903843e-06,
      "loss": 2.3287,
      "step": 54817
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0536329746246338,
      "learning_rate": 3.824095189773186e-06,
      "loss": 2.1752,
      "step": 54818
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9849241375923157,
      "learning_rate": 3.823771364112369e-06,
      "loss": 2.1453,
      "step": 54819
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.063856601715088,
      "learning_rate": 3.823447548921933e-06,
      "loss": 2.4821,
      "step": 54820
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.046947717666626,
      "learning_rate": 3.823123744202436e-06,
      "loss": 2.4519,
      "step": 54821
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1651073694229126,
      "learning_rate": 3.822799949954418e-06,
      "loss": 2.2505,
      "step": 54822
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0919908285140991,
      "learning_rate": 3.822476166178437e-06,
      "loss": 2.2855,
      "step": 54823
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1372172832489014,
      "learning_rate": 3.822152392875031e-06,
      "loss": 2.2911,
      "step": 54824
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0740305185317993,
      "learning_rate": 3.821828630044758e-06,
      "loss": 2.5116,
      "step": 54825
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9976740479469299,
      "learning_rate": 3.8215048776881605e-06,
      "loss": 2.1577,
      "step": 54826
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0450499057769775,
      "learning_rate": 3.821181135805792e-06,
      "loss": 2.3992,
      "step": 54827
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3060306310653687,
      "learning_rate": 3.820857404398198e-06,
      "loss": 2.1291,
      "step": 54828
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.094249963760376,
      "learning_rate": 3.820533683465932e-06,
      "loss": 2.4272,
      "step": 54829
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0866996049880981,
      "learning_rate": 3.820209973009537e-06,
      "loss": 2.3996,
      "step": 54830
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0595093965530396,
      "learning_rate": 3.819886273029566e-06,
      "loss": 2.3767,
      "step": 54831
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0613778829574585,
      "learning_rate": 3.819562583526564e-06,
      "loss": 2.2783,
      "step": 54832
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.15335214138031,
      "learning_rate": 3.819238904501085e-06,
      "loss": 2.1762,
      "step": 54833
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2637332677841187,
      "learning_rate": 3.818915235953672e-06,
      "loss": 2.411,
      "step": 54834
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1724255084991455,
      "learning_rate": 3.818591577884879e-06,
      "loss": 2.3771,
      "step": 54835
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1199170351028442,
      "learning_rate": 3.818267930295253e-06,
      "loss": 2.1801,
      "step": 54836
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0861985683441162,
      "learning_rate": 3.817944293185341e-06,
      "loss": 2.2987,
      "step": 54837
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1186771392822266,
      "learning_rate": 3.817620666555689e-06,
      "loss": 2.2168,
      "step": 54838
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0235474109649658,
      "learning_rate": 3.817297050406853e-06,
      "loss": 2.4216,
      "step": 54839
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1091644763946533,
      "learning_rate": 3.816973444739375e-06,
      "loss": 2.2919,
      "step": 54840
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.067020297050476,
      "learning_rate": 3.816649849553809e-06,
      "loss": 2.2147,
      "step": 54841
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2531862258911133,
      "learning_rate": 3.8163262648507e-06,
      "loss": 2.3943,
      "step": 54842
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1396418809890747,
      "learning_rate": 3.816002690630595e-06,
      "loss": 2.4116,
      "step": 54843
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2881978750228882,
      "learning_rate": 3.815679126894047e-06,
      "loss": 2.1962,
      "step": 54844
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1418005228042603,
      "learning_rate": 3.815355573641602e-06,
      "loss": 2.4649,
      "step": 54845
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0107600688934326,
      "learning_rate": 3.81503203087381e-06,
      "loss": 2.3549,
      "step": 54846
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.018835425376892,
      "learning_rate": 3.814708498591216e-06,
      "loss": 2.5949,
      "step": 54847
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1205724477767944,
      "learning_rate": 3.814384976794374e-06,
      "loss": 2.149,
      "step": 54848
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0654062032699585,
      "learning_rate": 3.814061465483826e-06,
      "loss": 2.1792,
      "step": 54849
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0484918355941772,
      "learning_rate": 3.8137379646601314e-06,
      "loss": 2.2591,
      "step": 54850
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0297377109527588,
      "learning_rate": 3.8134144743238232e-06,
      "loss": 2.1817,
      "step": 54851
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1826286315917969,
      "learning_rate": 3.8130909944754626e-06,
      "loss": 2.2663,
      "step": 54852
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1052316427230835,
      "learning_rate": 3.8127675251155882e-06,
      "loss": 2.2461,
      "step": 54853
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1742401123046875,
      "learning_rate": 3.8124440662447583e-06,
      "loss": 2.2386,
      "step": 54854
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9815638065338135,
      "learning_rate": 3.8121206178635116e-06,
      "loss": 2.2744,
      "step": 54855
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1973978281021118,
      "learning_rate": 3.811797179972404e-06,
      "loss": 2.3303,
      "step": 54856
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1550331115722656,
      "learning_rate": 3.8114737525719792e-06,
      "loss": 2.3455,
      "step": 54857
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0859979391098022,
      "learning_rate": 3.811150335662789e-06,
      "loss": 2.3531,
      "step": 54858
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0055567026138306,
      "learning_rate": 3.810826929245377e-06,
      "loss": 2.3758,
      "step": 54859
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0710831880569458,
      "learning_rate": 3.8105035333202977e-06,
      "loss": 2.6536,
      "step": 54860
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0345690250396729,
      "learning_rate": 3.810180147888093e-06,
      "loss": 2.4616,
      "step": 54861
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.117020606994629,
      "learning_rate": 3.8098567729493173e-06,
      "loss": 2.3752,
      "step": 54862
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9751569032669067,
      "learning_rate": 3.8095334085045154e-06,
      "loss": 2.2514,
      "step": 54863
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0801302194595337,
      "learning_rate": 3.8092100545542354e-06,
      "loss": 2.3509,
      "step": 54864
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1596873998641968,
      "learning_rate": 3.8088867110990222e-06,
      "loss": 2.0328,
      "step": 54865
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9951210021972656,
      "learning_rate": 3.8085633781394316e-06,
      "loss": 2.4571,
      "step": 54866
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0253469944000244,
      "learning_rate": 3.8082400556760047e-06,
      "loss": 2.0971,
      "step": 54867
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.123429775238037,
      "learning_rate": 3.8079167437092947e-06,
      "loss": 2.3828,
      "step": 54868
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1360599994659424,
      "learning_rate": 3.807593442239844e-06,
      "loss": 2.4417,
      "step": 54869
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0905555486679077,
      "learning_rate": 3.8072701512682085e-06,
      "loss": 2.3669,
      "step": 54870
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1484001874923706,
      "learning_rate": 3.806946870794933e-06,
      "loss": 2.2694,
      "step": 54871
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0349818468093872,
      "learning_rate": 3.806623600820559e-06,
      "loss": 2.4215,
      "step": 54872
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1330089569091797,
      "learning_rate": 3.8063003413456455e-06,
      "loss": 2.2849,
      "step": 54873
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1131690740585327,
      "learning_rate": 3.8059770923707306e-06,
      "loss": 2.3892,
      "step": 54874
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2623481750488281,
      "learning_rate": 3.8056538538963705e-06,
      "loss": 2.4093,
      "step": 54875
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1579619646072388,
      "learning_rate": 3.8053306259231095e-06,
      "loss": 2.1916,
      "step": 54876
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2440056800842285,
      "learning_rate": 3.8050074084514954e-06,
      "loss": 2.219,
      "step": 54877
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0826663970947266,
      "learning_rate": 3.8046842014820726e-06,
      "loss": 2.327,
      "step": 54878
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0749591588974,
      "learning_rate": 3.8043610050153955e-06,
      "loss": 2.3344,
      "step": 54879
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.051611065864563,
      "learning_rate": 3.804037819052008e-06,
      "loss": 2.3168,
      "step": 54880
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.33506178855896,
      "learning_rate": 3.803714643592461e-06,
      "loss": 2.2497,
      "step": 54881
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.119396686553955,
      "learning_rate": 3.803391478637297e-06,
      "loss": 2.2755,
      "step": 54882
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1059987545013428,
      "learning_rate": 3.8030683241870714e-06,
      "loss": 2.355,
      "step": 54883
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9628246426582336,
      "learning_rate": 3.8027451802423243e-06,
      "loss": 2.1419,
      "step": 54884
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1054846048355103,
      "learning_rate": 3.802422046803611e-06,
      "loss": 2.4763,
      "step": 54885
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.174400806427002,
      "learning_rate": 3.8020989238714724e-06,
      "loss": 2.207,
      "step": 54886
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.090915560722351,
      "learning_rate": 3.801775811446462e-06,
      "loss": 2.2405,
      "step": 54887
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0647423267364502,
      "learning_rate": 3.8014527095291222e-06,
      "loss": 2.3069,
      "step": 54888
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0452052354812622,
      "learning_rate": 3.801129618120011e-06,
      "loss": 2.205,
      "step": 54889
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.23285973072052,
      "learning_rate": 3.80080653721966e-06,
      "loss": 2.4621,
      "step": 54890
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0887898206710815,
      "learning_rate": 3.8004834668286305e-06,
      "loss": 2.1865,
      "step": 54891
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0408737659454346,
      "learning_rate": 3.8001604069474616e-06,
      "loss": 2.2801,
      "step": 54892
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2045307159423828,
      "learning_rate": 3.799837357576708e-06,
      "loss": 2.179,
      "step": 54893
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1828842163085938,
      "learning_rate": 3.7995143187169103e-06,
      "loss": 2.1926,
      "step": 54894
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0288915634155273,
      "learning_rate": 3.7991912903686236e-06,
      "loss": 2.1348,
      "step": 54895
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.116119623184204,
      "learning_rate": 3.798868272532389e-06,
      "loss": 2.388,
      "step": 54896
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1185358762741089,
      "learning_rate": 3.7985452652087596e-06,
      "loss": 2.2767,
      "step": 54897
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1007676124572754,
      "learning_rate": 3.7982222683982806e-06,
      "loss": 2.3793,
      "step": 54898
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0905672311782837,
      "learning_rate": 3.7978992821014947e-06,
      "loss": 2.2921,
      "step": 54899
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.363067626953125,
      "learning_rate": 3.7975763063189584e-06,
      "loss": 2.3407,
      "step": 54900
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0724217891693115,
      "learning_rate": 3.7972533410512113e-06,
      "loss": 2.2739,
      "step": 54901
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.9834747314453125,
      "learning_rate": 3.796930386298808e-06,
      "loss": 2.0781,
      "step": 54902
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.10338294506073,
      "learning_rate": 3.7966074420622924e-06,
      "loss": 2.1105,
      "step": 54903
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0856446027755737,
      "learning_rate": 3.7962845083422086e-06,
      "loss": 2.548,
      "step": 54904
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0824735164642334,
      "learning_rate": 3.795961585139111e-06,
      "loss": 2.4881,
      "step": 54905
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0958497524261475,
      "learning_rate": 3.795638672453542e-06,
      "loss": 2.3383,
      "step": 54906
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.066760778427124,
      "learning_rate": 3.7953157702860487e-06,
      "loss": 2.4182,
      "step": 54907
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1519420146942139,
      "learning_rate": 3.794992878637184e-06,
      "loss": 2.5475,
      "step": 54908
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1315335035324097,
      "learning_rate": 3.7946699975074873e-06,
      "loss": 2.4117,
      "step": 54909
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.0547629594802856,
      "learning_rate": 3.794347126897514e-06,
      "loss": 2.2479,
      "step": 54910
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.128684401512146,
      "learning_rate": 3.794024266807804e-06,
      "loss": 2.1388,
      "step": 54911
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.087740182876587,
      "learning_rate": 3.793701417238912e-06,
      "loss": 2.3842,
      "step": 54912
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1372060775756836,
      "learning_rate": 3.793378578191379e-06,
      "loss": 2.3032,
      "step": 54913
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.169303297996521,
      "learning_rate": 3.7930557496657574e-06,
      "loss": 2.2618,
      "step": 54914
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.168355941772461,
      "learning_rate": 3.7927329316625892e-06,
      "loss": 2.3301,
      "step": 54915
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0758322477340698,
      "learning_rate": 3.7924101241824308e-06,
      "loss": 2.3878,
      "step": 54916
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.154595971107483,
      "learning_rate": 3.7920873272258172e-06,
      "loss": 2.3222,
      "step": 54917
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1222832202911377,
      "learning_rate": 3.7917645407933048e-06,
      "loss": 2.6091,
      "step": 54918
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3720200061798096,
      "learning_rate": 3.791441764885434e-06,
      "loss": 2.3283,
      "step": 54919
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0481595993041992,
      "learning_rate": 3.79111899950276e-06,
      "loss": 2.5827,
      "step": 54920
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.042061686515808,
      "learning_rate": 3.7907962446458204e-06,
      "loss": 2.4444,
      "step": 54921
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9740675687789917,
      "learning_rate": 3.790473500315173e-06,
      "loss": 2.2218,
      "step": 54922
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.104659080505371,
      "learning_rate": 3.7901507665113547e-06,
      "loss": 2.3275,
      "step": 54923
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.057193398475647,
      "learning_rate": 3.789828043234921e-06,
      "loss": 2.47,
      "step": 54924
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1102715730667114,
      "learning_rate": 3.7895053304864116e-06,
      "loss": 2.1398,
      "step": 54925
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.119050145149231,
      "learning_rate": 3.7891826282663814e-06,
      "loss": 2.3348,
      "step": 54926
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9993466138839722,
      "learning_rate": 3.788859936575374e-06,
      "loss": 2.4062,
      "step": 54927
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0809861421585083,
      "learning_rate": 3.7885372554139323e-06,
      "loss": 2.3624,
      "step": 54928
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1692308187484741,
      "learning_rate": 3.788214584782609e-06,
      "loss": 2.3173,
      "step": 54929
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.121399998664856,
      "learning_rate": 3.7878919246819514e-06,
      "loss": 2.3696,
      "step": 54930
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1258468627929688,
      "learning_rate": 3.787569275112499e-06,
      "loss": 2.3352,
      "step": 54931
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0628340244293213,
      "learning_rate": 3.787246636074807e-06,
      "loss": 2.3609,
      "step": 54932
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9981496930122375,
      "learning_rate": 3.7869240075694213e-06,
      "loss": 2.2121,
      "step": 54933
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1713334321975708,
      "learning_rate": 3.7866013895968822e-06,
      "loss": 2.4701,
      "step": 54934
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.261839747428894,
      "learning_rate": 3.786278782157744e-06,
      "loss": 2.189,
      "step": 54935
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0481501817703247,
      "learning_rate": 3.7859561852525484e-06,
      "loss": 2.2289,
      "step": 54936
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1812673807144165,
      "learning_rate": 3.785633598881848e-06,
      "loss": 2.1544,
      "step": 54937
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.023937702178955,
      "learning_rate": 3.785311023046183e-06,
      "loss": 2.4977,
      "step": 54938
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9648770689964294,
      "learning_rate": 3.7849884577461083e-06,
      "loss": 2.376,
      "step": 54939
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0159590244293213,
      "learning_rate": 3.7846659029821607e-06,
      "loss": 2.1379,
      "step": 54940
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.07145357131958,
      "learning_rate": 3.7843433587548972e-06,
      "loss": 2.204,
      "step": 54941
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0841715335845947,
      "learning_rate": 3.7840208250648604e-06,
      "loss": 2.3826,
      "step": 54942
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0154507160186768,
      "learning_rate": 3.783698301912595e-06,
      "loss": 2.2977,
      "step": 54943
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3007676601409912,
      "learning_rate": 3.7833757892986467e-06,
      "loss": 2.3063,
      "step": 54944
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4562413692474365,
      "learning_rate": 3.7830532872235683e-06,
      "loss": 2.4933,
      "step": 54945
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0667576789855957,
      "learning_rate": 3.782730795687899e-06,
      "loss": 2.2983,
      "step": 54946
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0921630859375,
      "learning_rate": 3.782408314692193e-06,
      "loss": 2.1267,
      "step": 54947
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0986524820327759,
      "learning_rate": 3.7820858442369914e-06,
      "loss": 2.2485,
      "step": 54948
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.059728980064392,
      "learning_rate": 3.781763384322845e-06,
      "loss": 2.2876,
      "step": 54949
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0893142223358154,
      "learning_rate": 3.7814409349502955e-06,
      "loss": 2.23,
      "step": 54950
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1122430562973022,
      "learning_rate": 3.7811184961198965e-06,
      "loss": 2.4903,
      "step": 54951
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2110810279846191,
      "learning_rate": 3.7807960678321875e-06,
      "loss": 2.2782,
      "step": 54952
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0700560808181763,
      "learning_rate": 3.7804736500877217e-06,
      "loss": 2.3615,
      "step": 54953
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0552312135696411,
      "learning_rate": 3.7801512428870415e-06,
      "loss": 2.5999,
      "step": 54954
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.022325038909912,
      "learning_rate": 3.779828846230694e-06,
      "loss": 2.5663,
      "step": 54955
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1531405448913574,
      "learning_rate": 3.7795064601192234e-06,
      "loss": 2.1678,
      "step": 54956
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.084746241569519,
      "learning_rate": 3.779184084553181e-06,
      "loss": 2.4268,
      "step": 54957
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0190945863723755,
      "learning_rate": 3.778861719533109e-06,
      "loss": 2.0398,
      "step": 54958
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3097758293151855,
      "learning_rate": 3.7785393650595594e-06,
      "loss": 2.252,
      "step": 54959
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.193313717842102,
      "learning_rate": 3.7782170211330747e-06,
      "loss": 2.3694,
      "step": 54960
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.070185899734497,
      "learning_rate": 3.7778946877541987e-06,
      "loss": 2.455,
      "step": 54961
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2572734355926514,
      "learning_rate": 3.777572364923485e-06,
      "loss": 2.3172,
      "step": 54962
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0450685024261475,
      "learning_rate": 3.7772500526414723e-06,
      "loss": 2.4019,
      "step": 54963
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1187553405761719,
      "learning_rate": 3.776927750908714e-06,
      "loss": 2.3578,
      "step": 54964
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0561970472335815,
      "learning_rate": 3.7766054597257505e-06,
      "loss": 2.3125,
      "step": 54965
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9865655899047852,
      "learning_rate": 3.776283179093134e-06,
      "loss": 2.4481,
      "step": 54966
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0390936136245728,
      "learning_rate": 3.7759609090114048e-06,
      "loss": 2.3024,
      "step": 54967
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1507986783981323,
      "learning_rate": 3.775638649481119e-06,
      "loss": 2.4394,
      "step": 54968
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.109344244003296,
      "learning_rate": 3.7753164005028088e-06,
      "loss": 2.3304,
      "step": 54969
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0218250751495361,
      "learning_rate": 3.7749941620770326e-06,
      "loss": 2.3777,
      "step": 54970
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0242563486099243,
      "learning_rate": 3.774671934204327e-06,
      "loss": 2.2275,
      "step": 54971
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0518488883972168,
      "learning_rate": 3.774349716885247e-06,
      "loss": 2.2316,
      "step": 54972
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.01011323928833,
      "learning_rate": 3.7740275101203327e-06,
      "loss": 2.3399,
      "step": 54973
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0589516162872314,
      "learning_rate": 3.7737053139101353e-06,
      "loss": 2.2269,
      "step": 54974
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2704873085021973,
      "learning_rate": 3.773383128255195e-06,
      "loss": 2.4143,
      "step": 54975
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0711677074432373,
      "learning_rate": 3.773060953156066e-06,
      "loss": 2.3713,
      "step": 54976
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.100588321685791,
      "learning_rate": 3.772738788613285e-06,
      "loss": 2.317,
      "step": 54977
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.108633041381836,
      "learning_rate": 3.772416634627407e-06,
      "loss": 2.2005,
      "step": 54978
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1040016412734985,
      "learning_rate": 3.772094491198971e-06,
      "loss": 2.2683,
      "step": 54979
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0588849782943726,
      "learning_rate": 3.7717723583285293e-06,
      "loss": 2.3832,
      "step": 54980
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0664187669754028,
      "learning_rate": 3.771450236016625e-06,
      "loss": 2.4201,
      "step": 54981
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9988173842430115,
      "learning_rate": 3.7711281242638043e-06,
      "loss": 2.279,
      "step": 54982
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1355963945388794,
      "learning_rate": 3.7708060230706103e-06,
      "loss": 2.3987,
      "step": 54983
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1396936178207397,
      "learning_rate": 3.7704839324375953e-06,
      "loss": 2.2559,
      "step": 54984
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0370875597000122,
      "learning_rate": 3.7701618523652974e-06,
      "loss": 2.1802,
      "step": 54985
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.052234411239624,
      "learning_rate": 3.769839782854271e-06,
      "loss": 2.2067,
      "step": 54986
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1173073053359985,
      "learning_rate": 3.7695177239050552e-06,
      "loss": 2.0807,
      "step": 54987
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.046481728553772,
      "learning_rate": 3.7691956755182024e-06,
      "loss": 2.4649,
      "step": 54988
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1031599044799805,
      "learning_rate": 3.7688736376942557e-06,
      "loss": 2.3374,
      "step": 54989
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.160078763961792,
      "learning_rate": 3.768551610433756e-06,
      "loss": 2.3536,
      "step": 54990
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0502965450286865,
      "learning_rate": 3.768229593737258e-06,
      "loss": 2.2605,
      "step": 54991
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1246132850646973,
      "learning_rate": 3.767907587605301e-06,
      "loss": 2.3799,
      "step": 54992
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9933208227157593,
      "learning_rate": 3.767585592038434e-06,
      "loss": 2.1912,
      "step": 54993
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0852665901184082,
      "learning_rate": 3.7672636070372006e-06,
      "loss": 2.2645,
      "step": 54994
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0512248277664185,
      "learning_rate": 3.7669416326021546e-06,
      "loss": 2.3274,
      "step": 54995
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9965003728866577,
      "learning_rate": 3.7666196687338285e-06,
      "loss": 2.3621,
      "step": 54996
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.131195068359375,
      "learning_rate": 3.7662977154327785e-06,
      "loss": 2.3761,
      "step": 54997
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.990888237953186,
      "learning_rate": 3.765975772699544e-06,
      "loss": 2.1539,
      "step": 54998
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1282403469085693,
      "learning_rate": 3.7656538405346765e-06,
      "loss": 2.1357,
      "step": 54999
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.030369758605957,
      "learning_rate": 3.765331918938715e-06,
      "loss": 2.2649,
      "step": 55000
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0259959697723389,
      "learning_rate": 3.7650100079122133e-06,
      "loss": 2.275,
      "step": 55001
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2515228986740112,
      "learning_rate": 3.7646881074557095e-06,
      "loss": 2.4666,
      "step": 55002
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9709592461585999,
      "learning_rate": 3.764366217569756e-06,
      "loss": 2.2811,
      "step": 55003
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1246248483657837,
      "learning_rate": 3.764044338254893e-06,
      "loss": 2.3874,
      "step": 55004
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1216922998428345,
      "learning_rate": 3.7637224695116716e-06,
      "loss": 2.3124,
      "step": 55005
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9861886501312256,
      "learning_rate": 3.7634006113406297e-06,
      "loss": 2.241,
      "step": 55006
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0819848775863647,
      "learning_rate": 3.7630787637423216e-06,
      "loss": 2.2373,
      "step": 55007
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.165621280670166,
      "learning_rate": 3.762756926717289e-06,
      "loss": 2.3933,
      "step": 55008
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1813145875930786,
      "learning_rate": 3.7624351002660785e-06,
      "loss": 2.18,
      "step": 55009
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.094483733177185,
      "learning_rate": 3.7621132843892293e-06,
      "loss": 2.3359,
      "step": 55010
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0668809413909912,
      "learning_rate": 3.761791479087298e-06,
      "loss": 2.4728,
      "step": 55011
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1315598487854004,
      "learning_rate": 3.761469684360819e-06,
      "loss": 2.4784,
      "step": 55012
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0074166059494019,
      "learning_rate": 3.7611479002103466e-06,
      "loss": 2.2942,
      "step": 55013
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.123077392578125,
      "learning_rate": 3.760826126636421e-06,
      "loss": 2.2457,
      "step": 55014
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0375546216964722,
      "learning_rate": 3.760504363639592e-06,
      "loss": 2.3893,
      "step": 55015
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0588122606277466,
      "learning_rate": 3.760182611220403e-06,
      "loss": 2.4262,
      "step": 55016
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2145566940307617,
      "learning_rate": 3.759860869379396e-06,
      "loss": 2.3697,
      "step": 55017
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.180323839187622,
      "learning_rate": 3.759539138117123e-06,
      "loss": 2.5344,
      "step": 55018
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9792503118515015,
      "learning_rate": 3.759217417434122e-06,
      "loss": 2.3124,
      "step": 55019
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.969235897064209,
      "learning_rate": 3.7588957073309475e-06,
      "loss": 2.0871,
      "step": 55020
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.967552900314331,
      "learning_rate": 3.7585740078081378e-06,
      "loss": 2.5627,
      "step": 55021
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1719834804534912,
      "learning_rate": 3.758252318866238e-06,
      "loss": 2.3944,
      "step": 55022
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.000020980834961,
      "learning_rate": 3.757930640505799e-06,
      "loss": 2.1404,
      "step": 55023
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9661726355552673,
      "learning_rate": 3.7576089727273635e-06,
      "loss": 2.2382,
      "step": 55024
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.054425835609436,
      "learning_rate": 3.7572873155314715e-06,
      "loss": 2.4987,
      "step": 55025
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1199754476547241,
      "learning_rate": 3.7569656689186773e-06,
      "loss": 2.1642,
      "step": 55026
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.087205410003662,
      "learning_rate": 3.7566440328895182e-06,
      "loss": 2.409,
      "step": 55027
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.132752537727356,
      "learning_rate": 3.756322407444547e-06,
      "loss": 2.4949,
      "step": 55028
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2156349420547485,
      "learning_rate": 3.756000792584301e-06,
      "loss": 2.327,
      "step": 55029
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2262654304504395,
      "learning_rate": 3.7556791883093324e-06,
      "loss": 2.4602,
      "step": 55030
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.162172794342041,
      "learning_rate": 3.7553575946201816e-06,
      "loss": 2.1479,
      "step": 55031
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0804879665374756,
      "learning_rate": 3.7550360115173977e-06,
      "loss": 2.325,
      "step": 55032
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.169346570968628,
      "learning_rate": 3.754714439001521e-06,
      "loss": 2.1981,
      "step": 55033
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1351685523986816,
      "learning_rate": 3.7543928770731073e-06,
      "loss": 2.3503,
      "step": 55034
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.109045147895813,
      "learning_rate": 3.7540713257326854e-06,
      "loss": 2.2971,
      "step": 55035
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0953993797302246,
      "learning_rate": 3.7537497849808137e-06,
      "loss": 2.5084,
      "step": 55036
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1921908855438232,
      "learning_rate": 3.753428254818029e-06,
      "loss": 2.3192,
      "step": 55037
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.015393614768982,
      "learning_rate": 3.7531067352448824e-06,
      "loss": 2.2289,
      "step": 55038
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9802008271217346,
      "learning_rate": 3.7527852262619147e-06,
      "loss": 2.1024,
      "step": 55039
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0834760665893555,
      "learning_rate": 3.7524637278696753e-06,
      "loss": 2.2263,
      "step": 55040
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1297626495361328,
      "learning_rate": 3.7521422400687024e-06,
      "loss": 2.4194,
      "step": 55041
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.23626708984375,
      "learning_rate": 3.7518207628595503e-06,
      "loss": 2.369,
      "step": 55042
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1066266298294067,
      "learning_rate": 3.751499296242754e-06,
      "loss": 2.1856,
      "step": 55043
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0239887237548828,
      "learning_rate": 3.751177840218868e-06,
      "loss": 2.37,
      "step": 55044
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1969852447509766,
      "learning_rate": 3.7508563947884326e-06,
      "loss": 2.3176,
      "step": 55045
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3051466941833496,
      "learning_rate": 3.750534959951989e-06,
      "loss": 2.3123,
      "step": 55046
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0777775049209595,
      "learning_rate": 3.75021353571009e-06,
      "loss": 2.4017,
      "step": 55047
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2310237884521484,
      "learning_rate": 3.7498921220632766e-06,
      "loss": 2.3542,
      "step": 55048
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0059846639633179,
      "learning_rate": 3.749570719012089e-06,
      "loss": 2.3601,
      "step": 55049
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0799750089645386,
      "learning_rate": 3.7492493265570817e-06,
      "loss": 2.3535,
      "step": 55050
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0030014514923096,
      "learning_rate": 3.7489279446987924e-06,
      "loss": 2.0952,
      "step": 55051
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.128648042678833,
      "learning_rate": 3.7486065734377665e-06,
      "loss": 2.2103,
      "step": 55052
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1229076385498047,
      "learning_rate": 3.7482852127745527e-06,
      "loss": 2.2785,
      "step": 55053
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9690548777580261,
      "learning_rate": 3.7479638627096903e-06,
      "loss": 2.2019,
      "step": 55054
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0897059440612793,
      "learning_rate": 3.747642523243731e-06,
      "loss": 2.2404,
      "step": 55055
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0448296070098877,
      "learning_rate": 3.7473211943772114e-06,
      "loss": 2.3698,
      "step": 55056
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1041505336761475,
      "learning_rate": 3.746999876110684e-06,
      "loss": 2.1454,
      "step": 55057
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0650190114974976,
      "learning_rate": 3.746678568444687e-06,
      "loss": 2.1564,
      "step": 55058
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1070539951324463,
      "learning_rate": 3.7463572713797722e-06,
      "loss": 2.3231,
      "step": 55059
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.014183759689331,
      "learning_rate": 3.7460359849164764e-06,
      "loss": 2.3145,
      "step": 55060
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.139947533607483,
      "learning_rate": 3.7457147090553547e-06,
      "loss": 2.4219,
      "step": 55061
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0603946447372437,
      "learning_rate": 3.7453934437969388e-06,
      "loss": 2.2885,
      "step": 55062
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0255838632583618,
      "learning_rate": 3.745072189141783e-06,
      "loss": 2.1592,
      "step": 55063
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0910383462905884,
      "learning_rate": 3.744750945090425e-06,
      "loss": 2.2847,
      "step": 55064
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.103562593460083,
      "learning_rate": 3.744429711643417e-06,
      "loss": 2.179,
      "step": 55065
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1505204439163208,
      "learning_rate": 3.744108488801296e-06,
      "loss": 2.4434,
      "step": 55066
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2141810655593872,
      "learning_rate": 3.7437872765646144e-06,
      "loss": 2.2822,
      "step": 55067
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.041232943534851,
      "learning_rate": 3.743466074933908e-06,
      "loss": 2.2739,
      "step": 55068
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.185849666595459,
      "learning_rate": 3.7431448839097296e-06,
      "loss": 2.3532,
      "step": 55069
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0657917261123657,
      "learning_rate": 3.7428237034926162e-06,
      "loss": 2.3475,
      "step": 55070
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0347864627838135,
      "learning_rate": 3.74250253368312e-06,
      "loss": 2.4319,
      "step": 55071
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1280529499053955,
      "learning_rate": 3.7421813744817812e-06,
      "loss": 2.3278,
      "step": 55072
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.157894492149353,
      "learning_rate": 3.7418602258891412e-06,
      "loss": 2.2551,
      "step": 55073
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.173535704612732,
      "learning_rate": 3.741539087905751e-06,
      "loss": 2.4562,
      "step": 55074
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0795046091079712,
      "learning_rate": 3.741217960532152e-06,
      "loss": 2.5064,
      "step": 55075
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.095055103302002,
      "learning_rate": 3.7408968437688854e-06,
      "loss": 2.2586,
      "step": 55076
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.065666913986206,
      "learning_rate": 3.740575737616502e-06,
      "loss": 2.283,
      "step": 55077
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0728323459625244,
      "learning_rate": 3.7402546420755426e-06,
      "loss": 2.2625,
      "step": 55078
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0746922492980957,
      "learning_rate": 3.7399335571465478e-06,
      "loss": 2.4957,
      "step": 55079
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0616276264190674,
      "learning_rate": 3.7396124828300693e-06,
      "loss": 2.2455,
      "step": 55080
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0492407083511353,
      "learning_rate": 3.739291419126645e-06,
      "loss": 2.433,
      "step": 55081
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0791243314743042,
      "learning_rate": 3.7389703660368247e-06,
      "loss": 2.2682,
      "step": 55082
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.281524896621704,
      "learning_rate": 3.7386493235611474e-06,
      "loss": 2.2564,
      "step": 55083
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2020981311798096,
      "learning_rate": 3.738328291700164e-06,
      "loss": 2.3463,
      "step": 55084
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1158220767974854,
      "learning_rate": 3.7380072704544102e-06,
      "loss": 2.601,
      "step": 55085
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.154383659362793,
      "learning_rate": 3.737686259824439e-06,
      "loss": 2.2245,
      "step": 55086
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.132810115814209,
      "learning_rate": 3.73736525981079e-06,
      "loss": 2.4852,
      "step": 55087
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.99578857421875,
      "learning_rate": 3.7370442704140088e-06,
      "loss": 2.3863,
      "step": 55088
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.170385479927063,
      "learning_rate": 3.736723291634634e-06,
      "loss": 2.1477,
      "step": 55089
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.103513479232788,
      "learning_rate": 3.7364023234732182e-06,
      "loss": 2.4035,
      "step": 55090
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1172451972961426,
      "learning_rate": 3.7360813659302986e-06,
      "loss": 2.2565,
      "step": 55091
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.12589693069458,
      "learning_rate": 3.7357604190064243e-06,
      "loss": 2.445,
      "step": 55092
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.097204566001892,
      "learning_rate": 3.735439482702136e-06,
      "loss": 2.2268,
      "step": 55093
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0931527614593506,
      "learning_rate": 3.7351185570179816e-06,
      "loss": 2.3676,
      "step": 55094
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0396873950958252,
      "learning_rate": 3.7347976419544986e-06,
      "loss": 2.2522,
      "step": 55095
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0600314140319824,
      "learning_rate": 3.734476737512239e-06,
      "loss": 2.1443,
      "step": 55096
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0595118999481201,
      "learning_rate": 3.7341558436917402e-06,
      "loss": 2.3267,
      "step": 55097
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2807350158691406,
      "learning_rate": 3.7338349604935533e-06,
      "loss": 2.1303,
      "step": 55098
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1622042655944824,
      "learning_rate": 3.733514087918213e-06,
      "loss": 2.2022,
      "step": 55099
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2194876670837402,
      "learning_rate": 3.7331932259662753e-06,
      "loss": 2.386,
      "step": 55100
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1218740940093994,
      "learning_rate": 3.7328723746382713e-06,
      "loss": 2.4929,
      "step": 55101
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.106613039970398,
      "learning_rate": 3.732551533934753e-06,
      "loss": 2.3266,
      "step": 55102
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9814189076423645,
      "learning_rate": 3.732230703856259e-06,
      "loss": 1.9723,
      "step": 55103
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1562151908874512,
      "learning_rate": 3.73190988440334e-06,
      "loss": 2.3728,
      "step": 55104
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0693646669387817,
      "learning_rate": 3.731589075576533e-06,
      "loss": 2.4161,
      "step": 55105
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2257813215255737,
      "learning_rate": 3.7312682773763873e-06,
      "loss": 2.5639,
      "step": 55106
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.222396731376648,
      "learning_rate": 3.730947489803445e-06,
      "loss": 2.4989,
      "step": 55107
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0980783700942993,
      "learning_rate": 3.7306267128582462e-06,
      "loss": 2.0915,
      "step": 55108
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0815660953521729,
      "learning_rate": 3.7303059465413416e-06,
      "loss": 2.2636,
      "step": 55109
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0865709781646729,
      "learning_rate": 3.729985190853267e-06,
      "loss": 2.3338,
      "step": 55110
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1641632318496704,
      "learning_rate": 3.7296644457945742e-06,
      "loss": 2.323,
      "step": 55111
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0422245264053345,
      "learning_rate": 3.7293437113657995e-06,
      "loss": 2.2538,
      "step": 55112
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0839462280273438,
      "learning_rate": 3.7290229875674975e-06,
      "loss": 2.4287,
      "step": 55113
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.055586338043213,
      "learning_rate": 3.7287022744001976e-06,
      "loss": 2.3993,
      "step": 55114
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.05341374874115,
      "learning_rate": 3.7283815718644546e-06,
      "loss": 2.2602,
      "step": 55115
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0579322576522827,
      "learning_rate": 3.728060879960803e-06,
      "loss": 2.4247,
      "step": 55116
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0164345502853394,
      "learning_rate": 3.7277401986897963e-06,
      "loss": 2.2853,
      "step": 55117
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.139401912689209,
      "learning_rate": 3.7274195280519708e-06,
      "loss": 2.1446,
      "step": 55118
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1047784090042114,
      "learning_rate": 3.727098868047876e-06,
      "loss": 2.3449,
      "step": 55119
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.132606863975525,
      "learning_rate": 3.7267782186780478e-06,
      "loss": 2.4717,
      "step": 55120
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0661797523498535,
      "learning_rate": 3.7264575799430392e-06,
      "loss": 2.3179,
      "step": 55121
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.006080985069275,
      "learning_rate": 3.7261369518433844e-06,
      "loss": 2.4044,
      "step": 55122
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2687163352966309,
      "learning_rate": 3.725816334379635e-06,
      "loss": 2.3868,
      "step": 55123
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9676475524902344,
      "learning_rate": 3.7254957275523273e-06,
      "loss": 2.1701,
      "step": 55124
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0860499143600464,
      "learning_rate": 3.725175131362012e-06,
      "loss": 2.2547,
      "step": 55125
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0985310077667236,
      "learning_rate": 3.72485454580923e-06,
      "loss": 2.3257,
      "step": 55126
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.113387942314148,
      "learning_rate": 3.7245339708945226e-06,
      "loss": 2.0497,
      "step": 55127
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0837945938110352,
      "learning_rate": 3.7242134066184323e-06,
      "loss": 2.4557,
      "step": 55128
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0503298044204712,
      "learning_rate": 3.7238928529815066e-06,
      "loss": 2.6611,
      "step": 55129
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9860801100730896,
      "learning_rate": 3.7235723099842845e-06,
      "loss": 2.4012,
      "step": 55130
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.029868245124817,
      "learning_rate": 3.723251777627316e-06,
      "loss": 2.2524,
      "step": 55131
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0745353698730469,
      "learning_rate": 3.7229312559111363e-06,
      "loss": 2.2345,
      "step": 55132
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.023701786994934,
      "learning_rate": 3.722610744836297e-06,
      "loss": 2.0092,
      "step": 55133
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0051484107971191,
      "learning_rate": 3.7222902444033373e-06,
      "loss": 2.5055,
      "step": 55134
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1223088502883911,
      "learning_rate": 3.7219697546127964e-06,
      "loss": 2.2691,
      "step": 55135
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2728742361068726,
      "learning_rate": 3.7216492754652254e-06,
      "loss": 2.3617,
      "step": 55136
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1652097702026367,
      "learning_rate": 3.721328806961161e-06,
      "loss": 2.3311,
      "step": 55137
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.084802269935608,
      "learning_rate": 3.721008349101153e-06,
      "loss": 2.3063,
      "step": 55138
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0009362697601318,
      "learning_rate": 3.720687901885738e-06,
      "loss": 2.4136,
      "step": 55139
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9944694638252258,
      "learning_rate": 3.720367465315465e-06,
      "loss": 2.3448,
      "step": 55140
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.11357843875885,
      "learning_rate": 3.720047039390876e-06,
      "loss": 2.3775,
      "step": 55141
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0684301853179932,
      "learning_rate": 3.719726624112512e-06,
      "loss": 2.2866,
      "step": 55142
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0860852003097534,
      "learning_rate": 3.7194062194809145e-06,
      "loss": 2.5007,
      "step": 55143
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0571707487106323,
      "learning_rate": 3.719085825496631e-06,
      "loss": 2.2026,
      "step": 55144
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1055935621261597,
      "learning_rate": 3.7187654421602003e-06,
      "loss": 2.1909,
      "step": 55145
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1483246088027954,
      "learning_rate": 3.718445069472172e-06,
      "loss": 2.2959,
      "step": 55146
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0921396017074585,
      "learning_rate": 3.7181247074330816e-06,
      "loss": 2.2865,
      "step": 55147
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0629740953445435,
      "learning_rate": 3.7178043560434794e-06,
      "loss": 2.2416,
      "step": 55148
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9904320240020752,
      "learning_rate": 3.7174840153039016e-06,
      "loss": 2.2135,
      "step": 55149
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1203534603118896,
      "learning_rate": 3.7171636852148987e-06,
      "loss": 2.4046,
      "step": 55150
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3526296615600586,
      "learning_rate": 3.716843365777005e-06,
      "loss": 2.3678,
      "step": 55151
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.085280418395996,
      "learning_rate": 3.716523056990773e-06,
      "loss": 2.2742,
      "step": 55152
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0910438299179077,
      "learning_rate": 3.7162027588567405e-06,
      "loss": 2.3397,
      "step": 55153
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3402832746505737,
      "learning_rate": 3.7158824713754515e-06,
      "loss": 2.3738,
      "step": 55154
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9582197070121765,
      "learning_rate": 3.715562194547444e-06,
      "loss": 2.3791,
      "step": 55155
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0365605354309082,
      "learning_rate": 3.7152419283732697e-06,
      "loss": 2.3913,
      "step": 55156
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2423651218414307,
      "learning_rate": 3.7149216728534632e-06,
      "loss": 2.2229,
      "step": 55157
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2885886430740356,
      "learning_rate": 3.7146014279885757e-06,
      "loss": 2.4059,
      "step": 55158
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0782966613769531,
      "learning_rate": 3.714281193779141e-06,
      "loss": 2.4248,
      "step": 55159
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1461092233657837,
      "learning_rate": 3.713960970225712e-06,
      "loss": 2.4117,
      "step": 55160
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1337534189224243,
      "learning_rate": 3.7136407573288214e-06,
      "loss": 2.4179,
      "step": 55161
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1688388586044312,
      "learning_rate": 3.7133205550890217e-06,
      "loss": 2.3418,
      "step": 55162
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0866029262542725,
      "learning_rate": 3.71300036350685e-06,
      "loss": 2.4382,
      "step": 55163
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9585466980934143,
      "learning_rate": 3.7126801825828484e-06,
      "loss": 2.5201,
      "step": 55164
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4030287265777588,
      "learning_rate": 3.712360012317564e-06,
      "loss": 2.3121,
      "step": 55165
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.116849422454834,
      "learning_rate": 3.7120398527115364e-06,
      "loss": 2.4711,
      "step": 55166
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.130810260772705,
      "learning_rate": 3.7117197037653065e-06,
      "loss": 2.3658,
      "step": 55167
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0575085878372192,
      "learning_rate": 3.7113995654794234e-06,
      "loss": 2.3202,
      "step": 55168
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.013145923614502,
      "learning_rate": 3.7110794378544248e-06,
      "loss": 2.283,
      "step": 55169
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0434108972549438,
      "learning_rate": 3.710759320890852e-06,
      "loss": 2.251,
      "step": 55170
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0047855377197266,
      "learning_rate": 3.710439214589253e-06,
      "loss": 2.4524,
      "step": 55171
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.083633303642273,
      "learning_rate": 3.710119118950165e-06,
      "loss": 2.3736,
      "step": 55172
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0593844652175903,
      "learning_rate": 3.7097990339741364e-06,
      "loss": 2.3452,
      "step": 55173
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.017226219177246,
      "learning_rate": 3.709478959661704e-06,
      "loss": 2.2256,
      "step": 55174
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0874459743499756,
      "learning_rate": 3.709158896013416e-06,
      "loss": 2.2358,
      "step": 55175
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0672403573989868,
      "learning_rate": 3.7088388430298096e-06,
      "loss": 2.2606,
      "step": 55176
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0744318962097168,
      "learning_rate": 3.7085188007114325e-06,
      "loss": 2.5121,
      "step": 55177
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.063786506652832,
      "learning_rate": 3.7081987690588218e-06,
      "loss": 2.275,
      "step": 55178
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1157355308532715,
      "learning_rate": 3.70787874807253e-06,
      "loss": 2.318,
      "step": 55179
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9937752485275269,
      "learning_rate": 3.707558737753085e-06,
      "loss": 2.164,
      "step": 55180
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0945444107055664,
      "learning_rate": 3.707238738101042e-06,
      "loss": 2.3929,
      "step": 55181
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0230337381362915,
      "learning_rate": 3.7069187491169335e-06,
      "loss": 2.3637,
      "step": 55182
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1535884141921997,
      "learning_rate": 3.7065987708013107e-06,
      "loss": 2.2376,
      "step": 55183
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3155648708343506,
      "learning_rate": 3.706278803154709e-06,
      "loss": 2.2429,
      "step": 55184
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.053916573524475,
      "learning_rate": 3.7059588461776776e-06,
      "loss": 2.618,
      "step": 55185
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0620280504226685,
      "learning_rate": 3.7056388998707515e-06,
      "loss": 2.2238,
      "step": 55186
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.156292200088501,
      "learning_rate": 3.7053189642344813e-06,
      "loss": 2.3725,
      "step": 55187
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1824595928192139,
      "learning_rate": 3.7049990392694015e-06,
      "loss": 2.3114,
      "step": 55188
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1671069860458374,
      "learning_rate": 3.7046791249760606e-06,
      "loss": 2.4731,
      "step": 55189
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0035593509674072,
      "learning_rate": 3.7043592213549985e-06,
      "loss": 2.3788,
      "step": 55190
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1490569114685059,
      "learning_rate": 3.7040393284067545e-06,
      "loss": 2.29,
      "step": 55191
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1318681240081787,
      "learning_rate": 3.703719446131877e-06,
      "loss": 2.2622,
      "step": 55192
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0016865730285645,
      "learning_rate": 3.703399574530905e-06,
      "loss": 2.2793,
      "step": 55193
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0910732746124268,
      "learning_rate": 3.7030797136043785e-06,
      "loss": 2.4546,
      "step": 55194
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2632224559783936,
      "learning_rate": 3.702759863352844e-06,
      "loss": 2.129,
      "step": 55195
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0728785991668701,
      "learning_rate": 3.7024400237768422e-06,
      "loss": 2.3317,
      "step": 55196
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.158297061920166,
      "learning_rate": 3.7021201948769115e-06,
      "loss": 2.3269,
      "step": 55197
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0925501585006714,
      "learning_rate": 3.7018003766536015e-06,
      "loss": 2.296,
      "step": 55198
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.260667324066162,
      "learning_rate": 3.7014805691074463e-06,
      "loss": 2.2693,
      "step": 55199
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1036986112594604,
      "learning_rate": 3.7011607722389965e-06,
      "loss": 2.3332,
      "step": 55200
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0742661952972412,
      "learning_rate": 3.700840986048786e-06,
      "loss": 2.193,
      "step": 55201
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1490306854248047,
      "learning_rate": 3.7005212105373645e-06,
      "loss": 2.4209,
      "step": 55202
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0798535346984863,
      "learning_rate": 3.700201445705267e-06,
      "loss": 2.5243,
      "step": 55203
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1515253782272339,
      "learning_rate": 3.6998816915530423e-06,
      "loss": 2.4297,
      "step": 55204
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1129319667816162,
      "learning_rate": 3.699561948081225e-06,
      "loss": 2.4793,
      "step": 55205
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0457991361618042,
      "learning_rate": 3.6992422152903697e-06,
      "loss": 2.2273,
      "step": 55206
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.121704339981079,
      "learning_rate": 3.698922493181002e-06,
      "loss": 2.3848,
      "step": 55207
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0308347940444946,
      "learning_rate": 3.6986027817536762e-06,
      "loss": 2.3025,
      "step": 55208
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.175778865814209,
      "learning_rate": 3.6982830810089266e-06,
      "loss": 2.2267,
      "step": 55209
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.035775065422058,
      "learning_rate": 3.697963390947302e-06,
      "loss": 2.0896,
      "step": 55210
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.16658616065979,
      "learning_rate": 3.6976437115693377e-06,
      "loss": 2.324,
      "step": 55211
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0429743528366089,
      "learning_rate": 3.697324042875583e-06,
      "loss": 2.3095,
      "step": 55212
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.060312271118164,
      "learning_rate": 3.6970043848665716e-06,
      "loss": 2.3485,
      "step": 55213
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9820672273635864,
      "learning_rate": 3.696684737542854e-06,
      "loss": 2.3186,
      "step": 55214
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2583751678466797,
      "learning_rate": 3.6963651009049627e-06,
      "loss": 2.273,
      "step": 55215
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1539726257324219,
      "learning_rate": 3.696045474953449e-06,
      "loss": 2.2606,
      "step": 55216
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0465552806854248,
      "learning_rate": 3.695725859688847e-06,
      "loss": 2.3052,
      "step": 55217
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1169052124023438,
      "learning_rate": 3.695406255111705e-06,
      "loss": 2.2273,
      "step": 55218
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0174723863601685,
      "learning_rate": 3.6950866612225623e-06,
      "loss": 2.2017,
      "step": 55219
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1696547269821167,
      "learning_rate": 3.6947670780219593e-06,
      "loss": 2.2694,
      "step": 55220
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.142137885093689,
      "learning_rate": 3.6944475055104356e-06,
      "loss": 2.194,
      "step": 55221
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.15045964717865,
      "learning_rate": 3.6941279436885393e-06,
      "loss": 2.4558,
      "step": 55222
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1959894895553589,
      "learning_rate": 3.6938083925568056e-06,
      "loss": 2.1707,
      "step": 55223
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0798271894454956,
      "learning_rate": 3.693488852115783e-06,
      "loss": 2.4817,
      "step": 55224
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0818933248519897,
      "learning_rate": 3.6931693223660092e-06,
      "loss": 2.4078,
      "step": 55225
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0777322053909302,
      "learning_rate": 3.692849803308023e-06,
      "loss": 2.1333,
      "step": 55226
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9902577996253967,
      "learning_rate": 3.6925302949423726e-06,
      "loss": 2.2447,
      "step": 55227
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1093127727508545,
      "learning_rate": 3.692210797269593e-06,
      "loss": 2.3553,
      "step": 55228
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.129568099975586,
      "learning_rate": 3.6918913102902322e-06,
      "loss": 2.4378,
      "step": 55229
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.062572717666626,
      "learning_rate": 3.691571834004827e-06,
      "loss": 2.325,
      "step": 55230
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2044864892959595,
      "learning_rate": 3.6912523684139223e-06,
      "loss": 2.4309,
      "step": 55231
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1186710596084595,
      "learning_rate": 3.6909329135180593e-06,
      "loss": 2.1337,
      "step": 55232
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9851275086402893,
      "learning_rate": 3.6906134693177787e-06,
      "loss": 2.3909,
      "step": 55233
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1249423027038574,
      "learning_rate": 3.6902940358136185e-06,
      "loss": 2.3438,
      "step": 55234
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0421348810195923,
      "learning_rate": 3.6899746130061274e-06,
      "loss": 2.283,
      "step": 55235
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0287566184997559,
      "learning_rate": 3.6896552008958387e-06,
      "loss": 2.4259,
      "step": 55236
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0606248378753662,
      "learning_rate": 3.6893357994833025e-06,
      "loss": 2.3883,
      "step": 55237
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.144006609916687,
      "learning_rate": 3.689016408769053e-06,
      "loss": 2.2042,
      "step": 55238
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.429235816001892,
      "learning_rate": 3.688697028753637e-06,
      "loss": 2.2376,
      "step": 55239
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1522663831710815,
      "learning_rate": 3.688377659437591e-06,
      "loss": 2.2659,
      "step": 55240
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0464857816696167,
      "learning_rate": 3.688058300821462e-06,
      "loss": 2.1105,
      "step": 55241
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0451569557189941,
      "learning_rate": 3.687738952905785e-06,
      "loss": 2.0633,
      "step": 55242
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0930900573730469,
      "learning_rate": 3.687419615691109e-06,
      "loss": 2.3544,
      "step": 55243
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0907790660858154,
      "learning_rate": 3.687100289177967e-06,
      "loss": 2.5068,
      "step": 55244
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0659873485565186,
      "learning_rate": 3.6867809733669113e-06,
      "loss": 2.2341,
      "step": 55245
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.152789831161499,
      "learning_rate": 3.686461668258471e-06,
      "loss": 2.4579,
      "step": 55246
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.064145565032959,
      "learning_rate": 3.6861423738531956e-06,
      "loss": 2.2872,
      "step": 55247
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1421390771865845,
      "learning_rate": 3.6858230901516202e-06,
      "loss": 2.377,
      "step": 55248
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0035172700881958,
      "learning_rate": 3.6855038171542924e-06,
      "loss": 2.3339,
      "step": 55249
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1461268663406372,
      "learning_rate": 3.685184554861747e-06,
      "loss": 2.4219,
      "step": 55250
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2959117889404297,
      "learning_rate": 3.684865303274534e-06,
      "loss": 2.3558,
      "step": 55251
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.031601905822754,
      "learning_rate": 3.6845460623931885e-06,
      "loss": 2.3509,
      "step": 55252
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0556761026382446,
      "learning_rate": 3.6842268322182484e-06,
      "loss": 2.2952,
      "step": 55253
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1377520561218262,
      "learning_rate": 3.6839076127502638e-06,
      "loss": 2.1295,
      "step": 55254
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1490367650985718,
      "learning_rate": 3.6835884039897675e-06,
      "loss": 2.4894,
      "step": 55255
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0394470691680908,
      "learning_rate": 3.6832692059373075e-06,
      "loss": 2.2121,
      "step": 55256
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9896181225776672,
      "learning_rate": 3.6829500185934185e-06,
      "loss": 2.5905,
      "step": 55257
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.03779935836792,
      "learning_rate": 3.6826308419586487e-06,
      "loss": 2.3447,
      "step": 55258
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9849638938903809,
      "learning_rate": 3.682311676033535e-06,
      "loss": 2.072,
      "step": 55259
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0645473003387451,
      "learning_rate": 3.6819925208186193e-06,
      "loss": 2.4836,
      "step": 55260
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.035650610923767,
      "learning_rate": 3.6816733763144384e-06,
      "loss": 2.3694,
      "step": 55261
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.136228322982788,
      "learning_rate": 3.681354242521541e-06,
      "loss": 2.2388,
      "step": 55262
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1043449640274048,
      "learning_rate": 3.6810351194404605e-06,
      "loss": 2.0248,
      "step": 55263
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1156284809112549,
      "learning_rate": 3.6807160070717453e-06,
      "loss": 2.3261,
      "step": 55264
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0802875757217407,
      "learning_rate": 3.6803969054159284e-06,
      "loss": 2.466,
      "step": 55265
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1393523216247559,
      "learning_rate": 3.6800778144735595e-06,
      "loss": 2.2981,
      "step": 55266
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1352922916412354,
      "learning_rate": 3.6797587342451723e-06,
      "loss": 2.2535,
      "step": 55267
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1371698379516602,
      "learning_rate": 3.6794396647313123e-06,
      "loss": 2.5392,
      "step": 55268
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0765835046768188,
      "learning_rate": 3.679120605932517e-06,
      "loss": 2.5983,
      "step": 55269
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.021540880203247,
      "learning_rate": 3.678801557849332e-06,
      "loss": 2.2258,
      "step": 55270
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.057715892791748,
      "learning_rate": 3.6784825204822948e-06,
      "loss": 2.4837,
      "step": 55271
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9869570136070251,
      "learning_rate": 3.6781634938319467e-06,
      "loss": 2.1113,
      "step": 55272
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4820715188980103,
      "learning_rate": 3.6778444778988244e-06,
      "loss": 2.1884,
      "step": 55273
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0422083139419556,
      "learning_rate": 3.6775254726834777e-06,
      "loss": 2.4455,
      "step": 55274
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1823357343673706,
      "learning_rate": 3.677206478186438e-06,
      "loss": 2.1263,
      "step": 55275
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9812881350517273,
      "learning_rate": 3.6768874944082543e-06,
      "loss": 2.3189,
      "step": 55276
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0865099430084229,
      "learning_rate": 3.6765685213494597e-06,
      "loss": 2.4787,
      "step": 55277
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.087278962135315,
      "learning_rate": 3.6762495590106027e-06,
      "loss": 2.289,
      "step": 55278
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0536946058273315,
      "learning_rate": 3.6759306073922173e-06,
      "loss": 2.1937,
      "step": 55279
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0777349472045898,
      "learning_rate": 3.6756116664948495e-06,
      "loss": 2.073,
      "step": 55280
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.127280831336975,
      "learning_rate": 3.6752927363190385e-06,
      "loss": 2.0829,
      "step": 55281
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0202466249465942,
      "learning_rate": 3.6749738168653202e-06,
      "loss": 2.529,
      "step": 55282
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1296913623809814,
      "learning_rate": 3.674654908134243e-06,
      "loss": 2.1617,
      "step": 55283
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1161115169525146,
      "learning_rate": 3.6743360101263393e-06,
      "loss": 2.1663,
      "step": 55284
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0078777074813843,
      "learning_rate": 3.6740171228421582e-06,
      "loss": 2.2833,
      "step": 55285
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1132376194000244,
      "learning_rate": 3.673698246282236e-06,
      "loss": 2.3589,
      "step": 55286
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1794129610061646,
      "learning_rate": 3.673379380447114e-06,
      "loss": 2.5475,
      "step": 55287
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0886956453323364,
      "learning_rate": 3.6730605253373286e-06,
      "loss": 2.5075,
      "step": 55288
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0303630828857422,
      "learning_rate": 3.6727416809534266e-06,
      "loss": 2.3571,
      "step": 55289
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1755510568618774,
      "learning_rate": 3.6724228472959433e-06,
      "loss": 2.4001,
      "step": 55290
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0651170015335083,
      "learning_rate": 3.672104024365425e-06,
      "loss": 2.2828,
      "step": 55291
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1150791645050049,
      "learning_rate": 3.6717852121624053e-06,
      "loss": 2.148,
      "step": 55292
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0466903448104858,
      "learning_rate": 3.671466410687432e-06,
      "loss": 2.3584,
      "step": 55293
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2797045707702637,
      "learning_rate": 3.671147619941039e-06,
      "loss": 2.2486,
      "step": 55294
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2591496706008911,
      "learning_rate": 3.6708288399237735e-06,
      "loss": 2.4387,
      "step": 55295
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0475261211395264,
      "learning_rate": 3.6705100706361672e-06,
      "loss": 2.1534,
      "step": 55296
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0270963907241821,
      "learning_rate": 3.67019131207877e-06,
      "loss": 2.3268,
      "step": 55297
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0687739849090576,
      "learning_rate": 3.669872564252117e-06,
      "loss": 2.2729,
      "step": 55298
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9911006689071655,
      "learning_rate": 3.66955382715675e-06,
      "loss": 2.442,
      "step": 55299
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0129270553588867,
      "learning_rate": 3.669235100793204e-06,
      "loss": 2.3129,
      "step": 55300
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2200347185134888,
      "learning_rate": 3.668916385162028e-06,
      "loss": 2.2662,
      "step": 55301
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1676735877990723,
      "learning_rate": 3.6685976802637546e-06,
      "loss": 2.325,
      "step": 55302
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0640614032745361,
      "learning_rate": 3.668278986098931e-06,
      "loss": 2.3931,
      "step": 55303
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0731987953186035,
      "learning_rate": 3.6679603026680922e-06,
      "loss": 2.2627,
      "step": 55304
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.063618779182434,
      "learning_rate": 3.6676416299717822e-06,
      "loss": 2.3979,
      "step": 55305
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1126606464385986,
      "learning_rate": 3.667322968010536e-06,
      "loss": 2.1241,
      "step": 55306
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.102174997329712,
      "learning_rate": 3.6670043167849024e-06,
      "loss": 2.226,
      "step": 55307
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0448684692382812,
      "learning_rate": 3.666685676295415e-06,
      "loss": 2.5085,
      "step": 55308
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0847059488296509,
      "learning_rate": 3.6663670465426125e-06,
      "loss": 2.2309,
      "step": 55309
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0261467695236206,
      "learning_rate": 3.6660484275270414e-06,
      "loss": 2.1722,
      "step": 55310
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0846922397613525,
      "learning_rate": 3.6657298192492384e-06,
      "loss": 2.3392,
      "step": 55311
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1245428323745728,
      "learning_rate": 3.6654112217097404e-06,
      "loss": 2.2968,
      "step": 55312
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1000661849975586,
      "learning_rate": 3.6650926349090942e-06,
      "loss": 2.3917,
      "step": 55313
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0016666650772095,
      "learning_rate": 3.6647740588478374e-06,
      "loss": 2.2913,
      "step": 55314
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1015286445617676,
      "learning_rate": 3.6644554935265043e-06,
      "loss": 2.4669,
      "step": 55315
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2343931198120117,
      "learning_rate": 3.664136938945644e-06,
      "loss": 2.2535,
      "step": 55316
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9734920859336853,
      "learning_rate": 3.6638183951057892e-06,
      "loss": 2.2342,
      "step": 55317
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2114830017089844,
      "learning_rate": 3.663499862007486e-06,
      "loss": 2.3488,
      "step": 55318
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0062922239303589,
      "learning_rate": 3.6631813396512685e-06,
      "loss": 1.9671,
      "step": 55319
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.180032730102539,
      "learning_rate": 3.662862828037683e-06,
      "loss": 2.2803,
      "step": 55320
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2349687814712524,
      "learning_rate": 3.6625443271672624e-06,
      "loss": 2.5386,
      "step": 55321
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.056759238243103,
      "learning_rate": 3.662225837040555e-06,
      "loss": 2.2491,
      "step": 55322
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1233975887298584,
      "learning_rate": 3.661907357658091e-06,
      "loss": 2.3711,
      "step": 55323
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.114047646522522,
      "learning_rate": 3.6615888890204233e-06,
      "loss": 2.3618,
      "step": 55324
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0792405605316162,
      "learning_rate": 3.661270431128077e-06,
      "loss": 2.2518,
      "step": 55325
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1359024047851562,
      "learning_rate": 3.660951983981602e-06,
      "loss": 2.1373,
      "step": 55326
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1291332244873047,
      "learning_rate": 3.6606335475815313e-06,
      "loss": 2.4089,
      "step": 55327
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1244111061096191,
      "learning_rate": 3.660315121928413e-06,
      "loss": 2.1153,
      "step": 55328
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.57295560836792,
      "learning_rate": 3.659996707022778e-06,
      "loss": 2.3711,
      "step": 55329
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0781726837158203,
      "learning_rate": 3.659678302865175e-06,
      "loss": 2.2973,
      "step": 55330
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1208025217056274,
      "learning_rate": 3.659359909456135e-06,
      "loss": 2.3856,
      "step": 55331
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0846118927001953,
      "learning_rate": 3.6590415267962056e-06,
      "loss": 2.2096,
      "step": 55332
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0868281126022339,
      "learning_rate": 3.658723154885919e-06,
      "loss": 2.5446,
      "step": 55333
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2067515850067139,
      "learning_rate": 3.6584047937258237e-06,
      "loss": 2.407,
      "step": 55334
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1263954639434814,
      "learning_rate": 3.6580864433164496e-06,
      "loss": 2.5435,
      "step": 55335
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0431932210922241,
      "learning_rate": 3.6577681036583466e-06,
      "loss": 2.4849,
      "step": 55336
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0426489114761353,
      "learning_rate": 3.6574497747520478e-06,
      "loss": 2.3862,
      "step": 55337
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.022070050239563,
      "learning_rate": 3.6571314565980953e-06,
      "loss": 1.9279,
      "step": 55338
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1166194677352905,
      "learning_rate": 3.6568131491970237e-06,
      "loss": 2.2623,
      "step": 55339
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0587794780731201,
      "learning_rate": 3.6564948525493792e-06,
      "loss": 2.3105,
      "step": 55340
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1313036680221558,
      "learning_rate": 3.656176566655697e-06,
      "loss": 2.3926,
      "step": 55341
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3520374298095703,
      "learning_rate": 3.6558582915165207e-06,
      "loss": 2.2461,
      "step": 55342
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9799038767814636,
      "learning_rate": 3.6555400271323883e-06,
      "loss": 2.1838,
      "step": 55343
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0357730388641357,
      "learning_rate": 3.6552217735038354e-06,
      "loss": 2.2473,
      "step": 55344
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0964826345443726,
      "learning_rate": 3.6549035306314074e-06,
      "loss": 2.3036,
      "step": 55345
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0422911643981934,
      "learning_rate": 3.6545852985156385e-06,
      "loss": 2.3892,
      "step": 55346
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1030124425888062,
      "learning_rate": 3.654267077157073e-06,
      "loss": 2.3204,
      "step": 55347
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1022841930389404,
      "learning_rate": 3.653948866556246e-06,
      "loss": 2.4821,
      "step": 55348
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2618681192398071,
      "learning_rate": 3.6536306667137033e-06,
      "loss": 2.4091,
      "step": 55349
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.083901286125183,
      "learning_rate": 3.65331247762998e-06,
      "loss": 2.2723,
      "step": 55350
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.122854232788086,
      "learning_rate": 3.6529942993056143e-06,
      "loss": 2.3397,
      "step": 55351
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1366759538650513,
      "learning_rate": 3.652676131741145e-06,
      "loss": 2.4242,
      "step": 55352
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1555546522140503,
      "learning_rate": 3.652357974937117e-06,
      "loss": 2.2686,
      "step": 55353
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0745865106582642,
      "learning_rate": 3.6520398288940627e-06,
      "loss": 2.3101,
      "step": 55354
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.140377163887024,
      "learning_rate": 3.6517216936125276e-06,
      "loss": 2.2086,
      "step": 55355
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1660699844360352,
      "learning_rate": 3.651403569093046e-06,
      "loss": 2.4264,
      "step": 55356
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.058234691619873,
      "learning_rate": 3.651085455336163e-06,
      "loss": 2.1223,
      "step": 55357
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1320048570632935,
      "learning_rate": 3.65076735234241e-06,
      "loss": 2.4414,
      "step": 55358
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2623871564865112,
      "learning_rate": 3.6504492601123354e-06,
      "loss": 2.3409,
      "step": 55359
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.182181477546692,
      "learning_rate": 3.6501311786464698e-06,
      "loss": 2.4215,
      "step": 55360
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.121972918510437,
      "learning_rate": 3.649813107945359e-06,
      "loss": 2.3775,
      "step": 55361
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0325223207473755,
      "learning_rate": 3.6494950480095382e-06,
      "loss": 2.3052,
      "step": 55362
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0231046676635742,
      "learning_rate": 3.6491769988395498e-06,
      "loss": 2.3191,
      "step": 55363
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0003705024719238,
      "learning_rate": 3.6488589604359324e-06,
      "loss": 2.1496,
      "step": 55364
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9803115725517273,
      "learning_rate": 3.648540932799224e-06,
      "loss": 2.1153,
      "step": 55365
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1557927131652832,
      "learning_rate": 3.6482229159299586e-06,
      "loss": 2.2796,
      "step": 55366
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0137073993682861,
      "learning_rate": 3.6479049098286855e-06,
      "loss": 2.3134,
      "step": 55367
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1595512628555298,
      "learning_rate": 3.6475869144959355e-06,
      "loss": 2.3195,
      "step": 55368
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0654524564743042,
      "learning_rate": 3.647268929932254e-06,
      "loss": 2.3823,
      "step": 55369
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0583938360214233,
      "learning_rate": 3.6469509561381764e-06,
      "loss": 2.3502,
      "step": 55370
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.054295539855957,
      "learning_rate": 3.64663299311424e-06,
      "loss": 2.4081,
      "step": 55371
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9984822273254395,
      "learning_rate": 3.646315040860989e-06,
      "loss": 2.297,
      "step": 55372
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0117570161819458,
      "learning_rate": 3.6459970993789563e-06,
      "loss": 2.3359,
      "step": 55373
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9604471921920776,
      "learning_rate": 3.6456791686686875e-06,
      "loss": 2.2271,
      "step": 55374
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9736093878746033,
      "learning_rate": 3.6453612487307153e-06,
      "loss": 2.1649,
      "step": 55375
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0774208307266235,
      "learning_rate": 3.6450433395655847e-06,
      "loss": 2.285,
      "step": 55376
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.133167028427124,
      "learning_rate": 3.6447254411738318e-06,
      "loss": 2.5104,
      "step": 55377
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1223361492156982,
      "learning_rate": 3.644407553555996e-06,
      "loss": 2.4089,
      "step": 55378
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5058597326278687,
      "learning_rate": 3.644089676712611e-06,
      "loss": 2.4973,
      "step": 55379
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0517276525497437,
      "learning_rate": 3.6437718106442242e-06,
      "loss": 2.5806,
      "step": 55380
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2224079370498657,
      "learning_rate": 3.643453955351367e-06,
      "loss": 2.4618,
      "step": 55381
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1715590953826904,
      "learning_rate": 3.6431361108345865e-06,
      "loss": 2.3316,
      "step": 55382
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.163432002067566,
      "learning_rate": 3.642818277094412e-06,
      "loss": 2.2494,
      "step": 55383
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0768365859985352,
      "learning_rate": 3.64250045413139e-06,
      "loss": 2.375,
      "step": 55384
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0625991821289062,
      "learning_rate": 3.6421826419460537e-06,
      "loss": 2.3656,
      "step": 55385
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0894625186920166,
      "learning_rate": 3.6418648405389478e-06,
      "loss": 2.326,
      "step": 55386
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.125206708908081,
      "learning_rate": 3.6415470499106054e-06,
      "loss": 2.2389,
      "step": 55387
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0272973775863647,
      "learning_rate": 3.6412292700615705e-06,
      "loss": 2.1335,
      "step": 55388
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9922971129417419,
      "learning_rate": 3.6409115009923744e-06,
      "loss": 2.5166,
      "step": 55389
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0670626163482666,
      "learning_rate": 3.6405937427035687e-06,
      "loss": 2.2265,
      "step": 55390
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1046591997146606,
      "learning_rate": 3.640275995195677e-06,
      "loss": 2.1032,
      "step": 55391
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2921535968780518,
      "learning_rate": 3.6399582584692472e-06,
      "loss": 2.2311,
      "step": 55392
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0561937093734741,
      "learning_rate": 3.639640532524812e-06,
      "loss": 2.5381,
      "step": 55393
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1475857496261597,
      "learning_rate": 3.639322817362918e-06,
      "loss": 2.213,
      "step": 55394
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9825887680053711,
      "learning_rate": 3.6390051129840953e-06,
      "loss": 2.4874,
      "step": 55395
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0934734344482422,
      "learning_rate": 3.63868741938889e-06,
      "loss": 2.5088,
      "step": 55396
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9615432024002075,
      "learning_rate": 3.638369736577835e-06,
      "loss": 2.2704,
      "step": 55397
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.018523097038269,
      "learning_rate": 3.6380520645514728e-06,
      "loss": 2.3552,
      "step": 55398
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2065006494522095,
      "learning_rate": 3.6377344033103413e-06,
      "loss": 2.2939,
      "step": 55399
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9969130158424377,
      "learning_rate": 3.637416752854974e-06,
      "loss": 2.2637,
      "step": 55400
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2723990678787231,
      "learning_rate": 3.637099113185917e-06,
      "loss": 2.3139,
      "step": 55401
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9820971488952637,
      "learning_rate": 3.636781484303701e-06,
      "loss": 2.4958,
      "step": 55402
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3503512144088745,
      "learning_rate": 3.6364638662088726e-06,
      "loss": 2.4623,
      "step": 55403
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0789378881454468,
      "learning_rate": 3.636146258901967e-06,
      "loss": 2.2996,
      "step": 55404
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1180013418197632,
      "learning_rate": 3.6358286623835204e-06,
      "loss": 2.0911,
      "step": 55405
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9442142248153687,
      "learning_rate": 3.63551107665407e-06,
      "loss": 2.5609,
      "step": 55406
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0015029907226562,
      "learning_rate": 3.6351935017141614e-06,
      "loss": 2.3042,
      "step": 55407
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0901281833648682,
      "learning_rate": 3.634875937564324e-06,
      "loss": 2.4761,
      "step": 55408
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0221456289291382,
      "learning_rate": 3.6345583842051037e-06,
      "loss": 2.5968,
      "step": 55409
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0063211917877197,
      "learning_rate": 3.6342408416370333e-06,
      "loss": 2.2783,
      "step": 55410
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1012351512908936,
      "learning_rate": 3.6339233098606564e-06,
      "loss": 2.3945,
      "step": 55411
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1112581491470337,
      "learning_rate": 3.633605788876505e-06,
      "loss": 2.4722,
      "step": 55412
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0949896574020386,
      "learning_rate": 3.6332882786851243e-06,
      "loss": 2.3848,
      "step": 55413
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1244255304336548,
      "learning_rate": 3.632970779287046e-06,
      "loss": 2.2078,
      "step": 55414
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2656702995300293,
      "learning_rate": 3.632653290682815e-06,
      "loss": 2.0396,
      "step": 55415
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1251899003982544,
      "learning_rate": 3.632335812872966e-06,
      "loss": 2.4481,
      "step": 55416
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1313568353652954,
      "learning_rate": 3.6320183458580372e-06,
      "loss": 2.5703,
      "step": 55417
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1676477193832397,
      "learning_rate": 3.631700889638563e-06,
      "loss": 2.4351,
      "step": 55418
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.021136999130249,
      "learning_rate": 3.6313834442150885e-06,
      "loss": 2.3531,
      "step": 55419
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0077393054962158,
      "learning_rate": 3.6310660095881467e-06,
      "loss": 2.2864,
      "step": 55420
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0116150379180908,
      "learning_rate": 3.63074858575828e-06,
      "loss": 2.2915,
      "step": 55421
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1379073858261108,
      "learning_rate": 3.630431172726021e-06,
      "loss": 2.3598,
      "step": 55422
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.020119071006775,
      "learning_rate": 3.630113770491915e-06,
      "loss": 2.2448,
      "step": 55423
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0551422834396362,
      "learning_rate": 3.629796379056493e-06,
      "loss": 2.1133,
      "step": 55424
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0306531190872192,
      "learning_rate": 3.6294789984202994e-06,
      "loss": 2.3864,
      "step": 55425
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.121463418006897,
      "learning_rate": 3.6291616285838683e-06,
      "loss": 2.3684,
      "step": 55426
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1677515506744385,
      "learning_rate": 3.6288442695477367e-06,
      "loss": 2.2213,
      "step": 55427
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1105018854141235,
      "learning_rate": 3.6285269213124465e-06,
      "loss": 2.394,
      "step": 55428
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1179958581924438,
      "learning_rate": 3.6282095838785303e-06,
      "loss": 2.4812,
      "step": 55429
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.160925030708313,
      "learning_rate": 3.627892257246534e-06,
      "loss": 2.3981,
      "step": 55430
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0719459056854248,
      "learning_rate": 3.6275749414169904e-06,
      "loss": 2.3493,
      "step": 55431
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2109988927841187,
      "learning_rate": 3.627257636390433e-06,
      "loss": 2.2633,
      "step": 55432
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0502820014953613,
      "learning_rate": 3.6269403421674097e-06,
      "loss": 2.1944,
      "step": 55433
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.212802767753601,
      "learning_rate": 3.626623058748454e-06,
      "loss": 2.0379,
      "step": 55434
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0010343790054321,
      "learning_rate": 3.626305786134099e-06,
      "loss": 2.3384,
      "step": 55435
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0846611261367798,
      "learning_rate": 3.6259885243248905e-06,
      "loss": 2.1984,
      "step": 55436
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0983275175094604,
      "learning_rate": 3.6256712733213585e-06,
      "loss": 2.2091,
      "step": 55437
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2506988048553467,
      "learning_rate": 3.625354033124049e-06,
      "loss": 2.5107,
      "step": 55438
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1529223918914795,
      "learning_rate": 3.6250368037334914e-06,
      "loss": 2.3039,
      "step": 55439
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3289395570755005,
      "learning_rate": 3.6247195851502327e-06,
      "loss": 2.3642,
      "step": 55440
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.180627703666687,
      "learning_rate": 3.6244023773748015e-06,
      "loss": 2.5382,
      "step": 55441
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.046365737915039,
      "learning_rate": 3.624085180407743e-06,
      "loss": 2.4117,
      "step": 55442
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0942221879959106,
      "learning_rate": 3.623767994249592e-06,
      "loss": 2.2381,
      "step": 55443
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2420680522918701,
      "learning_rate": 3.6234508189008867e-06,
      "loss": 2.1906,
      "step": 55444
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.069441556930542,
      "learning_rate": 3.62313365436216e-06,
      "loss": 2.233,
      "step": 55445
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.041060447692871,
      "learning_rate": 3.6228165006339576e-06,
      "loss": 2.4606,
      "step": 55446
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.056111454963684,
      "learning_rate": 3.622499357716809e-06,
      "loss": 2.2035,
      "step": 55447
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1062676906585693,
      "learning_rate": 3.6221822256112605e-06,
      "loss": 2.3253,
      "step": 55448
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0812690258026123,
      "learning_rate": 3.621865104317841e-06,
      "loss": 2.1675,
      "step": 55449
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1347500085830688,
      "learning_rate": 3.6215479938370967e-06,
      "loss": 2.2921,
      "step": 55450
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1982855796813965,
      "learning_rate": 3.621230894169557e-06,
      "loss": 2.2463,
      "step": 55451
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3461605310440063,
      "learning_rate": 3.6209138053157666e-06,
      "loss": 2.3257,
      "step": 55452
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9816858172416687,
      "learning_rate": 3.6205967272762566e-06,
      "loss": 2.163,
      "step": 55453
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1696072816848755,
      "learning_rate": 3.6202796600515722e-06,
      "loss": 2.4518,
      "step": 55454
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0496691465377808,
      "learning_rate": 3.6199626036422454e-06,
      "loss": 2.4257,
      "step": 55455
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1797608137130737,
      "learning_rate": 3.6196455580488144e-06,
      "loss": 2.3782,
      "step": 55456
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0698437690734863,
      "learning_rate": 3.6193285232718146e-06,
      "loss": 2.2391,
      "step": 55457
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0524736642837524,
      "learning_rate": 3.6190114993117885e-06,
      "loss": 2.4473,
      "step": 55458
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0841978788375854,
      "learning_rate": 3.6186944861692684e-06,
      "loss": 2.3022,
      "step": 55459
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2191243171691895,
      "learning_rate": 3.6183774838447973e-06,
      "loss": 2.3302,
      "step": 55460
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0930113792419434,
      "learning_rate": 3.6180604923389097e-06,
      "loss": 2.3171,
      "step": 55461
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1308000087738037,
      "learning_rate": 3.617743511652139e-06,
      "loss": 2.612,
      "step": 55462
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0501219034194946,
      "learning_rate": 3.617426541785031e-06,
      "loss": 2.341,
      "step": 55463
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0107083320617676,
      "learning_rate": 3.6171095827381133e-06,
      "loss": 2.2843,
      "step": 55464
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1013619899749756,
      "learning_rate": 3.616792634511933e-06,
      "loss": 2.231,
      "step": 55465
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0753905773162842,
      "learning_rate": 3.616475697107019e-06,
      "loss": 2.265,
      "step": 55466
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0687224864959717,
      "learning_rate": 3.6161587705239165e-06,
      "loss": 2.335,
      "step": 55467
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0343047380447388,
      "learning_rate": 3.615841854763155e-06,
      "loss": 2.064,
      "step": 55468
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0279200077056885,
      "learning_rate": 3.615524949825282e-06,
      "loss": 2.1659,
      "step": 55469
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2313252687454224,
      "learning_rate": 3.6152080557108214e-06,
      "loss": 2.4017,
      "step": 55470
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0176137685775757,
      "learning_rate": 3.6148911724203207e-06,
      "loss": 2.2723,
      "step": 55471
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0919471979141235,
      "learning_rate": 3.614574299954311e-06,
      "loss": 2.3379,
      "step": 55472
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0770270824432373,
      "learning_rate": 3.6142574383133343e-06,
      "loss": 2.2792,
      "step": 55473
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5000919103622437,
      "learning_rate": 3.6139405874979226e-06,
      "loss": 2.4644,
      "step": 55474
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1180293560028076,
      "learning_rate": 3.61362374750862e-06,
      "loss": 2.4406,
      "step": 55475
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1390798091888428,
      "learning_rate": 3.613306918345956e-06,
      "loss": 2.393,
      "step": 55476
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0095551013946533,
      "learning_rate": 3.612990100010474e-06,
      "loss": 2.1756,
      "step": 55477
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0246402025222778,
      "learning_rate": 3.6126732925027063e-06,
      "loss": 2.1577,
      "step": 55478
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0846558809280396,
      "learning_rate": 3.612356495823195e-06,
      "loss": 2.5396,
      "step": 55479
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.037453293800354,
      "learning_rate": 3.6120397099724703e-06,
      "loss": 2.279,
      "step": 55480
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.094001293182373,
      "learning_rate": 3.6117229349510774e-06,
      "loss": 2.3046,
      "step": 55481
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2190989255905151,
      "learning_rate": 3.6114061707595493e-06,
      "loss": 2.208,
      "step": 55482
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.276841402053833,
      "learning_rate": 3.6110894173984224e-06,
      "loss": 2.2072,
      "step": 55483
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0231226682662964,
      "learning_rate": 3.6107726748682304e-06,
      "loss": 2.1497,
      "step": 55484
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1989001035690308,
      "learning_rate": 3.6104559431695183e-06,
      "loss": 2.3471,
      "step": 55485
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3688045740127563,
      "learning_rate": 3.6101392223028144e-06,
      "loss": 2.4216,
      "step": 55486
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.033856987953186,
      "learning_rate": 3.6098225122686637e-06,
      "loss": 2.4192,
      "step": 55487
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1367061138153076,
      "learning_rate": 3.6095058130675997e-06,
      "loss": 2.587,
      "step": 55488
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9811910390853882,
      "learning_rate": 3.6091891247001554e-06,
      "loss": 2.4279,
      "step": 55489
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0551341772079468,
      "learning_rate": 3.6088724471668744e-06,
      "loss": 2.3338,
      "step": 55490
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1463658809661865,
      "learning_rate": 3.608555780468288e-06,
      "loss": 2.3813,
      "step": 55491
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0197241306304932,
      "learning_rate": 3.6082391246049387e-06,
      "loss": 2.3932,
      "step": 55492
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1464930772781372,
      "learning_rate": 3.607922479577356e-06,
      "loss": 2.2454,
      "step": 55493
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0423285961151123,
      "learning_rate": 3.607605845386084e-06,
      "loss": 2.2314,
      "step": 55494
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0121431350708008,
      "learning_rate": 3.607289222031657e-06,
      "loss": 2.0857,
      "step": 55495
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2445945739746094,
      "learning_rate": 3.6069726095146098e-06,
      "loss": 2.4185,
      "step": 55496
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2695320844650269,
      "learning_rate": 3.606656007835477e-06,
      "loss": 2.4938,
      "step": 55497
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1026732921600342,
      "learning_rate": 3.606339416994804e-06,
      "loss": 2.2979,
      "step": 55498
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2423614263534546,
      "learning_rate": 3.606022836993117e-06,
      "loss": 2.2781,
      "step": 55499
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2230664491653442,
      "learning_rate": 3.605706267830962e-06,
      "loss": 2.1765,
      "step": 55500
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.089355230331421,
      "learning_rate": 3.6053897095088674e-06,
      "loss": 2.4881,
      "step": 55501
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0282447338104248,
      "learning_rate": 3.605073162027378e-06,
      "loss": 2.4367,
      "step": 55502
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0981786251068115,
      "learning_rate": 3.6047566253870236e-06,
      "loss": 2.2055,
      "step": 55503
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2105319499969482,
      "learning_rate": 3.6044400995883466e-06,
      "loss": 2.3773,
      "step": 55504
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0443780422210693,
      "learning_rate": 3.6041235846318768e-06,
      "loss": 2.2031,
      "step": 55505
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1655501127243042,
      "learning_rate": 3.603807080518158e-06,
      "loss": 2.5783,
      "step": 55506
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0499324798583984,
      "learning_rate": 3.60349058724772e-06,
      "loss": 2.3329,
      "step": 55507
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1127654314041138,
      "learning_rate": 3.6031741048211067e-06,
      "loss": 2.4149,
      "step": 55508
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1315202713012695,
      "learning_rate": 3.602857633238851e-06,
      "loss": 2.3563,
      "step": 55509
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0945647954940796,
      "learning_rate": 3.602541172501488e-06,
      "loss": 2.2888,
      "step": 55510
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9447709918022156,
      "learning_rate": 3.602224722609552e-06,
      "loss": 2.2291,
      "step": 55511
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2508444786071777,
      "learning_rate": 3.6019082835635868e-06,
      "loss": 2.4652,
      "step": 55512
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1563769578933716,
      "learning_rate": 3.6015918553641206e-06,
      "loss": 2.24,
      "step": 55513
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.012705683708191,
      "learning_rate": 3.6012754380116977e-06,
      "loss": 2.1812,
      "step": 55514
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0404624938964844,
      "learning_rate": 3.6009590315068476e-06,
      "loss": 2.2871,
      "step": 55515
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0947635173797607,
      "learning_rate": 3.6006426358501124e-06,
      "loss": 2.3411,
      "step": 55516
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0234181880950928,
      "learning_rate": 3.6003262510420265e-06,
      "loss": 2.3145,
      "step": 55517
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0078243017196655,
      "learning_rate": 3.6000098770831227e-06,
      "loss": 2.145,
      "step": 55518
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0815677642822266,
      "learning_rate": 3.599693513973944e-06,
      "loss": 2.0619,
      "step": 55519
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0663304328918457,
      "learning_rate": 3.5993771617150185e-06,
      "loss": 2.2467,
      "step": 55520
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0720853805541992,
      "learning_rate": 3.599060820306892e-06,
      "loss": 2.03,
      "step": 55521
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0990279912948608,
      "learning_rate": 3.5987444897500946e-06,
      "loss": 2.1839,
      "step": 55522
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.125586986541748,
      "learning_rate": 3.598428170045165e-06,
      "loss": 2.3192,
      "step": 55523
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1630604267120361,
      "learning_rate": 3.5981118611926336e-06,
      "loss": 2.4219,
      "step": 55524
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0845518112182617,
      "learning_rate": 3.597795563193046e-06,
      "loss": 2.2218,
      "step": 55525
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1620383262634277,
      "learning_rate": 3.597479276046929e-06,
      "loss": 2.4641,
      "step": 55526
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2335222959518433,
      "learning_rate": 3.5971629997548285e-06,
      "loss": 2.4428,
      "step": 55527
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1056219339370728,
      "learning_rate": 3.596846734317271e-06,
      "loss": 2.356,
      "step": 55528
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0033835172653198,
      "learning_rate": 3.596530479734801e-06,
      "loss": 2.3543,
      "step": 55529
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0776914358139038,
      "learning_rate": 3.5962142360079477e-06,
      "loss": 2.3616,
      "step": 55530
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1660267114639282,
      "learning_rate": 3.5958980031372547e-06,
      "loss": 2.4322,
      "step": 55531
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1683969497680664,
      "learning_rate": 3.5955817811232494e-06,
      "loss": 2.2606,
      "step": 55532
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0721466541290283,
      "learning_rate": 3.595265569966476e-06,
      "loss": 2.3168,
      "step": 55533
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1671466827392578,
      "learning_rate": 3.5949493696674644e-06,
      "loss": 2.5147,
      "step": 55534
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0588816404342651,
      "learning_rate": 3.59463318022676e-06,
      "loss": 2.1,
      "step": 55535
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0126551389694214,
      "learning_rate": 3.5943170016448837e-06,
      "loss": 2.5208,
      "step": 55536
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1108778715133667,
      "learning_rate": 3.5940008339223843e-06,
      "loss": 2.4231,
      "step": 55537
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.209113359451294,
      "learning_rate": 3.59368467705979e-06,
      "loss": 2.3576,
      "step": 55538
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0077351331710815,
      "learning_rate": 3.5933685310576436e-06,
      "loss": 2.4289,
      "step": 55539
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2957854270935059,
      "learning_rate": 3.5930523959164733e-06,
      "loss": 2.3164,
      "step": 55540
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9586043953895569,
      "learning_rate": 3.592736271636823e-06,
      "loss": 2.3307,
      "step": 55541
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1479322910308838,
      "learning_rate": 3.592420158219222e-06,
      "loss": 2.3564,
      "step": 55542
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.187817931175232,
      "learning_rate": 3.592104055664213e-06,
      "loss": 2.5625,
      "step": 55543
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0903692245483398,
      "learning_rate": 3.5917879639723275e-06,
      "loss": 2.4559,
      "step": 55544
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0896637439727783,
      "learning_rate": 3.5914718831440977e-06,
      "loss": 2.2791,
      "step": 55545
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0472404956817627,
      "learning_rate": 3.591155813180067e-06,
      "loss": 2.2088,
      "step": 55546
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1694743633270264,
      "learning_rate": 3.590839754080765e-06,
      "loss": 2.3933,
      "step": 55547
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.139424443244934,
      "learning_rate": 3.5905237058467336e-06,
      "loss": 2.2385,
      "step": 55548
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2561354637145996,
      "learning_rate": 3.590207668478506e-06,
      "loss": 2.4709,
      "step": 55549
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0950514078140259,
      "learning_rate": 3.5898916419766127e-06,
      "loss": 2.4741,
      "step": 55550
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0313280820846558,
      "learning_rate": 3.589575626341598e-06,
      "loss": 2.4553,
      "step": 55551
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1149425506591797,
      "learning_rate": 3.589259621573994e-06,
      "loss": 2.2316,
      "step": 55552
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0594444274902344,
      "learning_rate": 3.588943627674332e-06,
      "loss": 2.2171,
      "step": 55553
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5234344005584717,
      "learning_rate": 3.5886276446431557e-06,
      "loss": 2.3421,
      "step": 55554
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0174167156219482,
      "learning_rate": 3.588311672480993e-06,
      "loss": 2.1179,
      "step": 55555
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0951683521270752,
      "learning_rate": 3.587995711188387e-06,
      "loss": 2.4631,
      "step": 55556
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0726358890533447,
      "learning_rate": 3.5876797607658663e-06,
      "loss": 2.1799,
      "step": 55557
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0523589849472046,
      "learning_rate": 3.5873638212139738e-06,
      "loss": 2.3814,
      "step": 55558
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0743110179901123,
      "learning_rate": 3.5870478925332375e-06,
      "loss": 2.3919,
      "step": 55559
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.069522500038147,
      "learning_rate": 3.5867319747242e-06,
      "loss": 2.3434,
      "step": 55560
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.072611689567566,
      "learning_rate": 3.586416067787394e-06,
      "loss": 2.2545,
      "step": 55561
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0208536386489868,
      "learning_rate": 3.5861001717233544e-06,
      "loss": 1.9841,
      "step": 55562
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1694135665893555,
      "learning_rate": 3.585784286532613e-06,
      "loss": 2.3242,
      "step": 55563
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1474220752716064,
      "learning_rate": 3.5854684122157135e-06,
      "loss": 2.3795,
      "step": 55564
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0427738428115845,
      "learning_rate": 3.585152548773184e-06,
      "loss": 2.2983,
      "step": 55565
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3682506084442139,
      "learning_rate": 3.584836696205567e-06,
      "loss": 2.5651,
      "step": 55566
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0827327966690063,
      "learning_rate": 3.584520854513389e-06,
      "loss": 2.1864,
      "step": 55567
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.063728928565979,
      "learning_rate": 3.584205023697196e-06,
      "loss": 2.277,
      "step": 55568
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0514060258865356,
      "learning_rate": 3.5838892037575134e-06,
      "loss": 2.2539,
      "step": 55569
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9961543679237366,
      "learning_rate": 3.583573394694885e-06,
      "loss": 2.2919,
      "step": 55570
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.102196216583252,
      "learning_rate": 3.583257596509838e-06,
      "loss": 2.3669,
      "step": 55571
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0428742170333862,
      "learning_rate": 3.582941809202917e-06,
      "loss": 2.159,
      "step": 55572
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2841086387634277,
      "learning_rate": 3.582626032774651e-06,
      "loss": 2.266,
      "step": 55573
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0787636041641235,
      "learning_rate": 3.582310267225574e-06,
      "loss": 2.4867,
      "step": 55574
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1136767864227295,
      "learning_rate": 3.5819945125562285e-06,
      "loss": 2.1885,
      "step": 55575
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0549635887145996,
      "learning_rate": 3.5816787687671438e-06,
      "loss": 2.4657,
      "step": 55576
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2102681398391724,
      "learning_rate": 3.5813630358588545e-06,
      "loss": 2.2489,
      "step": 55577
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.098858118057251,
      "learning_rate": 3.5810473138319012e-06,
      "loss": 2.429,
      "step": 55578
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0108455419540405,
      "learning_rate": 3.5807316026868153e-06,
      "loss": 2.4383,
      "step": 55579
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0988361835479736,
      "learning_rate": 3.58041590242413e-06,
      "loss": 2.5439,
      "step": 55580
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0544874668121338,
      "learning_rate": 3.5801002130443876e-06,
      "loss": 2.3248,
      "step": 55581
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0521048307418823,
      "learning_rate": 3.579784534548114e-06,
      "loss": 2.423,
      "step": 55582
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.978848934173584,
      "learning_rate": 3.5794688669358546e-06,
      "loss": 2.5615,
      "step": 55583
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0410642623901367,
      "learning_rate": 3.579153210208134e-06,
      "loss": 2.3121,
      "step": 55584
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0348031520843506,
      "learning_rate": 3.578837564365497e-06,
      "loss": 2.3344,
      "step": 55585
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9940643310546875,
      "learning_rate": 3.5785219294084706e-06,
      "loss": 2.3756,
      "step": 55586
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.255743384361267,
      "learning_rate": 3.578206305337597e-06,
      "loss": 2.344,
      "step": 55587
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2977283000946045,
      "learning_rate": 3.5778906921534085e-06,
      "loss": 2.192,
      "step": 55588
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1741987466812134,
      "learning_rate": 3.577575089856439e-06,
      "loss": 2.2626,
      "step": 55589
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0223380327224731,
      "learning_rate": 3.5772594984472197e-06,
      "loss": 2.4416,
      "step": 55590
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0443278551101685,
      "learning_rate": 3.5769439179262944e-06,
      "loss": 2.3699,
      "step": 55591
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2088022232055664,
      "learning_rate": 3.57662834829419e-06,
      "loss": 2.0741,
      "step": 55592
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1218183040618896,
      "learning_rate": 3.576312789551448e-06,
      "loss": 2.3774,
      "step": 55593
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1708273887634277,
      "learning_rate": 3.575997241698598e-06,
      "loss": 2.5291,
      "step": 55594
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1330091953277588,
      "learning_rate": 3.57568170473618e-06,
      "loss": 2.5021,
      "step": 55595
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2216471433639526,
      "learning_rate": 3.575366178664723e-06,
      "loss": 2.3004,
      "step": 55596
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.399618625640869,
      "learning_rate": 3.5750506634847694e-06,
      "loss": 2.2168,
      "step": 55597
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9591766595840454,
      "learning_rate": 3.574735159196846e-06,
      "loss": 2.2885,
      "step": 55598
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2364083528518677,
      "learning_rate": 3.5744196658014953e-06,
      "loss": 2.2802,
      "step": 55599
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0122982263565063,
      "learning_rate": 3.5741041832992485e-06,
      "loss": 2.2801,
      "step": 55600
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.036062479019165,
      "learning_rate": 3.57378871169064e-06,
      "loss": 2.4965,
      "step": 55601
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1936578750610352,
      "learning_rate": 3.5734732509762016e-06,
      "loss": 2.1847,
      "step": 55602
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.12527334690094,
      "learning_rate": 3.573157801156475e-06,
      "loss": 2.6902,
      "step": 55603
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0623241662979126,
      "learning_rate": 3.5728423622319876e-06,
      "loss": 2.3184,
      "step": 55604
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.950156569480896,
      "learning_rate": 3.572526934203281e-06,
      "loss": 2.3144,
      "step": 55605
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1554594039916992,
      "learning_rate": 3.5722115170708882e-06,
      "loss": 2.2804,
      "step": 55606
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0617270469665527,
      "learning_rate": 3.571896110835339e-06,
      "loss": 2.3831,
      "step": 55607
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0372365713119507,
      "learning_rate": 3.571580715497174e-06,
      "loss": 2.5592,
      "step": 55608
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1136680841445923,
      "learning_rate": 3.5712653310569233e-06,
      "loss": 2.3571,
      "step": 55609
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0583817958831787,
      "learning_rate": 3.5709499575151276e-06,
      "loss": 2.0742,
      "step": 55610
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0206197500228882,
      "learning_rate": 3.570634594872313e-06,
      "loss": 2.1863,
      "step": 55611
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0971354246139526,
      "learning_rate": 3.5703192431290234e-06,
      "loss": 2.1956,
      "step": 55612
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0386359691619873,
      "learning_rate": 3.5700039022857856e-06,
      "loss": 2.1599,
      "step": 55613
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0904744863510132,
      "learning_rate": 3.569688572343144e-06,
      "loss": 2.3711,
      "step": 55614
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.194490671157837,
      "learning_rate": 3.56937325330162e-06,
      "loss": 2.3348,
      "step": 55615
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1634951829910278,
      "learning_rate": 3.569057945161758e-06,
      "loss": 2.3599,
      "step": 55616
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9791616797447205,
      "learning_rate": 3.568742647924086e-06,
      "loss": 2.2812,
      "step": 55617
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1666074991226196,
      "learning_rate": 3.568427361589146e-06,
      "loss": 2.1028,
      "step": 55618
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0959808826446533,
      "learning_rate": 3.568112086157465e-06,
      "loss": 2.48,
      "step": 55619
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2461576461791992,
      "learning_rate": 3.5677968216295833e-06,
      "loss": 2.2299,
      "step": 55620
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0050811767578125,
      "learning_rate": 3.5674815680060302e-06,
      "loss": 2.295,
      "step": 55621
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0669045448303223,
      "learning_rate": 3.5671663252873467e-06,
      "loss": 2.3392,
      "step": 55622
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.203282356262207,
      "learning_rate": 3.56685109347406e-06,
      "loss": 2.1289,
      "step": 55623
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.054644227027893,
      "learning_rate": 3.5665358725667097e-06,
      "loss": 2.2925,
      "step": 55624
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0883458852767944,
      "learning_rate": 3.566220662565827e-06,
      "loss": 2.3075,
      "step": 55625
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0899146795272827,
      "learning_rate": 3.5659054634719503e-06,
      "loss": 2.0873,
      "step": 55626
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4159682989120483,
      "learning_rate": 3.565590275285611e-06,
      "loss": 1.9273,
      "step": 55627
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1018245220184326,
      "learning_rate": 3.5652750980073434e-06,
      "loss": 2.3589,
      "step": 55628
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.7673566341400146,
      "learning_rate": 3.564959931637679e-06,
      "loss": 2.3395,
      "step": 55629
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1405788660049438,
      "learning_rate": 3.56464477617716e-06,
      "loss": 2.1241,
      "step": 55630
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1439820528030396,
      "learning_rate": 3.564329631626311e-06,
      "loss": 2.4225,
      "step": 55631
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0586366653442383,
      "learning_rate": 3.564014497985675e-06,
      "loss": 2.4019,
      "step": 55632
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0316370725631714,
      "learning_rate": 3.563699375255779e-06,
      "loss": 2.5188,
      "step": 55633
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1260157823562622,
      "learning_rate": 3.563384263437165e-06,
      "loss": 2.355,
      "step": 55634
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.518101453781128,
      "learning_rate": 3.5630691625303616e-06,
      "loss": 2.3008,
      "step": 55635
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1344443559646606,
      "learning_rate": 3.562754072535901e-06,
      "loss": 2.382,
      "step": 55636
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0629302263259888,
      "learning_rate": 3.5624389934543246e-06,
      "loss": 2.3361,
      "step": 55637
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9714487195014954,
      "learning_rate": 3.5621239252861582e-06,
      "loss": 2.2926,
      "step": 55638
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1401222944259644,
      "learning_rate": 3.561808868031944e-06,
      "loss": 2.3652,
      "step": 55639
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.027652382850647,
      "learning_rate": 3.5614938216922136e-06,
      "loss": 2.2705,
      "step": 55640
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3962087631225586,
      "learning_rate": 3.5611787862674983e-06,
      "loss": 2.263,
      "step": 55641
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.086585283279419,
      "learning_rate": 3.5608637617583306e-06,
      "loss": 2.3774,
      "step": 55642
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1072486639022827,
      "learning_rate": 3.5605487481652513e-06,
      "loss": 2.1525,
      "step": 55643
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0957294702529907,
      "learning_rate": 3.560233745488787e-06,
      "loss": 2.2145,
      "step": 55644
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1419061422348022,
      "learning_rate": 3.5599187537294798e-06,
      "loss": 2.4501,
      "step": 55645
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0595558881759644,
      "learning_rate": 3.559603772887855e-06,
      "loss": 2.1199,
      "step": 55646
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0462071895599365,
      "learning_rate": 3.5592888029644556e-06,
      "loss": 2.2557,
      "step": 55647
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.002977728843689,
      "learning_rate": 3.5589738439598063e-06,
      "loss": 2.2316,
      "step": 55648
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.148491621017456,
      "learning_rate": 3.5586588958744493e-06,
      "loss": 2.4268,
      "step": 55649
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0473802089691162,
      "learning_rate": 3.558343958708912e-06,
      "loss": 2.1181,
      "step": 55650
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1420390605926514,
      "learning_rate": 3.5580290324637344e-06,
      "loss": 2.4682,
      "step": 55651
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0260835886001587,
      "learning_rate": 3.5577141171394446e-06,
      "loss": 2.4458,
      "step": 55652
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1266322135925293,
      "learning_rate": 3.557399212736582e-06,
      "loss": 2.0661,
      "step": 55653
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0678735971450806,
      "learning_rate": 3.557084319255677e-06,
      "loss": 2.2843,
      "step": 55654
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1548473834991455,
      "learning_rate": 3.5567694366972636e-06,
      "loss": 2.4121,
      "step": 55655
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1833585500717163,
      "learning_rate": 3.556454565061873e-06,
      "loss": 2.4999,
      "step": 55656
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0465947389602661,
      "learning_rate": 3.5561397043500454e-06,
      "loss": 2.5357,
      "step": 55657
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1209691762924194,
      "learning_rate": 3.555824854562309e-06,
      "loss": 2.1849,
      "step": 55658
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1442680358886719,
      "learning_rate": 3.5555100156992017e-06,
      "loss": 2.1277,
      "step": 55659
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0180083513259888,
      "learning_rate": 3.555195187761252e-06,
      "loss": 2.2625,
      "step": 55660
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0242596864700317,
      "learning_rate": 3.5548803707489997e-06,
      "loss": 2.2223,
      "step": 55661
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1004061698913574,
      "learning_rate": 3.5545655646629763e-06,
      "loss": 2.5004,
      "step": 55662
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.194509506225586,
      "learning_rate": 3.554250769503712e-06,
      "loss": 2.0824,
      "step": 55663
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0280624628067017,
      "learning_rate": 3.553935985271746e-06,
      "loss": 2.2894,
      "step": 55664
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.052081823348999,
      "learning_rate": 3.553621211967606e-06,
      "loss": 2.2209,
      "step": 55665
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9907870888710022,
      "learning_rate": 3.5533064495918324e-06,
      "loss": 2.5201,
      "step": 55666
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4026726484298706,
      "learning_rate": 3.5529916981449543e-06,
      "loss": 2.1808,
      "step": 55667
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0035654306411743,
      "learning_rate": 3.5526769576275032e-06,
      "loss": 2.3021,
      "step": 55668
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1675132513046265,
      "learning_rate": 3.5523622280400205e-06,
      "loss": 2.2443,
      "step": 55669
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.085545539855957,
      "learning_rate": 3.5520475093830333e-06,
      "loss": 2.214,
      "step": 55670
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2631891965866089,
      "learning_rate": 3.551732801657074e-06,
      "loss": 2.2675,
      "step": 55671
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.932712972164154,
      "learning_rate": 3.5514181048626828e-06,
      "loss": 2.3834,
      "step": 55672
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0873738527297974,
      "learning_rate": 3.551103419000386e-06,
      "loss": 2.3467,
      "step": 55673
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1973007917404175,
      "learning_rate": 3.5507887440707232e-06,
      "loss": 2.4354,
      "step": 55674
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.027029275894165,
      "learning_rate": 3.550474080074221e-06,
      "loss": 2.3463,
      "step": 55675
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.1234294176101685,
      "learning_rate": 3.5501594270114216e-06,
      "loss": 2.2818,
      "step": 55676
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.102569818496704,
      "learning_rate": 3.54984478488285e-06,
      "loss": 2.5116,
      "step": 55677
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2130025625228882,
      "learning_rate": 3.5495301536890457e-06,
      "loss": 2.1481,
      "step": 55678
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3922300338745117,
      "learning_rate": 3.5492155334305377e-06,
      "loss": 2.4649,
      "step": 55679
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.0599875450134277,
      "learning_rate": 3.5489009241078675e-06,
      "loss": 2.2465,
      "step": 55680
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.178321361541748,
      "learning_rate": 3.5485863257215557e-06,
      "loss": 2.3756,
      "step": 55681
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1123322248458862,
      "learning_rate": 3.5482717382721455e-06,
      "loss": 2.3786,
      "step": 55682
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3092838525772095,
      "learning_rate": 3.547957161760164e-06,
      "loss": 2.3144,
      "step": 55683
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.21248459815979,
      "learning_rate": 3.5476425961861504e-06,
      "loss": 2.2233,
      "step": 55684
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0610920190811157,
      "learning_rate": 3.547328041550632e-06,
      "loss": 2.2699,
      "step": 55685
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9906412959098816,
      "learning_rate": 3.5470134978541483e-06,
      "loss": 2.2931,
      "step": 55686
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9737601280212402,
      "learning_rate": 3.546698965097226e-06,
      "loss": 2.1345,
      "step": 55687
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1088299751281738,
      "learning_rate": 3.5463844432804052e-06,
      "loss": 2.1391,
      "step": 55688
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2024065256118774,
      "learning_rate": 3.5460699324042113e-06,
      "loss": 2.3722,
      "step": 55689
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9783106446266174,
      "learning_rate": 3.545755432469187e-06,
      "loss": 1.9611,
      "step": 55690
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1205183267593384,
      "learning_rate": 3.5454409434758584e-06,
      "loss": 2.2293,
      "step": 55691
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2635483741760254,
      "learning_rate": 3.5451264654247573e-06,
      "loss": 2.2685,
      "step": 55692
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.142076015472412,
      "learning_rate": 3.544811998316424e-06,
      "loss": 2.3073,
      "step": 55693
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1969435214996338,
      "learning_rate": 3.544497542151387e-06,
      "loss": 2.1788,
      "step": 55694
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.102038860321045,
      "learning_rate": 3.5441830969301772e-06,
      "loss": 2.342,
      "step": 55695
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0828810930252075,
      "learning_rate": 3.5438686626533327e-06,
      "loss": 2.3785,
      "step": 55696
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1133358478546143,
      "learning_rate": 3.543554239321385e-06,
      "loss": 2.2316,
      "step": 55697
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0031145811080933,
      "learning_rate": 3.5432398269348636e-06,
      "loss": 2.2086,
      "step": 55698
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1663384437561035,
      "learning_rate": 3.542925425494307e-06,
      "loss": 2.3613,
      "step": 55699
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1579779386520386,
      "learning_rate": 3.542611035000242e-06,
      "loss": 2.4986,
      "step": 55700
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0713672637939453,
      "learning_rate": 3.542296655453209e-06,
      "loss": 2.4203,
      "step": 55701
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0596879720687866,
      "learning_rate": 3.541982286853732e-06,
      "loss": 2.1715,
      "step": 55702
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0338577032089233,
      "learning_rate": 3.541667929202355e-06,
      "loss": 2.1973,
      "step": 55703
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0808693170547485,
      "learning_rate": 3.5413535824996004e-06,
      "loss": 2.4134,
      "step": 55704
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.06170654296875,
      "learning_rate": 3.5410392467460085e-06,
      "loss": 2.2999,
      "step": 55705
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9920825958251953,
      "learning_rate": 3.54072492194211e-06,
      "loss": 2.3356,
      "step": 55706
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.001362681388855,
      "learning_rate": 3.540410608088436e-06,
      "loss": 2.1745,
      "step": 55707
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2107858657836914,
      "learning_rate": 3.5400963051855187e-06,
      "loss": 2.3319,
      "step": 55708
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9989994764328003,
      "learning_rate": 3.5397820132338944e-06,
      "loss": 2.1768,
      "step": 55709
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.13070809841156,
      "learning_rate": 3.539467732234092e-06,
      "loss": 2.1573,
      "step": 55710
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.973578929901123,
      "learning_rate": 3.5391534621866495e-06,
      "loss": 2.4289,
      "step": 55711
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1612011194229126,
      "learning_rate": 3.5388392030920927e-06,
      "loss": 2.4594,
      "step": 55712
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.126992106437683,
      "learning_rate": 3.538524954950963e-06,
      "loss": 2.4321,
      "step": 55713
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1996808052062988,
      "learning_rate": 3.5382107177637837e-06,
      "loss": 2.2481,
      "step": 55714
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0674026012420654,
      "learning_rate": 3.5378964915310953e-06,
      "loss": 2.541,
      "step": 55715
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0583899021148682,
      "learning_rate": 3.537582276253425e-06,
      "loss": 2.3074,
      "step": 55716
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1899887323379517,
      "learning_rate": 3.5372680719313114e-06,
      "loss": 2.2738,
      "step": 55717
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0685456991195679,
      "learning_rate": 3.5369538785652835e-06,
      "loss": 2.2783,
      "step": 55718
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1896860599517822,
      "learning_rate": 3.5366396961558703e-06,
      "loss": 2.4058,
      "step": 55719
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2027862071990967,
      "learning_rate": 3.5363255247036122e-06,
      "loss": 2.4064,
      "step": 55720
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2499481439590454,
      "learning_rate": 3.536011364209038e-06,
      "loss": 2.3233,
      "step": 55721
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2064447402954102,
      "learning_rate": 3.535697214672675e-06,
      "loss": 2.1991,
      "step": 55722
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0014700889587402,
      "learning_rate": 3.535383076095066e-06,
      "loss": 2.293,
      "step": 55723
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.094567894935608,
      "learning_rate": 3.5350689484767375e-06,
      "loss": 2.4648,
      "step": 55724
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0424702167510986,
      "learning_rate": 3.53475483181822e-06,
      "loss": 2.3266,
      "step": 55725
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0731995105743408,
      "learning_rate": 3.534440726120053e-06,
      "loss": 2.4651,
      "step": 55726
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1546536684036255,
      "learning_rate": 3.5341266313827614e-06,
      "loss": 2.2862,
      "step": 55727
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.044806957244873,
      "learning_rate": 3.533812547606884e-06,
      "loss": 2.1729,
      "step": 55728
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0707154273986816,
      "learning_rate": 3.533498474792948e-06,
      "loss": 2.3851,
      "step": 55729
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0910130739212036,
      "learning_rate": 3.5331844129414904e-06,
      "loss": 2.1733,
      "step": 55730
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.077975869178772,
      "learning_rate": 3.532870362053039e-06,
      "loss": 2.2801,
      "step": 55731
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1123310327529907,
      "learning_rate": 3.5325563221281332e-06,
      "loss": 2.2339,
      "step": 55732
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1234081983566284,
      "learning_rate": 3.5322422931672994e-06,
      "loss": 2.0072,
      "step": 55733
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0332202911376953,
      "learning_rate": 3.5319282751710725e-06,
      "loss": 2.2394,
      "step": 55734
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0837239027023315,
      "learning_rate": 3.53161426813998e-06,
      "loss": 2.1482,
      "step": 55735
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.070472240447998,
      "learning_rate": 3.5313002720745615e-06,
      "loss": 2.3439,
      "step": 55736
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0909795761108398,
      "learning_rate": 3.530986286975343e-06,
      "loss": 2.0822,
      "step": 55737
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.994994580745697,
      "learning_rate": 3.530672312842862e-06,
      "loss": 2.1914,
      "step": 55738
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1405199766159058,
      "learning_rate": 3.530358349677646e-06,
      "loss": 2.4089,
      "step": 55739
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1495375633239746,
      "learning_rate": 3.5300443974802335e-06,
      "loss": 2.3898,
      "step": 55740
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2483241558074951,
      "learning_rate": 3.529730456251148e-06,
      "loss": 2.1641,
      "step": 55741
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.120101809501648,
      "learning_rate": 3.5294165259909317e-06,
      "loss": 2.6117,
      "step": 55742
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.079089879989624,
      "learning_rate": 3.5291026067001076e-06,
      "loss": 2.3708,
      "step": 55743
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.058253288269043,
      "learning_rate": 3.5287886983792152e-06,
      "loss": 2.3131,
      "step": 55744
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2803865671157837,
      "learning_rate": 3.5284748010287806e-06,
      "loss": 2.3717,
      "step": 55745
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.009482741355896,
      "learning_rate": 3.5281609146493455e-06,
      "loss": 2.3478,
      "step": 55746
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2451943159103394,
      "learning_rate": 3.527847039241429e-06,
      "loss": 2.2704,
      "step": 55747
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1271593570709229,
      "learning_rate": 3.527533174805573e-06,
      "loss": 2.2925,
      "step": 55748
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1628109216690063,
      "learning_rate": 3.527219321342302e-06,
      "loss": 2.4427,
      "step": 55749
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0228897333145142,
      "learning_rate": 3.526905478852156e-06,
      "loss": 2.1686,
      "step": 55750
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1349921226501465,
      "learning_rate": 3.52659164733566e-06,
      "loss": 2.3509,
      "step": 55751
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1494916677474976,
      "learning_rate": 3.526277826793353e-06,
      "loss": 2.4647,
      "step": 55752
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0049492120742798,
      "learning_rate": 3.5259640172257627e-06,
      "loss": 2.2145,
      "step": 55753
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0973178148269653,
      "learning_rate": 3.5256502186334185e-06,
      "loss": 2.4482,
      "step": 55754
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0406359434127808,
      "learning_rate": 3.525336431016859e-06,
      "loss": 2.3923,
      "step": 55755
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0423659086227417,
      "learning_rate": 3.52502265437661e-06,
      "loss": 2.2927,
      "step": 55756
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0327003002166748,
      "learning_rate": 3.5247088887132085e-06,
      "loss": 2.3971,
      "step": 55757
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0856629610061646,
      "learning_rate": 3.524395134027181e-06,
      "loss": 2.1667,
      "step": 55758
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.15924870967865,
      "learning_rate": 3.5240813903190696e-06,
      "loss": 2.4079,
      "step": 55759
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0753214359283447,
      "learning_rate": 3.523767657589392e-06,
      "loss": 2.2467,
      "step": 55760
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0545694828033447,
      "learning_rate": 3.523453935838691e-06,
      "loss": 2.3252,
      "step": 55761
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0946693420410156,
      "learning_rate": 3.5231402250674906e-06,
      "loss": 2.203,
      "step": 55762
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.140076756477356,
      "learning_rate": 3.5228265252763305e-06,
      "loss": 2.2613,
      "step": 55763
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0905228853225708,
      "learning_rate": 3.5225128364657344e-06,
      "loss": 2.3025,
      "step": 55764
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2473121881484985,
      "learning_rate": 3.5221991586362425e-06,
      "loss": 2.2702,
      "step": 55765
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.176771640777588,
      "learning_rate": 3.5218854917883794e-06,
      "loss": 2.3792,
      "step": 55766
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2038277387619019,
      "learning_rate": 3.5215718359226826e-06,
      "loss": 2.2699,
      "step": 55767
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.004928469657898,
      "learning_rate": 3.521258191039677e-06,
      "loss": 2.3794,
      "step": 55768
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0515079498291016,
      "learning_rate": 3.5209445571399025e-06,
      "loss": 2.5213,
      "step": 55769
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2351484298706055,
      "learning_rate": 3.520630934223883e-06,
      "loss": 2.3108,
      "step": 55770
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0535167455673218,
      "learning_rate": 3.520317322292158e-06,
      "loss": 2.2709,
      "step": 55771
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.146024465560913,
      "learning_rate": 3.5200037213452545e-06,
      "loss": 2.4098,
      "step": 55772
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.044987678527832,
      "learning_rate": 3.519690131383704e-06,
      "loss": 2.3322,
      "step": 55773
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1554505825042725,
      "learning_rate": 3.519376552408036e-06,
      "loss": 2.4004,
      "step": 55774
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0030561685562134,
      "learning_rate": 3.5190629844187886e-06,
      "loss": 2.0786,
      "step": 55775
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2291038036346436,
      "learning_rate": 3.518749427416486e-06,
      "loss": 2.2249,
      "step": 55776
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0421801805496216,
      "learning_rate": 3.518435881401666e-06,
      "loss": 2.2638,
      "step": 55777
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2083905935287476,
      "learning_rate": 3.5181223463748546e-06,
      "loss": 2.2332,
      "step": 55778
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.126714825630188,
      "learning_rate": 3.51780882233659e-06,
      "loss": 2.6054,
      "step": 55779
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2430603504180908,
      "learning_rate": 3.5174953092874e-06,
      "loss": 2.4553,
      "step": 55780
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.089760422706604,
      "learning_rate": 3.517181807227813e-06,
      "loss": 2.4658,
      "step": 55781
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.284706711769104,
      "learning_rate": 3.516868316158366e-06,
      "loss": 2.3012,
      "step": 55782
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.021418809890747,
      "learning_rate": 3.5165548360795855e-06,
      "loss": 2.3634,
      "step": 55783
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1399141550064087,
      "learning_rate": 3.516241366992008e-06,
      "loss": 2.2938,
      "step": 55784
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0182995796203613,
      "learning_rate": 3.515927908896163e-06,
      "loss": 2.1361,
      "step": 55785
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0629268884658813,
      "learning_rate": 3.515614461792577e-06,
      "loss": 2.3227,
      "step": 55786
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0558348894119263,
      "learning_rate": 3.5153010256817898e-06,
      "loss": 2.3523,
      "step": 55787
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0836308002471924,
      "learning_rate": 3.514987600564328e-06,
      "loss": 2.1994,
      "step": 55788
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1440975666046143,
      "learning_rate": 3.5146741864407207e-06,
      "loss": 2.3207,
      "step": 55789
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0211505889892578,
      "learning_rate": 3.5143607833115056e-06,
      "loss": 2.3052,
      "step": 55790
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.035465955734253,
      "learning_rate": 3.5140473911772067e-06,
      "loss": 2.3285,
      "step": 55791
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0157995223999023,
      "learning_rate": 3.513734010038362e-06,
      "loss": 2.1257,
      "step": 55792
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1457123756408691,
      "learning_rate": 3.5134206398954973e-06,
      "loss": 2.1747,
      "step": 55793
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2475719451904297,
      "learning_rate": 3.5131072807491486e-06,
      "loss": 2.5176,
      "step": 55794
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1226246356964111,
      "learning_rate": 3.512793932599843e-06,
      "loss": 2.2935,
      "step": 55795
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.080122470855713,
      "learning_rate": 3.5124805954481154e-06,
      "loss": 2.3569,
      "step": 55796
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0001205205917358,
      "learning_rate": 3.5121672692944932e-06,
      "loss": 2.518,
      "step": 55797
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.277129888534546,
      "learning_rate": 3.5118539541395112e-06,
      "loss": 2.3445,
      "step": 55798
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0588501691818237,
      "learning_rate": 3.5115406499837e-06,
      "loss": 2.3009,
      "step": 55799
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0448781251907349,
      "learning_rate": 3.5112273568275902e-06,
      "loss": 2.3118,
      "step": 55800
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0990451574325562,
      "learning_rate": 3.5109140746717087e-06,
      "loss": 2.2617,
      "step": 55801
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0340704917907715,
      "learning_rate": 3.5106008035165926e-06,
      "loss": 2.5629,
      "step": 55802
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1589176654815674,
      "learning_rate": 3.510287543362768e-06,
      "loss": 2.1945,
      "step": 55803
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1952539682388306,
      "learning_rate": 3.509974294210772e-06,
      "loss": 2.4005,
      "step": 55804
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1301181316375732,
      "learning_rate": 3.509661056061129e-06,
      "loss": 2.4803,
      "step": 55805
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.191640019416809,
      "learning_rate": 3.5093478289143766e-06,
      "loss": 2.2869,
      "step": 55806
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5016812086105347,
      "learning_rate": 3.5090346127710384e-06,
      "loss": 2.4472,
      "step": 55807
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0369857549667358,
      "learning_rate": 3.5087214076316534e-06,
      "loss": 2.2631,
      "step": 55808
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2302604913711548,
      "learning_rate": 3.508408213496749e-06,
      "loss": 2.4003,
      "step": 55809
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0912071466445923,
      "learning_rate": 3.5080950303668516e-06,
      "loss": 2.3145,
      "step": 55810
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2075753211975098,
      "learning_rate": 3.5077818582425006e-06,
      "loss": 2.6185,
      "step": 55811
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3624675273895264,
      "learning_rate": 3.507468697124222e-06,
      "loss": 2.4414,
      "step": 55812
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0654268264770508,
      "learning_rate": 3.507155547012544e-06,
      "loss": 2.2341,
      "step": 55813
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1718344688415527,
      "learning_rate": 3.5068424079080043e-06,
      "loss": 2.2659,
      "step": 55814
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1056370735168457,
      "learning_rate": 3.5065292798111296e-06,
      "loss": 2.3568,
      "step": 55815
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.039447546005249,
      "learning_rate": 3.5062161627224488e-06,
      "loss": 2.4045,
      "step": 55816
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0746055841445923,
      "learning_rate": 3.505903056642498e-06,
      "loss": 2.4411,
      "step": 55817
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0077242851257324,
      "learning_rate": 3.5055899615718026e-06,
      "loss": 2.3243,
      "step": 55818
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.113756775856018,
      "learning_rate": 3.5052768775108994e-06,
      "loss": 2.2764,
      "step": 55819
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0701323747634888,
      "learning_rate": 3.504963804460312e-06,
      "loss": 2.2365,
      "step": 55820
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1882429122924805,
      "learning_rate": 3.5046507424205776e-06,
      "loss": 2.3401,
      "step": 55821
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1308516263961792,
      "learning_rate": 3.504337691392222e-06,
      "loss": 2.3092,
      "step": 55822
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1589913368225098,
      "learning_rate": 3.5040246513757815e-06,
      "loss": 2.2416,
      "step": 55823
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.812814712524414,
      "learning_rate": 3.5037116223717806e-06,
      "loss": 2.2353,
      "step": 55824
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1766934394836426,
      "learning_rate": 3.503398604380759e-06,
      "loss": 2.3735,
      "step": 55825
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0762344598770142,
      "learning_rate": 3.503085597403234e-06,
      "loss": 2.0929,
      "step": 55826
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.338046908378601,
      "learning_rate": 3.502772601439748e-06,
      "loss": 2.2635,
      "step": 55827
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0351767539978027,
      "learning_rate": 3.502459616490823e-06,
      "loss": 2.2593,
      "step": 55828
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0596691370010376,
      "learning_rate": 3.5021466425569973e-06,
      "loss": 2.456,
      "step": 55829
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2020142078399658,
      "learning_rate": 3.501833679638794e-06,
      "loss": 2.4239,
      "step": 55830
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2563618421554565,
      "learning_rate": 3.501520727736751e-06,
      "loss": 2.3285,
      "step": 55831
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9980024099349976,
      "learning_rate": 3.5012077868513927e-06,
      "loss": 2.2467,
      "step": 55832
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1106735467910767,
      "learning_rate": 3.5008948569832547e-06,
      "loss": 2.3283,
      "step": 55833
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.073021411895752,
      "learning_rate": 3.500581938132862e-06,
      "loss": 2.3943,
      "step": 55834
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0611811876296997,
      "learning_rate": 3.500269030300751e-06,
      "loss": 2.2182,
      "step": 55835
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.130812168121338,
      "learning_rate": 3.49995613348745e-06,
      "loss": 2.2785,
      "step": 55836
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0501508712768555,
      "learning_rate": 3.499643247693485e-06,
      "loss": 2.2302,
      "step": 55837
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.268782138824463,
      "learning_rate": 3.499330372919394e-06,
      "loss": 2.2467,
      "step": 55838
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.097475528717041,
      "learning_rate": 3.499017509165703e-06,
      "loss": 2.0601,
      "step": 55839
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1779414415359497,
      "learning_rate": 3.4987046564329397e-06,
      "loss": 2.4156,
      "step": 55840
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3997719287872314,
      "learning_rate": 3.498391814721641e-06,
      "loss": 2.2891,
      "step": 55841
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0745192766189575,
      "learning_rate": 3.4980789840323346e-06,
      "loss": 2.363,
      "step": 55842
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0557128190994263,
      "learning_rate": 3.497766164365547e-06,
      "loss": 2.5537,
      "step": 55843
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.047637939453125,
      "learning_rate": 3.497453355721815e-06,
      "loss": 2.222,
      "step": 55844
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0588634014129639,
      "learning_rate": 3.4971405581016616e-06,
      "loss": 2.4049,
      "step": 55845
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.033205509185791,
      "learning_rate": 3.4968277715056242e-06,
      "loss": 2.259,
      "step": 55846
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0616317987442017,
      "learning_rate": 3.496514995934227e-06,
      "loss": 2.3919,
      "step": 55847
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1536320447921753,
      "learning_rate": 3.4962022313880074e-06,
      "loss": 2.4447,
      "step": 55848
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1853972673416138,
      "learning_rate": 3.495889477867487e-06,
      "loss": 2.3495,
      "step": 55849
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0637810230255127,
      "learning_rate": 3.495576735373204e-06,
      "loss": 2.2164,
      "step": 55850
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1263290643692017,
      "learning_rate": 3.495264003905685e-06,
      "loss": 2.3069,
      "step": 55851
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0288324356079102,
      "learning_rate": 3.4949512834654607e-06,
      "loss": 2.3523,
      "step": 55852
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1061463356018066,
      "learning_rate": 3.4946385740530576e-06,
      "loss": 2.1803,
      "step": 55853
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.021412968635559,
      "learning_rate": 3.4943258756690113e-06,
      "loss": 2.3807,
      "step": 55854
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1383955478668213,
      "learning_rate": 3.4940131883138464e-06,
      "loss": 2.2693,
      "step": 55855
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1408079862594604,
      "learning_rate": 3.4937005119880995e-06,
      "loss": 2.4275,
      "step": 55856
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1171789169311523,
      "learning_rate": 3.4933878466922945e-06,
      "loss": 2.2023,
      "step": 55857
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9761571288108826,
      "learning_rate": 3.4930751924269667e-06,
      "loss": 2.3965,
      "step": 55858
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.060041069984436,
      "learning_rate": 3.4927625491926422e-06,
      "loss": 2.2028,
      "step": 55859
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1017380952835083,
      "learning_rate": 3.492449916989854e-06,
      "loss": 2.4932,
      "step": 55860
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1178210973739624,
      "learning_rate": 3.492137295819128e-06,
      "loss": 2.5547,
      "step": 55861
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9885048270225525,
      "learning_rate": 3.491824685681e-06,
      "loss": 2.469,
      "step": 55862
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0665013790130615,
      "learning_rate": 3.4915120865759934e-06,
      "loss": 2.2102,
      "step": 55863
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.121142864227295,
      "learning_rate": 3.491199498504645e-06,
      "loss": 2.1487,
      "step": 55864
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1794695854187012,
      "learning_rate": 3.490886921467481e-06,
      "loss": 2.3634,
      "step": 55865
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0991363525390625,
      "learning_rate": 3.4905743554650316e-06,
      "loss": 2.2243,
      "step": 55866
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.050197958946228,
      "learning_rate": 3.490261800497824e-06,
      "loss": 2.5316,
      "step": 55867
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1420751810073853,
      "learning_rate": 3.489949256566393e-06,
      "loss": 2.487,
      "step": 55868
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0000348091125488,
      "learning_rate": 3.4896367236712625e-06,
      "loss": 2.2197,
      "step": 55869
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0511120557785034,
      "learning_rate": 3.48932420181297e-06,
      "loss": 2.2566,
      "step": 55870
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1166812181472778,
      "learning_rate": 3.4890116909920414e-06,
      "loss": 2.398,
      "step": 55871
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.02876615524292,
      "learning_rate": 3.4886991912090027e-06,
      "loss": 2.4817,
      "step": 55872
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4410481452941895,
      "learning_rate": 3.4883867024643902e-06,
      "loss": 2.3485,
      "step": 55873
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0855770111083984,
      "learning_rate": 3.4880742247587283e-06,
      "loss": 2.36,
      "step": 55874
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.04924476146698,
      "learning_rate": 3.4877617580925518e-06,
      "loss": 2.3432,
      "step": 55875
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1028138399124146,
      "learning_rate": 3.4874493024663846e-06,
      "loss": 2.239,
      "step": 55876
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1017247438430786,
      "learning_rate": 3.487136857880764e-06,
      "loss": 2.4952,
      "step": 55877
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3702360391616821,
      "learning_rate": 3.4868244243362135e-06,
      "loss": 2.378,
      "step": 55878
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.018236517906189,
      "learning_rate": 3.4865120018332663e-06,
      "loss": 2.1986,
      "step": 55879
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.108668327331543,
      "learning_rate": 3.4861995903724465e-06,
      "loss": 2.0817,
      "step": 55880
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3230454921722412,
      "learning_rate": 3.48588718995429e-06,
      "loss": 2.3518,
      "step": 55881
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2550315856933594,
      "learning_rate": 3.485574800579321e-06,
      "loss": 2.3247,
      "step": 55882
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0281858444213867,
      "learning_rate": 3.485262422248076e-06,
      "loss": 2.4998,
      "step": 55883
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.240668535232544,
      "learning_rate": 3.484950054961077e-06,
      "loss": 2.3315,
      "step": 55884
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.050689697265625,
      "learning_rate": 3.48463769871886e-06,
      "loss": 2.185,
      "step": 55885
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1466561555862427,
      "learning_rate": 3.484325353521948e-06,
      "loss": 2.3754,
      "step": 55886
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0544108152389526,
      "learning_rate": 3.484013019370879e-06,
      "loss": 2.1783,
      "step": 55887
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1682732105255127,
      "learning_rate": 3.4837006962661722e-06,
      "loss": 2.1369,
      "step": 55888
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0595580339431763,
      "learning_rate": 3.483388384208367e-06,
      "loss": 2.4713,
      "step": 55889
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1590954065322876,
      "learning_rate": 3.483076083197985e-06,
      "loss": 2.2283,
      "step": 55890
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0874485969543457,
      "learning_rate": 3.4827637932355664e-06,
      "loss": 2.1646,
      "step": 55891
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.124455451965332,
      "learning_rate": 3.4824515143216254e-06,
      "loss": 2.29,
      "step": 55892
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0081921815872192,
      "learning_rate": 3.4821392464567027e-06,
      "loss": 2.3063,
      "step": 55893
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.164900541305542,
      "learning_rate": 3.4818269896413215e-06,
      "loss": 2.434,
      "step": 55894
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0559755563735962,
      "learning_rate": 3.4815147438760166e-06,
      "loss": 2.4066,
      "step": 55895
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0541163682937622,
      "learning_rate": 3.4812025091613122e-06,
      "loss": 2.4833,
      "step": 55896
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9470100402832031,
      "learning_rate": 3.480890285497742e-06,
      "loss": 2.3877,
      "step": 55897
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0391818284988403,
      "learning_rate": 3.480578072885834e-06,
      "loss": 2.3143,
      "step": 55898
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0599133968353271,
      "learning_rate": 3.480265871326114e-06,
      "loss": 2.3636,
      "step": 55899
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2372190952301025,
      "learning_rate": 3.479953680819117e-06,
      "loss": 2.2707,
      "step": 55900
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0914382934570312,
      "learning_rate": 3.479641501365366e-06,
      "loss": 2.5401,
      "step": 55901
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0870469808578491,
      "learning_rate": 3.4793293329653965e-06,
      "loss": 2.158,
      "step": 55902
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.187897801399231,
      "learning_rate": 3.4790171756197323e-06,
      "loss": 2.3184,
      "step": 55903
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2075117826461792,
      "learning_rate": 3.478705029328907e-06,
      "loss": 2.5182,
      "step": 55904
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9723904728889465,
      "learning_rate": 3.4783928940934488e-06,
      "loss": 2.5519,
      "step": 55905
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1178544759750366,
      "learning_rate": 3.4780807699138865e-06,
      "loss": 2.2544,
      "step": 55906
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.136263132095337,
      "learning_rate": 3.4777686567907444e-06,
      "loss": 2.4491,
      "step": 55907
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1080070734024048,
      "learning_rate": 3.4774565547245587e-06,
      "loss": 2.4114,
      "step": 55908
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0434054136276245,
      "learning_rate": 3.4771444637158535e-06,
      "loss": 2.3228,
      "step": 55909
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1521780490875244,
      "learning_rate": 3.476832383765163e-06,
      "loss": 2.3159,
      "step": 55910
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1680713891983032,
      "learning_rate": 3.4765203148730096e-06,
      "loss": 2.2153,
      "step": 55911
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1021384000778198,
      "learning_rate": 3.476208257039929e-06,
      "loss": 2.2853,
      "step": 55912
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0033687353134155,
      "learning_rate": 3.4758962102664453e-06,
      "loss": 2.3577,
      "step": 55913
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0753512382507324,
      "learning_rate": 3.4755841745530915e-06,
      "loss": 2.277,
      "step": 55914
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1154274940490723,
      "learning_rate": 3.4752721499003914e-06,
      "loss": 2.3053,
      "step": 55915
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0631288290023804,
      "learning_rate": 3.4749601363088803e-06,
      "loss": 2.4411,
      "step": 55916
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3713796138763428,
      "learning_rate": 3.474648133779085e-06,
      "loss": 2.1538,
      "step": 55917
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9507830739021301,
      "learning_rate": 3.474336142311532e-06,
      "loss": 2.3239,
      "step": 55918
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9837794899940491,
      "learning_rate": 3.4740241619067496e-06,
      "loss": 2.2274,
      "step": 55919
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.030076503753662,
      "learning_rate": 3.4737121925652705e-06,
      "loss": 2.3739,
      "step": 55920
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0619748830795288,
      "learning_rate": 3.473400234287619e-06,
      "loss": 2.2193,
      "step": 55921
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1610760688781738,
      "learning_rate": 3.473088287074331e-06,
      "loss": 2.303,
      "step": 55922
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.061860203742981,
      "learning_rate": 3.4727763509259273e-06,
      "loss": 2.0461,
      "step": 55923
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1462175846099854,
      "learning_rate": 3.472464425842943e-06,
      "loss": 2.2914,
      "step": 55924
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4171597957611084,
      "learning_rate": 3.4721525118259014e-06,
      "loss": 2.3828,
      "step": 55925
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0970933437347412,
      "learning_rate": 3.4718406088753377e-06,
      "loss": 2.2124,
      "step": 55926
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1022590398788452,
      "learning_rate": 3.4715287169917766e-06,
      "loss": 2.4185,
      "step": 55927
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.251725673675537,
      "learning_rate": 3.471216836175745e-06,
      "loss": 2.403,
      "step": 55928
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1045960187911987,
      "learning_rate": 3.470904966427777e-06,
      "loss": 2.2943,
      "step": 55929
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0279541015625,
      "learning_rate": 3.470593107748398e-06,
      "loss": 2.4835,
      "step": 55930
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.078979730606079,
      "learning_rate": 3.470281260138134e-06,
      "loss": 2.3936,
      "step": 55931
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0800176858901978,
      "learning_rate": 3.4699694235975203e-06,
      "loss": 2.3385,
      "step": 55932
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2508147954940796,
      "learning_rate": 3.4696575981270818e-06,
      "loss": 2.6249,
      "step": 55933
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9942124485969543,
      "learning_rate": 3.4693457837273435e-06,
      "loss": 2.1144,
      "step": 55934
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0533052682876587,
      "learning_rate": 3.4690339803988426e-06,
      "loss": 2.1783,
      "step": 55935
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.066922903060913,
      "learning_rate": 3.4687221881420986e-06,
      "loss": 2.4964,
      "step": 55936
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1907079219818115,
      "learning_rate": 3.4684104069576474e-06,
      "loss": 2.2141,
      "step": 55937
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0855650901794434,
      "learning_rate": 3.468098636846011e-06,
      "loss": 2.2646,
      "step": 55938
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9824774861335754,
      "learning_rate": 3.467786877807725e-06,
      "loss": 2.2104,
      "step": 55939
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1097774505615234,
      "learning_rate": 3.4674751298433117e-06,
      "loss": 2.4228,
      "step": 55940
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1217283010482788,
      "learning_rate": 3.4671633929533053e-06,
      "loss": 2.3763,
      "step": 55941
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.080003023147583,
      "learning_rate": 3.466851667138228e-06,
      "loss": 2.1878,
      "step": 55942
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.175002098083496,
      "learning_rate": 3.4665399523986144e-06,
      "loss": 2.3386,
      "step": 55943
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0932776927947998,
      "learning_rate": 3.4662282487349895e-06,
      "loss": 2.2913,
      "step": 55944
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0953879356384277,
      "learning_rate": 3.4659165561478835e-06,
      "loss": 2.0838,
      "step": 55945
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0818601846694946,
      "learning_rate": 3.46560487463782e-06,
      "loss": 2.1171,
      "step": 55946
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1553813219070435,
      "learning_rate": 3.4652932042053335e-06,
      "loss": 2.4544,
      "step": 55947
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1095051765441895,
      "learning_rate": 3.4649815448509473e-06,
      "loss": 2.2617,
      "step": 55948
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0271706581115723,
      "learning_rate": 3.4646698965751948e-06,
      "loss": 2.4737,
      "step": 55949
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1020829677581787,
      "learning_rate": 3.4643582593785997e-06,
      "loss": 2.0851,
      "step": 55950
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1093592643737793,
      "learning_rate": 3.464046633261695e-06,
      "loss": 2.1789,
      "step": 55951
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1266798973083496,
      "learning_rate": 3.463735018225003e-06,
      "loss": 2.3552,
      "step": 55952
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1194177865982056,
      "learning_rate": 3.463423414269058e-06,
      "loss": 2.5978,
      "step": 55953
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0920140743255615,
      "learning_rate": 3.463111821394387e-06,
      "loss": 2.418,
      "step": 55954
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3440771102905273,
      "learning_rate": 3.4628002396015126e-06,
      "loss": 2.3612,
      "step": 55955
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0547868013381958,
      "learning_rate": 3.462488668890971e-06,
      "loss": 2.4829,
      "step": 55956
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1043012142181396,
      "learning_rate": 3.462177109263286e-06,
      "loss": 2.3351,
      "step": 55957
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0745781660079956,
      "learning_rate": 3.4618655607189843e-06,
      "loss": 2.3695,
      "step": 55958
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1745718717575073,
      "learning_rate": 3.4615540232585985e-06,
      "loss": 2.2389,
      "step": 55959
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2115269899368286,
      "learning_rate": 3.4612424968826553e-06,
      "loss": 2.6004,
      "step": 55960
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1528385877609253,
      "learning_rate": 3.460930981591678e-06,
      "loss": 2.2656,
      "step": 55961
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1569468975067139,
      "learning_rate": 3.4606194773862023e-06,
      "loss": 2.2676,
      "step": 55962
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.130230188369751,
      "learning_rate": 3.4603079842667496e-06,
      "loss": 2.1453,
      "step": 55963
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0675898790359497,
      "learning_rate": 3.4599965022338545e-06,
      "loss": 2.1582,
      "step": 55964
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1748294830322266,
      "learning_rate": 3.459685031288037e-06,
      "loss": 2.3091,
      "step": 55965
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0841197967529297,
      "learning_rate": 3.459373571429834e-06,
      "loss": 2.3924,
      "step": 55966
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1817773580551147,
      "learning_rate": 3.4590621226597655e-06,
      "loss": 2.3792,
      "step": 55967
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1051291227340698,
      "learning_rate": 3.458750684978367e-06,
      "loss": 2.1553,
      "step": 55968
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0662140846252441,
      "learning_rate": 3.458439258386159e-06,
      "loss": 2.2931,
      "step": 55969
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.312250018119812,
      "learning_rate": 3.4581278428836794e-06,
      "loss": 2.3284,
      "step": 55970
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0400911569595337,
      "learning_rate": 3.457816438471444e-06,
      "loss": 2.3852,
      "step": 55971
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2484720945358276,
      "learning_rate": 3.457505045149989e-06,
      "loss": 2.3546,
      "step": 55972
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0800902843475342,
      "learning_rate": 3.4571936629198378e-06,
      "loss": 2.3434,
      "step": 55973
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0608229637145996,
      "learning_rate": 3.4568822917815227e-06,
      "loss": 2.5579,
      "step": 55974
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0353033542633057,
      "learning_rate": 3.4565709317355657e-06,
      "loss": 2.169,
      "step": 55975
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3273528814315796,
      "learning_rate": 3.4562595827825017e-06,
      "loss": 2.3737,
      "step": 55976
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1773455142974854,
      "learning_rate": 3.455948244922852e-06,
      "loss": 2.1241,
      "step": 55977
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9976149201393127,
      "learning_rate": 3.4556369181571503e-06,
      "loss": 2.2067,
      "step": 55978
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0455125570297241,
      "learning_rate": 3.4553256024859183e-06,
      "loss": 2.4442,
      "step": 55979
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0055675506591797,
      "learning_rate": 3.45501429790969e-06,
      "loss": 2.2503,
      "step": 55980
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0115736722946167,
      "learning_rate": 3.4547030044289875e-06,
      "loss": 2.4214,
      "step": 55981
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.073448657989502,
      "learning_rate": 3.4543917220443436e-06,
      "loss": 2.2224,
      "step": 55982
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.021981120109558,
      "learning_rate": 3.454080450756284e-06,
      "loss": 2.1697,
      "step": 55983
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1127455234527588,
      "learning_rate": 3.4537691905653357e-06,
      "loss": 2.4076,
      "step": 55984
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1015679836273193,
      "learning_rate": 3.4534579414720238e-06,
      "loss": 2.3616,
      "step": 55985
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.035789132118225,
      "learning_rate": 3.4531467034768805e-06,
      "loss": 2.3682,
      "step": 55986
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1221367120742798,
      "learning_rate": 3.4528354765804295e-06,
      "loss": 2.2689,
      "step": 55987
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.073840618133545,
      "learning_rate": 3.4525242607832042e-06,
      "loss": 2.2759,
      "step": 55988
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1223857402801514,
      "learning_rate": 3.4522130560857283e-06,
      "loss": 2.1663,
      "step": 55989
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0961287021636963,
      "learning_rate": 3.4519018624885247e-06,
      "loss": 2.2139,
      "step": 55990
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9980652332305908,
      "learning_rate": 3.4515906799921304e-06,
      "loss": 2.1956,
      "step": 55991
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1269710063934326,
      "learning_rate": 3.4512795085970664e-06,
      "loss": 2.4227,
      "step": 55992
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.103790283203125,
      "learning_rate": 3.4509683483038638e-06,
      "loss": 2.1756,
      "step": 55993
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2140966653823853,
      "learning_rate": 3.4506571991130456e-06,
      "loss": 2.3918,
      "step": 55994
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5344042778015137,
      "learning_rate": 3.4503460610251462e-06,
      "loss": 2.2285,
      "step": 55995
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0575835704803467,
      "learning_rate": 3.450034934040688e-06,
      "loss": 2.3136,
      "step": 55996
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.049190640449524,
      "learning_rate": 3.4497238181602e-06,
      "loss": 2.3616,
      "step": 55997
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.182126760482788,
      "learning_rate": 3.449412713384206e-06,
      "loss": 2.237,
      "step": 55998
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0777244567871094,
      "learning_rate": 3.44910161971324e-06,
      "loss": 2.2138,
      "step": 55999
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1392974853515625,
      "learning_rate": 3.4487905371478227e-06,
      "loss": 2.2005,
      "step": 56000
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.038293480873108,
      "learning_rate": 3.448479465688488e-06,
      "loss": 2.1805,
      "step": 56001
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4214338064193726,
      "learning_rate": 3.4481684053357566e-06,
      "loss": 2.285,
      "step": 56002
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0587999820709229,
      "learning_rate": 3.447857356090162e-06,
      "loss": 2.4023,
      "step": 56003
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1327680349349976,
      "learning_rate": 3.447546317952226e-06,
      "loss": 2.1928,
      "step": 56004
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0878609418869019,
      "learning_rate": 3.4472352909224817e-06,
      "loss": 2.3984,
      "step": 56005
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1895536184310913,
      "learning_rate": 3.4469242750014498e-06,
      "loss": 2.4726,
      "step": 56006
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0896986722946167,
      "learning_rate": 3.446613270189665e-06,
      "loss": 2.1926,
      "step": 56007
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1091543436050415,
      "learning_rate": 3.4463022764876463e-06,
      "loss": 2.0978,
      "step": 56008
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0485830307006836,
      "learning_rate": 3.4459912938959294e-06,
      "loss": 2.2834,
      "step": 56009
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.032767653465271,
      "learning_rate": 3.445680322415037e-06,
      "loss": 2.4278,
      "step": 56010
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.189086675643921,
      "learning_rate": 3.4453693620454964e-06,
      "loss": 2.3944,
      "step": 56011
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.080815076828003,
      "learning_rate": 3.445058412787832e-06,
      "loss": 2.3158,
      "step": 56012
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.152409553527832,
      "learning_rate": 3.444747474642577e-06,
      "loss": 2.1613,
      "step": 56013
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.104820728302002,
      "learning_rate": 3.4444365476102514e-06,
      "loss": 2.3982,
      "step": 56014
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0552729368209839,
      "learning_rate": 3.4441256316913907e-06,
      "loss": 2.4473,
      "step": 56015
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.068832516670227,
      "learning_rate": 3.4438147268865176e-06,
      "loss": 2.395,
      "step": 56016
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.141356110572815,
      "learning_rate": 3.4435038331961546e-06,
      "loss": 2.1841,
      "step": 56017
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1068936586380005,
      "learning_rate": 3.4431929506208383e-06,
      "loss": 2.3448,
      "step": 56018
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0954792499542236,
      "learning_rate": 3.442882079161086e-06,
      "loss": 2.21,
      "step": 56019
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1019796133041382,
      "learning_rate": 3.442571218817434e-06,
      "loss": 2.0467,
      "step": 56020
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0451735258102417,
      "learning_rate": 3.442260369590401e-06,
      "loss": 2.3886,
      "step": 56021
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1280134916305542,
      "learning_rate": 3.4419495314805208e-06,
      "loss": 2.4491,
      "step": 56022
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1351683139801025,
      "learning_rate": 3.441638704488317e-06,
      "loss": 2.4987,
      "step": 56023
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9775418043136597,
      "learning_rate": 3.4413278886143173e-06,
      "loss": 2.5321,
      "step": 56024
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.125367283821106,
      "learning_rate": 3.441017083859045e-06,
      "loss": 2.3999,
      "step": 56025
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0160870552062988,
      "learning_rate": 3.4407062902230328e-06,
      "loss": 2.2603,
      "step": 56026
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0716538429260254,
      "learning_rate": 3.440395507706802e-06,
      "loss": 2.3116,
      "step": 56027
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2683167457580566,
      "learning_rate": 3.4400847363108857e-06,
      "loss": 2.6133,
      "step": 56028
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.022980809211731,
      "learning_rate": 3.4397739760358038e-06,
      "loss": 2.4888,
      "step": 56029
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.031386375427246,
      "learning_rate": 3.4394632268820904e-06,
      "loss": 2.3702,
      "step": 56030
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2641392946243286,
      "learning_rate": 3.4391524888502657e-06,
      "loss": 2.3443,
      "step": 56031
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.206613540649414,
      "learning_rate": 3.438841761940862e-06,
      "loss": 2.3248,
      "step": 56032
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0282459259033203,
      "learning_rate": 3.438531046154401e-06,
      "loss": 2.4818,
      "step": 56033
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0587620735168457,
      "learning_rate": 3.438220341491415e-06,
      "loss": 2.1478,
      "step": 56034
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2433161735534668,
      "learning_rate": 3.4379096479524243e-06,
      "loss": 2.3558,
      "step": 56035
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1001485586166382,
      "learning_rate": 3.437598965537965e-06,
      "loss": 2.264,
      "step": 56036
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0129578113555908,
      "learning_rate": 3.437288294248552e-06,
      "loss": 2.4907,
      "step": 56037
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9841143488883972,
      "learning_rate": 3.4369776340847205e-06,
      "loss": 2.3786,
      "step": 56038
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9898331165313721,
      "learning_rate": 3.4366669850469913e-06,
      "loss": 2.2781,
      "step": 56039
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0706491470336914,
      "learning_rate": 3.4363563471358975e-06,
      "loss": 2.3176,
      "step": 56040
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9762635231018066,
      "learning_rate": 3.436045720351959e-06,
      "loss": 2.4203,
      "step": 56041
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0179890394210815,
      "learning_rate": 3.4357351046957087e-06,
      "loss": 2.4755,
      "step": 56042
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1103025674819946,
      "learning_rate": 3.4354245001676677e-06,
      "loss": 2.5088,
      "step": 56043
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.157404899597168,
      "learning_rate": 3.435113906768368e-06,
      "loss": 2.3087,
      "step": 56044
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0416648387908936,
      "learning_rate": 3.4348033244983327e-06,
      "loss": 2.3743,
      "step": 56045
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0747463703155518,
      "learning_rate": 3.4344927533580863e-06,
      "loss": 2.2054,
      "step": 56046
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1662795543670654,
      "learning_rate": 3.4341821933481612e-06,
      "loss": 2.2635,
      "step": 56047
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0482343435287476,
      "learning_rate": 3.4338716444690766e-06,
      "loss": 2.2723,
      "step": 56048
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.164666771888733,
      "learning_rate": 3.433561106721366e-06,
      "loss": 2.3979,
      "step": 56049
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1127341985702515,
      "learning_rate": 3.433250580105554e-06,
      "loss": 2.537,
      "step": 56050
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.319495677947998,
      "learning_rate": 3.4329400646221646e-06,
      "loss": 2.3622,
      "step": 56051
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0933796167373657,
      "learning_rate": 3.432629560271722e-06,
      "loss": 2.4026,
      "step": 56052
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.115553617477417,
      "learning_rate": 3.4323190670547602e-06,
      "loss": 2.3873,
      "step": 56053
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2171710729599,
      "learning_rate": 3.4320085849717977e-06,
      "loss": 2.3748,
      "step": 56054
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.011385440826416,
      "learning_rate": 3.4316981140233674e-06,
      "loss": 2.4255,
      "step": 56055
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2334909439086914,
      "learning_rate": 3.43138765420999e-06,
      "loss": 2.4637,
      "step": 56056
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1434670686721802,
      "learning_rate": 3.4310772055321974e-06,
      "loss": 2.4392,
      "step": 56057
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.077322244644165,
      "learning_rate": 3.43076676799051e-06,
      "loss": 2.3848,
      "step": 56058
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0415003299713135,
      "learning_rate": 3.43045634158546e-06,
      "loss": 2.3983,
      "step": 56059
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1845402717590332,
      "learning_rate": 3.4301459263175674e-06,
      "loss": 2.291,
      "step": 56060
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.074077844619751,
      "learning_rate": 3.429835522187366e-06,
      "loss": 2.2707,
      "step": 56061
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9706197381019592,
      "learning_rate": 3.429525129195377e-06,
      "loss": 2.3346,
      "step": 56062
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0564182996749878,
      "learning_rate": 3.429214747342128e-06,
      "loss": 2.5428,
      "step": 56063
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0998343229293823,
      "learning_rate": 3.4289043766281403e-06,
      "loss": 2.2742,
      "step": 56064
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.052533507347107,
      "learning_rate": 3.428594017053949e-06,
      "loss": 2.4332,
      "step": 56065
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9601046442985535,
      "learning_rate": 3.4282836686200716e-06,
      "loss": 2.1881,
      "step": 56066
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0606735944747925,
      "learning_rate": 3.4279733313270415e-06,
      "loss": 2.3985,
      "step": 56067
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0436691045761108,
      "learning_rate": 3.4276630051753788e-06,
      "loss": 2.2785,
      "step": 56068
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1598082780838013,
      "learning_rate": 3.427352690165615e-06,
      "loss": 2.6337,
      "step": 56069
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9165665507316589,
      "learning_rate": 3.4270423862982706e-06,
      "loss": 2.4052,
      "step": 56070
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1460974216461182,
      "learning_rate": 3.4267320935738767e-06,
      "loss": 2.3981,
      "step": 56071
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.064893126487732,
      "learning_rate": 3.4264218119929593e-06,
      "loss": 2.3523,
      "step": 56072
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0930451154708862,
      "learning_rate": 3.4261115415560377e-06,
      "loss": 2.4077,
      "step": 56073
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2037326097488403,
      "learning_rate": 3.4258012822636454e-06,
      "loss": 2.1913,
      "step": 56074
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0754286050796509,
      "learning_rate": 3.425491034116306e-06,
      "loss": 2.1635,
      "step": 56075
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.05214262008667,
      "learning_rate": 3.4251807971145412e-06,
      "loss": 2.3598,
      "step": 56076
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.104041337966919,
      "learning_rate": 3.4248705712588847e-06,
      "loss": 2.4488,
      "step": 56077
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1530988216400146,
      "learning_rate": 3.4245603565498573e-06,
      "loss": 2.4004,
      "step": 56078
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0704361200332642,
      "learning_rate": 3.4242501529879835e-06,
      "loss": 2.2428,
      "step": 56079
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.178002119064331,
      "learning_rate": 3.4239399605737954e-06,
      "loss": 2.3537,
      "step": 56080
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0601072311401367,
      "learning_rate": 3.423629779307811e-06,
      "loss": 2.2319,
      "step": 56081
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1150277853012085,
      "learning_rate": 3.423319609190564e-06,
      "loss": 2.4331,
      "step": 56082
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3026894330978394,
      "learning_rate": 3.4230094502225717e-06,
      "loss": 2.4333,
      "step": 56083
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0102877616882324,
      "learning_rate": 3.42269930240437e-06,
      "loss": 2.1825,
      "step": 56084
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0711958408355713,
      "learning_rate": 3.422389165736475e-06,
      "loss": 2.2921,
      "step": 56085
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9961865544319153,
      "learning_rate": 3.4220790402194205e-06,
      "loss": 2.346,
      "step": 56086
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0100677013397217,
      "learning_rate": 3.4217689258537248e-06,
      "loss": 2.2839,
      "step": 56087
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0559290647506714,
      "learning_rate": 3.4214588226399216e-06,
      "loss": 2.2781,
      "step": 56088
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0360041856765747,
      "learning_rate": 3.421148730578531e-06,
      "loss": 2.3383,
      "step": 56089
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1074182987213135,
      "learning_rate": 3.420838649670081e-06,
      "loss": 2.061,
      "step": 56090
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0568989515304565,
      "learning_rate": 3.4205285799150934e-06,
      "loss": 2.2789,
      "step": 56091
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9846986532211304,
      "learning_rate": 3.4202185213140993e-06,
      "loss": 2.2476,
      "step": 56092
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3101816177368164,
      "learning_rate": 3.419908473867619e-06,
      "loss": 2.392,
      "step": 56093
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1217668056488037,
      "learning_rate": 3.4195984375761836e-06,
      "loss": 2.426,
      "step": 56094
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.106279969215393,
      "learning_rate": 3.4192884124403135e-06,
      "loss": 2.3207,
      "step": 56095
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0233938694000244,
      "learning_rate": 3.4189783984605407e-06,
      "loss": 2.2994,
      "step": 56096
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.115336537361145,
      "learning_rate": 3.4186683956373823e-06,
      "loss": 2.5256,
      "step": 56097
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1227227449417114,
      "learning_rate": 3.4183584039713725e-06,
      "loss": 2.2927,
      "step": 56098
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.090071201324463,
      "learning_rate": 3.4180484234630286e-06,
      "loss": 2.3661,
      "step": 56099
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.152690052986145,
      "learning_rate": 3.4177384541128844e-06,
      "loss": 2.2961,
      "step": 56100
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.111287236213684,
      "learning_rate": 3.417428495921461e-06,
      "loss": 2.3981,
      "step": 56101
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0277173519134521,
      "learning_rate": 3.4171185488892844e-06,
      "loss": 2.3252,
      "step": 56102
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1386600732803345,
      "learning_rate": 3.4168086130168766e-06,
      "loss": 2.4099,
      "step": 56103
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0760780572891235,
      "learning_rate": 3.416498688304769e-06,
      "loss": 2.3586,
      "step": 56104
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0165129899978638,
      "learning_rate": 3.4161887747534804e-06,
      "loss": 2.4826,
      "step": 56105
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.197126030921936,
      "learning_rate": 3.4158788723635438e-06,
      "loss": 2.4123,
      "step": 56106
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1214637756347656,
      "learning_rate": 3.415568981135481e-06,
      "loss": 2.4326,
      "step": 56107
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1048520803451538,
      "learning_rate": 3.4152591010698132e-06,
      "loss": 2.3269,
      "step": 56108
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0809309482574463,
      "learning_rate": 3.414949232167073e-06,
      "loss": 2.2796,
      "step": 56109
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.316593885421753,
      "learning_rate": 3.4146393744277794e-06,
      "loss": 2.3079,
      "step": 56110
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1716227531433105,
      "learning_rate": 3.4143295278524647e-06,
      "loss": 2.4365,
      "step": 56111
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.118557333946228,
      "learning_rate": 3.4140196924416456e-06,
      "loss": 2.5807,
      "step": 56112
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9204199314117432,
      "learning_rate": 3.4137098681958546e-06,
      "loss": 2.4852,
      "step": 56113
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.251168131828308,
      "learning_rate": 3.413400055115612e-06,
      "loss": 2.2381,
      "step": 56114
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0994924306869507,
      "learning_rate": 3.413090253201452e-06,
      "loss": 2.4099,
      "step": 56115
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.130025029182434,
      "learning_rate": 3.412780462453885e-06,
      "loss": 2.5196,
      "step": 56116
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0869053602218628,
      "learning_rate": 3.412470682873449e-06,
      "loss": 2.172,
      "step": 56117
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2297791242599487,
      "learning_rate": 3.4121609144606606e-06,
      "loss": 2.2796,
      "step": 56118
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.079636573791504,
      "learning_rate": 3.411851157216052e-06,
      "loss": 2.4902,
      "step": 56119
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0961596965789795,
      "learning_rate": 3.4115414111401414e-06,
      "loss": 2.343,
      "step": 56120
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0377122163772583,
      "learning_rate": 3.4112316762334607e-06,
      "loss": 2.3769,
      "step": 56121
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1304107904434204,
      "learning_rate": 3.410921952496529e-06,
      "loss": 2.211,
      "step": 56122
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0617727041244507,
      "learning_rate": 3.410612239929877e-06,
      "loss": 2.363,
      "step": 56123
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9481039047241211,
      "learning_rate": 3.4103025385340237e-06,
      "loss": 2.2112,
      "step": 56124
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1035338640213013,
      "learning_rate": 3.4099928483095e-06,
      "loss": 2.4092,
      "step": 56125
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9681121706962585,
      "learning_rate": 3.4096831692568267e-06,
      "loss": 2.169,
      "step": 56126
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0204603672027588,
      "learning_rate": 3.4093735013765327e-06,
      "loss": 2.4765,
      "step": 56127
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1230709552764893,
      "learning_rate": 3.4090638446691406e-06,
      "loss": 2.2784,
      "step": 56128
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.029823660850525,
      "learning_rate": 3.408754199135175e-06,
      "loss": 2.2509,
      "step": 56129
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0352927446365356,
      "learning_rate": 3.4084445647751573e-06,
      "loss": 2.5605,
      "step": 56130
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1044164896011353,
      "learning_rate": 3.4081349415896214e-06,
      "loss": 2.2678,
      "step": 56131
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1195929050445557,
      "learning_rate": 3.407825329579082e-06,
      "loss": 2.3442,
      "step": 56132
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0989716053009033,
      "learning_rate": 3.4075157287440743e-06,
      "loss": 2.1796,
      "step": 56133
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1159721612930298,
      "learning_rate": 3.4072061390851163e-06,
      "loss": 2.3628,
      "step": 56134
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1340811252593994,
      "learning_rate": 3.4068965606027314e-06,
      "loss": 2.3957,
      "step": 56135
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.028944492340088,
      "learning_rate": 3.4065869932974516e-06,
      "loss": 2.4142,
      "step": 56136
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.051916480064392,
      "learning_rate": 3.406277437169794e-06,
      "loss": 2.337,
      "step": 56137
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.08620285987854,
      "learning_rate": 3.40596789222029e-06,
      "loss": 2.486,
      "step": 56138
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1237398386001587,
      "learning_rate": 3.4056583584494583e-06,
      "loss": 2.1392,
      "step": 56139
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0368192195892334,
      "learning_rate": 3.405348835857829e-06,
      "loss": 2.3548,
      "step": 56140
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0027484893798828,
      "learning_rate": 3.4050393244459255e-06,
      "loss": 2.2171,
      "step": 56141
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.109978199005127,
      "learning_rate": 3.4047298242142713e-06,
      "loss": 2.5442,
      "step": 56142
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0545437335968018,
      "learning_rate": 3.4044203351633886e-06,
      "loss": 2.3546,
      "step": 56143
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.622712254524231,
      "learning_rate": 3.4041108572938074e-06,
      "loss": 2.4345,
      "step": 56144
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0117515325546265,
      "learning_rate": 3.4038013906060462e-06,
      "loss": 2.2956,
      "step": 56145
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0321604013442993,
      "learning_rate": 3.4034919351006366e-06,
      "loss": 2.2774,
      "step": 56146
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0503199100494385,
      "learning_rate": 3.4031824907780977e-06,
      "loss": 2.1032,
      "step": 56147
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1799649000167847,
      "learning_rate": 3.402873057638958e-06,
      "loss": 2.255,
      "step": 56148
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0430666208267212,
      "learning_rate": 3.4025636356837366e-06,
      "loss": 2.4449,
      "step": 56149
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1142479181289673,
      "learning_rate": 3.4022542249129664e-06,
      "loss": 2.3967,
      "step": 56150
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0281537771224976,
      "learning_rate": 3.4019448253271625e-06,
      "loss": 2.162,
      "step": 56151
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0344414710998535,
      "learning_rate": 3.401635436926859e-06,
      "loss": 2.398,
      "step": 56152
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1197326183319092,
      "learning_rate": 3.401326059712571e-06,
      "loss": 2.2945,
      "step": 56153
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1197081804275513,
      "learning_rate": 3.401016693684834e-06,
      "loss": 2.181,
      "step": 56154
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2243160009384155,
      "learning_rate": 3.4007073388441603e-06,
      "loss": 2.3336,
      "step": 56155
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2334009408950806,
      "learning_rate": 3.4003979951910827e-06,
      "loss": 2.273,
      "step": 56156
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0759241580963135,
      "learning_rate": 3.40008866272612e-06,
      "loss": 2.2175,
      "step": 56157
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0685099363327026,
      "learning_rate": 3.3997793414498024e-06,
      "loss": 2.2783,
      "step": 56158
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0426967144012451,
      "learning_rate": 3.3994700313626483e-06,
      "loss": 2.1729,
      "step": 56159
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1045860052108765,
      "learning_rate": 3.3991607324651887e-06,
      "loss": 2.3967,
      "step": 56160
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.03022301197052,
      "learning_rate": 3.39885144475794e-06,
      "loss": 2.0952,
      "step": 56161
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0804387331008911,
      "learning_rate": 3.3985421682414355e-06,
      "loss": 2.241,
      "step": 56162
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.146925687789917,
      "learning_rate": 3.398232902916194e-06,
      "loss": 2.3514,
      "step": 56163
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0879133939743042,
      "learning_rate": 3.3979236487827384e-06,
      "loss": 2.228,
      "step": 56164
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0832213163375854,
      "learning_rate": 3.3976144058415983e-06,
      "loss": 2.2648,
      "step": 56165
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0507835149765015,
      "learning_rate": 3.3973051740932918e-06,
      "loss": 2.3158,
      "step": 56166
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.104958176612854,
      "learning_rate": 3.396995953538349e-06,
      "loss": 2.284,
      "step": 56167
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0408103466033936,
      "learning_rate": 3.3966867441772923e-06,
      "loss": 2.4374,
      "step": 56168
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1427372694015503,
      "learning_rate": 3.396377546010644e-06,
      "loss": 2.3438,
      "step": 56169
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0096004009246826,
      "learning_rate": 3.3960683590389263e-06,
      "loss": 2.3242,
      "step": 56170
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1696640253067017,
      "learning_rate": 3.39575918326267e-06,
      "loss": 2.2445,
      "step": 56171
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.170648217201233,
      "learning_rate": 3.3954500186823923e-06,
      "loss": 2.4516,
      "step": 56172
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1282166242599487,
      "learning_rate": 3.395140865298624e-06,
      "loss": 2.6742,
      "step": 56173
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1549898386001587,
      "learning_rate": 3.394831723111882e-06,
      "loss": 2.2663,
      "step": 56174
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0231335163116455,
      "learning_rate": 3.3945225921226975e-06,
      "loss": 2.1399,
      "step": 56175
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1823831796646118,
      "learning_rate": 3.394213472331589e-06,
      "loss": 2.5098,
      "step": 56176
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1931570768356323,
      "learning_rate": 3.3939043637390846e-06,
      "loss": 2.2482,
      "step": 56177
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1377933025360107,
      "learning_rate": 3.3935952663457038e-06,
      "loss": 2.2267,
      "step": 56178
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1376867294311523,
      "learning_rate": 3.393286180151977e-06,
      "loss": 2.2509,
      "step": 56179
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0936168432235718,
      "learning_rate": 3.3929771051584204e-06,
      "loss": 2.3607,
      "step": 56180
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.205662488937378,
      "learning_rate": 3.392668041365569e-06,
      "loss": 2.2112,
      "step": 56181
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0895612239837646,
      "learning_rate": 3.392358988773933e-06,
      "loss": 2.3963,
      "step": 56182
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1991890668869019,
      "learning_rate": 3.3920499473840463e-06,
      "loss": 2.4382,
      "step": 56183
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1021077632904053,
      "learning_rate": 3.3917409171964267e-06,
      "loss": 2.3198,
      "step": 56184
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1231138706207275,
      "learning_rate": 3.3914318982116046e-06,
      "loss": 2.2464,
      "step": 56185
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1189837455749512,
      "learning_rate": 3.3911228904300965e-06,
      "loss": 2.4183,
      "step": 56186
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0578179359436035,
      "learning_rate": 3.3908138938524337e-06,
      "loss": 2.2494,
      "step": 56187
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1537493467330933,
      "learning_rate": 3.390504908479133e-06,
      "loss": 2.2348,
      "step": 56188
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.177842378616333,
      "learning_rate": 3.3901959343107247e-06,
      "loss": 2.2908,
      "step": 56189
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0878360271453857,
      "learning_rate": 3.389886971347729e-06,
      "loss": 2.1811,
      "step": 56190
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.01111900806427,
      "learning_rate": 3.389578019590668e-06,
      "loss": 2.4744,
      "step": 56191
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.026890754699707,
      "learning_rate": 3.3892690790400706e-06,
      "loss": 2.3156,
      "step": 56192
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1065617799758911,
      "learning_rate": 3.388960149696454e-06,
      "loss": 2.2827,
      "step": 56193
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0638959407806396,
      "learning_rate": 3.388651231560348e-06,
      "loss": 2.3656,
      "step": 56194
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0027823448181152,
      "learning_rate": 3.3883423246322754e-06,
      "loss": 2.3195,
      "step": 56195
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2088360786437988,
      "learning_rate": 3.388033428912757e-06,
      "loss": 2.5185,
      "step": 56196
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0262995958328247,
      "learning_rate": 3.3877245444023155e-06,
      "loss": 2.2499,
      "step": 56197
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1785258054733276,
      "learning_rate": 3.3874156711014794e-06,
      "loss": 2.0825,
      "step": 56198
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0693470239639282,
      "learning_rate": 3.387106809010766e-06,
      "loss": 2.2834,
      "step": 56199
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0331733226776123,
      "learning_rate": 3.3867979581307064e-06,
      "loss": 2.3404,
      "step": 56200
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1199864149093628,
      "learning_rate": 3.3864891184618166e-06,
      "loss": 2.404,
      "step": 56201
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1083017587661743,
      "learning_rate": 3.386180290004628e-06,
      "loss": 2.2517,
      "step": 56202
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1122732162475586,
      "learning_rate": 3.3858714727596563e-06,
      "loss": 2.19,
      "step": 56203
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1296029090881348,
      "learning_rate": 3.385562666727432e-06,
      "loss": 2.1148,
      "step": 56204
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1295119524002075,
      "learning_rate": 3.3852538719084726e-06,
      "loss": 2.4644,
      "step": 56205
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1244689226150513,
      "learning_rate": 3.3849450883033073e-06,
      "loss": 2.4056,
      "step": 56206
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3678920269012451,
      "learning_rate": 3.3846363159124572e-06,
      "loss": 2.3159,
      "step": 56207
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.011161208152771,
      "learning_rate": 3.3843275547364453e-06,
      "loss": 2.2743,
      "step": 56208
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1543569564819336,
      "learning_rate": 3.3840188047757917e-06,
      "loss": 2.1137,
      "step": 56209
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1137027740478516,
      "learning_rate": 3.3837100660310253e-06,
      "loss": 2.3205,
      "step": 56210
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0293439626693726,
      "learning_rate": 3.3834013385026654e-06,
      "loss": 2.3138,
      "step": 56211
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0351506471633911,
      "learning_rate": 3.3830926221912397e-06,
      "loss": 2.2319,
      "step": 56212
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.074447512626648,
      "learning_rate": 3.3827839170972665e-06,
      "loss": 2.2189,
      "step": 56213
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0592100620269775,
      "learning_rate": 3.3824752232212745e-06,
      "loss": 2.2408,
      "step": 56214
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0789169073104858,
      "learning_rate": 3.3821665405637806e-06,
      "loss": 2.2573,
      "step": 56215
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0836862325668335,
      "learning_rate": 3.3818578691253157e-06,
      "loss": 2.2502,
      "step": 56216
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0307775735855103,
      "learning_rate": 3.3815492089063963e-06,
      "loss": 2.4075,
      "step": 56217
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.285810947418213,
      "learning_rate": 3.3812405599075515e-06,
      "loss": 2.419,
      "step": 56218
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0149831771850586,
      "learning_rate": 3.3809319221293013e-06,
      "loss": 2.2286,
      "step": 56219
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.061739444732666,
      "learning_rate": 3.3806232955721697e-06,
      "loss": 2.283,
      "step": 56220
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1969817876815796,
      "learning_rate": 3.380314680236675e-06,
      "loss": 2.41,
      "step": 56221
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0728288888931274,
      "learning_rate": 3.380006076123349e-06,
      "loss": 2.4146,
      "step": 56222
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1043671369552612,
      "learning_rate": 3.379697483232707e-06,
      "loss": 2.3214,
      "step": 56223
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1736948490142822,
      "learning_rate": 3.3793889015652794e-06,
      "loss": 2.2163,
      "step": 56224
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0409622192382812,
      "learning_rate": 3.379080331121586e-06,
      "loss": 2.0573,
      "step": 56225
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1139367818832397,
      "learning_rate": 3.3787717719021463e-06,
      "loss": 2.4379,
      "step": 56226
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0796219110488892,
      "learning_rate": 3.3784632239074887e-06,
      "loss": 2.2665,
      "step": 56227
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.042874813079834,
      "learning_rate": 3.378154687138133e-06,
      "loss": 2.3741,
      "step": 56228
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.111890435218811,
      "learning_rate": 3.377846161594606e-06,
      "loss": 2.3752,
      "step": 56229
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1168103218078613,
      "learning_rate": 3.3775376472774246e-06,
      "loss": 2.3038,
      "step": 56230
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0946903228759766,
      "learning_rate": 3.3772291441871196e-06,
      "loss": 2.206,
      "step": 56231
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.073061227798462,
      "learning_rate": 3.376920652324206e-06,
      "loss": 2.3298,
      "step": 56232
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0615185499191284,
      "learning_rate": 3.376612171689214e-06,
      "loss": 2.1503,
      "step": 56233
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1824924945831299,
      "learning_rate": 3.376303702282664e-06,
      "loss": 2.112,
      "step": 56234
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.144314169883728,
      "learning_rate": 3.375995244105077e-06,
      "loss": 2.4391,
      "step": 56235
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1463992595672607,
      "learning_rate": 3.375686797156975e-06,
      "loss": 2.3627,
      "step": 56236
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.127699851989746,
      "learning_rate": 3.375378361438886e-06,
      "loss": 2.1893,
      "step": 56237
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.087647557258606,
      "learning_rate": 3.3750699369513263e-06,
      "loss": 2.2978,
      "step": 56238
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9700583815574646,
      "learning_rate": 3.374761523694826e-06,
      "loss": 2.2349,
      "step": 56239
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.7941324710845947,
      "learning_rate": 3.3744531216699007e-06,
      "loss": 2.2949,
      "step": 56240
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0351672172546387,
      "learning_rate": 3.3741447308770805e-06,
      "loss": 2.3518,
      "step": 56241
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.098836898803711,
      "learning_rate": 3.3738363513168806e-06,
      "loss": 2.3676,
      "step": 56242
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.980262279510498,
      "learning_rate": 3.3735279829898317e-06,
      "loss": 2.3619,
      "step": 56243
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9786750674247742,
      "learning_rate": 3.373219625896449e-06,
      "loss": 2.2799,
      "step": 56244
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1172388792037964,
      "learning_rate": 3.372911280037262e-06,
      "loss": 2.3371,
      "step": 56245
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.04088294506073,
      "learning_rate": 3.3726029454127906e-06,
      "loss": 2.5158,
      "step": 56246
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.010642170906067,
      "learning_rate": 3.3722946220235565e-06,
      "loss": 2.2465,
      "step": 56247
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.011861801147461,
      "learning_rate": 3.37198630987008e-06,
      "loss": 2.3858,
      "step": 56248
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2642772197723389,
      "learning_rate": 3.371678008952891e-06,
      "loss": 2.4377,
      "step": 56249
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1024175882339478,
      "learning_rate": 3.3713697192725036e-06,
      "loss": 2.2602,
      "step": 56250
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1397477388381958,
      "learning_rate": 3.3710614408294482e-06,
      "loss": 2.36,
      "step": 56251
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1849040985107422,
      "learning_rate": 3.370753173624245e-06,
      "loss": 2.3312,
      "step": 56252
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0362722873687744,
      "learning_rate": 3.3704449176574116e-06,
      "loss": 2.0407,
      "step": 56253
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.173222303390503,
      "learning_rate": 3.3701366729294782e-06,
      "loss": 2.4209,
      "step": 56254
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.115953803062439,
      "learning_rate": 3.369828439440961e-06,
      "loss": 2.4266,
      "step": 56255
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0144965648651123,
      "learning_rate": 3.369520217192388e-06,
      "loss": 2.6182,
      "step": 56256
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2411521673202515,
      "learning_rate": 3.369212006184276e-06,
      "loss": 2.2535,
      "step": 56257
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4808558225631714,
      "learning_rate": 3.3689038064171532e-06,
      "loss": 2.2643,
      "step": 56258
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0553364753723145,
      "learning_rate": 3.368595617891537e-06,
      "loss": 2.3748,
      "step": 56259
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.099808931350708,
      "learning_rate": 3.368287440607958e-06,
      "loss": 2.3347,
      "step": 56260
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.048209309577942,
      "learning_rate": 3.367979274566927e-06,
      "loss": 2.2853,
      "step": 56261
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.058714747428894,
      "learning_rate": 3.367671119768976e-06,
      "loss": 2.5583,
      "step": 56262
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9803980588912964,
      "learning_rate": 3.36736297621462e-06,
      "loss": 2.3605,
      "step": 56263
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1036876440048218,
      "learning_rate": 3.367054843904388e-06,
      "loss": 2.5847,
      "step": 56264
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.192275881767273,
      "learning_rate": 3.3667467228387974e-06,
      "loss": 2.5908,
      "step": 56265
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1183369159698486,
      "learning_rate": 3.3664386130183745e-06,
      "loss": 2.1589,
      "step": 56266
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0573409795761108,
      "learning_rate": 3.3661305144436375e-06,
      "loss": 2.4916,
      "step": 56267
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.027240514755249,
      "learning_rate": 3.365822427115114e-06,
      "loss": 2.4363,
      "step": 56268
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.123896837234497,
      "learning_rate": 3.36551435103332e-06,
      "loss": 2.3183,
      "step": 56269
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1525322198867798,
      "learning_rate": 3.3652062861987844e-06,
      "loss": 2.2858,
      "step": 56270
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0562870502471924,
      "learning_rate": 3.364898232612023e-06,
      "loss": 2.4048,
      "step": 56271
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.173373818397522,
      "learning_rate": 3.3645901902735647e-06,
      "loss": 2.3106,
      "step": 56272
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.421281099319458,
      "learning_rate": 3.364282159183927e-06,
      "loss": 2.2422,
      "step": 56273
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1201549768447876,
      "learning_rate": 3.3639741393436343e-06,
      "loss": 2.3596,
      "step": 56274
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9794691801071167,
      "learning_rate": 3.363666130753205e-06,
      "loss": 2.3044,
      "step": 56275
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1032625436782837,
      "learning_rate": 3.363358133413166e-06,
      "loss": 2.4705,
      "step": 56276
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.009281039237976,
      "learning_rate": 3.363050147324034e-06,
      "loss": 2.2067,
      "step": 56277
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1024857759475708,
      "learning_rate": 3.3627421724863397e-06,
      "loss": 2.3585,
      "step": 56278
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.061410903930664,
      "learning_rate": 3.3624342089005944e-06,
      "loss": 2.3326,
      "step": 56279
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0923107862472534,
      "learning_rate": 3.3621262565673307e-06,
      "loss": 2.3554,
      "step": 56280
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.07315993309021,
      "learning_rate": 3.3618183154870653e-06,
      "loss": 2.2919,
      "step": 56281
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1322118043899536,
      "learning_rate": 3.3615103856603183e-06,
      "loss": 2.2118,
      "step": 56282
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.182756781578064,
      "learning_rate": 3.3612024670876165e-06,
      "loss": 2.0314,
      "step": 56283
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.200072169303894,
      "learning_rate": 3.3608945597694762e-06,
      "loss": 2.258,
      "step": 56284
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1118512153625488,
      "learning_rate": 3.3605866637064277e-06,
      "loss": 1.8961,
      "step": 56285
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.228275179862976,
      "learning_rate": 3.3602787788989867e-06,
      "loss": 2.5493,
      "step": 56286
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0849393606185913,
      "learning_rate": 3.359970905347676e-06,
      "loss": 2.4574,
      "step": 56287
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.014194130897522,
      "learning_rate": 3.3596630430530155e-06,
      "loss": 2.5747,
      "step": 56288
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0465784072875977,
      "learning_rate": 3.3593551920155334e-06,
      "loss": 2.3443,
      "step": 56289
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1781257390975952,
      "learning_rate": 3.3590473522357434e-06,
      "loss": 2.4393,
      "step": 56290
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1869192123413086,
      "learning_rate": 3.3587395237141763e-06,
      "loss": 2.3033,
      "step": 56291
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0738282203674316,
      "learning_rate": 3.358431706451345e-06,
      "loss": 2.2569,
      "step": 56292
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0817930698394775,
      "learning_rate": 3.3581239004477793e-06,
      "loss": 2.1286,
      "step": 56293
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.130254864692688,
      "learning_rate": 3.357816105703995e-06,
      "loss": 2.347,
      "step": 56294
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1306407451629639,
      "learning_rate": 3.3575083222205186e-06,
      "loss": 2.1993,
      "step": 56295
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0989398956298828,
      "learning_rate": 3.357200549997868e-06,
      "loss": 2.5431,
      "step": 56296
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1271287202835083,
      "learning_rate": 3.3568927890365677e-06,
      "loss": 2.4248,
      "step": 56297
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.036681056022644,
      "learning_rate": 3.3565850393371367e-06,
      "loss": 2.2913,
      "step": 56298
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.142623782157898,
      "learning_rate": 3.356277300900105e-06,
      "loss": 2.3346,
      "step": 56299
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0380655527114868,
      "learning_rate": 3.3559695737259803e-06,
      "loss": 2.5134,
      "step": 56300
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.034899115562439,
      "learning_rate": 3.3556618578152954e-06,
      "loss": 2.2537,
      "step": 56301
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0349385738372803,
      "learning_rate": 3.355354153168564e-06,
      "loss": 2.3383,
      "step": 56302
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.233316421508789,
      "learning_rate": 3.3550464597863165e-06,
      "loss": 2.1244,
      "step": 56303
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.995765209197998,
      "learning_rate": 3.3547387776690664e-06,
      "loss": 2.5048,
      "step": 56304
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1680138111114502,
      "learning_rate": 3.354431106817342e-06,
      "loss": 2.0221,
      "step": 56305
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0253411531448364,
      "learning_rate": 3.3541234472316587e-06,
      "loss": 2.0508,
      "step": 56306
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0532734394073486,
      "learning_rate": 3.353815798912544e-06,
      "loss": 2.2784,
      "step": 56307
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1924303770065308,
      "learning_rate": 3.353508161860517e-06,
      "loss": 2.2775,
      "step": 56308
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1207168102264404,
      "learning_rate": 3.3532005360760955e-06,
      "loss": 2.3717,
      "step": 56309
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1490697860717773,
      "learning_rate": 3.352892921559807e-06,
      "loss": 2.0416,
      "step": 56310
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0935767889022827,
      "learning_rate": 3.3525853183121683e-06,
      "loss": 2.3501,
      "step": 56311
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1655584573745728,
      "learning_rate": 3.3522777263337047e-06,
      "loss": 2.2354,
      "step": 56312
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0879793167114258,
      "learning_rate": 3.351970145624938e-06,
      "loss": 2.4222,
      "step": 56313
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0392752885818481,
      "learning_rate": 3.351662576186385e-06,
      "loss": 2.4365,
      "step": 56314
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0565708875656128,
      "learning_rate": 3.3513550180185673e-06,
      "loss": 2.2483,
      "step": 56315
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0478812456130981,
      "learning_rate": 3.351047471122012e-06,
      "loss": 2.3215,
      "step": 56316
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0474659204483032,
      "learning_rate": 3.350739935497234e-06,
      "loss": 2.5009,
      "step": 56317
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.116056203842163,
      "learning_rate": 3.3504324111447606e-06,
      "loss": 2.4566,
      "step": 56318
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0695165395736694,
      "learning_rate": 3.350124898065108e-06,
      "loss": 2.5088,
      "step": 56319
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1429003477096558,
      "learning_rate": 3.349817396258802e-06,
      "loss": 2.2512,
      "step": 56320
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.995745062828064,
      "learning_rate": 3.349509905726358e-06,
      "loss": 2.4557,
      "step": 56321
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0496571063995361,
      "learning_rate": 3.349202426468305e-06,
      "loss": 2.1691,
      "step": 56322
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0481222867965698,
      "learning_rate": 3.348894958485157e-06,
      "loss": 2.2382,
      "step": 56323
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.119938611984253,
      "learning_rate": 3.3485875017774414e-06,
      "loss": 2.187,
      "step": 56324
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0339244604110718,
      "learning_rate": 3.348280056345674e-06,
      "loss": 2.3005,
      "step": 56325
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1217024326324463,
      "learning_rate": 3.347972622190384e-06,
      "loss": 2.2117,
      "step": 56326
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.114010214805603,
      "learning_rate": 3.347665199312081e-06,
      "loss": 2.302,
      "step": 56327
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1166528463363647,
      "learning_rate": 3.347357787711295e-06,
      "loss": 2.2907,
      "step": 56328
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2325537204742432,
      "learning_rate": 3.347050387388542e-06,
      "loss": 2.1202,
      "step": 56329
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0975197553634644,
      "learning_rate": 3.3467429983443477e-06,
      "loss": 2.4349,
      "step": 56330
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.008589744567871,
      "learning_rate": 3.3464356205792294e-06,
      "loss": 2.3551,
      "step": 56331
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0318645238876343,
      "learning_rate": 3.3461282540937113e-06,
      "loss": 2.3682,
      "step": 56332
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0260684490203857,
      "learning_rate": 3.345820898888311e-06,
      "loss": 2.2178,
      "step": 56333
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2001326084136963,
      "learning_rate": 3.3455135549635544e-06,
      "loss": 2.2944,
      "step": 56334
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1999047994613647,
      "learning_rate": 3.3452062223199553e-06,
      "loss": 2.3692,
      "step": 56335
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9464874267578125,
      "learning_rate": 3.3448989009580436e-06,
      "loss": 2.5575,
      "step": 56336
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0215097665786743,
      "learning_rate": 3.3445915908783354e-06,
      "loss": 2.3712,
      "step": 56337
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.072420597076416,
      "learning_rate": 3.344284292081349e-06,
      "loss": 2.2372,
      "step": 56338
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1908243894577026,
      "learning_rate": 3.3439770045676113e-06,
      "loss": 2.2757,
      "step": 56339
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1302417516708374,
      "learning_rate": 3.3436697283376406e-06,
      "loss": 2.1593,
      "step": 56340
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9614532589912415,
      "learning_rate": 3.3433624633919547e-06,
      "loss": 2.2798,
      "step": 56341
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0627422332763672,
      "learning_rate": 3.34305520973108e-06,
      "loss": 2.3173,
      "step": 56342
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1236785650253296,
      "learning_rate": 3.3427479673555343e-06,
      "loss": 2.3557,
      "step": 56343
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.036690354347229,
      "learning_rate": 3.342440736265836e-06,
      "loss": 2.3142,
      "step": 56344
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0296473503112793,
      "learning_rate": 3.3421335164625124e-06,
      "loss": 2.4038,
      "step": 56345
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.146640419960022,
      "learning_rate": 3.341826307946078e-06,
      "loss": 2.2946,
      "step": 56346
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0755813121795654,
      "learning_rate": 3.3415191107170585e-06,
      "loss": 2.2116,
      "step": 56347
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0653520822525024,
      "learning_rate": 3.34121192477597e-06,
      "loss": 2.4086,
      "step": 56348
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.095353126525879,
      "learning_rate": 3.3409047501233383e-06,
      "loss": 2.4023,
      "step": 56349
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0360326766967773,
      "learning_rate": 3.3405975867596786e-06,
      "loss": 2.5115,
      "step": 56350
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.119330883026123,
      "learning_rate": 3.340290434685518e-06,
      "loss": 2.0243,
      "step": 56351
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0150086879730225,
      "learning_rate": 3.3399832939013744e-06,
      "loss": 2.4356,
      "step": 56352
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.075081467628479,
      "learning_rate": 3.339676164407767e-06,
      "loss": 2.4777,
      "step": 56353
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.095271110534668,
      "learning_rate": 3.3393690462052152e-06,
      "loss": 2.4421,
      "step": 56354
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9060110449790955,
      "learning_rate": 3.339061939294245e-06,
      "loss": 2.3772,
      "step": 56355
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.093005657196045,
      "learning_rate": 3.3387548436753693e-06,
      "loss": 2.312,
      "step": 56356
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.036044716835022,
      "learning_rate": 3.3384477593491184e-06,
      "loss": 2.2532,
      "step": 56357
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.117306113243103,
      "learning_rate": 3.3381406863160024e-06,
      "loss": 2.0856,
      "step": 56358
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1608833074569702,
      "learning_rate": 3.337833624576553e-06,
      "loss": 2.1815,
      "step": 56359
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2326674461364746,
      "learning_rate": 3.3375265741312802e-06,
      "loss": 2.3043,
      "step": 56360
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9729488492012024,
      "learning_rate": 3.3372195349807125e-06,
      "loss": 2.2565,
      "step": 56361
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.106026291847229,
      "learning_rate": 3.336912507125365e-06,
      "loss": 2.2873,
      "step": 56362
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.009433388710022,
      "learning_rate": 3.336605490565763e-06,
      "loss": 2.4937,
      "step": 56363
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0028630495071411,
      "learning_rate": 3.336298485302425e-06,
      "loss": 2.2748,
      "step": 56364
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1122500896453857,
      "learning_rate": 3.3359914913358702e-06,
      "loss": 2.2228,
      "step": 56365
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.040963053703308,
      "learning_rate": 3.335684508666617e-06,
      "loss": 2.4106,
      "step": 56366
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0089590549468994,
      "learning_rate": 3.3353775372951914e-06,
      "loss": 2.3775,
      "step": 56367
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0219347476959229,
      "learning_rate": 3.335070577222108e-06,
      "loss": 2.1121,
      "step": 56368
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.283658742904663,
      "learning_rate": 3.3347636284478937e-06,
      "loss": 2.3359,
      "step": 56369
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0437898635864258,
      "learning_rate": 3.3344566909730645e-06,
      "loss": 2.5037,
      "step": 56370
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1185381412506104,
      "learning_rate": 3.3341497647981393e-06,
      "loss": 2.216,
      "step": 56371
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1034491062164307,
      "learning_rate": 3.333842849923643e-06,
      "loss": 2.2464,
      "step": 56372
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0787603855133057,
      "learning_rate": 3.3335359463500915e-06,
      "loss": 2.3166,
      "step": 56373
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0833503007888794,
      "learning_rate": 3.3332290540780086e-06,
      "loss": 2.2883,
      "step": 56374
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0637439489364624,
      "learning_rate": 3.332922173107912e-06,
      "loss": 2.2972,
      "step": 56375
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.074238657951355,
      "learning_rate": 3.332615303440325e-06,
      "loss": 2.617,
      "step": 56376
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1376839876174927,
      "learning_rate": 3.3323084450757627e-06,
      "loss": 2.0955,
      "step": 56377
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0131189823150635,
      "learning_rate": 3.332001598014751e-06,
      "loss": 2.2034,
      "step": 56378
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.111045241355896,
      "learning_rate": 3.331694762257809e-06,
      "loss": 2.2827,
      "step": 56379
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.030435562133789,
      "learning_rate": 3.331387937805456e-06,
      "loss": 2.4625,
      "step": 56380
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.244293212890625,
      "learning_rate": 3.3310811246582063e-06,
      "loss": 2.4763,
      "step": 56381
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.099576473236084,
      "learning_rate": 3.33077432281659e-06,
      "loss": 2.352,
      "step": 56382
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.135453224182129,
      "learning_rate": 3.3304675322811185e-06,
      "loss": 2.5277,
      "step": 56383
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1463786363601685,
      "learning_rate": 3.3301607530523205e-06,
      "loss": 2.2009,
      "step": 56384
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.11541748046875,
      "learning_rate": 3.3298539851307064e-06,
      "loss": 2.4324,
      "step": 56385
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2592177391052246,
      "learning_rate": 3.3295472285168064e-06,
      "loss": 2.3234,
      "step": 56386
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0979328155517578,
      "learning_rate": 3.3292404832111313e-06,
      "loss": 2.3486,
      "step": 56387
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.077687382698059,
      "learning_rate": 3.3289337492142095e-06,
      "loss": 2.4165,
      "step": 56388
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.117388129234314,
      "learning_rate": 3.328627026526552e-06,
      "loss": 2.1994,
      "step": 56389
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1429909467697144,
      "learning_rate": 3.3283203151486887e-06,
      "loss": 2.3121,
      "step": 56390
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1038739681243896,
      "learning_rate": 3.3280136150811293e-06,
      "loss": 2.2043,
      "step": 56391
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.029610276222229,
      "learning_rate": 3.3277069263244067e-06,
      "loss": 2.4323,
      "step": 56392
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1983188390731812,
      "learning_rate": 3.327400248879027e-06,
      "loss": 2.2482,
      "step": 56393
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4314682483673096,
      "learning_rate": 3.3270935827455174e-06,
      "loss": 2.3671,
      "step": 56394
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9896422624588013,
      "learning_rate": 3.3267869279243948e-06,
      "loss": 2.3213,
      "step": 56395
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1126123666763306,
      "learning_rate": 3.3264802844161836e-06,
      "loss": 2.3203,
      "step": 56396
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.050187110900879,
      "learning_rate": 3.3261736522213985e-06,
      "loss": 2.2927,
      "step": 56397
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1154389381408691,
      "learning_rate": 3.3258670313405637e-06,
      "loss": 2.1895,
      "step": 56398
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1611708402633667,
      "learning_rate": 3.3255604217741977e-06,
      "loss": 2.3461,
      "step": 56399
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1325510740280151,
      "learning_rate": 3.325253823522816e-06,
      "loss": 2.3068,
      "step": 56400
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1001875400543213,
      "learning_rate": 3.324947236586945e-06,
      "loss": 2.4956,
      "step": 56401
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0830435752868652,
      "learning_rate": 3.3246406609670978e-06,
      "loss": 2.2582,
      "step": 56402
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.9718561768531799,
      "learning_rate": 3.3243340966638016e-06,
      "loss": 2.2005,
      "step": 56403
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.008918046951294,
      "learning_rate": 3.3240275436775683e-06,
      "loss": 2.2644,
      "step": 56404
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0480040311813354,
      "learning_rate": 3.3237210020089284e-06,
      "loss": 2.2183,
      "step": 56405
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.244106411933899,
      "learning_rate": 3.3234144716583884e-06,
      "loss": 2.3186,
      "step": 56406
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0996289253234863,
      "learning_rate": 3.3231079526264766e-06,
      "loss": 2.3362,
      "step": 56407
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0647259950637817,
      "learning_rate": 3.322801444913708e-06,
      "loss": 2.5871,
      "step": 56408
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4638475179672241,
      "learning_rate": 3.322494948520607e-06,
      "loss": 2.2762,
      "step": 56409
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0218229293823242,
      "learning_rate": 3.3221884634476877e-06,
      "loss": 2.2759,
      "step": 56410
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0526483058929443,
      "learning_rate": 3.3218819896954756e-06,
      "loss": 2.2014,
      "step": 56411
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0618661642074585,
      "learning_rate": 3.3215755272644847e-06,
      "loss": 2.3409,
      "step": 56412
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0612815618515015,
      "learning_rate": 3.321269076155239e-06,
      "loss": 2.3789,
      "step": 56413
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0567775964736938,
      "learning_rate": 3.3209626363682544e-06,
      "loss": 2.2115,
      "step": 56414
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0510022640228271,
      "learning_rate": 3.320656207904055e-06,
      "loss": 2.326,
      "step": 56415
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.439003348350525,
      "learning_rate": 3.320349790763153e-06,
      "loss": 2.3317,
      "step": 56416
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1276216506958008,
      "learning_rate": 3.3200433849460767e-06,
      "loss": 2.2466,
      "step": 56417
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1255396604537964,
      "learning_rate": 3.3197369904533417e-06,
      "loss": 2.3207,
      "step": 56418
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1016989946365356,
      "learning_rate": 3.3194306072854655e-06,
      "loss": 2.2148,
      "step": 56419
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1657862663269043,
      "learning_rate": 3.3191242354429665e-06,
      "loss": 2.1759,
      "step": 56420
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3212560415267944,
      "learning_rate": 3.3188178749263687e-06,
      "loss": 2.3142,
      "step": 56421
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2087512016296387,
      "learning_rate": 3.3185115257361867e-06,
      "loss": 2.249,
      "step": 56422
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0955628156661987,
      "learning_rate": 3.318205187872945e-06,
      "loss": 2.3055,
      "step": 56423
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1243785619735718,
      "learning_rate": 3.3178988613371577e-06,
      "loss": 2.3939,
      "step": 56424
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2278894186019897,
      "learning_rate": 3.31759254612935e-06,
      "loss": 2.1614,
      "step": 56425
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0198737382888794,
      "learning_rate": 3.317286242250037e-06,
      "loss": 2.2179,
      "step": 56426
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7171014547348022,
      "learning_rate": 3.316979949699736e-06,
      "loss": 2.2149,
      "step": 56427
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0509392023086548,
      "learning_rate": 3.3166736684789723e-06,
      "loss": 2.2688,
      "step": 56428
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2022716999053955,
      "learning_rate": 3.3163673985882583e-06,
      "loss": 2.3494,
      "step": 56429
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1325279474258423,
      "learning_rate": 3.3160611400281206e-06,
      "loss": 2.3922,
      "step": 56430
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.273017168045044,
      "learning_rate": 3.3157548927990747e-06,
      "loss": 2.4586,
      "step": 56431
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2845470905303955,
      "learning_rate": 3.31544865690164e-06,
      "loss": 2.0792,
      "step": 56432
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.063870906829834,
      "learning_rate": 3.3151424323363314e-06,
      "loss": 2.1891,
      "step": 56433
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0238151550292969,
      "learning_rate": 3.3148362191036753e-06,
      "loss": 2.2001,
      "step": 56434
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.061641812324524,
      "learning_rate": 3.3145300172041838e-06,
      "loss": 2.3165,
      "step": 56435
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1807811260223389,
      "learning_rate": 3.314223826638383e-06,
      "loss": 2.3626,
      "step": 56436
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2573775053024292,
      "learning_rate": 3.3139176474067858e-06,
      "loss": 2.2679,
      "step": 56437
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3462169170379639,
      "learning_rate": 3.3136114795099174e-06,
      "loss": 2.3912,
      "step": 56438
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0850499868392944,
      "learning_rate": 3.3133053229482904e-06,
      "loss": 2.4621,
      "step": 56439
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.209130048751831,
      "learning_rate": 3.3129991777224303e-06,
      "loss": 2.1111,
      "step": 56440
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0515010356903076,
      "learning_rate": 3.3126930438328485e-06,
      "loss": 2.4976,
      "step": 56441
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0792734622955322,
      "learning_rate": 3.312386921280073e-06,
      "loss": 2.4715,
      "step": 56442
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1622191667556763,
      "learning_rate": 3.312080810064614e-06,
      "loss": 2.3204,
      "step": 56443
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0349799394607544,
      "learning_rate": 3.311774710187e-06,
      "loss": 2.4829,
      "step": 56444
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0677354335784912,
      "learning_rate": 3.31146862164774e-06,
      "loss": 2.504,
      "step": 56445
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.0365360975265503,
      "learning_rate": 3.311162544447358e-06,
      "loss": 2.2884,
      "step": 56446
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.098482608795166,
      "learning_rate": 3.3108564785863707e-06,
      "loss": 2.2532,
      "step": 56447
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1629306077957153,
      "learning_rate": 3.310550424065301e-06,
      "loss": 2.172,
      "step": 56448
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9852228164672852,
      "learning_rate": 3.3102443808846617e-06,
      "loss": 2.4703,
      "step": 56449
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1356600522994995,
      "learning_rate": 3.3099383490449787e-06,
      "loss": 2.3628,
      "step": 56450
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.114131212234497,
      "learning_rate": 3.309632328546765e-06,
      "loss": 2.3131,
      "step": 56451
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3950146436691284,
      "learning_rate": 3.3093263193905433e-06,
      "loss": 2.1802,
      "step": 56452
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0178005695343018,
      "learning_rate": 3.309020321576828e-06,
      "loss": 2.4471,
      "step": 56453
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1490318775177002,
      "learning_rate": 3.3087143351061436e-06,
      "loss": 2.3368,
      "step": 56454
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0718752145767212,
      "learning_rate": 3.3084083599790052e-06,
      "loss": 2.4046,
      "step": 56455
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.032104730606079,
      "learning_rate": 3.30810239619593e-06,
      "loss": 2.3022,
      "step": 56456
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0154775381088257,
      "learning_rate": 3.3077964437574404e-06,
      "loss": 2.267,
      "step": 56457
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.118420958518982,
      "learning_rate": 3.3074905026640545e-06,
      "loss": 2.3667,
      "step": 56458
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1637359857559204,
      "learning_rate": 3.3071845729162866e-06,
      "loss": 2.2631,
      "step": 56459
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0876400470733643,
      "learning_rate": 3.3068786545146626e-06,
      "loss": 2.3502,
      "step": 56460
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0349452495574951,
      "learning_rate": 3.3065727474596955e-06,
      "loss": 2.3442,
      "step": 56461
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.099732518196106,
      "learning_rate": 3.3062668517519024e-06,
      "loss": 2.2802,
      "step": 56462
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0929659605026245,
      "learning_rate": 3.3059609673918092e-06,
      "loss": 2.6058,
      "step": 56463
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.208626389503479,
      "learning_rate": 3.3056550943799272e-06,
      "loss": 2.2643,
      "step": 56464
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0298610925674438,
      "learning_rate": 3.3053492327167803e-06,
      "loss": 2.4083,
      "step": 56465
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0904977321624756,
      "learning_rate": 3.3050433824028816e-06,
      "loss": 2.348,
      "step": 56466
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9994330406188965,
      "learning_rate": 3.304737543438756e-06,
      "loss": 2.1815,
      "step": 56467
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2863044738769531,
      "learning_rate": 3.3044317158249153e-06,
      "loss": 2.2988,
      "step": 56468
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.060881495475769,
      "learning_rate": 3.304125899561885e-06,
      "loss": 2.5572,
      "step": 56469
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.116665005683899,
      "learning_rate": 3.3038200946501764e-06,
      "loss": 2.4281,
      "step": 56470
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.041502833366394,
      "learning_rate": 3.303514301090318e-06,
      "loss": 2.116,
      "step": 56471
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1052894592285156,
      "learning_rate": 3.3032085188828143e-06,
      "loss": 2.2762,
      "step": 56472
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0259770154953003,
      "learning_rate": 3.302902748028195e-06,
      "loss": 2.3646,
      "step": 56473
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0965251922607422,
      "learning_rate": 3.3025969885269714e-06,
      "loss": 2.3038,
      "step": 56474
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0711336135864258,
      "learning_rate": 3.3022912403796682e-06,
      "loss": 2.2265,
      "step": 56475
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.111777424812317,
      "learning_rate": 3.3019855035867964e-06,
      "loss": 2.2724,
      "step": 56476
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0885214805603027,
      "learning_rate": 3.301679778148882e-06,
      "loss": 2.3398,
      "step": 56477
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0269784927368164,
      "learning_rate": 3.3013740640664372e-06,
      "loss": 2.1442,
      "step": 56478
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1267141103744507,
      "learning_rate": 3.301068361339984e-06,
      "loss": 2.265,
      "step": 56479
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2028297185897827,
      "learning_rate": 3.300762669970038e-06,
      "loss": 2.1802,
      "step": 56480
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.053543210029602,
      "learning_rate": 3.3004569899571215e-06,
      "loss": 2.0975,
      "step": 56481
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.099846601486206,
      "learning_rate": 3.300151321301749e-06,
      "loss": 2.3457,
      "step": 56482
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0558725595474243,
      "learning_rate": 3.2998456640044374e-06,
      "loss": 2.1797,
      "step": 56483
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2302742004394531,
      "learning_rate": 3.29954001806571e-06,
      "loss": 2.3023,
      "step": 56484
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.115861177444458,
      "learning_rate": 3.2992343834860817e-06,
      "loss": 2.4541,
      "step": 56485
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4116451740264893,
      "learning_rate": 3.298928760266069e-06,
      "loss": 2.1904,
      "step": 56486
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0750160217285156,
      "learning_rate": 3.298623148406195e-06,
      "loss": 2.2061,
      "step": 56487
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.23866605758667,
      "learning_rate": 3.298317547906974e-06,
      "loss": 2.4787,
      "step": 56488
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0431933403015137,
      "learning_rate": 3.298011958768922e-06,
      "loss": 2.3912,
      "step": 56489
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.130436897277832,
      "learning_rate": 3.2977063809925626e-06,
      "loss": 2.3075,
      "step": 56490
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1716902256011963,
      "learning_rate": 3.2974008145784085e-06,
      "loss": 2.367,
      "step": 56491
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1108931303024292,
      "learning_rate": 3.2970952595269845e-06,
      "loss": 2.2363,
      "step": 56492
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0996668338775635,
      "learning_rate": 3.2967897158388006e-06,
      "loss": 2.5476,
      "step": 56493
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1909098625183105,
      "learning_rate": 3.296484183514381e-06,
      "loss": 2.2289,
      "step": 56494
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2432893514633179,
      "learning_rate": 3.2961786625542393e-06,
      "loss": 2.2857,
      "step": 56495
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0799524784088135,
      "learning_rate": 3.295873152958898e-06,
      "loss": 2.3922,
      "step": 56496
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0891880989074707,
      "learning_rate": 3.295567654728874e-06,
      "loss": 2.5091,
      "step": 56497
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1201088428497314,
      "learning_rate": 3.2952621678646823e-06,
      "loss": 2.2387,
      "step": 56498
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9994931817054749,
      "learning_rate": 3.2949566923668396e-06,
      "loss": 2.3181,
      "step": 56499
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0195120573043823,
      "learning_rate": 3.294651228235869e-06,
      "loss": 2.3375,
      "step": 56500
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.205269455909729,
      "learning_rate": 3.294345775472283e-06,
      "loss": 2.1437,
      "step": 56501
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1423401832580566,
      "learning_rate": 3.294040334076605e-06,
      "loss": 2.1104,
      "step": 56502
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4149411916732788,
      "learning_rate": 3.293734904049347e-06,
      "loss": 2.2326,
      "step": 56503
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0628994703292847,
      "learning_rate": 3.293429485391033e-06,
      "loss": 2.4365,
      "step": 56504
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0530860424041748,
      "learning_rate": 3.2931240781021744e-06,
      "loss": 2.2493,
      "step": 56505
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1379876136779785,
      "learning_rate": 3.2928186821832952e-06,
      "loss": 2.4517,
      "step": 56506
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1311862468719482,
      "learning_rate": 3.292513297634906e-06,
      "loss": 2.2868,
      "step": 56507
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0944188833236694,
      "learning_rate": 3.2922079244575324e-06,
      "loss": 2.2965,
      "step": 56508
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9908942580223083,
      "learning_rate": 3.2919025626516855e-06,
      "loss": 2.4734,
      "step": 56509
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0671801567077637,
      "learning_rate": 3.2915972122178908e-06,
      "loss": 2.0762,
      "step": 56510
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0674532651901245,
      "learning_rate": 3.2912918731566556e-06,
      "loss": 2.3767,
      "step": 56511
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0604355335235596,
      "learning_rate": 3.290986545468505e-06,
      "loss": 2.3234,
      "step": 56512
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0969130992889404,
      "learning_rate": 3.2906812291539513e-06,
      "loss": 2.6206,
      "step": 56513
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1727906465530396,
      "learning_rate": 3.290375924213519e-06,
      "loss": 2.2609,
      "step": 56514
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0442583560943604,
      "learning_rate": 3.290070630647718e-06,
      "loss": 2.4698,
      "step": 56515
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0512083768844604,
      "learning_rate": 3.2897653484570737e-06,
      "loss": 2.4042,
      "step": 56516
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.028095006942749,
      "learning_rate": 3.2894600776420983e-06,
      "loss": 2.2323,
      "step": 56517
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0159510374069214,
      "learning_rate": 3.2891548182033082e-06,
      "loss": 2.3832,
      "step": 56518
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0970083475112915,
      "learning_rate": 3.288849570141226e-06,
      "loss": 2.1004,
      "step": 56519
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1697477102279663,
      "learning_rate": 3.2885443334563637e-06,
      "loss": 2.3537,
      "step": 56520
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9801476001739502,
      "learning_rate": 3.2882391081492448e-06,
      "loss": 2.2412,
      "step": 56521
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.080377221107483,
      "learning_rate": 3.287933894220381e-06,
      "loss": 2.3432,
      "step": 56522
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.970558762550354,
      "learning_rate": 3.287628691670295e-06,
      "loss": 2.421,
      "step": 56523
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9688863158226013,
      "learning_rate": 3.287323500499501e-06,
      "loss": 2.5307,
      "step": 56524
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.170680284500122,
      "learning_rate": 3.287018320708516e-06,
      "loss": 2.3553,
      "step": 56525
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2624622583389282,
      "learning_rate": 3.2867131522978567e-06,
      "loss": 2.345,
      "step": 56526
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0380206108093262,
      "learning_rate": 3.286407995268044e-06,
      "loss": 2.3689,
      "step": 56527
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0197478532791138,
      "learning_rate": 3.28610284961959e-06,
      "loss": 2.3033,
      "step": 56528
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.041933298110962,
      "learning_rate": 3.2857977153530186e-06,
      "loss": 2.387,
      "step": 56529
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0645639896392822,
      "learning_rate": 3.2854925924688407e-06,
      "loss": 2.1339,
      "step": 56530
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1690986156463623,
      "learning_rate": 3.28518748096758e-06,
      "loss": 2.2251,
      "step": 56531
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0204898118972778,
      "learning_rate": 3.2848823808497453e-06,
      "loss": 2.2568,
      "step": 56532
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4661754369735718,
      "learning_rate": 3.2845772921158636e-06,
      "loss": 2.3118,
      "step": 56533
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3570215702056885,
      "learning_rate": 3.284272214766444e-06,
      "loss": 2.2442,
      "step": 56534
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.021377682685852,
      "learning_rate": 3.28396714880201e-06,
      "loss": 2.3286,
      "step": 56535
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.044005274772644,
      "learning_rate": 3.283662094223071e-06,
      "loss": 2.4142,
      "step": 56536
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1171846389770508,
      "learning_rate": 3.2833570510301573e-06,
      "loss": 2.3023,
      "step": 56537
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9881916642189026,
      "learning_rate": 3.28305201922377e-06,
      "loss": 2.1646,
      "step": 56538
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0940369367599487,
      "learning_rate": 3.282746998804438e-06,
      "loss": 2.4711,
      "step": 56539
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.077348232269287,
      "learning_rate": 3.28244198977267e-06,
      "loss": 2.5259,
      "step": 56540
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0791516304016113,
      "learning_rate": 3.2821369921289915e-06,
      "loss": 2.2027,
      "step": 56541
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4468655586242676,
      "learning_rate": 3.281832005873912e-06,
      "loss": 2.1982,
      "step": 56542
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9544996619224548,
      "learning_rate": 3.281527031007954e-06,
      "loss": 2.1627,
      "step": 56543
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0590988397598267,
      "learning_rate": 3.281222067531633e-06,
      "loss": 2.2078,
      "step": 56544
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5576280355453491,
      "learning_rate": 3.2809171154454624e-06,
      "loss": 2.4894,
      "step": 56545
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3024219274520874,
      "learning_rate": 3.2806121747499654e-06,
      "loss": 2.2219,
      "step": 56546
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.021569848060608,
      "learning_rate": 3.280307245445653e-06,
      "loss": 2.4169,
      "step": 56547
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1095603704452515,
      "learning_rate": 3.280002327533047e-06,
      "loss": 2.4493,
      "step": 56548
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0214691162109375,
      "learning_rate": 3.2796974210126597e-06,
      "loss": 2.0515,
      "step": 56549
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1982628107070923,
      "learning_rate": 3.2793925258850166e-06,
      "loss": 2.4111,
      "step": 56550
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1014245748519897,
      "learning_rate": 3.2790876421506223e-06,
      "loss": 2.2805,
      "step": 56551
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1433652639389038,
      "learning_rate": 3.2787827698100027e-06,
      "loss": 2.3093,
      "step": 56552
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.097711443901062,
      "learning_rate": 3.2784779088636686e-06,
      "loss": 2.1731,
      "step": 56553
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.097604751586914,
      "learning_rate": 3.278173059312143e-06,
      "loss": 2.2655,
      "step": 56554
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0819250345230103,
      "learning_rate": 3.2778682211559365e-06,
      "loss": 2.5564,
      "step": 56555
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1604349613189697,
      "learning_rate": 3.277563394395573e-06,
      "loss": 2.4982,
      "step": 56556
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0964967012405396,
      "learning_rate": 3.2772585790315614e-06,
      "loss": 2.368,
      "step": 56557
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.044184923171997,
      "learning_rate": 3.2769537750644264e-06,
      "loss": 2.247,
      "step": 56558
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9919389486312866,
      "learning_rate": 3.2766489824946767e-06,
      "loss": 2.246,
      "step": 56559
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1026824712753296,
      "learning_rate": 3.2763442013228365e-06,
      "loss": 2.3351,
      "step": 56560
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1028131246566772,
      "learning_rate": 3.276039431549416e-06,
      "loss": 2.2608,
      "step": 56561
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.021797776222229,
      "learning_rate": 3.2757346731749374e-06,
      "loss": 2.3672,
      "step": 56562
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9986509084701538,
      "learning_rate": 3.275429926199916e-06,
      "loss": 2.2725,
      "step": 56563
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.038243055343628,
      "learning_rate": 3.275125190624866e-06,
      "loss": 2.4125,
      "step": 56564
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9915286898612976,
      "learning_rate": 3.2748204664503025e-06,
      "loss": 2.3266,
      "step": 56565
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.083000659942627,
      "learning_rate": 3.274515753676748e-06,
      "loss": 2.482,
      "step": 56566
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3704793453216553,
      "learning_rate": 3.274211052304713e-06,
      "loss": 2.3273,
      "step": 56567
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0753458738327026,
      "learning_rate": 3.27390636233472e-06,
      "loss": 2.3245,
      "step": 56568
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0490312576293945,
      "learning_rate": 3.273601683767279e-06,
      "loss": 2.2805,
      "step": 56569
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0868266820907593,
      "learning_rate": 3.2732970166029132e-06,
      "loss": 2.6051,
      "step": 56570
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0308910608291626,
      "learning_rate": 3.2729923608421334e-06,
      "loss": 2.2319,
      "step": 56571
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0893226861953735,
      "learning_rate": 3.2726877164854608e-06,
      "loss": 2.4374,
      "step": 56572
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0842907428741455,
      "learning_rate": 3.2723830835334105e-06,
      "loss": 2.1804,
      "step": 56573
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.131036639213562,
      "learning_rate": 3.272078461986494e-06,
      "loss": 2.097,
      "step": 56574
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0329779386520386,
      "learning_rate": 3.271773851845236e-06,
      "loss": 2.2216,
      "step": 56575
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0373539924621582,
      "learning_rate": 3.2714692531101487e-06,
      "loss": 2.2411,
      "step": 56576
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0282652378082275,
      "learning_rate": 3.2711646657817442e-06,
      "loss": 2.3613,
      "step": 56577
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1452701091766357,
      "learning_rate": 3.2708600898605468e-06,
      "loss": 2.5132,
      "step": 56578
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.069115161895752,
      "learning_rate": 3.27055552534707e-06,
      "loss": 2.3768,
      "step": 56579
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.70736825466156,
      "learning_rate": 3.2702509722418252e-06,
      "loss": 2.5293,
      "step": 56580
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.091346025466919,
      "learning_rate": 3.2699464305453354e-06,
      "loss": 2.4357,
      "step": 56581
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9883036017417908,
      "learning_rate": 3.2696419002581125e-06,
      "loss": 2.1325,
      "step": 56582
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1404762268066406,
      "learning_rate": 3.269337381380677e-06,
      "loss": 2.1207,
      "step": 56583
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0515594482421875,
      "learning_rate": 3.269032873913539e-06,
      "loss": 2.4578,
      "step": 56584
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9915274381637573,
      "learning_rate": 3.268728377857222e-06,
      "loss": 2.3226,
      "step": 56585
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0991814136505127,
      "learning_rate": 3.268423893212236e-06,
      "loss": 2.2785,
      "step": 56586
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0092875957489014,
      "learning_rate": 3.2681194199791022e-06,
      "loss": 2.4079,
      "step": 56587
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1060811281204224,
      "learning_rate": 3.267814958158331e-06,
      "loss": 2.4419,
      "step": 56588
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.203666090965271,
      "learning_rate": 3.267510507750449e-06,
      "loss": 2.5123,
      "step": 56589
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0432488918304443,
      "learning_rate": 3.267206068755958e-06,
      "loss": 2.1828,
      "step": 56590
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0880377292633057,
      "learning_rate": 3.2669016411753853e-06,
      "loss": 2.2897,
      "step": 56591
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.6449098587036133,
      "learning_rate": 3.266597225009239e-06,
      "loss": 2.3179,
      "step": 56592
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.111304759979248,
      "learning_rate": 3.2662928202580437e-06,
      "loss": 2.3641,
      "step": 56593
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0424576997756958,
      "learning_rate": 3.2659884269223065e-06,
      "loss": 2.217,
      "step": 56594
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1816297769546509,
      "learning_rate": 3.2656840450025516e-06,
      "loss": 2.4235,
      "step": 56595
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0254782438278198,
      "learning_rate": 3.2653796744992872e-06,
      "loss": 2.3945,
      "step": 56596
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9318326711654663,
      "learning_rate": 3.2650753154130376e-06,
      "loss": 2.2322,
      "step": 56597
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0422316789627075,
      "learning_rate": 3.2647709677443106e-06,
      "loss": 2.3758,
      "step": 56598
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1535708904266357,
      "learning_rate": 3.26446663149363e-06,
      "loss": 2.2849,
      "step": 56599
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0909470319747925,
      "learning_rate": 3.2641623066615068e-06,
      "loss": 2.1979,
      "step": 56600
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0195679664611816,
      "learning_rate": 3.263857993248456e-06,
      "loss": 2.4134,
      "step": 56601
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0696606636047363,
      "learning_rate": 3.2635536912549982e-06,
      "loss": 2.3878,
      "step": 56602
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.074404239654541,
      "learning_rate": 3.2632494006816464e-06,
      "loss": 2.3536,
      "step": 56603
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0924451351165771,
      "learning_rate": 3.262945121528912e-06,
      "loss": 2.1922,
      "step": 56604
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0691698789596558,
      "learning_rate": 3.26264085379732e-06,
      "loss": 2.2077,
      "step": 56605
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.091383695602417,
      "learning_rate": 3.2623365974873812e-06,
      "loss": 2.2598,
      "step": 56606
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0678129196166992,
      "learning_rate": 3.262032352599609e-06,
      "loss": 2.3088,
      "step": 56607
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2708452939987183,
      "learning_rate": 3.261728119134525e-06,
      "loss": 2.3338,
      "step": 56608
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1573162078857422,
      "learning_rate": 3.2614238970926383e-06,
      "loss": 2.3771,
      "step": 56609
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1677595376968384,
      "learning_rate": 3.261119686474472e-06,
      "loss": 2.179,
      "step": 56610
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0954155921936035,
      "learning_rate": 3.260815487280534e-06,
      "loss": 2.471,
      "step": 56611
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9878779053688049,
      "learning_rate": 3.2605112995113487e-06,
      "loss": 2.2807,
      "step": 56612
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1642597913742065,
      "learning_rate": 3.2602071231674227e-06,
      "loss": 2.452,
      "step": 56613
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0965323448181152,
      "learning_rate": 3.2599029582492804e-06,
      "loss": 2.3916,
      "step": 56614
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.951370358467102,
      "learning_rate": 3.259598804757429e-06,
      "loss": 2.377,
      "step": 56615
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0976922512054443,
      "learning_rate": 3.259294662692396e-06,
      "loss": 2.1903,
      "step": 56616
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1909644603729248,
      "learning_rate": 3.258990532054681e-06,
      "loss": 2.301,
      "step": 56617
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1107882261276245,
      "learning_rate": 3.2586864128448125e-06,
      "loss": 2.5399,
      "step": 56618
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1853663921356201,
      "learning_rate": 3.258382305063298e-06,
      "loss": 2.4925,
      "step": 56619
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0871769189834595,
      "learning_rate": 3.25807820871066e-06,
      "loss": 2.45,
      "step": 56620
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0728040933609009,
      "learning_rate": 3.2577741237874063e-06,
      "loss": 2.405,
      "step": 56621
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1432991027832031,
      "learning_rate": 3.2574700502940603e-06,
      "loss": 2.4921,
      "step": 56622
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.178525447845459,
      "learning_rate": 3.2571659882311314e-06,
      "loss": 2.4478,
      "step": 56623
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0782462358474731,
      "learning_rate": 3.2568619375991394e-06,
      "loss": 2.302,
      "step": 56624
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0173641443252563,
      "learning_rate": 3.2565578983985945e-06,
      "loss": 2.3753,
      "step": 56625
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0341925621032715,
      "learning_rate": 3.2562538706300195e-06,
      "loss": 2.1376,
      "step": 56626
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5078537464141846,
      "learning_rate": 3.255949854293923e-06,
      "loss": 2.1255,
      "step": 56627
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0666005611419678,
      "learning_rate": 3.255645849390826e-06,
      "loss": 2.325,
      "step": 56628
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1368979215621948,
      "learning_rate": 3.25534185592124e-06,
      "loss": 2.35,
      "step": 56629
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1343752145767212,
      "learning_rate": 3.2550378738856824e-06,
      "loss": 2.1494,
      "step": 56630
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0346593856811523,
      "learning_rate": 3.2547339032846647e-06,
      "loss": 2.2394,
      "step": 56631
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0156277418136597,
      "learning_rate": 3.2544299441187078e-06,
      "loss": 2.2376,
      "step": 56632
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0173540115356445,
      "learning_rate": 3.25412599638832e-06,
      "loss": 2.1164,
      "step": 56633
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.129754662513733,
      "learning_rate": 3.253822060094025e-06,
      "loss": 2.3661,
      "step": 56634
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1178685426712036,
      "learning_rate": 3.253518135236334e-06,
      "loss": 2.3122,
      "step": 56635
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.064035415649414,
      "learning_rate": 3.2532142218157582e-06,
      "loss": 2.442,
      "step": 56636
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1858144998550415,
      "learning_rate": 3.2529103198328207e-06,
      "loss": 2.2491,
      "step": 56637
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3263577222824097,
      "learning_rate": 3.252606429288029e-06,
      "loss": 2.0906,
      "step": 56638
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.276375651359558,
      "learning_rate": 3.252302550181905e-06,
      "loss": 2.5751,
      "step": 56639
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.131158709526062,
      "learning_rate": 3.2519986825149575e-06,
      "loss": 2.3434,
      "step": 56640
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0089035034179688,
      "learning_rate": 3.251694826287709e-06,
      "loss": 2.414,
      "step": 56641
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1381356716156006,
      "learning_rate": 3.2513909815006694e-06,
      "loss": 2.2148,
      "step": 56642
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0307444334030151,
      "learning_rate": 3.251087148154356e-06,
      "loss": 2.5264,
      "step": 56643
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0789545774459839,
      "learning_rate": 3.2507833262492794e-06,
      "loss": 2.2774,
      "step": 56644
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.001894474029541,
      "learning_rate": 3.2504795157859614e-06,
      "loss": 2.3308,
      "step": 56645
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.184011459350586,
      "learning_rate": 3.250175716764911e-06,
      "loss": 2.6554,
      "step": 56646
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0128552913665771,
      "learning_rate": 3.2498719291866477e-06,
      "loss": 2.2674,
      "step": 56647
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1619038581848145,
      "learning_rate": 3.249568153051683e-06,
      "loss": 2.1368,
      "step": 56648
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.181454062461853,
      "learning_rate": 3.2492643883605357e-06,
      "loss": 2.2914,
      "step": 56649
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0675793886184692,
      "learning_rate": 3.248960635113717e-06,
      "loss": 2.3333,
      "step": 56650
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0704885721206665,
      "learning_rate": 3.2486568933117455e-06,
      "loss": 2.3121,
      "step": 56651
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9849548935890198,
      "learning_rate": 3.248353162955131e-06,
      "loss": 2.3574,
      "step": 56652
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.053216814994812,
      "learning_rate": 3.2480494440443954e-06,
      "loss": 2.298,
      "step": 56653
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2247260808944702,
      "learning_rate": 3.247745736580047e-06,
      "loss": 2.2529,
      "step": 56654
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9649917483329773,
      "learning_rate": 3.2474420405626095e-06,
      "loss": 2.1749,
      "step": 56655
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3505851030349731,
      "learning_rate": 3.247138355992585e-06,
      "loss": 2.3773,
      "step": 56656
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2229522466659546,
      "learning_rate": 3.246834682870499e-06,
      "loss": 2.0485,
      "step": 56657
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1376700401306152,
      "learning_rate": 3.246531021196858e-06,
      "loss": 2.4931,
      "step": 56658
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0839625597000122,
      "learning_rate": 3.246227370972185e-06,
      "loss": 2.3939,
      "step": 56659
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0189321041107178,
      "learning_rate": 3.245923732196987e-06,
      "loss": 2.1389,
      "step": 56660
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2031112909317017,
      "learning_rate": 3.245620104871787e-06,
      "loss": 2.489,
      "step": 56661
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0768096446990967,
      "learning_rate": 3.2453164889970943e-06,
      "loss": 2.3765,
      "step": 56662
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0379643440246582,
      "learning_rate": 3.245012884573423e-06,
      "loss": 2.3769,
      "step": 56663
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0469945669174194,
      "learning_rate": 3.2447092916012913e-06,
      "loss": 2.2875,
      "step": 56664
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2226521968841553,
      "learning_rate": 3.2444057100812097e-06,
      "loss": 2.2614,
      "step": 56665
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9899559617042542,
      "learning_rate": 3.2441021400136975e-06,
      "loss": 2.3069,
      "step": 56666
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9616481065750122,
      "learning_rate": 3.2437985813992647e-06,
      "loss": 2.1428,
      "step": 56667
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1224621534347534,
      "learning_rate": 3.243495034238431e-06,
      "loss": 2.6019,
      "step": 56668
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0313239097595215,
      "learning_rate": 3.243191498531708e-06,
      "loss": 2.27,
      "step": 56669
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1265926361083984,
      "learning_rate": 3.2428879742796116e-06,
      "loss": 2.5739,
      "step": 56670
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9792839288711548,
      "learning_rate": 3.2425844614826508e-06,
      "loss": 2.4254,
      "step": 56671
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0929588079452515,
      "learning_rate": 3.2422809601413484e-06,
      "loss": 2.0822,
      "step": 56672
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0571807622909546,
      "learning_rate": 3.2419774702562125e-06,
      "loss": 2.4199,
      "step": 56673
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1216892004013062,
      "learning_rate": 3.241673991827763e-06,
      "loss": 2.3894,
      "step": 56674
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1890579462051392,
      "learning_rate": 3.2413705248565085e-06,
      "loss": 2.4966,
      "step": 56675
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0755943059921265,
      "learning_rate": 3.241067069342969e-06,
      "loss": 2.1258,
      "step": 56676
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3693442344665527,
      "learning_rate": 3.240763625287654e-06,
      "loss": 2.2794,
      "step": 56677
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0526288747787476,
      "learning_rate": 3.2404601926910838e-06,
      "loss": 2.3654,
      "step": 56678
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1326684951782227,
      "learning_rate": 3.2401567715537663e-06,
      "loss": 2.4363,
      "step": 56679
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0661115646362305,
      "learning_rate": 3.239853361876222e-06,
      "loss": 2.4289,
      "step": 56680
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0018731355667114,
      "learning_rate": 3.2395499636589587e-06,
      "loss": 2.3499,
      "step": 56681
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0529707670211792,
      "learning_rate": 3.2392465769025007e-06,
      "loss": 2.2355,
      "step": 56682
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0739094018936157,
      "learning_rate": 3.2389432016073497e-06,
      "loss": 2.279,
      "step": 56683
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.191328763961792,
      "learning_rate": 3.2386398377740303e-06,
      "loss": 2.2447,
      "step": 56684
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1498298645019531,
      "learning_rate": 3.2383364854030484e-06,
      "loss": 2.2682,
      "step": 56685
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0442616939544678,
      "learning_rate": 3.2380331444949266e-06,
      "loss": 2.3371,
      "step": 56686
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.155616283416748,
      "learning_rate": 3.2377298150501712e-06,
      "loss": 2.2825,
      "step": 56687
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0555719137191772,
      "learning_rate": 3.2374264970693047e-06,
      "loss": 2.4488,
      "step": 56688
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0602177381515503,
      "learning_rate": 3.2371231905528334e-06,
      "loss": 2.0749,
      "step": 56689
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0230201482772827,
      "learning_rate": 3.2368198955012774e-06,
      "loss": 2.334,
      "step": 56690
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0888090133666992,
      "learning_rate": 3.2365166119151493e-06,
      "loss": 2.3554,
      "step": 56691
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.067247986793518,
      "learning_rate": 3.2362133397949593e-06,
      "loss": 2.2323,
      "step": 56692
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1365153789520264,
      "learning_rate": 3.2359100791412267e-06,
      "loss": 2.2639,
      "step": 56693
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.154170036315918,
      "learning_rate": 3.235606829954462e-06,
      "loss": 2.2354,
      "step": 56694
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0197380781173706,
      "learning_rate": 3.235303592235184e-06,
      "loss": 2.3634,
      "step": 56695
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.064287543296814,
      "learning_rate": 3.2350003659839024e-06,
      "loss": 2.1769,
      "step": 56696
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.052037239074707,
      "learning_rate": 3.234697151201134e-06,
      "loss": 2.3647,
      "step": 56697
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0025737285614014,
      "learning_rate": 3.2343939478873865e-06,
      "loss": 2.4519,
      "step": 56698
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.061153531074524,
      "learning_rate": 3.2340907560431834e-06,
      "loss": 2.3355,
      "step": 56699
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0778553485870361,
      "learning_rate": 3.233787575669031e-06,
      "loss": 2.6633,
      "step": 56700
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0314891338348389,
      "learning_rate": 3.233484406765448e-06,
      "loss": 2.3686,
      "step": 56701
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0112857818603516,
      "learning_rate": 3.2331812493329452e-06,
      "loss": 2.2547,
      "step": 56702
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2114564180374146,
      "learning_rate": 3.23287810337204e-06,
      "loss": 2.2538,
      "step": 56703
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9853301644325256,
      "learning_rate": 3.232574968883242e-06,
      "loss": 2.3098,
      "step": 56704
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1177157163619995,
      "learning_rate": 3.2322718458670697e-06,
      "loss": 2.4266,
      "step": 56705
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0615663528442383,
      "learning_rate": 3.2319687343240325e-06,
      "loss": 2.3349,
      "step": 56706
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.022945523262024,
      "learning_rate": 3.231665634254648e-06,
      "loss": 2.1246,
      "step": 56707
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0846953392028809,
      "learning_rate": 3.23136254565943e-06,
      "loss": 2.3229,
      "step": 56708
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1348296403884888,
      "learning_rate": 3.2310594685388906e-06,
      "loss": 2.4039,
      "step": 56709
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1164599657058716,
      "learning_rate": 3.230756402893541e-06,
      "loss": 2.3968,
      "step": 56710
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0028231143951416,
      "learning_rate": 3.230453348723901e-06,
      "loss": 2.2787,
      "step": 56711
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9926368594169617,
      "learning_rate": 3.2301503060304775e-06,
      "loss": 2.3732,
      "step": 56712
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2010619640350342,
      "learning_rate": 3.229847274813791e-06,
      "loss": 2.1396,
      "step": 56713
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1079703569412231,
      "learning_rate": 3.229544255074348e-06,
      "loss": 2.3985,
      "step": 56714
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.090914249420166,
      "learning_rate": 3.229241246812671e-06,
      "loss": 2.3303,
      "step": 56715
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0448248386383057,
      "learning_rate": 3.2289382500292656e-06,
      "loss": 2.5072,
      "step": 56716
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9513301253318787,
      "learning_rate": 3.2286352647246523e-06,
      "loss": 2.3152,
      "step": 56717
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0709048509597778,
      "learning_rate": 3.22833229089934e-06,
      "loss": 2.5707,
      "step": 56718
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9918065071105957,
      "learning_rate": 3.2280293285538413e-06,
      "loss": 2.5009,
      "step": 56719
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0701971054077148,
      "learning_rate": 3.227726377688676e-06,
      "loss": 2.4282,
      "step": 56720
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0608240365982056,
      "learning_rate": 3.2274234383043524e-06,
      "loss": 2.5537,
      "step": 56721
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.054138422012329,
      "learning_rate": 3.2271205104013827e-06,
      "loss": 2.2334,
      "step": 56722
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0324056148529053,
      "learning_rate": 3.226817593980287e-06,
      "loss": 2.3981,
      "step": 56723
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1678270101547241,
      "learning_rate": 3.2265146890415743e-06,
      "loss": 2.4336,
      "step": 56724
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.108930230140686,
      "learning_rate": 3.2262117955857566e-06,
      "loss": 2.3397,
      "step": 56725
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0652384757995605,
      "learning_rate": 3.2259089136133526e-06,
      "loss": 2.1037,
      "step": 56726
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1043716669082642,
      "learning_rate": 3.2256060431248693e-06,
      "loss": 2.3077,
      "step": 56727
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1495332717895508,
      "learning_rate": 3.225303184120827e-06,
      "loss": 2.3759,
      "step": 56728
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.202557921409607,
      "learning_rate": 3.225000336601732e-06,
      "loss": 2.3105,
      "step": 56729
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0384538173675537,
      "learning_rate": 3.2246975005681046e-06,
      "loss": 2.3377,
      "step": 56730
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1196194887161255,
      "learning_rate": 3.2243946760204526e-06,
      "loss": 2.4997,
      "step": 56731
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.21059250831604,
      "learning_rate": 3.2240918629592954e-06,
      "loss": 2.4917,
      "step": 56732
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0511986017227173,
      "learning_rate": 3.2237890613851387e-06,
      "loss": 2.4473,
      "step": 56733
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.185288667678833,
      "learning_rate": 3.223486271298507e-06,
      "loss": 2.4643,
      "step": 56734
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1284044981002808,
      "learning_rate": 3.2231834926999005e-06,
      "loss": 2.1776,
      "step": 56735
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.7389936447143555,
      "learning_rate": 3.222880725589841e-06,
      "loss": 2.4051,
      "step": 56736
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.109260082244873,
      "learning_rate": 3.2225779699688366e-06,
      "loss": 2.2833,
      "step": 56737
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.01918363571167,
      "learning_rate": 3.222275225837406e-06,
      "loss": 2.2665,
      "step": 56738
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0628632307052612,
      "learning_rate": 3.2219724931960562e-06,
      "loss": 2.2677,
      "step": 56739
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1962261199951172,
      "learning_rate": 3.221669772045307e-06,
      "loss": 2.119,
      "step": 56740
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0210962295532227,
      "learning_rate": 3.2213670623856663e-06,
      "loss": 2.1983,
      "step": 56741
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.161479115486145,
      "learning_rate": 3.221064364217652e-06,
      "loss": 2.5748,
      "step": 56742
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0640285015106201,
      "learning_rate": 3.2207616775417715e-06,
      "loss": 2.2587,
      "step": 56743
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1576515436172485,
      "learning_rate": 3.220459002358545e-06,
      "loss": 2.2164,
      "step": 56744
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1799198389053345,
      "learning_rate": 3.2201563386684787e-06,
      "loss": 2.4224,
      "step": 56745
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0811755657196045,
      "learning_rate": 3.2198536864720906e-06,
      "loss": 2.3692,
      "step": 56746
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.091718316078186,
      "learning_rate": 3.219551045769893e-06,
      "loss": 2.3924,
      "step": 56747
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0679948329925537,
      "learning_rate": 3.2192484165623984e-06,
      "loss": 2.204,
      "step": 56748
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0976769924163818,
      "learning_rate": 3.218945798850115e-06,
      "loss": 2.1948,
      "step": 56749
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1064525842666626,
      "learning_rate": 3.2186431926335637e-06,
      "loss": 2.1954,
      "step": 56750
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0723670721054077,
      "learning_rate": 3.218340597913251e-06,
      "loss": 2.5061,
      "step": 56751
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2980574369430542,
      "learning_rate": 3.218038014689696e-06,
      "loss": 2.3373,
      "step": 56752
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.038548469543457,
      "learning_rate": 3.217735442963409e-06,
      "loss": 2.4185,
      "step": 56753
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1689558029174805,
      "learning_rate": 3.217432882734899e-06,
      "loss": 2.3747,
      "step": 56754
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0629552602767944,
      "learning_rate": 3.2171303340046856e-06,
      "loss": 2.481,
      "step": 56755
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0234854221343994,
      "learning_rate": 3.2168277967732753e-06,
      "loss": 2.28,
      "step": 56756
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0912537574768066,
      "learning_rate": 3.2165252710411865e-06,
      "loss": 2.2861,
      "step": 56757
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1745737791061401,
      "learning_rate": 3.2162227568089277e-06,
      "loss": 2.4385,
      "step": 56758
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1146790981292725,
      "learning_rate": 3.2159202540770165e-06,
      "loss": 2.2346,
      "step": 56759
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1009749174118042,
      "learning_rate": 3.2156177628459594e-06,
      "loss": 2.2684,
      "step": 56760
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1864486932754517,
      "learning_rate": 3.2153152831162794e-06,
      "loss": 2.3025,
      "step": 56761
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0448945760726929,
      "learning_rate": 3.2150128148884764e-06,
      "loss": 2.6145,
      "step": 56762
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0830481052398682,
      "learning_rate": 3.2147103581630733e-06,
      "loss": 2.7072,
      "step": 56763
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2259259223937988,
      "learning_rate": 3.2144079129405746e-06,
      "loss": 2.3328,
      "step": 56764
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9728326797485352,
      "learning_rate": 3.214105479221502e-06,
      "loss": 2.3367,
      "step": 56765
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1067438125610352,
      "learning_rate": 3.2138030570063595e-06,
      "loss": 2.2954,
      "step": 56766
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9561871886253357,
      "learning_rate": 3.2135006462956677e-06,
      "loss": 2.3237,
      "step": 56767
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0296803712844849,
      "learning_rate": 3.2131982470899313e-06,
      "loss": 2.1063,
      "step": 56768
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0345587730407715,
      "learning_rate": 3.212895859389671e-06,
      "loss": 2.4392,
      "step": 56769
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2279366254806519,
      "learning_rate": 3.212593483195392e-06,
      "loss": 2.4846,
      "step": 56770
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9772126078605652,
      "learning_rate": 3.2122911185076145e-06,
      "loss": 2.2467,
      "step": 56771
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0836753845214844,
      "learning_rate": 3.2119887653268435e-06,
      "loss": 2.253,
      "step": 56772
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0225564241409302,
      "learning_rate": 3.2116864236535982e-06,
      "loss": 2.2468,
      "step": 56773
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0089632272720337,
      "learning_rate": 3.211384093488388e-06,
      "loss": 2.2899,
      "step": 56774
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.04254949092865,
      "learning_rate": 3.2110817748317256e-06,
      "loss": 2.385,
      "step": 56775
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2043431997299194,
      "learning_rate": 3.210779467684121e-06,
      "loss": 2.3529,
      "step": 56776
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0478734970092773,
      "learning_rate": 3.2104771720460915e-06,
      "loss": 2.3487,
      "step": 56777
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0255237817764282,
      "learning_rate": 3.2101748879181437e-06,
      "loss": 2.4419,
      "step": 56778
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1261826753616333,
      "learning_rate": 3.2098726153007973e-06,
      "loss": 2.4109,
      "step": 56779
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2588942050933838,
      "learning_rate": 3.209570354194561e-06,
      "loss": 2.1923,
      "step": 56780
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0600030422210693,
      "learning_rate": 3.209268104599943e-06,
      "loss": 2.4332,
      "step": 56781
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1953974962234497,
      "learning_rate": 3.208965866517465e-06,
      "loss": 2.4377,
      "step": 56782
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0251835584640503,
      "learning_rate": 3.208663639947629e-06,
      "loss": 2.1989,
      "step": 56783
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2057945728302002,
      "learning_rate": 3.208361424890958e-06,
      "loss": 2.4287,
      "step": 56784
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0452606678009033,
      "learning_rate": 3.2080592213479544e-06,
      "loss": 2.5439,
      "step": 56785
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0927897691726685,
      "learning_rate": 3.2077570293191386e-06,
      "loss": 2.2398,
      "step": 56786
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.010096549987793,
      "learning_rate": 3.2074548488050196e-06,
      "loss": 2.5021,
      "step": 56787
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.036375641822815,
      "learning_rate": 3.207152679806108e-06,
      "loss": 2.3106,
      "step": 56788
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.128117322921753,
      "learning_rate": 3.206850522322916e-06,
      "loss": 2.1766,
      "step": 56789
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9866048693656921,
      "learning_rate": 3.2065483763559604e-06,
      "loss": 2.2905,
      "step": 56790
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0034960508346558,
      "learning_rate": 3.206246241905746e-06,
      "loss": 2.267,
      "step": 56791
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0522493124008179,
      "learning_rate": 3.2059441189727934e-06,
      "loss": 2.4275,
      "step": 56792
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1145590543746948,
      "learning_rate": 3.2056420075576076e-06,
      "loss": 2.5432,
      "step": 56793
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.051077127456665,
      "learning_rate": 3.2053399076607074e-06,
      "loss": 2.4581,
      "step": 56794
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2180798053741455,
      "learning_rate": 3.205037819282598e-06,
      "loss": 1.9445,
      "step": 56795
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0557241439819336,
      "learning_rate": 3.204735742423798e-06,
      "loss": 2.5123,
      "step": 56796
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9668373465538025,
      "learning_rate": 3.204433677084814e-06,
      "loss": 2.6278,
      "step": 56797
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0585381984710693,
      "learning_rate": 3.2041316232661623e-06,
      "loss": 2.3798,
      "step": 56798
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.167954921722412,
      "learning_rate": 3.2038295809683517e-06,
      "loss": 2.5567,
      "step": 56799
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0504871606826782,
      "learning_rate": 3.2035275501919018e-06,
      "loss": 2.2188,
      "step": 56800
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1184452772140503,
      "learning_rate": 3.2032255309373116e-06,
      "loss": 2.3497,
      "step": 56801
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0575697422027588,
      "learning_rate": 3.2029235232051037e-06,
      "loss": 2.3624,
      "step": 56802
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1479175090789795,
      "learning_rate": 3.202621526995783e-06,
      "loss": 2.2293,
      "step": 56803
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.236412525177002,
      "learning_rate": 3.2023195423098673e-06,
      "loss": 2.5123,
      "step": 56804
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.102738857269287,
      "learning_rate": 3.2020175691478637e-06,
      "loss": 2.4736,
      "step": 56805
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.141817569732666,
      "learning_rate": 3.2017156075102905e-06,
      "loss": 2.1718,
      "step": 56806
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0455026626586914,
      "learning_rate": 3.201413657397653e-06,
      "loss": 2.4689,
      "step": 56807
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0968762636184692,
      "learning_rate": 3.201111718810468e-06,
      "loss": 2.2551,
      "step": 56808
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0061091184616089,
      "learning_rate": 3.2008097917492455e-06,
      "loss": 2.2014,
      "step": 56809
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1231988668441772,
      "learning_rate": 3.2005078762144928e-06,
      "loss": 2.3169,
      "step": 56810
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1884047985076904,
      "learning_rate": 3.20020597220673e-06,
      "loss": 2.2242,
      "step": 56811
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.071130394935608,
      "learning_rate": 3.1999040797264626e-06,
      "loss": 2.2205,
      "step": 56812
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.104881763458252,
      "learning_rate": 3.1996021987742067e-06,
      "loss": 2.3053,
      "step": 56813
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1107851266860962,
      "learning_rate": 3.199300329350472e-06,
      "loss": 2.2791,
      "step": 56814
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3096611499786377,
      "learning_rate": 3.198998471455771e-06,
      "loss": 2.2312,
      "step": 56815
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1208524703979492,
      "learning_rate": 3.1986966250906104e-06,
      "loss": 2.3564,
      "step": 56816
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2429496049880981,
      "learning_rate": 3.19839479025551e-06,
      "loss": 2.4083,
      "step": 56817
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0117701292037964,
      "learning_rate": 3.1980929669509754e-06,
      "loss": 2.2593,
      "step": 56818
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.181477427482605,
      "learning_rate": 3.197791155177523e-06,
      "loss": 2.5129,
      "step": 56819
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0156373977661133,
      "learning_rate": 3.197489354935659e-06,
      "loss": 2.1832,
      "step": 56820
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.022560477256775,
      "learning_rate": 3.1971875662259012e-06,
      "loss": 2.3006,
      "step": 56821
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0772252082824707,
      "learning_rate": 3.1968857890487546e-06,
      "loss": 2.3163,
      "step": 56822
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1409382820129395,
      "learning_rate": 3.1965840234047375e-06,
      "loss": 2.392,
      "step": 56823
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.098900556564331,
      "learning_rate": 3.196282269294355e-06,
      "loss": 2.3188,
      "step": 56824
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0549193620681763,
      "learning_rate": 3.1959805267181254e-06,
      "loss": 2.4423,
      "step": 56825
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1075011491775513,
      "learning_rate": 3.1956787956765544e-06,
      "loss": 2.0947,
      "step": 56826
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0465728044509888,
      "learning_rate": 3.1953770761701607e-06,
      "loss": 2.5114,
      "step": 56827
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0661897659301758,
      "learning_rate": 3.1950753681994463e-06,
      "loss": 2.2371,
      "step": 56828
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1517245769500732,
      "learning_rate": 3.194773671764929e-06,
      "loss": 2.4083,
      "step": 56829
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.157599925994873,
      "learning_rate": 3.194471986867116e-06,
      "loss": 2.2643,
      "step": 56830
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0183391571044922,
      "learning_rate": 3.194170313506525e-06,
      "loss": 2.1158,
      "step": 56831
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.034813642501831,
      "learning_rate": 3.1938686516836593e-06,
      "loss": 1.9741,
      "step": 56832
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1286512613296509,
      "learning_rate": 3.1935670013990396e-06,
      "loss": 2.4097,
      "step": 56833
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0000810623168945,
      "learning_rate": 3.1932653626531673e-06,
      "loss": 2.2514,
      "step": 56834
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0110200643539429,
      "learning_rate": 3.192963735446564e-06,
      "loss": 2.3529,
      "step": 56835
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.076377272605896,
      "learning_rate": 3.1926621197797348e-06,
      "loss": 2.3444,
      "step": 56836
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.102146029472351,
      "learning_rate": 3.1923605156531888e-06,
      "loss": 2.3045,
      "step": 56837
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0474646091461182,
      "learning_rate": 3.1920589230674446e-06,
      "loss": 2.3552,
      "step": 56838
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1697759628295898,
      "learning_rate": 3.1917573420230064e-06,
      "loss": 2.3457,
      "step": 56839
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.158568024635315,
      "learning_rate": 3.191455772520391e-06,
      "loss": 2.3474,
      "step": 56840
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0445882081985474,
      "learning_rate": 3.1911542145601083e-06,
      "loss": 2.392,
      "step": 56841
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.158579707145691,
      "learning_rate": 3.1908526681426675e-06,
      "loss": 2.2104,
      "step": 56842
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1654199361801147,
      "learning_rate": 3.1905511332685777e-06,
      "loss": 2.4706,
      "step": 56843
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.129024863243103,
      "learning_rate": 3.190249609938356e-06,
      "loss": 2.2689,
      "step": 56844
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9782816767692566,
      "learning_rate": 3.1899480981525086e-06,
      "loss": 2.3474,
      "step": 56845
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1098467111587524,
      "learning_rate": 3.189646597911552e-06,
      "loss": 2.2294,
      "step": 56846
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0657087564468384,
      "learning_rate": 3.1893451092159897e-06,
      "loss": 2.3138,
      "step": 56847
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.070518136024475,
      "learning_rate": 3.189043632066341e-06,
      "loss": 2.3457,
      "step": 56848
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.102880835533142,
      "learning_rate": 3.18874216646311e-06,
      "loss": 2.5907,
      "step": 56849
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0782816410064697,
      "learning_rate": 3.188440712406814e-06,
      "loss": 2.3126,
      "step": 56850
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1354705095291138,
      "learning_rate": 3.1881392698979583e-06,
      "loss": 2.3714,
      "step": 56851
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.13700270652771,
      "learning_rate": 3.187837838937059e-06,
      "loss": 2.3485,
      "step": 56852
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0252575874328613,
      "learning_rate": 3.187536419524625e-06,
      "loss": 2.3853,
      "step": 56853
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.222460150718689,
      "learning_rate": 3.187235011661167e-06,
      "loss": 2.3887,
      "step": 56854
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0862308740615845,
      "learning_rate": 3.186933615347193e-06,
      "loss": 2.481,
      "step": 56855
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.115877628326416,
      "learning_rate": 3.186632230583221e-06,
      "loss": 2.2573,
      "step": 56856
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2135262489318848,
      "learning_rate": 3.186330857369754e-06,
      "loss": 2.3205,
      "step": 56857
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0702489614486694,
      "learning_rate": 3.186029495707309e-06,
      "loss": 2.5313,
      "step": 56858
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0811474323272705,
      "learning_rate": 3.185728145596392e-06,
      "loss": 2.3782,
      "step": 56859
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0446916818618774,
      "learning_rate": 3.1854268070375193e-06,
      "loss": 2.2641,
      "step": 56860
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.014528751373291,
      "learning_rate": 3.185125480031197e-06,
      "loss": 2.2547,
      "step": 56861
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1238045692443848,
      "learning_rate": 3.18482416457794e-06,
      "loss": 2.3375,
      "step": 56862
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1793378591537476,
      "learning_rate": 3.184522860678254e-06,
      "loss": 2.2101,
      "step": 56863
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0434587001800537,
      "learning_rate": 3.184221568332657e-06,
      "loss": 2.4994,
      "step": 56864
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.108253836631775,
      "learning_rate": 3.183920287541654e-06,
      "loss": 2.3184,
      "step": 56865
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1330896615982056,
      "learning_rate": 3.183619018305759e-06,
      "loss": 2.2427,
      "step": 56866
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0144846439361572,
      "learning_rate": 3.1833177606254772e-06,
      "loss": 2.1684,
      "step": 56867
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0835353136062622,
      "learning_rate": 3.183016514501326e-06,
      "loss": 2.2578,
      "step": 56868
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0495375394821167,
      "learning_rate": 3.1827152799338103e-06,
      "loss": 2.2455,
      "step": 56869
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1131354570388794,
      "learning_rate": 3.1824140569234476e-06,
      "loss": 2.5231,
      "step": 56870
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0673933029174805,
      "learning_rate": 3.1821128454707427e-06,
      "loss": 2.4261,
      "step": 56871
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0197468996047974,
      "learning_rate": 3.1818116455762073e-06,
      "loss": 2.3042,
      "step": 56872
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.132779836654663,
      "learning_rate": 3.1815104572403554e-06,
      "loss": 2.4177,
      "step": 56873
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.229880928993225,
      "learning_rate": 3.181209280463692e-06,
      "loss": 2.3371,
      "step": 56874
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9462300539016724,
      "learning_rate": 3.1809081152467337e-06,
      "loss": 2.3683,
      "step": 56875
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1325010061264038,
      "learning_rate": 3.180606961589985e-06,
      "loss": 2.453,
      "step": 56876
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.218367576599121,
      "learning_rate": 3.180305819493963e-06,
      "loss": 2.2465,
      "step": 56877
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0674721002578735,
      "learning_rate": 3.180004688959171e-06,
      "loss": 2.2411,
      "step": 56878
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0711275339126587,
      "learning_rate": 3.17970356998613e-06,
      "loss": 2.1886,
      "step": 56879
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.086035132408142,
      "learning_rate": 3.1794024625753385e-06,
      "loss": 2.4946,
      "step": 56880
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.104857325553894,
      "learning_rate": 3.179101366727314e-06,
      "loss": 2.1463,
      "step": 56881
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0346875190734863,
      "learning_rate": 3.1788002824425636e-06,
      "loss": 2.3734,
      "step": 56882
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0633500814437866,
      "learning_rate": 3.1784992097216015e-06,
      "loss": 2.1473,
      "step": 56883
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1733391284942627,
      "learning_rate": 3.178198148564933e-06,
      "loss": 2.3881,
      "step": 56884
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.252912998199463,
      "learning_rate": 3.177897098973074e-06,
      "loss": 2.3426,
      "step": 56885
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1266608238220215,
      "learning_rate": 3.1775960609465296e-06,
      "loss": 2.2563,
      "step": 56886
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0547689199447632,
      "learning_rate": 3.1772950344858154e-06,
      "loss": 2.4222,
      "step": 56887
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0601056814193726,
      "learning_rate": 3.176994019591437e-06,
      "loss": 2.3966,
      "step": 56888
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0768998861312866,
      "learning_rate": 3.1766930162639097e-06,
      "loss": 2.1557,
      "step": 56889
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0711829662322998,
      "learning_rate": 3.1763920245037373e-06,
      "loss": 2.219,
      "step": 56890
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1112282276153564,
      "learning_rate": 3.176091044311438e-06,
      "loss": 2.3609,
      "step": 56891
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.100664496421814,
      "learning_rate": 3.175790075687516e-06,
      "loss": 2.1947,
      "step": 56892
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.076966643333435,
      "learning_rate": 3.175489118632484e-06,
      "loss": 2.2054,
      "step": 56893
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0342295169830322,
      "learning_rate": 3.175188173146849e-06,
      "loss": 2.2949,
      "step": 56894
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2749422788619995,
      "learning_rate": 3.174887239231126e-06,
      "loss": 2.2844,
      "step": 56895
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2006011009216309,
      "learning_rate": 3.1745863168858203e-06,
      "loss": 2.1288,
      "step": 56896
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0090783834457397,
      "learning_rate": 3.174285406111447e-06,
      "loss": 2.0943,
      "step": 56897
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0154048204421997,
      "learning_rate": 3.173984506908514e-06,
      "loss": 2.3231,
      "step": 56898
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0295684337615967,
      "learning_rate": 3.173683619277529e-06,
      "loss": 2.52,
      "step": 56899
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0826040506362915,
      "learning_rate": 3.1733827432190056e-06,
      "loss": 2.3468,
      "step": 56900
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0417139530181885,
      "learning_rate": 3.1730818787334503e-06,
      "loss": 2.0824,
      "step": 56901
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.126418113708496,
      "learning_rate": 3.172781025821379e-06,
      "loss": 2.3781,
      "step": 56902
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0441781282424927,
      "learning_rate": 3.1724801844832944e-06,
      "loss": 2.3496,
      "step": 56903
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.127446174621582,
      "learning_rate": 3.1721793547197135e-06,
      "loss": 2.4221,
      "step": 56904
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0455718040466309,
      "learning_rate": 3.1718785365311398e-06,
      "loss": 2.3868,
      "step": 56905
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1831485033035278,
      "learning_rate": 3.171577729918093e-06,
      "loss": 2.3723,
      "step": 56906
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0624170303344727,
      "learning_rate": 3.17127693488107e-06,
      "loss": 2.2477,
      "step": 56907
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.299333095550537,
      "learning_rate": 3.1709761514205906e-06,
      "loss": 2.3227,
      "step": 56908
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1482267379760742,
      "learning_rate": 3.170675379537157e-06,
      "loss": 2.3894,
      "step": 56909
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9884520769119263,
      "learning_rate": 3.170374619231288e-06,
      "loss": 2.318,
      "step": 56910
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0716958045959473,
      "learning_rate": 3.170073870503485e-06,
      "loss": 2.4956,
      "step": 56911
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1697049140930176,
      "learning_rate": 3.169773133354266e-06,
      "loss": 2.3244,
      "step": 56912
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.06987464427948,
      "learning_rate": 3.169472407784132e-06,
      "loss": 2.3494,
      "step": 56913
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.108635425567627,
      "learning_rate": 3.1691716937936023e-06,
      "loss": 2.6561,
      "step": 56914
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0511378049850464,
      "learning_rate": 3.168870991383177e-06,
      "loss": 2.4152,
      "step": 56915
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1456423997879028,
      "learning_rate": 3.1685703005533754e-06,
      "loss": 2.2936,
      "step": 56916
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1359566450119019,
      "learning_rate": 3.168269621304698e-06,
      "loss": 2.4348,
      "step": 56917
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1688348054885864,
      "learning_rate": 3.1679689536376624e-06,
      "loss": 2.2608,
      "step": 56918
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0569984912872314,
      "learning_rate": 3.1676682975527752e-06,
      "loss": 2.2799,
      "step": 56919
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1058622598648071,
      "learning_rate": 3.167367653050546e-06,
      "loss": 2.1548,
      "step": 56920
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1386057138442993,
      "learning_rate": 3.1670670201314815e-06,
      "loss": 2.4668,
      "step": 56921
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0501126050949097,
      "learning_rate": 3.1667663987960972e-06,
      "loss": 2.3389,
      "step": 56922
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.139865517616272,
      "learning_rate": 3.1664657890448957e-06,
      "loss": 2.3647,
      "step": 56923
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1671780347824097,
      "learning_rate": 3.166165190878395e-06,
      "loss": 2.1594,
      "step": 56924
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.061678171157837,
      "learning_rate": 3.1658646042970964e-06,
      "loss": 2.2842,
      "step": 56925
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.257118582725525,
      "learning_rate": 3.1655640293015167e-06,
      "loss": 2.6775,
      "step": 56926
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0745213031768799,
      "learning_rate": 3.165263465892163e-06,
      "loss": 2.4051,
      "step": 56927
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0223153829574585,
      "learning_rate": 3.1649629140695403e-06,
      "loss": 2.2325,
      "step": 56928
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0931637287139893,
      "learning_rate": 3.164662373834164e-06,
      "loss": 2.4235,
      "step": 56929
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1854829788208008,
      "learning_rate": 3.16436184518654e-06,
      "loss": 2.1831,
      "step": 56930
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0354315042495728,
      "learning_rate": 3.1640613281271814e-06,
      "loss": 2.2448,
      "step": 56931
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9875097870826721,
      "learning_rate": 3.163760822656595e-06,
      "loss": 2.3542,
      "step": 56932
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1817115545272827,
      "learning_rate": 3.1634603287752917e-06,
      "loss": 2.4429,
      "step": 56933
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1676312685012817,
      "learning_rate": 3.1631598464837764e-06,
      "loss": 2.1128,
      "step": 56934
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.221811056137085,
      "learning_rate": 3.1628593757825652e-06,
      "loss": 2.3559,
      "step": 56935
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1728652715682983,
      "learning_rate": 3.1625589166721614e-06,
      "loss": 2.3839,
      "step": 56936
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0106195211410522,
      "learning_rate": 3.16225846915308e-06,
      "loss": 2.1813,
      "step": 56937
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0071083307266235,
      "learning_rate": 3.1619580332258237e-06,
      "loss": 2.0891,
      "step": 56938
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1732640266418457,
      "learning_rate": 3.1616576088909102e-06,
      "loss": 2.0178,
      "step": 56939
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.078136920928955,
      "learning_rate": 3.161357196148841e-06,
      "loss": 2.2209,
      "step": 56940
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1115385293960571,
      "learning_rate": 3.1610567950001315e-06,
      "loss": 2.3056,
      "step": 56941
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0251646041870117,
      "learning_rate": 3.1607564054452844e-06,
      "loss": 2.3168,
      "step": 56942
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0941358804702759,
      "learning_rate": 3.1604560274848174e-06,
      "loss": 2.3853,
      "step": 56943
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.061825156211853,
      "learning_rate": 3.1601556611192307e-06,
      "loss": 2.3956,
      "step": 56944
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.111301064491272,
      "learning_rate": 3.1598553063490444e-06,
      "loss": 2.4873,
      "step": 56945
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1757004261016846,
      "learning_rate": 3.1595549631747548e-06,
      "loss": 2.4733,
      "step": 56946
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.057099461555481,
      "learning_rate": 3.1592546315968807e-06,
      "loss": 2.2838,
      "step": 56947
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.088338017463684,
      "learning_rate": 3.158954311615925e-06,
      "loss": 2.2728,
      "step": 56948
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3774633407592773,
      "learning_rate": 3.158654003232402e-06,
      "loss": 2.3267,
      "step": 56949
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0735137462615967,
      "learning_rate": 3.1583537064468163e-06,
      "loss": 2.4724,
      "step": 56950
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1334160566329956,
      "learning_rate": 3.1580534212596815e-06,
      "loss": 2.2954,
      "step": 56951
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1186110973358154,
      "learning_rate": 3.157753147671502e-06,
      "loss": 2.4058,
      "step": 56952
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.049713134765625,
      "learning_rate": 3.1574528856827923e-06,
      "loss": 2.4376,
      "step": 56953
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.022658109664917,
      "learning_rate": 3.1571526352940583e-06,
      "loss": 2.3478,
      "step": 56954
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1834224462509155,
      "learning_rate": 3.156852396505805e-06,
      "loss": 2.2613,
      "step": 56955
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0389798879623413,
      "learning_rate": 3.1565521693185496e-06,
      "loss": 2.3508,
      "step": 56956
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0264899730682373,
      "learning_rate": 3.156251953732794e-06,
      "loss": 2.3022,
      "step": 56957
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1905256509780884,
      "learning_rate": 3.1559517497490534e-06,
      "loss": 2.0872,
      "step": 56958
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.13433039188385,
      "learning_rate": 3.1556515573678326e-06,
      "loss": 2.2146,
      "step": 56959
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0549545288085938,
      "learning_rate": 3.1553513765896413e-06,
      "loss": 2.4462,
      "step": 56960
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2107796669006348,
      "learning_rate": 3.1550512074149854e-06,
      "loss": 2.2501,
      "step": 56961
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0650246143341064,
      "learning_rate": 3.15475104984438e-06,
      "loss": 2.3916,
      "step": 56962
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0748052597045898,
      "learning_rate": 3.1544509038783278e-06,
      "loss": 2.4758,
      "step": 56963
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.080701231956482,
      "learning_rate": 3.1541507695173434e-06,
      "loss": 2.3336,
      "step": 56964
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1916290521621704,
      "learning_rate": 3.15385064676193e-06,
      "loss": 2.415,
      "step": 56965
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0974619388580322,
      "learning_rate": 3.1535505356126018e-06,
      "loss": 2.0504,
      "step": 56966
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1996034383773804,
      "learning_rate": 3.1532504360698613e-06,
      "loss": 2.4839,
      "step": 56967
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2648303508758545,
      "learning_rate": 3.152950348134225e-06,
      "loss": 2.4066,
      "step": 56968
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0286391973495483,
      "learning_rate": 3.1526502718061946e-06,
      "loss": 2.4262,
      "step": 56969
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9799137711524963,
      "learning_rate": 3.1523502070862843e-06,
      "loss": 2.4392,
      "step": 56970
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0182338953018188,
      "learning_rate": 3.152050153974997e-06,
      "loss": 2.1123,
      "step": 56971
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1216986179351807,
      "learning_rate": 3.15175011247285e-06,
      "loss": 2.2145,
      "step": 56972
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0406947135925293,
      "learning_rate": 3.151450082580342e-06,
      "loss": 2.3558,
      "step": 56973
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2622241973876953,
      "learning_rate": 3.1511500642979876e-06,
      "loss": 2.1878,
      "step": 56974
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1371428966522217,
      "learning_rate": 3.1508500576262925e-06,
      "loss": 2.3828,
      "step": 56975
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0369685888290405,
      "learning_rate": 3.1505500625657683e-06,
      "loss": 2.0168,
      "step": 56976
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.004798173904419,
      "learning_rate": 3.1502500791169198e-06,
      "loss": 2.3614,
      "step": 56977
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.237449049949646,
      "learning_rate": 3.1499501072802606e-06,
      "loss": 2.3639,
      "step": 56978
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0594754219055176,
      "learning_rate": 3.1496501470562936e-06,
      "loss": 2.1513,
      "step": 56979
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1371471881866455,
      "learning_rate": 3.1493501984455332e-06,
      "loss": 2.296,
      "step": 56980
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.058520793914795,
      "learning_rate": 3.149050261448481e-06,
      "loss": 2.1626,
      "step": 56981
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1133638620376587,
      "learning_rate": 3.1487503360656536e-06,
      "loss": 2.5079,
      "step": 56982
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.7089362144470215,
      "learning_rate": 3.148450422297554e-06,
      "loss": 2.2734,
      "step": 56983
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.117047667503357,
      "learning_rate": 3.148150520144689e-06,
      "loss": 2.0971,
      "step": 56984
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1374512910842896,
      "learning_rate": 3.147850629607574e-06,
      "loss": 2.3803,
      "step": 56985
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0850929021835327,
      "learning_rate": 3.147550750686712e-06,
      "loss": 2.5146,
      "step": 56986
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2632242441177368,
      "learning_rate": 3.14725088338261e-06,
      "loss": 2.4157,
      "step": 56987
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.127564549446106,
      "learning_rate": 3.1469510276957817e-06,
      "loss": 2.3854,
      "step": 56988
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2344197034835815,
      "learning_rate": 3.146651183626733e-06,
      "loss": 2.4242,
      "step": 56989
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1261261701583862,
      "learning_rate": 3.146351351175968e-06,
      "loss": 2.1841,
      "step": 56990
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0213693380355835,
      "learning_rate": 3.146051530344003e-06,
      "loss": 2.375,
      "step": 56991
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.182312250137329,
      "learning_rate": 3.145751721131338e-06,
      "loss": 2.3357,
      "step": 56992
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1475807428359985,
      "learning_rate": 3.1454519235384893e-06,
      "loss": 2.2383,
      "step": 56993
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1837464570999146,
      "learning_rate": 3.145152137565959e-06,
      "loss": 2.5011,
      "step": 56994
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.068960189819336,
      "learning_rate": 3.144852363214259e-06,
      "loss": 2.3183,
      "step": 56995
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1129305362701416,
      "learning_rate": 3.1445526004838933e-06,
      "loss": 2.3565,
      "step": 56996
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1152857542037964,
      "learning_rate": 3.1442528493753767e-06,
      "loss": 2.3289,
      "step": 56997
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2465074062347412,
      "learning_rate": 3.143953109889213e-06,
      "loss": 2.4651,
      "step": 56998
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.053082823753357,
      "learning_rate": 3.1436533820259117e-06,
      "loss": 2.429,
      "step": 56999
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.101747751235962,
      "learning_rate": 3.1433536657859765e-06,
      "loss": 2.1538,
      "step": 57000
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1179709434509277,
      "learning_rate": 3.143053961169922e-06,
      "loss": 2.3203,
      "step": 57001
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1099785566329956,
      "learning_rate": 3.1427542681782495e-06,
      "loss": 2.27,
      "step": 57002
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1054013967514038,
      "learning_rate": 3.142454586811475e-06,
      "loss": 2.4045,
      "step": 57003
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1129480600357056,
      "learning_rate": 3.142154917070099e-06,
      "loss": 2.3699,
      "step": 57004
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0376594066619873,
      "learning_rate": 3.141855258954636e-06,
      "loss": 2.3416,
      "step": 57005
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2070685625076294,
      "learning_rate": 3.141555612465588e-06,
      "loss": 2.3849,
      "step": 57006
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1453142166137695,
      "learning_rate": 3.141255977603469e-06,
      "loss": 2.271,
      "step": 57007
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.129164457321167,
      "learning_rate": 3.1409563543687816e-06,
      "loss": 2.5672,
      "step": 57008
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0884560346603394,
      "learning_rate": 3.140656742762038e-06,
      "loss": 2.2774,
      "step": 57009
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1311109066009521,
      "learning_rate": 3.1403571427837454e-06,
      "loss": 2.3288,
      "step": 57010
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0853420495986938,
      "learning_rate": 3.140057554434409e-06,
      "loss": 2.1635,
      "step": 57011
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.068034291267395,
      "learning_rate": 3.1397579777145357e-06,
      "loss": 2.2504,
      "step": 57012
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1894553899765015,
      "learning_rate": 3.13945841262464e-06,
      "loss": 2.1493,
      "step": 57013
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1379473209381104,
      "learning_rate": 3.1391588591652213e-06,
      "loss": 2.274,
      "step": 57014
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.22650945186615,
      "learning_rate": 3.1388593173367954e-06,
      "loss": 2.4246,
      "step": 57015
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0944446325302124,
      "learning_rate": 3.1385597871398666e-06,
      "loss": 2.3394,
      "step": 57016
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.173809289932251,
      "learning_rate": 3.1382602685749384e-06,
      "loss": 2.4409,
      "step": 57017
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.070770025253296,
      "learning_rate": 3.1379607616425267e-06,
      "loss": 2.3968,
      "step": 57018
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9680538177490234,
      "learning_rate": 3.1376612663431317e-06,
      "loss": 2.4721,
      "step": 57019
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.144360899925232,
      "learning_rate": 3.1373617826772675e-06,
      "loss": 2.2596,
      "step": 57020
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.073753833770752,
      "learning_rate": 3.1370623106454356e-06,
      "loss": 2.4429,
      "step": 57021
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0453417301177979,
      "learning_rate": 3.1367628502481505e-06,
      "loss": 2.4677,
      "step": 57022
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0616642236709595,
      "learning_rate": 3.136463401485914e-06,
      "loss": 2.4204,
      "step": 57023
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2326961755752563,
      "learning_rate": 3.136163964359242e-06,
      "loss": 2.4189,
      "step": 57024
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9820579886436462,
      "learning_rate": 3.135864538868629e-06,
      "loss": 2.317,
      "step": 57025
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0621358156204224,
      "learning_rate": 3.1355651250145945e-06,
      "loss": 2.4999,
      "step": 57026
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2828360795974731,
      "learning_rate": 3.135265722797638e-06,
      "loss": 2.2501,
      "step": 57027
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.094474196434021,
      "learning_rate": 3.134966332218273e-06,
      "loss": 2.4164,
      "step": 57028
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2163877487182617,
      "learning_rate": 3.1346669532770023e-06,
      "loss": 2.1964,
      "step": 57029
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1172900199890137,
      "learning_rate": 3.1343675859743382e-06,
      "loss": 2.5643,
      "step": 57030
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1790317296981812,
      "learning_rate": 3.1340682303107827e-06,
      "loss": 2.3797,
      "step": 57031
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0985140800476074,
      "learning_rate": 3.13376888628685e-06,
      "loss": 2.3149,
      "step": 57032
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1176139116287231,
      "learning_rate": 3.13346955390304e-06,
      "loss": 2.3521,
      "step": 57033
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0622508525848389,
      "learning_rate": 3.1331702331598667e-06,
      "loss": 2.3093,
      "step": 57034
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0567669868469238,
      "learning_rate": 3.1328709240578325e-06,
      "loss": 2.2995,
      "step": 57035
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.158823013305664,
      "learning_rate": 3.1325716265974505e-06,
      "loss": 2.2936,
      "step": 57036
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0428521633148193,
      "learning_rate": 3.1322723407792242e-06,
      "loss": 2.4246,
      "step": 57037
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2280012369155884,
      "learning_rate": 3.131973066603662e-06,
      "loss": 2.4372,
      "step": 57038
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0151886940002441,
      "learning_rate": 3.131673804071268e-06,
      "loss": 2.2286,
      "step": 57039
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1111338138580322,
      "learning_rate": 3.1313745531825548e-06,
      "loss": 2.3058,
      "step": 57040
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0886552333831787,
      "learning_rate": 3.1310753139380247e-06,
      "loss": 2.3623,
      "step": 57041
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1213263273239136,
      "learning_rate": 3.1307760863381898e-06,
      "loss": 2.1416,
      "step": 57042
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9718560576438904,
      "learning_rate": 3.130476870383552e-06,
      "loss": 2.3216,
      "step": 57043
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9763890504837036,
      "learning_rate": 3.130177666074625e-06,
      "loss": 2.3607,
      "step": 57044
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.114241123199463,
      "learning_rate": 3.1298784734119134e-06,
      "loss": 2.2154,
      "step": 57045
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0135802030563354,
      "learning_rate": 3.12957929239592e-06,
      "loss": 2.358,
      "step": 57046
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3180253505706787,
      "learning_rate": 3.1292801230271587e-06,
      "loss": 2.3813,
      "step": 57047
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0193179845809937,
      "learning_rate": 3.12898096530613e-06,
      "loss": 2.5534,
      "step": 57048
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9664668440818787,
      "learning_rate": 3.128681819233349e-06,
      "loss": 2.4667,
      "step": 57049
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.423919677734375,
      "learning_rate": 3.1283826848093144e-06,
      "loss": 2.2601,
      "step": 57050
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0477508306503296,
      "learning_rate": 3.128083562034544e-06,
      "loss": 2.314,
      "step": 57051
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0448135137557983,
      "learning_rate": 3.127784450909532e-06,
      "loss": 2.4498,
      "step": 57052
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9929808378219604,
      "learning_rate": 3.127485351434796e-06,
      "loss": 2.4376,
      "step": 57053
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2670215368270874,
      "learning_rate": 3.1271862636108354e-06,
      "loss": 2.3777,
      "step": 57054
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1167376041412354,
      "learning_rate": 3.1268871874381624e-06,
      "loss": 2.2164,
      "step": 57055
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0298517942428589,
      "learning_rate": 3.12658812291728e-06,
      "loss": 2.0991,
      "step": 57056
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0823532342910767,
      "learning_rate": 3.1262890700487014e-06,
      "loss": 2.3907,
      "step": 57057
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.115391492843628,
      "learning_rate": 3.125990028832926e-06,
      "loss": 2.4856,
      "step": 57058
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1071969270706177,
      "learning_rate": 3.1256909992704674e-06,
      "loss": 2.1616,
      "step": 57059
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2453138828277588,
      "learning_rate": 3.1253919813618273e-06,
      "loss": 2.3894,
      "step": 57060
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0066635608673096,
      "learning_rate": 3.1250929751075164e-06,
      "loss": 2.1963,
      "step": 57061
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4331337213516235,
      "learning_rate": 3.1247939805080384e-06,
      "loss": 2.1954,
      "step": 57062
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1958436965942383,
      "learning_rate": 3.124494997563905e-06,
      "loss": 2.4763,
      "step": 57063
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0438059568405151,
      "learning_rate": 3.124196026275619e-06,
      "loss": 2.403,
      "step": 57064
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0824445486068726,
      "learning_rate": 3.1238970666436887e-06,
      "loss": 2.4657,
      "step": 57065
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0821518898010254,
      "learning_rate": 3.123598118668617e-06,
      "loss": 2.287,
      "step": 57066
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2367842197418213,
      "learning_rate": 3.1232991823509186e-06,
      "loss": 2.4296,
      "step": 57067
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.057247281074524,
      "learning_rate": 3.123000257691091e-06,
      "loss": 2.2919,
      "step": 57068
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1759306192398071,
      "learning_rate": 3.122701344689649e-06,
      "loss": 2.3906,
      "step": 57069
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0311506986618042,
      "learning_rate": 3.1224024433470934e-06,
      "loss": 2.464,
      "step": 57070
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.126052975654602,
      "learning_rate": 3.1221035536639364e-06,
      "loss": 2.1151,
      "step": 57071
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1644753217697144,
      "learning_rate": 3.1218046756406826e-06,
      "loss": 2.4622,
      "step": 57072
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1781201362609863,
      "learning_rate": 3.121505809277835e-06,
      "loss": 2.4078,
      "step": 57073
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.070264220237732,
      "learning_rate": 3.121206954575905e-06,
      "loss": 2.5021,
      "step": 57074
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0467302799224854,
      "learning_rate": 3.1209081115353955e-06,
      "loss": 2.2445,
      "step": 57075
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.027387022972107,
      "learning_rate": 3.1206092801568176e-06,
      "loss": 2.3272,
      "step": 57076
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1470539569854736,
      "learning_rate": 3.1203104604406755e-06,
      "loss": 2.0926,
      "step": 57077
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0788328647613525,
      "learning_rate": 3.1200116523874758e-06,
      "loss": 2.26,
      "step": 57078
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1421061754226685,
      "learning_rate": 3.119712855997722e-06,
      "loss": 2.2351,
      "step": 57079
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0803279876708984,
      "learning_rate": 3.119414071271927e-06,
      "loss": 2.3643,
      "step": 57080
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1176235675811768,
      "learning_rate": 3.11911529821059e-06,
      "loss": 2.4334,
      "step": 57081
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1302146911621094,
      "learning_rate": 3.1188165368142243e-06,
      "loss": 2.2337,
      "step": 57082
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0169193744659424,
      "learning_rate": 3.1185177870833304e-06,
      "loss": 2.3507,
      "step": 57083
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0672606229782104,
      "learning_rate": 3.1182190490184217e-06,
      "loss": 2.6837,
      "step": 57084
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0811840295791626,
      "learning_rate": 3.1179203226199973e-06,
      "loss": 2.3036,
      "step": 57085
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.152390718460083,
      "learning_rate": 3.1176216078885703e-06,
      "loss": 2.3547,
      "step": 57086
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.230746865272522,
      "learning_rate": 3.1173229048246402e-06,
      "loss": 2.3896,
      "step": 57087
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2312026023864746,
      "learning_rate": 3.117024213428721e-06,
      "loss": 2.2944,
      "step": 57088
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0359227657318115,
      "learning_rate": 3.1167255337013114e-06,
      "loss": 2.2636,
      "step": 57089
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0748546123504639,
      "learning_rate": 3.116426865642929e-06,
      "loss": 2.2788,
      "step": 57090
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1297369003295898,
      "learning_rate": 3.1161282092540644e-06,
      "loss": 2.4948,
      "step": 57091
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.062788963317871,
      "learning_rate": 3.1158295645352367e-06,
      "loss": 2.174,
      "step": 57092
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0615673065185547,
      "learning_rate": 3.115530931486944e-06,
      "loss": 2.4038,
      "step": 57093
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5189677476882935,
      "learning_rate": 3.115232310109699e-06,
      "loss": 2.2994,
      "step": 57094
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.090099573135376,
      "learning_rate": 3.1149337004040024e-06,
      "loss": 2.5028,
      "step": 57095
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0786454677581787,
      "learning_rate": 3.114635102370367e-06,
      "loss": 2.4324,
      "step": 57096
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1336777210235596,
      "learning_rate": 3.114336516009291e-06,
      "loss": 2.1989,
      "step": 57097
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1185320615768433,
      "learning_rate": 3.1140379413212873e-06,
      "loss": 2.2446,
      "step": 57098
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0246233940124512,
      "learning_rate": 3.1137393783068583e-06,
      "loss": 2.355,
      "step": 57099
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.147938847541809,
      "learning_rate": 3.1134408269665127e-06,
      "loss": 1.9795,
      "step": 57100
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0980581045150757,
      "learning_rate": 3.1131422873007556e-06,
      "loss": 2.2908,
      "step": 57101
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0301116704940796,
      "learning_rate": 3.1128437593100904e-06,
      "loss": 2.1057,
      "step": 57102
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.265790343284607,
      "learning_rate": 3.1125452429950277e-06,
      "loss": 2.5728,
      "step": 57103
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2832324504852295,
      "learning_rate": 3.112246738356072e-06,
      "loss": 2.2371,
      "step": 57104
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.051430583000183,
      "learning_rate": 3.1119482453937257e-06,
      "loss": 2.5478,
      "step": 57105
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1754076480865479,
      "learning_rate": 3.1116497641085008e-06,
      "loss": 2.2767,
      "step": 57106
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9933546781539917,
      "learning_rate": 3.1113512945009007e-06,
      "loss": 2.1849,
      "step": 57107
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.082623839378357,
      "learning_rate": 3.1110528365714277e-06,
      "loss": 2.3966,
      "step": 57108
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.061204195022583,
      "learning_rate": 3.1107543903205938e-06,
      "loss": 2.2332,
      "step": 57109
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0251132249832153,
      "learning_rate": 3.1104559557489e-06,
      "loss": 2.6366,
      "step": 57110
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3104172945022583,
      "learning_rate": 3.1101575328568577e-06,
      "loss": 2.4281,
      "step": 57111
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1042625904083252,
      "learning_rate": 3.109859121644966e-06,
      "loss": 2.3539,
      "step": 57112
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1733357906341553,
      "learning_rate": 3.109560722113738e-06,
      "loss": 2.4239,
      "step": 57113
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1504833698272705,
      "learning_rate": 3.109262334263672e-06,
      "loss": 2.5498,
      "step": 57114
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0850969552993774,
      "learning_rate": 3.108963958095281e-06,
      "loss": 2.295,
      "step": 57115
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1119805574417114,
      "learning_rate": 3.108665593609066e-06,
      "loss": 2.1599,
      "step": 57116
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0982333421707153,
      "learning_rate": 3.1083672408055397e-06,
      "loss": 2.2483,
      "step": 57117
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0550243854522705,
      "learning_rate": 3.108068899685196e-06,
      "loss": 2.4284,
      "step": 57118
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9574295282363892,
      "learning_rate": 3.1077705702485515e-06,
      "loss": 2.6333,
      "step": 57119
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1818435192108154,
      "learning_rate": 3.1074722524961043e-06,
      "loss": 2.2234,
      "step": 57120
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1400790214538574,
      "learning_rate": 3.107173946428367e-06,
      "loss": 2.3549,
      "step": 57121
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.007877230644226,
      "learning_rate": 3.1068756520458377e-06,
      "loss": 2.2017,
      "step": 57122
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1673914194107056,
      "learning_rate": 3.1065773693490297e-06,
      "loss": 2.4224,
      "step": 57123
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0057075023651123,
      "learning_rate": 3.1062790983384426e-06,
      "loss": 2.2198,
      "step": 57124
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2369062900543213,
      "learning_rate": 3.1059808390145885e-06,
      "loss": 2.1866,
      "step": 57125
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2742457389831543,
      "learning_rate": 3.1056825913779654e-06,
      "loss": 2.2505,
      "step": 57126
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1167552471160889,
      "learning_rate": 3.105384355429086e-06,
      "loss": 2.403,
      "step": 57127
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.091446876525879,
      "learning_rate": 3.105086131168453e-06,
      "loss": 2.4354,
      "step": 57128
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0273802280426025,
      "learning_rate": 3.1047879185965678e-06,
      "loss": 2.2446,
      "step": 57129
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1718500852584839,
      "learning_rate": 3.1044897177139433e-06,
      "loss": 2.4287,
      "step": 57130
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.069124698638916,
      "learning_rate": 3.104191528521082e-06,
      "loss": 2.3255,
      "step": 57131
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0639480352401733,
      "learning_rate": 3.1038933510184854e-06,
      "loss": 2.3589,
      "step": 57132
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1614378690719604,
      "learning_rate": 3.1035951852066646e-06,
      "loss": 2.209,
      "step": 57133
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1830017566680908,
      "learning_rate": 3.1032970310861245e-06,
      "loss": 2.2977,
      "step": 57134
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1629270315170288,
      "learning_rate": 3.1029988886573657e-06,
      "loss": 2.19,
      "step": 57135
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0302761793136597,
      "learning_rate": 3.1027007579209e-06,
      "loss": 2.4477,
      "step": 57136
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1377806663513184,
      "learning_rate": 3.1024026388772265e-06,
      "loss": 2.3071,
      "step": 57137
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0041476488113403,
      "learning_rate": 3.1021045315268574e-06,
      "loss": 2.3845,
      "step": 57138
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.042316198348999,
      "learning_rate": 3.1018064358702906e-06,
      "loss": 2.0712,
      "step": 57139
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.158461093902588,
      "learning_rate": 3.101508351908039e-06,
      "loss": 2.2163,
      "step": 57140
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0016283988952637,
      "learning_rate": 3.1012102796406006e-06,
      "loss": 2.2806,
      "step": 57141
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1036587953567505,
      "learning_rate": 3.100912219068489e-06,
      "loss": 2.3785,
      "step": 57142
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9915582537651062,
      "learning_rate": 3.1006141701922044e-06,
      "loss": 2.0779,
      "step": 57143
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0803383588790894,
      "learning_rate": 3.100316133012252e-06,
      "loss": 2.3819,
      "step": 57144
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0761785507202148,
      "learning_rate": 3.100018107529135e-06,
      "loss": 2.2841,
      "step": 57145
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1183266639709473,
      "learning_rate": 3.0997200937433647e-06,
      "loss": 2.1175,
      "step": 57146
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0759462118148804,
      "learning_rate": 3.0994220916554397e-06,
      "loss": 2.5287,
      "step": 57147
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0073341131210327,
      "learning_rate": 3.099124101265871e-06,
      "loss": 2.3512,
      "step": 57148
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0610311031341553,
      "learning_rate": 3.0988261225751582e-06,
      "loss": 2.2606,
      "step": 57149
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1450086832046509,
      "learning_rate": 3.098528155583813e-06,
      "loss": 2.4375,
      "step": 57150
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.071502447128296,
      "learning_rate": 3.0982302002923336e-06,
      "loss": 2.4008,
      "step": 57151
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1219990253448486,
      "learning_rate": 3.0979322567012316e-06,
      "loss": 2.1955,
      "step": 57152
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0267764329910278,
      "learning_rate": 3.0976343248110063e-06,
      "loss": 2.2276,
      "step": 57153
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1105290651321411,
      "learning_rate": 3.0973364046221676e-06,
      "loss": 2.3025,
      "step": 57154
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0195635557174683,
      "learning_rate": 3.0970384961352195e-06,
      "loss": 2.3375,
      "step": 57155
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1380717754364014,
      "learning_rate": 3.096740599350666e-06,
      "loss": 2.2412,
      "step": 57156
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2661094665527344,
      "learning_rate": 3.0964427142690078e-06,
      "loss": 2.0058,
      "step": 57157
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0173990726470947,
      "learning_rate": 3.096144840890758e-06,
      "loss": 2.4283,
      "step": 57158
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1325352191925049,
      "learning_rate": 3.0958469792164147e-06,
      "loss": 2.4721,
      "step": 57159
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1500405073165894,
      "learning_rate": 3.0955491292464888e-06,
      "loss": 2.3013,
      "step": 57160
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2682569026947021,
      "learning_rate": 3.0952512909814803e-06,
      "loss": 2.3467,
      "step": 57161
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1858521699905396,
      "learning_rate": 3.094953464421898e-06,
      "loss": 2.1689,
      "step": 57162
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1344940662384033,
      "learning_rate": 3.0946556495682458e-06,
      "loss": 2.4472,
      "step": 57163
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.203918218612671,
      "learning_rate": 3.0943578464210242e-06,
      "loss": 2.333,
      "step": 57164
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0620450973510742,
      "learning_rate": 3.094060054980744e-06,
      "loss": 2.4517,
      "step": 57165
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.130262851715088,
      "learning_rate": 3.0937622752479058e-06,
      "loss": 2.291,
      "step": 57166
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0114859342575073,
      "learning_rate": 3.0934645072230185e-06,
      "loss": 2.488,
      "step": 57167
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.203721284866333,
      "learning_rate": 3.093166750906582e-06,
      "loss": 2.293,
      "step": 57168
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.24518620967865,
      "learning_rate": 3.0928690062991094e-06,
      "loss": 2.3355,
      "step": 57169
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2703584432601929,
      "learning_rate": 3.092571273401095e-06,
      "loss": 2.4164,
      "step": 57170
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0735102891921997,
      "learning_rate": 3.0922735522130498e-06,
      "loss": 2.1821,
      "step": 57171
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0663394927978516,
      "learning_rate": 3.091975842735475e-06,
      "loss": 2.4089,
      "step": 57172
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1337982416152954,
      "learning_rate": 3.0916781449688794e-06,
      "loss": 2.2725,
      "step": 57173
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1059989929199219,
      "learning_rate": 3.091380458913763e-06,
      "loss": 2.231,
      "step": 57174
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.118026852607727,
      "learning_rate": 3.091082784570637e-06,
      "loss": 2.3584,
      "step": 57175
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0832703113555908,
      "learning_rate": 3.0907851219399976e-06,
      "loss": 2.5861,
      "step": 57176
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.054004192352295,
      "learning_rate": 3.0904874710223586e-06,
      "loss": 2.3354,
      "step": 57177
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1651619672775269,
      "learning_rate": 3.0901898318182153e-06,
      "loss": 2.2616,
      "step": 57178
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1173564195632935,
      "learning_rate": 3.089892204328081e-06,
      "loss": 2.3859,
      "step": 57179
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2039958238601685,
      "learning_rate": 3.0895945885524526e-06,
      "loss": 2.4868,
      "step": 57180
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.469276785850525,
      "learning_rate": 3.089296984491842e-06,
      "loss": 2.703,
      "step": 57181
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1254836320877075,
      "learning_rate": 3.0889993921467454e-06,
      "loss": 2.322,
      "step": 57182
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9968361854553223,
      "learning_rate": 3.088701811517679e-06,
      "loss": 2.4523,
      "step": 57183
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1546428203582764,
      "learning_rate": 3.088404242605134e-06,
      "loss": 2.3951,
      "step": 57184
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0935513973236084,
      "learning_rate": 3.0881066854096244e-06,
      "loss": 2.3439,
      "step": 57185
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0770596265792847,
      "learning_rate": 3.0878091399316466e-06,
      "loss": 2.4967,
      "step": 57186
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1861985921859741,
      "learning_rate": 3.087511606171715e-06,
      "loss": 2.4428,
      "step": 57187
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0089472532272339,
      "learning_rate": 3.0872140841303233e-06,
      "loss": 2.1549,
      "step": 57188
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4112443923950195,
      "learning_rate": 3.0869165738079855e-06,
      "loss": 2.2438,
      "step": 57189
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.231898307800293,
      "learning_rate": 3.086619075205202e-06,
      "loss": 2.3772,
      "step": 57190
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.112733006477356,
      "learning_rate": 3.086321588322473e-06,
      "loss": 2.3363,
      "step": 57191
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.192891001701355,
      "learning_rate": 3.0860241131603098e-06,
      "loss": 2.2503,
      "step": 57192
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0343377590179443,
      "learning_rate": 3.0857266497192105e-06,
      "loss": 2.4586,
      "step": 57193
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.043427586555481,
      "learning_rate": 3.0854291979996853e-06,
      "loss": 2.2274,
      "step": 57194
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.140401840209961,
      "learning_rate": 3.0851317580022333e-06,
      "loss": 2.6366,
      "step": 57195
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1773308515548706,
      "learning_rate": 3.0848343297273663e-06,
      "loss": 2.2961,
      "step": 57196
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.083661437034607,
      "learning_rate": 3.0845369131755775e-06,
      "loss": 2.2977,
      "step": 57197
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2027626037597656,
      "learning_rate": 3.0842395083473807e-06,
      "loss": 2.2895,
      "step": 57198
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1110776662826538,
      "learning_rate": 3.0839421152432723e-06,
      "loss": 2.5149,
      "step": 57199
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.02011239528656,
      "learning_rate": 3.083644733863763e-06,
      "loss": 2.2537,
      "step": 57200
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.048989176750183,
      "learning_rate": 3.0833473642093515e-06,
      "loss": 2.1678,
      "step": 57201
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.9288361072540283,
      "learning_rate": 3.083050006280549e-06,
      "loss": 2.1679,
      "step": 57202
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0912836790084839,
      "learning_rate": 3.0827526600778514e-06,
      "loss": 2.1818,
      "step": 57203
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3310147523880005,
      "learning_rate": 3.0824553256017697e-06,
      "loss": 2.4257,
      "step": 57204
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0367414951324463,
      "learning_rate": 3.082158002852802e-06,
      "loss": 2.3319,
      "step": 57205
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.039748191833496,
      "learning_rate": 3.0818606918314576e-06,
      "loss": 2.372,
      "step": 57206
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0775333642959595,
      "learning_rate": 3.081563392538236e-06,
      "loss": 2.2883,
      "step": 57207
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0167402029037476,
      "learning_rate": 3.081266104973647e-06,
      "loss": 2.1309,
      "step": 57208
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.082938313484192,
      "learning_rate": 3.080968829138191e-06,
      "loss": 2.3168,
      "step": 57209
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0194889307022095,
      "learning_rate": 3.0806715650323716e-06,
      "loss": 2.2354,
      "step": 57210
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0095109939575195,
      "learning_rate": 3.08037431265669e-06,
      "loss": 2.2392,
      "step": 57211
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.160965919494629,
      "learning_rate": 3.0800770720116567e-06,
      "loss": 2.1658,
      "step": 57212
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0595593452453613,
      "learning_rate": 3.0797798430977688e-06,
      "loss": 2.3728,
      "step": 57213
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0766395330429077,
      "learning_rate": 3.079482625915536e-06,
      "loss": 2.3604,
      "step": 57214
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.056321620941162,
      "learning_rate": 3.0791854204654583e-06,
      "loss": 2.3495,
      "step": 57215
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.0811660289764404,
      "learning_rate": 3.0788882267480425e-06,
      "loss": 2.1522,
      "step": 57216
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1791388988494873,
      "learning_rate": 3.078591044763789e-06,
      "loss": 2.2662,
      "step": 57217
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0360990762710571,
      "learning_rate": 3.078293874513206e-06,
      "loss": 2.4907,
      "step": 57218
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9893561005592346,
      "learning_rate": 3.0779967159967948e-06,
      "loss": 2.2237,
      "step": 57219
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0787261724472046,
      "learning_rate": 3.077699569215056e-06,
      "loss": 2.4071,
      "step": 57220
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0285884141921997,
      "learning_rate": 3.0774024341684995e-06,
      "loss": 2.312,
      "step": 57221
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0622223615646362,
      "learning_rate": 3.0771053108576265e-06,
      "loss": 2.3303,
      "step": 57222
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0182958841323853,
      "learning_rate": 3.0768081992829367e-06,
      "loss": 2.2752,
      "step": 57223
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1400316953659058,
      "learning_rate": 3.076511099444941e-06,
      "loss": 2.4028,
      "step": 57224
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2442429065704346,
      "learning_rate": 3.0762140113441385e-06,
      "loss": 2.2707,
      "step": 57225
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1036535501480103,
      "learning_rate": 3.0759169349810313e-06,
      "loss": 2.4513,
      "step": 57226
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0554696321487427,
      "learning_rate": 3.075619870356129e-06,
      "loss": 2.3801,
      "step": 57227
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1054704189300537,
      "learning_rate": 3.075322817469928e-06,
      "loss": 2.5251,
      "step": 57228
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1186468601226807,
      "learning_rate": 3.0750257763229386e-06,
      "loss": 2.5132,
      "step": 57229
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0297738313674927,
      "learning_rate": 3.0747287469156573e-06,
      "loss": 2.2591,
      "step": 57230
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0694440603256226,
      "learning_rate": 3.074431729248596e-06,
      "loss": 2.2576,
      "step": 57231
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2545900344848633,
      "learning_rate": 3.074134723322251e-06,
      "loss": 2.3776,
      "step": 57232
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.061025857925415,
      "learning_rate": 3.07383772913713e-06,
      "loss": 2.2111,
      "step": 57233
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.212557077407837,
      "learning_rate": 3.073540746693734e-06,
      "loss": 2.276,
      "step": 57234
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.084515929222107,
      "learning_rate": 3.0732437759925737e-06,
      "loss": 2.2381,
      "step": 57235
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.078813910484314,
      "learning_rate": 3.0729468170341394e-06,
      "loss": 2.2996,
      "step": 57236
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.191232681274414,
      "learning_rate": 3.0726498698189445e-06,
      "loss": 2.2062,
      "step": 57237
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0759426355361938,
      "learning_rate": 3.0723529343474865e-06,
      "loss": 2.2296,
      "step": 57238
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0689733028411865,
      "learning_rate": 3.0720560106202755e-06,
      "loss": 2.2946,
      "step": 57239
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1731382608413696,
      "learning_rate": 3.0717590986378086e-06,
      "loss": 2.376,
      "step": 57240
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1323341131210327,
      "learning_rate": 3.0714621984005933e-06,
      "loss": 2.4081,
      "step": 57241
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.012833595275879,
      "learning_rate": 3.07116530990913e-06,
      "loss": 2.1991,
      "step": 57242
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.988191545009613,
      "learning_rate": 3.070868433163925e-06,
      "loss": 2.2861,
      "step": 57243
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0619773864746094,
      "learning_rate": 3.070571568165477e-06,
      "loss": 2.267,
      "step": 57244
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2123792171478271,
      "learning_rate": 3.0702747149142954e-06,
      "loss": 2.2289,
      "step": 57245
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.086012363433838,
      "learning_rate": 3.0699778734108808e-06,
      "loss": 2.2459,
      "step": 57246
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0074468851089478,
      "learning_rate": 3.0696810436557322e-06,
      "loss": 2.4138,
      "step": 57247
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6551012992858887,
      "learning_rate": 3.06938422564936e-06,
      "loss": 2.2552,
      "step": 57248
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0553306341171265,
      "learning_rate": 3.0690874193922637e-06,
      "loss": 2.1728,
      "step": 57249
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9951381087303162,
      "learning_rate": 3.068790624884943e-06,
      "loss": 2.2935,
      "step": 57250
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2204290628433228,
      "learning_rate": 3.0684938421279086e-06,
      "loss": 2.3924,
      "step": 57251
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1857832670211792,
      "learning_rate": 3.0681970711216604e-06,
      "loss": 2.4399,
      "step": 57252
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0546239614486694,
      "learning_rate": 3.0679003118666962e-06,
      "loss": 2.3566,
      "step": 57253
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.209546685218811,
      "learning_rate": 3.067603564363527e-06,
      "loss": 2.2248,
      "step": 57254
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2223689556121826,
      "learning_rate": 3.0673068286126497e-06,
      "loss": 2.1121,
      "step": 57255
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1571167707443237,
      "learning_rate": 3.067010104614574e-06,
      "loss": 2.4027,
      "step": 57256
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0077481269836426,
      "learning_rate": 3.0667133923697943e-06,
      "loss": 2.402,
      "step": 57257
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.016332745552063,
      "learning_rate": 3.066416691878823e-06,
      "loss": 2.1032,
      "step": 57258
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0713179111480713,
      "learning_rate": 3.066120003142156e-06,
      "loss": 2.6137,
      "step": 57259
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0474834442138672,
      "learning_rate": 3.065823326160301e-06,
      "loss": 2.4232,
      "step": 57260
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0535820722579956,
      "learning_rate": 3.0655266609337554e-06,
      "loss": 2.3332,
      "step": 57261
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.067101001739502,
      "learning_rate": 3.0652300074630314e-06,
      "loss": 2.5473,
      "step": 57262
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0714666843414307,
      "learning_rate": 3.064933365748621e-06,
      "loss": 2.0253,
      "step": 57263
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2304904460906982,
      "learning_rate": 3.0646367357910346e-06,
      "loss": 2.6286,
      "step": 57264
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1793208122253418,
      "learning_rate": 3.0643401175907692e-06,
      "loss": 2.4325,
      "step": 57265
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0924128293991089,
      "learning_rate": 3.0640435111483347e-06,
      "loss": 2.2945,
      "step": 57266
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0648725032806396,
      "learning_rate": 3.0637469164642265e-06,
      "loss": 2.3756,
      "step": 57267
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0004215240478516,
      "learning_rate": 3.063450333538954e-06,
      "loss": 2.2663,
      "step": 57268
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2467514276504517,
      "learning_rate": 3.0631537623730146e-06,
      "loss": 2.2184,
      "step": 57269
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0517257452011108,
      "learning_rate": 3.062857202966917e-06,
      "loss": 2.6173,
      "step": 57270
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0280461311340332,
      "learning_rate": 3.0625606553211574e-06,
      "loss": 2.4054,
      "step": 57271
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.089462161064148,
      "learning_rate": 3.0622641194362447e-06,
      "loss": 2.4213,
      "step": 57272
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0707389116287231,
      "learning_rate": 3.061967595312675e-06,
      "loss": 2.4015,
      "step": 57273
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1709944009780884,
      "learning_rate": 3.0616710829509577e-06,
      "loss": 2.5726,
      "step": 57274
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1108694076538086,
      "learning_rate": 3.0613745823515927e-06,
      "loss": 2.4066,
      "step": 57275
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0977140665054321,
      "learning_rate": 3.061078093515082e-06,
      "loss": 2.2027,
      "step": 57276
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1344918012619019,
      "learning_rate": 3.0607816164419257e-06,
      "loss": 2.2696,
      "step": 57277
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1578075885772705,
      "learning_rate": 3.0604851511326316e-06,
      "loss": 2.2753,
      "step": 57278
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2381269931793213,
      "learning_rate": 3.060188697587697e-06,
      "loss": 2.3017,
      "step": 57279
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0139646530151367,
      "learning_rate": 3.0598922558076316e-06,
      "loss": 2.3354,
      "step": 57280
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0513479709625244,
      "learning_rate": 3.0595958257929324e-06,
      "loss": 2.4388,
      "step": 57281
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0477920770645142,
      "learning_rate": 3.0592994075441007e-06,
      "loss": 2.3537,
      "step": 57282
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0887469053268433,
      "learning_rate": 3.0590030010616454e-06,
      "loss": 2.4522,
      "step": 57283
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.082537055015564,
      "learning_rate": 3.0587066063460613e-06,
      "loss": 2.2278,
      "step": 57284
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.202627420425415,
      "learning_rate": 3.0584102233978575e-06,
      "loss": 2.3829,
      "step": 57285
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9993383288383484,
      "learning_rate": 3.0581138522175315e-06,
      "loss": 2.4158,
      "step": 57286
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0995162725448608,
      "learning_rate": 3.0578174928055903e-06,
      "loss": 2.3114,
      "step": 57287
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2262176275253296,
      "learning_rate": 3.057521145162534e-06,
      "loss": 2.2943,
      "step": 57288
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9975963234901428,
      "learning_rate": 3.0572248092888656e-06,
      "loss": 2.3594,
      "step": 57289
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1311368942260742,
      "learning_rate": 3.0569284851850824e-06,
      "loss": 2.5851,
      "step": 57290
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1278564929962158,
      "learning_rate": 3.0566321728516958e-06,
      "loss": 2.4451,
      "step": 57291
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1430635452270508,
      "learning_rate": 3.056335872289199e-06,
      "loss": 2.1604,
      "step": 57292
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3871911764144897,
      "learning_rate": 3.0560395834981026e-06,
      "loss": 2.3447,
      "step": 57293
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0718114376068115,
      "learning_rate": 3.055743306478901e-06,
      "loss": 2.2784,
      "step": 57294
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0389381647109985,
      "learning_rate": 3.0554470412321047e-06,
      "loss": 2.4491,
      "step": 57295
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1004033088684082,
      "learning_rate": 3.0551507877582087e-06,
      "loss": 2.258,
      "step": 57296
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9885517954826355,
      "learning_rate": 3.0548545460577205e-06,
      "loss": 2.2478,
      "step": 57297
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1167750358581543,
      "learning_rate": 3.0545583161311376e-06,
      "loss": 2.3297,
      "step": 57298
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0635364055633545,
      "learning_rate": 3.0542620979789682e-06,
      "loss": 2.2943,
      "step": 57299
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9645032286643982,
      "learning_rate": 3.053965891601708e-06,
      "loss": 2.1879,
      "step": 57300
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.07575523853302,
      "learning_rate": 3.0536696969998668e-06,
      "loss": 2.4373,
      "step": 57301
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0874706506729126,
      "learning_rate": 3.0533735141739375e-06,
      "loss": 2.2751,
      "step": 57302
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1225647926330566,
      "learning_rate": 3.053077343124429e-06,
      "loss": 2.4502,
      "step": 57303
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0774158239364624,
      "learning_rate": 3.0527811838518384e-06,
      "loss": 2.5973,
      "step": 57304
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0239118337631226,
      "learning_rate": 3.0524850363566736e-06,
      "loss": 2.4056,
      "step": 57305
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0913797616958618,
      "learning_rate": 3.05218890063943e-06,
      "loss": 2.4463,
      "step": 57306
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1016147136688232,
      "learning_rate": 3.051892776700617e-06,
      "loss": 2.5422,
      "step": 57307
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.082757830619812,
      "learning_rate": 3.0515966645407326e-06,
      "loss": 2.4019,
      "step": 57308
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0103992223739624,
      "learning_rate": 3.0513005641602754e-06,
      "loss": 2.2196,
      "step": 57309
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2114640474319458,
      "learning_rate": 3.0510044755597557e-06,
      "loss": 2.2255,
      "step": 57310
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.117202877998352,
      "learning_rate": 3.0507083987396656e-06,
      "loss": 2.3694,
      "step": 57311
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9870920181274414,
      "learning_rate": 3.0504123337005164e-06,
      "loss": 2.3955,
      "step": 57312
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.065290927886963,
      "learning_rate": 3.050116280442802e-06,
      "loss": 2.2865,
      "step": 57313
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.10394287109375,
      "learning_rate": 3.049820238967035e-06,
      "loss": 2.4379,
      "step": 57314
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.088721752166748,
      "learning_rate": 3.049524209273704e-06,
      "loss": 2.2799,
      "step": 57315
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0187320709228516,
      "learning_rate": 3.0492281913633202e-06,
      "loss": 2.3221,
      "step": 57316
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1671748161315918,
      "learning_rate": 3.0489321852363797e-06,
      "loss": 2.042,
      "step": 57317
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0984225273132324,
      "learning_rate": 3.048636190893389e-06,
      "loss": 2.1792,
      "step": 57318
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0709477663040161,
      "learning_rate": 3.0483402083348455e-06,
      "loss": 2.2633,
      "step": 57319
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9847187995910645,
      "learning_rate": 3.048044237561256e-06,
      "loss": 2.3886,
      "step": 57320
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0497171878814697,
      "learning_rate": 3.0477482785731174e-06,
      "loss": 2.4296,
      "step": 57321
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1245489120483398,
      "learning_rate": 3.047452331370937e-06,
      "loss": 2.4463,
      "step": 57322
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9939019680023193,
      "learning_rate": 3.0471563959552098e-06,
      "loss": 2.2551,
      "step": 57323
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1074615716934204,
      "learning_rate": 3.046860472326443e-06,
      "loss": 2.4339,
      "step": 57324
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0559895038604736,
      "learning_rate": 3.0465645604851334e-06,
      "loss": 2.1205,
      "step": 57325
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1083035469055176,
      "learning_rate": 3.0462686604317892e-06,
      "loss": 2.1013,
      "step": 57326
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.074493408203125,
      "learning_rate": 3.0459727721669054e-06,
      "loss": 2.239,
      "step": 57327
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1505759954452515,
      "learning_rate": 3.045676895690991e-06,
      "loss": 2.1541,
      "step": 57328
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1108757257461548,
      "learning_rate": 3.0453810310045384e-06,
      "loss": 2.3582,
      "step": 57329
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1256964206695557,
      "learning_rate": 3.045085178108056e-06,
      "loss": 2.1715,
      "step": 57330
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0931092500686646,
      "learning_rate": 3.0447893370020407e-06,
      "loss": 2.3928,
      "step": 57331
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.109298586845398,
      "learning_rate": 3.0444935076869984e-06,
      "loss": 2.398,
      "step": 57332
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.218511700630188,
      "learning_rate": 3.0441976901634273e-06,
      "loss": 2.3939,
      "step": 57333
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2180620431900024,
      "learning_rate": 3.0439018844318325e-06,
      "loss": 2.3659,
      "step": 57334
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.208806037902832,
      "learning_rate": 3.04360609049271e-06,
      "loss": 2.4674,
      "step": 57335
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.973666787147522,
      "learning_rate": 3.0433103083465676e-06,
      "loss": 2.2841,
      "step": 57336
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9921203851699829,
      "learning_rate": 3.0430145379939034e-06,
      "loss": 2.1093,
      "step": 57337
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0728296041488647,
      "learning_rate": 3.0427187794352163e-06,
      "loss": 2.4388,
      "step": 57338
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1868125200271606,
      "learning_rate": 3.042423032671015e-06,
      "loss": 2.136,
      "step": 57339
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0187212228775024,
      "learning_rate": 3.0421272977017912e-06,
      "loss": 2.1942,
      "step": 57340
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2313172817230225,
      "learning_rate": 3.0418315745280545e-06,
      "loss": 2.4742,
      "step": 57341
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2127596139907837,
      "learning_rate": 3.0415358631503045e-06,
      "loss": 2.2625,
      "step": 57342
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2148090600967407,
      "learning_rate": 3.0412401635690404e-06,
      "loss": 2.3098,
      "step": 57343
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.166945219039917,
      "learning_rate": 3.0409444757847604e-06,
      "loss": 1.9931,
      "step": 57344
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0815554857254028,
      "learning_rate": 3.040648799797974e-06,
      "loss": 2.303,
      "step": 57345
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1672254800796509,
      "learning_rate": 3.040353135609173e-06,
      "loss": 2.2283,
      "step": 57346
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0210368633270264,
      "learning_rate": 3.040057483218869e-06,
      "loss": 2.4011,
      "step": 57347
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0126333236694336,
      "learning_rate": 3.039761842627553e-06,
      "loss": 2.3555,
      "step": 57348
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0756189823150635,
      "learning_rate": 3.0394662138357357e-06,
      "loss": 2.2619,
      "step": 57349
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1075778007507324,
      "learning_rate": 3.0391705968439088e-06,
      "loss": 2.436,
      "step": 57350
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1479511260986328,
      "learning_rate": 3.038874991652583e-06,
      "loss": 2.4784,
      "step": 57351
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.063361644744873,
      "learning_rate": 3.0385793982622503e-06,
      "loss": 2.3381,
      "step": 57352
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4254138469696045,
      "learning_rate": 3.0382838166734195e-06,
      "loss": 2.1876,
      "step": 57353
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1385730504989624,
      "learning_rate": 3.0379882468865897e-06,
      "loss": 2.2003,
      "step": 57354
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0501377582550049,
      "learning_rate": 3.0376926889022594e-06,
      "loss": 2.5406,
      "step": 57355
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0768225193023682,
      "learning_rate": 3.0373971427209283e-06,
      "loss": 2.3434,
      "step": 57356
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.123708724975586,
      "learning_rate": 3.0371016083431026e-06,
      "loss": 2.1363,
      "step": 57357
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0485962629318237,
      "learning_rate": 3.036806085769278e-06,
      "loss": 2.2672,
      "step": 57358
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0025521516799927,
      "learning_rate": 3.0365105749999615e-06,
      "loss": 2.3957,
      "step": 57359
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1690635681152344,
      "learning_rate": 3.036215076035648e-06,
      "loss": 2.3415,
      "step": 57360
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.024324893951416,
      "learning_rate": 3.0359195888768446e-06,
      "loss": 2.1562,
      "step": 57361
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.132712483406067,
      "learning_rate": 3.035624113524045e-06,
      "loss": 2.4087,
      "step": 57362
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0732358694076538,
      "learning_rate": 3.035328649977757e-06,
      "loss": 2.5024,
      "step": 57363
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0962404012680054,
      "learning_rate": 3.0350331982384797e-06,
      "loss": 2.1516,
      "step": 57364
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0886664390563965,
      "learning_rate": 3.0347377583067084e-06,
      "loss": 2.3724,
      "step": 57365
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.099549412727356,
      "learning_rate": 3.0344423301829516e-06,
      "loss": 2.1389,
      "step": 57366
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.16224205493927,
      "learning_rate": 3.034146913867707e-06,
      "loss": 2.3086,
      "step": 57367
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1410499811172485,
      "learning_rate": 3.033851509361473e-06,
      "loss": 2.1846,
      "step": 57368
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1248549222946167,
      "learning_rate": 3.0335561166647554e-06,
      "loss": 2.5535,
      "step": 57369
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0294803380966187,
      "learning_rate": 3.033260735778052e-06,
      "loss": 2.1698,
      "step": 57370
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1099251508712769,
      "learning_rate": 3.032965366701861e-06,
      "loss": 2.3216,
      "step": 57371
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0289615392684937,
      "learning_rate": 3.032670009436688e-06,
      "loss": 2.2365,
      "step": 57372
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0961660146713257,
      "learning_rate": 3.0323746639830297e-06,
      "loss": 2.3302,
      "step": 57373
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1407846212387085,
      "learning_rate": 3.032079330341391e-06,
      "loss": 2.3375,
      "step": 57374
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.074267029762268,
      "learning_rate": 3.0317840085122673e-06,
      "loss": 2.2905,
      "step": 57375
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.339949369430542,
      "learning_rate": 3.0314886984961665e-06,
      "loss": 2.5498,
      "step": 57376
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1853054761886597,
      "learning_rate": 3.0311934002935804e-06,
      "loss": 2.2904,
      "step": 57377
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0832151174545288,
      "learning_rate": 3.0308981139050186e-06,
      "loss": 2.2731,
      "step": 57378
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0124876499176025,
      "learning_rate": 3.0306028393309727e-06,
      "loss": 2.1055,
      "step": 57379
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2041124105453491,
      "learning_rate": 3.030307576571956e-06,
      "loss": 2.5354,
      "step": 57380
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.114296555519104,
      "learning_rate": 3.030012325628453e-06,
      "loss": 2.3369,
      "step": 57381
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.154685616493225,
      "learning_rate": 3.0297170865009763e-06,
      "loss": 2.5213,
      "step": 57382
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1893976926803589,
      "learning_rate": 3.0294218591900182e-06,
      "loss": 2.3887,
      "step": 57383
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.075003981590271,
      "learning_rate": 3.0291266436960863e-06,
      "loss": 2.2063,
      "step": 57384
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1798256635665894,
      "learning_rate": 3.028831440019676e-06,
      "loss": 2.3927,
      "step": 57385
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0536082983016968,
      "learning_rate": 3.0285362481612925e-06,
      "loss": 2.436,
      "step": 57386
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.009904146194458,
      "learning_rate": 3.0282410681214293e-06,
      "loss": 2.374,
      "step": 57387
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.161496877670288,
      "learning_rate": 3.027945899900595e-06,
      "loss": 2.1918,
      "step": 57388
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1026064157485962,
      "learning_rate": 3.027650743499283e-06,
      "loss": 2.3202,
      "step": 57389
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3706358671188354,
      "learning_rate": 3.027355598917999e-06,
      "loss": 2.3197,
      "step": 57390
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2486534118652344,
      "learning_rate": 3.027060466157239e-06,
      "loss": 2.5034,
      "step": 57391
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9482352137565613,
      "learning_rate": 3.026765345217507e-06,
      "loss": 2.2361,
      "step": 57392
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0847718715667725,
      "learning_rate": 3.026470236099304e-06,
      "loss": 2.4699,
      "step": 57393
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.090864658355713,
      "learning_rate": 3.026175138803126e-06,
      "loss": 2.4215,
      "step": 57394
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0279912948608398,
      "learning_rate": 3.0258800533294717e-06,
      "loss": 2.1929,
      "step": 57395
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0286309719085693,
      "learning_rate": 3.0255849796788496e-06,
      "loss": 2.1674,
      "step": 57396
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0276119709014893,
      "learning_rate": 3.0252899178517516e-06,
      "loss": 2.2275,
      "step": 57397
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1066694259643555,
      "learning_rate": 3.024994867848684e-06,
      "loss": 2.1693,
      "step": 57398
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0722002983093262,
      "learning_rate": 3.0246998296701447e-06,
      "loss": 2.457,
      "step": 57399
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0332919359207153,
      "learning_rate": 3.0244048033166306e-06,
      "loss": 2.2638,
      "step": 57400
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0663471221923828,
      "learning_rate": 3.024109788788648e-06,
      "loss": 2.3222,
      "step": 57401
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4838314056396484,
      "learning_rate": 3.0238147860866907e-06,
      "loss": 2.4058,
      "step": 57402
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0272490978240967,
      "learning_rate": 3.023519795211265e-06,
      "loss": 1.9783,
      "step": 57403
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0397435426712036,
      "learning_rate": 3.0232248161628653e-06,
      "loss": 2.3388,
      "step": 57404
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1926454305648804,
      "learning_rate": 3.022929848941998e-06,
      "loss": 2.1771,
      "step": 57405
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.236748456954956,
      "learning_rate": 3.0226348935491555e-06,
      "loss": 2.4069,
      "step": 57406
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9678080081939697,
      "learning_rate": 3.0223399499848484e-06,
      "loss": 2.185,
      "step": 57407
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0939552783966064,
      "learning_rate": 3.0220450182495644e-06,
      "loss": 2.3651,
      "step": 57408
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2201132774353027,
      "learning_rate": 3.021750098343812e-06,
      "loss": 2.308,
      "step": 57409
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2912609577178955,
      "learning_rate": 3.0214551902680856e-06,
      "loss": 2.3383,
      "step": 57410
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.053483247756958,
      "learning_rate": 3.0211602940228912e-06,
      "loss": 2.3185,
      "step": 57411
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0477524995803833,
      "learning_rate": 3.020865409608722e-06,
      "loss": 2.4299,
      "step": 57412
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2909786701202393,
      "learning_rate": 3.020570537026084e-06,
      "loss": 2.1217,
      "step": 57413
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0619902610778809,
      "learning_rate": 3.0202756762754725e-06,
      "loss": 2.2792,
      "step": 57414
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1092596054077148,
      "learning_rate": 3.0199808273573915e-06,
      "loss": 2.3145,
      "step": 57415
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0724971294403076,
      "learning_rate": 3.019685990272336e-06,
      "loss": 2.2875,
      "step": 57416
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.015593409538269,
      "learning_rate": 3.019391165020813e-06,
      "loss": 2.4486,
      "step": 57417
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1250576972961426,
      "learning_rate": 3.0190963516033132e-06,
      "loss": 2.4384,
      "step": 57418
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1120370626449585,
      "learning_rate": 3.018801550020345e-06,
      "loss": 2.4056,
      "step": 57419
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.25623619556427,
      "learning_rate": 3.018506760272404e-06,
      "loss": 2.3062,
      "step": 57420
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1370688676834106,
      "learning_rate": 3.0182119823599897e-06,
      "loss": 2.064,
      "step": 57421
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0665700435638428,
      "learning_rate": 3.0179172162836003e-06,
      "loss": 2.1963,
      "step": 57422
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.112351894378662,
      "learning_rate": 3.0176224620437387e-06,
      "loss": 2.3633,
      "step": 57423
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1939501762390137,
      "learning_rate": 3.017327719640902e-06,
      "loss": 2.3877,
      "step": 57424
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1176525354385376,
      "learning_rate": 3.0170329890755945e-06,
      "loss": 2.3679,
      "step": 57425
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1620573997497559,
      "learning_rate": 3.0167382703483118e-06,
      "loss": 2.2546,
      "step": 57426
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0777232646942139,
      "learning_rate": 3.016443563459551e-06,
      "loss": 2.2647,
      "step": 57427
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9992431998252869,
      "learning_rate": 3.0161488684098196e-06,
      "loss": 2.0793,
      "step": 57428
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0388579368591309,
      "learning_rate": 3.0158541851996083e-06,
      "loss": 2.1793,
      "step": 57429
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1793150901794434,
      "learning_rate": 3.015559513829425e-06,
      "loss": 2.286,
      "step": 57430
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0618770122528076,
      "learning_rate": 3.015264854299761e-06,
      "loss": 2.1495,
      "step": 57431
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0329666137695312,
      "learning_rate": 3.014970206611124e-06,
      "loss": 2.3679,
      "step": 57432
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1210405826568604,
      "learning_rate": 3.0146755707640095e-06,
      "loss": 2.1346,
      "step": 57433
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.048108458518982,
      "learning_rate": 3.014380946758917e-06,
      "loss": 2.3334,
      "step": 57434
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.087997317314148,
      "learning_rate": 3.0140863345963432e-06,
      "loss": 2.3182,
      "step": 57435
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.074493646621704,
      "learning_rate": 3.013791734276793e-06,
      "loss": 2.1231,
      "step": 57436
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0607136487960815,
      "learning_rate": 3.01349714580076e-06,
      "loss": 2.1393,
      "step": 57437
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1579313278198242,
      "learning_rate": 3.0132025691687507e-06,
      "loss": 2.2393,
      "step": 57438
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.175637125968933,
      "learning_rate": 3.0129080043812574e-06,
      "loss": 2.0754,
      "step": 57439
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1103869676589966,
      "learning_rate": 3.0126134514387863e-06,
      "loss": 2.2148,
      "step": 57440
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0906291007995605,
      "learning_rate": 3.012318910341829e-06,
      "loss": 2.4054,
      "step": 57441
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.044540524482727,
      "learning_rate": 3.012024381090892e-06,
      "loss": 2.3183,
      "step": 57442
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0477609634399414,
      "learning_rate": 3.011729863686469e-06,
      "loss": 2.2661,
      "step": 57443
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0937691926956177,
      "learning_rate": 3.0114353581290656e-06,
      "loss": 2.3862,
      "step": 57444
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0847758054733276,
      "learning_rate": 3.011140864419173e-06,
      "loss": 2.3285,
      "step": 57445
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0914579629898071,
      "learning_rate": 3.010846382557302e-06,
      "loss": 2.2345,
      "step": 57446
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.033801794052124,
      "learning_rate": 3.0105519125439375e-06,
      "loss": 2.166,
      "step": 57447
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.029313325881958,
      "learning_rate": 3.010257454379589e-06,
      "loss": 2.1959,
      "step": 57448
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.087372899055481,
      "learning_rate": 3.00996300806475e-06,
      "loss": 2.3173,
      "step": 57449
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1079686880111694,
      "learning_rate": 3.0096685735999255e-06,
      "loss": 2.2785,
      "step": 57450
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2300941944122314,
      "learning_rate": 3.009374150985608e-06,
      "loss": 2.3802,
      "step": 57451
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0861855745315552,
      "learning_rate": 3.009079740222303e-06,
      "loss": 2.3136,
      "step": 57452
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.095011591911316,
      "learning_rate": 3.0087853413105028e-06,
      "loss": 2.375,
      "step": 57453
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1858967542648315,
      "learning_rate": 3.0084909542507134e-06,
      "loss": 2.343,
      "step": 57454
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0741137266159058,
      "learning_rate": 3.008196579043432e-06,
      "loss": 2.2645,
      "step": 57455
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0286015272140503,
      "learning_rate": 3.0079022156891512e-06,
      "loss": 2.3658,
      "step": 57456
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1638805866241455,
      "learning_rate": 3.00760786418838e-06,
      "loss": 2.4585,
      "step": 57457
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0701583623886108,
      "learning_rate": 3.0073135245416085e-06,
      "loss": 2.3289,
      "step": 57458
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0473387241363525,
      "learning_rate": 3.0070191967493433e-06,
      "loss": 2.3916,
      "step": 57459
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1267625093460083,
      "learning_rate": 3.0067248808120797e-06,
      "loss": 2.2853,
      "step": 57460
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0367469787597656,
      "learning_rate": 3.006430576730317e-06,
      "loss": 2.1743,
      "step": 57461
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0154767036437988,
      "learning_rate": 3.0061362845045507e-06,
      "loss": 2.127,
      "step": 57462
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1570872068405151,
      "learning_rate": 3.0058420041352864e-06,
      "loss": 2.2505,
      "step": 57463
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1416387557983398,
      "learning_rate": 3.0055477356230155e-06,
      "loss": 2.3359,
      "step": 57464
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1704785823822021,
      "learning_rate": 3.005253478968244e-06,
      "loss": 2.3904,
      "step": 57465
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.114928960800171,
      "learning_rate": 3.004959234171465e-06,
      "loss": 2.2506,
      "step": 57466
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2397090196609497,
      "learning_rate": 3.0046650012331833e-06,
      "loss": 2.172,
      "step": 57467
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.115935206413269,
      "learning_rate": 3.0043707801538914e-06,
      "loss": 2.4958,
      "step": 57468
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0682066679000854,
      "learning_rate": 3.0040765709340936e-06,
      "loss": 2.2796,
      "step": 57469
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.017987608909607,
      "learning_rate": 3.0037823735742823e-06,
      "loss": 2.3702,
      "step": 57470
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0524927377700806,
      "learning_rate": 3.0034881880749646e-06,
      "loss": 2.2606,
      "step": 57471
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9966258406639099,
      "learning_rate": 3.0031940144366302e-06,
      "loss": 2.3895,
      "step": 57472
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.055181860923767,
      "learning_rate": 3.0028998526597887e-06,
      "loss": 2.2717,
      "step": 57473
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0246789455413818,
      "learning_rate": 3.002605702744926e-06,
      "loss": 2.2255,
      "step": 57474
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0418604612350464,
      "learning_rate": 3.0023115646925516e-06,
      "loss": 2.2735,
      "step": 57475
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.032907485961914,
      "learning_rate": 3.002017438503155e-06,
      "loss": 2.2793,
      "step": 57476
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0649996995925903,
      "learning_rate": 3.0017233241772437e-06,
      "loss": 2.3201,
      "step": 57477
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1173089742660522,
      "learning_rate": 3.0014292217153085e-06,
      "loss": 2.4544,
      "step": 57478
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0468071699142456,
      "learning_rate": 3.0011351311178536e-06,
      "loss": 2.3084,
      "step": 57479
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.245842695236206,
      "learning_rate": 3.0008410523853737e-06,
      "loss": 2.4157,
      "step": 57480
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.036970615386963,
      "learning_rate": 3.0005469855183723e-06,
      "loss": 2.3536,
      "step": 57481
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1688339710235596,
      "learning_rate": 3.0002529305173445e-06,
      "loss": 2.2119,
      "step": 57482
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.34243643283844,
      "learning_rate": 2.9999588873827857e-06,
      "loss": 2.4543,
      "step": 57483
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0060758590698242,
      "learning_rate": 2.999664856115201e-06,
      "loss": 2.2613,
      "step": 57484
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.092685341835022,
      "learning_rate": 2.999370836715083e-06,
      "loss": 2.3352,
      "step": 57485
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1708770990371704,
      "learning_rate": 2.9990768291829353e-06,
      "loss": 2.2988,
      "step": 57486
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2131164073944092,
      "learning_rate": 2.9987828335192535e-06,
      "loss": 2.3336,
      "step": 57487
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1088569164276123,
      "learning_rate": 2.9984888497245366e-06,
      "loss": 2.1898,
      "step": 57488
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.156244158744812,
      "learning_rate": 2.998194877799281e-06,
      "loss": 2.1488,
      "step": 57489
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0709415674209595,
      "learning_rate": 2.9979009177439875e-06,
      "loss": 2.1479,
      "step": 57490
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0668762922286987,
      "learning_rate": 2.9976069695591524e-06,
      "loss": 2.3772,
      "step": 57491
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2090343236923218,
      "learning_rate": 2.9973130332452773e-06,
      "loss": 2.2294,
      "step": 57492
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0432496070861816,
      "learning_rate": 2.9970191088028557e-06,
      "loss": 2.3454,
      "step": 57493
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1033060550689697,
      "learning_rate": 2.996725196232392e-06,
      "loss": 2.4705,
      "step": 57494
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.044229507446289,
      "learning_rate": 2.9964312955343778e-06,
      "loss": 2.1557,
      "step": 57495
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0891865491867065,
      "learning_rate": 2.9961374067093185e-06,
      "loss": 2.035,
      "step": 57496
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1535911560058594,
      "learning_rate": 2.995843529757705e-06,
      "loss": 2.3708,
      "step": 57497
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0400748252868652,
      "learning_rate": 2.9955496646800418e-06,
      "loss": 2.4934,
      "step": 57498
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0407236814498901,
      "learning_rate": 2.9952558114768248e-06,
      "loss": 2.1199,
      "step": 57499
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0217653512954712,
      "learning_rate": 2.994961970148551e-06,
      "loss": 2.3241,
      "step": 57500
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0941152572631836,
      "learning_rate": 2.994668140695717e-06,
      "loss": 2.3124,
      "step": 57501
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.008013129234314,
      "learning_rate": 2.994374323118825e-06,
      "loss": 2.221,
      "step": 57502
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1219085454940796,
      "learning_rate": 2.9940805174183697e-06,
      "loss": 2.3379,
      "step": 57503
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.110466480255127,
      "learning_rate": 2.9937867235948524e-06,
      "loss": 2.3986,
      "step": 57504
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1190204620361328,
      "learning_rate": 2.9934929416487666e-06,
      "loss": 2.1067,
      "step": 57505
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1115461587905884,
      "learning_rate": 2.9931991715806174e-06,
      "loss": 2.4544,
      "step": 57506
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.03292977809906,
      "learning_rate": 2.992905413390894e-06,
      "loss": 2.3261,
      "step": 57507
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0635969638824463,
      "learning_rate": 2.992611667080103e-06,
      "loss": 2.4128,
      "step": 57508
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1312596797943115,
      "learning_rate": 2.992317932648735e-06,
      "loss": 2.3566,
      "step": 57509
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9812147617340088,
      "learning_rate": 2.992024210097295e-06,
      "loss": 2.3145,
      "step": 57510
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1339339017868042,
      "learning_rate": 2.9917304994262777e-06,
      "loss": 2.2467,
      "step": 57511
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0330663919448853,
      "learning_rate": 2.9914368006361793e-06,
      "loss": 2.4104,
      "step": 57512
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1890339851379395,
      "learning_rate": 2.991143113727496e-06,
      "loss": 2.2946,
      "step": 57513
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1223899126052856,
      "learning_rate": 2.9908494387007324e-06,
      "loss": 2.253,
      "step": 57514
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.170702338218689,
      "learning_rate": 2.9905557755563795e-06,
      "loss": 2.0469,
      "step": 57515
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0640937089920044,
      "learning_rate": 2.9902621242949413e-06,
      "loss": 2.3301,
      "step": 57516
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1379960775375366,
      "learning_rate": 2.989968484916913e-06,
      "loss": 2.366,
      "step": 57517
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0165245532989502,
      "learning_rate": 2.9896748574227886e-06,
      "loss": 2.0944,
      "step": 57518
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1013462543487549,
      "learning_rate": 2.9893812418130717e-06,
      "loss": 2.196,
      "step": 57519
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0491893291473389,
      "learning_rate": 2.989087638088256e-06,
      "loss": 2.2844,
      "step": 57520
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2916676998138428,
      "learning_rate": 2.988794046248843e-06,
      "loss": 2.4026,
      "step": 57521
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1381657123565674,
      "learning_rate": 2.9885004662953253e-06,
      "loss": 2.3896,
      "step": 57522
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9803799986839294,
      "learning_rate": 2.9882068982282066e-06,
      "loss": 2.1886,
      "step": 57523
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0686914920806885,
      "learning_rate": 2.987913342047979e-06,
      "loss": 2.3999,
      "step": 57524
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9783326387405396,
      "learning_rate": 2.987619797755149e-06,
      "loss": 2.4313,
      "step": 57525
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1246050596237183,
      "learning_rate": 2.987326265350201e-06,
      "loss": 2.1701,
      "step": 57526
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.04900324344635,
      "learning_rate": 2.987032744833643e-06,
      "loss": 2.3815,
      "step": 57527
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0937894582748413,
      "learning_rate": 2.986739236205967e-06,
      "loss": 2.2681,
      "step": 57528
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0139902830123901,
      "learning_rate": 2.9864457394676747e-06,
      "loss": 2.3884,
      "step": 57529
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0647965669631958,
      "learning_rate": 2.986152254619259e-06,
      "loss": 2.2101,
      "step": 57530
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1233491897583008,
      "learning_rate": 2.9858587816612227e-06,
      "loss": 2.4414,
      "step": 57531
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2969610691070557,
      "learning_rate": 2.985565320594059e-06,
      "loss": 2.4832,
      "step": 57532
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0960357189178467,
      "learning_rate": 2.9852718714182693e-06,
      "loss": 2.4555,
      "step": 57533
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0899693965911865,
      "learning_rate": 2.984978434134346e-06,
      "loss": 2.3718,
      "step": 57534
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.075010895729065,
      "learning_rate": 2.9846850087427926e-06,
      "loss": 2.5046,
      "step": 57535
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.040399432182312,
      "learning_rate": 2.9843915952441004e-06,
      "loss": 2.2511,
      "step": 57536
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1182847023010254,
      "learning_rate": 2.9840981936387734e-06,
      "loss": 2.0762,
      "step": 57537
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.119296669960022,
      "learning_rate": 2.983804803927305e-06,
      "loss": 2.4477,
      "step": 57538
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0809274911880493,
      "learning_rate": 2.9835114261101927e-06,
      "loss": 2.4817,
      "step": 57539
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.033833622932434,
      "learning_rate": 2.9832180601879324e-06,
      "loss": 2.4208,
      "step": 57540
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0227925777435303,
      "learning_rate": 2.9829247061610247e-06,
      "loss": 2.3723,
      "step": 57541
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1337083578109741,
      "learning_rate": 2.9826313640299633e-06,
      "loss": 2.2735,
      "step": 57542
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1317670345306396,
      "learning_rate": 2.98233803379525e-06,
      "loss": 2.3078,
      "step": 57543
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.216172456741333,
      "learning_rate": 2.9820447154573805e-06,
      "loss": 2.3703,
      "step": 57544
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1088796854019165,
      "learning_rate": 2.9817514090168477e-06,
      "loss": 2.4603,
      "step": 57545
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0974336862564087,
      "learning_rate": 2.981458114474156e-06,
      "loss": 2.2663,
      "step": 57546
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0332893133163452,
      "learning_rate": 2.9811648318297958e-06,
      "loss": 2.4662,
      "step": 57547
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.149570345878601,
      "learning_rate": 2.98087156108427e-06,
      "loss": 2.3378,
      "step": 57548
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2168283462524414,
      "learning_rate": 2.9805783022380706e-06,
      "loss": 2.4358,
      "step": 57549
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0446840524673462,
      "learning_rate": 2.9802850552917005e-06,
      "loss": 2.0608,
      "step": 57550
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0612037181854248,
      "learning_rate": 2.97999182024565e-06,
      "loss": 2.3126,
      "step": 57551
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.060369610786438,
      "learning_rate": 2.979698597100428e-06,
      "loss": 2.3626,
      "step": 57552
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0319327116012573,
      "learning_rate": 2.9794053858565152e-06,
      "loss": 2.3688,
      "step": 57553
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2212409973144531,
      "learning_rate": 2.9791121865144213e-06,
      "loss": 2.4945,
      "step": 57554
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0831326246261597,
      "learning_rate": 2.978818999074635e-06,
      "loss": 2.2994,
      "step": 57555
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.155752420425415,
      "learning_rate": 2.9785258235376623e-06,
      "loss": 2.3482,
      "step": 57556
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.263558030128479,
      "learning_rate": 2.9782326599039914e-06,
      "loss": 2.3129,
      "step": 57557
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1432759761810303,
      "learning_rate": 2.977939508174126e-06,
      "loss": 2.3262,
      "step": 57558
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0483489036560059,
      "learning_rate": 2.9776463683485567e-06,
      "loss": 2.3365,
      "step": 57559
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.121958613395691,
      "learning_rate": 2.9773532404277883e-06,
      "loss": 2.3798,
      "step": 57560
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0356799364089966,
      "learning_rate": 2.97706012441231e-06,
      "loss": 2.2437,
      "step": 57561
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0912755727767944,
      "learning_rate": 2.9767670203026244e-06,
      "loss": 2.2841,
      "step": 57562
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1053377389907837,
      "learning_rate": 2.976473928099224e-06,
      "loss": 2.2684,
      "step": 57563
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1584969758987427,
      "learning_rate": 2.9761808478026112e-06,
      "loss": 2.1166,
      "step": 57564
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9854345917701721,
      "learning_rate": 2.975887779413279e-06,
      "loss": 2.1171,
      "step": 57565
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0864349603652954,
      "learning_rate": 2.975594722931725e-06,
      "loss": 2.4653,
      "step": 57566
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0737231969833374,
      "learning_rate": 2.975301678358442e-06,
      "loss": 2.1426,
      "step": 57567
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.056687355041504,
      "learning_rate": 2.975008645693934e-06,
      "loss": 2.2864,
      "step": 57568
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9186426997184753,
      "learning_rate": 2.974715624938692e-06,
      "loss": 2.3213,
      "step": 57569
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0158780813217163,
      "learning_rate": 2.974422616093218e-06,
      "loss": 2.3306,
      "step": 57570
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0214617252349854,
      "learning_rate": 2.9741296191580017e-06,
      "loss": 2.1631,
      "step": 57571
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1619582176208496,
      "learning_rate": 2.973836634133548e-06,
      "loss": 2.417,
      "step": 57572
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0286298990249634,
      "learning_rate": 2.973543661020348e-06,
      "loss": 2.3016,
      "step": 57573
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2238351106643677,
      "learning_rate": 2.9732506998188983e-06,
      "loss": 2.1854,
      "step": 57574
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0013160705566406,
      "learning_rate": 2.9729577505296993e-06,
      "loss": 2.3106,
      "step": 57575
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1333739757537842,
      "learning_rate": 2.972664813153243e-06,
      "loss": 2.2506,
      "step": 57576
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.104541301727295,
      "learning_rate": 2.972371887690031e-06,
      "loss": 2.2495,
      "step": 57577
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0657947063446045,
      "learning_rate": 2.9720789741405576e-06,
      "loss": 2.6212,
      "step": 57578
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1833404302597046,
      "learning_rate": 2.9717860725053195e-06,
      "loss": 2.4802,
      "step": 57579
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0691701173782349,
      "learning_rate": 2.9714931827848094e-06,
      "loss": 2.3949,
      "step": 57580
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1036205291748047,
      "learning_rate": 2.971200304979529e-06,
      "loss": 2.4021,
      "step": 57581
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9953669309616089,
      "learning_rate": 2.9709074390899716e-06,
      "loss": 2.3487,
      "step": 57582
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1559165716171265,
      "learning_rate": 2.970614585116638e-06,
      "loss": 2.3122,
      "step": 57583
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2043445110321045,
      "learning_rate": 2.9703217430600186e-06,
      "loss": 2.308,
      "step": 57584
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1227926015853882,
      "learning_rate": 2.970028912920616e-06,
      "loss": 2.3709,
      "step": 57585
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.155267357826233,
      "learning_rate": 2.9697360946989207e-06,
      "loss": 2.3436,
      "step": 57586
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0445555448532104,
      "learning_rate": 2.969443288395436e-06,
      "loss": 2.3753,
      "step": 57587
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9974474906921387,
      "learning_rate": 2.9691504940106507e-06,
      "loss": 2.3124,
      "step": 57588
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0245025157928467,
      "learning_rate": 2.968857711545068e-06,
      "loss": 2.2846,
      "step": 57589
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0814907550811768,
      "learning_rate": 2.968564940999178e-06,
      "loss": 2.2621,
      "step": 57590
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0962424278259277,
      "learning_rate": 2.968272182373486e-06,
      "loss": 2.5348,
      "step": 57591
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.171829104423523,
      "learning_rate": 2.9679794356684765e-06,
      "loss": 2.2766,
      "step": 57592
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0033414363861084,
      "learning_rate": 2.9676867008846544e-06,
      "loss": 2.517,
      "step": 57593
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1000851392745972,
      "learning_rate": 2.9673939780225105e-06,
      "loss": 2.3926,
      "step": 57594
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1167709827423096,
      "learning_rate": 2.967101267082547e-06,
      "loss": 2.3482,
      "step": 57595
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2058956623077393,
      "learning_rate": 2.9668085680652536e-06,
      "loss": 2.4071,
      "step": 57596
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0234477519989014,
      "learning_rate": 2.9665158809711338e-06,
      "loss": 2.3766,
      "step": 57597
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1140193939208984,
      "learning_rate": 2.9662232058006755e-06,
      "loss": 2.274,
      "step": 57598
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9965878129005432,
      "learning_rate": 2.965930542554384e-06,
      "loss": 2.0499,
      "step": 57599
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2351213693618774,
      "learning_rate": 2.9656378912327498e-06,
      "loss": 2.3487,
      "step": 57600
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0566000938415527,
      "learning_rate": 2.9653452518362656e-06,
      "loss": 2.3372,
      "step": 57601
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0379441976547241,
      "learning_rate": 2.9650526243654366e-06,
      "loss": 2.2441,
      "step": 57602
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1949599981307983,
      "learning_rate": 2.9647600088207505e-06,
      "loss": 2.4252,
      "step": 57603
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0752143859863281,
      "learning_rate": 2.96446740520271e-06,
      "loss": 2.3981,
      "step": 57604
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0236570835113525,
      "learning_rate": 2.9641748135118077e-06,
      "loss": 2.2088,
      "step": 57605
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0411800146102905,
      "learning_rate": 2.9638822337485407e-06,
      "loss": 2.2453,
      "step": 57606
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.027860403060913,
      "learning_rate": 2.9635896659134e-06,
      "loss": 2.363,
      "step": 57607
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.254101037979126,
      "learning_rate": 2.9632971100068897e-06,
      "loss": 2.1917,
      "step": 57608
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0611300468444824,
      "learning_rate": 2.9630045660294993e-06,
      "loss": 2.1142,
      "step": 57609
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.116487979888916,
      "learning_rate": 2.9627120339817293e-06,
      "loss": 2.4091,
      "step": 57610
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0961525440216064,
      "learning_rate": 2.962419513864072e-06,
      "loss": 2.3777,
      "step": 57611
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.098609209060669,
      "learning_rate": 2.9621270056770267e-06,
      "loss": 2.3547,
      "step": 57612
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.049829125404358,
      "learning_rate": 2.9618345094210854e-06,
      "loss": 2.1722,
      "step": 57613
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.012929916381836,
      "learning_rate": 2.9615420250967496e-06,
      "loss": 2.3401,
      "step": 57614
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9515312314033508,
      "learning_rate": 2.961249552704508e-06,
      "loss": 2.2433,
      "step": 57615
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1788854598999023,
      "learning_rate": 2.9609570922448627e-06,
      "loss": 2.1256,
      "step": 57616
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.081443190574646,
      "learning_rate": 2.960664643718305e-06,
      "loss": 2.5614,
      "step": 57617
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2972915172576904,
      "learning_rate": 2.960372207125338e-06,
      "loss": 2.246,
      "step": 57618
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0685991048812866,
      "learning_rate": 2.960079782466446e-06,
      "loss": 2.1321,
      "step": 57619
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.129156231880188,
      "learning_rate": 2.959787369742134e-06,
      "loss": 2.4441,
      "step": 57620
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0107951164245605,
      "learning_rate": 2.9594949689528906e-06,
      "loss": 2.1698,
      "step": 57621
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0957401990890503,
      "learning_rate": 2.9592025800992198e-06,
      "loss": 2.301,
      "step": 57622
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0669360160827637,
      "learning_rate": 2.9589102031816085e-06,
      "loss": 2.3478,
      "step": 57623
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0521184206008911,
      "learning_rate": 2.9586178382005605e-06,
      "loss": 2.2079,
      "step": 57624
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1123485565185547,
      "learning_rate": 2.9583254851565644e-06,
      "loss": 2.3907,
      "step": 57625
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2974803447723389,
      "learning_rate": 2.958033144050123e-06,
      "loss": 2.4033,
      "step": 57626
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0389037132263184,
      "learning_rate": 2.9577408148817246e-06,
      "loss": 2.3112,
      "step": 57627
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.988785982131958,
      "learning_rate": 2.95744849765187e-06,
      "loss": 2.2312,
      "step": 57628
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0486228466033936,
      "learning_rate": 2.9571561923610547e-06,
      "loss": 2.2652,
      "step": 57629
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1570425033569336,
      "learning_rate": 2.956863899009769e-06,
      "loss": 2.2528,
      "step": 57630
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.248206615447998,
      "learning_rate": 2.9565716175985135e-06,
      "loss": 2.3064,
      "step": 57631
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0020464658737183,
      "learning_rate": 2.956279348127783e-06,
      "loss": 2.2024,
      "step": 57632
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0913418531417847,
      "learning_rate": 2.9559870905980693e-06,
      "loss": 2.2499,
      "step": 57633
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2505379915237427,
      "learning_rate": 2.955694845009873e-06,
      "loss": 2.3271,
      "step": 57634
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5448237657546997,
      "learning_rate": 2.955402611363688e-06,
      "loss": 2.45,
      "step": 57635
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.025832176208496,
      "learning_rate": 2.9551103896600053e-06,
      "loss": 2.1622,
      "step": 57636
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1436916589736938,
      "learning_rate": 2.954818179899327e-06,
      "loss": 2.1925,
      "step": 57637
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.026182770729065,
      "learning_rate": 2.9545259820821426e-06,
      "loss": 2.2415,
      "step": 57638
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.948749303817749,
      "learning_rate": 2.954233796208953e-06,
      "loss": 2.2566,
      "step": 57639
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0381112098693848,
      "learning_rate": 2.9539416222802474e-06,
      "loss": 2.4431,
      "step": 57640
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0617268085479736,
      "learning_rate": 2.953649460296527e-06,
      "loss": 2.3862,
      "step": 57641
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1030633449554443,
      "learning_rate": 2.9533573102582823e-06,
      "loss": 2.2196,
      "step": 57642
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.048183798789978,
      "learning_rate": 2.9530651721660142e-06,
      "loss": 2.0791,
      "step": 57643
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.077467441558838,
      "learning_rate": 2.952773046020214e-06,
      "loss": 2.4992,
      "step": 57644
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1331191062927246,
      "learning_rate": 2.9524809318213777e-06,
      "loss": 2.3129,
      "step": 57645
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0427922010421753,
      "learning_rate": 2.9521888295699976e-06,
      "loss": 2.337,
      "step": 57646
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0137555599212646,
      "learning_rate": 2.9518967392665733e-06,
      "loss": 2.137,
      "step": 57647
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9770039916038513,
      "learning_rate": 2.9516046609115966e-06,
      "loss": 2.528,
      "step": 57648
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1734720468521118,
      "learning_rate": 2.9513125945055666e-06,
      "loss": 2.0631,
      "step": 57649
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1185376644134521,
      "learning_rate": 2.9510205400489735e-06,
      "loss": 2.3377,
      "step": 57650
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.087363362312317,
      "learning_rate": 2.950728497542318e-06,
      "loss": 2.1205,
      "step": 57651
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9976815581321716,
      "learning_rate": 2.9504364669860887e-06,
      "loss": 2.3873,
      "step": 57652
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0646418333053589,
      "learning_rate": 2.9501444483807873e-06,
      "loss": 2.2417,
      "step": 57653
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0266766548156738,
      "learning_rate": 2.949852441726904e-06,
      "loss": 2.401,
      "step": 57654
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.003093957901001,
      "learning_rate": 2.9495604470249374e-06,
      "loss": 2.3585,
      "step": 57655
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1150920391082764,
      "learning_rate": 2.9492684642753812e-06,
      "loss": 2.3216,
      "step": 57656
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.086124300956726,
      "learning_rate": 2.94897649347873e-06,
      "loss": 2.3047,
      "step": 57657
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1731778383255005,
      "learning_rate": 2.9486845346354753e-06,
      "loss": 2.2696,
      "step": 57658
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1739858388900757,
      "learning_rate": 2.948392587746118e-06,
      "loss": 2.4716,
      "step": 57659
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9928125143051147,
      "learning_rate": 2.9481006528111476e-06,
      "loss": 2.279,
      "step": 57660
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0315310955047607,
      "learning_rate": 2.9478087298310666e-06,
      "loss": 2.3522,
      "step": 57661
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.166262149810791,
      "learning_rate": 2.947516818806363e-06,
      "loss": 2.344,
      "step": 57662
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0515071153640747,
      "learning_rate": 2.9472249197375326e-06,
      "loss": 2.2876,
      "step": 57663
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0427385568618774,
      "learning_rate": 2.9469330326250735e-06,
      "loss": 2.3943,
      "step": 57664
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1278616189956665,
      "learning_rate": 2.946641157469475e-06,
      "loss": 2.4127,
      "step": 57665
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1870187520980835,
      "learning_rate": 2.946349294271239e-06,
      "loss": 2.603,
      "step": 57666
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1572345495224,
      "learning_rate": 2.946057443030853e-06,
      "loss": 2.2317,
      "step": 57667
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.142987608909607,
      "learning_rate": 2.945765603748819e-06,
      "loss": 2.2497,
      "step": 57668
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0360798835754395,
      "learning_rate": 2.9454737764256258e-06,
      "loss": 2.1915,
      "step": 57669
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0269064903259277,
      "learning_rate": 2.945181961061776e-06,
      "loss": 2.2008,
      "step": 57670
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0860077142715454,
      "learning_rate": 2.9448901576577526e-06,
      "loss": 2.298,
      "step": 57671
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.103500247001648,
      "learning_rate": 2.94459836621406e-06,
      "loss": 2.3418,
      "step": 57672
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0795490741729736,
      "learning_rate": 2.944306586731186e-06,
      "loss": 2.2296,
      "step": 57673
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0769071578979492,
      "learning_rate": 2.944014819209632e-06,
      "loss": 2.2814,
      "step": 57674
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0049673318862915,
      "learning_rate": 2.943723063649886e-06,
      "loss": 2.3215,
      "step": 57675
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1668859720230103,
      "learning_rate": 2.943431320052449e-06,
      "loss": 2.3893,
      "step": 57676
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9964839816093445,
      "learning_rate": 2.9431395884178094e-06,
      "loss": 2.3719,
      "step": 57677
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.063670039176941,
      "learning_rate": 2.9428478687464677e-06,
      "loss": 2.0778,
      "step": 57678
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0035192966461182,
      "learning_rate": 2.9425561610389132e-06,
      "loss": 2.3168,
      "step": 57679
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9981035590171814,
      "learning_rate": 2.9422644652956458e-06,
      "loss": 2.0643,
      "step": 57680
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1695135831832886,
      "learning_rate": 2.941972781517153e-06,
      "loss": 2.2883,
      "step": 57681
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7774641513824463,
      "learning_rate": 2.9416811097039366e-06,
      "loss": 2.3753,
      "step": 57682
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.158671498298645,
      "learning_rate": 2.9413894498564876e-06,
      "loss": 2.5624,
      "step": 57683
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0147391557693481,
      "learning_rate": 2.9410978019753013e-06,
      "loss": 2.259,
      "step": 57684
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1649378538131714,
      "learning_rate": 2.9408061660608678e-06,
      "loss": 2.3806,
      "step": 57685
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0691046714782715,
      "learning_rate": 2.9405145421136884e-06,
      "loss": 2.3105,
      "step": 57686
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1779056787490845,
      "learning_rate": 2.94022293013425e-06,
      "loss": 2.2524,
      "step": 57687
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.133249044418335,
      "learning_rate": 2.939931330123056e-06,
      "loss": 2.2101,
      "step": 57688
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9982869029045105,
      "learning_rate": 2.939639742080591e-06,
      "loss": 2.3443,
      "step": 57689
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.090250015258789,
      "learning_rate": 2.9393481660073586e-06,
      "loss": 2.4811,
      "step": 57690
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0315029621124268,
      "learning_rate": 2.939056601903849e-06,
      "loss": 2.0774,
      "step": 57691
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.160056710243225,
      "learning_rate": 2.938765049770552e-06,
      "loss": 2.1245,
      "step": 57692
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.977343738079071,
      "learning_rate": 2.9384735096079696e-06,
      "loss": 2.1337,
      "step": 57693
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4426442384719849,
      "learning_rate": 2.938181981416589e-06,
      "loss": 2.3392,
      "step": 57694
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0231877565383911,
      "learning_rate": 2.937890465196912e-06,
      "loss": 2.4534,
      "step": 57695
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1596777439117432,
      "learning_rate": 2.937598960949425e-06,
      "loss": 2.4157,
      "step": 57696
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1208654642105103,
      "learning_rate": 2.9373074686746315e-06,
      "loss": 2.3459,
      "step": 57697
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.170122742652893,
      "learning_rate": 2.9370159883730155e-06,
      "loss": 2.5175,
      "step": 57698
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0241941213607788,
      "learning_rate": 2.9367245200450768e-06,
      "loss": 2.3677,
      "step": 57699
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.087741494178772,
      "learning_rate": 2.9364330636913065e-06,
      "loss": 2.2854,
      "step": 57700
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0809940099716187,
      "learning_rate": 2.9361416193122037e-06,
      "loss": 2.3343,
      "step": 57701
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.03801691532135,
      "learning_rate": 2.9358501869082557e-06,
      "loss": 2.1436,
      "step": 57702
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.967461347579956,
      "learning_rate": 2.935558766479963e-06,
      "loss": 2.2149,
      "step": 57703
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1536322832107544,
      "learning_rate": 2.9352673580278145e-06,
      "loss": 2.3724,
      "step": 57704
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9967198967933655,
      "learning_rate": 2.9349759615523097e-06,
      "loss": 2.1463,
      "step": 57705
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2256979942321777,
      "learning_rate": 2.934684577053937e-06,
      "loss": 2.2503,
      "step": 57706
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0895826816558838,
      "learning_rate": 2.9343932045331947e-06,
      "loss": 2.3259,
      "step": 57707
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9579529166221619,
      "learning_rate": 2.934101843990572e-06,
      "loss": 2.2977,
      "step": 57708
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3758348226547241,
      "learning_rate": 2.93381049542657e-06,
      "loss": 2.2945,
      "step": 57709
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0748950242996216,
      "learning_rate": 2.9335191588416765e-06,
      "loss": 2.3377,
      "step": 57710
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0588077306747437,
      "learning_rate": 2.9332278342363884e-06,
      "loss": 2.3574,
      "step": 57711
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0243182182312012,
      "learning_rate": 2.932936521611196e-06,
      "loss": 2.1782,
      "step": 57712
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.34684157371521,
      "learning_rate": 2.932645220966598e-06,
      "loss": 2.1477,
      "step": 57713
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0146468877792358,
      "learning_rate": 2.9323539323030826e-06,
      "loss": 2.3692,
      "step": 57714
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1445883512496948,
      "learning_rate": 2.9320626556211496e-06,
      "loss": 2.1627,
      "step": 57715
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0694321393966675,
      "learning_rate": 2.9317713909212875e-06,
      "loss": 2.2516,
      "step": 57716
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1243098974227905,
      "learning_rate": 2.9314801382039947e-06,
      "loss": 2.4041,
      "step": 57717
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.175783634185791,
      "learning_rate": 2.9311888974697643e-06,
      "loss": 2.4134,
      "step": 57718
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0206925868988037,
      "learning_rate": 2.930897668719085e-06,
      "loss": 2.2469,
      "step": 57719
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0818369388580322,
      "learning_rate": 2.930606451952457e-06,
      "loss": 2.2103,
      "step": 57720
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0909175872802734,
      "learning_rate": 2.9303152471703676e-06,
      "loss": 2.4486,
      "step": 57721
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0882614850997925,
      "learning_rate": 2.930024054373317e-06,
      "loss": 2.2866,
      "step": 57722
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0869877338409424,
      "learning_rate": 2.929732873561797e-06,
      "loss": 2.4125,
      "step": 57723
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1204051971435547,
      "learning_rate": 2.929441704736299e-06,
      "loss": 2.2307,
      "step": 57724
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0420124530792236,
      "learning_rate": 2.9291505478973146e-06,
      "loss": 2.1999,
      "step": 57725
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1753591299057007,
      "learning_rate": 2.928859403045343e-06,
      "loss": 2.3512,
      "step": 57726
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1385291814804077,
      "learning_rate": 2.928568270180874e-06,
      "loss": 2.3298,
      "step": 57727
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2005665302276611,
      "learning_rate": 2.9282771493044037e-06,
      "loss": 2.3828,
      "step": 57728
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1787482500076294,
      "learning_rate": 2.9279860404164217e-06,
      "loss": 2.3198,
      "step": 57729
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1242271661758423,
      "learning_rate": 2.9276949435174274e-06,
      "loss": 2.4444,
      "step": 57730
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0921834707260132,
      "learning_rate": 2.927403858607908e-06,
      "loss": 2.3412,
      "step": 57731
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0598561763763428,
      "learning_rate": 2.927112785688363e-06,
      "loss": 1.9885,
      "step": 57732
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0344585180282593,
      "learning_rate": 2.9268217247592788e-06,
      "loss": 2.4041,
      "step": 57733
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0548083782196045,
      "learning_rate": 2.9265306758211576e-06,
      "loss": 2.1704,
      "step": 57734
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.117599606513977,
      "learning_rate": 2.926239638874484e-06,
      "loss": 2.2023,
      "step": 57735
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1456692218780518,
      "learning_rate": 2.925948613919761e-06,
      "loss": 2.2902,
      "step": 57736
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0549800395965576,
      "learning_rate": 2.925657600957471e-06,
      "loss": 2.5342,
      "step": 57737
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1707741022109985,
      "learning_rate": 2.9253665999881152e-06,
      "loss": 2.3607,
      "step": 57738
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0789967775344849,
      "learning_rate": 2.9250756110121823e-06,
      "loss": 2.2028,
      "step": 57739
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0814414024353027,
      "learning_rate": 2.92478463403017e-06,
      "loss": 2.2931,
      "step": 57740
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9844160079956055,
      "learning_rate": 2.9244936690425663e-06,
      "loss": 2.2555,
      "step": 57741
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0441935062408447,
      "learning_rate": 2.9242027160498708e-06,
      "loss": 2.2916,
      "step": 57742
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.204451084136963,
      "learning_rate": 2.9239117750525712e-06,
      "loss": 2.1341,
      "step": 57743
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0170245170593262,
      "learning_rate": 2.923620846051165e-06,
      "loss": 2.6032,
      "step": 57744
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1783901453018188,
      "learning_rate": 2.9233299290461405e-06,
      "loss": 2.4751,
      "step": 57745
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0429778099060059,
      "learning_rate": 2.923039024037997e-06,
      "loss": 2.0369,
      "step": 57746
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0677602291107178,
      "learning_rate": 2.9227481310272243e-06,
      "loss": 2.4494,
      "step": 57747
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.169760823249817,
      "learning_rate": 2.9224572500143133e-06,
      "loss": 2.2971,
      "step": 57748
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.024122953414917,
      "learning_rate": 2.922166380999761e-06,
      "loss": 2.2973,
      "step": 57749
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0626565217971802,
      "learning_rate": 2.9218755239840615e-06,
      "loss": 2.2468,
      "step": 57750
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1999585628509521,
      "learning_rate": 2.9215846789677006e-06,
      "loss": 2.1595,
      "step": 57751
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0592961311340332,
      "learning_rate": 2.9212938459511785e-06,
      "loss": 2.1809,
      "step": 57752
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1306967735290527,
      "learning_rate": 2.9210030249349876e-06,
      "loss": 2.2626,
      "step": 57753
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1122127771377563,
      "learning_rate": 2.9207122159196146e-06,
      "loss": 2.5718,
      "step": 57754
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0407994985580444,
      "learning_rate": 2.9204214189055612e-06,
      "loss": 2.5339,
      "step": 57755
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0623698234558105,
      "learning_rate": 2.9201306338933134e-06,
      "loss": 2.1229,
      "step": 57756
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0099475383758545,
      "learning_rate": 2.9198398608833687e-06,
      "loss": 2.3721,
      "step": 57757
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0560036897659302,
      "learning_rate": 2.9195490998762167e-06,
      "loss": 2.2931,
      "step": 57758
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.22962486743927,
      "learning_rate": 2.919258350872355e-06,
      "loss": 2.1745,
      "step": 57759
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1380771398544312,
      "learning_rate": 2.918967613872269e-06,
      "loss": 2.0342,
      "step": 57760
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9611825942993164,
      "learning_rate": 2.9186768888764607e-06,
      "loss": 2.1331,
      "step": 57761
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.151748776435852,
      "learning_rate": 2.918386175885418e-06,
      "loss": 2.274,
      "step": 57762
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.026461124420166,
      "learning_rate": 2.9180954748996336e-06,
      "loss": 2.2575,
      "step": 57763
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.040773630142212,
      "learning_rate": 2.917804785919598e-06,
      "loss": 2.1426,
      "step": 57764
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1208865642547607,
      "learning_rate": 2.9175141089458105e-06,
      "loss": 2.3699,
      "step": 57765
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1119166612625122,
      "learning_rate": 2.917223443978756e-06,
      "loss": 2.21,
      "step": 57766
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1537659168243408,
      "learning_rate": 2.916932791018935e-06,
      "loss": 2.2043,
      "step": 57767
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1441909074783325,
      "learning_rate": 2.916642150066833e-06,
      "loss": 2.2909,
      "step": 57768
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1029739379882812,
      "learning_rate": 2.9163515211229497e-06,
      "loss": 2.296,
      "step": 57769
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0329711437225342,
      "learning_rate": 2.916060904187772e-06,
      "loss": 2.3908,
      "step": 57770
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1022506952285767,
      "learning_rate": 2.9157702992617976e-06,
      "loss": 2.2027,
      "step": 57771
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0766595602035522,
      "learning_rate": 2.9154797063455142e-06,
      "loss": 2.2649,
      "step": 57772
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3272087574005127,
      "learning_rate": 2.9151891254394194e-06,
      "loss": 2.5218,
      "step": 57773
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1051589250564575,
      "learning_rate": 2.9148985565440023e-06,
      "loss": 2.455,
      "step": 57774
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0645604133605957,
      "learning_rate": 2.9146079996597556e-06,
      "loss": 2.2006,
      "step": 57775
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0359092950820923,
      "learning_rate": 2.9143174547871733e-06,
      "loss": 2.1508,
      "step": 57776
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1590033769607544,
      "learning_rate": 2.914026921926749e-06,
      "loss": 2.2794,
      "step": 57777
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9900141358375549,
      "learning_rate": 2.9137364010789705e-06,
      "loss": 2.2916,
      "step": 57778
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0689834356307983,
      "learning_rate": 2.9134458922443355e-06,
      "loss": 2.4716,
      "step": 57779
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0480015277862549,
      "learning_rate": 2.9131553954233348e-06,
      "loss": 2.1735,
      "step": 57780
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7445006370544434,
      "learning_rate": 2.9128649106164575e-06,
      "loss": 2.3362,
      "step": 57781
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1208760738372803,
      "learning_rate": 2.9125744378242026e-06,
      "loss": 2.2602,
      "step": 57782
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.104485034942627,
      "learning_rate": 2.9122839770470546e-06,
      "loss": 2.3485,
      "step": 57783
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1532505750656128,
      "learning_rate": 2.911993528285515e-06,
      "loss": 1.9773,
      "step": 57784
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1233655214309692,
      "learning_rate": 2.911703091540067e-06,
      "loss": 2.5042,
      "step": 57785
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.146021842956543,
      "learning_rate": 2.911412666811211e-06,
      "loss": 2.261,
      "step": 57786
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.178893804550171,
      "learning_rate": 2.9111222540994324e-06,
      "loss": 2.3634,
      "step": 57787
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1291513442993164,
      "learning_rate": 2.9108318534052303e-06,
      "loss": 2.115,
      "step": 57788
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0241725444793701,
      "learning_rate": 2.910541464729093e-06,
      "loss": 2.2066,
      "step": 57789
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2095344066619873,
      "learning_rate": 2.910251088071514e-06,
      "loss": 2.2911,
      "step": 57790
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1218109130859375,
      "learning_rate": 2.9099607234329817e-06,
      "loss": 2.3456,
      "step": 57791
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1129662990570068,
      "learning_rate": 2.909670370813995e-06,
      "loss": 2.1264,
      "step": 57792
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0699732303619385,
      "learning_rate": 2.90938003021504e-06,
      "loss": 2.1683,
      "step": 57793
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1908223628997803,
      "learning_rate": 2.909089701636614e-06,
      "loss": 2.0531,
      "step": 57794
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2395827770233154,
      "learning_rate": 2.908799385079204e-06,
      "loss": 2.3124,
      "step": 57795
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1576060056686401,
      "learning_rate": 2.908509080543308e-06,
      "loss": 2.3954,
      "step": 57796
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0548228025436401,
      "learning_rate": 2.908218788029411e-06,
      "loss": 2.2908,
      "step": 57797
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0184519290924072,
      "learning_rate": 2.9079285075380136e-06,
      "loss": 2.2805,
      "step": 57798
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1208124160766602,
      "learning_rate": 2.9076382390696e-06,
      "loss": 2.2883,
      "step": 57799
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9978008270263672,
      "learning_rate": 2.9073479826246697e-06,
      "loss": 2.4188,
      "step": 57800
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1132662296295166,
      "learning_rate": 2.90705773820371e-06,
      "loss": 2.1802,
      "step": 57801
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0178221464157104,
      "learning_rate": 2.9067675058072144e-06,
      "loss": 2.3741,
      "step": 57802
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1478408575057983,
      "learning_rate": 2.9064772854356715e-06,
      "loss": 2.3432,
      "step": 57803
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0441721677780151,
      "learning_rate": 2.9061870770895782e-06,
      "loss": 2.4068,
      "step": 57804
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1078749895095825,
      "learning_rate": 2.905896880769422e-06,
      "loss": 2.3153,
      "step": 57805
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1060762405395508,
      "learning_rate": 2.9056066964757e-06,
      "loss": 2.5241,
      "step": 57806
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2314682006835938,
      "learning_rate": 2.9053165242088997e-06,
      "loss": 2.6044,
      "step": 57807
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0614433288574219,
      "learning_rate": 2.9050263639695166e-06,
      "loss": 2.2546,
      "step": 57808
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0482800006866455,
      "learning_rate": 2.9047362157580416e-06,
      "loss": 2.183,
      "step": 57809
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.030377984046936,
      "learning_rate": 2.904446079574962e-06,
      "loss": 2.2825,
      "step": 57810
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1466656923294067,
      "learning_rate": 2.9041559554207764e-06,
      "loss": 2.5073,
      "step": 57811
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1439177989959717,
      "learning_rate": 2.903865843295971e-06,
      "loss": 2.215,
      "step": 57812
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1419752836227417,
      "learning_rate": 2.9035757432010435e-06,
      "loss": 2.3755,
      "step": 57813
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0222294330596924,
      "learning_rate": 2.9032856551364786e-06,
      "loss": 2.1481,
      "step": 57814
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0859966278076172,
      "learning_rate": 2.9029955791027787e-06,
      "loss": 2.5741,
      "step": 57815
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0860040187835693,
      "learning_rate": 2.9027055151004225e-06,
      "loss": 2.4987,
      "step": 57816
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3292620182037354,
      "learning_rate": 2.902415463129912e-06,
      "loss": 2.2419,
      "step": 57817
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0239366292953491,
      "learning_rate": 2.9021254231917307e-06,
      "loss": 2.3571,
      "step": 57818
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1375527381896973,
      "learning_rate": 2.9018353952863785e-06,
      "loss": 2.1829,
      "step": 57819
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0030908584594727,
      "learning_rate": 2.901545379414339e-06,
      "loss": 2.3047,
      "step": 57820
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1154896020889282,
      "learning_rate": 2.901255375576112e-06,
      "loss": 2.1868,
      "step": 57821
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0793986320495605,
      "learning_rate": 2.9009653837721816e-06,
      "loss": 2.1544,
      "step": 57822
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0122126340866089,
      "learning_rate": 2.900675404003046e-06,
      "loss": 2.4389,
      "step": 57823
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0770812034606934,
      "learning_rate": 2.9003854362691908e-06,
      "loss": 2.4933,
      "step": 57824
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1233290433883667,
      "learning_rate": 2.9000954805711146e-06,
      "loss": 2.3394,
      "step": 57825
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.192180871963501,
      "learning_rate": 2.8998055369093004e-06,
      "loss": 2.5076,
      "step": 57826
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0790996551513672,
      "learning_rate": 2.899515605284248e-06,
      "loss": 2.3953,
      "step": 57827
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0350404977798462,
      "learning_rate": 2.899225685696444e-06,
      "loss": 2.152,
      "step": 57828
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9744181632995605,
      "learning_rate": 2.8989357781463824e-06,
      "loss": 2.3225,
      "step": 57829
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.082520842552185,
      "learning_rate": 2.89864588263455e-06,
      "loss": 2.4228,
      "step": 57830
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0545283555984497,
      "learning_rate": 2.898355999161444e-06,
      "loss": 2.4349,
      "step": 57831
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0311899185180664,
      "learning_rate": 2.89806612772755e-06,
      "loss": 2.2858,
      "step": 57832
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1469303369522095,
      "learning_rate": 2.8977762683333667e-06,
      "loss": 2.3151,
      "step": 57833
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0283122062683105,
      "learning_rate": 2.8974864209793785e-06,
      "loss": 2.2919,
      "step": 57834
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2408517599105835,
      "learning_rate": 2.8971965856660823e-06,
      "loss": 2.4803,
      "step": 57835
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1173036098480225,
      "learning_rate": 2.8969067623939672e-06,
      "loss": 2.3044,
      "step": 57836
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0286096334457397,
      "learning_rate": 2.896616951163521e-06,
      "loss": 2.5133,
      "step": 57837
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1842031478881836,
      "learning_rate": 2.8963271519752424e-06,
      "loss": 2.1661,
      "step": 57838
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0774909257888794,
      "learning_rate": 2.896037364829615e-06,
      "loss": 2.5963,
      "step": 57839
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1095142364501953,
      "learning_rate": 2.895747589727136e-06,
      "loss": 2.3345,
      "step": 57840
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.04265558719635,
      "learning_rate": 2.895457826668292e-06,
      "loss": 2.2829,
      "step": 57841
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1690037250518799,
      "learning_rate": 2.8951680756535826e-06,
      "loss": 2.2679,
      "step": 57842
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0110410451889038,
      "learning_rate": 2.8948783366834863e-06,
      "loss": 2.3306,
      "step": 57843
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1371116638183594,
      "learning_rate": 2.894588609758505e-06,
      "loss": 2.5214,
      "step": 57844
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0795994997024536,
      "learning_rate": 2.894298894879122e-06,
      "loss": 2.191,
      "step": 57845
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0798351764678955,
      "learning_rate": 2.8940091920458355e-06,
      "loss": 2.1775,
      "step": 57846
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.039969801902771,
      "learning_rate": 2.89371950125913e-06,
      "loss": 2.4719,
      "step": 57847
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9687291979789734,
      "learning_rate": 2.8934298225195036e-06,
      "loss": 2.5546,
      "step": 57848
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.043779730796814,
      "learning_rate": 2.8931401558274406e-06,
      "loss": 2.2821,
      "step": 57849
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0296070575714111,
      "learning_rate": 2.8928505011834383e-06,
      "loss": 2.5513,
      "step": 57850
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1060162782669067,
      "learning_rate": 2.8925608585879816e-06,
      "loss": 2.2001,
      "step": 57851
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0652114152908325,
      "learning_rate": 2.8922712280415678e-06,
      "loss": 2.1251,
      "step": 57852
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.201184868812561,
      "learning_rate": 2.891981609544681e-06,
      "loss": 2.3933,
      "step": 57853
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0219542980194092,
      "learning_rate": 2.891692003097819e-06,
      "loss": 2.2272,
      "step": 57854
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0887269973754883,
      "learning_rate": 2.89140240870147e-06,
      "loss": 2.3863,
      "step": 57855
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9963086247444153,
      "learning_rate": 2.891112826356125e-06,
      "loss": 2.2593,
      "step": 57856
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9938487410545349,
      "learning_rate": 2.8908232560622705e-06,
      "loss": 2.3832,
      "step": 57857
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0126075744628906,
      "learning_rate": 2.890533697820406e-06,
      "loss": 2.302,
      "step": 57858
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.087823748588562,
      "learning_rate": 2.8902441516310133e-06,
      "loss": 2.4029,
      "step": 57859
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0143332481384277,
      "learning_rate": 2.8899546174945913e-06,
      "loss": 2.1487,
      "step": 57860
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.094383716583252,
      "learning_rate": 2.8896650954116235e-06,
      "loss": 2.265,
      "step": 57861
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1111708879470825,
      "learning_rate": 2.8893755853826087e-06,
      "loss": 2.4042,
      "step": 57862
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0348786115646362,
      "learning_rate": 2.8890860874080294e-06,
      "loss": 2.2379,
      "step": 57863
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0016206502914429,
      "learning_rate": 2.888796601488385e-06,
      "loss": 2.2602,
      "step": 57864
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.250279188156128,
      "learning_rate": 2.8885071276241604e-06,
      "loss": 2.4286,
      "step": 57865
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1188029050827026,
      "learning_rate": 2.8882176658158445e-06,
      "loss": 2.2025,
      "step": 57866
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.095430612564087,
      "learning_rate": 2.8879282160639355e-06,
      "loss": 2.4224,
      "step": 57867
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2538188695907593,
      "learning_rate": 2.887638778368919e-06,
      "loss": 2.2866,
      "step": 57868
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0730712413787842,
      "learning_rate": 2.8873493527312835e-06,
      "loss": 2.2094,
      "step": 57869
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1254875659942627,
      "learning_rate": 2.887059939151525e-06,
      "loss": 2.3511,
      "step": 57870
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1817525625228882,
      "learning_rate": 2.8867705376301326e-06,
      "loss": 2.3658,
      "step": 57871
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2322373390197754,
      "learning_rate": 2.8864811481675935e-06,
      "loss": 2.1989,
      "step": 57872
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0056694746017456,
      "learning_rate": 2.8861917707644028e-06,
      "loss": 2.5306,
      "step": 57873
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1074734926223755,
      "learning_rate": 2.8859024054210473e-06,
      "loss": 2.2895,
      "step": 57874
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0753889083862305,
      "learning_rate": 2.885613052138021e-06,
      "loss": 2.3401,
      "step": 57875
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.04035484790802,
      "learning_rate": 2.8853237109158107e-06,
      "loss": 2.3319,
      "step": 57876
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7414848804473877,
      "learning_rate": 2.885034381754912e-06,
      "loss": 2.3533,
      "step": 57877
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1346032619476318,
      "learning_rate": 2.8847450646558086e-06,
      "loss": 2.3838,
      "step": 57878
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.051832675933838,
      "learning_rate": 2.8844557596189983e-06,
      "loss": 2.4668,
      "step": 57879
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0264254808425903,
      "learning_rate": 2.884166466644965e-06,
      "loss": 2.2679,
      "step": 57880
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0804013013839722,
      "learning_rate": 2.8838771857342083e-06,
      "loss": 2.5654,
      "step": 57881
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3188194036483765,
      "learning_rate": 2.8835879168872073e-06,
      "loss": 2.366,
      "step": 57882
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1575381755828857,
      "learning_rate": 2.8832986601044597e-06,
      "loss": 2.3527,
      "step": 57883
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.7146520614624023,
      "learning_rate": 2.883009415386451e-06,
      "loss": 2.36,
      "step": 57884
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0792688131332397,
      "learning_rate": 2.882720182733677e-06,
      "loss": 2.2979,
      "step": 57885
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1518818140029907,
      "learning_rate": 2.882430962146622e-06,
      "loss": 2.286,
      "step": 57886
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1296452283859253,
      "learning_rate": 2.8821417536257833e-06,
      "loss": 2.4756,
      "step": 57887
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.998775064945221,
      "learning_rate": 2.8818525571716437e-06,
      "loss": 2.2662,
      "step": 57888
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1163842678070068,
      "learning_rate": 2.8815633727847013e-06,
      "loss": 2.2856,
      "step": 57889
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.095833420753479,
      "learning_rate": 2.881274200465438e-06,
      "loss": 2.4588,
      "step": 57890
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1239286661148071,
      "learning_rate": 2.8809850402143522e-06,
      "loss": 2.5324,
      "step": 57891
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0345313549041748,
      "learning_rate": 2.8806958920319307e-06,
      "loss": 2.1913,
      "step": 57892
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.130016565322876,
      "learning_rate": 2.8804067559186598e-06,
      "loss": 2.4172,
      "step": 57893
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1542021036148071,
      "learning_rate": 2.880117631875037e-06,
      "loss": 2.2873,
      "step": 57894
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0699737071990967,
      "learning_rate": 2.8798285199015473e-06,
      "loss": 2.7139,
      "step": 57895
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3871691226959229,
      "learning_rate": 2.8795394199986795e-06,
      "loss": 2.435,
      "step": 57896
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0359995365142822,
      "learning_rate": 2.8792503321669285e-06,
      "loss": 2.2183,
      "step": 57897
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.194942831993103,
      "learning_rate": 2.8789612564067837e-06,
      "loss": 2.3968,
      "step": 57898
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1941368579864502,
      "learning_rate": 2.878672192718729e-06,
      "loss": 2.2827,
      "step": 57899
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1822000741958618,
      "learning_rate": 2.8783831411032626e-06,
      "loss": 2.2405,
      "step": 57900
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3827329874038696,
      "learning_rate": 2.878094101560869e-06,
      "loss": 2.4529,
      "step": 57901
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9459091424942017,
      "learning_rate": 2.8778050740920418e-06,
      "loss": 2.3466,
      "step": 57902
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0378130674362183,
      "learning_rate": 2.8775160586972674e-06,
      "loss": 2.1567,
      "step": 57903
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.079507827758789,
      "learning_rate": 2.87722705537704e-06,
      "loss": 2.1681,
      "step": 57904
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.428959846496582,
      "learning_rate": 2.8769380641318444e-06,
      "loss": 2.2904,
      "step": 57905
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.19331693649292,
      "learning_rate": 2.8766490849621764e-06,
      "loss": 2.3083,
      "step": 57906
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0660089254379272,
      "learning_rate": 2.876360117868523e-06,
      "loss": 2.2198,
      "step": 57907
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1469675302505493,
      "learning_rate": 2.8760711628513737e-06,
      "loss": 2.2807,
      "step": 57908
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1613985300064087,
      "learning_rate": 2.875782219911216e-06,
      "loss": 2.2037,
      "step": 57909
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.167722225189209,
      "learning_rate": 2.8754932890485455e-06,
      "loss": 2.5108,
      "step": 57910
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.113640546798706,
      "learning_rate": 2.8752043702638457e-06,
      "loss": 2.3256,
      "step": 57911
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9918458461761475,
      "learning_rate": 2.8749154635576125e-06,
      "loss": 2.14,
      "step": 57912
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.182083249092102,
      "learning_rate": 2.87462656893033e-06,
      "loss": 2.3962,
      "step": 57913
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1242746114730835,
      "learning_rate": 2.8743376863824924e-06,
      "loss": 2.2584,
      "step": 57914
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0360907316207886,
      "learning_rate": 2.8740488159145864e-06,
      "loss": 2.3337,
      "step": 57915
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0071269273757935,
      "learning_rate": 2.8737599575271046e-06,
      "loss": 2.1251,
      "step": 57916
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0878130197525024,
      "learning_rate": 2.873471111220534e-06,
      "loss": 2.3756,
      "step": 57917
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0889842510223389,
      "learning_rate": 2.8731822769953666e-06,
      "loss": 2.3004,
      "step": 57918
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.262510061264038,
      "learning_rate": 2.8728934548520917e-06,
      "loss": 2.2075,
      "step": 57919
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.047584891319275,
      "learning_rate": 2.872604644791195e-06,
      "loss": 2.1953,
      "step": 57920
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0522725582122803,
      "learning_rate": 2.8723158468131716e-06,
      "loss": 2.4145,
      "step": 57921
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1171717643737793,
      "learning_rate": 2.8720270609185095e-06,
      "loss": 2.2432,
      "step": 57922
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1706757545471191,
      "learning_rate": 2.871738287107694e-06,
      "loss": 2.3323,
      "step": 57923
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1257715225219727,
      "learning_rate": 2.8714495253812213e-06,
      "loss": 2.3885,
      "step": 57924
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.100068211555481,
      "learning_rate": 2.8711607757395744e-06,
      "loss": 2.0401,
      "step": 57925
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0487371683120728,
      "learning_rate": 2.87087203818325e-06,
      "loss": 2.3218,
      "step": 57926
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0543891191482544,
      "learning_rate": 2.8705833127127335e-06,
      "loss": 2.2899,
      "step": 57927
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0772736072540283,
      "learning_rate": 2.87029459932851e-06,
      "loss": 2.3728,
      "step": 57928
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0009655952453613,
      "learning_rate": 2.8700058980310785e-06,
      "loss": 2.1736,
      "step": 57929
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1609452962875366,
      "learning_rate": 2.86971720882092e-06,
      "loss": 2.4956,
      "step": 57930
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0888856649398804,
      "learning_rate": 2.8694285316985306e-06,
      "loss": 2.2762,
      "step": 57931
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2381999492645264,
      "learning_rate": 2.8691398666643933e-06,
      "loss": 2.2421,
      "step": 57932
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1080408096313477,
      "learning_rate": 2.8688512137190028e-06,
      "loss": 2.3811,
      "step": 57933
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.021596074104309,
      "learning_rate": 2.8685625728628476e-06,
      "loss": 2.4701,
      "step": 57934
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0621116161346436,
      "learning_rate": 2.868273944096415e-06,
      "loss": 2.4688,
      "step": 57935
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1116111278533936,
      "learning_rate": 2.867985327420192e-06,
      "loss": 2.4526,
      "step": 57936
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1447031497955322,
      "learning_rate": 2.867696722834674e-06,
      "loss": 2.2858,
      "step": 57937
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0088127851486206,
      "learning_rate": 2.8674081303403433e-06,
      "loss": 2.5427,
      "step": 57938
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0374324321746826,
      "learning_rate": 2.867119549937697e-06,
      "loss": 2.3417,
      "step": 57939
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0855469703674316,
      "learning_rate": 2.8668309816272176e-06,
      "loss": 2.1302,
      "step": 57940
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9641817212104797,
      "learning_rate": 2.8665424254094e-06,
      "loss": 2.1003,
      "step": 57941
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.139235496520996,
      "learning_rate": 2.866253881284726e-06,
      "loss": 2.2633,
      "step": 57942
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.145475149154663,
      "learning_rate": 2.8659653492536933e-06,
      "loss": 2.1222,
      "step": 57943
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0712082386016846,
      "learning_rate": 2.8656768293167836e-06,
      "loss": 2.3668,
      "step": 57944
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0886585712432861,
      "learning_rate": 2.8653883214744926e-06,
      "loss": 2.1368,
      "step": 57945
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0542064905166626,
      "learning_rate": 2.865099825727303e-06,
      "loss": 2.1982,
      "step": 57946
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.08922278881073,
      "learning_rate": 2.864811342075714e-06,
      "loss": 2.1938,
      "step": 57947
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1630903482437134,
      "learning_rate": 2.8645228705202e-06,
      "loss": 2.1995,
      "step": 57948
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1323708295822144,
      "learning_rate": 2.8642344110612617e-06,
      "loss": 2.3557,
      "step": 57949
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1475566625595093,
      "learning_rate": 2.8639459636993805e-06,
      "loss": 2.3759,
      "step": 57950
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0716848373413086,
      "learning_rate": 2.8636575284350534e-06,
      "loss": 2.4529,
      "step": 57951
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2427082061767578,
      "learning_rate": 2.8633691052687606e-06,
      "loss": 2.0969,
      "step": 57952
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3060376644134521,
      "learning_rate": 2.863080694200998e-06,
      "loss": 2.2007,
      "step": 57953
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1234424114227295,
      "learning_rate": 2.862792295232253e-06,
      "loss": 2.4097,
      "step": 57954
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0293678045272827,
      "learning_rate": 2.8625039083630103e-06,
      "loss": 2.4893,
      "step": 57955
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.008598804473877,
      "learning_rate": 2.8622155335937652e-06,
      "loss": 2.4286,
      "step": 57956
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1338748931884766,
      "learning_rate": 2.861927170925001e-06,
      "loss": 2.3958,
      "step": 57957
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0591694116592407,
      "learning_rate": 2.86163882035721e-06,
      "loss": 2.3929,
      "step": 57958
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.094359278678894,
      "learning_rate": 2.861350481890879e-06,
      "loss": 2.382,
      "step": 57959
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7022544145584106,
      "learning_rate": 2.8610621555265018e-06,
      "loss": 2.559,
      "step": 57960
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2493765354156494,
      "learning_rate": 2.860773841264558e-06,
      "loss": 2.2032,
      "step": 57961
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.078840970993042,
      "learning_rate": 2.860485539105545e-06,
      "loss": 2.1904,
      "step": 57962
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.143263339996338,
      "learning_rate": 2.860197249049945e-06,
      "loss": 2.3556,
      "step": 57963
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.160129427909851,
      "learning_rate": 2.859908971098252e-06,
      "loss": 2.3143,
      "step": 57964
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.067728042602539,
      "learning_rate": 2.8596207052509506e-06,
      "loss": 2.2864,
      "step": 57965
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.134988784790039,
      "learning_rate": 2.859332451508534e-06,
      "loss": 2.4285,
      "step": 57966
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.9793688654899597,
      "learning_rate": 2.8590442098714845e-06,
      "loss": 2.3828,
      "step": 57967
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1623404026031494,
      "learning_rate": 2.8587559803402985e-06,
      "loss": 2.3079,
      "step": 57968
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.092381477355957,
      "learning_rate": 2.858467762915458e-06,
      "loss": 2.2553,
      "step": 57969
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1174746751785278,
      "learning_rate": 2.858179557597456e-06,
      "loss": 2.2138,
      "step": 57970
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.087357521057129,
      "learning_rate": 2.8578913643867766e-06,
      "loss": 2.2544,
      "step": 57971
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0383796691894531,
      "learning_rate": 2.8576031832839146e-06,
      "loss": 2.1098,
      "step": 57972
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1327406167984009,
      "learning_rate": 2.8573150142893547e-06,
      "loss": 2.323,
      "step": 57973
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0260107517242432,
      "learning_rate": 2.8570268574035865e-06,
      "loss": 1.9981,
      "step": 57974
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0682909488677979,
      "learning_rate": 2.856738712627094e-06,
      "loss": 2.3663,
      "step": 57975
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.09817373752594,
      "learning_rate": 2.8564505799603724e-06,
      "loss": 2.2292,
      "step": 57976
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.060519814491272,
      "learning_rate": 2.856162459403905e-06,
      "loss": 2.2967,
      "step": 57977
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0576448440551758,
      "learning_rate": 2.8558743509581844e-06,
      "loss": 2.326,
      "step": 57978
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0640290975570679,
      "learning_rate": 2.8555862546236946e-06,
      "loss": 2.4939,
      "step": 57979
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1328045129776,
      "learning_rate": 2.85529817040093e-06,
      "loss": 2.1455,
      "step": 57980
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.062536597251892,
      "learning_rate": 2.8550100982903716e-06,
      "loss": 2.3189,
      "step": 57981
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.0706846714019775,
      "learning_rate": 2.8547220382925157e-06,
      "loss": 2.3003,
      "step": 57982
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3323653936386108,
      "learning_rate": 2.8544339904078455e-06,
      "loss": 2.412,
      "step": 57983
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.524587631225586,
      "learning_rate": 2.8541459546368476e-06,
      "loss": 2.268,
      "step": 57984
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1188902854919434,
      "learning_rate": 2.8538579309800175e-06,
      "loss": 2.2666,
      "step": 57985
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1072882413864136,
      "learning_rate": 2.8535699194378342e-06,
      "loss": 2.3154,
      "step": 57986
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2670806646347046,
      "learning_rate": 2.8532819200107953e-06,
      "loss": 2.3451,
      "step": 57987
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0298919677734375,
      "learning_rate": 2.8529939326993837e-06,
      "loss": 2.3351,
      "step": 57988
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0359997749328613,
      "learning_rate": 2.8527059575040885e-06,
      "loss": 2.3997,
      "step": 57989
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.029318928718567,
      "learning_rate": 2.852417994425395e-06,
      "loss": 2.413,
      "step": 57990
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0894198417663574,
      "learning_rate": 2.8521300434637967e-06,
      "loss": 2.4112,
      "step": 57991
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1635688543319702,
      "learning_rate": 2.8518421046197776e-06,
      "loss": 2.442,
      "step": 57992
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0944682359695435,
      "learning_rate": 2.8515541778938295e-06,
      "loss": 2.3559,
      "step": 57993
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.14556884765625,
      "learning_rate": 2.8512662632864363e-06,
      "loss": 2.4027,
      "step": 57994
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1089178323745728,
      "learning_rate": 2.8509783607980903e-06,
      "loss": 2.4409,
      "step": 57995
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0575553178787231,
      "learning_rate": 2.850690470429276e-06,
      "loss": 2.1379,
      "step": 57996
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0266584157943726,
      "learning_rate": 2.8504025921804844e-06,
      "loss": 2.3026,
      "step": 57997
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0915042161941528,
      "learning_rate": 2.8501147260522e-06,
      "loss": 2.5583,
      "step": 57998
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1018868684768677,
      "learning_rate": 2.8498268720449163e-06,
      "loss": 2.4235,
      "step": 57999
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4176201820373535,
      "learning_rate": 2.8495390301591184e-06,
      "loss": 2.4496,
      "step": 58000
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0481743812561035,
      "learning_rate": 2.8492512003952933e-06,
      "loss": 2.1972,
      "step": 58001
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.01447594165802,
      "learning_rate": 2.8489633827539263e-06,
      "loss": 2.3642,
      "step": 58002
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0236308574676514,
      "learning_rate": 2.848675577235512e-06,
      "loss": 2.484,
      "step": 58003
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1333861351013184,
      "learning_rate": 2.8483877838405315e-06,
      "loss": 2.3913,
      "step": 58004
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.102303385734558,
      "learning_rate": 2.8481000025694794e-06,
      "loss": 2.3599,
      "step": 58005
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.217365026473999,
      "learning_rate": 2.8478122334228375e-06,
      "loss": 2.149,
      "step": 58006
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1022776365280151,
      "learning_rate": 2.847524476401099e-06,
      "loss": 2.1206,
      "step": 58007
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1534218788146973,
      "learning_rate": 2.8472367315047456e-06,
      "loss": 2.2089,
      "step": 58008
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2296656370162964,
      "learning_rate": 2.8469489987342724e-06,
      "loss": 2.2688,
      "step": 58009
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0868048667907715,
      "learning_rate": 2.8466612780901626e-06,
      "loss": 2.4347,
      "step": 58010
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0810573101043701,
      "learning_rate": 2.8463735695729023e-06,
      "loss": 2.3972,
      "step": 58011
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1454614400863647,
      "learning_rate": 2.8460858731829854e-06,
      "loss": 2.0673,
      "step": 58012
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0105661153793335,
      "learning_rate": 2.845798188920895e-06,
      "loss": 2.1704,
      "step": 58013
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0390816926956177,
      "learning_rate": 2.8455105167871165e-06,
      "loss": 2.421,
      "step": 58014
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0169779062271118,
      "learning_rate": 2.8452228567821437e-06,
      "loss": 2.2086,
      "step": 58015
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2743542194366455,
      "learning_rate": 2.844935208906463e-06,
      "loss": 2.3315,
      "step": 58016
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1402196884155273,
      "learning_rate": 2.844647573160556e-06,
      "loss": 2.2864,
      "step": 58017
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1778228282928467,
      "learning_rate": 2.844359949544918e-06,
      "loss": 2.4078,
      "step": 58018
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0948148965835571,
      "learning_rate": 2.84407233806003e-06,
      "loss": 2.223,
      "step": 58019
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.178072452545166,
      "learning_rate": 2.8437847387063866e-06,
      "loss": 2.1898,
      "step": 58020
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0682176351547241,
      "learning_rate": 2.843497151484468e-06,
      "loss": 2.3252,
      "step": 58021
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0006183385849,
      "learning_rate": 2.8432095763947688e-06,
      "loss": 2.1068,
      "step": 58022
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.007558822631836,
      "learning_rate": 2.84292201343777e-06,
      "loss": 2.2799,
      "step": 58023
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0519099235534668,
      "learning_rate": 2.842634462613966e-06,
      "loss": 2.3139,
      "step": 58024
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.090514898300171,
      "learning_rate": 2.8423469239238355e-06,
      "loss": 2.3335,
      "step": 58025
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1137772798538208,
      "learning_rate": 2.842059397367879e-06,
      "loss": 2.2682,
      "step": 58026
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2332805395126343,
      "learning_rate": 2.841771882946569e-06,
      "loss": 2.2191,
      "step": 58027
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.05429208278656,
      "learning_rate": 2.841484380660403e-06,
      "loss": 2.2271,
      "step": 58028
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0971426963806152,
      "learning_rate": 2.841196890509862e-06,
      "loss": 2.2001,
      "step": 58029
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1270880699157715,
      "learning_rate": 2.8409094124954393e-06,
      "loss": 2.2839,
      "step": 58030
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.355654001235962,
      "learning_rate": 2.8406219466176168e-06,
      "loss": 2.2844,
      "step": 58031
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1145567893981934,
      "learning_rate": 2.8403344928768882e-06,
      "loss": 2.4288,
      "step": 58032
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0165839195251465,
      "learning_rate": 2.840047051273733e-06,
      "loss": 2.4723,
      "step": 58033
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9890958666801453,
      "learning_rate": 2.8397596218086465e-06,
      "loss": 2.1847,
      "step": 58034
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1481636762619019,
      "learning_rate": 2.8394722044821088e-06,
      "loss": 2.4072,
      "step": 58035
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9856831431388855,
      "learning_rate": 2.839184799294614e-06,
      "loss": 2.1215,
      "step": 58036
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9957185387611389,
      "learning_rate": 2.8388974062466456e-06,
      "loss": 2.3298,
      "step": 58037
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9730485677719116,
      "learning_rate": 2.838610025338687e-06,
      "loss": 2.2408,
      "step": 58038
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2103314399719238,
      "learning_rate": 2.838322656571234e-06,
      "loss": 2.0509,
      "step": 58039
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9835588932037354,
      "learning_rate": 2.838035299944769e-06,
      "loss": 2.3288,
      "step": 58040
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0055049657821655,
      "learning_rate": 2.8377479554597754e-06,
      "loss": 2.0986,
      "step": 58041
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0477169752120972,
      "learning_rate": 2.8374606231167478e-06,
      "loss": 2.3832,
      "step": 58042
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.135000467300415,
      "learning_rate": 2.837173302916166e-06,
      "loss": 2.3568,
      "step": 58043
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1501502990722656,
      "learning_rate": 2.8368859948585248e-06,
      "loss": 2.1189,
      "step": 58044
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0771446228027344,
      "learning_rate": 2.8365986989443073e-06,
      "loss": 2.3108,
      "step": 58045
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.315914511680603,
      "learning_rate": 2.836311415173997e-06,
      "loss": 2.0219,
      "step": 58046
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1303648948669434,
      "learning_rate": 2.8360241435480884e-06,
      "loss": 2.3904,
      "step": 58047
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.087147831916809,
      "learning_rate": 2.8357368840670606e-06,
      "loss": 2.2725,
      "step": 58048
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1044609546661377,
      "learning_rate": 2.835449636731408e-06,
      "loss": 2.2039,
      "step": 58049
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2166624069213867,
      "learning_rate": 2.83516240154161e-06,
      "loss": 2.6488,
      "step": 58050
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1807360649108887,
      "learning_rate": 2.8348751784981622e-06,
      "loss": 2.6259,
      "step": 58051
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2108721733093262,
      "learning_rate": 2.8345879676015463e-06,
      "loss": 2.1864,
      "step": 58052
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.066440224647522,
      "learning_rate": 2.8343007688522506e-06,
      "loss": 2.4055,
      "step": 58053
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0622714757919312,
      "learning_rate": 2.8340135822507576e-06,
      "loss": 2.3766,
      "step": 58054
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1164333820343018,
      "learning_rate": 2.8337264077975614e-06,
      "loss": 2.0602,
      "step": 58055
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0791672468185425,
      "learning_rate": 2.8334392454931415e-06,
      "loss": 2.3338,
      "step": 58056
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1587539911270142,
      "learning_rate": 2.833152095337992e-06,
      "loss": 2.2954,
      "step": 58057
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1430439949035645,
      "learning_rate": 2.832864957332593e-06,
      "loss": 2.4696,
      "step": 58058
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0176578760147095,
      "learning_rate": 2.8325778314774377e-06,
      "loss": 2.324,
      "step": 58059
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1484895944595337,
      "learning_rate": 2.832290717773006e-06,
      "loss": 2.474,
      "step": 58060
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1206395626068115,
      "learning_rate": 2.8320036162197916e-06,
      "loss": 2.3673,
      "step": 58061
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9889318943023682,
      "learning_rate": 2.8317165268182744e-06,
      "loss": 2.0947,
      "step": 58062
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.123958706855774,
      "learning_rate": 2.8314294495689488e-06,
      "loss": 2.2013,
      "step": 58063
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9895516037940979,
      "learning_rate": 2.8311423844722928e-06,
      "loss": 2.1876,
      "step": 58064
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0153968334197998,
      "learning_rate": 2.8308553315288024e-06,
      "loss": 2.2431,
      "step": 58065
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.095105528831482,
      "learning_rate": 2.8305682907389575e-06,
      "loss": 2.1983,
      "step": 58066
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0379018783569336,
      "learning_rate": 2.830281262103247e-06,
      "loss": 2.3625,
      "step": 58067
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0429753065109253,
      "learning_rate": 2.8299942456221552e-06,
      "loss": 2.314,
      "step": 58068
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.05030357837677,
      "learning_rate": 2.8297072412961724e-06,
      "loss": 2.3739,
      "step": 58069
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1583839654922485,
      "learning_rate": 2.8294202491257806e-06,
      "loss": 2.2762,
      "step": 58070
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1772726774215698,
      "learning_rate": 2.8291332691114713e-06,
      "loss": 2.3533,
      "step": 58071
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0872273445129395,
      "learning_rate": 2.828846301253729e-06,
      "loss": 2.3718,
      "step": 58072
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.368257761001587,
      "learning_rate": 2.828559345553037e-06,
      "loss": 2.3269,
      "step": 58073
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1994736194610596,
      "learning_rate": 2.8282724020098884e-06,
      "loss": 2.4641,
      "step": 58074
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0000883340835571,
      "learning_rate": 2.8279854706247612e-06,
      "loss": 2.1474,
      "step": 58075
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1055629253387451,
      "learning_rate": 2.827698551398151e-06,
      "loss": 2.4143,
      "step": 58076
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0384855270385742,
      "learning_rate": 2.8274116443305365e-06,
      "loss": 2.4348,
      "step": 58077
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0231906175613403,
      "learning_rate": 2.8271247494224097e-06,
      "loss": 2.5125,
      "step": 58078
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1430269479751587,
      "learning_rate": 2.8268378666742544e-06,
      "loss": 2.0851,
      "step": 58079
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0863698720932007,
      "learning_rate": 2.8265509960865577e-06,
      "loss": 2.1896,
      "step": 58080
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1057072877883911,
      "learning_rate": 2.8262641376598033e-06,
      "loss": 2.0884,
      "step": 58081
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0350784063339233,
      "learning_rate": 2.8259772913944815e-06,
      "loss": 2.4122,
      "step": 58082
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1915470361709595,
      "learning_rate": 2.825690457291074e-06,
      "loss": 2.3722,
      "step": 58083
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0187724828720093,
      "learning_rate": 2.8254036353500713e-06,
      "loss": 2.3348,
      "step": 58084
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0042237043380737,
      "learning_rate": 2.8251168255719563e-06,
      "loss": 2.5471,
      "step": 58085
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0012805461883545,
      "learning_rate": 2.8248300279572195e-06,
      "loss": 2.3889,
      "step": 58086
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2217285633087158,
      "learning_rate": 2.8245432425063425e-06,
      "loss": 2.5535,
      "step": 58087
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0432437658309937,
      "learning_rate": 2.824256469219815e-06,
      "loss": 2.5041,
      "step": 58088
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0674785375595093,
      "learning_rate": 2.82396970809812e-06,
      "loss": 2.1844,
      "step": 58089
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2038938999176025,
      "learning_rate": 2.823682959141748e-06,
      "loss": 2.3977,
      "step": 58090
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1180704832077026,
      "learning_rate": 2.823396222351179e-06,
      "loss": 2.4338,
      "step": 58091
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1301714181900024,
      "learning_rate": 2.823109497726909e-06,
      "loss": 2.2735,
      "step": 58092
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0886552333831787,
      "learning_rate": 2.8228227852694114e-06,
      "loss": 2.5634,
      "step": 58093
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9685477018356323,
      "learning_rate": 2.8225360849791816e-06,
      "loss": 2.3403,
      "step": 58094
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2726643085479736,
      "learning_rate": 2.8222493968566987e-06,
      "loss": 2.3691,
      "step": 58095
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2289608716964722,
      "learning_rate": 2.821962720902457e-06,
      "loss": 2.3369,
      "step": 58096
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0908315181732178,
      "learning_rate": 2.8216760571169345e-06,
      "loss": 2.2863,
      "step": 58097
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0912948846817017,
      "learning_rate": 2.821389405500623e-06,
      "loss": 2.3532,
      "step": 58098
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0842629671096802,
      "learning_rate": 2.8211027660540035e-06,
      "loss": 2.1651,
      "step": 58099
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1197922229766846,
      "learning_rate": 2.820816138777568e-06,
      "loss": 2.2804,
      "step": 58100
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2243571281433105,
      "learning_rate": 2.8205295236717988e-06,
      "loss": 2.5363,
      "step": 58101
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1518527269363403,
      "learning_rate": 2.8202429207371782e-06,
      "loss": 2.5261,
      "step": 58102
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1217803955078125,
      "learning_rate": 2.8199563299741993e-06,
      "loss": 2.49,
      "step": 58103
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1990410089492798,
      "learning_rate": 2.8196697513833425e-06,
      "loss": 2.24,
      "step": 58104
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1165601015090942,
      "learning_rate": 2.8193831849650975e-06,
      "loss": 2.4264,
      "step": 58105
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0891668796539307,
      "learning_rate": 2.8190966307199485e-06,
      "loss": 2.2574,
      "step": 58106
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.145498514175415,
      "learning_rate": 2.8188100886483815e-06,
      "loss": 2.116,
      "step": 58107
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1123253107070923,
      "learning_rate": 2.8185235587508777e-06,
      "loss": 2.1879,
      "step": 58108
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1071479320526123,
      "learning_rate": 2.8182370410279305e-06,
      "loss": 2.449,
      "step": 58109
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.269768238067627,
      "learning_rate": 2.8179505354800197e-06,
      "loss": 2.4843,
      "step": 58110
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1532825231552124,
      "learning_rate": 2.817664042107635e-06,
      "loss": 2.3309,
      "step": 58111
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0863029956817627,
      "learning_rate": 2.8173775609112586e-06,
      "loss": 2.2157,
      "step": 58112
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0869359970092773,
      "learning_rate": 2.8170910918913807e-06,
      "loss": 2.3522,
      "step": 58113
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0054937601089478,
      "learning_rate": 2.816804635048481e-06,
      "loss": 2.2541,
      "step": 58114
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.092428207397461,
      "learning_rate": 2.8165181903830518e-06,
      "loss": 2.1753,
      "step": 58115
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9684323668479919,
      "learning_rate": 2.8162317578955723e-06,
      "loss": 2.3795,
      "step": 58116
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0673283338546753,
      "learning_rate": 2.815945337586533e-06,
      "loss": 2.2584,
      "step": 58117
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0790706872940063,
      "learning_rate": 2.8156589294564186e-06,
      "loss": 2.2248,
      "step": 58118
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0519245862960815,
      "learning_rate": 2.815372533505714e-06,
      "loss": 2.4827,
      "step": 58119
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0905922651290894,
      "learning_rate": 2.8150861497349e-06,
      "loss": 2.332,
      "step": 58120
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0138343572616577,
      "learning_rate": 2.8147997781444703e-06,
      "loss": 2.2008,
      "step": 58121
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0248236656188965,
      "learning_rate": 2.814513418734902e-06,
      "loss": 2.4666,
      "step": 58122
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.054036259651184,
      "learning_rate": 2.8142270715066897e-06,
      "loss": 2.1208,
      "step": 58123
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1483145952224731,
      "learning_rate": 2.81394073646031e-06,
      "loss": 2.2312,
      "step": 58124
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0087450742721558,
      "learning_rate": 2.813654413596256e-06,
      "loss": 2.1923,
      "step": 58125
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4619441032409668,
      "learning_rate": 2.813368102915007e-06,
      "loss": 2.3204,
      "step": 58126
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0998777151107788,
      "learning_rate": 2.813081804417054e-06,
      "loss": 2.3008,
      "step": 58127
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0670106410980225,
      "learning_rate": 2.8127955181028777e-06,
      "loss": 2.276,
      "step": 58128
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0769113302230835,
      "learning_rate": 2.812509243972963e-06,
      "loss": 2.4309,
      "step": 58129
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2032073736190796,
      "learning_rate": 2.8122229820278e-06,
      "loss": 2.2636,
      "step": 58130
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.020818829536438,
      "learning_rate": 2.811936732267869e-06,
      "loss": 2.1961,
      "step": 58131
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0574833154678345,
      "learning_rate": 2.8116504946936597e-06,
      "loss": 2.2551,
      "step": 58132
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.131036400794983,
      "learning_rate": 2.8113642693056554e-06,
      "loss": 2.3755,
      "step": 58133
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2167195081710815,
      "learning_rate": 2.8110780561043414e-06,
      "loss": 2.3453,
      "step": 58134
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0473299026489258,
      "learning_rate": 2.8107918550901993e-06,
      "loss": 2.2751,
      "step": 58135
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.066502332687378,
      "learning_rate": 2.8105056662637208e-06,
      "loss": 2.4613,
      "step": 58136
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.04021155834198,
      "learning_rate": 2.8102194896253844e-06,
      "loss": 2.348,
      "step": 58137
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.081563115119934,
      "learning_rate": 2.8099333251756823e-06,
      "loss": 2.4682,
      "step": 58138
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0035842657089233,
      "learning_rate": 2.8096471729150922e-06,
      "loss": 2.0731,
      "step": 58139
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0353854894638062,
      "learning_rate": 2.8093610328441067e-06,
      "loss": 2.2184,
      "step": 58140
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1253308057785034,
      "learning_rate": 2.809074904963204e-06,
      "loss": 2.2212,
      "step": 58141
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.21488356590271,
      "learning_rate": 2.808788789272875e-06,
      "loss": 2.3005,
      "step": 58142
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4128209352493286,
      "learning_rate": 2.8085026857735988e-06,
      "loss": 2.1956,
      "step": 58143
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0789660215377808,
      "learning_rate": 2.808216594465868e-06,
      "loss": 2.3488,
      "step": 58144
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1021612882614136,
      "learning_rate": 2.8079305153501622e-06,
      "loss": 2.2494,
      "step": 58145
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0903962850570679,
      "learning_rate": 2.8076444484269685e-06,
      "loss": 2.2508,
      "step": 58146
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1316519975662231,
      "learning_rate": 2.8073583936967665e-06,
      "loss": 2.4355,
      "step": 58147
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.117493987083435,
      "learning_rate": 2.8070723511600494e-06,
      "loss": 2.2933,
      "step": 58148
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1027950048446655,
      "learning_rate": 2.806786320817295e-06,
      "loss": 2.287,
      "step": 58149
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0414478778839111,
      "learning_rate": 2.8065003026689953e-06,
      "loss": 2.1697,
      "step": 58150
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2995518445968628,
      "learning_rate": 2.806214296715627e-06,
      "loss": 2.2113,
      "step": 58151
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1375435590744019,
      "learning_rate": 2.8059283029576835e-06,
      "loss": 2.217,
      "step": 58152
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0215566158294678,
      "learning_rate": 2.805642321395642e-06,
      "loss": 2.2996,
      "step": 58153
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0849488973617554,
      "learning_rate": 2.805356352029993e-06,
      "loss": 2.0682,
      "step": 58154
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1232370138168335,
      "learning_rate": 2.805070394861219e-06,
      "loss": 2.4633,
      "step": 58155
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1456226110458374,
      "learning_rate": 2.8047844498898026e-06,
      "loss": 2.1732,
      "step": 58156
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1467052698135376,
      "learning_rate": 2.804498517116234e-06,
      "loss": 2.1806,
      "step": 58157
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0306942462921143,
      "learning_rate": 2.8042125965409937e-06,
      "loss": 2.6628,
      "step": 58158
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.117523431777954,
      "learning_rate": 2.8039266881645656e-06,
      "loss": 2.362,
      "step": 58159
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2660963535308838,
      "learning_rate": 2.8036407919874375e-06,
      "loss": 2.3115,
      "step": 58160
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0360766649246216,
      "learning_rate": 2.8033549080100907e-06,
      "loss": 2.3968,
      "step": 58161
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1631091833114624,
      "learning_rate": 2.8030690362330148e-06,
      "loss": 2.5032,
      "step": 58162
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2226507663726807,
      "learning_rate": 2.802783176656693e-06,
      "loss": 2.5066,
      "step": 58163
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3727965354919434,
      "learning_rate": 2.802497329281604e-06,
      "loss": 2.398,
      "step": 58164
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0356650352478027,
      "learning_rate": 2.802211494108239e-06,
      "loss": 2.2877,
      "step": 58165
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3169199228286743,
      "learning_rate": 2.801925671137079e-06,
      "loss": 2.1979,
      "step": 58166
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0924419164657593,
      "learning_rate": 2.8016398603686124e-06,
      "loss": 2.282,
      "step": 58167
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0569100379943848,
      "learning_rate": 2.8013540618033177e-06,
      "loss": 2.3269,
      "step": 58168
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1547720432281494,
      "learning_rate": 2.801068275441686e-06,
      "loss": 2.452,
      "step": 58169
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0007851123809814,
      "learning_rate": 2.8007825012841972e-06,
      "loss": 2.4844,
      "step": 58170
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0967432260513306,
      "learning_rate": 2.800496739331342e-06,
      "loss": 2.2552,
      "step": 58171
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9915152788162231,
      "learning_rate": 2.800210989583595e-06,
      "loss": 2.4084,
      "step": 58172
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0487138032913208,
      "learning_rate": 2.799925252041449e-06,
      "loss": 2.508,
      "step": 58173
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.139442801475525,
      "learning_rate": 2.799639526705381e-06,
      "loss": 2.486,
      "step": 58174
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3054518699645996,
      "learning_rate": 2.7993538135758836e-06,
      "loss": 2.2502,
      "step": 58175
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1481099128723145,
      "learning_rate": 2.799068112653434e-06,
      "loss": 2.2394,
      "step": 58176
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.056183099746704,
      "learning_rate": 2.798782423938522e-06,
      "loss": 2.3489,
      "step": 58177
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1013259887695312,
      "learning_rate": 2.798496747431627e-06,
      "loss": 2.3126,
      "step": 58178
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0768781900405884,
      "learning_rate": 2.798211083133239e-06,
      "loss": 2.5194,
      "step": 58179
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1109472513198853,
      "learning_rate": 2.7979254310438365e-06,
      "loss": 2.3934,
      "step": 58180
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.131402850151062,
      "learning_rate": 2.7976397911639086e-06,
      "loss": 2.4406,
      "step": 58181
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1447018384933472,
      "learning_rate": 2.797354163493935e-06,
      "loss": 2.2212,
      "step": 58182
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1254057884216309,
      "learning_rate": 2.797068548034405e-06,
      "loss": 2.1023,
      "step": 58183
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9748066663742065,
      "learning_rate": 2.796782944785801e-06,
      "loss": 2.2482,
      "step": 58184
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0970314741134644,
      "learning_rate": 2.7964973537486063e-06,
      "loss": 2.4223,
      "step": 58185
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0623507499694824,
      "learning_rate": 2.796211774923301e-06,
      "loss": 2.4601,
      "step": 58186
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0553944110870361,
      "learning_rate": 2.7959262083103766e-06,
      "loss": 2.2709,
      "step": 58187
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1237200498580933,
      "learning_rate": 2.795640653910311e-06,
      "loss": 2.2535,
      "step": 58188
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0804132223129272,
      "learning_rate": 2.7953551117235934e-06,
      "loss": 2.4999,
      "step": 58189
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2098734378814697,
      "learning_rate": 2.795069581750707e-06,
      "loss": 2.3611,
      "step": 58190
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1273597478866577,
      "learning_rate": 2.7947840639921308e-06,
      "loss": 2.304,
      "step": 58191
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0874186754226685,
      "learning_rate": 2.7944985584483554e-06,
      "loss": 2.4091,
      "step": 58192
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0353822708129883,
      "learning_rate": 2.7942130651198583e-06,
      "loss": 2.2756,
      "step": 58193
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1543896198272705,
      "learning_rate": 2.7939275840071313e-06,
      "loss": 2.2049,
      "step": 58194
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0540207624435425,
      "learning_rate": 2.7936421151106507e-06,
      "loss": 2.3612,
      "step": 58195
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0483542680740356,
      "learning_rate": 2.7933566584309068e-06,
      "loss": 2.3119,
      "step": 58196
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0487991571426392,
      "learning_rate": 2.7930712139683815e-06,
      "loss": 2.4471,
      "step": 58197
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0196529626846313,
      "learning_rate": 2.792785781723557e-06,
      "loss": 2.2927,
      "step": 58198
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1896064281463623,
      "learning_rate": 2.792500361696915e-06,
      "loss": 2.3624,
      "step": 58199
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.180772066116333,
      "learning_rate": 2.7922149538889463e-06,
      "loss": 2.2796,
      "step": 58200
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0809085369110107,
      "learning_rate": 2.7919295583001273e-06,
      "loss": 2.3892,
      "step": 58201
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.035231351852417,
      "learning_rate": 2.7916441749309485e-06,
      "loss": 2.2107,
      "step": 58202
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0810283422470093,
      "learning_rate": 2.7913588037818875e-06,
      "loss": 2.4529,
      "step": 58203
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0553451776504517,
      "learning_rate": 2.7910734448534336e-06,
      "loss": 2.1722,
      "step": 58204
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1230430603027344,
      "learning_rate": 2.7907880981460656e-06,
      "loss": 2.3833,
      "step": 58205
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1241415739059448,
      "learning_rate": 2.7905027636602734e-06,
      "loss": 2.3945,
      "step": 58206
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.6557817459106445,
      "learning_rate": 2.7902174413965334e-06,
      "loss": 2.2413,
      "step": 58207
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0805679559707642,
      "learning_rate": 2.7899321313553353e-06,
      "loss": 2.3404,
      "step": 58208
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1026593446731567,
      "learning_rate": 2.7896468335371585e-06,
      "loss": 2.3709,
      "step": 58209
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9935812950134277,
      "learning_rate": 2.7893615479424917e-06,
      "loss": 2.3788,
      "step": 58210
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.088463544845581,
      "learning_rate": 2.789076274571815e-06,
      "loss": 2.2881,
      "step": 58211
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0081018209457397,
      "learning_rate": 2.7887910134256122e-06,
      "loss": 2.4047,
      "step": 58212
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1471598148345947,
      "learning_rate": 2.7885057645043643e-06,
      "loss": 2.2146,
      "step": 58213
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1643158197402954,
      "learning_rate": 2.788220527808562e-06,
      "loss": 2.3952,
      "step": 58214
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.051424264907837,
      "learning_rate": 2.78793530333868e-06,
      "loss": 2.3697,
      "step": 58215
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0603135824203491,
      "learning_rate": 2.78765009109521e-06,
      "loss": 2.3289,
      "step": 58216
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0532724857330322,
      "learning_rate": 2.787364891078629e-06,
      "loss": 2.2611,
      "step": 58217
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0378926992416382,
      "learning_rate": 2.787079703289427e-06,
      "loss": 2.428,
      "step": 58218
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.975796103477478,
      "learning_rate": 2.7867945277280827e-06,
      "loss": 2.3285,
      "step": 58219
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1640092134475708,
      "learning_rate": 2.786509364395079e-06,
      "loss": 2.3737,
      "step": 58220
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.207481861114502,
      "learning_rate": 2.7862242132909035e-06,
      "loss": 1.9044,
      "step": 58221
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1556529998779297,
      "learning_rate": 2.785939074416034e-06,
      "loss": 2.505,
      "step": 58222
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0719324350357056,
      "learning_rate": 2.78565394777096e-06,
      "loss": 2.3056,
      "step": 58223
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0988988876342773,
      "learning_rate": 2.7853688333561625e-06,
      "loss": 2.2543,
      "step": 58224
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0775871276855469,
      "learning_rate": 2.7850837311721236e-06,
      "loss": 2.4289,
      "step": 58225
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1564089059829712,
      "learning_rate": 2.7847986412193253e-06,
      "loss": 2.2242,
      "step": 58226
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0508766174316406,
      "learning_rate": 2.7845135634982544e-06,
      "loss": 2.3974,
      "step": 58227
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0176506042480469,
      "learning_rate": 2.7842284980093913e-06,
      "loss": 2.2555,
      "step": 58228
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1125138998031616,
      "learning_rate": 2.7839434447532234e-06,
      "loss": 2.2067,
      "step": 58229
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1119266748428345,
      "learning_rate": 2.7836584037302272e-06,
      "loss": 2.1812,
      "step": 58230
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.113800048828125,
      "learning_rate": 2.783373374940893e-06,
      "loss": 2.4026,
      "step": 58231
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0963343381881714,
      "learning_rate": 2.7830883583856994e-06,
      "loss": 2.3103,
      "step": 58232
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0834450721740723,
      "learning_rate": 2.7828033540651334e-06,
      "loss": 2.3618,
      "step": 58233
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1774859428405762,
      "learning_rate": 2.7825183619796724e-06,
      "loss": 2.237,
      "step": 58234
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0646185874938965,
      "learning_rate": 2.7822333821298065e-06,
      "loss": 2.3703,
      "step": 58235
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.040199637413025,
      "learning_rate": 2.7819484145160113e-06,
      "loss": 2.2348,
      "step": 58236
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1238526105880737,
      "learning_rate": 2.7816634591387814e-06,
      "loss": 2.4231,
      "step": 58237
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0582774877548218,
      "learning_rate": 2.781378515998586e-06,
      "loss": 2.1619,
      "step": 58238
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1728768348693848,
      "learning_rate": 2.7810935850959174e-06,
      "loss": 2.4043,
      "step": 58239
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9957057237625122,
      "learning_rate": 2.780808666431253e-06,
      "loss": 2.4498,
      "step": 58240
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1592576503753662,
      "learning_rate": 2.7805237600050806e-06,
      "loss": 2.0913,
      "step": 58241
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0426082611083984,
      "learning_rate": 2.7802388658178793e-06,
      "loss": 2.1463,
      "step": 58242
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1932100057601929,
      "learning_rate": 2.7799539838701363e-06,
      "loss": 2.2521,
      "step": 58243
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.124265432357788,
      "learning_rate": 2.77966911416233e-06,
      "loss": 2.0713,
      "step": 58244
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0193630456924438,
      "learning_rate": 2.779384256694948e-06,
      "loss": 2.4756,
      "step": 58245
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1983168125152588,
      "learning_rate": 2.779099411468471e-06,
      "loss": 2.2476,
      "step": 58246
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2741605043411255,
      "learning_rate": 2.7788145784833785e-06,
      "loss": 2.3458,
      "step": 58247
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1091465950012207,
      "learning_rate": 2.7785297577401603e-06,
      "loss": 2.4643,
      "step": 58248
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0780264139175415,
      "learning_rate": 2.7782449492392917e-06,
      "loss": 2.1351,
      "step": 58249
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.045493483543396,
      "learning_rate": 2.7779601529812616e-06,
      "loss": 2.0325,
      "step": 58250
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.014845371246338,
      "learning_rate": 2.7776753689665524e-06,
      "loss": 2.1893,
      "step": 58251
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.040421724319458,
      "learning_rate": 2.777390597195644e-06,
      "loss": 2.586,
      "step": 58252
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1136481761932373,
      "learning_rate": 2.7771058376690162e-06,
      "loss": 2.334,
      "step": 58253
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0687620639801025,
      "learning_rate": 2.7768210903871604e-06,
      "loss": 2.1313,
      "step": 58254
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.112852692604065,
      "learning_rate": 2.776536355350551e-06,
      "loss": 2.4617,
      "step": 58255
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.195055365562439,
      "learning_rate": 2.7762516325596765e-06,
      "loss": 2.2056,
      "step": 58256
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1235498189926147,
      "learning_rate": 2.775966922015015e-06,
      "loss": 2.0762,
      "step": 58257
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0690821409225464,
      "learning_rate": 2.7756822237170557e-06,
      "loss": 2.1036,
      "step": 58258
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0212328433990479,
      "learning_rate": 2.7753975376662736e-06,
      "loss": 2.2828,
      "step": 58259
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0280144214630127,
      "learning_rate": 2.7751128638631563e-06,
      "loss": 2.2974,
      "step": 58260
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.146492600440979,
      "learning_rate": 2.774828202308184e-06,
      "loss": 2.4219,
      "step": 58261
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.251266360282898,
      "learning_rate": 2.7745435530018416e-06,
      "loss": 2.3071,
      "step": 58262
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0720062255859375,
      "learning_rate": 2.774258915944611e-06,
      "loss": 2.1694,
      "step": 58263
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1988804340362549,
      "learning_rate": 2.773974291136975e-06,
      "loss": 2.5752,
      "step": 58264
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0882662534713745,
      "learning_rate": 2.773689678579411e-06,
      "loss": 2.3202,
      "step": 58265
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.991462230682373,
      "learning_rate": 2.7734050782724088e-06,
      "loss": 2.1335,
      "step": 58266
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0194271802902222,
      "learning_rate": 2.773120490216444e-06,
      "loss": 2.5775,
      "step": 58267
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0537687540054321,
      "learning_rate": 2.7728359144120066e-06,
      "loss": 2.182,
      "step": 58268
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0317648649215698,
      "learning_rate": 2.7725513508595727e-06,
      "loss": 2.2659,
      "step": 58269
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.297866940498352,
      "learning_rate": 2.772266799559629e-06,
      "loss": 2.2747,
      "step": 58270
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0397591590881348,
      "learning_rate": 2.7719822605126535e-06,
      "loss": 2.3257,
      "step": 58271
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1197333335876465,
      "learning_rate": 2.771697733719134e-06,
      "loss": 2.7251,
      "step": 58272
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2909338474273682,
      "learning_rate": 2.7714132191795507e-06,
      "loss": 2.4836,
      "step": 58273
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1581343412399292,
      "learning_rate": 2.7711287168943825e-06,
      "loss": 2.3118,
      "step": 58274
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0230913162231445,
      "learning_rate": 2.7708442268641167e-06,
      "loss": 2.1201,
      "step": 58275
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9902085065841675,
      "learning_rate": 2.77055974908923e-06,
      "loss": 2.0295,
      "step": 58276
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1304762363433838,
      "learning_rate": 2.7702752835702118e-06,
      "loss": 2.2545,
      "step": 58277
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.021351933479309,
      "learning_rate": 2.769990830307541e-06,
      "loss": 2.3368,
      "step": 58278
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0832282304763794,
      "learning_rate": 2.769706389301695e-06,
      "loss": 2.0093,
      "step": 58279
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0536364316940308,
      "learning_rate": 2.7694219605531648e-06,
      "loss": 2.3273,
      "step": 58280
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.355143427848816,
      "learning_rate": 2.769137544062427e-06,
      "loss": 2.3703,
      "step": 58281
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1519510746002197,
      "learning_rate": 2.768853139829962e-06,
      "loss": 2.3585,
      "step": 58282
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.097100853919983,
      "learning_rate": 2.7685687478562583e-06,
      "loss": 2.3825,
      "step": 58283
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5248218774795532,
      "learning_rate": 2.7682843681417915e-06,
      "loss": 2.0781,
      "step": 58284
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1337058544158936,
      "learning_rate": 2.76800000068705e-06,
      "loss": 2.1455,
      "step": 58285
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1073006391525269,
      "learning_rate": 2.7677156454925104e-06,
      "loss": 2.397,
      "step": 58286
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0232276916503906,
      "learning_rate": 2.7674313025586587e-06,
      "loss": 2.2589,
      "step": 58287
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1429095268249512,
      "learning_rate": 2.7671469718859736e-06,
      "loss": 2.4013,
      "step": 58288
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0976811647415161,
      "learning_rate": 2.76686265347494e-06,
      "loss": 2.458,
      "step": 58289
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.064164161682129,
      "learning_rate": 2.7665783473260397e-06,
      "loss": 2.577,
      "step": 58290
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.105347990989685,
      "learning_rate": 2.766294053439754e-06,
      "loss": 2.4319,
      "step": 58291
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1352512836456299,
      "learning_rate": 2.766009771816561e-06,
      "loss": 2.3326,
      "step": 58292
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.015241265296936,
      "learning_rate": 2.7657255024569497e-06,
      "loss": 2.4542,
      "step": 58293
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1307405233383179,
      "learning_rate": 2.7654412453613945e-06,
      "loss": 2.1675,
      "step": 58294
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1139357089996338,
      "learning_rate": 2.765157000530385e-06,
      "loss": 2.3861,
      "step": 58295
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0466347932815552,
      "learning_rate": 2.764872767964396e-06,
      "loss": 2.2572,
      "step": 58296
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.068745493888855,
      "learning_rate": 2.7645885476639165e-06,
      "loss": 2.2151,
      "step": 58297
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1746766567230225,
      "learning_rate": 2.764304339629421e-06,
      "loss": 2.2938,
      "step": 58298
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1816458702087402,
      "learning_rate": 2.764020143861398e-06,
      "loss": 2.4363,
      "step": 58299
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1175668239593506,
      "learning_rate": 2.763735960360323e-06,
      "loss": 2.3073,
      "step": 58300
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0401753187179565,
      "learning_rate": 2.7634517891266833e-06,
      "loss": 2.1764,
      "step": 58301
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1300437450408936,
      "learning_rate": 2.763167630160959e-06,
      "loss": 2.2613,
      "step": 58302
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1600069999694824,
      "learning_rate": 2.762883483463631e-06,
      "loss": 2.3042,
      "step": 58303
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.02227783203125,
      "learning_rate": 2.762599349035178e-06,
      "loss": 2.3099,
      "step": 58304
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0452507734298706,
      "learning_rate": 2.7623152268760868e-06,
      "loss": 2.2502,
      "step": 58305
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0336276292800903,
      "learning_rate": 2.762031116986835e-06,
      "loss": 2.4566,
      "step": 58306
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9743462800979614,
      "learning_rate": 2.7617470193679098e-06,
      "loss": 2.26,
      "step": 58307
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0442465543746948,
      "learning_rate": 2.7614629340197886e-06,
      "loss": 2.2496,
      "step": 58308
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0880614519119263,
      "learning_rate": 2.76117886094295e-06,
      "loss": 2.496,
      "step": 58309
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0746006965637207,
      "learning_rate": 2.7608948001378833e-06,
      "loss": 2.3741,
      "step": 58310
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.056342363357544,
      "learning_rate": 2.7606107516050628e-06,
      "loss": 2.1573,
      "step": 58311
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5346593856811523,
      "learning_rate": 2.7603267153449754e-06,
      "loss": 2.0965,
      "step": 58312
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0968296527862549,
      "learning_rate": 2.760042691358099e-06,
      "loss": 2.3601,
      "step": 58313
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0428546667099,
      "learning_rate": 2.759758679644918e-06,
      "loss": 2.2027,
      "step": 58314
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.212300419807434,
      "learning_rate": 2.7594746802059104e-06,
      "loss": 2.275,
      "step": 58315
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0298727750778198,
      "learning_rate": 2.759190693041566e-06,
      "loss": 2.127,
      "step": 58316
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0755482912063599,
      "learning_rate": 2.7589067181523533e-06,
      "loss": 2.3099,
      "step": 58317
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.103804349899292,
      "learning_rate": 2.7586227555387636e-06,
      "loss": 2.1234,
      "step": 58318
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2169932126998901,
      "learning_rate": 2.7583388052012728e-06,
      "loss": 2.0619,
      "step": 58319
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1443586349487305,
      "learning_rate": 2.7580548671403663e-06,
      "loss": 2.2475,
      "step": 58320
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9998735189437866,
      "learning_rate": 2.7577709413565213e-06,
      "loss": 2.463,
      "step": 58321
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.027759075164795,
      "learning_rate": 2.757487027850224e-06,
      "loss": 2.6248,
      "step": 58322
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.032823920249939,
      "learning_rate": 2.757203126621951e-06,
      "loss": 2.1663,
      "step": 58323
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0582432746887207,
      "learning_rate": 2.756919237672189e-06,
      "loss": 2.3372,
      "step": 58324
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.049376368522644,
      "learning_rate": 2.756635361001412e-06,
      "loss": 2.4313,
      "step": 58325
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.032456874847412,
      "learning_rate": 2.756351496610109e-06,
      "loss": 2.3271,
      "step": 58326
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1596522331237793,
      "learning_rate": 2.7560676444987543e-06,
      "loss": 2.3193,
      "step": 58327
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0466066598892212,
      "learning_rate": 2.755783804667835e-06,
      "loss": 2.143,
      "step": 58328
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0376334190368652,
      "learning_rate": 2.7554999771178303e-06,
      "loss": 2.2321,
      "step": 58329
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0852452516555786,
      "learning_rate": 2.7552161618492212e-06,
      "loss": 2.2277,
      "step": 58330
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0366815328598022,
      "learning_rate": 2.7549323588624845e-06,
      "loss": 2.3227,
      "step": 58331
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1009113788604736,
      "learning_rate": 2.7546485681581083e-06,
      "loss": 2.2598,
      "step": 58332
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0677111148834229,
      "learning_rate": 2.754364789736568e-06,
      "loss": 2.4952,
      "step": 58333
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.125400185585022,
      "learning_rate": 2.7540810235983495e-06,
      "loss": 2.454,
      "step": 58334
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9946938157081604,
      "learning_rate": 2.7537972697439295e-06,
      "loss": 2.2927,
      "step": 58335
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.237898588180542,
      "learning_rate": 2.7535135281737933e-06,
      "loss": 2.511,
      "step": 58336
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1847889423370361,
      "learning_rate": 2.7532297988884203e-06,
      "loss": 2.4379,
      "step": 58337
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1561559438705444,
      "learning_rate": 2.7529460818882882e-06,
      "loss": 2.5772,
      "step": 58338
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2537987232208252,
      "learning_rate": 2.7526623771738837e-06,
      "loss": 2.555,
      "step": 58339
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2142651081085205,
      "learning_rate": 2.752378684745681e-06,
      "loss": 2.3954,
      "step": 58340
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1328097581863403,
      "learning_rate": 2.7520950046041682e-06,
      "loss": 2.5177,
      "step": 58341
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0921308994293213,
      "learning_rate": 2.7518113367498233e-06,
      "loss": 2.3793,
      "step": 58342
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0473294258117676,
      "learning_rate": 2.751527681183127e-06,
      "loss": 2.1597,
      "step": 58343
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2210203409194946,
      "learning_rate": 2.751244037904557e-06,
      "loss": 2.195,
      "step": 58344
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1042472124099731,
      "learning_rate": 2.750960406914599e-06,
      "loss": 2.3014,
      "step": 58345
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.277480959892273,
      "learning_rate": 2.7506767882137296e-06,
      "loss": 2.1113,
      "step": 58346
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1181694269180298,
      "learning_rate": 2.7503931818024355e-06,
      "loss": 2.2111,
      "step": 58347
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.10695219039917,
      "learning_rate": 2.75010958768119e-06,
      "loss": 2.3293,
      "step": 58348
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0117466449737549,
      "learning_rate": 2.749826005850481e-06,
      "loss": 2.5727,
      "step": 58349
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2456713914871216,
      "learning_rate": 2.7495424363107835e-06,
      "loss": 2.1898,
      "step": 58350
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1511682271957397,
      "learning_rate": 2.749258879062583e-06,
      "loss": 2.3707,
      "step": 58351
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0011812448501587,
      "learning_rate": 2.7489753341063564e-06,
      "loss": 2.349,
      "step": 58352
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1032209396362305,
      "learning_rate": 2.7486918014425877e-06,
      "loss": 2.6102,
      "step": 58353
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.188202977180481,
      "learning_rate": 2.7484082810717527e-06,
      "loss": 2.4851,
      "step": 58354
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0238710641860962,
      "learning_rate": 2.7481247729943384e-06,
      "loss": 2.2295,
      "step": 58355
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.092753291130066,
      "learning_rate": 2.747841277210822e-06,
      "loss": 2.1594,
      "step": 58356
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.140864372253418,
      "learning_rate": 2.747557793721686e-06,
      "loss": 2.1407,
      "step": 58357
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1294069290161133,
      "learning_rate": 2.7472743225274044e-06,
      "loss": 2.3828,
      "step": 58358
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7107380628585815,
      "learning_rate": 2.746990863628466e-06,
      "loss": 2.3375,
      "step": 58359
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.021969199180603,
      "learning_rate": 2.746707417025345e-06,
      "loss": 2.0735,
      "step": 58360
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1113842725753784,
      "learning_rate": 2.746423982718528e-06,
      "loss": 2.4863,
      "step": 58361
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0582619905471802,
      "learning_rate": 2.74614056070849e-06,
      "loss": 2.4977,
      "step": 58362
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.065616250038147,
      "learning_rate": 2.7458571509957156e-06,
      "loss": 2.4781,
      "step": 58363
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1251744031906128,
      "learning_rate": 2.7455737535806847e-06,
      "loss": 2.3043,
      "step": 58364
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1373205184936523,
      "learning_rate": 2.7452903684638734e-06,
      "loss": 2.1407,
      "step": 58365
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0899587869644165,
      "learning_rate": 2.745006995645767e-06,
      "loss": 2.4045,
      "step": 58366
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1617904901504517,
      "learning_rate": 2.7447236351268424e-06,
      "loss": 2.3292,
      "step": 58367
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.111428141593933,
      "learning_rate": 2.744440286907585e-06,
      "loss": 2.5017,
      "step": 58368
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0331013202667236,
      "learning_rate": 2.744156950988471e-06,
      "loss": 2.2281,
      "step": 58369
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1039459705352783,
      "learning_rate": 2.7438736273699817e-06,
      "loss": 2.2828,
      "step": 58370
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.041466474533081,
      "learning_rate": 2.7435903160525956e-06,
      "loss": 2.4296,
      "step": 58371
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4563392400741577,
      "learning_rate": 2.743307017036796e-06,
      "loss": 2.2711,
      "step": 58372
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.078936219215393,
      "learning_rate": 2.74302373032306e-06,
      "loss": 2.3783,
      "step": 58373
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0681841373443604,
      "learning_rate": 2.742740455911872e-06,
      "loss": 2.4595,
      "step": 58374
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0963525772094727,
      "learning_rate": 2.742457193803708e-06,
      "loss": 2.3609,
      "step": 58375
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.126863718032837,
      "learning_rate": 2.7421739439990523e-06,
      "loss": 2.1867,
      "step": 58376
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1390364170074463,
      "learning_rate": 2.74189070649838e-06,
      "loss": 2.4332,
      "step": 58377
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.064537763595581,
      "learning_rate": 2.7416074813021774e-06,
      "loss": 2.0392,
      "step": 58378
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0330389738082886,
      "learning_rate": 2.741324268410919e-06,
      "loss": 2.2979,
      "step": 58379
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1200345754623413,
      "learning_rate": 2.74104106782509e-06,
      "loss": 2.321,
      "step": 58380
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2206815481185913,
      "learning_rate": 2.740757879545165e-06,
      "loss": 2.2305,
      "step": 58381
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0841153860092163,
      "learning_rate": 2.740474703571633e-06,
      "loss": 2.4438,
      "step": 58382
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1716487407684326,
      "learning_rate": 2.7401915399049615e-06,
      "loss": 2.257,
      "step": 58383
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0975182056427002,
      "learning_rate": 2.73990838854564e-06,
      "loss": 2.4488,
      "step": 58384
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2012161016464233,
      "learning_rate": 2.7396252494941434e-06,
      "loss": 2.318,
      "step": 58385
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2357088327407837,
      "learning_rate": 2.7393421227509564e-06,
      "loss": 2.1398,
      "step": 58386
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1396790742874146,
      "learning_rate": 2.7390590083165537e-06,
      "loss": 2.1549,
      "step": 58387
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.218738079071045,
      "learning_rate": 2.738775906191421e-06,
      "loss": 2.197,
      "step": 58388
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0847448110580444,
      "learning_rate": 2.7384928163760317e-06,
      "loss": 2.3825,
      "step": 58389
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0979441404342651,
      "learning_rate": 2.738209738870873e-06,
      "loss": 2.2519,
      "step": 58390
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1062119007110596,
      "learning_rate": 2.7379266736764208e-06,
      "loss": 2.2299,
      "step": 58391
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1748141050338745,
      "learning_rate": 2.7376436207931524e-06,
      "loss": 2.1919,
      "step": 58392
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0714985132217407,
      "learning_rate": 2.737360580221553e-06,
      "loss": 2.2904,
      "step": 58393
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0872907638549805,
      "learning_rate": 2.7370775519620973e-06,
      "loss": 2.4263,
      "step": 58394
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1113932132720947,
      "learning_rate": 2.7367945360152714e-06,
      "loss": 2.2829,
      "step": 58395
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.064704179763794,
      "learning_rate": 2.7365115323815505e-06,
      "loss": 2.5854,
      "step": 58396
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.105884313583374,
      "learning_rate": 2.736228541061412e-06,
      "loss": 2.3081,
      "step": 58397
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1340159177780151,
      "learning_rate": 2.7359455620553433e-06,
      "loss": 2.1945,
      "step": 58398
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0294800996780396,
      "learning_rate": 2.7356625953638183e-06,
      "loss": 2.3157,
      "step": 58399
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0388188362121582,
      "learning_rate": 2.7353796409873155e-06,
      "loss": 2.3777,
      "step": 58400
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0340723991394043,
      "learning_rate": 2.7350966989263207e-06,
      "loss": 2.1885,
      "step": 58401
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.993031919002533,
      "learning_rate": 2.734813769181307e-06,
      "loss": 2.1966,
      "step": 58402
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0814701318740845,
      "learning_rate": 2.7345308517527603e-06,
      "loss": 2.3536,
      "step": 58403
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1250312328338623,
      "learning_rate": 2.734247946641154e-06,
      "loss": 2.3662,
      "step": 58404
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1707630157470703,
      "learning_rate": 2.733965053846973e-06,
      "loss": 2.3067,
      "step": 58405
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9577657580375671,
      "learning_rate": 2.733682173370693e-06,
      "loss": 2.395,
      "step": 58406
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0648683309555054,
      "learning_rate": 2.733399305212797e-06,
      "loss": 2.2942,
      "step": 58407
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2117730379104614,
      "learning_rate": 2.733116449373764e-06,
      "loss": 2.0923,
      "step": 58408
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1420577764511108,
      "learning_rate": 2.7328336058540715e-06,
      "loss": 2.6111,
      "step": 58409
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.08047354221344,
      "learning_rate": 2.732550774654197e-06,
      "loss": 2.1522,
      "step": 58410
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.058010458946228,
      "learning_rate": 2.7322679557746256e-06,
      "loss": 2.3477,
      "step": 58411
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.118006706237793,
      "learning_rate": 2.73198514921583e-06,
      "loss": 2.2143,
      "step": 58412
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1660031080245972,
      "learning_rate": 2.7317023549782984e-06,
      "loss": 2.3651,
      "step": 58413
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2222718000411987,
      "learning_rate": 2.7314195730625017e-06,
      "loss": 2.3491,
      "step": 58414
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0465061664581299,
      "learning_rate": 2.7311368034689244e-06,
      "loss": 2.3641,
      "step": 58415
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0724323987960815,
      "learning_rate": 2.7308540461980426e-06,
      "loss": 2.3412,
      "step": 58416
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0625578165054321,
      "learning_rate": 2.730571301250341e-06,
      "loss": 2.3512,
      "step": 58417
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.077959656715393,
      "learning_rate": 2.7302885686262927e-06,
      "loss": 2.345,
      "step": 58418
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0703357458114624,
      "learning_rate": 2.7300058483263814e-06,
      "loss": 2.3028,
      "step": 58419
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1909054517745972,
      "learning_rate": 2.729723140351085e-06,
      "loss": 2.1035,
      "step": 58420
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0807158946990967,
      "learning_rate": 2.7294404447008794e-06,
      "loss": 2.2962,
      "step": 58421
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9863808155059814,
      "learning_rate": 2.729157761376251e-06,
      "loss": 2.173,
      "step": 58422
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0706876516342163,
      "learning_rate": 2.728875090377674e-06,
      "loss": 2.414,
      "step": 58423
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0861496925354004,
      "learning_rate": 2.7285924317056256e-06,
      "loss": 2.5058,
      "step": 58424
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.073868751525879,
      "learning_rate": 2.7283097853605913e-06,
      "loss": 2.3847,
      "step": 58425
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0723849534988403,
      "learning_rate": 2.7280271513430457e-06,
      "loss": 2.2392,
      "step": 58426
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0528416633605957,
      "learning_rate": 2.7277445296534667e-06,
      "loss": 2.3652,
      "step": 58427
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1395893096923828,
      "learning_rate": 2.727461920292339e-06,
      "loss": 2.3934,
      "step": 58428
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0440614223480225,
      "learning_rate": 2.727179323260136e-06,
      "loss": 2.3789,
      "step": 58429
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1268644332885742,
      "learning_rate": 2.726896738557342e-06,
      "loss": 2.44,
      "step": 58430
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0442781448364258,
      "learning_rate": 2.72661416618443e-06,
      "loss": 2.2966,
      "step": 58431
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0324903726577759,
      "learning_rate": 2.7263316061418866e-06,
      "loss": 2.1102,
      "step": 58432
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1970241069793701,
      "learning_rate": 2.7260490584301823e-06,
      "loss": 2.1155,
      "step": 58433
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0421141386032104,
      "learning_rate": 2.7257665230498043e-06,
      "loss": 2.4519,
      "step": 58434
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1732276678085327,
      "learning_rate": 2.7254840000012273e-06,
      "loss": 2.3238,
      "step": 58435
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.15947687625885,
      "learning_rate": 2.72520148928493e-06,
      "loss": 2.0999,
      "step": 58436
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1138086318969727,
      "learning_rate": 2.7249189909013896e-06,
      "loss": 2.567,
      "step": 58437
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0567165613174438,
      "learning_rate": 2.7246365048510902e-06,
      "loss": 2.2416,
      "step": 58438
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0323734283447266,
      "learning_rate": 2.724354031134505e-06,
      "loss": 2.1294,
      "step": 58439
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0783658027648926,
      "learning_rate": 2.724071569752119e-06,
      "loss": 2.1439,
      "step": 58440
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1058861017227173,
      "learning_rate": 2.7237891207044053e-06,
      "loss": 2.1916,
      "step": 58441
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9930459260940552,
      "learning_rate": 2.7235066839918477e-06,
      "loss": 2.3156,
      "step": 58442
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2110854387283325,
      "learning_rate": 2.7232242596149193e-06,
      "loss": 2.3285,
      "step": 58443
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0076119899749756,
      "learning_rate": 2.7229418475741055e-06,
      "loss": 2.3101,
      "step": 58444
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0161265134811401,
      "learning_rate": 2.7226594478698776e-06,
      "loss": 2.3574,
      "step": 58445
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0610791444778442,
      "learning_rate": 2.722377060502723e-06,
      "loss": 2.2541,
      "step": 58446
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.993508517742157,
      "learning_rate": 2.7220946854731143e-06,
      "loss": 2.3008,
      "step": 58447
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.043825626373291,
      "learning_rate": 2.721812322781533e-06,
      "loss": 2.0041,
      "step": 58448
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0272334814071655,
      "learning_rate": 2.721529972428454e-06,
      "loss": 2.3641,
      "step": 58449
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0256822109222412,
      "learning_rate": 2.72124763441436e-06,
      "loss": 2.2615,
      "step": 58450
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.054172396659851,
      "learning_rate": 2.720965308739726e-06,
      "loss": 2.4598,
      "step": 58451
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1344488859176636,
      "learning_rate": 2.720682995405035e-06,
      "loss": 2.1219,
      "step": 58452
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0445486307144165,
      "learning_rate": 2.720400694410761e-06,
      "loss": 2.2723,
      "step": 58453
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2847497463226318,
      "learning_rate": 2.7201184057573882e-06,
      "loss": 2.6371,
      "step": 58454
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0946637392044067,
      "learning_rate": 2.7198361294453914e-06,
      "loss": 2.2662,
      "step": 58455
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.131916880607605,
      "learning_rate": 2.7195538654752473e-06,
      "loss": 2.3006,
      "step": 58456
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1188316345214844,
      "learning_rate": 2.7192716138474385e-06,
      "loss": 2.1915,
      "step": 58457
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1000326871871948,
      "learning_rate": 2.7189893745624395e-06,
      "loss": 2.3687,
      "step": 58458
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.199277400970459,
      "learning_rate": 2.718707147620734e-06,
      "loss": 2.1471,
      "step": 58459
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.110724687576294,
      "learning_rate": 2.718424933022794e-06,
      "loss": 2.3602,
      "step": 58460
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1492739915847778,
      "learning_rate": 2.718142730769109e-06,
      "loss": 2.3289,
      "step": 58461
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.046331524848938,
      "learning_rate": 2.7178605408601423e-06,
      "loss": 2.3576,
      "step": 58462
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1065638065338135,
      "learning_rate": 2.7175783632963828e-06,
      "loss": 2.201,
      "step": 58463
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0614898204803467,
      "learning_rate": 2.717296198078303e-06,
      "loss": 2.3031,
      "step": 58464
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1732958555221558,
      "learning_rate": 2.7170140452063876e-06,
      "loss": 2.2927,
      "step": 58465
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0915054082870483,
      "learning_rate": 2.716731904681108e-06,
      "loss": 2.4631,
      "step": 58466
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9908340573310852,
      "learning_rate": 2.7164497765029483e-06,
      "loss": 2.3652,
      "step": 58467
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1435980796813965,
      "learning_rate": 2.716167660672383e-06,
      "loss": 2.128,
      "step": 58468
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1364914178848267,
      "learning_rate": 2.7158855571898935e-06,
      "loss": 2.1477,
      "step": 58469
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9990665912628174,
      "learning_rate": 2.7156034660559537e-06,
      "loss": 2.6041,
      "step": 58470
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.172305941581726,
      "learning_rate": 2.715321387271047e-06,
      "loss": 2.368,
      "step": 58471
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1053920984268188,
      "learning_rate": 2.7150393208356473e-06,
      "loss": 2.2298,
      "step": 58472
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2609695196151733,
      "learning_rate": 2.7147572667502366e-06,
      "loss": 2.4691,
      "step": 58473
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0089850425720215,
      "learning_rate": 2.7144752250152905e-06,
      "loss": 2.2697,
      "step": 58474
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1121647357940674,
      "learning_rate": 2.7141931956312883e-06,
      "loss": 2.2665,
      "step": 58475
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.977874219417572,
      "learning_rate": 2.713911178598705e-06,
      "loss": 2.2985,
      "step": 58476
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.119957685470581,
      "learning_rate": 2.713629173918023e-06,
      "loss": 2.3485,
      "step": 58477
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0629905462265015,
      "learning_rate": 2.713347181589716e-06,
      "loss": 2.4189,
      "step": 58478
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9981091618537903,
      "learning_rate": 2.7130652016142676e-06,
      "loss": 2.3429,
      "step": 58479
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1596601009368896,
      "learning_rate": 2.712783233992149e-06,
      "loss": 2.321,
      "step": 58480
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0646324157714844,
      "learning_rate": 2.712501278723846e-06,
      "loss": 2.4334,
      "step": 58481
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0064890384674072,
      "learning_rate": 2.7122193358098325e-06,
      "loss": 2.2885,
      "step": 58482
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0535697937011719,
      "learning_rate": 2.711937405250583e-06,
      "loss": 2.3942,
      "step": 58483
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0493274927139282,
      "learning_rate": 2.711655487046583e-06,
      "loss": 2.237,
      "step": 58484
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.117209792137146,
      "learning_rate": 2.711373581198302e-06,
      "loss": 2.2231,
      "step": 58485
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9706131815910339,
      "learning_rate": 2.7110916877062266e-06,
      "loss": 2.4517,
      "step": 58486
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.144614815711975,
      "learning_rate": 2.71080980657083e-06,
      "loss": 2.3568,
      "step": 58487
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0477851629257202,
      "learning_rate": 2.710527937792591e-06,
      "loss": 2.299,
      "step": 58488
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0400985479354858,
      "learning_rate": 2.7102460813719835e-06,
      "loss": 2.4268,
      "step": 58489
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0089428424835205,
      "learning_rate": 2.7099642373094915e-06,
      "loss": 2.2382,
      "step": 58490
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0925769805908203,
      "learning_rate": 2.709682405605587e-06,
      "loss": 2.1282,
      "step": 58491
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2907564640045166,
      "learning_rate": 2.709400586260754e-06,
      "loss": 2.3699,
      "step": 58492
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0346025228500366,
      "learning_rate": 2.7091187792754637e-06,
      "loss": 2.3861,
      "step": 58493
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0285429954528809,
      "learning_rate": 2.7088369846502004e-06,
      "loss": 2.283,
      "step": 58494
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0487747192382812,
      "learning_rate": 2.7085552023854365e-06,
      "loss": 2.1766,
      "step": 58495
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.21953547000885,
      "learning_rate": 2.708273432481654e-06,
      "loss": 2.2573,
      "step": 58496
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1168495416641235,
      "learning_rate": 2.7079916749393263e-06,
      "loss": 2.4093,
      "step": 58497
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2075490951538086,
      "learning_rate": 2.707709929758936e-06,
      "loss": 2.3316,
      "step": 58498
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.156378984451294,
      "learning_rate": 2.7074281969409544e-06,
      "loss": 2.3555,
      "step": 58499
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9957389235496521,
      "learning_rate": 2.7071464764858656e-06,
      "loss": 2.2716,
      "step": 58500
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1076545715332031,
      "learning_rate": 2.706864768394144e-06,
      "loss": 2.1191,
      "step": 58501
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.006859540939331,
      "learning_rate": 2.7065830726662677e-06,
      "loss": 2.4364,
      "step": 58502
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2096309661865234,
      "learning_rate": 2.706301389302711e-06,
      "loss": 2.1481,
      "step": 58503
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.190702199935913,
      "learning_rate": 2.7060197183039584e-06,
      "loss": 2.1864,
      "step": 58504
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.023288607597351,
      "learning_rate": 2.70573805967048e-06,
      "loss": 2.1851,
      "step": 58505
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.041068434715271,
      "learning_rate": 2.705456413402758e-06,
      "loss": 2.3575,
      "step": 58506
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.072135090827942,
      "learning_rate": 2.7051747795012674e-06,
      "loss": 2.5614,
      "step": 58507
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0405945777893066,
      "learning_rate": 2.7048931579664894e-06,
      "loss": 2.323,
      "step": 58508
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0775383710861206,
      "learning_rate": 2.704611548798899e-06,
      "loss": 2.4218,
      "step": 58509
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0159835815429688,
      "learning_rate": 2.7043299519989695e-06,
      "loss": 2.3286,
      "step": 58510
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0077048540115356,
      "learning_rate": 2.7040483675671856e-06,
      "loss": 2.3264,
      "step": 58511
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.077975869178772,
      "learning_rate": 2.7037667955040183e-06,
      "loss": 2.4729,
      "step": 58512
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0897977352142334,
      "learning_rate": 2.7034852358099506e-06,
      "loss": 2.5281,
      "step": 58513
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1642861366271973,
      "learning_rate": 2.7032036884854583e-06,
      "loss": 2.1554,
      "step": 58514
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0405205488204956,
      "learning_rate": 2.7029221535310136e-06,
      "loss": 2.5768,
      "step": 58515
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1496464014053345,
      "learning_rate": 2.702640630947101e-06,
      "loss": 2.4041,
      "step": 58516
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0837568044662476,
      "learning_rate": 2.702359120734194e-06,
      "loss": 2.1118,
      "step": 58517
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2370811700820923,
      "learning_rate": 2.702077622892768e-06,
      "loss": 2.1703,
      "step": 58518
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1668899059295654,
      "learning_rate": 2.7017961374233047e-06,
      "loss": 2.188,
      "step": 58519
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0179073810577393,
      "learning_rate": 2.701514664326277e-06,
      "loss": 2.3896,
      "step": 58520
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0673410892486572,
      "learning_rate": 2.701233203602166e-06,
      "loss": 2.1552,
      "step": 58521
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9831364750862122,
      "learning_rate": 2.700951755251445e-06,
      "loss": 2.273,
      "step": 58522
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.03347647190094,
      "learning_rate": 2.7006703192745965e-06,
      "loss": 2.1782,
      "step": 58523
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1370030641555786,
      "learning_rate": 2.7003888956720902e-06,
      "loss": 2.248,
      "step": 58524
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9664702415466309,
      "learning_rate": 2.700107484444411e-06,
      "loss": 2.2921,
      "step": 58525
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0030848979949951,
      "learning_rate": 2.699826085592029e-06,
      "loss": 2.3407,
      "step": 58526
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9599714279174805,
      "learning_rate": 2.6995446991154305e-06,
      "loss": 2.3912,
      "step": 58527
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1964466571807861,
      "learning_rate": 2.6992633250150814e-06,
      "loss": 2.3017,
      "step": 58528
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1620420217514038,
      "learning_rate": 2.6989819632914647e-06,
      "loss": 2.4327,
      "step": 58529
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.066205620765686,
      "learning_rate": 2.6987006139450555e-06,
      "loss": 2.4693,
      "step": 58530
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0899639129638672,
      "learning_rate": 2.698419276976334e-06,
      "loss": 2.4491,
      "step": 58531
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.11933434009552,
      "learning_rate": 2.698137952385772e-06,
      "loss": 2.2828,
      "step": 58532
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0202864408493042,
      "learning_rate": 2.6978566401738525e-06,
      "loss": 2.2092,
      "step": 58533
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0403404235839844,
      "learning_rate": 2.6975753403410452e-06,
      "loss": 2.474,
      "step": 58534
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0627551078796387,
      "learning_rate": 2.6972940528878354e-06,
      "loss": 2.4053,
      "step": 58535
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1628338098526,
      "learning_rate": 2.6970127778146914e-06,
      "loss": 2.4461,
      "step": 58536
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1790379285812378,
      "learning_rate": 2.6967315151220986e-06,
      "loss": 2.1402,
      "step": 58537
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1367087364196777,
      "learning_rate": 2.6964502648105283e-06,
      "loss": 2.1352,
      "step": 58538
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.202359676361084,
      "learning_rate": 2.6961690268804554e-06,
      "loss": 2.2825,
      "step": 58539
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0439629554748535,
      "learning_rate": 2.695887801332363e-06,
      "loss": 2.381,
      "step": 58540
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0322095155715942,
      "learning_rate": 2.6956065881667247e-06,
      "loss": 2.3322,
      "step": 58541
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0701162815093994,
      "learning_rate": 2.6953253873840135e-06,
      "loss": 2.0406,
      "step": 58542
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0972542762756348,
      "learning_rate": 2.695044198984713e-06,
      "loss": 2.5224,
      "step": 58543
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0273258686065674,
      "learning_rate": 2.694763022969298e-06,
      "loss": 2.3875,
      "step": 58544
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0386428833007812,
      "learning_rate": 2.694481859338238e-06,
      "loss": 2.2769,
      "step": 58545
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0541775226593018,
      "learning_rate": 2.694200708092021e-06,
      "loss": 2.2474,
      "step": 58546
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9859378933906555,
      "learning_rate": 2.6939195692311137e-06,
      "loss": 2.4026,
      "step": 58547
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1198257207870483,
      "learning_rate": 2.6936384427560004e-06,
      "loss": 2.2601,
      "step": 58548
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0848138332366943,
      "learning_rate": 2.6933573286671512e-06,
      "loss": 2.3707,
      "step": 58549
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1194908618927002,
      "learning_rate": 2.693076226965048e-06,
      "loss": 2.3537,
      "step": 58550
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1483159065246582,
      "learning_rate": 2.6927951376501638e-06,
      "loss": 2.2776,
      "step": 58551
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0731432437896729,
      "learning_rate": 2.6925140607229784e-06,
      "loss": 2.3891,
      "step": 58552
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0356519222259521,
      "learning_rate": 2.6922329961839655e-06,
      "loss": 2.0784,
      "step": 58553
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1855249404907227,
      "learning_rate": 2.691951944033604e-06,
      "loss": 2.3847,
      "step": 58554
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9748027324676514,
      "learning_rate": 2.6916709042723655e-06,
      "loss": 2.4068,
      "step": 58555
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.142160415649414,
      "learning_rate": 2.6913898769007318e-06,
      "loss": 2.3171,
      "step": 58556
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9779596924781799,
      "learning_rate": 2.691108861919175e-06,
      "loss": 2.3287,
      "step": 58557
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6616538763046265,
      "learning_rate": 2.6908278593281766e-06,
      "loss": 2.4267,
      "step": 58558
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2111271619796753,
      "learning_rate": 2.6905468691282077e-06,
      "loss": 2.265,
      "step": 58559
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2011959552764893,
      "learning_rate": 2.690265891319749e-06,
      "loss": 2.5655,
      "step": 58560
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0975233316421509,
      "learning_rate": 2.689984925903273e-06,
      "loss": 2.3361,
      "step": 58561
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2947710752487183,
      "learning_rate": 2.6897039728792594e-06,
      "loss": 2.1222,
      "step": 58562
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0830645561218262,
      "learning_rate": 2.6894230322481817e-06,
      "loss": 2.2167,
      "step": 58563
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0235154628753662,
      "learning_rate": 2.6891421040105194e-06,
      "loss": 2.0276,
      "step": 58564
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.141608476638794,
      "learning_rate": 2.688861188166747e-06,
      "loss": 2.4231,
      "step": 58565
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1706013679504395,
      "learning_rate": 2.6885802847173416e-06,
      "loss": 2.2188,
      "step": 58566
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0164374113082886,
      "learning_rate": 2.6882993936627745e-06,
      "loss": 2.4217,
      "step": 58567
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0468497276306152,
      "learning_rate": 2.6880185150035286e-06,
      "loss": 2.4419,
      "step": 58568
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.051047444343567,
      "learning_rate": 2.687737648740074e-06,
      "loss": 2.5289,
      "step": 58569
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1638048887252808,
      "learning_rate": 2.6874567948728936e-06,
      "loss": 2.3558,
      "step": 58570
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0139126777648926,
      "learning_rate": 2.6871759534024565e-06,
      "loss": 2.2461,
      "step": 58571
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.229386568069458,
      "learning_rate": 2.686895124329246e-06,
      "loss": 2.3728,
      "step": 58572
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0096733570098877,
      "learning_rate": 2.6866143076537343e-06,
      "loss": 2.2989,
      "step": 58573
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0019655227661133,
      "learning_rate": 2.686333503376395e-06,
      "loss": 2.2777,
      "step": 58574
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1979804039001465,
      "learning_rate": 2.6860527114977086e-06,
      "loss": 2.5267,
      "step": 58575
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1150189638137817,
      "learning_rate": 2.6857719320181473e-06,
      "loss": 2.3472,
      "step": 58576
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0953919887542725,
      "learning_rate": 2.685491164938192e-06,
      "loss": 2.4012,
      "step": 58577
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0847535133361816,
      "learning_rate": 2.685210410258312e-06,
      "loss": 2.1907,
      "step": 58578
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0447214841842651,
      "learning_rate": 2.6849296679789906e-06,
      "loss": 2.3309,
      "step": 58579
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9881272315979004,
      "learning_rate": 2.6846489381007003e-06,
      "loss": 2.3795,
      "step": 58580
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1707804203033447,
      "learning_rate": 2.6843682206239164e-06,
      "loss": 2.3199,
      "step": 58581
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.084531307220459,
      "learning_rate": 2.6840875155491132e-06,
      "loss": 2.4631,
      "step": 58582
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2173446416854858,
      "learning_rate": 2.68380682287677e-06,
      "loss": 2.0845,
      "step": 58583
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.016200304031372,
      "learning_rate": 2.68352614260736e-06,
      "loss": 2.2627,
      "step": 58584
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2243053913116455,
      "learning_rate": 2.6832454747413626e-06,
      "loss": 2.4496,
      "step": 58585
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0411440134048462,
      "learning_rate": 2.682964819279248e-06,
      "loss": 2.332,
      "step": 58586
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1087297201156616,
      "learning_rate": 2.682684176221498e-06,
      "loss": 2.426,
      "step": 58587
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0679271221160889,
      "learning_rate": 2.6824035455685836e-06,
      "loss": 2.1454,
      "step": 58588
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0617955923080444,
      "learning_rate": 2.682122927320986e-06,
      "loss": 2.3148,
      "step": 58589
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.133707046508789,
      "learning_rate": 2.6818423214791733e-06,
      "loss": 2.3374,
      "step": 58590
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1075100898742676,
      "learning_rate": 2.681561728043629e-06,
      "loss": 2.2711,
      "step": 58591
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.968235433101654,
      "learning_rate": 2.6812811470148226e-06,
      "loss": 2.0598,
      "step": 58592
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1871668100357056,
      "learning_rate": 2.681000578393238e-06,
      "loss": 2.3443,
      "step": 58593
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2392661571502686,
      "learning_rate": 2.6807200221793393e-06,
      "loss": 2.3722,
      "step": 58594
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0566977262496948,
      "learning_rate": 2.680439478373611e-06,
      "loss": 2.3352,
      "step": 58595
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2160242795944214,
      "learning_rate": 2.680158946976522e-06,
      "loss": 2.153,
      "step": 58596
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.106600284576416,
      "learning_rate": 2.679878427988556e-06,
      "loss": 2.3663,
      "step": 58597
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.114889144897461,
      "learning_rate": 2.679597921410181e-06,
      "loss": 2.4133,
      "step": 58598
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1450157165527344,
      "learning_rate": 2.6793174272418776e-06,
      "loss": 2.3615,
      "step": 58599
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.061254858970642,
      "learning_rate": 2.67903694548412e-06,
      "loss": 2.2799,
      "step": 58600
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0471434593200684,
      "learning_rate": 2.678756476137381e-06,
      "loss": 2.253,
      "step": 58601
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.05679190158844,
      "learning_rate": 2.6784760192021395e-06,
      "loss": 2.1185,
      "step": 58602
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2377458810806274,
      "learning_rate": 2.678195574678868e-06,
      "loss": 2.3409,
      "step": 58603
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0306907892227173,
      "learning_rate": 2.677915142568046e-06,
      "loss": 2.0613,
      "step": 58604
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1470614671707153,
      "learning_rate": 2.6776347228701438e-06,
      "loss": 2.4956,
      "step": 58605
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1128736734390259,
      "learning_rate": 2.6773543155856453e-06,
      "loss": 2.3936,
      "step": 58606
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0922881364822388,
      "learning_rate": 2.6770739207150144e-06,
      "loss": 2.15,
      "step": 58607
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1158288717269897,
      "learning_rate": 2.676793538258735e-06,
      "loss": 2.2365,
      "step": 58608
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.067160725593567,
      "learning_rate": 2.6765131682172763e-06,
      "loss": 2.3378,
      "step": 58609
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0979539155960083,
      "learning_rate": 2.6762328105911194e-06,
      "loss": 2.1685,
      "step": 58610
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1869421005249023,
      "learning_rate": 2.6759524653807347e-06,
      "loss": 2.4517,
      "step": 58611
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0396009683609009,
      "learning_rate": 2.675672132586602e-06,
      "loss": 2.2094,
      "step": 58612
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0291420221328735,
      "learning_rate": 2.6753918122091915e-06,
      "loss": 2.2891,
      "step": 58613
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.117183804512024,
      "learning_rate": 2.675111504248985e-06,
      "loss": 2.4102,
      "step": 58614
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2324422597885132,
      "learning_rate": 2.67483120870645e-06,
      "loss": 2.3702,
      "step": 58615
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1118463277816772,
      "learning_rate": 2.674550925582069e-06,
      "loss": 2.2544,
      "step": 58616
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1391347646713257,
      "learning_rate": 2.674270654876311e-06,
      "loss": 2.2987,
      "step": 58617
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.116973876953125,
      "learning_rate": 2.6739903965896565e-06,
      "loss": 2.1515,
      "step": 58618
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0903682708740234,
      "learning_rate": 2.673710150722578e-06,
      "loss": 2.3985,
      "step": 58619
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0761140584945679,
      "learning_rate": 2.6734299172755505e-06,
      "loss": 2.4065,
      "step": 58620
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0253651142120361,
      "learning_rate": 2.6731496962490465e-06,
      "loss": 2.1128,
      "step": 58621
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.129763126373291,
      "learning_rate": 2.672869487643546e-06,
      "loss": 2.3934,
      "step": 58622
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.007719874382019,
      "learning_rate": 2.67258929145952e-06,
      "loss": 2.0636,
      "step": 58623
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1073850393295288,
      "learning_rate": 2.6723091076974473e-06,
      "loss": 2.3566,
      "step": 58624
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9986432790756226,
      "learning_rate": 2.6720289363577977e-06,
      "loss": 2.3523,
      "step": 58625
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0045171976089478,
      "learning_rate": 2.671748777441052e-06,
      "loss": 2.1701,
      "step": 58626
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1693578958511353,
      "learning_rate": 2.6714686309476834e-06,
      "loss": 2.4601,
      "step": 58627
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1502883434295654,
      "learning_rate": 2.6711884968781632e-06,
      "loss": 2.2905,
      "step": 58628
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3536378145217896,
      "learning_rate": 2.670908375232971e-06,
      "loss": 2.3935,
      "step": 58629
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2135539054870605,
      "learning_rate": 2.670628266012577e-06,
      "loss": 2.4305,
      "step": 58630
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0642681121826172,
      "learning_rate": 2.6703481692174615e-06,
      "loss": 2.3841,
      "step": 58631
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0900789499282837,
      "learning_rate": 2.6700680848480976e-06,
      "loss": 2.2323,
      "step": 58632
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1867742538452148,
      "learning_rate": 2.6697880129049547e-06,
      "loss": 2.3535,
      "step": 58633
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.193708896636963,
      "learning_rate": 2.669507953388516e-06,
      "loss": 2.2536,
      "step": 58634
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0229288339614868,
      "learning_rate": 2.669227906299251e-06,
      "loss": 2.4263,
      "step": 58635
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1279383897781372,
      "learning_rate": 2.668947871637634e-06,
      "loss": 2.41,
      "step": 58636
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0977671146392822,
      "learning_rate": 2.668667849404144e-06,
      "loss": 2.3178,
      "step": 58637
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0612090826034546,
      "learning_rate": 2.66838783959925e-06,
      "loss": 2.3462,
      "step": 58638
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1153006553649902,
      "learning_rate": 2.668107842223433e-06,
      "loss": 2.4325,
      "step": 58639
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0229403972625732,
      "learning_rate": 2.667827857277161e-06,
      "loss": 2.145,
      "step": 58640
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0071171522140503,
      "learning_rate": 2.667547884760916e-06,
      "loss": 2.1785,
      "step": 58641
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1033833026885986,
      "learning_rate": 2.6672679246751653e-06,
      "loss": 2.347,
      "step": 58642
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1100881099700928,
      "learning_rate": 2.6669879770203897e-06,
      "loss": 2.4618,
      "step": 58643
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0393855571746826,
      "learning_rate": 2.6667080417970593e-06,
      "loss": 2.2233,
      "step": 58644
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.236735463142395,
      "learning_rate": 2.6664281190056527e-06,
      "loss": 2.3972,
      "step": 58645
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2127068042755127,
      "learning_rate": 2.6661482086466416e-06,
      "loss": 2.2464,
      "step": 58646
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0864700078964233,
      "learning_rate": 2.665868310720502e-06,
      "loss": 2.3402,
      "step": 58647
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3305567502975464,
      "learning_rate": 2.6655884252277043e-06,
      "loss": 2.1611,
      "step": 58648
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2156078815460205,
      "learning_rate": 2.6653085521687296e-06,
      "loss": 2.2787,
      "step": 58649
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.02408766746521,
      "learning_rate": 2.665028691544045e-06,
      "loss": 2.2639,
      "step": 58650
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0796955823898315,
      "learning_rate": 2.6647488433541325e-06,
      "loss": 2.2743,
      "step": 58651
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0825344324111938,
      "learning_rate": 2.6644690075994604e-06,
      "loss": 2.3588,
      "step": 58652
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0691255331039429,
      "learning_rate": 2.664189184280508e-06,
      "loss": 2.3213,
      "step": 58653
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0100510120391846,
      "learning_rate": 2.663909373397745e-06,
      "loss": 2.3072,
      "step": 58654
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0484532117843628,
      "learning_rate": 2.6636295749516505e-06,
      "loss": 2.4294,
      "step": 58655
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2067598104476929,
      "learning_rate": 2.663349788942696e-06,
      "loss": 2.3734,
      "step": 58656
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0301593542099,
      "learning_rate": 2.663070015371354e-06,
      "loss": 2.4541,
      "step": 58657
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1937137842178345,
      "learning_rate": 2.6627902542381032e-06,
      "loss": 2.4532,
      "step": 58658
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9905751943588257,
      "learning_rate": 2.6625105055434163e-06,
      "loss": 2.2175,
      "step": 58659
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0143448114395142,
      "learning_rate": 2.6622307692877636e-06,
      "loss": 2.1888,
      "step": 58660
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.090875267982483,
      "learning_rate": 2.6619510454716257e-06,
      "loss": 2.3774,
      "step": 58661
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0166246891021729,
      "learning_rate": 2.661671334095475e-06,
      "loss": 2.4681,
      "step": 58662
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1658446788787842,
      "learning_rate": 2.6613916351597804e-06,
      "loss": 2.1164,
      "step": 58663
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0888738632202148,
      "learning_rate": 2.661111948665023e-06,
      "loss": 2.5294,
      "step": 58664
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1535141468048096,
      "learning_rate": 2.660832274611671e-06,
      "loss": 2.4811,
      "step": 58665
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2006460428237915,
      "learning_rate": 2.6605526130002045e-06,
      "loss": 2.1254,
      "step": 58666
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.205228328704834,
      "learning_rate": 2.660272963831092e-06,
      "loss": 2.5526,
      "step": 58667
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9886807799339294,
      "learning_rate": 2.6599933271048127e-06,
      "loss": 2.5716,
      "step": 58668
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9661154747009277,
      "learning_rate": 2.6597137028218355e-06,
      "loss": 2.4034,
      "step": 58669
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0366495847702026,
      "learning_rate": 2.659434090982641e-06,
      "loss": 2.2783,
      "step": 58670
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2333407402038574,
      "learning_rate": 2.6591544915876953e-06,
      "loss": 2.4337,
      "step": 58671
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1037869453430176,
      "learning_rate": 2.658874904637483e-06,
      "loss": 2.3241,
      "step": 58672
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0720552206039429,
      "learning_rate": 2.6585953301324663e-06,
      "loss": 2.19,
      "step": 58673
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0900501012802124,
      "learning_rate": 2.6583157680731265e-06,
      "loss": 2.1131,
      "step": 58674
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9837236404418945,
      "learning_rate": 2.658036218459933e-06,
      "loss": 2.3646,
      "step": 58675
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1517149209976196,
      "learning_rate": 2.657756681293364e-06,
      "loss": 2.0664,
      "step": 58676
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0682663917541504,
      "learning_rate": 2.6574771565738886e-06,
      "loss": 2.2033,
      "step": 58677
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.006320834159851,
      "learning_rate": 2.6571976443019877e-06,
      "loss": 2.3735,
      "step": 58678
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3814215660095215,
      "learning_rate": 2.6569181444781277e-06,
      "loss": 2.3808,
      "step": 58679
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1357059478759766,
      "learning_rate": 2.656638657102789e-06,
      "loss": 2.3261,
      "step": 58680
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0147079229354858,
      "learning_rate": 2.656359182176439e-06,
      "loss": 2.2452,
      "step": 58681
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0376553535461426,
      "learning_rate": 2.656079719699558e-06,
      "loss": 2.4797,
      "step": 58682
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.118530511856079,
      "learning_rate": 2.655800269672616e-06,
      "loss": 2.3705,
      "step": 58683
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0335108041763306,
      "learning_rate": 2.6555208320960845e-06,
      "loss": 2.2654,
      "step": 58684
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0133616924285889,
      "learning_rate": 2.6552414069704423e-06,
      "loss": 2.1486,
      "step": 58685
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2848739624023438,
      "learning_rate": 2.6549619942961612e-06,
      "loss": 2.1263,
      "step": 58686
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1626713275909424,
      "learning_rate": 2.654682594073712e-06,
      "loss": 2.2367,
      "step": 58687
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1109123229980469,
      "learning_rate": 2.654403206303573e-06,
      "loss": 2.3034,
      "step": 58688
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1203274726867676,
      "learning_rate": 2.6541238309862127e-06,
      "loss": 2.1998,
      "step": 58689
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.151114821434021,
      "learning_rate": 2.653844468122111e-06,
      "loss": 2.3046,
      "step": 58690
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0998880863189697,
      "learning_rate": 2.653565117711737e-06,
      "loss": 2.2429,
      "step": 58691
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0168131589889526,
      "learning_rate": 2.6532857797555634e-06,
      "loss": 2.1363,
      "step": 58692
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0885621309280396,
      "learning_rate": 2.653006454254069e-06,
      "loss": 2.2734,
      "step": 58693
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2075116634368896,
      "learning_rate": 2.6527271412077206e-06,
      "loss": 2.2646,
      "step": 58694
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9777252078056335,
      "learning_rate": 2.652447840616997e-06,
      "loss": 2.3309,
      "step": 58695
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.126978874206543,
      "learning_rate": 2.6521685524823692e-06,
      "loss": 2.3822,
      "step": 58696
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.188274621963501,
      "learning_rate": 2.6518892768043124e-06,
      "loss": 2.2958,
      "step": 58697
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.013901710510254,
      "learning_rate": 2.6516100135833e-06,
      "loss": 2.2134,
      "step": 58698
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1936975717544556,
      "learning_rate": 2.651330762819805e-06,
      "loss": 2.4009,
      "step": 58699
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1102838516235352,
      "learning_rate": 2.6510515245142966e-06,
      "loss": 2.3724,
      "step": 58700
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.090505838394165,
      "learning_rate": 2.6507722986672546e-06,
      "loss": 2.3504,
      "step": 58701
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1547735929489136,
      "learning_rate": 2.6504930852791466e-06,
      "loss": 2.2519,
      "step": 58702
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0244548320770264,
      "learning_rate": 2.650213884350451e-06,
      "loss": 2.4381,
      "step": 58703
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0764998197555542,
      "learning_rate": 2.6499346958816376e-06,
      "loss": 2.1496,
      "step": 58704
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1083967685699463,
      "learning_rate": 2.6496555198731833e-06,
      "loss": 2.4061,
      "step": 58705
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9636155962944031,
      "learning_rate": 2.649376356325557e-06,
      "loss": 2.3888,
      "step": 58706
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.154475212097168,
      "learning_rate": 2.649097205239236e-06,
      "loss": 2.3265,
      "step": 58707
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9705052375793457,
      "learning_rate": 2.64881806661469e-06,
      "loss": 2.3145,
      "step": 58708
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0515021085739136,
      "learning_rate": 2.6485389404523964e-06,
      "loss": 2.1398,
      "step": 58709
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2357430458068848,
      "learning_rate": 2.6482598267528224e-06,
      "loss": 2.2473,
      "step": 58710
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.024620771408081,
      "learning_rate": 2.6479807255164514e-06,
      "loss": 2.2218,
      "step": 58711
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3060963153839111,
      "learning_rate": 2.6477016367437437e-06,
      "loss": 2.3065,
      "step": 58712
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9773876667022705,
      "learning_rate": 2.647422560435181e-06,
      "loss": 2.2252,
      "step": 58713
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2388660907745361,
      "learning_rate": 2.6471434965912314e-06,
      "loss": 2.3589,
      "step": 58714
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0250568389892578,
      "learning_rate": 2.6468644452123737e-06,
      "loss": 2.4641,
      "step": 58715
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0493890047073364,
      "learning_rate": 2.6465854062990746e-06,
      "loss": 2.2758,
      "step": 58716
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1025028228759766,
      "learning_rate": 2.646306379851814e-06,
      "loss": 2.2587,
      "step": 58717
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.07673180103302,
      "learning_rate": 2.646027365871061e-06,
      "loss": 2.2154,
      "step": 58718
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9969629049301147,
      "learning_rate": 2.6457483643572857e-06,
      "loss": 2.3828,
      "step": 58719
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0663295984268188,
      "learning_rate": 2.6454693753109673e-06,
      "loss": 2.2429,
      "step": 58720
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0636615753173828,
      "learning_rate": 2.645190398732573e-06,
      "loss": 2.5344,
      "step": 58721
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.055997610092163,
      "learning_rate": 2.644911434622581e-06,
      "loss": 2.1589,
      "step": 58722
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.078758955001831,
      "learning_rate": 2.6446324829814596e-06,
      "loss": 2.4251,
      "step": 58723
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1236894130706787,
      "learning_rate": 2.6443535438096866e-06,
      "loss": 2.4079,
      "step": 58724
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0860519409179688,
      "learning_rate": 2.6440746171077315e-06,
      "loss": 2.3244,
      "step": 58725
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1783021688461304,
      "learning_rate": 2.6437957028760687e-06,
      "loss": 2.2718,
      "step": 58726
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0388367176055908,
      "learning_rate": 2.6435168011151656e-06,
      "loss": 2.2294,
      "step": 58727
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0853092670440674,
      "learning_rate": 2.6432379118255034e-06,
      "loss": 2.3703,
      "step": 58728
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0988826751708984,
      "learning_rate": 2.6429590350075484e-06,
      "loss": 2.2498,
      "step": 58729
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2484254837036133,
      "learning_rate": 2.6426801706617788e-06,
      "loss": 2.414,
      "step": 58730
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1626954078674316,
      "learning_rate": 2.642401318788662e-06,
      "loss": 2.1526,
      "step": 58731
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.9797945618629456,
      "learning_rate": 2.642122479388676e-06,
      "loss": 2.406,
      "step": 58732
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.070850133895874,
      "learning_rate": 2.641843652462287e-06,
      "loss": 2.3654,
      "step": 58733
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.017169713973999,
      "learning_rate": 2.6415648380099746e-06,
      "loss": 2.2385,
      "step": 58734
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0457470417022705,
      "learning_rate": 2.6412860360322057e-06,
      "loss": 2.4404,
      "step": 58735
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0356897115707397,
      "learning_rate": 2.641007246529459e-06,
      "loss": 2.4226,
      "step": 58736
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0587255954742432,
      "learning_rate": 2.6407284695021997e-06,
      "loss": 2.335,
      "step": 58737
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0918524265289307,
      "learning_rate": 2.64044970495091e-06,
      "loss": 2.2793,
      "step": 58738
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.118622899055481,
      "learning_rate": 2.640170952876052e-06,
      "loss": 2.4633,
      "step": 58739
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.085376501083374,
      "learning_rate": 2.6398922132781067e-06,
      "loss": 2.4173,
      "step": 58740
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.126816987991333,
      "learning_rate": 2.6396134861575385e-06,
      "loss": 2.4563,
      "step": 58741
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0744764804840088,
      "learning_rate": 2.6393347715148286e-06,
      "loss": 2.2769,
      "step": 58742
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2192554473876953,
      "learning_rate": 2.6390560693504417e-06,
      "loss": 2.4301,
      "step": 58743
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2323715686798096,
      "learning_rate": 2.6387773796648576e-06,
      "loss": 2.3501,
      "step": 58744
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0061099529266357,
      "learning_rate": 2.638498702458545e-06,
      "loss": 2.3248,
      "step": 58745
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.088297963142395,
      "learning_rate": 2.6382200377319734e-06,
      "loss": 2.366,
      "step": 58746
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1014330387115479,
      "learning_rate": 2.6379413854856208e-06,
      "loss": 2.2948,
      "step": 58747
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2967931032180786,
      "learning_rate": 2.6376627457199543e-06,
      "loss": 2.2091,
      "step": 58748
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2404567003250122,
      "learning_rate": 2.637384118435452e-06,
      "loss": 2.118,
      "step": 58749
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1077910661697388,
      "learning_rate": 2.637105503632581e-06,
      "loss": 2.3025,
      "step": 58750
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.1878119707107544,
      "learning_rate": 2.6368269013118186e-06,
      "loss": 2.3355,
      "step": 58751
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.0483331680297852,
      "learning_rate": 2.6365483114736334e-06,
      "loss": 2.3774,
      "step": 58752
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0386841297149658,
      "learning_rate": 2.6362697341184997e-06,
      "loss": 2.2099,
      "step": 58753
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1624624729156494,
      "learning_rate": 2.6359911692468854e-06,
      "loss": 2.1912,
      "step": 58754
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.003359317779541,
      "learning_rate": 2.6357126168592697e-06,
      "loss": 2.5438,
      "step": 58755
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0563628673553467,
      "learning_rate": 2.6354340769561182e-06,
      "loss": 2.3243,
      "step": 58756
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.126223087310791,
      "learning_rate": 2.6351555495379076e-06,
      "loss": 2.4648,
      "step": 58757
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0234653949737549,
      "learning_rate": 2.6348770346051067e-06,
      "loss": 2.2695,
      "step": 58758
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.164663553237915,
      "learning_rate": 2.6345985321581935e-06,
      "loss": 2.4315,
      "step": 58759
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1163564920425415,
      "learning_rate": 2.6343200421976323e-06,
      "loss": 2.2961,
      "step": 58760
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.182120680809021,
      "learning_rate": 2.6340415647239016e-06,
      "loss": 2.364,
      "step": 58761
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1519474983215332,
      "learning_rate": 2.6337630997374686e-06,
      "loss": 2.5717,
      "step": 58762
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0982439517974854,
      "learning_rate": 2.633484647238811e-06,
      "loss": 2.2811,
      "step": 58763
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1042592525482178,
      "learning_rate": 2.633206207228397e-06,
      "loss": 2.3523,
      "step": 58764
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0741240978240967,
      "learning_rate": 2.6329277797067e-06,
      "loss": 2.5076,
      "step": 58765
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1802382469177246,
      "learning_rate": 2.632649364674187e-06,
      "loss": 2.3793,
      "step": 58766
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0325872898101807,
      "learning_rate": 2.6323709621313387e-06,
      "loss": 2.2523,
      "step": 58767
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1117300987243652,
      "learning_rate": 2.6320925720786185e-06,
      "loss": 2.2558,
      "step": 58768
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.080540418624878,
      "learning_rate": 2.6318141945165067e-06,
      "loss": 2.2695,
      "step": 58769
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.179994821548462,
      "learning_rate": 2.6315358294454673e-06,
      "loss": 2.4042,
      "step": 58770
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0315274000167847,
      "learning_rate": 2.6312574768659794e-06,
      "loss": 2.2468,
      "step": 58771
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1480627059936523,
      "learning_rate": 2.6309791367785076e-06,
      "loss": 2.1803,
      "step": 58772
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2262907028198242,
      "learning_rate": 2.630700809183531e-06,
      "loss": 2.3791,
      "step": 58773
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9965847134590149,
      "learning_rate": 2.6304224940815184e-06,
      "loss": 2.4784,
      "step": 58774
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0375804901123047,
      "learning_rate": 2.6301441914729374e-06,
      "loss": 2.3478,
      "step": 58775
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0901892185211182,
      "learning_rate": 2.6298659013582673e-06,
      "loss": 2.29,
      "step": 58776
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1376534700393677,
      "learning_rate": 2.6295876237379756e-06,
      "loss": 2.4003,
      "step": 58777
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5106029510498047,
      "learning_rate": 2.6293093586125317e-06,
      "loss": 2.2003,
      "step": 58778
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1155123710632324,
      "learning_rate": 2.6290311059824136e-06,
      "loss": 2.4222,
      "step": 58779
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1103272438049316,
      "learning_rate": 2.6287528658480898e-06,
      "loss": 2.1337,
      "step": 58780
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0919647216796875,
      "learning_rate": 2.628474638210029e-06,
      "loss": 2.2669,
      "step": 58781
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0368837118148804,
      "learning_rate": 2.6281964230687083e-06,
      "loss": 2.0751,
      "step": 58782
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1081085205078125,
      "learning_rate": 2.6279182204245934e-06,
      "loss": 2.4912,
      "step": 58783
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0781477689743042,
      "learning_rate": 2.6276400302781637e-06,
      "loss": 2.1904,
      "step": 58784
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1482466459274292,
      "learning_rate": 2.6273618526298826e-06,
      "loss": 2.1208,
      "step": 58785
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1232080459594727,
      "learning_rate": 2.6270836874802287e-06,
      "loss": 2.1229,
      "step": 58786
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0466183423995972,
      "learning_rate": 2.6268055348296674e-06,
      "loss": 2.428,
      "step": 58787
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.206615686416626,
      "learning_rate": 2.626527394678676e-06,
      "loss": 2.2816,
      "step": 58788
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1266894340515137,
      "learning_rate": 2.6262492670277205e-06,
      "loss": 2.5343,
      "step": 58789
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0960395336151123,
      "learning_rate": 2.625971151877278e-06,
      "loss": 2.3426,
      "step": 58790
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.174499273300171,
      "learning_rate": 2.6256930492278178e-06,
      "loss": 2.2982,
      "step": 58791
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0493067502975464,
      "learning_rate": 2.6254149590798105e-06,
      "loss": 2.3777,
      "step": 58792
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.014336347579956,
      "learning_rate": 2.625136881433724e-06,
      "loss": 2.0903,
      "step": 58793
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0771595239639282,
      "learning_rate": 2.624858816290037e-06,
      "loss": 2.1197,
      "step": 58794
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2290014028549194,
      "learning_rate": 2.6245807636492148e-06,
      "loss": 2.3672,
      "step": 58795
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1602201461791992,
      "learning_rate": 2.6243027235117335e-06,
      "loss": 2.3654,
      "step": 58796
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1340043544769287,
      "learning_rate": 2.6240246958780603e-06,
      "loss": 2.2171,
      "step": 58797
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2415236234664917,
      "learning_rate": 2.62374668074867e-06,
      "loss": 2.4584,
      "step": 58798
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1113972663879395,
      "learning_rate": 2.6234686781240302e-06,
      "loss": 2.2932,
      "step": 58799
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1301839351654053,
      "learning_rate": 2.6231906880046174e-06,
      "loss": 2.2068,
      "step": 58800
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0702158212661743,
      "learning_rate": 2.6229127103909015e-06,
      "loss": 2.4593,
      "step": 58801
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0784505605697632,
      "learning_rate": 2.6226347452833467e-06,
      "loss": 2.2441,
      "step": 58802
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9476104974746704,
      "learning_rate": 2.6223567926824346e-06,
      "loss": 2.4624,
      "step": 58803
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.01961350440979,
      "learning_rate": 2.6220788525886297e-06,
      "loss": 2.3204,
      "step": 58804
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1298056840896606,
      "learning_rate": 2.6218009250024034e-06,
      "loss": 2.3882,
      "step": 58805
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0004833936691284,
      "learning_rate": 2.6215230099242304e-06,
      "loss": 2.3828,
      "step": 58806
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1836895942687988,
      "learning_rate": 2.6212451073545776e-06,
      "loss": 2.3204,
      "step": 58807
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0763458013534546,
      "learning_rate": 2.6209672172939217e-06,
      "loss": 2.3064,
      "step": 58808
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.041378140449524,
      "learning_rate": 2.620689339742729e-06,
      "loss": 2.153,
      "step": 58809
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1958327293395996,
      "learning_rate": 2.620411474701471e-06,
      "loss": 2.3141,
      "step": 58810
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1695489883422852,
      "learning_rate": 2.6201336221706207e-06,
      "loss": 2.2593,
      "step": 58811
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0694550275802612,
      "learning_rate": 2.6198557821506465e-06,
      "loss": 2.2835,
      "step": 58812
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0112416744232178,
      "learning_rate": 2.619577954642024e-06,
      "loss": 2.2942,
      "step": 58813
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9891722798347473,
      "learning_rate": 2.6193001396452187e-06,
      "loss": 2.4002,
      "step": 58814
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0652213096618652,
      "learning_rate": 2.619022337160707e-06,
      "loss": 2.1978,
      "step": 58815
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0863962173461914,
      "learning_rate": 2.618744547188954e-06,
      "loss": 2.2407,
      "step": 58816
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0660603046417236,
      "learning_rate": 2.61846676973044e-06,
      "loss": 2.2063,
      "step": 58817
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2279115915298462,
      "learning_rate": 2.6181890047856227e-06,
      "loss": 2.692,
      "step": 58818
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.142783522605896,
      "learning_rate": 2.617911252354983e-06,
      "loss": 2.0728,
      "step": 58819
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.250901222229004,
      "learning_rate": 2.6176335124389873e-06,
      "loss": 2.3152,
      "step": 58820
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2403889894485474,
      "learning_rate": 2.6173557850381094e-06,
      "loss": 2.239,
      "step": 58821
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0527658462524414,
      "learning_rate": 2.6170780701528155e-06,
      "loss": 2.3393,
      "step": 58822
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0132625102996826,
      "learning_rate": 2.616800367783583e-06,
      "loss": 2.4845,
      "step": 58823
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0987540483474731,
      "learning_rate": 2.616522677930876e-06,
      "loss": 2.2985,
      "step": 58824
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.078716516494751,
      "learning_rate": 2.6162450005951713e-06,
      "loss": 2.2651,
      "step": 58825
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2484716176986694,
      "learning_rate": 2.6159673357769344e-06,
      "loss": 2.4299,
      "step": 58826
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0725833177566528,
      "learning_rate": 2.61568968347664e-06,
      "loss": 2.3966,
      "step": 58827
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1119385957717896,
      "learning_rate": 2.6154120436947548e-06,
      "loss": 2.4543,
      "step": 58828
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0678168535232544,
      "learning_rate": 2.615134416431755e-06,
      "loss": 2.2292,
      "step": 58829
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1715773344039917,
      "learning_rate": 2.614856801688108e-06,
      "loss": 2.6537,
      "step": 58830
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0471292734146118,
      "learning_rate": 2.6145791994642845e-06,
      "loss": 2.4451,
      "step": 58831
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.112131953239441,
      "learning_rate": 2.614301609760752e-06,
      "loss": 2.268,
      "step": 58832
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0919981002807617,
      "learning_rate": 2.6140240325779876e-06,
      "loss": 2.1895,
      "step": 58833
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0618804693222046,
      "learning_rate": 2.6137464679164547e-06,
      "loss": 2.4486,
      "step": 58834
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0165404081344604,
      "learning_rate": 2.6134689157766314e-06,
      "loss": 2.3985,
      "step": 58835
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.003060221672058,
      "learning_rate": 2.6131913761589833e-06,
      "loss": 2.5172,
      "step": 58836
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1040247678756714,
      "learning_rate": 2.6129138490639804e-06,
      "loss": 2.0596,
      "step": 58837
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9969478249549866,
      "learning_rate": 2.612636334492097e-06,
      "loss": 2.2363,
      "step": 58838
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9823607802391052,
      "learning_rate": 2.6123588324437986e-06,
      "loss": 2.5771,
      "step": 58839
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9631526470184326,
      "learning_rate": 2.6120813429195612e-06,
      "loss": 2.0047,
      "step": 58840
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0492089986801147,
      "learning_rate": 2.61180386591985e-06,
      "loss": 2.2463,
      "step": 58841
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.058326244354248,
      "learning_rate": 2.6115264014451402e-06,
      "loss": 2.3476,
      "step": 58842
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0897184610366821,
      "learning_rate": 2.6112489494959e-06,
      "loss": 2.2682,
      "step": 58843
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0876184701919556,
      "learning_rate": 2.6109715100726007e-06,
      "loss": 2.1983,
      "step": 58844
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2095521688461304,
      "learning_rate": 2.6106940831757077e-06,
      "loss": 2.4711,
      "step": 58845
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2761098146438599,
      "learning_rate": 2.6104166688056977e-06,
      "loss": 2.2716,
      "step": 58846
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1369048357009888,
      "learning_rate": 2.6101392669630355e-06,
      "loss": 2.3098,
      "step": 58847
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1478666067123413,
      "learning_rate": 2.6098618776481978e-06,
      "loss": 2.2132,
      "step": 58848
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0753642320632935,
      "learning_rate": 2.609584500861648e-06,
      "loss": 2.4859,
      "step": 58849
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0947402715682983,
      "learning_rate": 2.6093071366038623e-06,
      "loss": 2.1695,
      "step": 58850
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1380650997161865,
      "learning_rate": 2.6090297848753065e-06,
      "loss": 2.126,
      "step": 58851
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1516066789627075,
      "learning_rate": 2.6087524456764546e-06,
      "loss": 2.2645,
      "step": 58852
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.117494821548462,
      "learning_rate": 2.6084751190077717e-06,
      "loss": 2.5564,
      "step": 58853
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0742617845535278,
      "learning_rate": 2.6081978048697355e-06,
      "loss": 2.4978,
      "step": 58854
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1699062585830688,
      "learning_rate": 2.6079205032628064e-06,
      "loss": 2.1404,
      "step": 58855
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0560450553894043,
      "learning_rate": 2.6076432141874675e-06,
      "loss": 2.2601,
      "step": 58856
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0192919969558716,
      "learning_rate": 2.607365937644174e-06,
      "loss": 2.1237,
      "step": 58857
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1946141719818115,
      "learning_rate": 2.607088673633407e-06,
      "loss": 2.2051,
      "step": 58858
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0900673866271973,
      "learning_rate": 2.6068114221556282e-06,
      "loss": 2.3004,
      "step": 58859
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2150278091430664,
      "learning_rate": 2.6065341832113156e-06,
      "loss": 2.2848,
      "step": 58860
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1273853778839111,
      "learning_rate": 2.6062569568009332e-06,
      "loss": 2.2191,
      "step": 58861
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0740692615509033,
      "learning_rate": 2.6059797429249554e-06,
      "loss": 2.5583,
      "step": 58862
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.124108076095581,
      "learning_rate": 2.605702541583851e-06,
      "loss": 2.3543,
      "step": 58863
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1955995559692383,
      "learning_rate": 2.605425352778086e-06,
      "loss": 2.3109,
      "step": 58864
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0144758224487305,
      "learning_rate": 2.6051481765081356e-06,
      "loss": 2.4718,
      "step": 58865
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0210078954696655,
      "learning_rate": 2.604871012774465e-06,
      "loss": 2.2596,
      "step": 58866
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.133266568183899,
      "learning_rate": 2.604593861577549e-06,
      "loss": 2.4143,
      "step": 58867
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2944661378860474,
      "learning_rate": 2.604316722917852e-06,
      "loss": 2.4031,
      "step": 58868
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1880333423614502,
      "learning_rate": 2.604039596795851e-06,
      "loss": 2.3489,
      "step": 58869
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.177020788192749,
      "learning_rate": 2.603762483212011e-06,
      "loss": 2.2783,
      "step": 58870
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.159452199935913,
      "learning_rate": 2.6034853821668014e-06,
      "loss": 2.3911,
      "step": 58871
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.074906826019287,
      "learning_rate": 2.603208293660692e-06,
      "loss": 2.3634,
      "step": 58872
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.076271653175354,
      "learning_rate": 2.6029312176941547e-06,
      "loss": 2.3967,
      "step": 58873
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9616953134536743,
      "learning_rate": 2.6026541542676552e-06,
      "loss": 2.29,
      "step": 58874
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0711899995803833,
      "learning_rate": 2.602377103381669e-06,
      "loss": 2.2675,
      "step": 58875
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9405900835990906,
      "learning_rate": 2.60210006503666e-06,
      "loss": 2.2801,
      "step": 58876
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1289335489273071,
      "learning_rate": 2.6018230392331044e-06,
      "loss": 2.5523,
      "step": 58877
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.172676920890808,
      "learning_rate": 2.601546025971464e-06,
      "loss": 2.4131,
      "step": 58878
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0122860670089722,
      "learning_rate": 2.6012690252522165e-06,
      "loss": 2.3895,
      "step": 58879
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0876210927963257,
      "learning_rate": 2.6009920370758234e-06,
      "loss": 2.2849,
      "step": 58880
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0550808906555176,
      "learning_rate": 2.600715061442762e-06,
      "loss": 2.3971,
      "step": 58881
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0345195531845093,
      "learning_rate": 2.600438098353495e-06,
      "loss": 2.3747,
      "step": 58882
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0577000379562378,
      "learning_rate": 2.600161147808501e-06,
      "loss": 2.4713,
      "step": 58883
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1973721981048584,
      "learning_rate": 2.5998842098082377e-06,
      "loss": 2.1939,
      "step": 58884
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0557458400726318,
      "learning_rate": 2.5996072843531837e-06,
      "loss": 2.1986,
      "step": 58885
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.190626859664917,
      "learning_rate": 2.5993303714438025e-06,
      "loss": 2.4693,
      "step": 58886
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1268024444580078,
      "learning_rate": 2.599053471080568e-06,
      "loss": 2.4746,
      "step": 58887
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0198578834533691,
      "learning_rate": 2.5987765832639455e-06,
      "loss": 2.1752,
      "step": 58888
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9883526563644409,
      "learning_rate": 2.598499707994411e-06,
      "loss": 2.4433,
      "step": 58889
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0271480083465576,
      "learning_rate": 2.598222845272426e-06,
      "loss": 2.373,
      "step": 58890
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0762666463851929,
      "learning_rate": 2.5979459950984665e-06,
      "loss": 2.4663,
      "step": 58891
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0682282447814941,
      "learning_rate": 2.5976691574729984e-06,
      "loss": 2.3322,
      "step": 58892
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1061357259750366,
      "learning_rate": 2.597392332396489e-06,
      "loss": 2.372,
      "step": 58893
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1083985567092896,
      "learning_rate": 2.597115519869413e-06,
      "loss": 2.2649,
      "step": 58894
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.100698471069336,
      "learning_rate": 2.596838719892234e-06,
      "loss": 2.2687,
      "step": 58895
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1373510360717773,
      "learning_rate": 2.5965619324654266e-06,
      "loss": 2.215,
      "step": 58896
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0469567775726318,
      "learning_rate": 2.5962851575894577e-06,
      "loss": 2.3275,
      "step": 58897
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1307650804519653,
      "learning_rate": 2.5960083952647976e-06,
      "loss": 2.5152,
      "step": 58898
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0677518844604492,
      "learning_rate": 2.595731645491909e-06,
      "loss": 2.3709,
      "step": 58899
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9809340834617615,
      "learning_rate": 2.59545490827127e-06,
      "loss": 2.5197,
      "step": 58900
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1146410703659058,
      "learning_rate": 2.595178183603344e-06,
      "loss": 2.4519,
      "step": 58901
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.01918625831604,
      "learning_rate": 2.594901471488603e-06,
      "loss": 2.272,
      "step": 58902
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0533995628356934,
      "learning_rate": 2.594624771927514e-06,
      "loss": 2.2212,
      "step": 58903
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2438760995864868,
      "learning_rate": 2.5943480849205506e-06,
      "loss": 2.2935,
      "step": 58904
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1096830368041992,
      "learning_rate": 2.5940714104681743e-06,
      "loss": 2.3295,
      "step": 58905
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0784611701965332,
      "learning_rate": 2.5937947485708613e-06,
      "loss": 2.231,
      "step": 58906
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.143873929977417,
      "learning_rate": 2.5935180992290753e-06,
      "loss": 2.415,
      "step": 58907
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1409188508987427,
      "learning_rate": 2.593241462443291e-06,
      "loss": 2.3736,
      "step": 58908
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0574374198913574,
      "learning_rate": 2.5929648382139738e-06,
      "loss": 2.3912,
      "step": 58909
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0134484767913818,
      "learning_rate": 2.592688226541592e-06,
      "loss": 2.2898,
      "step": 58910
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0698368549346924,
      "learning_rate": 2.5924116274266144e-06,
      "loss": 2.3876,
      "step": 58911
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0840822458267212,
      "learning_rate": 2.592135040869512e-06,
      "loss": 2.3321,
      "step": 58912
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0473639965057373,
      "learning_rate": 2.5918584668707503e-06,
      "loss": 2.411,
      "step": 58913
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0528095960617065,
      "learning_rate": 2.5915819054308046e-06,
      "loss": 2.362,
      "step": 58914
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0180069208145142,
      "learning_rate": 2.5913053565501356e-06,
      "loss": 2.1612,
      "step": 58915
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.19700288772583,
      "learning_rate": 2.5910288202292188e-06,
      "loss": 2.4516,
      "step": 58916
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1309375762939453,
      "learning_rate": 2.5907522964685185e-06,
      "loss": 2.3493,
      "step": 58917
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0905205011367798,
      "learning_rate": 2.590475785268507e-06,
      "loss": 2.0365,
      "step": 58918
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0209320783615112,
      "learning_rate": 2.590199286629652e-06,
      "loss": 2.2683,
      "step": 58919
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1008473634719849,
      "learning_rate": 2.589922800552419e-06,
      "loss": 2.4963,
      "step": 58920
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0866180658340454,
      "learning_rate": 2.5896463270372828e-06,
      "loss": 2.3043,
      "step": 58921
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1881707906723022,
      "learning_rate": 2.589369866084708e-06,
      "loss": 2.5821,
      "step": 58922
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1033682823181152,
      "learning_rate": 2.589093417695161e-06,
      "loss": 2.2002,
      "step": 58923
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0859112739562988,
      "learning_rate": 2.5888169818691165e-06,
      "loss": 2.527,
      "step": 58924
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1419960260391235,
      "learning_rate": 2.588540558607038e-06,
      "loss": 2.4024,
      "step": 58925
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2024325132369995,
      "learning_rate": 2.588264147909398e-06,
      "loss": 2.4546,
      "step": 58926
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1034296751022339,
      "learning_rate": 2.5879877497766635e-06,
      "loss": 2.1945,
      "step": 58927
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.063612699508667,
      "learning_rate": 2.587711364209301e-06,
      "loss": 2.1942,
      "step": 58928
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1206040382385254,
      "learning_rate": 2.5874349912077822e-06,
      "loss": 2.4051,
      "step": 58929
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0199612379074097,
      "learning_rate": 2.587158630772573e-06,
      "loss": 2.1944,
      "step": 58930
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5986474752426147,
      "learning_rate": 2.586882282904144e-06,
      "loss": 2.1499,
      "step": 58931
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3164877891540527,
      "learning_rate": 2.5866059476029613e-06,
      "loss": 2.4217,
      "step": 58932
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1677391529083252,
      "learning_rate": 2.586329624869498e-06,
      "loss": 2.3475,
      "step": 58933
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1052364110946655,
      "learning_rate": 2.586053314704217e-06,
      "loss": 2.2251,
      "step": 58934
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0256843566894531,
      "learning_rate": 2.5857770171075912e-06,
      "loss": 2.173,
      "step": 58935
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2823524475097656,
      "learning_rate": 2.585500732080087e-06,
      "loss": 2.3736,
      "step": 58936
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.08620285987854,
      "learning_rate": 2.5852244596221734e-06,
      "loss": 2.2735,
      "step": 58937
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1580182313919067,
      "learning_rate": 2.584948199734315e-06,
      "loss": 2.7331,
      "step": 58938
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.11597740650177,
      "learning_rate": 2.584671952416986e-06,
      "loss": 2.4376,
      "step": 58939
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.074459433555603,
      "learning_rate": 2.584395717670649e-06,
      "loss": 2.3477,
      "step": 58940
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1778080463409424,
      "learning_rate": 2.5841194954957795e-06,
      "loss": 2.5133,
      "step": 58941
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4396657943725586,
      "learning_rate": 2.5838432858928375e-06,
      "loss": 2.4536,
      "step": 58942
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2699949741363525,
      "learning_rate": 2.5835670888622975e-06,
      "loss": 2.3832,
      "step": 58943
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0544989109039307,
      "learning_rate": 2.5832909044046238e-06,
      "loss": 2.5669,
      "step": 58944
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.715151309967041,
      "learning_rate": 2.5830147325202893e-06,
      "loss": 2.3063,
      "step": 58945
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0128955841064453,
      "learning_rate": 2.5827385732097566e-06,
      "loss": 2.4699,
      "step": 58946
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0655075311660767,
      "learning_rate": 2.5824624264734987e-06,
      "loss": 2.318,
      "step": 58947
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.034375786781311,
      "learning_rate": 2.5821862923119824e-06,
      "loss": 2.2165,
      "step": 58948
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1257489919662476,
      "learning_rate": 2.5819101707256744e-06,
      "loss": 2.42,
      "step": 58949
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0053519010543823,
      "learning_rate": 2.5816340617150404e-06,
      "loss": 2.3184,
      "step": 58950
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9887023568153381,
      "learning_rate": 2.581357965280554e-06,
      "loss": 2.1215,
      "step": 58951
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1289464235305786,
      "learning_rate": 2.581081881422678e-06,
      "loss": 2.3361,
      "step": 58952
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0934005975723267,
      "learning_rate": 2.5808058101418865e-06,
      "loss": 2.495,
      "step": 58953
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.320299744606018,
      "learning_rate": 2.580529751438644e-06,
      "loss": 2.3477,
      "step": 58954
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1925538778305054,
      "learning_rate": 2.580253705313416e-06,
      "loss": 2.4157,
      "step": 58955
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0971661806106567,
      "learning_rate": 2.5799776717666756e-06,
      "loss": 2.5411,
      "step": 58956
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0971240997314453,
      "learning_rate": 2.579701650798886e-06,
      "loss": 2.186,
      "step": 58957
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0733845233917236,
      "learning_rate": 2.5794256424105203e-06,
      "loss": 2.3846,
      "step": 58958
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1936473846435547,
      "learning_rate": 2.5791496466020396e-06,
      "loss": 2.2569,
      "step": 58959
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1681164503097534,
      "learning_rate": 2.5788736633739196e-06,
      "loss": 2.1742,
      "step": 58960
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1902506351470947,
      "learning_rate": 2.578597692726621e-06,
      "loss": 2.3545,
      "step": 58961
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0733014345169067,
      "learning_rate": 2.5783217346606215e-06,
      "loss": 2.3863,
      "step": 58962
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0731717348098755,
      "learning_rate": 2.5780457891763757e-06,
      "loss": 2.0736,
      "step": 58963
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0881444215774536,
      "learning_rate": 2.5777698562743612e-06,
      "loss": 2.3717,
      "step": 58964
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1173022985458374,
      "learning_rate": 2.57749393595504e-06,
      "loss": 2.2812,
      "step": 58965
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0828865766525269,
      "learning_rate": 2.577218028218884e-06,
      "loss": 2.1014,
      "step": 58966
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1247390508651733,
      "learning_rate": 2.576942133066358e-06,
      "loss": 2.4382,
      "step": 58967
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1621148586273193,
      "learning_rate": 2.5766662504979347e-06,
      "loss": 2.3277,
      "step": 58968
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0660285949707031,
      "learning_rate": 2.5763903805140735e-06,
      "loss": 2.1642,
      "step": 58969
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0203012228012085,
      "learning_rate": 2.5761145231152507e-06,
      "loss": 2.3978,
      "step": 58970
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0895389318466187,
      "learning_rate": 2.5758386783019273e-06,
      "loss": 2.2099,
      "step": 58971
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0779213905334473,
      "learning_rate": 2.5755628460745762e-06,
      "loss": 2.2427,
      "step": 58972
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.083389163017273,
      "learning_rate": 2.575287026433659e-06,
      "loss": 2.2975,
      "step": 58973
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0621767044067383,
      "learning_rate": 2.5750112193796517e-06,
      "loss": 2.0881,
      "step": 58974
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.369388461112976,
      "learning_rate": 2.5747354249130154e-06,
      "loss": 2.0828,
      "step": 58975
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2483770847320557,
      "learning_rate": 2.5744596430342196e-06,
      "loss": 2.2297,
      "step": 58976
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0035420656204224,
      "learning_rate": 2.5741838737437285e-06,
      "loss": 2.3653,
      "step": 58977
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2095739841461182,
      "learning_rate": 2.5739081170420155e-06,
      "loss": 2.2933,
      "step": 58978
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1692087650299072,
      "learning_rate": 2.573632372929543e-06,
      "loss": 2.3525,
      "step": 58979
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0395805835723877,
      "learning_rate": 2.5733566414067824e-06,
      "loss": 2.1897,
      "step": 58980
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1472707986831665,
      "learning_rate": 2.5730809224742004e-06,
      "loss": 2.1075,
      "step": 58981
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0332711935043335,
      "learning_rate": 2.572805216132259e-06,
      "loss": 2.0151,
      "step": 58982
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0477705001831055,
      "learning_rate": 2.572529522381434e-06,
      "loss": 2.2982,
      "step": 58983
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1021493673324585,
      "learning_rate": 2.5722538412221853e-06,
      "loss": 2.1759,
      "step": 58984
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1422860622406006,
      "learning_rate": 2.5719781726549874e-06,
      "loss": 2.421,
      "step": 58985
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.162021517753601,
      "learning_rate": 2.5717025166803e-06,
      "loss": 2.2989,
      "step": 58986
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.053253412246704,
      "learning_rate": 2.571426873298598e-06,
      "loss": 2.3236,
      "step": 58987
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0723930597305298,
      "learning_rate": 2.5711512425103445e-06,
      "loss": 2.214,
      "step": 58988
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0488784313201904,
      "learning_rate": 2.570875624316007e-06,
      "loss": 2.1334,
      "step": 58989
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1072986125946045,
      "learning_rate": 2.5706000187160506e-06,
      "loss": 2.5233,
      "step": 58990
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1240969896316528,
      "learning_rate": 2.5703244257109484e-06,
      "loss": 2.3787,
      "step": 58991
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9474589824676514,
      "learning_rate": 2.570048845301161e-06,
      "loss": 2.251,
      "step": 58992
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0838143825531006,
      "learning_rate": 2.5697732774871607e-06,
      "loss": 2.228,
      "step": 58993
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.179476261138916,
      "learning_rate": 2.569497722269411e-06,
      "loss": 2.5289,
      "step": 58994
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.053318738937378,
      "learning_rate": 2.569222179648383e-06,
      "loss": 2.3815,
      "step": 58995
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.042338490486145,
      "learning_rate": 2.5689466496245384e-06,
      "loss": 2.4382,
      "step": 58996
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2380777597427368,
      "learning_rate": 2.5686711321983514e-06,
      "loss": 2.3059,
      "step": 58997
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2667531967163086,
      "learning_rate": 2.5683956273702815e-06,
      "loss": 2.2184,
      "step": 58998
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.101057767868042,
      "learning_rate": 2.5681201351408026e-06,
      "loss": 2.2395,
      "step": 58999
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0206407308578491,
      "learning_rate": 2.567844655510375e-06,
      "loss": 2.4114,
      "step": 59000
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0114786624908447,
      "learning_rate": 2.5675691884794764e-06,
      "loss": 2.1184,
      "step": 59001
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9956271648406982,
      "learning_rate": 2.5672937340485594e-06,
      "loss": 2.3834,
      "step": 59002
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0714309215545654,
      "learning_rate": 2.567018292218102e-06,
      "loss": 2.1993,
      "step": 59003
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2176220417022705,
      "learning_rate": 2.566742862988564e-06,
      "loss": 2.5387,
      "step": 59004
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.019923448562622,
      "learning_rate": 2.566467446360419e-06,
      "loss": 2.2178,
      "step": 59005
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0517446994781494,
      "learning_rate": 2.566192042334128e-06,
      "loss": 2.2968,
      "step": 59006
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0610195398330688,
      "learning_rate": 2.565916650910163e-06,
      "loss": 2.2977,
      "step": 59007
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0795128345489502,
      "learning_rate": 2.5656412720889845e-06,
      "loss": 2.3288,
      "step": 59008
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1211507320404053,
      "learning_rate": 2.565365905871068e-06,
      "loss": 2.3409,
      "step": 59009
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9959797859191895,
      "learning_rate": 2.5650905522568736e-06,
      "loss": 2.2407,
      "step": 59010
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2536555528640747,
      "learning_rate": 2.5648152112468684e-06,
      "loss": 2.2293,
      "step": 59011
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0316436290740967,
      "learning_rate": 2.5645398828415224e-06,
      "loss": 2.3955,
      "step": 59012
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.995306134223938,
      "learning_rate": 2.5642645670412993e-06,
      "loss": 2.4182,
      "step": 59013
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.057492971420288,
      "learning_rate": 2.563989263846669e-06,
      "loss": 2.3021,
      "step": 59014
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.099359154701233,
      "learning_rate": 2.563713973258097e-06,
      "loss": 2.1547,
      "step": 59015
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1093510389328003,
      "learning_rate": 2.5634386952760503e-06,
      "loss": 2.5418,
      "step": 59016
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1194562911987305,
      "learning_rate": 2.563163429900991e-06,
      "loss": 2.3075,
      "step": 59017
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0920990705490112,
      "learning_rate": 2.5628881771333923e-06,
      "loss": 2.526,
      "step": 59018
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1600165367126465,
      "learning_rate": 2.5626129369737154e-06,
      "loss": 2.3456,
      "step": 59019
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0647087097167969,
      "learning_rate": 2.562337709422432e-06,
      "loss": 2.2001,
      "step": 59020
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0471285581588745,
      "learning_rate": 2.5620624944800044e-06,
      "loss": 2.4144,
      "step": 59021
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0180449485778809,
      "learning_rate": 2.5617872921469024e-06,
      "loss": 2.4896,
      "step": 59022
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2091491222381592,
      "learning_rate": 2.561512102423589e-06,
      "loss": 2.1902,
      "step": 59023
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0803422927856445,
      "learning_rate": 2.5612369253105364e-06,
      "loss": 2.4535,
      "step": 59024
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.066584587097168,
      "learning_rate": 2.5609617608082036e-06,
      "loss": 2.3457,
      "step": 59025
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0023599863052368,
      "learning_rate": 2.5606866089170644e-06,
      "loss": 2.2476,
      "step": 59026
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0932945013046265,
      "learning_rate": 2.560411469637579e-06,
      "loss": 2.2261,
      "step": 59027
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0600744485855103,
      "learning_rate": 2.5601363429702218e-06,
      "loss": 2.4105,
      "step": 59028
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0760347843170166,
      "learning_rate": 2.559861228915449e-06,
      "loss": 2.2974,
      "step": 59029
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1089109182357788,
      "learning_rate": 2.5595861274737356e-06,
      "loss": 2.388,
      "step": 59030
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0178444385528564,
      "learning_rate": 2.559311038645541e-06,
      "loss": 2.281,
      "step": 59031
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1468037366867065,
      "learning_rate": 2.5590359624313378e-06,
      "loss": 2.1573,
      "step": 59032
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0976964235305786,
      "learning_rate": 2.558760898831587e-06,
      "loss": 2.3448,
      "step": 59033
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0764623880386353,
      "learning_rate": 2.55848584784676e-06,
      "loss": 2.2222,
      "step": 59034
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.227599024772644,
      "learning_rate": 2.5582108094773184e-06,
      "loss": 2.2184,
      "step": 59035
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2083464860916138,
      "learning_rate": 2.557935783723733e-06,
      "loss": 2.5228,
      "step": 59036
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0244450569152832,
      "learning_rate": 2.5576607705864677e-06,
      "loss": 2.1361,
      "step": 59037
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0170844793319702,
      "learning_rate": 2.5573857700659853e-06,
      "loss": 2.1628,
      "step": 59038
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0653278827667236,
      "learning_rate": 2.5571107821627595e-06,
      "loss": 2.1229,
      "step": 59039
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3152960538864136,
      "learning_rate": 2.556835806877248e-06,
      "loss": 2.5165,
      "step": 59040
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0771310329437256,
      "learning_rate": 2.5565608442099253e-06,
      "loss": 2.3012,
      "step": 59041
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0119712352752686,
      "learning_rate": 2.5562858941612533e-06,
      "loss": 2.2245,
      "step": 59042
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.052759051322937,
      "learning_rate": 2.556010956731696e-06,
      "loss": 2.3946,
      "step": 59043
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0481326580047607,
      "learning_rate": 2.5557360319217238e-06,
      "loss": 2.4314,
      "step": 59044
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0945894718170166,
      "learning_rate": 2.5554611197318013e-06,
      "loss": 2.315,
      "step": 59045
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.186250925064087,
      "learning_rate": 2.5551862201623924e-06,
      "loss": 2.2888,
      "step": 59046
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.957642674446106,
      "learning_rate": 2.5549113332139663e-06,
      "loss": 2.3337,
      "step": 59047
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0942283868789673,
      "learning_rate": 2.5546364588869845e-06,
      "loss": 2.454,
      "step": 59048
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0931174755096436,
      "learning_rate": 2.554361597181919e-06,
      "loss": 2.3562,
      "step": 59049
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3701874017715454,
      "learning_rate": 2.5540867480992304e-06,
      "loss": 2.4324,
      "step": 59050
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.073533535003662,
      "learning_rate": 2.55381191163939e-06,
      "loss": 2.1948,
      "step": 59051
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0070220232009888,
      "learning_rate": 2.5535370878028586e-06,
      "loss": 2.4894,
      "step": 59052
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0596320629119873,
      "learning_rate": 2.553262276590106e-06,
      "loss": 2.4246,
      "step": 59053
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.055280327796936,
      "learning_rate": 2.5529874780015963e-06,
      "loss": 2.3577,
      "step": 59054
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0829427242279053,
      "learning_rate": 2.5527126920377963e-06,
      "loss": 2.4846,
      "step": 59055
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2242704629898071,
      "learning_rate": 2.5524379186991677e-06,
      "loss": 2.2611,
      "step": 59056
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1160292625427246,
      "learning_rate": 2.552163157986182e-06,
      "loss": 2.393,
      "step": 59057
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9759480953216553,
      "learning_rate": 2.5518884098993002e-06,
      "loss": 2.2138,
      "step": 59058
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1078336238861084,
      "learning_rate": 2.551613674438993e-06,
      "loss": 2.357,
      "step": 59059
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0910590887069702,
      "learning_rate": 2.551338951605721e-06,
      "loss": 2.177,
      "step": 59060
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2084283828735352,
      "learning_rate": 2.5510642413999544e-06,
      "loss": 2.213,
      "step": 59061
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1135188341140747,
      "learning_rate": 2.5507895438221552e-06,
      "loss": 2.2197,
      "step": 59062
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1551071405410767,
      "learning_rate": 2.5505148588727936e-06,
      "loss": 2.3468,
      "step": 59063
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2844305038452148,
      "learning_rate": 2.5502401865523286e-06,
      "loss": 2.4978,
      "step": 59064
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0077645778656006,
      "learning_rate": 2.5499655268612345e-06,
      "loss": 2.1765,
      "step": 59065
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0329093933105469,
      "learning_rate": 2.549690879799971e-06,
      "loss": 2.4945,
      "step": 59066
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0898030996322632,
      "learning_rate": 2.5494162453690053e-06,
      "loss": 2.2931,
      "step": 59067
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0485622882843018,
      "learning_rate": 2.5491416235687993e-06,
      "loss": 2.3578,
      "step": 59068
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9852373003959656,
      "learning_rate": 2.5488670143998252e-06,
      "loss": 2.441,
      "step": 59069
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0594711303710938,
      "learning_rate": 2.548592417862542e-06,
      "loss": 2.2672,
      "step": 59070
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9953787922859192,
      "learning_rate": 2.548317833957422e-06,
      "loss": 2.2059,
      "step": 59071
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1966382265090942,
      "learning_rate": 2.5480432626849274e-06,
      "loss": 2.4238,
      "step": 59072
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0779304504394531,
      "learning_rate": 2.547768704045519e-06,
      "loss": 2.3055,
      "step": 59073
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.149743676185608,
      "learning_rate": 2.5474941580396707e-06,
      "loss": 2.3433,
      "step": 59074
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1134089231491089,
      "learning_rate": 2.5472196246678406e-06,
      "loss": 2.4266,
      "step": 59075
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0610712766647339,
      "learning_rate": 2.5469451039305004e-06,
      "loss": 2.3164,
      "step": 59076
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.082322120666504,
      "learning_rate": 2.5466705958281092e-06,
      "loss": 2.2401,
      "step": 59077
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1622928380966187,
      "learning_rate": 2.5463961003611394e-06,
      "loss": 2.152,
      "step": 59078
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0871317386627197,
      "learning_rate": 2.5461216175300487e-06,
      "loss": 2.3938,
      "step": 59079
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1666896343231201,
      "learning_rate": 2.5458471473353097e-06,
      "loss": 2.3785,
      "step": 59080
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1247249841690063,
      "learning_rate": 2.5455726897773846e-06,
      "loss": 2.477,
      "step": 59081
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0406626462936401,
      "learning_rate": 2.5452982448567374e-06,
      "loss": 2.1645,
      "step": 59082
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0477769374847412,
      "learning_rate": 2.5450238125738324e-06,
      "loss": 2.3902,
      "step": 59083
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0877691507339478,
      "learning_rate": 2.54474939292914e-06,
      "loss": 2.3482,
      "step": 59084
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0510855913162231,
      "learning_rate": 2.544474985923118e-06,
      "loss": 2.2293,
      "step": 59085
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0540634393692017,
      "learning_rate": 2.5442005915562396e-06,
      "loss": 2.4455,
      "step": 59086
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.157533049583435,
      "learning_rate": 2.543926209828963e-06,
      "loss": 2.3808,
      "step": 59087
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2021559476852417,
      "learning_rate": 2.5436518407417587e-06,
      "loss": 2.3938,
      "step": 59088
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.068461537361145,
      "learning_rate": 2.5433774842950878e-06,
      "loss": 2.3077,
      "step": 59089
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0551892518997192,
      "learning_rate": 2.54310314048942e-06,
      "loss": 2.374,
      "step": 59090
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0838788747787476,
      "learning_rate": 2.542828809325215e-06,
      "loss": 2.461,
      "step": 59091
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0053430795669556,
      "learning_rate": 2.542554490802943e-06,
      "loss": 2.3813,
      "step": 59092
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0066276788711548,
      "learning_rate": 2.542280184923066e-06,
      "loss": 2.2101,
      "step": 59093
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1465929746627808,
      "learning_rate": 2.5420058916860504e-06,
      "loss": 2.4454,
      "step": 59094
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1011168956756592,
      "learning_rate": 2.5417316110923572e-06,
      "loss": 2.1554,
      "step": 59095
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.263141393661499,
      "learning_rate": 2.541457343142457e-06,
      "loss": 2.1425,
      "step": 59096
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.079691767692566,
      "learning_rate": 2.5411830878368106e-06,
      "loss": 2.2447,
      "step": 59097
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2869231700897217,
      "learning_rate": 2.5409088451758867e-06,
      "loss": 2.4711,
      "step": 59098
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2582151889801025,
      "learning_rate": 2.540634615160148e-06,
      "loss": 2.2738,
      "step": 59099
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.081138253211975,
      "learning_rate": 2.540360397790057e-06,
      "loss": 2.1599,
      "step": 59100
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0931758880615234,
      "learning_rate": 2.5400861930660836e-06,
      "loss": 2.4,
      "step": 59101
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1576123237609863,
      "learning_rate": 2.5398120009886883e-06,
      "loss": 2.3255,
      "step": 59102
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.351547122001648,
      "learning_rate": 2.53953782155834e-06,
      "loss": 2.4035,
      "step": 59103
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0280230045318604,
      "learning_rate": 2.5392636547754977e-06,
      "loss": 2.34,
      "step": 59104
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2966862916946411,
      "learning_rate": 2.538989500640634e-06,
      "loss": 2.3535,
      "step": 59105
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0423154830932617,
      "learning_rate": 2.5387153591542056e-06,
      "loss": 2.2553,
      "step": 59106
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0418728590011597,
      "learning_rate": 2.5384412303166882e-06,
      "loss": 2.3235,
      "step": 59107
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9641950130462646,
      "learning_rate": 2.5381671141285326e-06,
      "loss": 2.2667,
      "step": 59108
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0101479291915894,
      "learning_rate": 2.537893010590213e-06,
      "loss": 2.2811,
      "step": 59109
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0901433229446411,
      "learning_rate": 2.5376189197021892e-06,
      "loss": 2.3448,
      "step": 59110
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1149044036865234,
      "learning_rate": 2.5373448414649315e-06,
      "loss": 2.4714,
      "step": 59111
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0521339178085327,
      "learning_rate": 2.537070775878897e-06,
      "loss": 2.2978,
      "step": 59112
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0905719995498657,
      "learning_rate": 2.536796722944558e-06,
      "loss": 2.2368,
      "step": 59113
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0896316766738892,
      "learning_rate": 2.5365226826623722e-06,
      "loss": 2.3642,
      "step": 59114
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0221387147903442,
      "learning_rate": 2.5362486550328115e-06,
      "loss": 2.5986,
      "step": 59115
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9882814884185791,
      "learning_rate": 2.535974640056332e-06,
      "loss": 2.268,
      "step": 59116
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0187370777130127,
      "learning_rate": 2.535700637733407e-06,
      "loss": 2.2966,
      "step": 59117
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0182709693908691,
      "learning_rate": 2.535426648064493e-06,
      "loss": 2.3756,
      "step": 59118
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2796516418457031,
      "learning_rate": 2.5351526710500616e-06,
      "loss": 2.411,
      "step": 59119
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.058630347251892,
      "learning_rate": 2.5348787066905734e-06,
      "loss": 2.409,
      "step": 59120
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1882593631744385,
      "learning_rate": 2.5346047549864926e-06,
      "loss": 2.3353,
      "step": 59121
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0645254850387573,
      "learning_rate": 2.534330815938283e-06,
      "loss": 2.3576,
      "step": 59122
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1176261901855469,
      "learning_rate": 2.5340568895464123e-06,
      "loss": 2.4382,
      "step": 59123
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.730589747428894,
      "learning_rate": 2.5337829758113398e-06,
      "loss": 2.4791,
      "step": 59124
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.150489330291748,
      "learning_rate": 2.5335090747335365e-06,
      "loss": 2.3057,
      "step": 59125
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1584323644638062,
      "learning_rate": 2.5332351863134595e-06,
      "loss": 2.5547,
      "step": 59126
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0523816347122192,
      "learning_rate": 2.5329613105515804e-06,
      "loss": 2.3464,
      "step": 59127
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1397889852523804,
      "learning_rate": 2.5326874474483597e-06,
      "loss": 2.3757,
      "step": 59128
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1985681056976318,
      "learning_rate": 2.5324135970042595e-06,
      "loss": 2.1875,
      "step": 59129
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0855896472930908,
      "learning_rate": 2.5321397592197485e-06,
      "loss": 2.2903,
      "step": 59130
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9602423310279846,
      "learning_rate": 2.5318659340952854e-06,
      "loss": 2.5405,
      "step": 59131
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0745840072631836,
      "learning_rate": 2.531592121631342e-06,
      "loss": 2.1838,
      "step": 59132
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1029434204101562,
      "learning_rate": 2.5313183218283776e-06,
      "loss": 2.3029,
      "step": 59133
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1951335668563843,
      "learning_rate": 2.5310445346868586e-06,
      "loss": 2.3411,
      "step": 59134
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1215020418167114,
      "learning_rate": 2.530770760207243e-06,
      "loss": 2.4254,
      "step": 59135
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.157676339149475,
      "learning_rate": 2.530496998390003e-06,
      "loss": 2.3493,
      "step": 59136
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.145721673965454,
      "learning_rate": 2.530223249235596e-06,
      "loss": 2.1075,
      "step": 59137
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9762012362480164,
      "learning_rate": 2.529949512744493e-06,
      "loss": 2.234,
      "step": 59138
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1168497800827026,
      "learning_rate": 2.5296757889171507e-06,
      "loss": 2.3878,
      "step": 59139
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.041380763053894,
      "learning_rate": 2.5294020777540407e-06,
      "loss": 2.1626,
      "step": 59140
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0974727869033813,
      "learning_rate": 2.52912837925562e-06,
      "loss": 2.2634,
      "step": 59141
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1124212741851807,
      "learning_rate": 2.528854693422359e-06,
      "loss": 2.2928,
      "step": 59142
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0910261869430542,
      "learning_rate": 2.5285810202547147e-06,
      "loss": 2.3677,
      "step": 59143
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1170092821121216,
      "learning_rate": 2.528307359753158e-06,
      "loss": 2.3356,
      "step": 59144
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0441453456878662,
      "learning_rate": 2.528033711918146e-06,
      "loss": 2.3227,
      "step": 59145
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0702733993530273,
      "learning_rate": 2.5277600767501533e-06,
      "loss": 2.1363,
      "step": 59146
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0868394374847412,
      "learning_rate": 2.52748645424963e-06,
      "loss": 2.3892,
      "step": 59147
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1311019659042358,
      "learning_rate": 2.5272128444170495e-06,
      "loss": 2.2608,
      "step": 59148
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0391956567764282,
      "learning_rate": 2.5269392472528707e-06,
      "loss": 2.3448,
      "step": 59149
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2544195652008057,
      "learning_rate": 2.5266656627575626e-06,
      "loss": 2.4287,
      "step": 59150
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1433944702148438,
      "learning_rate": 2.526392090931582e-06,
      "loss": 2.3983,
      "step": 59151
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0244804620742798,
      "learning_rate": 2.5261185317754e-06,
      "loss": 2.4139,
      "step": 59152
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0105559825897217,
      "learning_rate": 2.525844985289474e-06,
      "loss": 2.2185,
      "step": 59153
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9605453610420227,
      "learning_rate": 2.5255714514742735e-06,
      "loss": 2.0991,
      "step": 59154
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.140778660774231,
      "learning_rate": 2.5252979303302595e-06,
      "loss": 2.367,
      "step": 59155
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0714285373687744,
      "learning_rate": 2.5250244218578935e-06,
      "loss": 2.2284,
      "step": 59156
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0654759407043457,
      "learning_rate": 2.524750926057643e-06,
      "loss": 2.184,
      "step": 59157
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.074887990951538,
      "learning_rate": 2.524477442929968e-06,
      "loss": 2.475,
      "step": 59158
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9935689568519592,
      "learning_rate": 2.5242039724753366e-06,
      "loss": 2.4368,
      "step": 59159
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0847142934799194,
      "learning_rate": 2.5239305146942096e-06,
      "loss": 2.3055,
      "step": 59160
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.10042142868042,
      "learning_rate": 2.5236570695870477e-06,
      "loss": 2.4197,
      "step": 59161
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.101783275604248,
      "learning_rate": 2.5233836371543206e-06,
      "loss": 2.1006,
      "step": 59162
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.078015923500061,
      "learning_rate": 2.5231102173964895e-06,
      "loss": 2.1881,
      "step": 59163
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1141031980514526,
      "learning_rate": 2.5228368103140135e-06,
      "loss": 2.3381,
      "step": 59164
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1623104810714722,
      "learning_rate": 2.522563415907363e-06,
      "loss": 2.3335,
      "step": 59165
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.174562931060791,
      "learning_rate": 2.522290034176996e-06,
      "loss": 2.2978,
      "step": 59166
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1115870475769043,
      "learning_rate": 2.5220166651233813e-06,
      "loss": 2.5383,
      "step": 59167
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1454753875732422,
      "learning_rate": 2.5217433087469754e-06,
      "loss": 2.4318,
      "step": 59168
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1817291975021362,
      "learning_rate": 2.52146996504825e-06,
      "loss": 2.2006,
      "step": 59169
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0188477039337158,
      "learning_rate": 2.5211966340276607e-06,
      "loss": 2.1404,
      "step": 59170
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0594083070755005,
      "learning_rate": 2.520923315685676e-06,
      "loss": 2.3965,
      "step": 59171
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0419687032699585,
      "learning_rate": 2.520650010022756e-06,
      "loss": 2.2728,
      "step": 59172
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.166601538658142,
      "learning_rate": 2.520376717039371e-06,
      "loss": 1.9754,
      "step": 59173
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0187928676605225,
      "learning_rate": 2.5201034367359734e-06,
      "loss": 2.1728,
      "step": 59174
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2512401342391968,
      "learning_rate": 2.5198301691130345e-06,
      "loss": 2.2504,
      "step": 59175
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0437240600585938,
      "learning_rate": 2.519556914171013e-06,
      "loss": 2.4726,
      "step": 59176
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0443973541259766,
      "learning_rate": 2.5192836719103765e-06,
      "loss": 2.2544,
      "step": 59177
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9683964848518372,
      "learning_rate": 2.519010442331582e-06,
      "loss": 2.1799,
      "step": 59178
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9549871683120728,
      "learning_rate": 2.5187372254351004e-06,
      "loss": 2.3141,
      "step": 59179
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0623371601104736,
      "learning_rate": 2.5184640212213885e-06,
      "loss": 2.4301,
      "step": 59180
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0346369743347168,
      "learning_rate": 2.5181908296909154e-06,
      "loss": 2.3711,
      "step": 59181
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1387865543365479,
      "learning_rate": 2.5179176508441373e-06,
      "loss": 2.3341,
      "step": 59182
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.396263599395752,
      "learning_rate": 2.517644484681523e-06,
      "loss": 2.3584,
      "step": 59183
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2637300491333008,
      "learning_rate": 2.517371331203534e-06,
      "loss": 2.4308,
      "step": 59184
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0549933910369873,
      "learning_rate": 2.5170981904106306e-06,
      "loss": 2.4191,
      "step": 59185
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0373119115829468,
      "learning_rate": 2.516825062303281e-06,
      "loss": 2.4521,
      "step": 59186
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.103803277015686,
      "learning_rate": 2.5165519468819444e-06,
      "loss": 2.473,
      "step": 59187
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3098100423812866,
      "learning_rate": 2.516278844147082e-06,
      "loss": 2.1807,
      "step": 59188
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0630569458007812,
      "learning_rate": 2.5160057540991635e-06,
      "loss": 2.1776,
      "step": 59189
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0542640686035156,
      "learning_rate": 2.515732676738647e-06,
      "loss": 2.3982,
      "step": 59190
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0030697584152222,
      "learning_rate": 2.5154596120659935e-06,
      "loss": 2.371,
      "step": 59191
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9985368847846985,
      "learning_rate": 2.515186560081672e-06,
      "loss": 2.4411,
      "step": 59192
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0704315900802612,
      "learning_rate": 2.5149135207861374e-06,
      "loss": 2.4543,
      "step": 59193
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0115617513656616,
      "learning_rate": 2.5146404941798617e-06,
      "loss": 2.4438,
      "step": 59194
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0519211292266846,
      "learning_rate": 2.5143674802633012e-06,
      "loss": 2.3679,
      "step": 59195
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0315232276916504,
      "learning_rate": 2.5140944790369228e-06,
      "loss": 2.2283,
      "step": 59196
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.942550778388977,
      "learning_rate": 2.513821490501185e-06,
      "loss": 2.361,
      "step": 59197
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1903839111328125,
      "learning_rate": 2.5135485146565553e-06,
      "loss": 2.3442,
      "step": 59198
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0955321788787842,
      "learning_rate": 2.5132755515034947e-06,
      "loss": 2.2752,
      "step": 59199
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9927736520767212,
      "learning_rate": 2.5130026010424646e-06,
      "loss": 2.3947,
      "step": 59200
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.033674955368042,
      "learning_rate": 2.512729663273926e-06,
      "loss": 2.1001,
      "step": 59201
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1537853479385376,
      "learning_rate": 2.5124567381983477e-06,
      "loss": 2.3396,
      "step": 59202
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0933177471160889,
      "learning_rate": 2.5121838258161856e-06,
      "loss": 2.1602,
      "step": 59203
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2045273780822754,
      "learning_rate": 2.5119109261279084e-06,
      "loss": 2.4019,
      "step": 59204
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1582340002059937,
      "learning_rate": 2.5116380391339725e-06,
      "loss": 2.5938,
      "step": 59205
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.315842866897583,
      "learning_rate": 2.511365164834848e-06,
      "loss": 2.3971,
      "step": 59206
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.086195945739746,
      "learning_rate": 2.51109230323099e-06,
      "loss": 2.2658,
      "step": 59207
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.051902413368225,
      "learning_rate": 2.510819454322867e-06,
      "loss": 2.4335,
      "step": 59208
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.051896572113037,
      "learning_rate": 2.510546618110936e-06,
      "loss": 2.324,
      "step": 59209
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2616422176361084,
      "learning_rate": 2.5102737945956657e-06,
      "loss": 2.3374,
      "step": 59210
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9292720556259155,
      "learning_rate": 2.5100009837775165e-06,
      "loss": 2.2474,
      "step": 59211
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.107543706893921,
      "learning_rate": 2.509728185656949e-06,
      "loss": 2.3021,
      "step": 59212
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1441538333892822,
      "learning_rate": 2.509455400234423e-06,
      "loss": 2.2693,
      "step": 59213
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3123034238815308,
      "learning_rate": 2.509182627510408e-06,
      "loss": 2.2666,
      "step": 59214
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.212327003479004,
      "learning_rate": 2.5089098674853606e-06,
      "loss": 2.1575,
      "step": 59215
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0940792560577393,
      "learning_rate": 2.5086371201597483e-06,
      "loss": 2.094,
      "step": 59216
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.006401777267456,
      "learning_rate": 2.5083643855340266e-06,
      "loss": 2.2419,
      "step": 59217
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0856008529663086,
      "learning_rate": 2.508091663608666e-06,
      "loss": 2.4328,
      "step": 59218
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0743541717529297,
      "learning_rate": 2.5078189543841247e-06,
      "loss": 2.2706,
      "step": 59219
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0144413709640503,
      "learning_rate": 2.507546257860862e-06,
      "loss": 2.1743,
      "step": 59220
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0631227493286133,
      "learning_rate": 2.507273574039346e-06,
      "loss": 2.144,
      "step": 59221
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1189669370651245,
      "learning_rate": 2.5070009029200336e-06,
      "loss": 2.2489,
      "step": 59222
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.155066728591919,
      "learning_rate": 2.5067282445033926e-06,
      "loss": 2.4046,
      "step": 59223
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.077300786972046,
      "learning_rate": 2.50645559878988e-06,
      "loss": 2.1367,
      "step": 59224
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0046930313110352,
      "learning_rate": 2.506182965779963e-06,
      "loss": 2.2273,
      "step": 59225
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.021557092666626,
      "learning_rate": 2.5059103454741006e-06,
      "loss": 2.3338,
      "step": 59226
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0760424137115479,
      "learning_rate": 2.505637737872756e-06,
      "loss": 2.5531,
      "step": 59227
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1166561841964722,
      "learning_rate": 2.505365142976388e-06,
      "loss": 2.4469,
      "step": 59228
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0501148700714111,
      "learning_rate": 2.5050925607854647e-06,
      "loss": 2.4516,
      "step": 59229
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0385433435440063,
      "learning_rate": 2.504819991300442e-06,
      "loss": 2.2398,
      "step": 59230
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.088560700416565,
      "learning_rate": 2.5045474345217878e-06,
      "loss": 2.5168,
      "step": 59231
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1193856000900269,
      "learning_rate": 2.504274890449958e-06,
      "loss": 2.2246,
      "step": 59232
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0668071508407593,
      "learning_rate": 2.5040023590854223e-06,
      "loss": 2.3713,
      "step": 59233
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0750337839126587,
      "learning_rate": 2.5037298404286348e-06,
      "loss": 2.1839,
      "step": 59234
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3236771821975708,
      "learning_rate": 2.503457334480064e-06,
      "loss": 2.2277,
      "step": 59235
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2510017156600952,
      "learning_rate": 2.503184841240166e-06,
      "loss": 2.2681,
      "step": 59236
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1403559446334839,
      "learning_rate": 2.5029123607094097e-06,
      "loss": 2.3107,
      "step": 59237
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0602450370788574,
      "learning_rate": 2.502639892888249e-06,
      "loss": 2.1532,
      "step": 59238
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0902190208435059,
      "learning_rate": 2.502367437777157e-06,
      "loss": 2.4379,
      "step": 59239
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1201329231262207,
      "learning_rate": 2.5020949953765814e-06,
      "loss": 2.4674,
      "step": 59240
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1252970695495605,
      "learning_rate": 2.5018225656869953e-06,
      "loss": 2.4787,
      "step": 59241
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.10499107837677,
      "learning_rate": 2.501550148708852e-06,
      "loss": 2.5371,
      "step": 59242
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9114458560943604,
      "learning_rate": 2.501277744442623e-06,
      "loss": 2.1336,
      "step": 59243
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9840642809867859,
      "learning_rate": 2.50100535288876e-06,
      "loss": 2.378,
      "step": 59244
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0816582441329956,
      "learning_rate": 2.5007329740477324e-06,
      "loss": 2.3133,
      "step": 59245
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.082753300666809,
      "learning_rate": 2.50046060792e-06,
      "loss": 2.2119,
      "step": 59246
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.048911452293396,
      "learning_rate": 2.5001882545060206e-06,
      "loss": 2.1463,
      "step": 59247
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1961451768875122,
      "learning_rate": 2.4999159138062623e-06,
      "loss": 2.2249,
      "step": 59248
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1586370468139648,
      "learning_rate": 2.499643585821181e-06,
      "loss": 2.0859,
      "step": 59249
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1621216535568237,
      "learning_rate": 2.499371270551243e-06,
      "loss": 2.2225,
      "step": 59250
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2005789279937744,
      "learning_rate": 2.4990989679969045e-06,
      "loss": 2.2719,
      "step": 59251
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9898939728736877,
      "learning_rate": 2.4988266781586367e-06,
      "loss": 2.402,
      "step": 59252
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0856069326400757,
      "learning_rate": 2.49855440103689e-06,
      "loss": 2.415,
      "step": 59253
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1398273706436157,
      "learning_rate": 2.498282136632133e-06,
      "loss": 2.3066,
      "step": 59254
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0400971174240112,
      "learning_rate": 2.498009884944822e-06,
      "loss": 2.5447,
      "step": 59255
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0051689147949219,
      "learning_rate": 2.4977376459754253e-06,
      "loss": 2.2864,
      "step": 59256
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1420708894729614,
      "learning_rate": 2.4974654197243977e-06,
      "loss": 2.2325,
      "step": 59257
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0198493003845215,
      "learning_rate": 2.4971932061922067e-06,
      "loss": 2.448,
      "step": 59258
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.129288911819458,
      "learning_rate": 2.496921005379308e-06,
      "loss": 2.4345,
      "step": 59259
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9876202940940857,
      "learning_rate": 2.496648817286169e-06,
      "loss": 2.1842,
      "step": 59260
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0286974906921387,
      "learning_rate": 2.496376641913246e-06,
      "loss": 2.3343,
      "step": 59261
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0614264011383057,
      "learning_rate": 2.4961044792610047e-06,
      "loss": 2.2383,
      "step": 59262
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1478041410446167,
      "learning_rate": 2.4958323293299013e-06,
      "loss": 2.3145,
      "step": 59263
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0360183715820312,
      "learning_rate": 2.495560192120403e-06,
      "loss": 2.2163,
      "step": 59264
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0113048553466797,
      "learning_rate": 2.4952880676329696e-06,
      "loss": 2.3338,
      "step": 59265
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1687740087509155,
      "learning_rate": 2.49501595586806e-06,
      "loss": 2.203,
      "step": 59266
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0047065019607544,
      "learning_rate": 2.494743856826134e-06,
      "loss": 2.272,
      "step": 59267
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.151635766029358,
      "learning_rate": 2.494471770507658e-06,
      "loss": 2.3506,
      "step": 59268
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2505449056625366,
      "learning_rate": 2.4941996969130887e-06,
      "loss": 2.3667,
      "step": 59269
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0744788646697998,
      "learning_rate": 2.4939276360428923e-06,
      "loss": 2.4985,
      "step": 59270
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.031980276107788,
      "learning_rate": 2.4936555878975245e-06,
      "loss": 2.2344,
      "step": 59271
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4518916606903076,
      "learning_rate": 2.493383552477452e-06,
      "loss": 2.1857,
      "step": 59272
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.091470718383789,
      "learning_rate": 2.493111529783132e-06,
      "loss": 2.1478,
      "step": 59273
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1090443134307861,
      "learning_rate": 2.4928395198150244e-06,
      "loss": 2.3971,
      "step": 59274
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0714001655578613,
      "learning_rate": 2.492567522573597e-06,
      "loss": 2.2859,
      "step": 59275
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2100099325180054,
      "learning_rate": 2.4922955380593016e-06,
      "loss": 2.1925,
      "step": 59276
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0525131225585938,
      "learning_rate": 2.492023566272609e-06,
      "loss": 2.2164,
      "step": 59277
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0704419612884521,
      "learning_rate": 2.491751607213975e-06,
      "loss": 2.3705,
      "step": 59278
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0225448608398438,
      "learning_rate": 2.491479660883858e-06,
      "loss": 2.2262,
      "step": 59279
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0593839883804321,
      "learning_rate": 2.491207727282725e-06,
      "loss": 2.3203,
      "step": 59280
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3450231552124023,
      "learning_rate": 2.4909358064110357e-06,
      "loss": 2.2267,
      "step": 59281
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1497435569763184,
      "learning_rate": 2.4906638982692454e-06,
      "loss": 2.0694,
      "step": 59282
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7750645875930786,
      "learning_rate": 2.490392002857822e-06,
      "loss": 2.2631,
      "step": 59283
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9929793477058411,
      "learning_rate": 2.490120120177222e-06,
      "loss": 2.3603,
      "step": 59284
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0829142332077026,
      "learning_rate": 2.4898482502279096e-06,
      "loss": 2.5061,
      "step": 59285
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.119249701499939,
      "learning_rate": 2.4895763930103424e-06,
      "loss": 2.3878,
      "step": 59286
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1196526288986206,
      "learning_rate": 2.4893045485249855e-06,
      "loss": 2.2987,
      "step": 59287
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9900912642478943,
      "learning_rate": 2.489032716772294e-06,
      "loss": 2.2484,
      "step": 59288
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2547050714492798,
      "learning_rate": 2.4887608977527354e-06,
      "loss": 2.2289,
      "step": 59289
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0729484558105469,
      "learning_rate": 2.4884890914667638e-06,
      "loss": 2.1491,
      "step": 59290
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0587642192840576,
      "learning_rate": 2.488217297914849e-06,
      "loss": 2.455,
      "step": 59291
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2799783945083618,
      "learning_rate": 2.4879455170974407e-06,
      "loss": 2.2364,
      "step": 59292
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1302275657653809,
      "learning_rate": 2.4876737490150085e-06,
      "loss": 2.1366,
      "step": 59293
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3125356435775757,
      "learning_rate": 2.487401993668006e-06,
      "loss": 2.1986,
      "step": 59294
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.087246060371399,
      "learning_rate": 2.4871302510568995e-06,
      "loss": 2.5242,
      "step": 59295
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0330883264541626,
      "learning_rate": 2.486858521182146e-06,
      "loss": 2.258,
      "step": 59296
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0698935985565186,
      "learning_rate": 2.486586804044211e-06,
      "loss": 2.374,
      "step": 59297
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0681536197662354,
      "learning_rate": 2.486315099643548e-06,
      "loss": 2.2587,
      "step": 59298
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0367162227630615,
      "learning_rate": 2.486043407980625e-06,
      "loss": 2.1217,
      "step": 59299
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2179564237594604,
      "learning_rate": 2.485771729055896e-06,
      "loss": 2.2099,
      "step": 59300
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.155035138130188,
      "learning_rate": 2.4855000628698276e-06,
      "loss": 2.4591,
      "step": 59301
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.181930422782898,
      "learning_rate": 2.4852284094228786e-06,
      "loss": 2.1649,
      "step": 59302
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0565471649169922,
      "learning_rate": 2.484956768715505e-06,
      "loss": 2.395,
      "step": 59303
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1023521423339844,
      "learning_rate": 2.484685140748173e-06,
      "loss": 2.1111,
      "step": 59304
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0945687294006348,
      "learning_rate": 2.484413525521342e-06,
      "loss": 2.3769,
      "step": 59305
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9612343311309814,
      "learning_rate": 2.4841419230354667e-06,
      "loss": 2.3553,
      "step": 59306
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0612969398498535,
      "learning_rate": 2.4838703332910163e-06,
      "loss": 2.2012,
      "step": 59307
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.096954345703125,
      "learning_rate": 2.483598756288447e-06,
      "loss": 2.0362,
      "step": 59308
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0949310064315796,
      "learning_rate": 2.4833271920282155e-06,
      "loss": 2.2056,
      "step": 59309
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.245417594909668,
      "learning_rate": 2.4830556405107897e-06,
      "loss": 2.4753,
      "step": 59310
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0833172798156738,
      "learning_rate": 2.4827841017366226e-06,
      "loss": 2.3838,
      "step": 59311
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1167736053466797,
      "learning_rate": 2.482512575706182e-06,
      "loss": 2.3406,
      "step": 59312
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0601255893707275,
      "learning_rate": 2.48224106241992e-06,
      "loss": 2.2862,
      "step": 59313
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.515650749206543,
      "learning_rate": 2.4819695618783047e-06,
      "loss": 2.3514,
      "step": 59314
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.125658631324768,
      "learning_rate": 2.48169807408179e-06,
      "loss": 2.4185,
      "step": 59315
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2203088998794556,
      "learning_rate": 2.481426599030842e-06,
      "loss": 2.1604,
      "step": 59316
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.014138102531433,
      "learning_rate": 2.4811551367259144e-06,
      "loss": 2.3107,
      "step": 59317
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1019964218139648,
      "learning_rate": 2.480883687167478e-06,
      "loss": 2.4305,
      "step": 59318
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0612229108810425,
      "learning_rate": 2.4806122503559793e-06,
      "loss": 2.5238,
      "step": 59319
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1336174011230469,
      "learning_rate": 2.4803408262918883e-06,
      "loss": 2.2246,
      "step": 59320
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0659412145614624,
      "learning_rate": 2.4800694149756587e-06,
      "loss": 2.2074,
      "step": 59321
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2356876134872437,
      "learning_rate": 2.4797980164077574e-06,
      "loss": 2.3169,
      "step": 59322
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1549874544143677,
      "learning_rate": 2.4795266305886366e-06,
      "loss": 2.4269,
      "step": 59323
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0990078449249268,
      "learning_rate": 2.4792552575187644e-06,
      "loss": 2.4337,
      "step": 59324
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0989234447479248,
      "learning_rate": 2.4789838971985947e-06,
      "loss": 2.239,
      "step": 59325
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1514908075332642,
      "learning_rate": 2.4787125496285922e-06,
      "loss": 2.5611,
      "step": 59326
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1312311887741089,
      "learning_rate": 2.4784412148092117e-06,
      "loss": 2.3395,
      "step": 59327
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9855976700782776,
      "learning_rate": 2.4781698927409204e-06,
      "loss": 2.2606,
      "step": 59328
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1331106424331665,
      "learning_rate": 2.477898583424173e-06,
      "loss": 2.583,
      "step": 59329
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2306965589523315,
      "learning_rate": 2.4776272868594275e-06,
      "loss": 2.2998,
      "step": 59330
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1212618350982666,
      "learning_rate": 2.47735600304715e-06,
      "loss": 2.3211,
      "step": 59331
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.076522946357727,
      "learning_rate": 2.4770847319877964e-06,
      "loss": 2.2751,
      "step": 59332
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1330535411834717,
      "learning_rate": 2.476813473681825e-06,
      "loss": 2.5847,
      "step": 59333
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0408546924591064,
      "learning_rate": 2.4765422281297013e-06,
      "loss": 2.4552,
      "step": 59334
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0637258291244507,
      "learning_rate": 2.4762709953318788e-06,
      "loss": 2.5907,
      "step": 59335
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.163013219833374,
      "learning_rate": 2.4759997752888232e-06,
      "loss": 2.1541,
      "step": 59336
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0698236227035522,
      "learning_rate": 2.475728568000991e-06,
      "loss": 2.3376,
      "step": 59337
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0759252309799194,
      "learning_rate": 2.4754573734688405e-06,
      "loss": 2.4387,
      "step": 59338
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0723508596420288,
      "learning_rate": 2.4751861916928353e-06,
      "loss": 2.1765,
      "step": 59339
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1830430030822754,
      "learning_rate": 2.474915022673432e-06,
      "loss": 2.5168,
      "step": 59340
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.031017541885376,
      "learning_rate": 2.4746438664110927e-06,
      "loss": 2.3253,
      "step": 59341
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1054706573486328,
      "learning_rate": 2.474372722906273e-06,
      "loss": 2.4353,
      "step": 59342
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0982202291488647,
      "learning_rate": 2.474101592159439e-06,
      "loss": 2.3607,
      "step": 59343
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1036335229873657,
      "learning_rate": 2.4738304741710474e-06,
      "loss": 2.4479,
      "step": 59344
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1103473901748657,
      "learning_rate": 2.4735593689415562e-06,
      "loss": 2.5956,
      "step": 59345
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0493550300598145,
      "learning_rate": 2.4732882764714237e-06,
      "loss": 2.3623,
      "step": 59346
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0702818632125854,
      "learning_rate": 2.4730171967611137e-06,
      "loss": 2.3315,
      "step": 59347
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.148171067237854,
      "learning_rate": 2.4727461298110823e-06,
      "loss": 2.1815,
      "step": 59348
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0218390226364136,
      "learning_rate": 2.472475075621793e-06,
      "loss": 2.0988,
      "step": 59349
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1038426160812378,
      "learning_rate": 2.4722040341936993e-06,
      "loss": 2.4368,
      "step": 59350
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1513206958770752,
      "learning_rate": 2.4719330055272685e-06,
      "loss": 2.1439,
      "step": 59351
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9981854557991028,
      "learning_rate": 2.471661989622952e-06,
      "loss": 2.51,
      "step": 59352
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.058167576789856,
      "learning_rate": 2.471390986481216e-06,
      "loss": 2.137,
      "step": 59353
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1307646036148071,
      "learning_rate": 2.4711199961025143e-06,
      "loss": 2.1744,
      "step": 59354
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1614184379577637,
      "learning_rate": 2.4708490184873123e-06,
      "loss": 2.4944,
      "step": 59355
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2442330121994019,
      "learning_rate": 2.4705780536360625e-06,
      "loss": 2.4293,
      "step": 59356
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9692216515541077,
      "learning_rate": 2.470307101549234e-06,
      "loss": 2.5921,
      "step": 59357
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2145756483078003,
      "learning_rate": 2.470036162227275e-06,
      "loss": 2.2241,
      "step": 59358
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0698108673095703,
      "learning_rate": 2.4697652356706513e-06,
      "loss": 2.0925,
      "step": 59359
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1010559797286987,
      "learning_rate": 2.4694943218798185e-06,
      "loss": 2.3776,
      "step": 59360
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1587475538253784,
      "learning_rate": 2.4692234208552414e-06,
      "loss": 2.4584,
      "step": 59361
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.106002688407898,
      "learning_rate": 2.468952532597372e-06,
      "loss": 2.3622,
      "step": 59362
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9614084959030151,
      "learning_rate": 2.4686816571066775e-06,
      "loss": 2.0368,
      "step": 59363
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0328857898712158,
      "learning_rate": 2.4684107943836123e-06,
      "loss": 2.2299,
      "step": 59364
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2191746234893799,
      "learning_rate": 2.4681399444286336e-06,
      "loss": 2.2712,
      "step": 59365
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1518127918243408,
      "learning_rate": 2.467869107242207e-06,
      "loss": 2.2893,
      "step": 59366
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0311421155929565,
      "learning_rate": 2.4675982828247837e-06,
      "loss": 2.1261,
      "step": 59367
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9756578207015991,
      "learning_rate": 2.4673274711768303e-06,
      "loss": 2.3805,
      "step": 59368
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0100799798965454,
      "learning_rate": 2.4670566722987998e-06,
      "loss": 2.3724,
      "step": 59369
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1024478673934937,
      "learning_rate": 2.466785886191161e-06,
      "loss": 2.259,
      "step": 59370
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.055989146232605,
      "learning_rate": 2.4665151128543596e-06,
      "loss": 2.412,
      "step": 59371
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0281188488006592,
      "learning_rate": 2.4662443522888634e-06,
      "loss": 2.1769,
      "step": 59372
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.339308500289917,
      "learning_rate": 2.4659736044951278e-06,
      "loss": 2.4012,
      "step": 59373
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1573138236999512,
      "learning_rate": 2.465702869473615e-06,
      "loss": 2.0942,
      "step": 59374
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.042245626449585,
      "learning_rate": 2.465432147224779e-06,
      "loss": 2.407,
      "step": 59375
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.073043942451477,
      "learning_rate": 2.465161437749085e-06,
      "loss": 2.3562,
      "step": 59376
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.387143611907959,
      "learning_rate": 2.464890741046986e-06,
      "loss": 2.4335,
      "step": 59377
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.081274390220642,
      "learning_rate": 2.464620057118947e-06,
      "loss": 2.4342,
      "step": 59378
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1027731895446777,
      "learning_rate": 2.46434938596542e-06,
      "loss": 2.5132,
      "step": 59379
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.116621494293213,
      "learning_rate": 2.4640787275868704e-06,
      "loss": 2.3958,
      "step": 59380
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1557284593582153,
      "learning_rate": 2.4638080819837517e-06,
      "loss": 2.4028,
      "step": 59381
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0993825197219849,
      "learning_rate": 2.463537449156528e-06,
      "loss": 2.2876,
      "step": 59382
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1940423250198364,
      "learning_rate": 2.4632668291056517e-06,
      "loss": 2.1955,
      "step": 59383
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.123879075050354,
      "learning_rate": 2.4629962218315917e-06,
      "loss": 2.4267,
      "step": 59384
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1092089414596558,
      "learning_rate": 2.462725627334793e-06,
      "loss": 2.4578,
      "step": 59385
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0575028657913208,
      "learning_rate": 2.462455045615726e-06,
      "loss": 2.2236,
      "step": 59386
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0974323749542236,
      "learning_rate": 2.4621844766748417e-06,
      "loss": 1.9236,
      "step": 59387
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0396071672439575,
      "learning_rate": 2.461913920512604e-06,
      "loss": 2.329,
      "step": 59388
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1641497611999512,
      "learning_rate": 2.461643377129467e-06,
      "loss": 2.2716,
      "step": 59389
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0412386655807495,
      "learning_rate": 2.4613728465258946e-06,
      "loss": 2.3902,
      "step": 59390
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2047384977340698,
      "learning_rate": 2.461102328702343e-06,
      "loss": 2.4702,
      "step": 59391
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0436341762542725,
      "learning_rate": 2.4608318236592666e-06,
      "loss": 2.2954,
      "step": 59392
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0795000791549683,
      "learning_rate": 2.460561331397132e-06,
      "loss": 2.309,
      "step": 59393
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2171459197998047,
      "learning_rate": 2.4602908519163905e-06,
      "loss": 2.3245,
      "step": 59394
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1375654935836792,
      "learning_rate": 2.4600203852175065e-06,
      "loss": 2.3329,
      "step": 59395
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1182584762573242,
      "learning_rate": 2.459749931300932e-06,
      "loss": 2.4172,
      "step": 59396
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9960029721260071,
      "learning_rate": 2.459479490167134e-06,
      "loss": 2.0744,
      "step": 59397
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1773571968078613,
      "learning_rate": 2.4592090618165654e-06,
      "loss": 2.4132,
      "step": 59398
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0519461631774902,
      "learning_rate": 2.4589386462496846e-06,
      "loss": 2.3694,
      "step": 59399
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0189976692199707,
      "learning_rate": 2.4586682434669485e-06,
      "loss": 2.216,
      "step": 59400
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1021966934204102,
      "learning_rate": 2.4583978534688215e-06,
      "loss": 2.0623,
      "step": 59401
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.069345474243164,
      "learning_rate": 2.4581274762557548e-06,
      "loss": 2.5677,
      "step": 59402
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1402795314788818,
      "learning_rate": 2.4578571118282133e-06,
      "loss": 2.4552,
      "step": 59403
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.251960039138794,
      "learning_rate": 2.45758676018665e-06,
      "loss": 2.1517,
      "step": 59404
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1030522584915161,
      "learning_rate": 2.4573164213315295e-06,
      "loss": 2.2743,
      "step": 59405
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0884193181991577,
      "learning_rate": 2.4570460952633014e-06,
      "loss": 2.2456,
      "step": 59406
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.248361349105835,
      "learning_rate": 2.456775781982432e-06,
      "loss": 2.1152,
      "step": 59407
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1121267080307007,
      "learning_rate": 2.456505481489374e-06,
      "loss": 2.4473,
      "step": 59408
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1430519819259644,
      "learning_rate": 2.456235193784591e-06,
      "loss": 2.3418,
      "step": 59409
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1210377216339111,
      "learning_rate": 2.4559649188685386e-06,
      "loss": 2.2183,
      "step": 59410
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.178399682044983,
      "learning_rate": 2.455694656741674e-06,
      "loss": 2.4364,
      "step": 59411
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.303945541381836,
      "learning_rate": 2.455424407404453e-06,
      "loss": 2.5485,
      "step": 59412
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0957889556884766,
      "learning_rate": 2.455154170857339e-06,
      "loss": 2.4172,
      "step": 59413
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0733799934387207,
      "learning_rate": 2.454883947100786e-06,
      "loss": 2.419,
      "step": 59414
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0811586380004883,
      "learning_rate": 2.4546137361352564e-06,
      "loss": 2.3996,
      "step": 59415
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1636053323745728,
      "learning_rate": 2.4543435379612035e-06,
      "loss": 2.1839,
      "step": 59416
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9927530884742737,
      "learning_rate": 2.4540733525790896e-06,
      "loss": 2.296,
      "step": 59417
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.226831316947937,
      "learning_rate": 2.453803179989368e-06,
      "loss": 2.3287,
      "step": 59418
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9772191047668457,
      "learning_rate": 2.4535330201925033e-06,
      "loss": 2.1804,
      "step": 59419
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.047931432723999,
      "learning_rate": 2.4532628731889487e-06,
      "loss": 2.2839,
      "step": 59420
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.117427110671997,
      "learning_rate": 2.452992738979161e-06,
      "loss": 2.1489,
      "step": 59421
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1362797021865845,
      "learning_rate": 2.452722617563603e-06,
      "loss": 2.3834,
      "step": 59422
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9953382611274719,
      "learning_rate": 2.45245250894273e-06,
      "loss": 2.2837,
      "step": 59423
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0027662515640259,
      "learning_rate": 2.452182413116997e-06,
      "loss": 2.3801,
      "step": 59424
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0326300859451294,
      "learning_rate": 2.4519123300868673e-06,
      "loss": 2.2445,
      "step": 59425
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1735424995422363,
      "learning_rate": 2.4516422598527966e-06,
      "loss": 2.2722,
      "step": 59426
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0420875549316406,
      "learning_rate": 2.45137220241524e-06,
      "loss": 2.4451,
      "step": 59427
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1697348356246948,
      "learning_rate": 2.4511021577746597e-06,
      "loss": 2.1436,
      "step": 59428
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0899968147277832,
      "learning_rate": 2.4508321259315095e-06,
      "loss": 2.5811,
      "step": 59429
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1339974403381348,
      "learning_rate": 2.4505621068862517e-06,
      "loss": 2.2083,
      "step": 59430
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2472407817840576,
      "learning_rate": 2.4502921006393386e-06,
      "loss": 2.2468,
      "step": 59431
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.04666268825531,
      "learning_rate": 2.4500221071912334e-06,
      "loss": 2.1131,
      "step": 59432
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0330368280410767,
      "learning_rate": 2.4497521265423887e-06,
      "loss": 2.4699,
      "step": 59433
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0891282558441162,
      "learning_rate": 2.449482158693267e-06,
      "loss": 2.2742,
      "step": 59434
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1277940273284912,
      "learning_rate": 2.4492122036443224e-06,
      "loss": 2.2325,
      "step": 59435
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2251489162445068,
      "learning_rate": 2.448942261396018e-06,
      "loss": 2.4282,
      "step": 59436
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0730997323989868,
      "learning_rate": 2.4486723319488027e-06,
      "loss": 2.4144,
      "step": 59437
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.062235951423645,
      "learning_rate": 2.4484024153031405e-06,
      "loss": 2.3049,
      "step": 59438
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.201212763786316,
      "learning_rate": 2.4481325114594855e-06,
      "loss": 2.4357,
      "step": 59439
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0562609434127808,
      "learning_rate": 2.4478626204182986e-06,
      "loss": 2.2664,
      "step": 59440
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.016219973564148,
      "learning_rate": 2.4475927421800337e-06,
      "loss": 2.327,
      "step": 59441
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.01194167137146,
      "learning_rate": 2.447322876745153e-06,
      "loss": 2.4265,
      "step": 59442
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.162611722946167,
      "learning_rate": 2.447053024114109e-06,
      "loss": 2.2694,
      "step": 59443
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9470739364624023,
      "learning_rate": 2.4467831842873636e-06,
      "loss": 2.1962,
      "step": 59444
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9444461464881897,
      "learning_rate": 2.446513357265369e-06,
      "loss": 2.2827,
      "step": 59445
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3192596435546875,
      "learning_rate": 2.446243543048589e-06,
      "loss": 2.2604,
      "step": 59446
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2621065378189087,
      "learning_rate": 2.4459737416374773e-06,
      "loss": 2.3152,
      "step": 59447
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0275280475616455,
      "learning_rate": 2.44570395303249e-06,
      "loss": 2.1805,
      "step": 59448
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1033201217651367,
      "learning_rate": 2.4454341772340883e-06,
      "loss": 2.2004,
      "step": 59449
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1259013414382935,
      "learning_rate": 2.4451644142427277e-06,
      "loss": 2.1888,
      "step": 59450
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0039246082305908,
      "learning_rate": 2.4448946640588622e-06,
      "loss": 2.2286,
      "step": 59451
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.076517939567566,
      "learning_rate": 2.444624926682955e-06,
      "loss": 2.3339,
      "step": 59452
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1458934545516968,
      "learning_rate": 2.4443552021154584e-06,
      "loss": 2.2499,
      "step": 59453
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.118910551071167,
      "learning_rate": 2.4440854903568335e-06,
      "loss": 2.4574,
      "step": 59454
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1548179388046265,
      "learning_rate": 2.4438157914075368e-06,
      "loss": 2.2315,
      "step": 59455
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.155696988105774,
      "learning_rate": 2.443546105268021e-06,
      "loss": 2.2457,
      "step": 59456
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1052075624465942,
      "learning_rate": 2.4432764319387502e-06,
      "loss": 2.326,
      "step": 59457
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9714183211326599,
      "learning_rate": 2.4430067714201756e-06,
      "loss": 2.2703,
      "step": 59458
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1462416648864746,
      "learning_rate": 2.4427371237127596e-06,
      "loss": 2.2707,
      "step": 59459
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0299190282821655,
      "learning_rate": 2.442467488816954e-06,
      "loss": 2.4245,
      "step": 59460
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1445456743240356,
      "learning_rate": 2.4421978667332216e-06,
      "loss": 2.1333,
      "step": 59461
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.128454566001892,
      "learning_rate": 2.441928257462013e-06,
      "loss": 2.3134,
      "step": 59462
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2447491884231567,
      "learning_rate": 2.441658661003794e-06,
      "loss": 2.3067,
      "step": 59463
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1102312803268433,
      "learning_rate": 2.441389077359012e-06,
      "loss": 2.3862,
      "step": 59464
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0411365032196045,
      "learning_rate": 2.44111950652813e-06,
      "loss": 2.226,
      "step": 59465
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.157936930656433,
      "learning_rate": 2.4408499485116023e-06,
      "loss": 2.3588,
      "step": 59466
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1576424837112427,
      "learning_rate": 2.440580403309888e-06,
      "loss": 2.13,
      "step": 59467
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0074173212051392,
      "learning_rate": 2.4403108709234412e-06,
      "loss": 2.3374,
      "step": 59468
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1667695045471191,
      "learning_rate": 2.4400413513527233e-06,
      "loss": 2.3042,
      "step": 59469
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.083497166633606,
      "learning_rate": 2.4397718445981855e-06,
      "loss": 2.434,
      "step": 59470
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1026735305786133,
      "learning_rate": 2.439502350660291e-06,
      "loss": 2.0827,
      "step": 59471
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9959573745727539,
      "learning_rate": 2.4392328695394897e-06,
      "loss": 2.338,
      "step": 59472
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0307204723358154,
      "learning_rate": 2.4389634012362464e-06,
      "loss": 2.235,
      "step": 59473
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0191056728363037,
      "learning_rate": 2.4386939457510095e-06,
      "loss": 2.2566,
      "step": 59474
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1665024757385254,
      "learning_rate": 2.4384245030842436e-06,
      "loss": 2.3436,
      "step": 59475
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1735659837722778,
      "learning_rate": 2.438155073236401e-06,
      "loss": 2.1779,
      "step": 59476
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.06732177734375,
      "learning_rate": 2.43788565620794e-06,
      "loss": 2.4111,
      "step": 59477
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0183836221694946,
      "learning_rate": 2.437616251999314e-06,
      "loss": 2.4964,
      "step": 59478
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3083698749542236,
      "learning_rate": 2.4373468606109853e-06,
      "loss": 2.2175,
      "step": 59479
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9805930256843567,
      "learning_rate": 2.437077482043404e-06,
      "loss": 2.2458,
      "step": 59480
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0759046077728271,
      "learning_rate": 2.4368081162970336e-06,
      "loss": 2.36,
      "step": 59481
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.21527898311615,
      "learning_rate": 2.436538763372328e-06,
      "loss": 2.214,
      "step": 59482
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0021345615386963,
      "learning_rate": 2.43626942326974e-06,
      "loss": 2.0469,
      "step": 59483
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1363540887832642,
      "learning_rate": 2.4360000959897335e-06,
      "loss": 2.2855,
      "step": 59484
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9820796847343445,
      "learning_rate": 2.435730781532758e-06,
      "loss": 2.3841,
      "step": 59485
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1626451015472412,
      "learning_rate": 2.435461479899275e-06,
      "loss": 2.4179,
      "step": 59486
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9935370683670044,
      "learning_rate": 2.435192191089737e-06,
      "loss": 2.3068,
      "step": 59487
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0441874265670776,
      "learning_rate": 2.434922915104606e-06,
      "loss": 2.4427,
      "step": 59488
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.9961960911750793,
      "learning_rate": 2.4346536519443353e-06,
      "loss": 2.2275,
      "step": 59489
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0431805849075317,
      "learning_rate": 2.434384401609381e-06,
      "loss": 2.383,
      "step": 59490
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3376812934875488,
      "learning_rate": 2.434115164100198e-06,
      "loss": 2.3427,
      "step": 59491
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0207960605621338,
      "learning_rate": 2.4338459394172465e-06,
      "loss": 2.2336,
      "step": 59492
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1202858686447144,
      "learning_rate": 2.433576727560978e-06,
      "loss": 2.346,
      "step": 59493
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.008028268814087,
      "learning_rate": 2.4333075285318552e-06,
      "loss": 2.166,
      "step": 59494
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.045468807220459,
      "learning_rate": 2.4330383423303295e-06,
      "loss": 2.3484,
      "step": 59495
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.579199194908142,
      "learning_rate": 2.43276916895686e-06,
      "loss": 2.2344,
      "step": 59496
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.160049319267273,
      "learning_rate": 2.4325000084119e-06,
      "loss": 2.2691,
      "step": 59497
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.065712809562683,
      "learning_rate": 2.432230860695911e-06,
      "loss": 2.4902,
      "step": 59498
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.227161169052124,
      "learning_rate": 2.431961725809343e-06,
      "loss": 2.1717,
      "step": 59499
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1503952741622925,
      "learning_rate": 2.4316926037526577e-06,
      "loss": 2.119,
      "step": 59500
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.049748420715332,
      "learning_rate": 2.431423494526306e-06,
      "loss": 2.2785,
      "step": 59501
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1847901344299316,
      "learning_rate": 2.431154398130754e-06,
      "loss": 2.2686,
      "step": 59502
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2754682302474976,
      "learning_rate": 2.4308853145664446e-06,
      "loss": 2.123,
      "step": 59503
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1060936450958252,
      "learning_rate": 2.430616243833842e-06,
      "loss": 2.363,
      "step": 59504
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0479896068572998,
      "learning_rate": 2.430347185933399e-06,
      "loss": 2.4319,
      "step": 59505
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3039634227752686,
      "learning_rate": 2.430078140865577e-06,
      "loss": 2.4331,
      "step": 59506
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1162563562393188,
      "learning_rate": 2.429809108630824e-06,
      "loss": 2.3751,
      "step": 59507
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0319666862487793,
      "learning_rate": 2.429540089229605e-06,
      "loss": 2.3153,
      "step": 59508
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1455564498901367,
      "learning_rate": 2.429271082662371e-06,
      "loss": 2.1714,
      "step": 59509
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1339030265808105,
      "learning_rate": 2.429002088929575e-06,
      "loss": 2.2096,
      "step": 59510
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0679231882095337,
      "learning_rate": 2.428733108031681e-06,
      "loss": 2.4345,
      "step": 59511
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.081409215927124,
      "learning_rate": 2.4284641399691367e-06,
      "loss": 2.3376,
      "step": 59512
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.306941270828247,
      "learning_rate": 2.4281951847424055e-06,
      "loss": 2.2577,
      "step": 59513
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.146342158317566,
      "learning_rate": 2.427926242351937e-06,
      "loss": 2.5377,
      "step": 59514
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0841645002365112,
      "learning_rate": 2.427657312798193e-06,
      "loss": 2.2448,
      "step": 59515
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.108667016029358,
      "learning_rate": 2.427388396081627e-06,
      "loss": 2.2398,
      "step": 59516
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2395905256271362,
      "learning_rate": 2.427119492202693e-06,
      "loss": 2.2876,
      "step": 59517
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0360077619552612,
      "learning_rate": 2.4268506011618463e-06,
      "loss": 2.5278,
      "step": 59518
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.0721802711486816,
      "learning_rate": 2.426581722959548e-06,
      "loss": 2.3478,
      "step": 59519
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.1260626316070557,
      "learning_rate": 2.4263128575962467e-06,
      "loss": 2.3419,
      "step": 59520
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1494791507720947,
      "learning_rate": 2.4260440050724057e-06,
      "loss": 2.2004,
      "step": 59521
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0290889739990234,
      "learning_rate": 2.425775165388473e-06,
      "loss": 2.421,
      "step": 59522
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2046862840652466,
      "learning_rate": 2.425506338544913e-06,
      "loss": 2.43,
      "step": 59523
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.984221339225769,
      "learning_rate": 2.4252375245421733e-06,
      "loss": 2.4446,
      "step": 59524
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1261740922927856,
      "learning_rate": 2.424968723380716e-06,
      "loss": 2.2447,
      "step": 59525
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0289316177368164,
      "learning_rate": 2.424699935060991e-06,
      "loss": 2.3075,
      "step": 59526
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0137825012207031,
      "learning_rate": 2.4244311595834614e-06,
      "loss": 2.303,
      "step": 59527
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0324877500534058,
      "learning_rate": 2.424162396948574e-06,
      "loss": 2.3628,
      "step": 59528
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0962107181549072,
      "learning_rate": 2.4238936471567953e-06,
      "loss": 2.2894,
      "step": 59529
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1080621480941772,
      "learning_rate": 2.4236249102085686e-06,
      "loss": 2.3722,
      "step": 59530
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1253622770309448,
      "learning_rate": 2.4233561861043587e-06,
      "loss": 2.301,
      "step": 59531
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9997684359550476,
      "learning_rate": 2.423087474844614e-06,
      "loss": 2.3018,
      "step": 59532
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0865956544876099,
      "learning_rate": 2.422818776429797e-06,
      "loss": 2.4775,
      "step": 59533
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1240041255950928,
      "learning_rate": 2.4225500908603584e-06,
      "loss": 2.3584,
      "step": 59534
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2402726411819458,
      "learning_rate": 2.422281418136757e-06,
      "loss": 2.2189,
      "step": 59535
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0280275344848633,
      "learning_rate": 2.4220127582594443e-06,
      "loss": 2.2394,
      "step": 59536
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2457085847854614,
      "learning_rate": 2.421744111228882e-06,
      "loss": 2.1841,
      "step": 59537
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2136132717132568,
      "learning_rate": 2.4214754770455207e-06,
      "loss": 2.3372,
      "step": 59538
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3950729370117188,
      "learning_rate": 2.4212068557098134e-06,
      "loss": 2.5571,
      "step": 59539
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0833178758621216,
      "learning_rate": 2.420938247222222e-06,
      "loss": 2.4308,
      "step": 59540
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1837036609649658,
      "learning_rate": 2.4206696515831963e-06,
      "loss": 2.5467,
      "step": 59541
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1705329418182373,
      "learning_rate": 2.4204010687931966e-06,
      "loss": 1.9794,
      "step": 59542
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0674208402633667,
      "learning_rate": 2.4201324988526753e-06,
      "loss": 2.2622,
      "step": 59543
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0829700231552124,
      "learning_rate": 2.4198639417620894e-06,
      "loss": 2.4145,
      "step": 59544
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.063545823097229,
      "learning_rate": 2.419595397521889e-06,
      "loss": 2.1704,
      "step": 59545
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0646737813949585,
      "learning_rate": 2.4193268661325366e-06,
      "loss": 2.3367,
      "step": 59546
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3467278480529785,
      "learning_rate": 2.419058347594481e-06,
      "loss": 2.456,
      "step": 59547
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0024893283843994,
      "learning_rate": 2.4187898419081823e-06,
      "loss": 2.3686,
      "step": 59548
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0733904838562012,
      "learning_rate": 2.418521349074092e-06,
      "loss": 2.4384,
      "step": 59549
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1124435663223267,
      "learning_rate": 2.41825286909267e-06,
      "loss": 2.3365,
      "step": 59550
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1906129121780396,
      "learning_rate": 2.417984401964365e-06,
      "loss": 2.2562,
      "step": 59551
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1705501079559326,
      "learning_rate": 2.4177159476896395e-06,
      "loss": 2.5288,
      "step": 59552
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1690049171447754,
      "learning_rate": 2.4174475062689407e-06,
      "loss": 2.1783,
      "step": 59553
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0988712310791016,
      "learning_rate": 2.4171790777027317e-06,
      "loss": 2.4092,
      "step": 59554
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0612705945968628,
      "learning_rate": 2.4169106619914628e-06,
      "loss": 2.6324,
      "step": 59555
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9720034003257751,
      "learning_rate": 2.416642259135591e-06,
      "loss": 2.0577,
      "step": 59556
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0555990934371948,
      "learning_rate": 2.4163738691355666e-06,
      "loss": 2.305,
      "step": 59557
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0443074703216553,
      "learning_rate": 2.4161054919918516e-06,
      "loss": 2.6401,
      "step": 59558
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.031935691833496,
      "learning_rate": 2.415837127704893e-06,
      "loss": 2.2678,
      "step": 59559
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.095758080482483,
      "learning_rate": 2.4155687762751544e-06,
      "loss": 2.1031,
      "step": 59560
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.033141851425171,
      "learning_rate": 2.415300437703084e-06,
      "loss": 2.5671,
      "step": 59561
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0633403062820435,
      "learning_rate": 2.4150321119891417e-06,
      "loss": 2.3152,
      "step": 59562
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2979211807250977,
      "learning_rate": 2.414763799133778e-06,
      "loss": 2.3208,
      "step": 59563
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.073694109916687,
      "learning_rate": 2.4144954991374512e-06,
      "loss": 2.2834,
      "step": 59564
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1290090084075928,
      "learning_rate": 2.414227212000616e-06,
      "loss": 2.1937,
      "step": 59565
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9993786215782166,
      "learning_rate": 2.413958937723723e-06,
      "loss": 2.3221,
      "step": 59566
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0648045539855957,
      "learning_rate": 2.413690676307232e-06,
      "loss": 2.2958,
      "step": 59567
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.107960820198059,
      "learning_rate": 2.4134224277515962e-06,
      "loss": 2.3603,
      "step": 59568
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1592087745666504,
      "learning_rate": 2.4131541920572678e-06,
      "loss": 2.1624,
      "step": 59569
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.033652663230896,
      "learning_rate": 2.412885969224705e-06,
      "loss": 2.3584,
      "step": 59570
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0484546422958374,
      "learning_rate": 2.4126177592543586e-06,
      "loss": 2.448,
      "step": 59571
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.056026816368103,
      "learning_rate": 2.4123495621466898e-06,
      "loss": 2.3195,
      "step": 59572
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.065649151802063,
      "learning_rate": 2.412081377902148e-06,
      "loss": 2.2073,
      "step": 59573
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.032309651374817,
      "learning_rate": 2.4118132065211863e-06,
      "loss": 2.4125,
      "step": 59574
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1859008073806763,
      "learning_rate": 2.4115450480042657e-06,
      "loss": 2.2581,
      "step": 59575
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0582040548324585,
      "learning_rate": 2.4112769023518346e-06,
      "loss": 2.1092,
      "step": 59576
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1098276376724243,
      "learning_rate": 2.4110087695643525e-06,
      "loss": 2.3624,
      "step": 59577
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9528341889381409,
      "learning_rate": 2.4107406496422683e-06,
      "loss": 2.3325,
      "step": 59578
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9963650107383728,
      "learning_rate": 2.4104725425860432e-06,
      "loss": 2.15,
      "step": 59579
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0451372861862183,
      "learning_rate": 2.4102044483961262e-06,
      "loss": 2.3809,
      "step": 59580
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1584066152572632,
      "learning_rate": 2.409936367072979e-06,
      "loss": 2.2671,
      "step": 59581
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0985522270202637,
      "learning_rate": 2.409668298617045e-06,
      "loss": 2.2461,
      "step": 59582
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1039923429489136,
      "learning_rate": 2.409400243028789e-06,
      "loss": 2.4598,
      "step": 59583
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1930161714553833,
      "learning_rate": 2.409132200308657e-06,
      "loss": 2.2699,
      "step": 59584
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.304267406463623,
      "learning_rate": 2.4088641704571113e-06,
      "loss": 2.2606,
      "step": 59585
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1445634365081787,
      "learning_rate": 2.4085961534745994e-06,
      "loss": 2.3182,
      "step": 59586
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0351250171661377,
      "learning_rate": 2.4083281493615817e-06,
      "loss": 1.9672,
      "step": 59587
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1370017528533936,
      "learning_rate": 2.408060158118506e-06,
      "loss": 2.4581,
      "step": 59588
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1556084156036377,
      "learning_rate": 2.4077921797458347e-06,
      "loss": 2.3351,
      "step": 59589
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0640746355056763,
      "learning_rate": 2.4075242142440135e-06,
      "loss": 2.4067,
      "step": 59590
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3712830543518066,
      "learning_rate": 2.407256261613504e-06,
      "loss": 2.5813,
      "step": 59591
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0867397785186768,
      "learning_rate": 2.4069883218547552e-06,
      "loss": 2.1121,
      "step": 59592
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2267396450042725,
      "learning_rate": 2.406720394968225e-06,
      "loss": 2.4266,
      "step": 59593
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2485979795455933,
      "learning_rate": 2.4064524809543666e-06,
      "loss": 2.4051,
      "step": 59594
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1964219808578491,
      "learning_rate": 2.406184579813634e-06,
      "loss": 2.2829,
      "step": 59595
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0977821350097656,
      "learning_rate": 2.4059166915464783e-06,
      "loss": 2.0752,
      "step": 59596
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1541528701782227,
      "learning_rate": 2.4056488161533586e-06,
      "loss": 2.3683,
      "step": 59597
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0267468690872192,
      "learning_rate": 2.405380953634724e-06,
      "loss": 2.33,
      "step": 59598
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.083146333694458,
      "learning_rate": 2.4051131039910347e-06,
      "loss": 2.3874,
      "step": 59599
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.202309250831604,
      "learning_rate": 2.4048452672227407e-06,
      "loss": 2.3871,
      "step": 59600
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2368766069412231,
      "learning_rate": 2.4045774433302936e-06,
      "loss": 2.4311,
      "step": 59601
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.033410668373108,
      "learning_rate": 2.4043096323141546e-06,
      "loss": 2.3408,
      "step": 59602
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0979201793670654,
      "learning_rate": 2.4040418341747705e-06,
      "loss": 2.216,
      "step": 59603
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0953112840652466,
      "learning_rate": 2.4037740489126014e-06,
      "loss": 2.5914,
      "step": 59604
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.047263503074646,
      "learning_rate": 2.403506276528095e-06,
      "loss": 2.415,
      "step": 59605
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0962285995483398,
      "learning_rate": 2.403238517021712e-06,
      "loss": 2.5651,
      "step": 59606
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1059359312057495,
      "learning_rate": 2.4029707703938997e-06,
      "loss": 2.3031,
      "step": 59607
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0694838762283325,
      "learning_rate": 2.402703036645122e-06,
      "loss": 2.0989,
      "step": 59608
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1442476511001587,
      "learning_rate": 2.4024353157758195e-06,
      "loss": 2.3241,
      "step": 59609
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.112764596939087,
      "learning_rate": 2.4021676077864552e-06,
      "loss": 2.5303,
      "step": 59610
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0731233358383179,
      "learning_rate": 2.4018999126774776e-06,
      "loss": 2.2241,
      "step": 59611
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0415252447128296,
      "learning_rate": 2.4016322304493466e-06,
      "loss": 2.2519,
      "step": 59612
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0548173189163208,
      "learning_rate": 2.4013645611025106e-06,
      "loss": 2.1903,
      "step": 59613
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2968097925186157,
      "learning_rate": 2.4010969046374266e-06,
      "loss": 2.261,
      "step": 59614
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0120577812194824,
      "learning_rate": 2.4008292610545456e-06,
      "loss": 2.2654,
      "step": 59615
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0872552394866943,
      "learning_rate": 2.4005616303543243e-06,
      "loss": 2.4161,
      "step": 59616
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3359428644180298,
      "learning_rate": 2.400294012537214e-06,
      "loss": 2.1683,
      "step": 59617
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1404985189437866,
      "learning_rate": 2.4000264076036707e-06,
      "loss": 2.2305,
      "step": 59618
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1903786659240723,
      "learning_rate": 2.399758815554145e-06,
      "loss": 2.2591,
      "step": 59619
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2668437957763672,
      "learning_rate": 2.3994912363890944e-06,
      "loss": 2.3622,
      "step": 59620
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1418474912643433,
      "learning_rate": 2.399223670108971e-06,
      "loss": 2.3261,
      "step": 59621
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0312000513076782,
      "learning_rate": 2.3989561167142274e-06,
      "loss": 2.283,
      "step": 59622
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1531888246536255,
      "learning_rate": 2.3986885762053145e-06,
      "loss": 2.3876,
      "step": 59623
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1177098751068115,
      "learning_rate": 2.398421048582693e-06,
      "loss": 2.5329,
      "step": 59624
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.188141107559204,
      "learning_rate": 2.3981535338468086e-06,
      "loss": 2.3821,
      "step": 59625
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.120008111000061,
      "learning_rate": 2.3978860319981214e-06,
      "loss": 2.3558,
      "step": 59626
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2753101587295532,
      "learning_rate": 2.3976185430370824e-06,
      "loss": 2.3755,
      "step": 59627
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2600868940353394,
      "learning_rate": 2.397351066964142e-06,
      "loss": 2.2917,
      "step": 59628
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1000319719314575,
      "learning_rate": 2.3970836037797584e-06,
      "loss": 2.1971,
      "step": 59629
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0154627561569214,
      "learning_rate": 2.39681615348438e-06,
      "loss": 2.4345,
      "step": 59630
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1352089643478394,
      "learning_rate": 2.3965487160784674e-06,
      "loss": 2.4258,
      "step": 59631
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0281816720962524,
      "learning_rate": 2.3962812915624665e-06,
      "loss": 2.3747,
      "step": 59632
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0605484247207642,
      "learning_rate": 2.396013879936837e-06,
      "loss": 2.5987,
      "step": 59633
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1209067106246948,
      "learning_rate": 2.395746481202028e-06,
      "loss": 2.5262,
      "step": 59634
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1596859693527222,
      "learning_rate": 2.395479095358495e-06,
      "loss": 2.3397,
      "step": 59635
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1993707418441772,
      "learning_rate": 2.395211722406687e-06,
      "loss": 2.114,
      "step": 59636
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4070943593978882,
      "learning_rate": 2.394944362347065e-06,
      "loss": 2.1401,
      "step": 59637
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2369601726531982,
      "learning_rate": 2.3946770151800736e-06,
      "loss": 2.4042,
      "step": 59638
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0145254135131836,
      "learning_rate": 2.3944096809061722e-06,
      "loss": 2.3608,
      "step": 59639
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9537920355796814,
      "learning_rate": 2.3941423595258106e-06,
      "loss": 2.2492,
      "step": 59640
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.123583197593689,
      "learning_rate": 2.3938750510394458e-06,
      "loss": 2.4149,
      "step": 59641
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1245514154434204,
      "learning_rate": 2.3936077554475257e-06,
      "loss": 2.2551,
      "step": 59642
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0977942943572998,
      "learning_rate": 2.3933404727505096e-06,
      "loss": 2.3983,
      "step": 59643
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1297088861465454,
      "learning_rate": 2.3930732029488433e-06,
      "loss": 2.2854,
      "step": 59644
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0342650413513184,
      "learning_rate": 2.3928059460429886e-06,
      "loss": 2.2863,
      "step": 59645
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.254225730895996,
      "learning_rate": 2.39253870203339e-06,
      "loss": 2.2925,
      "step": 59646
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0446034669876099,
      "learning_rate": 2.3922714709205097e-06,
      "loss": 2.3394,
      "step": 59647
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0859500169754028,
      "learning_rate": 2.3920042527047906e-06,
      "loss": 2.2163,
      "step": 59648
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9954837560653687,
      "learning_rate": 2.391737047386693e-06,
      "loss": 2.3277,
      "step": 59649
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0101661682128906,
      "learning_rate": 2.391469854966665e-06,
      "loss": 2.608,
      "step": 59650
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2824159860610962,
      "learning_rate": 2.3912026754451647e-06,
      "loss": 2.4372,
      "step": 59651
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1761772632598877,
      "learning_rate": 2.3909355088226396e-06,
      "loss": 2.2176,
      "step": 59652
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9952789545059204,
      "learning_rate": 2.390668355099548e-06,
      "loss": 2.3665,
      "step": 59653
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1115490198135376,
      "learning_rate": 2.3904012142763377e-06,
      "loss": 2.3049,
      "step": 59654
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2412983179092407,
      "learning_rate": 2.390134086353466e-06,
      "loss": 2.2135,
      "step": 59655
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0853722095489502,
      "learning_rate": 2.389866971331385e-06,
      "loss": 2.2062,
      "step": 59656
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1212973594665527,
      "learning_rate": 2.389599869210544e-06,
      "loss": 2.5405,
      "step": 59657
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0443401336669922,
      "learning_rate": 2.389332779991399e-06,
      "loss": 2.4271,
      "step": 59658
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0541068315505981,
      "learning_rate": 2.3890657036744003e-06,
      "loss": 2.2091,
      "step": 59659
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0348098278045654,
      "learning_rate": 2.388798640260006e-06,
      "loss": 2.6377,
      "step": 59660
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0454647541046143,
      "learning_rate": 2.388531589748664e-06,
      "loss": 2.374,
      "step": 59661
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1086562871932983,
      "learning_rate": 2.3882645521408288e-06,
      "loss": 2.4538,
      "step": 59662
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0807620286941528,
      "learning_rate": 2.3879975274369494e-06,
      "loss": 2.4357,
      "step": 59663
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0356504917144775,
      "learning_rate": 2.387730515637484e-06,
      "loss": 2.3784,
      "step": 59664
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9950699210166931,
      "learning_rate": 2.3874635167428807e-06,
      "loss": 2.2706,
      "step": 59665
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.149157166481018,
      "learning_rate": 2.387196530753596e-06,
      "loss": 2.3394,
      "step": 59666
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1243093013763428,
      "learning_rate": 2.3869295576700792e-06,
      "loss": 2.3054,
      "step": 59667
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0933939218521118,
      "learning_rate": 2.386662597492787e-06,
      "loss": 2.3907,
      "step": 59668
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.171949863433838,
      "learning_rate": 2.3863956502221664e-06,
      "loss": 2.4122,
      "step": 59669
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1648794412612915,
      "learning_rate": 2.3861287158586767e-06,
      "loss": 2.364,
      "step": 59670
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0687639713287354,
      "learning_rate": 2.385861794402763e-06,
      "loss": 2.3557,
      "step": 59671
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0279209613800049,
      "learning_rate": 2.3855948858548837e-06,
      "loss": 2.4363,
      "step": 59672
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1399641036987305,
      "learning_rate": 2.385327990215487e-06,
      "loss": 2.2484,
      "step": 59673
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0621246099472046,
      "learning_rate": 2.385061107485034e-06,
      "loss": 2.4008,
      "step": 59674
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0417404174804688,
      "learning_rate": 2.384794237663963e-06,
      "loss": 2.2204,
      "step": 59675
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.09794282913208,
      "learning_rate": 2.384527380752739e-06,
      "loss": 2.4642,
      "step": 59676
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2092387676239014,
      "learning_rate": 2.384260536751806e-06,
      "loss": 2.09,
      "step": 59677
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1006650924682617,
      "learning_rate": 2.3839937056616216e-06,
      "loss": 2.4574,
      "step": 59678
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1015130281448364,
      "learning_rate": 2.3837268874826334e-06,
      "loss": 2.3714,
      "step": 59679
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0886833667755127,
      "learning_rate": 2.383460082215301e-06,
      "loss": 2.2688,
      "step": 59680
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2397829294204712,
      "learning_rate": 2.3831932898600684e-06,
      "loss": 2.3747,
      "step": 59681
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.011060357093811,
      "learning_rate": 2.382926510417396e-06,
      "loss": 2.1595,
      "step": 59682
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2266377210617065,
      "learning_rate": 2.3826597438877306e-06,
      "loss": 2.3647,
      "step": 59683
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.197196364402771,
      "learning_rate": 2.3823929902715236e-06,
      "loss": 2.0999,
      "step": 59684
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9469761252403259,
      "learning_rate": 2.3821262495692322e-06,
      "loss": 2.1249,
      "step": 59685
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0106372833251953,
      "learning_rate": 2.3818595217813024e-06,
      "loss": 2.3445,
      "step": 59686
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1369495391845703,
      "learning_rate": 2.3815928069081927e-06,
      "loss": 2.4196,
      "step": 59687
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1358705759048462,
      "learning_rate": 2.3813261049503534e-06,
      "loss": 2.4483,
      "step": 59688
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2320042848587036,
      "learning_rate": 2.3810594159082313e-06,
      "loss": 2.2462,
      "step": 59689
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.039011836051941,
      "learning_rate": 2.380792739782286e-06,
      "loss": 2.0944,
      "step": 59690
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.079227089881897,
      "learning_rate": 2.3805260765729674e-06,
      "loss": 2.4283,
      "step": 59691
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.026804804801941,
      "learning_rate": 2.3802594262807223e-06,
      "loss": 2.3827,
      "step": 59692
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0092066526412964,
      "learning_rate": 2.3799927889060103e-06,
      "loss": 2.3837,
      "step": 59693
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9969360828399658,
      "learning_rate": 2.3797261644492776e-06,
      "loss": 2.0602,
      "step": 59694
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0204987525939941,
      "learning_rate": 2.379459552910981e-06,
      "loss": 2.2471,
      "step": 59695
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0704593658447266,
      "learning_rate": 2.3791929542915683e-06,
      "loss": 2.4493,
      "step": 59696
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0860586166381836,
      "learning_rate": 2.3789263685914955e-06,
      "loss": 2.4538,
      "step": 59697
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0142267942428589,
      "learning_rate": 2.378659795811209e-06,
      "loss": 2.2824,
      "step": 59698
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3844778537750244,
      "learning_rate": 2.3783932359511673e-06,
      "loss": 2.2891,
      "step": 59699
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0029195547103882,
      "learning_rate": 2.3781266890118194e-06,
      "loss": 2.3765,
      "step": 59700
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2237485647201538,
      "learning_rate": 2.3778601549936165e-06,
      "loss": 2.3757,
      "step": 59701
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.36060631275177,
      "learning_rate": 2.3775936338970083e-06,
      "loss": 2.317,
      "step": 59702
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1765234470367432,
      "learning_rate": 2.377327125722452e-06,
      "loss": 2.3372,
      "step": 59703
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.15509831905365,
      "learning_rate": 2.3770606304703923e-06,
      "loss": 2.4693,
      "step": 59704
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.06244695186615,
      "learning_rate": 2.376794148141289e-06,
      "loss": 2.5619,
      "step": 59705
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.020517349243164,
      "learning_rate": 2.3765276787355874e-06,
      "loss": 2.2455,
      "step": 59706
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0088472366333008,
      "learning_rate": 2.376261222253745e-06,
      "loss": 2.2012,
      "step": 59707
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0615192651748657,
      "learning_rate": 2.3759947786962066e-06,
      "loss": 2.1575,
      "step": 59708
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2342100143432617,
      "learning_rate": 2.375728348063432e-06,
      "loss": 2.4169,
      "step": 59709
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3620082139968872,
      "learning_rate": 2.3754619303558645e-06,
      "loss": 2.1721,
      "step": 59710
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.067257046699524,
      "learning_rate": 2.3751955255739634e-06,
      "loss": 2.2755,
      "step": 59711
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.266780138015747,
      "learning_rate": 2.374929133718177e-06,
      "loss": 2.3754,
      "step": 59712
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0619193315505981,
      "learning_rate": 2.3746627547889554e-06,
      "loss": 2.3565,
      "step": 59713
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1072998046875,
      "learning_rate": 2.3743963887867493e-06,
      "loss": 2.1856,
      "step": 59714
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0478897094726562,
      "learning_rate": 2.3741300357120155e-06,
      "loss": 2.3775,
      "step": 59715
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0697550773620605,
      "learning_rate": 2.3738636955651984e-06,
      "loss": 2.3023,
      "step": 59716
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0436248779296875,
      "learning_rate": 2.3735973683467586e-06,
      "loss": 2.3908,
      "step": 59717
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0648025274276733,
      "learning_rate": 2.373331054057141e-06,
      "loss": 2.3886,
      "step": 59718
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0089218616485596,
      "learning_rate": 2.3730647526967954e-06,
      "loss": 2.379,
      "step": 59719
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0022058486938477,
      "learning_rate": 2.3727984642661804e-06,
      "loss": 2.3993,
      "step": 59720
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3004224300384521,
      "learning_rate": 2.3725321887657403e-06,
      "loss": 2.4036,
      "step": 59721
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9676430225372314,
      "learning_rate": 2.3722659261959323e-06,
      "loss": 2.3556,
      "step": 59722
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1092406511306763,
      "learning_rate": 2.371999676557203e-06,
      "loss": 2.5097,
      "step": 59723
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.191572666168213,
      "learning_rate": 2.371733439850008e-06,
      "loss": 2.3608,
      "step": 59724
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0129728317260742,
      "learning_rate": 2.3714672160747944e-06,
      "loss": 2.2604,
      "step": 59725
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0763152837753296,
      "learning_rate": 2.371201005232021e-06,
      "loss": 2.3596,
      "step": 59726
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.045746088027954,
      "learning_rate": 2.3709348073221285e-06,
      "loss": 2.503,
      "step": 59727
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0362032651901245,
      "learning_rate": 2.3706686223455766e-06,
      "loss": 2.4214,
      "step": 59728
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0277040004730225,
      "learning_rate": 2.37040245030281e-06,
      "loss": 2.3153,
      "step": 59729
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.021000862121582,
      "learning_rate": 2.370136291194287e-06,
      "loss": 2.5447,
      "step": 59730
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.075483798980713,
      "learning_rate": 2.369870145020452e-06,
      "loss": 2.3679,
      "step": 59731
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1199947595596313,
      "learning_rate": 2.369604011781762e-06,
      "loss": 2.432,
      "step": 59732
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1862763166427612,
      "learning_rate": 2.3693378914786637e-06,
      "loss": 2.445,
      "step": 59733
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2750993967056274,
      "learning_rate": 2.369071784111612e-06,
      "loss": 2.3208,
      "step": 59734
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1054744720458984,
      "learning_rate": 2.368805689681054e-06,
      "loss": 2.5495,
      "step": 59735
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2321546077728271,
      "learning_rate": 2.3685396081874445e-06,
      "loss": 2.2507,
      "step": 59736
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.139470100402832,
      "learning_rate": 2.368273539631232e-06,
      "loss": 2.4884,
      "step": 59737
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1717649698257446,
      "learning_rate": 2.36800748401287e-06,
      "loss": 2.4117,
      "step": 59738
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.229526162147522,
      "learning_rate": 2.367741441332809e-06,
      "loss": 2.217,
      "step": 59739
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0187389850616455,
      "learning_rate": 2.367475411591499e-06,
      "loss": 2.4274,
      "step": 59740
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0951403379440308,
      "learning_rate": 2.367209394789388e-06,
      "loss": 2.3723,
      "step": 59741
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1649975776672363,
      "learning_rate": 2.3669433909269334e-06,
      "loss": 2.3464,
      "step": 59742
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0274938344955444,
      "learning_rate": 2.366677400004579e-06,
      "loss": 2.297,
      "step": 59743
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1418864727020264,
      "learning_rate": 2.366411422022783e-06,
      "loss": 2.2058,
      "step": 59744
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2325538396835327,
      "learning_rate": 2.366145456981993e-06,
      "loss": 2.3016,
      "step": 59745
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1325852870941162,
      "learning_rate": 2.3658795048826577e-06,
      "loss": 2.4987,
      "step": 59746
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.016802191734314,
      "learning_rate": 2.365613565725232e-06,
      "loss": 2.1696,
      "step": 59747
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0541127920150757,
      "learning_rate": 2.3653476395101617e-06,
      "loss": 2.2739,
      "step": 59748
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.04183030128479,
      "learning_rate": 2.3650817262379045e-06,
      "loss": 2.2745,
      "step": 59749
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9535035490989685,
      "learning_rate": 2.364815825908904e-06,
      "loss": 2.1291,
      "step": 59750
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1945430040359497,
      "learning_rate": 2.364549938523617e-06,
      "loss": 2.4429,
      "step": 59751
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.089004635810852,
      "learning_rate": 2.3642840640824893e-06,
      "loss": 2.3544,
      "step": 59752
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.357202172279358,
      "learning_rate": 2.3640182025859794e-06,
      "loss": 2.227,
      "step": 59753
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1429237127304077,
      "learning_rate": 2.3637523540345262e-06,
      "loss": 2.2469,
      "step": 59754
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0429688692092896,
      "learning_rate": 2.3634865184285905e-06,
      "loss": 2.195,
      "step": 59755
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0600191354751587,
      "learning_rate": 2.3632206957686154e-06,
      "loss": 2.2519,
      "step": 59756
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.056748390197754,
      "learning_rate": 2.362954886055059e-06,
      "loss": 2.273,
      "step": 59757
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.064305305480957,
      "learning_rate": 2.3626890892883647e-06,
      "loss": 2.4362,
      "step": 59758
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1089876890182495,
      "learning_rate": 2.3624233054689905e-06,
      "loss": 2.3803,
      "step": 59759
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.193166971206665,
      "learning_rate": 2.36215753459738e-06,
      "loss": 2.2174,
      "step": 59760
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0464383363723755,
      "learning_rate": 2.3618917766739892e-06,
      "loss": 2.0698,
      "step": 59761
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2197761535644531,
      "learning_rate": 2.3616260316992635e-06,
      "loss": 2.0877,
      "step": 59762
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.103527545928955,
      "learning_rate": 2.3613602996736597e-06,
      "loss": 2.3007,
      "step": 59763
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0028315782546997,
      "learning_rate": 2.3610945805976215e-06,
      "loss": 2.3964,
      "step": 59764
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0746043920516968,
      "learning_rate": 2.3608288744716058e-06,
      "loss": 2.1742,
      "step": 59765
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1811368465423584,
      "learning_rate": 2.3605631812960593e-06,
      "loss": 2.2311,
      "step": 59766
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.01825749874115,
      "learning_rate": 2.3602975010714348e-06,
      "loss": 2.4169,
      "step": 59767
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2073813676834106,
      "learning_rate": 2.3600318337981765e-06,
      "loss": 2.358,
      "step": 59768
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.037106990814209,
      "learning_rate": 2.3597661794767424e-06,
      "loss": 2.2306,
      "step": 59769
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2451151609420776,
      "learning_rate": 2.3595005381075762e-06,
      "loss": 2.3352,
      "step": 59770
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9944431781768799,
      "learning_rate": 2.359234909691136e-06,
      "loss": 2.5229,
      "step": 59771
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.196704626083374,
      "learning_rate": 2.358969294227864e-06,
      "loss": 2.4222,
      "step": 59772
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0534796714782715,
      "learning_rate": 2.358703691718217e-06,
      "loss": 2.0362,
      "step": 59773
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2833924293518066,
      "learning_rate": 2.3584381021626435e-06,
      "loss": 2.4251,
      "step": 59774
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.962713360786438,
      "learning_rate": 2.3581725255615895e-06,
      "loss": 2.1617,
      "step": 59775
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0685217380523682,
      "learning_rate": 2.3579069619155114e-06,
      "loss": 2.3727,
      "step": 59776
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0072510242462158,
      "learning_rate": 2.3576414112248534e-06,
      "loss": 2.3781,
      "step": 59777
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0648205280303955,
      "learning_rate": 2.357375873490072e-06,
      "loss": 2.237,
      "step": 59778
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0672768354415894,
      "learning_rate": 2.357110348711614e-06,
      "loss": 2.6034,
      "step": 59779
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.118391752243042,
      "learning_rate": 2.35684483688993e-06,
      "loss": 2.3664,
      "step": 59780
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0604650974273682,
      "learning_rate": 2.3565793380254676e-06,
      "loss": 2.2397,
      "step": 59781
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1675626039505005,
      "learning_rate": 2.3563138521186812e-06,
      "loss": 2.2796,
      "step": 59782
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1238762140274048,
      "learning_rate": 2.3560483791700163e-06,
      "loss": 2.1875,
      "step": 59783
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0615925788879395,
      "learning_rate": 2.3557829191799275e-06,
      "loss": 2.4871,
      "step": 59784
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9513095021247864,
      "learning_rate": 2.3555174721488605e-06,
      "loss": 2.264,
      "step": 59785
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2450731992721558,
      "learning_rate": 2.35525203807727e-06,
      "loss": 2.3173,
      "step": 59786
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0750950574874878,
      "learning_rate": 2.3549866169656024e-06,
      "loss": 2.3139,
      "step": 59787
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0525038242340088,
      "learning_rate": 2.3547212088143103e-06,
      "loss": 2.3036,
      "step": 59788
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0418734550476074,
      "learning_rate": 2.3544558136238395e-06,
      "loss": 2.145,
      "step": 59789
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0760372877120972,
      "learning_rate": 2.354190431394645e-06,
      "loss": 2.2802,
      "step": 59790
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0588895082473755,
      "learning_rate": 2.3539250621271724e-06,
      "loss": 2.3278,
      "step": 59791
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9990734457969666,
      "learning_rate": 2.353659705821878e-06,
      "loss": 2.3301,
      "step": 59792
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5367602109909058,
      "learning_rate": 2.353394362479202e-06,
      "loss": 2.1822,
      "step": 59793
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1115084886550903,
      "learning_rate": 2.3531290320996024e-06,
      "loss": 2.2345,
      "step": 59794
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1523616313934326,
      "learning_rate": 2.3528637146835232e-06,
      "loss": 2.1814,
      "step": 59795
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.066124677658081,
      "learning_rate": 2.3525984102314205e-06,
      "loss": 2.3578,
      "step": 59796
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0149081945419312,
      "learning_rate": 2.352333118743736e-06,
      "loss": 2.2157,
      "step": 59797
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1207270622253418,
      "learning_rate": 2.3520678402209275e-06,
      "loss": 2.3594,
      "step": 59798
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1919893026351929,
      "learning_rate": 2.351802574663439e-06,
      "loss": 2.4662,
      "step": 59799
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9919331669807434,
      "learning_rate": 2.351537322071724e-06,
      "loss": 2.2807,
      "step": 59800
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.013622522354126,
      "learning_rate": 2.351272082446231e-06,
      "loss": 2.4581,
      "step": 59801
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1585592031478882,
      "learning_rate": 2.3510068557874065e-06,
      "loss": 2.2514,
      "step": 59802
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0730106830596924,
      "learning_rate": 2.350741642095706e-06,
      "loss": 2.2226,
      "step": 59803
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0006026029586792,
      "learning_rate": 2.350476441371573e-06,
      "loss": 2.2937,
      "step": 59804
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0942336320877075,
      "learning_rate": 2.3502112536154618e-06,
      "loss": 2.4839,
      "step": 59805
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.147755742073059,
      "learning_rate": 2.3499460788278216e-06,
      "loss": 2.2576,
      "step": 59806
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1115880012512207,
      "learning_rate": 2.3496809170090974e-06,
      "loss": 2.3365,
      "step": 59807
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.099064588546753,
      "learning_rate": 2.349415768159744e-06,
      "loss": 2.2784,
      "step": 59808
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0900201797485352,
      "learning_rate": 2.349150632280208e-06,
      "loss": 2.2655,
      "step": 59809
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0811333656311035,
      "learning_rate": 2.3488855093709385e-06,
      "loss": 2.1713,
      "step": 59810
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1270800828933716,
      "learning_rate": 2.3486203994323885e-06,
      "loss": 2.2383,
      "step": 59811
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.02555513381958,
      "learning_rate": 2.3483553024650017e-06,
      "loss": 2.2919,
      "step": 59812
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1957587003707886,
      "learning_rate": 2.3480902184692345e-06,
      "loss": 2.405,
      "step": 59813
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.037748098373413,
      "learning_rate": 2.3478251474455284e-06,
      "loss": 2.1538,
      "step": 59814
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0093529224395752,
      "learning_rate": 2.3475600893943406e-06,
      "loss": 2.3567,
      "step": 59815
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1595957279205322,
      "learning_rate": 2.3472950443161137e-06,
      "loss": 2.1514,
      "step": 59816
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1146717071533203,
      "learning_rate": 2.347030012211303e-06,
      "loss": 2.3245,
      "step": 59817
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4609622955322266,
      "learning_rate": 2.3467649930803526e-06,
      "loss": 2.1894,
      "step": 59818
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0194288492202759,
      "learning_rate": 2.3464999869237204e-06,
      "loss": 2.2532,
      "step": 59819
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3725813627243042,
      "learning_rate": 2.3462349937418428e-06,
      "loss": 2.3033,
      "step": 59820
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0747535228729248,
      "learning_rate": 2.3459700135351783e-06,
      "loss": 2.2363,
      "step": 59821
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0221946239471436,
      "learning_rate": 2.345705046304171e-06,
      "loss": 2.2174,
      "step": 59822
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0307965278625488,
      "learning_rate": 2.3454400920492746e-06,
      "loss": 2.481,
      "step": 59823
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0756183862686157,
      "learning_rate": 2.345175150770934e-06,
      "loss": 2.4095,
      "step": 59824
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1056996583938599,
      "learning_rate": 2.344910222469603e-06,
      "loss": 2.3656,
      "step": 59825
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0870671272277832,
      "learning_rate": 2.3446453071457266e-06,
      "loss": 2.349,
      "step": 59826
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0404424667358398,
      "learning_rate": 2.3443804047997567e-06,
      "loss": 2.2974,
      "step": 59827
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0462994575500488,
      "learning_rate": 2.3441155154321395e-06,
      "loss": 2.2404,
      "step": 59828
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2083444595336914,
      "learning_rate": 2.343850639043328e-06,
      "loss": 2.1972,
      "step": 59829
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2246893644332886,
      "learning_rate": 2.34358577563377e-06,
      "loss": 2.539,
      "step": 59830
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.091447353363037,
      "learning_rate": 2.34332092520391e-06,
      "loss": 2.2829,
      "step": 59831
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1696563959121704,
      "learning_rate": 2.343056087754204e-06,
      "loss": 2.0993,
      "step": 59832
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.019661545753479,
      "learning_rate": 2.342791263285097e-06,
      "loss": 2.3458,
      "step": 59833
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0983320474624634,
      "learning_rate": 2.342526451797036e-06,
      "loss": 2.3782,
      "step": 59834
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0830128192901611,
      "learning_rate": 2.342261653290474e-06,
      "loss": 2.4047,
      "step": 59835
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.161999225616455,
      "learning_rate": 2.34199686776586e-06,
      "loss": 2.1376,
      "step": 59836
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0284826755523682,
      "learning_rate": 2.341732095223638e-06,
      "loss": 2.5451,
      "step": 59837
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1524403095245361,
      "learning_rate": 2.3414673356642624e-06,
      "loss": 2.264,
      "step": 59838
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.115498661994934,
      "learning_rate": 2.3412025890881762e-06,
      "loss": 2.3652,
      "step": 59839
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1942909955978394,
      "learning_rate": 2.3409378554958363e-06,
      "loss": 2.2705,
      "step": 59840
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1599782705307007,
      "learning_rate": 2.340673134887682e-06,
      "loss": 2.4429,
      "step": 59841
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1718087196350098,
      "learning_rate": 2.340408427264171e-06,
      "loss": 2.4829,
      "step": 59842
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1581813097000122,
      "learning_rate": 2.340143732625746e-06,
      "loss": 2.0311,
      "step": 59843
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.250671625137329,
      "learning_rate": 2.3398790509728595e-06,
      "loss": 2.4465,
      "step": 59844
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2559243440628052,
      "learning_rate": 2.339614382305959e-06,
      "loss": 2.5328,
      "step": 59845
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0187585353851318,
      "learning_rate": 2.339349726625492e-06,
      "loss": 2.2648,
      "step": 59846
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.006779432296753,
      "learning_rate": 2.3390850839319056e-06,
      "loss": 2.2544,
      "step": 59847
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0763216018676758,
      "learning_rate": 2.338820454225653e-06,
      "loss": 2.2113,
      "step": 59848
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0760468244552612,
      "learning_rate": 2.338555837507178e-06,
      "loss": 2.3017,
      "step": 59849
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0875366926193237,
      "learning_rate": 2.3382912337769346e-06,
      "loss": 2.3569,
      "step": 59850
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0387872457504272,
      "learning_rate": 2.338026643035365e-06,
      "loss": 2.4259,
      "step": 59851
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1142640113830566,
      "learning_rate": 2.337762065282925e-06,
      "loss": 2.391,
      "step": 59852
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0958863496780396,
      "learning_rate": 2.3374975005200553e-06,
      "loss": 2.2127,
      "step": 59853
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.067970871925354,
      "learning_rate": 2.337232948747212e-06,
      "loss": 2.0714,
      "step": 59854
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.133082628250122,
      "learning_rate": 2.336968409964837e-06,
      "loss": 2.4136,
      "step": 59855
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1544092893600464,
      "learning_rate": 2.336703884173385e-06,
      "loss": 2.3776,
      "step": 59856
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1533551216125488,
      "learning_rate": 2.3364393713733014e-06,
      "loss": 2.3559,
      "step": 59857
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0856572389602661,
      "learning_rate": 2.3361748715650334e-06,
      "loss": 2.2795,
      "step": 59858
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3590608835220337,
      "learning_rate": 2.335910384749028e-06,
      "loss": 2.3566,
      "step": 59859
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1187353134155273,
      "learning_rate": 2.33564591092574e-06,
      "loss": 2.3109,
      "step": 59860
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0417979955673218,
      "learning_rate": 2.3353814500956096e-06,
      "loss": 2.2093,
      "step": 59861
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1148433685302734,
      "learning_rate": 2.335117002259093e-06,
      "loss": 2.4118,
      "step": 59862
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0670530796051025,
      "learning_rate": 2.3348525674166346e-06,
      "loss": 2.2695,
      "step": 59863
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0648629665374756,
      "learning_rate": 2.3345881455686803e-06,
      "loss": 2.5906,
      "step": 59864
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0898858308792114,
      "learning_rate": 2.3343237367156835e-06,
      "loss": 2.3052,
      "step": 59865
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0528734922409058,
      "learning_rate": 2.334059340858087e-06,
      "loss": 2.2589,
      "step": 59866
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0142029523849487,
      "learning_rate": 2.3337949579963448e-06,
      "loss": 2.1335,
      "step": 59867
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0537476539611816,
      "learning_rate": 2.333530588130899e-06,
      "loss": 2.2002,
      "step": 59868
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0052074193954468,
      "learning_rate": 2.3332662312622047e-06,
      "loss": 2.4206,
      "step": 59869
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0295753479003906,
      "learning_rate": 2.3330018873907034e-06,
      "loss": 2.3835,
      "step": 59870
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1814682483673096,
      "learning_rate": 2.332737556516852e-06,
      "loss": 2.2284,
      "step": 59871
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0555320978164673,
      "learning_rate": 2.3324732386410864e-06,
      "loss": 2.3673,
      "step": 59872
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1651369333267212,
      "learning_rate": 2.3322089337638645e-06,
      "loss": 2.2195,
      "step": 59873
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0132434368133545,
      "learning_rate": 2.3319446418856285e-06,
      "loss": 2.2764,
      "step": 59874
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0356160402297974,
      "learning_rate": 2.3316803630068306e-06,
      "loss": 2.1965,
      "step": 59875
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.055130124092102,
      "learning_rate": 2.331416097127915e-06,
      "loss": 2.3013,
      "step": 59876
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2682445049285889,
      "learning_rate": 2.331151844249335e-06,
      "loss": 2.1541,
      "step": 59877
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0227845907211304,
      "learning_rate": 2.3308876043715336e-06,
      "loss": 2.3692,
      "step": 59878
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1107991933822632,
      "learning_rate": 2.3306233774949618e-06,
      "loss": 2.453,
      "step": 59879
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0301334857940674,
      "learning_rate": 2.330359163620064e-06,
      "loss": 2.3774,
      "step": 59880
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.189031720161438,
      "learning_rate": 2.3300949627472936e-06,
      "loss": 2.2777,
      "step": 59881
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.17584228515625,
      "learning_rate": 2.329830774877092e-06,
      "loss": 2.4118,
      "step": 59882
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0495414733886719,
      "learning_rate": 2.3295666000099137e-06,
      "loss": 2.3518,
      "step": 59883
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.128501057624817,
      "learning_rate": 2.3293024381462005e-06,
      "loss": 2.2766,
      "step": 59884
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0549696683883667,
      "learning_rate": 2.3290382892864085e-06,
      "loss": 2.2393,
      "step": 59885
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1098887920379639,
      "learning_rate": 2.328774153430975e-06,
      "loss": 2.2982,
      "step": 59886
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5015536546707153,
      "learning_rate": 2.3285100305803544e-06,
      "loss": 1.9739,
      "step": 59887
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0654420852661133,
      "learning_rate": 2.3282459207349905e-06,
      "loss": 2.3515,
      "step": 59888
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2247670888900757,
      "learning_rate": 2.3279818238953365e-06,
      "loss": 2.2643,
      "step": 59889
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1213396787643433,
      "learning_rate": 2.3277177400618336e-06,
      "loss": 2.2814,
      "step": 59890
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3749321699142456,
      "learning_rate": 2.3274536692349357e-06,
      "loss": 2.3724,
      "step": 59891
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1394104957580566,
      "learning_rate": 2.327189611415088e-06,
      "loss": 2.3643,
      "step": 59892
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1829934120178223,
      "learning_rate": 2.326925566602736e-06,
      "loss": 2.3261,
      "step": 59893
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1289645433425903,
      "learning_rate": 2.32666153479833e-06,
      "loss": 2.0947,
      "step": 59894
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.044785976409912,
      "learning_rate": 2.3263975160023155e-06,
      "loss": 2.1079,
      "step": 59895
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.023038625717163,
      "learning_rate": 2.3261335102151437e-06,
      "loss": 2.2024,
      "step": 59896
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1567108631134033,
      "learning_rate": 2.325869517437257e-06,
      "loss": 2.3029,
      "step": 59897
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0031914710998535,
      "learning_rate": 2.325605537669111e-06,
      "loss": 2.2592,
      "step": 59898
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0933979749679565,
      "learning_rate": 2.3253415709111426e-06,
      "loss": 2.1342,
      "step": 59899
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1703310012817383,
      "learning_rate": 2.325077617163807e-06,
      "loss": 2.3287,
      "step": 59900
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.089483380317688,
      "learning_rate": 2.3248136764275465e-06,
      "loss": 2.203,
      "step": 59901
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9894893169403076,
      "learning_rate": 2.3245497487028146e-06,
      "loss": 2.2115,
      "step": 59902
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2373822927474976,
      "learning_rate": 2.3242858339900533e-06,
      "loss": 2.1915,
      "step": 59903
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.111244797706604,
      "learning_rate": 2.324021932289714e-06,
      "loss": 2.2246,
      "step": 59904
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.171752691268921,
      "learning_rate": 2.32375804360224e-06,
      "loss": 2.5192,
      "step": 59905
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0465736389160156,
      "learning_rate": 2.323494167928083e-06,
      "loss": 2.1135,
      "step": 59906
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1408947706222534,
      "learning_rate": 2.3232303052676863e-06,
      "loss": 2.1291,
      "step": 59907
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.066048264503479,
      "learning_rate": 2.3229664556215024e-06,
      "loss": 2.3059,
      "step": 59908
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1093027591705322,
      "learning_rate": 2.3227026189899716e-06,
      "loss": 2.2247,
      "step": 59909
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1084461212158203,
      "learning_rate": 2.3224387953735485e-06,
      "loss": 2.2224,
      "step": 59910
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0666587352752686,
      "learning_rate": 2.3221749847726773e-06,
      "loss": 2.5019,
      "step": 59911
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2802729606628418,
      "learning_rate": 2.321911187187803e-06,
      "loss": 2.5347,
      "step": 59912
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2292335033416748,
      "learning_rate": 2.3216474026193735e-06,
      "loss": 2.4772,
      "step": 59913
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0360621213912964,
      "learning_rate": 2.3213836310678384e-06,
      "loss": 2.3078,
      "step": 59914
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.061133623123169,
      "learning_rate": 2.3211198725336415e-06,
      "loss": 2.4784,
      "step": 59915
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9571411609649658,
      "learning_rate": 2.320856127017235e-06,
      "loss": 2.2977,
      "step": 59916
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4303443431854248,
      "learning_rate": 2.32059239451906e-06,
      "loss": 2.4301,
      "step": 59917
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1821337938308716,
      "learning_rate": 2.320328675039569e-06,
      "loss": 2.359,
      "step": 59918
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.07900869846344,
      "learning_rate": 2.3200649685792075e-06,
      "loss": 2.1588,
      "step": 59919
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1146266460418701,
      "learning_rate": 2.319801275138418e-06,
      "loss": 2.3986,
      "step": 59920
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0785298347473145,
      "learning_rate": 2.319537594717655e-06,
      "loss": 2.3498,
      "step": 59921
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0843201875686646,
      "learning_rate": 2.319273927317358e-06,
      "loss": 2.4077,
      "step": 59922
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9761493802070618,
      "learning_rate": 2.3190102729379816e-06,
      "loss": 2.3445,
      "step": 59923
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.094377040863037,
      "learning_rate": 2.318746631579968e-06,
      "loss": 2.473,
      "step": 59924
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0984687805175781,
      "learning_rate": 2.318483003243763e-06,
      "loss": 2.2666,
      "step": 59925
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1048359870910645,
      "learning_rate": 2.3182193879298175e-06,
      "loss": 2.3825,
      "step": 59926
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0513958930969238,
      "learning_rate": 2.317955785638577e-06,
      "loss": 2.2943,
      "step": 59927
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.533550500869751,
      "learning_rate": 2.3176921963704858e-06,
      "loss": 2.4227,
      "step": 59928
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1151973009109497,
      "learning_rate": 2.3174286201259946e-06,
      "loss": 2.1932,
      "step": 59929
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0739408731460571,
      "learning_rate": 2.317165056905546e-06,
      "loss": 2.3646,
      "step": 59930
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.125284194946289,
      "learning_rate": 2.3169015067095912e-06,
      "loss": 2.3778,
      "step": 59931
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9737809300422668,
      "learning_rate": 2.316637969538573e-06,
      "loss": 2.4157,
      "step": 59932
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1578161716461182,
      "learning_rate": 2.3163744453929436e-06,
      "loss": 2.3033,
      "step": 59933
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1705822944641113,
      "learning_rate": 2.3161109342731425e-06,
      "loss": 2.4564,
      "step": 59934
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0044039487838745,
      "learning_rate": 2.3158474361796235e-06,
      "loss": 2.2418,
      "step": 59935
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9967303276062012,
      "learning_rate": 2.3155839511128276e-06,
      "loss": 2.2512,
      "step": 59936
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2677314281463623,
      "learning_rate": 2.3153204790732086e-06,
      "loss": 2.2528,
      "step": 59937
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0454683303833008,
      "learning_rate": 2.3150570200612045e-06,
      "loss": 2.2258,
      "step": 59938
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0891799926757812,
      "learning_rate": 2.3147935740772677e-06,
      "loss": 2.0906,
      "step": 59939
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0555474758148193,
      "learning_rate": 2.31453014112184e-06,
      "loss": 2.2747,
      "step": 59940
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0358495712280273,
      "learning_rate": 2.314266721195374e-06,
      "loss": 2.4991,
      "step": 59941
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1220817565917969,
      "learning_rate": 2.3140033142983108e-06,
      "loss": 2.3387,
      "step": 59942
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0977754592895508,
      "learning_rate": 2.313739920431103e-06,
      "loss": 2.0425,
      "step": 59943
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1715859174728394,
      "learning_rate": 2.3134765395941895e-06,
      "loss": 2.2459,
      "step": 59944
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0127673149108887,
      "learning_rate": 2.3132131717880246e-06,
      "loss": 2.3116,
      "step": 59945
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0317072868347168,
      "learning_rate": 2.3129498170130482e-06,
      "loss": 2.375,
      "step": 59946
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9828981161117554,
      "learning_rate": 2.312686475269712e-06,
      "loss": 2.555,
      "step": 59947
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0654513835906982,
      "learning_rate": 2.3124231465584602e-06,
      "loss": 2.3107,
      "step": 59948
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1403759717941284,
      "learning_rate": 2.3121598308797367e-06,
      "loss": 2.2664,
      "step": 59949
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.047098159790039,
      "learning_rate": 2.3118965282339924e-06,
      "loss": 2.3001,
      "step": 59950
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1716859340667725,
      "learning_rate": 2.3116332386216723e-06,
      "loss": 2.3263,
      "step": 59951
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2313377857208252,
      "learning_rate": 2.3113699620432183e-06,
      "loss": 2.0728,
      "step": 59952
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2138023376464844,
      "learning_rate": 2.311106698499084e-06,
      "loss": 2.3931,
      "step": 59953
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0058685541152954,
      "learning_rate": 2.310843447989711e-06,
      "loss": 2.2142,
      "step": 59954
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0876837968826294,
      "learning_rate": 2.310580210515545e-06,
      "loss": 2.3473,
      "step": 59955
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4065643548965454,
      "learning_rate": 2.3103169860770357e-06,
      "loss": 2.3322,
      "step": 59956
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0005415678024292,
      "learning_rate": 2.3100537746746265e-06,
      "loss": 2.3213,
      "step": 59957
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0469019412994385,
      "learning_rate": 2.309790576308766e-06,
      "loss": 2.4781,
      "step": 59958
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1430490016937256,
      "learning_rate": 2.3095273909798965e-06,
      "loss": 2.2722,
      "step": 59959
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1458264589309692,
      "learning_rate": 2.3092642186884706e-06,
      "loss": 2.3771,
      "step": 59960
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.40320885181427,
      "learning_rate": 2.3090010594349264e-06,
      "loss": 2.3258,
      "step": 59961
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.115903615951538,
      "learning_rate": 2.3087379132197185e-06,
      "loss": 2.3678,
      "step": 59962
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.064056158065796,
      "learning_rate": 2.308474780043285e-06,
      "loss": 2.5032,
      "step": 59963
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1233665943145752,
      "learning_rate": 2.3082116599060824e-06,
      "loss": 2.2167,
      "step": 59964
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2239325046539307,
      "learning_rate": 2.3079485528085432e-06,
      "loss": 2.2325,
      "step": 59965
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.12285578250885,
      "learning_rate": 2.3076854587511233e-06,
      "loss": 2.2799,
      "step": 59966
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0234586000442505,
      "learning_rate": 2.307422377734263e-06,
      "loss": 2.3362,
      "step": 59967
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0883433818817139,
      "learning_rate": 2.3071593097584145e-06,
      "loss": 2.3183,
      "step": 59968
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0449891090393066,
      "learning_rate": 2.306896254824017e-06,
      "loss": 2.2973,
      "step": 59969
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1430222988128662,
      "learning_rate": 2.3066332129315226e-06,
      "loss": 2.4636,
      "step": 59970
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9949172735214233,
      "learning_rate": 2.3063701840813722e-06,
      "loss": 2.4408,
      "step": 59971
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1125309467315674,
      "learning_rate": 2.3061071682740164e-06,
      "loss": 2.2462,
      "step": 59972
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2074851989746094,
      "learning_rate": 2.3058441655098952e-06,
      "loss": 2.2895,
      "step": 59973
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1124662160873413,
      "learning_rate": 2.305581175789461e-06,
      "loss": 2.2367,
      "step": 59974
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.983754575252533,
      "learning_rate": 2.3053181991131566e-06,
      "loss": 2.349,
      "step": 59975
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1363158226013184,
      "learning_rate": 2.305055235481425e-06,
      "loss": 2.33,
      "step": 59976
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2315056324005127,
      "learning_rate": 2.304792284894717e-06,
      "loss": 2.3305,
      "step": 59977
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0839505195617676,
      "learning_rate": 2.304529347353477e-06,
      "loss": 2.3998,
      "step": 59978
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.051495909690857,
      "learning_rate": 2.3042664228581457e-06,
      "loss": 2.3535,
      "step": 59979
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0079104900360107,
      "learning_rate": 2.3040035114091762e-06,
      "loss": 2.1572,
      "step": 59980
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1870183944702148,
      "learning_rate": 2.3037406130070116e-06,
      "loss": 2.4222,
      "step": 59981
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.138502836227417,
      "learning_rate": 2.3034777276520937e-06,
      "loss": 2.5229,
      "step": 59982
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2245495319366455,
      "learning_rate": 2.303214855344874e-06,
      "loss": 2.4323,
      "step": 59983
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1436183452606201,
      "learning_rate": 2.3029519960857936e-06,
      "loss": 2.499,
      "step": 59984
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0895006656646729,
      "learning_rate": 2.302689149875302e-06,
      "loss": 2.2969,
      "step": 59985
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1224074363708496,
      "learning_rate": 2.3024263167138403e-06,
      "loss": 2.2365,
      "step": 59986
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.070117712020874,
      "learning_rate": 2.302163496601859e-06,
      "loss": 2.3139,
      "step": 59987
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1249791383743286,
      "learning_rate": 2.3019006895397986e-06,
      "loss": 2.3238,
      "step": 59988
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0183701515197754,
      "learning_rate": 2.3016378955281115e-06,
      "loss": 2.3357,
      "step": 59989
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0912855863571167,
      "learning_rate": 2.301375114567238e-06,
      "loss": 2.4943,
      "step": 59990
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0706676244735718,
      "learning_rate": 2.3011123466576245e-06,
      "loss": 2.2297,
      "step": 59991
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2561110258102417,
      "learning_rate": 2.300849591799713e-06,
      "loss": 2.243,
      "step": 59992
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1770920753479004,
      "learning_rate": 2.3005868499939564e-06,
      "loss": 2.4288,
      "step": 59993
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.038233757019043,
      "learning_rate": 2.3003241212407932e-06,
      "loss": 2.1828,
      "step": 59994
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.071017861366272,
      "learning_rate": 2.3000614055406746e-06,
      "loss": 2.3447,
      "step": 59995
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.079175353050232,
      "learning_rate": 2.2997987028940405e-06,
      "loss": 2.2583,
      "step": 59996
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0061649084091187,
      "learning_rate": 2.299536013301341e-06,
      "loss": 2.3041,
      "step": 59997
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1155675649642944,
      "learning_rate": 2.299273336763017e-06,
      "loss": 2.3502,
      "step": 59998
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0062092542648315,
      "learning_rate": 2.299010673279519e-06,
      "loss": 2.396,
      "step": 59999
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.018088459968567,
      "learning_rate": 2.298748022851287e-06,
      "loss": 2.2551,
      "step": 60000
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0212260484695435,
      "learning_rate": 2.2984853854787713e-06,
      "loss": 2.5443,
      "step": 60001
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0312912464141846,
      "learning_rate": 2.298222761162412e-06,
      "loss": 2.0708,
      "step": 60002
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1202702522277832,
      "learning_rate": 2.2979601499026627e-06,
      "loss": 2.3802,
      "step": 60003
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0402153730392456,
      "learning_rate": 2.297697551699957e-06,
      "loss": 2.3024,
      "step": 60004
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1343775987625122,
      "learning_rate": 2.297434966554748e-06,
      "loss": 2.349,
      "step": 60005
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2954572439193726,
      "learning_rate": 2.297172394467476e-06,
      "loss": 2.1745,
      "step": 60006
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1422556638717651,
      "learning_rate": 2.296909835438592e-06,
      "loss": 2.4073,
      "step": 60007
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0255461931228638,
      "learning_rate": 2.2966472894685355e-06,
      "loss": 2.345,
      "step": 60008
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1142255067825317,
      "learning_rate": 2.296384756557756e-06,
      "loss": 2.5988,
      "step": 60009
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0909067392349243,
      "learning_rate": 2.296122236706697e-06,
      "loss": 2.1308,
      "step": 60010
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0890105962753296,
      "learning_rate": 2.2958597299158004e-06,
      "loss": 2.2511,
      "step": 60011
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0678514242172241,
      "learning_rate": 2.2955972361855174e-06,
      "loss": 2.1732,
      "step": 60012
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0550882816314697,
      "learning_rate": 2.2953347555162863e-06,
      "loss": 2.3169,
      "step": 60013
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2168958187103271,
      "learning_rate": 2.2950722879085574e-06,
      "loss": 2.6145,
      "step": 60014
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.081127405166626,
      "learning_rate": 2.2948098333627724e-06,
      "loss": 2.3929,
      "step": 60015
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1219607591629028,
      "learning_rate": 2.2945473918793814e-06,
      "loss": 2.233,
      "step": 60016
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.116714358329773,
      "learning_rate": 2.2942849634588204e-06,
      "loss": 2.4652,
      "step": 60017
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0855869054794312,
      "learning_rate": 2.294022548101542e-06,
      "loss": 2.4284,
      "step": 60018
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0197770595550537,
      "learning_rate": 2.293760145807986e-06,
      "loss": 2.3055,
      "step": 60019
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2587792873382568,
      "learning_rate": 2.293497756578602e-06,
      "loss": 2.0331,
      "step": 60020
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0370246171951294,
      "learning_rate": 2.2932353804138297e-06,
      "loss": 2.3014,
      "step": 60021
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2045620679855347,
      "learning_rate": 2.2929730173141185e-06,
      "loss": 2.3863,
      "step": 60022
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2257869243621826,
      "learning_rate": 2.292710667279909e-06,
      "loss": 2.3638,
      "step": 60023
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.963172435760498,
      "learning_rate": 2.292448330311652e-06,
      "loss": 2.4104,
      "step": 60024
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0811387300491333,
      "learning_rate": 2.2921860064097845e-06,
      "loss": 2.3736,
      "step": 60025
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.06307053565979,
      "learning_rate": 2.291923695574758e-06,
      "loss": 2.3596,
      "step": 60026
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0676932334899902,
      "learning_rate": 2.291661397807011e-06,
      "loss": 2.4049,
      "step": 60027
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.139301061630249,
      "learning_rate": 2.2913991131069958e-06,
      "loss": 2.2709,
      "step": 60028
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0839650630950928,
      "learning_rate": 2.2911368414751487e-06,
      "loss": 2.1656,
      "step": 60029
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1121718883514404,
      "learning_rate": 2.290874582911924e-06,
      "loss": 2.0659,
      "step": 60030
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0005218982696533,
      "learning_rate": 2.2906123374177557e-06,
      "loss": 2.3166,
      "step": 60031
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1786561012268066,
      "learning_rate": 2.2903501049930953e-06,
      "loss": 2.302,
      "step": 60032
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0711442232131958,
      "learning_rate": 2.2900878856383836e-06,
      "loss": 2.503,
      "step": 60033
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.064143180847168,
      "learning_rate": 2.2898256793540684e-06,
      "loss": 2.3056,
      "step": 60034
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0410020351409912,
      "learning_rate": 2.2895634861405913e-06,
      "loss": 2.0618,
      "step": 60035
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9928685426712036,
      "learning_rate": 2.2893013059984005e-06,
      "loss": 2.4287,
      "step": 60036
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9564870595932007,
      "learning_rate": 2.2890391389279376e-06,
      "loss": 2.3972,
      "step": 60037
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1743727922439575,
      "learning_rate": 2.288776984929646e-06,
      "loss": 2.3814,
      "step": 60038
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1245577335357666,
      "learning_rate": 2.288514844003974e-06,
      "loss": 2.4657,
      "step": 60039
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.192472219467163,
      "learning_rate": 2.2882527161513603e-06,
      "loss": 2.4367,
      "step": 60040
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0885815620422363,
      "learning_rate": 2.287990601372256e-06,
      "loss": 2.2713,
      "step": 60041
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.043831706047058,
      "learning_rate": 2.2877284996671e-06,
      "loss": 2.1545,
      "step": 60042
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0320560932159424,
      "learning_rate": 2.287466411036341e-06,
      "loss": 2.3177,
      "step": 60043
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1084067821502686,
      "learning_rate": 2.2872043354804206e-06,
      "loss": 1.9747,
      "step": 60044
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1202081441879272,
      "learning_rate": 2.286942272999785e-06,
      "loss": 2.2983,
      "step": 60045
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9943515062332153,
      "learning_rate": 2.2866802235948725e-06,
      "loss": 2.4144,
      "step": 60046
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.036024570465088,
      "learning_rate": 2.2864181872661363e-06,
      "loss": 2.2834,
      "step": 60047
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2267382144927979,
      "learning_rate": 2.286156164014013e-06,
      "loss": 2.3304,
      "step": 60048
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0418925285339355,
      "learning_rate": 2.285894153838952e-06,
      "loss": 2.3986,
      "step": 60049
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0915154218673706,
      "learning_rate": 2.2856321567413943e-06,
      "loss": 2.1704,
      "step": 60050
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9393911957740784,
      "learning_rate": 2.285370172721787e-06,
      "loss": 2.1165,
      "step": 60051
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0948069095611572,
      "learning_rate": 2.2851082017805704e-06,
      "loss": 2.4515,
      "step": 60052
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1302114725112915,
      "learning_rate": 2.284846243918193e-06,
      "loss": 2.1802,
      "step": 60053
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3620473146438599,
      "learning_rate": 2.284584299135094e-06,
      "loss": 2.1295,
      "step": 60054
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3542132377624512,
      "learning_rate": 2.2843223674317228e-06,
      "loss": 2.4496,
      "step": 60055
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9611212611198425,
      "learning_rate": 2.2840604488085204e-06,
      "loss": 2.2302,
      "step": 60056
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0513029098510742,
      "learning_rate": 2.2837985432659315e-06,
      "loss": 2.3384,
      "step": 60057
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1754870414733887,
      "learning_rate": 2.2835366508043975e-06,
      "loss": 2.5687,
      "step": 60058
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.179674506187439,
      "learning_rate": 2.2832747714243663e-06,
      "loss": 2.3203,
      "step": 60059
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2784655094146729,
      "learning_rate": 2.2830129051262783e-06,
      "loss": 2.4358,
      "step": 60060
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9736359715461731,
      "learning_rate": 2.2827510519105824e-06,
      "loss": 2.435,
      "step": 60061
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1199939250946045,
      "learning_rate": 2.2824892117777153e-06,
      "loss": 2.5658,
      "step": 60062
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2306727170944214,
      "learning_rate": 2.2822273847281296e-06,
      "loss": 2.307,
      "step": 60063
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0271964073181152,
      "learning_rate": 2.28196557076226e-06,
      "loss": 2.1924,
      "step": 60064
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0187890529632568,
      "learning_rate": 2.281703769880559e-06,
      "loss": 2.3739,
      "step": 60065
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0560414791107178,
      "learning_rate": 2.2814419820834653e-06,
      "loss": 2.3467,
      "step": 60066
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0483577251434326,
      "learning_rate": 2.2811802073714216e-06,
      "loss": 2.155,
      "step": 60067
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1226191520690918,
      "learning_rate": 2.2809184457448773e-06,
      "loss": 2.3868,
      "step": 60068
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.106187105178833,
      "learning_rate": 2.280656697204272e-06,
      "loss": 2.2704,
      "step": 60069
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.078094482421875,
      "learning_rate": 2.2803949617500477e-06,
      "loss": 2.2324,
      "step": 60070
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1171749830245972,
      "learning_rate": 2.2801332393826526e-06,
      "loss": 2.3044,
      "step": 60071
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.096182107925415,
      "learning_rate": 2.2798715301025285e-06,
      "loss": 2.3041,
      "step": 60072
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2426143884658813,
      "learning_rate": 2.2796098339101167e-06,
      "loss": 2.0493,
      "step": 60073
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1910732984542847,
      "learning_rate": 2.2793481508058658e-06,
      "loss": 2.3542,
      "step": 60074
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2301877737045288,
      "learning_rate": 2.2790864807902136e-06,
      "loss": 2.3857,
      "step": 60075
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1764615774154663,
      "learning_rate": 2.2788248238636103e-06,
      "loss": 2.3738,
      "step": 60076
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.052044153213501,
      "learning_rate": 2.2785631800264917e-06,
      "loss": 2.4634,
      "step": 60077
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0616307258605957,
      "learning_rate": 2.2783015492793102e-06,
      "loss": 2.3817,
      "step": 60078
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1882940530776978,
      "learning_rate": 2.278039931622501e-06,
      "loss": 2.2576,
      "step": 60079
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1532917022705078,
      "learning_rate": 2.2777783270565135e-06,
      "loss": 2.4226,
      "step": 60080
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.018824577331543,
      "learning_rate": 2.277516735581787e-06,
      "loss": 2.3323,
      "step": 60081
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1425070762634277,
      "learning_rate": 2.2772551571987733e-06,
      "loss": 2.2687,
      "step": 60082
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2104198932647705,
      "learning_rate": 2.2769935919079034e-06,
      "loss": 2.4311,
      "step": 60083
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0881593227386475,
      "learning_rate": 2.2767320397096294e-06,
      "loss": 2.2432,
      "step": 60084
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.120606541633606,
      "learning_rate": 2.2764705006043896e-06,
      "loss": 2.6136,
      "step": 60085
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1913368701934814,
      "learning_rate": 2.276208974592633e-06,
      "loss": 2.5217,
      "step": 60086
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1771632432937622,
      "learning_rate": 2.2759474616747966e-06,
      "loss": 2.3302,
      "step": 60087
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0152944326400757,
      "learning_rate": 2.2756859618513307e-06,
      "loss": 2.1128,
      "step": 60088
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1377512216567993,
      "learning_rate": 2.2754244751226705e-06,
      "loss": 2.277,
      "step": 60089
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.947654664516449,
      "learning_rate": 2.275163001489268e-06,
      "loss": 2.3878,
      "step": 60090
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1176586151123047,
      "learning_rate": 2.2749015409515595e-06,
      "loss": 2.2885,
      "step": 60091
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0315303802490234,
      "learning_rate": 2.2746400935099933e-06,
      "loss": 2.2003,
      "step": 60092
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.058100938796997,
      "learning_rate": 2.274378659165011e-06,
      "loss": 2.2681,
      "step": 60093
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0570861101150513,
      "learning_rate": 2.2741172379170518e-06,
      "loss": 2.3249,
      "step": 60094
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5206815004348755,
      "learning_rate": 2.2738558297665645e-06,
      "loss": 2.5569,
      "step": 60095
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1523340940475464,
      "learning_rate": 2.2735944347139904e-06,
      "loss": 2.3645,
      "step": 60096
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0797889232635498,
      "learning_rate": 2.27333305275977e-06,
      "loss": 1.9308,
      "step": 60097
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0923341512680054,
      "learning_rate": 2.273071683904351e-06,
      "loss": 2.4505,
      "step": 60098
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0334151983261108,
      "learning_rate": 2.272810328148174e-06,
      "loss": 2.3605,
      "step": 60099
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0066388845443726,
      "learning_rate": 2.2725489854916793e-06,
      "loss": 2.3127,
      "step": 60100
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0945080518722534,
      "learning_rate": 2.272287655935316e-06,
      "loss": 2.3545,
      "step": 60101
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1435301303863525,
      "learning_rate": 2.2720263394795205e-06,
      "loss": 2.4276,
      "step": 60102
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0869616270065308,
      "learning_rate": 2.2717650361247423e-06,
      "loss": 2.2297,
      "step": 60103
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.19529390335083,
      "learning_rate": 2.2715037458714183e-06,
      "loss": 2.3206,
      "step": 60104
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3651028871536255,
      "learning_rate": 2.2712424687199986e-06,
      "loss": 2.3294,
      "step": 60105
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1633400917053223,
      "learning_rate": 2.270981204670918e-06,
      "loss": 2.3358,
      "step": 60106
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.07124924659729,
      "learning_rate": 2.270719953724626e-06,
      "loss": 2.0722,
      "step": 60107
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1773128509521484,
      "learning_rate": 2.2704587158815605e-06,
      "loss": 2.3297,
      "step": 60108
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4157568216323853,
      "learning_rate": 2.270197491142173e-06,
      "loss": 2.388,
      "step": 60109
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.048298716545105,
      "learning_rate": 2.2699362795068934e-06,
      "loss": 2.242,
      "step": 60110
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1656150817871094,
      "learning_rate": 2.269675080976175e-06,
      "loss": 2.275,
      "step": 60111
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1065196990966797,
      "learning_rate": 2.269413895550454e-06,
      "loss": 2.3542,
      "step": 60112
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0668509006500244,
      "learning_rate": 2.269152723230178e-06,
      "loss": 2.26,
      "step": 60113
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1453598737716675,
      "learning_rate": 2.268891564015786e-06,
      "loss": 2.1383,
      "step": 60114
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.062893033027649,
      "learning_rate": 2.2686304179077244e-06,
      "loss": 2.234,
      "step": 60115
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9607584476470947,
      "learning_rate": 2.268369284906432e-06,
      "loss": 2.2572,
      "step": 60116
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1615781784057617,
      "learning_rate": 2.268108165012356e-06,
      "loss": 2.2822,
      "step": 60117
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1236099004745483,
      "learning_rate": 2.2678470582259337e-06,
      "loss": 2.2137,
      "step": 60118
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2262918949127197,
      "learning_rate": 2.267585964547614e-06,
      "loss": 2.3829,
      "step": 60119
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.072994351387024,
      "learning_rate": 2.2673248839778327e-06,
      "loss": 2.1722,
      "step": 60120
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1517202854156494,
      "learning_rate": 2.26706381651704e-06,
      "loss": 2.3378,
      "step": 60121
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1485952138900757,
      "learning_rate": 2.266802762165673e-06,
      "loss": 2.2677,
      "step": 60122
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0631543397903442,
      "learning_rate": 2.2665417209241756e-06,
      "loss": 2.244,
      "step": 60123
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0064836740493774,
      "learning_rate": 2.266280692792988e-06,
      "loss": 2.2572,
      "step": 60124
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.209822416305542,
      "learning_rate": 2.2660196777725585e-06,
      "loss": 2.2643,
      "step": 60125
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0571821928024292,
      "learning_rate": 2.265758675863322e-06,
      "loss": 2.3339,
      "step": 60126
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3996105194091797,
      "learning_rate": 2.2654976870657297e-06,
      "loss": 2.2445,
      "step": 60127
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1140040159225464,
      "learning_rate": 2.2652367113802176e-06,
      "loss": 2.2801,
      "step": 60128
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0810737609863281,
      "learning_rate": 2.2649757488072288e-06,
      "loss": 2.3325,
      "step": 60129
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0335865020751953,
      "learning_rate": 2.2647147993472087e-06,
      "loss": 2.4201,
      "step": 60130
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1425737142562866,
      "learning_rate": 2.264453863000595e-06,
      "loss": 2.1525,
      "step": 60131
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2080339193344116,
      "learning_rate": 2.2641929397678364e-06,
      "loss": 2.2913,
      "step": 60132
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1935744285583496,
      "learning_rate": 2.263932029649368e-06,
      "loss": 2.2958,
      "step": 60133
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1672362089157104,
      "learning_rate": 2.2636711326456406e-06,
      "loss": 2.4829,
      "step": 60134
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.144317388534546,
      "learning_rate": 2.2634102487570896e-06,
      "loss": 2.3052,
      "step": 60135
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5262775421142578,
      "learning_rate": 2.2631493779841606e-06,
      "loss": 2.1024,
      "step": 60136
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.151974081993103,
      "learning_rate": 2.262888520327292e-06,
      "loss": 2.2556,
      "step": 60137
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1090010404586792,
      "learning_rate": 2.2626276757869316e-06,
      "loss": 2.4182,
      "step": 60138
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1438199281692505,
      "learning_rate": 2.262366844363516e-06,
      "loss": 2.1756,
      "step": 60139
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.026047945022583,
      "learning_rate": 2.2621060260574923e-06,
      "loss": 2.3774,
      "step": 60140
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1309269666671753,
      "learning_rate": 2.2618452208692974e-06,
      "loss": 2.1739,
      "step": 60141
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1723299026489258,
      "learning_rate": 2.2615844287993805e-06,
      "loss": 2.3306,
      "step": 60142
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.056352138519287,
      "learning_rate": 2.261323649848176e-06,
      "loss": 2.2913,
      "step": 60143
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.102199912071228,
      "learning_rate": 2.261062884016133e-06,
      "loss": 2.0231,
      "step": 60144
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0621455907821655,
      "learning_rate": 2.2608021313036865e-06,
      "loss": 2.1164,
      "step": 60145
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.405145287513733,
      "learning_rate": 2.2605413917112863e-06,
      "loss": 2.2445,
      "step": 60146
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1262706518173218,
      "learning_rate": 2.260280665239367e-06,
      "loss": 2.5626,
      "step": 60147
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0509088039398193,
      "learning_rate": 2.2600199518883805e-06,
      "loss": 2.2651,
      "step": 60148
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0265809297561646,
      "learning_rate": 2.2597592516587554e-06,
      "loss": 2.5057,
      "step": 60149
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.04335618019104,
      "learning_rate": 2.2594985645509437e-06,
      "loss": 2.3771,
      "step": 60150
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0973482131958008,
      "learning_rate": 2.259237890565382e-06,
      "loss": 2.3991,
      "step": 60151
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1215728521347046,
      "learning_rate": 2.258977229702517e-06,
      "loss": 2.3604,
      "step": 60152
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2296782732009888,
      "learning_rate": 2.2587165819627855e-06,
      "loss": 2.3873,
      "step": 60153
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0107944011688232,
      "learning_rate": 2.258455947346634e-06,
      "loss": 2.4326,
      "step": 60154
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.161988377571106,
      "learning_rate": 2.258195325854503e-06,
      "loss": 2.3434,
      "step": 60155
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.086237907409668,
      "learning_rate": 2.2579347174868306e-06,
      "loss": 2.2904,
      "step": 60156
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0845810174942017,
      "learning_rate": 2.2576741222440644e-06,
      "loss": 2.3932,
      "step": 60157
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0000920295715332,
      "learning_rate": 2.2574135401266405e-06,
      "loss": 2.4416,
      "step": 60158
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.003503441810608,
      "learning_rate": 2.257152971135006e-06,
      "loss": 2.2645,
      "step": 60159
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.141022801399231,
      "learning_rate": 2.256892415269597e-06,
      "loss": 2.1086,
      "step": 60160
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1775271892547607,
      "learning_rate": 2.2566318725308624e-06,
      "loss": 2.1155,
      "step": 60161
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3838682174682617,
      "learning_rate": 2.2563713429192392e-06,
      "loss": 2.4132,
      "step": 60162
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1918970346450806,
      "learning_rate": 2.2561108264351695e-06,
      "loss": 2.3661,
      "step": 60163
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1757268905639648,
      "learning_rate": 2.255850323079092e-06,
      "loss": 2.2013,
      "step": 60164
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.091233253479004,
      "learning_rate": 2.255589832851455e-06,
      "loss": 2.3154,
      "step": 60165
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0345631837844849,
      "learning_rate": 2.2553293557526935e-06,
      "loss": 2.499,
      "step": 60166
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1473885774612427,
      "learning_rate": 2.2550688917832553e-06,
      "loss": 2.1413,
      "step": 60167
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0383421182632446,
      "learning_rate": 2.2548084409435753e-06,
      "loss": 1.9976,
      "step": 60168
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1247621774673462,
      "learning_rate": 2.254548003234103e-06,
      "loss": 2.1985,
      "step": 60169
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1123350858688354,
      "learning_rate": 2.254287578655271e-06,
      "loss": 2.1549,
      "step": 60170
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.017582893371582,
      "learning_rate": 2.2540271672075285e-06,
      "loss": 2.1941,
      "step": 60171
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.049522042274475,
      "learning_rate": 2.2537667688913112e-06,
      "loss": 2.3729,
      "step": 60172
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0154250860214233,
      "learning_rate": 2.2535063837070646e-06,
      "loss": 2.1163,
      "step": 60173
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.785184383392334,
      "learning_rate": 2.25324601165523e-06,
      "loss": 2.4853,
      "step": 60174
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.137467622756958,
      "learning_rate": 2.252985652736247e-06,
      "loss": 2.339,
      "step": 60175
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1132378578186035,
      "learning_rate": 2.252725306950555e-06,
      "loss": 2.3503,
      "step": 60176
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0158438682556152,
      "learning_rate": 2.2524649742986005e-06,
      "loss": 2.3089,
      "step": 60177
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0308579206466675,
      "learning_rate": 2.2522046547808186e-06,
      "loss": 2.2052,
      "step": 60178
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9928972721099854,
      "learning_rate": 2.2519443483976578e-06,
      "loss": 2.3059,
      "step": 60179
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0863920450210571,
      "learning_rate": 2.2516840551495524e-06,
      "loss": 2.2682,
      "step": 60180
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0707592964172363,
      "learning_rate": 2.2514237750369494e-06,
      "loss": 2.27,
      "step": 60181
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0120790004730225,
      "learning_rate": 2.2511635080602845e-06,
      "loss": 2.2502,
      "step": 60182
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0551621913909912,
      "learning_rate": 2.250903254220006e-06,
      "loss": 2.4311,
      "step": 60183
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0315284729003906,
      "learning_rate": 2.25064301351655e-06,
      "loss": 2.4104,
      "step": 60184
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0925339460372925,
      "learning_rate": 2.2503827859503568e-06,
      "loss": 2.3717,
      "step": 60185
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.084238052368164,
      "learning_rate": 2.2501225715218723e-06,
      "loss": 2.3068,
      "step": 60186
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0353844165802002,
      "learning_rate": 2.2498623702315313e-06,
      "loss": 2.4924,
      "step": 60187
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0680220127105713,
      "learning_rate": 2.249602182079782e-06,
      "loss": 2.1964,
      "step": 60188
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0835617780685425,
      "learning_rate": 2.249342007067061e-06,
      "loss": 2.229,
      "step": 60189
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1903661489486694,
      "learning_rate": 2.249081845193811e-06,
      "loss": 2.4553,
      "step": 60190
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.989678680896759,
      "learning_rate": 2.2488216964604703e-06,
      "loss": 1.9236,
      "step": 60191
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1106324195861816,
      "learning_rate": 2.248561560867484e-06,
      "loss": 2.3008,
      "step": 60192
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.16584050655365,
      "learning_rate": 2.248301438415289e-06,
      "loss": 2.3719,
      "step": 60193
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2207897901535034,
      "learning_rate": 2.24804132910433e-06,
      "loss": 2.2213,
      "step": 60194
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2120110988616943,
      "learning_rate": 2.2477812329350447e-06,
      "loss": 2.3651,
      "step": 60195
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0986629724502563,
      "learning_rate": 2.2475211499078775e-06,
      "loss": 2.2753,
      "step": 60196
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0713160037994385,
      "learning_rate": 2.2472610800232654e-06,
      "loss": 2.3571,
      "step": 60197
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.142267107963562,
      "learning_rate": 2.247001023281654e-06,
      "loss": 2.1605,
      "step": 60198
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0316622257232666,
      "learning_rate": 2.2467409796834795e-06,
      "loss": 2.4351,
      "step": 60199
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2393639087677002,
      "learning_rate": 2.246480949229186e-06,
      "loss": 2.4472,
      "step": 60200
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9456512928009033,
      "learning_rate": 2.246220931919214e-06,
      "loss": 2.275,
      "step": 60201
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9833849668502808,
      "learning_rate": 2.245960927754004e-06,
      "loss": 2.3433,
      "step": 60202
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1506824493408203,
      "learning_rate": 2.245700936733992e-06,
      "loss": 2.4422,
      "step": 60203
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.005082368850708,
      "learning_rate": 2.245440958859627e-06,
      "loss": 2.2605,
      "step": 60204
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0531796216964722,
      "learning_rate": 2.245180994131343e-06,
      "loss": 2.4082,
      "step": 60205
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1637654304504395,
      "learning_rate": 2.244921042549585e-06,
      "loss": 2.251,
      "step": 60206
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1129218339920044,
      "learning_rate": 2.24466110411479e-06,
      "loss": 2.3745,
      "step": 60207
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2238482236862183,
      "learning_rate": 2.2444011788274033e-06,
      "loss": 2.3597,
      "step": 60208
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1349180936813354,
      "learning_rate": 2.2441412666878604e-06,
      "loss": 2.3784,
      "step": 60209
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2336713075637817,
      "learning_rate": 2.2438813676966076e-06,
      "loss": 2.2354,
      "step": 60210
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.174526333808899,
      "learning_rate": 2.2436214818540823e-06,
      "loss": 2.2829,
      "step": 60211
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.04445481300354,
      "learning_rate": 2.2433616091607215e-06,
      "loss": 2.211,
      "step": 60212
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1414330005645752,
      "learning_rate": 2.2431017496169738e-06,
      "loss": 2.2714,
      "step": 60213
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1297913789749146,
      "learning_rate": 2.242841903223274e-06,
      "loss": 2.3053,
      "step": 60214
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.101210594177246,
      "learning_rate": 2.2425820699800616e-06,
      "loss": 2.2442,
      "step": 60215
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1072611808776855,
      "learning_rate": 2.2423222498877827e-06,
      "loss": 2.2689,
      "step": 60216
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1767160892486572,
      "learning_rate": 2.242062442946874e-06,
      "loss": 2.5871,
      "step": 60217
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9975820183753967,
      "learning_rate": 2.241802649157774e-06,
      "loss": 2.0272,
      "step": 60218
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2880421876907349,
      "learning_rate": 2.2415428685209284e-06,
      "loss": 2.1783,
      "step": 60219
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0397015810012817,
      "learning_rate": 2.241283101036772e-06,
      "loss": 2.3574,
      "step": 60220
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2839871644973755,
      "learning_rate": 2.241023346705751e-06,
      "loss": 2.5442,
      "step": 60221
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0503464937210083,
      "learning_rate": 2.240763605528299e-06,
      "loss": 2.2576,
      "step": 60222
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1026926040649414,
      "learning_rate": 2.2405038775048638e-06,
      "loss": 2.1976,
      "step": 60223
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0233210325241089,
      "learning_rate": 2.2402441626358783e-06,
      "loss": 2.2672,
      "step": 60224
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1360995769500732,
      "learning_rate": 2.23998446092179e-06,
      "loss": 2.3097,
      "step": 60225
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2342027425765991,
      "learning_rate": 2.2397247723630332e-06,
      "loss": 2.4357,
      "step": 60226
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0777291059494019,
      "learning_rate": 2.239465096960055e-06,
      "loss": 2.3026,
      "step": 60227
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.099963903427124,
      "learning_rate": 2.239205434713286e-06,
      "loss": 2.3187,
      "step": 60228
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0877236127853394,
      "learning_rate": 2.2389457856231755e-06,
      "loss": 2.305,
      "step": 60229
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1196428537368774,
      "learning_rate": 2.2386861496901556e-06,
      "loss": 2.1472,
      "step": 60230
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9683874845504761,
      "learning_rate": 2.2384265269146744e-06,
      "loss": 2.2537,
      "step": 60231
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1399946212768555,
      "learning_rate": 2.2381669172971643e-06,
      "loss": 2.3754,
      "step": 60232
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9538545608520508,
      "learning_rate": 2.237907320838073e-06,
      "loss": 2.3086,
      "step": 60233
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.184489369392395,
      "learning_rate": 2.237647737537835e-06,
      "loss": 2.4772,
      "step": 60234
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0624748468399048,
      "learning_rate": 2.2373881673968933e-06,
      "loss": 2.5044,
      "step": 60235
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.221476435661316,
      "learning_rate": 2.237128610415685e-06,
      "loss": 2.1734,
      "step": 60236
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1688356399536133,
      "learning_rate": 2.2368690665946548e-06,
      "loss": 2.4096,
      "step": 60237
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.063568115234375,
      "learning_rate": 2.236609535934238e-06,
      "loss": 2.2938,
      "step": 60238
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1584080457687378,
      "learning_rate": 2.2363500184348774e-06,
      "loss": 2.2219,
      "step": 60239
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0750840902328491,
      "learning_rate": 2.2360905140970134e-06,
      "loss": 2.0583,
      "step": 60240
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5909446477890015,
      "learning_rate": 2.2358310229210843e-06,
      "loss": 2.356,
      "step": 60241
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9492494463920593,
      "learning_rate": 2.2355715449075276e-06,
      "loss": 2.3488,
      "step": 60242
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2001354694366455,
      "learning_rate": 2.2353120800567895e-06,
      "loss": 2.269,
      "step": 60243
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0408778190612793,
      "learning_rate": 2.235052628369303e-06,
      "loss": 2.1722,
      "step": 60244
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0864967107772827,
      "learning_rate": 2.2347931898455133e-06,
      "loss": 2.38,
      "step": 60245
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0149286985397339,
      "learning_rate": 2.234533764485859e-06,
      "loss": 2.2972,
      "step": 60246
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1790878772735596,
      "learning_rate": 2.2342743522907763e-06,
      "loss": 2.4009,
      "step": 60247
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1513328552246094,
      "learning_rate": 2.234014953260709e-06,
      "loss": 2.4762,
      "step": 60248
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0477392673492432,
      "learning_rate": 2.233755567396094e-06,
      "loss": 2.6293,
      "step": 60249
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1766117811203003,
      "learning_rate": 2.2334961946973756e-06,
      "loss": 2.343,
      "step": 60250
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1050227880477905,
      "learning_rate": 2.233236835164988e-06,
      "loss": 2.3698,
      "step": 60251
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1162614822387695,
      "learning_rate": 2.2329774887993748e-06,
      "loss": 2.3397,
      "step": 60252
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0722601413726807,
      "learning_rate": 2.2327181556009726e-06,
      "loss": 2.5761,
      "step": 60253
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0735113620758057,
      "learning_rate": 2.232458835570228e-06,
      "loss": 2.2841,
      "step": 60254
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0857819318771362,
      "learning_rate": 2.2321995287075694e-06,
      "loss": 2.3322,
      "step": 60255
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2424099445343018,
      "learning_rate": 2.2319402350134455e-06,
      "loss": 2.3908,
      "step": 60256
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1748374700546265,
      "learning_rate": 2.23168095448829e-06,
      "loss": 2.5388,
      "step": 60257
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0536235570907593,
      "learning_rate": 2.231421687132548e-06,
      "loss": 2.5302,
      "step": 60258
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0530210733413696,
      "learning_rate": 2.231162432946653e-06,
      "loss": 2.3806,
      "step": 60259
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0657082796096802,
      "learning_rate": 2.2309031919310496e-06,
      "loss": 2.3399,
      "step": 60260
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2126274108886719,
      "learning_rate": 2.2306439640861734e-06,
      "loss": 2.1574,
      "step": 60261
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9930172562599182,
      "learning_rate": 2.2303847494124685e-06,
      "loss": 2.2745,
      "step": 60262
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0967605113983154,
      "learning_rate": 2.230125547910369e-06,
      "loss": 2.277,
      "step": 60263
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.106579303741455,
      "learning_rate": 2.2298663595803206e-06,
      "loss": 2.2731,
      "step": 60264
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.051245927810669,
      "learning_rate": 2.229607184422755e-06,
      "loss": 2.4313,
      "step": 60265
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0720722675323486,
      "learning_rate": 2.2293480224381192e-06,
      "loss": 2.531,
      "step": 60266
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1008964776992798,
      "learning_rate": 2.2290888736268477e-06,
      "loss": 2.2824,
      "step": 60267
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.094329595565796,
      "learning_rate": 2.228829737989383e-06,
      "loss": 2.455,
      "step": 60268
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1431857347488403,
      "learning_rate": 2.228570615526159e-06,
      "loss": 2.2925,
      "step": 60269
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.060451626777649,
      "learning_rate": 2.22831150623762e-06,
      "loss": 2.5501,
      "step": 60270
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1115232706069946,
      "learning_rate": 2.228052410124203e-06,
      "loss": 2.121,
      "step": 60271
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.177522897720337,
      "learning_rate": 2.2277933271863495e-06,
      "loss": 2.4224,
      "step": 60272
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0844718217849731,
      "learning_rate": 2.2275342574244973e-06,
      "loss": 2.4992,
      "step": 60273
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0435259342193604,
      "learning_rate": 2.227275200839083e-06,
      "loss": 2.472,
      "step": 60274
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0528696775436401,
      "learning_rate": 2.2270161574305503e-06,
      "loss": 2.2537,
      "step": 60275
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0803436040878296,
      "learning_rate": 2.226757127199335e-06,
      "loss": 2.0251,
      "step": 60276
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1580606698989868,
      "learning_rate": 2.2264981101458793e-06,
      "loss": 2.5089,
      "step": 60277
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0965044498443604,
      "learning_rate": 2.226239106270618e-06,
      "loss": 2.221,
      "step": 60278
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1391764879226685,
      "learning_rate": 2.2259801155739956e-06,
      "loss": 2.2654,
      "step": 60279
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1009312868118286,
      "learning_rate": 2.225721138056448e-06,
      "loss": 2.3916,
      "step": 60280
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0181243419647217,
      "learning_rate": 2.2254621737184157e-06,
      "loss": 2.3414,
      "step": 60281
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2060189247131348,
      "learning_rate": 2.2252032225603326e-06,
      "loss": 2.3934,
      "step": 60282
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1397099494934082,
      "learning_rate": 2.224944284582644e-06,
      "loss": 2.2883,
      "step": 60283
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.007826566696167,
      "learning_rate": 2.2246853597857853e-06,
      "loss": 2.1803,
      "step": 60284
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0755528211593628,
      "learning_rate": 2.224426448170198e-06,
      "loss": 2.2304,
      "step": 60285
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.9572230577468872,
      "learning_rate": 2.224167549736318e-06,
      "loss": 2.3196,
      "step": 60286
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.1737823486328125,
      "learning_rate": 2.2239086644845876e-06,
      "loss": 2.4035,
      "step": 60287
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.0997955799102783,
      "learning_rate": 2.223649792415442e-06,
      "loss": 2.2787,
      "step": 60288
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2074023485183716,
      "learning_rate": 2.2233909335293247e-06,
      "loss": 2.3239,
      "step": 60289
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.226284146308899,
      "learning_rate": 2.223132087826668e-06,
      "loss": 2.181,
      "step": 60290
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.059917688369751,
      "learning_rate": 2.222873255307919e-06,
      "loss": 2.4116,
      "step": 60291
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3777649402618408,
      "learning_rate": 2.222614435973509e-06,
      "loss": 2.3149,
      "step": 60292
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.105130672454834,
      "learning_rate": 2.2223556298238848e-06,
      "loss": 2.2539,
      "step": 60293
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1403402090072632,
      "learning_rate": 2.222096836859474e-06,
      "loss": 2.5151,
      "step": 60294
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1843419075012207,
      "learning_rate": 2.2218380570807264e-06,
      "loss": 2.2744,
      "step": 60295
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1026958227157593,
      "learning_rate": 2.2215792904880706e-06,
      "loss": 2.3273,
      "step": 60296
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1397782564163208,
      "learning_rate": 2.2213205370819548e-06,
      "loss": 2.5345,
      "step": 60297
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0996187925338745,
      "learning_rate": 2.22106179686281e-06,
      "loss": 2.3252,
      "step": 60298
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0825717449188232,
      "learning_rate": 2.220803069831081e-06,
      "loss": 2.3549,
      "step": 60299
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1631940603256226,
      "learning_rate": 2.220544355987202e-06,
      "loss": 2.1702,
      "step": 60300
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1032800674438477,
      "learning_rate": 2.220285655331614e-06,
      "loss": 2.2351,
      "step": 60301
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1624637842178345,
      "learning_rate": 2.220026967864756e-06,
      "loss": 2.5105,
      "step": 60302
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1631876230239868,
      "learning_rate": 2.2197682935870624e-06,
      "loss": 2.3303,
      "step": 60303
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0253283977508545,
      "learning_rate": 2.2195096324989773e-06,
      "loss": 2.3529,
      "step": 60304
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.105159044265747,
      "learning_rate": 2.2192509846009336e-06,
      "loss": 2.332,
      "step": 60305
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1969021558761597,
      "learning_rate": 2.2189923498933753e-06,
      "loss": 2.3227,
      "step": 60306
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0499602556228638,
      "learning_rate": 2.2187337283767386e-06,
      "loss": 2.2815,
      "step": 60307
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0563455820083618,
      "learning_rate": 2.218475120051462e-06,
      "loss": 2.3618,
      "step": 60308
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1222819089889526,
      "learning_rate": 2.21821652491798e-06,
      "loss": 2.373,
      "step": 60309
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0728310346603394,
      "learning_rate": 2.2179579429767373e-06,
      "loss": 2.261,
      "step": 60310
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0639950037002563,
      "learning_rate": 2.217699374228167e-06,
      "loss": 2.4572,
      "step": 60311
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.168601632118225,
      "learning_rate": 2.2174408186727125e-06,
      "loss": 2.1874,
      "step": 60312
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0442163944244385,
      "learning_rate": 2.2171822763108062e-06,
      "loss": 2.3379,
      "step": 60313
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1539729833602905,
      "learning_rate": 2.2169237471428938e-06,
      "loss": 2.4073,
      "step": 60314
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.229924201965332,
      "learning_rate": 2.216665231169406e-06,
      "loss": 2.116,
      "step": 60315
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0199017524719238,
      "learning_rate": 2.2164067283907875e-06,
      "loss": 2.2,
      "step": 60316
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0378059148788452,
      "learning_rate": 2.2161482388074707e-06,
      "loss": 2.4691,
      "step": 60317
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0010476112365723,
      "learning_rate": 2.2158897624198994e-06,
      "loss": 2.4888,
      "step": 60318
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1299599409103394,
      "learning_rate": 2.215631299228509e-06,
      "loss": 2.5406,
      "step": 60319
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.021661639213562,
      "learning_rate": 2.215372849233738e-06,
      "loss": 2.2845,
      "step": 60320
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0925350189208984,
      "learning_rate": 2.215114412436021e-06,
      "loss": 2.2622,
      "step": 60321
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9757639765739441,
      "learning_rate": 2.2148559888358036e-06,
      "loss": 2.374,
      "step": 60322
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0069814920425415,
      "learning_rate": 2.214597578433515e-06,
      "loss": 2.2594,
      "step": 60323
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.031838059425354,
      "learning_rate": 2.214339181229602e-06,
      "loss": 2.3774,
      "step": 60324
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0859596729278564,
      "learning_rate": 2.214080797224496e-06,
      "loss": 2.2573,
      "step": 60325
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0136127471923828,
      "learning_rate": 2.2138224264186402e-06,
      "loss": 2.4334,
      "step": 60326
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0724682807922363,
      "learning_rate": 2.213564068812467e-06,
      "loss": 2.2241,
      "step": 60327
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0127208232879639,
      "learning_rate": 2.2133057244064204e-06,
      "loss": 2.5195,
      "step": 60328
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.086111068725586,
      "learning_rate": 2.2130473932009354e-06,
      "loss": 2.3949,
      "step": 60329
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1082736253738403,
      "learning_rate": 2.2127890751964478e-06,
      "loss": 2.3043,
      "step": 60330
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1024192571640015,
      "learning_rate": 2.2125307703933994e-06,
      "loss": 2.268,
      "step": 60331
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0882198810577393,
      "learning_rate": 2.2122724787922234e-06,
      "loss": 2.5209,
      "step": 60332
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.150262475013733,
      "learning_rate": 2.2120142003933642e-06,
      "loss": 2.2889,
      "step": 60333
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0537582635879517,
      "learning_rate": 2.2117559351972563e-06,
      "loss": 2.338,
      "step": 60334
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0036553144454956,
      "learning_rate": 2.211497683204338e-06,
      "loss": 1.9814,
      "step": 60335
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0678001642227173,
      "learning_rate": 2.211239444415042e-06,
      "loss": 2.1659,
      "step": 60336
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5575370788574219,
      "learning_rate": 2.210981218829814e-06,
      "loss": 2.3142,
      "step": 60337
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.081666350364685,
      "learning_rate": 2.210723006449086e-06,
      "loss": 2.3332,
      "step": 60338
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0567573308944702,
      "learning_rate": 2.2104648072733005e-06,
      "loss": 2.2286,
      "step": 60339
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.167432188987732,
      "learning_rate": 2.2102066213028907e-06,
      "loss": 2.2321,
      "step": 60340
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1300933361053467,
      "learning_rate": 2.2099484485382973e-06,
      "loss": 2.2617,
      "step": 60341
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.159432291984558,
      "learning_rate": 2.2096902889799554e-06,
      "loss": 2.4868,
      "step": 60342
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4344840049743652,
      "learning_rate": 2.2094321426283073e-06,
      "loss": 2.5867,
      "step": 60343
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1161750555038452,
      "learning_rate": 2.209174009483784e-06,
      "loss": 2.3514,
      "step": 60344
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2643766403198242,
      "learning_rate": 2.208915889546831e-06,
      "loss": 2.4393,
      "step": 60345
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1431207656860352,
      "learning_rate": 2.2086577828178803e-06,
      "loss": 2.5031,
      "step": 60346
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1021101474761963,
      "learning_rate": 2.208399689297371e-06,
      "loss": 2.2699,
      "step": 60347
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4801050424575806,
      "learning_rate": 2.208141608985738e-06,
      "loss": 2.4104,
      "step": 60348
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.071507453918457,
      "learning_rate": 2.207883541883423e-06,
      "loss": 2.2403,
      "step": 60349
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1513923406600952,
      "learning_rate": 2.20762548799086e-06,
      "loss": 2.4573,
      "step": 60350
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2014265060424805,
      "learning_rate": 2.2073674473084907e-06,
      "loss": 2.4505,
      "step": 60351
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0960637331008911,
      "learning_rate": 2.207109419836747e-06,
      "loss": 2.2913,
      "step": 60352
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1575978994369507,
      "learning_rate": 2.2068514055760724e-06,
      "loss": 2.2298,
      "step": 60353
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0685571432113647,
      "learning_rate": 2.2065934045268977e-06,
      "loss": 2.6256,
      "step": 60354
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.093817949295044,
      "learning_rate": 2.206335416689668e-06,
      "loss": 2.4812,
      "step": 60355
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9435341954231262,
      "learning_rate": 2.2060774420648125e-06,
      "loss": 2.2589,
      "step": 60356
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1081795692443848,
      "learning_rate": 2.2058194806527766e-06,
      "loss": 2.3229,
      "step": 60357
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4437224864959717,
      "learning_rate": 2.2055615324539926e-06,
      "loss": 2.3588,
      "step": 60358
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.042623519897461,
      "learning_rate": 2.2053035974688985e-06,
      "loss": 2.3534,
      "step": 60359
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0965094566345215,
      "learning_rate": 2.20504567569793e-06,
      "loss": 2.1562,
      "step": 60360
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1768232583999634,
      "learning_rate": 2.2047877671415276e-06,
      "loss": 2.2556,
      "step": 60361
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1100682020187378,
      "learning_rate": 2.204529871800125e-06,
      "loss": 2.312,
      "step": 60362
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2133886814117432,
      "learning_rate": 2.2042719896741647e-06,
      "loss": 2.6061,
      "step": 60363
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0523347854614258,
      "learning_rate": 2.2040141207640796e-06,
      "loss": 2.1498,
      "step": 60364
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.390244483947754,
      "learning_rate": 2.203756265070307e-06,
      "loss": 2.2667,
      "step": 60365
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3515145778656006,
      "learning_rate": 2.2034984225932866e-06,
      "loss": 2.5304,
      "step": 60366
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0400359630584717,
      "learning_rate": 2.203240593333451e-06,
      "loss": 2.3883,
      "step": 60367
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.101881504058838,
      "learning_rate": 2.2029827772912426e-06,
      "loss": 2.287,
      "step": 60368
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1955251693725586,
      "learning_rate": 2.202724974467094e-06,
      "loss": 2.3415,
      "step": 60369
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1987485885620117,
      "learning_rate": 2.2024671848614465e-06,
      "loss": 2.3207,
      "step": 60370
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0658800601959229,
      "learning_rate": 2.2022094084747324e-06,
      "loss": 2.3243,
      "step": 60371
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.050904631614685,
      "learning_rate": 2.2019516453073964e-06,
      "loss": 2.6287,
      "step": 60372
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0741535425186157,
      "learning_rate": 2.2016938953598653e-06,
      "loss": 2.1892,
      "step": 60373
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0821492671966553,
      "learning_rate": 2.201436158632584e-06,
      "loss": 2.4286,
      "step": 60374
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0412158966064453,
      "learning_rate": 2.201178435125982e-06,
      "loss": 2.4037,
      "step": 60375
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2805432081222534,
      "learning_rate": 2.200920724840505e-06,
      "loss": 2.2995,
      "step": 60376
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0564182996749878,
      "learning_rate": 2.200663027776583e-06,
      "loss": 2.2687,
      "step": 60377
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0115700960159302,
      "learning_rate": 2.2004053439346582e-06,
      "loss": 2.2615,
      "step": 60378
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.11935555934906,
      "learning_rate": 2.200147673315162e-06,
      "loss": 2.209,
      "step": 60379
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1697067022323608,
      "learning_rate": 2.1998900159185354e-06,
      "loss": 2.3151,
      "step": 60380
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.041070818901062,
      "learning_rate": 2.199632371745212e-06,
      "loss": 2.5213,
      "step": 60381
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1873141527175903,
      "learning_rate": 2.1993747407956324e-06,
      "loss": 2.188,
      "step": 60382
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.053586483001709,
      "learning_rate": 2.199117123070229e-06,
      "loss": 2.355,
      "step": 60383
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1766871213912964,
      "learning_rate": 2.1988595185694426e-06,
      "loss": 2.2619,
      "step": 60384
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1741230487823486,
      "learning_rate": 2.1986019272937086e-06,
      "loss": 2.2621,
      "step": 60385
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1731816530227661,
      "learning_rate": 2.198344349243464e-06,
      "loss": 2.3012,
      "step": 60386
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0620830059051514,
      "learning_rate": 2.1980867844191412e-06,
      "loss": 2.451,
      "step": 60387
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.218170166015625,
      "learning_rate": 2.1978292328211825e-06,
      "loss": 2.3903,
      "step": 60388
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.033727765083313,
      "learning_rate": 2.19757169445002e-06,
      "loss": 2.3359,
      "step": 60389
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9880117774009705,
      "learning_rate": 2.1973141693060963e-06,
      "loss": 2.2931,
      "step": 60390
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.646493673324585,
      "learning_rate": 2.1970566573898434e-06,
      "loss": 2.2208,
      "step": 60391
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0358383655548096,
      "learning_rate": 2.1967991587016956e-06,
      "loss": 2.559,
      "step": 60392
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1313393115997314,
      "learning_rate": 2.196541673242095e-06,
      "loss": 2.2523,
      "step": 60393
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0588810443878174,
      "learning_rate": 2.1962842010114737e-06,
      "loss": 2.3067,
      "step": 60394
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9857268929481506,
      "learning_rate": 2.1960267420102733e-06,
      "loss": 2.3224,
      "step": 60395
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1097068786621094,
      "learning_rate": 2.1957692962389245e-06,
      "loss": 2.2081,
      "step": 60396
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9485526084899902,
      "learning_rate": 2.195511863697868e-06,
      "loss": 2.6227,
      "step": 60397
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.039839744567871,
      "learning_rate": 2.1952544443875357e-06,
      "loss": 2.2067,
      "step": 60398
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0844769477844238,
      "learning_rate": 2.194997038308373e-06,
      "loss": 2.314,
      "step": 60399
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1682428121566772,
      "learning_rate": 2.194739645460805e-06,
      "loss": 2.1503,
      "step": 60400
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1594146490097046,
      "learning_rate": 2.1944822658452748e-06,
      "loss": 2.3961,
      "step": 60401
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0873678922653198,
      "learning_rate": 2.194224899462215e-06,
      "loss": 2.4003,
      "step": 60402
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2132741212844849,
      "learning_rate": 2.1939675463120667e-06,
      "loss": 2.5072,
      "step": 60403
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9704919457435608,
      "learning_rate": 2.193710206395261e-06,
      "loss": 2.3939,
      "step": 60404
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0973440408706665,
      "learning_rate": 2.1934528797122388e-06,
      "loss": 2.3905,
      "step": 60405
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.117497444152832,
      "learning_rate": 2.1931955662634317e-06,
      "loss": 2.2502,
      "step": 60406
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.089732050895691,
      "learning_rate": 2.1929382660492815e-06,
      "loss": 2.4454,
      "step": 60407
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0804075002670288,
      "learning_rate": 2.1926809790702185e-06,
      "loss": 2.0994,
      "step": 60408
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0439358949661255,
      "learning_rate": 2.1924237053266852e-06,
      "loss": 2.357,
      "step": 60409
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1123466491699219,
      "learning_rate": 2.1921664448191103e-06,
      "loss": 2.3556,
      "step": 60410
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0004576444625854,
      "learning_rate": 2.1919091975479367e-06,
      "loss": 2.288,
      "step": 60411
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.304665207862854,
      "learning_rate": 2.191651963513598e-06,
      "loss": 2.2326,
      "step": 60412
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0896189212799072,
      "learning_rate": 2.1913947427165294e-06,
      "loss": 2.2511,
      "step": 60413
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1015088558197021,
      "learning_rate": 2.1911375351571652e-06,
      "loss": 2.4508,
      "step": 60414
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0175906419754028,
      "learning_rate": 2.190880340835947e-06,
      "loss": 2.2021,
      "step": 60415
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0391900539398193,
      "learning_rate": 2.1906231597533033e-06,
      "loss": 2.2684,
      "step": 60416
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.141414761543274,
      "learning_rate": 2.190365991909679e-06,
      "loss": 2.2804,
      "step": 60417
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9630399346351624,
      "learning_rate": 2.1901088373055013e-06,
      "loss": 2.4534,
      "step": 60418
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0857025384902954,
      "learning_rate": 2.189851695941213e-06,
      "loss": 2.3061,
      "step": 60419
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.031376838684082,
      "learning_rate": 2.189594567817248e-06,
      "loss": 2.2031,
      "step": 60420
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0291060209274292,
      "learning_rate": 2.1893374529340383e-06,
      "loss": 2.0134,
      "step": 60421
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.205441951751709,
      "learning_rate": 2.1890803512920255e-06,
      "loss": 2.189,
      "step": 60422
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1284220218658447,
      "learning_rate": 2.1888232628916407e-06,
      "loss": 2.4727,
      "step": 60423
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1050339937210083,
      "learning_rate": 2.1885661877333242e-06,
      "loss": 2.3162,
      "step": 60424
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.083412766456604,
      "learning_rate": 2.1883091258175103e-06,
      "loss": 2.5241,
      "step": 60425
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0199775695800781,
      "learning_rate": 2.1880520771446334e-06,
      "loss": 2.3306,
      "step": 60426
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1047658920288086,
      "learning_rate": 2.187795041715127e-06,
      "loss": 2.2683,
      "step": 60427
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2063461542129517,
      "learning_rate": 2.187538019529434e-06,
      "loss": 2.1843,
      "step": 60428
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.091179609298706,
      "learning_rate": 2.1872810105879816e-06,
      "loss": 2.296,
      "step": 60429
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2455681562423706,
      "learning_rate": 2.1870240148912126e-06,
      "loss": 2.4499,
      "step": 60430
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0513852834701538,
      "learning_rate": 2.1867670324395574e-06,
      "loss": 2.2385,
      "step": 60431
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2400789260864258,
      "learning_rate": 2.186510063233458e-06,
      "loss": 2.2218,
      "step": 60432
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.976581335067749,
      "learning_rate": 2.1862531072733417e-06,
      "loss": 2.386,
      "step": 60433
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9773047566413879,
      "learning_rate": 2.1859961645596516e-06,
      "loss": 2.3104,
      "step": 60434
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1665164232254028,
      "learning_rate": 2.1857392350928185e-06,
      "loss": 2.3732,
      "step": 60435
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.084295630455017,
      "learning_rate": 2.185482318873282e-06,
      "loss": 2.3233,
      "step": 60436
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0543663501739502,
      "learning_rate": 2.185225415901473e-06,
      "loss": 2.393,
      "step": 60437
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0818687677383423,
      "learning_rate": 2.1849685261778344e-06,
      "loss": 2.2879,
      "step": 60438
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0312591791152954,
      "learning_rate": 2.184711649702791e-06,
      "loss": 2.3093,
      "step": 60439
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.01164710521698,
      "learning_rate": 2.184454786476786e-06,
      "loss": 2.255,
      "step": 60440
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0544143915176392,
      "learning_rate": 2.184197936500252e-06,
      "loss": 2.3306,
      "step": 60441
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.195957064628601,
      "learning_rate": 2.1839410997736266e-06,
      "loss": 2.2828,
      "step": 60442
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0281617641448975,
      "learning_rate": 2.183684276297341e-06,
      "loss": 2.3706,
      "step": 60443
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2680315971374512,
      "learning_rate": 2.183427466071837e-06,
      "loss": 2.1645,
      "step": 60444
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1331121921539307,
      "learning_rate": 2.183170669097543e-06,
      "loss": 2.1448,
      "step": 60445
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.133131504058838,
      "learning_rate": 2.182913885374901e-06,
      "loss": 2.207,
      "step": 60446
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.046189546585083,
      "learning_rate": 2.182657114904343e-06,
      "loss": 2.3269,
      "step": 60447
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0579568147659302,
      "learning_rate": 2.182400357686301e-06,
      "loss": 2.1993,
      "step": 60448
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0660532712936401,
      "learning_rate": 2.182143613721217e-06,
      "loss": 2.2205,
      "step": 60449
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.103114128112793,
      "learning_rate": 2.1818868830095196e-06,
      "loss": 2.3048,
      "step": 60450
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1133171319961548,
      "learning_rate": 2.18163016555165e-06,
      "loss": 2.5814,
      "step": 60451
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1151759624481201,
      "learning_rate": 2.1813734613480418e-06,
      "loss": 2.2805,
      "step": 60452
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0041643381118774,
      "learning_rate": 2.181116770399129e-06,
      "loss": 2.3599,
      "step": 60453
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9578453898429871,
      "learning_rate": 2.1808600927053424e-06,
      "loss": 2.3934,
      "step": 60454
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1656086444854736,
      "learning_rate": 2.1806034282671263e-06,
      "loss": 2.389,
      "step": 60455
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.01819908618927,
      "learning_rate": 2.1803467770849073e-06,
      "loss": 2.229,
      "step": 60456
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.017649531364441,
      "learning_rate": 2.1800901391591266e-06,
      "loss": 2.4593,
      "step": 60457
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0807243585586548,
      "learning_rate": 2.179833514490215e-06,
      "loss": 2.3612,
      "step": 60458
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0958157777786255,
      "learning_rate": 2.179576903078612e-06,
      "loss": 2.2944,
      "step": 60459
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9842633008956909,
      "learning_rate": 2.179320304924748e-06,
      "loss": 2.488,
      "step": 60460
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0329978466033936,
      "learning_rate": 2.1790637200290623e-06,
      "loss": 2.3048,
      "step": 60461
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.258769154548645,
      "learning_rate": 2.1788071483919847e-06,
      "loss": 2.3568,
      "step": 60462
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0409115552902222,
      "learning_rate": 2.178550590013957e-06,
      "loss": 2.3729,
      "step": 60463
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1343903541564941,
      "learning_rate": 2.1782940448954095e-06,
      "loss": 2.3095,
      "step": 60464
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0920108556747437,
      "learning_rate": 2.178037513036778e-06,
      "loss": 2.2411,
      "step": 60465
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0835928916931152,
      "learning_rate": 2.177780994438494e-06,
      "loss": 2.3202,
      "step": 60466
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2553001642227173,
      "learning_rate": 2.1775244891009995e-06,
      "loss": 2.3225,
      "step": 60467
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0852867364883423,
      "learning_rate": 2.1772679970247225e-06,
      "loss": 2.1779,
      "step": 60468
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2258340120315552,
      "learning_rate": 2.1770115182101037e-06,
      "loss": 2.3882,
      "step": 60469
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.111382007598877,
      "learning_rate": 2.176755052657572e-06,
      "loss": 2.534,
      "step": 60470
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1118104457855225,
      "learning_rate": 2.176498600367568e-06,
      "loss": 2.3686,
      "step": 60471
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.113495111465454,
      "learning_rate": 2.1762421613405216e-06,
      "loss": 2.3991,
      "step": 60472
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1690797805786133,
      "learning_rate": 2.1759857355768722e-06,
      "loss": 2.3157,
      "step": 60473
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0584505796432495,
      "learning_rate": 2.175729323077048e-06,
      "loss": 2.2974,
      "step": 60474
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.18473219871521,
      "learning_rate": 2.175472923841492e-06,
      "loss": 2.2735,
      "step": 60475
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0668033361434937,
      "learning_rate": 2.1752165378706335e-06,
      "loss": 2.5352,
      "step": 60476
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1451619863510132,
      "learning_rate": 2.1749601651649065e-06,
      "loss": 2.2551,
      "step": 60477
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0709710121154785,
      "learning_rate": 2.174703805724749e-06,
      "loss": 2.3302,
      "step": 60478
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.156326413154602,
      "learning_rate": 2.1744474595505936e-06,
      "loss": 2.3745,
      "step": 60479
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0511770248413086,
      "learning_rate": 2.174191126642874e-06,
      "loss": 2.2824,
      "step": 60480
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.072595238685608,
      "learning_rate": 2.173934807002027e-06,
      "loss": 2.2889,
      "step": 60481
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1420232057571411,
      "learning_rate": 2.1736785006284866e-06,
      "loss": 2.4831,
      "step": 60482
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0918376445770264,
      "learning_rate": 2.173422207522684e-06,
      "loss": 2.2349,
      "step": 60483
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.132952094078064,
      "learning_rate": 2.173165927685059e-06,
      "loss": 2.3245,
      "step": 60484
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0595418214797974,
      "learning_rate": 2.1729096611160403e-06,
      "loss": 2.2782,
      "step": 60485
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9910643696784973,
      "learning_rate": 2.17265340781607e-06,
      "loss": 2.2545,
      "step": 60486
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.148409366607666,
      "learning_rate": 2.1723971677855737e-06,
      "loss": 2.4207,
      "step": 60487
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1554701328277588,
      "learning_rate": 2.172140941024994e-06,
      "loss": 2.3874,
      "step": 60488
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1747273206710815,
      "learning_rate": 2.171884727534758e-06,
      "loss": 2.4613,
      "step": 60489
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1158194541931152,
      "learning_rate": 2.171628527315306e-06,
      "loss": 2.1868,
      "step": 60490
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0739885568618774,
      "learning_rate": 2.1713723403670707e-06,
      "loss": 2.4325,
      "step": 60491
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0689507722854614,
      "learning_rate": 2.1711161666904846e-06,
      "loss": 2.4578,
      "step": 60492
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0953776836395264,
      "learning_rate": 2.1708600062859797e-06,
      "loss": 2.2936,
      "step": 60493
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1625605821609497,
      "learning_rate": 2.1706038591539967e-06,
      "loss": 2.2239,
      "step": 60494
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1344801187515259,
      "learning_rate": 2.1703477252949644e-06,
      "loss": 2.2582,
      "step": 60495
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9900594353675842,
      "learning_rate": 2.170091604709322e-06,
      "loss": 2.2701,
      "step": 60496
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1015146970748901,
      "learning_rate": 2.1698354973974976e-06,
      "loss": 2.3643,
      "step": 60497
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9683359265327454,
      "learning_rate": 2.1695794033599317e-06,
      "loss": 2.0765,
      "step": 60498
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0827549695968628,
      "learning_rate": 2.169323322597052e-06,
      "loss": 2.2857,
      "step": 60499
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3339265584945679,
      "learning_rate": 2.1690672551093007e-06,
      "loss": 2.1393,
      "step": 60500
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9569779634475708,
      "learning_rate": 2.168811200897103e-06,
      "loss": 2.3973,
      "step": 60501
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9839098453521729,
      "learning_rate": 2.1685551599609e-06,
      "loss": 2.2521,
      "step": 60502
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.056241512298584,
      "learning_rate": 2.1682991323011226e-06,
      "loss": 2.4265,
      "step": 60503
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1992380619049072,
      "learning_rate": 2.168043117918206e-06,
      "loss": 2.0891,
      "step": 60504
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1643553972244263,
      "learning_rate": 2.167787116812581e-06,
      "loss": 2.3516,
      "step": 60505
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.045838713645935,
      "learning_rate": 2.1675311289846867e-06,
      "loss": 2.2567,
      "step": 60506
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2261394262313843,
      "learning_rate": 2.1672751544349513e-06,
      "loss": 2.2125,
      "step": 60507
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2189480066299438,
      "learning_rate": 2.167019193163814e-06,
      "loss": 2.3104,
      "step": 60508
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0167105197906494,
      "learning_rate": 2.166763245171707e-06,
      "loss": 2.4893,
      "step": 60509
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0650787353515625,
      "learning_rate": 2.1665073104590618e-06,
      "loss": 2.1502,
      "step": 60510
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2362514734268188,
      "learning_rate": 2.166251389026316e-06,
      "loss": 2.349,
      "step": 60511
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1340148448944092,
      "learning_rate": 2.1659954808738994e-06,
      "loss": 2.3454,
      "step": 60512
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9898293614387512,
      "learning_rate": 2.165739586002251e-06,
      "loss": 2.3531,
      "step": 60513
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1740201711654663,
      "learning_rate": 2.1654837044117982e-06,
      "loss": 2.3727,
      "step": 60514
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0620779991149902,
      "learning_rate": 2.165227836102982e-06,
      "loss": 2.2424,
      "step": 60515
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2443917989730835,
      "learning_rate": 2.1649719810762295e-06,
      "loss": 2.3661,
      "step": 60516
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1249425411224365,
      "learning_rate": 2.164716139331984e-06,
      "loss": 2.2723,
      "step": 60517
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1702297925949097,
      "learning_rate": 2.164460310870665e-06,
      "loss": 2.1987,
      "step": 60518
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0547209978103638,
      "learning_rate": 2.164204495692718e-06,
      "loss": 2.408,
      "step": 60519
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9918308854103088,
      "learning_rate": 2.16394869379857e-06,
      "loss": 2.2542,
      "step": 60520
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0054492950439453,
      "learning_rate": 2.163692905188659e-06,
      "loss": 2.1942,
      "step": 60521
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0846338272094727,
      "learning_rate": 2.163437129863415e-06,
      "loss": 2.3971,
      "step": 60522
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0619553327560425,
      "learning_rate": 2.163181367823276e-06,
      "loss": 2.2144,
      "step": 60523
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1278196573257446,
      "learning_rate": 2.1629256190686698e-06,
      "loss": 2.2555,
      "step": 60524
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1490553617477417,
      "learning_rate": 2.1626698836000358e-06,
      "loss": 2.3212,
      "step": 60525
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1329808235168457,
      "learning_rate": 2.162414161417803e-06,
      "loss": 2.4691,
      "step": 60526
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.05424964427948,
      "learning_rate": 2.162158452522409e-06,
      "loss": 2.0918,
      "step": 60527
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1632274389266968,
      "learning_rate": 2.1619027569142825e-06,
      "loss": 2.3745,
      "step": 60528
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.152411937713623,
      "learning_rate": 2.1616470745938633e-06,
      "loss": 2.161,
      "step": 60529
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0144702196121216,
      "learning_rate": 2.1613914055615805e-06,
      "loss": 2.1434,
      "step": 60530
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1037119626998901,
      "learning_rate": 2.1611357498178674e-06,
      "loss": 2.3925,
      "step": 60531
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1196410655975342,
      "learning_rate": 2.1608801073631568e-06,
      "loss": 2.2817,
      "step": 60532
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0946385860443115,
      "learning_rate": 2.1606244781978857e-06,
      "loss": 2.0609,
      "step": 60533
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0515708923339844,
      "learning_rate": 2.1603688623224816e-06,
      "loss": 2.5186,
      "step": 60534
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.132213830947876,
      "learning_rate": 2.160113259737385e-06,
      "loss": 2.2904,
      "step": 60535
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0806125402450562,
      "learning_rate": 2.159857670443023e-06,
      "loss": 2.2703,
      "step": 60536
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0778344869613647,
      "learning_rate": 2.1596020944398344e-06,
      "loss": 2.3142,
      "step": 60537
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0021275281906128,
      "learning_rate": 2.1593465317282503e-06,
      "loss": 2.2638,
      "step": 60538
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0569363832473755,
      "learning_rate": 2.1590909823087002e-06,
      "loss": 2.3648,
      "step": 60539
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1353983879089355,
      "learning_rate": 2.158835446181623e-06,
      "loss": 2.3505,
      "step": 60540
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1148051023483276,
      "learning_rate": 2.158579923347446e-06,
      "loss": 2.2139,
      "step": 60541
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0898364782333374,
      "learning_rate": 2.1583244138066084e-06,
      "loss": 2.3585,
      "step": 60542
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1661698818206787,
      "learning_rate": 2.1580689175595393e-06,
      "loss": 2.3324,
      "step": 60543
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.080369472503662,
      "learning_rate": 2.157813434606678e-06,
      "loss": 2.239,
      "step": 60544
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0626683235168457,
      "learning_rate": 2.157557964948447e-06,
      "loss": 2.2811,
      "step": 60545
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0706374645233154,
      "learning_rate": 2.157302508585288e-06,
      "loss": 2.3697,
      "step": 60546
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0413485765457153,
      "learning_rate": 2.157047065517629e-06,
      "loss": 2.3176,
      "step": 60547
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1642228364944458,
      "learning_rate": 2.1567916357459083e-06,
      "loss": 2.2956,
      "step": 60548
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9834485650062561,
      "learning_rate": 2.156536219270553e-06,
      "loss": 2.406,
      "step": 60549
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0637422800064087,
      "learning_rate": 2.156280816092001e-06,
      "loss": 2.1494,
      "step": 60550
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1137598752975464,
      "learning_rate": 2.1560254262106815e-06,
      "loss": 2.419,
      "step": 60551
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9856743812561035,
      "learning_rate": 2.1557700496270317e-06,
      "loss": 2.2216,
      "step": 60552
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.112024188041687,
      "learning_rate": 2.155514686341479e-06,
      "loss": 2.1236,
      "step": 60553
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0291486978530884,
      "learning_rate": 2.1552593363544628e-06,
      "loss": 2.3482,
      "step": 60554
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0711771249771118,
      "learning_rate": 2.1550039996664094e-06,
      "loss": 2.3325,
      "step": 60555
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0445153713226318,
      "learning_rate": 2.154748676277758e-06,
      "loss": 2.4243,
      "step": 60556
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1428041458129883,
      "learning_rate": 2.154493366188939e-06,
      "loss": 2.2752,
      "step": 60557
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.112544059753418,
      "learning_rate": 2.154238069400384e-06,
      "loss": 2.0754,
      "step": 60558
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0235012769699097,
      "learning_rate": 2.1539827859125227e-06,
      "loss": 2.2914,
      "step": 60559
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2448002099990845,
      "learning_rate": 2.153727515725795e-06,
      "loss": 2.4017,
      "step": 60560
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1862365007400513,
      "learning_rate": 2.1534722588406274e-06,
      "loss": 2.3275,
      "step": 60561
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9849149584770203,
      "learning_rate": 2.153217015257458e-06,
      "loss": 2.2776,
      "step": 60562
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0935611724853516,
      "learning_rate": 2.152961784976715e-06,
      "loss": 2.1697,
      "step": 60563
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.101212501525879,
      "learning_rate": 2.1527065679988356e-06,
      "loss": 2.1715,
      "step": 60564
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3074140548706055,
      "learning_rate": 2.1524513643242496e-06,
      "loss": 2.3603,
      "step": 60565
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0433942079544067,
      "learning_rate": 2.152196173953387e-06,
      "loss": 2.2745,
      "step": 60566
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0239747762680054,
      "learning_rate": 2.1519409968866857e-06,
      "loss": 2.2605,
      "step": 60567
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0915616750717163,
      "learning_rate": 2.151685833124574e-06,
      "loss": 2.287,
      "step": 60568
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0767240524291992,
      "learning_rate": 2.151430682667488e-06,
      "loss": 2.3868,
      "step": 60569
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.065447449684143,
      "learning_rate": 2.151175545515859e-06,
      "loss": 2.4212,
      "step": 60570
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1331616640090942,
      "learning_rate": 2.1509204216701194e-06,
      "loss": 2.6961,
      "step": 60571
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0851414203643799,
      "learning_rate": 2.1506653111306985e-06,
      "loss": 2.3059,
      "step": 60572
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3000013828277588,
      "learning_rate": 2.1504102138980352e-06,
      "loss": 2.4388,
      "step": 60573
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0093203783035278,
      "learning_rate": 2.1501551299725555e-06,
      "loss": 2.3756,
      "step": 60574
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1345564126968384,
      "learning_rate": 2.149900059354697e-06,
      "loss": 2.437,
      "step": 60575
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.090609073638916,
      "learning_rate": 2.1496450020448878e-06,
      "loss": 2.4497,
      "step": 60576
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2660014629364014,
      "learning_rate": 2.149389958043565e-06,
      "loss": 2.4195,
      "step": 60577
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.67454195022583,
      "learning_rate": 2.1491349273511554e-06,
      "loss": 2.2424,
      "step": 60578
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.041244626045227,
      "learning_rate": 2.148879909968098e-06,
      "loss": 2.3228,
      "step": 60579
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0587787628173828,
      "learning_rate": 2.148624905894817e-06,
      "loss": 2.3283,
      "step": 60580
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1387040615081787,
      "learning_rate": 2.1483699151317537e-06,
      "loss": 2.1562,
      "step": 60581
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1566802263259888,
      "learning_rate": 2.148114937679332e-06,
      "loss": 2.3589,
      "step": 60582
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0288915634155273,
      "learning_rate": 2.1478599735379944e-06,
      "loss": 2.2382,
      "step": 60583
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1647006273269653,
      "learning_rate": 2.1476050227081613e-06,
      "loss": 2.6103,
      "step": 60584
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9863644242286682,
      "learning_rate": 2.1473500851902716e-06,
      "loss": 2.2241,
      "step": 60585
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0273017883300781,
      "learning_rate": 2.147095160984756e-06,
      "loss": 2.4185,
      "step": 60586
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0327224731445312,
      "learning_rate": 2.146840250092047e-06,
      "loss": 2.2243,
      "step": 60587
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1586312055587769,
      "learning_rate": 2.1465853525125758e-06,
      "loss": 2.4384,
      "step": 60588
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0759093761444092,
      "learning_rate": 2.1463304682467776e-06,
      "loss": 2.1895,
      "step": 60589
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0166984796524048,
      "learning_rate": 2.1460755972950796e-06,
      "loss": 2.1389,
      "step": 60590
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1341100931167603,
      "learning_rate": 2.145820739657919e-06,
      "loss": 2.1231,
      "step": 60591
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.220137596130371,
      "learning_rate": 2.1455658953357227e-06,
      "loss": 2.2446,
      "step": 60592
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.080124855041504,
      "learning_rate": 2.145311064328928e-06,
      "loss": 2.3343,
      "step": 60593
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.114634394645691,
      "learning_rate": 2.1450562466379644e-06,
      "loss": 2.4134,
      "step": 60594
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9581305384635925,
      "learning_rate": 2.1448014422632614e-06,
      "loss": 2.3156,
      "step": 60595
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1034067869186401,
      "learning_rate": 2.144546651205256e-06,
      "loss": 2.3532,
      "step": 60596
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1074233055114746,
      "learning_rate": 2.144291873464378e-06,
      "loss": 2.5077,
      "step": 60597
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1350326538085938,
      "learning_rate": 2.144037109041056e-06,
      "loss": 2.4169,
      "step": 60598
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3371789455413818,
      "learning_rate": 2.1437823579357274e-06,
      "loss": 2.2212,
      "step": 60599
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.044636607170105,
      "learning_rate": 2.143527620148821e-06,
      "loss": 2.2355,
      "step": 60600
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1762199401855469,
      "learning_rate": 2.143272895680767e-06,
      "loss": 2.2429,
      "step": 60601
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1624432802200317,
      "learning_rate": 2.143018184532001e-06,
      "loss": 2.5464,
      "step": 60602
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3201789855957031,
      "learning_rate": 2.1427634867029513e-06,
      "loss": 2.3533,
      "step": 60603
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0974102020263672,
      "learning_rate": 2.1425088021940533e-06,
      "loss": 2.2673,
      "step": 60604
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1813323497772217,
      "learning_rate": 2.1422541310057352e-06,
      "loss": 2.3395,
      "step": 60605
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.188836932182312,
      "learning_rate": 2.1419994731384332e-06,
      "loss": 2.4036,
      "step": 60606
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1603611707687378,
      "learning_rate": 2.1417448285925726e-06,
      "loss": 2.3629,
      "step": 60607
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4063944816589355,
      "learning_rate": 2.141490197368592e-06,
      "loss": 2.0622,
      "step": 60608
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1270356178283691,
      "learning_rate": 2.1412355794669205e-06,
      "loss": 2.6028,
      "step": 60609
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.042107343673706,
      "learning_rate": 2.1409809748879884e-06,
      "loss": 2.4702,
      "step": 60610
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1161320209503174,
      "learning_rate": 2.1407263836322256e-06,
      "loss": 2.1507,
      "step": 60611
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9699017405509949,
      "learning_rate": 2.1404718057000683e-06,
      "loss": 2.4091,
      "step": 60612
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0397493839263916,
      "learning_rate": 2.140217241091944e-06,
      "loss": 2.3265,
      "step": 60613
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1505396366119385,
      "learning_rate": 2.1399626898082883e-06,
      "loss": 2.1139,
      "step": 60614
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.123870611190796,
      "learning_rate": 2.139708151849528e-06,
      "loss": 2.3634,
      "step": 60615
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0305802822113037,
      "learning_rate": 2.1394536272161006e-06,
      "loss": 2.3561,
      "step": 60616
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2172003984451294,
      "learning_rate": 2.13919911590843e-06,
      "loss": 2.4636,
      "step": 60617
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0408146381378174,
      "learning_rate": 2.138944617926956e-06,
      "loss": 2.3144,
      "step": 60618
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9641997218132019,
      "learning_rate": 2.138690133272102e-06,
      "loss": 2.4655,
      "step": 60619
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0223497152328491,
      "learning_rate": 2.138435661944307e-06,
      "loss": 2.3962,
      "step": 60620
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9975143671035767,
      "learning_rate": 2.1381812039439985e-06,
      "loss": 2.2775,
      "step": 60621
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0796794891357422,
      "learning_rate": 2.137926759271606e-06,
      "loss": 2.4156,
      "step": 60622
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1611894369125366,
      "learning_rate": 2.1376723279275646e-06,
      "loss": 2.5973,
      "step": 60623
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.149846076965332,
      "learning_rate": 2.137417909912305e-06,
      "loss": 2.2536,
      "step": 60624
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0837193727493286,
      "learning_rate": 2.137163505226254e-06,
      "loss": 2.4055,
      "step": 60625
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1119071245193481,
      "learning_rate": 2.1369091138698484e-06,
      "loss": 2.1634,
      "step": 60626
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9563754796981812,
      "learning_rate": 2.1366547358435186e-06,
      "loss": 2.2517,
      "step": 60627
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0146499872207642,
      "learning_rate": 2.1364003711476922e-06,
      "loss": 2.1595,
      "step": 60628
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1463208198547363,
      "learning_rate": 2.1361460197828045e-06,
      "loss": 2.3179,
      "step": 60629
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1469151973724365,
      "learning_rate": 2.1358916817492823e-06,
      "loss": 2.5188,
      "step": 60630
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2344781160354614,
      "learning_rate": 2.135637357047563e-06,
      "loss": 2.4295,
      "step": 60631
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0475044250488281,
      "learning_rate": 2.135383045678072e-06,
      "loss": 2.2769,
      "step": 60632
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1158361434936523,
      "learning_rate": 2.1351287476412453e-06,
      "loss": 2.5161,
      "step": 60633
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0150588750839233,
      "learning_rate": 2.1348744629375083e-06,
      "loss": 2.4028,
      "step": 60634
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.004833698272705,
      "learning_rate": 2.134620191567298e-06,
      "loss": 2.1315,
      "step": 60635
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0334441661834717,
      "learning_rate": 2.134365933531043e-06,
      "loss": 2.2539,
      "step": 60636
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1311753988265991,
      "learning_rate": 2.1341116888291733e-06,
      "loss": 2.3856,
      "step": 60637
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4500069618225098,
      "learning_rate": 2.1338574574621185e-06,
      "loss": 2.2474,
      "step": 60638
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.090963363647461,
      "learning_rate": 2.133603239430314e-06,
      "loss": 2.388,
      "step": 60639
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.110444188117981,
      "learning_rate": 2.1333490347341855e-06,
      "loss": 2.2278,
      "step": 60640
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9868665933609009,
      "learning_rate": 2.1330948433741704e-06,
      "loss": 2.1494,
      "step": 60641
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1232630014419556,
      "learning_rate": 2.132840665350694e-06,
      "loss": 2.2484,
      "step": 60642
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0633147954940796,
      "learning_rate": 2.132586500664191e-06,
      "loss": 2.2169,
      "step": 60643
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0742145776748657,
      "learning_rate": 2.1323323493150885e-06,
      "loss": 2.1781,
      "step": 60644
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9898228645324707,
      "learning_rate": 2.132078211303822e-06,
      "loss": 2.0335,
      "step": 60645
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1365859508514404,
      "learning_rate": 2.131824086630817e-06,
      "loss": 2.416,
      "step": 60646
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.02347731590271,
      "learning_rate": 2.1315699752965103e-06,
      "loss": 2.4478,
      "step": 60647
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0079989433288574,
      "learning_rate": 2.1313158773013266e-06,
      "loss": 2.4258,
      "step": 60648
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0037057399749756,
      "learning_rate": 2.1310617926457043e-06,
      "loss": 2.634,
      "step": 60649
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.18268620967865,
      "learning_rate": 2.130807721330065e-06,
      "loss": 2.4341,
      "step": 60650
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1502888202667236,
      "learning_rate": 2.1305536633548464e-06,
      "loss": 2.3042,
      "step": 60651
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9937509298324585,
      "learning_rate": 2.130299618720473e-06,
      "loss": 2.5002,
      "step": 60652
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0709155797958374,
      "learning_rate": 2.130045587427383e-06,
      "loss": 2.3384,
      "step": 60653
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2701648473739624,
      "learning_rate": 2.1297915694759996e-06,
      "loss": 2.2982,
      "step": 60654
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0609779357910156,
      "learning_rate": 2.1295375648667603e-06,
      "loss": 2.1548,
      "step": 60655
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0829540491104126,
      "learning_rate": 2.1292835736000916e-06,
      "loss": 2.436,
      "step": 60656
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.015293002128601,
      "learning_rate": 2.129029595676423e-06,
      "loss": 2.2516,
      "step": 60657
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.154282808303833,
      "learning_rate": 2.1287756310961896e-06,
      "loss": 2.5623,
      "step": 60658
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1243700981140137,
      "learning_rate": 2.1285216798598164e-06,
      "loss": 2.284,
      "step": 60659
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1946102380752563,
      "learning_rate": 2.12826774196774e-06,
      "loss": 2.4209,
      "step": 60660
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1338975429534912,
      "learning_rate": 2.1280138174203844e-06,
      "loss": 2.1385,
      "step": 60661
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1241997480392456,
      "learning_rate": 2.1277599062181896e-06,
      "loss": 2.2673,
      "step": 60662
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0834040641784668,
      "learning_rate": 2.127506008361574e-06,
      "loss": 2.6499,
      "step": 60663
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0958056449890137,
      "learning_rate": 2.127252123850976e-06,
      "loss": 2.1679,
      "step": 60664
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1705243587493896,
      "learning_rate": 2.1269982526868215e-06,
      "loss": 2.035,
      "step": 60665
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1756843328475952,
      "learning_rate": 2.1267443948695464e-06,
      "loss": 2.43,
      "step": 60666
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0975316762924194,
      "learning_rate": 2.1264905503995748e-06,
      "loss": 2.2552,
      "step": 60667
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0447696447372437,
      "learning_rate": 2.1262367192773427e-06,
      "loss": 2.263,
      "step": 60668
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0729044675827026,
      "learning_rate": 2.125982901503275e-06,
      "loss": 2.3195,
      "step": 60669
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0657674074172974,
      "learning_rate": 2.125729097077808e-06,
      "loss": 2.318,
      "step": 60670
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0553940534591675,
      "learning_rate": 2.1254753060013656e-06,
      "loss": 2.2961,
      "step": 60671
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0428268909454346,
      "learning_rate": 2.125221528274385e-06,
      "loss": 2.3352,
      "step": 60672
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.015334129333496,
      "learning_rate": 2.1249677638972887e-06,
      "loss": 2.2815,
      "step": 60673
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0864944458007812,
      "learning_rate": 2.1247140128705146e-06,
      "loss": 2.2735,
      "step": 60674
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0716917514801025,
      "learning_rate": 2.124460275194488e-06,
      "loss": 2.1939,
      "step": 60675
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0488216876983643,
      "learning_rate": 2.124206550869641e-06,
      "loss": 2.4516,
      "step": 60676
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1308579444885254,
      "learning_rate": 2.1239528398964006e-06,
      "loss": 2.1587,
      "step": 60677
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.177486538887024,
      "learning_rate": 2.1236991422752007e-06,
      "loss": 2.5964,
      "step": 60678
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9678342938423157,
      "learning_rate": 2.1234454580064677e-06,
      "loss": 2.332,
      "step": 60679
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0367393493652344,
      "learning_rate": 2.123191787090637e-06,
      "loss": 2.3752,
      "step": 60680
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1017757654190063,
      "learning_rate": 2.1229381295281324e-06,
      "loss": 2.3045,
      "step": 60681
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0238093137741089,
      "learning_rate": 2.1226844853193907e-06,
      "loss": 2.1597,
      "step": 60682
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2937923669815063,
      "learning_rate": 2.1224308544648374e-06,
      "loss": 2.333,
      "step": 60683
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0967398881912231,
      "learning_rate": 2.1221772369649008e-06,
      "loss": 2.1774,
      "step": 60684
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1177887916564941,
      "learning_rate": 2.1219236328200165e-06,
      "loss": 2.1646,
      "step": 60685
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0719770193099976,
      "learning_rate": 2.121670042030608e-06,
      "loss": 2.3656,
      "step": 60686
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3110743761062622,
      "learning_rate": 2.1214164645971114e-06,
      "loss": 2.7263,
      "step": 60687
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1090362071990967,
      "learning_rate": 2.121162900519951e-06,
      "loss": 2.3569,
      "step": 60688
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1035304069519043,
      "learning_rate": 2.1209093497995646e-06,
      "loss": 2.1945,
      "step": 60689
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9600196480751038,
      "learning_rate": 2.120655812436372e-06,
      "loss": 2.2673,
      "step": 60690
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0306777954101562,
      "learning_rate": 2.1204022884308097e-06,
      "loss": 1.8927,
      "step": 60691
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9993078112602234,
      "learning_rate": 2.120148777783303e-06,
      "loss": 2.2052,
      "step": 60692
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0182806253433228,
      "learning_rate": 2.1198952804942874e-06,
      "loss": 2.1859,
      "step": 60693
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0905826091766357,
      "learning_rate": 2.1196417965641858e-06,
      "loss": 2.1301,
      "step": 60694
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1254292726516724,
      "learning_rate": 2.1193883259934357e-06,
      "loss": 2.197,
      "step": 60695
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1228989362716675,
      "learning_rate": 2.1191348687824586e-06,
      "loss": 2.2249,
      "step": 60696
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.047002911567688,
      "learning_rate": 2.1188814249316923e-06,
      "loss": 2.6328,
      "step": 60697
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0381848812103271,
      "learning_rate": 2.1186279944415587e-06,
      "loss": 2.2369,
      "step": 60698
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1707996129989624,
      "learning_rate": 2.1183745773124954e-06,
      "loss": 2.4742,
      "step": 60699
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1640448570251465,
      "learning_rate": 2.1181211735449235e-06,
      "loss": 2.5352,
      "step": 60700
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0796136856079102,
      "learning_rate": 2.1178677831392812e-06,
      "loss": 2.3759,
      "step": 60701
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0014116764068604,
      "learning_rate": 2.1176144060959924e-06,
      "loss": 2.1483,
      "step": 60702
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1616528034210205,
      "learning_rate": 2.117361042415489e-06,
      "loss": 2.2031,
      "step": 60703
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1153581142425537,
      "learning_rate": 2.117107692098196e-06,
      "loss": 2.3303,
      "step": 60704
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0102232694625854,
      "learning_rate": 2.1168543551445497e-06,
      "loss": 2.3474,
      "step": 60705
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0337432622909546,
      "learning_rate": 2.1166010315549733e-06,
      "loss": 2.1886,
      "step": 60706
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0243006944656372,
      "learning_rate": 2.116347721329902e-06,
      "loss": 2.4104,
      "step": 60707
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.011359453201294,
      "learning_rate": 2.116094424469759e-06,
      "loss": 2.4275,
      "step": 60708
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0806515216827393,
      "learning_rate": 2.1158411409749814e-06,
      "loss": 2.1208,
      "step": 60709
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4789786338806152,
      "learning_rate": 2.115587870845991e-06,
      "loss": 2.211,
      "step": 60710
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1678552627563477,
      "learning_rate": 2.115334614083223e-06,
      "loss": 2.3651,
      "step": 60711
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1314433813095093,
      "learning_rate": 2.115081370687104e-06,
      "loss": 2.2445,
      "step": 60712
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.069201946258545,
      "learning_rate": 2.1148281406580617e-06,
      "loss": 2.3545,
      "step": 60713
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.030314564704895,
      "learning_rate": 2.11457492399653e-06,
      "loss": 2.1794,
      "step": 60714
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2126654386520386,
      "learning_rate": 2.1143217207029343e-06,
      "loss": 2.4674,
      "step": 60715
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.141764760017395,
      "learning_rate": 2.114068530777703e-06,
      "loss": 2.8206,
      "step": 60716
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.110303282737732,
      "learning_rate": 2.1138153542212704e-06,
      "loss": 2.4811,
      "step": 60717
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0700709819793701,
      "learning_rate": 2.113562191034062e-06,
      "loss": 2.4384,
      "step": 60718
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.138637661933899,
      "learning_rate": 2.1133090412165057e-06,
      "loss": 2.4078,
      "step": 60719
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0809801816940308,
      "learning_rate": 2.1130559047690346e-06,
      "loss": 2.3039,
      "step": 60720
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9699795246124268,
      "learning_rate": 2.112802781692074e-06,
      "loss": 2.3905,
      "step": 60721
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2633440494537354,
      "learning_rate": 2.112549671986056e-06,
      "loss": 2.3905,
      "step": 60722
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0985150337219238,
      "learning_rate": 2.112296575651407e-06,
      "loss": 2.4783,
      "step": 60723
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0172523260116577,
      "learning_rate": 2.1120434926885604e-06,
      "loss": 2.4439,
      "step": 60724
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.146680235862732,
      "learning_rate": 2.111790423097939e-06,
      "loss": 2.686,
      "step": 60725
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0427302122116089,
      "learning_rate": 2.1115373668799777e-06,
      "loss": 2.1622,
      "step": 60726
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2494995594024658,
      "learning_rate": 2.111284324035101e-06,
      "loss": 2.2115,
      "step": 60727
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0512136220932007,
      "learning_rate": 2.1110312945637457e-06,
      "loss": 2.213,
      "step": 60728
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9986644983291626,
      "learning_rate": 2.110778278466329e-06,
      "loss": 2.3206,
      "step": 60729
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.101189136505127,
      "learning_rate": 2.110525275743288e-06,
      "loss": 2.286,
      "step": 60730
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0087699890136719,
      "learning_rate": 2.1102722863950477e-06,
      "loss": 2.4986,
      "step": 60731
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.221331238746643,
      "learning_rate": 2.110019310422041e-06,
      "loss": 2.3128,
      "step": 60732
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0207916498184204,
      "learning_rate": 2.109766347824691e-06,
      "loss": 2.273,
      "step": 60733
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0540722608566284,
      "learning_rate": 2.1095133986034343e-06,
      "loss": 2.2321,
      "step": 60734
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4544340372085571,
      "learning_rate": 2.109260462758691e-06,
      "loss": 2.277,
      "step": 60735
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0845052003860474,
      "learning_rate": 2.109007540290898e-06,
      "loss": 2.2061,
      "step": 60736
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9827178716659546,
      "learning_rate": 2.108754631200478e-06,
      "loss": 2.2934,
      "step": 60737
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.056312084197998,
      "learning_rate": 2.1085017354878646e-06,
      "loss": 2.1001,
      "step": 60738
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0483472347259521,
      "learning_rate": 2.108248853153484e-06,
      "loss": 2.2095,
      "step": 60739
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9887517690658569,
      "learning_rate": 2.1079959841977617e-06,
      "loss": 2.276,
      "step": 60740
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0852314233779907,
      "learning_rate": 2.1077431286211326e-06,
      "loss": 2.4883,
      "step": 60741
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.095786690711975,
      "learning_rate": 2.107490286424022e-06,
      "loss": 2.3055,
      "step": 60742
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0968577861785889,
      "learning_rate": 2.1072374576068578e-06,
      "loss": 2.2984,
      "step": 60743
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0397943258285522,
      "learning_rate": 2.1069846421700714e-06,
      "loss": 2.2583,
      "step": 60744
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1957710981369019,
      "learning_rate": 2.106731840114089e-06,
      "loss": 2.2726,
      "step": 60745
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.028793454170227,
      "learning_rate": 2.1064790514393386e-06,
      "loss": 2.14,
      "step": 60746
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.129279375076294,
      "learning_rate": 2.1062262761462515e-06,
      "loss": 2.2881,
      "step": 60747
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.064347743988037,
      "learning_rate": 2.1059735142352533e-06,
      "loss": 2.1655,
      "step": 60748
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1574560403823853,
      "learning_rate": 2.1057207657067756e-06,
      "loss": 2.2585,
      "step": 60749
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.134914755821228,
      "learning_rate": 2.105468030561243e-06,
      "loss": 2.3178,
      "step": 60750
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0483020544052124,
      "learning_rate": 2.105215308799089e-06,
      "loss": 2.4049,
      "step": 60751
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1099181175231934,
      "learning_rate": 2.104962600420737e-06,
      "loss": 2.1839,
      "step": 60752
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4465529918670654,
      "learning_rate": 2.1047099054266198e-06,
      "loss": 2.2622,
      "step": 60753
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2663347721099854,
      "learning_rate": 2.1044572238171635e-06,
      "loss": 2.5547,
      "step": 60754
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.238116979598999,
      "learning_rate": 2.104204555592797e-06,
      "loss": 2.4295,
      "step": 60755
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1578550338745117,
      "learning_rate": 2.1039519007539454e-06,
      "loss": 2.6213,
      "step": 60756
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1182475090026855,
      "learning_rate": 2.103699259301042e-06,
      "loss": 2.3115,
      "step": 60757
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0861873626708984,
      "learning_rate": 2.1034466312345116e-06,
      "loss": 2.3401,
      "step": 60758
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0994325876235962,
      "learning_rate": 2.103194016554785e-06,
      "loss": 2.355,
      "step": 60759
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0413497686386108,
      "learning_rate": 2.1029414152622874e-06,
      "loss": 2.3781,
      "step": 60760
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1346670389175415,
      "learning_rate": 2.1026888273574517e-06,
      "loss": 2.2886,
      "step": 60761
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0799227952957153,
      "learning_rate": 2.1024362528407007e-06,
      "loss": 2.2447,
      "step": 60762
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0383257865905762,
      "learning_rate": 2.102183691712467e-06,
      "loss": 2.3652,
      "step": 60763
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1772902011871338,
      "learning_rate": 2.101931143973175e-06,
      "loss": 2.3682,
      "step": 60764
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1131640672683716,
      "learning_rate": 2.101678609623258e-06,
      "loss": 2.2616,
      "step": 60765
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.157464861869812,
      "learning_rate": 2.1014260886631375e-06,
      "loss": 2.4071,
      "step": 60766
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1710948944091797,
      "learning_rate": 2.101173581093248e-06,
      "loss": 2.454,
      "step": 60767
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0892468690872192,
      "learning_rate": 2.100921086914014e-06,
      "loss": 2.2788,
      "step": 60768
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.073148250579834,
      "learning_rate": 2.100668606125864e-06,
      "loss": 2.3843,
      "step": 60769
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0918337106704712,
      "learning_rate": 2.1004161387292244e-06,
      "loss": 2.4236,
      "step": 60770
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9391838908195496,
      "learning_rate": 2.1001636847245276e-06,
      "loss": 2.47,
      "step": 60771
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.151157021522522,
      "learning_rate": 2.0999112441121947e-06,
      "loss": 2.2447,
      "step": 60772
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0534865856170654,
      "learning_rate": 2.0996588168926623e-06,
      "loss": 2.5385,
      "step": 60773
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0688592195510864,
      "learning_rate": 2.099406403066353e-06,
      "loss": 2.1174,
      "step": 60774
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0235697031021118,
      "learning_rate": 2.0991540026336933e-06,
      "loss": 2.0928,
      "step": 60775
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0823076963424683,
      "learning_rate": 2.098901615595116e-06,
      "loss": 2.3631,
      "step": 60776
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1761711835861206,
      "learning_rate": 2.098649241951044e-06,
      "loss": 2.0907,
      "step": 60777
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2255065441131592,
      "learning_rate": 2.0983968817019096e-06,
      "loss": 2.2952,
      "step": 60778
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.38810133934021,
      "learning_rate": 2.0981445348481365e-06,
      "loss": 2.4927,
      "step": 60779
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0527613162994385,
      "learning_rate": 2.097892201390157e-06,
      "loss": 2.4421,
      "step": 60780
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9495116472244263,
      "learning_rate": 2.097639881328396e-06,
      "loss": 2.4355,
      "step": 60781
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9949297308921814,
      "learning_rate": 2.0973875746632823e-06,
      "loss": 2.2561,
      "step": 60782
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9987534880638123,
      "learning_rate": 2.0971352813952393e-06,
      "loss": 2.4244,
      "step": 60783
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0857009887695312,
      "learning_rate": 2.096883001524702e-06,
      "loss": 2.2945,
      "step": 60784
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2298774719238281,
      "learning_rate": 2.096630735052092e-06,
      "loss": 2.3173,
      "step": 60785
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.019677758216858,
      "learning_rate": 2.096378481977841e-06,
      "loss": 2.2216,
      "step": 60786
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1767164468765259,
      "learning_rate": 2.0961262423023732e-06,
      "loss": 2.3985,
      "step": 60787
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0264852046966553,
      "learning_rate": 2.0958740160261203e-06,
      "loss": 2.5171,
      "step": 60788
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0901787281036377,
      "learning_rate": 2.095621803149506e-06,
      "loss": 2.2278,
      "step": 60789
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0171765089035034,
      "learning_rate": 2.0953696036729607e-06,
      "loss": 2.5881,
      "step": 60790
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3522323369979858,
      "learning_rate": 2.0951174175969092e-06,
      "loss": 2.3424,
      "step": 60791
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1968870162963867,
      "learning_rate": 2.0948652449217834e-06,
      "loss": 2.365,
      "step": 60792
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.110990047454834,
      "learning_rate": 2.094613085648004e-06,
      "loss": 2.3622,
      "step": 60793
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0792524814605713,
      "learning_rate": 2.094360939776009e-06,
      "loss": 2.3275,
      "step": 60794
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0865849256515503,
      "learning_rate": 2.0941088073062133e-06,
      "loss": 1.9557,
      "step": 60795
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2343014478683472,
      "learning_rate": 2.0938566882390543e-06,
      "loss": 2.1949,
      "step": 60796
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0503108501434326,
      "learning_rate": 2.0936045825749516e-06,
      "loss": 2.3893,
      "step": 60797
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0459834337234497,
      "learning_rate": 2.09335249031434e-06,
      "loss": 2.2193,
      "step": 60798
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0987142324447632,
      "learning_rate": 2.0931004114576403e-06,
      "loss": 2.3505,
      "step": 60799
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0301250219345093,
      "learning_rate": 2.092848346005285e-06,
      "loss": 2.3427,
      "step": 60800
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1301318407058716,
      "learning_rate": 2.092596293957699e-06,
      "loss": 2.408,
      "step": 60801
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0377068519592285,
      "learning_rate": 2.0923442553153084e-06,
      "loss": 2.2872,
      "step": 60802
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0737165212631226,
      "learning_rate": 2.092092230078545e-06,
      "loss": 2.225,
      "step": 60803
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0597059726715088,
      "learning_rate": 2.091840218247829e-06,
      "loss": 2.4295,
      "step": 60804
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1256603002548218,
      "learning_rate": 2.091588219823596e-06,
      "loss": 2.3466,
      "step": 60805
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1261117458343506,
      "learning_rate": 2.091336234806265e-06,
      "loss": 2.2654,
      "step": 60806
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.113901972770691,
      "learning_rate": 2.0910842631962723e-06,
      "loss": 2.039,
      "step": 60807
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0594370365142822,
      "learning_rate": 2.090832304994035e-06,
      "loss": 2.4645,
      "step": 60808
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0152745246887207,
      "learning_rate": 2.0905803601999873e-06,
      "loss": 2.4618,
      "step": 60809
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1848506927490234,
      "learning_rate": 2.090328428814551e-06,
      "loss": 2.4351,
      "step": 60810
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9203375577926636,
      "learning_rate": 2.0900765108381592e-06,
      "loss": 2.2207,
      "step": 60811
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1131278276443481,
      "learning_rate": 2.0898246062712335e-06,
      "loss": 2.3631,
      "step": 60812
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2012710571289062,
      "learning_rate": 2.0895727151142064e-06,
      "loss": 2.1798,
      "step": 60813
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1658921241760254,
      "learning_rate": 2.0893208373675002e-06,
      "loss": 2.2709,
      "step": 60814
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0497708320617676,
      "learning_rate": 2.089068973031545e-06,
      "loss": 2.4724,
      "step": 60815
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0425142049789429,
      "learning_rate": 2.0888171221067633e-06,
      "loss": 2.4851,
      "step": 60816
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0872045755386353,
      "learning_rate": 2.088565284593589e-06,
      "loss": 2.4623,
      "step": 60817
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.054253101348877,
      "learning_rate": 2.0883134604924426e-06,
      "loss": 2.3562,
      "step": 60818
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0286509990692139,
      "learning_rate": 2.0880616498037556e-06,
      "loss": 2.3082,
      "step": 60819
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2702926397323608,
      "learning_rate": 2.0878098525279534e-06,
      "loss": 2.3304,
      "step": 60820
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0470309257507324,
      "learning_rate": 2.087558068665463e-06,
      "loss": 2.331,
      "step": 60821
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0765962600708008,
      "learning_rate": 2.087306298216707e-06,
      "loss": 2.45,
      "step": 60822
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1899306774139404,
      "learning_rate": 2.0870545411821185e-06,
      "loss": 2.3344,
      "step": 60823
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.056707501411438,
      "learning_rate": 2.08680279756212e-06,
      "loss": 2.2292,
      "step": 60824
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1890212297439575,
      "learning_rate": 2.0865510673571422e-06,
      "loss": 2.4081,
      "step": 60825
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2388468980789185,
      "learning_rate": 2.0862993505676067e-06,
      "loss": 2.1885,
      "step": 60826
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9908221364021301,
      "learning_rate": 2.086047647193946e-06,
      "loss": 2.5292,
      "step": 60827
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0974180698394775,
      "learning_rate": 2.0857959572365814e-06,
      "loss": 2.4829,
      "step": 60828
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2813491821289062,
      "learning_rate": 2.0855442806959446e-06,
      "loss": 2.402,
      "step": 60829
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1415060758590698,
      "learning_rate": 2.0852926175724596e-06,
      "loss": 2.3876,
      "step": 60830
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1915966272354126,
      "learning_rate": 2.0850409678665507e-06,
      "loss": 2.3084,
      "step": 60831
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0951687097549438,
      "learning_rate": 2.0847893315786496e-06,
      "loss": 2.189,
      "step": 60832
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1279160976409912,
      "learning_rate": 2.084537708709178e-06,
      "loss": 2.0092,
      "step": 60833
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1018835306167603,
      "learning_rate": 2.0842860992585667e-06,
      "loss": 2.266,
      "step": 60834
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1797064542770386,
      "learning_rate": 2.0840345032272413e-06,
      "loss": 2.1642,
      "step": 60835
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0238863229751587,
      "learning_rate": 2.0837829206156267e-06,
      "loss": 2.3133,
      "step": 60836
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0929954051971436,
      "learning_rate": 2.0835313514241483e-06,
      "loss": 2.3352,
      "step": 60837
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0362681150436401,
      "learning_rate": 2.0832797956532357e-06,
      "loss": 2.3026,
      "step": 60838
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1889514923095703,
      "learning_rate": 2.083028253303312e-06,
      "loss": 2.6016,
      "step": 60839
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.002626657485962,
      "learning_rate": 2.0827767243748087e-06,
      "loss": 2.2303,
      "step": 60840
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.006906270980835,
      "learning_rate": 2.0825252088681456e-06,
      "loss": 2.0499,
      "step": 60841
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0812487602233887,
      "learning_rate": 2.0822737067837563e-06,
      "loss": 2.3162,
      "step": 60842
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1123943328857422,
      "learning_rate": 2.0820222181220596e-06,
      "loss": 2.3446,
      "step": 60843
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0242180824279785,
      "learning_rate": 2.0817707428834898e-06,
      "loss": 2.2852,
      "step": 60844
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9712629318237305,
      "learning_rate": 2.081519281068466e-06,
      "loss": 2.3249,
      "step": 60845
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0862480401992798,
      "learning_rate": 2.0812678326774193e-06,
      "loss": 2.3508,
      "step": 60846
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1089091300964355,
      "learning_rate": 2.0810163977107756e-06,
      "loss": 2.3522,
      "step": 60847
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1055467128753662,
      "learning_rate": 2.08076497616896e-06,
      "loss": 2.2462,
      "step": 60848
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0209368467330933,
      "learning_rate": 2.080513568052395e-06,
      "loss": 2.3888,
      "step": 60849
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0224429368972778,
      "learning_rate": 2.0802621733615125e-06,
      "loss": 2.1674,
      "step": 60850
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1248912811279297,
      "learning_rate": 2.080010792096735e-06,
      "loss": 2.1143,
      "step": 60851
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.247976303100586,
      "learning_rate": 2.079759424258493e-06,
      "loss": 2.4685,
      "step": 60852
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.209937572479248,
      "learning_rate": 2.079508069847207e-06,
      "loss": 2.2162,
      "step": 60853
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1073969602584839,
      "learning_rate": 2.079256728863308e-06,
      "loss": 2.615,
      "step": 60854
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1718603372573853,
      "learning_rate": 2.079005401307218e-06,
      "loss": 2.4112,
      "step": 60855
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1232317686080933,
      "learning_rate": 2.078754087179369e-06,
      "loss": 2.2519,
      "step": 60856
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0517339706420898,
      "learning_rate": 2.078502786480181e-06,
      "loss": 2.3892,
      "step": 60857
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1435474157333374,
      "learning_rate": 2.078251499210081e-06,
      "loss": 2.0796,
      "step": 60858
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1222552061080933,
      "learning_rate": 2.0780002253694988e-06,
      "loss": 2.2823,
      "step": 60859
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1536996364593506,
      "learning_rate": 2.0777489649588567e-06,
      "loss": 2.3883,
      "step": 60860
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0142416954040527,
      "learning_rate": 2.0774977179785806e-06,
      "loss": 2.1263,
      "step": 60861
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0055066347122192,
      "learning_rate": 2.0772464844290995e-06,
      "loss": 2.2064,
      "step": 60862
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1798712015151978,
      "learning_rate": 2.076995264310837e-06,
      "loss": 2.5176,
      "step": 60863
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0262762308120728,
      "learning_rate": 2.076744057624217e-06,
      "loss": 2.1754,
      "step": 60864
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0053927898406982,
      "learning_rate": 2.0764928643696713e-06,
      "loss": 2.1038,
      "step": 60865
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2115706205368042,
      "learning_rate": 2.0762416845476184e-06,
      "loss": 2.5055,
      "step": 60866
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1406384706497192,
      "learning_rate": 2.0759905181584915e-06,
      "loss": 2.1467,
      "step": 60867
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1104285717010498,
      "learning_rate": 2.0757393652027092e-06,
      "loss": 2.3421,
      "step": 60868
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1027480363845825,
      "learning_rate": 2.075488225680704e-06,
      "loss": 2.1249,
      "step": 60869
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2166811227798462,
      "learning_rate": 2.075237099592896e-06,
      "loss": 2.429,
      "step": 60870
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0803565979003906,
      "learning_rate": 2.074985986939716e-06,
      "loss": 2.2795,
      "step": 60871
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0353742837905884,
      "learning_rate": 2.0747348877215847e-06,
      "loss": 2.2291,
      "step": 60872
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1113775968551636,
      "learning_rate": 2.0744838019389345e-06,
      "loss": 2.2451,
      "step": 60873
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.257248044013977,
      "learning_rate": 2.0742327295921828e-06,
      "loss": 2.0733,
      "step": 60874
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.088269829750061,
      "learning_rate": 2.0739816706817617e-06,
      "loss": 2.2275,
      "step": 60875
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6375807523727417,
      "learning_rate": 2.073730625208091e-06,
      "loss": 2.4393,
      "step": 60876
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0469452142715454,
      "learning_rate": 2.0734795931716022e-06,
      "loss": 2.427,
      "step": 60877
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1194229125976562,
      "learning_rate": 2.0732285745727167e-06,
      "loss": 2.1886,
      "step": 60878
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1351964473724365,
      "learning_rate": 2.0729775694118636e-06,
      "loss": 2.5345,
      "step": 60879
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1972692012786865,
      "learning_rate": 2.0727265776894635e-06,
      "loss": 2.3111,
      "step": 60880
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.106476902961731,
      "learning_rate": 2.0724755994059488e-06,
      "loss": 2.3851,
      "step": 60881
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0345840454101562,
      "learning_rate": 2.072224634561737e-06,
      "loss": 2.6321,
      "step": 60882
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0233713388442993,
      "learning_rate": 2.0719736831572603e-06,
      "loss": 2.3091,
      "step": 60883
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.121741771697998,
      "learning_rate": 2.07172274519294e-06,
      "loss": 2.384,
      "step": 60884
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.238544225692749,
      "learning_rate": 2.071471820669204e-06,
      "loss": 2.2785,
      "step": 60885
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0313310623168945,
      "learning_rate": 2.0712209095864777e-06,
      "loss": 2.1763,
      "step": 60886
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9910937547683716,
      "learning_rate": 2.0709700119451858e-06,
      "loss": 2.5185,
      "step": 60887
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0951584577560425,
      "learning_rate": 2.0707191277457494e-06,
      "loss": 2.3944,
      "step": 60888
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1892015933990479,
      "learning_rate": 2.0704682569886013e-06,
      "loss": 2.1195,
      "step": 60889
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9774599075317383,
      "learning_rate": 2.0702173996741592e-06,
      "loss": 2.242,
      "step": 60890
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1549134254455566,
      "learning_rate": 2.069966555802856e-06,
      "loss": 2.3657,
      "step": 60891
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.132543683052063,
      "learning_rate": 2.0697157253751132e-06,
      "loss": 2.5356,
      "step": 60892
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.407699704170227,
      "learning_rate": 2.069464908391353e-06,
      "loss": 2.0949,
      "step": 60893
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9989505410194397,
      "learning_rate": 2.0692141048520067e-06,
      "loss": 2.2732,
      "step": 60894
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9812464714050293,
      "learning_rate": 2.068963314757494e-06,
      "loss": 2.2507,
      "step": 60895
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1752272844314575,
      "learning_rate": 2.068712538108245e-06,
      "loss": 2.2137,
      "step": 60896
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9820429086685181,
      "learning_rate": 2.0684617749046797e-06,
      "loss": 2.3509,
      "step": 60897
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.146417260169983,
      "learning_rate": 2.0682110251472275e-06,
      "loss": 2.3518,
      "step": 60898
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.146873950958252,
      "learning_rate": 2.0679602888363136e-06,
      "loss": 2.1891,
      "step": 60899
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3886325359344482,
      "learning_rate": 2.06770956597236e-06,
      "loss": 2.4662,
      "step": 60900
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0222892761230469,
      "learning_rate": 2.0674588565557908e-06,
      "loss": 2.1511,
      "step": 60901
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0443956851959229,
      "learning_rate": 2.0672081605870364e-06,
      "loss": 2.2386,
      "step": 60902
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.138041377067566,
      "learning_rate": 2.0669574780665158e-06,
      "loss": 2.2575,
      "step": 60903
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0093659162521362,
      "learning_rate": 2.0667068089946595e-06,
      "loss": 2.2372,
      "step": 60904
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2872971296310425,
      "learning_rate": 2.066456153371886e-06,
      "loss": 2.2018,
      "step": 60905
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3335630893707275,
      "learning_rate": 2.066205511198628e-06,
      "loss": 2.28,
      "step": 60906
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.195919156074524,
      "learning_rate": 2.065954882475304e-06,
      "loss": 2.2609,
      "step": 60907
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0941057205200195,
      "learning_rate": 2.065704267202343e-06,
      "loss": 2.2294,
      "step": 60908
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2555861473083496,
      "learning_rate": 2.065453665380166e-06,
      "loss": 2.2493,
      "step": 60909
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0683069229125977,
      "learning_rate": 2.0652030770092036e-06,
      "loss": 2.3677,
      "step": 60910
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1805318593978882,
      "learning_rate": 2.064952502089873e-06,
      "loss": 2.3113,
      "step": 60911
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1454429626464844,
      "learning_rate": 2.0647019406226066e-06,
      "loss": 2.2826,
      "step": 60912
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0853021144866943,
      "learning_rate": 2.0644513926078247e-06,
      "loss": 2.1503,
      "step": 60913
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1980220079421997,
      "learning_rate": 2.0642008580459537e-06,
      "loss": 2.5039,
      "step": 60914
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3708701133728027,
      "learning_rate": 2.0639503369374146e-06,
      "loss": 2.1918,
      "step": 60915
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1297132968902588,
      "learning_rate": 2.0636998292826383e-06,
      "loss": 2.3238,
      "step": 60916
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0522329807281494,
      "learning_rate": 2.0634493350820427e-06,
      "loss": 2.4935,
      "step": 60917
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1393569707870483,
      "learning_rate": 2.0631988543360593e-06,
      "loss": 2.2625,
      "step": 60918
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1440925598144531,
      "learning_rate": 2.0629483870451096e-06,
      "loss": 2.3328,
      "step": 60919
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9549515247344971,
      "learning_rate": 2.062697933209614e-06,
      "loss": 2.3407,
      "step": 60920
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2626475095748901,
      "learning_rate": 2.0624474928300053e-06,
      "loss": 2.4239,
      "step": 60921
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0657767057418823,
      "learning_rate": 2.0621970659067015e-06,
      "loss": 2.2757,
      "step": 60922
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0434931516647339,
      "learning_rate": 2.061946652440131e-06,
      "loss": 2.0864,
      "step": 60923
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.132713794708252,
      "learning_rate": 2.061696252430715e-06,
      "loss": 2.6096,
      "step": 60924
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.01639986038208,
      "learning_rate": 2.0614458658788815e-06,
      "loss": 2.2747,
      "step": 60925
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0367889404296875,
      "learning_rate": 2.061195492785055e-06,
      "loss": 2.2468,
      "step": 60926
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0226225852966309,
      "learning_rate": 2.0609451331496578e-06,
      "loss": 2.3644,
      "step": 60927
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1448115110397339,
      "learning_rate": 2.060694786973111e-06,
      "loss": 2.448,
      "step": 60928
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0992121696472168,
      "learning_rate": 2.0604444542558466e-06,
      "loss": 2.1723,
      "step": 60929
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1035516262054443,
      "learning_rate": 2.0601941349982824e-06,
      "loss": 2.6093,
      "step": 60930
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0459868907928467,
      "learning_rate": 2.0599438292008476e-06,
      "loss": 2.3352,
      "step": 60931
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9993014931678772,
      "learning_rate": 2.0596935368639624e-06,
      "loss": 2.3274,
      "step": 60932
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0659915208816528,
      "learning_rate": 2.0594432579880553e-06,
      "loss": 2.0953,
      "step": 60933
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.152316927909851,
      "learning_rate": 2.0591929925735464e-06,
      "loss": 2.2585,
      "step": 60934
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0696576833724976,
      "learning_rate": 2.058942740620865e-06,
      "loss": 2.1514,
      "step": 60935
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3612804412841797,
      "learning_rate": 2.0586925021304295e-06,
      "loss": 2.4261,
      "step": 60936
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.079197645187378,
      "learning_rate": 2.0584422771026693e-06,
      "loss": 2.5077,
      "step": 60937
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0312042236328125,
      "learning_rate": 2.058192065538004e-06,
      "loss": 2.3387,
      "step": 60938
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1951652765274048,
      "learning_rate": 2.0579418674368646e-06,
      "loss": 2.2944,
      "step": 60939
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1574115753173828,
      "learning_rate": 2.0576916827996663e-06,
      "loss": 2.3026,
      "step": 60940
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3189889192581177,
      "learning_rate": 2.0574415116268407e-06,
      "loss": 2.3085,
      "step": 60941
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1135411262512207,
      "learning_rate": 2.057191353918806e-06,
      "loss": 2.0319,
      "step": 60942
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1169382333755493,
      "learning_rate": 2.056941209675991e-06,
      "loss": 2.0929,
      "step": 60943
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1084715127944946,
      "learning_rate": 2.056691078898816e-06,
      "loss": 2.1768,
      "step": 60944
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1017272472381592,
      "learning_rate": 2.05644096158771e-06,
      "loss": 2.2573,
      "step": 60945
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9626651406288147,
      "learning_rate": 2.056190857743091e-06,
      "loss": 2.4275,
      "step": 60946
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0549211502075195,
      "learning_rate": 2.0559407673653887e-06,
      "loss": 2.2514,
      "step": 60947
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.147971272468567,
      "learning_rate": 2.0556906904550244e-06,
      "loss": 2.274,
      "step": 60948
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9705342054367065,
      "learning_rate": 2.0554406270124196e-06,
      "loss": 2.4167,
      "step": 60949
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0340356826782227,
      "learning_rate": 2.055190577038002e-06,
      "loss": 2.4574,
      "step": 60950
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0041131973266602,
      "learning_rate": 2.054940540532193e-06,
      "loss": 2.3662,
      "step": 60951
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0711191892623901,
      "learning_rate": 2.05469051749542e-06,
      "loss": 2.2816,
      "step": 60952
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0722132921218872,
      "learning_rate": 2.0544405079281036e-06,
      "loss": 2.378,
      "step": 60953
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1364669799804688,
      "learning_rate": 2.0541905118306693e-06,
      "loss": 2.2509,
      "step": 60954
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2021151781082153,
      "learning_rate": 2.0539405292035377e-06,
      "loss": 2.3058,
      "step": 60955
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0645655393600464,
      "learning_rate": 2.053690560047137e-06,
      "loss": 2.0408,
      "step": 60956
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1445289850234985,
      "learning_rate": 2.0534406043618872e-06,
      "loss": 2.3772,
      "step": 60957
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1803114414215088,
      "learning_rate": 2.0531906621482154e-06,
      "loss": 2.5395,
      "step": 60958
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.149487853050232,
      "learning_rate": 2.052940733406542e-06,
      "loss": 2.5129,
      "step": 60959
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.166324496269226,
      "learning_rate": 2.052690818137295e-06,
      "loss": 2.2687,
      "step": 60960
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0558383464813232,
      "learning_rate": 2.052440916340892e-06,
      "loss": 2.0892,
      "step": 60961
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0719736814498901,
      "learning_rate": 2.0521910280177636e-06,
      "loss": 2.3693,
      "step": 60962
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.179347276687622,
      "learning_rate": 2.0519411531683274e-06,
      "loss": 2.5079,
      "step": 60963
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.045660138130188,
      "learning_rate": 2.051691291793012e-06,
      "loss": 2.2093,
      "step": 60964
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.7085013389587402,
      "learning_rate": 2.051441443892239e-06,
      "loss": 2.4313,
      "step": 60965
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0523686408996582,
      "learning_rate": 2.051191609466431e-06,
      "loss": 2.0545,
      "step": 60966
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0144957304000854,
      "learning_rate": 2.05094178851601e-06,
      "loss": 2.3189,
      "step": 60967
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.054225206375122,
      "learning_rate": 2.050691981041404e-06,
      "loss": 2.3141,
      "step": 60968
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9982643127441406,
      "learning_rate": 2.0504421870430315e-06,
      "loss": 2.1384,
      "step": 60969
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0964151620864868,
      "learning_rate": 2.050192406521321e-06,
      "loss": 2.4035,
      "step": 60970
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.167681097984314,
      "learning_rate": 2.049942639476691e-06,
      "loss": 2.2752,
      "step": 60971
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9825702905654907,
      "learning_rate": 2.0496928859095698e-06,
      "loss": 2.4187,
      "step": 60972
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1702940464019775,
      "learning_rate": 2.0494431458203755e-06,
      "loss": 2.3442,
      "step": 60973
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1565316915512085,
      "learning_rate": 2.0491934192095375e-06,
      "loss": 2.2041,
      "step": 60974
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9961681365966797,
      "learning_rate": 2.0489437060774763e-06,
      "loss": 2.4012,
      "step": 60975
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0885381698608398,
      "learning_rate": 2.0486940064246117e-06,
      "loss": 2.4967,
      "step": 60976
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0471371412277222,
      "learning_rate": 2.0484443202513735e-06,
      "loss": 2.3591,
      "step": 60977
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0402849912643433,
      "learning_rate": 2.0481946475581815e-06,
      "loss": 2.3342,
      "step": 60978
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0909969806671143,
      "learning_rate": 2.0479449883454574e-06,
      "loss": 2.3743,
      "step": 60979
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0573973655700684,
      "learning_rate": 2.047695342613628e-06,
      "loss": 2.3197,
      "step": 60980
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1325267553329468,
      "learning_rate": 2.047445710363115e-06,
      "loss": 2.4244,
      "step": 60981
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0978261232376099,
      "learning_rate": 2.047196091594339e-06,
      "loss": 2.3556,
      "step": 60982
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0206235647201538,
      "learning_rate": 2.046946486307728e-06,
      "loss": 2.481,
      "step": 60983
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1808031797409058,
      "learning_rate": 2.0466968945037e-06,
      "loss": 2.3623,
      "step": 60984
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0834236145019531,
      "learning_rate": 2.046447316182685e-06,
      "loss": 2.3018,
      "step": 60985
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1370646953582764,
      "learning_rate": 2.046197751345097e-06,
      "loss": 2.4688,
      "step": 60986
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0739452838897705,
      "learning_rate": 2.0459481999913676e-06,
      "loss": 2.3176,
      "step": 60987
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1445587873458862,
      "learning_rate": 2.0456986621219145e-06,
      "loss": 2.3827,
      "step": 60988
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0199365615844727,
      "learning_rate": 2.0454491377371645e-06,
      "loss": 2.391,
      "step": 60989
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1411656141281128,
      "learning_rate": 2.045199626837535e-06,
      "loss": 2.1968,
      "step": 60990
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.111893653869629,
      "learning_rate": 2.0449501294234565e-06,
      "loss": 2.1858,
      "step": 60991
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1301225423812866,
      "learning_rate": 2.0447006454953476e-06,
      "loss": 2.2018,
      "step": 60992
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3023203611373901,
      "learning_rate": 2.044451175053632e-06,
      "loss": 2.2823,
      "step": 60993
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0232218503952026,
      "learning_rate": 2.0442017180987305e-06,
      "loss": 2.3011,
      "step": 60994
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4584057331085205,
      "learning_rate": 2.0439522746310693e-06,
      "loss": 2.0392,
      "step": 60995
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0958505868911743,
      "learning_rate": 2.043702844651068e-06,
      "loss": 2.2369,
      "step": 60996
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0569480657577515,
      "learning_rate": 2.0434534281591533e-06,
      "loss": 2.2993,
      "step": 60997
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1005265712738037,
      "learning_rate": 2.043204025155743e-06,
      "loss": 2.1853,
      "step": 60998
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3040733337402344,
      "learning_rate": 2.0429546356412667e-06,
      "loss": 2.3948,
      "step": 60999
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9966018199920654,
      "learning_rate": 2.04270525961614e-06,
      "loss": 2.2688,
      "step": 61000
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0529450178146362,
      "learning_rate": 2.0424558970807916e-06,
      "loss": 2.4045,
      "step": 61001
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9888954758644104,
      "learning_rate": 2.042206548035639e-06,
      "loss": 2.2005,
      "step": 61002
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0963318347930908,
      "learning_rate": 2.0419572124811104e-06,
      "loss": 2.3273,
      "step": 61003
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0279901027679443,
      "learning_rate": 2.0417078904176257e-06,
      "loss": 2.1972,
      "step": 61004
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1845437288284302,
      "learning_rate": 2.0414585818456077e-06,
      "loss": 2.3659,
      "step": 61005
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0242805480957031,
      "learning_rate": 2.0412092867654764e-06,
      "loss": 2.1889,
      "step": 61006
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.033432960510254,
      "learning_rate": 2.04096000517766e-06,
      "loss": 2.3037,
      "step": 61007
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1655021905899048,
      "learning_rate": 2.040710737082574e-06,
      "loss": 2.3683,
      "step": 61008
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.290630578994751,
      "learning_rate": 2.040461482480649e-06,
      "loss": 2.1978,
      "step": 61009
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.186632513999939,
      "learning_rate": 2.0402122413723025e-06,
      "loss": 2.2309,
      "step": 61010
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.044988989830017,
      "learning_rate": 2.0399630137579564e-06,
      "loss": 2.1974,
      "step": 61011
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0563372373580933,
      "learning_rate": 2.0397137996380368e-06,
      "loss": 2.5389,
      "step": 61012
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0156134366989136,
      "learning_rate": 2.0394645990129623e-06,
      "loss": 2.2308,
      "step": 61013
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0948429107666016,
      "learning_rate": 2.0392154118831597e-06,
      "loss": 2.2915,
      "step": 61014
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1133854389190674,
      "learning_rate": 2.038966238249047e-06,
      "loss": 2.2215,
      "step": 61015
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0211081504821777,
      "learning_rate": 2.038717078111051e-06,
      "loss": 2.1562,
      "step": 61016
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1942813396453857,
      "learning_rate": 2.0384679314695886e-06,
      "loss": 2.3424,
      "step": 61017
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.106228232383728,
      "learning_rate": 2.0382187983250913e-06,
      "loss": 2.38,
      "step": 61018
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1016170978546143,
      "learning_rate": 2.03796967867797e-06,
      "loss": 2.4782,
      "step": 61019
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0188146829605103,
      "learning_rate": 2.0377205725286554e-06,
      "loss": 2.3599,
      "step": 61020
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9872646331787109,
      "learning_rate": 2.037471479877563e-06,
      "loss": 2.2879,
      "step": 61021
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1452770233154297,
      "learning_rate": 2.037222400725123e-06,
      "loss": 2.4182,
      "step": 61022
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.778163194656372,
      "learning_rate": 2.0369733350717514e-06,
      "loss": 2.1919,
      "step": 61023
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0823122262954712,
      "learning_rate": 2.0367242829178747e-06,
      "loss": 2.1319,
      "step": 61024
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2143468856811523,
      "learning_rate": 2.0364752442639104e-06,
      "loss": 2.4521,
      "step": 61025
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1580240726470947,
      "learning_rate": 2.0362262191102867e-06,
      "loss": 2.4449,
      "step": 61026
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0637041330337524,
      "learning_rate": 2.035977207457419e-06,
      "loss": 2.4276,
      "step": 61027
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0287901163101196,
      "learning_rate": 2.035728209305736e-06,
      "loss": 2.1491,
      "step": 61028
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0958281755447388,
      "learning_rate": 2.035479224655654e-06,
      "loss": 2.2967,
      "step": 61029
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2023613452911377,
      "learning_rate": 2.0352302535076005e-06,
      "loss": 2.4564,
      "step": 61030
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.9849957227706909,
      "learning_rate": 2.0349812958619954e-06,
      "loss": 2.3295,
      "step": 61031
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1682225465774536,
      "learning_rate": 2.0347323517192596e-06,
      "loss": 2.5393,
      "step": 61032
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1578947305679321,
      "learning_rate": 2.034483421079814e-06,
      "loss": 2.2319,
      "step": 61033
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.023614764213562,
      "learning_rate": 2.0342345039440837e-06,
      "loss": 2.1112,
      "step": 61034
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3358248472213745,
      "learning_rate": 2.033985600312488e-06,
      "loss": 2.347,
      "step": 61035
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.171197772026062,
      "learning_rate": 2.033736710185452e-06,
      "loss": 2.1165,
      "step": 61036
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.027902603149414,
      "learning_rate": 2.0334878335633967e-06,
      "loss": 2.277,
      "step": 61037
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1675971746444702,
      "learning_rate": 2.0332389704467404e-06,
      "loss": 2.1887,
      "step": 61038
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1028797626495361,
      "learning_rate": 2.0329901208359093e-06,
      "loss": 2.3927,
      "step": 61039
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1448537111282349,
      "learning_rate": 2.032741284731322e-06,
      "loss": 2.4162,
      "step": 61040
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2316577434539795,
      "learning_rate": 2.032492462133405e-06,
      "loss": 2.2209,
      "step": 61041
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1953171491622925,
      "learning_rate": 2.032243653042575e-06,
      "loss": 2.2479,
      "step": 61042
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.114126205444336,
      "learning_rate": 2.0319948574592575e-06,
      "loss": 2.4128,
      "step": 61043
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1371756792068481,
      "learning_rate": 2.0317460753838736e-06,
      "loss": 2.2089,
      "step": 61044
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4697909355163574,
      "learning_rate": 2.0314973068168444e-06,
      "loss": 2.2698,
      "step": 61045
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1544008255004883,
      "learning_rate": 2.0312485517585876e-06,
      "loss": 2.3757,
      "step": 61046
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.074229121208191,
      "learning_rate": 2.030999810209532e-06,
      "loss": 2.2149,
      "step": 61047
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.219409704208374,
      "learning_rate": 2.030751082170094e-06,
      "loss": 2.4715,
      "step": 61048
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0806541442871094,
      "learning_rate": 2.0305023676407e-06,
      "loss": 2.2289,
      "step": 61049
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.1583538055419922,
      "learning_rate": 2.0302536666217655e-06,
      "loss": 2.2016,
      "step": 61050
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0556775331497192,
      "learning_rate": 2.030004979113719e-06,
      "loss": 2.3539,
      "step": 61051
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0639164447784424,
      "learning_rate": 2.0297563051169755e-06,
      "loss": 2.2909,
      "step": 61052
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.021512508392334,
      "learning_rate": 2.029507644631963e-06,
      "loss": 2.364,
      "step": 61053
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0841262340545654,
      "learning_rate": 2.029258997659096e-06,
      "loss": 2.3075,
      "step": 61054
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0126398801803589,
      "learning_rate": 2.0290103641988036e-06,
      "loss": 2.1747,
      "step": 61055
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.0397732257843018,
      "learning_rate": 2.0287617442515007e-06,
      "loss": 2.6254,
      "step": 61056
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1941839456558228,
      "learning_rate": 2.0285131378176136e-06,
      "loss": 2.1769,
      "step": 61057
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0972059965133667,
      "learning_rate": 2.0282645448975625e-06,
      "loss": 2.3067,
      "step": 61058
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.140987515449524,
      "learning_rate": 2.0280159654917675e-06,
      "loss": 2.3257,
      "step": 61059
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9728107452392578,
      "learning_rate": 2.0277673996006487e-06,
      "loss": 2.2209,
      "step": 61060
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1653705835342407,
      "learning_rate": 2.027518847224631e-06,
      "loss": 2.4265,
      "step": 61061
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1137757301330566,
      "learning_rate": 2.027270308364132e-06,
      "loss": 2.2391,
      "step": 61062
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1571779251098633,
      "learning_rate": 2.027021783019578e-06,
      "loss": 2.4448,
      "step": 61063
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.054710865020752,
      "learning_rate": 2.0267732711913858e-06,
      "loss": 2.4601,
      "step": 61064
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1937170028686523,
      "learning_rate": 2.0265247728799796e-06,
      "loss": 2.262,
      "step": 61065
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.062158226966858,
      "learning_rate": 2.0262762880857802e-06,
      "loss": 2.4315,
      "step": 61066
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1263229846954346,
      "learning_rate": 2.026027816809205e-06,
      "loss": 2.35,
      "step": 61067
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0167315006256104,
      "learning_rate": 2.0257793590506813e-06,
      "loss": 2.5257,
      "step": 61068
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1096750497817993,
      "learning_rate": 2.025530914810624e-06,
      "loss": 2.2994,
      "step": 61069
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0562573671340942,
      "learning_rate": 2.0252824840894614e-06,
      "loss": 2.3544,
      "step": 61070
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1055024862289429,
      "learning_rate": 2.02503406688761e-06,
      "loss": 2.3032,
      "step": 61071
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0676590204238892,
      "learning_rate": 2.024785663205493e-06,
      "loss": 2.6109,
      "step": 61072
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0248806476593018,
      "learning_rate": 2.0245372730435263e-06,
      "loss": 2.1475,
      "step": 61073
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2063007354736328,
      "learning_rate": 2.024288896402138e-06,
      "loss": 2.2513,
      "step": 61074
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0472768545150757,
      "learning_rate": 2.0240405332817447e-06,
      "loss": 2.1826,
      "step": 61075
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.150355339050293,
      "learning_rate": 2.02379218368277e-06,
      "loss": 2.3614,
      "step": 61076
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0951865911483765,
      "learning_rate": 2.0235438476056325e-06,
      "loss": 2.403,
      "step": 61077
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2189254760742188,
      "learning_rate": 2.023295525050756e-06,
      "loss": 2.5247,
      "step": 61078
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3305792808532715,
      "learning_rate": 2.0230472160185586e-06,
      "loss": 2.3386,
      "step": 61079
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1113300323486328,
      "learning_rate": 2.0227989205094643e-06,
      "loss": 2.3685,
      "step": 61080
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1035127639770508,
      "learning_rate": 2.02255063852389e-06,
      "loss": 2.444,
      "step": 61081
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0645369291305542,
      "learning_rate": 2.0223023700622624e-06,
      "loss": 2.148,
      "step": 61082
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.133177638053894,
      "learning_rate": 2.022054115124995e-06,
      "loss": 2.2394,
      "step": 61083
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0741328001022339,
      "learning_rate": 2.021805873712519e-06,
      "loss": 2.5256,
      "step": 61084
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0429818630218506,
      "learning_rate": 2.021557645825244e-06,
      "loss": 2.297,
      "step": 61085
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0508038997650146,
      "learning_rate": 2.0213094314635973e-06,
      "loss": 2.1596,
      "step": 61086
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0605665445327759,
      "learning_rate": 2.0210612306279964e-06,
      "loss": 2.4371,
      "step": 61087
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0525169372558594,
      "learning_rate": 2.0208130433188653e-06,
      "loss": 2.2841,
      "step": 61088
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.212638258934021,
      "learning_rate": 2.020564869536621e-06,
      "loss": 2.3121,
      "step": 61089
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0391191244125366,
      "learning_rate": 2.02031670928169e-06,
      "loss": 2.4749,
      "step": 61090
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1167237758636475,
      "learning_rate": 2.020068562554486e-06,
      "loss": 2.3687,
      "step": 61091
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0021356344223022,
      "learning_rate": 2.019820429355436e-06,
      "loss": 2.3273,
      "step": 61092
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0168211460113525,
      "learning_rate": 2.0195723096849574e-06,
      "loss": 2.2159,
      "step": 61093
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.012298345565796,
      "learning_rate": 2.019324203543468e-06,
      "loss": 2.1295,
      "step": 61094
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1790666580200195,
      "learning_rate": 2.019076110931395e-06,
      "loss": 2.4177,
      "step": 61095
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0768275260925293,
      "learning_rate": 2.018828031849153e-06,
      "loss": 2.2677,
      "step": 61096
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0708941221237183,
      "learning_rate": 2.018579966297167e-06,
      "loss": 2.2706,
      "step": 61097
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3257030248641968,
      "learning_rate": 2.0183319142758563e-06,
      "loss": 2.2127,
      "step": 61098
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1228917837142944,
      "learning_rate": 2.0180838757856415e-06,
      "loss": 2.4033,
      "step": 61099
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1392757892608643,
      "learning_rate": 2.017835850826939e-06,
      "loss": 2.2865,
      "step": 61100
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2239564657211304,
      "learning_rate": 2.0175878394001748e-06,
      "loss": 2.431,
      "step": 61101
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0827593803405762,
      "learning_rate": 2.0173398415057656e-06,
      "loss": 2.2198,
      "step": 61102
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0847903490066528,
      "learning_rate": 2.0170918571441355e-06,
      "loss": 2.4401,
      "step": 61103
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.039206862449646,
      "learning_rate": 2.0168438863157003e-06,
      "loss": 2.3124,
      "step": 61104
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0612221956253052,
      "learning_rate": 2.016595929020886e-06,
      "loss": 2.0954,
      "step": 61105
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1825004816055298,
      "learning_rate": 2.0163479852601064e-06,
      "loss": 2.2424,
      "step": 61106
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2694978713989258,
      "learning_rate": 2.0161000550337885e-06,
      "loss": 2.2753,
      "step": 61107
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0664020776748657,
      "learning_rate": 2.0158521383423456e-06,
      "loss": 2.4952,
      "step": 61108
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.094840407371521,
      "learning_rate": 2.015604235186206e-06,
      "loss": 2.6552,
      "step": 61109
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1720280647277832,
      "learning_rate": 2.0153563455657855e-06,
      "loss": 2.138,
      "step": 61110
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9855619072914124,
      "learning_rate": 2.0151084694815036e-06,
      "loss": 2.36,
      "step": 61111
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0499516725540161,
      "learning_rate": 2.0148606069337793e-06,
      "loss": 2.3202,
      "step": 61112
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.241176724433899,
      "learning_rate": 2.014612757923038e-06,
      "loss": 2.4098,
      "step": 61113
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1378997564315796,
      "learning_rate": 2.014364922449694e-06,
      "loss": 2.1352,
      "step": 61114
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.063273549079895,
      "learning_rate": 2.0141171005141737e-06,
      "loss": 2.3029,
      "step": 61115
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9810442328453064,
      "learning_rate": 2.01386929211689e-06,
      "loss": 2.2716,
      "step": 61116
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1993271112442017,
      "learning_rate": 2.01362149725827e-06,
      "loss": 2.4366,
      "step": 61117
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.044772982597351,
      "learning_rate": 2.0133737159387278e-06,
      "loss": 2.2808,
      "step": 61118
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1897163391113281,
      "learning_rate": 2.0131259481586894e-06,
      "loss": 2.1233,
      "step": 61119
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0788780450820923,
      "learning_rate": 2.0128781939185693e-06,
      "loss": 2.3904,
      "step": 61120
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.089213490486145,
      "learning_rate": 2.0126304532187914e-06,
      "loss": 2.4229,
      "step": 61121
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1471563577651978,
      "learning_rate": 2.012382726059775e-06,
      "loss": 2.3845,
      "step": 61122
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.126869559288025,
      "learning_rate": 2.012135012441939e-06,
      "loss": 2.4283,
      "step": 61123
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0332399606704712,
      "learning_rate": 2.0118873123657025e-06,
      "loss": 2.2055,
      "step": 61124
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.157874345779419,
      "learning_rate": 2.0116396258314873e-06,
      "loss": 2.5063,
      "step": 61125
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.015731692314148,
      "learning_rate": 2.0113919528397107e-06,
      "loss": 2.1795,
      "step": 61126
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.06734299659729,
      "learning_rate": 2.0111442933907966e-06,
      "loss": 2.1503,
      "step": 61127
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.065842866897583,
      "learning_rate": 2.0108966474851634e-06,
      "loss": 2.3291,
      "step": 61128
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1221418380737305,
      "learning_rate": 2.010649015123227e-06,
      "loss": 2.4777,
      "step": 61129
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0242894887924194,
      "learning_rate": 2.0104013963054124e-06,
      "loss": 2.5672,
      "step": 61130
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.04160737991333,
      "learning_rate": 2.0101537910321356e-06,
      "loss": 2.3171,
      "step": 61131
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1064857244491577,
      "learning_rate": 2.0099061993038203e-06,
      "loss": 2.4266,
      "step": 61132
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.090478539466858,
      "learning_rate": 2.0096586211208823e-06,
      "loss": 2.3119,
      "step": 61133
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2457352876663208,
      "learning_rate": 2.0094110564837457e-06,
      "loss": 2.2136,
      "step": 61134
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0884016752243042,
      "learning_rate": 2.0091635053928236e-06,
      "loss": 2.1412,
      "step": 61135
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0286036729812622,
      "learning_rate": 2.0089159678485436e-06,
      "loss": 2.6528,
      "step": 61136
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1200158596038818,
      "learning_rate": 2.0086684438513205e-06,
      "loss": 2.216,
      "step": 61137
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0733070373535156,
      "learning_rate": 2.008420933401576e-06,
      "loss": 2.4093,
      "step": 61138
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0022075176239014,
      "learning_rate": 2.008173436499724e-06,
      "loss": 2.3726,
      "step": 61139
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0396102666854858,
      "learning_rate": 2.0079259531461927e-06,
      "loss": 2.4786,
      "step": 61140
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0386090278625488,
      "learning_rate": 2.0076784833413943e-06,
      "loss": 2.1993,
      "step": 61141
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0591143369674683,
      "learning_rate": 2.0074310270857545e-06,
      "loss": 2.3261,
      "step": 61142
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9950550198554993,
      "learning_rate": 2.0071835843796874e-06,
      "loss": 2.4327,
      "step": 61143
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1697816848754883,
      "learning_rate": 2.0069361552236165e-06,
      "loss": 2.4097,
      "step": 61144
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0205097198486328,
      "learning_rate": 2.006688739617958e-06,
      "loss": 2.2707,
      "step": 61145
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.044500708580017,
      "learning_rate": 2.0064413375631366e-06,
      "loss": 2.3554,
      "step": 61146
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.061978816986084,
      "learning_rate": 2.0061939490595637e-06,
      "loss": 2.1888,
      "step": 61147
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0783814191818237,
      "learning_rate": 2.0059465741076667e-06,
      "loss": 2.3541,
      "step": 61148
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2150113582611084,
      "learning_rate": 2.005699212707861e-06,
      "loss": 2.4149,
      "step": 61149
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0111645460128784,
      "learning_rate": 2.005451864860567e-06,
      "loss": 2.3665,
      "step": 61150
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0843968391418457,
      "learning_rate": 2.0052045305662004e-06,
      "loss": 2.1093,
      "step": 61151
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.150495171546936,
      "learning_rate": 2.004957209825186e-06,
      "loss": 2.4111,
      "step": 61152
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1333332061767578,
      "learning_rate": 2.004709902637938e-06,
      "loss": 2.1792,
      "step": 61153
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2677230834960938,
      "learning_rate": 2.00446260900488e-06,
      "loss": 2.4149,
      "step": 61154
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.164520502090454,
      "learning_rate": 2.004215328926431e-06,
      "loss": 2.3773,
      "step": 61155
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0201997756958008,
      "learning_rate": 2.0039680624030045e-06,
      "loss": 2.5776,
      "step": 61156
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.977230966091156,
      "learning_rate": 2.003720809435028e-06,
      "loss": 2.4106,
      "step": 61157
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0595424175262451,
      "learning_rate": 2.0034735700229123e-06,
      "loss": 2.5175,
      "step": 61158
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1676875352859497,
      "learning_rate": 2.003226344167084e-06,
      "loss": 2.1938,
      "step": 61159
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1031548976898193,
      "learning_rate": 2.002979131867957e-06,
      "loss": 2.5775,
      "step": 61160
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2347410917282104,
      "learning_rate": 2.0027319331259542e-06,
      "loss": 2.298,
      "step": 61161
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0592477321624756,
      "learning_rate": 2.00248474794149e-06,
      "loss": 2.2167,
      "step": 61162
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0080630779266357,
      "learning_rate": 2.0022375763149925e-06,
      "loss": 2.4651,
      "step": 61163
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.031209945678711,
      "learning_rate": 2.0019904182468684e-06,
      "loss": 2.3308,
      "step": 61164
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1974858045578003,
      "learning_rate": 2.0017432737375455e-06,
      "loss": 2.5803,
      "step": 61165
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0463182926177979,
      "learning_rate": 2.001496142787438e-06,
      "loss": 2.1528,
      "step": 61166
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1247978210449219,
      "learning_rate": 2.0012490253969686e-06,
      "loss": 2.3889,
      "step": 61167
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0316768884658813,
      "learning_rate": 2.001001921566552e-06,
      "loss": 2.2781,
      "step": 61168
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2791911363601685,
      "learning_rate": 2.000754831296613e-06,
      "loss": 2.3336,
      "step": 61169
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.081870675086975,
      "learning_rate": 2.000507754587564e-06,
      "loss": 2.2065,
      "step": 61170
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1921864748001099,
      "learning_rate": 2.0002606914398294e-06,
      "loss": 2.1309,
      "step": 61171
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0170612335205078,
      "learning_rate": 2.000013641853824e-06,
      "loss": 2.2672,
      "step": 61172
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.173775315284729,
      "learning_rate": 1.99976660582997e-06,
      "loss": 2.5083,
      "step": 61173
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.01060152053833,
      "learning_rate": 1.999519583368682e-06,
      "loss": 2.2941,
      "step": 61174
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0318821668624878,
      "learning_rate": 1.9992725744703835e-06,
      "loss": 2.077,
      "step": 61175
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9970855116844177,
      "learning_rate": 1.999025579135492e-06,
      "loss": 2.2104,
      "step": 61176
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9204267859458923,
      "learning_rate": 1.9987785973644236e-06,
      "loss": 2.2422,
      "step": 61177
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1589596271514893,
      "learning_rate": 1.9985316291575975e-06,
      "loss": 2.1781,
      "step": 61178
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0727918148040771,
      "learning_rate": 1.998284674515435e-06,
      "loss": 2.4618,
      "step": 61179
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1299692392349243,
      "learning_rate": 1.9980377334383503e-06,
      "loss": 2.1751,
      "step": 61180
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.11419677734375,
      "learning_rate": 1.9977908059267694e-06,
      "loss": 2.2652,
      "step": 61181
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1342568397521973,
      "learning_rate": 1.9975438919811018e-06,
      "loss": 2.3026,
      "step": 61182
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.058499813079834,
      "learning_rate": 1.9972969916017747e-06,
      "loss": 2.5189,
      "step": 61183
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9927158951759338,
      "learning_rate": 1.997050104789202e-06,
      "loss": 2.2676,
      "step": 61184
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1479045152664185,
      "learning_rate": 1.996803231543801e-06,
      "loss": 2.2155,
      "step": 61185
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1313191652297974,
      "learning_rate": 1.996556371865994e-06,
      "loss": 2.3743,
      "step": 61186
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1141858100891113,
      "learning_rate": 1.996309525756196e-06,
      "loss": 2.2878,
      "step": 61187
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1979398727416992,
      "learning_rate": 1.9960626932148286e-06,
      "loss": 2.3245,
      "step": 61188
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.109969973564148,
      "learning_rate": 1.9958158742423097e-06,
      "loss": 2.1907,
      "step": 61189
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.114492654800415,
      "learning_rate": 1.995569068839056e-06,
      "loss": 2.2976,
      "step": 61190
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.065993309020996,
      "learning_rate": 1.9953222770054845e-06,
      "loss": 2.2553,
      "step": 61191
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0830386877059937,
      "learning_rate": 1.9950754987420182e-06,
      "loss": 2.3793,
      "step": 61192
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1837248802185059,
      "learning_rate": 1.9948287340490714e-06,
      "loss": 2.314,
      "step": 61193
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1352990865707397,
      "learning_rate": 1.9945819829270653e-06,
      "loss": 2.3505,
      "step": 61194
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.174345850944519,
      "learning_rate": 1.9943352453764155e-06,
      "loss": 2.1728,
      "step": 61195
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0641577243804932,
      "learning_rate": 1.9940885213975438e-06,
      "loss": 2.3807,
      "step": 61196
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1148239374160767,
      "learning_rate": 1.9938418109908643e-06,
      "loss": 2.2747,
      "step": 61197
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9934045076370239,
      "learning_rate": 1.993595114156799e-06,
      "loss": 2.2835,
      "step": 61198
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3428288698196411,
      "learning_rate": 1.993348430895763e-06,
      "loss": 2.2345,
      "step": 61199
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0538480281829834,
      "learning_rate": 1.993101761208177e-06,
      "loss": 2.2153,
      "step": 61200
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.123782753944397,
      "learning_rate": 1.9928551050944566e-06,
      "loss": 2.4772,
      "step": 61201
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.023720622062683,
      "learning_rate": 1.9926084625550245e-06,
      "loss": 2.2042,
      "step": 61202
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0186277627944946,
      "learning_rate": 1.992361833590295e-06,
      "loss": 2.1537,
      "step": 61203
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0681027173995972,
      "learning_rate": 1.9921152182006874e-06,
      "loss": 2.4132,
      "step": 61204
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0949692726135254,
      "learning_rate": 1.9918686163866176e-06,
      "loss": 2.5358,
      "step": 61205
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.039720058441162,
      "learning_rate": 1.9916220281485077e-06,
      "loss": 2.2461,
      "step": 61206
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1185338497161865,
      "learning_rate": 1.9913754534867703e-06,
      "loss": 2.2343,
      "step": 61207
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0330257415771484,
      "learning_rate": 1.991128892401829e-06,
      "loss": 2.3178,
      "step": 61208
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0726455450057983,
      "learning_rate": 1.9908823448940973e-06,
      "loss": 2.4014,
      "step": 61209
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2201353311538696,
      "learning_rate": 1.9906358109639978e-06,
      "loss": 2.4068,
      "step": 61210
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0962611436843872,
      "learning_rate": 1.990389290611946e-06,
      "loss": 2.3254,
      "step": 61211
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1765395402908325,
      "learning_rate": 1.990142783838358e-06,
      "loss": 2.1788,
      "step": 61212
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0574069023132324,
      "learning_rate": 1.989896290643655e-06,
      "loss": 2.3328,
      "step": 61213
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.06240713596344,
      "learning_rate": 1.98964981102825e-06,
      "loss": 2.1994,
      "step": 61214
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0774387121200562,
      "learning_rate": 1.9894033449925674e-06,
      "loss": 2.3972,
      "step": 61215
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.096314549446106,
      "learning_rate": 1.989156892537023e-06,
      "loss": 2.1164,
      "step": 61216
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0547975301742554,
      "learning_rate": 1.988910453662032e-06,
      "loss": 2.4268,
      "step": 61217
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0195250511169434,
      "learning_rate": 1.9886640283680113e-06,
      "loss": 2.1833,
      "step": 61218
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0553224086761475,
      "learning_rate": 1.9884176166553835e-06,
      "loss": 2.2289,
      "step": 61219
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1332581043243408,
      "learning_rate": 1.9881712185245614e-06,
      "loss": 2.3174,
      "step": 61220
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0299124717712402,
      "learning_rate": 1.9879248339759673e-06,
      "loss": 2.3569,
      "step": 61221
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0073115825653076,
      "learning_rate": 1.9876784630100144e-06,
      "loss": 2.3913,
      "step": 61222
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0548955202102661,
      "learning_rate": 1.987432105627125e-06,
      "loss": 2.5688,
      "step": 61223
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0694425106048584,
      "learning_rate": 1.9871857618277114e-06,
      "loss": 2.4052,
      "step": 61224
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0736488103866577,
      "learning_rate": 1.986939431612198e-06,
      "loss": 2.3887,
      "step": 61225
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1622850894927979,
      "learning_rate": 1.9866931149809944e-06,
      "loss": 2.4063,
      "step": 61226
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1451157331466675,
      "learning_rate": 1.9864468119345258e-06,
      "loss": 2.3451,
      "step": 61227
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0749248266220093,
      "learning_rate": 1.986200522473204e-06,
      "loss": 2.2982,
      "step": 61228
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0323630571365356,
      "learning_rate": 1.9859542465974534e-06,
      "loss": 2.432,
      "step": 61229
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0506830215454102,
      "learning_rate": 1.9857079843076822e-06,
      "loss": 2.4097,
      "step": 61230
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.037526249885559,
      "learning_rate": 1.985461735604315e-06,
      "loss": 2.1524,
      "step": 61231
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.016607403755188,
      "learning_rate": 1.9852155004877647e-06,
      "loss": 2.3806,
      "step": 61232
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1807414293289185,
      "learning_rate": 1.984969278958453e-06,
      "loss": 2.137,
      "step": 61233
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2101430892944336,
      "learning_rate": 1.984723071016793e-06,
      "loss": 2.353,
      "step": 61234
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.129696011543274,
      "learning_rate": 1.9844768766632062e-06,
      "loss": 2.3095,
      "step": 61235
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1446565389633179,
      "learning_rate": 1.984230695898106e-06,
      "loss": 2.2123,
      "step": 61236
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1806503534317017,
      "learning_rate": 1.983984528721914e-06,
      "loss": 2.3358,
      "step": 61237
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0268315076828003,
      "learning_rate": 1.9837383751350435e-06,
      "loss": 2.2072,
      "step": 61238
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0814201831817627,
      "learning_rate": 1.983492235137916e-06,
      "loss": 2.2644,
      "step": 61239
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1030033826828003,
      "learning_rate": 1.983246108730946e-06,
      "loss": 2.3786,
      "step": 61240
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0805360078811646,
      "learning_rate": 1.9829999959145487e-06,
      "loss": 2.395,
      "step": 61241
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.141106128692627,
      "learning_rate": 1.9827538966891468e-06,
      "loss": 2.3026,
      "step": 61242
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0586261749267578,
      "learning_rate": 1.9825078110551543e-06,
      "loss": 2.481,
      "step": 61243
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.185681939125061,
      "learning_rate": 1.982261739012986e-06,
      "loss": 2.2633,
      "step": 61244
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.397461175918579,
      "learning_rate": 1.9820156805630643e-06,
      "loss": 2.2068,
      "step": 61245
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1136071681976318,
      "learning_rate": 1.9817696357058036e-06,
      "loss": 2.2264,
      "step": 61246
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.058967113494873,
      "learning_rate": 1.9815236044416176e-06,
      "loss": 2.3479,
      "step": 61247
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9964948892593384,
      "learning_rate": 1.981277586770931e-06,
      "loss": 2.3464,
      "step": 61248
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0815505981445312,
      "learning_rate": 1.981031582694153e-06,
      "loss": 2.2562,
      "step": 61249
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.091944694519043,
      "learning_rate": 1.9807855922117083e-06,
      "loss": 2.4195,
      "step": 61250
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9797564148902893,
      "learning_rate": 1.980539615324005e-06,
      "loss": 2.4249,
      "step": 61251
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1832062005996704,
      "learning_rate": 1.9802936520314696e-06,
      "loss": 2.2452,
      "step": 61252
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0591291189193726,
      "learning_rate": 1.980047702334512e-06,
      "loss": 2.1606,
      "step": 61253
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1319670677185059,
      "learning_rate": 1.9798017662335544e-06,
      "loss": 2.3939,
      "step": 61254
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9910399913787842,
      "learning_rate": 1.979555843729011e-06,
      "loss": 2.2168,
      "step": 61255
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0646231174468994,
      "learning_rate": 1.979309934821297e-06,
      "loss": 2.0392,
      "step": 61256
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1586735248565674,
      "learning_rate": 1.9790640395108307e-06,
      "loss": 2.2657,
      "step": 61257
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0388380289077759,
      "learning_rate": 1.9788181577980305e-06,
      "loss": 2.3384,
      "step": 61258
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0539166927337646,
      "learning_rate": 1.9785722896833105e-06,
      "loss": 2.442,
      "step": 61259
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1062798500061035,
      "learning_rate": 1.9783264351670904e-06,
      "loss": 2.2325,
      "step": 61260
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.035993218421936,
      "learning_rate": 1.978080594249785e-06,
      "loss": 2.3316,
      "step": 61261
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0956681966781616,
      "learning_rate": 1.977834766931812e-06,
      "loss": 2.3977,
      "step": 61262
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1112391948699951,
      "learning_rate": 1.9775889532135874e-06,
      "loss": 2.4201,
      "step": 61263
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0546735525131226,
      "learning_rate": 1.97734315309553e-06,
      "loss": 2.3307,
      "step": 61264
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9601849317550659,
      "learning_rate": 1.9770973665780514e-06,
      "loss": 2.2284,
      "step": 61265
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.043733835220337,
      "learning_rate": 1.9768515936615752e-06,
      "loss": 2.3588,
      "step": 61266
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0883628129959106,
      "learning_rate": 1.9766058343465144e-06,
      "loss": 2.1818,
      "step": 61267
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.122754454612732,
      "learning_rate": 1.9763600886332866e-06,
      "loss": 2.344,
      "step": 61268
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.126145839691162,
      "learning_rate": 1.976114356522304e-06,
      "loss": 2.3694,
      "step": 61269
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1499524116516113,
      "learning_rate": 1.9758686380139904e-06,
      "loss": 2.4212,
      "step": 61270
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0669469833374023,
      "learning_rate": 1.9756229331087563e-06,
      "loss": 2.4361,
      "step": 61271
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.098679542541504,
      "learning_rate": 1.9753772418070218e-06,
      "loss": 2.1767,
      "step": 61272
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1058000326156616,
      "learning_rate": 1.975131564109204e-06,
      "loss": 2.4116,
      "step": 61273
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0182795524597168,
      "learning_rate": 1.9748859000157138e-06,
      "loss": 2.1312,
      "step": 61274
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1560486555099487,
      "learning_rate": 1.9746402495269747e-06,
      "loss": 2.3352,
      "step": 61275
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1253883838653564,
      "learning_rate": 1.974394612643398e-06,
      "loss": 2.3458,
      "step": 61276
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0537947416305542,
      "learning_rate": 1.974148989365404e-06,
      "loss": 2.4465,
      "step": 61277
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.04490065574646,
      "learning_rate": 1.973903379693405e-06,
      "loss": 2.5309,
      "step": 61278
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2543483972549438,
      "learning_rate": 1.9736577836278214e-06,
      "loss": 2.527,
      "step": 61279
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0298339128494263,
      "learning_rate": 1.973412201169066e-06,
      "loss": 2.338,
      "step": 61280
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.060320258140564,
      "learning_rate": 1.9731666323175604e-06,
      "loss": 2.2722,
      "step": 61281
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1194005012512207,
      "learning_rate": 1.9729210770737163e-06,
      "loss": 2.3004,
      "step": 61282
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.024295687675476,
      "learning_rate": 1.9726755354379512e-06,
      "loss": 2.1055,
      "step": 61283
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0940531492233276,
      "learning_rate": 1.9724300074106785e-06,
      "loss": 2.2562,
      "step": 61284
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1040613651275635,
      "learning_rate": 1.9721844929923194e-06,
      "loss": 2.214,
      "step": 61285
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.008564829826355,
      "learning_rate": 1.9719389921832866e-06,
      "loss": 2.476,
      "step": 61286
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2562916278839111,
      "learning_rate": 1.9716935049839993e-06,
      "loss": 2.0659,
      "step": 61287
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.091951608657837,
      "learning_rate": 1.971448031394869e-06,
      "loss": 2.2973,
      "step": 61288
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2520843744277954,
      "learning_rate": 1.9712025714163184e-06,
      "loss": 2.1821,
      "step": 61289
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1084803342819214,
      "learning_rate": 1.970957125048757e-06,
      "loss": 2.3005,
      "step": 61290
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.048285961151123,
      "learning_rate": 1.9707116922926064e-06,
      "loss": 2.3826,
      "step": 61291
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.004856824874878,
      "learning_rate": 1.970466273148278e-06,
      "loss": 2.3245,
      "step": 61292
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.070286750793457,
      "learning_rate": 1.9702208676161925e-06,
      "loss": 2.1868,
      "step": 61293
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3397310972213745,
      "learning_rate": 1.9699754756967603e-06,
      "loss": 2.2229,
      "step": 61294
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0979645252227783,
      "learning_rate": 1.9697300973904066e-06,
      "loss": 2.332,
      "step": 61295
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0303621292114258,
      "learning_rate": 1.9694847326975363e-06,
      "loss": 2.2513,
      "step": 61296
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0580540895462036,
      "learning_rate": 1.969239381618573e-06,
      "loss": 2.329,
      "step": 61297
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9875307679176331,
      "learning_rate": 1.9689940441539268e-06,
      "loss": 2.34,
      "step": 61298
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0695085525512695,
      "learning_rate": 1.9687487203040188e-06,
      "loss": 2.1373,
      "step": 61299
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0582380294799805,
      "learning_rate": 1.9685034100692625e-06,
      "loss": 2.31,
      "step": 61300
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1581954956054688,
      "learning_rate": 1.9682581134500755e-06,
      "loss": 2.0061,
      "step": 61301
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1618587970733643,
      "learning_rate": 1.9680128304468728e-06,
      "loss": 2.2743,
      "step": 61302
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0032740831375122,
      "learning_rate": 1.9677675610600665e-06,
      "loss": 2.2628,
      "step": 61303
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0867068767547607,
      "learning_rate": 1.9675223052900784e-06,
      "loss": 2.2221,
      "step": 61304
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.092146635055542,
      "learning_rate": 1.9672770631373194e-06,
      "loss": 2.1905,
      "step": 61305
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1902254819869995,
      "learning_rate": 1.96703183460221e-06,
      "loss": 2.2442,
      "step": 61306
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.119947910308838,
      "learning_rate": 1.9667866196851614e-06,
      "loss": 2.2999,
      "step": 61307
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.15437912940979,
      "learning_rate": 1.9665414183865963e-06,
      "loss": 2.245,
      "step": 61308
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0656720399856567,
      "learning_rate": 1.966296230706919e-06,
      "loss": 2.3915,
      "step": 61309
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9525194764137268,
      "learning_rate": 1.966051056646555e-06,
      "loss": 2.4114,
      "step": 61310
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9919522404670715,
      "learning_rate": 1.9658058962059134e-06,
      "loss": 2.3896,
      "step": 61311
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.256514072418213,
      "learning_rate": 1.9655607493854158e-06,
      "loss": 2.231,
      "step": 61312
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0519962310791016,
      "learning_rate": 1.9653156161854726e-06,
      "loss": 2.3634,
      "step": 61313
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.171443223953247,
      "learning_rate": 1.9650704966065037e-06,
      "loss": 2.2119,
      "step": 61314
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.058660626411438,
      "learning_rate": 1.96482539064892e-06,
      "loss": 2.3442,
      "step": 61315
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.161246418952942,
      "learning_rate": 1.9645802983131423e-06,
      "loss": 2.5705,
      "step": 61316
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1821584701538086,
      "learning_rate": 1.9643352195995803e-06,
      "loss": 2.3748,
      "step": 61317
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0419920682907104,
      "learning_rate": 1.9640901545086556e-06,
      "loss": 2.2462,
      "step": 61318
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0888482332229614,
      "learning_rate": 1.963845103040778e-06,
      "loss": 2.3843,
      "step": 61319
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0861917734146118,
      "learning_rate": 1.9636000651963683e-06,
      "loss": 2.2257,
      "step": 61320
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9550418853759766,
      "learning_rate": 1.9633550409758396e-06,
      "loss": 2.274,
      "step": 61321
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.01442289352417,
      "learning_rate": 1.963110030379606e-06,
      "loss": 2.3103,
      "step": 61322
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0879429578781128,
      "learning_rate": 1.9628650334080814e-06,
      "loss": 2.3557,
      "step": 61323
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1475341320037842,
      "learning_rate": 1.9626200500616853e-06,
      "loss": 2.5725,
      "step": 61324
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1385602951049805,
      "learning_rate": 1.9623750803408294e-06,
      "loss": 2.0627,
      "step": 61325
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.027161955833435,
      "learning_rate": 1.9621301242459333e-06,
      "loss": 2.3448,
      "step": 61326
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0597853660583496,
      "learning_rate": 1.9618851817774066e-06,
      "loss": 2.3838,
      "step": 61327
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0537105798721313,
      "learning_rate": 1.9616402529356706e-06,
      "loss": 2.1379,
      "step": 61328
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0387495756149292,
      "learning_rate": 1.961395337721138e-06,
      "loss": 2.3001,
      "step": 61329
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0664230585098267,
      "learning_rate": 1.9611504361342206e-06,
      "loss": 2.3975,
      "step": 61330
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0867358446121216,
      "learning_rate": 1.960905548175338e-06,
      "loss": 2.3595,
      "step": 61331
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0743016004562378,
      "learning_rate": 1.960660673844903e-06,
      "loss": 2.2495,
      "step": 61332
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1237891912460327,
      "learning_rate": 1.9604158131433336e-06,
      "loss": 2.3398,
      "step": 61333
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.125167727470398,
      "learning_rate": 1.960170966071042e-06,
      "loss": 2.4692,
      "step": 61334
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1164758205413818,
      "learning_rate": 1.959926132628446e-06,
      "loss": 2.3391,
      "step": 61335
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.056048035621643,
      "learning_rate": 1.9596813128159553e-06,
      "loss": 2.3085,
      "step": 61336
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9634341597557068,
      "learning_rate": 1.9594365066339905e-06,
      "loss": 2.2443,
      "step": 61337
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0435898303985596,
      "learning_rate": 1.959191714082963e-06,
      "loss": 2.4163,
      "step": 61338
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0592014789581299,
      "learning_rate": 1.9589469351632915e-06,
      "loss": 2.429,
      "step": 61339
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2062432765960693,
      "learning_rate": 1.9587021698753862e-06,
      "loss": 2.1933,
      "step": 61340
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1018599271774292,
      "learning_rate": 1.958457418219667e-06,
      "loss": 2.4022,
      "step": 61341
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1415549516677856,
      "learning_rate": 1.958212680196545e-06,
      "loss": 2.3587,
      "step": 61342
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0366357564926147,
      "learning_rate": 1.957967955806438e-06,
      "loss": 2.2871,
      "step": 61343
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0237810611724854,
      "learning_rate": 1.9577232450497575e-06,
      "loss": 2.2486,
      "step": 61344
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1679822206497192,
      "learning_rate": 1.957478547926923e-06,
      "loss": 2.1484,
      "step": 61345
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1311396360397339,
      "learning_rate": 1.9572338644383447e-06,
      "loss": 2.2052,
      "step": 61346
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0879735946655273,
      "learning_rate": 1.956989194584441e-06,
      "loss": 2.3616,
      "step": 61347
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0982619524002075,
      "learning_rate": 1.956744538365626e-06,
      "loss": 2.4898,
      "step": 61348
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.179024338722229,
      "learning_rate": 1.9564998957823124e-06,
      "loss": 2.4745,
      "step": 61349
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0487488508224487,
      "learning_rate": 1.9562552668349144e-06,
      "loss": 2.4342,
      "step": 61350
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.089174509048462,
      "learning_rate": 1.95601065152385e-06,
      "loss": 2.421,
      "step": 61351
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0426197052001953,
      "learning_rate": 1.95576604984953e-06,
      "loss": 2.3975,
      "step": 61352
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0936774015426636,
      "learning_rate": 1.955521461812374e-06,
      "loss": 2.3444,
      "step": 61353
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9635059237480164,
      "learning_rate": 1.9552768874127925e-06,
      "loss": 2.2435,
      "step": 61354
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1846164464950562,
      "learning_rate": 1.955032326651203e-06,
      "loss": 2.4171,
      "step": 61355
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1515556573867798,
      "learning_rate": 1.9547877795280155e-06,
      "loss": 2.493,
      "step": 61356
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0726268291473389,
      "learning_rate": 1.954543246043652e-06,
      "loss": 2.2644,
      "step": 61357
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0983446836471558,
      "learning_rate": 1.9542987261985216e-06,
      "loss": 2.3419,
      "step": 61358
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1039040088653564,
      "learning_rate": 1.954054219993038e-06,
      "loss": 2.191,
      "step": 61359
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.181060552597046,
      "learning_rate": 1.9538097274276204e-06,
      "loss": 2.3617,
      "step": 61360
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1086609363555908,
      "learning_rate": 1.95356524850268e-06,
      "loss": 2.2528,
      "step": 61361
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2219579219818115,
      "learning_rate": 1.9533207832186295e-06,
      "loss": 2.3389,
      "step": 61362
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1599398851394653,
      "learning_rate": 1.953076331575888e-06,
      "loss": 2.1553,
      "step": 61363
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0240674018859863,
      "learning_rate": 1.952831893574869e-06,
      "loss": 2.1918,
      "step": 61364
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.259979486465454,
      "learning_rate": 1.952587469215982e-06,
      "loss": 2.2812,
      "step": 61365
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0646278858184814,
      "learning_rate": 1.9523430584996474e-06,
      "loss": 2.2434,
      "step": 61366
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0169450044631958,
      "learning_rate": 1.952098661426275e-06,
      "loss": 2.32,
      "step": 61367
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.135180950164795,
      "learning_rate": 1.9518542779962835e-06,
      "loss": 2.324,
      "step": 61368
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1145265102386475,
      "learning_rate": 1.9516099082100816e-06,
      "loss": 2.2237,
      "step": 61369
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0644536018371582,
      "learning_rate": 1.9513655520680896e-06,
      "loss": 2.4043,
      "step": 61370
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1728178262710571,
      "learning_rate": 1.9511212095707164e-06,
      "loss": 2.3307,
      "step": 61371
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0483484268188477,
      "learning_rate": 1.950876880718382e-06,
      "loss": 2.2343,
      "step": 61372
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9995653033256531,
      "learning_rate": 1.9506325655114943e-06,
      "loss": 2.2982,
      "step": 61373
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0440537929534912,
      "learning_rate": 1.9503882639504767e-06,
      "loss": 2.2819,
      "step": 61374
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0480585098266602,
      "learning_rate": 1.950143976035731e-06,
      "loss": 2.3576,
      "step": 61375
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5993244647979736,
      "learning_rate": 1.94989970176768e-06,
      "loss": 2.1919,
      "step": 61376
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.095072865486145,
      "learning_rate": 1.949655441146734e-06,
      "loss": 2.4196,
      "step": 61377
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.336399793624878,
      "learning_rate": 1.94941119417331e-06,
      "loss": 2.3231,
      "step": 61378
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.120545506477356,
      "learning_rate": 1.949166960847818e-06,
      "loss": 2.184,
      "step": 61379
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1749911308288574,
      "learning_rate": 1.948922741170678e-06,
      "loss": 2.1531,
      "step": 61380
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0017708539962769,
      "learning_rate": 1.9486785351422967e-06,
      "loss": 2.1822,
      "step": 61381
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0302667617797852,
      "learning_rate": 1.948434342763096e-06,
      "loss": 2.5224,
      "step": 61382
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1209890842437744,
      "learning_rate": 1.948190164033482e-06,
      "loss": 2.3556,
      "step": 61383
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.180757761001587,
      "learning_rate": 1.947945998953876e-06,
      "loss": 2.1832,
      "step": 61384
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0854662656784058,
      "learning_rate": 1.947701847524688e-06,
      "loss": 2.4127,
      "step": 61385
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0830663442611694,
      "learning_rate": 1.94745770974633e-06,
      "loss": 2.2785,
      "step": 61386
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1227736473083496,
      "learning_rate": 1.947213585619221e-06,
      "loss": 2.452,
      "step": 61387
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0259594917297363,
      "learning_rate": 1.9469694751437716e-06,
      "loss": 2.5499,
      "step": 61388
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1201534271240234,
      "learning_rate": 1.946725378320393e-06,
      "loss": 2.415,
      "step": 61389
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0678379535675049,
      "learning_rate": 1.946481295149506e-06,
      "loss": 2.4067,
      "step": 61390
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1564035415649414,
      "learning_rate": 1.9462372256315197e-06,
      "loss": 2.3552,
      "step": 61391
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.05387544631958,
      "learning_rate": 1.9459931697668466e-06,
      "loss": 2.2734,
      "step": 61392
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0885579586029053,
      "learning_rate": 1.945749127555905e-06,
      "loss": 2.434,
      "step": 61393
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0923302173614502,
      "learning_rate": 1.9455050989991032e-06,
      "loss": 2.3569,
      "step": 61394
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0774742364883423,
      "learning_rate": 1.94526108409686e-06,
      "loss": 2.3343,
      "step": 61395
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1332316398620605,
      "learning_rate": 1.945017082849585e-06,
      "loss": 2.554,
      "step": 61396
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1679785251617432,
      "learning_rate": 1.944773095257696e-06,
      "loss": 2.3299,
      "step": 61397
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0070818662643433,
      "learning_rate": 1.944529121321602e-06,
      "loss": 2.1234,
      "step": 61398
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0920498371124268,
      "learning_rate": 1.9442851610417215e-06,
      "loss": 2.5522,
      "step": 61399
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2580926418304443,
      "learning_rate": 1.944041214418466e-06,
      "loss": 2.2433,
      "step": 61400
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1517858505249023,
      "learning_rate": 1.9437972814522475e-06,
      "loss": 2.5074,
      "step": 61401
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0277693271636963,
      "learning_rate": 1.94355336214348e-06,
      "loss": 2.1063,
      "step": 61402
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9757383465766907,
      "learning_rate": 1.943309456492578e-06,
      "loss": 2.3094,
      "step": 61403
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0431028604507446,
      "learning_rate": 1.943065564499953e-06,
      "loss": 2.2899,
      "step": 61404
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0221768617630005,
      "learning_rate": 1.942821686166023e-06,
      "loss": 2.5218,
      "step": 61405
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0661522150039673,
      "learning_rate": 1.9425778214911963e-06,
      "loss": 2.2869,
      "step": 61406
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1793341636657715,
      "learning_rate": 1.9423339704758904e-06,
      "loss": 2.3502,
      "step": 61407
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0052049160003662,
      "learning_rate": 1.9420901331205143e-06,
      "loss": 2.1905,
      "step": 61408
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2872154712677002,
      "learning_rate": 1.941846309425487e-06,
      "loss": 2.3969,
      "step": 61409
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0433392524719238,
      "learning_rate": 1.9416024993912153e-06,
      "loss": 2.3967,
      "step": 61410
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0517150163650513,
      "learning_rate": 1.9413587030181193e-06,
      "loss": 2.3666,
      "step": 61411
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0225962400436401,
      "learning_rate": 1.941114920306606e-06,
      "loss": 2.3607,
      "step": 61412
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0847313404083252,
      "learning_rate": 1.9408711512570978e-06,
      "loss": 2.2035,
      "step": 61413
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.051193356513977,
      "learning_rate": 1.9406273958699952e-06,
      "loss": 2.0865,
      "step": 61414
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0430123805999756,
      "learning_rate": 1.940383654145721e-06,
      "loss": 2.2594,
      "step": 61415
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2144545316696167,
      "learning_rate": 1.940139926084683e-06,
      "loss": 2.293,
      "step": 61416
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.024027705192566,
      "learning_rate": 1.9398962116873e-06,
      "loss": 2.3203,
      "step": 61417
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9998452663421631,
      "learning_rate": 1.93965251095398e-06,
      "loss": 2.3406,
      "step": 61418
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1517493724822998,
      "learning_rate": 1.939408823885139e-06,
      "loss": 2.1823,
      "step": 61419
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0730726718902588,
      "learning_rate": 1.9391651504811905e-06,
      "loss": 2.2627,
      "step": 61420
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0486572980880737,
      "learning_rate": 1.9389214907425434e-06,
      "loss": 2.2,
      "step": 61421
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1585729122161865,
      "learning_rate": 1.9386778446696162e-06,
      "loss": 2.4057,
      "step": 61422
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0942091941833496,
      "learning_rate": 1.9384342122628173e-06,
      "loss": 2.3609,
      "step": 61423
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1187167167663574,
      "learning_rate": 1.9381905935225643e-06,
      "loss": 2.2337,
      "step": 61424
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0281916856765747,
      "learning_rate": 1.9379469884492653e-06,
      "loss": 2.285,
      "step": 61425
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1250262260437012,
      "learning_rate": 1.937703397043338e-06,
      "loss": 2.1508,
      "step": 61426
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0580122470855713,
      "learning_rate": 1.937459819305193e-06,
      "loss": 2.3337,
      "step": 61427
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.083681344985962,
      "learning_rate": 1.9372162552352435e-06,
      "loss": 2.3629,
      "step": 61428
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.131695032119751,
      "learning_rate": 1.9369727048339005e-06,
      "loss": 2.4209,
      "step": 61429
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2797832489013672,
      "learning_rate": 1.93672916810158e-06,
      "loss": 2.0619,
      "step": 61430
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9903590083122253,
      "learning_rate": 1.9364856450386915e-06,
      "loss": 2.4288,
      "step": 61431
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2060503959655762,
      "learning_rate": 1.936242135645653e-06,
      "loss": 2.2908,
      "step": 61432
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.063309907913208,
      "learning_rate": 1.9359986399228704e-06,
      "loss": 2.3277,
      "step": 61433
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0694751739501953,
      "learning_rate": 1.9357551578707634e-06,
      "loss": 2.3718,
      "step": 61434
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1000922918319702,
      "learning_rate": 1.9355116894897395e-06,
      "loss": 2.4271,
      "step": 61435
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1454746723175049,
      "learning_rate": 1.9352682347802153e-06,
      "loss": 2.2767,
      "step": 61436
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0803561210632324,
      "learning_rate": 1.9350247937426003e-06,
      "loss": 2.4298,
      "step": 61437
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0472867488861084,
      "learning_rate": 1.934781366377311e-06,
      "loss": 2.2312,
      "step": 61438
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0597211122512817,
      "learning_rate": 1.934537952684754e-06,
      "loss": 2.374,
      "step": 61439
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.07937490940094,
      "learning_rate": 1.9342945526653524e-06,
      "loss": 2.4589,
      "step": 61440
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9797478318214417,
      "learning_rate": 1.9340511663195063e-06,
      "loss": 2.2615,
      "step": 61441
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1458542346954346,
      "learning_rate": 1.933807793647636e-06,
      "loss": 2.3923,
      "step": 61442
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2406141757965088,
      "learning_rate": 1.9335644346501503e-06,
      "loss": 2.3238,
      "step": 61443
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.097945213317871,
      "learning_rate": 1.9333210893274667e-06,
      "loss": 2.2075,
      "step": 61444
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3448020219802856,
      "learning_rate": 1.933077757679992e-06,
      "loss": 2.1735,
      "step": 61445
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1305363178253174,
      "learning_rate": 1.9328344397081446e-06,
      "loss": 2.273,
      "step": 61446
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1858274936676025,
      "learning_rate": 1.9325911354123327e-06,
      "loss": 2.4856,
      "step": 61447
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1161595582962036,
      "learning_rate": 1.9323478447929677e-06,
      "loss": 2.0475,
      "step": 61448
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0375810861587524,
      "learning_rate": 1.9321045678504667e-06,
      "loss": 2.4976,
      "step": 61449
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.033940315246582,
      "learning_rate": 1.9318613045852374e-06,
      "loss": 2.0234,
      "step": 61450
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0527160167694092,
      "learning_rate": 1.931618054997697e-06,
      "loss": 2.2593,
      "step": 61451
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0480692386627197,
      "learning_rate": 1.931374819088252e-06,
      "loss": 2.2725,
      "step": 61452
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0170507431030273,
      "learning_rate": 1.9311315968573243e-06,
      "loss": 2.2776,
      "step": 61453
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.088963270187378,
      "learning_rate": 1.930888388305314e-06,
      "loss": 2.2954,
      "step": 61454
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.072951078414917,
      "learning_rate": 1.930645193432643e-06,
      "loss": 2.1417,
      "step": 61455
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9829040765762329,
      "learning_rate": 1.930402012239717e-06,
      "loss": 2.3776,
      "step": 61456
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0365403890609741,
      "learning_rate": 1.9301588447269537e-06,
      "loss": 2.3914,
      "step": 61457
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0125107765197754,
      "learning_rate": 1.92991569089476e-06,
      "loss": 2.1864,
      "step": 61458
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1621625423431396,
      "learning_rate": 1.9296725507435544e-06,
      "loss": 2.1546,
      "step": 61459
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2767224311828613,
      "learning_rate": 1.929429424273742e-06,
      "loss": 2.331,
      "step": 61460
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0617101192474365,
      "learning_rate": 1.929186311485742e-06,
      "loss": 2.2172,
      "step": 61461
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0828553438186646,
      "learning_rate": 1.928943212379961e-06,
      "loss": 2.3501,
      "step": 61462
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0792653560638428,
      "learning_rate": 1.9287001269568153e-06,
      "loss": 2.2395,
      "step": 61463
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4776395559310913,
      "learning_rate": 1.9284570552167125e-06,
      "loss": 2.3064,
      "step": 61464
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2126586437225342,
      "learning_rate": 1.92821399716007e-06,
      "loss": 2.4019,
      "step": 61465
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0443673133850098,
      "learning_rate": 1.927970952787297e-06,
      "loss": 2.1574,
      "step": 61466
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8171974420547485,
      "learning_rate": 1.9277279220988056e-06,
      "loss": 2.3486,
      "step": 61467
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.074711799621582,
      "learning_rate": 1.9274849050950053e-06,
      "loss": 2.1707,
      "step": 61468
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2202080488204956,
      "learning_rate": 1.9272419017763123e-06,
      "loss": 2.2717,
      "step": 61469
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1216524839401245,
      "learning_rate": 1.9269989121431344e-06,
      "loss": 2.3348,
      "step": 61470
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0890697240829468,
      "learning_rate": 1.9267559361958897e-06,
      "loss": 2.1517,
      "step": 61471
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0561765432357788,
      "learning_rate": 1.926512973934983e-06,
      "loss": 2.2054,
      "step": 61472
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1695152521133423,
      "learning_rate": 1.9262700253608315e-06,
      "loss": 2.4168,
      "step": 61473
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.010916829109192,
      "learning_rate": 1.926027090473843e-06,
      "loss": 2.3626,
      "step": 61474
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1372413635253906,
      "learning_rate": 1.925784169274435e-06,
      "loss": 2.3248,
      "step": 61475
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1217507123947144,
      "learning_rate": 1.925541261763014e-06,
      "loss": 2.209,
      "step": 61476
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.038676381111145,
      "learning_rate": 1.925298367939993e-06,
      "loss": 2.3532,
      "step": 61477
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0704492330551147,
      "learning_rate": 1.925055487805785e-06,
      "loss": 2.5785,
      "step": 61478
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1126539707183838,
      "learning_rate": 1.924812621360802e-06,
      "loss": 2.18,
      "step": 61479
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1355820894241333,
      "learning_rate": 1.924569768605452e-06,
      "loss": 2.3553,
      "step": 61480
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9772784113883972,
      "learning_rate": 1.924326929540152e-06,
      "loss": 2.3518,
      "step": 61481
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0427486896514893,
      "learning_rate": 1.9240841041653115e-06,
      "loss": 2.1808,
      "step": 61482
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1014015674591064,
      "learning_rate": 1.9238412924813387e-06,
      "loss": 2.511,
      "step": 61483
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0789498090744019,
      "learning_rate": 1.923598494488652e-06,
      "loss": 2.3907,
      "step": 61484
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.184015154838562,
      "learning_rate": 1.9233557101876565e-06,
      "loss": 2.4842,
      "step": 61485
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1086649894714355,
      "learning_rate": 1.9231129395787686e-06,
      "loss": 2.3144,
      "step": 61486
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2536576986312866,
      "learning_rate": 1.9228701826623953e-06,
      "loss": 2.3535,
      "step": 61487
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0522105693817139,
      "learning_rate": 1.922627439438954e-06,
      "loss": 2.2084,
      "step": 61488
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0387929677963257,
      "learning_rate": 1.9223847099088498e-06,
      "loss": 2.3239,
      "step": 61489
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1500234603881836,
      "learning_rate": 1.9221419940725004e-06,
      "loss": 2.245,
      "step": 61490
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0784519910812378,
      "learning_rate": 1.9218992919303114e-06,
      "loss": 2.3398,
      "step": 61491
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0415135622024536,
      "learning_rate": 1.9216566034827e-06,
      "loss": 2.3803,
      "step": 61492
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1116808652877808,
      "learning_rate": 1.921413928730074e-06,
      "loss": 2.1418,
      "step": 61493
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0241144895553589,
      "learning_rate": 1.9211712676728457e-06,
      "loss": 2.4201,
      "step": 61494
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.157576084136963,
      "learning_rate": 1.920928620311424e-06,
      "loss": 2.3073,
      "step": 61495
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3434549570083618,
      "learning_rate": 1.920685986646226e-06,
      "loss": 2.3904,
      "step": 61496
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0021051168441772,
      "learning_rate": 1.920443366677656e-06,
      "loss": 2.2087,
      "step": 61497
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1835850477218628,
      "learning_rate": 1.920200760406131e-06,
      "loss": 2.1387,
      "step": 61498
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0571383237838745,
      "learning_rate": 1.9199581678320576e-06,
      "loss": 2.364,
      "step": 61499
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0616854429244995,
      "learning_rate": 1.9197155889558528e-06,
      "loss": 2.3816,
      "step": 61500
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0778675079345703,
      "learning_rate": 1.9194730237779223e-06,
      "loss": 2.3072,
      "step": 61501
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0237603187561035,
      "learning_rate": 1.9192304722986822e-06,
      "loss": 2.223,
      "step": 61502
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0326471328735352,
      "learning_rate": 1.9189879345185403e-06,
      "loss": 2.4167,
      "step": 61503
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.19155752658844,
      "learning_rate": 1.9187454104379064e-06,
      "loss": 2.2184,
      "step": 61504
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1349459886550903,
      "learning_rate": 1.9185029000571966e-06,
      "loss": 2.2494,
      "step": 61505
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2352954149246216,
      "learning_rate": 1.9182604033768195e-06,
      "loss": 2.2948,
      "step": 61506
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.04441499710083,
      "learning_rate": 1.9180179203971825e-06,
      "loss": 2.1121,
      "step": 61507
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1263700723648071,
      "learning_rate": 1.917775451118704e-06,
      "loss": 2.0898,
      "step": 61508
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2016055583953857,
      "learning_rate": 1.917532995541791e-06,
      "loss": 2.358,
      "step": 61509
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1128339767456055,
      "learning_rate": 1.917290553666852e-06,
      "loss": 2.3122,
      "step": 61510
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0565879344940186,
      "learning_rate": 1.917048125494303e-06,
      "loss": 2.3868,
      "step": 61511
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1326265335083008,
      "learning_rate": 1.9168057110245496e-06,
      "loss": 2.2082,
      "step": 61512
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9613643288612366,
      "learning_rate": 1.9165633102580093e-06,
      "loss": 2.2255,
      "step": 61513
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0117710828781128,
      "learning_rate": 1.9163209231950873e-06,
      "loss": 2.3999,
      "step": 61514
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0865751504898071,
      "learning_rate": 1.9160785498361987e-06,
      "loss": 2.3391,
      "step": 61515
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0835587978363037,
      "learning_rate": 1.91583619018175e-06,
      "loss": 2.3144,
      "step": 61516
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0587825775146484,
      "learning_rate": 1.9155938442321574e-06,
      "loss": 2.3638,
      "step": 61517
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9651068449020386,
      "learning_rate": 1.9153515119878275e-06,
      "loss": 2.2108,
      "step": 61518
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3988237380981445,
      "learning_rate": 1.915109193449176e-06,
      "loss": 2.294,
      "step": 61519
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1531060934066772,
      "learning_rate": 1.9148668886166055e-06,
      "loss": 2.2596,
      "step": 61520
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0566353797912598,
      "learning_rate": 1.914624597490534e-06,
      "loss": 2.1525,
      "step": 61521
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.012423038482666,
      "learning_rate": 1.9143823200713673e-06,
      "loss": 2.4336,
      "step": 61522
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0633788108825684,
      "learning_rate": 1.9141400563595215e-06,
      "loss": 2.1532,
      "step": 61523
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1349176168441772,
      "learning_rate": 1.9138978063554026e-06,
      "loss": 2.3178,
      "step": 61524
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.013301968574524,
      "learning_rate": 1.913655570059424e-06,
      "loss": 2.1406,
      "step": 61525
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0272676944732666,
      "learning_rate": 1.913413347471994e-06,
      "loss": 2.2644,
      "step": 61526
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.025529384613037,
      "learning_rate": 1.913171138593527e-06,
      "loss": 2.4033,
      "step": 61527
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0713773965835571,
      "learning_rate": 1.912928943424428e-06,
      "loss": 2.2589,
      "step": 61528
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1252191066741943,
      "learning_rate": 1.9126867619651136e-06,
      "loss": 1.9973,
      "step": 61529
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0458269119262695,
      "learning_rate": 1.91244459421599e-06,
      "loss": 2.6243,
      "step": 61530
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0206000804901123,
      "learning_rate": 1.912202440177471e-06,
      "loss": 2.2404,
      "step": 61531
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.083193302154541,
      "learning_rate": 1.9119602998499655e-06,
      "loss": 2.3273,
      "step": 61532
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9891554117202759,
      "learning_rate": 1.911718173233884e-06,
      "loss": 2.2404,
      "step": 61533
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0512762069702148,
      "learning_rate": 1.911476060329636e-06,
      "loss": 2.1667,
      "step": 61534
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0126012563705444,
      "learning_rate": 1.911233961137633e-06,
      "loss": 2.5058,
      "step": 61535
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1914145946502686,
      "learning_rate": 1.9109918756582834e-06,
      "loss": 2.3519,
      "step": 61536
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0003414154052734,
      "learning_rate": 1.9107498038920037e-06,
      "loss": 2.5662,
      "step": 61537
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1133209466934204,
      "learning_rate": 1.9105077458391985e-06,
      "loss": 2.1475,
      "step": 61538
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0437678098678589,
      "learning_rate": 1.9102657015002777e-06,
      "loss": 2.4913,
      "step": 61539
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.168669581413269,
      "learning_rate": 1.910023670875656e-06,
      "loss": 2.3593,
      "step": 61540
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1365463733673096,
      "learning_rate": 1.909781653965739e-06,
      "loss": 2.2748,
      "step": 61541
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1265437602996826,
      "learning_rate": 1.909539650770942e-06,
      "loss": 2.3736,
      "step": 61542
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0454127788543701,
      "learning_rate": 1.909297661291669e-06,
      "loss": 2.2984,
      "step": 61543
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0365108251571655,
      "learning_rate": 1.9090556855283382e-06,
      "loss": 2.451,
      "step": 61544
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.146716594696045,
      "learning_rate": 1.908813723481354e-06,
      "loss": 2.3798,
      "step": 61545
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0522695779800415,
      "learning_rate": 1.908571775151129e-06,
      "loss": 2.1609,
      "step": 61546
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0102094411849976,
      "learning_rate": 1.9083298405380703e-06,
      "loss": 2.1787,
      "step": 61547
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0928666591644287,
      "learning_rate": 1.9080879196425917e-06,
      "loss": 2.2619,
      "step": 61548
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1132382154464722,
      "learning_rate": 1.9078460124651e-06,
      "loss": 2.4218,
      "step": 61549
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.076702356338501,
      "learning_rate": 1.9076041190060092e-06,
      "loss": 2.192,
      "step": 61550
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1480088233947754,
      "learning_rate": 1.9073622392657245e-06,
      "loss": 2.394,
      "step": 61551
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0281610488891602,
      "learning_rate": 1.9071203732446619e-06,
      "loss": 2.4754,
      "step": 61552
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.201755404472351,
      "learning_rate": 1.906878520943225e-06,
      "loss": 2.3226,
      "step": 61553
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1735889911651611,
      "learning_rate": 1.9066366823618298e-06,
      "loss": 2.4425,
      "step": 61554
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1250271797180176,
      "learning_rate": 1.9063948575008817e-06,
      "loss": 2.3123,
      "step": 61555
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0036211013793945,
      "learning_rate": 1.9061530463607947e-06,
      "loss": 2.34,
      "step": 61556
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1658941507339478,
      "learning_rate": 1.905911248941974e-06,
      "loss": 2.5036,
      "step": 61557
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.101589322090149,
      "learning_rate": 1.905669465244837e-06,
      "loss": 2.25,
      "step": 61558
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.22333824634552,
      "learning_rate": 1.9054276952697836e-06,
      "loss": 2.2931,
      "step": 61559
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1469792127609253,
      "learning_rate": 1.905185939017231e-06,
      "loss": 2.32,
      "step": 61560
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0682874917984009,
      "learning_rate": 1.9049441964875848e-06,
      "loss": 2.3494,
      "step": 61561
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1354484558105469,
      "learning_rate": 1.9047024676812588e-06,
      "loss": 2.2499,
      "step": 61562
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0538413524627686,
      "learning_rate": 1.904460752598658e-06,
      "loss": 2.5135,
      "step": 61563
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0966030359268188,
      "learning_rate": 1.904219051240198e-06,
      "loss": 2.4333,
      "step": 61564
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.7860119342803955,
      "learning_rate": 1.9039773636062853e-06,
      "loss": 2.3757,
      "step": 61565
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0934001207351685,
      "learning_rate": 1.903735689697328e-06,
      "loss": 2.2109,
      "step": 61566
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0728274583816528,
      "learning_rate": 1.9034940295137394e-06,
      "loss": 2.447,
      "step": 61567
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.970323920249939,
      "learning_rate": 1.9032523830559247e-06,
      "loss": 2.1086,
      "step": 61568
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.975650429725647,
      "learning_rate": 1.903010750324299e-06,
      "loss": 2.5086,
      "step": 61569
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2567225694656372,
      "learning_rate": 1.9027691313192676e-06,
      "loss": 2.2378,
      "step": 61570
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0749704837799072,
      "learning_rate": 1.9025275260412424e-06,
      "loss": 2.5408,
      "step": 61571
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0550864934921265,
      "learning_rate": 1.902285934490634e-06,
      "loss": 1.9752,
      "step": 61572
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3457787036895752,
      "learning_rate": 1.9020443566678492e-06,
      "loss": 2.1355,
      "step": 61573
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2706407308578491,
      "learning_rate": 1.9018027925732962e-06,
      "loss": 2.2583,
      "step": 61574
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0344117879867554,
      "learning_rate": 1.9015612422073905e-06,
      "loss": 2.2741,
      "step": 61575
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0290402173995972,
      "learning_rate": 1.9013197055705336e-06,
      "loss": 2.196,
      "step": 61576
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0790374279022217,
      "learning_rate": 1.901078182663143e-06,
      "loss": 2.145,
      "step": 61577
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0105668306350708,
      "learning_rate": 1.900836673485622e-06,
      "loss": 2.1342,
      "step": 61578
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0678902864456177,
      "learning_rate": 1.9005951780383847e-06,
      "loss": 2.3025,
      "step": 61579
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.121490240097046,
      "learning_rate": 1.900353696321836e-06,
      "loss": 2.3017,
      "step": 61580
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.684059977531433,
      "learning_rate": 1.9001122283363904e-06,
      "loss": 2.1778,
      "step": 61581
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5265716314315796,
      "learning_rate": 1.8998707740824518e-06,
      "loss": 2.4091,
      "step": 61582
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2387199401855469,
      "learning_rate": 1.8996293335604343e-06,
      "loss": 2.537,
      "step": 61583
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1023225784301758,
      "learning_rate": 1.8993879067707432e-06,
      "loss": 2.4479,
      "step": 61584
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1499435901641846,
      "learning_rate": 1.8991464937137948e-06,
      "loss": 2.3676,
      "step": 61585
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1598641872406006,
      "learning_rate": 1.8989050943899877e-06,
      "loss": 2.3971,
      "step": 61586
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9921619296073914,
      "learning_rate": 1.8986637087997395e-06,
      "loss": 2.223,
      "step": 61587
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0528910160064697,
      "learning_rate": 1.898422336943454e-06,
      "loss": 2.2519,
      "step": 61588
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.222170352935791,
      "learning_rate": 1.8981809788215454e-06,
      "loss": 2.425,
      "step": 61589
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0729517936706543,
      "learning_rate": 1.897939634434418e-06,
      "loss": 2.3743,
      "step": 61590
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2043172121047974,
      "learning_rate": 1.8976983037824858e-06,
      "loss": 2.083,
      "step": 61591
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0665316581726074,
      "learning_rate": 1.8974569868661542e-06,
      "loss": 2.3888,
      "step": 61592
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1304794549942017,
      "learning_rate": 1.8972156836858346e-06,
      "loss": 2.3462,
      "step": 61593
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.009473204612732,
      "learning_rate": 1.896974394241936e-06,
      "loss": 2.3202,
      "step": 61594
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0823928117752075,
      "learning_rate": 1.8967331185348637e-06,
      "loss": 2.2643,
      "step": 61595
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.06471586227417,
      "learning_rate": 1.8964918565650325e-06,
      "loss": 2.1379,
      "step": 61596
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0770816802978516,
      "learning_rate": 1.8962506083328447e-06,
      "loss": 2.252,
      "step": 61597
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1124244928359985,
      "learning_rate": 1.8960093738387165e-06,
      "loss": 2.4831,
      "step": 61598
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1587132215499878,
      "learning_rate": 1.895768153083054e-06,
      "loss": 2.0801,
      "step": 61599
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0843151807785034,
      "learning_rate": 1.895526946066265e-06,
      "loss": 2.5505,
      "step": 61600
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1995517015457153,
      "learning_rate": 1.8952857527887559e-06,
      "loss": 2.4771,
      "step": 61601
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0999356508255005,
      "learning_rate": 1.8950445732509414e-06,
      "loss": 2.5114,
      "step": 61602
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1092544794082642,
      "learning_rate": 1.8948034074532262e-06,
      "loss": 2.5523,
      "step": 61603
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0037277936935425,
      "learning_rate": 1.8945622553960218e-06,
      "loss": 2.3135,
      "step": 61604
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.19526207447052,
      "learning_rate": 1.8943211170797338e-06,
      "loss": 2.0944,
      "step": 61605
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.990091860294342,
      "learning_rate": 1.894079992504776e-06,
      "loss": 2.4414,
      "step": 61606
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.377090573310852,
      "learning_rate": 1.8938388816715503e-06,
      "loss": 2.25,
      "step": 61607
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3943761587142944,
      "learning_rate": 1.8935977845804731e-06,
      "loss": 2.3533,
      "step": 61608
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2234853506088257,
      "learning_rate": 1.8933567012319465e-06,
      "loss": 2.4981,
      "step": 61609
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0967261791229248,
      "learning_rate": 1.8931156316263844e-06,
      "loss": 2.2025,
      "step": 61610
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.255455732345581,
      "learning_rate": 1.892874575764192e-06,
      "loss": 2.3547,
      "step": 61611
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.04556143283844,
      "learning_rate": 1.8926335336457801e-06,
      "loss": 2.6219,
      "step": 61612
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.252453327178955,
      "learning_rate": 1.8923925052715542e-06,
      "loss": 2.6013,
      "step": 61613
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.155698537826538,
      "learning_rate": 1.892151490641927e-06,
      "loss": 2.4397,
      "step": 61614
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1085349321365356,
      "learning_rate": 1.8919104897573026e-06,
      "loss": 2.1196,
      "step": 61615
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3719133138656616,
      "learning_rate": 1.8916695026180953e-06,
      "loss": 2.2614,
      "step": 61616
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1418068408966064,
      "learning_rate": 1.8914285292247069e-06,
      "loss": 2.1466,
      "step": 61617
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0654206275939941,
      "learning_rate": 1.8911875695775516e-06,
      "loss": 2.2194,
      "step": 61618
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1775914430618286,
      "learning_rate": 1.8909466236770347e-06,
      "loss": 2.2781,
      "step": 61619
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2151302099227905,
      "learning_rate": 1.8907056915235667e-06,
      "loss": 2.3709,
      "step": 61620
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0750452280044556,
      "learning_rate": 1.8904647731175552e-06,
      "loss": 2.3205,
      "step": 61621
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1262620687484741,
      "learning_rate": 1.8902238684594066e-06,
      "loss": 2.3422,
      "step": 61622
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1283490657806396,
      "learning_rate": 1.8899829775495337e-06,
      "loss": 2.4369,
      "step": 61623
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9985362887382507,
      "learning_rate": 1.889742100388342e-06,
      "loss": 2.0865,
      "step": 61624
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.201941967010498,
      "learning_rate": 1.8895012369762367e-06,
      "loss": 2.3151,
      "step": 61625
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.084769606590271,
      "learning_rate": 1.889260387313634e-06,
      "loss": 2.3999,
      "step": 61626
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2165751457214355,
      "learning_rate": 1.8890195514009358e-06,
      "loss": 2.4141,
      "step": 61627
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.050289273262024,
      "learning_rate": 1.8887787292385518e-06,
      "loss": 2.4811,
      "step": 61628
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0156381130218506,
      "learning_rate": 1.8885379208268917e-06,
      "loss": 2.1032,
      "step": 61629
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1058536767959595,
      "learning_rate": 1.8882971261663608e-06,
      "loss": 2.2918,
      "step": 61630
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9688659906387329,
      "learning_rate": 1.888056345257372e-06,
      "loss": 2.4052,
      "step": 61631
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0103968381881714,
      "learning_rate": 1.8878155781003293e-06,
      "loss": 2.2426,
      "step": 61632
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0360854864120483,
      "learning_rate": 1.887574824695644e-06,
      "loss": 2.3568,
      "step": 61633
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.051486611366272,
      "learning_rate": 1.8873340850437194e-06,
      "loss": 2.0432,
      "step": 61634
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.028201699256897,
      "learning_rate": 1.8870933591449714e-06,
      "loss": 2.296,
      "step": 61635
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.072582721710205,
      "learning_rate": 1.8868526469997994e-06,
      "loss": 2.2464,
      "step": 61636
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.972285270690918,
      "learning_rate": 1.8866119486086188e-06,
      "loss": 2.2538,
      "step": 61637
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3647106885910034,
      "learning_rate": 1.8863712639718345e-06,
      "loss": 2.488,
      "step": 61638
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0768769979476929,
      "learning_rate": 1.8861305930898543e-06,
      "loss": 2.4766,
      "step": 61639
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0966609716415405,
      "learning_rate": 1.8858899359630845e-06,
      "loss": 2.1827,
      "step": 61640
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0946398973464966,
      "learning_rate": 1.8856492925919368e-06,
      "loss": 2.0835,
      "step": 61641
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2415223121643066,
      "learning_rate": 1.8854086629768154e-06,
      "loss": 2.2333,
      "step": 61642
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0831581354141235,
      "learning_rate": 1.885168047118132e-06,
      "loss": 2.3637,
      "step": 61643
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0471477508544922,
      "learning_rate": 1.8849274450162913e-06,
      "loss": 2.2996,
      "step": 61644
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0262717008590698,
      "learning_rate": 1.8846868566717046e-06,
      "loss": 2.3673,
      "step": 61645
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1594709157943726,
      "learning_rate": 1.8844462820847754e-06,
      "loss": 2.344,
      "step": 61646
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0462348461151123,
      "learning_rate": 1.8842057212559162e-06,
      "loss": 2.4539,
      "step": 61647
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0668665170669556,
      "learning_rate": 1.8839651741855292e-06,
      "loss": 2.2376,
      "step": 61648
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9667033553123474,
      "learning_rate": 1.8837246408740295e-06,
      "loss": 2.5437,
      "step": 61649
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2175023555755615,
      "learning_rate": 1.8834841213218202e-06,
      "loss": 2.517,
      "step": 61650
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.593630075454712,
      "learning_rate": 1.88324361552931e-06,
      "loss": 2.2738,
      "step": 61651
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1392003297805786,
      "learning_rate": 1.8830031234969036e-06,
      "loss": 2.3511,
      "step": 61652
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1195638179779053,
      "learning_rate": 1.8827626452250136e-06,
      "loss": 2.2662,
      "step": 61653
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0220736265182495,
      "learning_rate": 1.8825221807140437e-06,
      "loss": 2.2011,
      "step": 61654
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.210084319114685,
      "learning_rate": 1.8822817299644057e-06,
      "loss": 2.0779,
      "step": 61655
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0478590726852417,
      "learning_rate": 1.8820412929765043e-06,
      "loss": 2.3387,
      "step": 61656
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0473833084106445,
      "learning_rate": 1.881800869750746e-06,
      "loss": 2.2739,
      "step": 61657
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2017416954040527,
      "learning_rate": 1.8815604602875425e-06,
      "loss": 2.3663,
      "step": 61658
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1436398029327393,
      "learning_rate": 1.8813200645872965e-06,
      "loss": 2.3638,
      "step": 61659
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0476770401000977,
      "learning_rate": 1.8810796826504197e-06,
      "loss": 2.3599,
      "step": 61660
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1481103897094727,
      "learning_rate": 1.8808393144773164e-06,
      "loss": 2.4786,
      "step": 61661
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1461026668548584,
      "learning_rate": 1.8805989600683971e-06,
      "loss": 2.2518,
      "step": 61662
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.124976396560669,
      "learning_rate": 1.8803586194240664e-06,
      "loss": 2.171,
      "step": 61663
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0491156578063965,
      "learning_rate": 1.880118292544737e-06,
      "loss": 2.2033,
      "step": 61664
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1396799087524414,
      "learning_rate": 1.8798779794308086e-06,
      "loss": 2.6457,
      "step": 61665
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1054675579071045,
      "learning_rate": 1.8796376800826944e-06,
      "loss": 2.348,
      "step": 61666
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0116230249404907,
      "learning_rate": 1.879397394500797e-06,
      "loss": 2.2589,
      "step": 61667
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1035118103027344,
      "learning_rate": 1.8791571226855288e-06,
      "loss": 2.5132,
      "step": 61668
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0733200311660767,
      "learning_rate": 1.8789168646372935e-06,
      "loss": 2.1969,
      "step": 61669
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0117250680923462,
      "learning_rate": 1.878676620356501e-06,
      "loss": 2.3605,
      "step": 61670
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.101088047027588,
      "learning_rate": 1.8784363898435553e-06,
      "loss": 2.2992,
      "step": 61671
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9764238595962524,
      "learning_rate": 1.8781961730988685e-06,
      "loss": 2.4548,
      "step": 61672
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0991082191467285,
      "learning_rate": 1.8779559701228422e-06,
      "loss": 2.3456,
      "step": 61673
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2474499940872192,
      "learning_rate": 1.8777157809158896e-06,
      "loss": 2.5415,
      "step": 61674
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1086056232452393,
      "learning_rate": 1.8774756054784116e-06,
      "loss": 2.1827,
      "step": 61675
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0444663763046265,
      "learning_rate": 1.877235443810821e-06,
      "loss": 2.2856,
      "step": 61676
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.012693166732788,
      "learning_rate": 1.8769952959135229e-06,
      "loss": 2.5272,
      "step": 61677
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.308643102645874,
      "learning_rate": 1.8767551617869229e-06,
      "loss": 2.4927,
      "step": 61678
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0418347120285034,
      "learning_rate": 1.8765150414314282e-06,
      "loss": 2.4133,
      "step": 61679
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4441224336624146,
      "learning_rate": 1.8762749348474485e-06,
      "loss": 2.096,
      "step": 61680
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1131408214569092,
      "learning_rate": 1.8760348420353857e-06,
      "loss": 2.1991,
      "step": 61681
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0541962385177612,
      "learning_rate": 1.8757947629956541e-06,
      "loss": 2.1362,
      "step": 61682
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.393747329711914,
      "learning_rate": 1.8755546977286565e-06,
      "loss": 2.4934,
      "step": 61683
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0776506662368774,
      "learning_rate": 1.8753146462347982e-06,
      "loss": 2.3772,
      "step": 61684
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0909758806228638,
      "learning_rate": 1.87507460851449e-06,
      "loss": 2.1973,
      "step": 61685
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0811562538146973,
      "learning_rate": 1.8748345845681348e-06,
      "loss": 2.3536,
      "step": 61686
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0624743700027466,
      "learning_rate": 1.8745945743961446e-06,
      "loss": 2.2194,
      "step": 61687
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1235812902450562,
      "learning_rate": 1.8743545779989202e-06,
      "loss": 2.2636,
      "step": 61688
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1587754487991333,
      "learning_rate": 1.8741145953768747e-06,
      "loss": 2.3483,
      "step": 61689
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0863620042800903,
      "learning_rate": 1.873874626530412e-06,
      "loss": 2.2431,
      "step": 61690
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1592286825180054,
      "learning_rate": 1.8736346714599385e-06,
      "loss": 2.175,
      "step": 61691
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0325391292572021,
      "learning_rate": 1.8733947301658584e-06,
      "loss": 2.2039,
      "step": 61692
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.122754454612732,
      "learning_rate": 1.8731548026485846e-06,
      "loss": 2.3281,
      "step": 61693
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0062756538391113,
      "learning_rate": 1.8729148889085181e-06,
      "loss": 2.4325,
      "step": 61694
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1792378425598145,
      "learning_rate": 1.8726749889460706e-06,
      "loss": 2.3135,
      "step": 61695
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.134137511253357,
      "learning_rate": 1.872435102761644e-06,
      "loss": 2.2396,
      "step": 61696
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0593209266662598,
      "learning_rate": 1.872195230355649e-06,
      "loss": 2.2934,
      "step": 61697
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.097338318824768,
      "learning_rate": 1.8719553717284888e-06,
      "loss": 2.149,
      "step": 61698
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9879105687141418,
      "learning_rate": 1.8717155268805742e-06,
      "loss": 2.2082,
      "step": 61699
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0638670921325684,
      "learning_rate": 1.871475695812307e-06,
      "loss": 2.282,
      "step": 61700
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0873165130615234,
      "learning_rate": 1.8712358785240981e-06,
      "loss": 2.4656,
      "step": 61701
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.037776231765747,
      "learning_rate": 1.8709960750163503e-06,
      "loss": 2.3404,
      "step": 61702
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1180089712142944,
      "learning_rate": 1.8707562852894768e-06,
      "loss": 2.5111,
      "step": 61703
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0667898654937744,
      "learning_rate": 1.8705165093438738e-06,
      "loss": 2.2205,
      "step": 61704
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.107704758644104,
      "learning_rate": 1.8702767471799565e-06,
      "loss": 2.1299,
      "step": 61705
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1897425651550293,
      "learning_rate": 1.8700369987981249e-06,
      "loss": 2.33,
      "step": 61706
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1242262125015259,
      "learning_rate": 1.8697972641987915e-06,
      "loss": 2.2704,
      "step": 61707
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0918397903442383,
      "learning_rate": 1.8695575433823576e-06,
      "loss": 2.331,
      "step": 61708
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0727112293243408,
      "learning_rate": 1.8693178363492337e-06,
      "loss": 2.2839,
      "step": 61709
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.048281192779541,
      "learning_rate": 1.8690781430998229e-06,
      "loss": 2.3684,
      "step": 61710
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1811658143997192,
      "learning_rate": 1.8688384636345347e-06,
      "loss": 2.1745,
      "step": 61711
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.121374487876892,
      "learning_rate": 1.8685987979537735e-06,
      "loss": 2.2841,
      "step": 61712
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2541303634643555,
      "learning_rate": 1.8683591460579443e-06,
      "loss": 2.4151,
      "step": 61713
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9395462870597839,
      "learning_rate": 1.8681195079474556e-06,
      "loss": 2.4547,
      "step": 61714
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0448927879333496,
      "learning_rate": 1.8678798836227119e-06,
      "loss": 2.41,
      "step": 61715
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1109601259231567,
      "learning_rate": 1.8676402730841227e-06,
      "loss": 2.1243,
      "step": 61716
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0925270318984985,
      "learning_rate": 1.8674006763320918e-06,
      "loss": 2.1577,
      "step": 61717
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1228197813034058,
      "learning_rate": 1.8671610933670258e-06,
      "loss": 2.2539,
      "step": 61718
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9664022922515869,
      "learning_rate": 1.866921524189328e-06,
      "loss": 2.2055,
      "step": 61719
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.119247317314148,
      "learning_rate": 1.8666819687994086e-06,
      "loss": 2.3555,
      "step": 61720
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.191990852355957,
      "learning_rate": 1.8664424271976711e-06,
      "loss": 2.2546,
      "step": 61721
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9914718866348267,
      "learning_rate": 1.8662028993845248e-06,
      "loss": 2.4844,
      "step": 61722
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1019442081451416,
      "learning_rate": 1.8659633853603709e-06,
      "loss": 2.0702,
      "step": 61723
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1622220277786255,
      "learning_rate": 1.865723885125621e-06,
      "loss": 2.3307,
      "step": 61724
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1412880420684814,
      "learning_rate": 1.8654843986806758e-06,
      "loss": 2.2831,
      "step": 61725
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2996622323989868,
      "learning_rate": 1.8652449260259464e-06,
      "loss": 2.2886,
      "step": 61726
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.051279902458191,
      "learning_rate": 1.8650054671618334e-06,
      "loss": 2.2894,
      "step": 61727
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.049173355102539,
      "learning_rate": 1.8647660220887486e-06,
      "loss": 2.4285,
      "step": 61728
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3776805400848389,
      "learning_rate": 1.864526590807092e-06,
      "loss": 2.2045,
      "step": 61729
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0899009704589844,
      "learning_rate": 1.8642871733172785e-06,
      "loss": 2.3446,
      "step": 61730
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0706700086593628,
      "learning_rate": 1.8640477696197012e-06,
      "loss": 2.2538,
      "step": 61731
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1644424200057983,
      "learning_rate": 1.8638083797147766e-06,
      "loss": 2.2319,
      "step": 61732
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0278958082199097,
      "learning_rate": 1.863569003602903e-06,
      "loss": 2.1357,
      "step": 61733
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0114045143127441,
      "learning_rate": 1.8633296412844926e-06,
      "loss": 2.0987,
      "step": 61734
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0455925464630127,
      "learning_rate": 1.8630902927599458e-06,
      "loss": 2.4292,
      "step": 61735
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.104905366897583,
      "learning_rate": 1.8628509580296728e-06,
      "loss": 2.5558,
      "step": 61736
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1270161867141724,
      "learning_rate": 1.862611637094076e-06,
      "loss": 2.2246,
      "step": 61737
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0163898468017578,
      "learning_rate": 1.8623723299535645e-06,
      "loss": 2.4182,
      "step": 61738
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2327214479446411,
      "learning_rate": 1.8621330366085423e-06,
      "loss": 2.264,
      "step": 61739
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.170191764831543,
      "learning_rate": 1.861893757059412e-06,
      "loss": 2.1469,
      "step": 61740
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.216119408607483,
      "learning_rate": 1.8616544913065847e-06,
      "loss": 2.4625,
      "step": 61741
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.237481713294983,
      "learning_rate": 1.8614152393504614e-06,
      "loss": 2.5023,
      "step": 61742
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0409380197525024,
      "learning_rate": 1.8611760011914514e-06,
      "loss": 2.5338,
      "step": 61743
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0989060401916504,
      "learning_rate": 1.8609367768299591e-06,
      "loss": 2.4618,
      "step": 61744
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1104226112365723,
      "learning_rate": 1.8606975662663896e-06,
      "loss": 2.2423,
      "step": 61745
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1385376453399658,
      "learning_rate": 1.8604583695011446e-06,
      "loss": 2.2525,
      "step": 61746
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0774699449539185,
      "learning_rate": 1.8602191865346376e-06,
      "loss": 2.3183,
      "step": 61747
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.012579083442688,
      "learning_rate": 1.8599800173672667e-06,
      "loss": 2.0528,
      "step": 61748
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0722041130065918,
      "learning_rate": 1.8597408619994418e-06,
      "loss": 2.4336,
      "step": 61749
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0484049320220947,
      "learning_rate": 1.8595017204315657e-06,
      "loss": 2.4118,
      "step": 61750
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1224145889282227,
      "learning_rate": 1.8592625926640473e-06,
      "loss": 2.2423,
      "step": 61751
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3800288438796997,
      "learning_rate": 1.8590234786972872e-06,
      "loss": 2.307,
      "step": 61752
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.128346562385559,
      "learning_rate": 1.858784378531696e-06,
      "loss": 2.5881,
      "step": 61753
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1494951248168945,
      "learning_rate": 1.8585452921676738e-06,
      "loss": 2.3592,
      "step": 61754
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1274241209030151,
      "learning_rate": 1.8583062196056312e-06,
      "loss": 2.3596,
      "step": 61755
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0864734649658203,
      "learning_rate": 1.8580671608459711e-06,
      "loss": 2.2301,
      "step": 61756
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9815567135810852,
      "learning_rate": 1.8578281158890988e-06,
      "loss": 2.3243,
      "step": 61757
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0733282566070557,
      "learning_rate": 1.857589084735415e-06,
      "loss": 2.4126,
      "step": 61758
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1500080823898315,
      "learning_rate": 1.8573500673853329e-06,
      "loss": 2.3946,
      "step": 61759
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0124003887176514,
      "learning_rate": 1.8571110638392508e-06,
      "loss": 2.3649,
      "step": 61760
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2015670537948608,
      "learning_rate": 1.856872074097581e-06,
      "loss": 2.3722,
      "step": 61761
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2055532932281494,
      "learning_rate": 1.8566330981607206e-06,
      "loss": 2.1456,
      "step": 61762
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1186243295669556,
      "learning_rate": 1.8563941360290816e-06,
      "loss": 2.4195,
      "step": 61763
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0048737525939941,
      "learning_rate": 1.8561551877030647e-06,
      "loss": 2.2046,
      "step": 61764
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2048485279083252,
      "learning_rate": 1.8559162531830788e-06,
      "loss": 2.3268,
      "step": 61765
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.174783706665039,
      "learning_rate": 1.8556773324695233e-06,
      "loss": 2.3015,
      "step": 61766
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9799668788909912,
      "learning_rate": 1.85543842556281e-06,
      "loss": 2.4363,
      "step": 61767
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1020891666412354,
      "learning_rate": 1.8551995324633398e-06,
      "loss": 2.2316,
      "step": 61768
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1837902069091797,
      "learning_rate": 1.8549606531715192e-06,
      "loss": 2.271,
      "step": 61769
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2032889127731323,
      "learning_rate": 1.8547217876877498e-06,
      "loss": 2.2312,
      "step": 61770
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2040691375732422,
      "learning_rate": 1.8544829360124428e-06,
      "loss": 2.0394,
      "step": 61771
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.107566237449646,
      "learning_rate": 1.8542440981459953e-06,
      "loss": 2.3258,
      "step": 61772
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0850719213485718,
      "learning_rate": 1.8540052740888193e-06,
      "loss": 2.1756,
      "step": 61773
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0302627086639404,
      "learning_rate": 1.8537664638413177e-06,
      "loss": 2.3339,
      "step": 61774
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0412390232086182,
      "learning_rate": 1.8535276674038916e-06,
      "loss": 2.2164,
      "step": 61775
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0292894840240479,
      "learning_rate": 1.8532888847769503e-06,
      "loss": 2.3575,
      "step": 61776
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1840715408325195,
      "learning_rate": 1.8530501159608948e-06,
      "loss": 2.2512,
      "step": 61777
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.980582058429718,
      "learning_rate": 1.8528113609561348e-06,
      "loss": 2.4876,
      "step": 61778
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0894235372543335,
      "learning_rate": 1.8525726197630701e-06,
      "loss": 2.3496,
      "step": 61779
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1581529378890991,
      "learning_rate": 1.8523338923821099e-06,
      "loss": 2.2082,
      "step": 61780
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.151208758354187,
      "learning_rate": 1.8520951788136532e-06,
      "loss": 2.1981,
      "step": 61781
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.114616870880127,
      "learning_rate": 1.8518564790581118e-06,
      "loss": 2.4542,
      "step": 61782
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0583552122116089,
      "learning_rate": 1.8516177931158862e-06,
      "loss": 2.1292,
      "step": 61783
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0992374420166016,
      "learning_rate": 1.8513791209873821e-06,
      "loss": 2.4875,
      "step": 61784
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1050028800964355,
      "learning_rate": 1.851140462673e-06,
      "loss": 2.4852,
      "step": 61785
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.106399655342102,
      "learning_rate": 1.8509018181731509e-06,
      "loss": 2.3597,
      "step": 61786
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6027741432189941,
      "learning_rate": 1.8506631874882341e-06,
      "loss": 2.4425,
      "step": 61787
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0332181453704834,
      "learning_rate": 1.8504245706186596e-06,
      "loss": 2.1886,
      "step": 61788
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0665793418884277,
      "learning_rate": 1.850185967564826e-06,
      "loss": 2.3679,
      "step": 61789
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1370502710342407,
      "learning_rate": 1.8499473783271427e-06,
      "loss": 2.4364,
      "step": 61790
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.12782883644104,
      "learning_rate": 1.8497088029060095e-06,
      "loss": 2.3806,
      "step": 61791
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0778930187225342,
      "learning_rate": 1.8494702413018361e-06,
      "loss": 2.329,
      "step": 61792
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0559636354446411,
      "learning_rate": 1.8492316935150224e-06,
      "loss": 2.3075,
      "step": 61793
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0965906381607056,
      "learning_rate": 1.8489931595459765e-06,
      "loss": 2.4486,
      "step": 61794
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.090404987335205,
      "learning_rate": 1.8487546393951018e-06,
      "loss": 2.1938,
      "step": 61795
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1649175882339478,
      "learning_rate": 1.848516133062801e-06,
      "loss": 2.3222,
      "step": 61796
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.08685302734375,
      "learning_rate": 1.8482776405494774e-06,
      "loss": 2.1962,
      "step": 61797
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1440999507904053,
      "learning_rate": 1.8480391618555393e-06,
      "loss": 2.4037,
      "step": 61798
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1269928216934204,
      "learning_rate": 1.8478006969813856e-06,
      "loss": 2.4337,
      "step": 61799
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.055500864982605,
      "learning_rate": 1.8475622459274279e-06,
      "loss": 2.0981,
      "step": 61800
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0697569847106934,
      "learning_rate": 1.8473238086940658e-06,
      "loss": 2.3557,
      "step": 61801
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2474446296691895,
      "learning_rate": 1.8470853852817006e-06,
      "loss": 2.2639,
      "step": 61802
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0249212980270386,
      "learning_rate": 1.8468469756907436e-06,
      "loss": 2.103,
      "step": 61803
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.226164698600769,
      "learning_rate": 1.8466085799215927e-06,
      "loss": 2.3013,
      "step": 61804
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0192794799804688,
      "learning_rate": 1.8463701979746573e-06,
      "loss": 2.2172,
      "step": 61805
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.041006326675415,
      "learning_rate": 1.846131829850336e-06,
      "loss": 2.3415,
      "step": 61806
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0508456230163574,
      "learning_rate": 1.8458934755490376e-06,
      "loss": 2.2481,
      "step": 61807
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0119953155517578,
      "learning_rate": 1.8456551350711626e-06,
      "loss": 2.3366,
      "step": 61808
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.182623028755188,
      "learning_rate": 1.845416808417122e-06,
      "loss": 2.4207,
      "step": 61809
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0835809707641602,
      "learning_rate": 1.8451784955873097e-06,
      "loss": 2.0363,
      "step": 61810
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1335294246673584,
      "learning_rate": 1.8449401965821378e-06,
      "loss": 2.3612,
      "step": 61811
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0223459005355835,
      "learning_rate": 1.8447019114020037e-06,
      "loss": 2.3263,
      "step": 61812
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1312906742095947,
      "learning_rate": 1.844463640047317e-06,
      "loss": 2.2616,
      "step": 61813
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0150264501571655,
      "learning_rate": 1.8442253825184776e-06,
      "loss": 2.2871,
      "step": 61814
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1933774948120117,
      "learning_rate": 1.8439871388158937e-06,
      "loss": 2.3295,
      "step": 61815
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0798938274383545,
      "learning_rate": 1.843748908939964e-06,
      "loss": 2.3908,
      "step": 61816
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1099109649658203,
      "learning_rate": 1.8435106928910984e-06,
      "loss": 2.3089,
      "step": 61817
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1502783298492432,
      "learning_rate": 1.8432724906696943e-06,
      "loss": 2.3086,
      "step": 61818
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1330909729003906,
      "learning_rate": 1.843034302276161e-06,
      "loss": 2.1919,
      "step": 61819
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.1327115297317505,
      "learning_rate": 1.8427961277108974e-06,
      "loss": 2.0253,
      "step": 61820
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2270921468734741,
      "learning_rate": 1.8425579669743131e-06,
      "loss": 2.5012,
      "step": 61821
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0946917533874512,
      "learning_rate": 1.842319820066808e-06,
      "loss": 2.3046,
      "step": 61822
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0674645900726318,
      "learning_rate": 1.8420816869887858e-06,
      "loss": 2.3821,
      "step": 61823
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.68220853805542,
      "learning_rate": 1.84184356774065e-06,
      "loss": 2.1731,
      "step": 61824
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.017919898033142,
      "learning_rate": 1.8416054623228064e-06,
      "loss": 2.3332,
      "step": 61825
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0810837745666504,
      "learning_rate": 1.841367370735655e-06,
      "loss": 2.4974,
      "step": 61826
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0343806743621826,
      "learning_rate": 1.8411292929796044e-06,
      "loss": 2.2579,
      "step": 61827
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1491466760635376,
      "learning_rate": 1.8408912290550517e-06,
      "loss": 2.2297,
      "step": 61828
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1328279972076416,
      "learning_rate": 1.8406531789624081e-06,
      "loss": 2.3234,
      "step": 61829
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0414519309997559,
      "learning_rate": 1.8404151427020743e-06,
      "loss": 2.2958,
      "step": 61830
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1046584844589233,
      "learning_rate": 1.840177120274449e-06,
      "loss": 2.2894,
      "step": 61831
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0721780061721802,
      "learning_rate": 1.8399391116799426e-06,
      "loss": 2.2288,
      "step": 61832
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0360804796218872,
      "learning_rate": 1.8397011169189538e-06,
      "loss": 2.5353,
      "step": 61833
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1019772291183472,
      "learning_rate": 1.839463135991889e-06,
      "loss": 2.4509,
      "step": 61834
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0258526802062988,
      "learning_rate": 1.8392251688991514e-06,
      "loss": 2.2468,
      "step": 61835
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0798771381378174,
      "learning_rate": 1.8389872156411438e-06,
      "loss": 2.2616,
      "step": 61836
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.098771095275879,
      "learning_rate": 1.8387492762182667e-06,
      "loss": 2.4827,
      "step": 61837
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1067214012145996,
      "learning_rate": 1.8385113506309283e-06,
      "loss": 2.2521,
      "step": 61838
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0469919443130493,
      "learning_rate": 1.8382734388795275e-06,
      "loss": 1.9795,
      "step": 61839
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.093187689781189,
      "learning_rate": 1.8380355409644734e-06,
      "loss": 2.5099,
      "step": 61840
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.013921856880188,
      "learning_rate": 1.8377976568861622e-06,
      "loss": 2.4534,
      "step": 61841
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0289623737335205,
      "learning_rate": 1.8375597866450035e-06,
      "loss": 2.616,
      "step": 61842
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0132118463516235,
      "learning_rate": 1.8373219302413948e-06,
      "loss": 2.184,
      "step": 61843
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9970088601112366,
      "learning_rate": 1.8370840876757457e-06,
      "loss": 2.3165,
      "step": 61844
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0766741037368774,
      "learning_rate": 1.8368462589484537e-06,
      "loss": 2.2483,
      "step": 61845
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.152600884437561,
      "learning_rate": 1.8366084440599264e-06,
      "loss": 2.2571,
      "step": 61846
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0567286014556885,
      "learning_rate": 1.836370643010562e-06,
      "loss": 2.4907,
      "step": 61847
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1128160953521729,
      "learning_rate": 1.836132855800773e-06,
      "loss": 2.232,
      "step": 61848
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0683343410491943,
      "learning_rate": 1.8358950824309496e-06,
      "loss": 2.1801,
      "step": 61849
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1134611368179321,
      "learning_rate": 1.835657322901504e-06,
      "loss": 2.4103,
      "step": 61850
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0009242296218872,
      "learning_rate": 1.8354195772128346e-06,
      "loss": 2.2868,
      "step": 61851
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.010125994682312,
      "learning_rate": 1.8351818453653492e-06,
      "loss": 2.2133,
      "step": 61852
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0650744438171387,
      "learning_rate": 1.834944127359446e-06,
      "loss": 2.3269,
      "step": 61853
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3320446014404297,
      "learning_rate": 1.8347064231955313e-06,
      "loss": 2.2634,
      "step": 61854
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0039024353027344,
      "learning_rate": 1.8344687328740053e-06,
      "loss": 2.2566,
      "step": 61855
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1316349506378174,
      "learning_rate": 1.8342310563952749e-06,
      "loss": 2.2634,
      "step": 61856
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2074475288391113,
      "learning_rate": 1.833993393759741e-06,
      "loss": 2.4655,
      "step": 61857
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1687872409820557,
      "learning_rate": 1.8337557449678024e-06,
      "loss": 2.2979,
      "step": 61858
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0756527185440063,
      "learning_rate": 1.8335181100198696e-06,
      "loss": 2.348,
      "step": 61859
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1071476936340332,
      "learning_rate": 1.833280488916338e-06,
      "loss": 2.4576,
      "step": 61860
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0610231161117554,
      "learning_rate": 1.8330428816576174e-06,
      "loss": 2.4753,
      "step": 61861
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0708208084106445,
      "learning_rate": 1.8328052882441072e-06,
      "loss": 2.5266,
      "step": 61862
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1160637140274048,
      "learning_rate": 1.8325677086762095e-06,
      "loss": 2.3714,
      "step": 61863
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0966461896896362,
      "learning_rate": 1.832330142954326e-06,
      "loss": 2.3578,
      "step": 61864
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0870176553726196,
      "learning_rate": 1.8320925910788633e-06,
      "loss": 2.3689,
      "step": 61865
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0977418422698975,
      "learning_rate": 1.8318550530502198e-06,
      "loss": 2.3272,
      "step": 61866
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0866575241088867,
      "learning_rate": 1.8316175288688021e-06,
      "loss": 2.3047,
      "step": 61867
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1920666694641113,
      "learning_rate": 1.8313800185350094e-06,
      "loss": 2.155,
      "step": 61868
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1040143966674805,
      "learning_rate": 1.8311425220492484e-06,
      "loss": 2.2309,
      "step": 61869
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.058491826057434,
      "learning_rate": 1.8309050394119177e-06,
      "loss": 2.4343,
      "step": 61870
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.148730993270874,
      "learning_rate": 1.8306675706234234e-06,
      "loss": 2.3912,
      "step": 61871
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0073304176330566,
      "learning_rate": 1.8304301156841641e-06,
      "loss": 2.3732,
      "step": 61872
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0467079877853394,
      "learning_rate": 1.8301926745945464e-06,
      "loss": 2.3259,
      "step": 61873
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0304054021835327,
      "learning_rate": 1.8299552473549697e-06,
      "loss": 2.1523,
      "step": 61874
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0638892650604248,
      "learning_rate": 1.8297178339658416e-06,
      "loss": 2.2832,
      "step": 61875
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0395010709762573,
      "learning_rate": 1.8294804344275574e-06,
      "loss": 2.424,
      "step": 61876
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.609623670578003,
      "learning_rate": 1.8292430487405244e-06,
      "loss": 2.5455,
      "step": 61877
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9603723287582397,
      "learning_rate": 1.8290056769051411e-06,
      "loss": 2.3751,
      "step": 61878
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2074538469314575,
      "learning_rate": 1.8287683189218141e-06,
      "loss": 2.2555,
      "step": 61879
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0289298295974731,
      "learning_rate": 1.828530974790943e-06,
      "loss": 2.5656,
      "step": 61880
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2931467294692993,
      "learning_rate": 1.8282936445129328e-06,
      "loss": 2.1158,
      "step": 61881
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2651617527008057,
      "learning_rate": 1.8280563280881824e-06,
      "loss": 2.2088,
      "step": 61882
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1816232204437256,
      "learning_rate": 1.827819025517098e-06,
      "loss": 2.2748,
      "step": 61883
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9988532662391663,
      "learning_rate": 1.8275817368000782e-06,
      "loss": 2.385,
      "step": 61884
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.072780966758728,
      "learning_rate": 1.827344461937528e-06,
      "loss": 2.3425,
      "step": 61885
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.095669150352478,
      "learning_rate": 1.8271072009298496e-06,
      "loss": 2.4031,
      "step": 61886
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0535022020339966,
      "learning_rate": 1.8268699537774416e-06,
      "loss": 2.3416,
      "step": 61887
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1114224195480347,
      "learning_rate": 1.826632720480711e-06,
      "loss": 2.406,
      "step": 61888
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1499894857406616,
      "learning_rate": 1.826395501040058e-06,
      "loss": 2.2484,
      "step": 61889
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0993809700012207,
      "learning_rate": 1.826158295455882e-06,
      "loss": 2.5231,
      "step": 61890
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0595898628234863,
      "learning_rate": 1.8259211037285906e-06,
      "loss": 2.2672,
      "step": 61891
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.182221531867981,
      "learning_rate": 1.8256839258585835e-06,
      "loss": 2.3545,
      "step": 61892
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0853205919265747,
      "learning_rate": 1.825446761846258e-06,
      "loss": 2.3251,
      "step": 61893
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0259143114089966,
      "learning_rate": 1.8252096116920248e-06,
      "loss": 2.4089,
      "step": 61894
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0483262538909912,
      "learning_rate": 1.8249724753962773e-06,
      "loss": 2.2848,
      "step": 61895
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.21671462059021,
      "learning_rate": 1.8247353529594257e-06,
      "loss": 2.247,
      "step": 61896
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.066436767578125,
      "learning_rate": 1.824498244381865e-06,
      "loss": 2.3428,
      "step": 61897
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0144785642623901,
      "learning_rate": 1.8242611496640017e-06,
      "loss": 2.1829,
      "step": 61898
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0751368999481201,
      "learning_rate": 1.8240240688062348e-06,
      "loss": 2.3794,
      "step": 61899
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0488245487213135,
      "learning_rate": 1.8237870018089709e-06,
      "loss": 2.2392,
      "step": 61900
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1664429903030396,
      "learning_rate": 1.8235499486726072e-06,
      "loss": 2.4603,
      "step": 61901
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2658308744430542,
      "learning_rate": 1.8233129093975476e-06,
      "loss": 2.1636,
      "step": 61902
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1735771894454956,
      "learning_rate": 1.8230758839841911e-06,
      "loss": 2.1349,
      "step": 61903
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0739067792892456,
      "learning_rate": 1.8228388724329439e-06,
      "loss": 2.44,
      "step": 61904
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0397626161575317,
      "learning_rate": 1.822601874744203e-06,
      "loss": 2.1262,
      "step": 61905
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0465608835220337,
      "learning_rate": 1.8223648909183767e-06,
      "loss": 2.3274,
      "step": 61906
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9828604459762573,
      "learning_rate": 1.8221279209558595e-06,
      "loss": 2.276,
      "step": 61907
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.176585078239441,
      "learning_rate": 1.8218909648570594e-06,
      "loss": 2.0496,
      "step": 61908
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4650142192840576,
      "learning_rate": 1.8216540226223722e-06,
      "loss": 2.3184,
      "step": 61909
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0831559896469116,
      "learning_rate": 1.8214170942522047e-06,
      "loss": 2.3705,
      "step": 61910
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9501838684082031,
      "learning_rate": 1.8211801797469552e-06,
      "loss": 2.2309,
      "step": 61911
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2340975999832153,
      "learning_rate": 1.820943279107028e-06,
      "loss": 2.2274,
      "step": 61912
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0657365322113037,
      "learning_rate": 1.820706392332824e-06,
      "loss": 2.4224,
      "step": 61913
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1063766479492188,
      "learning_rate": 1.8204695194247445e-06,
      "loss": 2.2611,
      "step": 61914
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2491333484649658,
      "learning_rate": 1.8202326603831876e-06,
      "loss": 2.4479,
      "step": 61915
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.101201057434082,
      "learning_rate": 1.8199958152085606e-06,
      "loss": 2.4329,
      "step": 61916
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1098840236663818,
      "learning_rate": 1.8197589839012597e-06,
      "loss": 2.2175,
      "step": 61917
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.022936224937439,
      "learning_rate": 1.8195221664616925e-06,
      "loss": 2.1632,
      "step": 61918
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0627522468566895,
      "learning_rate": 1.8192853628902563e-06,
      "loss": 2.3642,
      "step": 61919
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1733797788619995,
      "learning_rate": 1.8190485731873497e-06,
      "loss": 2.4414,
      "step": 61920
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0465021133422852,
      "learning_rate": 1.8188117973533815e-06,
      "loss": 2.1873,
      "step": 61921
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1939398050308228,
      "learning_rate": 1.8185750353887466e-06,
      "loss": 2.3999,
      "step": 61922
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0866076946258545,
      "learning_rate": 1.8183382872938515e-06,
      "loss": 2.2154,
      "step": 61923
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0001367330551147,
      "learning_rate": 1.8181015530690927e-06,
      "loss": 2.371,
      "step": 61924
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1349560022354126,
      "learning_rate": 1.8178648327148762e-06,
      "loss": 2.0968,
      "step": 61925
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1365431547164917,
      "learning_rate": 1.8176281262315986e-06,
      "loss": 2.3137,
      "step": 61926
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1367688179016113,
      "learning_rate": 1.8173914336196685e-06,
      "loss": 2.2471,
      "step": 61927
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9863752722740173,
      "learning_rate": 1.8171547548794776e-06,
      "loss": 2.1944,
      "step": 61928
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0668919086456299,
      "learning_rate": 1.8169180900114336e-06,
      "loss": 2.3257,
      "step": 61929
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0760008096694946,
      "learning_rate": 1.8166814390159338e-06,
      "loss": 2.2188,
      "step": 61930
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3401528596878052,
      "learning_rate": 1.8164448018933844e-06,
      "loss": 2.3187,
      "step": 61931
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.042352557182312,
      "learning_rate": 1.816208178644181e-06,
      "loss": 2.2282,
      "step": 61932
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1585896015167236,
      "learning_rate": 1.8159715692687296e-06,
      "loss": 2.3647,
      "step": 61933
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0711685419082642,
      "learning_rate": 1.815734973767428e-06,
      "loss": 2.2543,
      "step": 61934
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0837833881378174,
      "learning_rate": 1.8154983921406787e-06,
      "loss": 2.2712,
      "step": 61935
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1080381870269775,
      "learning_rate": 1.815261824388882e-06,
      "loss": 2.3151,
      "step": 61936
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.211209774017334,
      "learning_rate": 1.8150252705124405e-06,
      "loss": 2.2644,
      "step": 61937
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0977580547332764,
      "learning_rate": 1.814788730511753e-06,
      "loss": 2.0553,
      "step": 61938
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0860872268676758,
      "learning_rate": 1.8145522043872232e-06,
      "loss": 2.3225,
      "step": 61939
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1737644672393799,
      "learning_rate": 1.8143156921392513e-06,
      "loss": 2.024,
      "step": 61940
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9217882752418518,
      "learning_rate": 1.8140791937682368e-06,
      "loss": 2.1739,
      "step": 61941
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.103572130203247,
      "learning_rate": 1.8138427092745792e-06,
      "loss": 2.0802,
      "step": 61942
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3118946552276611,
      "learning_rate": 1.8136062386586838e-06,
      "loss": 2.4113,
      "step": 61943
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1757856607437134,
      "learning_rate": 1.8133697819209461e-06,
      "loss": 2.4783,
      "step": 61944
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1249502897262573,
      "learning_rate": 1.8131333390617735e-06,
      "loss": 2.0445,
      "step": 61945
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.098201036453247,
      "learning_rate": 1.8128969100815609e-06,
      "loss": 2.3256,
      "step": 61946
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0570603609085083,
      "learning_rate": 1.8126604949807137e-06,
      "loss": 2.5012,
      "step": 61947
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1684976816177368,
      "learning_rate": 1.8124240937596315e-06,
      "loss": 2.3213,
      "step": 61948
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1101531982421875,
      "learning_rate": 1.812187706418711e-06,
      "loss": 2.3052,
      "step": 61949
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.123560905456543,
      "learning_rate": 1.8119513329583583e-06,
      "loss": 2.2883,
      "step": 61950
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1749662160873413,
      "learning_rate": 1.8117149733789697e-06,
      "loss": 2.5141,
      "step": 61951
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3056854009628296,
      "learning_rate": 1.8114786276809516e-06,
      "loss": 2.3928,
      "step": 61952
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0548864603042603,
      "learning_rate": 1.8112422958646969e-06,
      "loss": 2.5411,
      "step": 61953
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2385274171829224,
      "learning_rate": 1.8110059779306166e-06,
      "loss": 2.2712,
      "step": 61954
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.060723066329956,
      "learning_rate": 1.8107696738791002e-06,
      "loss": 2.3397,
      "step": 61955
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1061521768569946,
      "learning_rate": 1.8105333837105566e-06,
      "loss": 2.2374,
      "step": 61956
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1930172443389893,
      "learning_rate": 1.8102971074253794e-06,
      "loss": 2.6695,
      "step": 61957
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.161154866218567,
      "learning_rate": 1.8100608450239755e-06,
      "loss": 2.5364,
      "step": 61958
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0771929025650024,
      "learning_rate": 1.8098245965067407e-06,
      "loss": 2.3959,
      "step": 61959
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1245524883270264,
      "learning_rate": 1.8095883618740806e-06,
      "loss": 2.4174,
      "step": 61960
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0314775705337524,
      "learning_rate": 1.8093521411263893e-06,
      "loss": 2.3072,
      "step": 61961
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9671192765235901,
      "learning_rate": 1.8091159342640741e-06,
      "loss": 2.4038,
      "step": 61962
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1639777421951294,
      "learning_rate": 1.8088797412875281e-06,
      "loss": 2.2832,
      "step": 61963
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0499299764633179,
      "learning_rate": 1.8086435621971598e-06,
      "loss": 2.5723,
      "step": 61964
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2014070749282837,
      "learning_rate": 1.8084073969933613e-06,
      "loss": 2.1368,
      "step": 61965
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.980126678943634,
      "learning_rate": 1.808171245676541e-06,
      "loss": 2.1541,
      "step": 61966
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0449299812316895,
      "learning_rate": 1.807935108247094e-06,
      "loss": 2.4157,
      "step": 61967
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1011606454849243,
      "learning_rate": 1.8076989847054228e-06,
      "loss": 2.1142,
      "step": 61968
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1312675476074219,
      "learning_rate": 1.8074628750519241e-06,
      "loss": 2.2128,
      "step": 61969
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1739808320999146,
      "learning_rate": 1.8072267792870034e-06,
      "loss": 2.3254,
      "step": 61970
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0816445350646973,
      "learning_rate": 1.8069906974110562e-06,
      "loss": 2.2656,
      "step": 61971
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5748764276504517,
      "learning_rate": 1.8067546294244875e-06,
      "loss": 2.1932,
      "step": 61972
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1142306327819824,
      "learning_rate": 1.8065185753276916e-06,
      "loss": 2.3601,
      "step": 61973
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1806468963623047,
      "learning_rate": 1.8062825351210756e-06,
      "loss": 2.3221,
      "step": 61974
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0676199197769165,
      "learning_rate": 1.806046508805035e-06,
      "loss": 2.2639,
      "step": 61975
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0793564319610596,
      "learning_rate": 1.8058104963799693e-06,
      "loss": 2.2974,
      "step": 61976
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0374794006347656,
      "learning_rate": 1.805574497846283e-06,
      "loss": 2.3752,
      "step": 61977
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1953718662261963,
      "learning_rate": 1.80533851320437e-06,
      "loss": 2.2571,
      "step": 61978
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1109124422073364,
      "learning_rate": 1.8051025424546375e-06,
      "loss": 2.4599,
      "step": 61979
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0037212371826172,
      "learning_rate": 1.8048665855974813e-06,
      "loss": 2.4538,
      "step": 61980
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.104896068572998,
      "learning_rate": 1.8046306426333026e-06,
      "loss": 2.2811,
      "step": 61981
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1250782012939453,
      "learning_rate": 1.8043947135624972e-06,
      "loss": 2.5666,
      "step": 61982
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.128281831741333,
      "learning_rate": 1.8041587983854714e-06,
      "loss": 2.286,
      "step": 61983
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0855062007904053,
      "learning_rate": 1.8039228971026212e-06,
      "loss": 2.4187,
      "step": 61984
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1620230674743652,
      "learning_rate": 1.8036870097143488e-06,
      "loss": 2.3873,
      "step": 61985
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0346646308898926,
      "learning_rate": 1.8034511362210506e-06,
      "loss": 2.1936,
      "step": 61986
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0688687562942505,
      "learning_rate": 1.8032152766231326e-06,
      "loss": 2.2519,
      "step": 61987
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9873234033584595,
      "learning_rate": 1.8029794309209868e-06,
      "loss": 2.3703,
      "step": 61988
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.005867600440979,
      "learning_rate": 1.8027435991150211e-06,
      "loss": 2.1865,
      "step": 61989
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0576852560043335,
      "learning_rate": 1.802507781205628e-06,
      "loss": 2.396,
      "step": 61990
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.163486361503601,
      "learning_rate": 1.8022719771932129e-06,
      "loss": 2.3556,
      "step": 61991
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0467585325241089,
      "learning_rate": 1.8020361870781711e-06,
      "loss": 2.3679,
      "step": 61992
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1137938499450684,
      "learning_rate": 1.80180041086091e-06,
      "loss": 2.3715,
      "step": 61993
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.067179560661316,
      "learning_rate": 1.801564648541818e-06,
      "loss": 2.2684,
      "step": 61994
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0821642875671387,
      "learning_rate": 1.801328900121303e-06,
      "loss": 2.5008,
      "step": 61995
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2401785850524902,
      "learning_rate": 1.801093165599761e-06,
      "loss": 2.3194,
      "step": 61996
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1313976049423218,
      "learning_rate": 1.800857444977594e-06,
      "loss": 2.2426,
      "step": 61997
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1159043312072754,
      "learning_rate": 1.8006217382551982e-06,
      "loss": 2.3032,
      "step": 61998
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.041887879371643,
      "learning_rate": 1.8003860454329791e-06,
      "loss": 2.386,
      "step": 61999
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9613127112388611,
      "learning_rate": 1.8001503665113285e-06,
      "loss": 2.4202,
      "step": 62000
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.064483404159546,
      "learning_rate": 1.7999147014906538e-06,
      "loss": 2.4282,
      "step": 62001
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.158279538154602,
      "learning_rate": 1.7996790503713478e-06,
      "loss": 2.1137,
      "step": 62002
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9837281107902527,
      "learning_rate": 1.7994434131538162e-06,
      "loss": 2.4379,
      "step": 62003
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3380651473999023,
      "learning_rate": 1.799207789838454e-06,
      "loss": 2.1851,
      "step": 62004
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.169689655303955,
      "learning_rate": 1.7989721804256598e-06,
      "loss": 2.3733,
      "step": 62005
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1033426523208618,
      "learning_rate": 1.7987365849158389e-06,
      "loss": 2.4453,
      "step": 62006
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.040320873260498,
      "learning_rate": 1.7985010033093853e-06,
      "loss": 2.3815,
      "step": 62007
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0885711908340454,
      "learning_rate": 1.7982654356066987e-06,
      "loss": 2.3016,
      "step": 62008
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1888612508773804,
      "learning_rate": 1.7980298818081809e-06,
      "loss": 2.3418,
      "step": 62009
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1295857429504395,
      "learning_rate": 1.7977943419142307e-06,
      "loss": 2.3773,
      "step": 62010
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1281769275665283,
      "learning_rate": 1.7975588159252443e-06,
      "loss": 2.2085,
      "step": 62011
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.17670476436615,
      "learning_rate": 1.797323303841627e-06,
      "loss": 2.2883,
      "step": 62012
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.05797278881073,
      "learning_rate": 1.7970878056637708e-06,
      "loss": 2.4695,
      "step": 62013
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0770668983459473,
      "learning_rate": 1.7968523213920808e-06,
      "loss": 2.2781,
      "step": 62014
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2609096765518188,
      "learning_rate": 1.796616851026952e-06,
      "loss": 2.6116,
      "step": 62015
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1355271339416504,
      "learning_rate": 1.7963813945687891e-06,
      "loss": 2.4762,
      "step": 62016
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1421858072280884,
      "learning_rate": 1.7961459520179847e-06,
      "loss": 2.1506,
      "step": 62017
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0521330833435059,
      "learning_rate": 1.795910523374943e-06,
      "loss": 2.2593,
      "step": 62018
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.055602788925171,
      "learning_rate": 1.7956751086400593e-06,
      "loss": 2.5467,
      "step": 62019
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0962127447128296,
      "learning_rate": 1.7954397078137398e-06,
      "loss": 2.349,
      "step": 62020
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0219590663909912,
      "learning_rate": 1.7952043208963722e-06,
      "loss": 2.3377,
      "step": 62021
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1722098588943481,
      "learning_rate": 1.7949689478883646e-06,
      "loss": 2.2221,
      "step": 62022
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1443880796432495,
      "learning_rate": 1.7947335887901107e-06,
      "loss": 2.2866,
      "step": 62023
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0017725229263306,
      "learning_rate": 1.7944982436020153e-06,
      "loss": 2.1965,
      "step": 62024
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1048274040222168,
      "learning_rate": 1.7942629123244704e-06,
      "loss": 2.254,
      "step": 62025
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0379400253295898,
      "learning_rate": 1.7940275949578812e-06,
      "loss": 2.2337,
      "step": 62026
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1216998100280762,
      "learning_rate": 1.7937922915026418e-06,
      "loss": 2.3801,
      "step": 62027
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.186742901802063,
      "learning_rate": 1.7935570019591552e-06,
      "loss": 2.37,
      "step": 62028
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.182018756866455,
      "learning_rate": 1.7933217263278158e-06,
      "loss": 2.5492,
      "step": 62029
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0209071636199951,
      "learning_rate": 1.7930864646090273e-06,
      "loss": 2.3106,
      "step": 62030
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0639373064041138,
      "learning_rate": 1.7928512168031874e-06,
      "loss": 2.4033,
      "step": 62031
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0715758800506592,
      "learning_rate": 1.7926159829106904e-06,
      "loss": 2.4107,
      "step": 62032
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0156162977218628,
      "learning_rate": 1.792380762931941e-06,
      "loss": 2.3953,
      "step": 62033
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1043753623962402,
      "learning_rate": 1.792145556867335e-06,
      "loss": 2.2513,
      "step": 62034
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1092864274978638,
      "learning_rate": 1.7919103647172697e-06,
      "loss": 2.4898,
      "step": 62035
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0029399394989014,
      "learning_rate": 1.791675186482147e-06,
      "loss": 2.2322,
      "step": 62036
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9876473546028137,
      "learning_rate": 1.791440022162365e-06,
      "loss": 2.448,
      "step": 62037
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1822887659072876,
      "learning_rate": 1.7912048717583197e-06,
      "loss": 2.2361,
      "step": 62038
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.015075445175171,
      "learning_rate": 1.7909697352704137e-06,
      "loss": 2.163,
      "step": 62039
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1978944540023804,
      "learning_rate": 1.7907346126990398e-06,
      "loss": 2.4319,
      "step": 62040
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0791290998458862,
      "learning_rate": 1.7904995040446039e-06,
      "loss": 2.3023,
      "step": 62041
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0000481605529785,
      "learning_rate": 1.7902644093074984e-06,
      "loss": 2.2148,
      "step": 62042
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.115344524383545,
      "learning_rate": 1.7900293284881276e-06,
      "loss": 2.4686,
      "step": 62043
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0609849691390991,
      "learning_rate": 1.7897942615868835e-06,
      "loss": 2.2395,
      "step": 62044
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0911214351654053,
      "learning_rate": 1.7895592086041713e-06,
      "loss": 2.3578,
      "step": 62045
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1687930822372437,
      "learning_rate": 1.789324169540385e-06,
      "loss": 2.4406,
      "step": 62046
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.102790355682373,
      "learning_rate": 1.7890891443959258e-06,
      "loss": 2.2679,
      "step": 62047
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1913906335830688,
      "learning_rate": 1.788854133171186e-06,
      "loss": 2.3027,
      "step": 62048
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1543608903884888,
      "learning_rate": 1.7886191358665727e-06,
      "loss": 2.4627,
      "step": 62049
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1513862609863281,
      "learning_rate": 1.7883841524824774e-06,
      "loss": 2.3474,
      "step": 62050
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0851689577102661,
      "learning_rate": 1.7881491830193031e-06,
      "loss": 2.3643,
      "step": 62051
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0208200216293335,
      "learning_rate": 1.7879142274774442e-06,
      "loss": 2.2478,
      "step": 62052
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.251948595046997,
      "learning_rate": 1.7876792858573034e-06,
      "loss": 2.3941,
      "step": 62053
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0277607440948486,
      "learning_rate": 1.7874443581592737e-06,
      "loss": 2.3834,
      "step": 62054
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9870814681053162,
      "learning_rate": 1.7872094443837595e-06,
      "loss": 2.1248,
      "step": 62055
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0685604810714722,
      "learning_rate": 1.7869745445311538e-06,
      "loss": 2.4419,
      "step": 62056
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1229920387268066,
      "learning_rate": 1.7867396586018583e-06,
      "loss": 2.2887,
      "step": 62057
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0935717821121216,
      "learning_rate": 1.7865047865962703e-06,
      "loss": 2.3655,
      "step": 62058
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9647282958030701,
      "learning_rate": 1.7862699285147878e-06,
      "loss": 2.241,
      "step": 62059
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1647170782089233,
      "learning_rate": 1.7860350843578055e-06,
      "loss": 2.2485,
      "step": 62060
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1279743909835815,
      "learning_rate": 1.7858002541257268e-06,
      "loss": 2.5014,
      "step": 62061
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0710569620132446,
      "learning_rate": 1.7855654378189457e-06,
      "loss": 2.3964,
      "step": 62062
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.13888680934906,
      "learning_rate": 1.785330635437864e-06,
      "loss": 2.5143,
      "step": 62063
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1000304222106934,
      "learning_rate": 1.785095846982876e-06,
      "loss": 2.201,
      "step": 62064
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1264212131500244,
      "learning_rate": 1.7848610724543848e-06,
      "loss": 2.2438,
      "step": 62065
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0774575471878052,
      "learning_rate": 1.7846263118527852e-06,
      "loss": 2.3045,
      "step": 62066
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9969541430473328,
      "learning_rate": 1.7843915651784716e-06,
      "loss": 2.293,
      "step": 62067
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0433377027511597,
      "learning_rate": 1.7841568324318493e-06,
      "loss": 2.3177,
      "step": 62068
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1196812391281128,
      "learning_rate": 1.7839221136133089e-06,
      "loss": 2.2902,
      "step": 62069
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1819262504577637,
      "learning_rate": 1.7836874087232548e-06,
      "loss": 2.2569,
      "step": 62070
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.133968472480774,
      "learning_rate": 1.7834527177620798e-06,
      "loss": 2.3617,
      "step": 62071
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1006253957748413,
      "learning_rate": 1.7832180407301891e-06,
      "loss": 2.2794,
      "step": 62072
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.025705099105835,
      "learning_rate": 1.7829833776279704e-06,
      "loss": 2.3917,
      "step": 62073
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0249919891357422,
      "learning_rate": 1.7827487284558298e-06,
      "loss": 2.4876,
      "step": 62074
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0983432531356812,
      "learning_rate": 1.7825140932141582e-06,
      "loss": 2.2193,
      "step": 62075
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.133857011795044,
      "learning_rate": 1.7822794719033609e-06,
      "loss": 2.4303,
      "step": 62076
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.072109580039978,
      "learning_rate": 1.7820448645238276e-06,
      "loss": 2.2715,
      "step": 62077
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.121838927268982,
      "learning_rate": 1.7818102710759644e-06,
      "loss": 2.3473,
      "step": 62078
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1262004375457764,
      "learning_rate": 1.7815756915601611e-06,
      "loss": 2.2633,
      "step": 62079
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0874571800231934,
      "learning_rate": 1.781341125976822e-06,
      "loss": 2.4071,
      "step": 62080
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0577595233917236,
      "learning_rate": 1.781106574326339e-06,
      "loss": 2.3376,
      "step": 62081
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9873735308647156,
      "learning_rate": 1.780872036609116e-06,
      "loss": 2.2377,
      "step": 62082
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1519877910614014,
      "learning_rate": 1.7806375128255438e-06,
      "loss": 2.4949,
      "step": 62083
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1358225345611572,
      "learning_rate": 1.7804030029760266e-06,
      "loss": 2.297,
      "step": 62084
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0726474523544312,
      "learning_rate": 1.7801685070609554e-06,
      "loss": 2.3172,
      "step": 62085
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3143168687820435,
      "learning_rate": 1.7799340250807362e-06,
      "loss": 2.4755,
      "step": 62086
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2607225179672241,
      "learning_rate": 1.7796995570357567e-06,
      "loss": 2.3764,
      "step": 62087
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.052711009979248,
      "learning_rate": 1.779465102926422e-06,
      "loss": 2.2625,
      "step": 62088
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1062124967575073,
      "learning_rate": 1.7792306627531231e-06,
      "loss": 2.3764,
      "step": 62089
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1733285188674927,
      "learning_rate": 1.7789962365162638e-06,
      "loss": 2.4071,
      "step": 62090
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0439608097076416,
      "learning_rate": 1.7787618242162363e-06,
      "loss": 2.2786,
      "step": 62091
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1396172046661377,
      "learning_rate": 1.7785274258534436e-06,
      "loss": 2.323,
      "step": 62092
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9403661489486694,
      "learning_rate": 1.7782930414282783e-06,
      "loss": 2.2013,
      "step": 62093
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.177685022354126,
      "learning_rate": 1.7780586709411385e-06,
      "loss": 2.2429,
      "step": 62094
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0723105669021606,
      "learning_rate": 1.7778243143924234e-06,
      "loss": 2.3728,
      "step": 62095
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9624976515769958,
      "learning_rate": 1.7775899717825274e-06,
      "loss": 2.2454,
      "step": 62096
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9855918288230896,
      "learning_rate": 1.7773556431118522e-06,
      "loss": 2.5158,
      "step": 62097
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0358514785766602,
      "learning_rate": 1.7771213283807887e-06,
      "loss": 2.0598,
      "step": 62098
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1143628358840942,
      "learning_rate": 1.7768870275897443e-06,
      "loss": 2.1877,
      "step": 62099
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.042137861251831,
      "learning_rate": 1.7766527407391044e-06,
      "loss": 2.3121,
      "step": 62100
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.110129952430725,
      "learning_rate": 1.7764184678292739e-06,
      "loss": 2.3209,
      "step": 62101
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2823392152786255,
      "learning_rate": 1.7761842088606451e-06,
      "loss": 2.4158,
      "step": 62102
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1818957328796387,
      "learning_rate": 1.7759499638336196e-06,
      "loss": 2.1601,
      "step": 62103
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1703859567642212,
      "learning_rate": 1.7757157327485907e-06,
      "loss": 2.0711,
      "step": 62104
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.088159441947937,
      "learning_rate": 1.775481515605959e-06,
      "loss": 2.5917,
      "step": 62105
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9757856130599976,
      "learning_rate": 1.7752473124061187e-06,
      "loss": 2.293,
      "step": 62106
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0532445907592773,
      "learning_rate": 1.7750131231494693e-06,
      "loss": 2.5621,
      "step": 62107
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0326963663101196,
      "learning_rate": 1.7747789478364053e-06,
      "loss": 2.2874,
      "step": 62108
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0923199653625488,
      "learning_rate": 1.774544786467326e-06,
      "loss": 2.3486,
      "step": 62109
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0646008253097534,
      "learning_rate": 1.7743106390426257e-06,
      "loss": 2.2377,
      "step": 62110
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.048182725906372,
      "learning_rate": 1.7740765055627053e-06,
      "loss": 2.3736,
      "step": 62111
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0941861867904663,
      "learning_rate": 1.7738423860279596e-06,
      "loss": 2.2988,
      "step": 62112
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1600457429885864,
      "learning_rate": 1.7736082804387845e-06,
      "loss": 2.2503,
      "step": 62113
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0401477813720703,
      "learning_rate": 1.7733741887955757e-06,
      "loss": 2.3005,
      "step": 62114
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0423204898834229,
      "learning_rate": 1.7731401110987345e-06,
      "loss": 2.476,
      "step": 62115
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.061339259147644,
      "learning_rate": 1.7729060473486515e-06,
      "loss": 2.1302,
      "step": 62116
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0040948390960693,
      "learning_rate": 1.7726719975457308e-06,
      "loss": 2.575,
      "step": 62117
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.040136694908142,
      "learning_rate": 1.7724379616903631e-06,
      "loss": 2.1944,
      "step": 62118
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.090043067932129,
      "learning_rate": 1.7722039397829504e-06,
      "loss": 2.1775,
      "step": 62119
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.090224027633667,
      "learning_rate": 1.7719699318238836e-06,
      "loss": 2.2176,
      "step": 62120
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2480981349945068,
      "learning_rate": 1.7717359378135645e-06,
      "loss": 2.4279,
      "step": 62121
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0599591732025146,
      "learning_rate": 1.7715019577523896e-06,
      "loss": 2.6196,
      "step": 62122
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6393661499023438,
      "learning_rate": 1.7712679916407494e-06,
      "loss": 2.4056,
      "step": 62123
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.064531683921814,
      "learning_rate": 1.7710340394790481e-06,
      "loss": 2.1642,
      "step": 62124
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0908489227294922,
      "learning_rate": 1.770800101267679e-06,
      "loss": 2.5094,
      "step": 62125
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1297197341918945,
      "learning_rate": 1.770566177007036e-06,
      "loss": 2.4066,
      "step": 62126
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0855084657669067,
      "learning_rate": 1.7703322666975208e-06,
      "loss": 2.3487,
      "step": 62127
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0694711208343506,
      "learning_rate": 1.770098370339528e-06,
      "loss": 2.369,
      "step": 62128
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0304969549179077,
      "learning_rate": 1.7698644879334502e-06,
      "loss": 2.5103,
      "step": 62129
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3793634176254272,
      "learning_rate": 1.7696306194796908e-06,
      "loss": 2.1957,
      "step": 62130
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9384545087814331,
      "learning_rate": 1.7693967649786403e-06,
      "loss": 2.1848,
      "step": 62131
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1112854480743408,
      "learning_rate": 1.7691629244307007e-06,
      "loss": 2.1956,
      "step": 62132
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1301532983779907,
      "learning_rate": 1.7689290978362616e-06,
      "loss": 2.204,
      "step": 62133
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0725260972976685,
      "learning_rate": 1.7686952851957273e-06,
      "loss": 2.1809,
      "step": 62134
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9932917952537537,
      "learning_rate": 1.7684614865094862e-06,
      "loss": 2.1555,
      "step": 62135
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0063693523406982,
      "learning_rate": 1.7682277017779414e-06,
      "loss": 2.3039,
      "step": 62136
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9829097986221313,
      "learning_rate": 1.7679939310014837e-06,
      "loss": 2.2366,
      "step": 62137
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9832780957221985,
      "learning_rate": 1.7677601741805173e-06,
      "loss": 2.1755,
      "step": 62138
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1443777084350586,
      "learning_rate": 1.7675264313154284e-06,
      "loss": 2.4733,
      "step": 62139
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1229503154754639,
      "learning_rate": 1.7672927024066211e-06,
      "loss": 2.2368,
      "step": 62140
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.027093768119812,
      "learning_rate": 1.7670589874544852e-06,
      "loss": 1.9497,
      "step": 62141
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4239205121994019,
      "learning_rate": 1.7668252864594237e-06,
      "loss": 2.3039,
      "step": 62142
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0802860260009766,
      "learning_rate": 1.7665915994218263e-06,
      "loss": 2.3984,
      "step": 62143
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0822997093200684,
      "learning_rate": 1.766357926342095e-06,
      "loss": 2.25,
      "step": 62144
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0516941547393799,
      "learning_rate": 1.7661242672206203e-06,
      "loss": 2.2698,
      "step": 62145
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.123821496963501,
      "learning_rate": 1.7658906220578043e-06,
      "loss": 2.2858,
      "step": 62146
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3466671705245972,
      "learning_rate": 1.7656569908540378e-06,
      "loss": 2.2071,
      "step": 62147
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.003831386566162,
      "learning_rate": 1.7654233736097204e-06,
      "loss": 2.3044,
      "step": 62148
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0175212621688843,
      "learning_rate": 1.7651897703252473e-06,
      "loss": 2.1312,
      "step": 62149
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0560227632522583,
      "learning_rate": 1.7649561810010129e-06,
      "loss": 2.1596,
      "step": 62150
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.143535852432251,
      "learning_rate": 1.7647226056374155e-06,
      "loss": 2.4579,
      "step": 62151
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0045804977416992,
      "learning_rate": 1.7644890442348505e-06,
      "loss": 2.2766,
      "step": 62152
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.029110074043274,
      "learning_rate": 1.7642554967937109e-06,
      "loss": 2.2049,
      "step": 62153
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0877788066864014,
      "learning_rate": 1.7640219633143974e-06,
      "loss": 2.2686,
      "step": 62154
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0956199169158936,
      "learning_rate": 1.7637884437973029e-06,
      "loss": 2.2319,
      "step": 62155
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2209619283676147,
      "learning_rate": 1.7635549382428219e-06,
      "loss": 2.1637,
      "step": 62156
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2569880485534668,
      "learning_rate": 1.7633214466513548e-06,
      "loss": 2.3081,
      "step": 62157
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9875605702400208,
      "learning_rate": 1.7630879690232928e-06,
      "loss": 2.1863,
      "step": 62158
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1542068719863892,
      "learning_rate": 1.7628545053590362e-06,
      "loss": 2.3919,
      "step": 62159
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0081793069839478,
      "learning_rate": 1.7626210556589752e-06,
      "loss": 2.3435,
      "step": 62160
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0824302434921265,
      "learning_rate": 1.7623876199235123e-06,
      "loss": 2.3974,
      "step": 62161
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3617762327194214,
      "learning_rate": 1.7621541981530365e-06,
      "loss": 2.104,
      "step": 62162
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.097301959991455,
      "learning_rate": 1.7619207903479485e-06,
      "loss": 2.3224,
      "step": 62163
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.023051142692566,
      "learning_rate": 1.7616873965086412e-06,
      "loss": 2.2316,
      "step": 62164
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3693212270736694,
      "learning_rate": 1.7614540166355153e-06,
      "loss": 2.4256,
      "step": 62165
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1243181228637695,
      "learning_rate": 1.7612206507289575e-06,
      "loss": 2.4495,
      "step": 62166
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2066351175308228,
      "learning_rate": 1.7609872987893706e-06,
      "loss": 2.3024,
      "step": 62167
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1110568046569824,
      "learning_rate": 1.7607539608171465e-06,
      "loss": 2.386,
      "step": 62168
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.04363214969635,
      "learning_rate": 1.7605206368126837e-06,
      "loss": 2.3846,
      "step": 62169
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.09329092502594,
      "learning_rate": 1.7602873267763742e-06,
      "loss": 2.1849,
      "step": 62170
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.064708948135376,
      "learning_rate": 1.7600540307086178e-06,
      "loss": 2.5394,
      "step": 62171
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0354162454605103,
      "learning_rate": 1.759820748609805e-06,
      "loss": 2.5011,
      "step": 62172
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.01513671875,
      "learning_rate": 1.759587480480337e-06,
      "loss": 2.4905,
      "step": 62173
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.185189127922058,
      "learning_rate": 1.7593542263206033e-06,
      "loss": 2.4777,
      "step": 62174
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0591797828674316,
      "learning_rate": 1.7591209861310054e-06,
      "loss": 2.2654,
      "step": 62175
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0957835912704468,
      "learning_rate": 1.7588877599119347e-06,
      "loss": 2.6919,
      "step": 62176
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.008380651473999,
      "learning_rate": 1.7586545476637862e-06,
      "loss": 2.2923,
      "step": 62177
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1162612438201904,
      "learning_rate": 1.7584213493869583e-06,
      "loss": 2.2157,
      "step": 62178
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1876178979873657,
      "learning_rate": 1.7581881650818455e-06,
      "loss": 2.3448,
      "step": 62179
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1563934087753296,
      "learning_rate": 1.7579549947488383e-06,
      "loss": 2.4464,
      "step": 62180
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1461495161056519,
      "learning_rate": 1.75772183838834e-06,
      "loss": 2.4413,
      "step": 62181
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0959811210632324,
      "learning_rate": 1.7574886960007375e-06,
      "loss": 2.2803,
      "step": 62182
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0392496585845947,
      "learning_rate": 1.7572555675864333e-06,
      "loss": 2.4315,
      "step": 62183
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1683642864227295,
      "learning_rate": 1.7570224531458202e-06,
      "loss": 2.1855,
      "step": 62184
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0954811573028564,
      "learning_rate": 1.7567893526792901e-06,
      "loss": 2.3419,
      "step": 62185
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.228925347328186,
      "learning_rate": 1.7565562661872436e-06,
      "loss": 2.1777,
      "step": 62186
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0641762018203735,
      "learning_rate": 1.7563231936700708e-06,
      "loss": 2.3954,
      "step": 62187
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0180400609970093,
      "learning_rate": 1.7560901351281712e-06,
      "loss": 2.2558,
      "step": 62188
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9985724091529846,
      "learning_rate": 1.7558570905619354e-06,
      "loss": 2.3565,
      "step": 62189
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1411734819412231,
      "learning_rate": 1.7556240599717633e-06,
      "loss": 2.341,
      "step": 62190
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0108745098114014,
      "learning_rate": 1.7553910433580467e-06,
      "loss": 2.452,
      "step": 62191
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2443171739578247,
      "learning_rate": 1.7551580407211833e-06,
      "loss": 2.2412,
      "step": 62192
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0656312704086304,
      "learning_rate": 1.7549250520615623e-06,
      "loss": 2.2525,
      "step": 62193
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1360942125320435,
      "learning_rate": 1.7546920773795862e-06,
      "loss": 2.0886,
      "step": 62194
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1497628688812256,
      "learning_rate": 1.754459116675643e-06,
      "loss": 2.4782,
      "step": 62195
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1264575719833374,
      "learning_rate": 1.754226169950134e-06,
      "loss": 2.2495,
      "step": 62196
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0960947275161743,
      "learning_rate": 1.7539932372034484e-06,
      "loss": 2.2819,
      "step": 62197
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0809744596481323,
      "learning_rate": 1.7537603184359874e-06,
      "loss": 2.4054,
      "step": 62198
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.072178602218628,
      "learning_rate": 1.7535274136481384e-06,
      "loss": 2.3515,
      "step": 62199
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.980346143245697,
      "learning_rate": 1.7532945228403042e-06,
      "loss": 2.165,
      "step": 62200
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1183968782424927,
      "learning_rate": 1.7530616460128713e-06,
      "loss": 2.4904,
      "step": 62201
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9921016097068787,
      "learning_rate": 1.7528287831662428e-06,
      "loss": 2.3148,
      "step": 62202
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0883029699325562,
      "learning_rate": 1.7525959343008058e-06,
      "loss": 2.4879,
      "step": 62203
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0860885381698608,
      "learning_rate": 1.7523630994169638e-06,
      "loss": 2.2738,
      "step": 62204
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0945712327957153,
      "learning_rate": 1.7521302785151018e-06,
      "loss": 2.4502,
      "step": 62205
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1316227912902832,
      "learning_rate": 1.7518974715956217e-06,
      "loss": 2.2572,
      "step": 62206
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9690718650817871,
      "learning_rate": 1.7516646786589132e-06,
      "loss": 2.3407,
      "step": 62207
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0472010374069214,
      "learning_rate": 1.7514318997053748e-06,
      "loss": 2.0585,
      "step": 62208
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1116678714752197,
      "learning_rate": 1.7511991347353973e-06,
      "loss": 2.2902,
      "step": 62209
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.087059736251831,
      "learning_rate": 1.7509663837493806e-06,
      "loss": 2.2527,
      "step": 62210
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.123910903930664,
      "learning_rate": 1.7507336467477165e-06,
      "loss": 2.4439,
      "step": 62211
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0581138134002686,
      "learning_rate": 1.7505009237307968e-06,
      "loss": 2.2033,
      "step": 62212
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.305657148361206,
      "learning_rate": 1.7502682146990202e-06,
      "loss": 2.3742,
      "step": 62213
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2985960245132446,
      "learning_rate": 1.7500355196527773e-06,
      "loss": 2.1209,
      "step": 62214
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.051010012626648,
      "learning_rate": 1.749802838592468e-06,
      "loss": 2.3157,
      "step": 62215
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.049544334411621,
      "learning_rate": 1.749570171518481e-06,
      "loss": 2.3897,
      "step": 62216
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1080801486968994,
      "learning_rate": 1.7493375184312178e-06,
      "loss": 2.5508,
      "step": 62217
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.054043173789978,
      "learning_rate": 1.7491048793310638e-06,
      "loss": 2.2219,
      "step": 62218
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1303377151489258,
      "learning_rate": 1.748872254218421e-06,
      "loss": 2.1864,
      "step": 62219
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1145364046096802,
      "learning_rate": 1.748639643093677e-06,
      "loss": 2.3069,
      "step": 62220
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.114734172821045,
      "learning_rate": 1.7484070459572333e-06,
      "loss": 2.4409,
      "step": 62221
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2058002948760986,
      "learning_rate": 1.7481744628094776e-06,
      "loss": 2.3275,
      "step": 62222
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1541540622711182,
      "learning_rate": 1.7479418936508108e-06,
      "loss": 2.2126,
      "step": 62223
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9614962935447693,
      "learning_rate": 1.7477093384816202e-06,
      "loss": 2.2746,
      "step": 62224
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0931936502456665,
      "learning_rate": 1.7474767973023065e-06,
      "loss": 2.0849,
      "step": 62225
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1210321187973022,
      "learning_rate": 1.7472442701132575e-06,
      "loss": 2.0281,
      "step": 62226
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9959059357643127,
      "learning_rate": 1.7470117569148748e-06,
      "loss": 2.2123,
      "step": 62227
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0371835231781006,
      "learning_rate": 1.7467792577075459e-06,
      "loss": 2.2702,
      "step": 62228
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0625405311584473,
      "learning_rate": 1.7465467724916696e-06,
      "loss": 2.2155,
      "step": 62229
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.142611026763916,
      "learning_rate": 1.7463143012676364e-06,
      "loss": 2.338,
      "step": 62230
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1072810888290405,
      "learning_rate": 1.7460818440358463e-06,
      "loss": 2.3365,
      "step": 62231
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.032034993171692,
      "learning_rate": 1.7458494007966842e-06,
      "loss": 2.4436,
      "step": 62232
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1159672737121582,
      "learning_rate": 1.7456169715505523e-06,
      "loss": 2.4794,
      "step": 62233
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2111085653305054,
      "learning_rate": 1.745384556297839e-06,
      "loss": 2.4495,
      "step": 62234
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0401651859283447,
      "learning_rate": 1.7451521550389428e-06,
      "loss": 2.4593,
      "step": 62235
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0310356616973877,
      "learning_rate": 1.7449197677742536e-06,
      "loss": 2.1134,
      "step": 62236
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1069520711898804,
      "learning_rate": 1.7446873945041697e-06,
      "loss": 2.481,
      "step": 62237
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0205694437026978,
      "learning_rate": 1.74445503522908e-06,
      "loss": 2.4472,
      "step": 62238
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1233055591583252,
      "learning_rate": 1.7442226899493842e-06,
      "loss": 2.4395,
      "step": 62239
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0754172801971436,
      "learning_rate": 1.7439903586654728e-06,
      "loss": 2.2792,
      "step": 62240
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.056854248046875,
      "learning_rate": 1.7437580413777378e-06,
      "loss": 2.2613,
      "step": 62241
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0923141241073608,
      "learning_rate": 1.7435257380865777e-06,
      "loss": 2.6382,
      "step": 62242
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2353463172912598,
      "learning_rate": 1.7432934487923814e-06,
      "loss": 2.4719,
      "step": 62243
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.7781054973602295,
      "learning_rate": 1.7430611734955483e-06,
      "loss": 2.0825,
      "step": 62244
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0506643056869507,
      "learning_rate": 1.7428289121964681e-06,
      "loss": 2.4605,
      "step": 62245
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0950628519058228,
      "learning_rate": 1.7425966648955362e-06,
      "loss": 2.3772,
      "step": 62246
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.111249566078186,
      "learning_rate": 1.742364431593142e-06,
      "loss": 2.1482,
      "step": 62247
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0170708894729614,
      "learning_rate": 1.7421322122896867e-06,
      "loss": 2.4612,
      "step": 62248
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2718127965927124,
      "learning_rate": 1.7419000069855574e-06,
      "loss": 2.232,
      "step": 62249
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0786492824554443,
      "learning_rate": 1.7416678156811528e-06,
      "loss": 2.4347,
      "step": 62250
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1292238235473633,
      "learning_rate": 1.7414356383768615e-06,
      "loss": 2.1562,
      "step": 62251
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1424192190170288,
      "learning_rate": 1.7412034750730822e-06,
      "loss": 2.2827,
      "step": 62252
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.112615704536438,
      "learning_rate": 1.7409713257702032e-06,
      "loss": 2.2889,
      "step": 62253
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.362073302268982,
      "learning_rate": 1.7407391904686243e-06,
      "loss": 2.2211,
      "step": 62254
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0435734987258911,
      "learning_rate": 1.740507069168732e-06,
      "loss": 2.4269,
      "step": 62255
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.021673321723938,
      "learning_rate": 1.740274961870927e-06,
      "loss": 2.1507,
      "step": 62256
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.096690058708191,
      "learning_rate": 1.7400428685755988e-06,
      "loss": 2.4542,
      "step": 62257
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0273345708847046,
      "learning_rate": 1.7398107892831416e-06,
      "loss": 2.2484,
      "step": 62258
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.110693335533142,
      "learning_rate": 1.7395787239939466e-06,
      "loss": 2.1584,
      "step": 62259
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1011520624160767,
      "learning_rate": 1.7393466727084108e-06,
      "loss": 2.3488,
      "step": 62260
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0103654861450195,
      "learning_rate": 1.739114635426924e-06,
      "loss": 2.0767,
      "step": 62261
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1459101438522339,
      "learning_rate": 1.7388826121498836e-06,
      "loss": 2.4228,
      "step": 62262
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0266549587249756,
      "learning_rate": 1.7386506028776783e-06,
      "loss": 2.2769,
      "step": 62263
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0064356327056885,
      "learning_rate": 1.7384186076107067e-06,
      "loss": 2.3353,
      "step": 62264
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3379106521606445,
      "learning_rate": 1.7381866263493563e-06,
      "loss": 2.3608,
      "step": 62265
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.123178482055664,
      "learning_rate": 1.7379546590940267e-06,
      "loss": 2.3121,
      "step": 62266
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.037467360496521,
      "learning_rate": 1.7377227058451074e-06,
      "loss": 2.2764,
      "step": 62267
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1371358633041382,
      "learning_rate": 1.7374907666029895e-06,
      "loss": 2.2878,
      "step": 62268
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0969552993774414,
      "learning_rate": 1.7372588413680724e-06,
      "loss": 2.3345,
      "step": 62269
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2007150650024414,
      "learning_rate": 1.737026930140745e-06,
      "loss": 2.2689,
      "step": 62270
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0666090250015259,
      "learning_rate": 1.7367950329213978e-06,
      "loss": 2.3873,
      "step": 62271
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1035302877426147,
      "learning_rate": 1.7365631497104307e-06,
      "loss": 2.2266,
      "step": 62272
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0185729265213013,
      "learning_rate": 1.7363312805082333e-06,
      "loss": 2.2945,
      "step": 62273
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0354779958724976,
      "learning_rate": 1.7360994253151963e-06,
      "loss": 2.251,
      "step": 62274
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0814881324768066,
      "learning_rate": 1.7358675841317173e-06,
      "loss": 2.3168,
      "step": 62275
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0737093687057495,
      "learning_rate": 1.7356357569581862e-06,
      "loss": 2.178,
      "step": 62276
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.06719172000885,
      "learning_rate": 1.7354039437949978e-06,
      "loss": 2.3439,
      "step": 62277
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0764491558074951,
      "learning_rate": 1.735172144642543e-06,
      "loss": 2.2735,
      "step": 62278
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1326802968978882,
      "learning_rate": 1.7349403595012183e-06,
      "loss": 2.258,
      "step": 62279
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.144298791885376,
      "learning_rate": 1.734708588371411e-06,
      "loss": 2.3323,
      "step": 62280
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1751289367675781,
      "learning_rate": 1.734476831253521e-06,
      "loss": 2.3098,
      "step": 62281
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1594210863113403,
      "learning_rate": 1.7342450881479344e-06,
      "loss": 2.6273,
      "step": 62282
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2151446342468262,
      "learning_rate": 1.7340133590550534e-06,
      "loss": 2.4106,
      "step": 62283
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.057874083518982,
      "learning_rate": 1.7337816439752585e-06,
      "loss": 2.1089,
      "step": 62284
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0466407537460327,
      "learning_rate": 1.733549942908953e-06,
      "loss": 2.28,
      "step": 62285
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2216105461120605,
      "learning_rate": 1.733318255856522e-06,
      "loss": 2.4911,
      "step": 62286
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0754334926605225,
      "learning_rate": 1.733086582818364e-06,
      "loss": 2.2587,
      "step": 62287
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0186398029327393,
      "learning_rate": 1.7328549237948665e-06,
      "loss": 2.2536,
      "step": 62288
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0788394212722778,
      "learning_rate": 1.7326232787864283e-06,
      "loss": 2.1463,
      "step": 62289
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1176040172576904,
      "learning_rate": 1.7323916477934378e-06,
      "loss": 2.2696,
      "step": 62290
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1135845184326172,
      "learning_rate": 1.73216003081629e-06,
      "loss": 2.2192,
      "step": 62291
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2146260738372803,
      "learning_rate": 1.7319284278553739e-06,
      "loss": 2.1956,
      "step": 62292
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0334277153015137,
      "learning_rate": 1.7316968389110877e-06,
      "loss": 2.167,
      "step": 62293
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2866735458374023,
      "learning_rate": 1.7314652639838202e-06,
      "loss": 2.4268,
      "step": 62294
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.167309284210205,
      "learning_rate": 1.7312337030739634e-06,
      "loss": 2.2894,
      "step": 62295
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9828302264213562,
      "learning_rate": 1.7310021561819135e-06,
      "loss": 2.44,
      "step": 62296
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.034420132637024,
      "learning_rate": 1.73077062330806e-06,
      "loss": 2.422,
      "step": 62297
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0117452144622803,
      "learning_rate": 1.7305391044527941e-06,
      "loss": 2.2664,
      "step": 62298
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1098511219024658,
      "learning_rate": 1.730307599616513e-06,
      "loss": 2.4767,
      "step": 62299
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.202670931816101,
      "learning_rate": 1.7300761087996043e-06,
      "loss": 2.2714,
      "step": 62300
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.01382577419281,
      "learning_rate": 1.7298446320024653e-06,
      "loss": 2.2932,
      "step": 62301
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0704680681228638,
      "learning_rate": 1.7296131692254848e-06,
      "loss": 2.2571,
      "step": 62302
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2463395595550537,
      "learning_rate": 1.7293817204690543e-06,
      "loss": 2.3154,
      "step": 62303
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2934250831604004,
      "learning_rate": 1.7291502857335706e-06,
      "loss": 2.2066,
      "step": 62304
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9085341691970825,
      "learning_rate": 1.728918865019421e-06,
      "loss": 2.081,
      "step": 62305
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1042336225509644,
      "learning_rate": 1.728687458327003e-06,
      "loss": 2.5554,
      "step": 62306
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0995723009109497,
      "learning_rate": 1.728456065656704e-06,
      "loss": 2.325,
      "step": 62307
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4281402826309204,
      "learning_rate": 1.7282246870089202e-06,
      "loss": 2.2001,
      "step": 62308
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0948764085769653,
      "learning_rate": 1.7279933223840396e-06,
      "loss": 2.2308,
      "step": 62309
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0168133974075317,
      "learning_rate": 1.7277619717824624e-06,
      "loss": 2.2611,
      "step": 62310
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.05610191822052,
      "learning_rate": 1.7275306352045706e-06,
      "loss": 2.2,
      "step": 62311
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0285613536834717,
      "learning_rate": 1.7272993126507631e-06,
      "loss": 2.4001,
      "step": 62312
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9995830655097961,
      "learning_rate": 1.7270680041214272e-06,
      "loss": 2.2311,
      "step": 62313
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1026413440704346,
      "learning_rate": 1.7268367096169614e-06,
      "loss": 2.3087,
      "step": 62314
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.140703797340393,
      "learning_rate": 1.726605429137751e-06,
      "loss": 2.407,
      "step": 62315
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1000571250915527,
      "learning_rate": 1.7263741626841947e-06,
      "loss": 2.4632,
      "step": 62316
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1140486001968384,
      "learning_rate": 1.7261429102566784e-06,
      "loss": 2.4803,
      "step": 62317
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.121759295463562,
      "learning_rate": 1.7259116718556002e-06,
      "loss": 2.2208,
      "step": 62318
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.15118408203125,
      "learning_rate": 1.7256804474813449e-06,
      "loss": 2.276,
      "step": 62319
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.142075777053833,
      "learning_rate": 1.7254492371343123e-06,
      "loss": 2.1728,
      "step": 62320
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1555454730987549,
      "learning_rate": 1.7252180408148876e-06,
      "loss": 2.2949,
      "step": 62321
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0568153858184814,
      "learning_rate": 1.7249868585234687e-06,
      "loss": 2.2118,
      "step": 62322
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9733005166053772,
      "learning_rate": 1.7247556902604433e-06,
      "loss": 2.3503,
      "step": 62323
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.170461893081665,
      "learning_rate": 1.7245245360262064e-06,
      "loss": 2.3359,
      "step": 62324
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1851556301116943,
      "learning_rate": 1.7242933958211438e-06,
      "loss": 2.2563,
      "step": 62325
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9361304044723511,
      "learning_rate": 1.7240622696456544e-06,
      "loss": 2.2692,
      "step": 62326
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1288081407546997,
      "learning_rate": 1.7238311575001255e-06,
      "loss": 2.3954,
      "step": 62327
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2680219411849976,
      "learning_rate": 1.7236000593849521e-06,
      "loss": 2.4285,
      "step": 62328
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.025317668914795,
      "learning_rate": 1.7233689753005257e-06,
      "loss": 2.204,
      "step": 62329
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0617094039916992,
      "learning_rate": 1.723137905247233e-06,
      "loss": 2.2722,
      "step": 62330
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1033904552459717,
      "learning_rate": 1.722906849225473e-06,
      "loss": 2.3299,
      "step": 62331
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2666031122207642,
      "learning_rate": 1.7226758072356309e-06,
      "loss": 2.4007,
      "step": 62332
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0184669494628906,
      "learning_rate": 1.7224447792781041e-06,
      "loss": 2.4911,
      "step": 62333
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0816681385040283,
      "learning_rate": 1.7222137653532789e-06,
      "loss": 2.3478,
      "step": 62334
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1773810386657715,
      "learning_rate": 1.7219827654615517e-06,
      "loss": 2.0249,
      "step": 62335
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0847997665405273,
      "learning_rate": 1.7217517796033122e-06,
      "loss": 2.1987,
      "step": 62336
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0064911842346191,
      "learning_rate": 1.7215208077789513e-06,
      "loss": 2.3693,
      "step": 62337
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1009564399719238,
      "learning_rate": 1.7212898499888598e-06,
      "loss": 2.2275,
      "step": 62338
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1083604097366333,
      "learning_rate": 1.7210589062334327e-06,
      "loss": 2.0046,
      "step": 62339
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.179642915725708,
      "learning_rate": 1.7208279765130565e-06,
      "loss": 2.3996,
      "step": 62340
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.088793158531189,
      "learning_rate": 1.7205970608281274e-06,
      "loss": 1.98,
      "step": 62341
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1107643842697144,
      "learning_rate": 1.7203661591790334e-06,
      "loss": 2.3456,
      "step": 62342
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0766984224319458,
      "learning_rate": 1.720135271566169e-06,
      "loss": 2.3872,
      "step": 62343
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0051926374435425,
      "learning_rate": 1.7199043979899221e-06,
      "loss": 2.2401,
      "step": 62344
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1324692964553833,
      "learning_rate": 1.719673538450689e-06,
      "loss": 2.2408,
      "step": 62345
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1634105443954468,
      "learning_rate": 1.719442692948855e-06,
      "loss": 2.4274,
      "step": 62346
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1030068397521973,
      "learning_rate": 1.7192118614848174e-06,
      "loss": 2.3865,
      "step": 62347
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0789469480514526,
      "learning_rate": 1.7189810440589616e-06,
      "loss": 2.4571,
      "step": 62348
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1917301416397095,
      "learning_rate": 1.7187502406716883e-06,
      "loss": 2.6703,
      "step": 62349
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0404914617538452,
      "learning_rate": 1.718519451323376e-06,
      "loss": 2.1544,
      "step": 62350
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0608220100402832,
      "learning_rate": 1.7182886760144257e-06,
      "loss": 2.4032,
      "step": 62351
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0422923564910889,
      "learning_rate": 1.7180579147452237e-06,
      "loss": 2.452,
      "step": 62352
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.029100775718689,
      "learning_rate": 1.717827167516164e-06,
      "loss": 2.4035,
      "step": 62353
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0836533308029175,
      "learning_rate": 1.7175964343276352e-06,
      "loss": 2.3912,
      "step": 62354
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0803574323654175,
      "learning_rate": 1.7173657151800317e-06,
      "loss": 2.2033,
      "step": 62355
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.271499752998352,
      "learning_rate": 1.7171350100737406e-06,
      "loss": 2.4099,
      "step": 62356
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0836855173110962,
      "learning_rate": 1.7169043190091583e-06,
      "loss": 2.1666,
      "step": 62357
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1664361953735352,
      "learning_rate": 1.7166736419866714e-06,
      "loss": 2.2403,
      "step": 62358
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0661308765411377,
      "learning_rate": 1.7164429790066716e-06,
      "loss": 2.2983,
      "step": 62359
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1408442258834839,
      "learning_rate": 1.7162123300695522e-06,
      "loss": 2.5424,
      "step": 62360
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3062193393707275,
      "learning_rate": 1.7159816951757002e-06,
      "loss": 2.5521,
      "step": 62361
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0932378768920898,
      "learning_rate": 1.7157510743255124e-06,
      "loss": 2.1239,
      "step": 62362
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.146878957748413,
      "learning_rate": 1.715520467519376e-06,
      "loss": 2.5915,
      "step": 62363
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1124299764633179,
      "learning_rate": 1.7152898747576818e-06,
      "loss": 2.1325,
      "step": 62364
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0946770906448364,
      "learning_rate": 1.7150592960408196e-06,
      "loss": 2.4792,
      "step": 62365
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3675999641418457,
      "learning_rate": 1.7148287313691846e-06,
      "loss": 2.3125,
      "step": 62366
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1140577793121338,
      "learning_rate": 1.7145981807431632e-06,
      "loss": 2.4457,
      "step": 62367
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0997086763381958,
      "learning_rate": 1.7143676441631496e-06,
      "loss": 2.3814,
      "step": 62368
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.22335684299469,
      "learning_rate": 1.714137121629531e-06,
      "loss": 2.2317,
      "step": 62369
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.048398733139038,
      "learning_rate": 1.713906613142703e-06,
      "loss": 2.2011,
      "step": 62370
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0587267875671387,
      "learning_rate": 1.7136761187030515e-06,
      "loss": 2.246,
      "step": 62371
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0148791074752808,
      "learning_rate": 1.713445638310971e-06,
      "loss": 2.3523,
      "step": 62372
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9722855091094971,
      "learning_rate": 1.71321517196685e-06,
      "loss": 2.2563,
      "step": 62373
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1618090867996216,
      "learning_rate": 1.7129847196710802e-06,
      "loss": 2.0535,
      "step": 62374
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1064574718475342,
      "learning_rate": 1.7127542814240516e-06,
      "loss": 2.6389,
      "step": 62375
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1377840042114258,
      "learning_rate": 1.7125238572261592e-06,
      "loss": 2.1238,
      "step": 62376
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.121289849281311,
      "learning_rate": 1.7122934470777853e-06,
      "loss": 2.3195,
      "step": 62377
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9621329307556152,
      "learning_rate": 1.7120630509793269e-06,
      "loss": 2.1854,
      "step": 62378
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1763321161270142,
      "learning_rate": 1.7118326689311704e-06,
      "loss": 2.3251,
      "step": 62379
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.16683030128479,
      "learning_rate": 1.7116023009337102e-06,
      "loss": 2.212,
      "step": 62380
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1452490091323853,
      "learning_rate": 1.7113719469873335e-06,
      "loss": 2.3014,
      "step": 62381
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.179561972618103,
      "learning_rate": 1.7111416070924347e-06,
      "loss": 2.4446,
      "step": 62382
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.131398320198059,
      "learning_rate": 1.7109112812494e-06,
      "loss": 2.3431,
      "step": 62383
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0345137119293213,
      "learning_rate": 1.7106809694586246e-06,
      "loss": 2.4462,
      "step": 62384
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1052230596542358,
      "learning_rate": 1.710450671720496e-06,
      "loss": 2.2673,
      "step": 62385
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0564045906066895,
      "learning_rate": 1.7102203880354017e-06,
      "loss": 2.3785,
      "step": 62386
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0275819301605225,
      "learning_rate": 1.7099901184037394e-06,
      "loss": 2.2627,
      "step": 62387
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0977436304092407,
      "learning_rate": 1.7097598628258916e-06,
      "loss": 2.283,
      "step": 62388
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.094732403755188,
      "learning_rate": 1.709529621302255e-06,
      "loss": 2.4128,
      "step": 62389
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.101101040840149,
      "learning_rate": 1.7092993938332182e-06,
      "loss": 2.2916,
      "step": 62390
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1202141046524048,
      "learning_rate": 1.7090691804191694e-06,
      "loss": 2.3307,
      "step": 62391
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1206694841384888,
      "learning_rate": 1.7088389810604988e-06,
      "loss": 2.2027,
      "step": 62392
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0366246700286865,
      "learning_rate": 1.7086087957576003e-06,
      "loss": 2.2969,
      "step": 62393
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9725925922393799,
      "learning_rate": 1.7083786245108591e-06,
      "loss": 2.0599,
      "step": 62394
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.103989839553833,
      "learning_rate": 1.7081484673206705e-06,
      "loss": 2.2969,
      "step": 62395
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0967504978179932,
      "learning_rate": 1.7079183241874198e-06,
      "loss": 2.2258,
      "step": 62396
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1064724922180176,
      "learning_rate": 1.7076881951115022e-06,
      "loss": 2.3618,
      "step": 62397
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1609528064727783,
      "learning_rate": 1.7074580800933027e-06,
      "loss": 2.2887,
      "step": 62398
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1542327404022217,
      "learning_rate": 1.707227979133217e-06,
      "loss": 2.2209,
      "step": 62399
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0367480516433716,
      "learning_rate": 1.7069978922316289e-06,
      "loss": 2.3072,
      "step": 62400
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1482828855514526,
      "learning_rate": 1.7067678193889348e-06,
      "loss": 2.2247,
      "step": 62401
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.025869607925415,
      "learning_rate": 1.706537760605521e-06,
      "loss": 2.2929,
      "step": 62402
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2579591274261475,
      "learning_rate": 1.7063077158817787e-06,
      "loss": 2.2212,
      "step": 62403
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0481255054473877,
      "learning_rate": 1.706077685218095e-06,
      "loss": 2.3117,
      "step": 62404
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0142796039581299,
      "learning_rate": 1.705847668614864e-06,
      "loss": 2.3037,
      "step": 62405
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1315103769302368,
      "learning_rate": 1.705617666072471e-06,
      "loss": 2.5924,
      "step": 62406
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0701866149902344,
      "learning_rate": 1.7053876775913125e-06,
      "loss": 2.2735,
      "step": 62407
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.065293550491333,
      "learning_rate": 1.7051577031717715e-06,
      "loss": 2.2798,
      "step": 62408
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9686753749847412,
      "learning_rate": 1.7049277428142441e-06,
      "loss": 2.4592,
      "step": 62409
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1676585674285889,
      "learning_rate": 1.7046977965191147e-06,
      "loss": 2.3593,
      "step": 62410
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0920350551605225,
      "learning_rate": 1.7044678642867775e-06,
      "loss": 2.2875,
      "step": 62411
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0176142454147339,
      "learning_rate": 1.7042379461176196e-06,
      "loss": 2.152,
      "step": 62412
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1587766408920288,
      "learning_rate": 1.7040080420120298e-06,
      "loss": 2.3841,
      "step": 62413
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.097678780555725,
      "learning_rate": 1.7037781519704022e-06,
      "loss": 2.2238,
      "step": 62414
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3987013101577759,
      "learning_rate": 1.7035482759931232e-06,
      "loss": 2.5197,
      "step": 62415
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2213019132614136,
      "learning_rate": 1.7033184140805814e-06,
      "loss": 2.7353,
      "step": 62416
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4402282238006592,
      "learning_rate": 1.7030885662331709e-06,
      "loss": 2.2139,
      "step": 62417
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0005455017089844,
      "learning_rate": 1.7028587324512757e-06,
      "loss": 2.3228,
      "step": 62418
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.021085500717163,
      "learning_rate": 1.7026289127352914e-06,
      "loss": 2.3619,
      "step": 62419
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2191472053527832,
      "learning_rate": 1.702399107085604e-06,
      "loss": 2.5298,
      "step": 62420
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0086770057678223,
      "learning_rate": 1.702169315502601e-06,
      "loss": 2.3841,
      "step": 62421
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0518827438354492,
      "learning_rate": 1.7019395379866777e-06,
      "loss": 2.359,
      "step": 62422
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3084081411361694,
      "learning_rate": 1.7017097745382173e-06,
      "loss": 2.2118,
      "step": 62423
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1332978010177612,
      "learning_rate": 1.7014800251576159e-06,
      "loss": 2.2717,
      "step": 62424
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1702295541763306,
      "learning_rate": 1.7012502898452577e-06,
      "loss": 2.2468,
      "step": 62425
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1075223684310913,
      "learning_rate": 1.7010205686015357e-06,
      "loss": 2.3634,
      "step": 62426
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1649398803710938,
      "learning_rate": 1.7007908614268354e-06,
      "loss": 2.4085,
      "step": 62427
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0854178667068481,
      "learning_rate": 1.7005611683215539e-06,
      "loss": 2.1562,
      "step": 62428
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1044504642486572,
      "learning_rate": 1.70033148928607e-06,
      "loss": 2.0773,
      "step": 62429
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9553132057189941,
      "learning_rate": 1.7001018243207812e-06,
      "loss": 2.1934,
      "step": 62430
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1714588403701782,
      "learning_rate": 1.6998721734260726e-06,
      "loss": 2.2069,
      "step": 62431
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2466447353363037,
      "learning_rate": 1.699642536602336e-06,
      "loss": 2.3049,
      "step": 62432
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0050854682922363,
      "learning_rate": 1.699412913849957e-06,
      "loss": 2.273,
      "step": 62433
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.071757435798645,
      "learning_rate": 1.6991833051693306e-06,
      "loss": 2.344,
      "step": 62434
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1717793941497803,
      "learning_rate": 1.6989537105608412e-06,
      "loss": 2.3836,
      "step": 62435
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.092798113822937,
      "learning_rate": 1.6987241300248814e-06,
      "loss": 2.2855,
      "step": 62436
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1109492778778076,
      "learning_rate": 1.6984945635618355e-06,
      "loss": 2.2113,
      "step": 62437
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0451732873916626,
      "learning_rate": 1.6982650111720999e-06,
      "loss": 2.2366,
      "step": 62438
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0622109174728394,
      "learning_rate": 1.6980354728560566e-06,
      "loss": 2.2843,
      "step": 62439
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0071195363998413,
      "learning_rate": 1.6978059486141008e-06,
      "loss": 2.0922,
      "step": 62440
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0912913084030151,
      "learning_rate": 1.6975764384466186e-06,
      "loss": 2.3144,
      "step": 62441
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0636650323867798,
      "learning_rate": 1.697346942353999e-06,
      "loss": 2.2506,
      "step": 62442
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1040241718292236,
      "learning_rate": 1.6971174603366291e-06,
      "loss": 2.1256,
      "step": 62443
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0518298149108887,
      "learning_rate": 1.6968879923949023e-06,
      "loss": 2.1683,
      "step": 62444
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0381373167037964,
      "learning_rate": 1.6966585385292035e-06,
      "loss": 2.4731,
      "step": 62445
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.072473168373108,
      "learning_rate": 1.6964290987399257e-06,
      "loss": 2.2136,
      "step": 62446
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1156200170516968,
      "learning_rate": 1.6961996730274565e-06,
      "loss": 2.4667,
      "step": 62447
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0059623718261719,
      "learning_rate": 1.6959702613921813e-06,
      "loss": 2.3669,
      "step": 62448
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.111085295677185,
      "learning_rate": 1.6957408638344941e-06,
      "loss": 2.4513,
      "step": 62449
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9947491884231567,
      "learning_rate": 1.695511480354779e-06,
      "loss": 2.3747,
      "step": 62450
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2361544370651245,
      "learning_rate": 1.69528211095343e-06,
      "loss": 2.1489,
      "step": 62451
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1388838291168213,
      "learning_rate": 1.6950527556308305e-06,
      "loss": 2.3261,
      "step": 62452
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1241716146469116,
      "learning_rate": 1.6948234143873754e-06,
      "loss": 2.2,
      "step": 62453
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.116388201713562,
      "learning_rate": 1.6945940872234467e-06,
      "loss": 2.2539,
      "step": 62454
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0820441246032715,
      "learning_rate": 1.6943647741394432e-06,
      "loss": 2.3254,
      "step": 62455
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2646740674972534,
      "learning_rate": 1.6941354751357409e-06,
      "loss": 2.2062,
      "step": 62456
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0983824729919434,
      "learning_rate": 1.6939061902127385e-06,
      "loss": 2.3533,
      "step": 62457
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0735894441604614,
      "learning_rate": 1.6936769193708168e-06,
      "loss": 2.3021,
      "step": 62458
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.076067328453064,
      "learning_rate": 1.6934476626103724e-06,
      "loss": 2.2464,
      "step": 62459
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.150417447090149,
      "learning_rate": 1.693218419931788e-06,
      "loss": 2.3563,
      "step": 62460
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0615304708480835,
      "learning_rate": 1.6929891913354568e-06,
      "loss": 2.3483,
      "step": 62461
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1331382989883423,
      "learning_rate": 1.6927599768217617e-06,
      "loss": 2.4896,
      "step": 62462
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.166394591331482,
      "learning_rate": 1.6925307763910981e-06,
      "loss": 2.3767,
      "step": 62463
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0710334777832031,
      "learning_rate": 1.692301590043849e-06,
      "loss": 2.3564,
      "step": 62464
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1208298206329346,
      "learning_rate": 1.692072417780406e-06,
      "loss": 2.4024,
      "step": 62465
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.014985203742981,
      "learning_rate": 1.6918432596011558e-06,
      "loss": 2.3352,
      "step": 62466
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1499683856964111,
      "learning_rate": 1.6916141155064892e-06,
      "loss": 2.2607,
      "step": 62467
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0400835275650024,
      "learning_rate": 1.6913849854967934e-06,
      "loss": 2.2173,
      "step": 62468
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2201260328292847,
      "learning_rate": 1.6911558695724561e-06,
      "loss": 2.2175,
      "step": 62469
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0632799863815308,
      "learning_rate": 1.6909267677338647e-06,
      "loss": 2.4811,
      "step": 62470
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0659693479537964,
      "learning_rate": 1.6906976799814112e-06,
      "loss": 2.2232,
      "step": 62471
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.047764539718628,
      "learning_rate": 1.6904686063154795e-06,
      "loss": 2.3317,
      "step": 62472
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1539759635925293,
      "learning_rate": 1.690239546736463e-06,
      "loss": 2.2524,
      "step": 62473
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0063061714172363,
      "learning_rate": 1.6900105012447442e-06,
      "loss": 2.1135,
      "step": 62474
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.058573603630066,
      "learning_rate": 1.6897814698407178e-06,
      "loss": 2.3288,
      "step": 62475
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1125411987304688,
      "learning_rate": 1.6895524525247687e-06,
      "loss": 2.2014,
      "step": 62476
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2293709516525269,
      "learning_rate": 1.689323449297282e-06,
      "loss": 2.4909,
      "step": 62477
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0124151706695557,
      "learning_rate": 1.6890944601586534e-06,
      "loss": 2.1748,
      "step": 62478
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0842474699020386,
      "learning_rate": 1.6888654851092633e-06,
      "loss": 2.3636,
      "step": 62479
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1680728197097778,
      "learning_rate": 1.6886365241495061e-06,
      "loss": 2.3111,
      "step": 62480
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.261526107788086,
      "learning_rate": 1.688407577279767e-06,
      "loss": 2.3165,
      "step": 62481
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0196566581726074,
      "learning_rate": 1.6881786445004345e-06,
      "loss": 2.3221,
      "step": 62482
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1429953575134277,
      "learning_rate": 1.6879497258118949e-06,
      "loss": 2.3443,
      "step": 62483
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1074780225753784,
      "learning_rate": 1.687720821214539e-06,
      "loss": 2.4979,
      "step": 62484
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1235629320144653,
      "learning_rate": 1.6874919307087534e-06,
      "loss": 2.3696,
      "step": 62485
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0328369140625,
      "learning_rate": 1.6872630542949275e-06,
      "loss": 2.3105,
      "step": 62486
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0729814767837524,
      "learning_rate": 1.6870341919734468e-06,
      "loss": 2.1598,
      "step": 62487
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0988801717758179,
      "learning_rate": 1.686805343744703e-06,
      "loss": 2.2521,
      "step": 62488
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0766810178756714,
      "learning_rate": 1.6865765096090803e-06,
      "loss": 2.4646,
      "step": 62489
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.084106683731079,
      "learning_rate": 1.6863476895669695e-06,
      "loss": 2.3282,
      "step": 62490
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.028562068939209,
      "learning_rate": 1.686118883618756e-06,
      "loss": 2.1883,
      "step": 62491
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.130860447883606,
      "learning_rate": 1.6858900917648313e-06,
      "loss": 2.4399,
      "step": 62492
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1166446208953857,
      "learning_rate": 1.6856613140055778e-06,
      "loss": 2.4744,
      "step": 62493
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0405640602111816,
      "learning_rate": 1.6854325503413915e-06,
      "loss": 2.3207,
      "step": 62494
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1279199123382568,
      "learning_rate": 1.6852038007726512e-06,
      "loss": 2.2424,
      "step": 62495
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0278937816619873,
      "learning_rate": 1.68497506529975e-06,
      "loss": 2.3881,
      "step": 62496
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.288014531135559,
      "learning_rate": 1.6847463439230726e-06,
      "loss": 2.367,
      "step": 62497
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.053849697113037,
      "learning_rate": 1.6845176366430117e-06,
      "loss": 2.389,
      "step": 62498
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0405434370040894,
      "learning_rate": 1.6842889434599486e-06,
      "loss": 2.2551,
      "step": 62499
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9588403701782227,
      "learning_rate": 1.6840602643742765e-06,
      "loss": 2.2648,
      "step": 62500
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9681591391563416,
      "learning_rate": 1.6838315993863786e-06,
      "loss": 2.2951,
      "step": 62501
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0160539150238037,
      "learning_rate": 1.6836029484966476e-06,
      "loss": 2.1886,
      "step": 62502
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0965499877929688,
      "learning_rate": 1.6833743117054678e-06,
      "loss": 2.4848,
      "step": 62503
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0633385181427002,
      "learning_rate": 1.6831456890132248e-06,
      "loss": 2.3422,
      "step": 62504
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1178382635116577,
      "learning_rate": 1.682917080420312e-06,
      "loss": 2.1453,
      "step": 62505
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1224863529205322,
      "learning_rate": 1.6826884859271098e-06,
      "loss": 2.4801,
      "step": 62506
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.086501121520996,
      "learning_rate": 1.6824599055340129e-06,
      "loss": 2.4721,
      "step": 62507
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.032645344734192,
      "learning_rate": 1.682231339241406e-06,
      "loss": 2.2649,
      "step": 62508
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.284087061882019,
      "learning_rate": 1.6820027870496747e-06,
      "loss": 2.2972,
      "step": 62509
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0304970741271973,
      "learning_rate": 1.6817742489592071e-06,
      "loss": 2.3956,
      "step": 62510
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1952638626098633,
      "learning_rate": 1.6815457249703927e-06,
      "loss": 2.2039,
      "step": 62511
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9836735725402832,
      "learning_rate": 1.6813172150836144e-06,
      "loss": 2.2939,
      "step": 62512
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0754283666610718,
      "learning_rate": 1.6810887192992664e-06,
      "loss": 2.114,
      "step": 62513
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.048297643661499,
      "learning_rate": 1.6808602376177296e-06,
      "loss": 2.4722,
      "step": 62514
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1044777631759644,
      "learning_rate": 1.680631770039396e-06,
      "loss": 2.2177,
      "step": 62515
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.046035647392273,
      "learning_rate": 1.6804033165646493e-06,
      "loss": 2.4617,
      "step": 62516
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0529593229293823,
      "learning_rate": 1.6801748771938808e-06,
      "loss": 2.3704,
      "step": 62517
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.086966633796692,
      "learning_rate": 1.6799464519274734e-06,
      "loss": 2.4028,
      "step": 62518
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0527925491333008,
      "learning_rate": 1.6797180407658188e-06,
      "loss": 2.3205,
      "step": 62519
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1318564414978027,
      "learning_rate": 1.6794896437092988e-06,
      "loss": 2.2293,
      "step": 62520
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0915343761444092,
      "learning_rate": 1.6792612607583092e-06,
      "loss": 2.4228,
      "step": 62521
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0050662755966187,
      "learning_rate": 1.679032891913227e-06,
      "loss": 2.3156,
      "step": 62522
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.203274130821228,
      "learning_rate": 1.6788045371744456e-06,
      "loss": 2.128,
      "step": 62523
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1080601215362549,
      "learning_rate": 1.6785761965423486e-06,
      "loss": 2.3074,
      "step": 62524
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0451207160949707,
      "learning_rate": 1.6783478700173272e-06,
      "loss": 2.2272,
      "step": 62525
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0181992053985596,
      "learning_rate": 1.6781195575997643e-06,
      "loss": 2.1773,
      "step": 62526
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0308082103729248,
      "learning_rate": 1.677891259290051e-06,
      "loss": 2.3891,
      "step": 62527
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3437978029251099,
      "learning_rate": 1.6776629750885699e-06,
      "loss": 2.2663,
      "step": 62528
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0394257307052612,
      "learning_rate": 1.677434704995713e-06,
      "loss": 2.4355,
      "step": 62529
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0082249641418457,
      "learning_rate": 1.6772064490118644e-06,
      "loss": 2.3091,
      "step": 62530
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0047703981399536,
      "learning_rate": 1.6769782071374085e-06,
      "loss": 2.2457,
      "step": 62531
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1881765127182007,
      "learning_rate": 1.6767499793727382e-06,
      "loss": 2.1856,
      "step": 62532
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2127583026885986,
      "learning_rate": 1.6765217657182342e-06,
      "loss": 2.4428,
      "step": 62533
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0445762872695923,
      "learning_rate": 1.6762935661742896e-06,
      "loss": 2.3865,
      "step": 62534
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.241264820098877,
      "learning_rate": 1.6760653807412874e-06,
      "loss": 2.3372,
      "step": 62535
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.135627269744873,
      "learning_rate": 1.675837209419613e-06,
      "loss": 2.4369,
      "step": 62536
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0928962230682373,
      "learning_rate": 1.675609052209658e-06,
      "loss": 2.3656,
      "step": 62537
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1353721618652344,
      "learning_rate": 1.6753809091118066e-06,
      "loss": 2.5779,
      "step": 62538
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.114949345588684,
      "learning_rate": 1.6751527801264422e-06,
      "loss": 2.5506,
      "step": 62539
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0332177877426147,
      "learning_rate": 1.6749246652539585e-06,
      "loss": 2.1946,
      "step": 62540
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1507817506790161,
      "learning_rate": 1.6746965644947355e-06,
      "loss": 2.5154,
      "step": 62541
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1019723415374756,
      "learning_rate": 1.6744684778491648e-06,
      "loss": 2.391,
      "step": 62542
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0463690757751465,
      "learning_rate": 1.6742404053176299e-06,
      "loss": 2.3725,
      "step": 62543
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0617085695266724,
      "learning_rate": 1.6740123469005198e-06,
      "loss": 2.3164,
      "step": 62544
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2078096866607666,
      "learning_rate": 1.6737843025982191e-06,
      "loss": 2.0807,
      "step": 62545
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.359082579612732,
      "learning_rate": 1.6735562724111165e-06,
      "loss": 2.3912,
      "step": 62546
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0935871601104736,
      "learning_rate": 1.6733282563395992e-06,
      "loss": 2.3262,
      "step": 62547
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.03642737865448,
      "learning_rate": 1.6731002543840502e-06,
      "loss": 2.2461,
      "step": 62548
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0333189964294434,
      "learning_rate": 1.672872266544856e-06,
      "loss": 2.358,
      "step": 62549
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1444034576416016,
      "learning_rate": 1.6726442928224074e-06,
      "loss": 2.4067,
      "step": 62550
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2444579601287842,
      "learning_rate": 1.6724163332170862e-06,
      "loss": 2.3872,
      "step": 62551
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.173388123512268,
      "learning_rate": 1.6721883877292833e-06,
      "loss": 2.4361,
      "step": 62552
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1221182346343994,
      "learning_rate": 1.6719604563593806e-06,
      "loss": 2.3751,
      "step": 62553
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.161819577217102,
      "learning_rate": 1.6717325391077688e-06,
      "loss": 2.2016,
      "step": 62554
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.106637716293335,
      "learning_rate": 1.6715046359748299e-06,
      "loss": 2.3135,
      "step": 62555
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2652614116668701,
      "learning_rate": 1.6712767469609558e-06,
      "loss": 2.6486,
      "step": 62556
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0250825881958008,
      "learning_rate": 1.6710488720665264e-06,
      "loss": 2.1681,
      "step": 62557
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0556262731552124,
      "learning_rate": 1.6708210112919344e-06,
      "loss": 2.3128,
      "step": 62558
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0884376764297485,
      "learning_rate": 1.6705931646375629e-06,
      "loss": 2.2473,
      "step": 62559
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1247698068618774,
      "learning_rate": 1.670365332103797e-06,
      "loss": 2.2411,
      "step": 62560
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.205075740814209,
      "learning_rate": 1.6701375136910236e-06,
      "loss": 2.4194,
      "step": 62561
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0823975801467896,
      "learning_rate": 1.6699097093996308e-06,
      "loss": 2.2369,
      "step": 62562
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.935485303401947,
      "learning_rate": 1.6696819192300006e-06,
      "loss": 2.2019,
      "step": 62563
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.110572099685669,
      "learning_rate": 1.669454143182525e-06,
      "loss": 2.4201,
      "step": 62564
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1321079730987549,
      "learning_rate": 1.6692263812575881e-06,
      "loss": 2.2302,
      "step": 62565
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0978156328201294,
      "learning_rate": 1.6689986334555718e-06,
      "loss": 2.3753,
      "step": 62566
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1846033334732056,
      "learning_rate": 1.668770899776867e-06,
      "loss": 2.4852,
      "step": 62567
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1513599157333374,
      "learning_rate": 1.6685431802218578e-06,
      "loss": 2.5193,
      "step": 62568
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.011767864227295,
      "learning_rate": 1.6683154747909314e-06,
      "loss": 2.1326,
      "step": 62569
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1542088985443115,
      "learning_rate": 1.668087783484471e-06,
      "loss": 2.3148,
      "step": 62570
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1026338338851929,
      "learning_rate": 1.6678601063028677e-06,
      "loss": 2.4351,
      "step": 62571
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1247068643569946,
      "learning_rate": 1.6676324432465018e-06,
      "loss": 2.3583,
      "step": 62572
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.061522126197815,
      "learning_rate": 1.6674047943157668e-06,
      "loss": 2.3574,
      "step": 62573
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0246797800064087,
      "learning_rate": 1.6671771595110397e-06,
      "loss": 2.2992,
      "step": 62574
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.9834758639335632,
      "learning_rate": 1.6669495388327117e-06,
      "loss": 2.4434,
      "step": 62575
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0959917306900024,
      "learning_rate": 1.6667219322811656e-06,
      "loss": 2.3892,
      "step": 62576
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1970105171203613,
      "learning_rate": 1.6664943398567911e-06,
      "loss": 2.5339,
      "step": 62577
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0990458726882935,
      "learning_rate": 1.6662667615599704e-06,
      "loss": 2.1032,
      "step": 62578
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0169472694396973,
      "learning_rate": 1.6660391973910927e-06,
      "loss": 2.3928,
      "step": 62579
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.185335397720337,
      "learning_rate": 1.6658116473505404e-06,
      "loss": 2.2729,
      "step": 62580
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0879548788070679,
      "learning_rate": 1.6655841114387028e-06,
      "loss": 2.2258,
      "step": 62581
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0839093923568726,
      "learning_rate": 1.6653565896559609e-06,
      "loss": 2.2463,
      "step": 62582
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.263981580734253,
      "learning_rate": 1.6651290820027067e-06,
      "loss": 2.3894,
      "step": 62583
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0808355808258057,
      "learning_rate": 1.6649015884793186e-06,
      "loss": 2.2992,
      "step": 62584
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3765900135040283,
      "learning_rate": 1.6646741090861895e-06,
      "loss": 2.3071,
      "step": 62585
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.036597490310669,
      "learning_rate": 1.6644466438237016e-06,
      "loss": 2.3566,
      "step": 62586
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0772030353546143,
      "learning_rate": 1.6642191926922403e-06,
      "loss": 2.3468,
      "step": 62587
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.742199659347534,
      "learning_rate": 1.663991755692189e-06,
      "loss": 2.3047,
      "step": 62588
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3395482301712036,
      "learning_rate": 1.6637643328239384e-06,
      "loss": 2.1289,
      "step": 62589
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0200543403625488,
      "learning_rate": 1.6635369240878684e-06,
      "loss": 2.5459,
      "step": 62590
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.1710283756256104,
      "learning_rate": 1.6633095294843705e-06,
      "loss": 2.2869,
      "step": 62591
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.0222272872924805,
      "learning_rate": 1.6630821490138237e-06,
      "loss": 2.3886,
      "step": 62592
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0341496467590332,
      "learning_rate": 1.6628547826766206e-06,
      "loss": 2.4884,
      "step": 62593
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2561702728271484,
      "learning_rate": 1.6626274304731426e-06,
      "loss": 2.381,
      "step": 62594
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0820916891098022,
      "learning_rate": 1.662400092403773e-06,
      "loss": 2.2263,
      "step": 62595
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0662676095962524,
      "learning_rate": 1.6621727684689016e-06,
      "loss": 2.2355,
      "step": 62596
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.979387640953064,
      "learning_rate": 1.6619454586689098e-06,
      "loss": 2.3474,
      "step": 62597
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1520020961761475,
      "learning_rate": 1.6617181630041878e-06,
      "loss": 2.2837,
      "step": 62598
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.07976496219635,
      "learning_rate": 1.6614908814751162e-06,
      "loss": 2.3419,
      "step": 62599
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1368951797485352,
      "learning_rate": 1.661263614082086e-06,
      "loss": 2.146,
      "step": 62600
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1402575969696045,
      "learning_rate": 1.6610363608254743e-06,
      "loss": 2.357,
      "step": 62601
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9325919151306152,
      "learning_rate": 1.6608091217056733e-06,
      "loss": 2.2789,
      "step": 62602
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.08584463596344,
      "learning_rate": 1.6605818967230635e-06,
      "loss": 2.4031,
      "step": 62603
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7001714706420898,
      "learning_rate": 1.660354685878034e-06,
      "loss": 2.2639,
      "step": 62604
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0771557092666626,
      "learning_rate": 1.6601274891709673e-06,
      "loss": 2.1413,
      "step": 62605
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9500178694725037,
      "learning_rate": 1.6599003066022513e-06,
      "loss": 2.086,
      "step": 62606
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1000208854675293,
      "learning_rate": 1.6596731381722663e-06,
      "loss": 2.2305,
      "step": 62607
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2268731594085693,
      "learning_rate": 1.6594459838814036e-06,
      "loss": 2.3737,
      "step": 62608
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1181695461273193,
      "learning_rate": 1.6592188437300427e-06,
      "loss": 2.3364,
      "step": 62609
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1857163906097412,
      "learning_rate": 1.6589917177185732e-06,
      "loss": 2.2489,
      "step": 62610
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0496416091918945,
      "learning_rate": 1.6587646058473761e-06,
      "loss": 2.4734,
      "step": 62611
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0480451583862305,
      "learning_rate": 1.658537508116841e-06,
      "loss": 2.4871,
      "step": 62612
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.062927484512329,
      "learning_rate": 1.6583104245273506e-06,
      "loss": 2.2717,
      "step": 62613
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2751213312149048,
      "learning_rate": 1.6580833550792886e-06,
      "loss": 2.2251,
      "step": 62614
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2738078832626343,
      "learning_rate": 1.6578562997730396e-06,
      "loss": 2.3992,
      "step": 62615
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0381073951721191,
      "learning_rate": 1.6576292586089916e-06,
      "loss": 2.4536,
      "step": 62616
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9664440751075745,
      "learning_rate": 1.657402231587526e-06,
      "loss": 2.2644,
      "step": 62617
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1230201721191406,
      "learning_rate": 1.6571752187090306e-06,
      "loss": 2.2535,
      "step": 62618
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1060688495635986,
      "learning_rate": 1.6569482199738885e-06,
      "loss": 2.5407,
      "step": 62619
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.15256929397583,
      "learning_rate": 1.6567212353824858e-06,
      "loss": 2.1102,
      "step": 62620
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2175403833389282,
      "learning_rate": 1.656494264935208e-06,
      "loss": 2.4196,
      "step": 62621
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4999651908874512,
      "learning_rate": 1.6562673086324354e-06,
      "loss": 2.2598,
      "step": 62622
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9953174591064453,
      "learning_rate": 1.6560403664745583e-06,
      "loss": 2.0942,
      "step": 62623
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.028147578239441,
      "learning_rate": 1.6558134384619562e-06,
      "loss": 2.2274,
      "step": 62624
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0547257661819458,
      "learning_rate": 1.6555865245950187e-06,
      "loss": 2.2714,
      "step": 62625
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9795587658882141,
      "learning_rate": 1.6553596248741299e-06,
      "loss": 2.2925,
      "step": 62626
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1175286769866943,
      "learning_rate": 1.655132739299672e-06,
      "loss": 2.3314,
      "step": 62627
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.081601619720459,
      "learning_rate": 1.6549058678720276e-06,
      "loss": 2.2741,
      "step": 62628
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.058395504951477,
      "learning_rate": 1.6546790105915866e-06,
      "loss": 2.2033,
      "step": 62629
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0632381439208984,
      "learning_rate": 1.65445216745873e-06,
      "loss": 2.1883,
      "step": 62630
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.114145040512085,
      "learning_rate": 1.6542253384738448e-06,
      "loss": 2.4344,
      "step": 62631
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1085494756698608,
      "learning_rate": 1.6539985236373125e-06,
      "loss": 2.3847,
      "step": 62632
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1667627096176147,
      "learning_rate": 1.653771722949522e-06,
      "loss": 2.158,
      "step": 62633
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9991733431816101,
      "learning_rate": 1.6535449364108524e-06,
      "loss": 2.2819,
      "step": 62634
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.105330228805542,
      "learning_rate": 1.6533181640216944e-06,
      "loss": 2.4119,
      "step": 62635
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0596091747283936,
      "learning_rate": 1.6530914057824255e-06,
      "loss": 2.2646,
      "step": 62636
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1113697290420532,
      "learning_rate": 1.6528646616934363e-06,
      "loss": 2.4846,
      "step": 62637
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1553601026535034,
      "learning_rate": 1.6526379317551067e-06,
      "loss": 2.2126,
      "step": 62638
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0502302646636963,
      "learning_rate": 1.6524112159678274e-06,
      "loss": 2.2477,
      "step": 62639
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0938643217086792,
      "learning_rate": 1.6521845143319738e-06,
      "loss": 2.2039,
      "step": 62640
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0891927480697632,
      "learning_rate": 1.6519578268479364e-06,
      "loss": 2.3679,
      "step": 62641
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0310636758804321,
      "learning_rate": 1.651731153516095e-06,
      "loss": 2.2366,
      "step": 62642
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1297485828399658,
      "learning_rate": 1.6515044943368408e-06,
      "loss": 2.387,
      "step": 62643
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1321485042572021,
      "learning_rate": 1.6512778493105498e-06,
      "loss": 2.1847,
      "step": 62644
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0470935106277466,
      "learning_rate": 1.6510512184376137e-06,
      "loss": 2.3537,
      "step": 62645
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.131973385810852,
      "learning_rate": 1.6508246017184105e-06,
      "loss": 2.5381,
      "step": 62646
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0980627536773682,
      "learning_rate": 1.6505979991533294e-06,
      "loss": 2.6247,
      "step": 62647
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0930073261260986,
      "learning_rate": 1.6503714107427526e-06,
      "loss": 2.2657,
      "step": 62648
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.189742922782898,
      "learning_rate": 1.650144836487062e-06,
      "loss": 2.2546,
      "step": 62649
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.064244270324707,
      "learning_rate": 1.649918276386646e-06,
      "loss": 2.1186,
      "step": 62650
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0993677377700806,
      "learning_rate": 1.6496917304418837e-06,
      "loss": 2.3901,
      "step": 62651
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1166999340057373,
      "learning_rate": 1.649465198653164e-06,
      "loss": 2.3031,
      "step": 62652
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9898236989974976,
      "learning_rate": 1.6492386810208695e-06,
      "loss": 2.2527,
      "step": 62653
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1367592811584473,
      "learning_rate": 1.6490121775453804e-06,
      "loss": 2.3171,
      "step": 62654
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.162293553352356,
      "learning_rate": 1.6487856882270859e-06,
      "loss": 2.495,
      "step": 62655
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2304503917694092,
      "learning_rate": 1.6485592130663686e-06,
      "loss": 2.347,
      "step": 62656
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0643678903579712,
      "learning_rate": 1.6483327520636083e-06,
      "loss": 2.3017,
      "step": 62657
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.25244140625,
      "learning_rate": 1.6481063052191959e-06,
      "loss": 2.3252,
      "step": 62658
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6688343286514282,
      "learning_rate": 1.6478798725335088e-06,
      "loss": 1.9072,
      "step": 62659
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.134996771812439,
      "learning_rate": 1.6476534540069356e-06,
      "loss": 2.3449,
      "step": 62660
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0805097818374634,
      "learning_rate": 1.647427049639856e-06,
      "loss": 2.5211,
      "step": 62661
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0923211574554443,
      "learning_rate": 1.6472006594326583e-06,
      "loss": 2.3601,
      "step": 62662
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0565019845962524,
      "learning_rate": 1.6469742833857228e-06,
      "loss": 2.2459,
      "step": 62663
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1034479141235352,
      "learning_rate": 1.6467479214994364e-06,
      "loss": 2.0857,
      "step": 62664
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.041736125946045,
      "learning_rate": 1.646521573774178e-06,
      "loss": 2.136,
      "step": 62665
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0739048719406128,
      "learning_rate": 1.6462952402103394e-06,
      "loss": 2.2406,
      "step": 62666
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.239638090133667,
      "learning_rate": 1.6460689208082947e-06,
      "loss": 2.1966,
      "step": 62667
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2164223194122314,
      "learning_rate": 1.6458426155684337e-06,
      "loss": 2.3109,
      "step": 62668
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0320639610290527,
      "learning_rate": 1.6456163244911371e-06,
      "loss": 2.1552,
      "step": 62669
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1420061588287354,
      "learning_rate": 1.6453900475767915e-06,
      "loss": 2.2483,
      "step": 62670
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1217894554138184,
      "learning_rate": 1.6451637848257774e-06,
      "loss": 2.3768,
      "step": 62671
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2012698650360107,
      "learning_rate": 1.6449375362384812e-06,
      "loss": 2.0728,
      "step": 62672
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0324115753173828,
      "learning_rate": 1.6447113018152827e-06,
      "loss": 2.407,
      "step": 62673
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0772582292556763,
      "learning_rate": 1.6444850815565704e-06,
      "loss": 2.4097,
      "step": 62674
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2601757049560547,
      "learning_rate": 1.6442588754627232e-06,
      "loss": 2.4343,
      "step": 62675
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9768235087394714,
      "learning_rate": 1.6440326835341292e-06,
      "loss": 2.538,
      "step": 62676
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0639963150024414,
      "learning_rate": 1.6438065057711683e-06,
      "loss": 2.3937,
      "step": 62677
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0865217447280884,
      "learning_rate": 1.6435803421742225e-06,
      "loss": 2.2165,
      "step": 62678
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3318318128585815,
      "learning_rate": 1.6433541927436802e-06,
      "loss": 2.2833,
      "step": 62679
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.19544517993927,
      "learning_rate": 1.6431280574799236e-06,
      "loss": 2.5347,
      "step": 62680
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.228955864906311,
      "learning_rate": 1.642901936383331e-06,
      "loss": 2.1844,
      "step": 62681
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1051265001296997,
      "learning_rate": 1.6426758294542911e-06,
      "loss": 2.2736,
      "step": 62682
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1592868566513062,
      "learning_rate": 1.642449736693187e-06,
      "loss": 2.3953,
      "step": 62683
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.04412043094635,
      "learning_rate": 1.6422236581003969e-06,
      "loss": 2.5332,
      "step": 62684
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.079338788986206,
      "learning_rate": 1.64199759367631e-06,
      "loss": 2.3294,
      "step": 62685
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2188940048217773,
      "learning_rate": 1.6417715434213056e-06,
      "loss": 2.1535,
      "step": 62686
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0477880239486694,
      "learning_rate": 1.64154550733577e-06,
      "loss": 2.4964,
      "step": 62687
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.065092921257019,
      "learning_rate": 1.6413194854200832e-06,
      "loss": 2.4116,
      "step": 62688
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.99607914686203,
      "learning_rate": 1.6410934776746323e-06,
      "loss": 2.4819,
      "step": 62689
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1369236707687378,
      "learning_rate": 1.6408674840997962e-06,
      "loss": 2.4177,
      "step": 62690
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0240676403045654,
      "learning_rate": 1.6406415046959623e-06,
      "loss": 2.3788,
      "step": 62691
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0073777437210083,
      "learning_rate": 1.6404155394635101e-06,
      "loss": 2.2745,
      "step": 62692
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.052780032157898,
      "learning_rate": 1.640189588402825e-06,
      "loss": 2.4649,
      "step": 62693
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.106825590133667,
      "learning_rate": 1.639963651514287e-06,
      "loss": 2.3998,
      "step": 62694
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0584003925323486,
      "learning_rate": 1.639737728798283e-06,
      "loss": 2.3234,
      "step": 62695
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.071509599685669,
      "learning_rate": 1.6395118202551928e-06,
      "loss": 2.1861,
      "step": 62696
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1541072130203247,
      "learning_rate": 1.639285925885402e-06,
      "loss": 2.2306,
      "step": 62697
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1261414289474487,
      "learning_rate": 1.6390600456892901e-06,
      "loss": 2.1665,
      "step": 62698
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0379326343536377,
      "learning_rate": 1.6388341796672446e-06,
      "loss": 2.3812,
      "step": 62699
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1479769945144653,
      "learning_rate": 1.638608327819644e-06,
      "loss": 2.4004,
      "step": 62700
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2331756353378296,
      "learning_rate": 1.6383824901468758e-06,
      "loss": 2.1736,
      "step": 62701
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0794627666473389,
      "learning_rate": 1.6381566666493166e-06,
      "loss": 2.3358,
      "step": 62702
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.054582953453064,
      "learning_rate": 1.6379308573273567e-06,
      "loss": 2.3864,
      "step": 62703
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0507361888885498,
      "learning_rate": 1.6377050621813738e-06,
      "loss": 2.1257,
      "step": 62704
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.052292823791504,
      "learning_rate": 1.6374792812117535e-06,
      "loss": 2.284,
      "step": 62705
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1526950597763062,
      "learning_rate": 1.637253514418874e-06,
      "loss": 2.3311,
      "step": 62706
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0603148937225342,
      "learning_rate": 1.6370277618031227e-06,
      "loss": 2.0353,
      "step": 62707
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0534178018569946,
      "learning_rate": 1.6368020233648795e-06,
      "loss": 2.2423,
      "step": 62708
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1719684600830078,
      "learning_rate": 1.6365762991045298e-06,
      "loss": 2.568,
      "step": 62709
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0166964530944824,
      "learning_rate": 1.6363505890224529e-06,
      "loss": 1.9803,
      "step": 62710
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9699150323867798,
      "learning_rate": 1.6361248931190355e-06,
      "loss": 2.3421,
      "step": 62711
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0524399280548096,
      "learning_rate": 1.6358992113946581e-06,
      "loss": 2.4029,
      "step": 62712
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.211692214012146,
      "learning_rate": 1.6356735438497006e-06,
      "loss": 2.3465,
      "step": 62713
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.166615605354309,
      "learning_rate": 1.6354478904845506e-06,
      "loss": 2.4261,
      "step": 62714
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.09371018409729,
      "learning_rate": 1.6352222512995864e-06,
      "loss": 2.2367,
      "step": 62715
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1074248552322388,
      "learning_rate": 1.6349966262951934e-06,
      "loss": 2.4864,
      "step": 62716
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0300087928771973,
      "learning_rate": 1.6347710154717512e-06,
      "loss": 2.533,
      "step": 62717
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1053560972213745,
      "learning_rate": 1.6345454188296495e-06,
      "loss": 2.4093,
      "step": 62718
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0561999082565308,
      "learning_rate": 1.6343198363692603e-06,
      "loss": 2.3026,
      "step": 62719
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0688989162445068,
      "learning_rate": 1.6340942680909734e-06,
      "loss": 2.104,
      "step": 62720
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.057762861251831,
      "learning_rate": 1.633868713995166e-06,
      "loss": 2.2854,
      "step": 62721
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1070094108581543,
      "learning_rate": 1.6336431740822268e-06,
      "loss": 2.504,
      "step": 62722
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1492352485656738,
      "learning_rate": 1.633417648352531e-06,
      "loss": 2.4321,
      "step": 62723
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1693778038024902,
      "learning_rate": 1.6331921368064674e-06,
      "loss": 2.3468,
      "step": 62724
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9924820065498352,
      "learning_rate": 1.6329666394444133e-06,
      "loss": 2.2934,
      "step": 62725
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9808425307273865,
      "learning_rate": 1.6327411562667561e-06,
      "loss": 2.1691,
      "step": 62726
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.272724986076355,
      "learning_rate": 1.6325156872738723e-06,
      "loss": 2.4131,
      "step": 62727
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2035329341888428,
      "learning_rate": 1.6322902324661493e-06,
      "loss": 2.1895,
      "step": 62728
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.100154161453247,
      "learning_rate": 1.6320647918439647e-06,
      "loss": 2.456,
      "step": 62729
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0423489809036255,
      "learning_rate": 1.631839365407706e-06,
      "loss": 2.4365,
      "step": 62730
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1101348400115967,
      "learning_rate": 1.6316139531577513e-06,
      "loss": 2.3093,
      "step": 62731
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1416970491409302,
      "learning_rate": 1.631388555094484e-06,
      "loss": 2.3974,
      "step": 62732
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0450164079666138,
      "learning_rate": 1.631163171218284e-06,
      "loss": 2.2446,
      "step": 62733
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1451138257980347,
      "learning_rate": 1.6309378015295374e-06,
      "loss": 2.2887,
      "step": 62734
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0334798097610474,
      "learning_rate": 1.6307124460286216e-06,
      "loss": 2.2078,
      "step": 62735
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0271005630493164,
      "learning_rate": 1.6304871047159242e-06,
      "loss": 2.1016,
      "step": 62736
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.101626992225647,
      "learning_rate": 1.6302617775918217e-06,
      "loss": 2.2502,
      "step": 62737
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0963088274002075,
      "learning_rate": 1.6300364646567012e-06,
      "loss": 2.3494,
      "step": 62738
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0864591598510742,
      "learning_rate": 1.6298111659109416e-06,
      "loss": 2.2193,
      "step": 62739
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1337358951568604,
      "learning_rate": 1.6295858813549237e-06,
      "loss": 2.1354,
      "step": 62740
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1695410013198853,
      "learning_rate": 1.6293606109890336e-06,
      "loss": 2.4999,
      "step": 62741
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0311856269836426,
      "learning_rate": 1.629135354813648e-06,
      "loss": 2.1958,
      "step": 62742
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9676908850669861,
      "learning_rate": 1.628910112829153e-06,
      "loss": 2.2771,
      "step": 62743
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.136315941810608,
      "learning_rate": 1.6286848850359272e-06,
      "loss": 2.207,
      "step": 62744
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.382149338722229,
      "learning_rate": 1.6284596714343592e-06,
      "loss": 2.0585,
      "step": 62745
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0304958820343018,
      "learning_rate": 1.628234472024821e-06,
      "loss": 2.2965,
      "step": 62746
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9746502041816711,
      "learning_rate": 1.6280092868077013e-06,
      "loss": 2.2705,
      "step": 62747
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1577732563018799,
      "learning_rate": 1.6277841157833773e-06,
      "loss": 2.2097,
      "step": 62748
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0505967140197754,
      "learning_rate": 1.6275589589522356e-06,
      "loss": 2.3315,
      "step": 62749
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.141161322593689,
      "learning_rate": 1.6273338163146523e-06,
      "loss": 2.4745,
      "step": 62750
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0904619693756104,
      "learning_rate": 1.6271086878710151e-06,
      "loss": 2.2021,
      "step": 62751
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.071921706199646,
      "learning_rate": 1.6268835736217004e-06,
      "loss": 2.2336,
      "step": 62752
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0693079233169556,
      "learning_rate": 1.6266584735670954e-06,
      "loss": 2.2584,
      "step": 62753
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.149936556816101,
      "learning_rate": 1.6264333877075744e-06,
      "loss": 2.3819,
      "step": 62754
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2404329776763916,
      "learning_rate": 1.626208316043526e-06,
      "loss": 2.3521,
      "step": 62755
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9820091128349304,
      "learning_rate": 1.6259832585753266e-06,
      "loss": 2.434,
      "step": 62756
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0669769048690796,
      "learning_rate": 1.6257582153033624e-06,
      "loss": 2.2728,
      "step": 62757
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0910224914550781,
      "learning_rate": 1.6255331862280122e-06,
      "loss": 2.3694,
      "step": 62758
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9888736009597778,
      "learning_rate": 1.6253081713496578e-06,
      "loss": 2.3953,
      "step": 62759
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9985358715057373,
      "learning_rate": 1.6250831706686788e-06,
      "loss": 2.4211,
      "step": 62760
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0876095294952393,
      "learning_rate": 1.6248581841854604e-06,
      "loss": 2.401,
      "step": 62761
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0630717277526855,
      "learning_rate": 1.6246332119003795e-06,
      "loss": 2.2321,
      "step": 62762
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1416985988616943,
      "learning_rate": 1.6244082538138217e-06,
      "loss": 2.5826,
      "step": 62763
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0665267705917358,
      "learning_rate": 1.624183309926165e-06,
      "loss": 2.2154,
      "step": 62764
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0506620407104492,
      "learning_rate": 1.6239583802377957e-06,
      "loss": 2.3407,
      "step": 62765
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.10958993434906,
      "learning_rate": 1.623733464749091e-06,
      "loss": 2.2481,
      "step": 62766
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9734976887702942,
      "learning_rate": 1.6235085634604298e-06,
      "loss": 2.326,
      "step": 62767
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1479359865188599,
      "learning_rate": 1.6232836763722003e-06,
      "loss": 2.1437,
      "step": 62768
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.005708932876587,
      "learning_rate": 1.6230588034847772e-06,
      "loss": 2.2911,
      "step": 62769
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.078275203704834,
      "learning_rate": 1.6228339447985463e-06,
      "loss": 2.1527,
      "step": 62770
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1062721014022827,
      "learning_rate": 1.6226091003138877e-06,
      "loss": 2.4543,
      "step": 62771
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0424607992172241,
      "learning_rate": 1.6223842700311797e-06,
      "loss": 2.4128,
      "step": 62772
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.11640202999115,
      "learning_rate": 1.622159453950808e-06,
      "loss": 2.2235,
      "step": 62773
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9929715991020203,
      "learning_rate": 1.6219346520731517e-06,
      "loss": 2.2389,
      "step": 62774
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3178489208221436,
      "learning_rate": 1.6217098643985884e-06,
      "loss": 2.4018,
      "step": 62775
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1051692962646484,
      "learning_rate": 1.621485090927506e-06,
      "loss": 2.2737,
      "step": 62776
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0927261114120483,
      "learning_rate": 1.621260331660278e-06,
      "loss": 2.1246,
      "step": 62777
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1416246891021729,
      "learning_rate": 1.6210355865972937e-06,
      "loss": 2.3302,
      "step": 62778
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1536643505096436,
      "learning_rate": 1.6208108557389258e-06,
      "loss": 2.3454,
      "step": 62779
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.023527979850769,
      "learning_rate": 1.6205861390855626e-06,
      "loss": 2.0846,
      "step": 62780
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.064598798751831,
      "learning_rate": 1.6203614366375787e-06,
      "loss": 2.2595,
      "step": 62781
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0905050039291382,
      "learning_rate": 1.62013674839536e-06,
      "loss": 2.2338,
      "step": 62782
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.171215534210205,
      "learning_rate": 1.6199120743592844e-06,
      "loss": 2.4904,
      "step": 62783
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0443540811538696,
      "learning_rate": 1.619687414529738e-06,
      "loss": 2.2226,
      "step": 62784
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1101845502853394,
      "learning_rate": 1.6194627689070918e-06,
      "loss": 2.2192,
      "step": 62785
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.116083025932312,
      "learning_rate": 1.6192381374917355e-06,
      "loss": 2.3917,
      "step": 62786
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1278696060180664,
      "learning_rate": 1.619013520284045e-06,
      "loss": 2.2539,
      "step": 62787
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0578389167785645,
      "learning_rate": 1.618788917284404e-06,
      "loss": 2.1065,
      "step": 62788
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0494855642318726,
      "learning_rate": 1.6185643284931907e-06,
      "loss": 2.0405,
      "step": 62789
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0027446746826172,
      "learning_rate": 1.6183397539107893e-06,
      "loss": 2.2465,
      "step": 62790
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.184581995010376,
      "learning_rate": 1.618115193537576e-06,
      "loss": 2.4024,
      "step": 62791
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1258374452590942,
      "learning_rate": 1.6178906473739365e-06,
      "loss": 2.2914,
      "step": 62792
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0445343255996704,
      "learning_rate": 1.6176661154202454e-06,
      "loss": 2.3012,
      "step": 62793
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1470680236816406,
      "learning_rate": 1.617441597676891e-06,
      "loss": 2.1577,
      "step": 62794
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2218468189239502,
      "learning_rate": 1.6172170941442488e-06,
      "loss": 2.4405,
      "step": 62795
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0016525983810425,
      "learning_rate": 1.6169926048226981e-06,
      "loss": 2.1775,
      "step": 62796
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2244917154312134,
      "learning_rate": 1.6167681297126237e-06,
      "loss": 2.3282,
      "step": 62797
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0238571166992188,
      "learning_rate": 1.6165436688144043e-06,
      "loss": 2.261,
      "step": 62798
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0991687774658203,
      "learning_rate": 1.6163192221284186e-06,
      "loss": 2.3623,
      "step": 62799
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0607713460922241,
      "learning_rate": 1.6160947896550505e-06,
      "loss": 2.5123,
      "step": 62800
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1900049448013306,
      "learning_rate": 1.6158703713946789e-06,
      "loss": 2.2049,
      "step": 62801
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1357691287994385,
      "learning_rate": 1.615645967347682e-06,
      "loss": 2.2402,
      "step": 62802
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0766358375549316,
      "learning_rate": 1.6154215775144445e-06,
      "loss": 2.3018,
      "step": 62803
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.027600884437561,
      "learning_rate": 1.6151972018953421e-06,
      "loss": 2.3256,
      "step": 62804
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2170366048812866,
      "learning_rate": 1.6149728404907605e-06,
      "loss": 2.1006,
      "step": 62805
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9797526001930237,
      "learning_rate": 1.614748493301075e-06,
      "loss": 2.2497,
      "step": 62806
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1492555141448975,
      "learning_rate": 1.6145241603266705e-06,
      "loss": 2.5019,
      "step": 62807
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0184807777404785,
      "learning_rate": 1.6142998415679224e-06,
      "loss": 2.1782,
      "step": 62808
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1598834991455078,
      "learning_rate": 1.614075537025216e-06,
      "loss": 2.3961,
      "step": 62809
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.169582486152649,
      "learning_rate": 1.6138512466989264e-06,
      "loss": 2.1938,
      "step": 62810
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0618337392807007,
      "learning_rate": 1.6136269705894425e-06,
      "loss": 2.5274,
      "step": 62811
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0836186408996582,
      "learning_rate": 1.6134027086971326e-06,
      "loss": 2.2885,
      "step": 62812
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0722168684005737,
      "learning_rate": 1.6131784610223856e-06,
      "loss": 2.4615,
      "step": 62813
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0826385021209717,
      "learning_rate": 1.6129542275655763e-06,
      "loss": 2.3203,
      "step": 62814
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0980175733566284,
      "learning_rate": 1.6127300083270903e-06,
      "loss": 2.4757,
      "step": 62815
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0690559148788452,
      "learning_rate": 1.6125058033073027e-06,
      "loss": 2.4745,
      "step": 62816
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.008008360862732,
      "learning_rate": 1.6122816125065977e-06,
      "loss": 2.2067,
      "step": 62817
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1218093633651733,
      "learning_rate": 1.6120574359253516e-06,
      "loss": 2.4088,
      "step": 62818
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0781599283218384,
      "learning_rate": 1.6118332735639487e-06,
      "loss": 2.4486,
      "step": 62819
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0582870244979858,
      "learning_rate": 1.611609125422764e-06,
      "loss": 2.2131,
      "step": 62820
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.006201982498169,
      "learning_rate": 1.6113849915021817e-06,
      "loss": 2.2399,
      "step": 62821
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.148215413093567,
      "learning_rate": 1.6111608718025806e-06,
      "loss": 2.1996,
      "step": 62822
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0749374628067017,
      "learning_rate": 1.610936766324338e-06,
      "loss": 2.3302,
      "step": 62823
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.16973078250885,
      "learning_rate": 1.6107126750678393e-06,
      "loss": 2.284,
      "step": 62824
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0809861421585083,
      "learning_rate": 1.6104885980334595e-06,
      "loss": 2.1053,
      "step": 62825
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1505670547485352,
      "learning_rate": 1.6102645352215784e-06,
      "loss": 2.353,
      "step": 62826
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2164366245269775,
      "learning_rate": 1.6100404866325802e-06,
      "loss": 2.2174,
      "step": 62827
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0844807624816895,
      "learning_rate": 1.609816452266839e-06,
      "loss": 2.2227,
      "step": 62828
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2571266889572144,
      "learning_rate": 1.6095924321247414e-06,
      "loss": 2.2103,
      "step": 62829
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0301405191421509,
      "learning_rate": 1.6093684262066623e-06,
      "loss": 2.1642,
      "step": 62830
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.10212242603302,
      "learning_rate": 1.6091444345129803e-06,
      "loss": 2.3634,
      "step": 62831
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1309338808059692,
      "learning_rate": 1.6089204570440798e-06,
      "loss": 2.3017,
      "step": 62832
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9993051290512085,
      "learning_rate": 1.6086964938003358e-06,
      "loss": 2.2254,
      "step": 62833
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0654674768447876,
      "learning_rate": 1.6084725447821325e-06,
      "loss": 2.5073,
      "step": 62834
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0590494871139526,
      "learning_rate": 1.6082486099898454e-06,
      "loss": 2.3439,
      "step": 62835
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1483091115951538,
      "learning_rate": 1.6080246894238582e-06,
      "loss": 2.3261,
      "step": 62836
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0829694271087646,
      "learning_rate": 1.6078007830845477e-06,
      "loss": 2.0698,
      "step": 62837
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.983363687992096,
      "learning_rate": 1.6075768909722955e-06,
      "loss": 2.1277,
      "step": 62838
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0312786102294922,
      "learning_rate": 1.607353013087477e-06,
      "loss": 2.5545,
      "step": 62839
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.053420066833496,
      "learning_rate": 1.6071291494304763e-06,
      "loss": 2.3766,
      "step": 62840
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0703458786010742,
      "learning_rate": 1.6069053000016688e-06,
      "loss": 2.3206,
      "step": 62841
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0760966539382935,
      "learning_rate": 1.6066814648014394e-06,
      "loss": 2.4558,
      "step": 62842
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0717720985412598,
      "learning_rate": 1.6064576438301615e-06,
      "loss": 2.4194,
      "step": 62843
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.069982647895813,
      "learning_rate": 1.6062338370882202e-06,
      "loss": 2.36,
      "step": 62844
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.256798267364502,
      "learning_rate": 1.6060100445759908e-06,
      "loss": 2.3746,
      "step": 62845
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.15007746219635,
      "learning_rate": 1.605786266293855e-06,
      "loss": 2.4116,
      "step": 62846
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2042237520217896,
      "learning_rate": 1.6055625022421905e-06,
      "loss": 2.2734,
      "step": 62847
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2615996599197388,
      "learning_rate": 1.6053387524213782e-06,
      "loss": 2.5195,
      "step": 62848
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0254030227661133,
      "learning_rate": 1.6051150168317952e-06,
      "loss": 2.2209,
      "step": 62849
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.16122567653656,
      "learning_rate": 1.6048912954738272e-06,
      "loss": 2.2429,
      "step": 62850
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0547106266021729,
      "learning_rate": 1.6046675883478436e-06,
      "loss": 2.328,
      "step": 62851
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.111745834350586,
      "learning_rate": 1.6044438954542307e-06,
      "loss": 2.1847,
      "step": 62852
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1352323293685913,
      "learning_rate": 1.604220216793363e-06,
      "loss": 2.1,
      "step": 62853
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0763962268829346,
      "learning_rate": 1.6039965523656254e-06,
      "loss": 2.2632,
      "step": 62854
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1015632152557373,
      "learning_rate": 1.603772902171391e-06,
      "loss": 2.2508,
      "step": 62855
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.107047438621521,
      "learning_rate": 1.6035492662110442e-06,
      "loss": 2.3908,
      "step": 62856
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0001780986785889,
      "learning_rate": 1.6033256444849622e-06,
      "loss": 2.2506,
      "step": 62857
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.093991994857788,
      "learning_rate": 1.6031020369935213e-06,
      "loss": 2.0736,
      "step": 62858
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.188647985458374,
      "learning_rate": 1.6028784437371058e-06,
      "loss": 2.4805,
      "step": 62859
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0389240980148315,
      "learning_rate": 1.602654864716089e-06,
      "loss": 2.2665,
      "step": 62860
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1025627851486206,
      "learning_rate": 1.6024312999308556e-06,
      "loss": 2.1992,
      "step": 62861
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1345747709274292,
      "learning_rate": 1.6022077493817789e-06,
      "loss": 2.3919,
      "step": 62862
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1652874946594238,
      "learning_rate": 1.6019842130692465e-06,
      "loss": 2.1866,
      "step": 62863
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3542295694351196,
      "learning_rate": 1.6017606909936268e-06,
      "loss": 2.3585,
      "step": 62864
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.114903450012207,
      "learning_rate": 1.6015371831553062e-06,
      "loss": 2.3073,
      "step": 62865
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1260929107666016,
      "learning_rate": 1.6013136895546577e-06,
      "loss": 2.4095,
      "step": 62866
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.378585696220398,
      "learning_rate": 1.6010902101920667e-06,
      "loss": 2.1928,
      "step": 62867
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0574452877044678,
      "learning_rate": 1.6008667450679062e-06,
      "loss": 2.1978,
      "step": 62868
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0656836032867432,
      "learning_rate": 1.60064329418256e-06,
      "loss": 2.4574,
      "step": 62869
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0971354246139526,
      "learning_rate": 1.6004198575364028e-06,
      "loss": 2.2712,
      "step": 62870
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1606577634811401,
      "learning_rate": 1.6001964351298172e-06,
      "loss": 2.2782,
      "step": 62871
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0857890844345093,
      "learning_rate": 1.5999730269631774e-06,
      "loss": 2.3666,
      "step": 62872
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0515424013137817,
      "learning_rate": 1.5997496330368678e-06,
      "loss": 2.4124,
      "step": 62873
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0924326181411743,
      "learning_rate": 1.5995262533512611e-06,
      "loss": 2.3843,
      "step": 62874
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0845621824264526,
      "learning_rate": 1.5993028879067419e-06,
      "loss": 2.3574,
      "step": 62875
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0842196941375732,
      "learning_rate": 1.599079536703686e-06,
      "loss": 2.2826,
      "step": 62876
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1943693161010742,
      "learning_rate": 1.5988561997424713e-06,
      "loss": 2.4098,
      "step": 62877
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1557077169418335,
      "learning_rate": 1.5986328770234749e-06,
      "loss": 2.3948,
      "step": 62878
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1371623277664185,
      "learning_rate": 1.5984095685470802e-06,
      "loss": 2.4173,
      "step": 62879
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.045580267906189,
      "learning_rate": 1.59818627431366e-06,
      "loss": 2.515,
      "step": 62880
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1256681680679321,
      "learning_rate": 1.5979629943235996e-06,
      "loss": 2.4144,
      "step": 62881
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1184560060501099,
      "learning_rate": 1.59773972857727e-06,
      "loss": 2.2354,
      "step": 62882
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0547475814819336,
      "learning_rate": 1.5975164770750574e-06,
      "loss": 2.3003,
      "step": 62883
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0233334302902222,
      "learning_rate": 1.5972932398173335e-06,
      "loss": 2.4588,
      "step": 62884
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1646788120269775,
      "learning_rate": 1.5970700168044816e-06,
      "loss": 2.4785,
      "step": 62885
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.027569055557251,
      "learning_rate": 1.5968468080368782e-06,
      "loss": 2.33,
      "step": 62886
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0076828002929688,
      "learning_rate": 1.5966236135149004e-06,
      "loss": 2.1226,
      "step": 62887
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1796923875808716,
      "learning_rate": 1.5964004332389294e-06,
      "loss": 2.3275,
      "step": 62888
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.103663444519043,
      "learning_rate": 1.59617726720934e-06,
      "loss": 2.3916,
      "step": 62889
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1414507627487183,
      "learning_rate": 1.5959541154265145e-06,
      "loss": 2.3624,
      "step": 62890
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.109434723854065,
      "learning_rate": 1.5957309778908293e-06,
      "loss": 2.2582,
      "step": 62891
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0927958488464355,
      "learning_rate": 1.5955078546026636e-06,
      "loss": 2.3035,
      "step": 62892
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0159047842025757,
      "learning_rate": 1.595284745562391e-06,
      "loss": 2.4944,
      "step": 62893
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3158262968063354,
      "learning_rate": 1.5950616507703976e-06,
      "loss": 2.2804,
      "step": 62894
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6477220058441162,
      "learning_rate": 1.594838570227053e-06,
      "loss": 2.2354,
      "step": 62895
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3519952297210693,
      "learning_rate": 1.5946155039327437e-06,
      "loss": 2.398,
      "step": 62896
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0637080669403076,
      "learning_rate": 1.5943924518878406e-06,
      "loss": 2.4111,
      "step": 62897
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1808620691299438,
      "learning_rate": 1.5941694140927289e-06,
      "loss": 2.4397,
      "step": 62898
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9875853657722473,
      "learning_rate": 1.5939463905477792e-06,
      "loss": 2.3094,
      "step": 62899
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1189548969268799,
      "learning_rate": 1.593723381253377e-06,
      "loss": 2.2373,
      "step": 62900
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1509026288986206,
      "learning_rate": 1.5935003862098941e-06,
      "loss": 2.3559,
      "step": 62901
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1085669994354248,
      "learning_rate": 1.5932774054177136e-06,
      "loss": 2.3945,
      "step": 62902
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1022740602493286,
      "learning_rate": 1.5930544388772108e-06,
      "loss": 2.5307,
      "step": 62903
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0378363132476807,
      "learning_rate": 1.592831486588764e-06,
      "loss": 2.3294,
      "step": 62904
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1823019981384277,
      "learning_rate": 1.5926085485527488e-06,
      "loss": 2.2474,
      "step": 62905
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0121041536331177,
      "learning_rate": 1.592385624769548e-06,
      "loss": 2.3868,
      "step": 62906
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9668991565704346,
      "learning_rate": 1.5921627152395347e-06,
      "loss": 2.4776,
      "step": 62907
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9615935683250427,
      "learning_rate": 1.591939819963092e-06,
      "loss": 2.371,
      "step": 62908
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9629491567611694,
      "learning_rate": 1.5917169389405917e-06,
      "loss": 2.2954,
      "step": 62909
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0438770055770874,
      "learning_rate": 1.5914940721724182e-06,
      "loss": 2.4813,
      "step": 62910
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1323589086532593,
      "learning_rate": 1.5912712196589431e-06,
      "loss": 2.3282,
      "step": 62911
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2321218252182007,
      "learning_rate": 1.5910483814005495e-06,
      "loss": 2.175,
      "step": 62912
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0784122943878174,
      "learning_rate": 1.5908255573976116e-06,
      "loss": 2.1915,
      "step": 62913
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.139037013053894,
      "learning_rate": 1.590602747650507e-06,
      "loss": 2.466,
      "step": 62914
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1664584875106812,
      "learning_rate": 1.5903799521596174e-06,
      "loss": 2.1359,
      "step": 62915
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0594006776809692,
      "learning_rate": 1.5901571709253171e-06,
      "loss": 2.448,
      "step": 62916
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1751075983047485,
      "learning_rate": 1.5899344039479825e-06,
      "loss": 2.4553,
      "step": 62917
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1401971578598022,
      "learning_rate": 1.5897116512279952e-06,
      "loss": 2.4116,
      "step": 62918
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0553171634674072,
      "learning_rate": 1.589488912765731e-06,
      "loss": 2.3244,
      "step": 62919
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1409622430801392,
      "learning_rate": 1.5892661885615656e-06,
      "loss": 2.4697,
      "step": 62920
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0770517587661743,
      "learning_rate": 1.5890434786158803e-06,
      "loss": 2.198,
      "step": 62921
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.034585952758789,
      "learning_rate": 1.5888207829290481e-06,
      "loss": 2.2164,
      "step": 62922
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.135405421257019,
      "learning_rate": 1.5885981015014517e-06,
      "loss": 2.3743,
      "step": 62923
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0807360410690308,
      "learning_rate": 1.5883754343334635e-06,
      "loss": 2.2625,
      "step": 62924
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.10142982006073,
      "learning_rate": 1.588152781425466e-06,
      "loss": 2.2233,
      "step": 62925
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0065641403198242,
      "learning_rate": 1.5879301427778316e-06,
      "loss": 2.3201,
      "step": 62926
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0061041116714478,
      "learning_rate": 1.587707518390943e-06,
      "loss": 2.19,
      "step": 62927
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.08771812915802,
      "learning_rate": 1.5874849082651723e-06,
      "loss": 2.4122,
      "step": 62928
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1593626737594604,
      "learning_rate": 1.5872623124009046e-06,
      "loss": 2.3788,
      "step": 62929
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1240687370300293,
      "learning_rate": 1.5870397307985064e-06,
      "loss": 2.2482,
      "step": 62930
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9759745597839355,
      "learning_rate": 1.586817163458364e-06,
      "loss": 2.2959,
      "step": 62931
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0454175472259521,
      "learning_rate": 1.5865946103808494e-06,
      "loss": 1.967,
      "step": 62932
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0560752153396606,
      "learning_rate": 1.5863720715663434e-06,
      "loss": 2.2713,
      "step": 62933
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0461504459381104,
      "learning_rate": 1.58614954701522e-06,
      "loss": 2.2302,
      "step": 62934
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.239359736442566,
      "learning_rate": 1.5859270367278613e-06,
      "loss": 2.3749,
      "step": 62935
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0386370420455933,
      "learning_rate": 1.5857045407046379e-06,
      "loss": 2.3408,
      "step": 62936
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9688398838043213,
      "learning_rate": 1.585482058945934e-06,
      "loss": 2.2493,
      "step": 62937
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1470450162887573,
      "learning_rate": 1.5852595914521197e-06,
      "loss": 2.5228,
      "step": 62938
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0914064645767212,
      "learning_rate": 1.5850371382235795e-06,
      "loss": 2.5474,
      "step": 62939
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0630158185958862,
      "learning_rate": 1.5848146992606862e-06,
      "loss": 2.6007,
      "step": 62940
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.100467324256897,
      "learning_rate": 1.5845922745638154e-06,
      "loss": 2.357,
      "step": 62941
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0532350540161133,
      "learning_rate": 1.5843698641333482e-06,
      "loss": 2.3379,
      "step": 62942
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0753343105316162,
      "learning_rate": 1.58414746796966e-06,
      "loss": 2.3022,
      "step": 62943
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0014621019363403,
      "learning_rate": 1.583925086073126e-06,
      "loss": 2.3149,
      "step": 62944
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1302828788757324,
      "learning_rate": 1.5837027184441255e-06,
      "loss": 2.3822,
      "step": 62945
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0432378053665161,
      "learning_rate": 1.5834803650830344e-06,
      "loss": 2.3404,
      "step": 62946
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.12661874294281,
      "learning_rate": 1.583258025990231e-06,
      "loss": 2.1892,
      "step": 62947
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1595823764801025,
      "learning_rate": 1.5830357011660912e-06,
      "loss": 2.1705,
      "step": 62948
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1325291395187378,
      "learning_rate": 1.5828133906109898e-06,
      "loss": 2.5145,
      "step": 62949
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9547903537750244,
      "learning_rate": 1.5825910943253076e-06,
      "loss": 2.3458,
      "step": 62950
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1093664169311523,
      "learning_rate": 1.5823688123094182e-06,
      "loss": 2.3213,
      "step": 62951
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2046064138412476,
      "learning_rate": 1.5821465445637018e-06,
      "loss": 2.3881,
      "step": 62952
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1478074789047241,
      "learning_rate": 1.5819242910885302e-06,
      "loss": 2.4101,
      "step": 62953
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1965727806091309,
      "learning_rate": 1.5817020518842863e-06,
      "loss": 2.4028,
      "step": 62954
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0454144477844238,
      "learning_rate": 1.581479826951342e-06,
      "loss": 2.2666,
      "step": 62955
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9950733780860901,
      "learning_rate": 1.5812576162900796e-06,
      "loss": 2.4425,
      "step": 62956
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0131804943084717,
      "learning_rate": 1.5810354199008683e-06,
      "loss": 2.4765,
      "step": 62957
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9723192453384399,
      "learning_rate": 1.5808132377840902e-06,
      "loss": 2.2363,
      "step": 62958
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0664796829223633,
      "learning_rate": 1.5805910699401173e-06,
      "loss": 2.2152,
      "step": 62959
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0175395011901855,
      "learning_rate": 1.5803689163693325e-06,
      "loss": 2.142,
      "step": 62960
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.216068983078003,
      "learning_rate": 1.580146777072107e-06,
      "loss": 2.4078,
      "step": 62961
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1273576021194458,
      "learning_rate": 1.579924652048821e-06,
      "loss": 2.1837,
      "step": 62962
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1453938484191895,
      "learning_rate": 1.5797025412998479e-06,
      "loss": 2.4456,
      "step": 62963
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0950886011123657,
      "learning_rate": 1.5794804448255673e-06,
      "loss": 2.234,
      "step": 62964
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.235592246055603,
      "learning_rate": 1.5792583626263536e-06,
      "loss": 2.379,
      "step": 62965
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0473459959030151,
      "learning_rate": 1.579036294702585e-06,
      "loss": 2.098,
      "step": 62966
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0393692255020142,
      "learning_rate": 1.5788142410546348e-06,
      "loss": 2.5619,
      "step": 62967
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1514660120010376,
      "learning_rate": 1.5785922016828847e-06,
      "loss": 2.6231,
      "step": 62968
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1020294427871704,
      "learning_rate": 1.578370176587708e-06,
      "loss": 2.2943,
      "step": 62969
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1730506420135498,
      "learning_rate": 1.5781481657694808e-06,
      "loss": 2.3601,
      "step": 62970
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.163704514503479,
      "learning_rate": 1.5779261692285775e-06,
      "loss": 2.2019,
      "step": 62971
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.213981032371521,
      "learning_rate": 1.5777041869653786e-06,
      "loss": 2.0348,
      "step": 62972
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0863596200942993,
      "learning_rate": 1.5774822189802564e-06,
      "loss": 2.3636,
      "step": 62973
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0901496410369873,
      "learning_rate": 1.5772602652735924e-06,
      "loss": 2.3309,
      "step": 62974
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9712480306625366,
      "learning_rate": 1.577038325845759e-06,
      "loss": 2.4536,
      "step": 62975
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.04347562789917,
      "learning_rate": 1.5768164006971309e-06,
      "loss": 2.2159,
      "step": 62976
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.058568000793457,
      "learning_rate": 1.5765944898280894e-06,
      "loss": 2.4664,
      "step": 62977
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9850310683250427,
      "learning_rate": 1.576372593239006e-06,
      "loss": 2.38,
      "step": 62978
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0781886577606201,
      "learning_rate": 1.5761507109302609e-06,
      "loss": 2.3849,
      "step": 62979
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1715728044509888,
      "learning_rate": 1.5759288429022257e-06,
      "loss": 2.3026,
      "step": 62980
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0724351406097412,
      "learning_rate": 1.5757069891552824e-06,
      "loss": 2.208,
      "step": 62981
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1982579231262207,
      "learning_rate": 1.5754851496898027e-06,
      "loss": 2.464,
      "step": 62982
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1260865926742554,
      "learning_rate": 1.5752633245061633e-06,
      "loss": 2.2464,
      "step": 62983
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1703003644943237,
      "learning_rate": 1.5750415136047393e-06,
      "loss": 2.4406,
      "step": 62984
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1904782056808472,
      "learning_rate": 1.5748197169859102e-06,
      "loss": 2.325,
      "step": 62985
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.016201138496399,
      "learning_rate": 1.5745979346500485e-06,
      "loss": 2.279,
      "step": 62986
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1127455234527588,
      "learning_rate": 1.574376166597532e-06,
      "loss": 2.106,
      "step": 62987
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0271778106689453,
      "learning_rate": 1.5741544128287357e-06,
      "loss": 2.4026,
      "step": 62988
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9781585931777954,
      "learning_rate": 1.5739326733440374e-06,
      "loss": 2.5099,
      "step": 62989
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0303536653518677,
      "learning_rate": 1.5737109481438094e-06,
      "loss": 2.1445,
      "step": 62990
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.020517110824585,
      "learning_rate": 1.5734892372284328e-06,
      "loss": 2.1795,
      "step": 62991
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2223284244537354,
      "learning_rate": 1.5732675405982778e-06,
      "loss": 2.3538,
      "step": 62992
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1888387203216553,
      "learning_rate": 1.5730458582537257e-06,
      "loss": 2.2242,
      "step": 62993
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0982022285461426,
      "learning_rate": 1.572824190195147e-06,
      "loss": 2.2554,
      "step": 62994
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0330960750579834,
      "learning_rate": 1.5726025364229247e-06,
      "loss": 2.4928,
      "step": 62995
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9916142225265503,
      "learning_rate": 1.5723808969374255e-06,
      "loss": 1.9779,
      "step": 62996
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.158107042312622,
      "learning_rate": 1.5721592717390321e-06,
      "loss": 2.4464,
      "step": 62997
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.991909384727478,
      "learning_rate": 1.5719376608281144e-06,
      "loss": 2.3,
      "step": 62998
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1788280010223389,
      "learning_rate": 1.5717160642050544e-06,
      "loss": 2.4442,
      "step": 62999
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1644701957702637,
      "learning_rate": 1.5714944818702227e-06,
      "loss": 2.4852,
      "step": 63000
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0652718544006348,
      "learning_rate": 1.5712729138239991e-06,
      "loss": 2.4028,
      "step": 63001
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.119789719581604,
      "learning_rate": 1.5710513600667543e-06,
      "loss": 2.4155,
      "step": 63002
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.177295446395874,
      "learning_rate": 1.5708298205988703e-06,
      "loss": 2.328,
      "step": 63003
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0512031316757202,
      "learning_rate": 1.570608295420718e-06,
      "loss": 2.309,
      "step": 63004
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0276541709899902,
      "learning_rate": 1.5703867845326714e-06,
      "loss": 2.3335,
      "step": 63005
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2156680822372437,
      "learning_rate": 1.5701652879351125e-06,
      "loss": 2.4428,
      "step": 63006
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1438359022140503,
      "learning_rate": 1.5699438056284088e-06,
      "loss": 2.2836,
      "step": 63007
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1107990741729736,
      "learning_rate": 1.5697223376129433e-06,
      "loss": 2.3199,
      "step": 63008
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0563448667526245,
      "learning_rate": 1.569500883889088e-06,
      "loss": 2.4121,
      "step": 63009
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9954879283905029,
      "learning_rate": 1.569279444457219e-06,
      "loss": 2.2212,
      "step": 63010
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0106724500656128,
      "learning_rate": 1.5690580193177075e-06,
      "loss": 2.1897,
      "step": 63011
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0867303609848022,
      "learning_rate": 1.568836608470935e-06,
      "loss": 2.3444,
      "step": 63012
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1207642555236816,
      "learning_rate": 1.5686152119172726e-06,
      "loss": 2.4527,
      "step": 63013
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0663034915924072,
      "learning_rate": 1.5683938296571e-06,
      "loss": 2.3229,
      "step": 63014
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1666897535324097,
      "learning_rate": 1.5681724616907867e-06,
      "loss": 2.2068,
      "step": 63015
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0297967195510864,
      "learning_rate": 1.5679511080187137e-06,
      "loss": 2.0148,
      "step": 63016
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9975084066390991,
      "learning_rate": 1.5677297686412508e-06,
      "loss": 2.2725,
      "step": 63017
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0847368240356445,
      "learning_rate": 1.5675084435587794e-06,
      "loss": 2.2993,
      "step": 63018
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0371582508087158,
      "learning_rate": 1.5672871327716687e-06,
      "loss": 2.3644,
      "step": 63019
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2013078927993774,
      "learning_rate": 1.5670658362802992e-06,
      "loss": 2.393,
      "step": 63020
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1675209999084473,
      "learning_rate": 1.566844554085043e-06,
      "loss": 2.2647,
      "step": 63021
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0465384721755981,
      "learning_rate": 1.5666232861862762e-06,
      "loss": 2.2768,
      "step": 63022
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0342005491256714,
      "learning_rate": 1.566402032584371e-06,
      "loss": 2.0665,
      "step": 63023
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2753956317901611,
      "learning_rate": 1.5661807932797068e-06,
      "loss": 2.0798,
      "step": 63024
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0656100511550903,
      "learning_rate": 1.5659595682726548e-06,
      "loss": 2.3855,
      "step": 63025
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.150373935699463,
      "learning_rate": 1.565738357563593e-06,
      "loss": 2.3024,
      "step": 63026
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0650908946990967,
      "learning_rate": 1.5655171611528951e-06,
      "loss": 2.3199,
      "step": 63027
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1184406280517578,
      "learning_rate": 1.5652959790409373e-06,
      "loss": 2.2243,
      "step": 63028
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1218352317810059,
      "learning_rate": 1.5650748112280923e-06,
      "loss": 2.3221,
      "step": 63029
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0500264167785645,
      "learning_rate": 1.5648536577147378e-06,
      "loss": 2.3294,
      "step": 63030
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9867408871650696,
      "learning_rate": 1.5646325185012479e-06,
      "loss": 2.4602,
      "step": 63031
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0284932851791382,
      "learning_rate": 1.5644113935879945e-06,
      "loss": 2.3298,
      "step": 63032
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1233975887298584,
      "learning_rate": 1.5641902829753575e-06,
      "loss": 2.2095,
      "step": 63033
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1898293495178223,
      "learning_rate": 1.5639691866637064e-06,
      "loss": 2.3281,
      "step": 63034
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1068568229675293,
      "learning_rate": 1.5637481046534209e-06,
      "loss": 2.4643,
      "step": 63035
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1209369897842407,
      "learning_rate": 1.5635270369448741e-06,
      "loss": 2.422,
      "step": 63036
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.071750283241272,
      "learning_rate": 1.563305983538439e-06,
      "loss": 2.3363,
      "step": 63037
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9352743625640869,
      "learning_rate": 1.563084944434491e-06,
      "loss": 2.1487,
      "step": 63038
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1185630559921265,
      "learning_rate": 1.5628639196334072e-06,
      "loss": 2.3337,
      "step": 63039
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0018985271453857,
      "learning_rate": 1.5626429091355577e-06,
      "loss": 2.1704,
      "step": 63040
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0022069215774536,
      "learning_rate": 1.562421912941322e-06,
      "loss": 2.4003,
      "step": 63041
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1370596885681152,
      "learning_rate": 1.5622009310510722e-06,
      "loss": 2.7588,
      "step": 63042
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.712585926055908,
      "learning_rate": 1.5619799634651844e-06,
      "loss": 2.0539,
      "step": 63043
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2171381711959839,
      "learning_rate": 1.5617590101840308e-06,
      "loss": 2.2725,
      "step": 63044
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.135933518409729,
      "learning_rate": 1.5615380712079887e-06,
      "loss": 2.4822,
      "step": 63045
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1965339183807373,
      "learning_rate": 1.5613171465374298e-06,
      "loss": 2.3825,
      "step": 63046
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0156049728393555,
      "learning_rate": 1.5610962361727321e-06,
      "loss": 2.3559,
      "step": 63047
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1393386125564575,
      "learning_rate": 1.5608753401142695e-06,
      "loss": 1.9387,
      "step": 63048
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0312408208847046,
      "learning_rate": 1.560654458362414e-06,
      "loss": 2.2341,
      "step": 63049
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0515148639678955,
      "learning_rate": 1.5604335909175393e-06,
      "loss": 2.3549,
      "step": 63050
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0192586183547974,
      "learning_rate": 1.5602127377800246e-06,
      "loss": 2.3182,
      "step": 63051
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1362580060958862,
      "learning_rate": 1.5599918989502383e-06,
      "loss": 2.1974,
      "step": 63052
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1911303997039795,
      "learning_rate": 1.559771074428561e-06,
      "loss": 2.3578,
      "step": 63053
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1756911277770996,
      "learning_rate": 1.5595502642153616e-06,
      "loss": 2.367,
      "step": 63054
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.091658592224121,
      "learning_rate": 1.5593294683110195e-06,
      "loss": 2.2351,
      "step": 63055
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0497742891311646,
      "learning_rate": 1.5591086867159033e-06,
      "loss": 2.2575,
      "step": 63056
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.105118989944458,
      "learning_rate": 1.558887919430394e-06,
      "loss": 2.2625,
      "step": 63057
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0940568447113037,
      "learning_rate": 1.5586671664548615e-06,
      "loss": 2.2902,
      "step": 63058
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0469502210617065,
      "learning_rate": 1.5584464277896782e-06,
      "loss": 2.4601,
      "step": 63059
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1153022050857544,
      "learning_rate": 1.5582257034352232e-06,
      "loss": 2.3863,
      "step": 63060
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0364186763763428,
      "learning_rate": 1.5580049933918683e-06,
      "loss": 2.2795,
      "step": 63061
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.044651985168457,
      "learning_rate": 1.5577842976599867e-06,
      "loss": 2.4453,
      "step": 63062
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1466301679611206,
      "learning_rate": 1.5575636162399543e-06,
      "loss": 2.2841,
      "step": 63063
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.035184621810913,
      "learning_rate": 1.5573429491321434e-06,
      "loss": 2.1721,
      "step": 63064
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0882658958435059,
      "learning_rate": 1.5571222963369303e-06,
      "loss": 2.5113,
      "step": 63065
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2068244218826294,
      "learning_rate": 1.5569016578546891e-06,
      "loss": 2.3752,
      "step": 63066
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2362070083618164,
      "learning_rate": 1.5566810336857896e-06,
      "loss": 2.5252,
      "step": 63067
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0715774297714233,
      "learning_rate": 1.5564604238306113e-06,
      "loss": 2.2511,
      "step": 63068
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.141414999961853,
      "learning_rate": 1.5562398282895242e-06,
      "loss": 2.1647,
      "step": 63069
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1684281826019287,
      "learning_rate": 1.5560192470629054e-06,
      "loss": 2.3362,
      "step": 63070
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1221721172332764,
      "learning_rate": 1.555798680151126e-06,
      "loss": 2.2561,
      "step": 63071
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0419930219650269,
      "learning_rate": 1.5555781275545624e-06,
      "loss": 2.1892,
      "step": 63072
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0459593534469604,
      "learning_rate": 1.555357589273586e-06,
      "loss": 2.3111,
      "step": 63073
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0789088010787964,
      "learning_rate": 1.5551370653085763e-06,
      "loss": 2.3177,
      "step": 63074
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.101679801940918,
      "learning_rate": 1.5549165556598977e-06,
      "loss": 2.3712,
      "step": 63075
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0049498081207275,
      "learning_rate": 1.5546960603279326e-06,
      "loss": 2.3971,
      "step": 63076
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.032273530960083,
      "learning_rate": 1.5544755793130483e-06,
      "loss": 2.4284,
      "step": 63077
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2095032930374146,
      "learning_rate": 1.5542551126156237e-06,
      "loss": 2.3222,
      "step": 63078
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0359512567520142,
      "learning_rate": 1.554034660236029e-06,
      "loss": 2.29,
      "step": 63079
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1235651969909668,
      "learning_rate": 1.553814222174641e-06,
      "loss": 2.4929,
      "step": 63080
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9970324635505676,
      "learning_rate": 1.5535937984318305e-06,
      "loss": 2.2458,
      "step": 63081
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1666040420532227,
      "learning_rate": 1.553373389007974e-06,
      "loss": 2.4071,
      "step": 63082
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2572041749954224,
      "learning_rate": 1.5531529939034418e-06,
      "loss": 2.3394,
      "step": 63083
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.951834499835968,
      "learning_rate": 1.5529326131186107e-06,
      "loss": 2.1931,
      "step": 63084
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1950798034667969,
      "learning_rate": 1.5527122466538524e-06,
      "loss": 2.2353,
      "step": 63085
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.093278169631958,
      "learning_rate": 1.5524918945095424e-06,
      "loss": 2.3734,
      "step": 63086
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0728455781936646,
      "learning_rate": 1.5522715566860525e-06,
      "loss": 2.3217,
      "step": 63087
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1227810382843018,
      "learning_rate": 1.5520512331837578e-06,
      "loss": 2.2506,
      "step": 63088
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1886645555496216,
      "learning_rate": 1.5518309240030282e-06,
      "loss": 2.2238,
      "step": 63089
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.093259572982788,
      "learning_rate": 1.5516106291442413e-06,
      "loss": 2.3383,
      "step": 63090
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.6215150356292725,
      "learning_rate": 1.5513903486077665e-06,
      "loss": 2.3691,
      "step": 63091
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0716322660446167,
      "learning_rate": 1.5511700823939823e-06,
      "loss": 2.1944,
      "step": 63092
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0201293230056763,
      "learning_rate": 1.55094983050326e-06,
      "loss": 2.0323,
      "step": 63093
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.07364022731781,
      "learning_rate": 1.55072959293597e-06,
      "loss": 2.2998,
      "step": 63094
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9755980372428894,
      "learning_rate": 1.55050936969249e-06,
      "loss": 2.3988,
      "step": 63095
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1903663873672485,
      "learning_rate": 1.5502891607731895e-06,
      "loss": 2.2365,
      "step": 63096
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1270948648452759,
      "learning_rate": 1.5500689661784462e-06,
      "loss": 2.3303,
      "step": 63097
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0117037296295166,
      "learning_rate": 1.5498487859086286e-06,
      "loss": 2.365,
      "step": 63098
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1919200420379639,
      "learning_rate": 1.5496286199641142e-06,
      "loss": 2.3954,
      "step": 63099
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0939879417419434,
      "learning_rate": 1.5494084683452726e-06,
      "loss": 2.1241,
      "step": 63100
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9924582839012146,
      "learning_rate": 1.5491883310524835e-06,
      "loss": 2.2816,
      "step": 63101
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1247625350952148,
      "learning_rate": 1.5489682080861102e-06,
      "loss": 2.319,
      "step": 63102
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0811564922332764,
      "learning_rate": 1.548748099446533e-06,
      "loss": 2.4122,
      "step": 63103
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1854244470596313,
      "learning_rate": 1.5485280051341223e-06,
      "loss": 2.1824,
      "step": 63104
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1715211868286133,
      "learning_rate": 1.5483079251492528e-06,
      "loss": 2.3026,
      "step": 63105
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9738888144493103,
      "learning_rate": 1.5480878594922955e-06,
      "loss": 2.0954,
      "step": 63106
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2143970727920532,
      "learning_rate": 1.5478678081636268e-06,
      "loss": 2.5648,
      "step": 63107
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.170528769493103,
      "learning_rate": 1.5476477711636152e-06,
      "loss": 2.3888,
      "step": 63108
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0929064750671387,
      "learning_rate": 1.5474277484926393e-06,
      "loss": 2.1221,
      "step": 63109
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0854848623275757,
      "learning_rate": 1.5472077401510655e-06,
      "loss": 2.243,
      "step": 63110
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0176634788513184,
      "learning_rate": 1.5469877461392736e-06,
      "loss": 2.1415,
      "step": 63111
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0206962823867798,
      "learning_rate": 1.5467677664576309e-06,
      "loss": 2.4423,
      "step": 63112
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.045641303062439,
      "learning_rate": 1.5465478011065148e-06,
      "loss": 2.1759,
      "step": 63113
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.072699785232544,
      "learning_rate": 1.5463278500862954e-06,
      "loss": 2.49,
      "step": 63114
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1296733617782593,
      "learning_rate": 1.5461079133973477e-06,
      "loss": 2.2818,
      "step": 63115
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1280912160873413,
      "learning_rate": 1.5458879910400393e-06,
      "loss": 2.5016,
      "step": 63116
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1958123445510864,
      "learning_rate": 1.545668083014751e-06,
      "loss": 2.381,
      "step": 63117
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9862886071205139,
      "learning_rate": 1.5454481893218476e-06,
      "loss": 2.2772,
      "step": 63118
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.202750325202942,
      "learning_rate": 1.5452283099617082e-06,
      "loss": 2.162,
      "step": 63119
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9946199059486389,
      "learning_rate": 1.5450084449347014e-06,
      "loss": 2.154,
      "step": 63120
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2366912364959717,
      "learning_rate": 1.5447885942412034e-06,
      "loss": 2.1675,
      "step": 63121
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1345131397247314,
      "learning_rate": 1.544568757881586e-06,
      "loss": 2.324,
      "step": 63122
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0779072046279907,
      "learning_rate": 1.544348935856218e-06,
      "loss": 2.2553,
      "step": 63123
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1373943090438843,
      "learning_rate": 1.544129128165478e-06,
      "loss": 2.3761,
      "step": 63124
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0713834762573242,
      "learning_rate": 1.5439093348097323e-06,
      "loss": 2.1042,
      "step": 63125
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0767179727554321,
      "learning_rate": 1.5436895557893604e-06,
      "loss": 2.2446,
      "step": 63126
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0254461765289307,
      "learning_rate": 1.5434697911047302e-06,
      "loss": 2.1858,
      "step": 63127
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4508912563323975,
      "learning_rate": 1.5432500407562168e-06,
      "loss": 2.2545,
      "step": 63128
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1713114976882935,
      "learning_rate": 1.5430303047441887e-06,
      "loss": 2.2298,
      "step": 63129
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2225072383880615,
      "learning_rate": 1.542810583069022e-06,
      "loss": 2.3924,
      "step": 63130
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.302735447883606,
      "learning_rate": 1.5425908757310882e-06,
      "loss": 2.2795,
      "step": 63131
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1735916137695312,
      "learning_rate": 1.5423711827307609e-06,
      "loss": 2.288,
      "step": 63132
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0733271837234497,
      "learning_rate": 1.5421515040684088e-06,
      "loss": 2.3411,
      "step": 63133
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0305131673812866,
      "learning_rate": 1.5419318397444104e-06,
      "loss": 2.1515,
      "step": 63134
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.099805235862732,
      "learning_rate": 1.5417121897591313e-06,
      "loss": 2.2666,
      "step": 63135
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.115262508392334,
      "learning_rate": 1.541492554112951e-06,
      "loss": 2.435,
      "step": 63136
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1001043319702148,
      "learning_rate": 1.5412729328062348e-06,
      "loss": 2.4844,
      "step": 63137
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0022064447402954,
      "learning_rate": 1.541053325839361e-06,
      "loss": 2.0981,
      "step": 63138
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0496231317520142,
      "learning_rate": 1.5408337332126965e-06,
      "loss": 2.1922,
      "step": 63139
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9972349405288696,
      "learning_rate": 1.5406141549266219e-06,
      "loss": 2.1784,
      "step": 63140
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1157561540603638,
      "learning_rate": 1.5403945909814988e-06,
      "loss": 2.3247,
      "step": 63141
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0443122386932373,
      "learning_rate": 1.540175041377706e-06,
      "loss": 2.3042,
      "step": 63142
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0674997568130493,
      "learning_rate": 1.5399555061156124e-06,
      "loss": 2.1003,
      "step": 63143
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1411694288253784,
      "learning_rate": 1.539735985195594e-06,
      "loss": 2.3409,
      "step": 63144
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1079020500183105,
      "learning_rate": 1.5395164786180195e-06,
      "loss": 2.4245,
      "step": 63145
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0378773212432861,
      "learning_rate": 1.539296986383264e-06,
      "loss": 2.242,
      "step": 63146
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3736461400985718,
      "learning_rate": 1.5390775084916954e-06,
      "loss": 2.4829,
      "step": 63147
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2423326969146729,
      "learning_rate": 1.5388580449436907e-06,
      "loss": 2.2693,
      "step": 63148
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.133729338645935,
      "learning_rate": 1.53863859573962e-06,
      "loss": 2.3448,
      "step": 63149
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0027536153793335,
      "learning_rate": 1.5384191608798527e-06,
      "loss": 2.2436,
      "step": 63150
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.178367257118225,
      "learning_rate": 1.5381997403647653e-06,
      "loss": 2.4617,
      "step": 63151
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2519140243530273,
      "learning_rate": 1.5379803341947252e-06,
      "loss": 2.3327,
      "step": 63152
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1465840339660645,
      "learning_rate": 1.537760942370109e-06,
      "loss": 2.2826,
      "step": 63153
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0618252754211426,
      "learning_rate": 1.537541564891286e-06,
      "loss": 2.0744,
      "step": 63154
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0112197399139404,
      "learning_rate": 1.5373222017586286e-06,
      "loss": 2.2707,
      "step": 63155
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.030194878578186,
      "learning_rate": 1.5371028529725074e-06,
      "loss": 2.1505,
      "step": 63156
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.039692759513855,
      "learning_rate": 1.5368835185332964e-06,
      "loss": 2.2605,
      "step": 63157
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0047086477279663,
      "learning_rate": 1.5366641984413643e-06,
      "loss": 2.4213,
      "step": 63158
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1809815168380737,
      "learning_rate": 1.5364448926970865e-06,
      "loss": 2.3606,
      "step": 63159
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0722849369049072,
      "learning_rate": 1.5362256013008324e-06,
      "loss": 2.1974,
      "step": 63160
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1734596490859985,
      "learning_rate": 1.5360063242529756e-06,
      "loss": 2.3737,
      "step": 63161
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1411826610565186,
      "learning_rate": 1.535787061553885e-06,
      "loss": 2.3938,
      "step": 63162
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1597230434417725,
      "learning_rate": 1.5355678132039376e-06,
      "loss": 2.2388,
      "step": 63163
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9621009230613708,
      "learning_rate": 1.5353485792034983e-06,
      "loss": 2.351,
      "step": 63164
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.085456371307373,
      "learning_rate": 1.5351293595529448e-06,
      "loss": 2.3277,
      "step": 63165
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.077590823173523,
      "learning_rate": 1.5349101542526457e-06,
      "loss": 2.2296,
      "step": 63166
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.043819546699524,
      "learning_rate": 1.5346909633029727e-06,
      "loss": 2.4249,
      "step": 63167
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1245381832122803,
      "learning_rate": 1.5344717867042958e-06,
      "loss": 2.3606,
      "step": 63168
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1383017301559448,
      "learning_rate": 1.5342526244569912e-06,
      "loss": 2.36,
      "step": 63169
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0698233842849731,
      "learning_rate": 1.534033476561425e-06,
      "loss": 2.2548,
      "step": 63170
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0852645635604858,
      "learning_rate": 1.533814343017973e-06,
      "loss": 2.2926,
      "step": 63171
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0444750785827637,
      "learning_rate": 1.5335952238270035e-06,
      "loss": 2.4486,
      "step": 63172
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.106958270072937,
      "learning_rate": 1.5333761189888919e-06,
      "loss": 2.4086,
      "step": 63173
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1483469009399414,
      "learning_rate": 1.5331570285040043e-06,
      "loss": 2.3246,
      "step": 63174
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0562827587127686,
      "learning_rate": 1.5329379523727184e-06,
      "loss": 2.2166,
      "step": 63175
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.059638500213623,
      "learning_rate": 1.5327188905954016e-06,
      "loss": 2.2885,
      "step": 63176
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0333878993988037,
      "learning_rate": 1.5324998431724236e-06,
      "loss": 2.2925,
      "step": 63177
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0076322555541992,
      "learning_rate": 1.5322808101041609e-06,
      "loss": 2.2028,
      "step": 63178
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1060593128204346,
      "learning_rate": 1.5320617913909797e-06,
      "loss": 2.1507,
      "step": 63179
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1343175172805786,
      "learning_rate": 1.5318427870332553e-06,
      "loss": 2.3423,
      "step": 63180
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0379027128219604,
      "learning_rate": 1.5316237970313575e-06,
      "loss": 2.1616,
      "step": 63181
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9795960187911987,
      "learning_rate": 1.5314048213856559e-06,
      "loss": 2.3193,
      "step": 63182
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.029245138168335,
      "learning_rate": 1.5311858600965245e-06,
      "loss": 2.2207,
      "step": 63183
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0196788311004639,
      "learning_rate": 1.5309669131643335e-06,
      "loss": 2.3685,
      "step": 63184
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1452946662902832,
      "learning_rate": 1.5307479805894509e-06,
      "loss": 2.3164,
      "step": 63185
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0281949043273926,
      "learning_rate": 1.5305290623722546e-06,
      "loss": 2.2706,
      "step": 63186
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1148685216903687,
      "learning_rate": 1.5303101585131076e-06,
      "loss": 2.3183,
      "step": 63187
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0463874340057373,
      "learning_rate": 1.5300912690123882e-06,
      "loss": 2.1146,
      "step": 63188
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0761133432388306,
      "learning_rate": 1.529872393870463e-06,
      "loss": 2.2638,
      "step": 63189
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1512296199798584,
      "learning_rate": 1.529653533087706e-06,
      "loss": 2.4378,
      "step": 63190
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1400597095489502,
      "learning_rate": 1.5294346866644848e-06,
      "loss": 2.4879,
      "step": 63191
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1388773918151855,
      "learning_rate": 1.5292158546011749e-06,
      "loss": 2.5317,
      "step": 63192
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0691159963607788,
      "learning_rate": 1.5289970368981443e-06,
      "loss": 2.2533,
      "step": 63193
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.185612678527832,
      "learning_rate": 1.5287782335557644e-06,
      "loss": 2.2188,
      "step": 63194
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.079150676727295,
      "learning_rate": 1.5285594445744034e-06,
      "loss": 2.4761,
      "step": 63195
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.127040982246399,
      "learning_rate": 1.5283406699544379e-06,
      "loss": 2.5664,
      "step": 63196
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2218456268310547,
      "learning_rate": 1.5281219096962342e-06,
      "loss": 2.3046,
      "step": 63197
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0622422695159912,
      "learning_rate": 1.5279031638001661e-06,
      "loss": 2.205,
      "step": 63198
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9632883071899414,
      "learning_rate": 1.5276844322666007e-06,
      "loss": 2.1494,
      "step": 63199
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0833191871643066,
      "learning_rate": 1.5274657150959138e-06,
      "loss": 2.2723,
      "step": 63200
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2462153434753418,
      "learning_rate": 1.527247012288472e-06,
      "loss": 2.4725,
      "step": 63201
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9969028830528259,
      "learning_rate": 1.5270283238446493e-06,
      "loss": 2.2887,
      "step": 63202
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0036792755126953,
      "learning_rate": 1.5268096497648122e-06,
      "loss": 2.3841,
      "step": 63203
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1878958940505981,
      "learning_rate": 1.526590990049337e-06,
      "loss": 2.3022,
      "step": 63204
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2417329549789429,
      "learning_rate": 1.5263723446985912e-06,
      "loss": 2.1386,
      "step": 63205
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0207297801971436,
      "learning_rate": 1.5261537137129455e-06,
      "loss": 2.3561,
      "step": 63206
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0838290452957153,
      "learning_rate": 1.525935097092769e-06,
      "loss": 2.3647,
      "step": 63207
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0879738330841064,
      "learning_rate": 1.5257164948384351e-06,
      "loss": 2.2154,
      "step": 63208
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9879713654518127,
      "learning_rate": 1.5254979069503117e-06,
      "loss": 2.2565,
      "step": 63209
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1106784343719482,
      "learning_rate": 1.5252793334287741e-06,
      "loss": 2.1544,
      "step": 63210
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0806384086608887,
      "learning_rate": 1.5250607742741886e-06,
      "loss": 2.2643,
      "step": 63211
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1718909740447998,
      "learning_rate": 1.5248422294869258e-06,
      "loss": 2.1904,
      "step": 63212
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0637251138687134,
      "learning_rate": 1.524623699067358e-06,
      "loss": 2.4959,
      "step": 63213
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.463374137878418,
      "learning_rate": 1.5244051830158535e-06,
      "loss": 2.4915,
      "step": 63214
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0571510791778564,
      "learning_rate": 1.5241866813327866e-06,
      "loss": 2.2119,
      "step": 63215
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0865918397903442,
      "learning_rate": 1.5239681940185225e-06,
      "loss": 2.4635,
      "step": 63216
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1276997327804565,
      "learning_rate": 1.5237497210734364e-06,
      "loss": 2.2562,
      "step": 63217
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0571966171264648,
      "learning_rate": 1.5235312624978949e-06,
      "loss": 2.4895,
      "step": 63218
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0489110946655273,
      "learning_rate": 1.523312818292275e-06,
      "loss": 2.2746,
      "step": 63219
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.355048656463623,
      "learning_rate": 1.523094388456937e-06,
      "loss": 2.246,
      "step": 63220
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.109578251838684,
      "learning_rate": 1.5228759729922592e-06,
      "loss": 2.1405,
      "step": 63221
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.223382830619812,
      "learning_rate": 1.5226575718986058e-06,
      "loss": 2.3399,
      "step": 63222
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.185042142868042,
      "learning_rate": 1.522439185176353e-06,
      "loss": 2.481,
      "step": 63223
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0514456033706665,
      "learning_rate": 1.5222208128258664e-06,
      "loss": 2.2598,
      "step": 63224
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1651898622512817,
      "learning_rate": 1.5220024548475198e-06,
      "loss": 2.5148,
      "step": 63225
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0872639417648315,
      "learning_rate": 1.5217841112416787e-06,
      "loss": 2.194,
      "step": 63226
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0925487279891968,
      "learning_rate": 1.5215657820087193e-06,
      "loss": 2.4059,
      "step": 63227
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1046741008758545,
      "learning_rate": 1.5213474671490059e-06,
      "loss": 2.4144,
      "step": 63228
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9896353483200073,
      "learning_rate": 1.5211291666629147e-06,
      "loss": 2.0573,
      "step": 63229
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1570241451263428,
      "learning_rate": 1.5209108805508088e-06,
      "loss": 2.2536,
      "step": 63230
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2089442014694214,
      "learning_rate": 1.5206926088130648e-06,
      "loss": 2.3171,
      "step": 63231
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1317429542541504,
      "learning_rate": 1.5204743514500497e-06,
      "loss": 2.4101,
      "step": 63232
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0403107404708862,
      "learning_rate": 1.5202561084621337e-06,
      "loss": 2.3173,
      "step": 63233
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1912260055541992,
      "learning_rate": 1.520037879849684e-06,
      "loss": 2.3634,
      "step": 63234
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1238163709640503,
      "learning_rate": 1.5198196656130747e-06,
      "loss": 2.3781,
      "step": 63235
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1295069456100464,
      "learning_rate": 1.5196014657526736e-06,
      "loss": 2.0028,
      "step": 63236
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.026656985282898,
      "learning_rate": 1.5193832802688524e-06,
      "loss": 2.2825,
      "step": 63237
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1980825662612915,
      "learning_rate": 1.5191651091619785e-06,
      "loss": 2.182,
      "step": 63238
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1686985492706299,
      "learning_rate": 1.5189469524324252e-06,
      "loss": 2.2674,
      "step": 63239
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0506341457366943,
      "learning_rate": 1.5187288100805597e-06,
      "loss": 2.4096,
      "step": 63240
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1053686141967773,
      "learning_rate": 1.5185106821067496e-06,
      "loss": 2.2029,
      "step": 63241
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0169963836669922,
      "learning_rate": 1.5182925685113714e-06,
      "loss": 2.1908,
      "step": 63242
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1805739402770996,
      "learning_rate": 1.5180744692947869e-06,
      "loss": 2.3461,
      "step": 63243
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0526306629180908,
      "learning_rate": 1.5178563844573735e-06,
      "loss": 2.3757,
      "step": 63244
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1465535163879395,
      "learning_rate": 1.5176383139994943e-06,
      "loss": 2.3912,
      "step": 63245
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1297273635864258,
      "learning_rate": 1.517420257921527e-06,
      "loss": 2.3856,
      "step": 63246
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0924043655395508,
      "learning_rate": 1.5172022162238308e-06,
      "loss": 2.253,
      "step": 63247
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0163947343826294,
      "learning_rate": 1.5169841889067827e-06,
      "loss": 2.2141,
      "step": 63248
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.019327163696289,
      "learning_rate": 1.5167661759707498e-06,
      "loss": 2.3154,
      "step": 63249
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.099028468132019,
      "learning_rate": 1.5165481774161028e-06,
      "loss": 2.3046,
      "step": 63250
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1371879577636719,
      "learning_rate": 1.5163301932432096e-06,
      "loss": 2.2851,
      "step": 63251
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.221544623374939,
      "learning_rate": 1.5161122234524428e-06,
      "loss": 2.3029,
      "step": 63252
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1019407510757446,
      "learning_rate": 1.5158942680441668e-06,
      "loss": 2.3862,
      "step": 63253
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.130445957183838,
      "learning_rate": 1.515676327018758e-06,
      "loss": 2.1897,
      "step": 63254
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0417715311050415,
      "learning_rate": 1.5154584003765782e-06,
      "loss": 2.4818,
      "step": 63255
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0553524494171143,
      "learning_rate": 1.5152404881180048e-06,
      "loss": 2.3247,
      "step": 63256
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9293780326843262,
      "learning_rate": 1.5150225902434e-06,
      "loss": 2.3656,
      "step": 63257
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.10265052318573,
      "learning_rate": 1.5148047067531391e-06,
      "loss": 2.4315,
      "step": 63258
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0113328695297241,
      "learning_rate": 1.5145868376475881e-06,
      "loss": 2.2512,
      "step": 63259
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1456592082977295,
      "learning_rate": 1.514368982927117e-06,
      "loss": 2.2645,
      "step": 63260
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0542185306549072,
      "learning_rate": 1.514151142592094e-06,
      "loss": 2.5843,
      "step": 63261
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.071319580078125,
      "learning_rate": 1.5139333166428903e-06,
      "loss": 2.3439,
      "step": 63262
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.142518401145935,
      "learning_rate": 1.5137155050798735e-06,
      "loss": 2.457,
      "step": 63263
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0661625862121582,
      "learning_rate": 1.5134977079034153e-06,
      "loss": 2.2655,
      "step": 63264
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2226873636245728,
      "learning_rate": 1.5132799251138807e-06,
      "loss": 2.3673,
      "step": 63265
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0770446062088013,
      "learning_rate": 1.5130621567116454e-06,
      "loss": 2.1494,
      "step": 63266
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0155025720596313,
      "learning_rate": 1.5128444026970735e-06,
      "loss": 2.3614,
      "step": 63267
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.084409475326538,
      "learning_rate": 1.5126266630705333e-06,
      "loss": 2.0706,
      "step": 63268
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0959025621414185,
      "learning_rate": 1.5124089378323982e-06,
      "loss": 2.165,
      "step": 63269
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.634919285774231,
      "learning_rate": 1.512191226983033e-06,
      "loss": 2.2957,
      "step": 63270
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1983325481414795,
      "learning_rate": 1.5119735305228122e-06,
      "loss": 2.3731,
      "step": 63271
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.090848684310913,
      "learning_rate": 1.5117558484521e-06,
      "loss": 2.2727,
      "step": 63272
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0324265956878662,
      "learning_rate": 1.511538180771268e-06,
      "loss": 2.2778,
      "step": 63273
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.269129991531372,
      "learning_rate": 1.511320527480682e-06,
      "loss": 2.5184,
      "step": 63274
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0559499263763428,
      "learning_rate": 1.5111028885807155e-06,
      "loss": 2.2099,
      "step": 63275
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.094353199005127,
      "learning_rate": 1.510885264071732e-06,
      "loss": 2.548,
      "step": 63276
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1439253091812134,
      "learning_rate": 1.5106676539541065e-06,
      "loss": 2.4175,
      "step": 63277
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.211164116859436,
      "learning_rate": 1.5104500582282022e-06,
      "loss": 2.3604,
      "step": 63278
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2118043899536133,
      "learning_rate": 1.5102324768943944e-06,
      "loss": 2.3244,
      "step": 63279
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0596805810928345,
      "learning_rate": 1.5100149099530448e-06,
      "loss": 2.2177,
      "step": 63280
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2410684823989868,
      "learning_rate": 1.5097973574045277e-06,
      "loss": 2.3509,
      "step": 63281
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1163822412490845,
      "learning_rate": 1.5095798192492084e-06,
      "loss": 2.3441,
      "step": 63282
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0530363321304321,
      "learning_rate": 1.5093622954874599e-06,
      "loss": 2.2934,
      "step": 63283
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2175475358963013,
      "learning_rate": 1.5091447861196463e-06,
      "loss": 2.1732,
      "step": 63284
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1329939365386963,
      "learning_rate": 1.5089272911461416e-06,
      "loss": 2.2619,
      "step": 63285
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1105839014053345,
      "learning_rate": 1.5087098105673082e-06,
      "loss": 2.2668,
      "step": 63286
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.188001036643982,
      "learning_rate": 1.5084923443835187e-06,
      "loss": 2.1768,
      "step": 63287
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.23004150390625,
      "learning_rate": 1.5082748925951395e-06,
      "loss": 2.2912,
      "step": 63288
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.048153042793274,
      "learning_rate": 1.5080574552025429e-06,
      "loss": 2.1382,
      "step": 63289
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.150907039642334,
      "learning_rate": 1.5078400322060926e-06,
      "loss": 2.256,
      "step": 63290
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0695998668670654,
      "learning_rate": 1.507622623606163e-06,
      "loss": 2.3817,
      "step": 63291
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0112906694412231,
      "learning_rate": 1.5074052294031171e-06,
      "loss": 2.4974,
      "step": 63292
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0542502403259277,
      "learning_rate": 1.507187849597328e-06,
      "loss": 2.1013,
      "step": 63293
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0736147165298462,
      "learning_rate": 1.506970484189162e-06,
      "loss": 2.3901,
      "step": 63294
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0621272325515747,
      "learning_rate": 1.5067531331789853e-06,
      "loss": 2.168,
      "step": 63295
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1257959604263306,
      "learning_rate": 1.5065357965671713e-06,
      "loss": 2.3577,
      "step": 63296
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1069276332855225,
      "learning_rate": 1.5063184743540826e-06,
      "loss": 2.2514,
      "step": 63297
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2054569721221924,
      "learning_rate": 1.506101166540095e-06,
      "loss": 2.1426,
      "step": 63298
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0945345163345337,
      "learning_rate": 1.5058838731255721e-06,
      "loss": 2.3348,
      "step": 63299
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.236351490020752,
      "learning_rate": 1.5056665941108806e-06,
      "loss": 2.3515,
      "step": 63300
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0574021339416504,
      "learning_rate": 1.5054493294963934e-06,
      "loss": 2.1425,
      "step": 63301
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1396846771240234,
      "learning_rate": 1.505232079282476e-06,
      "loss": 2.1781,
      "step": 63302
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2735353708267212,
      "learning_rate": 1.5050148434694968e-06,
      "loss": 2.1962,
      "step": 63303
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.027476191520691,
      "learning_rate": 1.5047976220578253e-06,
      "loss": 2.3923,
      "step": 63304
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1298648118972778,
      "learning_rate": 1.5045804150478273e-06,
      "loss": 2.3688,
      "step": 63305
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.063973069190979,
      "learning_rate": 1.5043632224398752e-06,
      "loss": 2.2767,
      "step": 63306
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1383112668991089,
      "learning_rate": 1.5041460442343337e-06,
      "loss": 2.3375,
      "step": 63307
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9945286512374878,
      "learning_rate": 1.5039288804315733e-06,
      "loss": 2.273,
      "step": 63308
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.086458444595337,
      "learning_rate": 1.5037117310319581e-06,
      "loss": 2.4825,
      "step": 63309
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1050289869308472,
      "learning_rate": 1.5034945960358616e-06,
      "loss": 2.2431,
      "step": 63310
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0273945331573486,
      "learning_rate": 1.5032774754436509e-06,
      "loss": 2.3983,
      "step": 63311
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0774321556091309,
      "learning_rate": 1.503060369255691e-06,
      "loss": 2.4225,
      "step": 63312
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0612701177597046,
      "learning_rate": 1.502843277472349e-06,
      "loss": 2.3446,
      "step": 63313
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0614169836044312,
      "learning_rate": 1.5026262000939984e-06,
      "loss": 2.2843,
      "step": 63314
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1106078624725342,
      "learning_rate": 1.5024091371210025e-06,
      "loss": 2.54,
      "step": 63315
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0797821283340454,
      "learning_rate": 1.5021920885537322e-06,
      "loss": 2.2186,
      "step": 63316
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1422014236450195,
      "learning_rate": 1.5019750543925527e-06,
      "loss": 2.1477,
      "step": 63317
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0790339708328247,
      "learning_rate": 1.5017580346378357e-06,
      "loss": 2.4073,
      "step": 63318
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0629708766937256,
      "learning_rate": 1.5015410292899447e-06,
      "loss": 2.2106,
      "step": 63319
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1281715631484985,
      "learning_rate": 1.5013240383492523e-06,
      "loss": 2.5661,
      "step": 63320
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1515097618103027,
      "learning_rate": 1.5011070618161205e-06,
      "loss": 2.203,
      "step": 63321
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1035956144332886,
      "learning_rate": 1.5008900996909238e-06,
      "loss": 2.3979,
      "step": 63322
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0741935968399048,
      "learning_rate": 1.500673151974027e-06,
      "loss": 2.2735,
      "step": 63323
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.029581069946289,
      "learning_rate": 1.5004562186657944e-06,
      "loss": 2.3218,
      "step": 63324
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.057158350944519,
      "learning_rate": 1.5002392997665993e-06,
      "loss": 2.3199,
      "step": 63325
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.081939697265625,
      "learning_rate": 1.5000223952768079e-06,
      "loss": 2.0594,
      "step": 63326
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0759944915771484,
      "learning_rate": 1.4998055051967842e-06,
      "loss": 2.247,
      "step": 63327
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0415902137756348,
      "learning_rate": 1.4995886295269012e-06,
      "loss": 2.3619,
      "step": 63328
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.051024079322815,
      "learning_rate": 1.4993717682675236e-06,
      "loss": 2.2439,
      "step": 63329
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0459764003753662,
      "learning_rate": 1.4991549214190181e-06,
      "loss": 2.3177,
      "step": 63330
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0516645908355713,
      "learning_rate": 1.4989380889817561e-06,
      "loss": 2.1529,
      "step": 63331
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0844967365264893,
      "learning_rate": 1.4987212709560995e-06,
      "loss": 2.4382,
      "step": 63332
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.024304747581482,
      "learning_rate": 1.4985044673424232e-06,
      "loss": 2.4746,
      "step": 63333
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9837113618850708,
      "learning_rate": 1.4982876781410872e-06,
      "loss": 2.309,
      "step": 63334
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0511387586593628,
      "learning_rate": 1.4980709033524654e-06,
      "loss": 2.3845,
      "step": 63335
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0511506795883179,
      "learning_rate": 1.49785414297692e-06,
      "loss": 2.4097,
      "step": 63336
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.07435941696167,
      "learning_rate": 1.4976373970148238e-06,
      "loss": 2.2263,
      "step": 63337
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2073163986206055,
      "learning_rate": 1.4974206654665412e-06,
      "loss": 2.2012,
      "step": 63338
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.012919306755066,
      "learning_rate": 1.4972039483324396e-06,
      "loss": 2.284,
      "step": 63339
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2045278549194336,
      "learning_rate": 1.4969872456128843e-06,
      "loss": 2.1951,
      "step": 63340
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.076074242591858,
      "learning_rate": 1.496770557308248e-06,
      "loss": 2.2053,
      "step": 63341
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2243164777755737,
      "learning_rate": 1.496553883418892e-06,
      "loss": 2.2081,
      "step": 63342
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0630863904953003,
      "learning_rate": 1.496337223945189e-06,
      "loss": 2.4081,
      "step": 63343
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.9392160177230835,
      "learning_rate": 1.4961205788875021e-06,
      "loss": 2.0015,
      "step": 63344
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1925749778747559,
      "learning_rate": 1.4959039482462023e-06,
      "loss": 2.3386,
      "step": 63345
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0794776678085327,
      "learning_rate": 1.4956873320216537e-06,
      "loss": 2.2109,
      "step": 63346
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.068150281906128,
      "learning_rate": 1.4954707302142257e-06,
      "loss": 2.1923,
      "step": 63347
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0562608242034912,
      "learning_rate": 1.495254142824283e-06,
      "loss": 2.3622,
      "step": 63348
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1253052949905396,
      "learning_rate": 1.4950375698521968e-06,
      "loss": 2.4068,
      "step": 63349
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0479673147201538,
      "learning_rate": 1.494821011298332e-06,
      "loss": 2.3344,
      "step": 63350
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2426774501800537,
      "learning_rate": 1.4946044671630543e-06,
      "loss": 2.2015,
      "step": 63351
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1855480670928955,
      "learning_rate": 1.4943879374467307e-06,
      "loss": 2.2683,
      "step": 63352
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.0610507726669312,
      "learning_rate": 1.4941714221497327e-06,
      "loss": 2.2332,
      "step": 63353
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.055830717086792,
      "learning_rate": 1.4939549212724213e-06,
      "loss": 2.278,
      "step": 63354
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.195953607559204,
      "learning_rate": 1.4937384348151685e-06,
      "loss": 2.3239,
      "step": 63355
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1821092367172241,
      "learning_rate": 1.4935219627783371e-06,
      "loss": 2.2821,
      "step": 63356
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1314244270324707,
      "learning_rate": 1.493305505162299e-06,
      "loss": 2.1961,
      "step": 63357
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1666297912597656,
      "learning_rate": 1.4930890619674177e-06,
      "loss": 2.1995,
      "step": 63358
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1668343544006348,
      "learning_rate": 1.4928726331940591e-06,
      "loss": 2.1265,
      "step": 63359
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.140811562538147,
      "learning_rate": 1.4926562188425942e-06,
      "loss": 2.2712,
      "step": 63360
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2532213926315308,
      "learning_rate": 1.492439818913386e-06,
      "loss": 2.3834,
      "step": 63361
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9514341354370117,
      "learning_rate": 1.4922234334068042e-06,
      "loss": 2.3911,
      "step": 63362
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.049339771270752,
      "learning_rate": 1.492007062323212e-06,
      "loss": 2.2174,
      "step": 63363
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0760573148727417,
      "learning_rate": 1.4917907056629832e-06,
      "loss": 2.389,
      "step": 63364
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1704649925231934,
      "learning_rate": 1.4915743634264757e-06,
      "loss": 2.4933,
      "step": 63365
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1213856935501099,
      "learning_rate": 1.4913580356140633e-06,
      "loss": 2.0555,
      "step": 63366
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1010212898254395,
      "learning_rate": 1.4911417222261071e-06,
      "loss": 2.4441,
      "step": 63367
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0570918321609497,
      "learning_rate": 1.490925423262979e-06,
      "loss": 2.3002,
      "step": 63368
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2929795980453491,
      "learning_rate": 1.4907091387250406e-06,
      "loss": 2.0614,
      "step": 63369
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4408293962478638,
      "learning_rate": 1.4904928686126641e-06,
      "loss": 2.5205,
      "step": 63370
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.161840796470642,
      "learning_rate": 1.4902766129262115e-06,
      "loss": 2.5993,
      "step": 63371
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.03392493724823,
      "learning_rate": 1.4900603716660534e-06,
      "loss": 2.3844,
      "step": 63372
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0748391151428223,
      "learning_rate": 1.4898441448325508e-06,
      "loss": 2.2304,
      "step": 63373
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2510607242584229,
      "learning_rate": 1.4896279324260764e-06,
      "loss": 2.2511,
      "step": 63374
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0274252891540527,
      "learning_rate": 1.4894117344469927e-06,
      "loss": 2.3277,
      "step": 63375
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.162356972694397,
      "learning_rate": 1.489195550895669e-06,
      "loss": 2.445,
      "step": 63376
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1904670000076294,
      "learning_rate": 1.4889793817724707e-06,
      "loss": 2.3173,
      "step": 63377
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.021619200706482,
      "learning_rate": 1.4887632270777641e-06,
      "loss": 2.1689,
      "step": 63378
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2815592288970947,
      "learning_rate": 1.4885470868119124e-06,
      "loss": 2.2041,
      "step": 63379
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.225058674812317,
      "learning_rate": 1.4883309609752882e-06,
      "loss": 2.1852,
      "step": 63380
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9899247884750366,
      "learning_rate": 1.4881148495682517e-06,
      "loss": 2.5508,
      "step": 63381
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9723950624465942,
      "learning_rate": 1.4878987525911747e-06,
      "loss": 2.4157,
      "step": 63382
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7205400466918945,
      "learning_rate": 1.4876826700444203e-06,
      "loss": 2.2198,
      "step": 63383
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0633755922317505,
      "learning_rate": 1.4874666019283568e-06,
      "loss": 2.3859,
      "step": 63384
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9845976829528809,
      "learning_rate": 1.4872505482433487e-06,
      "loss": 2.1334,
      "step": 63385
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1488711833953857,
      "learning_rate": 1.487034508989762e-06,
      "loss": 2.2736,
      "step": 63386
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1106016635894775,
      "learning_rate": 1.4868184841679656e-06,
      "loss": 2.1459,
      "step": 63387
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0404117107391357,
      "learning_rate": 1.4866024737783213e-06,
      "loss": 2.1545,
      "step": 63388
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.207910180091858,
      "learning_rate": 1.4863864778212012e-06,
      "loss": 2.4004,
      "step": 63389
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2302602529525757,
      "learning_rate": 1.4861704962969649e-06,
      "loss": 2.1758,
      "step": 63390
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1077022552490234,
      "learning_rate": 1.4859545292059874e-06,
      "loss": 2.1936,
      "step": 63391
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0445901155471802,
      "learning_rate": 1.4857385765486243e-06,
      "loss": 2.3214,
      "step": 63392
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1313525438308716,
      "learning_rate": 1.4855226383252496e-06,
      "loss": 2.1657,
      "step": 63393
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0633975267410278,
      "learning_rate": 1.4853067145362233e-06,
      "loss": 2.1091,
      "step": 63394
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3865859508514404,
      "learning_rate": 1.4850908051819179e-06,
      "loss": 2.2698,
      "step": 63395
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1439356803894043,
      "learning_rate": 1.4848749102626935e-06,
      "loss": 2.1767,
      "step": 63396
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.122078537940979,
      "learning_rate": 1.484659029778921e-06,
      "loss": 2.255,
      "step": 63397
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0361018180847168,
      "learning_rate": 1.484443163730962e-06,
      "loss": 2.34,
      "step": 63398
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9667057394981384,
      "learning_rate": 1.4842273121191874e-06,
      "loss": 2.4287,
      "step": 63399
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9708389639854431,
      "learning_rate": 1.4840114749439572e-06,
      "loss": 2.4276,
      "step": 63400
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.030684232711792,
      "learning_rate": 1.4837956522056441e-06,
      "loss": 2.2798,
      "step": 63401
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0359275341033936,
      "learning_rate": 1.4835798439046067e-06,
      "loss": 2.3575,
      "step": 63402
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9995400905609131,
      "learning_rate": 1.4833640500412171e-06,
      "loss": 2.6015,
      "step": 63403
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1557663679122925,
      "learning_rate": 1.4831482706158385e-06,
      "loss": 2.3136,
      "step": 63404
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.999691903591156,
      "learning_rate": 1.482932505628837e-06,
      "loss": 2.3908,
      "step": 63405
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0707099437713623,
      "learning_rate": 1.4827167550805754e-06,
      "loss": 2.3304,
      "step": 63406
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0836657285690308,
      "learning_rate": 1.4825010189714251e-06,
      "loss": 2.3119,
      "step": 63407
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0577994585037231,
      "learning_rate": 1.4822852973017465e-06,
      "loss": 2.3575,
      "step": 63408
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.210830807685852,
      "learning_rate": 1.4820695900719095e-06,
      "loss": 2.1834,
      "step": 63409
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0977885723114014,
      "learning_rate": 1.481853897282276e-06,
      "loss": 2.1992,
      "step": 63410
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0205751657485962,
      "learning_rate": 1.4816382189332157e-06,
      "loss": 2.2036,
      "step": 63411
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0582667589187622,
      "learning_rate": 1.4814225550250926e-06,
      "loss": 2.5172,
      "step": 63412
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1150797605514526,
      "learning_rate": 1.4812069055582689e-06,
      "loss": 2.2234,
      "step": 63413
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2626286745071411,
      "learning_rate": 1.4809912705331164e-06,
      "loss": 2.3483,
      "step": 63414
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1252955198287964,
      "learning_rate": 1.4807756499499948e-06,
      "loss": 2.4141,
      "step": 63415
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1076654195785522,
      "learning_rate": 1.4805600438092738e-06,
      "loss": 2.3932,
      "step": 63416
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0356502532958984,
      "learning_rate": 1.4803444521113187e-06,
      "loss": 2.5377,
      "step": 63417
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1307531595230103,
      "learning_rate": 1.4801288748564902e-06,
      "loss": 2.4029,
      "step": 63418
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1022199392318726,
      "learning_rate": 1.4799133120451602e-06,
      "loss": 2.0763,
      "step": 63419
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1223528385162354,
      "learning_rate": 1.479697763677691e-06,
      "loss": 2.2055,
      "step": 63420
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.05143141746521,
      "learning_rate": 1.4794822297544453e-06,
      "loss": 2.2936,
      "step": 63421
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.006548285484314,
      "learning_rate": 1.479266710275794e-06,
      "loss": 2.2664,
      "step": 63422
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0166655778884888,
      "learning_rate": 1.479051205242098e-06,
      "loss": 2.2527,
      "step": 63423
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0201064348220825,
      "learning_rate": 1.4788357146537268e-06,
      "loss": 2.4373,
      "step": 63424
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1055635213851929,
      "learning_rate": 1.4786202385110404e-06,
      "loss": 2.274,
      "step": 63425
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9908292889595032,
      "learning_rate": 1.4784047768144093e-06,
      "loss": 2.3122,
      "step": 63426
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.238031268119812,
      "learning_rate": 1.4781893295641947e-06,
      "loss": 2.4052,
      "step": 63427
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0496407747268677,
      "learning_rate": 1.477973896760766e-06,
      "loss": 2.3692,
      "step": 63428
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2188429832458496,
      "learning_rate": 1.477758478404484e-06,
      "loss": 2.3085,
      "step": 63429
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0972084999084473,
      "learning_rate": 1.4775430744957197e-06,
      "loss": 2.1685,
      "step": 63430
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1051607131958008,
      "learning_rate": 1.4773276850348306e-06,
      "loss": 2.2724,
      "step": 63431
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.110998272895813,
      "learning_rate": 1.4771123100221884e-06,
      "loss": 2.445,
      "step": 63432
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.042310357093811,
      "learning_rate": 1.4768969494581532e-06,
      "loss": 2.3508,
      "step": 63433
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9790529608726501,
      "learning_rate": 1.4766816033430941e-06,
      "loss": 2.4547,
      "step": 63434
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.114570140838623,
      "learning_rate": 1.4764662716773736e-06,
      "loss": 2.4353,
      "step": 63435
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1226574182510376,
      "learning_rate": 1.476250954461359e-06,
      "loss": 2.3289,
      "step": 63436
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.236014723777771,
      "learning_rate": 1.4760356516954121e-06,
      "loss": 2.1824,
      "step": 63437
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1181944608688354,
      "learning_rate": 1.4758203633799017e-06,
      "loss": 2.0719,
      "step": 63438
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.037728190422058,
      "learning_rate": 1.4756050895151886e-06,
      "loss": 2.4344,
      "step": 63439
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1433173418045044,
      "learning_rate": 1.4753898301016434e-06,
      "loss": 2.3716,
      "step": 63440
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2748199701309204,
      "learning_rate": 1.4751745851396259e-06,
      "loss": 2.1721,
      "step": 63441
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.110621452331543,
      "learning_rate": 1.4749593546295016e-06,
      "loss": 2.2194,
      "step": 63442
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9726459383964539,
      "learning_rate": 1.47474413857164e-06,
      "loss": 2.1867,
      "step": 63443
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1875356435775757,
      "learning_rate": 1.4745289369664006e-06,
      "loss": 2.4277,
      "step": 63444
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3108129501342773,
      "learning_rate": 1.474313749814149e-06,
      "loss": 2.5505,
      "step": 63445
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0936411619186401,
      "learning_rate": 1.4740985771152528e-06,
      "loss": 2.0826,
      "step": 63446
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.065131425857544,
      "learning_rate": 1.4738834188700756e-06,
      "loss": 2.2536,
      "step": 63447
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.137487530708313,
      "learning_rate": 1.4736682750789788e-06,
      "loss": 2.3259,
      "step": 63448
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1343061923980713,
      "learning_rate": 1.4734531457423328e-06,
      "loss": 2.3559,
      "step": 63449
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.251389503479004,
      "learning_rate": 1.4732380308604966e-06,
      "loss": 2.3912,
      "step": 63450
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.04805326461792,
      "learning_rate": 1.4730229304338406e-06,
      "loss": 2.4594,
      "step": 63451
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1767032146453857,
      "learning_rate": 1.4728078444627237e-06,
      "loss": 2.2953,
      "step": 63452
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2458940744400024,
      "learning_rate": 1.4725927729475164e-06,
      "loss": 2.4912,
      "step": 63453
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0952321290969849,
      "learning_rate": 1.4723777158885776e-06,
      "loss": 2.4808,
      "step": 63454
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9693966507911682,
      "learning_rate": 1.4721626732862782e-06,
      "loss": 2.2974,
      "step": 63455
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0708421468734741,
      "learning_rate": 1.4719476451409776e-06,
      "loss": 2.3369,
      "step": 63456
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.125352144241333,
      "learning_rate": 1.4717326314530433e-06,
      "loss": 2.4024,
      "step": 63457
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4782695770263672,
      "learning_rate": 1.4715176322228353e-06,
      "loss": 2.2209,
      "step": 63458
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1834183931350708,
      "learning_rate": 1.4713026474507231e-06,
      "loss": 2.2674,
      "step": 63459
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2136372327804565,
      "learning_rate": 1.4710876771370674e-06,
      "loss": 2.4487,
      "step": 63460
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0596767663955688,
      "learning_rate": 1.4708727212822371e-06,
      "loss": 2.3032,
      "step": 63461
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.160807728767395,
      "learning_rate": 1.4706577798865917e-06,
      "loss": 2.3926,
      "step": 63462
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.112551212310791,
      "learning_rate": 1.4704428529504999e-06,
      "loss": 2.2392,
      "step": 63463
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9739220142364502,
      "learning_rate": 1.4702279404743224e-06,
      "loss": 2.2765,
      "step": 63464
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1769554615020752,
      "learning_rate": 1.470013042458427e-06,
      "loss": 2.1935,
      "step": 63465
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0430530309677124,
      "learning_rate": 1.469798158903174e-06,
      "loss": 2.5583,
      "step": 63466
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0249838829040527,
      "learning_rate": 1.4695832898089312e-06,
      "loss": 2.2922,
      "step": 63467
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.160066843032837,
      "learning_rate": 1.4693684351760628e-06,
      "loss": 2.2222,
      "step": 63468
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1701414585113525,
      "learning_rate": 1.4691535950049297e-06,
      "loss": 2.2983,
      "step": 63469
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0449546575546265,
      "learning_rate": 1.468938769295899e-06,
      "loss": 2.2981,
      "step": 63470
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.164013147354126,
      "learning_rate": 1.4687239580493351e-06,
      "loss": 2.5314,
      "step": 63471
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0640990734100342,
      "learning_rate": 1.468509161265599e-06,
      "loss": 2.1476,
      "step": 63472
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.151817798614502,
      "learning_rate": 1.4682943789450587e-06,
      "loss": 2.4221,
      "step": 63473
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2320289611816406,
      "learning_rate": 1.4680796110880758e-06,
      "loss": 2.2605,
      "step": 63474
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2135263681411743,
      "learning_rate": 1.467864857695016e-06,
      "loss": 2.1619,
      "step": 63475
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1417040824890137,
      "learning_rate": 1.4676501187662428e-06,
      "loss": 2.2904,
      "step": 63476
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0938056707382202,
      "learning_rate": 1.467435394302118e-06,
      "loss": 2.1938,
      "step": 63477
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0681021213531494,
      "learning_rate": 1.4672206843030101e-06,
      "loss": 2.3327,
      "step": 63478
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1377087831497192,
      "learning_rate": 1.4670059887692779e-06,
      "loss": 2.4025,
      "step": 63479
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0557386875152588,
      "learning_rate": 1.4667913077012897e-06,
      "loss": 2.3867,
      "step": 63480
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0144623517990112,
      "learning_rate": 1.4665766410994064e-06,
      "loss": 2.2899,
      "step": 63481
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.090919017791748,
      "learning_rate": 1.4663619889639957e-06,
      "loss": 2.3391,
      "step": 63482
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0589568614959717,
      "learning_rate": 1.4661473512954194e-06,
      "loss": 2.2738,
      "step": 63483
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9523952007293701,
      "learning_rate": 1.4659327280940405e-06,
      "loss": 2.2696,
      "step": 63484
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1143484115600586,
      "learning_rate": 1.465718119360221e-06,
      "loss": 2.2765,
      "step": 63485
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.054711103439331,
      "learning_rate": 1.4655035250943296e-06,
      "loss": 2.4351,
      "step": 63486
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0738986730575562,
      "learning_rate": 1.4652889452967256e-06,
      "loss": 2.3737,
      "step": 63487
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0529166460037231,
      "learning_rate": 1.465074379967777e-06,
      "loss": 2.2278,
      "step": 63488
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0545563697814941,
      "learning_rate": 1.464859829107843e-06,
      "loss": 2.4716,
      "step": 63489
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0796582698822021,
      "learning_rate": 1.4646452927172928e-06,
      "loss": 2.5394,
      "step": 63490
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.171297311782837,
      "learning_rate": 1.4644307707964834e-06,
      "loss": 2.4279,
      "step": 63491
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.057116985321045,
      "learning_rate": 1.4642162633457857e-06,
      "loss": 2.3945,
      "step": 63492
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1560887098312378,
      "learning_rate": 1.4640017703655563e-06,
      "loss": 2.322,
      "step": 63493
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.114608645439148,
      "learning_rate": 1.4637872918561646e-06,
      "loss": 2.3677,
      "step": 63494
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.202163577079773,
      "learning_rate": 1.4635728278179707e-06,
      "loss": 2.3667,
      "step": 63495
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.381210207939148,
      "learning_rate": 1.4633583782513427e-06,
      "loss": 2.1918,
      "step": 63496
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1277168989181519,
      "learning_rate": 1.4631439431566364e-06,
      "loss": 2.4073,
      "step": 63497
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2277671098709106,
      "learning_rate": 1.4629295225342221e-06,
      "loss": 2.2117,
      "step": 63498
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1134710311889648,
      "learning_rate": 1.462715116384459e-06,
      "loss": 2.3212,
      "step": 63499
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.215942144393921,
      "learning_rate": 1.462500724707715e-06,
      "loss": 2.2943,
      "step": 63500
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0841064453125,
      "learning_rate": 1.462286347504348e-06,
      "loss": 2.2709,
      "step": 63501
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1368716955184937,
      "learning_rate": 1.4620719847747266e-06,
      "loss": 2.6302,
      "step": 63502
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1748299598693848,
      "learning_rate": 1.4618576365192128e-06,
      "loss": 2.2483,
      "step": 63503
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1623256206512451,
      "learning_rate": 1.461643302738166e-06,
      "loss": 2.5202,
      "step": 63504
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9886810779571533,
      "learning_rate": 1.4614289834319562e-06,
      "loss": 2.1786,
      "step": 63505
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1536961793899536,
      "learning_rate": 1.4612146786009406e-06,
      "loss": 2.6485,
      "step": 63506
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1720936298370361,
      "learning_rate": 1.461000388245487e-06,
      "loss": 2.5506,
      "step": 63507
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0636450052261353,
      "learning_rate": 1.460786112365955e-06,
      "loss": 2.3069,
      "step": 63508
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.032410740852356,
      "learning_rate": 1.4605718509627144e-06,
      "loss": 2.2856,
      "step": 63509
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1269108057022095,
      "learning_rate": 1.460357604036119e-06,
      "loss": 2.3085,
      "step": 63510
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0041486024856567,
      "learning_rate": 1.46014337158654e-06,
      "loss": 2.3939,
      "step": 63511
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0940009355545044,
      "learning_rate": 1.4599291536143335e-06,
      "loss": 2.2409,
      "step": 63512
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.28071129322052,
      "learning_rate": 1.4597149501198693e-06,
      "loss": 2.4831,
      "step": 63513
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0478726625442505,
      "learning_rate": 1.4595007611035062e-06,
      "loss": 2.3906,
      "step": 63514
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1261885166168213,
      "learning_rate": 1.4592865865656113e-06,
      "loss": 2.2716,
      "step": 63515
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1237837076187134,
      "learning_rate": 1.4590724265065425e-06,
      "loss": 2.2514,
      "step": 63516
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3356025218963623,
      "learning_rate": 1.4588582809266672e-06,
      "loss": 2.0648,
      "step": 63517
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1064425706863403,
      "learning_rate": 1.458644149826346e-06,
      "loss": 2.2177,
      "step": 63518
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.21113920211792,
      "learning_rate": 1.4584300332059443e-06,
      "loss": 2.4355,
      "step": 63519
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.067659854888916,
      "learning_rate": 1.4582159310658205e-06,
      "loss": 2.3876,
      "step": 63520
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.026902437210083,
      "learning_rate": 1.4580018434063436e-06,
      "loss": 2.5408,
      "step": 63521
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1625564098358154,
      "learning_rate": 1.4577877702278742e-06,
      "loss": 2.4246,
      "step": 63522
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.46884286403656,
      "learning_rate": 1.4575737115307742e-06,
      "loss": 2.209,
      "step": 63523
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0858581066131592,
      "learning_rate": 1.4573596673154034e-06,
      "loss": 2.4169,
      "step": 63524
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0996313095092773,
      "learning_rate": 1.4571456375821313e-06,
      "loss": 2.4083,
      "step": 63525
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9717732667922974,
      "learning_rate": 1.4569316223313158e-06,
      "loss": 2.2098,
      "step": 63526
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.060738444328308,
      "learning_rate": 1.4567176215633227e-06,
      "loss": 2.2255,
      "step": 63527
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.112532138824463,
      "learning_rate": 1.4565036352785122e-06,
      "loss": 2.4369,
      "step": 63528
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1390984058380127,
      "learning_rate": 1.4562896634772506e-06,
      "loss": 2.2438,
      "step": 63529
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0452368259429932,
      "learning_rate": 1.4560757061598985e-06,
      "loss": 2.2844,
      "step": 63530
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3742376565933228,
      "learning_rate": 1.4558617633268158e-06,
      "loss": 2.2374,
      "step": 63531
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9177330136299133,
      "learning_rate": 1.4556478349783698e-06,
      "loss": 2.3349,
      "step": 63532
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1589889526367188,
      "learning_rate": 1.4554339211149203e-06,
      "loss": 2.4302,
      "step": 63533
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0677855014801025,
      "learning_rate": 1.4552200217368328e-06,
      "loss": 2.262,
      "step": 63534
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2018492221832275,
      "learning_rate": 1.455006136844468e-06,
      "loss": 2.3742,
      "step": 63535
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0233280658721924,
      "learning_rate": 1.4547922664381853e-06,
      "loss": 2.2734,
      "step": 63536
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2134186029434204,
      "learning_rate": 1.4545784105183548e-06,
      "loss": 2.3277,
      "step": 63537
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0457872152328491,
      "learning_rate": 1.4543645690853336e-06,
      "loss": 2.3706,
      "step": 63538
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.159862756729126,
      "learning_rate": 1.4541507421394828e-06,
      "loss": 2.2835,
      "step": 63539
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1744515895843506,
      "learning_rate": 1.4539369296811701e-06,
      "loss": 1.9876,
      "step": 63540
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.1488823890686035,
      "learning_rate": 1.4537231317107537e-06,
      "loss": 2.0009,
      "step": 63541
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0804773569107056,
      "learning_rate": 1.4535093482286e-06,
      "loss": 2.4558,
      "step": 63542
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.612641453742981,
      "learning_rate": 1.4532955792350655e-06,
      "loss": 2.1701,
      "step": 63543
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1178927421569824,
      "learning_rate": 1.45308182473052e-06,
      "loss": 2.3043,
      "step": 63544
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0187262296676636,
      "learning_rate": 1.45286808471532e-06,
      "loss": 2.2118,
      "step": 63545
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2691384553909302,
      "learning_rate": 1.4526543591898313e-06,
      "loss": 2.4769,
      "step": 63546
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0196980237960815,
      "learning_rate": 1.452440648154413e-06,
      "loss": 2.4013,
      "step": 63547
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.021114706993103,
      "learning_rate": 1.4522269516094312e-06,
      "loss": 2.6639,
      "step": 63548
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0002763271331787,
      "learning_rate": 1.4520132695552468e-06,
      "loss": 2.3143,
      "step": 63549
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.133156657218933,
      "learning_rate": 1.451799601992221e-06,
      "loss": 2.1995,
      "step": 63550
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0406886339187622,
      "learning_rate": 1.451585948920714e-06,
      "loss": 2.1929,
      "step": 63551
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0865082740783691,
      "learning_rate": 1.4513723103410937e-06,
      "loss": 2.5145,
      "step": 63552
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.170932412147522,
      "learning_rate": 1.4511586862537165e-06,
      "loss": 2.4119,
      "step": 63553
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.030584454536438,
      "learning_rate": 1.4509450766589495e-06,
      "loss": 2.3192,
      "step": 63554
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1311737298965454,
      "learning_rate": 1.4507314815571494e-06,
      "loss": 2.1278,
      "step": 63555
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0641776323318481,
      "learning_rate": 1.4505179009486848e-06,
      "loss": 2.34,
      "step": 63556
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0556519031524658,
      "learning_rate": 1.450304334833912e-06,
      "loss": 2.2402,
      "step": 63557
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0606986284255981,
      "learning_rate": 1.4500907832131972e-06,
      "loss": 2.246,
      "step": 63558
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2794291973114014,
      "learning_rate": 1.4498772460869004e-06,
      "loss": 2.2537,
      "step": 63559
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.091996192932129,
      "learning_rate": 1.4496637234553824e-06,
      "loss": 2.3188,
      "step": 63560
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9706394076347351,
      "learning_rate": 1.4494502153190083e-06,
      "loss": 2.201,
      "step": 63561
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1749533414840698,
      "learning_rate": 1.449236721678139e-06,
      "loss": 2.2718,
      "step": 63562
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.955866277217865,
      "learning_rate": 1.449023242533133e-06,
      "loss": 2.2756,
      "step": 63563
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.091965913772583,
      "learning_rate": 1.4488097778843568e-06,
      "loss": 2.2591,
      "step": 63564
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.108322262763977,
      "learning_rate": 1.4485963277321714e-06,
      "loss": 2.2702,
      "step": 63565
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1546834707260132,
      "learning_rate": 1.448382892076935e-06,
      "loss": 2.3655,
      "step": 63566
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.049677848815918,
      "learning_rate": 1.4481694709190142e-06,
      "loss": 2.2742,
      "step": 63567
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0664786100387573,
      "learning_rate": 1.4479560642587675e-06,
      "loss": 2.3362,
      "step": 63568
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0487059354782104,
      "learning_rate": 1.4477426720965593e-06,
      "loss": 2.2859,
      "step": 63569
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0745307207107544,
      "learning_rate": 1.447529294432748e-06,
      "loss": 2.6012,
      "step": 63570
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0880217552185059,
      "learning_rate": 1.4473159312676999e-06,
      "loss": 2.3987,
      "step": 63571
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1347291469573975,
      "learning_rate": 1.4471025826017726e-06,
      "loss": 2.3022,
      "step": 63572
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.003609538078308,
      "learning_rate": 1.4468892484353303e-06,
      "loss": 2.2791,
      "step": 63573
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.07927405834198,
      "learning_rate": 1.4466759287687315e-06,
      "loss": 2.4261,
      "step": 63574
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1017740964889526,
      "learning_rate": 1.446462623602346e-06,
      "loss": 2.3397,
      "step": 63575
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.141623854637146,
      "learning_rate": 1.4462493329365235e-06,
      "loss": 2.3516,
      "step": 63576
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1112773418426514,
      "learning_rate": 1.4460360567716347e-06,
      "loss": 2.3472,
      "step": 63577
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0638388395309448,
      "learning_rate": 1.445822795108035e-06,
      "loss": 2.4691,
      "step": 63578
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1113641262054443,
      "learning_rate": 1.445609547946092e-06,
      "loss": 2.2845,
      "step": 63579
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.209317922592163,
      "learning_rate": 1.4453963152861606e-06,
      "loss": 2.3368,
      "step": 63580
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1056147813796997,
      "learning_rate": 1.4451830971286096e-06,
      "loss": 2.238,
      "step": 63581
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9401054382324219,
      "learning_rate": 1.4449698934737933e-06,
      "loss": 2.1724,
      "step": 63582
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.091049075126648,
      "learning_rate": 1.44475670432208e-06,
      "loss": 2.1561,
      "step": 63583
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0623528957366943,
      "learning_rate": 1.4445435296738242e-06,
      "loss": 2.1542,
      "step": 63584
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1433310508728027,
      "learning_rate": 1.444330369529393e-06,
      "loss": 2.3743,
      "step": 63585
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0241116285324097,
      "learning_rate": 1.4441172238891466e-06,
      "loss": 2.1829,
      "step": 63586
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2209522724151611,
      "learning_rate": 1.443904092753442e-06,
      "loss": 2.3404,
      "step": 63587
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.183524250984192,
      "learning_rate": 1.443690976122647e-06,
      "loss": 2.411,
      "step": 63588
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0645142793655396,
      "learning_rate": 1.4434778739971188e-06,
      "loss": 2.3236,
      "step": 63589
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0533862113952637,
      "learning_rate": 1.4432647863772176e-06,
      "loss": 2.3958,
      "step": 63590
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.119159460067749,
      "learning_rate": 1.4430517132633094e-06,
      "loss": 2.3629,
      "step": 63591
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.082564115524292,
      "learning_rate": 1.4428386546557494e-06,
      "loss": 2.2507,
      "step": 63592
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1398903131484985,
      "learning_rate": 1.442625610554904e-06,
      "loss": 2.1822,
      "step": 63593
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1168761253356934,
      "learning_rate": 1.4424125809611334e-06,
      "loss": 2.3421,
      "step": 63594
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0564143657684326,
      "learning_rate": 1.4421995658747944e-06,
      "loss": 2.2477,
      "step": 63595
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1056671142578125,
      "learning_rate": 1.441986565296254e-06,
      "loss": 2.3008,
      "step": 63596
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.184984803199768,
      "learning_rate": 1.441773579225869e-06,
      "loss": 2.2561,
      "step": 63597
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.009772539138794,
      "learning_rate": 1.4415606076640042e-06,
      "loss": 2.3496,
      "step": 63598
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0134320259094238,
      "learning_rate": 1.4413476506110158e-06,
      "loss": 2.3174,
      "step": 63599
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.046704649925232,
      "learning_rate": 1.44113470806727e-06,
      "loss": 2.3325,
      "step": 63600
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1325886249542236,
      "learning_rate": 1.4409217800331265e-06,
      "loss": 2.2144,
      "step": 63601
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.131568193435669,
      "learning_rate": 1.4407088665089442e-06,
      "loss": 2.1916,
      "step": 63602
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1039865016937256,
      "learning_rate": 1.4404959674950824e-06,
      "loss": 2.2113,
      "step": 63603
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0226598978042603,
      "learning_rate": 1.4402830829919078e-06,
      "loss": 2.2078,
      "step": 63604
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.127903938293457,
      "learning_rate": 1.4400702129997756e-06,
      "loss": 2.2106,
      "step": 63605
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.102936029434204,
      "learning_rate": 1.4398573575190512e-06,
      "loss": 2.4735,
      "step": 63606
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1561062335968018,
      "learning_rate": 1.4396445165500916e-06,
      "loss": 2.4932,
      "step": 63607
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.202911138534546,
      "learning_rate": 1.4394316900932625e-06,
      "loss": 2.438,
      "step": 63608
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0368568897247314,
      "learning_rate": 1.4392188781489181e-06,
      "loss": 2.4206,
      "step": 63609
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.061650276184082,
      "learning_rate": 1.4390060807174256e-06,
      "loss": 2.4097,
      "step": 63610
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0838420391082764,
      "learning_rate": 1.4387932977991403e-06,
      "loss": 2.365,
      "step": 63611
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2862600088119507,
      "learning_rate": 1.4385805293944288e-06,
      "loss": 2.1717,
      "step": 63612
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2372797727584839,
      "learning_rate": 1.4383677755036451e-06,
      "loss": 2.3308,
      "step": 63613
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0437672138214111,
      "learning_rate": 1.4381550361271568e-06,
      "loss": 2.2586,
      "step": 63614
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.085109829902649,
      "learning_rate": 1.43794231126532e-06,
      "loss": 2.4085,
      "step": 63615
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.00229012966156,
      "learning_rate": 1.4377296009184971e-06,
      "loss": 2.2768,
      "step": 63616
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2570053339004517,
      "learning_rate": 1.4375169050870451e-06,
      "loss": 2.3653,
      "step": 63617
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2096831798553467,
      "learning_rate": 1.4373042237713298e-06,
      "loss": 2.2762,
      "step": 63618
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0577025413513184,
      "learning_rate": 1.4370915569717082e-06,
      "loss": 2.0137,
      "step": 63619
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.166488528251648,
      "learning_rate": 1.4368789046885434e-06,
      "loss": 2.4766,
      "step": 63620
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0858114957809448,
      "learning_rate": 1.4366662669221942e-06,
      "loss": 2.5208,
      "step": 63621
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1591386795043945,
      "learning_rate": 1.4364536436730204e-06,
      "loss": 2.3001,
      "step": 63622
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0423108339309692,
      "learning_rate": 1.4362410349413847e-06,
      "loss": 2.2207,
      "step": 63623
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.00437331199646,
      "learning_rate": 1.4360284407276449e-06,
      "loss": 2.2623,
      "step": 63624
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1958270072937012,
      "learning_rate": 1.435815861032165e-06,
      "loss": 2.3676,
      "step": 63625
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.007596731185913,
      "learning_rate": 1.4356032958553e-06,
      "loss": 2.2217,
      "step": 63626
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0803345441818237,
      "learning_rate": 1.435390745197417e-06,
      "loss": 2.4292,
      "step": 63627
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3352457284927368,
      "learning_rate": 1.4351782090588716e-06,
      "loss": 2.2568,
      "step": 63628
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.284103512763977,
      "learning_rate": 1.4349656874400264e-06,
      "loss": 2.1808,
      "step": 63629
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1033127307891846,
      "learning_rate": 1.4347531803412374e-06,
      "loss": 2.2312,
      "step": 63630
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.007415771484375,
      "learning_rate": 1.434540687762871e-06,
      "loss": 2.3384,
      "step": 63631
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.068981409072876,
      "learning_rate": 1.4343282097052813e-06,
      "loss": 2.2033,
      "step": 63632
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0549501180648804,
      "learning_rate": 1.434115746168835e-06,
      "loss": 2.1051,
      "step": 63633
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.029099702835083,
      "learning_rate": 1.433903297153887e-06,
      "loss": 2.2869,
      "step": 63634
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9705812335014343,
      "learning_rate": 1.4336908626608003e-06,
      "loss": 2.0663,
      "step": 63635
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0313466787338257,
      "learning_rate": 1.433478442689933e-06,
      "loss": 2.2532,
      "step": 63636
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1158684492111206,
      "learning_rate": 1.4332660372416484e-06,
      "loss": 2.3457,
      "step": 63637
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0906232595443726,
      "learning_rate": 1.4330536463163015e-06,
      "loss": 2.5706,
      "step": 63638
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0988725423812866,
      "learning_rate": 1.4328412699142579e-06,
      "loss": 2.1691,
      "step": 63639
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9975023865699768,
      "learning_rate": 1.4326289080358734e-06,
      "loss": 2.5071,
      "step": 63640
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1778579950332642,
      "learning_rate": 1.4324165606815144e-06,
      "loss": 2.2622,
      "step": 63641
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1706156730651855,
      "learning_rate": 1.4322042278515324e-06,
      "loss": 2.1211,
      "step": 63642
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.086300015449524,
      "learning_rate": 1.431991909546292e-06,
      "loss": 2.5585,
      "step": 63643
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1209678649902344,
      "learning_rate": 1.4317796057661504e-06,
      "loss": 2.3187,
      "step": 63644
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.052167296409607,
      "learning_rate": 1.431567316511473e-06,
      "loss": 2.4686,
      "step": 63645
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0722136497497559,
      "learning_rate": 1.4313550417826127e-06,
      "loss": 2.4074,
      "step": 63646
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1167651414871216,
      "learning_rate": 1.431142781579935e-06,
      "loss": 2.3395,
      "step": 63647
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0589452981948853,
      "learning_rate": 1.4309305359037984e-06,
      "loss": 2.245,
      "step": 63648
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.062805414199829,
      "learning_rate": 1.4307183047545582e-06,
      "loss": 2.4954,
      "step": 63649
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9838494062423706,
      "learning_rate": 1.4305060881325816e-06,
      "loss": 2.261,
      "step": 63650
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.084108591079712,
      "learning_rate": 1.430293886038222e-06,
      "loss": 2.3887,
      "step": 63651
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0782707929611206,
      "learning_rate": 1.4300816984718435e-06,
      "loss": 2.1618,
      "step": 63652
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.110519528388977,
      "learning_rate": 1.4298695254338024e-06,
      "loss": 2.4593,
      "step": 63653
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.268882155418396,
      "learning_rate": 1.4296573669244617e-06,
      "loss": 2.4281,
      "step": 63654
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0056570768356323,
      "learning_rate": 1.4294452229441802e-06,
      "loss": 2.2197,
      "step": 63655
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0941423177719116,
      "learning_rate": 1.4292330934933163e-06,
      "loss": 2.3294,
      "step": 63656
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.11616051197052,
      "learning_rate": 1.4290209785722276e-06,
      "loss": 2.1886,
      "step": 63657
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1110949516296387,
      "learning_rate": 1.4288088781812782e-06,
      "loss": 2.524,
      "step": 63658
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0898563861846924,
      "learning_rate": 1.4285967923208243e-06,
      "loss": 2.2211,
      "step": 63659
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.497542381286621,
      "learning_rate": 1.4283847209912282e-06,
      "loss": 2.155,
      "step": 63660
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0836238861083984,
      "learning_rate": 1.4281726641928462e-06,
      "loss": 2.4447,
      "step": 63661
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1271718740463257,
      "learning_rate": 1.4279606219260422e-06,
      "loss": 2.3059,
      "step": 63662
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.804627537727356,
      "learning_rate": 1.4277485941911707e-06,
      "loss": 2.2258,
      "step": 63663
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1101750135421753,
      "learning_rate": 1.4275365809885955e-06,
      "loss": 2.4598,
      "step": 63664
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1824699640274048,
      "learning_rate": 1.4273245823186721e-06,
      "loss": 2.37,
      "step": 63665
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1409908533096313,
      "learning_rate": 1.4271125981817636e-06,
      "loss": 2.2595,
      "step": 63666
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0501658916473389,
      "learning_rate": 1.4269006285782283e-06,
      "loss": 2.202,
      "step": 63667
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.192427158355713,
      "learning_rate": 1.426688673508424e-06,
      "loss": 1.9901,
      "step": 63668
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.270799160003662,
      "learning_rate": 1.4264767329727092e-06,
      "loss": 2.3306,
      "step": 63669
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0776880979537964,
      "learning_rate": 1.4262648069714469e-06,
      "loss": 2.3199,
      "step": 63670
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1180146932601929,
      "learning_rate": 1.4260528955049925e-06,
      "loss": 2.2761,
      "step": 63671
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0638266801834106,
      "learning_rate": 1.4258409985737088e-06,
      "loss": 2.2727,
      "step": 63672
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0435203313827515,
      "learning_rate": 1.4256291161779512e-06,
      "loss": 2.1186,
      "step": 63673
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1683943271636963,
      "learning_rate": 1.425417248318084e-06,
      "loss": 2.4367,
      "step": 63674
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.205105185508728,
      "learning_rate": 1.4252053949944599e-06,
      "loss": 2.3606,
      "step": 63675
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0739214420318604,
      "learning_rate": 1.4249935562074445e-06,
      "loss": 2.5058,
      "step": 63676
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0874651670455933,
      "learning_rate": 1.4247817319573942e-06,
      "loss": 2.4753,
      "step": 63677
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0059230327606201,
      "learning_rate": 1.424569922244665e-06,
      "loss": 2.2932,
      "step": 63678
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0673538446426392,
      "learning_rate": 1.4243581270696216e-06,
      "loss": 2.1975,
      "step": 63679
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0627769231796265,
      "learning_rate": 1.42414634643262e-06,
      "loss": 2.3763,
      "step": 63680
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1777358055114746,
      "learning_rate": 1.4239345803340176e-06,
      "loss": 2.4092,
      "step": 63681
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1523098945617676,
      "learning_rate": 1.4237228287741778e-06,
      "loss": 2.248,
      "step": 63682
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0668435096740723,
      "learning_rate": 1.4235110917534556e-06,
      "loss": 2.3247,
      "step": 63683
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.063511610031128,
      "learning_rate": 1.4232993692722107e-06,
      "loss": 2.373,
      "step": 63684
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0235483646392822,
      "learning_rate": 1.4230876613308043e-06,
      "loss": 2.2328,
      "step": 63685
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0241646766662598,
      "learning_rate": 1.4228759679295923e-06,
      "loss": 2.337,
      "step": 63686
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0617215633392334,
      "learning_rate": 1.4226642890689357e-06,
      "loss": 2.3291,
      "step": 63687
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1004323959350586,
      "learning_rate": 1.4224526247491922e-06,
      "loss": 2.5844,
      "step": 63688
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0417895317077637,
      "learning_rate": 1.4222409749707223e-06,
      "loss": 2.372,
      "step": 63689
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1271121501922607,
      "learning_rate": 1.4220293397338813e-06,
      "loss": 2.362,
      "step": 63690
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1985105276107788,
      "learning_rate": 1.4218177190390337e-06,
      "loss": 2.4076,
      "step": 63691
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.069692611694336,
      "learning_rate": 1.4216061128865322e-06,
      "loss": 2.3611,
      "step": 63692
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1852442026138306,
      "learning_rate": 1.4213945212767398e-06,
      "loss": 2.4585,
      "step": 63693
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0863614082336426,
      "learning_rate": 1.4211829442100144e-06,
      "loss": 2.5725,
      "step": 63694
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1781052350997925,
      "learning_rate": 1.4209713816867132e-06,
      "loss": 2.394,
      "step": 63695
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1986702680587769,
      "learning_rate": 1.4207598337071936e-06,
      "loss": 2.1809,
      "step": 63696
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0712451934814453,
      "learning_rate": 1.4205483002718178e-06,
      "loss": 2.224,
      "step": 63697
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.036656141281128,
      "learning_rate": 1.420336781380941e-06,
      "loss": 2.0033,
      "step": 63698
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0621483325958252,
      "learning_rate": 1.420125277034926e-06,
      "loss": 2.3255,
      "step": 63699
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1706088781356812,
      "learning_rate": 1.4199137872341274e-06,
      "loss": 2.4278,
      "step": 63700
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0323129892349243,
      "learning_rate": 1.4197023119789066e-06,
      "loss": 2.6299,
      "step": 63701
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.112945556640625,
      "learning_rate": 1.419490851269618e-06,
      "loss": 2.3448,
      "step": 63702
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0765560865402222,
      "learning_rate": 1.419279405106626e-06,
      "loss": 2.2763,
      "step": 63703
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1187052726745605,
      "learning_rate": 1.4190679734902856e-06,
      "loss": 2.4367,
      "step": 63704
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0723992586135864,
      "learning_rate": 1.4188565564209534e-06,
      "loss": 2.0756,
      "step": 63705
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.119746208190918,
      "learning_rate": 1.4186451538989921e-06,
      "loss": 2.2661,
      "step": 63706
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.032954216003418,
      "learning_rate": 1.4184337659247571e-06,
      "loss": 2.4599,
      "step": 63707
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.020239233970642,
      "learning_rate": 1.4182223924986072e-06,
      "loss": 2.2082,
      "step": 63708
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9744973182678223,
      "learning_rate": 1.418011033620902e-06,
      "loss": 2.2142,
      "step": 63709
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0079594850540161,
      "learning_rate": 1.4177996892919964e-06,
      "loss": 2.3397,
      "step": 63710
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1882996559143066,
      "learning_rate": 1.4175883595122542e-06,
      "loss": 2.4767,
      "step": 63711
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0703933238983154,
      "learning_rate": 1.4173770442820312e-06,
      "loss": 2.2181,
      "step": 63712
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.221352458000183,
      "learning_rate": 1.4171657436016817e-06,
      "loss": 2.3533,
      "step": 63713
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1115959882736206,
      "learning_rate": 1.41695445747157e-06,
      "loss": 2.1525,
      "step": 63714
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0470868349075317,
      "learning_rate": 1.41674318589205e-06,
      "loss": 2.3267,
      "step": 63715
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1626285314559937,
      "learning_rate": 1.4165319288634826e-06,
      "loss": 2.1456,
      "step": 63716
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.195373296737671,
      "learning_rate": 1.4163206863862233e-06,
      "loss": 2.2796,
      "step": 63717
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0955969095230103,
      "learning_rate": 1.4161094584606338e-06,
      "loss": 2.6426,
      "step": 63718
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0341176986694336,
      "learning_rate": 1.4158982450870684e-06,
      "loss": 2.1531,
      "step": 63719
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0915560722351074,
      "learning_rate": 1.41568704626589e-06,
      "loss": 2.3472,
      "step": 63720
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.086078405380249,
      "learning_rate": 1.4154758619974506e-06,
      "loss": 2.1106,
      "step": 63721
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.074289083480835,
      "learning_rate": 1.4152646922821122e-06,
      "loss": 2.3133,
      "step": 63722
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1001209020614624,
      "learning_rate": 1.4150535371202301e-06,
      "loss": 2.1765,
      "step": 63723
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.163496732711792,
      "learning_rate": 1.4148423965121661e-06,
      "loss": 2.6954,
      "step": 63724
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0138932466506958,
      "learning_rate": 1.414631270458273e-06,
      "loss": 2.2879,
      "step": 63725
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.049188494682312,
      "learning_rate": 1.4144201589589156e-06,
      "loss": 2.3222,
      "step": 63726
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0507673025131226,
      "learning_rate": 1.4142090620144444e-06,
      "loss": 2.3117,
      "step": 63727
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1120820045471191,
      "learning_rate": 1.4139979796252222e-06,
      "loss": 2.4621,
      "step": 63728
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2571511268615723,
      "learning_rate": 1.4137869117916048e-06,
      "loss": 2.5418,
      "step": 63729
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.107249140739441,
      "learning_rate": 1.4135758585139514e-06,
      "loss": 2.1675,
      "step": 63730
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0733000040054321,
      "learning_rate": 1.4133648197926175e-06,
      "loss": 2.2512,
      "step": 63731
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0984468460083008,
      "learning_rate": 1.413153795627964e-06,
      "loss": 2.4395,
      "step": 63732
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.189538836479187,
      "learning_rate": 1.4129427860203481e-06,
      "loss": 2.2794,
      "step": 63733
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9905002117156982,
      "learning_rate": 1.4127317909701254e-06,
      "loss": 2.2831,
      "step": 63734
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0076804161071777,
      "learning_rate": 1.4125208104776522e-06,
      "loss": 2.1121,
      "step": 63735
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1327029466629028,
      "learning_rate": 1.4123098445432915e-06,
      "loss": 2.1758,
      "step": 63736
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0481593608856201,
      "learning_rate": 1.4120988931673952e-06,
      "loss": 2.4802,
      "step": 63737
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9747332334518433,
      "learning_rate": 1.4118879563503262e-06,
      "loss": 2.3508,
      "step": 63738
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0808898210525513,
      "learning_rate": 1.4116770340924402e-06,
      "loss": 2.2897,
      "step": 63739
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0385539531707764,
      "learning_rate": 1.411466126394091e-06,
      "loss": 2.316,
      "step": 63740
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0245733261108398,
      "learning_rate": 1.4112552332556418e-06,
      "loss": 2.1959,
      "step": 63741
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.7701098918914795,
      "learning_rate": 1.4110443546774454e-06,
      "loss": 2.3046,
      "step": 63742
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1586577892303467,
      "learning_rate": 1.4108334906598641e-06,
      "loss": 2.4132,
      "step": 63743
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9777294993400574,
      "learning_rate": 1.4106226412032508e-06,
      "loss": 2.0649,
      "step": 63744
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0077472925186157,
      "learning_rate": 1.4104118063079663e-06,
      "loss": 2.2645,
      "step": 63745
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1109051704406738,
      "learning_rate": 1.4102009859743671e-06,
      "loss": 2.0029,
      "step": 63746
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0045323371887207,
      "learning_rate": 1.4099901802028104e-06,
      "loss": 2.157,
      "step": 63747
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1056668758392334,
      "learning_rate": 1.4097793889936506e-06,
      "loss": 2.2189,
      "step": 63748
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3115812540054321,
      "learning_rate": 1.4095686123472508e-06,
      "loss": 2.3037,
      "step": 63749
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1031992435455322,
      "learning_rate": 1.4093578502639626e-06,
      "loss": 2.3043,
      "step": 63750
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9749950170516968,
      "learning_rate": 1.4091471027441483e-06,
      "loss": 2.2997,
      "step": 63751
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0216448307037354,
      "learning_rate": 1.4089363697881608e-06,
      "loss": 2.2175,
      "step": 63752
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9470374584197998,
      "learning_rate": 1.408725651396362e-06,
      "loss": 2.2261,
      "step": 63753
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.218578815460205,
      "learning_rate": 1.408514947569104e-06,
      "loss": 2.1172,
      "step": 63754
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1116076707839966,
      "learning_rate": 1.4083042583067484e-06,
      "loss": 2.2819,
      "step": 63755
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1236035823822021,
      "learning_rate": 1.4080935836096488e-06,
      "loss": 2.3829,
      "step": 63756
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1718655824661255,
      "learning_rate": 1.4078829234781665e-06,
      "loss": 2.3247,
      "step": 63757
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0907306671142578,
      "learning_rate": 1.4076722779126539e-06,
      "loss": 2.3227,
      "step": 63758
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1315131187438965,
      "learning_rate": 1.4074616469134728e-06,
      "loss": 2.2258,
      "step": 63759
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.095604419708252,
      "learning_rate": 1.4072510304809772e-06,
      "loss": 2.3362,
      "step": 63760
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0711337327957153,
      "learning_rate": 1.4070404286155259e-06,
      "loss": 2.337,
      "step": 63761
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1530050039291382,
      "learning_rate": 1.406829841317472e-06,
      "loss": 2.206,
      "step": 63762
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.012460470199585,
      "learning_rate": 1.406619268587177e-06,
      "loss": 2.4328,
      "step": 63763
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0494178533554077,
      "learning_rate": 1.4064087104249956e-06,
      "loss": 2.3512,
      "step": 63764
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1246284246444702,
      "learning_rate": 1.4061981668312863e-06,
      "loss": 2.352,
      "step": 63765
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2566121816635132,
      "learning_rate": 1.4059876378064064e-06,
      "loss": 2.3572,
      "step": 63766
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0715069770812988,
      "learning_rate": 1.4057771233507077e-06,
      "loss": 2.1522,
      "step": 63767
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1636953353881836,
      "learning_rate": 1.4055666234645549e-06,
      "loss": 2.3097,
      "step": 63768
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0389522314071655,
      "learning_rate": 1.405356138148297e-06,
      "loss": 2.4518,
      "step": 63769
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9772327542304993,
      "learning_rate": 1.405145667402298e-06,
      "loss": 2.2412,
      "step": 63770
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.015090823173523,
      "learning_rate": 1.4049352112269087e-06,
      "loss": 2.3083,
      "step": 63771
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0106698274612427,
      "learning_rate": 1.404724769622491e-06,
      "loss": 2.1096,
      "step": 63772
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.184755802154541,
      "learning_rate": 1.4045143425893992e-06,
      "loss": 2.3847,
      "step": 63773
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2389883995056152,
      "learning_rate": 1.4043039301279904e-06,
      "loss": 2.3226,
      "step": 63774
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1241652965545654,
      "learning_rate": 1.4040935322386184e-06,
      "loss": 2.2924,
      "step": 63775
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0560084581375122,
      "learning_rate": 1.4038831489216454e-06,
      "loss": 2.2914,
      "step": 63776
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.174283742904663,
      "learning_rate": 1.403672780177422e-06,
      "loss": 2.2055,
      "step": 63777
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.165421724319458,
      "learning_rate": 1.4034624260063113e-06,
      "loss": 2.029,
      "step": 63778
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2082760334014893,
      "learning_rate": 1.403252086408663e-06,
      "loss": 2.2682,
      "step": 63779
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1134709119796753,
      "learning_rate": 1.4030417613848402e-06,
      "loss": 2.186,
      "step": 63780
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1293243169784546,
      "learning_rate": 1.4028314509351948e-06,
      "loss": 2.1711,
      "step": 63781
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2732126712799072,
      "learning_rate": 1.4026211550600865e-06,
      "loss": 2.324,
      "step": 63782
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.049822449684143,
      "learning_rate": 1.4024108737598685e-06,
      "loss": 2.3155,
      "step": 63783
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9927781820297241,
      "learning_rate": 1.4022006070349014e-06,
      "loss": 2.3065,
      "step": 63784
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1164283752441406,
      "learning_rate": 1.4019903548855363e-06,
      "loss": 2.3251,
      "step": 63785
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1699680089950562,
      "learning_rate": 1.401780117312138e-06,
      "loss": 2.2982,
      "step": 63786
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.10764479637146,
      "learning_rate": 1.4015698943150536e-06,
      "loss": 2.2024,
      "step": 63787
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0714060068130493,
      "learning_rate": 1.4013596858946455e-06,
      "loss": 2.1625,
      "step": 63788
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1318824291229248,
      "learning_rate": 1.4011494920512647e-06,
      "loss": 2.3818,
      "step": 63789
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.200416088104248,
      "learning_rate": 1.4009393127852744e-06,
      "loss": 2.3504,
      "step": 63790
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0504274368286133,
      "learning_rate": 1.4007291480970243e-06,
      "loss": 2.4579,
      "step": 63791
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3944255113601685,
      "learning_rate": 1.4005189979868772e-06,
      "loss": 2.21,
      "step": 63792
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0609657764434814,
      "learning_rate": 1.4003088624551831e-06,
      "loss": 2.2782,
      "step": 63793
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.168984293937683,
      "learning_rate": 1.4000987415023038e-06,
      "loss": 2.4754,
      "step": 63794
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0905871391296387,
      "learning_rate": 1.3998886351285922e-06,
      "loss": 2.2613,
      "step": 63795
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9562236666679382,
      "learning_rate": 1.3996785433344028e-06,
      "loss": 2.2059,
      "step": 63796
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0079305171966553,
      "learning_rate": 1.3994684661200964e-06,
      "loss": 2.4009,
      "step": 63797
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4796687364578247,
      "learning_rate": 1.3992584034860257e-06,
      "loss": 2.1613,
      "step": 63798
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2264653444290161,
      "learning_rate": 1.3990483554325484e-06,
      "loss": 2.2756,
      "step": 63799
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1161940097808838,
      "learning_rate": 1.398838321960021e-06,
      "loss": 2.1395,
      "step": 63800
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2648634910583496,
      "learning_rate": 1.3986283030687997e-06,
      "loss": 2.4347,
      "step": 63801
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.064961552619934,
      "learning_rate": 1.3984182987592354e-06,
      "loss": 2.3149,
      "step": 63802
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0315831899642944,
      "learning_rate": 1.3982083090316924e-06,
      "loss": 2.5339,
      "step": 63803
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0350993871688843,
      "learning_rate": 1.3979983338865189e-06,
      "loss": 2.2548,
      "step": 63804
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1605031490325928,
      "learning_rate": 1.3977883733240772e-06,
      "loss": 2.569,
      "step": 63805
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0776300430297852,
      "learning_rate": 1.397578427344718e-06,
      "loss": 2.3664,
      "step": 63806
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.067632794380188,
      "learning_rate": 1.3973684959488022e-06,
      "loss": 2.4751,
      "step": 63807
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9855630993843079,
      "learning_rate": 1.3971585791366816e-06,
      "loss": 1.9885,
      "step": 63808
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0185571908950806,
      "learning_rate": 1.3969486769087148e-06,
      "loss": 2.4859,
      "step": 63809
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1822597980499268,
      "learning_rate": 1.396738789265255e-06,
      "loss": 2.3745,
      "step": 63810
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.069048285484314,
      "learning_rate": 1.396528916206662e-06,
      "loss": 2.0225,
      "step": 63811
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2148772478103638,
      "learning_rate": 1.3963190577332886e-06,
      "loss": 2.3631,
      "step": 63812
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.074022889137268,
      "learning_rate": 1.3961092138454913e-06,
      "loss": 2.5674,
      "step": 63813
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1650117635726929,
      "learning_rate": 1.3958993845436231e-06,
      "loss": 2.3411,
      "step": 63814
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0929605960845947,
      "learning_rate": 1.3956895698280448e-06,
      "loss": 2.3334,
      "step": 63815
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0863577127456665,
      "learning_rate": 1.3954797696991073e-06,
      "loss": 2.4311,
      "step": 63816
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9937569499015808,
      "learning_rate": 1.3952699841571716e-06,
      "loss": 2.347,
      "step": 63817
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.168853998184204,
      "learning_rate": 1.395060213202587e-06,
      "loss": 2.3567,
      "step": 63818
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.105678915977478,
      "learning_rate": 1.3948504568357158e-06,
      "loss": 2.4418,
      "step": 63819
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9837848544120789,
      "learning_rate": 1.3946407150569064e-06,
      "loss": 2.4168,
      "step": 63820
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1002402305603027,
      "learning_rate": 1.394430987866522e-06,
      "loss": 2.0949,
      "step": 63821
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0299739837646484,
      "learning_rate": 1.394221275264913e-06,
      "loss": 2.3612,
      "step": 63822
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.08332097530365,
      "learning_rate": 1.3940115772524343e-06,
      "loss": 2.2383,
      "step": 63823
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0941808223724365,
      "learning_rate": 1.393801893829446e-06,
      "loss": 2.3187,
      "step": 63824
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1186195611953735,
      "learning_rate": 1.3935922249963007e-06,
      "loss": 2.0569,
      "step": 63825
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.973850667476654,
      "learning_rate": 1.3933825707533521e-06,
      "loss": 2.1439,
      "step": 63826
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0482553243637085,
      "learning_rate": 1.393172931100959e-06,
      "loss": 2.2682,
      "step": 63827
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.32835054397583,
      "learning_rate": 1.3929633060394742e-06,
      "loss": 2.537,
      "step": 63828
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0034675598144531,
      "learning_rate": 1.3927536955692556e-06,
      "loss": 2.3122,
      "step": 63829
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3138794898986816,
      "learning_rate": 1.3925440996906582e-06,
      "loss": 2.3766,
      "step": 63830
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0982741117477417,
      "learning_rate": 1.3923345184040327e-06,
      "loss": 2.4548,
      "step": 63831
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1671855449676514,
      "learning_rate": 1.3921249517097414e-06,
      "loss": 2.4105,
      "step": 63832
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.104079246520996,
      "learning_rate": 1.3919153996081337e-06,
      "loss": 2.4003,
      "step": 63833
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2112022638320923,
      "learning_rate": 1.3917058620995683e-06,
      "loss": 2.4083,
      "step": 63834
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1115044355392456,
      "learning_rate": 1.3914963391843983e-06,
      "loss": 2.3149,
      "step": 63835
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2151503562927246,
      "learning_rate": 1.3912868308629824e-06,
      "loss": 2.1492,
      "step": 63836
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0777256488800049,
      "learning_rate": 1.3910773371356711e-06,
      "loss": 2.2721,
      "step": 63837
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0908104181289673,
      "learning_rate": 1.3908678580028233e-06,
      "loss": 2.4906,
      "step": 63838
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1060993671417236,
      "learning_rate": 1.3906583934647943e-06,
      "loss": 2.4242,
      "step": 63839
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0495874881744385,
      "learning_rate": 1.390448943521936e-06,
      "loss": 2.1831,
      "step": 63840
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2038705348968506,
      "learning_rate": 1.3902395081746034e-06,
      "loss": 2.3351,
      "step": 63841
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0663203001022339,
      "learning_rate": 1.3900300874231565e-06,
      "loss": 2.0748,
      "step": 63842
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0527902841567993,
      "learning_rate": 1.3898206812679438e-06,
      "loss": 2.5053,
      "step": 63843
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.02069890499115,
      "learning_rate": 1.389611289709325e-06,
      "loss": 2.1984,
      "step": 63844
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0867077112197876,
      "learning_rate": 1.3894019127476522e-06,
      "loss": 2.2928,
      "step": 63845
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.119598627090454,
      "learning_rate": 1.3891925503832848e-06,
      "loss": 2.4679,
      "step": 63846
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1212390661239624,
      "learning_rate": 1.3889832026165705e-06,
      "loss": 2.2523,
      "step": 63847
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0475149154663086,
      "learning_rate": 1.3887738694478726e-06,
      "loss": 2.32,
      "step": 63848
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1730228662490845,
      "learning_rate": 1.388564550877538e-06,
      "loss": 2.3288,
      "step": 63849
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2680941820144653,
      "learning_rate": 1.3883552469059293e-06,
      "loss": 2.0437,
      "step": 63850
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0895458459854126,
      "learning_rate": 1.3881459575333955e-06,
      "loss": 2.2066,
      "step": 63851
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1536996364593506,
      "learning_rate": 1.3879366827602946e-06,
      "loss": 2.4203,
      "step": 63852
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.167096495628357,
      "learning_rate": 1.3877274225869775e-06,
      "loss": 2.1155,
      "step": 63853
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.165431261062622,
      "learning_rate": 1.3875181770138024e-06,
      "loss": 2.2342,
      "step": 63854
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0942065715789795,
      "learning_rate": 1.3873089460411227e-06,
      "loss": 2.2312,
      "step": 63855
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.124825358390808,
      "learning_rate": 1.3870997296692945e-06,
      "loss": 2.3813,
      "step": 63856
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.999243438243866,
      "learning_rate": 1.3868905278986722e-06,
      "loss": 2.2423,
      "step": 63857
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.115754246711731,
      "learning_rate": 1.3866813407296076e-06,
      "loss": 2.4695,
      "step": 63858
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1375465393066406,
      "learning_rate": 1.3864721681624593e-06,
      "loss": 2.3095,
      "step": 63859
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1583009958267212,
      "learning_rate": 1.386263010197577e-06,
      "loss": 2.4994,
      "step": 63860
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1247013807296753,
      "learning_rate": 1.3860538668353218e-06,
      "loss": 2.3828,
      "step": 63861
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0150222778320312,
      "learning_rate": 1.3858447380760432e-06,
      "loss": 2.2694,
      "step": 63862
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0426603555679321,
      "learning_rate": 1.3856356239200985e-06,
      "loss": 2.1872,
      "step": 63863
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.127464771270752,
      "learning_rate": 1.3854265243678389e-06,
      "loss": 2.3973,
      "step": 63864
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0800453424453735,
      "learning_rate": 1.385217439419625e-06,
      "loss": 2.5166,
      "step": 63865
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.061622142791748,
      "learning_rate": 1.3850083690758031e-06,
      "loss": 2.5687,
      "step": 63866
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.11720609664917,
      "learning_rate": 1.3847993133367354e-06,
      "loss": 2.2836,
      "step": 63867
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9907764792442322,
      "learning_rate": 1.3845902722027692e-06,
      "loss": 2.1741,
      "step": 63868
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2739477157592773,
      "learning_rate": 1.3843812456742646e-06,
      "loss": 2.3266,
      "step": 63869
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0209929943084717,
      "learning_rate": 1.3841722337515717e-06,
      "loss": 2.4083,
      "step": 63870
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0577126741409302,
      "learning_rate": 1.3839632364350497e-06,
      "loss": 2.2254,
      "step": 63871
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1846688985824585,
      "learning_rate": 1.383754253725047e-06,
      "loss": 2.2763,
      "step": 63872
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.066890835762024,
      "learning_rate": 1.3835452856219234e-06,
      "loss": 2.3501,
      "step": 63873
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0144025087356567,
      "learning_rate": 1.3833363321260284e-06,
      "loss": 2.089,
      "step": 63874
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0870784521102905,
      "learning_rate": 1.383127393237722e-06,
      "loss": 2.3442,
      "step": 63875
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0706223249435425,
      "learning_rate": 1.3829184689573516e-06,
      "loss": 2.4731,
      "step": 63876
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3872572183609009,
      "learning_rate": 1.3827095592852768e-06,
      "loss": 2.1984,
      "step": 63877
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1809160709381104,
      "learning_rate": 1.3825006642218508e-06,
      "loss": 2.3837,
      "step": 63878
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2805018424987793,
      "learning_rate": 1.3822917837674254e-06,
      "loss": 2.2833,
      "step": 63879
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.040871500968933,
      "learning_rate": 1.382082917922355e-06,
      "loss": 2.2807,
      "step": 63880
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1020725965499878,
      "learning_rate": 1.3818740666869956e-06,
      "loss": 2.3724,
      "step": 63881
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0256644487380981,
      "learning_rate": 1.3816652300616995e-06,
      "loss": 2.2698,
      "step": 63882
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1925350427627563,
      "learning_rate": 1.381456408046823e-06,
      "loss": 2.3756,
      "step": 63883
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1895018815994263,
      "learning_rate": 1.3812476006427179e-06,
      "loss": 2.2648,
      "step": 63884
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1682847738265991,
      "learning_rate": 1.3810388078497372e-06,
      "loss": 2.2644,
      "step": 63885
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2206391096115112,
      "learning_rate": 1.3808300296682397e-06,
      "loss": 2.3602,
      "step": 63886
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0940345525741577,
      "learning_rate": 1.3806212660985728e-06,
      "loss": 2.5422,
      "step": 63887
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1775095462799072,
      "learning_rate": 1.3804125171410976e-06,
      "loss": 2.377,
      "step": 63888
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.081819772720337,
      "learning_rate": 1.38020378279616e-06,
      "loss": 1.9564,
      "step": 63889
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0909346342086792,
      "learning_rate": 1.3799950630641224e-06,
      "loss": 2.3529,
      "step": 63890
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.992443323135376,
      "learning_rate": 1.379786357945333e-06,
      "loss": 2.3264,
      "step": 63891
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9822161793708801,
      "learning_rate": 1.3795776674401474e-06,
      "loss": 2.369,
      "step": 63892
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0704779624938965,
      "learning_rate": 1.379368991548916e-06,
      "loss": 2.1988,
      "step": 63893
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1334941387176514,
      "learning_rate": 1.3791603302719981e-06,
      "loss": 2.4366,
      "step": 63894
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1157069206237793,
      "learning_rate": 1.3789516836097428e-06,
      "loss": 2.2813,
      "step": 63895
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1485439538955688,
      "learning_rate": 1.3787430515625089e-06,
      "loss": 2.4442,
      "step": 63896
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0146127939224243,
      "learning_rate": 1.378534434130644e-06,
      "loss": 2.2843,
      "step": 63897
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2242538928985596,
      "learning_rate": 1.3783258313145065e-06,
      "loss": 2.2408,
      "step": 63898
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0225422382354736,
      "learning_rate": 1.3781172431144462e-06,
      "loss": 2.2175,
      "step": 63899
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.208333134651184,
      "learning_rate": 1.377908669530822e-06,
      "loss": 2.4826,
      "step": 63900
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1288022994995117,
      "learning_rate": 1.377700110563981e-06,
      "loss": 2.3935,
      "step": 63901
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0262457132339478,
      "learning_rate": 1.3774915662142829e-06,
      "loss": 2.331,
      "step": 63902
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.152496337890625,
      "learning_rate": 1.3772830364820765e-06,
      "loss": 2.416,
      "step": 63903
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1538223028182983,
      "learning_rate": 1.3770745213677194e-06,
      "loss": 2.2376,
      "step": 63904
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0827980041503906,
      "learning_rate": 1.3768660208715623e-06,
      "loss": 2.3917,
      "step": 63905
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0792529582977295,
      "learning_rate": 1.3766575349939593e-06,
      "loss": 2.4525,
      "step": 63906
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0687947273254395,
      "learning_rate": 1.3764490637352624e-06,
      "loss": 2.4832,
      "step": 63907
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1924046277999878,
      "learning_rate": 1.3762406070958279e-06,
      "loss": 2.4475,
      "step": 63908
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9962815046310425,
      "learning_rate": 1.3760321650760067e-06,
      "loss": 2.2245,
      "step": 63909
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.197750449180603,
      "learning_rate": 1.375823737676154e-06,
      "loss": 2.3253,
      "step": 63910
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0748274326324463,
      "learning_rate": 1.375615324896621e-06,
      "loss": 2.2284,
      "step": 63911
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1032884120941162,
      "learning_rate": 1.3754069267377646e-06,
      "loss": 2.4794,
      "step": 63912
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1793060302734375,
      "learning_rate": 1.3751985431999359e-06,
      "loss": 2.3527,
      "step": 63913
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9761940836906433,
      "learning_rate": 1.374990174283486e-06,
      "loss": 2.1888,
      "step": 63914
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.061598539352417,
      "learning_rate": 1.374781819988772e-06,
      "loss": 2.1278,
      "step": 63915
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1667941808700562,
      "learning_rate": 1.3745734803161426e-06,
      "loss": 2.1679,
      "step": 63916
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1090452671051025,
      "learning_rate": 1.3743651552659576e-06,
      "loss": 2.164,
      "step": 63917
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2160382270812988,
      "learning_rate": 1.3741568448385656e-06,
      "loss": 2.0012,
      "step": 63918
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.154966950416565,
      "learning_rate": 1.3739485490343196e-06,
      "loss": 2.3473,
      "step": 63919
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9808400273323059,
      "learning_rate": 1.3737402678535728e-06,
      "loss": 2.471,
      "step": 63920
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9610428810119629,
      "learning_rate": 1.3735320012966803e-06,
      "loss": 2.1274,
      "step": 63921
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1255544424057007,
      "learning_rate": 1.3733237493639928e-06,
      "loss": 2.2689,
      "step": 63922
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0145982503890991,
      "learning_rate": 1.3731155120558658e-06,
      "loss": 2.3969,
      "step": 63923
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0022203922271729,
      "learning_rate": 1.372907289372649e-06,
      "loss": 2.2759,
      "step": 63924
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0597883462905884,
      "learning_rate": 1.3726990813147e-06,
      "loss": 2.0722,
      "step": 63925
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.156808614730835,
      "learning_rate": 1.3724908878823662e-06,
      "loss": 2.109,
      "step": 63926
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.060998797416687,
      "learning_rate": 1.372282709076007e-06,
      "loss": 2.2015,
      "step": 63927
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2099637985229492,
      "learning_rate": 1.3720745448959683e-06,
      "loss": 2.2944,
      "step": 63928
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0931297540664673,
      "learning_rate": 1.3718663953426103e-06,
      "loss": 2.4191,
      "step": 63929
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1499929428100586,
      "learning_rate": 1.3716582604162787e-06,
      "loss": 2.1324,
      "step": 63930
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0431965589523315,
      "learning_rate": 1.371450140117334e-06,
      "loss": 2.3296,
      "step": 63931
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0495152473449707,
      "learning_rate": 1.371242034446122e-06,
      "loss": 2.3313,
      "step": 63932
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1267160177230835,
      "learning_rate": 1.3710339434029984e-06,
      "loss": 2.3677,
      "step": 63933
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.084925651550293,
      "learning_rate": 1.3708258669883156e-06,
      "loss": 2.4169,
      "step": 63934
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0727492570877075,
      "learning_rate": 1.3706178052024277e-06,
      "loss": 2.2093,
      "step": 63935
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0532143115997314,
      "learning_rate": 1.3704097580456844e-06,
      "loss": 2.5303,
      "step": 63936
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0655156373977661,
      "learning_rate": 1.3702017255184418e-06,
      "loss": 2.5082,
      "step": 63937
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0702484846115112,
      "learning_rate": 1.36999370762105e-06,
      "loss": 2.3336,
      "step": 63938
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0178457498550415,
      "learning_rate": 1.3697857043538643e-06,
      "loss": 2.3849,
      "step": 63939
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.177424669265747,
      "learning_rate": 1.3695777157172363e-06,
      "loss": 2.5907,
      "step": 63940
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.129048466682434,
      "learning_rate": 1.369369741711516e-06,
      "loss": 2.3203,
      "step": 63941
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1530535221099854,
      "learning_rate": 1.3691617823370596e-06,
      "loss": 2.3924,
      "step": 63942
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.15756356716156,
      "learning_rate": 1.3689538375942157e-06,
      "loss": 2.4741,
      "step": 63943
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0923718214035034,
      "learning_rate": 1.368745907483342e-06,
      "loss": 2.3848,
      "step": 63944
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.152640700340271,
      "learning_rate": 1.3685379920047881e-06,
      "loss": 2.3606,
      "step": 63945
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1207126379013062,
      "learning_rate": 1.3683300911589037e-06,
      "loss": 2.12,
      "step": 63946
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1316606998443604,
      "learning_rate": 1.3681222049460474e-06,
      "loss": 2.2778,
      "step": 63947
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1637400388717651,
      "learning_rate": 1.3679143333665678e-06,
      "loss": 2.1155,
      "step": 63948
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0361835956573486,
      "learning_rate": 1.3677064764208147e-06,
      "loss": 2.2401,
      "step": 63949
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0620046854019165,
      "learning_rate": 1.3674986341091478e-06,
      "loss": 2.3407,
      "step": 63950
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0543184280395508,
      "learning_rate": 1.3672908064319113e-06,
      "loss": 2.634,
      "step": 63951
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9863828420639038,
      "learning_rate": 1.3670829933894647e-06,
      "loss": 1.9879,
      "step": 63952
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1452240943908691,
      "learning_rate": 1.3668751949821546e-06,
      "loss": 2.3232,
      "step": 63953
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.134278416633606,
      "learning_rate": 1.3666674112103384e-06,
      "loss": 2.3065,
      "step": 63954
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0050643682479858,
      "learning_rate": 1.3664596420743626e-06,
      "loss": 2.1196,
      "step": 63955
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0475165843963623,
      "learning_rate": 1.3662518875745857e-06,
      "loss": 2.3326,
      "step": 63956
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.103512167930603,
      "learning_rate": 1.3660441477113561e-06,
      "loss": 2.4016,
      "step": 63957
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1246306896209717,
      "learning_rate": 1.3658364224850273e-06,
      "loss": 2.44,
      "step": 63958
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1128307580947876,
      "learning_rate": 1.3656287118959478e-06,
      "loss": 2.2048,
      "step": 63959
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0756641626358032,
      "learning_rate": 1.3654210159444748e-06,
      "loss": 2.3234,
      "step": 63960
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.235797643661499,
      "learning_rate": 1.3652133346309571e-06,
      "loss": 2.4811,
      "step": 63961
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1401779651641846,
      "learning_rate": 1.3650056679557487e-06,
      "loss": 2.411,
      "step": 63962
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0456907749176025,
      "learning_rate": 1.3647980159191999e-06,
      "loss": 2.3398,
      "step": 63963
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.032288908958435,
      "learning_rate": 1.3645903785216663e-06,
      "loss": 2.4072,
      "step": 63964
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0808011293411255,
      "learning_rate": 1.3643827557634937e-06,
      "loss": 2.3271,
      "step": 63965
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1880598068237305,
      "learning_rate": 1.3641751476450416e-06,
      "loss": 2.3005,
      "step": 63966
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0224382877349854,
      "learning_rate": 1.3639675541666542e-06,
      "loss": 2.4894,
      "step": 63967
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.064479947090149,
      "learning_rate": 1.3637599753286913e-06,
      "loss": 2.399,
      "step": 63968
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.042883038520813,
      "learning_rate": 1.3635524111314991e-06,
      "loss": 2.3047,
      "step": 63969
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0241667032241821,
      "learning_rate": 1.363344861575432e-06,
      "loss": 2.5781,
      "step": 63970
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.142603874206543,
      "learning_rate": 1.3631373266608384e-06,
      "loss": 2.4765,
      "step": 63971
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1929774284362793,
      "learning_rate": 1.3629298063880758e-06,
      "loss": 2.4337,
      "step": 63972
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.110148549079895,
      "learning_rate": 1.3627223007574909e-06,
      "loss": 2.3596,
      "step": 63973
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0296859741210938,
      "learning_rate": 1.3625148097694385e-06,
      "loss": 2.2595,
      "step": 63974
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.065577745437622,
      "learning_rate": 1.3623073334242698e-06,
      "loss": 2.3917,
      "step": 63975
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0832237005233765,
      "learning_rate": 1.3620998717223344e-06,
      "loss": 2.1974,
      "step": 63976
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.179153323173523,
      "learning_rate": 1.3618924246639875e-06,
      "loss": 2.1837,
      "step": 63977
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1678812503814697,
      "learning_rate": 1.3616849922495778e-06,
      "loss": 2.2916,
      "step": 63978
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0874906778335571,
      "learning_rate": 1.3614775744794596e-06,
      "loss": 2.3807,
      "step": 63979
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2247451543807983,
      "learning_rate": 1.361270171353981e-06,
      "loss": 2.3525,
      "step": 63980
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.021485447883606,
      "learning_rate": 1.361062782873498e-06,
      "loss": 2.3527,
      "step": 63981
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1028639078140259,
      "learning_rate": 1.3608554090383574e-06,
      "loss": 2.3489,
      "step": 63982
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1563459634780884,
      "learning_rate": 1.360648049848915e-06,
      "loss": 2.3285,
      "step": 63983
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1006195545196533,
      "learning_rate": 1.3604407053055213e-06,
      "loss": 2.3686,
      "step": 63984
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1322046518325806,
      "learning_rate": 1.3602333754085273e-06,
      "loss": 2.2574,
      "step": 63985
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0977507829666138,
      "learning_rate": 1.3600260601582814e-06,
      "loss": 2.2293,
      "step": 63986
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0345566272735596,
      "learning_rate": 1.3598187595551404e-06,
      "loss": 2.4625,
      "step": 63987
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1985366344451904,
      "learning_rate": 1.3596114735994514e-06,
      "loss": 2.2715,
      "step": 63988
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1997342109680176,
      "learning_rate": 1.35940420229157e-06,
      "loss": 2.3226,
      "step": 63989
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0585472583770752,
      "learning_rate": 1.3591969456318422e-06,
      "loss": 2.4934,
      "step": 63990
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1758397817611694,
      "learning_rate": 1.3589897036206245e-06,
      "loss": 2.349,
      "step": 63991
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5255656242370605,
      "learning_rate": 1.3587824762582646e-06,
      "loss": 2.2792,
      "step": 63992
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0580589771270752,
      "learning_rate": 1.3585752635451167e-06,
      "loss": 2.4306,
      "step": 63993
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1132797002792358,
      "learning_rate": 1.358368065481529e-06,
      "loss": 2.2908,
      "step": 63994
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0158568620681763,
      "learning_rate": 1.358160882067857e-06,
      "loss": 2.2601,
      "step": 63995
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0795193910598755,
      "learning_rate": 1.3579537133044496e-06,
      "loss": 2.5872,
      "step": 63996
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2007533311843872,
      "learning_rate": 1.3577465591916572e-06,
      "loss": 2.4908,
      "step": 63997
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.156628131866455,
      "learning_rate": 1.3575394197298297e-06,
      "loss": 2.155,
      "step": 63998
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1120527982711792,
      "learning_rate": 1.3573322949193223e-06,
      "loss": 2.4063,
      "step": 63999
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0561212301254272,
      "learning_rate": 1.3571251847604816e-06,
      "loss": 2.4883,
      "step": 64000
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1645772457122803,
      "learning_rate": 1.3569180892536626e-06,
      "loss": 2.1324,
      "step": 64001
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0940645933151245,
      "learning_rate": 1.3567110083992163e-06,
      "loss": 2.2834,
      "step": 64002
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2322719097137451,
      "learning_rate": 1.356503942197489e-06,
      "loss": 2.2324,
      "step": 64003
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0196290016174316,
      "learning_rate": 1.3562968906488382e-06,
      "loss": 2.4259,
      "step": 64004
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1216639280319214,
      "learning_rate": 1.3560898537536083e-06,
      "loss": 2.3166,
      "step": 64005
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0444624423980713,
      "learning_rate": 1.3558828315121575e-06,
      "loss": 2.3685,
      "step": 64006
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1696984767913818,
      "learning_rate": 1.3556758239248292e-06,
      "loss": 2.3859,
      "step": 64007
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.097981333732605,
      "learning_rate": 1.3554688309919817e-06,
      "loss": 2.1109,
      "step": 64008
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0173389911651611,
      "learning_rate": 1.3552618527139593e-06,
      "loss": 2.4677,
      "step": 64009
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1472254991531372,
      "learning_rate": 1.3550548890911207e-06,
      "loss": 2.2395,
      "step": 64010
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0436655282974243,
      "learning_rate": 1.354847940123808e-06,
      "loss": 2.2817,
      "step": 64011
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2608176469802856,
      "learning_rate": 1.3546410058123771e-06,
      "loss": 2.4893,
      "step": 64012
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.056675672531128,
      "learning_rate": 1.354434086157177e-06,
      "loss": 2.1454,
      "step": 64013
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0648897886276245,
      "learning_rate": 1.3542271811585605e-06,
      "loss": 2.2017,
      "step": 64014
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0746581554412842,
      "learning_rate": 1.3540202908168743e-06,
      "loss": 2.4898,
      "step": 64015
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0722383260726929,
      "learning_rate": 1.3538134151324754e-06,
      "loss": 2.3911,
      "step": 64016
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0640884637832642,
      "learning_rate": 1.3536065541057085e-06,
      "loss": 2.3336,
      "step": 64017
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2346700429916382,
      "learning_rate": 1.3533997077369288e-06,
      "loss": 2.2116,
      "step": 64018
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0721521377563477,
      "learning_rate": 1.3531928760264823e-06,
      "loss": 2.2472,
      "step": 64019
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0284087657928467,
      "learning_rate": 1.3529860589747257e-06,
      "loss": 2.2775,
      "step": 64020
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0541985034942627,
      "learning_rate": 1.352779256582003e-06,
      "loss": 2.3463,
      "step": 64021
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0406866073608398,
      "learning_rate": 1.3525724688486707e-06,
      "loss": 2.1533,
      "step": 64022
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1787890195846558,
      "learning_rate": 1.3523656957750764e-06,
      "loss": 2.2674,
      "step": 64023
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.058344841003418,
      "learning_rate": 1.3521589373615706e-06,
      "loss": 2.2239,
      "step": 64024
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.038672924041748,
      "learning_rate": 1.3519521936085024e-06,
      "loss": 2.2938,
      "step": 64025
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0409282445907593,
      "learning_rate": 1.3517454645162264e-06,
      "loss": 2.2192,
      "step": 64026
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.106119155883789,
      "learning_rate": 1.3515387500850885e-06,
      "loss": 2.1202,
      "step": 64027
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.027251958847046,
      "learning_rate": 1.3513320503154426e-06,
      "loss": 2.4676,
      "step": 64028
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2358675003051758,
      "learning_rate": 1.3511253652076362e-06,
      "loss": 2.2335,
      "step": 64029
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0211212635040283,
      "learning_rate": 1.3509186947620246e-06,
      "loss": 2.3534,
      "step": 64030
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1628813743591309,
      "learning_rate": 1.350712038978953e-06,
      "loss": 2.1936,
      "step": 64031
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1219381093978882,
      "learning_rate": 1.3505053978587724e-06,
      "loss": 2.1206,
      "step": 64032
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0049068927764893,
      "learning_rate": 1.3502987714018368e-06,
      "loss": 2.2569,
      "step": 64033
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0924594402313232,
      "learning_rate": 1.3500921596084916e-06,
      "loss": 2.2752,
      "step": 64034
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.069059133529663,
      "learning_rate": 1.3498855624790909e-06,
      "loss": 2.1629,
      "step": 64035
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1281880140304565,
      "learning_rate": 1.3496789800139843e-06,
      "loss": 2.2901,
      "step": 64036
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1143789291381836,
      "learning_rate": 1.3494724122135205e-06,
      "loss": 2.5499,
      "step": 64037
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0498443841934204,
      "learning_rate": 1.3492658590780495e-06,
      "loss": 2.1929,
      "step": 64038
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.176809549331665,
      "learning_rate": 1.349059320607924e-06,
      "loss": 2.26,
      "step": 64039
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0008429288864136,
      "learning_rate": 1.3488527968034893e-06,
      "loss": 2.243,
      "step": 64040
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9778466820716858,
      "learning_rate": 1.3486462876651019e-06,
      "loss": 2.2697,
      "step": 64041
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0405274629592896,
      "learning_rate": 1.3484397931931058e-06,
      "loss": 2.574,
      "step": 64042
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1235238313674927,
      "learning_rate": 1.3482333133878578e-06,
      "loss": 2.1776,
      "step": 64043
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1890506744384766,
      "learning_rate": 1.3480268482497005e-06,
      "loss": 2.4793,
      "step": 64044
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.129324197769165,
      "learning_rate": 1.3478203977789906e-06,
      "loss": 2.4189,
      "step": 64045
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3714678287506104,
      "learning_rate": 1.347613961976072e-06,
      "loss": 2.3885,
      "step": 64046
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0824835300445557,
      "learning_rate": 1.3474075408413e-06,
      "loss": 2.586,
      "step": 64047
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0875557661056519,
      "learning_rate": 1.3472011343750201e-06,
      "loss": 2.1758,
      "step": 64048
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0825510025024414,
      "learning_rate": 1.3469947425775864e-06,
      "loss": 2.3418,
      "step": 64049
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0567013025283813,
      "learning_rate": 1.3467883654493474e-06,
      "loss": 2.4397,
      "step": 64050
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0978763103485107,
      "learning_rate": 1.3465820029906517e-06,
      "loss": 2.1538,
      "step": 64051
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0763401985168457,
      "learning_rate": 1.3463756552018482e-06,
      "loss": 2.3271,
      "step": 64052
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.044155240058899,
      "learning_rate": 1.3461693220832894e-06,
      "loss": 2.0181,
      "step": 64053
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0146524906158447,
      "learning_rate": 1.3459630036353233e-06,
      "loss": 2.2848,
      "step": 64054
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9592220187187195,
      "learning_rate": 1.3457566998583015e-06,
      "loss": 2.199,
      "step": 64055
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1338449716567993,
      "learning_rate": 1.3455504107525697e-06,
      "loss": 2.168,
      "step": 64056
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0563684701919556,
      "learning_rate": 1.3453441363184838e-06,
      "loss": 2.2249,
      "step": 64057
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0555590391159058,
      "learning_rate": 1.3451378765563895e-06,
      "loss": 2.0335,
      "step": 64058
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.01742422580719,
      "learning_rate": 1.344931631466635e-06,
      "loss": 2.4323,
      "step": 64059
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0979515314102173,
      "learning_rate": 1.3447254010495747e-06,
      "loss": 2.3577,
      "step": 64060
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9634303450584412,
      "learning_rate": 1.3445191853055528e-06,
      "loss": 2.2352,
      "step": 64061
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.073455572128296,
      "learning_rate": 1.3443129842349245e-06,
      "loss": 2.3485,
      "step": 64062
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2001475095748901,
      "learning_rate": 1.3441067978380361e-06,
      "loss": 2.372,
      "step": 64063
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0908597707748413,
      "learning_rate": 1.3439006261152366e-06,
      "loss": 2.3009,
      "step": 64064
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1068874597549438,
      "learning_rate": 1.3436944690668774e-06,
      "loss": 2.2369,
      "step": 64065
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.20002281665802,
      "learning_rate": 1.3434883266933074e-06,
      "loss": 2.2956,
      "step": 64066
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2565912008285522,
      "learning_rate": 1.343282198994873e-06,
      "loss": 2.32,
      "step": 64067
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2060407400131226,
      "learning_rate": 1.3430760859719293e-06,
      "loss": 2.2043,
      "step": 64068
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.104378581047058,
      "learning_rate": 1.3428699876248208e-06,
      "loss": 2.3629,
      "step": 64069
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0391168594360352,
      "learning_rate": 1.3426639039539002e-06,
      "loss": 2.3816,
      "step": 64070
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1301010847091675,
      "learning_rate": 1.342457834959514e-06,
      "loss": 2.329,
      "step": 64071
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.071824073791504,
      "learning_rate": 1.3422517806420154e-06,
      "loss": 2.2327,
      "step": 64072
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1247494220733643,
      "learning_rate": 1.3420457410017496e-06,
      "loss": 2.4941,
      "step": 64073
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1595345735549927,
      "learning_rate": 1.3418397160390695e-06,
      "loss": 2.2785,
      "step": 64074
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9774578213691711,
      "learning_rate": 1.3416337057543194e-06,
      "loss": 2.5836,
      "step": 64075
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.058785080909729,
      "learning_rate": 1.341427710147858e-06,
      "loss": 2.4143,
      "step": 64076
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1896705627441406,
      "learning_rate": 1.3412217292200224e-06,
      "loss": 2.3714,
      "step": 64077
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1852331161499023,
      "learning_rate": 1.3410157629711707e-06,
      "loss": 2.4512,
      "step": 64078
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0044174194335938,
      "learning_rate": 1.3408098114016466e-06,
      "loss": 2.4736,
      "step": 64079
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.013575792312622,
      "learning_rate": 1.3406038745118034e-06,
      "loss": 2.3265,
      "step": 64080
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1088058948516846,
      "learning_rate": 1.3403979523019873e-06,
      "loss": 2.4421,
      "step": 64081
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1351003646850586,
      "learning_rate": 1.3401920447725502e-06,
      "loss": 2.5296,
      "step": 64082
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0744901895523071,
      "learning_rate": 1.3399861519238365e-06,
      "loss": 2.3297,
      "step": 64083
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0942517518997192,
      "learning_rate": 1.3397802737562016e-06,
      "loss": 2.265,
      "step": 64084
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.180190086364746,
      "learning_rate": 1.3395744102699892e-06,
      "loss": 2.5036,
      "step": 64085
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0964460372924805,
      "learning_rate": 1.3393685614655516e-06,
      "loss": 2.129,
      "step": 64086
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0443263053894043,
      "learning_rate": 1.3391627273432373e-06,
      "loss": 2.3989,
      "step": 64087
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0366936922073364,
      "learning_rate": 1.3389569079033927e-06,
      "loss": 2.3787,
      "step": 64088
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0326662063598633,
      "learning_rate": 1.3387511031463696e-06,
      "loss": 2.2909,
      "step": 64089
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0972248315811157,
      "learning_rate": 1.3385453130725157e-06,
      "loss": 2.4013,
      "step": 64090
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.193364143371582,
      "learning_rate": 1.3383395376821784e-06,
      "loss": 2.1923,
      "step": 64091
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9516480565071106,
      "learning_rate": 1.3381337769757096e-06,
      "loss": 2.4264,
      "step": 64092
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0665122270584106,
      "learning_rate": 1.337928030953457e-06,
      "loss": 2.4998,
      "step": 64093
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0356327295303345,
      "learning_rate": 1.337722299615767e-06,
      "loss": 2.416,
      "step": 64094
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.054578423500061,
      "learning_rate": 1.337516582962992e-06,
      "loss": 2.4599,
      "step": 64095
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1025512218475342,
      "learning_rate": 1.3373108809954771e-06,
      "loss": 2.4037,
      "step": 64096
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0927211046218872,
      "learning_rate": 1.3371051937135748e-06,
      "loss": 2.3464,
      "step": 64097
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.100978136062622,
      "learning_rate": 1.3368995211176306e-06,
      "loss": 2.351,
      "step": 64098
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0386165380477905,
      "learning_rate": 1.3366938632079963e-06,
      "loss": 2.3584,
      "step": 64099
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0259114503860474,
      "learning_rate": 1.336488219985016e-06,
      "loss": 2.1942,
      "step": 64100
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9371667504310608,
      "learning_rate": 1.336282591449044e-06,
      "loss": 2.2256,
      "step": 64101
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1098147630691528,
      "learning_rate": 1.3360769776004267e-06,
      "loss": 2.1493,
      "step": 64102
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3282305002212524,
      "learning_rate": 1.3358713784395116e-06,
      "loss": 2.2819,
      "step": 64103
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.048179268836975,
      "learning_rate": 1.335665793966645e-06,
      "loss": 2.4112,
      "step": 64104
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.967332124710083,
      "learning_rate": 1.3354602241821802e-06,
      "loss": 2.2306,
      "step": 64105
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1743744611740112,
      "learning_rate": 1.335254669086462e-06,
      "loss": 2.1669,
      "step": 64106
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.09862220287323,
      "learning_rate": 1.3350491286798429e-06,
      "loss": 2.3506,
      "step": 64107
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0424977540969849,
      "learning_rate": 1.3348436029626655e-06,
      "loss": 2.3708,
      "step": 64108
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0604033470153809,
      "learning_rate": 1.3346380919352841e-06,
      "loss": 2.1666,
      "step": 64109
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2286008596420288,
      "learning_rate": 1.334432595598043e-06,
      "loss": 2.4446,
      "step": 64110
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.016396164894104,
      "learning_rate": 1.334227113951294e-06,
      "loss": 2.4154,
      "step": 64111
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1765753030776978,
      "learning_rate": 1.3340216469953825e-06,
      "loss": 2.3338,
      "step": 64112
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1916297674179077,
      "learning_rate": 1.3338161947306594e-06,
      "loss": 2.1685,
      "step": 64113
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.9694883227348328,
      "learning_rate": 1.3336107571574708e-06,
      "loss": 2.2463,
      "step": 64114
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0585752725601196,
      "learning_rate": 1.3334053342761665e-06,
      "loss": 2.3827,
      "step": 64115
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.038474678993225,
      "learning_rate": 1.333199926087092e-06,
      "loss": 2.1167,
      "step": 64116
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.297877550125122,
      "learning_rate": 1.332994532590599e-06,
      "loss": 2.5014,
      "step": 64117
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0963060855865479,
      "learning_rate": 1.3327891537870318e-06,
      "loss": 2.4088,
      "step": 64118
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0533702373504639,
      "learning_rate": 1.3325837896767447e-06,
      "loss": 2.2455,
      "step": 64119
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1021780967712402,
      "learning_rate": 1.3323784402600803e-06,
      "loss": 2.1084,
      "step": 64120
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0555591583251953,
      "learning_rate": 1.3321731055373876e-06,
      "loss": 2.1682,
      "step": 64121
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0903398990631104,
      "learning_rate": 1.3319677855090173e-06,
      "loss": 2.3381,
      "step": 64122
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2758488655090332,
      "learning_rate": 1.3317624801753137e-06,
      "loss": 2.3528,
      "step": 64123
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.110026478767395,
      "learning_rate": 1.3315571895366285e-06,
      "loss": 2.1607,
      "step": 64124
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.328984022140503,
      "learning_rate": 1.3313519135933072e-06,
      "loss": 2.4868,
      "step": 64125
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.0388926267623901,
      "learning_rate": 1.3311466523457007e-06,
      "loss": 2.4128,
      "step": 64126
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.1785874366760254,
      "learning_rate": 1.3309414057941516e-06,
      "loss": 2.1602,
      "step": 64127
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.084992527961731,
      "learning_rate": 1.3307361739390146e-06,
      "loss": 2.366,
      "step": 64128
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0714423656463623,
      "learning_rate": 1.3305309567806345e-06,
      "loss": 2.2277,
      "step": 64129
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1965949535369873,
      "learning_rate": 1.3303257543193581e-06,
      "loss": 2.3401,
      "step": 64130
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1211110353469849,
      "learning_rate": 1.3301205665555327e-06,
      "loss": 2.4691,
      "step": 64131
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0064886808395386,
      "learning_rate": 1.3299153934895103e-06,
      "loss": 2.1205,
      "step": 64132
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9374589323997498,
      "learning_rate": 1.3297102351216328e-06,
      "loss": 2.1968,
      "step": 64133
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9617719650268555,
      "learning_rate": 1.3295050914522545e-06,
      "loss": 2.2759,
      "step": 64134
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0750237703323364,
      "learning_rate": 1.3292999624817172e-06,
      "loss": 2.3681,
      "step": 64135
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9796208739280701,
      "learning_rate": 1.3290948482103738e-06,
      "loss": 2.3281,
      "step": 64136
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2801458835601807,
      "learning_rate": 1.3288897486385667e-06,
      "loss": 2.4523,
      "step": 64137
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1949121952056885,
      "learning_rate": 1.3286846637666496e-06,
      "loss": 2.3538,
      "step": 64138
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.111892819404602,
      "learning_rate": 1.3284795935949647e-06,
      "loss": 2.2406,
      "step": 64139
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2278988361358643,
      "learning_rate": 1.3282745381238638e-06,
      "loss": 2.443,
      "step": 64140
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1561557054519653,
      "learning_rate": 1.3280694973536911e-06,
      "loss": 2.2234,
      "step": 64141
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0695551633834839,
      "learning_rate": 1.3278644712847998e-06,
      "loss": 2.3805,
      "step": 64142
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1451629400253296,
      "learning_rate": 1.3276594599175296e-06,
      "loss": 2.361,
      "step": 64143
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.134358525276184,
      "learning_rate": 1.3274544632522336e-06,
      "loss": 2.1102,
      "step": 64144
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0075725317001343,
      "learning_rate": 1.3272494812892567e-06,
      "loss": 2.435,
      "step": 64145
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1890171766281128,
      "learning_rate": 1.3270445140289478e-06,
      "loss": 2.4698,
      "step": 64146
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0830882787704468,
      "learning_rate": 1.3268395614716533e-06,
      "loss": 2.256,
      "step": 64147
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9765895009040833,
      "learning_rate": 1.326634623617723e-06,
      "loss": 2.296,
      "step": 64148
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0848661661148071,
      "learning_rate": 1.3264297004675032e-06,
      "loss": 2.2843,
      "step": 64149
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0411083698272705,
      "learning_rate": 1.3262247920213379e-06,
      "loss": 2.576,
      "step": 64150
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.07632577419281,
      "learning_rate": 1.3260198982795792e-06,
      "loss": 2.3021,
      "step": 64151
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0475984811782837,
      "learning_rate": 1.3258150192425712e-06,
      "loss": 2.3322,
      "step": 64152
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0382376909255981,
      "learning_rate": 1.3256101549106649e-06,
      "loss": 2.0915,
      "step": 64153
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1971254348754883,
      "learning_rate": 1.325405305284202e-06,
      "loss": 2.3584,
      "step": 64154
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0234448909759521,
      "learning_rate": 1.325200470363538e-06,
      "loss": 2.1858,
      "step": 64155
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0708526372909546,
      "learning_rate": 1.3249956501490114e-06,
      "loss": 2.2015,
      "step": 64156
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1592581272125244,
      "learning_rate": 1.3247908446409753e-06,
      "loss": 2.2658,
      "step": 64157
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0005073547363281,
      "learning_rate": 1.3245860538397725e-06,
      "loss": 2.4082,
      "step": 64158
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0575432777404785,
      "learning_rate": 1.3243812777457544e-06,
      "loss": 2.4396,
      "step": 64159
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.985418438911438,
      "learning_rate": 1.3241765163592647e-06,
      "loss": 2.3261,
      "step": 64160
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0472878217697144,
      "learning_rate": 1.3239717696806542e-06,
      "loss": 2.3527,
      "step": 64161
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.20112943649292,
      "learning_rate": 1.3237670377102652e-06,
      "loss": 2.4179,
      "step": 64162
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0670359134674072,
      "learning_rate": 1.3235623204484504e-06,
      "loss": 2.2466,
      "step": 64163
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.130171298980713,
      "learning_rate": 1.323357617895552e-06,
      "loss": 2.2883,
      "step": 64164
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.069952130317688,
      "learning_rate": 1.3231529300519208e-06,
      "loss": 2.4107,
      "step": 64165
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1841167211532593,
      "learning_rate": 1.3229482569178987e-06,
      "loss": 2.1859,
      "step": 64166
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.057754635810852,
      "learning_rate": 1.3227435984938397e-06,
      "loss": 2.205,
      "step": 64167
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1409422159194946,
      "learning_rate": 1.322538954780086e-06,
      "loss": 2.06,
      "step": 64168
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0449769496917725,
      "learning_rate": 1.322334325776986e-06,
      "loss": 2.2567,
      "step": 64169
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0299967527389526,
      "learning_rate": 1.3221297114848841e-06,
      "loss": 2.3368,
      "step": 64170
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0875673294067383,
      "learning_rate": 1.321925111904131e-06,
      "loss": 2.3476,
      "step": 64171
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.034778118133545,
      "learning_rate": 1.3217205270350697e-06,
      "loss": 2.2682,
      "step": 64172
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0869696140289307,
      "learning_rate": 1.3215159568780522e-06,
      "loss": 2.3063,
      "step": 64173
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2830190658569336,
      "learning_rate": 1.3213114014334182e-06,
      "loss": 2.408,
      "step": 64174
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1024556159973145,
      "learning_rate": 1.321106860701522e-06,
      "loss": 2.3468,
      "step": 64175
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3183722496032715,
      "learning_rate": 1.3209023346827066e-06,
      "loss": 2.3082,
      "step": 64176
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1080811023712158,
      "learning_rate": 1.3206978233773171e-06,
      "loss": 2.3066,
      "step": 64177
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0257980823516846,
      "learning_rate": 1.3204933267857034e-06,
      "loss": 2.4199,
      "step": 64178
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.12968111038208,
      "learning_rate": 1.3202888449082085e-06,
      "loss": 2.404,
      "step": 64179
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1264275312423706,
      "learning_rate": 1.3200843777451845e-06,
      "loss": 2.1938,
      "step": 64180
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1056780815124512,
      "learning_rate": 1.319879925296974e-06,
      "loss": 2.4015,
      "step": 64181
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1955525875091553,
      "learning_rate": 1.319675487563924e-06,
      "loss": 2.3372,
      "step": 64182
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.085660457611084,
      "learning_rate": 1.3194710645463815e-06,
      "loss": 2.1688,
      "step": 64183
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0500173568725586,
      "learning_rate": 1.3192666562446942e-06,
      "loss": 2.2979,
      "step": 64184
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.628138542175293,
      "learning_rate": 1.3190622626592065e-06,
      "loss": 2.2818,
      "step": 64185
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2046351432800293,
      "learning_rate": 1.3188578837902665e-06,
      "loss": 2.3459,
      "step": 64186
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1461416482925415,
      "learning_rate": 1.3186535196382188e-06,
      "loss": 2.3698,
      "step": 64187
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0983104705810547,
      "learning_rate": 1.318449170203413e-06,
      "loss": 2.2417,
      "step": 64188
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1016960144042969,
      "learning_rate": 1.318244835486192e-06,
      "loss": 2.4879,
      "step": 64189
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1500760316848755,
      "learning_rate": 1.318040515486906e-06,
      "loss": 2.139,
      "step": 64190
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1613372564315796,
      "learning_rate": 1.317836210205896e-06,
      "loss": 2.1563,
      "step": 64191
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9821056723594666,
      "learning_rate": 1.317631919643515e-06,
      "loss": 2.3642,
      "step": 64192
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0649760961532593,
      "learning_rate": 1.3174276438001044e-06,
      "loss": 2.2735,
      "step": 64193
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9919681549072266,
      "learning_rate": 1.3172233826760127e-06,
      "loss": 2.3119,
      "step": 64194
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.105916142463684,
      "learning_rate": 1.3170191362715867e-06,
      "loss": 2.289,
      "step": 64195
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0832914113998413,
      "learning_rate": 1.3168149045871714e-06,
      "loss": 2.356,
      "step": 64196
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0132232904434204,
      "learning_rate": 1.316610687623111e-06,
      "loss": 2.262,
      "step": 64197
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0427814722061157,
      "learning_rate": 1.3164064853797553e-06,
      "loss": 2.3721,
      "step": 64198
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0145580768585205,
      "learning_rate": 1.3162022978574474e-06,
      "loss": 2.2636,
      "step": 64199
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.8986003994941711,
      "learning_rate": 1.3159981250565378e-06,
      "loss": 2.2108,
      "step": 64200
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0488935708999634,
      "learning_rate": 1.3157939669773666e-06,
      "loss": 2.2709,
      "step": 64201
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1680516004562378,
      "learning_rate": 1.3155898236202869e-06,
      "loss": 2.5604,
      "step": 64202
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9557406902313232,
      "learning_rate": 1.3153856949856382e-06,
      "loss": 2.2006,
      "step": 64203
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1077700853347778,
      "learning_rate": 1.3151815810737712e-06,
      "loss": 2.2544,
      "step": 64204
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0486482381820679,
      "learning_rate": 1.3149774818850303e-06,
      "loss": 2.3252,
      "step": 64205
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0274196863174438,
      "learning_rate": 1.3147733974197597e-06,
      "loss": 2.1248,
      "step": 64206
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.107788324356079,
      "learning_rate": 1.314569327678309e-06,
      "loss": 2.4118,
      "step": 64207
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1172388792037964,
      "learning_rate": 1.3143652726610212e-06,
      "loss": 2.2258,
      "step": 64208
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9633394479751587,
      "learning_rate": 1.314161232368243e-06,
      "loss": 2.1191,
      "step": 64209
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0874369144439697,
      "learning_rate": 1.3139572068003214e-06,
      "loss": 2.5743,
      "step": 64210
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1078331470489502,
      "learning_rate": 1.313753195957601e-06,
      "loss": 2.411,
      "step": 64211
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1689082384109497,
      "learning_rate": 1.313549199840427e-06,
      "loss": 2.3574,
      "step": 64212
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2749525308609009,
      "learning_rate": 1.313345218449148e-06,
      "loss": 2.391,
      "step": 64213
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0514624118804932,
      "learning_rate": 1.3131412517841069e-06,
      "loss": 2.1196,
      "step": 64214
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0489227771759033,
      "learning_rate": 1.3129372998456513e-06,
      "loss": 2.1854,
      "step": 64215
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0818971395492554,
      "learning_rate": 1.3127333626341253e-06,
      "loss": 2.4327,
      "step": 64216
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9748775362968445,
      "learning_rate": 1.3125294401498778e-06,
      "loss": 2.4426,
      "step": 64217
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0351901054382324,
      "learning_rate": 1.3123255323932494e-06,
      "loss": 2.1709,
      "step": 64218
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.19952392578125,
      "learning_rate": 1.312121639364592e-06,
      "loss": 2.2605,
      "step": 64219
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0165799856185913,
      "learning_rate": 1.3119177610642454e-06,
      "loss": 2.3104,
      "step": 64220
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0365689992904663,
      "learning_rate": 1.3117138974925613e-06,
      "loss": 2.4363,
      "step": 64221
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1774067878723145,
      "learning_rate": 1.311510048649879e-06,
      "loss": 2.2854,
      "step": 64222
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.058212161064148,
      "learning_rate": 1.3113062145365485e-06,
      "loss": 2.2235,
      "step": 64223
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0886564254760742,
      "learning_rate": 1.3111023951529112e-06,
      "loss": 2.4045,
      "step": 64224
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.235329270362854,
      "learning_rate": 1.3108985904993177e-06,
      "loss": 2.308,
      "step": 64225
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0865049362182617,
      "learning_rate": 1.3106948005761088e-06,
      "loss": 2.4043,
      "step": 64226
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1219266653060913,
      "learning_rate": 1.3104910253836345e-06,
      "loss": 2.2982,
      "step": 64227
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.245283603668213,
      "learning_rate": 1.3102872649222354e-06,
      "loss": 2.353,
      "step": 64228
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0929981470108032,
      "learning_rate": 1.3100835191922623e-06,
      "loss": 2.2102,
      "step": 64229
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.035882830619812,
      "learning_rate": 1.309879788194055e-06,
      "loss": 2.4276,
      "step": 64230
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.169575572013855,
      "learning_rate": 1.3096760719279645e-06,
      "loss": 2.4105,
      "step": 64231
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0171384811401367,
      "learning_rate": 1.3094723703943324e-06,
      "loss": 2.1407,
      "step": 64232
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1472458839416504,
      "learning_rate": 1.3092686835935032e-06,
      "loss": 2.3287,
      "step": 64233
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0393646955490112,
      "learning_rate": 1.3090650115258263e-06,
      "loss": 2.3869,
      "step": 64234
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0674039125442505,
      "learning_rate": 1.308861354191645e-06,
      "loss": 2.3703,
      "step": 64235
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.161313772201538,
      "learning_rate": 1.3086577115913013e-06,
      "loss": 2.4952,
      "step": 64236
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1325125694274902,
      "learning_rate": 1.3084540837251458e-06,
      "loss": 2.6115,
      "step": 64237
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0397021770477295,
      "learning_rate": 1.3082504705935216e-06,
      "loss": 2.309,
      "step": 64238
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.078317642211914,
      "learning_rate": 1.308046872196771e-06,
      "loss": 2.1496,
      "step": 64239
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2641469240188599,
      "learning_rate": 1.3078432885352432e-06,
      "loss": 2.266,
      "step": 64240
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1255685091018677,
      "learning_rate": 1.3076397196092805e-06,
      "loss": 2.3062,
      "step": 64241
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.221388339996338,
      "learning_rate": 1.3074361654192302e-06,
      "loss": 2.2978,
      "step": 64242
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1257565021514893,
      "learning_rate": 1.3072326259654355e-06,
      "loss": 2.1695,
      "step": 64243
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0831515789031982,
      "learning_rate": 1.307029101248245e-06,
      "loss": 2.4068,
      "step": 64244
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.085065484046936,
      "learning_rate": 1.3068255912679973e-06,
      "loss": 2.3691,
      "step": 64245
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.034687876701355,
      "learning_rate": 1.3066220960250453e-06,
      "loss": 2.3887,
      "step": 64246
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.062390923500061,
      "learning_rate": 1.306418615519729e-06,
      "loss": 2.3067,
      "step": 64247
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3128236532211304,
      "learning_rate": 1.3062151497523934e-06,
      "loss": 2.1879,
      "step": 64248
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1244395971298218,
      "learning_rate": 1.3060116987233839e-06,
      "loss": 2.2741,
      "step": 64249
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.055785059928894,
      "learning_rate": 1.305808262433047e-06,
      "loss": 2.3097,
      "step": 64250
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2190269231796265,
      "learning_rate": 1.3056048408817245e-06,
      "loss": 2.2088,
      "step": 64251
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0801345109939575,
      "learning_rate": 1.3054014340697652e-06,
      "loss": 2.3468,
      "step": 64252
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0661348104476929,
      "learning_rate": 1.3051980419975096e-06,
      "loss": 2.4572,
      "step": 64253
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.084707260131836,
      "learning_rate": 1.3049946646653066e-06,
      "loss": 2.3411,
      "step": 64254
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.210526466369629,
      "learning_rate": 1.304791302073498e-06,
      "loss": 2.1812,
      "step": 64255
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4162517786026,
      "learning_rate": 1.3045879542224316e-06,
      "loss": 2.3499,
      "step": 64256
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.162557601928711,
      "learning_rate": 1.3043846211124478e-06,
      "loss": 2.4868,
      "step": 64257
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1499322652816772,
      "learning_rate": 1.3041813027438953e-06,
      "loss": 2.3256,
      "step": 64258
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2209419012069702,
      "learning_rate": 1.3039779991171163e-06,
      "loss": 2.2437,
      "step": 64259
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.051303505897522,
      "learning_rate": 1.3037747102324604e-06,
      "loss": 2.6126,
      "step": 64260
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1765124797821045,
      "learning_rate": 1.303571436090264e-06,
      "loss": 2.1444,
      "step": 64261
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0428788661956787,
      "learning_rate": 1.3033681766908778e-06,
      "loss": 2.3991,
      "step": 64262
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0825248956680298,
      "learning_rate": 1.3031649320346419e-06,
      "loss": 2.4523,
      "step": 64263
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1955927610397339,
      "learning_rate": 1.3029617021219065e-06,
      "loss": 2.2253,
      "step": 64264
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9871572852134705,
      "learning_rate": 1.3027584869530108e-06,
      "loss": 2.1882,
      "step": 64265
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0728787183761597,
      "learning_rate": 1.3025552865283032e-06,
      "loss": 2.3885,
      "step": 64266
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.067220687866211,
      "learning_rate": 1.3023521008481266e-06,
      "loss": 2.3883,
      "step": 64267
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1677062511444092,
      "learning_rate": 1.3021489299128242e-06,
      "loss": 2.1688,
      "step": 64268
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9814457297325134,
      "learning_rate": 1.3019457737227426e-06,
      "loss": 2.2934,
      "step": 64269
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9838892221450806,
      "learning_rate": 1.3017426322782245e-06,
      "loss": 2.2573,
      "step": 64270
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1387962102890015,
      "learning_rate": 1.3015395055796153e-06,
      "loss": 2.251,
      "step": 64271
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.235859751701355,
      "learning_rate": 1.3013363936272583e-06,
      "loss": 2.3237,
      "step": 64272
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9993166923522949,
      "learning_rate": 1.3011332964215007e-06,
      "loss": 2.1463,
      "step": 64273
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1154165267944336,
      "learning_rate": 1.3009302139626844e-06,
      "loss": 2.1052,
      "step": 64274
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0117098093032837,
      "learning_rate": 1.3007271462511551e-06,
      "loss": 2.3287,
      "step": 64275
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1575217247009277,
      "learning_rate": 1.3005240932872531e-06,
      "loss": 2.0746,
      "step": 64276
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0146510601043701,
      "learning_rate": 1.3003210550713275e-06,
      "loss": 2.4192,
      "step": 64277
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9983709454536438,
      "learning_rate": 1.3001180316037176e-06,
      "loss": 2.3552,
      "step": 64278
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1425271034240723,
      "learning_rate": 1.2999150228847735e-06,
      "loss": 2.564,
      "step": 64279
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.982367992401123,
      "learning_rate": 1.2997120289148346e-06,
      "loss": 2.0705,
      "step": 64280
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.08260178565979,
      "learning_rate": 1.2995090496942487e-06,
      "loss": 2.3136,
      "step": 64281
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1495743989944458,
      "learning_rate": 1.2993060852233551e-06,
      "loss": 2.2773,
      "step": 64282
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1469379663467407,
      "learning_rate": 1.299103135502503e-06,
      "loss": 2.3949,
      "step": 64283
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1879911422729492,
      "learning_rate": 1.2989002005320329e-06,
      "loss": 2.1052,
      "step": 64284
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1724828481674194,
      "learning_rate": 1.2986972803122922e-06,
      "loss": 2.2829,
      "step": 64285
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0166025161743164,
      "learning_rate": 1.2984943748436197e-06,
      "loss": 2.5618,
      "step": 64286
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0067192316055298,
      "learning_rate": 1.2982914841263684e-06,
      "loss": 2.2267,
      "step": 64287
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0941463708877563,
      "learning_rate": 1.2980886081608713e-06,
      "loss": 2.1557,
      "step": 64288
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1077253818511963,
      "learning_rate": 1.2978857469474793e-06,
      "loss": 2.3876,
      "step": 64289
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.968093991279602,
      "learning_rate": 1.297682900486532e-06,
      "loss": 2.0644,
      "step": 64290
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1426477432250977,
      "learning_rate": 1.2974800687783784e-06,
      "loss": 2.3256,
      "step": 64291
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0658925771713257,
      "learning_rate": 1.2972772518233578e-06,
      "loss": 2.398,
      "step": 64292
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1139678955078125,
      "learning_rate": 1.2970744496218168e-06,
      "loss": 2.4718,
      "step": 64293
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0666818618774414,
      "learning_rate": 1.2968716621740995e-06,
      "loss": 2.4041,
      "step": 64294
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0696475505828857,
      "learning_rate": 1.2966688894805456e-06,
      "loss": 2.3217,
      "step": 64295
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0699093341827393,
      "learning_rate": 1.296466131541505e-06,
      "loss": 2.1816,
      "step": 64296
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.974729597568512,
      "learning_rate": 1.296263388357315e-06,
      "loss": 2.1853,
      "step": 64297
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9553428292274475,
      "learning_rate": 1.2960606599283243e-06,
      "loss": 2.3701,
      "step": 64298
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2062491178512573,
      "learning_rate": 1.2958579462548738e-06,
      "loss": 2.4316,
      "step": 64299
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1417176723480225,
      "learning_rate": 1.2956552473373096e-06,
      "loss": 2.34,
      "step": 64300
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1147944927215576,
      "learning_rate": 1.295452563175973e-06,
      "loss": 2.2123,
      "step": 64301
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1090158224105835,
      "learning_rate": 1.2952498937712099e-06,
      "loss": 2.3979,
      "step": 64302
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0677636861801147,
      "learning_rate": 1.2950472391233594e-06,
      "loss": 2.3605,
      "step": 64303
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1968655586242676,
      "learning_rate": 1.2948445992327697e-06,
      "loss": 2.3003,
      "step": 64304
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.10587477684021,
      "learning_rate": 1.2946419740997818e-06,
      "loss": 2.1838,
      "step": 64305
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0770856142044067,
      "learning_rate": 1.294439363724741e-06,
      "loss": 2.198,
      "step": 64306
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.081183910369873,
      "learning_rate": 1.2942367681079893e-06,
      "loss": 2.259,
      "step": 64307
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0466967821121216,
      "learning_rate": 1.2940341872498718e-06,
      "loss": 2.303,
      "step": 64308
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0870269536972046,
      "learning_rate": 1.2938316211507285e-06,
      "loss": 2.5449,
      "step": 64309
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0634723901748657,
      "learning_rate": 1.2936290698109077e-06,
      "loss": 2.102,
      "step": 64310
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2982393503189087,
      "learning_rate": 1.2934265332307483e-06,
      "loss": 2.3374,
      "step": 64311
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.055275797843933,
      "learning_rate": 1.2932240114105977e-06,
      "loss": 2.4023,
      "step": 64312
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1158164739608765,
      "learning_rate": 1.2930215043507966e-06,
      "loss": 2.2268,
      "step": 64313
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9842889904975891,
      "learning_rate": 1.2928190120516892e-06,
      "loss": 2.3894,
      "step": 64314
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2560327053070068,
      "learning_rate": 1.2926165345136166e-06,
      "loss": 2.317,
      "step": 64315
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0341681241989136,
      "learning_rate": 1.292414071736926e-06,
      "loss": 2.3787,
      "step": 64316
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0676158666610718,
      "learning_rate": 1.292211623721955e-06,
      "loss": 2.2529,
      "step": 64317
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0586698055267334,
      "learning_rate": 1.2920091904690546e-06,
      "loss": 2.5132,
      "step": 64318
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.234057903289795,
      "learning_rate": 1.2918067719785598e-06,
      "loss": 2.2633,
      "step": 64319
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0951054096221924,
      "learning_rate": 1.2916043682508207e-06,
      "loss": 2.3209,
      "step": 64320
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3471430540084839,
      "learning_rate": 1.2914019792861742e-06,
      "loss": 2.0462,
      "step": 64321
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1032273769378662,
      "learning_rate": 1.2911996050849697e-06,
      "loss": 2.3436,
      "step": 64322
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0230485200881958,
      "learning_rate": 1.2909972456475472e-06,
      "loss": 2.1142,
      "step": 64323
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0850352048873901,
      "learning_rate": 1.290794900974247e-06,
      "loss": 2.4539,
      "step": 64324
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0936329364776611,
      "learning_rate": 1.2905925710654177e-06,
      "loss": 2.3809,
      "step": 64325
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0437568426132202,
      "learning_rate": 1.2903902559213987e-06,
      "loss": 2.3075,
      "step": 64326
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1682066917419434,
      "learning_rate": 1.2901879555425311e-06,
      "loss": 2.3453,
      "step": 64327
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2439826726913452,
      "learning_rate": 1.2899856699291635e-06,
      "loss": 2.4141,
      "step": 64328
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0743860006332397,
      "learning_rate": 1.2897833990816366e-06,
      "loss": 2.5253,
      "step": 64329
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0057188272476196,
      "learning_rate": 1.289581143000289e-06,
      "loss": 2.3361,
      "step": 64330
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0999122858047485,
      "learning_rate": 1.2893789016854696e-06,
      "loss": 2.4635,
      "step": 64331
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0471982955932617,
      "learning_rate": 1.2891766751375167e-06,
      "loss": 2.4775,
      "step": 64332
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1436216831207275,
      "learning_rate": 1.288974463356777e-06,
      "loss": 2.0776,
      "step": 64333
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0641484260559082,
      "learning_rate": 1.2887722663435898e-06,
      "loss": 2.3747,
      "step": 64334
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1438026428222656,
      "learning_rate": 1.2885700840983007e-06,
      "loss": 2.1649,
      "step": 64335
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0321543216705322,
      "learning_rate": 1.2883679166212503e-06,
      "loss": 2.3551,
      "step": 64336
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.034287452697754,
      "learning_rate": 1.2881657639127843e-06,
      "loss": 2.1813,
      "step": 64337
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.208339810371399,
      "learning_rate": 1.287963625973241e-06,
      "loss": 2.3628,
      "step": 64338
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.108648657798767,
      "learning_rate": 1.28776150280297e-06,
      "loss": 2.5387,
      "step": 64339
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0411070585250854,
      "learning_rate": 1.2875593944023047e-06,
      "loss": 2.527,
      "step": 64340
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2128077745437622,
      "learning_rate": 1.2873573007715946e-06,
      "loss": 2.3848,
      "step": 64341
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0952173471450806,
      "learning_rate": 1.2871552219111784e-06,
      "loss": 2.3078,
      "step": 64342
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2283401489257812,
      "learning_rate": 1.2869531578214034e-06,
      "loss": 2.2045,
      "step": 64343
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1068979501724243,
      "learning_rate": 1.2867511085026063e-06,
      "loss": 2.3577,
      "step": 64344
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0047260522842407,
      "learning_rate": 1.2865490739551346e-06,
      "loss": 2.1973,
      "step": 64345
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9817182421684265,
      "learning_rate": 1.2863470541793265e-06,
      "loss": 2.1353,
      "step": 64346
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2016503810882568,
      "learning_rate": 1.286145049175529e-06,
      "loss": 2.2505,
      "step": 64347
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2052956819534302,
      "learning_rate": 1.2859430589440802e-06,
      "loss": 2.1775,
      "step": 64348
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.107858419418335,
      "learning_rate": 1.2857410834853269e-06,
      "loss": 2.3243,
      "step": 64349
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0488338470458984,
      "learning_rate": 1.2855391227996095e-06,
      "loss": 2.5728,
      "step": 64350
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0934306383132935,
      "learning_rate": 1.285337176887267e-06,
      "loss": 2.1328,
      "step": 64351
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.085963487625122,
      "learning_rate": 1.2851352457486476e-06,
      "loss": 2.3488,
      "step": 64352
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.271761417388916,
      "learning_rate": 1.2849333293840916e-06,
      "loss": 2.381,
      "step": 64353
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9590816497802734,
      "learning_rate": 1.284731427793937e-06,
      "loss": 2.2946,
      "step": 64354
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3558686971664429,
      "learning_rate": 1.284529540978533e-06,
      "loss": 2.4371,
      "step": 64355
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1817457675933838,
      "learning_rate": 1.2843276689382179e-06,
      "loss": 2.3146,
      "step": 64356
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2473951578140259,
      "learning_rate": 1.2841258116733324e-06,
      "loss": 2.3176,
      "step": 64357
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0579421520233154,
      "learning_rate": 1.2839239691842232e-06,
      "loss": 2.3252,
      "step": 64358
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9959543943405151,
      "learning_rate": 1.2837221414712276e-06,
      "loss": 2.1925,
      "step": 64359
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.185890555381775,
      "learning_rate": 1.283520328534693e-06,
      "loss": 2.1806,
      "step": 64360
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0960338115692139,
      "learning_rate": 1.2833185303749562e-06,
      "loss": 2.4074,
      "step": 64361
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0406475067138672,
      "learning_rate": 1.2831167469923643e-06,
      "loss": 2.4478,
      "step": 64362
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1946386098861694,
      "learning_rate": 1.2829149783872551e-06,
      "loss": 2.2259,
      "step": 64363
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.114998698234558,
      "learning_rate": 1.2827132245599738e-06,
      "loss": 2.399,
      "step": 64364
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1046608686447144,
      "learning_rate": 1.28251148551086e-06,
      "loss": 2.3146,
      "step": 64365
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.144618272781372,
      "learning_rate": 1.2823097612402612e-06,
      "loss": 2.2858,
      "step": 64366
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2229310274124146,
      "learning_rate": 1.2821080517485095e-06,
      "loss": 2.0999,
      "step": 64367
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2610129117965698,
      "learning_rate": 1.2819063570359557e-06,
      "loss": 2.2801,
      "step": 64368
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0790427923202515,
      "learning_rate": 1.2817046771029362e-06,
      "loss": 2.6008,
      "step": 64369
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0066581964492798,
      "learning_rate": 1.2815030119497973e-06,
      "loss": 2.1811,
      "step": 64370
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1264628171920776,
      "learning_rate": 1.2813013615768765e-06,
      "loss": 2.5199,
      "step": 64371
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0393905639648438,
      "learning_rate": 1.2810997259845192e-06,
      "loss": 2.2844,
      "step": 64372
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2083548307418823,
      "learning_rate": 1.280898105173064e-06,
      "loss": 2.3616,
      "step": 64373
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0957750082015991,
      "learning_rate": 1.2806964991428572e-06,
      "loss": 2.3653,
      "step": 64374
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0361011028289795,
      "learning_rate": 1.2804949078942363e-06,
      "loss": 2.2481,
      "step": 64375
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1645797491073608,
      "learning_rate": 1.2802933314275457e-06,
      "loss": 2.3655,
      "step": 64376
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0679293870925903,
      "learning_rate": 1.2800917697431248e-06,
      "loss": 2.2084,
      "step": 64377
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2675658464431763,
      "learning_rate": 1.279890222841319e-06,
      "loss": 2.1354,
      "step": 64378
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1418253183364868,
      "learning_rate": 1.279688690722467e-06,
      "loss": 2.2366,
      "step": 64379
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0816763639450073,
      "learning_rate": 1.2794871733869108e-06,
      "loss": 2.2241,
      "step": 64380
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1255521774291992,
      "learning_rate": 1.279285670834991e-06,
      "loss": 2.0056,
      "step": 64381
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0250449180603027,
      "learning_rate": 1.279084183067052e-06,
      "loss": 2.1041,
      "step": 64382
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1794489622116089,
      "learning_rate": 1.2788827100834322e-06,
      "loss": 2.3855,
      "step": 64383
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9794794321060181,
      "learning_rate": 1.278681251884477e-06,
      "loss": 2.1038,
      "step": 64384
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.019801139831543,
      "learning_rate": 1.2784798084705253e-06,
      "loss": 2.3424,
      "step": 64385
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.203959584236145,
      "learning_rate": 1.2782783798419174e-06,
      "loss": 2.4377,
      "step": 64386
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0533370971679688,
      "learning_rate": 1.2780769659989977e-06,
      "loss": 2.1436,
      "step": 64387
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0251303911209106,
      "learning_rate": 1.277875566942105e-06,
      "loss": 2.3833,
      "step": 64388
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.08795964717865,
      "learning_rate": 1.2776741826715845e-06,
      "loss": 2.4652,
      "step": 64389
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0387722253799438,
      "learning_rate": 1.2774728131877722e-06,
      "loss": 2.4102,
      "step": 64390
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1105791330337524,
      "learning_rate": 1.277271458491015e-06,
      "loss": 2.5521,
      "step": 64391
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.079067587852478,
      "learning_rate": 1.2770701185816514e-06,
      "loss": 2.3554,
      "step": 64392
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.088202953338623,
      "learning_rate": 1.2768687934600244e-06,
      "loss": 2.2874,
      "step": 64393
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1034475564956665,
      "learning_rate": 1.2766674831264704e-06,
      "loss": 2.4216,
      "step": 64394
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0671594142913818,
      "learning_rate": 1.2764661875813367e-06,
      "loss": 2.2713,
      "step": 64395
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1647179126739502,
      "learning_rate": 1.27626490682496e-06,
      "loss": 2.5164,
      "step": 64396
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0425646305084229,
      "learning_rate": 1.2760636408576865e-06,
      "loss": 2.2856,
      "step": 64397
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2016549110412598,
      "learning_rate": 1.2758623896798517e-06,
      "loss": 2.1966,
      "step": 64398
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.072311282157898,
      "learning_rate": 1.2756611532918017e-06,
      "loss": 2.3483,
      "step": 64399
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0938361883163452,
      "learning_rate": 1.2754599316938732e-06,
      "loss": 2.5489,
      "step": 64400
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0251342058181763,
      "learning_rate": 1.2752587248864124e-06,
      "loss": 2.1934,
      "step": 64401
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.149125099182129,
      "learning_rate": 1.2750575328697556e-06,
      "loss": 2.2844,
      "step": 64402
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1961681842803955,
      "learning_rate": 1.2748563556442472e-06,
      "loss": 2.3624,
      "step": 64403
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4764569997787476,
      "learning_rate": 1.2746551932102258e-06,
      "loss": 2.5303,
      "step": 64404
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1745491027832031,
      "learning_rate": 1.2744540455680375e-06,
      "loss": 2.3576,
      "step": 64405
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1949163675308228,
      "learning_rate": 1.2742529127180147e-06,
      "loss": 2.4289,
      "step": 64406
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1207098960876465,
      "learning_rate": 1.2740517946605058e-06,
      "loss": 2.3816,
      "step": 64407
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0410438776016235,
      "learning_rate": 1.273850691395847e-06,
      "loss": 2.3049,
      "step": 64408
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0469173192977905,
      "learning_rate": 1.2736496029243828e-06,
      "loss": 2.2242,
      "step": 64409
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.028821587562561,
      "learning_rate": 1.2734485292464516e-06,
      "loss": 2.3091,
      "step": 64410
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0737932920455933,
      "learning_rate": 1.2732474703623965e-06,
      "loss": 2.3169,
      "step": 64411
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.087986707687378,
      "learning_rate": 1.2730464262725572e-06,
      "loss": 2.5152,
      "step": 64412
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1451704502105713,
      "learning_rate": 1.2728453969772726e-06,
      "loss": 2.1552,
      "step": 64413
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0678590536117554,
      "learning_rate": 1.2726443824768863e-06,
      "loss": 2.3184,
      "step": 64414
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.103653907775879,
      "learning_rate": 1.2724433827717376e-06,
      "loss": 2.3385,
      "step": 64415
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1560183763504028,
      "learning_rate": 1.272242397862169e-06,
      "loss": 2.3241,
      "step": 64416
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3464857339859009,
      "learning_rate": 1.2720414277485172e-06,
      "loss": 2.2548,
      "step": 64417
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0986841917037964,
      "learning_rate": 1.2718404724311284e-06,
      "loss": 2.4094,
      "step": 64418
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2481820583343506,
      "learning_rate": 1.2716395319103404e-06,
      "loss": 2.0899,
      "step": 64419
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0717947483062744,
      "learning_rate": 1.2714386061864947e-06,
      "loss": 2.4451,
      "step": 64420
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0999878644943237,
      "learning_rate": 1.2712376952599282e-06,
      "loss": 2.3691,
      "step": 64421
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.051114797592163,
      "learning_rate": 1.2710367991309868e-06,
      "loss": 2.3462,
      "step": 64422
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0472819805145264,
      "learning_rate": 1.2708359178000073e-06,
      "loss": 2.6984,
      "step": 64423
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1453197002410889,
      "learning_rate": 1.2706350512673337e-06,
      "loss": 2.3935,
      "step": 64424
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9660428166389465,
      "learning_rate": 1.2704341995333015e-06,
      "loss": 2.1894,
      "step": 64425
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0818085670471191,
      "learning_rate": 1.2702333625982577e-06,
      "loss": 2.3857,
      "step": 64426
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0941189527511597,
      "learning_rate": 1.2700325404625357e-06,
      "loss": 2.3214,
      "step": 64427
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.221417784690857,
      "learning_rate": 1.2698317331264831e-06,
      "loss": 2.2003,
      "step": 64428
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0789275169372559,
      "learning_rate": 1.269630940590434e-06,
      "loss": 2.5708,
      "step": 64429
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1093274354934692,
      "learning_rate": 1.2694301628547333e-06,
      "loss": 2.4465,
      "step": 64430
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.027380347251892,
      "learning_rate": 1.269229399919718e-06,
      "loss": 2.231,
      "step": 64431
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.128870964050293,
      "learning_rate": 1.2690286517857354e-06,
      "loss": 2.4589,
      "step": 64432
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0393952131271362,
      "learning_rate": 1.2688279184531149e-06,
      "loss": 2.2436,
      "step": 64433
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0640608072280884,
      "learning_rate": 1.2686271999222045e-06,
      "loss": 2.2765,
      "step": 64434
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0425951480865479,
      "learning_rate": 1.2684264961933412e-06,
      "loss": 2.295,
      "step": 64435
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.115120530128479,
      "learning_rate": 1.2682258072668674e-06,
      "loss": 2.2583,
      "step": 64436
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0495829582214355,
      "learning_rate": 1.2680251331431214e-06,
      "loss": 2.4938,
      "step": 64437
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1369472742080688,
      "learning_rate": 1.2678244738224454e-06,
      "loss": 2.3488,
      "step": 64438
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2537962198257446,
      "learning_rate": 1.267623829305178e-06,
      "loss": 2.2207,
      "step": 64439
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1498360633850098,
      "learning_rate": 1.2674231995916609e-06,
      "loss": 2.3041,
      "step": 64440
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1788339614868164,
      "learning_rate": 1.2672225846822328e-06,
      "loss": 2.2151,
      "step": 64441
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0022752285003662,
      "learning_rate": 1.2670219845772324e-06,
      "loss": 2.2617,
      "step": 64442
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0104341506958008,
      "learning_rate": 1.266821399277004e-06,
      "loss": 2.3352,
      "step": 64443
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1888655424118042,
      "learning_rate": 1.2666208287818837e-06,
      "loss": 2.3536,
      "step": 64444
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9401874542236328,
      "learning_rate": 1.2664202730922148e-06,
      "loss": 2.116,
      "step": 64445
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0997085571289062,
      "learning_rate": 1.2662197322083348e-06,
      "loss": 2.4232,
      "step": 64446
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0338648557662964,
      "learning_rate": 1.2660192061305854e-06,
      "loss": 2.2837,
      "step": 64447
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2143625020980835,
      "learning_rate": 1.265818694859302e-06,
      "loss": 2.4124,
      "step": 64448
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3065704107284546,
      "learning_rate": 1.2656181983948312e-06,
      "loss": 2.2183,
      "step": 64449
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1445584297180176,
      "learning_rate": 1.265417716737508e-06,
      "loss": 2.3515,
      "step": 64450
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.087302803993225,
      "learning_rate": 1.2652172498876759e-06,
      "loss": 2.1639,
      "step": 64451
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0274527072906494,
      "learning_rate": 1.2650167978456707e-06,
      "loss": 2.4403,
      "step": 64452
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1313761472702026,
      "learning_rate": 1.2648163606118369e-06,
      "loss": 2.5187,
      "step": 64453
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9711620211601257,
      "learning_rate": 1.2646159381865085e-06,
      "loss": 2.3199,
      "step": 64454
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1984188556671143,
      "learning_rate": 1.2644155305700323e-06,
      "loss": 2.3117,
      "step": 64455
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2270058393478394,
      "learning_rate": 1.264215137762741e-06,
      "loss": 2.3811,
      "step": 64456
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1929020881652832,
      "learning_rate": 1.26401475976498e-06,
      "loss": 2.4314,
      "step": 64457
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.121819257736206,
      "learning_rate": 1.2638143965770865e-06,
      "loss": 2.0778,
      "step": 64458
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1679470539093018,
      "learning_rate": 1.2636140481994009e-06,
      "loss": 2.3244,
      "step": 64459
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1407042741775513,
      "learning_rate": 1.26341371463226e-06,
      "loss": 2.2095,
      "step": 64460
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9111748933792114,
      "learning_rate": 1.2632133958760074e-06,
      "loss": 2.3242,
      "step": 64461
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0337982177734375,
      "learning_rate": 1.2630130919309792e-06,
      "loss": 2.4208,
      "step": 64462
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.276432991027832,
      "learning_rate": 1.2628128027975184e-06,
      "loss": 2.2477,
      "step": 64463
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.130778431892395,
      "learning_rate": 1.2626125284759617e-06,
      "loss": 2.2587,
      "step": 64464
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.104909062385559,
      "learning_rate": 1.2624122689666518e-06,
      "loss": 2.4109,
      "step": 64465
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1247296333312988,
      "learning_rate": 1.262212024269923e-06,
      "loss": 2.1661,
      "step": 64466
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.000349760055542,
      "learning_rate": 1.262011794386121e-06,
      "loss": 2.2797,
      "step": 64467
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1350679397583008,
      "learning_rate": 1.2618115793155817e-06,
      "loss": 2.3971,
      "step": 64468
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1290818452835083,
      "learning_rate": 1.2616113790586437e-06,
      "loss": 2.4695,
      "step": 64469
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2461528778076172,
      "learning_rate": 1.2614111936156493e-06,
      "loss": 2.2737,
      "step": 64470
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0408508777618408,
      "learning_rate": 1.2612110229869357e-06,
      "loss": 2.356,
      "step": 64471
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0782965421676636,
      "learning_rate": 1.2610108671728415e-06,
      "loss": 2.4217,
      "step": 64472
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0718814134597778,
      "learning_rate": 1.260810726173709e-06,
      "loss": 2.403,
      "step": 64473
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0126553773880005,
      "learning_rate": 1.2606105999898766e-06,
      "loss": 2.2864,
      "step": 64474
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0691406726837158,
      "learning_rate": 1.2604104886216806e-06,
      "loss": 2.2028,
      "step": 64475
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0379159450531006,
      "learning_rate": 1.2602103920694641e-06,
      "loss": 2.3284,
      "step": 64476
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1005703210830688,
      "learning_rate": 1.2600103103335626e-06,
      "loss": 2.4356,
      "step": 64477
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.043578028678894,
      "learning_rate": 1.259810243414319e-06,
      "loss": 2.4088,
      "step": 64478
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0951526165008545,
      "learning_rate": 1.2596101913120696e-06,
      "loss": 2.2222,
      "step": 64479
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0508066415786743,
      "learning_rate": 1.2594101540271565e-06,
      "loss": 2.2913,
      "step": 64480
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.098904013633728,
      "learning_rate": 1.2592101315599148e-06,
      "loss": 2.5274,
      "step": 64481
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0592782497406006,
      "learning_rate": 1.2590101239106889e-06,
      "loss": 2.2585,
      "step": 64482
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0669697523117065,
      "learning_rate": 1.2588101310798118e-06,
      "loss": 2.3058,
      "step": 64483
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1596803665161133,
      "learning_rate": 1.258610153067631e-06,
      "loss": 2.3932,
      "step": 64484
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2250115871429443,
      "learning_rate": 1.258410189874475e-06,
      "loss": 2.2247,
      "step": 64485
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.02833092212677,
      "learning_rate": 1.258210241500689e-06,
      "loss": 2.2362,
      "step": 64486
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.119174838066101,
      "learning_rate": 1.25801030794661e-06,
      "loss": 2.1632,
      "step": 64487
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1539705991744995,
      "learning_rate": 1.2578103892125804e-06,
      "loss": 2.2894,
      "step": 64488
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0349971055984497,
      "learning_rate": 1.2576104852989334e-06,
      "loss": 2.3943,
      "step": 64489
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9074345827102661,
      "learning_rate": 1.2574105962060134e-06,
      "loss": 2.31,
      "step": 64490
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0873832702636719,
      "learning_rate": 1.2572107219341556e-06,
      "loss": 2.6384,
      "step": 64491
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1007639169692993,
      "learning_rate": 1.2570108624837018e-06,
      "loss": 2.3712,
      "step": 64492
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0675890445709229,
      "learning_rate": 1.2568110178549864e-06,
      "loss": 2.3258,
      "step": 64493
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0283507108688354,
      "learning_rate": 1.2566111880483534e-06,
      "loss": 2.2004,
      "step": 64494
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2343382835388184,
      "learning_rate": 1.256411373064137e-06,
      "loss": 2.1872,
      "step": 64495
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1144214868545532,
      "learning_rate": 1.2562115729026802e-06,
      "loss": 2.1069,
      "step": 64496
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1484678983688354,
      "learning_rate": 1.2560117875643196e-06,
      "loss": 2.3183,
      "step": 64497
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.062268614768982,
      "learning_rate": 1.2558120170493936e-06,
      "loss": 2.4868,
      "step": 64498
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.111267328262329,
      "learning_rate": 1.25561226135824e-06,
      "loss": 2.2162,
      "step": 64499
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0362945795059204,
      "learning_rate": 1.2554125204911992e-06,
      "loss": 2.2937,
      "step": 64500
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1065133810043335,
      "learning_rate": 1.255212794448608e-06,
      "loss": 2.2996,
      "step": 64501
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0193458795547485,
      "learning_rate": 1.2550130832308082e-06,
      "loss": 2.2925,
      "step": 64502
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.157462239265442,
      "learning_rate": 1.2548133868381362e-06,
      "loss": 2.2884,
      "step": 64503
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0349249839782715,
      "learning_rate": 1.2546137052709284e-06,
      "loss": 2.1726,
      "step": 64504
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1334086656570435,
      "learning_rate": 1.2544140385295278e-06,
      "loss": 2.3748,
      "step": 64505
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0439040660858154,
      "learning_rate": 1.2542143866142686e-06,
      "loss": 2.2076,
      "step": 64506
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0663740634918213,
      "learning_rate": 1.254014749525494e-06,
      "loss": 2.3126,
      "step": 64507
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0485925674438477,
      "learning_rate": 1.2538151272635368e-06,
      "loss": 2.3556,
      "step": 64508
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0726475715637207,
      "learning_rate": 1.2536155198287415e-06,
      "loss": 2.3665,
      "step": 64509
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0176427364349365,
      "learning_rate": 1.2534159272214409e-06,
      "loss": 2.3126,
      "step": 64510
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1287267208099365,
      "learning_rate": 1.253216349441979e-06,
      "loss": 2.2773,
      "step": 64511
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1437580585479736,
      "learning_rate": 1.2530167864906873e-06,
      "loss": 2.175,
      "step": 64512
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.009185552597046,
      "learning_rate": 1.2528172383679104e-06,
      "loss": 2.3154,
      "step": 64513
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0781980752944946,
      "learning_rate": 1.2526177050739818e-06,
      "loss": 2.3782,
      "step": 64514
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0606015920639038,
      "learning_rate": 1.2524181866092432e-06,
      "loss": 2.2709,
      "step": 64515
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1405613422393799,
      "learning_rate": 1.2522186829740312e-06,
      "loss": 2.2028,
      "step": 64516
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.004773736000061,
      "learning_rate": 1.2520191941686856e-06,
      "loss": 2.3069,
      "step": 64517
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0042892694473267,
      "learning_rate": 1.25181972019354e-06,
      "loss": 2.276,
      "step": 64518
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1034488677978516,
      "learning_rate": 1.2516202610489402e-06,
      "loss": 2.3331,
      "step": 64519
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.13196861743927,
      "learning_rate": 1.251420816735216e-06,
      "loss": 2.1549,
      "step": 64520
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2856316566467285,
      "learning_rate": 1.2512213872527135e-06,
      "loss": 2.2832,
      "step": 64521
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1518988609313965,
      "learning_rate": 1.2510219726017636e-06,
      "loss": 2.4484,
      "step": 64522
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2198301553726196,
      "learning_rate": 1.2508225727827106e-06,
      "loss": 2.3601,
      "step": 64523
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2392301559448242,
      "learning_rate": 1.2506231877958885e-06,
      "loss": 2.3041,
      "step": 64524
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2803473472595215,
      "learning_rate": 1.2504238176416372e-06,
      "loss": 2.1592,
      "step": 64525
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9948474168777466,
      "learning_rate": 1.2502244623202919e-06,
      "loss": 2.2893,
      "step": 64526
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0011234283447266,
      "learning_rate": 1.2500251218321934e-06,
      "loss": 2.3985,
      "step": 64527
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0713526010513306,
      "learning_rate": 1.2498257961776784e-06,
      "loss": 2.4417,
      "step": 64528
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0957107543945312,
      "learning_rate": 1.2496264853570861e-06,
      "loss": 2.3264,
      "step": 64529
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1452915668487549,
      "learning_rate": 1.2494271893707543e-06,
      "loss": 2.2348,
      "step": 64530
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1763794422149658,
      "learning_rate": 1.2492279082190174e-06,
      "loss": 2.3447,
      "step": 64531
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1981422901153564,
      "learning_rate": 1.249028641902218e-06,
      "loss": 2.3312,
      "step": 64532
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.143487572669983,
      "learning_rate": 1.2488293904206894e-06,
      "loss": 1.9986,
      "step": 64533
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0971368551254272,
      "learning_rate": 1.2486301537747747e-06,
      "loss": 2.1766,
      "step": 64534
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.032886028289795,
      "learning_rate": 1.2484309319648057e-06,
      "loss": 2.3693,
      "step": 64535
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0364307165145874,
      "learning_rate": 1.2482317249911258e-06,
      "loss": 2.4237,
      "step": 64536
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1036479473114014,
      "learning_rate": 1.24803253285407e-06,
      "loss": 2.1365,
      "step": 64537
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2350120544433594,
      "learning_rate": 1.2478333555539758e-06,
      "loss": 2.3049,
      "step": 64538
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1044843196868896,
      "learning_rate": 1.2476341930911795e-06,
      "loss": 2.3519,
      "step": 64539
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0266369581222534,
      "learning_rate": 1.2474350454660222e-06,
      "loss": 2.1099,
      "step": 64540
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.090985655784607,
      "learning_rate": 1.247235912678838e-06,
      "loss": 2.2689,
      "step": 64541
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0989800691604614,
      "learning_rate": 1.2470367947299679e-06,
      "loss": 2.3579,
      "step": 64542
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2912585735321045,
      "learning_rate": 1.2468376916197455e-06,
      "loss": 2.2543,
      "step": 64543
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9916273355484009,
      "learning_rate": 1.2466386033485124e-06,
      "loss": 2.3498,
      "step": 64544
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0766329765319824,
      "learning_rate": 1.246439529916602e-06,
      "loss": 2.1825,
      "step": 64545
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0858392715454102,
      "learning_rate": 1.246240471324357e-06,
      "loss": 2.488,
      "step": 64546
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1901440620422363,
      "learning_rate": 1.246041427572109e-06,
      "loss": 2.1433,
      "step": 64547
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0535645484924316,
      "learning_rate": 1.2458423986602008e-06,
      "loss": 2.2092,
      "step": 64548
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9864128828048706,
      "learning_rate": 1.245643384588965e-06,
      "loss": 2.4753,
      "step": 64549
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0576543807983398,
      "learning_rate": 1.2454443853587461e-06,
      "loss": 2.3532,
      "step": 64550
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1083379983901978,
      "learning_rate": 1.2452454009698723e-06,
      "loss": 2.4266,
      "step": 64551
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0441879034042358,
      "learning_rate": 1.2450464314226874e-06,
      "loss": 2.4296,
      "step": 64552
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9667865037918091,
      "learning_rate": 1.2448474767175233e-06,
      "loss": 2.0517,
      "step": 64553
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0788171291351318,
      "learning_rate": 1.2446485368547244e-06,
      "loss": 2.5417,
      "step": 64554
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5353834629058838,
      "learning_rate": 1.2444496118346205e-06,
      "loss": 2.1538,
      "step": 64555
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.131898283958435,
      "learning_rate": 1.2442507016575556e-06,
      "loss": 2.3669,
      "step": 64556
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1147408485412598,
      "learning_rate": 1.2440518063238615e-06,
      "loss": 2.3428,
      "step": 64557
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1727417707443237,
      "learning_rate": 1.2438529258338805e-06,
      "loss": 2.6568,
      "step": 64558
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1846472024917603,
      "learning_rate": 1.2436540601879454e-06,
      "loss": 2.5031,
      "step": 64559
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1389909982681274,
      "learning_rate": 1.2434552093863939e-06,
      "loss": 2.6223,
      "step": 64560
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.102537751197815,
      "learning_rate": 1.2432563734295656e-06,
      "loss": 2.4811,
      "step": 64561
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0385078191757202,
      "learning_rate": 1.2430575523177947e-06,
      "loss": 2.3926,
      "step": 64562
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0743026733398438,
      "learning_rate": 1.2428587460514207e-06,
      "loss": 2.3488,
      "step": 64563
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.29103422164917,
      "learning_rate": 1.2426599546307805e-06,
      "loss": 2.4888,
      "step": 64564
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1461020708084106,
      "learning_rate": 1.242461178056209e-06,
      "loss": 2.276,
      "step": 64565
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1478931903839111,
      "learning_rate": 1.242262416328044e-06,
      "loss": 2.3363,
      "step": 64566
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.038803219795227,
      "learning_rate": 1.2420636694466237e-06,
      "loss": 2.11,
      "step": 64567
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0294736623764038,
      "learning_rate": 1.2418649374122815e-06,
      "loss": 2.4007,
      "step": 64568
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9868656396865845,
      "learning_rate": 1.2416662202253592e-06,
      "loss": 2.2751,
      "step": 64569
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0770014524459839,
      "learning_rate": 1.2414675178861901e-06,
      "loss": 2.2841,
      "step": 64570
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9901129007339478,
      "learning_rate": 1.2412688303951136e-06,
      "loss": 2.1756,
      "step": 64571
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.055402159690857,
      "learning_rate": 1.2410701577524642e-06,
      "loss": 2.5108,
      "step": 64572
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9753267168998718,
      "learning_rate": 1.2408714999585814e-06,
      "loss": 2.2951,
      "step": 64573
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0222601890563965,
      "learning_rate": 1.240672857013797e-06,
      "loss": 2.4001,
      "step": 64574
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9524917006492615,
      "learning_rate": 1.2404742289184547e-06,
      "loss": 2.0699,
      "step": 64575
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1027474403381348,
      "learning_rate": 1.2402756156728857e-06,
      "loss": 2.4713,
      "step": 64576
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1291369199752808,
      "learning_rate": 1.2400770172774323e-06,
      "loss": 2.4161,
      "step": 64577
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0323035717010498,
      "learning_rate": 1.2398784337324232e-06,
      "loss": 2.4118,
      "step": 64578
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0561555624008179,
      "learning_rate": 1.2396798650382013e-06,
      "loss": 2.4456,
      "step": 64579
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2133431434631348,
      "learning_rate": 1.2394813111950999e-06,
      "loss": 2.3468,
      "step": 64580
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.082947850227356,
      "learning_rate": 1.2392827722034583e-06,
      "loss": 1.9914,
      "step": 64581
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.162291169166565,
      "learning_rate": 1.23908424806361e-06,
      "loss": 2.3789,
      "step": 64582
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9416213631629944,
      "learning_rate": 1.2388857387758958e-06,
      "loss": 2.3713,
      "step": 64583
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1746582984924316,
      "learning_rate": 1.2386872443406472e-06,
      "loss": 2.2724,
      "step": 64584
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1321501731872559,
      "learning_rate": 1.2384887647582056e-06,
      "loss": 2.1867,
      "step": 64585
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2134500741958618,
      "learning_rate": 1.2382903000289048e-06,
      "loss": 2.2191,
      "step": 64586
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0104565620422363,
      "learning_rate": 1.2380918501530791e-06,
      "loss": 2.2557,
      "step": 64587
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0242546796798706,
      "learning_rate": 1.2378934151310707e-06,
      "loss": 2.2157,
      "step": 64588
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.232436180114746,
      "learning_rate": 1.23769499496321e-06,
      "loss": 2.3705,
      "step": 64589
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1274642944335938,
      "learning_rate": 1.2374965896498382e-06,
      "loss": 2.334,
      "step": 64590
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0872981548309326,
      "learning_rate": 1.2372981991912902e-06,
      "loss": 2.3028,
      "step": 64591
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0513242483139038,
      "learning_rate": 1.2370998235879007e-06,
      "loss": 2.4297,
      "step": 64592
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0932737588882446,
      "learning_rate": 1.2369014628400055e-06,
      "loss": 2.4585,
      "step": 64593
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1791714429855347,
      "learning_rate": 1.2367031169479438e-06,
      "loss": 2.3127,
      "step": 64594
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0285407304763794,
      "learning_rate": 1.2365047859120483e-06,
      "loss": 2.4166,
      "step": 64595
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2528455257415771,
      "learning_rate": 1.23630646973266e-06,
      "loss": 2.2808,
      "step": 64596
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1236732006072998,
      "learning_rate": 1.2361081684101106e-06,
      "loss": 2.1883,
      "step": 64597
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1144992113113403,
      "learning_rate": 1.235909881944739e-06,
      "loss": 2.314,
      "step": 64598
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1089556217193604,
      "learning_rate": 1.2357116103368795e-06,
      "loss": 2.4303,
      "step": 64599
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9732683300971985,
      "learning_rate": 1.2355133535868713e-06,
      "loss": 2.3346,
      "step": 64600
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.133832335472107,
      "learning_rate": 1.2353151116950457e-06,
      "loss": 2.4272,
      "step": 64601
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1255618333816528,
      "learning_rate": 1.2351168846617433e-06,
      "loss": 2.5053,
      "step": 64602
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1234780550003052,
      "learning_rate": 1.2349186724872985e-06,
      "loss": 2.4654,
      "step": 64603
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1916319131851196,
      "learning_rate": 1.2347204751720477e-06,
      "loss": 2.1348,
      "step": 64604
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9987146258354187,
      "learning_rate": 1.2345222927163237e-06,
      "loss": 2.1936,
      "step": 64605
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1805593967437744,
      "learning_rate": 1.2343241251204675e-06,
      "loss": 2.491,
      "step": 64606
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.199194073677063,
      "learning_rate": 1.2341259723848098e-06,
      "loss": 2.3797,
      "step": 64607
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2301182746887207,
      "learning_rate": 1.2339278345096916e-06,
      "loss": 2.2695,
      "step": 64608
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.005566120147705,
      "learning_rate": 1.233729711495445e-06,
      "loss": 2.363,
      "step": 64609
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1246814727783203,
      "learning_rate": 1.2335316033424094e-06,
      "loss": 2.4224,
      "step": 64610
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9813944101333618,
      "learning_rate": 1.233333510050917e-06,
      "loss": 2.4361,
      "step": 64611
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.175568699836731,
      "learning_rate": 1.2331354316213063e-06,
      "loss": 2.1997,
      "step": 64612
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0364694595336914,
      "learning_rate": 1.2329373680539104e-06,
      "loss": 2.3294,
      "step": 64613
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0476597547531128,
      "learning_rate": 1.232739319349069e-06,
      "loss": 2.1536,
      "step": 64614
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.117088794708252,
      "learning_rate": 1.232541285507116e-06,
      "loss": 2.2391,
      "step": 64615
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0338459014892578,
      "learning_rate": 1.2323432665283862e-06,
      "loss": 2.3666,
      "step": 64616
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.169684648513794,
      "learning_rate": 1.2321452624132135e-06,
      "loss": 2.1356,
      "step": 64617
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1543303728103638,
      "learning_rate": 1.2319472731619386e-06,
      "loss": 2.2508,
      "step": 64618
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0795081853866577,
      "learning_rate": 1.2317492987748925e-06,
      "loss": 2.4289,
      "step": 64619
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0115331411361694,
      "learning_rate": 1.2315513392524147e-06,
      "loss": 2.2372,
      "step": 64620
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.317325472831726,
      "learning_rate": 1.2313533945948385e-06,
      "loss": 2.379,
      "step": 64621
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1580753326416016,
      "learning_rate": 1.231155464802498e-06,
      "loss": 2.1734,
      "step": 64622
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1923010349273682,
      "learning_rate": 1.230957549875732e-06,
      "loss": 2.3142,
      "step": 64623
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.050580382347107,
      "learning_rate": 1.2307596498148732e-06,
      "loss": 2.3166,
      "step": 64624
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0624682903289795,
      "learning_rate": 1.2305617646202605e-06,
      "loss": 2.3603,
      "step": 64625
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1191489696502686,
      "learning_rate": 1.2303638942922257e-06,
      "loss": 2.4543,
      "step": 64626
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1748241186141968,
      "learning_rate": 1.2301660388311076e-06,
      "loss": 2.4497,
      "step": 64627
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0563253164291382,
      "learning_rate": 1.229968198237237e-06,
      "loss": 2.3301,
      "step": 64628
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2672266960144043,
      "learning_rate": 1.2297703725109578e-06,
      "loss": 2.4642,
      "step": 64629
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0274819135665894,
      "learning_rate": 1.2295725616525944e-06,
      "loss": 2.147,
      "step": 64630
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1377280950546265,
      "learning_rate": 1.22937476566249e-06,
      "loss": 2.5171,
      "step": 64631
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0930819511413574,
      "learning_rate": 1.2291769845409762e-06,
      "loss": 2.4412,
      "step": 64632
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1767101287841797,
      "learning_rate": 1.2289792182883908e-06,
      "loss": 2.3595,
      "step": 64633
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0794851779937744,
      "learning_rate": 1.2287814669050658e-06,
      "loss": 2.3419,
      "step": 64634
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.159964680671692,
      "learning_rate": 1.2285837303913407e-06,
      "loss": 2.4492,
      "step": 64635
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.26654052734375,
      "learning_rate": 1.2283860087475451e-06,
      "loss": 2.4007,
      "step": 64636
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0682992935180664,
      "learning_rate": 1.2281883019740216e-06,
      "loss": 2.5304,
      "step": 64637
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0571614503860474,
      "learning_rate": 1.227990610071098e-06,
      "loss": 2.1419,
      "step": 64638
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1847821474075317,
      "learning_rate": 1.2277929330391158e-06,
      "loss": 2.5301,
      "step": 64639
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1229127645492554,
      "learning_rate": 1.2275952708784044e-06,
      "loss": 2.1874,
      "step": 64640
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.098410725593567,
      "learning_rate": 1.2273976235893037e-06,
      "loss": 2.3349,
      "step": 64641
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.280786156654358,
      "learning_rate": 1.2271999911721467e-06,
      "loss": 2.2241,
      "step": 64642
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1102993488311768,
      "learning_rate": 1.2270023736272674e-06,
      "loss": 2.2055,
      "step": 64643
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.041783332824707,
      "learning_rate": 1.2268047709550013e-06,
      "loss": 2.3936,
      "step": 64644
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0720462799072266,
      "learning_rate": 1.2266071831556848e-06,
      "loss": 2.367,
      "step": 64645
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0504546165466309,
      "learning_rate": 1.2264096102296507e-06,
      "loss": 2.2608,
      "step": 64646
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2226940393447876,
      "learning_rate": 1.2262120521772369e-06,
      "loss": 2.3621,
      "step": 64647
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.049768090248108,
      "learning_rate": 1.2260145089987763e-06,
      "loss": 2.4735,
      "step": 64648
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0586349964141846,
      "learning_rate": 1.225816980694603e-06,
      "loss": 2.2706,
      "step": 64649
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1387087106704712,
      "learning_rate": 1.2256194672650535e-06,
      "loss": 2.2921,
      "step": 64650
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9477111101150513,
      "learning_rate": 1.2254219687104608e-06,
      "loss": 2.3532,
      "step": 64651
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0642482042312622,
      "learning_rate": 1.2252244850311633e-06,
      "loss": 2.1117,
      "step": 64652
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1035385131835938,
      "learning_rate": 1.2250270162274913e-06,
      "loss": 2.3989,
      "step": 64653
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.068328857421875,
      "learning_rate": 1.2248295622997842e-06,
      "loss": 2.4561,
      "step": 64654
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0101323127746582,
      "learning_rate": 1.2246321232483715e-06,
      "loss": 2.1785,
      "step": 64655
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.121187448501587,
      "learning_rate": 1.2244346990735956e-06,
      "loss": 2.5807,
      "step": 64656
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0841361284255981,
      "learning_rate": 1.2242372897757815e-06,
      "loss": 2.4578,
      "step": 64657
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9937642216682434,
      "learning_rate": 1.2240398953552713e-06,
      "loss": 2.2546,
      "step": 64658
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4256069660186768,
      "learning_rate": 1.2238425158123946e-06,
      "loss": 2.3346,
      "step": 64659
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1663107872009277,
      "learning_rate": 1.22364515114749e-06,
      "loss": 2.2883,
      "step": 64660
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9710582494735718,
      "learning_rate": 1.2234478013608886e-06,
      "loss": 2.142,
      "step": 64661
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2016232013702393,
      "learning_rate": 1.2232504664529298e-06,
      "loss": 2.5454,
      "step": 64662
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0487101078033447,
      "learning_rate": 1.2230531464239426e-06,
      "loss": 2.473,
      "step": 64663
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.064309000968933,
      "learning_rate": 1.2228558412742652e-06,
      "loss": 2.2058,
      "step": 64664
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.13578200340271,
      "learning_rate": 1.22265855100423e-06,
      "loss": 2.4845,
      "step": 64665
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0415191650390625,
      "learning_rate": 1.222461275614175e-06,
      "loss": 2.3502,
      "step": 64666
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.081212043762207,
      "learning_rate": 1.2222640151044285e-06,
      "loss": 2.2149,
      "step": 64667
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1568149328231812,
      "learning_rate": 1.2220667694753318e-06,
      "loss": 2.2199,
      "step": 64668
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0822762250900269,
      "learning_rate": 1.221869538727216e-06,
      "loss": 2.2491,
      "step": 64669
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0213470458984375,
      "learning_rate": 1.2216723228604144e-06,
      "loss": 2.2119,
      "step": 64670
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0764496326446533,
      "learning_rate": 1.221475121875262e-06,
      "loss": 2.3611,
      "step": 64671
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.239534854888916,
      "learning_rate": 1.2212779357720938e-06,
      "loss": 2.3606,
      "step": 64672
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0726428031921387,
      "learning_rate": 1.2210807645512424e-06,
      "loss": 2.1504,
      "step": 64673
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2401254177093506,
      "learning_rate": 1.220883608213046e-06,
      "loss": 2.0307,
      "step": 64674
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0967347621917725,
      "learning_rate": 1.2206864667578332e-06,
      "loss": 2.3337,
      "step": 64675
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1200933456420898,
      "learning_rate": 1.2204893401859441e-06,
      "loss": 2.2966,
      "step": 64676
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1196141242980957,
      "learning_rate": 1.2202922284977093e-06,
      "loss": 2.4534,
      "step": 64677
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0911186933517456,
      "learning_rate": 1.2200951316934616e-06,
      "loss": 2.416,
      "step": 64678
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.09560227394104,
      "learning_rate": 1.2198980497735401e-06,
      "loss": 2.2203,
      "step": 64679
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2818074226379395,
      "learning_rate": 1.2197009827382745e-06,
      "loss": 2.1051,
      "step": 64680
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.142933964729309,
      "learning_rate": 1.2195039305880007e-06,
      "loss": 2.391,
      "step": 64681
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0121952295303345,
      "learning_rate": 1.2193068933230546e-06,
      "loss": 2.307,
      "step": 64682
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0748317241668701,
      "learning_rate": 1.2191098709437666e-06,
      "loss": 2.3432,
      "step": 64683
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9975265264511108,
      "learning_rate": 1.218912863450471e-06,
      "loss": 2.2442,
      "step": 64684
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1053709983825684,
      "learning_rate": 1.2187158708435043e-06,
      "loss": 2.3742,
      "step": 64685
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1457016468048096,
      "learning_rate": 1.2185188931231972e-06,
      "loss": 2.3097,
      "step": 64686
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1750257015228271,
      "learning_rate": 1.2183219302898875e-06,
      "loss": 2.2151,
      "step": 64687
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1566249132156372,
      "learning_rate": 1.2181249823439056e-06,
      "loss": 2.5198,
      "step": 64688
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0874512195587158,
      "learning_rate": 1.2179280492855895e-06,
      "loss": 2.1255,
      "step": 64689
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.154884696006775,
      "learning_rate": 1.2177311311152674e-06,
      "loss": 2.3041,
      "step": 64690
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0561686754226685,
      "learning_rate": 1.2175342278332792e-06,
      "loss": 2.4658,
      "step": 64691
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0316474437713623,
      "learning_rate": 1.2173373394399536e-06,
      "loss": 2.2189,
      "step": 64692
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1647496223449707,
      "learning_rate": 1.2171404659356278e-06,
      "loss": 2.1725,
      "step": 64693
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0926653146743774,
      "learning_rate": 1.2169436073206332e-06,
      "loss": 2.1899,
      "step": 64694
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.326714277267456,
      "learning_rate": 1.216746763595309e-06,
      "loss": 2.5198,
      "step": 64695
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1294668912887573,
      "learning_rate": 1.2165499347599796e-06,
      "loss": 2.264,
      "step": 64696
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9694334268569946,
      "learning_rate": 1.216353120814986e-06,
      "loss": 2.1784,
      "step": 64697
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2004069089889526,
      "learning_rate": 1.2161563217606565e-06,
      "loss": 2.3963,
      "step": 64698
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1295087337493896,
      "learning_rate": 1.215959537597331e-06,
      "loss": 2.2392,
      "step": 64699
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0369842052459717,
      "learning_rate": 1.215762768325337e-06,
      "loss": 2.0168,
      "step": 64700
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0297526121139526,
      "learning_rate": 1.2155660139450132e-06,
      "loss": 2.1523,
      "step": 64701
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0473099946975708,
      "learning_rate": 1.2153692744566892e-06,
      "loss": 2.4292,
      "step": 64702
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1040186882019043,
      "learning_rate": 1.2151725498607014e-06,
      "loss": 2.2812,
      "step": 64703
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.059013843536377,
      "learning_rate": 1.214975840157383e-06,
      "loss": 2.3261,
      "step": 64704
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0563416481018066,
      "learning_rate": 1.2147791453470638e-06,
      "loss": 2.5281,
      "step": 64705
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.059370756149292,
      "learning_rate": 1.214582465430082e-06,
      "loss": 2.1238,
      "step": 64706
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2100437879562378,
      "learning_rate": 1.2143858004067665e-06,
      "loss": 2.3231,
      "step": 64707
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.19642174243927,
      "learning_rate": 1.214189150277456e-06,
      "loss": 2.3972,
      "step": 64708
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1209076642990112,
      "learning_rate": 1.2139925150424815e-06,
      "loss": 2.5055,
      "step": 64709
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.088249683380127,
      "learning_rate": 1.2137958947021756e-06,
      "loss": 2.4066,
      "step": 64710
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9694821238517761,
      "learning_rate": 1.2135992892568693e-06,
      "loss": 2.2442,
      "step": 64711
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0285178422927856,
      "learning_rate": 1.2134026987069015e-06,
      "loss": 2.3704,
      "step": 64712
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1721844673156738,
      "learning_rate": 1.2132061230525992e-06,
      "loss": 2.3497,
      "step": 64713
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0339009761810303,
      "learning_rate": 1.2130095622943016e-06,
      "loss": 2.4711,
      "step": 64714
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0812575817108154,
      "learning_rate": 1.2128130164323382e-06,
      "loss": 2.4228,
      "step": 64715
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.089963674545288,
      "learning_rate": 1.212616485467044e-06,
      "loss": 2.2149,
      "step": 64716
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.003563404083252,
      "learning_rate": 1.2124199693987504e-06,
      "loss": 2.2877,
      "step": 64717
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0251755714416504,
      "learning_rate": 1.2122234682277934e-06,
      "loss": 2.1871,
      "step": 64718
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1345083713531494,
      "learning_rate": 1.2120269819545017e-06,
      "loss": 2.3644,
      "step": 64719
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1220756769180298,
      "learning_rate": 1.211830510579214e-06,
      "loss": 2.1249,
      "step": 64720
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0540814399719238,
      "learning_rate": 1.2116340541022575e-06,
      "loss": 2.2532,
      "step": 64721
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.162434458732605,
      "learning_rate": 1.2114376125239735e-06,
      "loss": 2.3941,
      "step": 64722
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2053395509719849,
      "learning_rate": 1.211241185844685e-06,
      "loss": 2.2495,
      "step": 64723
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.966286301612854,
      "learning_rate": 1.2110447740647312e-06,
      "loss": 2.2768,
      "step": 64724
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0299532413482666,
      "learning_rate": 1.2108483771844414e-06,
      "loss": 2.2127,
      "step": 64725
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9871959686279297,
      "learning_rate": 1.210651995204154e-06,
      "loss": 2.3189,
      "step": 64726
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0735629796981812,
      "learning_rate": 1.2104556281241952e-06,
      "loss": 2.2671,
      "step": 64727
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.129703402519226,
      "learning_rate": 1.2102592759449038e-06,
      "loss": 2.4612,
      "step": 64728
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0717829465866089,
      "learning_rate": 1.2100629386666085e-06,
      "loss": 2.3573,
      "step": 64729
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2148830890655518,
      "learning_rate": 1.2098666162896466e-06,
      "loss": 2.484,
      "step": 64730
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0462570190429688,
      "learning_rate": 1.2096703088143446e-06,
      "loss": 2.3515,
      "step": 64731
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0109130144119263,
      "learning_rate": 1.2094740162410412e-06,
      "loss": 2.3223,
      "step": 64732
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1025824546813965,
      "learning_rate": 1.209277738570067e-06,
      "loss": 2.2098,
      "step": 64733
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1489953994750977,
      "learning_rate": 1.2090814758017533e-06,
      "loss": 2.3439,
      "step": 64734
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.223321795463562,
      "learning_rate": 1.208885227936435e-06,
      "loss": 2.2369,
      "step": 64735
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1528751850128174,
      "learning_rate": 1.2086889949744452e-06,
      "loss": 2.2974,
      "step": 64736
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0309311151504517,
      "learning_rate": 1.2084927769161115e-06,
      "loss": 2.1773,
      "step": 64737
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1670688390731812,
      "learning_rate": 1.2082965737617735e-06,
      "loss": 2.1864,
      "step": 64738
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.053707242012024,
      "learning_rate": 1.20810038551176e-06,
      "loss": 2.342,
      "step": 64739
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1823776960372925,
      "learning_rate": 1.2079042121664032e-06,
      "loss": 2.2937,
      "step": 64740
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0044885873794556,
      "learning_rate": 1.2077080537260377e-06,
      "loss": 2.4137,
      "step": 64741
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.119127631187439,
      "learning_rate": 1.2075119101909927e-06,
      "loss": 2.3005,
      "step": 64742
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1160547733306885,
      "learning_rate": 1.2073157815616055e-06,
      "loss": 2.3034,
      "step": 64743
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0947089195251465,
      "learning_rate": 1.2071196678382047e-06,
      "loss": 2.4059,
      "step": 64744
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.132464051246643,
      "learning_rate": 1.2069235690211255e-06,
      "loss": 2.2704,
      "step": 64745
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1653432846069336,
      "learning_rate": 1.206727485110697e-06,
      "loss": 2.3978,
      "step": 64746
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0210498571395874,
      "learning_rate": 1.206531416107256e-06,
      "loss": 2.3656,
      "step": 64747
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1836434602737427,
      "learning_rate": 1.2063353620111319e-06,
      "loss": 2.3583,
      "step": 64748
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1276731491088867,
      "learning_rate": 1.2061393228226582e-06,
      "loss": 2.415,
      "step": 64749
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1628743410110474,
      "learning_rate": 1.205943298542165e-06,
      "loss": 2.4603,
      "step": 64750
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2916548252105713,
      "learning_rate": 1.2057472891699873e-06,
      "loss": 2.7416,
      "step": 64751
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1284819841384888,
      "learning_rate": 1.2055512947064552e-06,
      "loss": 2.4256,
      "step": 64752
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1028170585632324,
      "learning_rate": 1.2053553151519036e-06,
      "loss": 2.3411,
      "step": 64753
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0005955696105957,
      "learning_rate": 1.2051593505066628e-06,
      "loss": 2.3785,
      "step": 64754
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1387308835983276,
      "learning_rate": 1.2049634007710665e-06,
      "loss": 2.3531,
      "step": 64755
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0629698038101196,
      "learning_rate": 1.2047674659454433e-06,
      "loss": 2.2691,
      "step": 64756
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0196045637130737,
      "learning_rate": 1.204571546030131e-06,
      "loss": 2.1186,
      "step": 64757
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1140172481536865,
      "learning_rate": 1.204375641025456e-06,
      "loss": 2.4821,
      "step": 64758
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1016426086425781,
      "learning_rate": 1.2041797509317554e-06,
      "loss": 2.5192,
      "step": 64759
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0546746253967285,
      "learning_rate": 1.2039838757493593e-06,
      "loss": 2.3573,
      "step": 64760
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9890517592430115,
      "learning_rate": 1.2037880154785997e-06,
      "loss": 1.9596,
      "step": 64761
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1582982540130615,
      "learning_rate": 1.203592170119806e-06,
      "loss": 2.3705,
      "step": 64762
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1095155477523804,
      "learning_rate": 1.2033963396733151e-06,
      "loss": 2.4745,
      "step": 64763
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1151705980300903,
      "learning_rate": 1.203200524139454e-06,
      "loss": 2.4942,
      "step": 64764
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0071794986724854,
      "learning_rate": 1.2030047235185593e-06,
      "loss": 2.3728,
      "step": 64765
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.186111569404602,
      "learning_rate": 1.2028089378109608e-06,
      "loss": 2.2485,
      "step": 64766
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2388156652450562,
      "learning_rate": 1.2026131670169893e-06,
      "loss": 2.2883,
      "step": 64767
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0584006309509277,
      "learning_rate": 1.202417411136979e-06,
      "loss": 2.3874,
      "step": 64768
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0559308528900146,
      "learning_rate": 1.2022216701712586e-06,
      "loss": 2.1749,
      "step": 64769
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2283070087432861,
      "learning_rate": 1.2020259441201632e-06,
      "loss": 2.0125,
      "step": 64770
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.152963399887085,
      "learning_rate": 1.2018302329840226e-06,
      "loss": 2.3257,
      "step": 64771
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1771742105484009,
      "learning_rate": 1.201634536763171e-06,
      "loss": 2.2595,
      "step": 64772
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0706919431686401,
      "learning_rate": 1.201438855457936e-06,
      "loss": 2.3392,
      "step": 64773
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.029407262802124,
      "learning_rate": 1.201243189068656e-06,
      "loss": 2.2641,
      "step": 64774
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9970231652259827,
      "learning_rate": 1.2010475375956543e-06,
      "loss": 2.2104,
      "step": 64775
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0227285623550415,
      "learning_rate": 1.2008519010392694e-06,
      "loss": 2.2303,
      "step": 64776
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0903438329696655,
      "learning_rate": 1.2006562793998278e-06,
      "loss": 2.3028,
      "step": 64777
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.037769079208374,
      "learning_rate": 1.2004606726776658e-06,
      "loss": 2.1311,
      "step": 64778
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0913645029067993,
      "learning_rate": 1.200265080873111e-06,
      "loss": 2.3416,
      "step": 64779
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1525399684906006,
      "learning_rate": 1.2000695039864984e-06,
      "loss": 2.5041,
      "step": 64780
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1231249570846558,
      "learning_rate": 1.199873942018157e-06,
      "loss": 2.2739,
      "step": 64781
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1318862438201904,
      "learning_rate": 1.199678394968421e-06,
      "loss": 2.2973,
      "step": 64782
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1125266551971436,
      "learning_rate": 1.1994828628376175e-06,
      "loss": 2.2498,
      "step": 64783
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0663478374481201,
      "learning_rate": 1.1992873456260834e-06,
      "loss": 2.4223,
      "step": 64784
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2006853818893433,
      "learning_rate": 1.1990918433341459e-06,
      "loss": 2.3725,
      "step": 64785
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0437772274017334,
      "learning_rate": 1.198896355962139e-06,
      "loss": 2.3331,
      "step": 64786
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.059939980506897,
      "learning_rate": 1.198700883510392e-06,
      "loss": 2.4527,
      "step": 64787
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4909234046936035,
      "learning_rate": 1.1985054259792405e-06,
      "loss": 2.121,
      "step": 64788
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0281740427017212,
      "learning_rate": 1.1983099833690092e-06,
      "loss": 2.5451,
      "step": 64789
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.065405011177063,
      "learning_rate": 1.1981145556800355e-06,
      "loss": 2.4289,
      "step": 64790
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1237616539001465,
      "learning_rate": 1.1979191429126458e-06,
      "loss": 2.3043,
      "step": 64791
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0460243225097656,
      "learning_rate": 1.1977237450671753e-06,
      "loss": 2.2776,
      "step": 64792
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0545340776443481,
      "learning_rate": 1.1975283621439526e-06,
      "loss": 2.2463,
      "step": 64793
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.036475419998169,
      "learning_rate": 1.197332994143311e-06,
      "loss": 2.3979,
      "step": 64794
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9916808009147644,
      "learning_rate": 1.1971376410655822e-06,
      "loss": 2.1839,
      "step": 64795
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.135474681854248,
      "learning_rate": 1.1969423029110926e-06,
      "loss": 2.3563,
      "step": 64796
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.06183660030365,
      "learning_rate": 1.1967469796801789e-06,
      "loss": 2.3034,
      "step": 64797
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.156315565109253,
      "learning_rate": 1.1965516713731684e-06,
      "loss": 2.45,
      "step": 64798
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.065769076347351,
      "learning_rate": 1.1963563779903953e-06,
      "loss": 2.5344,
      "step": 64799
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0836437940597534,
      "learning_rate": 1.196161099532187e-06,
      "loss": 2.0604,
      "step": 64800
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2714825868606567,
      "learning_rate": 1.1959658359988824e-06,
      "loss": 2.2645,
      "step": 64801
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1510025262832642,
      "learning_rate": 1.195770587390801e-06,
      "loss": 2.4346,
      "step": 64802
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0520379543304443,
      "learning_rate": 1.1955753537082827e-06,
      "loss": 2.4219,
      "step": 64803
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2655956745147705,
      "learning_rate": 1.1953801349516525e-06,
      "loss": 2.3242,
      "step": 64804
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0304899215698242,
      "learning_rate": 1.1951849311212471e-06,
      "loss": 2.2611,
      "step": 64805
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0513713359832764,
      "learning_rate": 1.1949897422173928e-06,
      "loss": 2.4031,
      "step": 64806
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0840754508972168,
      "learning_rate": 1.194794568240424e-06,
      "loss": 2.3075,
      "step": 64807
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0082050561904907,
      "learning_rate": 1.1945994091906665e-06,
      "loss": 2.3205,
      "step": 64808
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2087308168411255,
      "learning_rate": 1.1944042650684583e-06,
      "loss": 2.4222,
      "step": 64809
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0530847311019897,
      "learning_rate": 1.1942091358741237e-06,
      "loss": 2.1243,
      "step": 64810
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.145062804222107,
      "learning_rate": 1.1940140216079988e-06,
      "loss": 2.3264,
      "step": 64811
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0945086479187012,
      "learning_rate": 1.1938189222704088e-06,
      "loss": 2.3246,
      "step": 64812
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.036505103111267,
      "learning_rate": 1.1936238378616905e-06,
      "loss": 2.3985,
      "step": 64813
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1263502836227417,
      "learning_rate": 1.193428768382171e-06,
      "loss": 2.5751,
      "step": 64814
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0465264320373535,
      "learning_rate": 1.1932337138321814e-06,
      "loss": 2.3113,
      "step": 64815
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1332114934921265,
      "learning_rate": 1.1930386742120504e-06,
      "loss": 2.2833,
      "step": 64816
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1961495876312256,
      "learning_rate": 1.1928436495221142e-06,
      "loss": 2.3075,
      "step": 64817
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0009901523590088,
      "learning_rate": 1.1926486397626968e-06,
      "loss": 2.28,
      "step": 64818
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0981519222259521,
      "learning_rate": 1.192453644934134e-06,
      "loss": 2.2914,
      "step": 64819
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1091428995132446,
      "learning_rate": 1.1922586650367529e-06,
      "loss": 2.2426,
      "step": 64820
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0220870971679688,
      "learning_rate": 1.1920637000708867e-06,
      "loss": 2.2017,
      "step": 64821
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.124165654182434,
      "learning_rate": 1.1918687500368653e-06,
      "loss": 2.319,
      "step": 64822
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1928738355636597,
      "learning_rate": 1.191673814935016e-06,
      "loss": 2.4226,
      "step": 64823
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9995306134223938,
      "learning_rate": 1.1914788947656753e-06,
      "loss": 2.3194,
      "step": 64824
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1477025747299194,
      "learning_rate": 1.1912839895291673e-06,
      "loss": 2.2319,
      "step": 64825
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0118314027786255,
      "learning_rate": 1.1910890992258273e-06,
      "loss": 2.3019,
      "step": 64826
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0673631429672241,
      "learning_rate": 1.190894223855984e-06,
      "loss": 2.2336,
      "step": 64827
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1269092559814453,
      "learning_rate": 1.1906993634199649e-06,
      "loss": 2.3067,
      "step": 64828
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.100938320159912,
      "learning_rate": 1.1905045179181052e-06,
      "loss": 2.4713,
      "step": 64829
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.125134825706482,
      "learning_rate": 1.1903096873507336e-06,
      "loss": 2.3062,
      "step": 64830
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1006914377212524,
      "learning_rate": 1.1901148717181777e-06,
      "loss": 2.6923,
      "step": 64831
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1344199180603027,
      "learning_rate": 1.1899200710207715e-06,
      "loss": 2.4085,
      "step": 64832
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1418296098709106,
      "learning_rate": 1.1897252852588414e-06,
      "loss": 2.4239,
      "step": 64833
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1205933094024658,
      "learning_rate": 1.1895305144327218e-06,
      "loss": 2.2104,
      "step": 64834
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1823885440826416,
      "learning_rate": 1.189335758542739e-06,
      "loss": 2.305,
      "step": 64835
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1793253421783447,
      "learning_rate": 1.189141017589227e-06,
      "loss": 2.3318,
      "step": 64836
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1547064781188965,
      "learning_rate": 1.1889462915725113e-06,
      "loss": 2.2563,
      "step": 64837
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0678116083145142,
      "learning_rate": 1.1887515804929283e-06,
      "loss": 2.1256,
      "step": 64838
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1119316816329956,
      "learning_rate": 1.188556884350801e-06,
      "loss": 2.1409,
      "step": 64839
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0536304712295532,
      "learning_rate": 1.188362203146468e-06,
      "loss": 2.4366,
      "step": 64840
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1200364828109741,
      "learning_rate": 1.1881675368802492e-06,
      "loss": 2.0525,
      "step": 64841
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.044358253479004,
      "learning_rate": 1.187972885552483e-06,
      "loss": 2.1607,
      "step": 64842
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.005382776260376,
      "learning_rate": 1.1877782491634927e-06,
      "loss": 2.3366,
      "step": 64843
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9536309838294983,
      "learning_rate": 1.1875836277136143e-06,
      "loss": 2.1989,
      "step": 64844
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0202560424804688,
      "learning_rate": 1.1873890212031735e-06,
      "loss": 2.3253,
      "step": 64845
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0213252305984497,
      "learning_rate": 1.1871944296325033e-06,
      "loss": 2.1532,
      "step": 64846
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0435672998428345,
      "learning_rate": 1.18699985300193e-06,
      "loss": 2.4854,
      "step": 64847
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.185368537902832,
      "learning_rate": 1.1868052913117878e-06,
      "loss": 2.3701,
      "step": 64848
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0954619646072388,
      "learning_rate": 1.186610744562402e-06,
      "loss": 2.6073,
      "step": 64849
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.055249810218811,
      "learning_rate": 1.1864162127541068e-06,
      "loss": 2.4038,
      "step": 64850
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1720964908599854,
      "learning_rate": 1.1862216958872297e-06,
      "loss": 2.41,
      "step": 64851
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1290990114212036,
      "learning_rate": 1.1860271939620982e-06,
      "loss": 2.5513,
      "step": 64852
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1144803762435913,
      "learning_rate": 1.1858327069790477e-06,
      "loss": 2.3273,
      "step": 64853
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.023085117340088,
      "learning_rate": 1.1856382349384032e-06,
      "loss": 2.3281,
      "step": 64854
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.069108009338379,
      "learning_rate": 1.185443777840495e-06,
      "loss": 2.3126,
      "step": 64855
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0939656496047974,
      "learning_rate": 1.1852493356856554e-06,
      "loss": 2.4506,
      "step": 64856
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9962584972381592,
      "learning_rate": 1.1850549084742114e-06,
      "loss": 2.1428,
      "step": 64857
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0436488389968872,
      "learning_rate": 1.1848604962064924e-06,
      "loss": 2.3253,
      "step": 64858
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1612794399261475,
      "learning_rate": 1.1846660988828306e-06,
      "loss": 2.4692,
      "step": 64859
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9281930923461914,
      "learning_rate": 1.1844717165035513e-06,
      "loss": 2.2092,
      "step": 64860
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1714918613433838,
      "learning_rate": 1.1842773490689897e-06,
      "loss": 2.1636,
      "step": 64861
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0282493829727173,
      "learning_rate": 1.1840829965794698e-06,
      "loss": 2.2812,
      "step": 64862
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9636191129684448,
      "learning_rate": 1.183888659035326e-06,
      "loss": 2.3029,
      "step": 64863
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.227498173713684,
      "learning_rate": 1.1836943364368835e-06,
      "loss": 2.2735,
      "step": 64864
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0879839658737183,
      "learning_rate": 1.1835000287844756e-06,
      "loss": 2.1582,
      "step": 64865
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1268103122711182,
      "learning_rate": 1.1833057360784272e-06,
      "loss": 2.4551,
      "step": 64866
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0829219818115234,
      "learning_rate": 1.183111458319074e-06,
      "loss": 2.4814,
      "step": 64867
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1335973739624023,
      "learning_rate": 1.1829171955067376e-06,
      "loss": 2.3491,
      "step": 64868
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0059667825698853,
      "learning_rate": 1.1827229476417535e-06,
      "loss": 2.3315,
      "step": 64869
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.020112156867981,
      "learning_rate": 1.1825287147244468e-06,
      "loss": 2.2113,
      "step": 64870
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0700958967208862,
      "learning_rate": 1.182334496755151e-06,
      "loss": 2.129,
      "step": 64871
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.079534888267517,
      "learning_rate": 1.1821402937341898e-06,
      "loss": 2.1696,
      "step": 64872
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0961523056030273,
      "learning_rate": 1.1819461056618986e-06,
      "loss": 2.4024,
      "step": 64873
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.011257290840149,
      "learning_rate": 1.1817519325386018e-06,
      "loss": 2.3177,
      "step": 64874
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1374276876449585,
      "learning_rate": 1.1815577743646322e-06,
      "loss": 2.2703,
      "step": 64875
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0242611169815063,
      "learning_rate": 1.1813636311403154e-06,
      "loss": 2.2825,
      "step": 64876
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1585336923599243,
      "learning_rate": 1.1811695028659852e-06,
      "loss": 2.1967,
      "step": 64877
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0764391422271729,
      "learning_rate": 1.1809753895419662e-06,
      "loss": 2.2926,
      "step": 64878
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0916407108306885,
      "learning_rate": 1.180781291168588e-06,
      "loss": 2.397,
      "step": 64879
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9724499583244324,
      "learning_rate": 1.1805872077461833e-06,
      "loss": 2.3374,
      "step": 64880
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1137458086013794,
      "learning_rate": 1.1803931392750777e-06,
      "loss": 2.5678,
      "step": 64881
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1108731031417847,
      "learning_rate": 1.180199085755599e-06,
      "loss": 2.6432,
      "step": 64882
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0255467891693115,
      "learning_rate": 1.1800050471880808e-06,
      "loss": 2.3139,
      "step": 64883
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0955861806869507,
      "learning_rate": 1.1798110235728498e-06,
      "loss": 2.4214,
      "step": 64884
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.063893437385559,
      "learning_rate": 1.1796170149102326e-06,
      "loss": 2.2889,
      "step": 64885
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.124513864517212,
      "learning_rate": 1.179423021200562e-06,
      "loss": 2.4289,
      "step": 64886
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.017837643623352,
      "learning_rate": 1.1792290424441622e-06,
      "loss": 2.2345,
      "step": 64887
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0953490734100342,
      "learning_rate": 1.1790350786413672e-06,
      "loss": 2.5197,
      "step": 64888
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2322096824645996,
      "learning_rate": 1.1788411297925017e-06,
      "loss": 2.0397,
      "step": 64889
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1187528371810913,
      "learning_rate": 1.1786471958978984e-06,
      "loss": 2.367,
      "step": 64890
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.1394068002700806,
      "learning_rate": 1.1784532769578815e-06,
      "loss": 2.3841,
      "step": 64891
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.061148762702942,
      "learning_rate": 1.1782593729727854e-06,
      "loss": 2.4429,
      "step": 64892
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0656620264053345,
      "learning_rate": 1.1780654839429351e-06,
      "loss": 2.3711,
      "step": 64893
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0344663858413696,
      "learning_rate": 1.1778716098686594e-06,
      "loss": 2.2827,
      "step": 64894
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2715411186218262,
      "learning_rate": 1.1776777507502856e-06,
      "loss": 2.3675,
      "step": 64895
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.0940200090408325,
      "learning_rate": 1.177483906588146e-06,
      "loss": 2.3844,
      "step": 64896
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.052107572555542,
      "learning_rate": 1.1772900773825646e-06,
      "loss": 2.3056,
      "step": 64897
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1950350999832153,
      "learning_rate": 1.1770962631338757e-06,
      "loss": 2.2887,
      "step": 64898
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0353952646255493,
      "learning_rate": 1.1769024638424031e-06,
      "loss": 2.4229,
      "step": 64899
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0511411428451538,
      "learning_rate": 1.176708679508478e-06,
      "loss": 2.3463,
      "step": 64900
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.264609456062317,
      "learning_rate": 1.176514910132427e-06,
      "loss": 2.2666,
      "step": 64901
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0259313583374023,
      "learning_rate": 1.1763211557145814e-06,
      "loss": 2.5456,
      "step": 64902
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1622945070266724,
      "learning_rate": 1.176127416255266e-06,
      "loss": 2.1143,
      "step": 64903
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.060553789138794,
      "learning_rate": 1.1759336917548137e-06,
      "loss": 2.0111,
      "step": 64904
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0613046884536743,
      "learning_rate": 1.1757399822135474e-06,
      "loss": 2.3773,
      "step": 64905
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0604974031448364,
      "learning_rate": 1.1755462876318024e-06,
      "loss": 2.3277,
      "step": 64906
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0602422952651978,
      "learning_rate": 1.1753526080098997e-06,
      "loss": 2.4224,
      "step": 64907
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1060805320739746,
      "learning_rate": 1.1751589433481736e-06,
      "loss": 2.3533,
      "step": 64908
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.067315697669983,
      "learning_rate": 1.1749652936469468e-06,
      "loss": 2.2405,
      "step": 64909
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0427939891815186,
      "learning_rate": 1.1747716589065538e-06,
      "loss": 2.3366,
      "step": 64910
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0626736879348755,
      "learning_rate": 1.1745780391273165e-06,
      "loss": 2.2215,
      "step": 64911
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0404746532440186,
      "learning_rate": 1.174384434309569e-06,
      "loss": 2.3259,
      "step": 64912
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0403447151184082,
      "learning_rate": 1.1741908444536365e-06,
      "loss": 2.2424,
      "step": 64913
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0589358806610107,
      "learning_rate": 1.1739972695598467e-06,
      "loss": 2.2253,
      "step": 64914
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.01602041721344,
      "learning_rate": 1.1738037096285293e-06,
      "loss": 2.2413,
      "step": 64915
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1407572031021118,
      "learning_rate": 1.1736101646600107e-06,
      "loss": 2.3072,
      "step": 64916
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9680832028388977,
      "learning_rate": 1.1734166346546216e-06,
      "loss": 2.2507,
      "step": 64917
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0248855352401733,
      "learning_rate": 1.1732231196126864e-06,
      "loss": 2.4467,
      "step": 64918
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1378535032272339,
      "learning_rate": 1.1730296195345403e-06,
      "loss": 2.4284,
      "step": 64919
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1081403493881226,
      "learning_rate": 1.1728361344205008e-06,
      "loss": 2.4391,
      "step": 64920
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2472209930419922,
      "learning_rate": 1.1726426642709044e-06,
      "loss": 2.2813,
      "step": 64921
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0055841207504272,
      "learning_rate": 1.1724492090860739e-06,
      "loss": 2.2635,
      "step": 64922
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4655580520629883,
      "learning_rate": 1.1722557688663428e-06,
      "loss": 2.1922,
      "step": 64923
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0322457551956177,
      "learning_rate": 1.1720623436120326e-06,
      "loss": 2.2792,
      "step": 64924
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.125009298324585,
      "learning_rate": 1.1718689333234755e-06,
      "loss": 2.531,
      "step": 64925
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0101332664489746,
      "learning_rate": 1.1716755380009982e-06,
      "loss": 2.454,
      "step": 64926
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0853047370910645,
      "learning_rate": 1.1714821576449287e-06,
      "loss": 2.2612,
      "step": 64927
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0659338235855103,
      "learning_rate": 1.1712887922555938e-06,
      "loss": 2.3191,
      "step": 64928
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0921554565429688,
      "learning_rate": 1.1710954418333243e-06,
      "loss": 2.2223,
      "step": 64929
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.25664484500885,
      "learning_rate": 1.1709021063784432e-06,
      "loss": 2.1961,
      "step": 64930
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1601852178573608,
      "learning_rate": 1.1707087858912835e-06,
      "loss": 2.3314,
      "step": 64931
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0339754819869995,
      "learning_rate": 1.1705154803721686e-06,
      "loss": 2.3673,
      "step": 64932
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0372240543365479,
      "learning_rate": 1.1703221898214324e-06,
      "loss": 2.3837,
      "step": 64933
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1492944955825806,
      "learning_rate": 1.1701289142393934e-06,
      "loss": 2.4205,
      "step": 64934
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1053279638290405,
      "learning_rate": 1.1699356536263862e-06,
      "loss": 2.3079,
      "step": 64935
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.100334644317627,
      "learning_rate": 1.1697424079827346e-06,
      "loss": 2.2035,
      "step": 64936
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0454580783843994,
      "learning_rate": 1.1695491773087697e-06,
      "loss": 2.181,
      "step": 64937
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1467454433441162,
      "learning_rate": 1.1693559616048146e-06,
      "loss": 2.3806,
      "step": 64938
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.015068769454956,
      "learning_rate": 1.169162760871202e-06,
      "loss": 2.3614,
      "step": 64939
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0673224925994873,
      "learning_rate": 1.1689695751082574e-06,
      "loss": 2.4269,
      "step": 64940
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1092544794082642,
      "learning_rate": 1.1687764043163053e-06,
      "loss": 2.2911,
      "step": 64941
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.021023154258728,
      "learning_rate": 1.1685832484956783e-06,
      "loss": 2.4175,
      "step": 64942
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9995713233947754,
      "learning_rate": 1.1683901076466986e-06,
      "loss": 2.3717,
      "step": 64943
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1821240186691284,
      "learning_rate": 1.168196981769699e-06,
      "loss": 2.3365,
      "step": 64944
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1007460355758667,
      "learning_rate": 1.168003870865001e-06,
      "loss": 2.3251,
      "step": 64945
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.195330023765564,
      "learning_rate": 1.1678107749329382e-06,
      "loss": 2.303,
      "step": 64946
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.042488694190979,
      "learning_rate": 1.1676176939738348e-06,
      "loss": 2.3851,
      "step": 64947
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.057816982269287,
      "learning_rate": 1.1674246279880185e-06,
      "loss": 2.5156,
      "step": 64948
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0890315771102905,
      "learning_rate": 1.1672315769758135e-06,
      "loss": 2.4606,
      "step": 64949
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1099680662155151,
      "learning_rate": 1.1670385409375529e-06,
      "loss": 2.55,
      "step": 64950
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0847418308258057,
      "learning_rate": 1.1668455198735585e-06,
      "loss": 2.3009,
      "step": 64951
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1549468040466309,
      "learning_rate": 1.1666525137841621e-06,
      "loss": 2.0297,
      "step": 64952
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1058435440063477,
      "learning_rate": 1.1664595226696862e-06,
      "loss": 2.3076,
      "step": 64953
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1705715656280518,
      "learning_rate": 1.1662665465304634e-06,
      "loss": 2.2425,
      "step": 64954
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0371251106262207,
      "learning_rate": 1.1660735853668158e-06,
      "loss": 2.4191,
      "step": 64955
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0532567501068115,
      "learning_rate": 1.1658806391790745e-06,
      "loss": 2.2862,
      "step": 64956
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0484260320663452,
      "learning_rate": 1.1656877079675622e-06,
      "loss": 2.2717,
      "step": 64957
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0984015464782715,
      "learning_rate": 1.165494791732612e-06,
      "loss": 2.2654,
      "step": 64958
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1573209762573242,
      "learning_rate": 1.1653018904745472e-06,
      "loss": 2.2032,
      "step": 64959
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0211578607559204,
      "learning_rate": 1.1651090041936941e-06,
      "loss": 2.3442,
      "step": 64960
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9502140879631042,
      "learning_rate": 1.1649161328903802e-06,
      "loss": 2.3168,
      "step": 64961
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0771821737289429,
      "learning_rate": 1.1647232765649341e-06,
      "loss": 2.3511,
      "step": 64962
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.10621976852417,
      "learning_rate": 1.16453043521768e-06,
      "loss": 2.4018,
      "step": 64963
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6857945919036865,
      "learning_rate": 1.1643376088489477e-06,
      "loss": 2.3873,
      "step": 64964
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0827410221099854,
      "learning_rate": 1.1641447974590624e-06,
      "loss": 2.1835,
      "step": 64965
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1185956001281738,
      "learning_rate": 1.1639520010483519e-06,
      "loss": 2.4658,
      "step": 64966
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.120468020439148,
      "learning_rate": 1.163759219617142e-06,
      "loss": 2.4475,
      "step": 64967
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0737725496292114,
      "learning_rate": 1.1635664531657609e-06,
      "loss": 2.297,
      "step": 64968
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0414183139801025,
      "learning_rate": 1.1633737016945356e-06,
      "loss": 2.4063,
      "step": 64969
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0463495254516602,
      "learning_rate": 1.1631809652037896e-06,
      "loss": 2.3771,
      "step": 64970
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.145305871963501,
      "learning_rate": 1.1629882436938533e-06,
      "loss": 2.2832,
      "step": 64971
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1643885374069214,
      "learning_rate": 1.1627955371650523e-06,
      "loss": 2.2527,
      "step": 64972
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9859727621078491,
      "learning_rate": 1.1626028456177118e-06,
      "loss": 2.2562,
      "step": 64973
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1975458860397339,
      "learning_rate": 1.1624101690521605e-06,
      "loss": 2.5642,
      "step": 64974
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.001234531402588,
      "learning_rate": 1.1622175074687247e-06,
      "loss": 2.2835,
      "step": 64975
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0789918899536133,
      "learning_rate": 1.1620248608677287e-06,
      "loss": 2.2311,
      "step": 64976
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0723363161087036,
      "learning_rate": 1.1618322292495032e-06,
      "loss": 2.12,
      "step": 64977
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.047261118888855,
      "learning_rate": 1.1616396126143703e-06,
      "loss": 2.4266,
      "step": 64978
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0459412336349487,
      "learning_rate": 1.1614470109626609e-06,
      "loss": 2.3099,
      "step": 64979
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0162999629974365,
      "learning_rate": 1.1612544242946966e-06,
      "loss": 2.0641,
      "step": 64980
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2354339361190796,
      "learning_rate": 1.1610618526108087e-06,
      "loss": 2.0378,
      "step": 64981
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0253851413726807,
      "learning_rate": 1.1608692959113199e-06,
      "loss": 2.1999,
      "step": 64982
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.212326169013977,
      "learning_rate": 1.1606767541965603e-06,
      "loss": 2.2748,
      "step": 64983
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.178972840309143,
      "learning_rate": 1.1604842274668526e-06,
      "loss": 2.2281,
      "step": 64984
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1396629810333252,
      "learning_rate": 1.160291715722528e-06,
      "loss": 2.2767,
      "step": 64985
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0963740348815918,
      "learning_rate": 1.160099218963907e-06,
      "loss": 2.3408,
      "step": 64986
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0306447744369507,
      "learning_rate": 1.1599067371913198e-06,
      "loss": 2.5489,
      "step": 64987
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0616633892059326,
      "learning_rate": 1.15971427040509e-06,
      "loss": 2.1233,
      "step": 64988
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.035838007926941,
      "learning_rate": 1.159521818605548e-06,
      "loss": 2.39,
      "step": 64989
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.117107629776001,
      "learning_rate": 1.159329381793014e-06,
      "loss": 2.1484,
      "step": 64990
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9903610944747925,
      "learning_rate": 1.1591369599678215e-06,
      "loss": 2.1233,
      "step": 64991
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.998105525970459,
      "learning_rate": 1.1589445531302902e-06,
      "loss": 2.3281,
      "step": 64992
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.068504810333252,
      "learning_rate": 1.1587521612807506e-06,
      "loss": 2.2384,
      "step": 64993
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2416646480560303,
      "learning_rate": 1.1585597844195261e-06,
      "loss": 2.285,
      "step": 64994
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.042435884475708,
      "learning_rate": 1.1583674225469465e-06,
      "loss": 2.3375,
      "step": 64995
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2569568157196045,
      "learning_rate": 1.1581750756633347e-06,
      "loss": 2.3658,
      "step": 64996
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.075821042060852,
      "learning_rate": 1.157982743769016e-06,
      "loss": 2.3476,
      "step": 64997
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0694793462753296,
      "learning_rate": 1.1577904268643191e-06,
      "loss": 2.4258,
      "step": 64998
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.146620512008667,
      "learning_rate": 1.1575981249495694e-06,
      "loss": 2.4003,
      "step": 64999
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1218798160552979,
      "learning_rate": 1.1574058380250907e-06,
      "loss": 2.3577,
      "step": 65000
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.231600046157837,
      "learning_rate": 1.1572135660912132e-06,
      "loss": 2.405,
      "step": 65001
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1582329273223877,
      "learning_rate": 1.1570213091482596e-06,
      "loss": 2.289,
      "step": 65002
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0270766019821167,
      "learning_rate": 1.1568290671965544e-06,
      "loss": 2.1549,
      "step": 65003
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9973148107528687,
      "learning_rate": 1.1566368402364282e-06,
      "loss": 2.2586,
      "step": 65004
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0850021839141846,
      "learning_rate": 1.156444628268203e-06,
      "loss": 2.2391,
      "step": 65005
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0611305236816406,
      "learning_rate": 1.1562524312922062e-06,
      "loss": 2.3748,
      "step": 65006
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0331006050109863,
      "learning_rate": 1.1560602493087636e-06,
      "loss": 2.3111,
      "step": 65007
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3523008823394775,
      "learning_rate": 1.1558680823182011e-06,
      "loss": 2.4151,
      "step": 65008
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1674082279205322,
      "learning_rate": 1.155675930320843e-06,
      "loss": 2.3072,
      "step": 65009
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.049023985862732,
      "learning_rate": 1.1554837933170181e-06,
      "loss": 2.1872,
      "step": 65010
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0139448642730713,
      "learning_rate": 1.1552916713070483e-06,
      "loss": 2.3303,
      "step": 65011
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0578980445861816,
      "learning_rate": 1.1550995642912665e-06,
      "loss": 2.3924,
      "step": 65012
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1332669258117676,
      "learning_rate": 1.1549074722699883e-06,
      "loss": 2.2912,
      "step": 65013
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1107242107391357,
      "learning_rate": 1.1547153952435452e-06,
      "loss": 2.305,
      "step": 65014
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.006247878074646,
      "learning_rate": 1.1545233332122607e-06,
      "loss": 2.2302,
      "step": 65015
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1263325214385986,
      "learning_rate": 1.1543312861764644e-06,
      "loss": 2.3234,
      "step": 65016
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1467969417572021,
      "learning_rate": 1.154139254136476e-06,
      "loss": 2.294,
      "step": 65017
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1887435913085938,
      "learning_rate": 1.1539472370926262e-06,
      "loss": 2.4148,
      "step": 65018
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0421949625015259,
      "learning_rate": 1.1537552350452364e-06,
      "loss": 2.5166,
      "step": 65019
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.00048828125,
      "learning_rate": 1.1535632479946357e-06,
      "loss": 2.1599,
      "step": 65020
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1247754096984863,
      "learning_rate": 1.1533712759411465e-06,
      "loss": 2.5573,
      "step": 65021
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.064321517944336,
      "learning_rate": 1.1531793188850982e-06,
      "loss": 2.3137,
      "step": 65022
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1962937116622925,
      "learning_rate": 1.1529873768268106e-06,
      "loss": 2.313,
      "step": 65023
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.200335144996643,
      "learning_rate": 1.152795449766615e-06,
      "loss": 2.2987,
      "step": 65024
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0871528387069702,
      "learning_rate": 1.152603537704834e-06,
      "loss": 2.3084,
      "step": 65025
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.060752034187317,
      "learning_rate": 1.1524116406417918e-06,
      "loss": 2.5535,
      "step": 65026
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0539891719818115,
      "learning_rate": 1.152219758577814e-06,
      "loss": 2.2311,
      "step": 65027
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.006569266319275,
      "learning_rate": 1.1520278915132287e-06,
      "loss": 2.2587,
      "step": 65028
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9800601005554199,
      "learning_rate": 1.1518360394483562e-06,
      "loss": 2.1037,
      "step": 65029
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1273446083068848,
      "learning_rate": 1.1516442023835272e-06,
      "loss": 2.5917,
      "step": 65030
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0870126485824585,
      "learning_rate": 1.1514523803190658e-06,
      "loss": 2.3018,
      "step": 65031
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0620288848876953,
      "learning_rate": 1.1512605732552918e-06,
      "loss": 2.3964,
      "step": 65032
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0810304880142212,
      "learning_rate": 1.151068781192538e-06,
      "loss": 2.0968,
      "step": 65033
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1078753471374512,
      "learning_rate": 1.1508770041311235e-06,
      "loss": 2.2342,
      "step": 65034
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.055606484413147,
      "learning_rate": 1.1506852420713777e-06,
      "loss": 2.289,
      "step": 65035
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.998918354511261,
      "learning_rate": 1.1504934950136216e-06,
      "loss": 2.2251,
      "step": 65036
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1178357601165771,
      "learning_rate": 1.1503017629581847e-06,
      "loss": 2.3798,
      "step": 65037
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9866409301757812,
      "learning_rate": 1.1501100459053894e-06,
      "loss": 2.3427,
      "step": 65038
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9522507190704346,
      "learning_rate": 1.1499183438555617e-06,
      "loss": 2.2641,
      "step": 65039
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2014230489730835,
      "learning_rate": 1.1497266568090248e-06,
      "loss": 2.5455,
      "step": 65040
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0734995603561401,
      "learning_rate": 1.1495349847661064e-06,
      "loss": 2.4108,
      "step": 65041
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1698452234268188,
      "learning_rate": 1.149343327727127e-06,
      "loss": 2.2082,
      "step": 65042
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2623741626739502,
      "learning_rate": 1.1491516856924178e-06,
      "loss": 2.3723,
      "step": 65043
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0706158876419067,
      "learning_rate": 1.1489600586622973e-06,
      "loss": 2.1994,
      "step": 65044
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0775880813598633,
      "learning_rate": 1.1487684466370963e-06,
      "loss": 2.2652,
      "step": 65045
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0818841457366943,
      "learning_rate": 1.1485768496171335e-06,
      "loss": 2.2311,
      "step": 65046
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3536405563354492,
      "learning_rate": 1.1483852676027397e-06,
      "loss": 2.4155,
      "step": 65047
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.129921793937683,
      "learning_rate": 1.1481937005942356e-06,
      "loss": 2.3183,
      "step": 65048
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0216315984725952,
      "learning_rate": 1.148002148591948e-06,
      "loss": 2.1028,
      "step": 65049
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1545069217681885,
      "learning_rate": 1.1478106115961995e-06,
      "loss": 2.2838,
      "step": 65050
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.423478126525879,
      "learning_rate": 1.1476190896073213e-06,
      "loss": 2.3244,
      "step": 65051
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.116312861442566,
      "learning_rate": 1.1474275826256275e-06,
      "loss": 2.1152,
      "step": 65052
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2375353574752808,
      "learning_rate": 1.1472360906514512e-06,
      "loss": 2.3207,
      "step": 65053
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1778758764266968,
      "learning_rate": 1.147044613685111e-06,
      "loss": 2.3942,
      "step": 65054
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9678474068641663,
      "learning_rate": 1.1468531517269376e-06,
      "loss": 2.509,
      "step": 65055
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0786926746368408,
      "learning_rate": 1.1466617047772499e-06,
      "loss": 2.2201,
      "step": 65056
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1823114156723022,
      "learning_rate": 1.1464702728363787e-06,
      "loss": 2.3895,
      "step": 65057
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1027803421020508,
      "learning_rate": 1.1462788559046435e-06,
      "loss": 2.4235,
      "step": 65058
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1396105289459229,
      "learning_rate": 1.1460874539823685e-06,
      "loss": 2.4395,
      "step": 65059
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9891812205314636,
      "learning_rate": 1.1458960670698816e-06,
      "loss": 2.1912,
      "step": 65060
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.091269850730896,
      "learning_rate": 1.1457046951675045e-06,
      "loss": 2.3153,
      "step": 65061
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.074108362197876,
      "learning_rate": 1.1455133382755646e-06,
      "loss": 2.509,
      "step": 65062
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1495251655578613,
      "learning_rate": 1.1453219963943817e-06,
      "loss": 2.0979,
      "step": 65063
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0914111137390137,
      "learning_rate": 1.145130669524286e-06,
      "loss": 2.3354,
      "step": 65064
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0803499221801758,
      "learning_rate": 1.1449393576655976e-06,
      "loss": 2.436,
      "step": 65065
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.142230749130249,
      "learning_rate": 1.1447480608186424e-06,
      "loss": 2.2053,
      "step": 65066
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0802230834960938,
      "learning_rate": 1.1445567789837432e-06,
      "loss": 2.2763,
      "step": 65067
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6843029260635376,
      "learning_rate": 1.1443655121612262e-06,
      "loss": 2.4459,
      "step": 65068
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0286800861358643,
      "learning_rate": 1.1441742603514128e-06,
      "loss": 2.3751,
      "step": 65069
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1004477739334106,
      "learning_rate": 1.1439830235546322e-06,
      "loss": 2.0938,
      "step": 65070
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2932754755020142,
      "learning_rate": 1.1437918017712025e-06,
      "loss": 2.5934,
      "step": 65071
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1224069595336914,
      "learning_rate": 1.143600595001454e-06,
      "loss": 2.4047,
      "step": 65072
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1702405214309692,
      "learning_rate": 1.1434094032457055e-06,
      "loss": 2.2818,
      "step": 65073
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1990948915481567,
      "learning_rate": 1.1432182265042857e-06,
      "loss": 2.2974,
      "step": 65074
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1041182279586792,
      "learning_rate": 1.1430270647775132e-06,
      "loss": 2.4237,
      "step": 65075
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1873399019241333,
      "learning_rate": 1.142835918065719e-06,
      "loss": 2.1519,
      "step": 65076
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0324081182479858,
      "learning_rate": 1.1426447863692203e-06,
      "loss": 2.3496,
      "step": 65077
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9942618012428284,
      "learning_rate": 1.1424536696883492e-06,
      "loss": 2.4962,
      "step": 65078
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.06341552734375,
      "learning_rate": 1.14226256802342e-06,
      "loss": 2.4817,
      "step": 65079
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0836007595062256,
      "learning_rate": 1.1420714813747647e-06,
      "loss": 2.5451,
      "step": 65080
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0721979141235352,
      "learning_rate": 1.1418804097427005e-06,
      "loss": 2.5247,
      "step": 65081
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0420793294906616,
      "learning_rate": 1.1416893531275585e-06,
      "loss": 2.2878,
      "step": 65082
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2822563648223877,
      "learning_rate": 1.1414983115296551e-06,
      "loss": 2.3079,
      "step": 65083
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0834252834320068,
      "learning_rate": 1.1413072849493212e-06,
      "loss": 2.2794,
      "step": 65084
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.133095622062683,
      "learning_rate": 1.1411162733868753e-06,
      "loss": 2.39,
      "step": 65085
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0573598146438599,
      "learning_rate": 1.1409252768426448e-06,
      "loss": 2.1123,
      "step": 65086
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1677799224853516,
      "learning_rate": 1.140734295316953e-06,
      "loss": 2.2136,
      "step": 65087
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1232410669326782,
      "learning_rate": 1.1405433288101199e-06,
      "loss": 2.284,
      "step": 65088
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.119507074356079,
      "learning_rate": 1.1403523773224744e-06,
      "loss": 2.3427,
      "step": 65089
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.051874041557312,
      "learning_rate": 1.140161440854336e-06,
      "loss": 2.258,
      "step": 65090
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0291640758514404,
      "learning_rate": 1.1399705194060318e-06,
      "loss": 2.3515,
      "step": 65091
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2414954900741577,
      "learning_rate": 1.139779612977885e-06,
      "loss": 2.4158,
      "step": 65092
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1776294708251953,
      "learning_rate": 1.1395887215702172e-06,
      "loss": 2.4316,
      "step": 65093
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2066426277160645,
      "learning_rate": 1.139397845183351e-06,
      "loss": 2.2778,
      "step": 65094
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9616270065307617,
      "learning_rate": 1.1392069838176134e-06,
      "loss": 2.2937,
      "step": 65095
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.05745530128479,
      "learning_rate": 1.1390161374733254e-06,
      "loss": 2.0295,
      "step": 65096
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0619752407073975,
      "learning_rate": 1.1388253061508137e-06,
      "loss": 2.1387,
      "step": 65097
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0216268301010132,
      "learning_rate": 1.1386344898503976e-06,
      "loss": 2.1923,
      "step": 65098
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2195035219192505,
      "learning_rate": 1.1384436885724048e-06,
      "loss": 2.3255,
      "step": 65099
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.060132384300232,
      "learning_rate": 1.138252902317154e-06,
      "loss": 2.3756,
      "step": 65100
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0120947360992432,
      "learning_rate": 1.1380621310849738e-06,
      "loss": 2.298,
      "step": 65101
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.136531949043274,
      "learning_rate": 1.1378713748761839e-06,
      "loss": 2.3173,
      "step": 65102
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9896083474159241,
      "learning_rate": 1.1376806336911106e-06,
      "loss": 2.3659,
      "step": 65103
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.062441110610962,
      "learning_rate": 1.137489907530075e-06,
      "loss": 2.3638,
      "step": 65104
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.134454607963562,
      "learning_rate": 1.137299196393401e-06,
      "loss": 2.264,
      "step": 65105
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0730243921279907,
      "learning_rate": 1.1371085002814108e-06,
      "loss": 2.3456,
      "step": 65106
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0087255239486694,
      "learning_rate": 1.1369178191944297e-06,
      "loss": 2.2436,
      "step": 65107
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0352314710617065,
      "learning_rate": 1.1367271531327794e-06,
      "loss": 2.1526,
      "step": 65108
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0713001489639282,
      "learning_rate": 1.1365365020967844e-06,
      "loss": 2.3489,
      "step": 65109
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0435364246368408,
      "learning_rate": 1.1363458660867666e-06,
      "loss": 2.4184,
      "step": 65110
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0339452028274536,
      "learning_rate": 1.136155245103051e-06,
      "loss": 2.108,
      "step": 65111
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9345007538795471,
      "learning_rate": 1.1359646391459577e-06,
      "loss": 2.3928,
      "step": 65112
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1230394840240479,
      "learning_rate": 1.1357740482158141e-06,
      "loss": 2.3011,
      "step": 65113
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0773638486862183,
      "learning_rate": 1.1355834723129411e-06,
      "loss": 2.4535,
      "step": 65114
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.006005048751831,
      "learning_rate": 1.1353929114376594e-06,
      "loss": 2.1672,
      "step": 65115
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0483098030090332,
      "learning_rate": 1.1352023655902966e-06,
      "loss": 2.3782,
      "step": 65116
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1160422563552856,
      "learning_rate": 1.1350118347711736e-06,
      "loss": 2.5203,
      "step": 65117
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9988029599189758,
      "learning_rate": 1.13482131898061e-06,
      "loss": 2.1751,
      "step": 65118
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0743865966796875,
      "learning_rate": 1.1346308182189348e-06,
      "loss": 2.1766,
      "step": 65119
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0057566165924072,
      "learning_rate": 1.1344403324864683e-06,
      "loss": 2.2761,
      "step": 65120
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.162976622581482,
      "learning_rate": 1.1342498617835317e-06,
      "loss": 2.3457,
      "step": 65121
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.098760962486267,
      "learning_rate": 1.1340594061104503e-06,
      "loss": 2.3276,
      "step": 65122
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0923064947128296,
      "learning_rate": 1.133868965467545e-06,
      "loss": 2.1842,
      "step": 65123
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.041398525238037,
      "learning_rate": 1.133678539855142e-06,
      "loss": 2.4186,
      "step": 65124
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0180180072784424,
      "learning_rate": 1.13348812927356e-06,
      "loss": 2.3154,
      "step": 65125
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0753611326217651,
      "learning_rate": 1.1332977337231255e-06,
      "loss": 2.3877,
      "step": 65126
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0812305212020874,
      "learning_rate": 1.133107353204157e-06,
      "loss": 2.2309,
      "step": 65127
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1190868616104126,
      "learning_rate": 1.1329169877169821e-06,
      "loss": 2.5681,
      "step": 65128
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1529021263122559,
      "learning_rate": 1.1327266372619195e-06,
      "loss": 2.2048,
      "step": 65129
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.070308804512024,
      "learning_rate": 1.1325363018392966e-06,
      "loss": 2.4609,
      "step": 65130
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0665994882583618,
      "learning_rate": 1.1323459814494297e-06,
      "loss": 2.4902,
      "step": 65131
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1685541868209839,
      "learning_rate": 1.1321556760926466e-06,
      "loss": 2.4109,
      "step": 65132
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.139203429222107,
      "learning_rate": 1.1319653857692669e-06,
      "loss": 2.2421,
      "step": 65133
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0676599740982056,
      "learning_rate": 1.131775110479616e-06,
      "loss": 2.1544,
      "step": 65134
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2478045225143433,
      "learning_rate": 1.1315848502240123e-06,
      "loss": 2.1302,
      "step": 65135
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1296659708023071,
      "learning_rate": 1.1313946050027835e-06,
      "loss": 2.2725,
      "step": 65136
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0962482690811157,
      "learning_rate": 1.1312043748162472e-06,
      "loss": 2.053,
      "step": 65137
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.104164719581604,
      "learning_rate": 1.1310141596647306e-06,
      "loss": 2.1372,
      "step": 65138
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0392951965332031,
      "learning_rate": 1.1308239595485515e-06,
      "loss": 2.3679,
      "step": 65139
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1358134746551514,
      "learning_rate": 1.1306337744680362e-06,
      "loss": 2.4251,
      "step": 65140
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0417890548706055,
      "learning_rate": 1.1304436044235033e-06,
      "loss": 2.3317,
      "step": 65141
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0986781120300293,
      "learning_rate": 1.1302534494152806e-06,
      "loss": 2.2183,
      "step": 65142
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1265628337860107,
      "learning_rate": 1.1300633094436864e-06,
      "loss": 2.2454,
      "step": 65143
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0396164655685425,
      "learning_rate": 1.1298731845090449e-06,
      "loss": 2.4582,
      "step": 65144
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0859296321868896,
      "learning_rate": 1.1296830746116749e-06,
      "loss": 2.3857,
      "step": 65145
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1788791418075562,
      "learning_rate": 1.1294929797519027e-06,
      "loss": 2.439,
      "step": 65146
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.043802261352539,
      "learning_rate": 1.129302899930048e-06,
      "loss": 2.2108,
      "step": 65147
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0128822326660156,
      "learning_rate": 1.1291128351464353e-06,
      "loss": 2.4441,
      "step": 65148
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1224535703659058,
      "learning_rate": 1.1289227854013863e-06,
      "loss": 2.2096,
      "step": 65149
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1208316087722778,
      "learning_rate": 1.1287327506952206e-06,
      "loss": 2.4862,
      "step": 65150
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0079782009124756,
      "learning_rate": 1.1285427310282638e-06,
      "loss": 2.3184,
      "step": 65151
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1121509075164795,
      "learning_rate": 1.1283527264008343e-06,
      "loss": 2.2371,
      "step": 65152
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.094186544418335,
      "learning_rate": 1.1281627368132586e-06,
      "loss": 2.2658,
      "step": 65153
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.051721215248108,
      "learning_rate": 1.1279727622658554e-06,
      "loss": 2.2732,
      "step": 65154
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0476782321929932,
      "learning_rate": 1.1277828027589488e-06,
      "loss": 2.5277,
      "step": 65155
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.009552240371704,
      "learning_rate": 1.1275928582928586e-06,
      "loss": 2.4,
      "step": 65156
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0491228103637695,
      "learning_rate": 1.1274029288679134e-06,
      "loss": 2.4461,
      "step": 65157
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1561421155929565,
      "learning_rate": 1.127213014484424e-06,
      "loss": 2.3812,
      "step": 65158
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0990350246429443,
      "learning_rate": 1.1270231151427213e-06,
      "loss": 2.1011,
      "step": 65159
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0570787191390991,
      "learning_rate": 1.1268332308431228e-06,
      "loss": 2.3875,
      "step": 65160
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1031614542007446,
      "learning_rate": 1.1266433615859528e-06,
      "loss": 2.0652,
      "step": 65161
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1018978357315063,
      "learning_rate": 1.126453507371531e-06,
      "loss": 2.2474,
      "step": 65162
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0488672256469727,
      "learning_rate": 1.1262636682001827e-06,
      "loss": 2.1919,
      "step": 65163
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0587177276611328,
      "learning_rate": 1.126073844072224e-06,
      "loss": 2.4235,
      "step": 65164
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.201956033706665,
      "learning_rate": 1.125884034987984e-06,
      "loss": 2.2402,
      "step": 65165
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1978063583374023,
      "learning_rate": 1.1256942409477777e-06,
      "loss": 2.2946,
      "step": 65166
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0274677276611328,
      "learning_rate": 1.125504461951933e-06,
      "loss": 2.317,
      "step": 65167
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0351805686950684,
      "learning_rate": 1.1253146980007657e-06,
      "loss": 2.5534,
      "step": 65168
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0273914337158203,
      "learning_rate": 1.1251249490946026e-06,
      "loss": 2.2717,
      "step": 65169
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.982654869556427,
      "learning_rate": 1.1249352152337622e-06,
      "loss": 2.2123,
      "step": 65170
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.063148856163025,
      "learning_rate": 1.1247454964185688e-06,
      "loss": 2.4494,
      "step": 65171
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9701114892959595,
      "learning_rate": 1.124555792649339e-06,
      "loss": 2.2046,
      "step": 65172
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3640958070755005,
      "learning_rate": 1.1243661039263999e-06,
      "loss": 2.2829,
      "step": 65173
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0377575159072876,
      "learning_rate": 1.1241764302500691e-06,
      "loss": 2.2632,
      "step": 65174
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0995348691940308,
      "learning_rate": 1.123986771620671e-06,
      "loss": 2.1317,
      "step": 65175
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2391753196716309,
      "learning_rate": 1.1237971280385274e-06,
      "loss": 2.5244,
      "step": 65176
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1293466091156006,
      "learning_rate": 1.123607499503956e-06,
      "loss": 2.3752,
      "step": 65177
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.017301321029663,
      "learning_rate": 1.1234178860172818e-06,
      "loss": 2.3085,
      "step": 65178
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0207688808441162,
      "learning_rate": 1.1232282875788237e-06,
      "loss": 2.3793,
      "step": 65179
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.071305751800537,
      "learning_rate": 1.1230387041889069e-06,
      "loss": 2.3577,
      "step": 65180
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.020668625831604,
      "learning_rate": 1.1228491358478477e-06,
      "loss": 2.2018,
      "step": 65181
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9484713673591614,
      "learning_rate": 1.1226595825559727e-06,
      "loss": 2.2237,
      "step": 65182
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.103448748588562,
      "learning_rate": 1.1224700443136004e-06,
      "loss": 2.3223,
      "step": 65183
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0889427661895752,
      "learning_rate": 1.122280521121053e-06,
      "loss": 2.3799,
      "step": 65184
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1344459056854248,
      "learning_rate": 1.1220910129786488e-06,
      "loss": 2.323,
      "step": 65185
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.042161226272583,
      "learning_rate": 1.1219015198867144e-06,
      "loss": 2.5176,
      "step": 65186
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0832449197769165,
      "learning_rate": 1.1217120418455652e-06,
      "loss": 2.4495,
      "step": 65187
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1210485696792603,
      "learning_rate": 1.1215225788555284e-06,
      "loss": 2.3454,
      "step": 65188
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.308611512184143,
      "learning_rate": 1.1213331309169196e-06,
      "loss": 2.5177,
      "step": 65189
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0813409090042114,
      "learning_rate": 1.1211436980300638e-06,
      "loss": 2.1043,
      "step": 65190
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0717099905014038,
      "learning_rate": 1.12095428019528e-06,
      "loss": 2.4288,
      "step": 65191
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1314992904663086,
      "learning_rate": 1.120764877412892e-06,
      "loss": 2.1497,
      "step": 65192
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1328685283660889,
      "learning_rate": 1.1205754896832166e-06,
      "loss": 2.4267,
      "step": 65193
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0347155332565308,
      "learning_rate": 1.12038611700658e-06,
      "loss": 2.4257,
      "step": 65194
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9321829676628113,
      "learning_rate": 1.1201967593832986e-06,
      "loss": 2.3251,
      "step": 65195
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0777428150177002,
      "learning_rate": 1.1200074168136987e-06,
      "loss": 2.4743,
      "step": 65196
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2509849071502686,
      "learning_rate": 1.1198180892980948e-06,
      "loss": 2.4238,
      "step": 65197
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9818488359451294,
      "learning_rate": 1.1196287768368118e-06,
      "loss": 2.4676,
      "step": 65198
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0222479104995728,
      "learning_rate": 1.1194394794301688e-06,
      "loss": 2.4009,
      "step": 65199
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1980915069580078,
      "learning_rate": 1.1192501970784908e-06,
      "loss": 2.1467,
      "step": 65200
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0530227422714233,
      "learning_rate": 1.119060929782092e-06,
      "loss": 2.2618,
      "step": 65201
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0022221803665161,
      "learning_rate": 1.1188716775413e-06,
      "loss": 2.1902,
      "step": 65202
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5442355871200562,
      "learning_rate": 1.11868244035643e-06,
      "loss": 2.0527,
      "step": 65203
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0391650199890137,
      "learning_rate": 1.1184932182278075e-06,
      "loss": 2.5161,
      "step": 65204
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2690378427505493,
      "learning_rate": 1.118304011155752e-06,
      "loss": 2.3365,
      "step": 65205
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0746761560440063,
      "learning_rate": 1.11811481914058e-06,
      "loss": 2.3578,
      "step": 65206
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9984070658683777,
      "learning_rate": 1.117925642182619e-06,
      "loss": 2.1996,
      "step": 65207
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2980265617370605,
      "learning_rate": 1.1177364802821832e-06,
      "loss": 2.3163,
      "step": 65208
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1054338216781616,
      "learning_rate": 1.1175473334395993e-06,
      "loss": 2.356,
      "step": 65209
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2053152322769165,
      "learning_rate": 1.1173582016551842e-06,
      "loss": 2.3789,
      "step": 65210
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1880807876586914,
      "learning_rate": 1.1171690849292605e-06,
      "loss": 2.282,
      "step": 65211
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.183691382408142,
      "learning_rate": 1.1169799832621452e-06,
      "loss": 2.2169,
      "step": 65212
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0855454206466675,
      "learning_rate": 1.116790896654164e-06,
      "loss": 2.3773,
      "step": 65213
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0668150186538696,
      "learning_rate": 1.1166018251056322e-06,
      "loss": 2.1935,
      "step": 65214
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2061543464660645,
      "learning_rate": 1.1164127686168758e-06,
      "loss": 2.1105,
      "step": 65215
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0817787647247314,
      "learning_rate": 1.1162237271882092e-06,
      "loss": 2.3316,
      "step": 65216
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0785959959030151,
      "learning_rate": 1.116034700819959e-06,
      "loss": 2.256,
      "step": 65217
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.341754674911499,
      "learning_rate": 1.1158456895124414e-06,
      "loss": 2.2199,
      "step": 65218
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.26888906955719,
      "learning_rate": 1.1156566932659797e-06,
      "loss": 2.2654,
      "step": 65219
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.146449327468872,
      "learning_rate": 1.1154677120808899e-06,
      "loss": 2.266,
      "step": 65220
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1410706043243408,
      "learning_rate": 1.1152787459574988e-06,
      "loss": 2.436,
      "step": 65221
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1609834432601929,
      "learning_rate": 1.1150897948961204e-06,
      "loss": 2.2267,
      "step": 65222
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0192965269088745,
      "learning_rate": 1.1149008588970823e-06,
      "loss": 2.4204,
      "step": 65223
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0453307628631592,
      "learning_rate": 1.1147119379606964e-06,
      "loss": 2.2044,
      "step": 65224
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.506537914276123,
      "learning_rate": 1.114523032087289e-06,
      "loss": 2.2355,
      "step": 65225
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1131709814071655,
      "learning_rate": 1.1143341412771758e-06,
      "loss": 2.3576,
      "step": 65226
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0687428712844849,
      "learning_rate": 1.114145265530683e-06,
      "loss": 2.3903,
      "step": 65227
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0718120336532593,
      "learning_rate": 1.1139564048481234e-06,
      "loss": 2.2902,
      "step": 65228
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2280997037887573,
      "learning_rate": 1.1137675592298248e-06,
      "loss": 2.4026,
      "step": 65229
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0460402965545654,
      "learning_rate": 1.1135787286761002e-06,
      "loss": 2.5093,
      "step": 65230
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1179949045181274,
      "learning_rate": 1.1133899131872762e-06,
      "loss": 2.3406,
      "step": 65231
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1859122514724731,
      "learning_rate": 1.1132011127636688e-06,
      "loss": 2.3618,
      "step": 65232
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9973474144935608,
      "learning_rate": 1.1130123274055982e-06,
      "loss": 2.0883,
      "step": 65233
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1495893001556396,
      "learning_rate": 1.1128235571133872e-06,
      "loss": 2.3922,
      "step": 65234
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0134730339050293,
      "learning_rate": 1.1126348018873512e-06,
      "loss": 2.2663,
      "step": 65235
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0040385723114014,
      "learning_rate": 1.1124460617278165e-06,
      "loss": 2.1374,
      "step": 65236
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0468677282333374,
      "learning_rate": 1.1122573366350987e-06,
      "loss": 2.249,
      "step": 65237
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.118363380432129,
      "learning_rate": 1.1120686266095182e-06,
      "loss": 2.2502,
      "step": 65238
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.21738600730896,
      "learning_rate": 1.111879931651394e-06,
      "loss": 2.5833,
      "step": 65239
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9491798281669617,
      "learning_rate": 1.111691251761049e-06,
      "loss": 2.3771,
      "step": 65240
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1087514162063599,
      "learning_rate": 1.1115025869387985e-06,
      "loss": 2.2044,
      "step": 65241
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0595895051956177,
      "learning_rate": 1.111313937184968e-06,
      "loss": 2.1722,
      "step": 65242
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1747170686721802,
      "learning_rate": 1.1111253024998724e-06,
      "loss": 2.4214,
      "step": 65243
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0352349281311035,
      "learning_rate": 1.1109366828838364e-06,
      "loss": 2.2928,
      "step": 65244
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2190815210342407,
      "learning_rate": 1.1107480783371739e-06,
      "loss": 2.3424,
      "step": 65245
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1475763320922852,
      "learning_rate": 1.11055948886021e-06,
      "loss": 2.239,
      "step": 65246
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.161807894706726,
      "learning_rate": 1.1103709144532604e-06,
      "loss": 2.6049,
      "step": 65247
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0251785516738892,
      "learning_rate": 1.110182355116648e-06,
      "loss": 2.3265,
      "step": 65248
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0534921884536743,
      "learning_rate": 1.1099938108506914e-06,
      "loss": 2.2482,
      "step": 65249
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1485222578048706,
      "learning_rate": 1.1098052816557093e-06,
      "loss": 2.3817,
      "step": 65250
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.030433177947998,
      "learning_rate": 1.1096167675320202e-06,
      "loss": 2.1839,
      "step": 65251
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0716807842254639,
      "learning_rate": 1.1094282684799473e-06,
      "loss": 2.3223,
      "step": 65252
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1046189069747925,
      "learning_rate": 1.109239784499806e-06,
      "loss": 2.347,
      "step": 65253
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9598703384399414,
      "learning_rate": 1.1090513155919202e-06,
      "loss": 2.2479,
      "step": 65254
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.00393545627594,
      "learning_rate": 1.1088628617566045e-06,
      "loss": 2.2523,
      "step": 65255
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1274678707122803,
      "learning_rate": 1.1086744229941838e-06,
      "loss": 2.2915,
      "step": 65256
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0380496978759766,
      "learning_rate": 1.1084859993049723e-06,
      "loss": 2.3706,
      "step": 65257
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3105756044387817,
      "learning_rate": 1.1082975906892946e-06,
      "loss": 2.1688,
      "step": 65258
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0509722232818604,
      "learning_rate": 1.1081091971474645e-06,
      "loss": 2.2612,
      "step": 65259
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0256752967834473,
      "learning_rate": 1.1079208186798074e-06,
      "loss": 2.198,
      "step": 65260
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0271096229553223,
      "learning_rate": 1.1077324552866386e-06,
      "loss": 2.3123,
      "step": 65261
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1094881296157837,
      "learning_rate": 1.107544106968279e-06,
      "loss": 2.5315,
      "step": 65262
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0762763023376465,
      "learning_rate": 1.1073557737250451e-06,
      "loss": 2.3536,
      "step": 65263
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1699421405792236,
      "learning_rate": 1.1071674555572598e-06,
      "loss": 2.2195,
      "step": 65264
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1955540180206299,
      "learning_rate": 1.1069791524652395e-06,
      "loss": 2.4293,
      "step": 65265
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0266330242156982,
      "learning_rate": 1.1067908644493063e-06,
      "loss": 2.3075,
      "step": 65266
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1246312856674194,
      "learning_rate": 1.1066025915097788e-06,
      "loss": 2.351,
      "step": 65267
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9903671741485596,
      "learning_rate": 1.1064143336469734e-06,
      "loss": 2.2459,
      "step": 65268
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.154419183731079,
      "learning_rate": 1.1062260908612122e-06,
      "loss": 2.2653,
      "step": 65269
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.025158166885376,
      "learning_rate": 1.1060378631528123e-06,
      "loss": 2.3613,
      "step": 65270
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0499197244644165,
      "learning_rate": 1.105849650522095e-06,
      "loss": 2.4678,
      "step": 65271
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0188615322113037,
      "learning_rate": 1.1056614529693765e-06,
      "loss": 2.2414,
      "step": 65272
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0024479627609253,
      "learning_rate": 1.10547327049498e-06,
      "loss": 2.3109,
      "step": 65273
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1013753414154053,
      "learning_rate": 1.1052851030992184e-06,
      "loss": 2.0879,
      "step": 65274
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1519266366958618,
      "learning_rate": 1.1050969507824206e-06,
      "loss": 2.462,
      "step": 65275
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0538924932479858,
      "learning_rate": 1.1049088135448939e-06,
      "loss": 2.3314,
      "step": 65276
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.388871669769287,
      "learning_rate": 1.1047206913869646e-06,
      "loss": 2.1175,
      "step": 65277
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0567004680633545,
      "learning_rate": 1.1045325843089483e-06,
      "loss": 2.3124,
      "step": 65278
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0976322889328003,
      "learning_rate": 1.1043444923111669e-06,
      "loss": 2.5432,
      "step": 65279
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.972555935382843,
      "learning_rate": 1.1041564153939355e-06,
      "loss": 2.2796,
      "step": 65280
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0157458782196045,
      "learning_rate": 1.1039683535575762e-06,
      "loss": 2.1274,
      "step": 65281
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0371896028518677,
      "learning_rate": 1.1037803068024056e-06,
      "loss": 2.2732,
      "step": 65282
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.03793203830719,
      "learning_rate": 1.1035922751287453e-06,
      "loss": 2.4041,
      "step": 65283
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.232424259185791,
      "learning_rate": 1.103404258536911e-06,
      "loss": 2.3857,
      "step": 65284
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1043827533721924,
      "learning_rate": 1.1032162570272241e-06,
      "loss": 2.2025,
      "step": 65285
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0254206657409668,
      "learning_rate": 1.1030282706000006e-06,
      "loss": 2.3166,
      "step": 65286
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.054455280303955,
      "learning_rate": 1.102840299255562e-06,
      "loss": 2.2994,
      "step": 65287
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1622998714447021,
      "learning_rate": 1.102652342994226e-06,
      "loss": 2.387,
      "step": 65288
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2127701044082642,
      "learning_rate": 1.10246440181631e-06,
      "loss": 2.5462,
      "step": 65289
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.177543044090271,
      "learning_rate": 1.1022764757221328e-06,
      "loss": 2.298,
      "step": 65290
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0871573686599731,
      "learning_rate": 1.1020885647120138e-06,
      "loss": 2.4029,
      "step": 65291
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.778611898422241,
      "learning_rate": 1.1019006687862698e-06,
      "loss": 2.293,
      "step": 65292
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0673980712890625,
      "learning_rate": 1.1017127879452238e-06,
      "loss": 2.2662,
      "step": 65293
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1315585374832153,
      "learning_rate": 1.101524922189191e-06,
      "loss": 2.1127,
      "step": 65294
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2307430505752563,
      "learning_rate": 1.101337071518488e-06,
      "loss": 2.3165,
      "step": 65295
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0650352239608765,
      "learning_rate": 1.1011492359334374e-06,
      "loss": 2.1382,
      "step": 65296
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1340502500534058,
      "learning_rate": 1.100961415434354e-06,
      "loss": 2.1963,
      "step": 65297
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3125648498535156,
      "learning_rate": 1.1007736100215594e-06,
      "loss": 2.3684,
      "step": 65298
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0682711601257324,
      "learning_rate": 1.100585819695369e-06,
      "loss": 2.3305,
      "step": 65299
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0338702201843262,
      "learning_rate": 1.1003980444561046e-06,
      "loss": 2.5305,
      "step": 65300
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2945928573608398,
      "learning_rate": 1.1002102843040807e-06,
      "loss": 2.4293,
      "step": 65301
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.542486310005188,
      "learning_rate": 1.1000225392396214e-06,
      "loss": 2.1479,
      "step": 65302
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.070600986480713,
      "learning_rate": 1.0998348092630374e-06,
      "loss": 2.3137,
      "step": 65303
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1851651668548584,
      "learning_rate": 1.0996470943746517e-06,
      "loss": 2.4398,
      "step": 65304
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9768877029418945,
      "learning_rate": 1.09945939457478e-06,
      "loss": 2.2553,
      "step": 65305
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1236475706100464,
      "learning_rate": 1.099271709863745e-06,
      "loss": 2.3765,
      "step": 65306
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0074794292449951,
      "learning_rate": 1.099084040241859e-06,
      "loss": 2.1854,
      "step": 65307
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0758488178253174,
      "learning_rate": 1.0988963857094448e-06,
      "loss": 2.3334,
      "step": 65308
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.554336428642273,
      "learning_rate": 1.0987087462668166e-06,
      "loss": 2.1084,
      "step": 65309
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.098252296447754,
      "learning_rate": 1.0985211219142977e-06,
      "loss": 2.3516,
      "step": 65310
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1322463750839233,
      "learning_rate": 1.0983335126521999e-06,
      "loss": 2.1666,
      "step": 65311
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0604093074798584,
      "learning_rate": 1.0981459184808473e-06,
      "loss": 2.5259,
      "step": 65312
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.121354103088379,
      "learning_rate": 1.0979583394005522e-06,
      "loss": 2.2398,
      "step": 65313
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0699070692062378,
      "learning_rate": 1.0977707754116384e-06,
      "loss": 2.3057,
      "step": 65314
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0436915159225464,
      "learning_rate": 1.0975832265144205e-06,
      "loss": 2.4173,
      "step": 65315
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0817070007324219,
      "learning_rate": 1.0973956927092166e-06,
      "loss": 2.2665,
      "step": 65316
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0650393962860107,
      "learning_rate": 1.0972081739963436e-06,
      "loss": 2.3695,
      "step": 65317
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0925464630126953,
      "learning_rate": 1.097020670376122e-06,
      "loss": 2.1686,
      "step": 65318
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1595745086669922,
      "learning_rate": 1.0968331818488664e-06,
      "loss": 2.5411,
      "step": 65319
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.145893931388855,
      "learning_rate": 1.0966457084148996e-06,
      "loss": 2.1607,
      "step": 65320
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0943551063537598,
      "learning_rate": 1.0964582500745336e-06,
      "loss": 2.3334,
      "step": 65321
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1511969566345215,
      "learning_rate": 1.0962708068280914e-06,
      "loss": 2.2098,
      "step": 65322
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.310080647468567,
      "learning_rate": 1.0960833786758885e-06,
      "loss": 2.3946,
      "step": 65323
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.003206729888916,
      "learning_rate": 1.09589596561824e-06,
      "loss": 2.2625,
      "step": 65324
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1632376909255981,
      "learning_rate": 1.095708567655468e-06,
      "loss": 2.1783,
      "step": 65325
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1056069135665894,
      "learning_rate": 1.0955211847878877e-06,
      "loss": 2.2182,
      "step": 65326
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2099956274032593,
      "learning_rate": 1.0953338170158178e-06,
      "loss": 2.513,
      "step": 65327
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0819083452224731,
      "learning_rate": 1.095146464339577e-06,
      "loss": 2.1523,
      "step": 65328
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.127273440361023,
      "learning_rate": 1.0949591267594805e-06,
      "loss": 2.5108,
      "step": 65329
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0113780498504639,
      "learning_rate": 1.0947718042758448e-06,
      "loss": 2.1739,
      "step": 65330
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0456832647323608,
      "learning_rate": 1.0945844968889917e-06,
      "loss": 2.4955,
      "step": 65331
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0407366752624512,
      "learning_rate": 1.0943972045992346e-06,
      "loss": 2.4932,
      "step": 65332
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0280911922454834,
      "learning_rate": 1.094209927406895e-06,
      "loss": 2.1302,
      "step": 65333
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0129084587097168,
      "learning_rate": 1.0940226653122864e-06,
      "loss": 2.2508,
      "step": 65334
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0431421995162964,
      "learning_rate": 1.0938354183157306e-06,
      "loss": 2.3362,
      "step": 65335
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1886265277862549,
      "learning_rate": 1.0936481864175397e-06,
      "loss": 2.5785,
      "step": 65336
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2429887056350708,
      "learning_rate": 1.0934609696180376e-06,
      "loss": 2.2119,
      "step": 65337
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1844245195388794,
      "learning_rate": 1.0932737679175343e-06,
      "loss": 2.2503,
      "step": 65338
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2490720748901367,
      "learning_rate": 1.0930865813163538e-06,
      "loss": 2.5148,
      "step": 65339
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0150234699249268,
      "learning_rate": 1.0928994098148093e-06,
      "loss": 2.5963,
      "step": 65340
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.144386887550354,
      "learning_rate": 1.092712253413223e-06,
      "loss": 2.4207,
      "step": 65341
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.008582592010498,
      "learning_rate": 1.092525112111904e-06,
      "loss": 2.251,
      "step": 65342
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0864313840866089,
      "learning_rate": 1.0923379859111772e-06,
      "loss": 2.4801,
      "step": 65343
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1737463474273682,
      "learning_rate": 1.092150874811354e-06,
      "loss": 2.3153,
      "step": 65344
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1027259826660156,
      "learning_rate": 1.0919637788127556e-06,
      "loss": 2.267,
      "step": 65345
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.219050645828247,
      "learning_rate": 1.0917766979156974e-06,
      "loss": 2.3563,
      "step": 65346
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.088571310043335,
      "learning_rate": 1.0915896321204988e-06,
      "loss": 2.5143,
      "step": 65347
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0169868469238281,
      "learning_rate": 1.0914025814274732e-06,
      "loss": 2.3297,
      "step": 65348
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.197381854057312,
      "learning_rate": 1.0912155458369422e-06,
      "loss": 2.2087,
      "step": 65349
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9312638640403748,
      "learning_rate": 1.0910285253492192e-06,
      "loss": 2.3951,
      "step": 65350
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1409721374511719,
      "learning_rate": 1.0908415199646216e-06,
      "loss": 2.3675,
      "step": 65351
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9936212301254272,
      "learning_rate": 1.090654529683469e-06,
      "loss": 1.9839,
      "step": 65352
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3143731355667114,
      "learning_rate": 1.090467554506075e-06,
      "loss": 2.3937,
      "step": 65353
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1014502048492432,
      "learning_rate": 1.0902805944327598e-06,
      "loss": 2.4445,
      "step": 65354
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1494674682617188,
      "learning_rate": 1.0900936494638392e-06,
      "loss": 2.1996,
      "step": 65355
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2446352243423462,
      "learning_rate": 1.0899067195996293e-06,
      "loss": 2.3759,
      "step": 65356
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1291496753692627,
      "learning_rate": 1.0897198048404466e-06,
      "loss": 2.0873,
      "step": 65357
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1955819129943848,
      "learning_rate": 1.08953290518661e-06,
      "loss": 2.4627,
      "step": 65358
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.270082950592041,
      "learning_rate": 1.0893460206384333e-06,
      "loss": 2.3892,
      "step": 65359
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0184471607208252,
      "learning_rate": 1.0891591511962362e-06,
      "loss": 2.494,
      "step": 65360
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1783009767532349,
      "learning_rate": 1.0889722968603334e-06,
      "loss": 2.2882,
      "step": 65361
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1122612953186035,
      "learning_rate": 1.0887854576310452e-06,
      "loss": 2.4491,
      "step": 65362
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0700669288635254,
      "learning_rate": 1.088598633508683e-06,
      "loss": 2.2759,
      "step": 65363
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.151847243309021,
      "learning_rate": 1.0884118244935692e-06,
      "loss": 2.3472,
      "step": 65364
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0006040334701538,
      "learning_rate": 1.0882250305860153e-06,
      "loss": 2.4614,
      "step": 65365
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.044482707977295,
      "learning_rate": 1.088038251786343e-06,
      "loss": 2.4156,
      "step": 65366
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9786364436149597,
      "learning_rate": 1.0878514880948632e-06,
      "loss": 2.0078,
      "step": 65367
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0329514741897583,
      "learning_rate": 1.0876647395119e-06,
      "loss": 2.303,
      "step": 65368
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0422059297561646,
      "learning_rate": 1.087478006037762e-06,
      "loss": 2.7109,
      "step": 65369
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2007203102111816,
      "learning_rate": 1.0872912876727714e-06,
      "loss": 2.1337,
      "step": 65370
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2019803524017334,
      "learning_rate": 1.08710458441724e-06,
      "loss": 2.3405,
      "step": 65371
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.080238699913025,
      "learning_rate": 1.08691789627149e-06,
      "loss": 2.1645,
      "step": 65372
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.114388108253479,
      "learning_rate": 1.086731223235833e-06,
      "loss": 2.4704,
      "step": 65373
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1326446533203125,
      "learning_rate": 1.0865445653105889e-06,
      "loss": 2.417,
      "step": 65374
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1545393466949463,
      "learning_rate": 1.0863579224960707e-06,
      "loss": 2.6099,
      "step": 65375
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1062334775924683,
      "learning_rate": 1.0861712947925985e-06,
      "loss": 2.4817,
      "step": 65376
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0067389011383057,
      "learning_rate": 1.0859846822004848e-06,
      "loss": 2.33,
      "step": 65377
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3613113164901733,
      "learning_rate": 1.08579808472005e-06,
      "loss": 2.2827,
      "step": 65378
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1828540563583374,
      "learning_rate": 1.085611502351609e-06,
      "loss": 2.2428,
      "step": 65379
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0044721364974976,
      "learning_rate": 1.085424935095476e-06,
      "loss": 2.3492,
      "step": 65380
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0780905485153198,
      "learning_rate": 1.0852383829519696e-06,
      "loss": 2.1504,
      "step": 65381
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1986550092697144,
      "learning_rate": 1.0850518459214066e-06,
      "loss": 2.1481,
      "step": 65382
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0650992393493652,
      "learning_rate": 1.0848653240040996e-06,
      "loss": 2.3711,
      "step": 65383
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0934066772460938,
      "learning_rate": 1.0846788172003687e-06,
      "loss": 2.415,
      "step": 65384
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0011802911758423,
      "learning_rate": 1.084492325510529e-06,
      "loss": 2.1778,
      "step": 65385
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0822572708129883,
      "learning_rate": 1.0843058489348946e-06,
      "loss": 2.2869,
      "step": 65386
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.044155478477478,
      "learning_rate": 1.0841193874737843e-06,
      "loss": 2.513,
      "step": 65387
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1168503761291504,
      "learning_rate": 1.0839329411275124e-06,
      "loss": 2.213,
      "step": 65388
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0701541900634766,
      "learning_rate": 1.0837465098963973e-06,
      "loss": 2.0908,
      "step": 65389
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.038556694984436,
      "learning_rate": 1.0835600937807512e-06,
      "loss": 2.309,
      "step": 65390
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0589174032211304,
      "learning_rate": 1.0833736927808946e-06,
      "loss": 2.467,
      "step": 65391
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2043086290359497,
      "learning_rate": 1.0831873068971387e-06,
      "loss": 2.4272,
      "step": 65392
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0835275650024414,
      "learning_rate": 1.0830009361298045e-06,
      "loss": 2.5773,
      "step": 65393
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0088355541229248,
      "learning_rate": 1.0828145804792057e-06,
      "loss": 2.3712,
      "step": 65394
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1635662317276,
      "learning_rate": 1.0826282399456577e-06,
      "loss": 2.2804,
      "step": 65395
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0765725374221802,
      "learning_rate": 1.0824419145294752e-06,
      "loss": 2.2253,
      "step": 65396
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0678600072860718,
      "learning_rate": 1.0822556042309773e-06,
      "loss": 2.2701,
      "step": 65397
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1146113872528076,
      "learning_rate": 1.0820693090504763e-06,
      "loss": 2.2409,
      "step": 65398
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0363343954086304,
      "learning_rate": 1.081883028988292e-06,
      "loss": 2.3381,
      "step": 65399
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2121684551239014,
      "learning_rate": 1.0816967640447363e-06,
      "loss": 2.442,
      "step": 65400
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4449294805526733,
      "learning_rate": 1.0815105142201288e-06,
      "loss": 2.4005,
      "step": 65401
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0899053812026978,
      "learning_rate": 1.0813242795147805e-06,
      "loss": 2.5129,
      "step": 65402
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9892207980155945,
      "learning_rate": 1.0811380599290122e-06,
      "loss": 2.2587,
      "step": 65403
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.109703779220581,
      "learning_rate": 1.0809518554631348e-06,
      "loss": 2.1568,
      "step": 65404
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0068271160125732,
      "learning_rate": 1.0807656661174693e-06,
      "loss": 2.2337,
      "step": 65405
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0858511924743652,
      "learning_rate": 1.0805794918923284e-06,
      "loss": 2.3804,
      "step": 65406
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.025747299194336,
      "learning_rate": 1.0803933327880267e-06,
      "loss": 2.0633,
      "step": 65407
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0089133977890015,
      "learning_rate": 1.0802071888048794e-06,
      "loss": 2.3409,
      "step": 65408
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1785736083984375,
      "learning_rate": 1.080021059943206e-06,
      "loss": 2.2237,
      "step": 65409
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0794756412506104,
      "learning_rate": 1.0798349462033164e-06,
      "loss": 2.2161,
      "step": 65410
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.084580421447754,
      "learning_rate": 1.0796488475855328e-06,
      "loss": 2.3479,
      "step": 65411
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1268208026885986,
      "learning_rate": 1.0794627640901656e-06,
      "loss": 2.2568,
      "step": 65412
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1840877532958984,
      "learning_rate": 1.0792766957175305e-06,
      "loss": 2.4332,
      "step": 65413
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.019616961479187,
      "learning_rate": 1.079090642467946e-06,
      "loss": 2.2516,
      "step": 65414
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0968891382217407,
      "learning_rate": 1.078904604341724e-06,
      "loss": 2.187,
      "step": 65415
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0043492317199707,
      "learning_rate": 1.0787185813391843e-06,
      "loss": 2.4563,
      "step": 65416
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0144739151000977,
      "learning_rate": 1.0785325734606367e-06,
      "loss": 2.3947,
      "step": 65417
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9883447289466858,
      "learning_rate": 1.0783465807064008e-06,
      "loss": 2.2096,
      "step": 65418
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0549675226211548,
      "learning_rate": 1.07816060307679e-06,
      "loss": 2.2257,
      "step": 65419
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0508203506469727,
      "learning_rate": 1.0779746405721235e-06,
      "loss": 2.155,
      "step": 65420
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0681732892990112,
      "learning_rate": 1.0777886931927085e-06,
      "loss": 2.2481,
      "step": 65421
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0871174335479736,
      "learning_rate": 1.0776027609388684e-06,
      "loss": 2.3948,
      "step": 65422
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0852954387664795,
      "learning_rate": 1.077416843810911e-06,
      "loss": 2.2811,
      "step": 65423
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2471708059310913,
      "learning_rate": 1.0772309418091587e-06,
      "loss": 2.4876,
      "step": 65424
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1006536483764648,
      "learning_rate": 1.0770450549339218e-06,
      "loss": 2.2084,
      "step": 65425
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.780538320541382,
      "learning_rate": 1.0768591831855179e-06,
      "loss": 2.1757,
      "step": 65426
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1240431070327759,
      "learning_rate": 1.0766733265642593e-06,
      "loss": 2.4103,
      "step": 65427
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0472465753555298,
      "learning_rate": 1.0764874850704653e-06,
      "loss": 2.2028,
      "step": 65428
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0041877031326294,
      "learning_rate": 1.0763016587044462e-06,
      "loss": 2.341,
      "step": 65429
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0173696279525757,
      "learning_rate": 1.0761158474665224e-06,
      "loss": 2.1242,
      "step": 65430
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.055374026298523,
      "learning_rate": 1.0759300513570026e-06,
      "loss": 2.3259,
      "step": 65431
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0765210390090942,
      "learning_rate": 1.0757442703762078e-06,
      "loss": 2.3313,
      "step": 65432
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.11197030544281,
      "learning_rate": 1.07555850452445e-06,
      "loss": 2.486,
      "step": 65433
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2253186702728271,
      "learning_rate": 1.0753727538020454e-06,
      "loss": 2.3568,
      "step": 65434
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.085631012916565,
      "learning_rate": 1.075187018209305e-06,
      "loss": 2.0981,
      "step": 65435
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0781123638153076,
      "learning_rate": 1.0750012977465484e-06,
      "loss": 2.189,
      "step": 65436
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0232231616973877,
      "learning_rate": 1.0748155924140868e-06,
      "loss": 2.4361,
      "step": 65437
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0924280881881714,
      "learning_rate": 1.0746299022122386e-06,
      "loss": 2.4412,
      "step": 65438
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0751584768295288,
      "learning_rate": 1.0744442271413146e-06,
      "loss": 2.2847,
      "step": 65439
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0656307935714722,
      "learning_rate": 1.074258567201635e-06,
      "loss": 2.3472,
      "step": 65440
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0857229232788086,
      "learning_rate": 1.0740729223935097e-06,
      "loss": 2.2084,
      "step": 65441
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.037876009941101,
      "learning_rate": 1.0738872927172538e-06,
      "loss": 2.4551,
      "step": 65442
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1459101438522339,
      "learning_rate": 1.0737016781731858e-06,
      "loss": 2.5231,
      "step": 65443
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1536355018615723,
      "learning_rate": 1.073516078761615e-06,
      "loss": 2.4624,
      "step": 65444
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0173470973968506,
      "learning_rate": 1.0733304944828615e-06,
      "loss": 2.3922,
      "step": 65445
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9822874665260315,
      "learning_rate": 1.0731449253372351e-06,
      "loss": 2.3965,
      "step": 65446
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9838804006576538,
      "learning_rate": 1.0729593713250563e-06,
      "loss": 2.3895,
      "step": 65447
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1670188903808594,
      "learning_rate": 1.0727738324466319e-06,
      "loss": 2.223,
      "step": 65448
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1282228231430054,
      "learning_rate": 1.0725883087022825e-06,
      "loss": 2.4609,
      "step": 65449
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.054017424583435,
      "learning_rate": 1.0724028000923193e-06,
      "loss": 2.3553,
      "step": 65450
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0862900018692017,
      "learning_rate": 1.0722173066170594e-06,
      "loss": 2.2447,
      "step": 65451
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0361032485961914,
      "learning_rate": 1.0720318282768139e-06,
      "loss": 2.249,
      "step": 65452
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0799028873443604,
      "learning_rate": 1.0718463650719012e-06,
      "loss": 2.2462,
      "step": 65453
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2127922773361206,
      "learning_rate": 1.0716609170026316e-06,
      "loss": 2.3889,
      "step": 65454
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9406372308731079,
      "learning_rate": 1.0714754840693242e-06,
      "loss": 2.1777,
      "step": 65455
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1461058855056763,
      "learning_rate": 1.071290066272288e-06,
      "loss": 2.5151,
      "step": 65456
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.041703224182129,
      "learning_rate": 1.0711046636118427e-06,
      "loss": 2.307,
      "step": 65457
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1610348224639893,
      "learning_rate": 1.0709192760882981e-06,
      "loss": 2.3618,
      "step": 65458
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1713602542877197,
      "learning_rate": 1.0707339037019727e-06,
      "loss": 2.1441,
      "step": 65459
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0419999361038208,
      "learning_rate": 1.0705485464531773e-06,
      "loss": 2.4356,
      "step": 65460
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0795361995697021,
      "learning_rate": 1.0703632043422285e-06,
      "loss": 2.4491,
      "step": 65461
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.223689079284668,
      "learning_rate": 1.070177877369437e-06,
      "loss": 2.2877,
      "step": 65462
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.287990689277649,
      "learning_rate": 1.0699925655351217e-06,
      "loss": 2.3325,
      "step": 65463
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.019083857536316,
      "learning_rate": 1.0698072688395911e-06,
      "loss": 2.5221,
      "step": 65464
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0843549966812134,
      "learning_rate": 1.069621987283166e-06,
      "loss": 2.4492,
      "step": 65465
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0680347681045532,
      "learning_rate": 1.0694367208661537e-06,
      "loss": 2.3869,
      "step": 65466
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9888474941253662,
      "learning_rate": 1.0692514695888746e-06,
      "loss": 2.3051,
      "step": 65467
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0072097778320312,
      "learning_rate": 1.06906623345164e-06,
      "loss": 2.3817,
      "step": 65468
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0990501642227173,
      "learning_rate": 1.068881012454761e-06,
      "loss": 2.2288,
      "step": 65469
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0698288679122925,
      "learning_rate": 1.0686958065985576e-06,
      "loss": 2.3827,
      "step": 65470
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5823209285736084,
      "learning_rate": 1.0685106158833369e-06,
      "loss": 2.3749,
      "step": 65471
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3262618780136108,
      "learning_rate": 1.06832544030942e-06,
      "loss": 2.5745,
      "step": 65472
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0639396905899048,
      "learning_rate": 1.0681402798771156e-06,
      "loss": 2.3473,
      "step": 65473
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9409075379371643,
      "learning_rate": 1.067955134586741e-06,
      "loss": 2.3152,
      "step": 65474
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.098262906074524,
      "learning_rate": 1.0677700044386053e-06,
      "loss": 2.1309,
      "step": 65475
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0943297147750854,
      "learning_rate": 1.0675848894330277e-06,
      "loss": 2.3727,
      "step": 65476
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9643317461013794,
      "learning_rate": 1.067399789570318e-06,
      "loss": 2.3894,
      "step": 65477
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1530629396438599,
      "learning_rate": 1.067214704850793e-06,
      "loss": 2.301,
      "step": 65478
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0210922956466675,
      "learning_rate": 1.0670296352747632e-06,
      "loss": 2.5578,
      "step": 65479
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3329943418502808,
      "learning_rate": 1.0668445808425475e-06,
      "loss": 2.3029,
      "step": 65480
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0837104320526123,
      "learning_rate": 1.0666595415544533e-06,
      "loss": 2.3718,
      "step": 65481
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0356823205947876,
      "learning_rate": 1.0664745174108005e-06,
      "loss": 2.365,
      "step": 65482
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.161325216293335,
      "learning_rate": 1.0662895084118964e-06,
      "loss": 2.347,
      "step": 65483
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9861639142036438,
      "learning_rate": 1.0661045145580606e-06,
      "loss": 2.2546,
      "step": 65484
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1214029788970947,
      "learning_rate": 1.0659195358496023e-06,
      "loss": 2.0362,
      "step": 65485
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0100709199905396,
      "learning_rate": 1.0657345722868406e-06,
      "loss": 2.2832,
      "step": 65486
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2170071601867676,
      "learning_rate": 1.0655496238700813e-06,
      "loss": 2.42,
      "step": 65487
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0436427593231201,
      "learning_rate": 1.065364690599644e-06,
      "loss": 2.23,
      "step": 65488
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0057775974273682,
      "learning_rate": 1.0651797724758384e-06,
      "loss": 2.3111,
      "step": 65489
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0855227708816528,
      "learning_rate": 1.0649948694989809e-06,
      "loss": 2.3289,
      "step": 65490
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.10965096950531,
      "learning_rate": 1.0648099816693825e-06,
      "loss": 2.3528,
      "step": 65491
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0247300863265991,
      "learning_rate": 1.0646251089873594e-06,
      "loss": 2.209,
      "step": 65492
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1054069995880127,
      "learning_rate": 1.0644402514532227e-06,
      "loss": 2.2234,
      "step": 65493
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.064246654510498,
      "learning_rate": 1.0642554090672875e-06,
      "loss": 2.339,
      "step": 65494
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0398099422454834,
      "learning_rate": 1.0640705818298647e-06,
      "loss": 2.3729,
      "step": 65495
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0447769165039062,
      "learning_rate": 1.0638857697412718e-06,
      "loss": 2.3151,
      "step": 65496
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2541865110397339,
      "learning_rate": 1.0637009728018188e-06,
      "loss": 2.3648,
      "step": 65497
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.160670518875122,
      "learning_rate": 1.0635161910118174e-06,
      "loss": 2.31,
      "step": 65498
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0611194372177124,
      "learning_rate": 1.0633314243715865e-06,
      "loss": 2.2286,
      "step": 65499
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.053406000137329,
      "learning_rate": 1.0631466728814343e-06,
      "loss": 2.1926,
      "step": 65500
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0782506465911865,
      "learning_rate": 1.0629619365416744e-06,
      "loss": 2.26,
      "step": 65501
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.7339835166931152,
      "learning_rate": 1.0627772153526239e-06,
      "loss": 2.4281,
      "step": 65502
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.139038324356079,
      "learning_rate": 1.0625925093145927e-06,
      "loss": 2.3655,
      "step": 65503
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0389761924743652,
      "learning_rate": 1.062407818427893e-06,
      "loss": 2.4381,
      "step": 65504
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0264590978622437,
      "learning_rate": 1.0622231426928408e-06,
      "loss": 2.0714,
      "step": 65505
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.008676528930664,
      "learning_rate": 1.0620384821097462e-06,
      "loss": 2.338,
      "step": 65506
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.109615683555603,
      "learning_rate": 1.0618538366789265e-06,
      "loss": 2.24,
      "step": 65507
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0868415832519531,
      "learning_rate": 1.0616692064006894e-06,
      "loss": 2.3429,
      "step": 65508
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0999832153320312,
      "learning_rate": 1.0614845912753524e-06,
      "loss": 2.0789,
      "step": 65509
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.102229356765747,
      "learning_rate": 1.061299991303225e-06,
      "loss": 2.0969,
      "step": 65510
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.067386269569397,
      "learning_rate": 1.061115406484624e-06,
      "loss": 2.306,
      "step": 65511
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0054912567138672,
      "learning_rate": 1.060930836819859e-06,
      "loss": 2.2437,
      "step": 65512
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0741686820983887,
      "learning_rate": 1.0607462823092463e-06,
      "loss": 2.3959,
      "step": 65513
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.12364661693573,
      "learning_rate": 1.0605617429530935e-06,
      "loss": 2.342,
      "step": 65514
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0581954717636108,
      "learning_rate": 1.060377218751718e-06,
      "loss": 2.3103,
      "step": 65515
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0846346616744995,
      "learning_rate": 1.06019270970543e-06,
      "loss": 2.2087,
      "step": 65516
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0075409412384033,
      "learning_rate": 1.0600082158145442e-06,
      "loss": 2.4713,
      "step": 65517
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0834497213363647,
      "learning_rate": 1.0598237370793719e-06,
      "loss": 2.3048,
      "step": 65518
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0533205270767212,
      "learning_rate": 1.059639273500227e-06,
      "loss": 2.2847,
      "step": 65519
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0275453329086304,
      "learning_rate": 1.0594548250774206e-06,
      "loss": 2.2657,
      "step": 65520
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9847970604896545,
      "learning_rate": 1.0592703918112679e-06,
      "loss": 2.2509,
      "step": 65521
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2192734479904175,
      "learning_rate": 1.0590859737020787e-06,
      "loss": 2.3118,
      "step": 65522
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9946982264518738,
      "learning_rate": 1.0589015707501693e-06,
      "loss": 2.2398,
      "step": 65523
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9903606176376343,
      "learning_rate": 1.0587171829558496e-06,
      "loss": 2.3914,
      "step": 65524
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1207629442214966,
      "learning_rate": 1.0585328103194304e-06,
      "loss": 2.1983,
      "step": 65525
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0800808668136597,
      "learning_rate": 1.0583484528412292e-06,
      "loss": 2.5716,
      "step": 65526
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.034805178642273,
      "learning_rate": 1.0581641105215556e-06,
      "loss": 2.3695,
      "step": 65527
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1696263551712036,
      "learning_rate": 1.0579797833607207e-06,
      "loss": 2.1915,
      "step": 65528
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0999712944030762,
      "learning_rate": 1.05779547135904e-06,
      "loss": 2.5093,
      "step": 65529
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1349844932556152,
      "learning_rate": 1.057611174516825e-06,
      "loss": 2.109,
      "step": 65530
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0796012878417969,
      "learning_rate": 1.0574268928343867e-06,
      "loss": 2.2915,
      "step": 65531
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0278403759002686,
      "learning_rate": 1.0572426263120394e-06,
      "loss": 2.3797,
      "step": 65532
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0197783708572388,
      "learning_rate": 1.057058374950093e-06,
      "loss": 2.3743,
      "step": 65533
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0993280410766602,
      "learning_rate": 1.0568741387488635e-06,
      "loss": 2.3259,
      "step": 65534
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.063361644744873,
      "learning_rate": 1.0566899177086599e-06,
      "loss": 2.3028,
      "step": 65535
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9992688894271851,
      "learning_rate": 1.0565057118297971e-06,
      "loss": 2.3099,
      "step": 65536
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1205427646636963,
      "learning_rate": 1.0563215211125844e-06,
      "loss": 2.265,
      "step": 65537
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0767292976379395,
      "learning_rate": 1.0561373455573375e-06,
      "loss": 2.3287,
      "step": 65538
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1362215280532837,
      "learning_rate": 1.0559531851643668e-06,
      "loss": 2.495,
      "step": 65539
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0644105672836304,
      "learning_rate": 1.0557690399339848e-06,
      "loss": 2.2168,
      "step": 65540
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9943147897720337,
      "learning_rate": 1.0555849098665017e-06,
      "loss": 2.2391,
      "step": 65541
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.096146583557129,
      "learning_rate": 1.0554007949622337e-06,
      "loss": 2.1791,
      "step": 65542
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0233345031738281,
      "learning_rate": 1.055216695221488e-06,
      "loss": 2.3453,
      "step": 65543
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.6807432174682617,
      "learning_rate": 1.0550326106445819e-06,
      "loss": 2.2062,
      "step": 65544
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0711662769317627,
      "learning_rate": 1.0548485412318232e-06,
      "loss": 2.131,
      "step": 65545
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1377158164978027,
      "learning_rate": 1.0546644869835266e-06,
      "loss": 2.2719,
      "step": 65546
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.075147032737732,
      "learning_rate": 1.0544804479000025e-06,
      "loss": 2.3818,
      "step": 65547
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9647993445396423,
      "learning_rate": 1.0542964239815645e-06,
      "loss": 2.1854,
      "step": 65548
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1657994985580444,
      "learning_rate": 1.0541124152285221e-06,
      "loss": 2.3681,
      "step": 65549
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.058051586151123,
      "learning_rate": 1.0539284216411915e-06,
      "loss": 2.2515,
      "step": 65550
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0084738731384277,
      "learning_rate": 1.0537444432198795e-06,
      "loss": 2.6366,
      "step": 65551
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2147190570831299,
      "learning_rate": 1.0535604799649036e-06,
      "loss": 2.2364,
      "step": 65552
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.030766487121582,
      "learning_rate": 1.0533765318765699e-06,
      "loss": 2.2764,
      "step": 65553
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1303850412368774,
      "learning_rate": 1.053192598955194e-06,
      "loss": 2.4561,
      "step": 65554
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.076796293258667,
      "learning_rate": 1.0530086812010843e-06,
      "loss": 2.1457,
      "step": 65555
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0653444528579712,
      "learning_rate": 1.0528247786145573e-06,
      "loss": 2.1252,
      "step": 65556
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5393390655517578,
      "learning_rate": 1.0526408911959207e-06,
      "loss": 2.5252,
      "step": 65557
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2174713611602783,
      "learning_rate": 1.0524570189454885e-06,
      "loss": 2.1598,
      "step": 65558
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2622308731079102,
      "learning_rate": 1.0522731618635728e-06,
      "loss": 2.2438,
      "step": 65559
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9757035374641418,
      "learning_rate": 1.0520893199504823e-06,
      "loss": 2.3298,
      "step": 65560
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0709997415542603,
      "learning_rate": 1.051905493206533e-06,
      "loss": 2.1969,
      "step": 65561
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1404995918273926,
      "learning_rate": 1.0517216816320319e-06,
      "loss": 2.1041,
      "step": 65562
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0568617582321167,
      "learning_rate": 1.051537885227294e-06,
      "loss": 2.1738,
      "step": 65563
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9840083122253418,
      "learning_rate": 1.0513541039926289e-06,
      "loss": 2.4553,
      "step": 65564
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0178896188735962,
      "learning_rate": 1.0511703379283522e-06,
      "loss": 2.2938,
      "step": 65565
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1771831512451172,
      "learning_rate": 1.0509865870347691e-06,
      "loss": 2.2416,
      "step": 65566
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.036795735359192,
      "learning_rate": 1.0508028513121948e-06,
      "loss": 2.2821,
      "step": 65567
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9945878982543945,
      "learning_rate": 1.0506191307609393e-06,
      "loss": 2.3603,
      "step": 65568
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1617658138275146,
      "learning_rate": 1.0504354253813164e-06,
      "loss": 2.3108,
      "step": 65569
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0894333124160767,
      "learning_rate": 1.0502517351736353e-06,
      "loss": 2.1228,
      "step": 65570
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0633370876312256,
      "learning_rate": 1.0500680601382096e-06,
      "loss": 2.3934,
      "step": 65571
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1477069854736328,
      "learning_rate": 1.0498844002753472e-06,
      "loss": 2.531,
      "step": 65572
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1515740156173706,
      "learning_rate": 1.0497007555853633e-06,
      "loss": 2.3989,
      "step": 65573
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1359854936599731,
      "learning_rate": 1.0495171260685656e-06,
      "loss": 2.1217,
      "step": 65574
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0735809803009033,
      "learning_rate": 1.049333511725269e-06,
      "loss": 2.2119,
      "step": 65575
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.209700107574463,
      "learning_rate": 1.0491499125557824e-06,
      "loss": 2.3776,
      "step": 65576
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0523921251296997,
      "learning_rate": 1.0489663285604178e-06,
      "loss": 2.4094,
      "step": 65577
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1611533164978027,
      "learning_rate": 1.0487827597394884e-06,
      "loss": 2.1233,
      "step": 65578
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0387321710586548,
      "learning_rate": 1.0485992060933014e-06,
      "loss": 2.4,
      "step": 65579
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.158117651939392,
      "learning_rate": 1.048415667622169e-06,
      "loss": 2.1227,
      "step": 65580
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.991574227809906,
      "learning_rate": 1.0482321443264053e-06,
      "loss": 2.1512,
      "step": 65581
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.961545467376709,
      "learning_rate": 1.0480486362063169e-06,
      "loss": 2.4795,
      "step": 65582
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1037201881408691,
      "learning_rate": 1.04786514326222e-06,
      "loss": 2.2705,
      "step": 65583
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0425503253936768,
      "learning_rate": 1.04768166549442e-06,
      "loss": 2.4273,
      "step": 65584
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0410189628601074,
      "learning_rate": 1.0474982029032344e-06,
      "loss": 2.1733,
      "step": 65585
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0585027933120728,
      "learning_rate": 1.0473147554889696e-06,
      "loss": 2.1947,
      "step": 65586
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1684761047363281,
      "learning_rate": 1.0471313232519365e-06,
      "loss": 2.3128,
      "step": 65587
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0755696296691895,
      "learning_rate": 1.0469479061924493e-06,
      "loss": 2.3161,
      "step": 65588
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.035149335861206,
      "learning_rate": 1.0467645043108144e-06,
      "loss": 2.434,
      "step": 65589
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1566064357757568,
      "learning_rate": 1.0465811176073481e-06,
      "loss": 2.3645,
      "step": 65590
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9978885650634766,
      "learning_rate": 1.046397746082356e-06,
      "loss": 2.4089,
      "step": 65591
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.270979642868042,
      "learning_rate": 1.0462143897361553e-06,
      "loss": 2.1562,
      "step": 65592
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1179959774017334,
      "learning_rate": 1.0460310485690494e-06,
      "loss": 2.4384,
      "step": 65593
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1575254201889038,
      "learning_rate": 1.0458477225813557e-06,
      "loss": 2.1161,
      "step": 65594
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0875664949417114,
      "learning_rate": 1.0456644117733783e-06,
      "loss": 2.2941,
      "step": 65595
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6878809928894043,
      "learning_rate": 1.0454811161454348e-06,
      "loss": 2.2726,
      "step": 65596
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.117516279220581,
      "learning_rate": 1.0452978356978305e-06,
      "loss": 2.2146,
      "step": 65597
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.117421269416809,
      "learning_rate": 1.0451145704308807e-06,
      "loss": 2.4578,
      "step": 65598
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.157232403755188,
      "learning_rate": 1.044931320344892e-06,
      "loss": 2.3197,
      "step": 65599
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0796726942062378,
      "learning_rate": 1.0447480854401782e-06,
      "loss": 2.2631,
      "step": 65600
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2228080034255981,
      "learning_rate": 1.044564865717046e-06,
      "loss": 2.2301,
      "step": 65601
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.207816481590271,
      "learning_rate": 1.0443816611758118e-06,
      "loss": 2.2504,
      "step": 65602
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.092437505722046,
      "learning_rate": 1.044198471816781e-06,
      "loss": 2.3935,
      "step": 65603
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0811340808868408,
      "learning_rate": 1.0440152976402673e-06,
      "loss": 2.2691,
      "step": 65604
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1001046895980835,
      "learning_rate": 1.043832138646581e-06,
      "loss": 2.3452,
      "step": 65605
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1337664127349854,
      "learning_rate": 1.0436489948360307e-06,
      "loss": 2.3643,
      "step": 65606
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.052721381187439,
      "learning_rate": 1.043465866208927e-06,
      "loss": 2.2728,
      "step": 65607
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.267471432685852,
      "learning_rate": 1.043282752765582e-06,
      "loss": 2.3213,
      "step": 65608
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0539665222167969,
      "learning_rate": 1.0430996545063044e-06,
      "loss": 2.2129,
      "step": 65609
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0600802898406982,
      "learning_rate": 1.042916571431407e-06,
      "loss": 2.263,
      "step": 65610
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2586243152618408,
      "learning_rate": 1.0427335035411967e-06,
      "loss": 2.0335,
      "step": 65611
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.11229407787323,
      "learning_rate": 1.0425504508359885e-06,
      "loss": 2.3703,
      "step": 65612
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0628752708435059,
      "learning_rate": 1.0423674133160877e-06,
      "loss": 2.476,
      "step": 65613
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1406229734420776,
      "learning_rate": 1.0421843909818087e-06,
      "loss": 2.1173,
      "step": 65614
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9830718040466309,
      "learning_rate": 1.042001383833461e-06,
      "loss": 2.3319,
      "step": 65615
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1262916326522827,
      "learning_rate": 1.0418183918713509e-06,
      "loss": 2.2601,
      "step": 65616
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9554475545883179,
      "learning_rate": 1.041635415095794e-06,
      "loss": 2.5152,
      "step": 65617
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.087354302406311,
      "learning_rate": 1.041452453507098e-06,
      "loss": 2.1654,
      "step": 65618
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0372216701507568,
      "learning_rate": 1.0412695071055711e-06,
      "loss": 2.1826,
      "step": 65619
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.112290382385254,
      "learning_rate": 1.0410865758915278e-06,
      "loss": 2.2014,
      "step": 65620
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.042392611503601,
      "learning_rate": 1.0409036598652756e-06,
      "loss": 2.1434,
      "step": 65621
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0361154079437256,
      "learning_rate": 1.040720759027123e-06,
      "loss": 2.3601,
      "step": 65622
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1946216821670532,
      "learning_rate": 1.0405378733773842e-06,
      "loss": 2.2311,
      "step": 65623
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1405521631240845,
      "learning_rate": 1.0403550029163656e-06,
      "loss": 2.3254,
      "step": 65624
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1840004920959473,
      "learning_rate": 1.0401721476443804e-06,
      "loss": 2.5149,
      "step": 65625
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1291612386703491,
      "learning_rate": 1.0399893075617339e-06,
      "loss": 2.2054,
      "step": 65626
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1178181171417236,
      "learning_rate": 1.0398064826687427e-06,
      "loss": 2.1612,
      "step": 65627
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2293490171432495,
      "learning_rate": 1.0396236729657093e-06,
      "loss": 2.1891,
      "step": 65628
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.090035319328308,
      "learning_rate": 1.0394408784529509e-06,
      "loss": 2.3352,
      "step": 65629
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1489448547363281,
      "learning_rate": 1.039258099130771e-06,
      "loss": 2.2023,
      "step": 65630
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0761276483535767,
      "learning_rate": 1.0390753349994864e-06,
      "loss": 2.0918,
      "step": 65631
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0576964616775513,
      "learning_rate": 1.038892586059399e-06,
      "loss": 2.28,
      "step": 65632
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.146526575088501,
      "learning_rate": 1.038709852310824e-06,
      "loss": 2.4298,
      "step": 65633
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2280856370925903,
      "learning_rate": 1.038527133754068e-06,
      "loss": 2.1024,
      "step": 65634
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0420408248901367,
      "learning_rate": 1.0383444303894453e-06,
      "loss": 2.2952,
      "step": 65635
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1038795709609985,
      "learning_rate": 1.0381617422172597e-06,
      "loss": 2.2274,
      "step": 65636
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0977715253829956,
      "learning_rate": 1.0379790692378266e-06,
      "loss": 2.4557,
      "step": 65637
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1737726926803589,
      "learning_rate": 1.0377964114514504e-06,
      "loss": 2.2969,
      "step": 65638
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0626150369644165,
      "learning_rate": 1.0376137688584454e-06,
      "loss": 2.3115,
      "step": 65639
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.078704833984375,
      "learning_rate": 1.0374311414591177e-06,
      "loss": 2.3729,
      "step": 65640
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9562097191810608,
      "learning_rate": 1.0372485292537803e-06,
      "loss": 2.2727,
      "step": 65641
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0578628778457642,
      "learning_rate": 1.03706593224274e-06,
      "loss": 2.3823,
      "step": 65642
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1417495012283325,
      "learning_rate": 1.0368833504263065e-06,
      "loss": 2.6065,
      "step": 65643
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0977716445922852,
      "learning_rate": 1.0367007838047916e-06,
      "loss": 2.3735,
      "step": 65644
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0654069185256958,
      "learning_rate": 1.0365182323785028e-06,
      "loss": 2.4942,
      "step": 65645
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1089847087860107,
      "learning_rate": 1.0363356961477488e-06,
      "loss": 2.636,
      "step": 65646
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.109473466873169,
      "learning_rate": 1.0361531751128417e-06,
      "loss": 2.3925,
      "step": 65647
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0452032089233398,
      "learning_rate": 1.03597066927409e-06,
      "loss": 2.3485,
      "step": 65648
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0596792697906494,
      "learning_rate": 1.0357881786318002e-06,
      "loss": 2.5424,
      "step": 65649
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0930410623550415,
      "learning_rate": 1.0356057031862866e-06,
      "loss": 2.3019,
      "step": 65650
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0243104696273804,
      "learning_rate": 1.035423242937854e-06,
      "loss": 2.5296,
      "step": 65651
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.985363245010376,
      "learning_rate": 1.035240797886815e-06,
      "loss": 2.4887,
      "step": 65652
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1649277210235596,
      "learning_rate": 1.0350583680334769e-06,
      "loss": 2.2209,
      "step": 65653
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0679032802581787,
      "learning_rate": 1.0348759533781506e-06,
      "loss": 2.6006,
      "step": 65654
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0866413116455078,
      "learning_rate": 1.0346935539211422e-06,
      "loss": 2.3306,
      "step": 65655
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1236770153045654,
      "learning_rate": 1.0345111696627664e-06,
      "loss": 2.1617,
      "step": 65656
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.341782808303833,
      "learning_rate": 1.0343288006033258e-06,
      "loss": 2.3001,
      "step": 65657
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.0267361402511597,
      "learning_rate": 1.0341464467431383e-06,
      "loss": 2.5632,
      "step": 65658
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.097899079322815,
      "learning_rate": 1.0339641080825025e-06,
      "loss": 2.2056,
      "step": 65659
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.106706976890564,
      "learning_rate": 1.0337817846217357e-06,
      "loss": 2.4349,
      "step": 65660
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.9722791314125061,
      "learning_rate": 1.0335994763611411e-06,
      "loss": 2.1374,
      "step": 65661
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.395348072052002,
      "learning_rate": 1.033417183301033e-06,
      "loss": 2.2772,
      "step": 65662
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.1172350645065308,
      "learning_rate": 1.0332349054417156e-06,
      "loss": 2.5022,
      "step": 65663
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2079676389694214,
      "learning_rate": 1.033052642783503e-06,
      "loss": 2.2834,
      "step": 65664
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0084824562072754,
      "learning_rate": 1.0328703953266995e-06,
      "loss": 2.2352,
      "step": 65665
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1109347343444824,
      "learning_rate": 1.032688163071618e-06,
      "loss": 2.1777,
      "step": 65666
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1312004327774048,
      "learning_rate": 1.0325059460185638e-06,
      "loss": 2.2032,
      "step": 65667
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0126891136169434,
      "learning_rate": 1.032323744167849e-06,
      "loss": 2.3441,
      "step": 65668
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9838034510612488,
      "learning_rate": 1.0321415575197791e-06,
      "loss": 2.4209,
      "step": 65669
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0902061462402344,
      "learning_rate": 1.031959386074668e-06,
      "loss": 2.316,
      "step": 65670
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0853676795959473,
      "learning_rate": 1.0317772298328198e-06,
      "loss": 2.6925,
      "step": 65671
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1324834823608398,
      "learning_rate": 1.0315950887945459e-06,
      "loss": 2.361,
      "step": 65672
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0887987613677979,
      "learning_rate": 1.031412962960152e-06,
      "loss": 2.1556,
      "step": 65673
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2547454833984375,
      "learning_rate": 1.0312308523299507e-06,
      "loss": 2.3888,
      "step": 65674
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1024926900863647,
      "learning_rate": 1.031048756904247e-06,
      "loss": 2.2847,
      "step": 65675
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0474368333816528,
      "learning_rate": 1.0308666766833542e-06,
      "loss": 2.2403,
      "step": 65676
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.100292444229126,
      "learning_rate": 1.0306846116675783e-06,
      "loss": 2.2975,
      "step": 65677
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.139129400253296,
      "learning_rate": 1.030502561857225e-06,
      "loss": 2.1884,
      "step": 65678
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.133489727973938,
      "learning_rate": 1.0303205272526084e-06,
      "loss": 2.008,
      "step": 65679
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1585416793823242,
      "learning_rate": 1.0301385078540338e-06,
      "loss": 2.3011,
      "step": 65680
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0572257041931152,
      "learning_rate": 1.029956503661811e-06,
      "loss": 2.3939,
      "step": 65681
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0416377782821655,
      "learning_rate": 1.0297745146762471e-06,
      "loss": 2.4283,
      "step": 65682
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1944314241409302,
      "learning_rate": 1.0295925408976536e-06,
      "loss": 2.5453,
      "step": 65683
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0358668565750122,
      "learning_rate": 1.0294105823263367e-06,
      "loss": 2.1556,
      "step": 65684
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.238499402999878,
      "learning_rate": 1.029228638962605e-06,
      "loss": 2.4693,
      "step": 65685
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.284197211265564,
      "learning_rate": 1.029046710806766e-06,
      "loss": 1.9535,
      "step": 65686
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2605215311050415,
      "learning_rate": 1.0288647978591304e-06,
      "loss": 2.3193,
      "step": 65687
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0468049049377441,
      "learning_rate": 1.0286829001200038e-06,
      "loss": 2.3253,
      "step": 65688
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1430881023406982,
      "learning_rate": 1.0285010175896992e-06,
      "loss": 2.6116,
      "step": 65689
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0244829654693604,
      "learning_rate": 1.0283191502685185e-06,
      "loss": 2.2164,
      "step": 65690
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1000946760177612,
      "learning_rate": 1.0281372981567762e-06,
      "loss": 2.3684,
      "step": 65691
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1573272943496704,
      "learning_rate": 1.027955461254776e-06,
      "loss": 2.4071,
      "step": 65692
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.386151671409607,
      "learning_rate": 1.0277736395628302e-06,
      "loss": 2.2169,
      "step": 65693
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1278399229049683,
      "learning_rate": 1.0275918330812428e-06,
      "loss": 2.27,
      "step": 65694
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1533148288726807,
      "learning_rate": 1.0274100418103261e-06,
      "loss": 2.3207,
      "step": 65695
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3272064924240112,
      "learning_rate": 1.027228265750384e-06,
      "loss": 2.2155,
      "step": 65696
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0325902700424194,
      "learning_rate": 1.027046504901731e-06,
      "loss": 2.3229,
      "step": 65697
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9507803320884705,
      "learning_rate": 1.0268647592646675e-06,
      "loss": 2.2574,
      "step": 65698
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0186023712158203,
      "learning_rate": 1.026683028839507e-06,
      "loss": 2.4235,
      "step": 65699
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.128298282623291,
      "learning_rate": 1.0265013136265545e-06,
      "loss": 2.2575,
      "step": 65700
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1381006240844727,
      "learning_rate": 1.0263196136261212e-06,
      "loss": 2.0822,
      "step": 65701
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.025588870048523,
      "learning_rate": 1.026137928838511e-06,
      "loss": 2.2073,
      "step": 65702
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3145538568496704,
      "learning_rate": 1.0259562592640359e-06,
      "loss": 2.2097,
      "step": 65703
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1898130178451538,
      "learning_rate": 1.0257746049030038e-06,
      "loss": 2.4256,
      "step": 65704
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.083937644958496,
      "learning_rate": 1.0255929657557173e-06,
      "loss": 2.3064,
      "step": 65705
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0751835107803345,
      "learning_rate": 1.025411341822491e-06,
      "loss": 2.3541,
      "step": 65706
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1541879177093506,
      "learning_rate": 1.0252297331036287e-06,
      "loss": 2.1651,
      "step": 65707
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0223129987716675,
      "learning_rate": 1.0250481395994405e-06,
      "loss": 2.0163,
      "step": 65708
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0858888626098633,
      "learning_rate": 1.0248665613102316e-06,
      "loss": 2.5064,
      "step": 65709
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0663996934890747,
      "learning_rate": 1.024684998236315e-06,
      "loss": 2.2645,
      "step": 65710
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.417328953742981,
      "learning_rate": 1.0245034503779915e-06,
      "loss": 2.4036,
      "step": 65711
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0545587539672852,
      "learning_rate": 1.0243219177355745e-06,
      "loss": 2.3214,
      "step": 65712
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0691853761672974,
      "learning_rate": 1.0241404003093668e-06,
      "loss": 2.1312,
      "step": 65713
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.099977731704712,
      "learning_rate": 1.0239588980996818e-06,
      "loss": 2.3111,
      "step": 65714
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1843743324279785,
      "learning_rate": 1.0237774111068222e-06,
      "loss": 2.2579,
      "step": 65715
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1320632696151733,
      "learning_rate": 1.023595939331099e-06,
      "loss": 2.3639,
      "step": 65716
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1720999479293823,
      "learning_rate": 1.0234144827728177e-06,
      "loss": 2.3343,
      "step": 65717
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0210020542144775,
      "learning_rate": 1.0232330414322889e-06,
      "loss": 2.4794,
      "step": 65718
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1331599950790405,
      "learning_rate": 1.0230516153098158e-06,
      "loss": 2.2311,
      "step": 65719
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0400793552398682,
      "learning_rate": 1.0228702044057092e-06,
      "loss": 2.5015,
      "step": 65720
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1065272092819214,
      "learning_rate": 1.0226888087202758e-06,
      "loss": 2.2487,
      "step": 65721
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1638649702072144,
      "learning_rate": 1.0225074282538238e-06,
      "loss": 2.3145,
      "step": 65722
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0343366861343384,
      "learning_rate": 1.02232606300666e-06,
      "loss": 2.4413,
      "step": 65723
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.103421688079834,
      "learning_rate": 1.0221447129790919e-06,
      "loss": 2.4007,
      "step": 65724
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9908565878868103,
      "learning_rate": 1.0219633781714245e-06,
      "loss": 2.3811,
      "step": 65725
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1488215923309326,
      "learning_rate": 1.02178205858397e-06,
      "loss": 2.2746,
      "step": 65726
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1532543897628784,
      "learning_rate": 1.0216007542170315e-06,
      "loss": 2.3805,
      "step": 65727
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1155827045440674,
      "learning_rate": 1.0214194650709209e-06,
      "loss": 2.3989,
      "step": 65728
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0765963792800903,
      "learning_rate": 1.0212381911459402e-06,
      "loss": 2.2245,
      "step": 65729
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0403339862823486,
      "learning_rate": 1.0210569324424014e-06,
      "loss": 2.3056,
      "step": 65730
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3306818008422852,
      "learning_rate": 1.0208756889606075e-06,
      "loss": 2.2348,
      "step": 65731
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9617761969566345,
      "learning_rate": 1.0206944607008696e-06,
      "loss": 2.4079,
      "step": 65732
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.06190824508667,
      "learning_rate": 1.0205132476634937e-06,
      "loss": 2.3353,
      "step": 65733
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1732553243637085,
      "learning_rate": 1.0203320498487845e-06,
      "loss": 2.0636,
      "step": 65734
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2552410364151,
      "learning_rate": 1.0201508672570537e-06,
      "loss": 2.2912,
      "step": 65735
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0524423122406006,
      "learning_rate": 1.0199696998886043e-06,
      "loss": 2.3583,
      "step": 65736
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2298401594161987,
      "learning_rate": 1.0197885477437464e-06,
      "loss": 2.1717,
      "step": 65737
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2140858173370361,
      "learning_rate": 1.0196074108227872e-06,
      "loss": 2.528,
      "step": 65738
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9728774428367615,
      "learning_rate": 1.019426289126031e-06,
      "loss": 2.0857,
      "step": 65739
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1464229822158813,
      "learning_rate": 1.0192451826537853e-06,
      "loss": 2.3357,
      "step": 65740
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1143766641616821,
      "learning_rate": 1.0190640914063598e-06,
      "loss": 2.2428,
      "step": 65741
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2198030948638916,
      "learning_rate": 1.0188830153840579e-06,
      "loss": 2.4414,
      "step": 65742
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1620410680770874,
      "learning_rate": 1.0187019545871901e-06,
      "loss": 2.183,
      "step": 65743
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2292559146881104,
      "learning_rate": 1.0185209090160608e-06,
      "loss": 2.3463,
      "step": 65744
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1255695819854736,
      "learning_rate": 1.0183398786709798e-06,
      "loss": 2.3909,
      "step": 65745
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.09897780418396,
      "learning_rate": 1.0181588635522488e-06,
      "loss": 2.3307,
      "step": 65746
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.031348705291748,
      "learning_rate": 1.0179778636601812e-06,
      "loss": 2.3207,
      "step": 65747
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.981859028339386,
      "learning_rate": 1.0177968789950777e-06,
      "loss": 2.3621,
      "step": 65748
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0608707666397095,
      "learning_rate": 1.0176159095572503e-06,
      "loss": 2.2252,
      "step": 65749
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0459364652633667,
      "learning_rate": 1.0174349553470043e-06,
      "loss": 2.4887,
      "step": 65750
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1159839630126953,
      "learning_rate": 1.017254016364645e-06,
      "loss": 2.4301,
      "step": 65751
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0932362079620361,
      "learning_rate": 1.0170730926104778e-06,
      "loss": 2.2903,
      "step": 65752
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9934418201446533,
      "learning_rate": 1.0168921840848134e-06,
      "loss": 2.4793,
      "step": 65753
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1002053022384644,
      "learning_rate": 1.016711290787955e-06,
      "loss": 2.0849,
      "step": 65754
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5610969066619873,
      "learning_rate": 1.0165304127202126e-06,
      "loss": 2.5532,
      "step": 65755
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.140592098236084,
      "learning_rate": 1.0163495498818887e-06,
      "loss": 2.5532,
      "step": 65756
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1511139869689941,
      "learning_rate": 1.0161687022732946e-06,
      "loss": 2.4603,
      "step": 65757
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0684199333190918,
      "learning_rate": 1.0159878698947324e-06,
      "loss": 2.3492,
      "step": 65758
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0911900997161865,
      "learning_rate": 1.0158070527465124e-06,
      "loss": 2.1896,
      "step": 65759
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0843007564544678,
      "learning_rate": 1.0156262508289395e-06,
      "loss": 2.2501,
      "step": 65760
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1930609941482544,
      "learning_rate": 1.0154454641423195e-06,
      "loss": 2.2592,
      "step": 65761
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0823214054107666,
      "learning_rate": 1.0152646926869603e-06,
      "loss": 2.2,
      "step": 65762
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1703332662582397,
      "learning_rate": 1.0150839364631682e-06,
      "loss": 2.2626,
      "step": 65763
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1424425840377808,
      "learning_rate": 1.0149031954712462e-06,
      "loss": 2.3337,
      "step": 65764
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0790168046951294,
      "learning_rate": 1.0147224697115065e-06,
      "loss": 2.1055,
      "step": 65765
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9966570138931274,
      "learning_rate": 1.0145417591842533e-06,
      "loss": 2.5871,
      "step": 65766
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0686222314834595,
      "learning_rate": 1.0143610638897883e-06,
      "loss": 2.3371,
      "step": 65767
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1259324550628662,
      "learning_rate": 1.0141803838284258e-06,
      "loss": 2.2404,
      "step": 65768
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1172415018081665,
      "learning_rate": 1.0139997190004647e-06,
      "loss": 2.3256,
      "step": 65769
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0006598234176636,
      "learning_rate": 1.0138190694062166e-06,
      "loss": 2.3055,
      "step": 65770
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0964598655700684,
      "learning_rate": 1.0136384350459849e-06,
      "loss": 2.2298,
      "step": 65771
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1541898250579834,
      "learning_rate": 1.013457815920078e-06,
      "loss": 2.3279,
      "step": 65772
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9367032051086426,
      "learning_rate": 1.0132772120287991e-06,
      "loss": 1.9126,
      "step": 65773
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1716097593307495,
      "learning_rate": 1.0130966233724571e-06,
      "loss": 2.3787,
      "step": 65774
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2435473203659058,
      "learning_rate": 1.0129160499513558e-06,
      "loss": 2.2029,
      "step": 65775
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.6404993534088135,
      "learning_rate": 1.0127354917658073e-06,
      "loss": 2.3365,
      "step": 65776
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.087752342224121,
      "learning_rate": 1.0125549488161079e-06,
      "loss": 2.3366,
      "step": 65777
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.112524151802063,
      "learning_rate": 1.0123744211025722e-06,
      "loss": 2.2255,
      "step": 65778
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.081351399421692,
      "learning_rate": 1.0121939086254995e-06,
      "loss": 2.3403,
      "step": 65779
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0639348030090332,
      "learning_rate": 1.012013411385202e-06,
      "loss": 2.2432,
      "step": 65780
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1487030982971191,
      "learning_rate": 1.0118329293819806e-06,
      "loss": 2.408,
      "step": 65781
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0657216310501099,
      "learning_rate": 1.011652462616145e-06,
      "loss": 2.3268,
      "step": 65782
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.123228669166565,
      "learning_rate": 1.0114720110879982e-06,
      "loss": 2.1307,
      "step": 65783
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.988260805606842,
      "learning_rate": 1.0112915747978503e-06,
      "loss": 2.3025,
      "step": 65784
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0448497533798218,
      "learning_rate": 1.0111111537460017e-06,
      "loss": 2.428,
      "step": 65785
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1544091701507568,
      "learning_rate": 1.0109307479327635e-06,
      "loss": 2.3877,
      "step": 65786
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1220707893371582,
      "learning_rate": 1.0107503573584365e-06,
      "loss": 2.4583,
      "step": 65787
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.074231505393982,
      "learning_rate": 1.0105699820233316e-06,
      "loss": 2.5032,
      "step": 65788
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1603422164916992,
      "learning_rate": 1.010389621927752e-06,
      "loss": 2.4392,
      "step": 65789
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0601701736450195,
      "learning_rate": 1.0102092770720028e-06,
      "loss": 2.2344,
      "step": 65790
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9922924637794495,
      "learning_rate": 1.0100289474563895e-06,
      "loss": 2.4371,
      "step": 65791
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0849241018295288,
      "learning_rate": 1.0098486330812196e-06,
      "loss": 2.2582,
      "step": 65792
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0955079793930054,
      "learning_rate": 1.0096683339467972e-06,
      "loss": 2.3992,
      "step": 65793
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0677565336227417,
      "learning_rate": 1.0094880500534299e-06,
      "loss": 2.2395,
      "step": 65794
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1090412139892578,
      "learning_rate": 1.0093077814014218e-06,
      "loss": 2.2272,
      "step": 65795
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1322779655456543,
      "learning_rate": 1.0091275279910783e-06,
      "loss": 2.2848,
      "step": 65796
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2706072330474854,
      "learning_rate": 1.008947289822706e-06,
      "loss": 2.6108,
      "step": 65797
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.015026569366455,
      "learning_rate": 1.0087670668966087e-06,
      "loss": 2.0672,
      "step": 65798
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0459799766540527,
      "learning_rate": 1.0085868592130953e-06,
      "loss": 2.1101,
      "step": 65799
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0395182371139526,
      "learning_rate": 1.0084066667724667e-06,
      "loss": 2.2843,
      "step": 65800
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.098853349685669,
      "learning_rate": 1.0082264895750337e-06,
      "loss": 2.3676,
      "step": 65801
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2490358352661133,
      "learning_rate": 1.0080463276210972e-06,
      "loss": 2.3774,
      "step": 65802
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0239332914352417,
      "learning_rate": 1.0078661809109668e-06,
      "loss": 2.1955,
      "step": 65803
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0613441467285156,
      "learning_rate": 1.0076860494449426e-06,
      "loss": 2.3311,
      "step": 65804
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1945029497146606,
      "learning_rate": 1.007505933223335e-06,
      "loss": 2.4014,
      "step": 65805
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0729753971099854,
      "learning_rate": 1.007325832246444e-06,
      "loss": 2.4416,
      "step": 65806
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0267616510391235,
      "learning_rate": 1.0071457465145806e-06,
      "loss": 2.4531,
      "step": 65807
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0603665113449097,
      "learning_rate": 1.0069656760280466e-06,
      "loss": 2.2567,
      "step": 65808
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9713945388793945,
      "learning_rate": 1.0067856207871496e-06,
      "loss": 2.2811,
      "step": 65809
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0852962732315063,
      "learning_rate": 1.0066055807921905e-06,
      "loss": 2.2845,
      "step": 65810
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1147873401641846,
      "learning_rate": 1.006425556043481e-06,
      "loss": 2.0977,
      "step": 65811
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0111377239227295,
      "learning_rate": 1.0062455465413202e-06,
      "loss": 2.335,
      "step": 65812
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.086074948310852,
      "learning_rate": 1.0060655522860174e-06,
      "loss": 2.4143,
      "step": 65813
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.041074275970459,
      "learning_rate": 1.005885573277875e-06,
      "loss": 2.3599,
      "step": 65814
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0831050872802734,
      "learning_rate": 1.0057056095172e-06,
      "loss": 2.1777,
      "step": 65815
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.17431640625,
      "learning_rate": 1.0055256610042984e-06,
      "loss": 2.3354,
      "step": 65816
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1723604202270508,
      "learning_rate": 1.005345727739473e-06,
      "loss": 2.2737,
      "step": 65817
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9734081625938416,
      "learning_rate": 1.0051658097230276e-06,
      "loss": 2.2519,
      "step": 65818
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1064493656158447,
      "learning_rate": 1.0049859069552715e-06,
      "loss": 2.1538,
      "step": 65819
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0186026096343994,
      "learning_rate": 1.0048060194365038e-06,
      "loss": 2.1227,
      "step": 65820
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0406163930892944,
      "learning_rate": 1.004626147167036e-06,
      "loss": 2.2654,
      "step": 65821
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.166122555732727,
      "learning_rate": 1.0044462901471708e-06,
      "loss": 2.342,
      "step": 65822
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0444319248199463,
      "learning_rate": 1.004266448377209e-06,
      "loss": 2.3704,
      "step": 65823
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0061297416687012,
      "learning_rate": 1.004086621857462e-06,
      "loss": 2.1263,
      "step": 65824
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9825654029846191,
      "learning_rate": 1.0039068105882277e-06,
      "loss": 2.3438,
      "step": 65825
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.995174765586853,
      "learning_rate": 1.0037270145698185e-06,
      "loss": 2.4815,
      "step": 65826
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1340004205703735,
      "learning_rate": 1.003547233802532e-06,
      "loss": 2.3891,
      "step": 65827
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0647565126419067,
      "learning_rate": 1.0033674682866789e-06,
      "loss": 2.395,
      "step": 65828
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1026188135147095,
      "learning_rate": 1.0031877180225613e-06,
      "loss": 2.4244,
      "step": 65829
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.068221926689148,
      "learning_rate": 1.0030079830104844e-06,
      "loss": 2.3628,
      "step": 65830
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0200618505477905,
      "learning_rate": 1.0028282632507502e-06,
      "loss": 2.2886,
      "step": 65831
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0541017055511475,
      "learning_rate": 1.0026485587436673e-06,
      "loss": 2.203,
      "step": 65832
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2266440391540527,
      "learning_rate": 1.002468869489538e-06,
      "loss": 2.3789,
      "step": 65833
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0853216648101807,
      "learning_rate": 1.0022891954886681e-06,
      "loss": 2.2235,
      "step": 65834
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9914907217025757,
      "learning_rate": 1.0021095367413603e-06,
      "loss": 2.6027,
      "step": 65835
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.16975736618042,
      "learning_rate": 1.0019298932479227e-06,
      "loss": 2.3304,
      "step": 65836
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.07013738155365,
      "learning_rate": 1.0017502650086565e-06,
      "loss": 2.2926,
      "step": 65837
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.085005760192871,
      "learning_rate": 1.0015706520238689e-06,
      "loss": 2.2978,
      "step": 65838
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1634827852249146,
      "learning_rate": 1.001391054293861e-06,
      "loss": 2.2135,
      "step": 65839
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.107016921043396,
      "learning_rate": 1.0012114718189414e-06,
      "loss": 2.2388,
      "step": 65840
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1730290651321411,
      "learning_rate": 1.00103190459941e-06,
      "loss": 2.2879,
      "step": 65841
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.218697190284729,
      "learning_rate": 1.0008523526355773e-06,
      "loss": 2.3022,
      "step": 65842
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1123706102371216,
      "learning_rate": 1.0006728159277401e-06,
      "loss": 2.2561,
      "step": 65843
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1224037408828735,
      "learning_rate": 1.0004932944762091e-06,
      "loss": 2.3398,
      "step": 65844
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0017462968826294,
      "learning_rate": 1.0003137882812842e-06,
      "loss": 2.4964,
      "step": 65845
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2927205562591553,
      "learning_rate": 1.0001342973432736e-06,
      "loss": 2.2711,
      "step": 65846
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0419220924377441,
      "learning_rate": 9.999548216624787e-07,
      "loss": 2.4303,
      "step": 65847
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0827093124389648,
      "learning_rate": 9.997753612392057e-07,
      "loss": 2.1863,
      "step": 65848
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.091198444366455,
      "learning_rate": 9.995959160737555e-07,
      "loss": 2.4477,
      "step": 65849
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0696187019348145,
      "learning_rate": 9.99416486166438e-07,
      "loss": 2.4562,
      "step": 65850
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.139398455619812,
      "learning_rate": 9.992370715175526e-07,
      "loss": 2.3042,
      "step": 65851
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0183188915252686,
      "learning_rate": 9.990576721274036e-07,
      "loss": 2.3686,
      "step": 65852
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0939598083496094,
      "learning_rate": 9.988782879962988e-07,
      "loss": 2.0787,
      "step": 65853
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.096142053604126,
      "learning_rate": 9.986989191245378e-07,
      "loss": 2.265,
      "step": 65854
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0515795946121216,
      "learning_rate": 9.98519565512428e-07,
      "loss": 2.4346,
      "step": 65855
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0246624946594238,
      "learning_rate": 9.983402271602738e-07,
      "loss": 2.3444,
      "step": 65856
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0525609254837036,
      "learning_rate": 9.98160904068377e-07,
      "loss": 2.4564,
      "step": 65857
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.064520239830017,
      "learning_rate": 9.979815962370398e-07,
      "loss": 2.164,
      "step": 65858
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1452651023864746,
      "learning_rate": 9.978023036665707e-07,
      "loss": 2.2655,
      "step": 65859
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0839142799377441,
      "learning_rate": 9.976230263572705e-07,
      "loss": 2.1078,
      "step": 65860
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0389846563339233,
      "learning_rate": 9.974437643094458e-07,
      "loss": 2.3378,
      "step": 65861
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1532444953918457,
      "learning_rate": 9.972645175233974e-07,
      "loss": 2.4333,
      "step": 65862
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1238521337509155,
      "learning_rate": 9.970852859994328e-07,
      "loss": 2.3706,
      "step": 65863
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.195464849472046,
      "learning_rate": 9.969060697378509e-07,
      "loss": 2.3034,
      "step": 65864
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1165956258773804,
      "learning_rate": 9.967268687389608e-07,
      "loss": 2.4964,
      "step": 65865
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.039914608001709,
      "learning_rate": 9.965476830030628e-07,
      "loss": 2.3207,
      "step": 65866
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0550721883773804,
      "learning_rate": 9.963685125304633e-07,
      "loss": 2.491,
      "step": 65867
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1541928052902222,
      "learning_rate": 9.961893573214642e-07,
      "loss": 2.1385,
      "step": 65868
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2014137506484985,
      "learning_rate": 9.960102173763697e-07,
      "loss": 2.4739,
      "step": 65869
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9853547215461731,
      "learning_rate": 9.958310926954805e-07,
      "loss": 2.2114,
      "step": 65870
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.201422929763794,
      "learning_rate": 9.956519832791067e-07,
      "loss": 2.1344,
      "step": 65871
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0213083028793335,
      "learning_rate": 9.954728891275456e-07,
      "loss": 2.3396,
      "step": 65872
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1623672246932983,
      "learning_rate": 9.952938102411048e-07,
      "loss": 2.166,
      "step": 65873
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2396591901779175,
      "learning_rate": 9.95114746620086e-07,
      "loss": 2.1367,
      "step": 65874
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1501983404159546,
      "learning_rate": 9.94935698264794e-07,
      "loss": 2.2203,
      "step": 65875
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0675429105758667,
      "learning_rate": 9.947566651755302e-07,
      "loss": 2.1775,
      "step": 65876
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0154246091842651,
      "learning_rate": 9.945776473526025e-07,
      "loss": 2.3159,
      "step": 65877
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0717209577560425,
      "learning_rate": 9.943986447963106e-07,
      "loss": 2.581,
      "step": 65878
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.057828664779663,
      "learning_rate": 9.942196575069575e-07,
      "loss": 2.4226,
      "step": 65879
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1130151748657227,
      "learning_rate": 9.940406854848495e-07,
      "loss": 2.4525,
      "step": 65880
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1773569583892822,
      "learning_rate": 9.938617287302866e-07,
      "loss": 2.1898,
      "step": 65881
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0616157054901123,
      "learning_rate": 9.936827872435762e-07,
      "loss": 2.2985,
      "step": 65882
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1650642156600952,
      "learning_rate": 9.935038610250203e-07,
      "loss": 2.3918,
      "step": 65883
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1409869194030762,
      "learning_rate": 9.933249500749198e-07,
      "loss": 2.3404,
      "step": 65884
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.336263656616211,
      "learning_rate": 9.93146054393579e-07,
      "loss": 2.2257,
      "step": 65885
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1717294454574585,
      "learning_rate": 9.92967173981304e-07,
      "loss": 2.3112,
      "step": 65886
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2659705877304077,
      "learning_rate": 9.927883088383928e-07,
      "loss": 2.3217,
      "step": 65887
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.012492299079895,
      "learning_rate": 9.926094589651547e-07,
      "loss": 2.3307,
      "step": 65888
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0195339918136597,
      "learning_rate": 9.924306243618876e-07,
      "loss": 2.4629,
      "step": 65889
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9965139627456665,
      "learning_rate": 9.922518050288986e-07,
      "loss": 2.3397,
      "step": 65890
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1186892986297607,
      "learning_rate": 9.92073000966488e-07,
      "loss": 2.2395,
      "step": 65891
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0282776355743408,
      "learning_rate": 9.91894212174962e-07,
      "loss": 2.3619,
      "step": 65892
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0602998733520508,
      "learning_rate": 9.917154386546202e-07,
      "loss": 2.3113,
      "step": 65893
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0838521718978882,
      "learning_rate": 9.91536680405769e-07,
      "loss": 2.4059,
      "step": 65894
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2025275230407715,
      "learning_rate": 9.913579374287098e-07,
      "loss": 2.3819,
      "step": 65895
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2401586771011353,
      "learning_rate": 9.91179209723745e-07,
      "loss": 2.3568,
      "step": 65896
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0764275789260864,
      "learning_rate": 9.910004972911768e-07,
      "loss": 2.305,
      "step": 65897
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0153515338897705,
      "learning_rate": 9.90821800131312e-07,
      "loss": 2.2799,
      "step": 65898
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1501575708389282,
      "learning_rate": 9.906431182444486e-07,
      "loss": 2.1723,
      "step": 65899
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2222480773925781,
      "learning_rate": 9.904644516308936e-07,
      "loss": 2.1273,
      "step": 65900
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0175271034240723,
      "learning_rate": 9.902858002909465e-07,
      "loss": 2.1144,
      "step": 65901
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0631641149520874,
      "learning_rate": 9.90107164224915e-07,
      "loss": 2.4323,
      "step": 65902
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0666422843933105,
      "learning_rate": 9.899285434330953e-07,
      "loss": 2.1628,
      "step": 65903
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.20522141456604,
      "learning_rate": 9.89749937915797e-07,
      "loss": 2.2948,
      "step": 65904
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1161785125732422,
      "learning_rate": 9.89571347673317e-07,
      "loss": 2.2232,
      "step": 65905
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0679364204406738,
      "learning_rate": 9.893927727059639e-07,
      "loss": 2.3334,
      "step": 65906
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1594815254211426,
      "learning_rate": 9.89214213014036e-07,
      "loss": 2.215,
      "step": 65907
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0936548709869385,
      "learning_rate": 9.890356685978375e-07,
      "loss": 2.4117,
      "step": 65908
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1069293022155762,
      "learning_rate": 9.888571394576695e-07,
      "loss": 2.2944,
      "step": 65909
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1290194988250732,
      "learning_rate": 9.886786255938375e-07,
      "loss": 2.4561,
      "step": 65910
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1155791282653809,
      "learning_rate": 9.885001270066408e-07,
      "loss": 2.3216,
      "step": 65911
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.236853003501892,
      "learning_rate": 9.88321643696386e-07,
      "loss": 2.2013,
      "step": 65912
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1047773361206055,
      "learning_rate": 9.881431756633741e-07,
      "loss": 2.2583,
      "step": 65913
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0776689052581787,
      "learning_rate": 9.879647229079048e-07,
      "loss": 2.2803,
      "step": 65914
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0790058374404907,
      "learning_rate": 9.877862854302845e-07,
      "loss": 2.2368,
      "step": 65915
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0819288492202759,
      "learning_rate": 9.876078632308117e-07,
      "loss": 2.2347,
      "step": 65916
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3137248754501343,
      "learning_rate": 9.874294563097942e-07,
      "loss": 2.3223,
      "step": 65917
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1208775043487549,
      "learning_rate": 9.872510646675293e-07,
      "loss": 2.2634,
      "step": 65918
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1930896043777466,
      "learning_rate": 9.870726883043236e-07,
      "loss": 2.2727,
      "step": 65919
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0676428079605103,
      "learning_rate": 9.868943272204756e-07,
      "loss": 2.469,
      "step": 65920
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.093587875366211,
      "learning_rate": 9.86715981416294e-07,
      "loss": 2.0676,
      "step": 65921
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2021441459655762,
      "learning_rate": 9.86537650892072e-07,
      "loss": 2.2577,
      "step": 65922
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2290736436843872,
      "learning_rate": 9.863593356481193e-07,
      "loss": 2.5,
      "step": 65923
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1601123809814453,
      "learning_rate": 9.861810356847335e-07,
      "loss": 2.2271,
      "step": 65924
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0026735067367554,
      "learning_rate": 9.860027510022208e-07,
      "loss": 2.3922,
      "step": 65925
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9679048657417297,
      "learning_rate": 9.8582448160088e-07,
      "loss": 2.258,
      "step": 65926
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.092453122138977,
      "learning_rate": 9.856462274810163e-07,
      "loss": 2.3099,
      "step": 65927
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0701968669891357,
      "learning_rate": 9.854679886429286e-07,
      "loss": 2.4309,
      "step": 65928
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.055893898010254,
      "learning_rate": 9.85289765086923e-07,
      "loss": 2.4486,
      "step": 65929
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.500259280204773,
      "learning_rate": 9.851115568132985e-07,
      "loss": 2.2157,
      "step": 65930
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.053676962852478,
      "learning_rate": 9.84933363822359e-07,
      "loss": 2.2185,
      "step": 65931
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.999277651309967,
      "learning_rate": 9.847551861144055e-07,
      "loss": 2.3485,
      "step": 65932
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4220868349075317,
      "learning_rate": 9.84577023689741e-07,
      "loss": 2.174,
      "step": 65933
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1246830224990845,
      "learning_rate": 9.84398876548668e-07,
      "loss": 2.4015,
      "step": 65934
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.356053113937378,
      "learning_rate": 9.842207446914865e-07,
      "loss": 2.4859,
      "step": 65935
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.071481704711914,
      "learning_rate": 9.840426281184979e-07,
      "loss": 2.1997,
      "step": 65936
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0771905183792114,
      "learning_rate": 9.838645268300085e-07,
      "loss": 2.4182,
      "step": 65937
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4936367273330688,
      "learning_rate": 9.836864408263147e-07,
      "loss": 2.114,
      "step": 65938
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0913169384002686,
      "learning_rate": 9.83508370107723e-07,
      "loss": 2.0374,
      "step": 65939
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.089300513267517,
      "learning_rate": 9.833303146745343e-07,
      "loss": 2.3821,
      "step": 65940
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2868801355361938,
      "learning_rate": 9.831522745270472e-07,
      "loss": 2.1851,
      "step": 65941
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.025783658027649,
      "learning_rate": 9.82974249665567e-07,
      "loss": 2.4792,
      "step": 65942
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.075751543045044,
      "learning_rate": 9.827962400903935e-07,
      "loss": 2.2475,
      "step": 65943
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0568064451217651,
      "learning_rate": 9.826182458018307e-07,
      "loss": 2.5294,
      "step": 65944
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0136373043060303,
      "learning_rate": 9.824402668001788e-07,
      "loss": 2.1956,
      "step": 65945
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1674845218658447,
      "learning_rate": 9.822623030857403e-07,
      "loss": 2.5069,
      "step": 65946
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0363975763320923,
      "learning_rate": 9.820843546588166e-07,
      "loss": 2.3732,
      "step": 65947
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0789744853973389,
      "learning_rate": 9.819064215197083e-07,
      "loss": 2.1666,
      "step": 65948
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0022592544555664,
      "learning_rate": 9.817285036687175e-07,
      "loss": 2.3063,
      "step": 65949
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1159802675247192,
      "learning_rate": 9.815506011061471e-07,
      "loss": 2.3631,
      "step": 65950
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0902256965637207,
      "learning_rate": 9.81372713832296e-07,
      "loss": 2.5091,
      "step": 65951
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0629463195800781,
      "learning_rate": 9.811948418474703e-07,
      "loss": 2.2217,
      "step": 65952
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.016200304031372,
      "learning_rate": 9.810169851519657e-07,
      "loss": 2.1619,
      "step": 65953
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1345282793045044,
      "learning_rate": 9.808391437460895e-07,
      "loss": 2.1911,
      "step": 65954
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0865826606750488,
      "learning_rate": 9.806613176301383e-07,
      "loss": 2.5384,
      "step": 65955
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0825867652893066,
      "learning_rate": 9.804835068044184e-07,
      "loss": 2.1801,
      "step": 65956
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0867398977279663,
      "learning_rate": 9.80305711269226e-07,
      "loss": 2.4448,
      "step": 65957
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9898474216461182,
      "learning_rate": 9.801279310248667e-07,
      "loss": 2.2386,
      "step": 65958
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1701682806015015,
      "learning_rate": 9.799501660716393e-07,
      "loss": 2.2127,
      "step": 65959
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2242553234100342,
      "learning_rate": 9.797724164098477e-07,
      "loss": 2.4903,
      "step": 65960
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0155750513076782,
      "learning_rate": 9.795946820397916e-07,
      "loss": 2.3741,
      "step": 65961
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0554701089859009,
      "learning_rate": 9.794169629617733e-07,
      "loss": 2.2216,
      "step": 65962
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1441142559051514,
      "learning_rate": 9.792392591760902e-07,
      "loss": 2.5144,
      "step": 65963
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0646488666534424,
      "learning_rate": 9.790615706830487e-07,
      "loss": 2.3503,
      "step": 65964
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0839256048202515,
      "learning_rate": 9.788838974829462e-07,
      "loss": 2.4533,
      "step": 65965
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0104851722717285,
      "learning_rate": 9.787062395760871e-07,
      "loss": 2.0781,
      "step": 65966
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.10752272605896,
      "learning_rate": 9.7852859696277e-07,
      "loss": 2.2992,
      "step": 65967
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1750035285949707,
      "learning_rate": 9.78350969643298e-07,
      "loss": 2.5064,
      "step": 65968
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0674879550933838,
      "learning_rate": 9.78173357617972e-07,
      "loss": 2.3314,
      "step": 65969
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0208755731582642,
      "learning_rate": 9.779957608870905e-07,
      "loss": 2.2,
      "step": 65970
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0917470455169678,
      "learning_rate": 9.778181794509588e-07,
      "loss": 2.2765,
      "step": 65971
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0544530153274536,
      "learning_rate": 9.776406133098726e-07,
      "loss": 2.1253,
      "step": 65972
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1137058734893799,
      "learning_rate": 9.774630624641391e-07,
      "loss": 2.3244,
      "step": 65973
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.083832025527954,
      "learning_rate": 9.772855269140557e-07,
      "loss": 2.4444,
      "step": 65974
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1099554300308228,
      "learning_rate": 9.771080066599238e-07,
      "loss": 2.3178,
      "step": 65975
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1693897247314453,
      "learning_rate": 9.769305017020424e-07,
      "loss": 2.4967,
      "step": 65976
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1315059661865234,
      "learning_rate": 9.767530120407166e-07,
      "loss": 2.5328,
      "step": 65977
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.669877290725708,
      "learning_rate": 9.765755376762431e-07,
      "loss": 2.2906,
      "step": 65978
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.095370888710022,
      "learning_rate": 9.763980786089266e-07,
      "loss": 2.3989,
      "step": 65979
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9967866539955139,
      "learning_rate": 9.762206348390647e-07,
      "loss": 2.1205,
      "step": 65980
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2221864461898804,
      "learning_rate": 9.760432063669612e-07,
      "loss": 2.3923,
      "step": 65981
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0927666425704956,
      "learning_rate": 9.758657931929138e-07,
      "loss": 2.3442,
      "step": 65982
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0664480924606323,
      "learning_rate": 9.75688395317227e-07,
      "loss": 2.365,
      "step": 65983
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1960644721984863,
      "learning_rate": 9.755110127401956e-07,
      "loss": 2.1579,
      "step": 65984
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.078645944595337,
      "learning_rate": 9.753336454621276e-07,
      "loss": 2.3538,
      "step": 65985
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.079566240310669,
      "learning_rate": 9.75156293483318e-07,
      "loss": 2.3258,
      "step": 65986
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.022027611732483,
      "learning_rate": 9.749789568040734e-07,
      "loss": 2.1276,
      "step": 65987
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1692242622375488,
      "learning_rate": 9.74801635424687e-07,
      "loss": 2.379,
      "step": 65988
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0453920364379883,
      "learning_rate": 9.746243293454639e-07,
      "loss": 2.3511,
      "step": 65989
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.193379282951355,
      "learning_rate": 9.744470385667026e-07,
      "loss": 2.3974,
      "step": 65990
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1890767812728882,
      "learning_rate": 9.742697630887066e-07,
      "loss": 2.1576,
      "step": 65991
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0529823303222656,
      "learning_rate": 9.740925029117732e-07,
      "loss": 2.1509,
      "step": 65992
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0433392524719238,
      "learning_rate": 9.739152580362066e-07,
      "loss": 2.2678,
      "step": 65993
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2026429176330566,
      "learning_rate": 9.73738028462302e-07,
      "loss": 2.1733,
      "step": 65994
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4849131107330322,
      "learning_rate": 9.735608141903662e-07,
      "loss": 2.3412,
      "step": 65995
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.030981421470642,
      "learning_rate": 9.733836152206955e-07,
      "loss": 2.186,
      "step": 65996
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.068926215171814,
      "learning_rate": 9.732064315535894e-07,
      "loss": 2.2282,
      "step": 65997
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1380674839019775,
      "learning_rate": 9.730292631893523e-07,
      "loss": 2.3814,
      "step": 65998
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0722897052764893,
      "learning_rate": 9.728521101282795e-07,
      "loss": 2.2937,
      "step": 65999
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1551762819290161,
      "learning_rate": 9.726749723706763e-07,
      "loss": 2.4716,
      "step": 66000
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0747805833816528,
      "learning_rate": 9.724978499168413e-07,
      "loss": 2.4808,
      "step": 66001
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2915472984313965,
      "learning_rate": 9.723207427670744e-07,
      "loss": 2.4612,
      "step": 66002
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0320054292678833,
      "learning_rate": 9.72143650921673e-07,
      "loss": 2.6244,
      "step": 66003
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1214187145233154,
      "learning_rate": 9.719665743809425e-07,
      "loss": 2.3095,
      "step": 66004
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0969923734664917,
      "learning_rate": 9.717895131451793e-07,
      "loss": 2.2469,
      "step": 66005
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0268627405166626,
      "learning_rate": 9.716124672146866e-07,
      "loss": 2.3796,
      "step": 66006
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.066611886024475,
      "learning_rate": 9.714354365897604e-07,
      "loss": 2.1734,
      "step": 66007
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0516215562820435,
      "learning_rate": 9.712584212707056e-07,
      "loss": 2.3433,
      "step": 66008
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.053384780883789,
      "learning_rate": 9.710814212578169e-07,
      "loss": 2.4288,
      "step": 66009
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1415988206863403,
      "learning_rate": 9.70904436551401e-07,
      "loss": 2.513,
      "step": 66010
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1230210065841675,
      "learning_rate": 9.70727467151751e-07,
      "loss": 2.4036,
      "step": 66011
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1026644706726074,
      "learning_rate": 9.705505130591731e-07,
      "loss": 2.3951,
      "step": 66012
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1253172159194946,
      "learning_rate": 9.70373574273964e-07,
      "loss": 2.2872,
      "step": 66013
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1561145782470703,
      "learning_rate": 9.701966507964244e-07,
      "loss": 2.3216,
      "step": 66014
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0479087829589844,
      "learning_rate": 9.70019742626852e-07,
      "loss": 2.4079,
      "step": 66015
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.101353406906128,
      "learning_rate": 9.698428497655498e-07,
      "loss": 2.3443,
      "step": 66016
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.162040114402771,
      "learning_rate": 9.696659722128154e-07,
      "loss": 2.1317,
      "step": 66017
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0626649856567383,
      "learning_rate": 9.694891099689519e-07,
      "loss": 2.3273,
      "step": 66018
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0584200620651245,
      "learning_rate": 9.693122630342543e-07,
      "loss": 2.2696,
      "step": 66019
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9877815246582031,
      "learning_rate": 9.691354314090274e-07,
      "loss": 2.2927,
      "step": 66020
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0351403951644897,
      "learning_rate": 9.68958615093567e-07,
      "loss": 2.4301,
      "step": 66021
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1038867235183716,
      "learning_rate": 9.687818140881766e-07,
      "loss": 2.2765,
      "step": 66022
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3003528118133545,
      "learning_rate": 9.686050283931514e-07,
      "loss": 2.5052,
      "step": 66023
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.040104627609253,
      "learning_rate": 9.684282580087955e-07,
      "loss": 2.3813,
      "step": 66024
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1691380739212036,
      "learning_rate": 9.682515029354068e-07,
      "loss": 2.2806,
      "step": 66025
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1572586297988892,
      "learning_rate": 9.680747631732834e-07,
      "loss": 2.1952,
      "step": 66026
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0946584939956665,
      "learning_rate": 9.678980387227276e-07,
      "loss": 2.2949,
      "step": 66027
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0510610342025757,
      "learning_rate": 9.677213295840382e-07,
      "loss": 2.1434,
      "step": 66028
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1230709552764893,
      "learning_rate": 9.675446357575114e-07,
      "loss": 2.3258,
      "step": 66029
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.066927433013916,
      "learning_rate": 9.673679572434525e-07,
      "loss": 2.4455,
      "step": 66030
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0175414085388184,
      "learning_rate": 9.671912940421579e-07,
      "loss": 2.2753,
      "step": 66031
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1236159801483154,
      "learning_rate": 9.670146461539254e-07,
      "loss": 2.507,
      "step": 66032
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1603232622146606,
      "learning_rate": 9.66838013579058e-07,
      "loss": 2.2574,
      "step": 66033
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.114403247833252,
      "learning_rate": 9.666613963178518e-07,
      "loss": 2.3508,
      "step": 66034
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1447118520736694,
      "learning_rate": 9.664847943706102e-07,
      "loss": 2.2649,
      "step": 66035
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4011200666427612,
      "learning_rate": 9.663082077376285e-07,
      "loss": 2.1996,
      "step": 66036
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.119283676147461,
      "learning_rate": 9.661316364192097e-07,
      "loss": 2.4797,
      "step": 66037
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4972389936447144,
      "learning_rate": 9.659550804156493e-07,
      "loss": 2.405,
      "step": 66038
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0364469289779663,
      "learning_rate": 9.6577853972725e-07,
      "loss": 2.2754,
      "step": 66039
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2199513912200928,
      "learning_rate": 9.656020143543099e-07,
      "loss": 2.4023,
      "step": 66040
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0645700693130493,
      "learning_rate": 9.654255042971295e-07,
      "loss": 2.2799,
      "step": 66041
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2249606847763062,
      "learning_rate": 9.65249009556003e-07,
      "loss": 2.3518,
      "step": 66042
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0650571584701538,
      "learning_rate": 9.650725301312359e-07,
      "loss": 2.2933,
      "step": 66043
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.084304928779602,
      "learning_rate": 9.648960660231231e-07,
      "loss": 2.5631,
      "step": 66044
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2594822645187378,
      "learning_rate": 9.64719617231966e-07,
      "loss": 2.3321,
      "step": 66045
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.021780014038086,
      "learning_rate": 9.645431837580622e-07,
      "loss": 2.207,
      "step": 66046
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0609183311462402,
      "learning_rate": 9.64366765601714e-07,
      "loss": 2.3758,
      "step": 66047
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1333452463150024,
      "learning_rate": 9.641903627632154e-07,
      "loss": 2.0334,
      "step": 66048
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1779372692108154,
      "learning_rate": 9.640139752428712e-07,
      "loss": 2.2586,
      "step": 66049
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9659696221351624,
      "learning_rate": 9.638376030409747e-07,
      "loss": 2.3065,
      "step": 66050
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9945339560508728,
      "learning_rate": 9.636612461578298e-07,
      "loss": 2.4123,
      "step": 66051
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.124582290649414,
      "learning_rate": 9.634849045937333e-07,
      "loss": 2.3301,
      "step": 66052
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1187516450881958,
      "learning_rate": 9.633085783489837e-07,
      "loss": 2.187,
      "step": 66053
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.069498062133789,
      "learning_rate": 9.631322674238786e-07,
      "loss": 2.3144,
      "step": 66054
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1107324361801147,
      "learning_rate": 9.629559718187209e-07,
      "loss": 2.42,
      "step": 66055
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0431549549102783,
      "learning_rate": 9.62779691533805e-07,
      "loss": 2.1554,
      "step": 66056
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0198074579238892,
      "learning_rate": 9.62603426569435e-07,
      "loss": 2.3939,
      "step": 66057
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2897090911865234,
      "learning_rate": 9.624271769259053e-07,
      "loss": 2.4284,
      "step": 66058
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.224656343460083,
      "learning_rate": 9.622509426035154e-07,
      "loss": 2.4447,
      "step": 66059
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0821950435638428,
      "learning_rate": 9.620747236025663e-07,
      "loss": 2.3024,
      "step": 66060
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1760551929473877,
      "learning_rate": 9.618985199233532e-07,
      "loss": 2.4768,
      "step": 66061
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.015497088432312,
      "learning_rate": 9.617223315661783e-07,
      "loss": 2.3721,
      "step": 66062
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1840674877166748,
      "learning_rate": 9.615461585313367e-07,
      "loss": 2.5223,
      "step": 66063
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.276798129081726,
      "learning_rate": 9.613700008191317e-07,
      "loss": 2.3576,
      "step": 66064
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0643665790557861,
      "learning_rate": 9.611938584298575e-07,
      "loss": 2.4003,
      "step": 66065
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0875698328018188,
      "learning_rate": 9.610177313638191e-07,
      "loss": 2.286,
      "step": 66066
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1415590047836304,
      "learning_rate": 9.608416196213055e-07,
      "loss": 2.4526,
      "step": 66067
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0995750427246094,
      "learning_rate": 9.60665523202623e-07,
      "loss": 2.3207,
      "step": 66068
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9423365592956543,
      "learning_rate": 9.604894421080657e-07,
      "loss": 2.185,
      "step": 66069
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0643075704574585,
      "learning_rate": 9.603133763379346e-07,
      "loss": 2.3267,
      "step": 66070
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1005452871322632,
      "learning_rate": 9.601373258925272e-07,
      "loss": 2.2887,
      "step": 66071
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0332247018814087,
      "learning_rate": 9.599612907721435e-07,
      "loss": 2.3364,
      "step": 66072
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1041291952133179,
      "learning_rate": 9.597852709770784e-07,
      "loss": 2.2732,
      "step": 66073
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.021174430847168,
      "learning_rate": 9.596092665076351e-07,
      "loss": 2.2609,
      "step": 66074
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0731316804885864,
      "learning_rate": 9.594332773641068e-07,
      "loss": 2.3556,
      "step": 66075
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1821510791778564,
      "learning_rate": 9.592573035467967e-07,
      "loss": 2.3369,
      "step": 66076
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1110576391220093,
      "learning_rate": 9.590813450559989e-07,
      "loss": 2.1199,
      "step": 66077
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1919643878936768,
      "learning_rate": 9.589054018920164e-07,
      "loss": 2.4428,
      "step": 66078
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0866668224334717,
      "learning_rate": 9.587294740551433e-07,
      "loss": 2.5075,
      "step": 66079
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1366854906082153,
      "learning_rate": 9.585535615456798e-07,
      "loss": 2.4183,
      "step": 66080
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0778535604476929,
      "learning_rate": 9.58377664363922e-07,
      "loss": 2.3857,
      "step": 66081
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0267225503921509,
      "learning_rate": 9.582017825101719e-07,
      "loss": 2.2379,
      "step": 66082
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0622981786727905,
      "learning_rate": 9.580259159847237e-07,
      "loss": 2.2736,
      "step": 66083
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9813594818115234,
      "learning_rate": 9.578500647878786e-07,
      "loss": 2.3518,
      "step": 66084
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2019098997116089,
      "learning_rate": 9.576742289199325e-07,
      "loss": 2.6263,
      "step": 66085
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0032217502593994,
      "learning_rate": 9.574984083811857e-07,
      "loss": 2.2008,
      "step": 66086
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1267709732055664,
      "learning_rate": 9.57322603171935e-07,
      "loss": 2.237,
      "step": 66087
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1403006315231323,
      "learning_rate": 9.571468132924767e-07,
      "loss": 2.3395,
      "step": 66088
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0327242612838745,
      "learning_rate": 9.569710387431118e-07,
      "loss": 2.3313,
      "step": 66089
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0601885318756104,
      "learning_rate": 9.567952795241353e-07,
      "loss": 1.8961,
      "step": 66090
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1090428829193115,
      "learning_rate": 9.5661953563585e-07,
      "loss": 2.3472,
      "step": 66091
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0863009691238403,
      "learning_rate": 9.564438070785499e-07,
      "loss": 2.4067,
      "step": 66092
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1296887397766113,
      "learning_rate": 9.562680938525336e-07,
      "loss": 2.2911,
      "step": 66093
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1032685041427612,
      "learning_rate": 9.56092395958097e-07,
      "loss": 2.3369,
      "step": 66094
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1404204368591309,
      "learning_rate": 9.559167133955428e-07,
      "loss": 2.4074,
      "step": 66095
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0838078260421753,
      "learning_rate": 9.557410461651628e-07,
      "loss": 2.2033,
      "step": 66096
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9763537049293518,
      "learning_rate": 9.555653942672616e-07,
      "loss": 2.3118,
      "step": 66097
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1151806116104126,
      "learning_rate": 9.55389757702131e-07,
      "loss": 2.3877,
      "step": 66098
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1601409912109375,
      "learning_rate": 9.552141364700728e-07,
      "loss": 2.4915,
      "step": 66099
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.107930064201355,
      "learning_rate": 9.550385305713817e-07,
      "loss": 2.5394,
      "step": 66100
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.219299554824829,
      "learning_rate": 9.54862940006359e-07,
      "loss": 2.2592,
      "step": 66101
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0663726329803467,
      "learning_rate": 9.546873647752974e-07,
      "loss": 2.4636,
      "step": 66102
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0275474786758423,
      "learning_rate": 9.545118048785006e-07,
      "loss": 2.2611,
      "step": 66103
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0603889226913452,
      "learning_rate": 9.543362603162598e-07,
      "loss": 2.1007,
      "step": 66104
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.168880820274353,
      "learning_rate": 9.54160731088879e-07,
      "loss": 2.1117,
      "step": 66105
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1423648595809937,
      "learning_rate": 9.539852171966513e-07,
      "loss": 2.4423,
      "step": 66106
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.039018154144287,
      "learning_rate": 9.538097186398764e-07,
      "loss": 2.3229,
      "step": 66107
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0551077127456665,
      "learning_rate": 9.536342354188489e-07,
      "loss": 2.3653,
      "step": 66108
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0537126064300537,
      "learning_rate": 9.534587675338702e-07,
      "loss": 2.391,
      "step": 66109
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2654868364334106,
      "learning_rate": 9.532833149852339e-07,
      "loss": 2.2218,
      "step": 66110
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1846638917922974,
      "learning_rate": 9.531078777732405e-07,
      "loss": 2.4278,
      "step": 66111
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0824437141418457,
      "learning_rate": 9.529324558981856e-07,
      "loss": 2.3399,
      "step": 66112
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0522973537445068,
      "learning_rate": 9.527570493603688e-07,
      "loss": 2.5575,
      "step": 66113
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0422967672348022,
      "learning_rate": 9.525816581600855e-07,
      "loss": 2.2299,
      "step": 66114
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0778311491012573,
      "learning_rate": 9.524062822976332e-07,
      "loss": 2.4596,
      "step": 66115
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1008237600326538,
      "learning_rate": 9.522309217733106e-07,
      "loss": 2.408,
      "step": 66116
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0246734619140625,
      "learning_rate": 9.520555765874118e-07,
      "loss": 2.0522,
      "step": 66117
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1219191551208496,
      "learning_rate": 9.518802467402377e-07,
      "loss": 2.2933,
      "step": 66118
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1010674238204956,
      "learning_rate": 9.517049322320848e-07,
      "loss": 2.4712,
      "step": 66119
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.071664571762085,
      "learning_rate": 9.515296330632484e-07,
      "loss": 2.3102,
      "step": 66120
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.009093165397644,
      "learning_rate": 9.51354349234026e-07,
      "loss": 1.9972,
      "step": 66121
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0809338092803955,
      "learning_rate": 9.511790807447174e-07,
      "loss": 2.0905,
      "step": 66122
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0422428846359253,
      "learning_rate": 9.510038275956157e-07,
      "loss": 2.2417,
      "step": 66123
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.020155668258667,
      "learning_rate": 9.508285897870217e-07,
      "loss": 2.3451,
      "step": 66124
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1106047630310059,
      "learning_rate": 9.506533673192298e-07,
      "loss": 2.5176,
      "step": 66125
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0282466411590576,
      "learning_rate": 9.504781601925406e-07,
      "loss": 2.2612,
      "step": 66126
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.05710768699646,
      "learning_rate": 9.503029684072451e-07,
      "loss": 2.1581,
      "step": 66127
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.193098783493042,
      "learning_rate": 9.501277919636476e-07,
      "loss": 2.3192,
      "step": 66128
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1461373567581177,
      "learning_rate": 9.499526308620388e-07,
      "loss": 2.3101,
      "step": 66129
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0571902990341187,
      "learning_rate": 9.497774851027197e-07,
      "loss": 2.5931,
      "step": 66130
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1571967601776123,
      "learning_rate": 9.496023546859845e-07,
      "loss": 2.1934,
      "step": 66131
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.163403034210205,
      "learning_rate": 9.49427239612134e-07,
      "loss": 2.4634,
      "step": 66132
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.085327386856079,
      "learning_rate": 9.492521398814603e-07,
      "loss": 2.3628,
      "step": 66133
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0695734024047852,
      "learning_rate": 9.49077055494263e-07,
      "loss": 2.3816,
      "step": 66134
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2527631521224976,
      "learning_rate": 9.489019864508376e-07,
      "loss": 2.0594,
      "step": 66135
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2141659259796143,
      "learning_rate": 9.487269327514825e-07,
      "loss": 2.2495,
      "step": 66136
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0790367126464844,
      "learning_rate": 9.485518943964911e-07,
      "loss": 2.3882,
      "step": 66137
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9599781036376953,
      "learning_rate": 9.483768713861663e-07,
      "loss": 2.2756,
      "step": 66138
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9757148027420044,
      "learning_rate": 9.482018637207979e-07,
      "loss": 2.3027,
      "step": 66139
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0459709167480469,
      "learning_rate": 9.480268714006879e-07,
      "loss": 2.2855,
      "step": 66140
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2044486999511719,
      "learning_rate": 9.478518944261283e-07,
      "loss": 2.3391,
      "step": 66141
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0736430883407593,
      "learning_rate": 9.476769327974211e-07,
      "loss": 2.115,
      "step": 66142
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2070220708847046,
      "learning_rate": 9.475019865148604e-07,
      "loss": 2.3018,
      "step": 66143
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1688488721847534,
      "learning_rate": 9.473270555787395e-07,
      "loss": 2.3275,
      "step": 66144
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.045783281326294,
      "learning_rate": 9.471521399893601e-07,
      "loss": 2.5956,
      "step": 66145
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0484707355499268,
      "learning_rate": 9.469772397470167e-07,
      "loss": 2.5432,
      "step": 66146
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2057512998580933,
      "learning_rate": 9.468023548520044e-07,
      "loss": 2.4012,
      "step": 66147
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1172442436218262,
      "learning_rate": 9.46627485304622e-07,
      "loss": 2.2394,
      "step": 66148
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1304339170455933,
      "learning_rate": 9.464526311051647e-07,
      "loss": 2.1796,
      "step": 66149
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9753009080886841,
      "learning_rate": 9.462777922539279e-07,
      "loss": 2.3677,
      "step": 66150
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0620136260986328,
      "learning_rate": 9.461029687512102e-07,
      "loss": 2.3508,
      "step": 66151
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.08002769947052,
      "learning_rate": 9.459281605973058e-07,
      "loss": 2.305,
      "step": 66152
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.115996241569519,
      "learning_rate": 9.457533677925146e-07,
      "loss": 2.4232,
      "step": 66153
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2004228830337524,
      "learning_rate": 9.455785903371273e-07,
      "loss": 2.2109,
      "step": 66154
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9743998646736145,
      "learning_rate": 9.454038282314471e-07,
      "loss": 2.3833,
      "step": 66155
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0883228778839111,
      "learning_rate": 9.452290814757637e-07,
      "loss": 2.5703,
      "step": 66156
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0287628173828125,
      "learning_rate": 9.450543500703779e-07,
      "loss": 2.2827,
      "step": 66157
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2204660177230835,
      "learning_rate": 9.448796340155841e-07,
      "loss": 2.6337,
      "step": 66158
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0393167734146118,
      "learning_rate": 9.447049333116787e-07,
      "loss": 2.1761,
      "step": 66159
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2355108261108398,
      "learning_rate": 9.445302479589569e-07,
      "loss": 2.195,
      "step": 66160
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0625649690628052,
      "learning_rate": 9.443555779577162e-07,
      "loss": 2.3236,
      "step": 66161
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0973423719406128,
      "learning_rate": 9.441809233082522e-07,
      "loss": 2.485,
      "step": 66162
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0389223098754883,
      "learning_rate": 9.44006284010861e-07,
      "loss": 2.4177,
      "step": 66163
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.045640468597412,
      "learning_rate": 9.438316600658381e-07,
      "loss": 2.1368,
      "step": 66164
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9148045778274536,
      "learning_rate": 9.436570514734822e-07,
      "loss": 2.213,
      "step": 66165
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0405192375183105,
      "learning_rate": 9.434824582340841e-07,
      "loss": 2.0847,
      "step": 66166
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1231588125228882,
      "learning_rate": 9.433078803479456e-07,
      "loss": 2.4968,
      "step": 66167
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0354760885238647,
      "learning_rate": 9.43133317815359e-07,
      "loss": 2.2941,
      "step": 66168
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.6608941555023193,
      "learning_rate": 9.429587706366217e-07,
      "loss": 2.463,
      "step": 66169
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0664187669754028,
      "learning_rate": 9.4278423881203e-07,
      "loss": 2.1184,
      "step": 66170
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1158981323242188,
      "learning_rate": 9.426097223418773e-07,
      "loss": 2.2991,
      "step": 66171
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2579599618911743,
      "learning_rate": 9.42435221226462e-07,
      "loss": 2.4416,
      "step": 66172
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1439275741577148,
      "learning_rate": 9.422607354660795e-07,
      "loss": 2.494,
      "step": 66173
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9677484631538391,
      "learning_rate": 9.42086265061023e-07,
      "loss": 2.3535,
      "step": 66174
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0928924083709717,
      "learning_rate": 9.419118100115921e-07,
      "loss": 2.291,
      "step": 66175
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1989094018936157,
      "learning_rate": 9.417373703180809e-07,
      "loss": 2.3521,
      "step": 66176
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9763418436050415,
      "learning_rate": 9.415629459807841e-07,
      "loss": 2.1527,
      "step": 66177
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0816718339920044,
      "learning_rate": 9.413885369999998e-07,
      "loss": 2.232,
      "step": 66178
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1801252365112305,
      "learning_rate": 9.412141433760191e-07,
      "loss": 2.2009,
      "step": 66179
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1567121744155884,
      "learning_rate": 9.410397651091441e-07,
      "loss": 2.1946,
      "step": 66180
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1126830577850342,
      "learning_rate": 9.408654021996643e-07,
      "loss": 2.3429,
      "step": 66181
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0771845579147339,
      "learning_rate": 9.406910546478809e-07,
      "loss": 2.2942,
      "step": 66182
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0656548738479614,
      "learning_rate": 9.405167224540845e-07,
      "loss": 2.4276,
      "step": 66183
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1515498161315918,
      "learning_rate": 9.40342405618574e-07,
      "loss": 2.1408,
      "step": 66184
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0134608745574951,
      "learning_rate": 9.401681041416444e-07,
      "loss": 2.1454,
      "step": 66185
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.039915680885315,
      "learning_rate": 9.399938180235901e-07,
      "loss": 2.4351,
      "step": 66186
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1466697454452515,
      "learning_rate": 9.398195472647042e-07,
      "loss": 2.3131,
      "step": 66187
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.997482180595398,
      "learning_rate": 9.396452918652876e-07,
      "loss": 2.227,
      "step": 66188
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.13442063331604,
      "learning_rate": 9.394710518256322e-07,
      "loss": 2.1786,
      "step": 66189
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0429966449737549,
      "learning_rate": 9.392968271460345e-07,
      "loss": 2.2764,
      "step": 66190
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1085922718048096,
      "learning_rate": 9.391226178267887e-07,
      "loss": 2.3085,
      "step": 66191
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0796318054199219,
      "learning_rate": 9.389484238681923e-07,
      "loss": 2.2583,
      "step": 66192
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0157345533370972,
      "learning_rate": 9.387742452705373e-07,
      "loss": 2.3736,
      "step": 66193
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0830211639404297,
      "learning_rate": 9.386000820341224e-07,
      "loss": 2.2069,
      "step": 66194
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1148571968078613,
      "learning_rate": 9.384259341592405e-07,
      "loss": 2.3146,
      "step": 66195
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.075059175491333,
      "learning_rate": 9.382518016461894e-07,
      "loss": 2.4514,
      "step": 66196
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.062668800354004,
      "learning_rate": 9.380776844952622e-07,
      "loss": 2.1382,
      "step": 66197
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0792104005813599,
      "learning_rate": 9.379035827067539e-07,
      "loss": 2.621,
      "step": 66198
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2298386096954346,
      "learning_rate": 9.377294962809602e-07,
      "loss": 2.1223,
      "step": 66199
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1088448762893677,
      "learning_rate": 9.375554252181762e-07,
      "loss": 2.211,
      "step": 66200
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1656731367111206,
      "learning_rate": 9.373813695186962e-07,
      "loss": 2.3366,
      "step": 66201
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.117331624031067,
      "learning_rate": 9.372073291828187e-07,
      "loss": 2.4504,
      "step": 66202
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0280925035476685,
      "learning_rate": 9.370333042108337e-07,
      "loss": 2.2517,
      "step": 66203
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.085904836654663,
      "learning_rate": 9.368592946030397e-07,
      "loss": 2.2546,
      "step": 66204
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2583390474319458,
      "learning_rate": 9.36685300359732e-07,
      "loss": 2.313,
      "step": 66205
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2364180088043213,
      "learning_rate": 9.365113214812016e-07,
      "loss": 2.111,
      "step": 66206
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1030558347702026,
      "learning_rate": 9.363373579677482e-07,
      "loss": 2.2438,
      "step": 66207
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0312832593917847,
      "learning_rate": 9.361634098196637e-07,
      "loss": 2.2493,
      "step": 66208
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.085025429725647,
      "learning_rate": 9.359894770372446e-07,
      "loss": 2.3746,
      "step": 66209
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0746591091156006,
      "learning_rate": 9.358155596207841e-07,
      "loss": 2.1997,
      "step": 66210
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0940333604812622,
      "learning_rate": 9.356416575705807e-07,
      "loss": 2.2374,
      "step": 66211
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1234118938446045,
      "learning_rate": 9.354677708869231e-07,
      "loss": 2.3174,
      "step": 66212
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1899633407592773,
      "learning_rate": 9.352938995701122e-07,
      "loss": 2.0654,
      "step": 66213
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0992929935455322,
      "learning_rate": 9.351200436204378e-07,
      "loss": 2.2663,
      "step": 66214
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0686383247375488,
      "learning_rate": 9.349462030381984e-07,
      "loss": 2.3803,
      "step": 66215
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0828142166137695,
      "learning_rate": 9.34772377823685e-07,
      "loss": 2.4313,
      "step": 66216
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1070854663848877,
      "learning_rate": 9.345985679771974e-07,
      "loss": 2.3505,
      "step": 66217
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0624454021453857,
      "learning_rate": 9.344247734990253e-07,
      "loss": 2.1679,
      "step": 66218
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1058179140090942,
      "learning_rate": 9.342509943894662e-07,
      "loss": 2.301,
      "step": 66219
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1499273777008057,
      "learning_rate": 9.340772306488133e-07,
      "loss": 2.5258,
      "step": 66220
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3276201486587524,
      "learning_rate": 9.339034822773629e-07,
      "loss": 2.2409,
      "step": 66221
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1044962406158447,
      "learning_rate": 9.337297492754071e-07,
      "loss": 2.3608,
      "step": 66222
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.995761513710022,
      "learning_rate": 9.335560316432435e-07,
      "loss": 2.216,
      "step": 66223
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0751078128814697,
      "learning_rate": 9.33382329381165e-07,
      "loss": 2.3233,
      "step": 66224
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9562925100326538,
      "learning_rate": 9.332086424894649e-07,
      "loss": 2.1958,
      "step": 66225
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0743061304092407,
      "learning_rate": 9.330349709684372e-07,
      "loss": 2.298,
      "step": 66226
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1026270389556885,
      "learning_rate": 9.328613148183807e-07,
      "loss": 2.4076,
      "step": 66227
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.198562741279602,
      "learning_rate": 9.32687674039584e-07,
      "loss": 2.3131,
      "step": 66228
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0465697050094604,
      "learning_rate": 9.325140486323459e-07,
      "loss": 2.2258,
      "step": 66229
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0242986679077148,
      "learning_rate": 9.323404385969581e-07,
      "loss": 2.4625,
      "step": 66230
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0611387491226196,
      "learning_rate": 9.321668439337173e-07,
      "loss": 2.0543,
      "step": 66231
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.036543846130371,
      "learning_rate": 9.319932646429175e-07,
      "loss": 2.2225,
      "step": 66232
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.106132984161377,
      "learning_rate": 9.318197007248487e-07,
      "loss": 2.2195,
      "step": 66233
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.083941102027893,
      "learning_rate": 9.316461521798115e-07,
      "loss": 2.4036,
      "step": 66234
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0295569896697998,
      "learning_rate": 9.314726190080948e-07,
      "loss": 2.2499,
      "step": 66235
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1362030506134033,
      "learning_rate": 9.312991012099959e-07,
      "loss": 2.4122,
      "step": 66236
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2052547931671143,
      "learning_rate": 9.311255987858093e-07,
      "loss": 2.2593,
      "step": 66237
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1182904243469238,
      "learning_rate": 9.309521117358266e-07,
      "loss": 2.3802,
      "step": 66238
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2450804710388184,
      "learning_rate": 9.307786400603425e-07,
      "loss": 2.566,
      "step": 66239
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.003827691078186,
      "learning_rate": 9.30605183759653e-07,
      "loss": 2.3123,
      "step": 66240
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3159533739089966,
      "learning_rate": 9.304317428340493e-07,
      "loss": 2.3406,
      "step": 66241
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1159101724624634,
      "learning_rate": 9.302583172838287e-07,
      "loss": 2.3493,
      "step": 66242
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9691391587257385,
      "learning_rate": 9.300849071092822e-07,
      "loss": 2.435,
      "step": 66243
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1218862533569336,
      "learning_rate": 9.299115123107072e-07,
      "loss": 2.3422,
      "step": 66244
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.030418872833252,
      "learning_rate": 9.297381328883925e-07,
      "loss": 2.3121,
      "step": 66245
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0354411602020264,
      "learning_rate": 9.29564768842638e-07,
      "loss": 2.253,
      "step": 66246
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.03680419921875,
      "learning_rate": 9.293914201737331e-07,
      "loss": 2.1836,
      "step": 66247
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2962169647216797,
      "learning_rate": 9.292180868819744e-07,
      "loss": 2.4081,
      "step": 66248
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0553187131881714,
      "learning_rate": 9.29044768967653e-07,
      "loss": 2.2488,
      "step": 66249
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.024169683456421,
      "learning_rate": 9.28871466431066e-07,
      "loss": 2.3583,
      "step": 66250
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1473519802093506,
      "learning_rate": 9.286981792725058e-07,
      "loss": 1.9832,
      "step": 66251
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0158518552780151,
      "learning_rate": 9.285249074922653e-07,
      "loss": 2.1994,
      "step": 66252
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0802603960037231,
      "learning_rate": 9.283516510906376e-07,
      "loss": 2.3015,
      "step": 66253
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2422009706497192,
      "learning_rate": 9.281784100679203e-07,
      "loss": 2.2335,
      "step": 66254
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4472017288208008,
      "learning_rate": 9.280051844244009e-07,
      "loss": 2.2285,
      "step": 66255
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.124599814414978,
      "learning_rate": 9.278319741603803e-07,
      "loss": 2.2311,
      "step": 66256
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1232980489730835,
      "learning_rate": 9.276587792761449e-07,
      "loss": 2.3982,
      "step": 66257
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.135063648223877,
      "learning_rate": 9.274855997719945e-07,
      "loss": 2.046,
      "step": 66258
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.104058027267456,
      "learning_rate": 9.273124356482189e-07,
      "loss": 2.2117,
      "step": 66259
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0993703603744507,
      "learning_rate": 9.271392869051143e-07,
      "loss": 2.4294,
      "step": 66260
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.06178617477417,
      "learning_rate": 9.26966153542972e-07,
      "loss": 2.3076,
      "step": 66261
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.111453890800476,
      "learning_rate": 9.267930355620847e-07,
      "loss": 2.2679,
      "step": 66262
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0381202697753906,
      "learning_rate": 9.266199329627501e-07,
      "loss": 2.5217,
      "step": 66263
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1211180686950684,
      "learning_rate": 9.26446845745258e-07,
      "loss": 2.0719,
      "step": 66264
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2971875667572021,
      "learning_rate": 9.262737739099015e-07,
      "loss": 2.3806,
      "step": 66265
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0963445901870728,
      "learning_rate": 9.261007174569769e-07,
      "loss": 2.4545,
      "step": 66266
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.105814814567566,
      "learning_rate": 9.259276763867763e-07,
      "loss": 2.2808,
      "step": 66267
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.241920828819275,
      "learning_rate": 9.257546506995918e-07,
      "loss": 2.5479,
      "step": 66268
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0847691297531128,
      "learning_rate": 9.255816403957185e-07,
      "loss": 2.1278,
      "step": 66269
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0225993394851685,
      "learning_rate": 9.254086454754474e-07,
      "loss": 2.2251,
      "step": 66270
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.20879065990448,
      "learning_rate": 9.252356659390749e-07,
      "loss": 2.3237,
      "step": 66271
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.116447925567627,
      "learning_rate": 9.250627017868907e-07,
      "loss": 2.1813,
      "step": 66272
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1633968353271484,
      "learning_rate": 9.248897530191914e-07,
      "loss": 2.4296,
      "step": 66273
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1528449058532715,
      "learning_rate": 9.247168196362677e-07,
      "loss": 2.3088,
      "step": 66274
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.073521375656128,
      "learning_rate": 9.24543901638415e-07,
      "loss": 2.4226,
      "step": 66275
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0541560649871826,
      "learning_rate": 9.243709990259231e-07,
      "loss": 2.2495,
      "step": 66276
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0399925708770752,
      "learning_rate": 9.241981117990917e-07,
      "loss": 2.2821,
      "step": 66277
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1687147617340088,
      "learning_rate": 9.240252399582062e-07,
      "loss": 2.2925,
      "step": 66278
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1800936460494995,
      "learning_rate": 9.23852383503564e-07,
      "loss": 2.1376,
      "step": 66279
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1139897108078003,
      "learning_rate": 9.23679542435455e-07,
      "loss": 2.3086,
      "step": 66280
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.214984655380249,
      "learning_rate": 9.235067167541767e-07,
      "loss": 2.1952,
      "step": 66281
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.155989646911621,
      "learning_rate": 9.233339064600177e-07,
      "loss": 2.2581,
      "step": 66282
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0475456714630127,
      "learning_rate": 9.231611115532746e-07,
      "loss": 2.251,
      "step": 66283
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.163779377937317,
      "learning_rate": 9.22988332034237e-07,
      "loss": 2.2554,
      "step": 66284
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2063847780227661,
      "learning_rate": 9.228155679032014e-07,
      "loss": 2.2097,
      "step": 66285
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1281189918518066,
      "learning_rate": 9.226428191604564e-07,
      "loss": 2.3234,
      "step": 66286
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1762624979019165,
      "learning_rate": 9.224700858062996e-07,
      "loss": 2.3847,
      "step": 66287
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3577038049697876,
      "learning_rate": 9.222973678410207e-07,
      "loss": 2.1731,
      "step": 66288
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.068447232246399,
      "learning_rate": 9.221246652649118e-07,
      "loss": 2.3704,
      "step": 66289
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0600847005844116,
      "learning_rate": 9.219519780782693e-07,
      "loss": 2.3868,
      "step": 66290
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0222300291061401,
      "learning_rate": 9.21779306281384e-07,
      "loss": 2.3413,
      "step": 66291
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.125540852546692,
      "learning_rate": 9.216066498745457e-07,
      "loss": 2.2406,
      "step": 66292
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0390806198120117,
      "learning_rate": 9.21434008858052e-07,
      "loss": 2.4928,
      "step": 66293
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0970920324325562,
      "learning_rate": 9.212613832321937e-07,
      "loss": 2.3745,
      "step": 66294
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.143348217010498,
      "learning_rate": 9.210887729972606e-07,
      "loss": 2.1649,
      "step": 66295
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0536651611328125,
      "learning_rate": 9.209161781535491e-07,
      "loss": 2.2419,
      "step": 66296
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0179082155227661,
      "learning_rate": 9.207435987013503e-07,
      "loss": 2.2635,
      "step": 66297
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0835517644882202,
      "learning_rate": 9.20571034640958e-07,
      "loss": 2.3522,
      "step": 66298
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0310314893722534,
      "learning_rate": 9.203984859726622e-07,
      "loss": 2.334,
      "step": 66299
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1283998489379883,
      "learning_rate": 9.202259526967583e-07,
      "loss": 2.5069,
      "step": 66300
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.02717924118042,
      "learning_rate": 9.200534348135359e-07,
      "loss": 2.2461,
      "step": 66301
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.124549388885498,
      "learning_rate": 9.198809323232905e-07,
      "loss": 2.1185,
      "step": 66302
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0904334783554077,
      "learning_rate": 9.197084452263139e-07,
      "loss": 2.3108,
      "step": 66303
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3120681047439575,
      "learning_rate": 9.19535973522897e-07,
      "loss": 2.256,
      "step": 66304
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1266118288040161,
      "learning_rate": 9.193635172133308e-07,
      "loss": 2.299,
      "step": 66305
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2219854593276978,
      "learning_rate": 9.191910762979117e-07,
      "loss": 2.2782,
      "step": 66306
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.046820878982544,
      "learning_rate": 9.190186507769283e-07,
      "loss": 2.1563,
      "step": 66307
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1003443002700806,
      "learning_rate": 9.188462406506771e-07,
      "loss": 2.3871,
      "step": 66308
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0060421228408813,
      "learning_rate": 9.186738459194456e-07,
      "loss": 2.3888,
      "step": 66309
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.046186923980713,
      "learning_rate": 9.185014665835301e-07,
      "loss": 2.3142,
      "step": 66310
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1117751598358154,
      "learning_rate": 9.183291026432184e-07,
      "loss": 2.3002,
      "step": 66311
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9971953630447388,
      "learning_rate": 9.18156754098809e-07,
      "loss": 2.3807,
      "step": 66312
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9818658232688904,
      "learning_rate": 9.179844209505872e-07,
      "loss": 2.215,
      "step": 66313
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0850650072097778,
      "learning_rate": 9.178121031988519e-07,
      "loss": 2.4731,
      "step": 66314
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1119425296783447,
      "learning_rate": 9.176398008438902e-07,
      "loss": 2.2664,
      "step": 66315
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.111212968826294,
      "learning_rate": 9.174675138859946e-07,
      "loss": 2.2167,
      "step": 66316
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0558832883834839,
      "learning_rate": 9.1729524232546e-07,
      "loss": 2.3229,
      "step": 66317
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.149444818496704,
      "learning_rate": 9.171229861625774e-07,
      "loss": 2.3123,
      "step": 66318
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2161697149276733,
      "learning_rate": 9.169507453976356e-07,
      "loss": 2.4483,
      "step": 66319
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1239783763885498,
      "learning_rate": 9.16778520030931e-07,
      "loss": 2.2105,
      "step": 66320
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1143008470535278,
      "learning_rate": 9.166063100627521e-07,
      "loss": 2.1924,
      "step": 66321
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.100247859954834,
      "learning_rate": 9.164341154933953e-07,
      "loss": 2.3254,
      "step": 66322
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.015687346458435,
      "learning_rate": 9.162619363231484e-07,
      "loss": 2.3656,
      "step": 66323
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0304445028305054,
      "learning_rate": 9.160897725523032e-07,
      "loss": 2.4025,
      "step": 66324
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0847513675689697,
      "learning_rate": 9.159176241811551e-07,
      "loss": 2.3683,
      "step": 66325
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0971609354019165,
      "learning_rate": 9.157454912099917e-07,
      "loss": 2.4816,
      "step": 66326
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9511449337005615,
      "learning_rate": 9.155733736391093e-07,
      "loss": 2.1684,
      "step": 66327
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1833646297454834,
      "learning_rate": 9.154012714687955e-07,
      "loss": 2.2401,
      "step": 66328
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0725398063659668,
      "learning_rate": 9.152291846993455e-07,
      "loss": 2.2348,
      "step": 66329
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.191234827041626,
      "learning_rate": 9.150571133310493e-07,
      "loss": 2.2427,
      "step": 66330
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0295215845108032,
      "learning_rate": 9.148850573641988e-07,
      "loss": 2.1373,
      "step": 66331
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1567354202270508,
      "learning_rate": 9.147130167990837e-07,
      "loss": 2.4962,
      "step": 66332
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0713859796524048,
      "learning_rate": 9.145409916359993e-07,
      "loss": 2.288,
      "step": 66333
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.6634206771850586,
      "learning_rate": 9.143689818752332e-07,
      "loss": 2.3076,
      "step": 66334
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.119307518005371,
      "learning_rate": 9.14196987517082e-07,
      "loss": 2.5149,
      "step": 66335
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9807471036911011,
      "learning_rate": 9.14025008561833e-07,
      "loss": 2.064,
      "step": 66336
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1652289628982544,
      "learning_rate": 9.138530450097805e-07,
      "loss": 2.3449,
      "step": 66337
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.052211880683899,
      "learning_rate": 9.136810968612131e-07,
      "loss": 2.4156,
      "step": 66338
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9843865633010864,
      "learning_rate": 9.135091641164262e-07,
      "loss": 2.3729,
      "step": 66339
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.983325719833374,
      "learning_rate": 9.133372467757062e-07,
      "loss": 2.3459,
      "step": 66340
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2159292697906494,
      "learning_rate": 9.131653448393507e-07,
      "loss": 2.2705,
      "step": 66341
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2263559103012085,
      "learning_rate": 9.12993458307645e-07,
      "loss": 2.4397,
      "step": 66342
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3695577383041382,
      "learning_rate": 9.128215871808876e-07,
      "loss": 2.3206,
      "step": 66343
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0960466861724854,
      "learning_rate": 9.126497314593619e-07,
      "loss": 2.2078,
      "step": 66344
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2006136178970337,
      "learning_rate": 9.124778911433652e-07,
      "loss": 2.1261,
      "step": 66345
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0057471990585327,
      "learning_rate": 9.12306066233184e-07,
      "loss": 2.4544,
      "step": 66346
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0645054578781128,
      "learning_rate": 9.121342567291147e-07,
      "loss": 2.2482,
      "step": 66347
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0711567401885986,
      "learning_rate": 9.119624626314438e-07,
      "loss": 2.5664,
      "step": 66348
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1990870237350464,
      "learning_rate": 9.117906839404667e-07,
      "loss": 2.0673,
      "step": 66349
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1930437088012695,
      "learning_rate": 9.11618920656473e-07,
      "loss": 2.416,
      "step": 66350
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.122869849205017,
      "learning_rate": 9.114471727797514e-07,
      "loss": 2.1399,
      "step": 66351
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0669872760772705,
      "learning_rate": 9.112754403105973e-07,
      "loss": 2.1821,
      "step": 66352
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0801022052764893,
      "learning_rate": 9.111037232492981e-07,
      "loss": 2.3262,
      "step": 66353
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0918208360671997,
      "learning_rate": 9.109320215961481e-07,
      "loss": 2.361,
      "step": 66354
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0867226123809814,
      "learning_rate": 9.10760335351436e-07,
      "loss": 2.3792,
      "step": 66355
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.143256664276123,
      "learning_rate": 9.10588664515456e-07,
      "loss": 2.2419,
      "step": 66356
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.333658218383789,
      "learning_rate": 9.104170090884945e-07,
      "loss": 2.2211,
      "step": 66357
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9594265222549438,
      "learning_rate": 9.102453690708458e-07,
      "loss": 2.3546,
      "step": 66358
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0696909427642822,
      "learning_rate": 9.100737444627983e-07,
      "loss": 2.5955,
      "step": 66359
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1147804260253906,
      "learning_rate": 9.099021352646464e-07,
      "loss": 2.3592,
      "step": 66360
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1103482246398926,
      "learning_rate": 9.097305414766777e-07,
      "loss": 2.2795,
      "step": 66361
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2487188577651978,
      "learning_rate": 9.095589630991863e-07,
      "loss": 2.4195,
      "step": 66362
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.102883219718933,
      "learning_rate": 9.093874001324588e-07,
      "loss": 2.07,
      "step": 66363
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0921281576156616,
      "learning_rate": 9.092158525767913e-07,
      "loss": 2.229,
      "step": 66364
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9734950065612793,
      "learning_rate": 9.090443204324706e-07,
      "loss": 2.3935,
      "step": 66365
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.006097674369812,
      "learning_rate": 9.088728036997896e-07,
      "loss": 2.3699,
      "step": 66366
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2455778121948242,
      "learning_rate": 9.08701302379037e-07,
      "loss": 2.3205,
      "step": 66367
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.061939001083374,
      "learning_rate": 9.08529816470507e-07,
      "loss": 2.5173,
      "step": 66368
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1076399087905884,
      "learning_rate": 9.083583459744871e-07,
      "loss": 2.245,
      "step": 66369
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.109606385231018,
      "learning_rate": 9.081868908912705e-07,
      "loss": 2.4313,
      "step": 66370
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2395739555358887,
      "learning_rate": 9.080154512211436e-07,
      "loss": 2.3774,
      "step": 66371
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1344472169876099,
      "learning_rate": 9.078440269644018e-07,
      "loss": 2.471,
      "step": 66372
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.155387043952942,
      "learning_rate": 9.076726181213325e-07,
      "loss": 2.2906,
      "step": 66373
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9753870368003845,
      "learning_rate": 9.075012246922288e-07,
      "loss": 2.3479,
      "step": 66374
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1092227697372437,
      "learning_rate": 9.073298466773783e-07,
      "loss": 2.2349,
      "step": 66375
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0791517496109009,
      "learning_rate": 9.071584840770742e-07,
      "loss": 2.3527,
      "step": 66376
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.021939992904663,
      "learning_rate": 9.069871368916061e-07,
      "loss": 2.2166,
      "step": 66377
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.080255150794983,
      "learning_rate": 9.06815805121265e-07,
      "loss": 2.2609,
      "step": 66378
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9649993181228638,
      "learning_rate": 9.066444887663406e-07,
      "loss": 2.4631,
      "step": 66379
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1685043573379517,
      "learning_rate": 9.064731878271227e-07,
      "loss": 2.4476,
      "step": 66380
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0456022024154663,
      "learning_rate": 9.063019023039033e-07,
      "loss": 2.3565,
      "step": 66381
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0420747995376587,
      "learning_rate": 9.061306321969732e-07,
      "loss": 2.4743,
      "step": 66382
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1761839389801025,
      "learning_rate": 9.059593775066189e-07,
      "loss": 2.1512,
      "step": 66383
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9980713725090027,
      "learning_rate": 9.057881382331346e-07,
      "loss": 2.3218,
      "step": 66384
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.212085485458374,
      "learning_rate": 9.056169143768101e-07,
      "loss": 2.4194,
      "step": 66385
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6040748357772827,
      "learning_rate": 9.05445705937933e-07,
      "loss": 2.4111,
      "step": 66386
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1884551048278809,
      "learning_rate": 9.052745129167972e-07,
      "loss": 2.293,
      "step": 66387
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0029850006103516,
      "learning_rate": 9.051033353136896e-07,
      "loss": 2.4009,
      "step": 66388
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0534414052963257,
      "learning_rate": 9.049321731289041e-07,
      "loss": 2.3072,
      "step": 66389
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0820022821426392,
      "learning_rate": 9.047610263627271e-07,
      "loss": 2.6006,
      "step": 66390
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.061879277229309,
      "learning_rate": 9.045898950154508e-07,
      "loss": 2.3528,
      "step": 66391
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.043907642364502,
      "learning_rate": 9.044187790873649e-07,
      "loss": 2.2607,
      "step": 66392
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1674283742904663,
      "learning_rate": 9.042476785787602e-07,
      "loss": 2.3006,
      "step": 66393
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0501827001571655,
      "learning_rate": 9.040765934899242e-07,
      "loss": 2.3025,
      "step": 66394
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0613945722579956,
      "learning_rate": 9.039055238211502e-07,
      "loss": 2.1519,
      "step": 66395
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1048312187194824,
      "learning_rate": 9.037344695727279e-07,
      "loss": 2.6131,
      "step": 66396
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1228324174880981,
      "learning_rate": 9.035634307449459e-07,
      "loss": 2.5622,
      "step": 66397
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1730529069900513,
      "learning_rate": 9.033924073380929e-07,
      "loss": 2.4359,
      "step": 66398
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1241872310638428,
      "learning_rate": 9.032213993524608e-07,
      "loss": 2.1274,
      "step": 66399
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2050111293792725,
      "learning_rate": 9.030504067883383e-07,
      "loss": 2.3344,
      "step": 66400
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.030334949493408,
      "learning_rate": 9.028794296460186e-07,
      "loss": 2.1968,
      "step": 66401
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0258162021636963,
      "learning_rate": 9.027084679257858e-07,
      "loss": 2.3297,
      "step": 66402
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1514167785644531,
      "learning_rate": 9.025375216279353e-07,
      "loss": 2.2502,
      "step": 66403
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0858893394470215,
      "learning_rate": 9.023665907527523e-07,
      "loss": 2.2711,
      "step": 66404
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.202950119972229,
      "learning_rate": 9.021956753005312e-07,
      "loss": 2.2833,
      "step": 66405
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9888533353805542,
      "learning_rate": 9.020247752715594e-07,
      "loss": 2.2987,
      "step": 66406
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0548018217086792,
      "learning_rate": 9.018538906661245e-07,
      "loss": 2.3419,
      "step": 66407
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1306523084640503,
      "learning_rate": 9.016830214845206e-07,
      "loss": 2.3731,
      "step": 66408
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0449490547180176,
      "learning_rate": 9.015121677270344e-07,
      "loss": 2.5376,
      "step": 66409
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.125689148902893,
      "learning_rate": 9.013413293939543e-07,
      "loss": 2.2456,
      "step": 66410
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0158021450042725,
      "learning_rate": 9.011705064855747e-07,
      "loss": 2.2387,
      "step": 66411
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0953012704849243,
      "learning_rate": 9.009996990021819e-07,
      "loss": 2.3677,
      "step": 66412
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2498914003372192,
      "learning_rate": 9.008289069440635e-07,
      "loss": 2.2417,
      "step": 66413
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.13460373878479,
      "learning_rate": 9.006581303115136e-07,
      "loss": 2.4189,
      "step": 66414
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0996887683868408,
      "learning_rate": 9.004873691048166e-07,
      "loss": 2.35,
      "step": 66415
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.937983512878418,
      "learning_rate": 9.003166233242678e-07,
      "loss": 2.3479,
      "step": 66416
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.544184684753418,
      "learning_rate": 9.001458929701524e-07,
      "loss": 2.4202,
      "step": 66417
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0070102214813232,
      "learning_rate": 8.999751780427624e-07,
      "loss": 2.3203,
      "step": 66418
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.151048183441162,
      "learning_rate": 8.998044785423843e-07,
      "loss": 2.431,
      "step": 66419
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0230762958526611,
      "learning_rate": 8.996337944693101e-07,
      "loss": 2.3045,
      "step": 66420
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1098850965499878,
      "learning_rate": 8.994631258238273e-07,
      "loss": 2.4207,
      "step": 66421
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1078771352767944,
      "learning_rate": 8.992924726062302e-07,
      "loss": 2.2754,
      "step": 66422
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9465874433517456,
      "learning_rate": 8.991218348167996e-07,
      "loss": 2.3258,
      "step": 66423
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.9988060593605042,
      "learning_rate": 8.98951212455832e-07,
      "loss": 2.422,
      "step": 66424
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1035710573196411,
      "learning_rate": 8.987806055236114e-07,
      "loss": 2.2069,
      "step": 66425
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1147000789642334,
      "learning_rate": 8.986100140204313e-07,
      "loss": 2.3957,
      "step": 66426
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1692280769348145,
      "learning_rate": 8.984394379465777e-07,
      "loss": 2.3569,
      "step": 66427
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.1311956644058228,
      "learning_rate": 8.982688773023429e-07,
      "loss": 2.3861,
      "step": 66428
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0527483224868774,
      "learning_rate": 8.980983320880121e-07,
      "loss": 2.2491,
      "step": 66429
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2643333673477173,
      "learning_rate": 8.979278023038784e-07,
      "loss": 2.267,
      "step": 66430
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.0749839544296265,
      "learning_rate": 8.977572879502283e-07,
      "loss": 2.3908,
      "step": 66431
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.174917459487915,
      "learning_rate": 8.975867890273526e-07,
      "loss": 2.3767,
      "step": 66432
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0885002613067627,
      "learning_rate": 8.9741630553554e-07,
      "loss": 2.2475,
      "step": 66433
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.128818392753601,
      "learning_rate": 8.97245837475077e-07,
      "loss": 2.3611,
      "step": 66434
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1246399879455566,
      "learning_rate": 8.970753848462566e-07,
      "loss": 2.3075,
      "step": 66435
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2070811986923218,
      "learning_rate": 8.969049476493663e-07,
      "loss": 2.3286,
      "step": 66436
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1148526668548584,
      "learning_rate": 8.967345258846915e-07,
      "loss": 2.4349,
      "step": 66437
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7716971635818481,
      "learning_rate": 8.965641195525266e-07,
      "loss": 2.2762,
      "step": 66438
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1377685070037842,
      "learning_rate": 8.963937286531555e-07,
      "loss": 2.3594,
      "step": 66439
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0083016157150269,
      "learning_rate": 8.962233531868725e-07,
      "loss": 2.4839,
      "step": 66440
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1598535776138306,
      "learning_rate": 8.96052993153963e-07,
      "loss": 2.4748,
      "step": 66441
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9959077835083008,
      "learning_rate": 8.958826485547134e-07,
      "loss": 2.3226,
      "step": 66442
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.27535879611969,
      "learning_rate": 8.95712319389418e-07,
      "loss": 2.2854,
      "step": 66443
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.080003023147583,
      "learning_rate": 8.955420056583619e-07,
      "loss": 2.2698,
      "step": 66444
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0647077560424805,
      "learning_rate": 8.953717073618351e-07,
      "loss": 2.3135,
      "step": 66445
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0689369440078735,
      "learning_rate": 8.95201424500125e-07,
      "loss": 2.2582,
      "step": 66446
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1921740770339966,
      "learning_rate": 8.950311570735237e-07,
      "loss": 2.3126,
      "step": 66447
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0734344720840454,
      "learning_rate": 8.948609050823165e-07,
      "loss": 2.3553,
      "step": 66448
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.107441782951355,
      "learning_rate": 8.946906685267931e-07,
      "loss": 2.1945,
      "step": 66449
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1281676292419434,
      "learning_rate": 8.945204474072411e-07,
      "loss": 2.3098,
      "step": 66450
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0882936716079712,
      "learning_rate": 8.943502417239513e-07,
      "loss": 2.3302,
      "step": 66451
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0991231203079224,
      "learning_rate": 8.941800514772081e-07,
      "loss": 2.4142,
      "step": 66452
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0344483852386475,
      "learning_rate": 8.940098766673055e-07,
      "loss": 2.1457,
      "step": 66453
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1746196746826172,
      "learning_rate": 8.938397172945279e-07,
      "loss": 2.5425,
      "step": 66454
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1515653133392334,
      "learning_rate": 8.936695733591661e-07,
      "loss": 2.4933,
      "step": 66455
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0688179731369019,
      "learning_rate": 8.934994448615064e-07,
      "loss": 2.3135,
      "step": 66456
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1263399124145508,
      "learning_rate": 8.933293318018399e-07,
      "loss": 2.2041,
      "step": 66457
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1593427658081055,
      "learning_rate": 8.931592341804518e-07,
      "loss": 2.4321,
      "step": 66458
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0606220960617065,
      "learning_rate": 8.929891519976341e-07,
      "loss": 2.3972,
      "step": 66459
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1100314855575562,
      "learning_rate": 8.928190852536712e-07,
      "loss": 2.3682,
      "step": 66460
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.147538185119629,
      "learning_rate": 8.926490339488559e-07,
      "loss": 2.2582,
      "step": 66461
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0563578605651855,
      "learning_rate": 8.924789980834736e-07,
      "loss": 2.3313,
      "step": 66462
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.065258502960205,
      "learning_rate": 8.923089776578131e-07,
      "loss": 2.4207,
      "step": 66463
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1776070594787598,
      "learning_rate": 8.921389726721596e-07,
      "loss": 2.3635,
      "step": 66464
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.247127652168274,
      "learning_rate": 8.919689831268074e-07,
      "loss": 2.4236,
      "step": 66465
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.173050880432129,
      "learning_rate": 8.917990090220397e-07,
      "loss": 2.4413,
      "step": 66466
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.100759506225586,
      "learning_rate": 8.91629050358147e-07,
      "loss": 2.2532,
      "step": 66467
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2154309749603271,
      "learning_rate": 8.914591071354172e-07,
      "loss": 2.1121,
      "step": 66468
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0938256978988647,
      "learning_rate": 8.912891793541367e-07,
      "loss": 2.3842,
      "step": 66469
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0408676862716675,
      "learning_rate": 8.911192670145974e-07,
      "loss": 2.284,
      "step": 66470
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0814752578735352,
      "learning_rate": 8.909493701170824e-07,
      "loss": 2.6539,
      "step": 66471
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1767027378082275,
      "learning_rate": 8.907794886618837e-07,
      "loss": 2.1017,
      "step": 66472
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.009000539779663,
      "learning_rate": 8.906096226492866e-07,
      "loss": 2.2578,
      "step": 66473
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1561039686203003,
      "learning_rate": 8.90439772079581e-07,
      "loss": 2.4477,
      "step": 66474
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0549125671386719,
      "learning_rate": 8.902699369530554e-07,
      "loss": 2.2624,
      "step": 66475
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1669714450836182,
      "learning_rate": 8.901001172699952e-07,
      "loss": 2.0506,
      "step": 66476
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0912984609603882,
      "learning_rate": 8.899303130306891e-07,
      "loss": 2.3259,
      "step": 66477
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0950357913970947,
      "learning_rate": 8.897605242354257e-07,
      "loss": 2.327,
      "step": 66478
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1099318265914917,
      "learning_rate": 8.895907508844914e-07,
      "loss": 2.5231,
      "step": 66479
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.138056755065918,
      "learning_rate": 8.894209929781772e-07,
      "loss": 2.1692,
      "step": 66480
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2356846332550049,
      "learning_rate": 8.892512505167672e-07,
      "loss": 2.3091,
      "step": 66481
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1368277072906494,
      "learning_rate": 8.890815235005523e-07,
      "loss": 2.3337,
      "step": 66482
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0064929723739624,
      "learning_rate": 8.889118119298168e-07,
      "loss": 2.3064,
      "step": 66483
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1004140377044678,
      "learning_rate": 8.887421158048515e-07,
      "loss": 2.2333,
      "step": 66484
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2205888032913208,
      "learning_rate": 8.885724351259417e-07,
      "loss": 2.2762,
      "step": 66485
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0634230375289917,
      "learning_rate": 8.884027698933784e-07,
      "loss": 2.376,
      "step": 66486
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.104722261428833,
      "learning_rate": 8.882331201074445e-07,
      "loss": 2.4012,
      "step": 66487
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.052168846130371,
      "learning_rate": 8.880634857684344e-07,
      "loss": 2.2294,
      "step": 66488
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2557519674301147,
      "learning_rate": 8.87893866876628e-07,
      "loss": 2.291,
      "step": 66489
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1765222549438477,
      "learning_rate": 8.87724263432318e-07,
      "loss": 2.5785,
      "step": 66490
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2162514925003052,
      "learning_rate": 8.875546754357878e-07,
      "loss": 2.2326,
      "step": 66491
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0387868881225586,
      "learning_rate": 8.873851028873293e-07,
      "loss": 2.2,
      "step": 66492
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0914616584777832,
      "learning_rate": 8.872155457872267e-07,
      "loss": 2.3862,
      "step": 66493
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.058807373046875,
      "learning_rate": 8.870460041357709e-07,
      "loss": 2.3576,
      "step": 66494
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0692046880722046,
      "learning_rate": 8.868764779332439e-07,
      "loss": 2.4066,
      "step": 66495
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1424989700317383,
      "learning_rate": 8.8670696717994e-07,
      "loss": 2.4294,
      "step": 66496
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1796307563781738,
      "learning_rate": 8.86537471876141e-07,
      "loss": 2.355,
      "step": 66497
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0494449138641357,
      "learning_rate": 8.863679920221358e-07,
      "loss": 2.3816,
      "step": 66498
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9155595898628235,
      "learning_rate": 8.861985276182139e-07,
      "loss": 2.3121,
      "step": 66499
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.118499994277954,
      "learning_rate": 8.860290786646575e-07,
      "loss": 2.2384,
      "step": 66500
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2642557621002197,
      "learning_rate": 8.858596451617607e-07,
      "loss": 2.2239,
      "step": 66501
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1294646263122559,
      "learning_rate": 8.856902271098067e-07,
      "loss": 2.2794,
      "step": 66502
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9684613347053528,
      "learning_rate": 8.855208245090818e-07,
      "loss": 2.3929,
      "step": 66503
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0621453523635864,
      "learning_rate": 8.853514373598748e-07,
      "loss": 2.2511,
      "step": 66504
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0292704105377197,
      "learning_rate": 8.851820656624732e-07,
      "loss": 2.484,
      "step": 66505
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9861646294593811,
      "learning_rate": 8.850127094171612e-07,
      "loss": 2.4153,
      "step": 66506
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0358455181121826,
      "learning_rate": 8.848433686242319e-07,
      "loss": 2.5626,
      "step": 66507
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0420961380004883,
      "learning_rate": 8.846740432839651e-07,
      "loss": 2.2235,
      "step": 66508
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0952554941177368,
      "learning_rate": 8.845047333966539e-07,
      "loss": 2.2805,
      "step": 66509
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2143176794052124,
      "learning_rate": 8.843354389625813e-07,
      "loss": 2.4018,
      "step": 66510
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0356720685958862,
      "learning_rate": 8.841661599820384e-07,
      "loss": 2.3747,
      "step": 66511
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2343615293502808,
      "learning_rate": 8.83996896455307e-07,
      "loss": 2.1994,
      "step": 66512
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.125425934791565,
      "learning_rate": 8.83827648382679e-07,
      "loss": 2.3192,
      "step": 66513
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0765690803527832,
      "learning_rate": 8.836584157644378e-07,
      "loss": 2.1239,
      "step": 66514
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1270962953567505,
      "learning_rate": 8.834891986008731e-07,
      "loss": 2.3215,
      "step": 66515
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1846808195114136,
      "learning_rate": 8.833199968922679e-07,
      "loss": 2.2921,
      "step": 66516
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1263798475265503,
      "learning_rate": 8.831508106389131e-07,
      "loss": 2.2962,
      "step": 66517
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.10871422290802,
      "learning_rate": 8.829816398410918e-07,
      "loss": 2.3706,
      "step": 66518
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1644315719604492,
      "learning_rate": 8.828124844990948e-07,
      "loss": 2.2667,
      "step": 66519
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9490623474121094,
      "learning_rate": 8.826433446132054e-07,
      "loss": 2.3477,
      "step": 66520
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1236509084701538,
      "learning_rate": 8.824742201837133e-07,
      "loss": 2.2835,
      "step": 66521
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2223917245864868,
      "learning_rate": 8.823051112109016e-07,
      "loss": 2.2156,
      "step": 66522
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.305885910987854,
      "learning_rate": 8.82136017695061e-07,
      "loss": 2.2952,
      "step": 66523
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0571420192718506,
      "learning_rate": 8.81966939636476e-07,
      "loss": 2.3018,
      "step": 66524
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1092849969863892,
      "learning_rate": 8.817978770354318e-07,
      "loss": 2.2262,
      "step": 66525
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.244219422340393,
      "learning_rate": 8.816288298922182e-07,
      "loss": 2.2703,
      "step": 66526
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.015876293182373,
      "learning_rate": 8.814597982071204e-07,
      "loss": 2.3132,
      "step": 66527
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2591639757156372,
      "learning_rate": 8.81290781980424e-07,
      "loss": 2.4445,
      "step": 66528
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0585447549819946,
      "learning_rate": 8.811217812124173e-07,
      "loss": 2.1753,
      "step": 66529
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1206746101379395,
      "learning_rate": 8.809527959033859e-07,
      "loss": 2.3548,
      "step": 66530
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1479178667068481,
      "learning_rate": 8.807838260536139e-07,
      "loss": 2.1535,
      "step": 66531
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0001307725906372,
      "learning_rate": 8.806148716633922e-07,
      "loss": 2.3424,
      "step": 66532
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1147470474243164,
      "learning_rate": 8.804459327330039e-07,
      "loss": 2.4451,
      "step": 66533
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.361332654953003,
      "learning_rate": 8.802770092627378e-07,
      "loss": 2.1924,
      "step": 66534
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0232564210891724,
      "learning_rate": 8.801081012528778e-07,
      "loss": 2.2264,
      "step": 66535
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.924462616443634,
      "learning_rate": 8.799392087037128e-07,
      "loss": 2.3867,
      "step": 66536
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1260863542556763,
      "learning_rate": 8.797703316155259e-07,
      "loss": 2.2712,
      "step": 66537
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.712512493133545,
      "learning_rate": 8.796014699886068e-07,
      "loss": 2.3397,
      "step": 66538
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.000183343887329,
      "learning_rate": 8.794326238232398e-07,
      "loss": 2.2789,
      "step": 66539
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1213784217834473,
      "learning_rate": 8.792637931197124e-07,
      "loss": 2.1701,
      "step": 66540
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9864007830619812,
      "learning_rate": 8.790949778783098e-07,
      "loss": 2.4066,
      "step": 66541
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0169076919555664,
      "learning_rate": 8.789261780993186e-07,
      "loss": 2.2594,
      "step": 66542
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1628402471542358,
      "learning_rate": 8.78757393783024e-07,
      "loss": 2.166,
      "step": 66543
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0546209812164307,
      "learning_rate": 8.785886249297137e-07,
      "loss": 2.2248,
      "step": 66544
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1178613901138306,
      "learning_rate": 8.784198715396707e-07,
      "loss": 2.3758,
      "step": 66545
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1541178226470947,
      "learning_rate": 8.78251133613186e-07,
      "loss": 2.2814,
      "step": 66546
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.107495903968811,
      "learning_rate": 8.780824111505415e-07,
      "loss": 2.3725,
      "step": 66547
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1877145767211914,
      "learning_rate": 8.779137041520269e-07,
      "loss": 2.3647,
      "step": 66548
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0345078706741333,
      "learning_rate": 8.777450126179232e-07,
      "loss": 2.2011,
      "step": 66549
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0541712045669556,
      "learning_rate": 8.775763365485213e-07,
      "loss": 2.2843,
      "step": 66550
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.078491449356079,
      "learning_rate": 8.774076759441041e-07,
      "loss": 2.6551,
      "step": 66551
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1883262395858765,
      "learning_rate": 8.772390308049594e-07,
      "loss": 2.362,
      "step": 66552
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.077390432357788,
      "learning_rate": 8.770704011313736e-07,
      "loss": 2.3903,
      "step": 66553
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.062748670578003,
      "learning_rate": 8.769017869236295e-07,
      "loss": 2.1427,
      "step": 66554
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.160747766494751,
      "learning_rate": 8.767331881820141e-07,
      "loss": 2.0456,
      "step": 66555
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9923576712608337,
      "learning_rate": 8.765646049068155e-07,
      "loss": 2.4027,
      "step": 66556
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.062878131866455,
      "learning_rate": 8.763960370983149e-07,
      "loss": 2.2362,
      "step": 66557
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1082134246826172,
      "learning_rate": 8.762274847568042e-07,
      "loss": 2.3565,
      "step": 66558
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0701218843460083,
      "learning_rate": 8.760589478825654e-07,
      "loss": 2.5157,
      "step": 66559
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.051702618598938,
      "learning_rate": 8.758904264758828e-07,
      "loss": 2.2446,
      "step": 66560
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0973310470581055,
      "learning_rate": 8.757219205370449e-07,
      "loss": 2.4872,
      "step": 66561
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0206780433654785,
      "learning_rate": 8.75553430066336e-07,
      "loss": 2.1471,
      "step": 66562
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1830787658691406,
      "learning_rate": 8.753849550640436e-07,
      "loss": 2.2715,
      "step": 66563
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1058921813964844,
      "learning_rate": 8.752164955304498e-07,
      "loss": 2.3365,
      "step": 66564
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0486302375793457,
      "learning_rate": 8.750480514658444e-07,
      "loss": 2.2837,
      "step": 66565
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1052520275115967,
      "learning_rate": 8.748796228705092e-07,
      "loss": 2.3882,
      "step": 66566
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1536011695861816,
      "learning_rate": 8.747112097447341e-07,
      "loss": 2.3586,
      "step": 66567
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0855673551559448,
      "learning_rate": 8.745428120887988e-07,
      "loss": 2.0566,
      "step": 66568
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1239842176437378,
      "learning_rate": 8.743744299029943e-07,
      "loss": 2.5182,
      "step": 66569
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.137109398841858,
      "learning_rate": 8.742060631876015e-07,
      "loss": 2.3201,
      "step": 66570
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0456151962280273,
      "learning_rate": 8.7403771194291e-07,
      "loss": 2.3873,
      "step": 66571
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9601234197616577,
      "learning_rate": 8.738693761692007e-07,
      "loss": 2.0889,
      "step": 66572
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0419865846633911,
      "learning_rate": 8.737010558667635e-07,
      "loss": 2.2428,
      "step": 66573
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1101888418197632,
      "learning_rate": 8.735327510358794e-07,
      "loss": 2.2721,
      "step": 66574
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9955602884292603,
      "learning_rate": 8.733644616768388e-07,
      "loss": 2.2237,
      "step": 66575
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1641523838043213,
      "learning_rate": 8.73196187789922e-07,
      "loss": 1.9594,
      "step": 66576
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1115217208862305,
      "learning_rate": 8.730279293754173e-07,
      "loss": 2.2757,
      "step": 66577
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.111717700958252,
      "learning_rate": 8.72859686433608e-07,
      "loss": 2.2438,
      "step": 66578
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1074039936065674,
      "learning_rate": 8.726914589647828e-07,
      "loss": 2.1461,
      "step": 66579
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0380631685256958,
      "learning_rate": 8.725232469692246e-07,
      "loss": 2.0925,
      "step": 66580
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9938853979110718,
      "learning_rate": 8.723550504472178e-07,
      "loss": 2.3721,
      "step": 66581
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.154825210571289,
      "learning_rate": 8.721868693990465e-07,
      "loss": 2.3863,
      "step": 66582
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.994573175907135,
      "learning_rate": 8.720187038249995e-07,
      "loss": 2.5249,
      "step": 66583
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3142881393432617,
      "learning_rate": 8.718505537253585e-07,
      "loss": 2.4006,
      "step": 66584
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.043676495552063,
      "learning_rate": 8.716824191004114e-07,
      "loss": 2.3222,
      "step": 66585
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0702505111694336,
      "learning_rate": 8.715142999504411e-07,
      "loss": 2.277,
      "step": 66586
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0597541332244873,
      "learning_rate": 8.713461962757319e-07,
      "loss": 2.3377,
      "step": 66587
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1103471517562866,
      "learning_rate": 8.711781080765724e-07,
      "loss": 2.2122,
      "step": 66588
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.255147099494934,
      "learning_rate": 8.710100353532435e-07,
      "loss": 2.4434,
      "step": 66589
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.108793020248413,
      "learning_rate": 8.70841978106034e-07,
      "loss": 2.444,
      "step": 66590
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0692617893218994,
      "learning_rate": 8.706739363352246e-07,
      "loss": 2.4297,
      "step": 66591
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.083882212638855,
      "learning_rate": 8.70505910041105e-07,
      "loss": 2.0697,
      "step": 66592
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1280901432037354,
      "learning_rate": 8.703378992239564e-07,
      "loss": 2.3316,
      "step": 66593
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0807499885559082,
      "learning_rate": 8.701699038840649e-07,
      "loss": 2.1366,
      "step": 66594
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.10420560836792,
      "learning_rate": 8.700019240217128e-07,
      "loss": 2.226,
      "step": 66595
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0700503587722778,
      "learning_rate": 8.698339596371896e-07,
      "loss": 2.2307,
      "step": 66596
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.010947585105896,
      "learning_rate": 8.696660107307764e-07,
      "loss": 2.3059,
      "step": 66597
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1186866760253906,
      "learning_rate": 8.694980773027594e-07,
      "loss": 2.1791,
      "step": 66598
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1043528318405151,
      "learning_rate": 8.693301593534221e-07,
      "loss": 2.3141,
      "step": 66599
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1626365184783936,
      "learning_rate": 8.691622568830516e-07,
      "loss": 2.1684,
      "step": 66600
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.147204041481018,
      "learning_rate": 8.689943698919289e-07,
      "loss": 2.1323,
      "step": 66601
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0793492794036865,
      "learning_rate": 8.688264983803429e-07,
      "loss": 2.2834,
      "step": 66602
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0544475317001343,
      "learning_rate": 8.686586423485743e-07,
      "loss": 2.5475,
      "step": 66603
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0525189638137817,
      "learning_rate": 8.684908017969107e-07,
      "loss": 2.1602,
      "step": 66604
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3574846982955933,
      "learning_rate": 8.68322976725634e-07,
      "loss": 2.4345,
      "step": 66605
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9773950576782227,
      "learning_rate": 8.681551671350308e-07,
      "loss": 2.23,
      "step": 66606
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1614185571670532,
      "learning_rate": 8.679873730253852e-07,
      "loss": 2.3934,
      "step": 66607
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2063477039337158,
      "learning_rate": 8.678195943969814e-07,
      "loss": 2.4266,
      "step": 66608
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1607636213302612,
      "learning_rate": 8.676518312501025e-07,
      "loss": 2.3291,
      "step": 66609
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.11033034324646,
      "learning_rate": 8.674840835850351e-07,
      "loss": 1.9827,
      "step": 66610
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0688698291778564,
      "learning_rate": 8.673163514020611e-07,
      "loss": 2.2346,
      "step": 66611
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0979470014572144,
      "learning_rate": 8.67148634701468e-07,
      "loss": 2.5283,
      "step": 66612
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.080904483795166,
      "learning_rate": 8.669809334835355e-07,
      "loss": 2.3415,
      "step": 66613
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1306860446929932,
      "learning_rate": 8.668132477485536e-07,
      "loss": 2.4717,
      "step": 66614
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0841732025146484,
      "learning_rate": 8.666455774968041e-07,
      "loss": 2.5542,
      "step": 66615
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0774221420288086,
      "learning_rate": 8.664779227285691e-07,
      "loss": 2.2105,
      "step": 66616
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1358189582824707,
      "learning_rate": 8.663102834441361e-07,
      "loss": 2.221,
      "step": 66617
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1146715879440308,
      "learning_rate": 8.66142659643786e-07,
      "loss": 2.2136,
      "step": 66618
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.091143250465393,
      "learning_rate": 8.659750513278065e-07,
      "loss": 2.4982,
      "step": 66619
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1503026485443115,
      "learning_rate": 8.658074584964804e-07,
      "loss": 2.3392,
      "step": 66620
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.047141432762146,
      "learning_rate": 8.65639881150091e-07,
      "loss": 2.4645,
      "step": 66621
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1902568340301514,
      "learning_rate": 8.654723192889214e-07,
      "loss": 2.3578,
      "step": 66622
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0953493118286133,
      "learning_rate": 8.653047729132591e-07,
      "loss": 2.3174,
      "step": 66623
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0514883995056152,
      "learning_rate": 8.651372420233839e-07,
      "loss": 2.3185,
      "step": 66624
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.088171124458313,
      "learning_rate": 8.649697266195844e-07,
      "loss": 2.2861,
      "step": 66625
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2117681503295898,
      "learning_rate": 8.648022267021405e-07,
      "loss": 2.2215,
      "step": 66626
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.118322730064392,
      "learning_rate": 8.646347422713397e-07,
      "loss": 2.1568,
      "step": 66627
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1267350912094116,
      "learning_rate": 8.644672733274617e-07,
      "loss": 2.1388,
      "step": 66628
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1135753393173218,
      "learning_rate": 8.642998198707953e-07,
      "loss": 2.3539,
      "step": 66629
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1144126653671265,
      "learning_rate": 8.64132381901619e-07,
      "loss": 2.2968,
      "step": 66630
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0972790718078613,
      "learning_rate": 8.639649594202226e-07,
      "loss": 2.1703,
      "step": 66631
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0233790874481201,
      "learning_rate": 8.637975524268849e-07,
      "loss": 2.4556,
      "step": 66632
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0495448112487793,
      "learning_rate": 8.636301609218945e-07,
      "loss": 2.2461,
      "step": 66633
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0770463943481445,
      "learning_rate": 8.6346278490553e-07,
      "loss": 2.4825,
      "step": 66634
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0895068645477295,
      "learning_rate": 8.632954243780789e-07,
      "loss": 2.4055,
      "step": 66635
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0322108268737793,
      "learning_rate": 8.631280793398211e-07,
      "loss": 2.5655,
      "step": 66636
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1010794639587402,
      "learning_rate": 8.629607497910453e-07,
      "loss": 2.2645,
      "step": 66637
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1560041904449463,
      "learning_rate": 8.6279343573203e-07,
      "loss": 2.5113,
      "step": 66638
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.077867031097412,
      "learning_rate": 8.62626137163064e-07,
      "loss": 2.1273,
      "step": 66639
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9820975661277771,
      "learning_rate": 8.62458854084427e-07,
      "loss": 2.279,
      "step": 66640
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0520292520523071,
      "learning_rate": 8.622915864964044e-07,
      "loss": 2.2317,
      "step": 66641
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0012279748916626,
      "learning_rate": 8.621243343992803e-07,
      "loss": 2.3921,
      "step": 66642
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9546462893486023,
      "learning_rate": 8.619570977933356e-07,
      "loss": 2.27,
      "step": 66643
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0724551677703857,
      "learning_rate": 8.617898766788568e-07,
      "loss": 2.3418,
      "step": 66644
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9962096214294434,
      "learning_rate": 8.616226710561248e-07,
      "loss": 2.0854,
      "step": 66645
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9968805909156799,
      "learning_rate": 8.61455480925425e-07,
      "loss": 2.1975,
      "step": 66646
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1344811916351318,
      "learning_rate": 8.612883062870414e-07,
      "loss": 2.2514,
      "step": 66647
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.13999342918396,
      "learning_rate": 8.611211471412551e-07,
      "loss": 2.1776,
      "step": 66648
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0387550592422485,
      "learning_rate": 8.60954003488349e-07,
      "loss": 2.3427,
      "step": 66649
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0979244709014893,
      "learning_rate": 8.607868753286108e-07,
      "loss": 2.3445,
      "step": 66650
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0909550189971924,
      "learning_rate": 8.60619762662318e-07,
      "loss": 2.2151,
      "step": 66651
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0161993503570557,
      "learning_rate": 8.604526654897594e-07,
      "loss": 2.2881,
      "step": 66652
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.001761555671692,
      "learning_rate": 8.602855838112135e-07,
      "loss": 2.443,
      "step": 66653
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1441768407821655,
      "learning_rate": 8.601185176269677e-07,
      "loss": 2.1712,
      "step": 66654
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1171609163284302,
      "learning_rate": 8.599514669373021e-07,
      "loss": 2.2786,
      "step": 66655
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9707675576210022,
      "learning_rate": 8.59784431742503e-07,
      "loss": 2.0954,
      "step": 66656
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1236950159072876,
      "learning_rate": 8.596174120428491e-07,
      "loss": 2.0752,
      "step": 66657
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0013774633407593,
      "learning_rate": 8.59450407838629e-07,
      "loss": 2.3463,
      "step": 66658
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0463718175888062,
      "learning_rate": 8.592834191301224e-07,
      "loss": 2.4201,
      "step": 66659
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0269142389297485,
      "learning_rate": 8.591164459176127e-07,
      "loss": 2.2367,
      "step": 66660
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.03307044506073,
      "learning_rate": 8.589494882013827e-07,
      "loss": 2.2969,
      "step": 66661
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2734922170639038,
      "learning_rate": 8.587825459817167e-07,
      "loss": 1.9847,
      "step": 66662
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1719526052474976,
      "learning_rate": 8.586156192588957e-07,
      "loss": 2.1877,
      "step": 66663
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1155742406845093,
      "learning_rate": 8.58448708033205e-07,
      "loss": 2.5552,
      "step": 66664
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0078099966049194,
      "learning_rate": 8.582818123049253e-07,
      "loss": 2.5232,
      "step": 66665
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1139291524887085,
      "learning_rate": 8.581149320743432e-07,
      "loss": 2.1705,
      "step": 66666
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0013220310211182,
      "learning_rate": 8.579480673417362e-07,
      "loss": 2.3258,
      "step": 66667
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.009488821029663,
      "learning_rate": 8.57781218107393e-07,
      "loss": 2.3246,
      "step": 66668
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.026743769645691,
      "learning_rate": 8.576143843715912e-07,
      "loss": 2.4166,
      "step": 66669
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.138431429862976,
      "learning_rate": 8.574475661346182e-07,
      "loss": 2.3628,
      "step": 66670
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0095795392990112,
      "learning_rate": 8.57280763396754e-07,
      "loss": 2.4031,
      "step": 66671
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9603173136711121,
      "learning_rate": 8.571139761582825e-07,
      "loss": 2.242,
      "step": 66672
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.069413185119629,
      "learning_rate": 8.569472044194848e-07,
      "loss": 2.0842,
      "step": 66673
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1584330797195435,
      "learning_rate": 8.567804481806452e-07,
      "loss": 2.1991,
      "step": 66674
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1869781017303467,
      "learning_rate": 8.566137074420456e-07,
      "loss": 2.3854,
      "step": 66675
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0151721239089966,
      "learning_rate": 8.5644698220397e-07,
      "loss": 2.2431,
      "step": 66676
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1938395500183105,
      "learning_rate": 8.562802724667007e-07,
      "loss": 2.2237,
      "step": 66677
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3891743421554565,
      "learning_rate": 8.561135782305174e-07,
      "loss": 2.3378,
      "step": 66678
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0190002918243408,
      "learning_rate": 8.559468994957076e-07,
      "loss": 2.6484,
      "step": 66679
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0611950159072876,
      "learning_rate": 8.557802362625489e-07,
      "loss": 2.2851,
      "step": 66680
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1118477582931519,
      "learning_rate": 8.556135885313277e-07,
      "loss": 2.077,
      "step": 66681
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0945765972137451,
      "learning_rate": 8.554469563023238e-07,
      "loss": 1.9803,
      "step": 66682
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.10414457321167,
      "learning_rate": 8.552803395758225e-07,
      "loss": 2.4104,
      "step": 66683
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0163248777389526,
      "learning_rate": 8.551137383521035e-07,
      "loss": 2.2442,
      "step": 66684
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0276468992233276,
      "learning_rate": 8.549471526314512e-07,
      "loss": 2.1584,
      "step": 66685
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3359532356262207,
      "learning_rate": 8.547805824141475e-07,
      "loss": 2.3649,
      "step": 66686
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2075140476226807,
      "learning_rate": 8.546140277004755e-07,
      "loss": 2.3115,
      "step": 66687
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2089247703552246,
      "learning_rate": 8.54447488490714e-07,
      "loss": 2.3207,
      "step": 66688
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1005587577819824,
      "learning_rate": 8.542809647851491e-07,
      "loss": 2.5259,
      "step": 66689
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.075384259223938,
      "learning_rate": 8.541144565840609e-07,
      "loss": 2.0842,
      "step": 66690
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0851956605911255,
      "learning_rate": 8.539479638877346e-07,
      "loss": 2.2631,
      "step": 66691
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.150071620941162,
      "learning_rate": 8.537814866964489e-07,
      "loss": 2.3589,
      "step": 66692
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0507241487503052,
      "learning_rate": 8.536150250104891e-07,
      "loss": 2.1041,
      "step": 66693
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2315456867218018,
      "learning_rate": 8.534485788301339e-07,
      "loss": 2.2939,
      "step": 66694
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0322843790054321,
      "learning_rate": 8.532821481556697e-07,
      "loss": 2.3085,
      "step": 66695
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0544664859771729,
      "learning_rate": 8.531157329873741e-07,
      "loss": 2.1876,
      "step": 66696
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0537887811660767,
      "learning_rate": 8.529493333255334e-07,
      "loss": 2.4383,
      "step": 66697
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.173304796218872,
      "learning_rate": 8.527829491704287e-07,
      "loss": 2.2637,
      "step": 66698
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0270873308181763,
      "learning_rate": 8.526165805223408e-07,
      "loss": 2.5198,
      "step": 66699
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.974708080291748,
      "learning_rate": 8.524502273815505e-07,
      "loss": 2.2234,
      "step": 66700
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1296430826187134,
      "learning_rate": 8.522838897483432e-07,
      "loss": 2.3124,
      "step": 66701
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1287486553192139,
      "learning_rate": 8.521175676229975e-07,
      "loss": 2.3298,
      "step": 66702
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1271913051605225,
      "learning_rate": 8.51951261005799e-07,
      "loss": 2.2157,
      "step": 66703
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0748600959777832,
      "learning_rate": 8.517849698970271e-07,
      "loss": 2.0792,
      "step": 66704
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1645971536636353,
      "learning_rate": 8.516186942969628e-07,
      "loss": 2.3017,
      "step": 66705
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0619086027145386,
      "learning_rate": 8.514524342058916e-07,
      "loss": 2.372,
      "step": 66706
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9416522979736328,
      "learning_rate": 8.512861896240909e-07,
      "loss": 2.264,
      "step": 66707
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0573086738586426,
      "learning_rate": 8.511199605518472e-07,
      "loss": 2.2897,
      "step": 66708
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0758835077285767,
      "learning_rate": 8.50953746989438e-07,
      "loss": 2.2654,
      "step": 66709
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1607838869094849,
      "learning_rate": 8.507875489371487e-07,
      "loss": 2.213,
      "step": 66710
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.056219458580017,
      "learning_rate": 8.506213663952578e-07,
      "loss": 2.2275,
      "step": 66711
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.008608341217041,
      "learning_rate": 8.50455199364052e-07,
      "loss": 2.3501,
      "step": 66712
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0167014598846436,
      "learning_rate": 8.502890478438064e-07,
      "loss": 2.2875,
      "step": 66713
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1214258670806885,
      "learning_rate": 8.501229118348075e-07,
      "loss": 2.4325,
      "step": 66714
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1611543893814087,
      "learning_rate": 8.49956791337333e-07,
      "loss": 2.228,
      "step": 66715
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1304775476455688,
      "learning_rate": 8.497906863516692e-07,
      "loss": 2.5331,
      "step": 66716
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0265426635742188,
      "learning_rate": 8.496245968780936e-07,
      "loss": 2.2936,
      "step": 66717
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2224452495574951,
      "learning_rate": 8.494585229168906e-07,
      "loss": 2.278,
      "step": 66718
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3450899124145508,
      "learning_rate": 8.492924644683398e-07,
      "loss": 2.4281,
      "step": 66719
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.024749755859375,
      "learning_rate": 8.491264215327255e-07,
      "loss": 2.1743,
      "step": 66720
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0298224687576294,
      "learning_rate": 8.489603941103242e-07,
      "loss": 2.3692,
      "step": 66721
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.350480318069458,
      "learning_rate": 8.487943822014232e-07,
      "loss": 2.2922,
      "step": 66722
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2333593368530273,
      "learning_rate": 8.486283858062994e-07,
      "loss": 2.3272,
      "step": 66723
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1168524026870728,
      "learning_rate": 8.484624049252377e-07,
      "loss": 2.3346,
      "step": 66724
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.109790563583374,
      "learning_rate": 8.482964395585169e-07,
      "loss": 2.2595,
      "step": 66725
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0312949419021606,
      "learning_rate": 8.481304897064202e-07,
      "loss": 2.1923,
      "step": 66726
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2231578826904297,
      "learning_rate": 8.479645553692261e-07,
      "loss": 2.3931,
      "step": 66727
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2433412075042725,
      "learning_rate": 8.477986365472191e-07,
      "loss": 2.325,
      "step": 66728
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.090155839920044,
      "learning_rate": 8.476327332406775e-07,
      "loss": 2.1375,
      "step": 66729
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1343051195144653,
      "learning_rate": 8.474668454498858e-07,
      "loss": 2.4464,
      "step": 66730
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.119462490081787,
      "learning_rate": 8.473009731751214e-07,
      "loss": 2.3722,
      "step": 66731
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0506993532180786,
      "learning_rate": 8.471351164166697e-07,
      "loss": 2.2095,
      "step": 66732
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1470493078231812,
      "learning_rate": 8.469692751748104e-07,
      "loss": 2.488,
      "step": 66733
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.077902913093567,
      "learning_rate": 8.468034494498222e-07,
      "loss": 2.3863,
      "step": 66734
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6163101196289062,
      "learning_rate": 8.466376392419895e-07,
      "loss": 2.4837,
      "step": 66735
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1327707767486572,
      "learning_rate": 8.464718445515907e-07,
      "loss": 2.5977,
      "step": 66736
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0549604892730713,
      "learning_rate": 8.463060653789102e-07,
      "loss": 2.391,
      "step": 66737
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2412238121032715,
      "learning_rate": 8.461403017242264e-07,
      "loss": 2.5741,
      "step": 66738
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.08843994140625,
      "learning_rate": 8.459745535878206e-07,
      "loss": 2.3801,
      "step": 66739
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0807781219482422,
      "learning_rate": 8.458088209699733e-07,
      "loss": 2.5999,
      "step": 66740
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1097383499145508,
      "learning_rate": 8.456431038709678e-07,
      "loss": 2.3133,
      "step": 66741
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0965427160263062,
      "learning_rate": 8.454774022910817e-07,
      "loss": 2.5391,
      "step": 66742
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.014790654182434,
      "learning_rate": 8.45311716230599e-07,
      "loss": 2.3192,
      "step": 66743
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1175180673599243,
      "learning_rate": 8.451460456897987e-07,
      "loss": 2.3426,
      "step": 66744
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0880112648010254,
      "learning_rate": 8.449803906689625e-07,
      "loss": 2.1849,
      "step": 66745
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1763046979904175,
      "learning_rate": 8.448147511683702e-07,
      "loss": 2.3399,
      "step": 66746
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.009955644607544,
      "learning_rate": 8.44649127188305e-07,
      "loss": 2.0585,
      "step": 66747
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2142328023910522,
      "learning_rate": 8.444835187290446e-07,
      "loss": 2.2953,
      "step": 66748
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9593623280525208,
      "learning_rate": 8.443179257908729e-07,
      "loss": 2.1951,
      "step": 66749
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.139680027961731,
      "learning_rate": 8.441523483740665e-07,
      "loss": 2.3497,
      "step": 66750
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0288599729537964,
      "learning_rate": 8.439867864789131e-07,
      "loss": 2.3468,
      "step": 66751
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9879953861236572,
      "learning_rate": 8.438212401056845e-07,
      "loss": 2.3107,
      "step": 66752
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0170012712478638,
      "learning_rate": 8.436557092546682e-07,
      "loss": 2.3431,
      "step": 66753
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0609636306762695,
      "learning_rate": 8.434901939261408e-07,
      "loss": 2.2379,
      "step": 66754
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1738535165786743,
      "learning_rate": 8.433246941203854e-07,
      "loss": 2.578,
      "step": 66755
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0850696563720703,
      "learning_rate": 8.431592098376796e-07,
      "loss": 2.0263,
      "step": 66756
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9710533618927002,
      "learning_rate": 8.429937410783085e-07,
      "loss": 2.3379,
      "step": 66757
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1670513153076172,
      "learning_rate": 8.428282878425476e-07,
      "loss": 2.2755,
      "step": 66758
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0878005027770996,
      "learning_rate": 8.42662850130681e-07,
      "loss": 2.4144,
      "step": 66759
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1209815740585327,
      "learning_rate": 8.424974279429887e-07,
      "loss": 2.2994,
      "step": 66760
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0829148292541504,
      "learning_rate": 8.423320212797492e-07,
      "loss": 2.2301,
      "step": 66761
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.083153486251831,
      "learning_rate": 8.421666301412446e-07,
      "loss": 2.3918,
      "step": 66762
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.100119709968567,
      "learning_rate": 8.420012545277545e-07,
      "loss": 2.4629,
      "step": 66763
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1376532316207886,
      "learning_rate": 8.418358944395599e-07,
      "loss": 2.3827,
      "step": 66764
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.029188871383667,
      "learning_rate": 8.416705498769417e-07,
      "loss": 2.653,
      "step": 66765
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3316878080368042,
      "learning_rate": 8.415052208401786e-07,
      "loss": 2.3084,
      "step": 66766
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1297591924667358,
      "learning_rate": 8.413399073295503e-07,
      "loss": 2.3704,
      "step": 66767
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0812634229660034,
      "learning_rate": 8.411746093453399e-07,
      "loss": 2.51,
      "step": 66768
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0759668350219727,
      "learning_rate": 8.41009326887825e-07,
      "loss": 2.2356,
      "step": 66769
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.225356936454773,
      "learning_rate": 8.408440599572875e-07,
      "loss": 2.2112,
      "step": 66770
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0384557247161865,
      "learning_rate": 8.406788085540063e-07,
      "loss": 2.4235,
      "step": 66771
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0523219108581543,
      "learning_rate": 8.405135726782632e-07,
      "loss": 2.3361,
      "step": 66772
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.020362377166748,
      "learning_rate": 8.403483523303357e-07,
      "loss": 2.2623,
      "step": 66773
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.313628077507019,
      "learning_rate": 8.401831475105072e-07,
      "loss": 2.2064,
      "step": 66774
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0186735391616821,
      "learning_rate": 8.400179582190538e-07,
      "loss": 2.2538,
      "step": 66775
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.172263264656067,
      "learning_rate": 8.398527844562598e-07,
      "loss": 2.2048,
      "step": 66776
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1458786725997925,
      "learning_rate": 8.396876262224018e-07,
      "loss": 2.3423,
      "step": 66777
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1040704250335693,
      "learning_rate": 8.39522483517764e-07,
      "loss": 2.2012,
      "step": 66778
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0271719694137573,
      "learning_rate": 8.393573563426205e-07,
      "loss": 2.407,
      "step": 66779
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1544290781021118,
      "learning_rate": 8.391922446972556e-07,
      "loss": 2.2198,
      "step": 66780
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1145044565200806,
      "learning_rate": 8.390271485819468e-07,
      "loss": 2.3289,
      "step": 66781
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0743314027786255,
      "learning_rate": 8.388620679969772e-07,
      "loss": 2.3052,
      "step": 66782
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0358738899230957,
      "learning_rate": 8.386970029426211e-07,
      "loss": 2.2057,
      "step": 66783
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0645182132720947,
      "learning_rate": 8.385319534191649e-07,
      "loss": 2.4608,
      "step": 66784
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0364214181900024,
      "learning_rate": 8.383669194268829e-07,
      "loss": 2.2183,
      "step": 66785
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.080696940422058,
      "learning_rate": 8.382019009660592e-07,
      "loss": 2.3414,
      "step": 66786
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0845911502838135,
      "learning_rate": 8.380368980369702e-07,
      "loss": 2.1824,
      "step": 66787
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.291305661201477,
      "learning_rate": 8.37871910639898e-07,
      "loss": 2.3273,
      "step": 66788
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0608726739883423,
      "learning_rate": 8.377069387751224e-07,
      "loss": 1.9751,
      "step": 66789
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3686928749084473,
      "learning_rate": 8.375419824429198e-07,
      "loss": 2.1231,
      "step": 66790
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0630730390548706,
      "learning_rate": 8.373770416435734e-07,
      "loss": 2.286,
      "step": 66791
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0412812232971191,
      "learning_rate": 8.372121163773617e-07,
      "loss": 2.3331,
      "step": 66792
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0317134857177734,
      "learning_rate": 8.370472066445623e-07,
      "loss": 2.3543,
      "step": 66793
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0818572044372559,
      "learning_rate": 8.368823124454573e-07,
      "loss": 2.3129,
      "step": 66794
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0828814506530762,
      "learning_rate": 8.367174337803263e-07,
      "loss": 2.4376,
      "step": 66795
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1202738285064697,
      "learning_rate": 8.36552570649447e-07,
      "loss": 2.1542,
      "step": 66796
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0438807010650635,
      "learning_rate": 8.363877230531004e-07,
      "loss": 2.1679,
      "step": 66797
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.075415015220642,
      "learning_rate": 8.362228909915637e-07,
      "loss": 2.5251,
      "step": 66798
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7850959300994873,
      "learning_rate": 8.360580744651203e-07,
      "loss": 2.2623,
      "step": 66799
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1545155048370361,
      "learning_rate": 8.358932734740454e-07,
      "loss": 2.3581,
      "step": 66800
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1206151247024536,
      "learning_rate": 8.357284880186222e-07,
      "loss": 2.3171,
      "step": 66801
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0416547060012817,
      "learning_rate": 8.355637180991272e-07,
      "loss": 2.143,
      "step": 66802
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8121261596679688,
      "learning_rate": 8.35398963715841e-07,
      "loss": 2.4227,
      "step": 66803
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1199009418487549,
      "learning_rate": 8.352342248690438e-07,
      "loss": 2.5517,
      "step": 66804
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1139432191848755,
      "learning_rate": 8.350695015590127e-07,
      "loss": 2.0601,
      "step": 66805
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0712625980377197,
      "learning_rate": 8.349047937860277e-07,
      "loss": 2.2856,
      "step": 66806
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0628284215927124,
      "learning_rate": 8.347401015503687e-07,
      "loss": 2.4674,
      "step": 66807
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9921643137931824,
      "learning_rate": 8.345754248523131e-07,
      "loss": 2.2695,
      "step": 66808
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.046644926071167,
      "learning_rate": 8.344107636921428e-07,
      "loss": 2.1451,
      "step": 66809
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0038443803787231,
      "learning_rate": 8.342461180701344e-07,
      "loss": 2.3447,
      "step": 66810
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1605761051177979,
      "learning_rate": 8.340814879865699e-07,
      "loss": 2.1941,
      "step": 66811
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0989826917648315,
      "learning_rate": 8.339168734417247e-07,
      "loss": 2.3547,
      "step": 66812
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1212432384490967,
      "learning_rate": 8.337522744358817e-07,
      "loss": 2.4028,
      "step": 66813
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1547298431396484,
      "learning_rate": 8.335876909693163e-07,
      "loss": 2.4219,
      "step": 66814
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1067408323287964,
      "learning_rate": 8.334231230423118e-07,
      "loss": 2.3434,
      "step": 66815
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0704854726791382,
      "learning_rate": 8.332585706551444e-07,
      "loss": 2.4875,
      "step": 66816
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0786504745483398,
      "learning_rate": 8.330940338080928e-07,
      "loss": 2.1838,
      "step": 66817
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0495001077651978,
      "learning_rate": 8.329295125014347e-07,
      "loss": 2.3689,
      "step": 66818
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.083694577217102,
      "learning_rate": 8.327650067354531e-07,
      "loss": 2.2717,
      "step": 66819
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1160728931427002,
      "learning_rate": 8.326005165104234e-07,
      "loss": 2.4473,
      "step": 66820
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0697013139724731,
      "learning_rate": 8.324360418266264e-07,
      "loss": 2.3017,
      "step": 66821
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0530903339385986,
      "learning_rate": 8.322715826843408e-07,
      "loss": 2.2153,
      "step": 66822
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1780321598052979,
      "learning_rate": 8.32107139083842e-07,
      "loss": 2.3312,
      "step": 66823
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1769664287567139,
      "learning_rate": 8.319427110254141e-07,
      "loss": 2.2794,
      "step": 66824
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.074074625968933,
      "learning_rate": 8.317782985093315e-07,
      "loss": 2.2043,
      "step": 66825
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.093817949295044,
      "learning_rate": 8.31613901535877e-07,
      "loss": 1.9769,
      "step": 66826
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.211150050163269,
      "learning_rate": 8.31449520105324e-07,
      "loss": 2.2982,
      "step": 66827
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1324968338012695,
      "learning_rate": 8.312851542179567e-07,
      "loss": 2.3234,
      "step": 66828
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0109137296676636,
      "learning_rate": 8.311208038740504e-07,
      "loss": 2.2332,
      "step": 66829
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0728613138198853,
      "learning_rate": 8.309564690738847e-07,
      "loss": 2.3283,
      "step": 66830
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1413685083389282,
      "learning_rate": 8.307921498177396e-07,
      "loss": 2.114,
      "step": 66831
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1008832454681396,
      "learning_rate": 8.306278461058914e-07,
      "loss": 2.3309,
      "step": 66832
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2424930334091187,
      "learning_rate": 8.304635579386177e-07,
      "loss": 2.4944,
      "step": 66833
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0686885118484497,
      "learning_rate": 8.302992853161995e-07,
      "loss": 2.201,
      "step": 66834
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3354887962341309,
      "learning_rate": 8.301350282389142e-07,
      "loss": 2.1486,
      "step": 66835
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.015712022781372,
      "learning_rate": 8.299707867070417e-07,
      "loss": 2.4744,
      "step": 66836
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0282396078109741,
      "learning_rate": 8.298065607208572e-07,
      "loss": 2.6061,
      "step": 66837
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0517456531524658,
      "learning_rate": 8.296423502806427e-07,
      "loss": 2.2241,
      "step": 66838
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.077385663986206,
      "learning_rate": 8.294781553866737e-07,
      "loss": 2.2798,
      "step": 66839
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1158697605133057,
      "learning_rate": 8.293139760392322e-07,
      "loss": 2.1784,
      "step": 66840
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0464742183685303,
      "learning_rate": 8.291498122385921e-07,
      "loss": 2.2709,
      "step": 66841
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1825087070465088,
      "learning_rate": 8.289856639850346e-07,
      "loss": 2.247,
      "step": 66842
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1579177379608154,
      "learning_rate": 8.288215312788384e-07,
      "loss": 2.2448,
      "step": 66843
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0448039770126343,
      "learning_rate": 8.286574141202808e-07,
      "loss": 2.218,
      "step": 66844
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1304833889007568,
      "learning_rate": 8.284933125096373e-07,
      "loss": 2.3717,
      "step": 66845
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2319594621658325,
      "learning_rate": 8.283292264471909e-07,
      "loss": 2.3486,
      "step": 66846
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0521390438079834,
      "learning_rate": 8.281651559332149e-07,
      "loss": 2.2381,
      "step": 66847
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.187138557434082,
      "learning_rate": 8.280011009679922e-07,
      "loss": 2.3385,
      "step": 66848
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.112272024154663,
      "learning_rate": 8.278370615517972e-07,
      "loss": 2.4069,
      "step": 66849
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1896404027938843,
      "learning_rate": 8.276730376849107e-07,
      "loss": 2.3411,
      "step": 66850
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0796639919281006,
      "learning_rate": 8.275090293676102e-07,
      "loss": 2.3705,
      "step": 66851
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0214598178863525,
      "learning_rate": 8.273450366001712e-07,
      "loss": 2.2583,
      "step": 66852
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.171314001083374,
      "learning_rate": 8.271810593828745e-07,
      "loss": 2.1909,
      "step": 66853
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1270545721054077,
      "learning_rate": 8.270170977159964e-07,
      "loss": 2.0614,
      "step": 66854
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1430071592330933,
      "learning_rate": 8.268531515998168e-07,
      "loss": 2.4502,
      "step": 66855
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0349597930908203,
      "learning_rate": 8.266892210346112e-07,
      "loss": 2.3038,
      "step": 66856
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0954999923706055,
      "learning_rate": 8.265253060206624e-07,
      "loss": 2.1917,
      "step": 66857
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.17328941822052,
      "learning_rate": 8.263614065582415e-07,
      "loss": 2.2343,
      "step": 66858
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.128473162651062,
      "learning_rate": 8.261975226476304e-07,
      "loss": 2.2171,
      "step": 66859
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.023379921913147,
      "learning_rate": 8.260336542891045e-07,
      "loss": 2.3629,
      "step": 66860
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1118884086608887,
      "learning_rate": 8.258698014829458e-07,
      "loss": 2.168,
      "step": 66861
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0478508472442627,
      "learning_rate": 8.257059642294263e-07,
      "loss": 2.2674,
      "step": 66862
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.020318627357483,
      "learning_rate": 8.255421425288302e-07,
      "loss": 2.3376,
      "step": 66863
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0569357872009277,
      "learning_rate": 8.253783363814294e-07,
      "loss": 2.4418,
      "step": 66864
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.246971845626831,
      "learning_rate": 8.252145457875049e-07,
      "loss": 2.4632,
      "step": 66865
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0832626819610596,
      "learning_rate": 8.250507707473332e-07,
      "loss": 2.4327,
      "step": 66866
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0886952877044678,
      "learning_rate": 8.24887011261194e-07,
      "loss": 2.4565,
      "step": 66867
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.041192889213562,
      "learning_rate": 8.247232673293603e-07,
      "loss": 2.453,
      "step": 66868
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.057188630104065,
      "learning_rate": 8.245595389521155e-07,
      "loss": 2.2172,
      "step": 66869
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.024507999420166,
      "learning_rate": 8.243958261297346e-07,
      "loss": 2.4699,
      "step": 66870
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.16953444480896,
      "learning_rate": 8.242321288624933e-07,
      "loss": 2.2585,
      "step": 66871
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2175499200820923,
      "learning_rate": 8.240684471506699e-07,
      "loss": 2.2829,
      "step": 66872
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1995855569839478,
      "learning_rate": 8.239047809945444e-07,
      "loss": 2.4505,
      "step": 66873
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9866469502449036,
      "learning_rate": 8.23741130394391e-07,
      "loss": 2.152,
      "step": 66874
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1201871633529663,
      "learning_rate": 8.235774953504905e-07,
      "loss": 2.4184,
      "step": 66875
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0827573537826538,
      "learning_rate": 8.23413875863116e-07,
      "loss": 2.3196,
      "step": 66876
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1405695676803589,
      "learning_rate": 8.232502719325486e-07,
      "loss": 2.4478,
      "step": 66877
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.078402042388916,
      "learning_rate": 8.230866835590657e-07,
      "loss": 2.4694,
      "step": 66878
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.017907977104187,
      "learning_rate": 8.229231107429403e-07,
      "loss": 2.3241,
      "step": 66879
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1707515716552734,
      "learning_rate": 8.227595534844556e-07,
      "loss": 2.2173,
      "step": 66880
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0796834230422974,
      "learning_rate": 8.225960117838838e-07,
      "loss": 2.3434,
      "step": 66881
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1281747817993164,
      "learning_rate": 8.224324856415055e-07,
      "loss": 2.2616,
      "step": 66882
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.08003830909729,
      "learning_rate": 8.222689750575973e-07,
      "loss": 2.307,
      "step": 66883
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0736058950424194,
      "learning_rate": 8.221054800324358e-07,
      "loss": 2.5166,
      "step": 66884
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.070138931274414,
      "learning_rate": 8.21942000566296e-07,
      "loss": 2.4129,
      "step": 66885
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2092704772949219,
      "learning_rate": 8.217785366594589e-07,
      "loss": 2.4528,
      "step": 66886
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1709210872650146,
      "learning_rate": 8.216150883121988e-07,
      "loss": 2.191,
      "step": 66887
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0827969312667847,
      "learning_rate": 8.214516555247953e-07,
      "loss": 2.358,
      "step": 66888
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0575743913650513,
      "learning_rate": 8.21288238297523e-07,
      "loss": 2.1717,
      "step": 66889
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.040582537651062,
      "learning_rate": 8.211248366306612e-07,
      "loss": 2.2136,
      "step": 66890
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3658815622329712,
      "learning_rate": 8.209614505244834e-07,
      "loss": 2.2596,
      "step": 66891
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0968191623687744,
      "learning_rate": 8.207980799792714e-07,
      "loss": 2.2149,
      "step": 66892
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9898077249526978,
      "learning_rate": 8.206347249952984e-07,
      "loss": 2.3027,
      "step": 66893
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.182586908340454,
      "learning_rate": 8.204713855728441e-07,
      "loss": 2.2306,
      "step": 66894
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1029151678085327,
      "learning_rate": 8.203080617121817e-07,
      "loss": 2.4253,
      "step": 66895
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.015094518661499,
      "learning_rate": 8.201447534135942e-07,
      "loss": 2.2461,
      "step": 66896
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1302262544631958,
      "learning_rate": 8.199814606773504e-07,
      "loss": 2.5266,
      "step": 66897
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0256016254425049,
      "learning_rate": 8.198181835037345e-07,
      "loss": 2.1111,
      "step": 66898
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.017106294631958,
      "learning_rate": 8.196549218930172e-07,
      "loss": 2.2599,
      "step": 66899
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0165998935699463,
      "learning_rate": 8.194916758454796e-07,
      "loss": 2.3829,
      "step": 66900
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1108696460723877,
      "learning_rate": 8.193284453613959e-07,
      "loss": 2.3626,
      "step": 66901
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.151140809059143,
      "learning_rate": 8.191652304410458e-07,
      "loss": 2.1362,
      "step": 66902
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0114058256149292,
      "learning_rate": 8.190020310847014e-07,
      "loss": 2.2807,
      "step": 66903
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0325722694396973,
      "learning_rate": 8.188388472926445e-07,
      "loss": 2.3084,
      "step": 66904
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2839006185531616,
      "learning_rate": 8.186756790651484e-07,
      "loss": 2.2159,
      "step": 66905
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9980328679084778,
      "learning_rate": 8.185125264024907e-07,
      "loss": 2.3068,
      "step": 66906
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1192866563796997,
      "learning_rate": 8.183493893049499e-07,
      "loss": 2.2348,
      "step": 66907
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.097392201423645,
      "learning_rate": 8.18186267772797e-07,
      "loss": 2.4938,
      "step": 66908
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0735547542572021,
      "learning_rate": 8.18023161806315e-07,
      "loss": 2.1405,
      "step": 66909
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0160813331604004,
      "learning_rate": 8.178600714057772e-07,
      "loss": 2.3557,
      "step": 66910
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1389600038528442,
      "learning_rate": 8.176969965714588e-07,
      "loss": 2.4883,
      "step": 66911
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.184222936630249,
      "learning_rate": 8.175339373036395e-07,
      "loss": 2.1488,
      "step": 66912
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.991631031036377,
      "learning_rate": 8.173708936025937e-07,
      "loss": 2.2983,
      "step": 66913
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0233014822006226,
      "learning_rate": 8.172078654685967e-07,
      "loss": 2.1429,
      "step": 66914
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.00144624710083,
      "learning_rate": 8.170448529019281e-07,
      "loss": 2.2476,
      "step": 66915
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0248533487319946,
      "learning_rate": 8.168818559028613e-07,
      "loss": 2.2404,
      "step": 66916
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0979623794555664,
      "learning_rate": 8.167188744716748e-07,
      "loss": 2.3294,
      "step": 66917
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3618741035461426,
      "learning_rate": 8.165559086086427e-07,
      "loss": 2.242,
      "step": 66918
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0384808778762817,
      "learning_rate": 8.163929583140439e-07,
      "loss": 2.1743,
      "step": 66919
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0671364068984985,
      "learning_rate": 8.162300235881527e-07,
      "loss": 2.459,
      "step": 66920
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2616198062896729,
      "learning_rate": 8.160671044312463e-07,
      "loss": 2.1063,
      "step": 66921
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9645187258720398,
      "learning_rate": 8.159042008435992e-07,
      "loss": 2.2814,
      "step": 66922
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0190436840057373,
      "learning_rate": 8.157413128254921e-07,
      "loss": 2.3908,
      "step": 66923
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1539793014526367,
      "learning_rate": 8.155784403771949e-07,
      "loss": 2.2829,
      "step": 66924
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0733377933502197,
      "learning_rate": 8.154155834989885e-07,
      "loss": 2.3742,
      "step": 66925
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9774043560028076,
      "learning_rate": 8.152527421911449e-07,
      "loss": 2.3967,
      "step": 66926
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.205519676208496,
      "learning_rate": 8.150899164539439e-07,
      "loss": 2.4662,
      "step": 66927
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0709995031356812,
      "learning_rate": 8.149271062876585e-07,
      "loss": 2.3873,
      "step": 66928
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.147994875907898,
      "learning_rate": 8.147643116925685e-07,
      "loss": 2.4122,
      "step": 66929
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2421315908432007,
      "learning_rate": 8.14601532668946e-07,
      "loss": 2.1833,
      "step": 66930
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1754305362701416,
      "learning_rate": 8.144387692170707e-07,
      "loss": 2.122,
      "step": 66931
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0961918830871582,
      "learning_rate": 8.142760213372136e-07,
      "loss": 2.3098,
      "step": 66932
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1894508600234985,
      "learning_rate": 8.141132890296555e-07,
      "loss": 2.3138,
      "step": 66933
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1194130182266235,
      "learning_rate": 8.139505722946706e-07,
      "loss": 2.1906,
      "step": 66934
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0529838800430298,
      "learning_rate": 8.137878711325331e-07,
      "loss": 2.2972,
      "step": 66935
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0145537853240967,
      "learning_rate": 8.136251855435206e-07,
      "loss": 2.0887,
      "step": 66936
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2648602724075317,
      "learning_rate": 8.134625155279097e-07,
      "loss": 2.3184,
      "step": 66937
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0218558311462402,
      "learning_rate": 8.132998610859722e-07,
      "loss": 2.2169,
      "step": 66938
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1547774076461792,
      "learning_rate": 8.131372222179889e-07,
      "loss": 2.2949,
      "step": 66939
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0710612535476685,
      "learning_rate": 8.129745989242321e-07,
      "loss": 2.2913,
      "step": 66940
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0163023471832275,
      "learning_rate": 8.128119912049781e-07,
      "loss": 2.251,
      "step": 66941
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.041014552116394,
      "learning_rate": 8.126493990605034e-07,
      "loss": 2.4002,
      "step": 66942
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2948404550552368,
      "learning_rate": 8.12486822491082e-07,
      "loss": 2.3644,
      "step": 66943
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1374292373657227,
      "learning_rate": 8.123242614969928e-07,
      "loss": 2.3652,
      "step": 66944
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0731816291809082,
      "learning_rate": 8.121617160785078e-07,
      "loss": 2.2425,
      "step": 66945
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2139426469802856,
      "learning_rate": 8.119991862359056e-07,
      "loss": 2.4465,
      "step": 66946
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.176048755645752,
      "learning_rate": 8.118366719694582e-07,
      "loss": 2.381,
      "step": 66947
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1185173988342285,
      "learning_rate": 8.116741732794442e-07,
      "loss": 2.3045,
      "step": 66948
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2587248086929321,
      "learning_rate": 8.115116901661391e-07,
      "loss": 2.3101,
      "step": 66949
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0932488441467285,
      "learning_rate": 8.11349222629817e-07,
      "loss": 2.3794,
      "step": 66950
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.075149416923523,
      "learning_rate": 8.11186770670751e-07,
      "loss": 2.421,
      "step": 66951
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.03493070602417,
      "learning_rate": 8.110243342892221e-07,
      "loss": 2.3227,
      "step": 66952
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1735807657241821,
      "learning_rate": 8.108619134855001e-07,
      "loss": 2.2656,
      "step": 66953
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1076388359069824,
      "learning_rate": 8.106995082598646e-07,
      "loss": 2.3852,
      "step": 66954
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0934672355651855,
      "learning_rate": 8.105371186125877e-07,
      "loss": 2.188,
      "step": 66955
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1512529850006104,
      "learning_rate": 8.10374744543948e-07,
      "loss": 2.2784,
      "step": 66956
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0778273344039917,
      "learning_rate": 8.102123860542177e-07,
      "loss": 2.2558,
      "step": 66957
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2984987497329712,
      "learning_rate": 8.100500431436742e-07,
      "loss": 2.3884,
      "step": 66958
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1804457902908325,
      "learning_rate": 8.098877158125907e-07,
      "loss": 2.2008,
      "step": 66959
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4151463508605957,
      "learning_rate": 8.097254040612446e-07,
      "loss": 2.3498,
      "step": 66960
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9909919500350952,
      "learning_rate": 8.095631078899102e-07,
      "loss": 2.2169,
      "step": 66961
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0717955827713013,
      "learning_rate": 8.094008272988629e-07,
      "loss": 2.4423,
      "step": 66962
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.062625765800476,
      "learning_rate": 8.092385622883747e-07,
      "loss": 2.2388,
      "step": 66963
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0626837015151978,
      "learning_rate": 8.090763128587264e-07,
      "loss": 2.3929,
      "step": 66964
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0528719425201416,
      "learning_rate": 8.089140790101868e-07,
      "loss": 2.2693,
      "step": 66965
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1294418573379517,
      "learning_rate": 8.087518607430367e-07,
      "loss": 2.2967,
      "step": 66966
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0838035345077515,
      "learning_rate": 8.085896580575469e-07,
      "loss": 2.4999,
      "step": 66967
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1063231229782104,
      "learning_rate": 8.084274709539952e-07,
      "loss": 2.2387,
      "step": 66968
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1196236610412598,
      "learning_rate": 8.082652994326556e-07,
      "loss": 2.3964,
      "step": 66969
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2540243864059448,
      "learning_rate": 8.081031434938014e-07,
      "loss": 2.2226,
      "step": 66970
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1576732397079468,
      "learning_rate": 8.0794100313771e-07,
      "loss": 2.2339,
      "step": 66971
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1285167932510376,
      "learning_rate": 8.077788783646535e-07,
      "loss": 2.3026,
      "step": 66972
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.122788906097412,
      "learning_rate": 8.076167691749104e-07,
      "loss": 2.3107,
      "step": 66973
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0154541730880737,
      "learning_rate": 8.074546755687507e-07,
      "loss": 2.2452,
      "step": 66974
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0308189392089844,
      "learning_rate": 8.072925975464552e-07,
      "loss": 2.4921,
      "step": 66975
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0318015813827515,
      "learning_rate": 8.071305351082947e-07,
      "loss": 2.1018,
      "step": 66976
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1445388793945312,
      "learning_rate": 8.069684882545447e-07,
      "loss": 2.3889,
      "step": 66977
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.133336067199707,
      "learning_rate": 8.068064569854783e-07,
      "loss": 2.408,
      "step": 66978
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0475434064865112,
      "learning_rate": 8.066444413013729e-07,
      "loss": 2.2723,
      "step": 66979
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.018539309501648,
      "learning_rate": 8.064824412025008e-07,
      "loss": 2.3145,
      "step": 66980
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9440402984619141,
      "learning_rate": 8.063204566891403e-07,
      "loss": 2.1901,
      "step": 66981
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0223729610443115,
      "learning_rate": 8.061584877615614e-07,
      "loss": 2.2762,
      "step": 66982
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0846611261367798,
      "learning_rate": 8.059965344200427e-07,
      "loss": 2.3219,
      "step": 66983
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0567209720611572,
      "learning_rate": 8.058345966648551e-07,
      "loss": 2.2608,
      "step": 66984
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1560018062591553,
      "learning_rate": 8.056726744962761e-07,
      "loss": 2.4679,
      "step": 66985
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0088002681732178,
      "learning_rate": 8.055107679145779e-07,
      "loss": 2.1233,
      "step": 66986
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2229132652282715,
      "learning_rate": 8.05348876920038e-07,
      "loss": 2.4074,
      "step": 66987
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0700042247772217,
      "learning_rate": 8.05187001512927e-07,
      "loss": 2.3508,
      "step": 66988
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1048237085342407,
      "learning_rate": 8.05025141693524e-07,
      "loss": 2.3662,
      "step": 66989
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.090650200843811,
      "learning_rate": 8.048632974620985e-07,
      "loss": 2.3435,
      "step": 66990
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1787171363830566,
      "learning_rate": 8.047014688189281e-07,
      "loss": 2.6099,
      "step": 66991
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0552818775177002,
      "learning_rate": 8.045396557642849e-07,
      "loss": 2.2988,
      "step": 66992
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1697438955307007,
      "learning_rate": 8.043778582984463e-07,
      "loss": 2.4738,
      "step": 66993
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.085921049118042,
      "learning_rate": 8.042160764216822e-07,
      "loss": 2.3048,
      "step": 66994
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0091195106506348,
      "learning_rate": 8.040543101342713e-07,
      "loss": 2.1767,
      "step": 66995
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0661982297897339,
      "learning_rate": 8.038925594364866e-07,
      "loss": 2.1611,
      "step": 66996
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0709264278411865,
      "learning_rate": 8.037308243285991e-07,
      "loss": 2.0961,
      "step": 66997
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.26052987575531,
      "learning_rate": 8.035691048108884e-07,
      "loss": 2.1079,
      "step": 66998
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0238093137741089,
      "learning_rate": 8.034074008836224e-07,
      "loss": 2.3358,
      "step": 66999
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1428782939910889,
      "learning_rate": 8.032457125470816e-07,
      "loss": 2.5371,
      "step": 67000
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4471780061721802,
      "learning_rate": 8.030840398015349e-07,
      "loss": 2.2886,
      "step": 67001
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0905605554580688,
      "learning_rate": 8.029223826472621e-07,
      "loss": 2.3462,
      "step": 67002
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1365381479263306,
      "learning_rate": 8.027607410845306e-07,
      "loss": 2.4794,
      "step": 67003
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.141205906867981,
      "learning_rate": 8.025991151136192e-07,
      "loss": 2.268,
      "step": 67004
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.087327480316162,
      "learning_rate": 8.024375047347988e-07,
      "loss": 2.2274,
      "step": 67005
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.162627100944519,
      "learning_rate": 8.022759099483457e-07,
      "loss": 2.4066,
      "step": 67006
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9212853312492371,
      "learning_rate": 8.021143307545321e-07,
      "loss": 2.4938,
      "step": 67007
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0965627431869507,
      "learning_rate": 8.019527671536343e-07,
      "loss": 2.261,
      "step": 67008
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0649223327636719,
      "learning_rate": 8.017912191459231e-07,
      "loss": 2.2022,
      "step": 67009
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0548756122589111,
      "learning_rate": 8.016296867316753e-07,
      "loss": 2.4222,
      "step": 67010
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0371302366256714,
      "learning_rate": 8.014681699111615e-07,
      "loss": 2.1923,
      "step": 67011
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0650980472564697,
      "learning_rate": 8.013066686846594e-07,
      "loss": 2.1997,
      "step": 67012
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0388528108596802,
      "learning_rate": 8.011451830524398e-07,
      "loss": 2.353,
      "step": 67013
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0935131311416626,
      "learning_rate": 8.009837130147791e-07,
      "loss": 2.3715,
      "step": 67014
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1111443042755127,
      "learning_rate": 8.008222585719483e-07,
      "loss": 2.2446,
      "step": 67015
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.108054757118225,
      "learning_rate": 8.006608197242238e-07,
      "loss": 2.6041,
      "step": 67016
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.104183316230774,
      "learning_rate": 8.004993964718744e-07,
      "loss": 2.2218,
      "step": 67017
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0566667318344116,
      "learning_rate": 8.003379888151797e-07,
      "loss": 2.3003,
      "step": 67018
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0388792753219604,
      "learning_rate": 8.001765967544095e-07,
      "loss": 2.1302,
      "step": 67019
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0043621063232422,
      "learning_rate": 8.000152202898392e-07,
      "loss": 2.5438,
      "step": 67020
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.085500717163086,
      "learning_rate": 7.998538594217409e-07,
      "loss": 2.3429,
      "step": 67021
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1725940704345703,
      "learning_rate": 7.996925141503897e-07,
      "loss": 2.1466,
      "step": 67022
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1712522506713867,
      "learning_rate": 7.995311844760578e-07,
      "loss": 2.2974,
      "step": 67023
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.049357533454895,
      "learning_rate": 7.993698703990205e-07,
      "loss": 2.3107,
      "step": 67024
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.396139144897461,
      "learning_rate": 7.992085719195508e-07,
      "loss": 2.0339,
      "step": 67025
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1061861515045166,
      "learning_rate": 7.990472890379186e-07,
      "loss": 2.1578,
      "step": 67026
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0242475271224976,
      "learning_rate": 7.988860217544026e-07,
      "loss": 2.1549,
      "step": 67027
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.013443112373352,
      "learning_rate": 7.987247700692746e-07,
      "loss": 2.2495,
      "step": 67028
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9861627817153931,
      "learning_rate": 7.985635339828046e-07,
      "loss": 2.5463,
      "step": 67029
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1081920862197876,
      "learning_rate": 7.984023134952701e-07,
      "loss": 2.5282,
      "step": 67030
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0381263494491577,
      "learning_rate": 7.98241108606943e-07,
      "loss": 2.4119,
      "step": 67031
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.066706657409668,
      "learning_rate": 7.980799193180944e-07,
      "loss": 2.08,
      "step": 67032
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1317743062973022,
      "learning_rate": 7.979187456290016e-07,
      "loss": 2.3971,
      "step": 67033
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.138258695602417,
      "learning_rate": 7.977575875399335e-07,
      "loss": 2.2013,
      "step": 67034
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.216963291168213,
      "learning_rate": 7.975964450511675e-07,
      "loss": 2.2189,
      "step": 67035
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0351494550704956,
      "learning_rate": 7.974353181629735e-07,
      "loss": 2.3556,
      "step": 67036
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1603792905807495,
      "learning_rate": 7.972742068756278e-07,
      "loss": 2.4087,
      "step": 67037
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0852768421173096,
      "learning_rate": 7.971131111893993e-07,
      "loss": 2.3804,
      "step": 67038
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.151950478553772,
      "learning_rate": 7.969520311045664e-07,
      "loss": 2.2635,
      "step": 67039
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9312031865119934,
      "learning_rate": 7.967909666213969e-07,
      "loss": 2.1544,
      "step": 67040
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1090868711471558,
      "learning_rate": 7.966299177401693e-07,
      "loss": 2.261,
      "step": 67041
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0495007038116455,
      "learning_rate": 7.964688844611512e-07,
      "loss": 2.3603,
      "step": 67042
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.062195897102356,
      "learning_rate": 7.963078667846192e-07,
      "loss": 2.1972,
      "step": 67043
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1043925285339355,
      "learning_rate": 7.961468647108428e-07,
      "loss": 2.1519,
      "step": 67044
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1694735288619995,
      "learning_rate": 7.959858782400997e-07,
      "loss": 2.2763,
      "step": 67045
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0890324115753174,
      "learning_rate": 7.958249073726587e-07,
      "loss": 2.1486,
      "step": 67046
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0196877717971802,
      "learning_rate": 7.95663952108795e-07,
      "loss": 2.3748,
      "step": 67047
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1563040018081665,
      "learning_rate": 7.955030124487795e-07,
      "loss": 2.1344,
      "step": 67048
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0159845352172852,
      "learning_rate": 7.953420883928886e-07,
      "loss": 2.1758,
      "step": 67049
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0885623693466187,
      "learning_rate": 7.951811799413911e-07,
      "loss": 2.4395,
      "step": 67050
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1061323881149292,
      "learning_rate": 7.950202870945622e-07,
      "loss": 2.2468,
      "step": 67051
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0627470016479492,
      "learning_rate": 7.948594098526741e-07,
      "loss": 2.258,
      "step": 67052
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1211591958999634,
      "learning_rate": 7.946985482159986e-07,
      "loss": 2.4374,
      "step": 67053
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0454717874526978,
      "learning_rate": 7.945377021848099e-07,
      "loss": 2.4006,
      "step": 67054
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.472755789756775,
      "learning_rate": 7.943768717593814e-07,
      "loss": 2.2652,
      "step": 67055
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.107588768005371,
      "learning_rate": 7.942160569399815e-07,
      "loss": 2.4003,
      "step": 67056
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1401209831237793,
      "learning_rate": 7.940552577268868e-07,
      "loss": 2.3058,
      "step": 67057
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0691429376602173,
      "learning_rate": 7.938944741203703e-07,
      "loss": 2.475,
      "step": 67058
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.229911208152771,
      "learning_rate": 7.937337061207007e-07,
      "loss": 2.281,
      "step": 67059
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1880501508712769,
      "learning_rate": 7.935729537281533e-07,
      "loss": 2.3644,
      "step": 67060
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.163063406944275,
      "learning_rate": 7.934122169430004e-07,
      "loss": 2.2728,
      "step": 67061
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1614282131195068,
      "learning_rate": 7.932514957655146e-07,
      "loss": 2.1877,
      "step": 67062
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1172465085983276,
      "learning_rate": 7.930907901959672e-07,
      "loss": 2.2492,
      "step": 67063
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0870208740234375,
      "learning_rate": 7.929301002346323e-07,
      "loss": 2.5039,
      "step": 67064
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.046579360961914,
      "learning_rate": 7.927694258817809e-07,
      "loss": 2.2676,
      "step": 67065
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1698832511901855,
      "learning_rate": 7.92608767137687e-07,
      "loss": 2.3933,
      "step": 67066
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1701456308364868,
      "learning_rate": 7.924481240026193e-07,
      "loss": 2.1379,
      "step": 67067
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0116249322891235,
      "learning_rate": 7.922874964768579e-07,
      "loss": 2.4286,
      "step": 67068
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0794821977615356,
      "learning_rate": 7.921268845606655e-07,
      "loss": 2.3594,
      "step": 67069
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.03018057346344,
      "learning_rate": 7.919662882543211e-07,
      "loss": 2.1543,
      "step": 67070
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0015116930007935,
      "learning_rate": 7.91805707558092e-07,
      "loss": 2.42,
      "step": 67071
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0781773328781128,
      "learning_rate": 7.916451424722559e-07,
      "loss": 2.5281,
      "step": 67072
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.298174262046814,
      "learning_rate": 7.914845929970805e-07,
      "loss": 2.0922,
      "step": 67073
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.047548770904541,
      "learning_rate": 7.913240591328408e-07,
      "loss": 2.2239,
      "step": 67074
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0961354970932007,
      "learning_rate": 7.911635408798068e-07,
      "loss": 2.5244,
      "step": 67075
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2066864967346191,
      "learning_rate": 7.910030382382527e-07,
      "loss": 2.3065,
      "step": 67076
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0496495962142944,
      "learning_rate": 7.908425512084483e-07,
      "loss": 2.2297,
      "step": 67077
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0798076391220093,
      "learning_rate": 7.90682079790669e-07,
      "loss": 2.2365,
      "step": 67078
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.979735791683197,
      "learning_rate": 7.905216239851843e-07,
      "loss": 2.4016,
      "step": 67079
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3026292324066162,
      "learning_rate": 7.903611837922654e-07,
      "loss": 2.5353,
      "step": 67080
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1279982328414917,
      "learning_rate": 7.902007592121874e-07,
      "loss": 2.4776,
      "step": 67081
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1157341003417969,
      "learning_rate": 7.900403502452203e-07,
      "loss": 2.4276,
      "step": 67082
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0484466552734375,
      "learning_rate": 7.898799568916338e-07,
      "loss": 2.264,
      "step": 67083
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1297433376312256,
      "learning_rate": 7.897195791517043e-07,
      "loss": 2.2926,
      "step": 67084
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0661098957061768,
      "learning_rate": 7.895592170257005e-07,
      "loss": 2.5083,
      "step": 67085
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2880494594573975,
      "learning_rate": 7.893988705138966e-07,
      "loss": 2.3855,
      "step": 67086
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.046723484992981,
      "learning_rate": 7.892385396165625e-07,
      "loss": 2.3648,
      "step": 67087
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0520273447036743,
      "learning_rate": 7.8907822433397e-07,
      "loss": 2.5812,
      "step": 67088
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1200487613677979,
      "learning_rate": 7.889179246663925e-07,
      "loss": 2.1554,
      "step": 67089
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0959779024124146,
      "learning_rate": 7.887576406140996e-07,
      "loss": 2.3014,
      "step": 67090
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0663331747055054,
      "learning_rate": 7.885973721773655e-07,
      "loss": 2.2309,
      "step": 67091
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0366053581237793,
      "learning_rate": 7.8843711935646e-07,
      "loss": 2.1889,
      "step": 67092
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9959757328033447,
      "learning_rate": 7.882768821516562e-07,
      "loss": 2.1826,
      "step": 67093
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.96183180809021,
      "learning_rate": 7.881166605632251e-07,
      "loss": 2.2574,
      "step": 67094
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.078233242034912,
      "learning_rate": 7.879564545914375e-07,
      "loss": 2.4403,
      "step": 67095
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0234971046447754,
      "learning_rate": 7.877962642365655e-07,
      "loss": 2.2354,
      "step": 67096
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1372418403625488,
      "learning_rate": 7.87636089498881e-07,
      "loss": 2.2817,
      "step": 67097
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9568222165107727,
      "learning_rate": 7.87475930378655e-07,
      "loss": 2.3129,
      "step": 67098
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1345254182815552,
      "learning_rate": 7.873157868761606e-07,
      "loss": 2.3817,
      "step": 67099
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0278862714767456,
      "learning_rate": 7.871556589916662e-07,
      "loss": 2.5213,
      "step": 67100
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0943044424057007,
      "learning_rate": 7.869955467254464e-07,
      "loss": 2.2382,
      "step": 67101
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9824932813644409,
      "learning_rate": 7.868354500777698e-07,
      "loss": 2.3267,
      "step": 67102
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.221818447113037,
      "learning_rate": 7.866753690489115e-07,
      "loss": 1.9797,
      "step": 67103
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1333633661270142,
      "learning_rate": 7.865153036391393e-07,
      "loss": 2.1182,
      "step": 67104
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0922679901123047,
      "learning_rate": 7.863552538487273e-07,
      "loss": 2.1345,
      "step": 67105
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0547882318496704,
      "learning_rate": 7.861952196779443e-07,
      "loss": 2.2834,
      "step": 67106
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0981369018554688,
      "learning_rate": 7.860352011270655e-07,
      "loss": 2.3568,
      "step": 67107
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1080998182296753,
      "learning_rate": 7.858751981963564e-07,
      "loss": 2.0866,
      "step": 67108
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0967813730239868,
      "learning_rate": 7.857152108860933e-07,
      "loss": 2.3513,
      "step": 67109
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.030816912651062,
      "learning_rate": 7.855552391965437e-07,
      "loss": 2.2059,
      "step": 67110
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1303536891937256,
      "learning_rate": 7.853952831279821e-07,
      "loss": 2.2445,
      "step": 67111
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1837348937988281,
      "learning_rate": 7.852353426806758e-07,
      "loss": 2.2539,
      "step": 67112
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.06843900680542,
      "learning_rate": 7.850754178549013e-07,
      "loss": 2.312,
      "step": 67113
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1887778043746948,
      "learning_rate": 7.849155086509263e-07,
      "loss": 2.5012,
      "step": 67114
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1582039594650269,
      "learning_rate": 7.847556150690205e-07,
      "loss": 2.1585,
      "step": 67115
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0422524213790894,
      "learning_rate": 7.845957371094581e-07,
      "loss": 2.1177,
      "step": 67116
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0795310735702515,
      "learning_rate": 7.844358747725067e-07,
      "loss": 2.4077,
      "step": 67117
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0921838283538818,
      "learning_rate": 7.842760280584428e-07,
      "loss": 2.4022,
      "step": 67118
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9655876755714417,
      "learning_rate": 7.841161969675304e-07,
      "loss": 2.4298,
      "step": 67119
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0102699995040894,
      "learning_rate": 7.839563815000473e-07,
      "loss": 2.0218,
      "step": 67120
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.119787335395813,
      "learning_rate": 7.837965816562598e-07,
      "loss": 2.3885,
      "step": 67121
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1637042760849,
      "learning_rate": 7.836367974364412e-07,
      "loss": 2.2112,
      "step": 67122
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1815074682235718,
      "learning_rate": 7.834770288408588e-07,
      "loss": 2.3982,
      "step": 67123
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0425372123718262,
      "learning_rate": 7.833172758697893e-07,
      "loss": 2.4204,
      "step": 67124
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1650649309158325,
      "learning_rate": 7.831575385234969e-07,
      "loss": 2.4089,
      "step": 67125
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0532981157302856,
      "learning_rate": 7.829978168022578e-07,
      "loss": 2.3809,
      "step": 67126
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1345642805099487,
      "learning_rate": 7.828381107063398e-07,
      "loss": 2.4837,
      "step": 67127
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1831811666488647,
      "learning_rate": 7.826784202360149e-07,
      "loss": 2.544,
      "step": 67128
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1015046834945679,
      "learning_rate": 7.825187453915528e-07,
      "loss": 2.3974,
      "step": 67129
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0816855430603027,
      "learning_rate": 7.823590861732266e-07,
      "loss": 2.3679,
      "step": 67130
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.003537893295288,
      "learning_rate": 7.821994425813028e-07,
      "loss": 2.2176,
      "step": 67131
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.099591612815857,
      "learning_rate": 7.820398146160568e-07,
      "loss": 2.3459,
      "step": 67132
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.970623791217804,
      "learning_rate": 7.81880202277755e-07,
      "loss": 2.2776,
      "step": 67133
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1006723642349243,
      "learning_rate": 7.817206055666737e-07,
      "loss": 2.1893,
      "step": 67134
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0909035205841064,
      "learning_rate": 7.815610244830751e-07,
      "loss": 2.3159,
      "step": 67135
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0611728429794312,
      "learning_rate": 7.814014590272367e-07,
      "loss": 2.2182,
      "step": 67136
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0449243783950806,
      "learning_rate": 7.81241909199425e-07,
      "loss": 2.2342,
      "step": 67137
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0618691444396973,
      "learning_rate": 7.81082374999913e-07,
      "loss": 2.3802,
      "step": 67138
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9861767888069153,
      "learning_rate": 7.809228564289695e-07,
      "loss": 2.4213,
      "step": 67139
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1332671642303467,
      "learning_rate": 7.807633534868663e-07,
      "loss": 2.1073,
      "step": 67140
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.117058515548706,
      "learning_rate": 7.806038661738724e-07,
      "loss": 2.337,
      "step": 67141
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0693782567977905,
      "learning_rate": 7.804443944902596e-07,
      "loss": 2.372,
      "step": 67142
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0408974885940552,
      "learning_rate": 7.802849384362987e-07,
      "loss": 2.3551,
      "step": 67143
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.128609538078308,
      "learning_rate": 7.801254980122564e-07,
      "loss": 2.3872,
      "step": 67144
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9958359003067017,
      "learning_rate": 7.799660732184078e-07,
      "loss": 2.3563,
      "step": 67145
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1895345449447632,
      "learning_rate": 7.798066640550184e-07,
      "loss": 2.3922,
      "step": 67146
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0165804624557495,
      "learning_rate": 7.796472705223623e-07,
      "loss": 2.3886,
      "step": 67147
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2648698091506958,
      "learning_rate": 7.794878926207095e-07,
      "loss": 2.1422,
      "step": 67148
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9658728837966919,
      "learning_rate": 7.793285303503284e-07,
      "loss": 2.4234,
      "step": 67149
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0477337837219238,
      "learning_rate": 7.791691837114879e-07,
      "loss": 2.2805,
      "step": 67150
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1458842754364014,
      "learning_rate": 7.790098527044621e-07,
      "loss": 2.1635,
      "step": 67151
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.994622528553009,
      "learning_rate": 7.788505373295164e-07,
      "loss": 2.4207,
      "step": 67152
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4264992475509644,
      "learning_rate": 7.78691237586926e-07,
      "loss": 2.261,
      "step": 67153
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0410641431808472,
      "learning_rate": 7.785319534769564e-07,
      "loss": 2.2562,
      "step": 67154
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.055646300315857,
      "learning_rate": 7.783726849998807e-07,
      "loss": 2.2262,
      "step": 67155
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1168389320373535,
      "learning_rate": 7.782134321559676e-07,
      "loss": 2.4373,
      "step": 67156
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1237928867340088,
      "learning_rate": 7.780541949454878e-07,
      "loss": 2.3268,
      "step": 67157
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0440869331359863,
      "learning_rate": 7.778949733687091e-07,
      "loss": 2.3602,
      "step": 67158
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0279123783111572,
      "learning_rate": 7.777357674259057e-07,
      "loss": 2.1181,
      "step": 67159
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1959398984909058,
      "learning_rate": 7.775765771173449e-07,
      "loss": 2.3504,
      "step": 67160
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2497704029083252,
      "learning_rate": 7.774174024432956e-07,
      "loss": 2.254,
      "step": 67161
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0355489253997803,
      "learning_rate": 7.772582434040277e-07,
      "loss": 2.3803,
      "step": 67162
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0524719953536987,
      "learning_rate": 7.77099099999813e-07,
      "loss": 2.1137,
      "step": 67163
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5886824131011963,
      "learning_rate": 7.769399722309189e-07,
      "loss": 2.2145,
      "step": 67164
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2152810096740723,
      "learning_rate": 7.76780860097619e-07,
      "loss": 2.5309,
      "step": 67165
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.07510507106781,
      "learning_rate": 7.766217636001783e-07,
      "loss": 2.557,
      "step": 67166
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0834084749221802,
      "learning_rate": 7.7646268273887e-07,
      "loss": 2.1074,
      "step": 67167
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7287095785140991,
      "learning_rate": 7.763036175139604e-07,
      "loss": 2.2322,
      "step": 67168
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0338668823242188,
      "learning_rate": 7.761445679257241e-07,
      "loss": 2.2415,
      "step": 67169
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1496270895004272,
      "learning_rate": 7.759855339744271e-07,
      "loss": 2.2125,
      "step": 67170
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0682915449142456,
      "learning_rate": 7.758265156603384e-07,
      "loss": 2.2936,
      "step": 67171
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0852200984954834,
      "learning_rate": 7.75667512983731e-07,
      "loss": 2.2628,
      "step": 67172
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1136654615402222,
      "learning_rate": 7.755085259448725e-07,
      "loss": 2.2472,
      "step": 67173
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.084557294845581,
      "learning_rate": 7.753495545440304e-07,
      "loss": 2.1948,
      "step": 67174
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9920872449874878,
      "learning_rate": 7.751905987814779e-07,
      "loss": 2.3562,
      "step": 67175
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9984541535377502,
      "learning_rate": 7.750316586574824e-07,
      "loss": 2.1418,
      "step": 67176
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2650883197784424,
      "learning_rate": 7.748727341723128e-07,
      "loss": 2.373,
      "step": 67177
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1210870742797852,
      "learning_rate": 7.747138253262409e-07,
      "loss": 2.1953,
      "step": 67178
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0992250442504883,
      "learning_rate": 7.745549321195323e-07,
      "loss": 2.4862,
      "step": 67179
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.058189868927002,
      "learning_rate": 7.74396054552461e-07,
      "loss": 2.2897,
      "step": 67180
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.20729398727417,
      "learning_rate": 7.742371926252923e-07,
      "loss": 2.282,
      "step": 67181
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1599053144454956,
      "learning_rate": 7.740783463382983e-07,
      "loss": 2.5162,
      "step": 67182
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2232067584991455,
      "learning_rate": 7.739195156917467e-07,
      "loss": 2.4297,
      "step": 67183
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1774609088897705,
      "learning_rate": 7.737607006859083e-07,
      "loss": 2.4374,
      "step": 67184
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2123804092407227,
      "learning_rate": 7.736019013210494e-07,
      "loss": 2.4122,
      "step": 67185
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0200871229171753,
      "learning_rate": 7.734431175974444e-07,
      "loss": 2.2019,
      "step": 67186
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2032729387283325,
      "learning_rate": 7.732843495153564e-07,
      "loss": 2.6132,
      "step": 67187
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9641600251197815,
      "learning_rate": 7.731255970750596e-07,
      "loss": 2.2928,
      "step": 67188
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1900888681411743,
      "learning_rate": 7.729668602768181e-07,
      "loss": 2.3077,
      "step": 67189
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.9922806620597839,
      "learning_rate": 7.728081391209064e-07,
      "loss": 2.2633,
      "step": 67190
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1602437496185303,
      "learning_rate": 7.726494336075885e-07,
      "loss": 2.3327,
      "step": 67191
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.984512448310852,
      "learning_rate": 7.724907437371376e-07,
      "loss": 2.2401,
      "step": 67192
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0470918416976929,
      "learning_rate": 7.72332069509819e-07,
      "loss": 2.0408,
      "step": 67193
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.085968255996704,
      "learning_rate": 7.721734109259072e-07,
      "loss": 2.2739,
      "step": 67194
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1545624732971191,
      "learning_rate": 7.720147679856637e-07,
      "loss": 2.2706,
      "step": 67195
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0575133562088013,
      "learning_rate": 7.718561406893654e-07,
      "loss": 2.1738,
      "step": 67196
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0925618410110474,
      "learning_rate": 7.716975290372753e-07,
      "loss": 2.4916,
      "step": 67197
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.1052402257919312,
      "learning_rate": 7.715389330296641e-07,
      "loss": 2.3836,
      "step": 67198
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.092095136642456,
      "learning_rate": 7.713803526668018e-07,
      "loss": 2.4652,
      "step": 67199
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.0634695291519165,
      "learning_rate": 7.712217879489559e-07,
      "loss": 2.1432,
      "step": 67200
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1035130023956299,
      "learning_rate": 7.71063238876395e-07,
      "loss": 2.5047,
      "step": 67201
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1474642753601074,
      "learning_rate": 7.70904705449389e-07,
      "loss": 2.3399,
      "step": 67202
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0422449111938477,
      "learning_rate": 7.707461876682054e-07,
      "loss": 2.373,
      "step": 67203
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.151120901107788,
      "learning_rate": 7.705876855331141e-07,
      "loss": 2.2698,
      "step": 67204
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.096260666847229,
      "learning_rate": 7.704291990443846e-07,
      "loss": 2.3348,
      "step": 67205
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.034401297569275,
      "learning_rate": 7.702707282022814e-07,
      "loss": 2.4386,
      "step": 67206
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.270288109779358,
      "learning_rate": 7.701122730070786e-07,
      "loss": 2.1671,
      "step": 67207
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2018342018127441,
      "learning_rate": 7.699538334590406e-07,
      "loss": 2.2508,
      "step": 67208
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1518601179122925,
      "learning_rate": 7.697954095584392e-07,
      "loss": 2.4827,
      "step": 67209
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.102007508277893,
      "learning_rate": 7.696370013055399e-07,
      "loss": 2.3931,
      "step": 67210
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1525260210037231,
      "learning_rate": 7.694786087006145e-07,
      "loss": 2.4063,
      "step": 67211
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1601356267929077,
      "learning_rate": 7.693202317439286e-07,
      "loss": 2.3718,
      "step": 67212
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.699755907058716,
      "learning_rate": 7.691618704357539e-07,
      "loss": 2.4272,
      "step": 67213
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.166823387145996,
      "learning_rate": 7.69003524776355e-07,
      "loss": 2.3458,
      "step": 67214
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9837154150009155,
      "learning_rate": 7.688451947660036e-07,
      "loss": 2.0618,
      "step": 67215
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1282670497894287,
      "learning_rate": 7.686868804049641e-07,
      "loss": 2.1808,
      "step": 67216
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.161628007888794,
      "learning_rate": 7.685285816935106e-07,
      "loss": 2.2823,
      "step": 67217
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.06967294216156,
      "learning_rate": 7.683702986319053e-07,
      "loss": 2.2757,
      "step": 67218
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1357488632202148,
      "learning_rate": 7.682120312204222e-07,
      "loss": 2.4987,
      "step": 67219
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2769160270690918,
      "learning_rate": 7.680537794593257e-07,
      "loss": 2.4889,
      "step": 67220
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.219395399093628,
      "learning_rate": 7.678955433488866e-07,
      "loss": 2.3368,
      "step": 67221
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.165130615234375,
      "learning_rate": 7.677373228893703e-07,
      "loss": 2.1661,
      "step": 67222
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0771201848983765,
      "learning_rate": 7.675791180810476e-07,
      "loss": 2.4385,
      "step": 67223
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2763967514038086,
      "learning_rate": 7.674209289241852e-07,
      "loss": 2.3741,
      "step": 67224
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1720926761627197,
      "learning_rate": 7.672627554190525e-07,
      "loss": 2.3161,
      "step": 67225
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1191058158874512,
      "learning_rate": 7.671045975659186e-07,
      "loss": 2.2397,
      "step": 67226
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.003359079360962,
      "learning_rate": 7.669464553650485e-07,
      "loss": 2.2198,
      "step": 67227
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0453698635101318,
      "learning_rate": 7.667883288167111e-07,
      "loss": 2.3443,
      "step": 67228
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1263545751571655,
      "learning_rate": 7.66630217921176e-07,
      "loss": 2.3401,
      "step": 67229
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.208435297012329,
      "learning_rate": 7.664721226787097e-07,
      "loss": 2.3233,
      "step": 67230
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.131454586982727,
      "learning_rate": 7.663140430895821e-07,
      "loss": 2.2493,
      "step": 67231
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0504778623580933,
      "learning_rate": 7.661559791540596e-07,
      "loss": 2.4597,
      "step": 67232
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1581403017044067,
      "learning_rate": 7.659979308724097e-07,
      "loss": 2.1689,
      "step": 67233
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1510671377182007,
      "learning_rate": 7.658398982449022e-07,
      "loss": 2.2273,
      "step": 67234
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2757189273834229,
      "learning_rate": 7.656818812718026e-07,
      "loss": 2.4461,
      "step": 67235
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0538252592086792,
      "learning_rate": 7.655238799533826e-07,
      "loss": 2.1226,
      "step": 67236
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1337430477142334,
      "learning_rate": 7.653658942899056e-07,
      "loss": 2.508,
      "step": 67237
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9716553688049316,
      "learning_rate": 7.652079242816424e-07,
      "loss": 2.2942,
      "step": 67238
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.035091757774353,
      "learning_rate": 7.650499699288605e-07,
      "loss": 2.5485,
      "step": 67239
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.194905161857605,
      "learning_rate": 7.648920312318275e-07,
      "loss": 2.2592,
      "step": 67240
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0711290836334229,
      "learning_rate": 7.647341081908088e-07,
      "loss": 2.2847,
      "step": 67241
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1937493085861206,
      "learning_rate": 7.645762008060753e-07,
      "loss": 2.5423,
      "step": 67242
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2095160484313965,
      "learning_rate": 7.644183090778923e-07,
      "loss": 2.2326,
      "step": 67243
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0433754920959473,
      "learning_rate": 7.642604330065295e-07,
      "loss": 2.2946,
      "step": 67244
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.136982798576355,
      "learning_rate": 7.641025725922535e-07,
      "loss": 2.1918,
      "step": 67245
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9940871000289917,
      "learning_rate": 7.639447278353329e-07,
      "loss": 2.1059,
      "step": 67246
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1077520847320557,
      "learning_rate": 7.637868987360319e-07,
      "loss": 2.3218,
      "step": 67247
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0280760526657104,
      "learning_rate": 7.636290852946237e-07,
      "loss": 2.3025,
      "step": 67248
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0744832754135132,
      "learning_rate": 7.634712875113703e-07,
      "loss": 2.2295,
      "step": 67249
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1749387979507446,
      "learning_rate": 7.633135053865437e-07,
      "loss": 2.3346,
      "step": 67250
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.09319007396698,
      "learning_rate": 7.63155738920407e-07,
      "loss": 2.0973,
      "step": 67251
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1651523113250732,
      "learning_rate": 7.629979881132344e-07,
      "loss": 2.2729,
      "step": 67252
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1491669416427612,
      "learning_rate": 7.628402529652845e-07,
      "loss": 2.3925,
      "step": 67253
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2104058265686035,
      "learning_rate": 7.626825334768318e-07,
      "loss": 2.4558,
      "step": 67254
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1370537281036377,
      "learning_rate": 7.625248296481391e-07,
      "loss": 2.1201,
      "step": 67255
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2085717916488647,
      "learning_rate": 7.623671414794765e-07,
      "loss": 2.3985,
      "step": 67256
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.117569088935852,
      "learning_rate": 7.622094689711102e-07,
      "loss": 2.1757,
      "step": 67257
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9573763608932495,
      "learning_rate": 7.620518121233089e-07,
      "loss": 2.2463,
      "step": 67258
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0996595621109009,
      "learning_rate": 7.61894170936337e-07,
      "loss": 2.4444,
      "step": 67259
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1637043952941895,
      "learning_rate": 7.617365454104652e-07,
      "loss": 2.2489,
      "step": 67260
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2147616147994995,
      "learning_rate": 7.615789355459591e-07,
      "loss": 2.2714,
      "step": 67261
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9909996390342712,
      "learning_rate": 7.614213413430838e-07,
      "loss": 2.2401,
      "step": 67262
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0253061056137085,
      "learning_rate": 7.612637628021102e-07,
      "loss": 2.1478,
      "step": 67263
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.997287929058075,
      "learning_rate": 7.611061999233027e-07,
      "loss": 2.1243,
      "step": 67264
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.189009666442871,
      "learning_rate": 7.609486527069299e-07,
      "loss": 2.2988,
      "step": 67265
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1777976751327515,
      "learning_rate": 7.607911211532592e-07,
      "loss": 2.3906,
      "step": 67266
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0653659105300903,
      "learning_rate": 7.606336052625573e-07,
      "loss": 2.2042,
      "step": 67267
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2972127199172974,
      "learning_rate": 7.604761050350884e-07,
      "loss": 2.2082,
      "step": 67268
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1504812240600586,
      "learning_rate": 7.603186204711243e-07,
      "loss": 2.5277,
      "step": 67269
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9803856611251831,
      "learning_rate": 7.601611515709273e-07,
      "loss": 2.4743,
      "step": 67270
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0682703256607056,
      "learning_rate": 7.600036983347691e-07,
      "loss": 2.3662,
      "step": 67271
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1655523777008057,
      "learning_rate": 7.59846260762912e-07,
      "loss": 2.338,
      "step": 67272
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.266505241394043,
      "learning_rate": 7.596888388556267e-07,
      "loss": 2.3857,
      "step": 67273
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1293894052505493,
      "learning_rate": 7.595314326131764e-07,
      "loss": 2.4495,
      "step": 67274
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1013778448104858,
      "learning_rate": 7.59374042035833e-07,
      "loss": 2.4509,
      "step": 67275
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.091959834098816,
      "learning_rate": 7.592166671238577e-07,
      "loss": 2.2176,
      "step": 67276
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.111673355102539,
      "learning_rate": 7.59059307877521e-07,
      "loss": 2.1843,
      "step": 67277
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.151802659034729,
      "learning_rate": 7.589019642970874e-07,
      "loss": 2.3485,
      "step": 67278
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0338428020477295,
      "learning_rate": 7.587446363828288e-07,
      "loss": 2.2497,
      "step": 67279
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9878702759742737,
      "learning_rate": 7.585873241350039e-07,
      "loss": 2.1943,
      "step": 67280
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.178897738456726,
      "learning_rate": 7.584300275538858e-07,
      "loss": 2.4115,
      "step": 67281
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1033684015274048,
      "learning_rate": 7.582727466397366e-07,
      "loss": 2.4379,
      "step": 67282
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9663085341453552,
      "learning_rate": 7.58115481392826e-07,
      "loss": 2.1267,
      "step": 67283
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.053558588027954,
      "learning_rate": 7.579582318134193e-07,
      "loss": 2.2591,
      "step": 67284
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9967130422592163,
      "learning_rate": 7.578009979017852e-07,
      "loss": 2.2441,
      "step": 67285
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.192198395729065,
      "learning_rate": 7.576437796581859e-07,
      "loss": 2.4162,
      "step": 67286
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0228444337844849,
      "learning_rate": 7.574865770828931e-07,
      "loss": 2.0133,
      "step": 67287
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1163372993469238,
      "learning_rate": 7.573293901761703e-07,
      "loss": 2.529,
      "step": 67288
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0280755758285522,
      "learning_rate": 7.571722189382824e-07,
      "loss": 2.2267,
      "step": 67289
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2977062463760376,
      "learning_rate": 7.570150633695006e-07,
      "loss": 2.2279,
      "step": 67290
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1425544023513794,
      "learning_rate": 7.568579234700868e-07,
      "loss": 2.2596,
      "step": 67291
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.02804696559906,
      "learning_rate": 7.567007992403097e-07,
      "loss": 2.1985,
      "step": 67292
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.081517219543457,
      "learning_rate": 7.565436906804369e-07,
      "loss": 2.28,
      "step": 67293
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9902140498161316,
      "learning_rate": 7.563865977907315e-07,
      "loss": 2.1534,
      "step": 67294
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1238834857940674,
      "learning_rate": 7.562295205714609e-07,
      "loss": 2.1022,
      "step": 67295
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0737361907958984,
      "learning_rate": 7.56072459022893e-07,
      "loss": 2.5926,
      "step": 67296
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1922438144683838,
      "learning_rate": 7.559154131452905e-07,
      "loss": 2.283,
      "step": 67297
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0228999853134155,
      "learning_rate": 7.557583829389247e-07,
      "loss": 2.4998,
      "step": 67298
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0454226732254028,
      "learning_rate": 7.556013684040575e-07,
      "loss": 2.1883,
      "step": 67299
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1068683862686157,
      "learning_rate": 7.554443695409575e-07,
      "loss": 2.3724,
      "step": 67300
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.082957148551941,
      "learning_rate": 7.552873863498889e-07,
      "loss": 2.1646,
      "step": 67301
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0707368850708008,
      "learning_rate": 7.551304188311215e-07,
      "loss": 2.4802,
      "step": 67302
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1971994638442993,
      "learning_rate": 7.549734669849162e-07,
      "loss": 2.4251,
      "step": 67303
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0325119495391846,
      "learning_rate": 7.548165308115441e-07,
      "loss": 2.4626,
      "step": 67304
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.148002028465271,
      "learning_rate": 7.54659610311268e-07,
      "loss": 2.3601,
      "step": 67305
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0902372598648071,
      "learning_rate": 7.545027054843557e-07,
      "loss": 2.3549,
      "step": 67306
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0542048215866089,
      "learning_rate": 7.543458163310702e-07,
      "loss": 2.4832,
      "step": 67307
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0112448930740356,
      "learning_rate": 7.541889428516813e-07,
      "loss": 2.4017,
      "step": 67308
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0384869575500488,
      "learning_rate": 7.540320850464522e-07,
      "loss": 2.1565,
      "step": 67309
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1441676616668701,
      "learning_rate": 7.538752429156515e-07,
      "loss": 2.2123,
      "step": 67310
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0904889106750488,
      "learning_rate": 7.537184164595413e-07,
      "loss": 2.4809,
      "step": 67311
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0435847043991089,
      "learning_rate": 7.535616056783912e-07,
      "loss": 2.2702,
      "step": 67312
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.354093074798584,
      "learning_rate": 7.534048105724645e-07,
      "loss": 2.3508,
      "step": 67313
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.072435975074768,
      "learning_rate": 7.532480311420299e-07,
      "loss": 2.3213,
      "step": 67314
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0374088287353516,
      "learning_rate": 7.530912673873503e-07,
      "loss": 2.4507,
      "step": 67315
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1023755073547363,
      "learning_rate": 7.529345193086912e-07,
      "loss": 2.3687,
      "step": 67316
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0568718910217285,
      "learning_rate": 7.527777869063213e-07,
      "loss": 2.4078,
      "step": 67317
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1070317029953003,
      "learning_rate": 7.526210701805037e-07,
      "loss": 2.0828,
      "step": 67318
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0514198541641235,
      "learning_rate": 7.524643691315037e-07,
      "loss": 2.4525,
      "step": 67319
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1194183826446533,
      "learning_rate": 7.523076837595911e-07,
      "loss": 2.1957,
      "step": 67320
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1276679039001465,
      "learning_rate": 7.521510140650257e-07,
      "loss": 2.3677,
      "step": 67321
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2069306373596191,
      "learning_rate": 7.519943600480772e-07,
      "loss": 2.1453,
      "step": 67322
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.098122239112854,
      "learning_rate": 7.518377217090111e-07,
      "loss": 2.2537,
      "step": 67323
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1501269340515137,
      "learning_rate": 7.516810990480905e-07,
      "loss": 2.2362,
      "step": 67324
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9976455569267273,
      "learning_rate": 7.515244920655828e-07,
      "loss": 2.6542,
      "step": 67325
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0322976112365723,
      "learning_rate": 7.513679007617514e-07,
      "loss": 2.2232,
      "step": 67326
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.038219690322876,
      "learning_rate": 7.512113251368658e-07,
      "loss": 2.3625,
      "step": 67327
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1479358673095703,
      "learning_rate": 7.510547651911859e-07,
      "loss": 2.4343,
      "step": 67328
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0953381061553955,
      "learning_rate": 7.508982209249826e-07,
      "loss": 2.1195,
      "step": 67329
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9973611235618591,
      "learning_rate": 7.50741692338518e-07,
      "loss": 2.323,
      "step": 67330
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0174793004989624,
      "learning_rate": 7.505851794320607e-07,
      "loss": 2.1348,
      "step": 67331
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1495628356933594,
      "learning_rate": 7.504286822058704e-07,
      "loss": 2.4147,
      "step": 67332
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0998108386993408,
      "learning_rate": 7.50272200660217e-07,
      "loss": 2.104,
      "step": 67333
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.079229474067688,
      "learning_rate": 7.501157347953635e-07,
      "loss": 2.2187,
      "step": 67334
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2163394689559937,
      "learning_rate": 7.499592846115777e-07,
      "loss": 2.191,
      "step": 67335
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9463942646980286,
      "learning_rate": 7.498028501091204e-07,
      "loss": 2.2589,
      "step": 67336
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0996840000152588,
      "learning_rate": 7.496464312882623e-07,
      "loss": 2.1754,
      "step": 67337
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1518582105636597,
      "learning_rate": 7.494900281492634e-07,
      "loss": 2.1909,
      "step": 67338
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.133651852607727,
      "learning_rate": 7.493336406923934e-07,
      "loss": 2.2347,
      "step": 67339
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.047510027885437,
      "learning_rate": 7.491772689179144e-07,
      "loss": 2.1542,
      "step": 67340
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0289405584335327,
      "learning_rate": 7.490209128260928e-07,
      "loss": 2.3111,
      "step": 67341
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1975929737091064,
      "learning_rate": 7.488645724171928e-07,
      "loss": 2.3899,
      "step": 67342
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0829379558563232,
      "learning_rate": 7.487082476914808e-07,
      "loss": 2.3053,
      "step": 67343
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.044461727142334,
      "learning_rate": 7.485519386492213e-07,
      "loss": 2.2705,
      "step": 67344
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.018908143043518,
      "learning_rate": 7.483956452906793e-07,
      "loss": 2.3007,
      "step": 67345
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2416929006576538,
      "learning_rate": 7.48239367616117e-07,
      "loss": 2.1776,
      "step": 67346
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0975435972213745,
      "learning_rate": 7.480831056258031e-07,
      "loss": 2.3052,
      "step": 67347
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1863374710083008,
      "learning_rate": 7.479268593200007e-07,
      "loss": 2.4193,
      "step": 67348
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0356947183609009,
      "learning_rate": 7.47770628698975e-07,
      "loss": 2.0716,
      "step": 67349
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1334753036499023,
      "learning_rate": 7.476144137629926e-07,
      "loss": 2.2648,
      "step": 67350
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2581863403320312,
      "learning_rate": 7.474582145123144e-07,
      "loss": 2.305,
      "step": 67351
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.046940565109253,
      "learning_rate": 7.47302030947209e-07,
      "loss": 2.2716,
      "step": 67352
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1522064208984375,
      "learning_rate": 7.471458630679384e-07,
      "loss": 2.3466,
      "step": 67353
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.058200478553772,
      "learning_rate": 7.469897108747692e-07,
      "loss": 2.1923,
      "step": 67354
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9926415085792542,
      "learning_rate": 7.468335743679655e-07,
      "loss": 2.2382,
      "step": 67355
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0828784704208374,
      "learning_rate": 7.466774535477928e-07,
      "loss": 2.34,
      "step": 67356
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.11806058883667,
      "learning_rate": 7.465213484145128e-07,
      "loss": 2.2383,
      "step": 67357
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.027182936668396,
      "learning_rate": 7.463652589683967e-07,
      "loss": 2.3217,
      "step": 67358
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9531275033950806,
      "learning_rate": 7.462091852097009e-07,
      "loss": 2.2843,
      "step": 67359
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0087742805480957,
      "learning_rate": 7.46053127138695e-07,
      "loss": 2.24,
      "step": 67360
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1245907545089722,
      "learning_rate": 7.458970847556413e-07,
      "loss": 2.302,
      "step": 67361
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1083533763885498,
      "learning_rate": 7.457410580608071e-07,
      "loss": 2.2735,
      "step": 67362
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.227950096130371,
      "learning_rate": 7.455850470544535e-07,
      "loss": 2.373,
      "step": 67363
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0073422193527222,
      "learning_rate": 7.454290517368479e-07,
      "loss": 2.4424,
      "step": 67364
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.098276138305664,
      "learning_rate": 7.452730721082524e-07,
      "loss": 2.2098,
      "step": 67365
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0034761428833008,
      "learning_rate": 7.451171081689346e-07,
      "loss": 2.4024,
      "step": 67366
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0790956020355225,
      "learning_rate": 7.449611599191553e-07,
      "loss": 2.3777,
      "step": 67367
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0668374300003052,
      "learning_rate": 7.44805227359181e-07,
      "loss": 2.5089,
      "step": 67368
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.117638111114502,
      "learning_rate": 7.446493104892749e-07,
      "loss": 2.2798,
      "step": 67369
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0964117050170898,
      "learning_rate": 7.444934093097034e-07,
      "loss": 2.3963,
      "step": 67370
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1024796962738037,
      "learning_rate": 7.443375238207284e-07,
      "loss": 2.029,
      "step": 67371
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9907044768333435,
      "learning_rate": 7.441816540226166e-07,
      "loss": 2.2375,
      "step": 67372
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9488147497177124,
      "learning_rate": 7.440257999156276e-07,
      "loss": 2.5353,
      "step": 67373
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1553524732589722,
      "learning_rate": 7.438699615000312e-07,
      "loss": 2.2917,
      "step": 67374
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1433513164520264,
      "learning_rate": 7.437141387760882e-07,
      "loss": 2.4039,
      "step": 67375
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1033456325531006,
      "learning_rate": 7.435583317440642e-07,
      "loss": 2.3405,
      "step": 67376
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0113431215286255,
      "learning_rate": 7.434025404042211e-07,
      "loss": 2.39,
      "step": 67377
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.337268590927124,
      "learning_rate": 7.432467647568276e-07,
      "loss": 2.4737,
      "step": 67378
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.07218599319458,
      "learning_rate": 7.430910048021444e-07,
      "loss": 2.2764,
      "step": 67379
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0599225759506226,
      "learning_rate": 7.429352605404338e-07,
      "loss": 2.4565,
      "step": 67380
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0578869581222534,
      "learning_rate": 7.427795319719644e-07,
      "loss": 2.3065,
      "step": 67381
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9696586728096008,
      "learning_rate": 7.426238190969959e-07,
      "loss": 2.1641,
      "step": 67382
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0822423696517944,
      "learning_rate": 7.424681219157959e-07,
      "loss": 2.243,
      "step": 67383
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1203943490982056,
      "learning_rate": 7.423124404286275e-07,
      "loss": 2.2567,
      "step": 67384
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.27556574344635,
      "learning_rate": 7.421567746357539e-07,
      "loss": 2.4363,
      "step": 67385
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0600889921188354,
      "learning_rate": 7.420011245374358e-07,
      "loss": 2.3039,
      "step": 67386
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.150438904762268,
      "learning_rate": 7.418454901339433e-07,
      "loss": 2.2862,
      "step": 67387
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.21750009059906,
      "learning_rate": 7.41689871425535e-07,
      "loss": 2.4059,
      "step": 67388
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.070273995399475,
      "learning_rate": 7.415342684124782e-07,
      "loss": 2.2713,
      "step": 67389
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9976057410240173,
      "learning_rate": 7.413786810950341e-07,
      "loss": 2.2955,
      "step": 67390
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0026047229766846,
      "learning_rate": 7.412231094734701e-07,
      "loss": 2.2188,
      "step": 67391
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0614452362060547,
      "learning_rate": 7.41067553548045e-07,
      "loss": 2.3879,
      "step": 67392
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9966597557067871,
      "learning_rate": 7.409120133190273e-07,
      "loss": 2.2093,
      "step": 67393
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.057761311531067,
      "learning_rate": 7.40756488786677e-07,
      "loss": 2.2518,
      "step": 67394
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.132805585861206,
      "learning_rate": 7.406009799512604e-07,
      "loss": 2.2529,
      "step": 67395
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0190823078155518,
      "learning_rate": 7.404454868130373e-07,
      "loss": 2.3483,
      "step": 67396
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1337004899978638,
      "learning_rate": 7.402900093722787e-07,
      "loss": 2.4217,
      "step": 67397
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1152578592300415,
      "learning_rate": 7.401345476292399e-07,
      "loss": 2.4345,
      "step": 67398
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.130481243133545,
      "learning_rate": 7.399791015841895e-07,
      "loss": 2.3418,
      "step": 67399
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.045578956604004,
      "learning_rate": 7.398236712373874e-07,
      "loss": 2.2356,
      "step": 67400
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0656713247299194,
      "learning_rate": 7.396682565891012e-07,
      "loss": 2.1647,
      "step": 67401
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.77443528175354,
      "learning_rate": 7.395128576395915e-07,
      "loss": 2.276,
      "step": 67402
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.123729944229126,
      "learning_rate": 7.39357474389123e-07,
      "loss": 2.2537,
      "step": 67403
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0112942457199097,
      "learning_rate": 7.392021068379584e-07,
      "loss": 2.3026,
      "step": 67404
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1694598197937012,
      "learning_rate": 7.390467549863623e-07,
      "loss": 2.4783,
      "step": 67405
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2255014181137085,
      "learning_rate": 7.388914188345964e-07,
      "loss": 2.3008,
      "step": 67406
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.140417456626892,
      "learning_rate": 7.387360983829239e-07,
      "loss": 2.2116,
      "step": 67407
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0998401641845703,
      "learning_rate": 7.385807936316114e-07,
      "loss": 2.395,
      "step": 67408
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1571515798568726,
      "learning_rate": 7.384255045809174e-07,
      "loss": 2.2737,
      "step": 67409
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9828056693077087,
      "learning_rate": 7.382702312311097e-07,
      "loss": 2.3427,
      "step": 67410
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2158421277999878,
      "learning_rate": 7.381149735824488e-07,
      "loss": 2.7258,
      "step": 67411
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0643048286437988,
      "learning_rate": 7.379597316351994e-07,
      "loss": 2.385,
      "step": 67412
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0168380737304688,
      "learning_rate": 7.37804505389621e-07,
      "loss": 2.2741,
      "step": 67413
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0460222959518433,
      "learning_rate": 7.376492948459823e-07,
      "loss": 2.3442,
      "step": 67414
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0794236660003662,
      "learning_rate": 7.374941000045411e-07,
      "loss": 2.1982,
      "step": 67415
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0791873931884766,
      "learning_rate": 7.373389208655646e-07,
      "loss": 2.3641,
      "step": 67416
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0320042371749878,
      "learning_rate": 7.37183757429314e-07,
      "loss": 2.3388,
      "step": 67417
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0367276668548584,
      "learning_rate": 7.370286096960533e-07,
      "loss": 2.2789,
      "step": 67418
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.109131097793579,
      "learning_rate": 7.368734776660435e-07,
      "loss": 2.354,
      "step": 67419
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1131635904312134,
      "learning_rate": 7.367183613395501e-07,
      "loss": 2.383,
      "step": 67420
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1149873733520508,
      "learning_rate": 7.365632607168338e-07,
      "loss": 2.4862,
      "step": 67421
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2602392435073853,
      "learning_rate": 7.364081757981601e-07,
      "loss": 2.1729,
      "step": 67422
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9773495197296143,
      "learning_rate": 7.362531065837897e-07,
      "loss": 2.3582,
      "step": 67423
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1155513525009155,
      "learning_rate": 7.360980530739891e-07,
      "loss": 2.269,
      "step": 67424
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1264846324920654,
      "learning_rate": 7.35943015269015e-07,
      "loss": 2.3989,
      "step": 67425
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1620184183120728,
      "learning_rate": 7.357879931691348e-07,
      "loss": 2.2007,
      "step": 67426
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0780634880065918,
      "learning_rate": 7.356329867746081e-07,
      "loss": 2.3264,
      "step": 67427
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0115591287612915,
      "learning_rate": 7.354779960857028e-07,
      "loss": 2.4023,
      "step": 67428
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0599490404129028,
      "learning_rate": 7.353230211026752e-07,
      "loss": 2.4282,
      "step": 67429
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.094803810119629,
      "learning_rate": 7.35168061825794e-07,
      "loss": 2.2816,
      "step": 67430
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2371771335601807,
      "learning_rate": 7.350131182553177e-07,
      "loss": 2.285,
      "step": 67431
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1930878162384033,
      "learning_rate": 7.34858190391512e-07,
      "loss": 2.0442,
      "step": 67432
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1572312116622925,
      "learning_rate": 7.347032782346375e-07,
      "loss": 2.3006,
      "step": 67433
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.062371850013733,
      "learning_rate": 7.345483817849563e-07,
      "loss": 2.3614,
      "step": 67434
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0044691562652588,
      "learning_rate": 7.343935010427339e-07,
      "loss": 2.283,
      "step": 67435
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9813222885131836,
      "learning_rate": 7.342386360082288e-07,
      "loss": 2.2225,
      "step": 67436
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0399774312973022,
      "learning_rate": 7.340837866817074e-07,
      "loss": 2.3825,
      "step": 67437
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1451808214187622,
      "learning_rate": 7.339289530634308e-07,
      "loss": 2.2343,
      "step": 67438
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0328444242477417,
      "learning_rate": 7.337741351536598e-07,
      "loss": 2.2671,
      "step": 67439
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0616651773452759,
      "learning_rate": 7.336193329526597e-07,
      "loss": 2.2927,
      "step": 67440
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0308799743652344,
      "learning_rate": 7.334645464606916e-07,
      "loss": 2.2794,
      "step": 67441
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.008131980895996,
      "learning_rate": 7.333097756780161e-07,
      "loss": 2.2428,
      "step": 67442
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1115227937698364,
      "learning_rate": 7.331550206048988e-07,
      "loss": 2.3555,
      "step": 67443
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0258703231811523,
      "learning_rate": 7.330002812415993e-07,
      "loss": 2.4332,
      "step": 67444
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.038209319114685,
      "learning_rate": 7.32845557588382e-07,
      "loss": 2.211,
      "step": 67445
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0613962411880493,
      "learning_rate": 7.326908496455077e-07,
      "loss": 2.1127,
      "step": 67446
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2289226055145264,
      "learning_rate": 7.325361574132405e-07,
      "loss": 2.4894,
      "step": 67447
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0525091886520386,
      "learning_rate": 7.323814808918406e-07,
      "loss": 2.284,
      "step": 67448
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.045776128768921,
      "learning_rate": 7.322268200815719e-07,
      "loss": 2.2423,
      "step": 67449
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1820056438446045,
      "learning_rate": 7.320721749826964e-07,
      "loss": 2.2079,
      "step": 67450
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.191995620727539,
      "learning_rate": 7.319175455954764e-07,
      "loss": 2.4467,
      "step": 67451
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0367399454116821,
      "learning_rate": 7.317629319201702e-07,
      "loss": 2.3534,
      "step": 67452
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1210132837295532,
      "learning_rate": 7.316083339570457e-07,
      "loss": 2.3086,
      "step": 67453
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9947081208229065,
      "learning_rate": 7.314537517063602e-07,
      "loss": 2.2754,
      "step": 67454
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1924114227294922,
      "learning_rate": 7.312991851683792e-07,
      "loss": 2.2583,
      "step": 67455
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0014326572418213,
      "learning_rate": 7.311446343433626e-07,
      "loss": 2.1466,
      "step": 67456
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1142770051956177,
      "learning_rate": 7.309900992315744e-07,
      "loss": 2.2132,
      "step": 67457
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0065077543258667,
      "learning_rate": 7.308355798332744e-07,
      "loss": 2.2001,
      "step": 67458
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2165367603302002,
      "learning_rate": 7.30681076148727e-07,
      "loss": 2.3547,
      "step": 67459
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2098902463912964,
      "learning_rate": 7.305265881781909e-07,
      "loss": 2.3933,
      "step": 67460
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0629957914352417,
      "learning_rate": 7.303721159219313e-07,
      "loss": 2.2867,
      "step": 67461
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2701340913772583,
      "learning_rate": 7.302176593802079e-07,
      "loss": 2.3138,
      "step": 67462
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.142691731452942,
      "learning_rate": 7.300632185532841e-07,
      "loss": 2.3658,
      "step": 67463
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.087603211402893,
      "learning_rate": 7.299087934414184e-07,
      "loss": 2.2303,
      "step": 67464
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.11044442653656,
      "learning_rate": 7.297543840448773e-07,
      "loss": 2.4873,
      "step": 67465
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0476243495941162,
      "learning_rate": 7.295999903639195e-07,
      "loss": 2.4466,
      "step": 67466
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9840515851974487,
      "learning_rate": 7.29445612398808e-07,
      "loss": 2.3985,
      "step": 67467
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2898441553115845,
      "learning_rate": 7.29291250149804e-07,
      "loss": 2.5206,
      "step": 67468
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1689883470535278,
      "learning_rate": 7.291369036171681e-07,
      "loss": 2.4469,
      "step": 67469
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.17184317111969,
      "learning_rate": 7.289825728011645e-07,
      "loss": 2.4893,
      "step": 67470
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.101945161819458,
      "learning_rate": 7.288282577020511e-07,
      "loss": 2.2259,
      "step": 67471
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1760878562927246,
      "learning_rate": 7.286739583200942e-07,
      "loss": 2.2292,
      "step": 67472
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0682613849639893,
      "learning_rate": 7.285196746555512e-07,
      "loss": 2.0972,
      "step": 67473
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1394166946411133,
      "learning_rate": 7.283654067086877e-07,
      "loss": 2.3543,
      "step": 67474
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0794845819473267,
      "learning_rate": 7.28211154479761e-07,
      "loss": 2.1883,
      "step": 67475
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0571422576904297,
      "learning_rate": 7.280569179690367e-07,
      "loss": 2.5189,
      "step": 67476
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1767196655273438,
      "learning_rate": 7.279026971767722e-07,
      "loss": 2.6538,
      "step": 67477
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.09603750705719,
      "learning_rate": 7.277484921032318e-07,
      "loss": 2.5012,
      "step": 67478
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.071099877357483,
      "learning_rate": 7.275943027486754e-07,
      "loss": 2.3057,
      "step": 67479
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1366947889328003,
      "learning_rate": 7.274401291133659e-07,
      "loss": 2.591,
      "step": 67480
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3154723644256592,
      "learning_rate": 7.272859711975621e-07,
      "loss": 2.359,
      "step": 67481
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0283573865890503,
      "learning_rate": 7.271318290015283e-07,
      "loss": 2.3255,
      "step": 67482
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2845438718795776,
      "learning_rate": 7.269777025255242e-07,
      "loss": 2.4886,
      "step": 67483
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1696466207504272,
      "learning_rate": 7.268235917698119e-07,
      "loss": 2.0115,
      "step": 67484
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1883352994918823,
      "learning_rate": 7.26669496734651e-07,
      "loss": 2.3579,
      "step": 67485
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0820143222808838,
      "learning_rate": 7.265154174203048e-07,
      "loss": 2.125,
      "step": 67486
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9423387050628662,
      "learning_rate": 7.263613538270331e-07,
      "loss": 2.1935,
      "step": 67487
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.044133186340332,
      "learning_rate": 7.26207305955099e-07,
      "loss": 2.3118,
      "step": 67488
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.113274335861206,
      "learning_rate": 7.26053273804761e-07,
      "loss": 2.5074,
      "step": 67489
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0689648389816284,
      "learning_rate": 7.258992573762824e-07,
      "loss": 2.5245,
      "step": 67490
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.062994122505188,
      "learning_rate": 7.257452566699219e-07,
      "loss": 2.2712,
      "step": 67491
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9641607999801636,
      "learning_rate": 7.255912716859437e-07,
      "loss": 2.4033,
      "step": 67492
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0076581239700317,
      "learning_rate": 7.254373024246053e-07,
      "loss": 2.0439,
      "step": 67493
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.417792558670044,
      "learning_rate": 7.252833488861711e-07,
      "loss": 2.4527,
      "step": 67494
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0520613193511963,
      "learning_rate": 7.251294110708995e-07,
      "loss": 2.4206,
      "step": 67495
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.164883017539978,
      "learning_rate": 7.249754889790539e-07,
      "loss": 2.3958,
      "step": 67496
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.01701819896698,
      "learning_rate": 7.24821582610894e-07,
      "loss": 2.1008,
      "step": 67497
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0535074472427368,
      "learning_rate": 7.246676919666784e-07,
      "loss": 2.1637,
      "step": 67498
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1039535999298096,
      "learning_rate": 7.245138170466725e-07,
      "loss": 2.3062,
      "step": 67499
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3953447341918945,
      "learning_rate": 7.243599578511329e-07,
      "loss": 2.4134,
      "step": 67500
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0577783584594727,
      "learning_rate": 7.242061143803247e-07,
      "loss": 2.4141,
      "step": 67501
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0126301050186157,
      "learning_rate": 7.240522866345034e-07,
      "loss": 2.2645,
      "step": 67502
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1508749723434448,
      "learning_rate": 7.238984746139377e-07,
      "loss": 2.2136,
      "step": 67503
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1022392511367798,
      "learning_rate": 7.237446783188796e-07,
      "loss": 2.2053,
      "step": 67504
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0590792894363403,
      "learning_rate": 7.235908977495943e-07,
      "loss": 2.3919,
      "step": 67505
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1240134239196777,
      "learning_rate": 7.234371329063416e-07,
      "loss": 2.1392,
      "step": 67506
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0463249683380127,
      "learning_rate": 7.232833837893827e-07,
      "loss": 2.1745,
      "step": 67507
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1223405599594116,
      "learning_rate": 7.231296503989771e-07,
      "loss": 2.2482,
      "step": 67508
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1346312761306763,
      "learning_rate": 7.22975932735388e-07,
      "loss": 2.0915,
      "step": 67509
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.183293104171753,
      "learning_rate": 7.22822230798873e-07,
      "loss": 2.2893,
      "step": 67510
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1856274604797363,
      "learning_rate": 7.226685445896952e-07,
      "loss": 2.165,
      "step": 67511
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1381431818008423,
      "learning_rate": 7.225148741081122e-07,
      "loss": 2.4795,
      "step": 67512
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0833317041397095,
      "learning_rate": 7.223612193543883e-07,
      "loss": 2.1058,
      "step": 67513
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0439976453781128,
      "learning_rate": 7.222075803287798e-07,
      "loss": 2.2863,
      "step": 67514
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0544161796569824,
      "learning_rate": 7.22053957031551e-07,
      "loss": 2.4833,
      "step": 67515
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.019449234008789,
      "learning_rate": 7.219003494629606e-07,
      "loss": 2.242,
      "step": 67516
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0629841089248657,
      "learning_rate": 7.217467576232695e-07,
      "loss": 2.3485,
      "step": 67517
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0995439291000366,
      "learning_rate": 7.215931815127352e-07,
      "loss": 2.417,
      "step": 67518
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1046624183654785,
      "learning_rate": 7.214396211316222e-07,
      "loss": 2.1977,
      "step": 67519
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1529816389083862,
      "learning_rate": 7.212860764801877e-07,
      "loss": 2.4482,
      "step": 67520
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0974130630493164,
      "learning_rate": 7.21132547558695e-07,
      "loss": 2.3293,
      "step": 67521
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2413564920425415,
      "learning_rate": 7.209790343674006e-07,
      "loss": 2.4442,
      "step": 67522
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1669907569885254,
      "learning_rate": 7.208255369065698e-07,
      "loss": 2.1079,
      "step": 67523
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1275101900100708,
      "learning_rate": 7.20672055176459e-07,
      "loss": 2.499,
      "step": 67524
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0578246116638184,
      "learning_rate": 7.20518589177327e-07,
      "loss": 2.3022,
      "step": 67525
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4318091869354248,
      "learning_rate": 7.20365138909439e-07,
      "loss": 2.2585,
      "step": 67526
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1276829242706299,
      "learning_rate": 7.202117043730506e-07,
      "loss": 2.3728,
      "step": 67527
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0653808116912842,
      "learning_rate": 7.200582855684246e-07,
      "loss": 2.1147,
      "step": 67528
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.187939167022705,
      "learning_rate": 7.19904882495821e-07,
      "loss": 2.2155,
      "step": 67529
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9462705254554749,
      "learning_rate": 7.197514951554985e-07,
      "loss": 2.2367,
      "step": 67530
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0171247720718384,
      "learning_rate": 7.195981235477167e-07,
      "loss": 2.3925,
      "step": 67531
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1654841899871826,
      "learning_rate": 7.194447676727378e-07,
      "loss": 2.1995,
      "step": 67532
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0858197212219238,
      "learning_rate": 7.192914275308194e-07,
      "loss": 2.4437,
      "step": 67533
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1844468116760254,
      "learning_rate": 7.191381031222233e-07,
      "loss": 2.2983,
      "step": 67534
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0273016691207886,
      "learning_rate": 7.189847944472083e-07,
      "loss": 2.2276,
      "step": 67535
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2723177671432495,
      "learning_rate": 7.188315015060354e-07,
      "loss": 1.9814,
      "step": 67536
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0825330018997192,
      "learning_rate": 7.186782242989631e-07,
      "loss": 2.2738,
      "step": 67537
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6271178722381592,
      "learning_rate": 7.185249628262525e-07,
      "loss": 2.1904,
      "step": 67538
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.13887357711792,
      "learning_rate": 7.183717170881621e-07,
      "loss": 2.3853,
      "step": 67539
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.25663423538208,
      "learning_rate": 7.18218487084954e-07,
      "loss": 2.3778,
      "step": 67540
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1851037740707397,
      "learning_rate": 7.180652728168847e-07,
      "loss": 2.2541,
      "step": 67541
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1851400136947632,
      "learning_rate": 7.179120742842194e-07,
      "loss": 2.5476,
      "step": 67542
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0458472967147827,
      "learning_rate": 7.177588914872103e-07,
      "loss": 2.1637,
      "step": 67543
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.147451400756836,
      "learning_rate": 7.176057244261225e-07,
      "loss": 2.3112,
      "step": 67544
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0982580184936523,
      "learning_rate": 7.174525731012127e-07,
      "loss": 2.3674,
      "step": 67545
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.063318133354187,
      "learning_rate": 7.17299437512744e-07,
      "loss": 2.4405,
      "step": 67546
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.990616500377655,
      "learning_rate": 7.171463176609716e-07,
      "loss": 2.2599,
      "step": 67547
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0069267749786377,
      "learning_rate": 7.169932135461588e-07,
      "loss": 2.6033,
      "step": 67548
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0524616241455078,
      "learning_rate": 7.168401251685631e-07,
      "loss": 2.3383,
      "step": 67549
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1849524974822998,
      "learning_rate": 7.166870525284453e-07,
      "loss": 2.2356,
      "step": 67550
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0855656862258911,
      "learning_rate": 7.165339956260642e-07,
      "loss": 2.4544,
      "step": 67551
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.044042706489563,
      "learning_rate": 7.163809544616785e-07,
      "loss": 2.2881,
      "step": 67552
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2356969118118286,
      "learning_rate": 7.162279290355501e-07,
      "loss": 2.5204,
      "step": 67553
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1120058298110962,
      "learning_rate": 7.160749193479344e-07,
      "loss": 2.2927,
      "step": 67554
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1517366170883179,
      "learning_rate": 7.159219253990957e-07,
      "loss": 2.2719,
      "step": 67555
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2986767292022705,
      "learning_rate": 7.157689471892904e-07,
      "loss": 2.097,
      "step": 67556
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.172298550605774,
      "learning_rate": 7.15615984718776e-07,
      "loss": 2.1914,
      "step": 67557
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0556024312973022,
      "learning_rate": 7.15463037987817e-07,
      "loss": 2.1388,
      "step": 67558
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0929454565048218,
      "learning_rate": 7.153101069966684e-07,
      "loss": 2.2958,
      "step": 67559
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.099664568901062,
      "learning_rate": 7.151571917455901e-07,
      "loss": 2.3884,
      "step": 67560
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.022040605545044,
      "learning_rate": 7.150042922348432e-07,
      "loss": 2.3428,
      "step": 67561
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0660064220428467,
      "learning_rate": 7.148514084646852e-07,
      "loss": 2.3172,
      "step": 67562
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0492668151855469,
      "learning_rate": 7.146985404353767e-07,
      "loss": 2.4751,
      "step": 67563
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.089101791381836,
      "learning_rate": 7.145456881471747e-07,
      "loss": 2.0714,
      "step": 67564
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2570908069610596,
      "learning_rate": 7.143928516003406e-07,
      "loss": 2.3584,
      "step": 67565
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0625536441802979,
      "learning_rate": 7.142400307951325e-07,
      "loss": 2.1611,
      "step": 67566
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1893292665481567,
      "learning_rate": 7.140872257318099e-07,
      "loss": 2.1521,
      "step": 67567
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.106168270111084,
      "learning_rate": 7.139344364106304e-07,
      "loss": 2.5941,
      "step": 67568
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1514478921890259,
      "learning_rate": 7.137816628318562e-07,
      "loss": 2.2841,
      "step": 67569
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1487526893615723,
      "learning_rate": 7.136289049957424e-07,
      "loss": 2.3707,
      "step": 67570
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0758471488952637,
      "learning_rate": 7.13476162902551e-07,
      "loss": 2.5326,
      "step": 67571
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.035954475402832,
      "learning_rate": 7.133234365525377e-07,
      "loss": 2.2462,
      "step": 67572
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9791961908340454,
      "learning_rate": 7.131707259459652e-07,
      "loss": 2.1425,
      "step": 67573
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1102566719055176,
      "learning_rate": 7.130180310830881e-07,
      "loss": 2.2167,
      "step": 67574
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.02009916305542,
      "learning_rate": 7.128653519641704e-07,
      "loss": 2.3595,
      "step": 67575
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9838950037956238,
      "learning_rate": 7.127126885894664e-07,
      "loss": 2.1732,
      "step": 67576
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0446367263793945,
      "learning_rate": 7.125600409592382e-07,
      "loss": 2.3612,
      "step": 67577
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0501503944396973,
      "learning_rate": 7.124074090737421e-07,
      "loss": 2.33,
      "step": 67578
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.8238229751586914,
      "learning_rate": 7.12254792933239e-07,
      "loss": 2.2385,
      "step": 67579
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2554032802581787,
      "learning_rate": 7.121021925379868e-07,
      "loss": 2.3185,
      "step": 67580
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1270625591278076,
      "learning_rate": 7.119496078882427e-07,
      "loss": 2.3387,
      "step": 67581
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0861170291900635,
      "learning_rate": 7.117970389842677e-07,
      "loss": 2.4504,
      "step": 67582
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1691670417785645,
      "learning_rate": 7.116444858263194e-07,
      "loss": 2.2712,
      "step": 67583
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1282681226730347,
      "learning_rate": 7.114919484146554e-07,
      "loss": 2.1606,
      "step": 67584
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0456589460372925,
      "learning_rate": 7.113394267495355e-07,
      "loss": 2.1355,
      "step": 67585
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.036322832107544,
      "learning_rate": 7.111869208312194e-07,
      "loss": 2.3424,
      "step": 67586
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.041859745979309,
      "learning_rate": 7.110344306599615e-07,
      "loss": 2.3096,
      "step": 67587
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0162360668182373,
      "learning_rate": 7.108819562360259e-07,
      "loss": 2.4355,
      "step": 67588
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.094402551651001,
      "learning_rate": 7.107294975596657e-07,
      "loss": 2.3326,
      "step": 67589
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.094504475593567,
      "learning_rate": 7.105770546311442e-07,
      "loss": 2.2706,
      "step": 67590
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2057654857635498,
      "learning_rate": 7.104246274507154e-07,
      "loss": 2.3341,
      "step": 67591
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1208720207214355,
      "learning_rate": 7.102722160186426e-07,
      "loss": 2.3637,
      "step": 67592
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9999300837516785,
      "learning_rate": 7.101198203351789e-07,
      "loss": 2.2175,
      "step": 67593
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0623366832733154,
      "learning_rate": 7.099674404005863e-07,
      "loss": 2.1176,
      "step": 67594
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5596567392349243,
      "learning_rate": 7.098150762151235e-07,
      "loss": 2.132,
      "step": 67595
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2209558486938477,
      "learning_rate": 7.096627277790458e-07,
      "loss": 2.265,
      "step": 67596
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0790019035339355,
      "learning_rate": 7.095103950926119e-07,
      "loss": 2.2604,
      "step": 67597
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1723051071166992,
      "learning_rate": 7.093580781560838e-07,
      "loss": 2.4188,
      "step": 67598
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1916645765304565,
      "learning_rate": 7.092057769697136e-07,
      "loss": 2.3252,
      "step": 67599
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1255794763565063,
      "learning_rate": 7.090534915337654e-07,
      "loss": 2.149,
      "step": 67600
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3064467906951904,
      "learning_rate": 7.089012218484936e-07,
      "loss": 2.0853,
      "step": 67601
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.076390027999878,
      "learning_rate": 7.08748967914159e-07,
      "loss": 2.4857,
      "step": 67602
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0989296436309814,
      "learning_rate": 7.08596729731017e-07,
      "loss": 2.2772,
      "step": 67603
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1540937423706055,
      "learning_rate": 7.084445072993285e-07,
      "loss": 2.2946,
      "step": 67604
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1655904054641724,
      "learning_rate": 7.082923006193488e-07,
      "loss": 2.3521,
      "step": 67605
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4192304611206055,
      "learning_rate": 7.081401096913387e-07,
      "loss": 2.1808,
      "step": 67606
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1003674268722534,
      "learning_rate": 7.07987934515555e-07,
      "loss": 2.4349,
      "step": 67607
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.056627631187439,
      "learning_rate": 7.078357750922549e-07,
      "loss": 2.2231,
      "step": 67608
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0469905138015747,
      "learning_rate": 7.076836314216951e-07,
      "loss": 2.2096,
      "step": 67609
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.012934923171997,
      "learning_rate": 7.075315035041374e-07,
      "loss": 2.2048,
      "step": 67610
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1601134538650513,
      "learning_rate": 7.073793913398364e-07,
      "loss": 2.1588,
      "step": 67611
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2581086158752441,
      "learning_rate": 7.072272949290516e-07,
      "loss": 2.2693,
      "step": 67612
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0350184440612793,
      "learning_rate": 7.070752142720394e-07,
      "loss": 2.3611,
      "step": 67613
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.069077730178833,
      "learning_rate": 7.069231493690609e-07,
      "loss": 2.1432,
      "step": 67614
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1439037322998047,
      "learning_rate": 7.067711002203703e-07,
      "loss": 2.296,
      "step": 67615
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1130738258361816,
      "learning_rate": 7.066190668262263e-07,
      "loss": 2.4163,
      "step": 67616
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0708489418029785,
      "learning_rate": 7.064670491868875e-07,
      "loss": 2.2458,
      "step": 67617
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0908527374267578,
      "learning_rate": 7.063150473026103e-07,
      "loss": 2.2379,
      "step": 67618
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0645925998687744,
      "learning_rate": 7.061630611736547e-07,
      "loss": 2.4733,
      "step": 67619
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.063185691833496,
      "learning_rate": 7.06011090800276e-07,
      "loss": 2.4191,
      "step": 67620
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0188828706741333,
      "learning_rate": 7.05859136182735e-07,
      "loss": 2.1544,
      "step": 67621
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9761543273925781,
      "learning_rate": 7.057071973212837e-07,
      "loss": 2.5128,
      "step": 67622
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0216588973999023,
      "learning_rate": 7.055552742161853e-07,
      "loss": 2.2393,
      "step": 67623
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.108296513557434,
      "learning_rate": 7.054033668676929e-07,
      "loss": 2.3464,
      "step": 67624
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1004313230514526,
      "learning_rate": 7.052514752760675e-07,
      "loss": 2.5747,
      "step": 67625
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.054673194885254,
      "learning_rate": 7.050995994415644e-07,
      "loss": 2.2555,
      "step": 67626
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0997017621994019,
      "learning_rate": 7.049477393644433e-07,
      "loss": 2.3488,
      "step": 67627
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0569061040878296,
      "learning_rate": 7.047958950449584e-07,
      "loss": 2.3235,
      "step": 67628
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9974340200424194,
      "learning_rate": 7.04644066483371e-07,
      "loss": 2.3573,
      "step": 67629
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0918715000152588,
      "learning_rate": 7.044922536799348e-07,
      "loss": 2.2471,
      "step": 67630
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1338484287261963,
      "learning_rate": 7.043404566349099e-07,
      "loss": 2.4034,
      "step": 67631
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1121995449066162,
      "learning_rate": 7.041886753485516e-07,
      "loss": 2.3336,
      "step": 67632
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.058225154876709,
      "learning_rate": 7.040369098211197e-07,
      "loss": 2.3359,
      "step": 67633
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1418057680130005,
      "learning_rate": 7.038851600528684e-07,
      "loss": 2.3497,
      "step": 67634
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1581904888153076,
      "learning_rate": 7.037334260440599e-07,
      "loss": 2.3328,
      "step": 67635
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.056424856185913,
      "learning_rate": 7.035817077949447e-07,
      "loss": 2.3595,
      "step": 67636
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0673127174377441,
      "learning_rate": 7.034300053057852e-07,
      "loss": 2.4269,
      "step": 67637
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.287607192993164,
      "learning_rate": 7.032783185768343e-07,
      "loss": 2.4015,
      "step": 67638
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1353576183319092,
      "learning_rate": 7.031266476083543e-07,
      "loss": 2.1498,
      "step": 67639
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2902230024337769,
      "learning_rate": 7.029749924005968e-07,
      "loss": 2.3698,
      "step": 67640
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.982666552066803,
      "learning_rate": 7.028233529538242e-07,
      "loss": 2.2508,
      "step": 67641
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9620895981788635,
      "learning_rate": 7.026717292682917e-07,
      "loss": 2.1617,
      "step": 67642
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0976167917251587,
      "learning_rate": 7.025201213442534e-07,
      "loss": 2.1963,
      "step": 67643
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0890488624572754,
      "learning_rate": 7.023685291819704e-07,
      "loss": 2.3013,
      "step": 67644
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1037318706512451,
      "learning_rate": 7.022169527816958e-07,
      "loss": 2.311,
      "step": 67645
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2139331102371216,
      "learning_rate": 7.020653921436904e-07,
      "loss": 2.4535,
      "step": 67646
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2064152956008911,
      "learning_rate": 7.019138472682086e-07,
      "loss": 2.3329,
      "step": 67647
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1838948726654053,
      "learning_rate": 7.017623181555111e-07,
      "loss": 2.2556,
      "step": 67648
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0599805116653442,
      "learning_rate": 7.016108048058479e-07,
      "loss": 2.1671,
      "step": 67649
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.106079339981079,
      "learning_rate": 7.01459307219482e-07,
      "loss": 2.2845,
      "step": 67650
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1164997816085815,
      "learning_rate": 7.013078253966666e-07,
      "loss": 2.6135,
      "step": 67651
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0244591236114502,
      "learning_rate": 7.011563593376614e-07,
      "loss": 2.2929,
      "step": 67652
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0936369895935059,
      "learning_rate": 7.010049090427195e-07,
      "loss": 2.4614,
      "step": 67653
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1172977685928345,
      "learning_rate": 7.008534745121021e-07,
      "loss": 2.2791,
      "step": 67654
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0201443433761597,
      "learning_rate": 7.00702055746062e-07,
      "loss": 2.3127,
      "step": 67655
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.087262749671936,
      "learning_rate": 7.005506527448591e-07,
      "loss": 2.3972,
      "step": 67656
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1431233882904053,
      "learning_rate": 7.003992655087466e-07,
      "loss": 2.2391,
      "step": 67657
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1726175546646118,
      "learning_rate": 7.002478940379853e-07,
      "loss": 2.4547,
      "step": 67658
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1660569906234741,
      "learning_rate": 7.000965383328273e-07,
      "loss": 2.4049,
      "step": 67659
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2148598432540894,
      "learning_rate": 6.999451983935334e-07,
      "loss": 2.2902,
      "step": 67660
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0829941034317017,
      "learning_rate": 6.997938742203592e-07,
      "loss": 2.3279,
      "step": 67661
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0684939622879028,
      "learning_rate": 6.996425658135586e-07,
      "loss": 2.3787,
      "step": 67662
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0989664793014526,
      "learning_rate": 6.994912731733894e-07,
      "loss": 2.2008,
      "step": 67663
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9754670858383179,
      "learning_rate": 6.993399963001102e-07,
      "loss": 2.3643,
      "step": 67664
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9985141158103943,
      "learning_rate": 6.99188735193973e-07,
      "loss": 2.1948,
      "step": 67665
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0609699487686157,
      "learning_rate": 6.990374898552399e-07,
      "loss": 2.1199,
      "step": 67666
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0980521440505981,
      "learning_rate": 6.988862602841618e-07,
      "loss": 2.3635,
      "step": 67667
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2023036479949951,
      "learning_rate": 6.987350464809995e-07,
      "loss": 2.3933,
      "step": 67668
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0524464845657349,
      "learning_rate": 6.985838484460072e-07,
      "loss": 2.3826,
      "step": 67669
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9986763000488281,
      "learning_rate": 6.984326661794405e-07,
      "loss": 2.2225,
      "step": 67670
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0358390808105469,
      "learning_rate": 6.982814996815579e-07,
      "loss": 2.1426,
      "step": 67671
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1519309282302856,
      "learning_rate": 6.981303489526125e-07,
      "loss": 2.4556,
      "step": 67672
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.027935266494751,
      "learning_rate": 6.979792139928643e-07,
      "loss": 2.3424,
      "step": 67673
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0658016204833984,
      "learning_rate": 6.978280948025684e-07,
      "loss": 2.2193,
      "step": 67674
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0573359727859497,
      "learning_rate": 6.976769913819781e-07,
      "loss": 2.332,
      "step": 67675
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0560349225997925,
      "learning_rate": 6.975259037313542e-07,
      "loss": 2.614,
      "step": 67676
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0282747745513916,
      "learning_rate": 6.973748318509499e-07,
      "loss": 2.4504,
      "step": 67677
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0779284238815308,
      "learning_rate": 6.972237757410194e-07,
      "loss": 2.4953,
      "step": 67678
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1063389778137207,
      "learning_rate": 6.970727354018236e-07,
      "loss": 2.3422,
      "step": 67679
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0216091871261597,
      "learning_rate": 6.969217108336146e-07,
      "loss": 2.2332,
      "step": 67680
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1126867532730103,
      "learning_rate": 6.96770702036651e-07,
      "loss": 2.2081,
      "step": 67681
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9910562038421631,
      "learning_rate": 6.96619709011187e-07,
      "loss": 2.1135,
      "step": 67682
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9422459602355957,
      "learning_rate": 6.964687317574803e-07,
      "loss": 2.4509,
      "step": 67683
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0692757368087769,
      "learning_rate": 6.96317770275785e-07,
      "loss": 2.4845,
      "step": 67684
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0667288303375244,
      "learning_rate": 6.961668245663589e-07,
      "loss": 2.0811,
      "step": 67685
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1462222337722778,
      "learning_rate": 6.96015894629456e-07,
      "loss": 2.3407,
      "step": 67686
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0306861400604248,
      "learning_rate": 6.958649804653361e-07,
      "loss": 2.0522,
      "step": 67687
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9561578035354614,
      "learning_rate": 6.957140820742481e-07,
      "loss": 2.1021,
      "step": 67688
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0398054122924805,
      "learning_rate": 6.955631994564537e-07,
      "loss": 2.3704,
      "step": 67689
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1288330554962158,
      "learning_rate": 6.954123326122053e-07,
      "loss": 2.1307,
      "step": 67690
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0585477352142334,
      "learning_rate": 6.952614815417613e-07,
      "loss": 2.1547,
      "step": 67691
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.066475510597229,
      "learning_rate": 6.95110646245375e-07,
      "loss": 2.3397,
      "step": 67692
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.193210482597351,
      "learning_rate": 6.949598267233049e-07,
      "loss": 2.3866,
      "step": 67693
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1582585573196411,
      "learning_rate": 6.948090229758031e-07,
      "loss": 2.4409,
      "step": 67694
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0733784437179565,
      "learning_rate": 6.946582350031295e-07,
      "loss": 1.9295,
      "step": 67695
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1092737913131714,
      "learning_rate": 6.945074628055359e-07,
      "loss": 2.3025,
      "step": 67696
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0934858322143555,
      "learning_rate": 6.943567063832812e-07,
      "loss": 2.3749,
      "step": 67697
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9474101662635803,
      "learning_rate": 6.942059657366185e-07,
      "loss": 2.3611,
      "step": 67698
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0593554973602295,
      "learning_rate": 6.94055240865803e-07,
      "loss": 2.182,
      "step": 67699
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0281363725662231,
      "learning_rate": 6.939045317710924e-07,
      "loss": 2.1201,
      "step": 67700
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1341235637664795,
      "learning_rate": 6.93753838452742e-07,
      "loss": 2.2399,
      "step": 67701
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.003086805343628,
      "learning_rate": 6.936031609110039e-07,
      "loss": 2.2311,
      "step": 67702
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2233375310897827,
      "learning_rate": 6.934524991461378e-07,
      "loss": 2.427,
      "step": 67703
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1105250120162964,
      "learning_rate": 6.93301853158398e-07,
      "loss": 2.3341,
      "step": 67704
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1895862817764282,
      "learning_rate": 6.931512229480375e-07,
      "loss": 2.5016,
      "step": 67705
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0352418422698975,
      "learning_rate": 6.93000608515314e-07,
      "loss": 2.1424,
      "step": 67706
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1423617601394653,
      "learning_rate": 6.928500098604818e-07,
      "loss": 2.2771,
      "step": 67707
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.073208212852478,
      "learning_rate": 6.926994269837972e-07,
      "loss": 2.2798,
      "step": 67708
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0972329378128052,
      "learning_rate": 6.925488598855145e-07,
      "loss": 2.4914,
      "step": 67709
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1674786806106567,
      "learning_rate": 6.923983085658903e-07,
      "loss": 2.2669,
      "step": 67710
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0321074724197388,
      "learning_rate": 6.922477730251765e-07,
      "loss": 2.3864,
      "step": 67711
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0878167152404785,
      "learning_rate": 6.920972532636338e-07,
      "loss": 2.1664,
      "step": 67712
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2066237926483154,
      "learning_rate": 6.919467492815113e-07,
      "loss": 2.2007,
      "step": 67713
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0929542779922485,
      "learning_rate": 6.917962610790707e-07,
      "loss": 2.2299,
      "step": 67714
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.023424506187439,
      "learning_rate": 6.916457886565608e-07,
      "loss": 2.3174,
      "step": 67715
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1827958822250366,
      "learning_rate": 6.914953320142404e-07,
      "loss": 2.3595,
      "step": 67716
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0428568124771118,
      "learning_rate": 6.913448911523623e-07,
      "loss": 2.2406,
      "step": 67717
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5299713611602783,
      "learning_rate": 6.911944660711844e-07,
      "loss": 2.1154,
      "step": 67718
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0396798849105835,
      "learning_rate": 6.910440567709587e-07,
      "loss": 2.6624,
      "step": 67719
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0272996425628662,
      "learning_rate": 6.908936632519426e-07,
      "loss": 2.4241,
      "step": 67720
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1681393384933472,
      "learning_rate": 6.907432855143891e-07,
      "loss": 2.3491,
      "step": 67721
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0187478065490723,
      "learning_rate": 6.905929235585551e-07,
      "loss": 2.3339,
      "step": 67722
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0032308101654053,
      "learning_rate": 6.904425773846934e-07,
      "loss": 2.2782,
      "step": 67723
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3271366357803345,
      "learning_rate": 6.902922469930618e-07,
      "loss": 2.4398,
      "step": 67724
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.227007269859314,
      "learning_rate": 6.901419323839132e-07,
      "loss": 2.4863,
      "step": 67725
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1857601404190063,
      "learning_rate": 6.899916335574996e-07,
      "loss": 2.2548,
      "step": 67726
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1810541152954102,
      "learning_rate": 6.898413505140822e-07,
      "loss": 2.188,
      "step": 67727
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1639423370361328,
      "learning_rate": 6.896910832539117e-07,
      "loss": 2.3431,
      "step": 67728
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0986160039901733,
      "learning_rate": 6.895408317772412e-07,
      "loss": 2.3371,
      "step": 67729
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1234791278839111,
      "learning_rate": 6.893905960843294e-07,
      "loss": 2.2454,
      "step": 67730
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0637080669403076,
      "learning_rate": 6.892403761754285e-07,
      "loss": 2.5114,
      "step": 67731
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1980236768722534,
      "learning_rate": 6.890901720507948e-07,
      "loss": 2.3155,
      "step": 67732
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1368355751037598,
      "learning_rate": 6.889399837106825e-07,
      "loss": 2.426,
      "step": 67733
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1725608110427856,
      "learning_rate": 6.887898111553437e-07,
      "loss": 2.4432,
      "step": 67734
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0398362874984741,
      "learning_rate": 6.886396543850371e-07,
      "loss": 2.4943,
      "step": 67735
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.04678475856781,
      "learning_rate": 6.884895134000136e-07,
      "loss": 2.204,
      "step": 67736
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.109643816947937,
      "learning_rate": 6.883393882005307e-07,
      "loss": 2.1636,
      "step": 67737
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0441733598709106,
      "learning_rate": 6.881892787868405e-07,
      "loss": 2.1691,
      "step": 67738
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0539556741714478,
      "learning_rate": 6.880391851591994e-07,
      "loss": 2.3967,
      "step": 67739
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0583394765853882,
      "learning_rate": 6.878891073178606e-07,
      "loss": 2.3672,
      "step": 67740
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1138907670974731,
      "learning_rate": 6.877390452630806e-07,
      "loss": 2.0813,
      "step": 67741
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0568963289260864,
      "learning_rate": 6.87588998995109e-07,
      "loss": 2.2595,
      "step": 67742
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0626826286315918,
      "learning_rate": 6.874389685142057e-07,
      "loss": 2.265,
      "step": 67743
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0933462381362915,
      "learning_rate": 6.872889538206207e-07,
      "loss": 2.1851,
      "step": 67744
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.995034396648407,
      "learning_rate": 6.871389549146113e-07,
      "loss": 2.3927,
      "step": 67745
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0836350917816162,
      "learning_rate": 6.869889717964296e-07,
      "loss": 2.3082,
      "step": 67746
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.112708568572998,
      "learning_rate": 6.86839004466332e-07,
      "loss": 2.3525,
      "step": 67747
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0541167259216309,
      "learning_rate": 6.866890529245707e-07,
      "loss": 2.4354,
      "step": 67748
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1801191568374634,
      "learning_rate": 6.865391171714031e-07,
      "loss": 2.2773,
      "step": 67749
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9803836941719055,
      "learning_rate": 6.863891972070791e-07,
      "loss": 2.1015,
      "step": 67750
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2130119800567627,
      "learning_rate": 6.862392930318562e-07,
      "loss": 2.3007,
      "step": 67751
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.089680790901184,
      "learning_rate": 6.860894046459854e-07,
      "loss": 2.3585,
      "step": 67752
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.165326476097107,
      "learning_rate": 6.859395320497265e-07,
      "loss": 2.1974,
      "step": 67753
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0933798551559448,
      "learning_rate": 6.857896752433268e-07,
      "loss": 2.3816,
      "step": 67754
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2329689264297485,
      "learning_rate": 6.856398342270442e-07,
      "loss": 2.2043,
      "step": 67755
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1049162149429321,
      "learning_rate": 6.854900090011307e-07,
      "loss": 2.3151,
      "step": 67756
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.159868836402893,
      "learning_rate": 6.853401995658426e-07,
      "loss": 2.3313,
      "step": 67757
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3907808065414429,
      "learning_rate": 6.85190405921432e-07,
      "loss": 2.3705,
      "step": 67758
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0471869707107544,
      "learning_rate": 6.850406280681543e-07,
      "loss": 2.2096,
      "step": 67759
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0697890520095825,
      "learning_rate": 6.848908660062625e-07,
      "loss": 2.2138,
      "step": 67760
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0325931310653687,
      "learning_rate": 6.847411197360099e-07,
      "loss": 2.3418,
      "step": 67761
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.003424882888794,
      "learning_rate": 6.845913892576516e-07,
      "loss": 2.1716,
      "step": 67762
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2640032768249512,
      "learning_rate": 6.844416745714399e-07,
      "loss": 2.3486,
      "step": 67763
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0774784088134766,
      "learning_rate": 6.842919756776312e-07,
      "loss": 2.4252,
      "step": 67764
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9908668994903564,
      "learning_rate": 6.841422925764763e-07,
      "loss": 2.2024,
      "step": 67765
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2844984531402588,
      "learning_rate": 6.839926252682338e-07,
      "loss": 2.3084,
      "step": 67766
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1166653633117676,
      "learning_rate": 6.838429737531504e-07,
      "loss": 2.1718,
      "step": 67767
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0522747039794922,
      "learning_rate": 6.836933380314847e-07,
      "loss": 2.3051,
      "step": 67768
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2316831350326538,
      "learning_rate": 6.835437181034876e-07,
      "loss": 2.2359,
      "step": 67769
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.111738681793213,
      "learning_rate": 6.833941139694166e-07,
      "loss": 2.5025,
      "step": 67770
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.086021065711975,
      "learning_rate": 6.832445256295206e-07,
      "loss": 2.4574,
      "step": 67771
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1451706886291504,
      "learning_rate": 6.830949530840569e-07,
      "loss": 2.298,
      "step": 67772
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.572748064994812,
      "learning_rate": 6.829453963332766e-07,
      "loss": 2.1613,
      "step": 67773
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2608368396759033,
      "learning_rate": 6.827958553774361e-07,
      "loss": 2.0917,
      "step": 67774
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3078652620315552,
      "learning_rate": 6.826463302167851e-07,
      "loss": 2.263,
      "step": 67775
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.236255407333374,
      "learning_rate": 6.824968208515803e-07,
      "loss": 2.3241,
      "step": 67776
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1047002077102661,
      "learning_rate": 6.823473272820736e-07,
      "loss": 2.3505,
      "step": 67777
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1068419218063354,
      "learning_rate": 6.821978495085191e-07,
      "loss": 2.0948,
      "step": 67778
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.01485276222229,
      "learning_rate": 6.820483875311689e-07,
      "loss": 2.2118,
      "step": 67779
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0320298671722412,
      "learning_rate": 6.818989413502808e-07,
      "loss": 2.3289,
      "step": 67780
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0677037239074707,
      "learning_rate": 6.81749510966101e-07,
      "loss": 2.3562,
      "step": 67781
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.080502986907959,
      "learning_rate": 6.816000963788894e-07,
      "loss": 2.2371,
      "step": 67782
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0232311487197876,
      "learning_rate": 6.814506975888935e-07,
      "loss": 2.2354,
      "step": 67783
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0923832654953003,
      "learning_rate": 6.81301314596372e-07,
      "loss": 2.2954,
      "step": 67784
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.110609769821167,
      "learning_rate": 6.811519474015738e-07,
      "loss": 2.3717,
      "step": 67785
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.199317216873169,
      "learning_rate": 6.810025960047561e-07,
      "loss": 2.3702,
      "step": 67786
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0268664360046387,
      "learning_rate": 6.80853260406169e-07,
      "loss": 2.2911,
      "step": 67787
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.295494794845581,
      "learning_rate": 6.807039406060656e-07,
      "loss": 2.2583,
      "step": 67788
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0052433013916016,
      "learning_rate": 6.805546366047022e-07,
      "loss": 2.5768,
      "step": 67789
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0163313150405884,
      "learning_rate": 6.804053484023276e-07,
      "loss": 2.4402,
      "step": 67790
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1193045377731323,
      "learning_rate": 6.802560759991983e-07,
      "loss": 2.2245,
      "step": 67791
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1208423376083374,
      "learning_rate": 6.801068193955651e-07,
      "loss": 2.3952,
      "step": 67792
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0391755104064941,
      "learning_rate": 6.799575785916845e-07,
      "loss": 2.4448,
      "step": 67793
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.059677004814148,
      "learning_rate": 6.798083535878064e-07,
      "loss": 2.317,
      "step": 67794
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0033202171325684,
      "learning_rate": 6.796591443841838e-07,
      "loss": 2.6127,
      "step": 67795
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0347962379455566,
      "learning_rate": 6.795099509810699e-07,
      "loss": 2.3415,
      "step": 67796
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0188751220703125,
      "learning_rate": 6.793607733787188e-07,
      "loss": 2.5477,
      "step": 67797
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0032401084899902,
      "learning_rate": 6.792116115773806e-07,
      "loss": 2.1527,
      "step": 67798
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0620970726013184,
      "learning_rate": 6.790624655773126e-07,
      "loss": 2.3449,
      "step": 67799
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0387705564498901,
      "learning_rate": 6.789133353787636e-07,
      "loss": 2.4487,
      "step": 67800
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1723501682281494,
      "learning_rate": 6.7876422098199e-07,
      "loss": 2.4529,
      "step": 67801
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0996530055999756,
      "learning_rate": 6.786151223872406e-07,
      "loss": 2.2747,
      "step": 67802
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1415939331054688,
      "learning_rate": 6.784660395947717e-07,
      "loss": 2.3325,
      "step": 67803
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0564968585968018,
      "learning_rate": 6.783169726048333e-07,
      "loss": 2.3035,
      "step": 67804
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1926802396774292,
      "learning_rate": 6.781679214176806e-07,
      "loss": 2.3575,
      "step": 67805
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1627639532089233,
      "learning_rate": 6.780188860335657e-07,
      "loss": 2.4185,
      "step": 67806
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1366779804229736,
      "learning_rate": 6.778698664527395e-07,
      "loss": 2.5332,
      "step": 67807
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3946751356124878,
      "learning_rate": 6.777208626754551e-07,
      "loss": 2.5324,
      "step": 67808
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.057802438735962,
      "learning_rate": 6.775718747019666e-07,
      "loss": 2.2734,
      "step": 67809
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0652590990066528,
      "learning_rate": 6.774229025325252e-07,
      "loss": 2.2306,
      "step": 67810
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1285423040390015,
      "learning_rate": 6.772739461673849e-07,
      "loss": 2.3287,
      "step": 67811
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.079829454421997,
      "learning_rate": 6.771250056067957e-07,
      "loss": 2.3428,
      "step": 67812
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0214989185333252,
      "learning_rate": 6.769760808510128e-07,
      "loss": 2.327,
      "step": 67813
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9616595506668091,
      "learning_rate": 6.768271719002861e-07,
      "loss": 2.2518,
      "step": 67814
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1175047159194946,
      "learning_rate": 6.76678278754872e-07,
      "loss": 2.2963,
      "step": 67815
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0193058252334595,
      "learning_rate": 6.765294014150192e-07,
      "loss": 2.1295,
      "step": 67816
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1679667234420776,
      "learning_rate": 6.763805398809809e-07,
      "loss": 2.3172,
      "step": 67817
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.107946753501892,
      "learning_rate": 6.7623169415301e-07,
      "loss": 2.4304,
      "step": 67818
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0916218757629395,
      "learning_rate": 6.760828642313599e-07,
      "loss": 2.1684,
      "step": 67819
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1015877723693848,
      "learning_rate": 6.759340501162792e-07,
      "loss": 2.3881,
      "step": 67820
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0640712976455688,
      "learning_rate": 6.757852518080254e-07,
      "loss": 2.3748,
      "step": 67821
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.082400918006897,
      "learning_rate": 6.756364693068473e-07,
      "loss": 2.4142,
      "step": 67822
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.124795913696289,
      "learning_rate": 6.754877026129958e-07,
      "loss": 2.4942,
      "step": 67823
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1770204305648804,
      "learning_rate": 6.753389517267272e-07,
      "loss": 2.3398,
      "step": 67824
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9806191325187683,
      "learning_rate": 6.751902166482904e-07,
      "loss": 2.6495,
      "step": 67825
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0110527276992798,
      "learning_rate": 6.750414973779407e-07,
      "loss": 2.3044,
      "step": 67826
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.242332935333252,
      "learning_rate": 6.748927939159255e-07,
      "loss": 2.3504,
      "step": 67827
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0785434246063232,
      "learning_rate": 6.747441062625015e-07,
      "loss": 2.3497,
      "step": 67828
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0950970649719238,
      "learning_rate": 6.745954344179185e-07,
      "loss": 2.2226,
      "step": 67829
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0607717037200928,
      "learning_rate": 6.744467783824294e-07,
      "loss": 2.3402,
      "step": 67830
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9915944933891296,
      "learning_rate": 6.742981381562841e-07,
      "loss": 2.1527,
      "step": 67831
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.055586338043213,
      "learning_rate": 6.741495137397402e-07,
      "loss": 2.443,
      "step": 67832
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1618356704711914,
      "learning_rate": 6.74000905133042e-07,
      "loss": 2.3292,
      "step": 67833
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0660871267318726,
      "learning_rate": 6.738523123364471e-07,
      "loss": 2.4758,
      "step": 67834
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0061519145965576,
      "learning_rate": 6.737037353502041e-07,
      "loss": 2.0093,
      "step": 67835
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0315055847167969,
      "learning_rate": 6.735551741745672e-07,
      "loss": 2.2938,
      "step": 67836
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1618849039077759,
      "learning_rate": 6.734066288097863e-07,
      "loss": 1.9934,
      "step": 67837
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0700466632843018,
      "learning_rate": 6.732580992561155e-07,
      "loss": 2.4701,
      "step": 67838
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4321931600570679,
      "learning_rate": 6.731095855138037e-07,
      "loss": 2.4891,
      "step": 67839
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1988970041275024,
      "learning_rate": 6.729610875831061e-07,
      "loss": 2.2118,
      "step": 67840
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2118473052978516,
      "learning_rate": 6.728126054642714e-07,
      "loss": 2.3818,
      "step": 67841
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.049076795578003,
      "learning_rate": 6.726641391575528e-07,
      "loss": 2.284,
      "step": 67842
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.144040584564209,
      "learning_rate": 6.725156886632034e-07,
      "loss": 2.5066,
      "step": 67843
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0869001150131226,
      "learning_rate": 6.723672539814708e-07,
      "loss": 2.427,
      "step": 67844
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1054158210754395,
      "learning_rate": 6.722188351126102e-07,
      "loss": 2.4769,
      "step": 67845
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.100027084350586,
      "learning_rate": 6.720704320568727e-07,
      "loss": 2.3145,
      "step": 67846
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1009025573730469,
      "learning_rate": 6.71922044814508e-07,
      "loss": 2.422,
      "step": 67847
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4017977714538574,
      "learning_rate": 6.717736733857705e-07,
      "loss": 2.0887,
      "step": 67848
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0406713485717773,
      "learning_rate": 6.716253177709075e-07,
      "loss": 2.3827,
      "step": 67849
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.142896294593811,
      "learning_rate": 6.714769779701757e-07,
      "loss": 2.512,
      "step": 67850
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.12504243850708,
      "learning_rate": 6.713286539838238e-07,
      "loss": 2.5344,
      "step": 67851
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0446721315383911,
      "learning_rate": 6.711803458121013e-07,
      "loss": 2.4562,
      "step": 67852
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0570000410079956,
      "learning_rate": 6.710320534552639e-07,
      "loss": 2.4296,
      "step": 67853
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0857739448547363,
      "learning_rate": 6.70883776913559e-07,
      "loss": 2.1464,
      "step": 67854
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1164761781692505,
      "learning_rate": 6.70735516187242e-07,
      "loss": 2.5508,
      "step": 67855
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.388621211051941,
      "learning_rate": 6.705872712765605e-07,
      "loss": 2.3338,
      "step": 67856
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9930152893066406,
      "learning_rate": 6.704390421817685e-07,
      "loss": 2.1461,
      "step": 67857
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.026358962059021,
      "learning_rate": 6.70290828903114e-07,
      "loss": 2.5749,
      "step": 67858
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0588915348052979,
      "learning_rate": 6.701426314408543e-07,
      "loss": 2.4112,
      "step": 67859
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1719502210617065,
      "learning_rate": 6.699944497952337e-07,
      "loss": 2.2492,
      "step": 67860
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0120717287063599,
      "learning_rate": 6.698462839665087e-07,
      "loss": 2.5073,
      "step": 67861
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0156375169754028,
      "learning_rate": 6.696981339549258e-07,
      "loss": 2.4656,
      "step": 67862
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1331727504730225,
      "learning_rate": 6.695499997607413e-07,
      "loss": 2.4185,
      "step": 67863
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1664774417877197,
      "learning_rate": 6.694018813842007e-07,
      "loss": 2.3154,
      "step": 67864
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.118786096572876,
      "learning_rate": 6.692537788255604e-07,
      "loss": 2.2139,
      "step": 67865
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0646718740463257,
      "learning_rate": 6.69105692085067e-07,
      "loss": 2.0538,
      "step": 67866
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.097446322441101,
      "learning_rate": 6.689576211629756e-07,
      "loss": 2.3705,
      "step": 67867
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0034085512161255,
      "learning_rate": 6.68809566059534e-07,
      "loss": 2.2607,
      "step": 67868
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2492271661758423,
      "learning_rate": 6.686615267749963e-07,
      "loss": 2.5457,
      "step": 67869
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0699797868728638,
      "learning_rate": 6.685135033096091e-07,
      "loss": 2.2091,
      "step": 67870
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0619432926177979,
      "learning_rate": 6.683654956636287e-07,
      "loss": 2.1386,
      "step": 67871
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1019718647003174,
      "learning_rate": 6.682175038373028e-07,
      "loss": 2.1804,
      "step": 67872
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1266016960144043,
      "learning_rate": 6.680695278308824e-07,
      "loss": 2.3285,
      "step": 67873
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0230480432510376,
      "learning_rate": 6.679215676446171e-07,
      "loss": 2.2951,
      "step": 67874
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0985959768295288,
      "learning_rate": 6.677736232787613e-07,
      "loss": 2.3071,
      "step": 67875
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0446752309799194,
      "learning_rate": 6.676256947335613e-07,
      "loss": 2.5074,
      "step": 67876
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9543434381484985,
      "learning_rate": 6.674777820092726e-07,
      "loss": 2.3436,
      "step": 67877
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0654551982879639,
      "learning_rate": 6.673298851061439e-07,
      "loss": 2.225,
      "step": 67878
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1699202060699463,
      "learning_rate": 6.671820040244237e-07,
      "loss": 2.2328,
      "step": 67879
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1927053928375244,
      "learning_rate": 6.670341387643664e-07,
      "loss": 2.4602,
      "step": 67880
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0920315980911255,
      "learning_rate": 6.668862893262207e-07,
      "loss": 2.3311,
      "step": 67881
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2040913105010986,
      "learning_rate": 6.667384557102374e-07,
      "loss": 2.2908,
      "step": 67882
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0790166854858398,
      "learning_rate": 6.665906379166665e-07,
      "loss": 2.4211,
      "step": 67883
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0901761054992676,
      "learning_rate": 6.664428359457608e-07,
      "loss": 2.2118,
      "step": 67884
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1263424158096313,
      "learning_rate": 6.662950497977705e-07,
      "loss": 2.612,
      "step": 67885
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0702720880508423,
      "learning_rate": 6.66147279472944e-07,
      "loss": 2.3012,
      "step": 67886
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0819830894470215,
      "learning_rate": 6.659995249715311e-07,
      "loss": 2.4079,
      "step": 67887
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0639617443084717,
      "learning_rate": 6.658517862937864e-07,
      "loss": 2.4233,
      "step": 67888
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.132859706878662,
      "learning_rate": 6.657040634399559e-07,
      "loss": 2.1968,
      "step": 67889
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0645761489868164,
      "learning_rate": 6.655563564102941e-07,
      "loss": 2.2221,
      "step": 67890
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4840471744537354,
      "learning_rate": 6.654086652050473e-07,
      "loss": 2.2999,
      "step": 67891
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0021939277648926,
      "learning_rate": 6.6526098982447e-07,
      "loss": 2.3887,
      "step": 67892
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2512893676757812,
      "learning_rate": 6.651133302688096e-07,
      "loss": 2.3648,
      "step": 67893
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2102411985397339,
      "learning_rate": 6.64965686538318e-07,
      "loss": 2.2762,
      "step": 67894
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0240352153778076,
      "learning_rate": 6.648180586332442e-07,
      "loss": 2.3186,
      "step": 67895
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1661808490753174,
      "learning_rate": 6.646704465538412e-07,
      "loss": 2.477,
      "step": 67896
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2609790563583374,
      "learning_rate": 6.645228503003542e-07,
      "loss": 2.4916,
      "step": 67897
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2170227766036987,
      "learning_rate": 6.64375269873041e-07,
      "loss": 2.2951,
      "step": 67898
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1882110834121704,
      "learning_rate": 6.642277052721424e-07,
      "loss": 2.3671,
      "step": 67899
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1974822282791138,
      "learning_rate": 6.64080156497916e-07,
      "loss": 2.3837,
      "step": 67900
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0650997161865234,
      "learning_rate": 6.639326235506082e-07,
      "loss": 2.2315,
      "step": 67901
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0520817041397095,
      "learning_rate": 6.637851064304712e-07,
      "loss": 2.1721,
      "step": 67902
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0402048826217651,
      "learning_rate": 6.636376051377525e-07,
      "loss": 2.2215,
      "step": 67903
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2510883808135986,
      "learning_rate": 6.634901196727062e-07,
      "loss": 2.455,
      "step": 67904
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0934436321258545,
      "learning_rate": 6.633426500355789e-07,
      "loss": 2.5857,
      "step": 67905
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0291619300842285,
      "learning_rate": 6.631951962266203e-07,
      "loss": 2.4192,
      "step": 67906
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1933774948120117,
      "learning_rate": 6.630477582460837e-07,
      "loss": 2.3702,
      "step": 67907
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1121217012405396,
      "learning_rate": 6.629003360942155e-07,
      "loss": 2.3356,
      "step": 67908
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0666728019714355,
      "learning_rate": 6.627529297712676e-07,
      "loss": 2.3025,
      "step": 67909
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0683823823928833,
      "learning_rate": 6.626055392774888e-07,
      "loss": 2.1736,
      "step": 67910
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.131686806678772,
      "learning_rate": 6.624581646131311e-07,
      "loss": 2.4276,
      "step": 67911
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.02156400680542,
      "learning_rate": 6.623108057784422e-07,
      "loss": 2.1598,
      "step": 67912
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0372024774551392,
      "learning_rate": 6.62163462773674e-07,
      "loss": 2.3921,
      "step": 67913
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0031309127807617,
      "learning_rate": 6.620161355990717e-07,
      "loss": 2.1517,
      "step": 67914
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1182851791381836,
      "learning_rate": 6.61868824254891e-07,
      "loss": 2.1384,
      "step": 67915
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0406512022018433,
      "learning_rate": 6.617215287413769e-07,
      "loss": 2.2392,
      "step": 67916
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.098527193069458,
      "learning_rate": 6.615742490587818e-07,
      "loss": 2.5383,
      "step": 67917
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0808238983154297,
      "learning_rate": 6.614269852073541e-07,
      "loss": 2.439,
      "step": 67918
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.01911199092865,
      "learning_rate": 6.612797371873447e-07,
      "loss": 2.2521,
      "step": 67919
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.052533507347107,
      "learning_rate": 6.611325049990025e-07,
      "loss": 2.0378,
      "step": 67920
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1076478958129883,
      "learning_rate": 6.609852886425772e-07,
      "loss": 2.0973,
      "step": 67921
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.035120964050293,
      "learning_rate": 6.608380881183174e-07,
      "loss": 2.1358,
      "step": 67922
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0763520002365112,
      "learning_rate": 6.606909034264741e-07,
      "loss": 2.2693,
      "step": 67923
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0811865329742432,
      "learning_rate": 6.605437345672961e-07,
      "loss": 2.2547,
      "step": 67924
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1469331979751587,
      "learning_rate": 6.603965815410351e-07,
      "loss": 2.3558,
      "step": 67925
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9943241477012634,
      "learning_rate": 6.602494443479357e-07,
      "loss": 2.2646,
      "step": 67926
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0074883699417114,
      "learning_rate": 6.60102322988252e-07,
      "loss": 2.2678,
      "step": 67927
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.067899465560913,
      "learning_rate": 6.599552174622293e-07,
      "loss": 2.4799,
      "step": 67928
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0957902669906616,
      "learning_rate": 6.598081277701218e-07,
      "loss": 2.3137,
      "step": 67929
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0097129344940186,
      "learning_rate": 6.59661053912174e-07,
      "loss": 2.1899,
      "step": 67930
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.067503809928894,
      "learning_rate": 6.595139958886387e-07,
      "loss": 2.3266,
      "step": 67931
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0402923822402954,
      "learning_rate": 6.593669536997627e-07,
      "loss": 2.2008,
      "step": 67932
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1573231220245361,
      "learning_rate": 6.592199273457989e-07,
      "loss": 2.3329,
      "step": 67933
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0007578134536743,
      "learning_rate": 6.590729168269938e-07,
      "loss": 2.1344,
      "step": 67934
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0368667840957642,
      "learning_rate": 6.589259221435951e-07,
      "loss": 2.1286,
      "step": 67935
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0718368291854858,
      "learning_rate": 6.587789432958558e-07,
      "loss": 2.4751,
      "step": 67936
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0620903968811035,
      "learning_rate": 6.586319802840224e-07,
      "loss": 2.1958,
      "step": 67937
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0569303035736084,
      "learning_rate": 6.584850331083448e-07,
      "loss": 2.3781,
      "step": 67938
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0631749629974365,
      "learning_rate": 6.583381017690738e-07,
      "loss": 2.1936,
      "step": 67939
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0333716869354248,
      "learning_rate": 6.581911862664558e-07,
      "loss": 2.1499,
      "step": 67940
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1762109994888306,
      "learning_rate": 6.580442866007397e-07,
      "loss": 2.1387,
      "step": 67941
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2950751781463623,
      "learning_rate": 6.578974027721773e-07,
      "loss": 2.3792,
      "step": 67942
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1103229522705078,
      "learning_rate": 6.577505347810153e-07,
      "loss": 2.118,
      "step": 67943
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1040773391723633,
      "learning_rate": 6.576036826275045e-07,
      "loss": 2.2922,
      "step": 67944
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.225592851638794,
      "learning_rate": 6.574568463118913e-07,
      "loss": 2.4444,
      "step": 67945
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6566582918167114,
      "learning_rate": 6.573100258344278e-07,
      "loss": 2.2194,
      "step": 67946
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.184529185295105,
      "learning_rate": 6.571632211953594e-07,
      "loss": 2.0919,
      "step": 67947
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1450730562210083,
      "learning_rate": 6.570164323949379e-07,
      "loss": 2.4045,
      "step": 67948
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1406131982803345,
      "learning_rate": 6.568696594334101e-07,
      "loss": 2.2482,
      "step": 67949
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1789302825927734,
      "learning_rate": 6.567229023110277e-07,
      "loss": 2.1952,
      "step": 67950
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0691181421279907,
      "learning_rate": 6.565761610280374e-07,
      "loss": 2.2277,
      "step": 67951
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.080162763595581,
      "learning_rate": 6.564294355846878e-07,
      "loss": 2.4328,
      "step": 67952
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0836758613586426,
      "learning_rate": 6.562827259812266e-07,
      "loss": 2.3608,
      "step": 67953
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1505292654037476,
      "learning_rate": 6.561360322179044e-07,
      "loss": 2.1921,
      "step": 67954
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1527798175811768,
      "learning_rate": 6.559893542949691e-07,
      "loss": 2.3259,
      "step": 67955
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1532444953918457,
      "learning_rate": 6.558426922126714e-07,
      "loss": 2.2984,
      "step": 67956
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0320216417312622,
      "learning_rate": 6.556960459712558e-07,
      "loss": 2.3821,
      "step": 67957
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.088489294052124,
      "learning_rate": 6.555494155709752e-07,
      "loss": 2.2751,
      "step": 67958
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1094448566436768,
      "learning_rate": 6.55402801012075e-07,
      "loss": 2.5261,
      "step": 67959
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.034259557723999,
      "learning_rate": 6.552562022948062e-07,
      "loss": 2.1737,
      "step": 67960
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0212088823318481,
      "learning_rate": 6.551096194194162e-07,
      "loss": 2.4968,
      "step": 67961
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.9610899090766907,
      "learning_rate": 6.549630523861516e-07,
      "loss": 2.2986,
      "step": 67962
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0504997968673706,
      "learning_rate": 6.548165011952645e-07,
      "loss": 2.4803,
      "step": 67963
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2908717393875122,
      "learning_rate": 6.546699658470012e-07,
      "loss": 2.2991,
      "step": 67964
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0857113599777222,
      "learning_rate": 6.545234463416095e-07,
      "loss": 2.1041,
      "step": 67965
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.137783169746399,
      "learning_rate": 6.5437694267934e-07,
      "loss": 2.357,
      "step": 67966
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.1295266151428223,
      "learning_rate": 6.542304548604384e-07,
      "loss": 2.2142,
      "step": 67967
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.0841258764266968,
      "learning_rate": 6.540839828851563e-07,
      "loss": 2.1887,
      "step": 67968
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1223176717758179,
      "learning_rate": 6.539375267537395e-07,
      "loss": 2.2757,
      "step": 67969
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2528387308120728,
      "learning_rate": 6.537910864664365e-07,
      "loss": 2.3051,
      "step": 67970
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2392990589141846,
      "learning_rate": 6.536446620234959e-07,
      "loss": 2.3229,
      "step": 67971
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.080634355545044,
      "learning_rate": 6.534982534251655e-07,
      "loss": 2.4406,
      "step": 67972
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1346668004989624,
      "learning_rate": 6.53351860671696e-07,
      "loss": 2.1651,
      "step": 67973
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1073025465011597,
      "learning_rate": 6.532054837633317e-07,
      "loss": 2.2221,
      "step": 67974
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.090010643005371,
      "learning_rate": 6.530591227003235e-07,
      "loss": 2.5909,
      "step": 67975
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.322492241859436,
      "learning_rate": 6.52912777482918e-07,
      "loss": 2.3345,
      "step": 67976
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0348577499389648,
      "learning_rate": 6.527664481113671e-07,
      "loss": 2.275,
      "step": 67977
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0908379554748535,
      "learning_rate": 6.526201345859117e-07,
      "loss": 2.239,
      "step": 67978
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9671706557273865,
      "learning_rate": 6.524738369068062e-07,
      "loss": 2.3502,
      "step": 67979
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.209665298461914,
      "learning_rate": 6.523275550742947e-07,
      "loss": 2.0813,
      "step": 67980
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.09890878200531,
      "learning_rate": 6.521812890886281e-07,
      "loss": 2.4028,
      "step": 67981
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.192548394203186,
      "learning_rate": 6.520350389500519e-07,
      "loss": 2.1455,
      "step": 67982
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1552445888519287,
      "learning_rate": 6.518888046588157e-07,
      "loss": 2.3607,
      "step": 67983
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3308287858963013,
      "learning_rate": 6.517425862151661e-07,
      "loss": 2.2464,
      "step": 67984
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.5147900581359863,
      "learning_rate": 6.515963836193539e-07,
      "loss": 2.1715,
      "step": 67985
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1455713510513306,
      "learning_rate": 6.514501968716225e-07,
      "loss": 2.3453,
      "step": 67986
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.095747470855713,
      "learning_rate": 6.513040259722237e-07,
      "loss": 2.4228,
      "step": 67987
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0703157186508179,
      "learning_rate": 6.511578709214017e-07,
      "loss": 2.2849,
      "step": 67988
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0793224573135376,
      "learning_rate": 6.510117317194087e-07,
      "loss": 2.2121,
      "step": 67989
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2178761959075928,
      "learning_rate": 6.508656083664899e-07,
      "loss": 2.1543,
      "step": 67990
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.146880865097046,
      "learning_rate": 6.507195008628919e-07,
      "loss": 2.1883,
      "step": 67991
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0310931205749512,
      "learning_rate": 6.505734092088634e-07,
      "loss": 2.2985,
      "step": 67992
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.216207504272461,
      "learning_rate": 6.504273334046529e-07,
      "loss": 2.1712,
      "step": 67993
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2436631917953491,
      "learning_rate": 6.50281273450506e-07,
      "loss": 2.2301,
      "step": 67994
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0407766103744507,
      "learning_rate": 6.501352293466734e-07,
      "loss": 2.4574,
      "step": 67995
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0558422803878784,
      "learning_rate": 6.499892010934005e-07,
      "loss": 2.4493,
      "step": 67996
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1266340017318726,
      "learning_rate": 6.498431886909351e-07,
      "loss": 2.1536,
      "step": 67997
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1518734693527222,
      "learning_rate": 6.496971921395257e-07,
      "loss": 2.4252,
      "step": 67998
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1710141897201538,
      "learning_rate": 6.495512114394175e-07,
      "loss": 2.3263,
      "step": 67999
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0325881242752075,
      "learning_rate": 6.494052465908607e-07,
      "loss": 2.1935,
      "step": 68000
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0301066637039185,
      "learning_rate": 6.492592975941004e-07,
      "loss": 2.1534,
      "step": 68001
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0646641254425049,
      "learning_rate": 6.491133644493863e-07,
      "loss": 2.6809,
      "step": 68002
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.075151801109314,
      "learning_rate": 6.48967447156964e-07,
      "loss": 2.682,
      "step": 68003
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.070465326309204,
      "learning_rate": 6.488215457170844e-07,
      "loss": 2.3481,
      "step": 68004
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0699635744094849,
      "learning_rate": 6.486756601299893e-07,
      "loss": 2.2096,
      "step": 68005
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0439329147338867,
      "learning_rate": 6.485297903959298e-07,
      "loss": 2.5801,
      "step": 68006
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1210715770721436,
      "learning_rate": 6.483839365151501e-07,
      "loss": 2.4226,
      "step": 68007
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.175577163696289,
      "learning_rate": 6.482380984879011e-07,
      "loss": 2.1589,
      "step": 68008
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0084024667739868,
      "learning_rate": 6.480922763144282e-07,
      "loss": 2.0249,
      "step": 68009
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0748138427734375,
      "learning_rate": 6.479464699949789e-07,
      "loss": 2.3403,
      "step": 68010
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.280730128288269,
      "learning_rate": 6.478006795297997e-07,
      "loss": 2.2621,
      "step": 68011
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.135135531425476,
      "learning_rate": 6.476549049191394e-07,
      "loss": 2.3608,
      "step": 68012
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1949490308761597,
      "learning_rate": 6.475091461632432e-07,
      "loss": 2.3875,
      "step": 68013
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0108171701431274,
      "learning_rate": 6.473634032623599e-07,
      "loss": 2.3324,
      "step": 68014
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.088944673538208,
      "learning_rate": 6.472176762167349e-07,
      "loss": 2.3454,
      "step": 68015
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0774370431900024,
      "learning_rate": 6.470719650266166e-07,
      "loss": 2.3275,
      "step": 68016
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0647653341293335,
      "learning_rate": 6.469262696922529e-07,
      "loss": 2.1487,
      "step": 68017
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0113717317581177,
      "learning_rate": 6.46780590213888e-07,
      "loss": 2.2972,
      "step": 68018
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2488667964935303,
      "learning_rate": 6.466349265917693e-07,
      "loss": 2.418,
      "step": 68019
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0930267572402954,
      "learning_rate": 6.464892788261467e-07,
      "loss": 2.2583,
      "step": 68020
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1238967180252075,
      "learning_rate": 6.463436469172635e-07,
      "loss": 2.5097,
      "step": 68021
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1092414855957031,
      "learning_rate": 6.461980308653704e-07,
      "loss": 2.5065,
      "step": 68022
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.053125262260437,
      "learning_rate": 6.460524306707105e-07,
      "loss": 2.1031,
      "step": 68023
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0932409763336182,
      "learning_rate": 6.459068463335316e-07,
      "loss": 2.4053,
      "step": 68024
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0348918437957764,
      "learning_rate": 6.457612778540834e-07,
      "loss": 2.3351,
      "step": 68025
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0076960325241089,
      "learning_rate": 6.456157252326078e-07,
      "loss": 2.1098,
      "step": 68026
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1128482818603516,
      "learning_rate": 6.45470188469356e-07,
      "loss": 2.3608,
      "step": 68027
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0808972120285034,
      "learning_rate": 6.453246675645708e-07,
      "loss": 2.154,
      "step": 68028
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0821270942687988,
      "learning_rate": 6.451791625185034e-07,
      "loss": 2.4085,
      "step": 68029
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0198707580566406,
      "learning_rate": 6.450336733313978e-07,
      "loss": 2.2498,
      "step": 68030
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.102283239364624,
      "learning_rate": 6.448882000035006e-07,
      "loss": 2.0851,
      "step": 68031
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0718448162078857,
      "learning_rate": 6.447427425350572e-07,
      "loss": 2.1903,
      "step": 68032
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0901684761047363,
      "learning_rate": 6.445973009263184e-07,
      "loss": 2.4552,
      "step": 68033
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1070600748062134,
      "learning_rate": 6.444518751775252e-07,
      "loss": 2.2028,
      "step": 68034
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0629626512527466,
      "learning_rate": 6.443064652889286e-07,
      "loss": 2.1923,
      "step": 68035
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0926470756530762,
      "learning_rate": 6.441610712607727e-07,
      "loss": 2.4316,
      "step": 68036
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1051260232925415,
      "learning_rate": 6.440156930933061e-07,
      "loss": 2.2819,
      "step": 68037
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0680186748504639,
      "learning_rate": 6.438703307867722e-07,
      "loss": 2.2248,
      "step": 68038
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2196457386016846,
      "learning_rate": 6.437249843414206e-07,
      "loss": 2.2422,
      "step": 68039
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0614341497421265,
      "learning_rate": 6.435796537574945e-07,
      "loss": 2.2785,
      "step": 68040
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0433484315872192,
      "learning_rate": 6.434343390352438e-07,
      "loss": 2.1824,
      "step": 68041
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1479169130325317,
      "learning_rate": 6.432890401749125e-07,
      "loss": 2.2703,
      "step": 68042
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1169772148132324,
      "learning_rate": 6.431437571767496e-07,
      "loss": 2.197,
      "step": 68043
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1736116409301758,
      "learning_rate": 6.429984900409969e-07,
      "loss": 2.3849,
      "step": 68044
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1542433500289917,
      "learning_rate": 6.428532387679043e-07,
      "loss": 2.2213,
      "step": 68045
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1313848495483398,
      "learning_rate": 6.427080033577148e-07,
      "loss": 2.3042,
      "step": 68046
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0939406156539917,
      "learning_rate": 6.425627838106785e-07,
      "loss": 2.1141,
      "step": 68047
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0514801740646362,
      "learning_rate": 6.424175801270382e-07,
      "loss": 1.9411,
      "step": 68048
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1706898212432861,
      "learning_rate": 6.422723923070439e-07,
      "loss": 2.3628,
      "step": 68049
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0977708101272583,
      "learning_rate": 6.421272203509366e-07,
      "loss": 2.4491,
      "step": 68050
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0450303554534912,
      "learning_rate": 6.41982064258968e-07,
      "loss": 2.328,
      "step": 68051
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1906177997589111,
      "learning_rate": 6.418369240313804e-07,
      "loss": 2.3695,
      "step": 68052
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1292920112609863,
      "learning_rate": 6.416917996684191e-07,
      "loss": 2.4015,
      "step": 68053
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.363782525062561,
      "learning_rate": 6.415466911703349e-07,
      "loss": 2.1985,
      "step": 68054
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0547406673431396,
      "learning_rate": 6.414015985373679e-07,
      "loss": 2.4027,
      "step": 68055
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.079990267753601,
      "learning_rate": 6.412565217697686e-07,
      "loss": 2.164,
      "step": 68056
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1720812320709229,
      "learning_rate": 6.411114608677815e-07,
      "loss": 2.2902,
      "step": 68057
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.071578025817871,
      "learning_rate": 6.409664158316531e-07,
      "loss": 2.2938,
      "step": 68058
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.252575159072876,
      "learning_rate": 6.408213866616264e-07,
      "loss": 2.3531,
      "step": 68059
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1109459400177002,
      "learning_rate": 6.406763733579502e-07,
      "loss": 2.4259,
      "step": 68060
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3912193775177002,
      "learning_rate": 6.405313759208686e-07,
      "loss": 2.2343,
      "step": 68061
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.6729769706726074,
      "learning_rate": 6.403863943506306e-07,
      "loss": 2.3418,
      "step": 68062
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0427225828170776,
      "learning_rate": 6.402414286474767e-07,
      "loss": 2.3789,
      "step": 68063
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0771595239639282,
      "learning_rate": 6.400964788116582e-07,
      "loss": 2.3015,
      "step": 68064
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0357682704925537,
      "learning_rate": 6.399515448434169e-07,
      "loss": 2.1547,
      "step": 68065
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1959868669509888,
      "learning_rate": 6.398066267430004e-07,
      "loss": 2.3124,
      "step": 68066
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.106042504310608,
      "learning_rate": 6.396617245106529e-07,
      "loss": 2.3746,
      "step": 68067
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.007991909980774,
      "learning_rate": 6.395168381466221e-07,
      "loss": 2.4417,
      "step": 68068
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2551575899124146,
      "learning_rate": 6.393719676511523e-07,
      "loss": 2.0179,
      "step": 68069
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.022005558013916,
      "learning_rate": 6.39227113024491e-07,
      "loss": 2.4443,
      "step": 68070
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0313960313796997,
      "learning_rate": 6.390822742668801e-07,
      "loss": 2.518,
      "step": 68071
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1126775741577148,
      "learning_rate": 6.389374513785673e-07,
      "loss": 2.3301,
      "step": 68072
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1595779657363892,
      "learning_rate": 6.38792644359798e-07,
      "loss": 2.2826,
      "step": 68073
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0115699768066406,
      "learning_rate": 6.386478532108176e-07,
      "loss": 2.3043,
      "step": 68074
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9998207092285156,
      "learning_rate": 6.385030779318713e-07,
      "loss": 2.1588,
      "step": 68075
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0569818019866943,
      "learning_rate": 6.383583185232056e-07,
      "loss": 2.2358,
      "step": 68076
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2471576929092407,
      "learning_rate": 6.382135749850649e-07,
      "loss": 2.2763,
      "step": 68077
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1143349409103394,
      "learning_rate": 6.380688473176944e-07,
      "loss": 2.3439,
      "step": 68078
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0507285594940186,
      "learning_rate": 6.379241355213417e-07,
      "loss": 2.2287,
      "step": 68079
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1083745956420898,
      "learning_rate": 6.377794395962478e-07,
      "loss": 2.4319,
      "step": 68080
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1554515361785889,
      "learning_rate": 6.376347595426624e-07,
      "loss": 2.3987,
      "step": 68081
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.02610445022583,
      "learning_rate": 6.374900953608265e-07,
      "loss": 2.4631,
      "step": 68082
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1561036109924316,
      "learning_rate": 6.373454470509899e-07,
      "loss": 2.0841,
      "step": 68083
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0623950958251953,
      "learning_rate": 6.372008146133956e-07,
      "loss": 2.1163,
      "step": 68084
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9957310557365417,
      "learning_rate": 6.37056198048287e-07,
      "loss": 2.1221,
      "step": 68085
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0854054689407349,
      "learning_rate": 6.369115973559126e-07,
      "loss": 2.37,
      "step": 68086
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0695369243621826,
      "learning_rate": 6.367670125365167e-07,
      "loss": 2.2598,
      "step": 68087
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1063810586929321,
      "learning_rate": 6.366224435903423e-07,
      "loss": 2.2487,
      "step": 68088
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0571551322937012,
      "learning_rate": 6.36477890517636e-07,
      "loss": 2.2001,
      "step": 68089
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9947883486747742,
      "learning_rate": 6.363333533186422e-07,
      "loss": 2.3975,
      "step": 68090
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0151287317276,
      "learning_rate": 6.361888319936083e-07,
      "loss": 2.2249,
      "step": 68091
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0990333557128906,
      "learning_rate": 6.360443265427763e-07,
      "loss": 2.0934,
      "step": 68092
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.059495210647583,
      "learning_rate": 6.358998369663927e-07,
      "loss": 2.2522,
      "step": 68093
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2411892414093018,
      "learning_rate": 6.357553632647018e-07,
      "loss": 2.433,
      "step": 68094
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1159559488296509,
      "learning_rate": 6.356109054379489e-07,
      "loss": 2.4003,
      "step": 68095
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1022138595581055,
      "learning_rate": 6.354664634863805e-07,
      "loss": 2.2797,
      "step": 68096
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1064239740371704,
      "learning_rate": 6.353220374102387e-07,
      "loss": 2.2766,
      "step": 68097
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.123880386352539,
      "learning_rate": 6.351776272097687e-07,
      "loss": 2.2552,
      "step": 68098
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1125984191894531,
      "learning_rate": 6.350332328852171e-07,
      "loss": 2.3694,
      "step": 68099
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0097899436950684,
      "learning_rate": 6.348888544368259e-07,
      "loss": 2.085,
      "step": 68100
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1677745580673218,
      "learning_rate": 6.347444918648438e-07,
      "loss": 2.2533,
      "step": 68101
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1890301704406738,
      "learning_rate": 6.346001451695116e-07,
      "loss": 2.1841,
      "step": 68102
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0261045694351196,
      "learning_rate": 6.344558143510771e-07,
      "loss": 2.1634,
      "step": 68103
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0490748882293701,
      "learning_rate": 6.34311499409782e-07,
      "loss": 2.2948,
      "step": 68104
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1992826461791992,
      "learning_rate": 6.341672003458743e-07,
      "loss": 2.3658,
      "step": 68105
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0230729579925537,
      "learning_rate": 6.340229171595957e-07,
      "loss": 2.2122,
      "step": 68106
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1277457475662231,
      "learning_rate": 6.338786498511929e-07,
      "loss": 2.5283,
      "step": 68107
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0836927890777588,
      "learning_rate": 6.337343984209099e-07,
      "loss": 2.4919,
      "step": 68108
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.049745798110962,
      "learning_rate": 6.335901628689911e-07,
      "loss": 2.5046,
      "step": 68109
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.028600811958313,
      "learning_rate": 6.334459431956797e-07,
      "loss": 2.2457,
      "step": 68110
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.139746069908142,
      "learning_rate": 6.333017394012219e-07,
      "loss": 2.2926,
      "step": 68111
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0760045051574707,
      "learning_rate": 6.331575514858601e-07,
      "loss": 2.3171,
      "step": 68112
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0776196718215942,
      "learning_rate": 6.330133794498427e-07,
      "loss": 2.441,
      "step": 68113
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.114477276802063,
      "learning_rate": 6.328692232934108e-07,
      "loss": 2.3895,
      "step": 68114
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0724878311157227,
      "learning_rate": 6.327250830168086e-07,
      "loss": 2.2085,
      "step": 68115
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1088488101959229,
      "learning_rate": 6.325809586202835e-07,
      "loss": 2.467,
      "step": 68116
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0061408281326294,
      "learning_rate": 6.324368501040756e-07,
      "loss": 2.4746,
      "step": 68117
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9807938933372498,
      "learning_rate": 6.322927574684334e-07,
      "loss": 2.3724,
      "step": 68118
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0564155578613281,
      "learning_rate": 6.321486807135968e-07,
      "loss": 2.2934,
      "step": 68119
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0893877744674683,
      "learning_rate": 6.320046198398144e-07,
      "loss": 2.4097,
      "step": 68120
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0592150688171387,
      "learning_rate": 6.318605748473273e-07,
      "loss": 2.0263,
      "step": 68121
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.036893367767334,
      "learning_rate": 6.317165457363839e-07,
      "loss": 2.3618,
      "step": 68122
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.069534182548523,
      "learning_rate": 6.31572532507222e-07,
      "loss": 2.1707,
      "step": 68123
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0679049491882324,
      "learning_rate": 6.314285351600913e-07,
      "loss": 2.3393,
      "step": 68124
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4610878229141235,
      "learning_rate": 6.312845536952317e-07,
      "loss": 2.3302,
      "step": 68125
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.141493797302246,
      "learning_rate": 6.311405881128907e-07,
      "loss": 2.3044,
      "step": 68126
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0670644044876099,
      "learning_rate": 6.309966384133093e-07,
      "loss": 2.1976,
      "step": 68127
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0576808452606201,
      "learning_rate": 6.30852704596735e-07,
      "loss": 2.5101,
      "step": 68128
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.015257716178894,
      "learning_rate": 6.307087866634088e-07,
      "loss": 2.2912,
      "step": 68129
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0650932788848877,
      "learning_rate": 6.30564884613577e-07,
      "loss": 2.3376,
      "step": 68130
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0681495666503906,
      "learning_rate": 6.304209984474807e-07,
      "loss": 2.0909,
      "step": 68131
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0915895700454712,
      "learning_rate": 6.302771281653675e-07,
      "loss": 2.1362,
      "step": 68132
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9708383083343506,
      "learning_rate": 6.30133273767477e-07,
      "loss": 2.4343,
      "step": 68133
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.182410478591919,
      "learning_rate": 6.299894352540581e-07,
      "loss": 2.2229,
      "step": 68134
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.035658836364746,
      "learning_rate": 6.298456126253516e-07,
      "loss": 2.3029,
      "step": 68135
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0841196775436401,
      "learning_rate": 6.297018058816006e-07,
      "loss": 2.4731,
      "step": 68136
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3555898666381836,
      "learning_rate": 6.295580150230496e-07,
      "loss": 2.3165,
      "step": 68137
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.063513159751892,
      "learning_rate": 6.294142400499437e-07,
      "loss": 2.2439,
      "step": 68138
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0410070419311523,
      "learning_rate": 6.29270480962525e-07,
      "loss": 2.3549,
      "step": 68139
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.132199764251709,
      "learning_rate": 6.291267377610388e-07,
      "loss": 2.2087,
      "step": 68140
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0584797859191895,
      "learning_rate": 6.289830104457285e-07,
      "loss": 2.306,
      "step": 68141
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0267318487167358,
      "learning_rate": 6.288392990168346e-07,
      "loss": 2.3769,
      "step": 68142
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1069642305374146,
      "learning_rate": 6.28695603474605e-07,
      "loss": 2.2166,
      "step": 68143
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1200652122497559,
      "learning_rate": 6.285519238192805e-07,
      "loss": 2.1876,
      "step": 68144
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.010228157043457,
      "learning_rate": 6.284082600511066e-07,
      "loss": 2.2683,
      "step": 68145
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9663337469100952,
      "learning_rate": 6.282646121703251e-07,
      "loss": 2.5845,
      "step": 68146
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.051182508468628,
      "learning_rate": 6.281209801771814e-07,
      "loss": 2.2927,
      "step": 68147
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0572137832641602,
      "learning_rate": 6.279773640719178e-07,
      "loss": 2.3969,
      "step": 68148
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0710794925689697,
      "learning_rate": 6.278337638547793e-07,
      "loss": 2.175,
      "step": 68149
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0856341123580933,
      "learning_rate": 6.27690179526006e-07,
      "loss": 2.3351,
      "step": 68150
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0442196130752563,
      "learning_rate": 6.275466110858442e-07,
      "loss": 2.1186,
      "step": 68151
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1174291372299194,
      "learning_rate": 6.274030585345359e-07,
      "loss": 2.5698,
      "step": 68152
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1714725494384766,
      "learning_rate": 6.272595218723254e-07,
      "loss": 2.1289,
      "step": 68153
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.358099341392517,
      "learning_rate": 6.271160010994548e-07,
      "loss": 2.2781,
      "step": 68154
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1315521001815796,
      "learning_rate": 6.269724962161694e-07,
      "loss": 2.3367,
      "step": 68155
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1052075624465942,
      "learning_rate": 6.268290072227101e-07,
      "loss": 2.1339,
      "step": 68156
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.134264588356018,
      "learning_rate": 6.266855341193223e-07,
      "loss": 2.2606,
      "step": 68157
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1055002212524414,
      "learning_rate": 6.265420769062469e-07,
      "loss": 2.4451,
      "step": 68158
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1665527820587158,
      "learning_rate": 6.263986355837304e-07,
      "loss": 2.2848,
      "step": 68159
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0505855083465576,
      "learning_rate": 6.262552101520125e-07,
      "loss": 2.4224,
      "step": 68160
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0779035091400146,
      "learning_rate": 6.261118006113398e-07,
      "loss": 2.359,
      "step": 68161
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1570888757705688,
      "learning_rate": 6.25968406961952e-07,
      "loss": 2.3912,
      "step": 68162
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1614428758621216,
      "learning_rate": 6.258250292040957e-07,
      "loss": 2.4155,
      "step": 68163
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0590890645980835,
      "learning_rate": 6.256816673380095e-07,
      "loss": 2.1379,
      "step": 68164
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.051628589630127,
      "learning_rate": 6.255383213639399e-07,
      "loss": 2.217,
      "step": 68165
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5434290170669556,
      "learning_rate": 6.253949912821277e-07,
      "loss": 2.3672,
      "step": 68166
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1959620714187622,
      "learning_rate": 6.252516770928185e-07,
      "loss": 2.4406,
      "step": 68167
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0642884969711304,
      "learning_rate": 6.251083787962531e-07,
      "loss": 2.2952,
      "step": 68168
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9792556166648865,
      "learning_rate": 6.249650963926756e-07,
      "loss": 2.1655,
      "step": 68169
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1171424388885498,
      "learning_rate": 6.248218298823295e-07,
      "loss": 2.2307,
      "step": 68170
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0440173149108887,
      "learning_rate": 6.246785792654541e-07,
      "loss": 2.0639,
      "step": 68171
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0134786367416382,
      "learning_rate": 6.245353445422975e-07,
      "loss": 2.2056,
      "step": 68172
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2188302278518677,
      "learning_rate": 6.24392125713098e-07,
      "loss": 2.3231,
      "step": 68173
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.091402292251587,
      "learning_rate": 6.242489227781012e-07,
      "loss": 2.2205,
      "step": 68174
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1892932653427124,
      "learning_rate": 6.241057357375491e-07,
      "loss": 2.2728,
      "step": 68175
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0494849681854248,
      "learning_rate": 6.239625645916847e-07,
      "loss": 2.3325,
      "step": 68176
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9926085472106934,
      "learning_rate": 6.238194093407479e-07,
      "loss": 2.5252,
      "step": 68177
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0981701612472534,
      "learning_rate": 6.236762699849863e-07,
      "loss": 2.3555,
      "step": 68178
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.033097267150879,
      "learning_rate": 6.235331465246375e-07,
      "loss": 2.2931,
      "step": 68179
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0997164249420166,
      "learning_rate": 6.23390038959949e-07,
      "loss": 2.3435,
      "step": 68180
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1105369329452515,
      "learning_rate": 6.232469472911584e-07,
      "loss": 2.4783,
      "step": 68181
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.102381944656372,
      "learning_rate": 6.231038715185134e-07,
      "loss": 2.295,
      "step": 68182
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.00899076461792,
      "learning_rate": 6.229608116422525e-07,
      "loss": 2.4113,
      "step": 68183
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0716100931167603,
      "learning_rate": 6.228177676626212e-07,
      "loss": 2.3386,
      "step": 68184
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0083903074264526,
      "learning_rate": 6.226747395798583e-07,
      "loss": 2.3258,
      "step": 68185
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0503015518188477,
      "learning_rate": 6.225317273942111e-07,
      "loss": 2.055,
      "step": 68186
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.011845350265503,
      "learning_rate": 6.223887311059174e-07,
      "loss": 2.2563,
      "step": 68187
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2389373779296875,
      "learning_rate": 6.222457507152247e-07,
      "loss": 2.3381,
      "step": 68188
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.064859390258789,
      "learning_rate": 6.221027862223705e-07,
      "loss": 2.2127,
      "step": 68189
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0568265914916992,
      "learning_rate": 6.219598376275993e-07,
      "loss": 2.2515,
      "step": 68190
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0935074090957642,
      "learning_rate": 6.218169049311517e-07,
      "loss": 2.3931,
      "step": 68191
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.095004916191101,
      "learning_rate": 6.216739881332734e-07,
      "loss": 2.1331,
      "step": 68192
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.146500587463379,
      "learning_rate": 6.21531087234204e-07,
      "loss": 2.2571,
      "step": 68193
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1513997316360474,
      "learning_rate": 6.213882022341877e-07,
      "loss": 2.1561,
      "step": 68194
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1052297353744507,
      "learning_rate": 6.212453331334634e-07,
      "loss": 2.2005,
      "step": 68195
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9779527187347412,
      "learning_rate": 6.211024799322774e-07,
      "loss": 2.2932,
      "step": 68196
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0597810745239258,
      "learning_rate": 6.209596426308706e-07,
      "loss": 2.5658,
      "step": 68197
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0120818614959717,
      "learning_rate": 6.208168212294819e-07,
      "loss": 2.2979,
      "step": 68198
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0359010696411133,
      "learning_rate": 6.206740157283586e-07,
      "loss": 2.2466,
      "step": 68199
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1405209302902222,
      "learning_rate": 6.205312261277375e-07,
      "loss": 2.0409,
      "step": 68200
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9709171652793884,
      "learning_rate": 6.203884524278658e-07,
      "loss": 2.3128,
      "step": 68201
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9955029487609863,
      "learning_rate": 6.202456946289826e-07,
      "loss": 2.3755,
      "step": 68202
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0619856119155884,
      "learning_rate": 6.201029527313296e-07,
      "loss": 2.2784,
      "step": 68203
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0714755058288574,
      "learning_rate": 6.199602267351513e-07,
      "loss": 2.2789,
      "step": 68204
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.132732629776001,
      "learning_rate": 6.198175166406872e-07,
      "loss": 2.2906,
      "step": 68205
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3045247793197632,
      "learning_rate": 6.196748224481786e-07,
      "loss": 2.4585,
      "step": 68206
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.017608642578125,
      "learning_rate": 6.195321441578705e-07,
      "loss": 2.2847,
      "step": 68207
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3868048191070557,
      "learning_rate": 6.193894817700019e-07,
      "loss": 2.1342,
      "step": 68208
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0332757234573364,
      "learning_rate": 6.19246835284818e-07,
      "loss": 2.3694,
      "step": 68209
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9866995215415955,
      "learning_rate": 6.191042047025553e-07,
      "loss": 2.5584,
      "step": 68210
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3018534183502197,
      "learning_rate": 6.189615900234614e-07,
      "loss": 2.4492,
      "step": 68211
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1724767684936523,
      "learning_rate": 6.188189912477737e-07,
      "loss": 2.2437,
      "step": 68212
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1114370822906494,
      "learning_rate": 6.18676408375738e-07,
      "loss": 2.2539,
      "step": 68213
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0849131345748901,
      "learning_rate": 6.185338414075903e-07,
      "loss": 2.1586,
      "step": 68214
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9713006019592285,
      "learning_rate": 6.183912903435807e-07,
      "loss": 2.4129,
      "step": 68215
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9849460124969482,
      "learning_rate": 6.182487551839422e-07,
      "loss": 2.2407,
      "step": 68216
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1349678039550781,
      "learning_rate": 6.181062359289214e-07,
      "loss": 2.3204,
      "step": 68217
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.127266526222229,
      "learning_rate": 6.179637325787569e-07,
      "loss": 2.2652,
      "step": 68218
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0389738082885742,
      "learning_rate": 6.17821245133694e-07,
      "loss": 2.186,
      "step": 68219
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.178866982460022,
      "learning_rate": 6.176787735939704e-07,
      "loss": 2.5976,
      "step": 68220
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0812045335769653,
      "learning_rate": 6.175363179598315e-07,
      "loss": 2.1474,
      "step": 68221
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.044158935546875,
      "learning_rate": 6.173938782315148e-07,
      "loss": 2.3405,
      "step": 68222
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1662921905517578,
      "learning_rate": 6.172514544092645e-07,
      "loss": 2.2835,
      "step": 68223
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.042923092842102,
      "learning_rate": 6.171090464933216e-07,
      "loss": 2.2493,
      "step": 68224
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1060386896133423,
      "learning_rate": 6.16966654483927e-07,
      "loss": 2.3821,
      "step": 68225
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0398139953613281,
      "learning_rate": 6.168242783813239e-07,
      "loss": 2.3865,
      "step": 68226
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0835877656936646,
      "learning_rate": 6.166819181857498e-07,
      "loss": 2.3531,
      "step": 68227
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0219770669937134,
      "learning_rate": 6.165395738974489e-07,
      "loss": 2.3082,
      "step": 68228
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3046294450759888,
      "learning_rate": 6.163972455166633e-07,
      "loss": 2.3178,
      "step": 68229
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1763228178024292,
      "learning_rate": 6.162549330436307e-07,
      "loss": 2.2088,
      "step": 68230
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0832768678665161,
      "learning_rate": 6.161126364785963e-07,
      "loss": 2.2682,
      "step": 68231
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0631682872772217,
      "learning_rate": 6.159703558218e-07,
      "loss": 2.0868,
      "step": 68232
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1759583950042725,
      "learning_rate": 6.158280910734804e-07,
      "loss": 2.3769,
      "step": 68233
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1844382286071777,
      "learning_rate": 6.156858422338829e-07,
      "loss": 2.4456,
      "step": 68234
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.226458191871643,
      "learning_rate": 6.155436093032452e-07,
      "loss": 2.3459,
      "step": 68235
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1783912181854248,
      "learning_rate": 6.154013922818114e-07,
      "loss": 2.33,
      "step": 68236
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9721611142158508,
      "learning_rate": 6.152591911698191e-07,
      "loss": 2.4715,
      "step": 68237
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2509002685546875,
      "learning_rate": 6.151170059675127e-07,
      "loss": 2.2843,
      "step": 68238
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.089123249053955,
      "learning_rate": 6.149748366751307e-07,
      "loss": 2.1338,
      "step": 68239
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0175859928131104,
      "learning_rate": 6.148326832929174e-07,
      "loss": 2.2955,
      "step": 68240
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1428272724151611,
      "learning_rate": 6.146905458211106e-07,
      "loss": 2.2733,
      "step": 68241
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0298633575439453,
      "learning_rate": 6.14548424259953e-07,
      "loss": 2.1621,
      "step": 68242
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0453559160232544,
      "learning_rate": 6.144063186096838e-07,
      "loss": 2.4018,
      "step": 68243
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0470463037490845,
      "learning_rate": 6.142642288705458e-07,
      "loss": 2.2211,
      "step": 68244
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1097735166549683,
      "learning_rate": 6.141221550427778e-07,
      "loss": 2.3956,
      "step": 68245
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0557838678359985,
      "learning_rate": 6.13980097126623e-07,
      "loss": 2.4266,
      "step": 68246
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.084134578704834,
      "learning_rate": 6.1383805512232e-07,
      "loss": 2.4836,
      "step": 68247
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0440750122070312,
      "learning_rate": 6.13696029030112e-07,
      "loss": 2.4158,
      "step": 68248
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.067805290222168,
      "learning_rate": 6.135540188502376e-07,
      "loss": 2.5299,
      "step": 68249
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1850237846374512,
      "learning_rate": 6.134120245829389e-07,
      "loss": 2.3933,
      "step": 68250
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0378774404525757,
      "learning_rate": 6.132700462284558e-07,
      "loss": 2.3973,
      "step": 68251
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0759453773498535,
      "learning_rate": 6.131280837870301e-07,
      "loss": 2.3301,
      "step": 68252
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9651739001274109,
      "learning_rate": 6.129861372589019e-07,
      "loss": 2.3686,
      "step": 68253
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0849517583847046,
      "learning_rate": 6.128442066443118e-07,
      "loss": 2.2639,
      "step": 68254
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0747392177581787,
      "learning_rate": 6.127022919434988e-07,
      "loss": 2.5385,
      "step": 68255
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1682013273239136,
      "learning_rate": 6.12560393156707e-07,
      "loss": 2.3086,
      "step": 68256
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0143626928329468,
      "learning_rate": 6.124185102841729e-07,
      "loss": 2.2809,
      "step": 68257
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0282119512557983,
      "learning_rate": 6.122766433261406e-07,
      "loss": 2.3028,
      "step": 68258
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1072980165481567,
      "learning_rate": 6.121347922828491e-07,
      "loss": 2.2975,
      "step": 68259
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0581631660461426,
      "learning_rate": 6.119929571545369e-07,
      "loss": 2.2969,
      "step": 68260
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0298813581466675,
      "learning_rate": 6.118511379414482e-07,
      "loss": 2.3228,
      "step": 68261
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.016336441040039,
      "learning_rate": 6.117093346438197e-07,
      "loss": 2.4572,
      "step": 68262
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.089194893836975,
      "learning_rate": 6.115675472618965e-07,
      "loss": 2.4165,
      "step": 68263
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1647018194198608,
      "learning_rate": 6.11425775795913e-07,
      "loss": 2.3556,
      "step": 68264
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1511423587799072,
      "learning_rate": 6.112840202461156e-07,
      "loss": 2.3746,
      "step": 68265
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1132614612579346,
      "learning_rate": 6.111422806127398e-07,
      "loss": 2.2655,
      "step": 68266
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.07473623752594,
      "learning_rate": 6.110005568960309e-07,
      "loss": 2.3417,
      "step": 68267
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1273915767669678,
      "learning_rate": 6.10858849096222e-07,
      "loss": 2.302,
      "step": 68268
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1135183572769165,
      "learning_rate": 6.107171572135606e-07,
      "loss": 2.4441,
      "step": 68269
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0664769411087036,
      "learning_rate": 6.105754812482812e-07,
      "loss": 2.4875,
      "step": 68270
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.052406668663025,
      "learning_rate": 6.104338212006278e-07,
      "loss": 2.3818,
      "step": 68271
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.995824933052063,
      "learning_rate": 6.102921770708381e-07,
      "loss": 2.4475,
      "step": 68272
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2106932401657104,
      "learning_rate": 6.101505488591552e-07,
      "loss": 2.2692,
      "step": 68273
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0586804151535034,
      "learning_rate": 6.100089365658157e-07,
      "loss": 2.5399,
      "step": 68274
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1179105043411255,
      "learning_rate": 6.098673401910627e-07,
      "loss": 2.451,
      "step": 68275
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2807581424713135,
      "learning_rate": 6.097257597351325e-07,
      "loss": 2.291,
      "step": 68276
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.210781216621399,
      "learning_rate": 6.095841951982706e-07,
      "loss": 2.3393,
      "step": 68277
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0230143070220947,
      "learning_rate": 6.094426465807113e-07,
      "loss": 2.2647,
      "step": 68278
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.135880470275879,
      "learning_rate": 6.093011138826987e-07,
      "loss": 2.307,
      "step": 68279
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.123029351234436,
      "learning_rate": 6.091595971044717e-07,
      "loss": 2.3382,
      "step": 68280
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.098536729812622,
      "learning_rate": 6.0901809624627e-07,
      "loss": 2.3435,
      "step": 68281
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0436757802963257,
      "learning_rate": 6.088766113083311e-07,
      "loss": 2.2673,
      "step": 68282
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1501401662826538,
      "learning_rate": 6.087351422908982e-07,
      "loss": 2.2957,
      "step": 68283
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0150469541549683,
      "learning_rate": 6.08593689194209e-07,
      "loss": 2.4127,
      "step": 68284
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0627994537353516,
      "learning_rate": 6.084522520185054e-07,
      "loss": 2.1948,
      "step": 68285
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6136525869369507,
      "learning_rate": 6.08310830764024e-07,
      "loss": 2.3817,
      "step": 68286
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2151068449020386,
      "learning_rate": 6.081694254310089e-07,
      "loss": 2.3785,
      "step": 68287
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2400004863739014,
      "learning_rate": 6.080280360196966e-07,
      "loss": 2.0755,
      "step": 68288
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0945675373077393,
      "learning_rate": 6.07886662530327e-07,
      "loss": 2.3216,
      "step": 68289
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1725244522094727,
      "learning_rate": 6.07745304963141e-07,
      "loss": 2.382,
      "step": 68290
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1343244314193726,
      "learning_rate": 6.076039633183762e-07,
      "loss": 2.4134,
      "step": 68291
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1291131973266602,
      "learning_rate": 6.074626375962755e-07,
      "loss": 2.5867,
      "step": 68292
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3134931325912476,
      "learning_rate": 6.073213277970747e-07,
      "loss": 2.3803,
      "step": 68293
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.113152265548706,
      "learning_rate": 6.071800339210188e-07,
      "loss": 2.4528,
      "step": 68294
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0496598482131958,
      "learning_rate": 6.0703875596834e-07,
      "loss": 2.4385,
      "step": 68295
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.128914475440979,
      "learning_rate": 6.068974939392836e-07,
      "loss": 2.1111,
      "step": 68296
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1027418375015259,
      "learning_rate": 6.06756247834086e-07,
      "loss": 2.1524,
      "step": 68297
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.030408501625061,
      "learning_rate": 6.066150176529884e-07,
      "loss": 2.3355,
      "step": 68298
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0520893335342407,
      "learning_rate": 6.064738033962281e-07,
      "loss": 2.4064,
      "step": 68299
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1101809740066528,
      "learning_rate": 6.063326050640483e-07,
      "loss": 2.32,
      "step": 68300
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1309252977371216,
      "learning_rate": 6.061914226566846e-07,
      "loss": 2.4564,
      "step": 68301
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1937543153762817,
      "learning_rate": 6.060502561743787e-07,
      "loss": 2.35,
      "step": 68302
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.982130229473114,
      "learning_rate": 6.059091056173672e-07,
      "loss": 2.1962,
      "step": 68303
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0621976852416992,
      "learning_rate": 6.057679709858921e-07,
      "loss": 2.4777,
      "step": 68304
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0945576429367065,
      "learning_rate": 6.056268522801912e-07,
      "loss": 2.4444,
      "step": 68305
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0002307891845703,
      "learning_rate": 6.054857495005062e-07,
      "loss": 2.2901,
      "step": 68306
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0744340419769287,
      "learning_rate": 6.053446626470738e-07,
      "loss": 2.1451,
      "step": 68307
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2191611528396606,
      "learning_rate": 6.052035917201338e-07,
      "loss": 2.304,
      "step": 68308
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9668719172477722,
      "learning_rate": 6.050625367199237e-07,
      "loss": 2.2896,
      "step": 68309
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0460867881774902,
      "learning_rate": 6.049214976466855e-07,
      "loss": 2.4744,
      "step": 68310
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0602951049804688,
      "learning_rate": 6.047804745006558e-07,
      "loss": 2.3117,
      "step": 68311
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.135419487953186,
      "learning_rate": 6.046394672820766e-07,
      "loss": 2.2664,
      "step": 68312
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1089165210723877,
      "learning_rate": 6.044984759911832e-07,
      "loss": 2.164,
      "step": 68313
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.986608624458313,
      "learning_rate": 6.043575006282187e-07,
      "loss": 2.467,
      "step": 68314
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2457983493804932,
      "learning_rate": 6.042165411934198e-07,
      "loss": 2.2778,
      "step": 68315
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1822460889816284,
      "learning_rate": 6.04075597687025e-07,
      "loss": 2.1229,
      "step": 68316
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1019515991210938,
      "learning_rate": 6.039346701092741e-07,
      "loss": 2.3233,
      "step": 68317
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.203365683555603,
      "learning_rate": 6.037937584604048e-07,
      "loss": 2.4669,
      "step": 68318
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0841026306152344,
      "learning_rate": 6.03652862740659e-07,
      "loss": 2.2423,
      "step": 68319
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1714030504226685,
      "learning_rate": 6.035119829502733e-07,
      "loss": 2.4794,
      "step": 68320
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5465903282165527,
      "learning_rate": 6.033711190894853e-07,
      "loss": 2.4574,
      "step": 68321
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9743200540542603,
      "learning_rate": 6.032302711585369e-07,
      "loss": 2.3441,
      "step": 68322
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1546907424926758,
      "learning_rate": 6.030894391576658e-07,
      "loss": 2.1658,
      "step": 68323
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.977410078048706,
      "learning_rate": 6.029486230871084e-07,
      "loss": 2.2204,
      "step": 68324
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0056054592132568,
      "learning_rate": 6.028078229471068e-07,
      "loss": 2.2673,
      "step": 68325
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0824471712112427,
      "learning_rate": 6.026670387378964e-07,
      "loss": 2.3646,
      "step": 68326
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0236899852752686,
      "learning_rate": 6.025262704597201e-07,
      "loss": 2.3217,
      "step": 68327
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.219772219657898,
      "learning_rate": 6.023855181128113e-07,
      "loss": 2.387,
      "step": 68328
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.16116464138031,
      "learning_rate": 6.022447816974142e-07,
      "loss": 2.3401,
      "step": 68329
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1819427013397217,
      "learning_rate": 6.021040612137618e-07,
      "loss": 2.2646,
      "step": 68330
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1359046697616577,
      "learning_rate": 6.019633566620975e-07,
      "loss": 2.3978,
      "step": 68331
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.013413667678833,
      "learning_rate": 6.018226680426564e-07,
      "loss": 2.329,
      "step": 68332
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.133398175239563,
      "learning_rate": 6.016819953556818e-07,
      "loss": 2.1705,
      "step": 68333
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.150891661643982,
      "learning_rate": 6.015413386014046e-07,
      "loss": 2.4143,
      "step": 68334
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1076135635375977,
      "learning_rate": 6.014006977800702e-07,
      "loss": 2.4186,
      "step": 68335
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2188376188278198,
      "learning_rate": 6.012600728919116e-07,
      "loss": 2.3255,
      "step": 68336
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1422775983810425,
      "learning_rate": 6.01119463937172e-07,
      "loss": 2.1686,
      "step": 68337
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0925014019012451,
      "learning_rate": 6.009788709160858e-07,
      "loss": 2.6275,
      "step": 68338
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1054784059524536,
      "learning_rate": 6.008382938288948e-07,
      "loss": 2.1811,
      "step": 68339
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1363425254821777,
      "learning_rate": 6.006977326758345e-07,
      "loss": 2.28,
      "step": 68340
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9519141316413879,
      "learning_rate": 6.005571874571458e-07,
      "loss": 2.2747,
      "step": 68341
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1703685522079468,
      "learning_rate": 6.004166581730631e-07,
      "loss": 2.3482,
      "step": 68342
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.029030203819275,
      "learning_rate": 6.002761448238292e-07,
      "loss": 2.1975,
      "step": 68343
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2374933958053589,
      "learning_rate": 6.001356474096808e-07,
      "loss": 2.2735,
      "step": 68344
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0555962324142456,
      "learning_rate": 5.999951659308534e-07,
      "loss": 2.2838,
      "step": 68345
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1702848672866821,
      "learning_rate": 5.998547003875888e-07,
      "loss": 2.4734,
      "step": 68346
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1467067003250122,
      "learning_rate": 5.997142507801235e-07,
      "loss": 2.4021,
      "step": 68347
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0932674407958984,
      "learning_rate": 5.99573817108694e-07,
      "loss": 2.3177,
      "step": 68348
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0702258348464966,
      "learning_rate": 5.994333993735413e-07,
      "loss": 2.2143,
      "step": 68349
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0763969421386719,
      "learning_rate": 5.992929975749017e-07,
      "loss": 2.5166,
      "step": 68350
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0451654195785522,
      "learning_rate": 5.991526117130131e-07,
      "loss": 2.1289,
      "step": 68351
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2164833545684814,
      "learning_rate": 5.99012241788115e-07,
      "loss": 2.2235,
      "step": 68352
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0768150091171265,
      "learning_rate": 5.98871887800444e-07,
      "loss": 2.3726,
      "step": 68353
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1390953063964844,
      "learning_rate": 5.987315497502389e-07,
      "loss": 2.5381,
      "step": 68354
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.004449486732483,
      "learning_rate": 5.985912276377359e-07,
      "loss": 2.3169,
      "step": 68355
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.067516565322876,
      "learning_rate": 5.984509214631751e-07,
      "loss": 2.4106,
      "step": 68356
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1144739389419556,
      "learning_rate": 5.983106312267928e-07,
      "loss": 2.3345,
      "step": 68357
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1629692316055298,
      "learning_rate": 5.981703569288289e-07,
      "loss": 2.3088,
      "step": 68358
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0502725839614868,
      "learning_rate": 5.980300985695198e-07,
      "loss": 2.3818,
      "step": 68359
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0784363746643066,
      "learning_rate": 5.97889856149102e-07,
      "loss": 2.3575,
      "step": 68360
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0844271183013916,
      "learning_rate": 5.977496296678143e-07,
      "loss": 2.2463,
      "step": 68361
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0784224271774292,
      "learning_rate": 5.976094191258963e-07,
      "loss": 2.267,
      "step": 68362
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1302672624588013,
      "learning_rate": 5.974692245235814e-07,
      "loss": 2.2501,
      "step": 68363
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0947984457015991,
      "learning_rate": 5.973290458611114e-07,
      "loss": 2.3301,
      "step": 68364
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1704895496368408,
      "learning_rate": 5.971888831387219e-07,
      "loss": 2.3302,
      "step": 68365
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1375436782836914,
      "learning_rate": 5.970487363566513e-07,
      "loss": 2.2255,
      "step": 68366
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1471116542816162,
      "learning_rate": 5.969086055151363e-07,
      "loss": 2.272,
      "step": 68367
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1207743883132935,
      "learning_rate": 5.967684906144166e-07,
      "loss": 2.3206,
      "step": 68368
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1270653009414673,
      "learning_rate": 5.966283916547266e-07,
      "loss": 2.2018,
      "step": 68369
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0241223573684692,
      "learning_rate": 5.96488308636306e-07,
      "loss": 2.2431,
      "step": 68370
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1668214797973633,
      "learning_rate": 5.963482415593924e-07,
      "loss": 2.3636,
      "step": 68371
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1413236856460571,
      "learning_rate": 5.962081904242212e-07,
      "loss": 2.3841,
      "step": 68372
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0925036668777466,
      "learning_rate": 5.960681552310321e-07,
      "loss": 2.2571,
      "step": 68373
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1632251739501953,
      "learning_rate": 5.959281359800628e-07,
      "loss": 2.4794,
      "step": 68374
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0619137287139893,
      "learning_rate": 5.957881326715465e-07,
      "loss": 2.1226,
      "step": 68375
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9538017511367798,
      "learning_rate": 5.956481453057261e-07,
      "loss": 2.2076,
      "step": 68376
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9976831674575806,
      "learning_rate": 5.95508173882835e-07,
      "loss": 2.4204,
      "step": 68377
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0418577194213867,
      "learning_rate": 5.953682184031107e-07,
      "loss": 2.1777,
      "step": 68378
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0948996543884277,
      "learning_rate": 5.95228278866794e-07,
      "loss": 2.1504,
      "step": 68379
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9592720866203308,
      "learning_rate": 5.950883552741172e-07,
      "loss": 2.3287,
      "step": 68380
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1977312564849854,
      "learning_rate": 5.94948447625322e-07,
      "loss": 2.3317,
      "step": 68381
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2001399993896484,
      "learning_rate": 5.948085559206418e-07,
      "loss": 2.4233,
      "step": 68382
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0230990648269653,
      "learning_rate": 5.946686801603163e-07,
      "loss": 2.2357,
      "step": 68383
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0595474243164062,
      "learning_rate": 5.94528820344581e-07,
      "loss": 2.1753,
      "step": 68384
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0428131818771362,
      "learning_rate": 5.943889764736754e-07,
      "loss": 2.0482,
      "step": 68385
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.08256196975708,
      "learning_rate": 5.942491485478352e-07,
      "loss": 2.2845,
      "step": 68386
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0289607048034668,
      "learning_rate": 5.941093365672968e-07,
      "loss": 2.2672,
      "step": 68387
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1114935874938965,
      "learning_rate": 5.939695405322965e-07,
      "loss": 2.2958,
      "step": 68388
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1131365299224854,
      "learning_rate": 5.938297604430742e-07,
      "loss": 2.3537,
      "step": 68389
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9985427856445312,
      "learning_rate": 5.936899962998633e-07,
      "loss": 2.3029,
      "step": 68390
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0028858184814453,
      "learning_rate": 5.935502481029043e-07,
      "loss": 2.0377,
      "step": 68391
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.065211534500122,
      "learning_rate": 5.934105158524306e-07,
      "loss": 2.1427,
      "step": 68392
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.107648491859436,
      "learning_rate": 5.93270799548682e-07,
      "loss": 2.2483,
      "step": 68393
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0927565097808838,
      "learning_rate": 5.931310991918938e-07,
      "loss": 2.3429,
      "step": 68394
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0956826210021973,
      "learning_rate": 5.929914147823035e-07,
      "loss": 2.3193,
      "step": 68395
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0360997915267944,
      "learning_rate": 5.928517463201477e-07,
      "loss": 2.239,
      "step": 68396
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0709378719329834,
      "learning_rate": 5.92712093805663e-07,
      "loss": 2.4714,
      "step": 68397
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1055951118469238,
      "learning_rate": 5.925724572390856e-07,
      "loss": 2.3666,
      "step": 68398
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0230927467346191,
      "learning_rate": 5.924328366206555e-07,
      "loss": 2.2781,
      "step": 68399
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0296727418899536,
      "learning_rate": 5.922932319506047e-07,
      "loss": 2.4284,
      "step": 68400
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1410741806030273,
      "learning_rate": 5.92153643229173e-07,
      "loss": 2.3777,
      "step": 68401
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0910052061080933,
      "learning_rate": 5.920140704565946e-07,
      "loss": 2.43,
      "step": 68402
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1322931051254272,
      "learning_rate": 5.918745136331083e-07,
      "loss": 2.3251,
      "step": 68403
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0772290229797363,
      "learning_rate": 5.917349727589495e-07,
      "loss": 2.5996,
      "step": 68404
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1868224143981934,
      "learning_rate": 5.915954478343555e-07,
      "loss": 2.4473,
      "step": 68405
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0005064010620117,
      "learning_rate": 5.914559388595631e-07,
      "loss": 2.2657,
      "step": 68406
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.15581214427948,
      "learning_rate": 5.913164458348075e-07,
      "loss": 2.2092,
      "step": 68407
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9611601233482361,
      "learning_rate": 5.911769687603264e-07,
      "loss": 2.3204,
      "step": 68408
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.102651834487915,
      "learning_rate": 5.91037507636354e-07,
      "loss": 2.2282,
      "step": 68409
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2960658073425293,
      "learning_rate": 5.908980624631311e-07,
      "loss": 2.3163,
      "step": 68410
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0418487787246704,
      "learning_rate": 5.907586332408888e-07,
      "loss": 2.5983,
      "step": 68411
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0365017652511597,
      "learning_rate": 5.906192199698701e-07,
      "loss": 2.3606,
      "step": 68412
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2048815488815308,
      "learning_rate": 5.904798226503039e-07,
      "loss": 2.27,
      "step": 68413
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2619284391403198,
      "learning_rate": 5.90340441282431e-07,
      "loss": 2.5357,
      "step": 68414
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1370587348937988,
      "learning_rate": 5.902010758664855e-07,
      "loss": 2.197,
      "step": 68415
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0439801216125488,
      "learning_rate": 5.900617264027064e-07,
      "loss": 2.2567,
      "step": 68416
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2211936712265015,
      "learning_rate": 5.899223928913278e-07,
      "loss": 2.2954,
      "step": 68417
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1504617929458618,
      "learning_rate": 5.897830753325872e-07,
      "loss": 2.4366,
      "step": 68418
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9551366567611694,
      "learning_rate": 5.89643773726718e-07,
      "loss": 2.484,
      "step": 68419
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1283209323883057,
      "learning_rate": 5.895044880739609e-07,
      "loss": 2.4064,
      "step": 68420
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2612147331237793,
      "learning_rate": 5.89365218374548e-07,
      "loss": 2.3115,
      "step": 68421
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0951926708221436,
      "learning_rate": 5.89225964628718e-07,
      "loss": 2.2267,
      "step": 68422
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0552047491073608,
      "learning_rate": 5.890867268367039e-07,
      "loss": 2.1088,
      "step": 68423
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.076918601989746,
      "learning_rate": 5.889475049987459e-07,
      "loss": 2.3936,
      "step": 68424
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0784075260162354,
      "learning_rate": 5.888082991150779e-07,
      "loss": 2.3174,
      "step": 68425
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9948449730873108,
      "learning_rate": 5.886691091859353e-07,
      "loss": 2.0714,
      "step": 68426
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.009210228919983,
      "learning_rate": 5.885299352115548e-07,
      "loss": 2.104,
      "step": 68427
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1986215114593506,
      "learning_rate": 5.883907771921715e-07,
      "loss": 2.2691,
      "step": 68428
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0775344371795654,
      "learning_rate": 5.882516351280221e-07,
      "loss": 2.4217,
      "step": 68429
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2263827323913574,
      "learning_rate": 5.881125090193429e-07,
      "loss": 2.3079,
      "step": 68430
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4217582941055298,
      "learning_rate": 5.879733988663683e-07,
      "loss": 2.4106,
      "step": 68431
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0504213571548462,
      "learning_rate": 5.878343046693369e-07,
      "loss": 2.3371,
      "step": 68432
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2014929056167603,
      "learning_rate": 5.876952264284818e-07,
      "loss": 2.351,
      "step": 68433
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9963043332099915,
      "learning_rate": 5.875561641440386e-07,
      "loss": 2.3773,
      "step": 68434
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3400784730911255,
      "learning_rate": 5.874171178162458e-07,
      "loss": 2.3633,
      "step": 68435
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.095867395401001,
      "learning_rate": 5.872780874453354e-07,
      "loss": 2.3912,
      "step": 68436
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0441893339157104,
      "learning_rate": 5.871390730315463e-07,
      "loss": 2.2559,
      "step": 68437
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0896217823028564,
      "learning_rate": 5.870000745751125e-07,
      "loss": 2.2495,
      "step": 68438
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1200093030929565,
      "learning_rate": 5.868610920762707e-07,
      "loss": 2.2168,
      "step": 68439
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0789562463760376,
      "learning_rate": 5.867221255352562e-07,
      "loss": 2.1974,
      "step": 68440
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0267367362976074,
      "learning_rate": 5.865831749523043e-07,
      "loss": 2.2672,
      "step": 68441
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0944031476974487,
      "learning_rate": 5.864442403276494e-07,
      "loss": 2.335,
      "step": 68442
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9758657217025757,
      "learning_rate": 5.8630532166153e-07,
      "loss": 2.2177,
      "step": 68443
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1043791770935059,
      "learning_rate": 5.861664189541772e-07,
      "loss": 2.3915,
      "step": 68444
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.056735634803772,
      "learning_rate": 5.860275322058318e-07,
      "loss": 2.4854,
      "step": 68445
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2567335367202759,
      "learning_rate": 5.858886614167236e-07,
      "loss": 2.2457,
      "step": 68446
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.037352204322815,
      "learning_rate": 5.857498065870937e-07,
      "loss": 2.4221,
      "step": 68447
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0940361022949219,
      "learning_rate": 5.856109677171729e-07,
      "loss": 2.2866,
      "step": 68448
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2348389625549316,
      "learning_rate": 5.854721448071999e-07,
      "loss": 2.2032,
      "step": 68449
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0923722982406616,
      "learning_rate": 5.853333378574066e-07,
      "loss": 2.4101,
      "step": 68450
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9773879051208496,
      "learning_rate": 5.85194546868032e-07,
      "loss": 2.3646,
      "step": 68451
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1224992275238037,
      "learning_rate": 5.850557718393102e-07,
      "loss": 2.5003,
      "step": 68452
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.099494218826294,
      "learning_rate": 5.849170127714754e-07,
      "loss": 2.3196,
      "step": 68453
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.164156436920166,
      "learning_rate": 5.847782696647619e-07,
      "loss": 2.3479,
      "step": 68454
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0695712566375732,
      "learning_rate": 5.846395425194085e-07,
      "loss": 2.2746,
      "step": 68455
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.327368974685669,
      "learning_rate": 5.845008313356459e-07,
      "loss": 2.5416,
      "step": 68456
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0839922428131104,
      "learning_rate": 5.843621361137142e-07,
      "loss": 2.3081,
      "step": 68457
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2227569818496704,
      "learning_rate": 5.84223456853843e-07,
      "loss": 2.4383,
      "step": 68458
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0271660089492798,
      "learning_rate": 5.840847935562733e-07,
      "loss": 2.171,
      "step": 68459
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.141524076461792,
      "learning_rate": 5.839461462212348e-07,
      "loss": 2.2647,
      "step": 68460
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1565366983413696,
      "learning_rate": 5.838075148489664e-07,
      "loss": 2.1789,
      "step": 68461
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3124148845672607,
      "learning_rate": 5.836688994397022e-07,
      "loss": 2.2697,
      "step": 68462
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.098848819732666,
      "learning_rate": 5.835302999936743e-07,
      "loss": 2.3391,
      "step": 68463
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9639461040496826,
      "learning_rate": 5.833917165111225e-07,
      "loss": 2.1344,
      "step": 68464
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.169898271560669,
      "learning_rate": 5.832531489922788e-07,
      "loss": 2.3836,
      "step": 68465
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0352025032043457,
      "learning_rate": 5.831145974373765e-07,
      "loss": 2.2528,
      "step": 68466
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1306551694869995,
      "learning_rate": 5.829760618466551e-07,
      "loss": 2.1675,
      "step": 68467
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1436265707015991,
      "learning_rate": 5.828375422203459e-07,
      "loss": 2.3663,
      "step": 68468
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1317660808563232,
      "learning_rate": 5.826990385586839e-07,
      "loss": 2.2428,
      "step": 68469
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1445610523223877,
      "learning_rate": 5.825605508619059e-07,
      "loss": 2.3975,
      "step": 68470
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.078277826309204,
      "learning_rate": 5.824220791302448e-07,
      "loss": 1.9287,
      "step": 68471
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1745672225952148,
      "learning_rate": 5.822836233639362e-07,
      "loss": 2.0226,
      "step": 68472
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.269557237625122,
      "learning_rate": 5.821451835632141e-07,
      "loss": 2.3354,
      "step": 68473
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.117250919342041,
      "learning_rate": 5.820067597283152e-07,
      "loss": 2.3681,
      "step": 68474
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.226842999458313,
      "learning_rate": 5.818683518594715e-07,
      "loss": 2.1478,
      "step": 68475
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1471563577651978,
      "learning_rate": 5.817299599569193e-07,
      "loss": 2.3237,
      "step": 68476
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0997581481933594,
      "learning_rate": 5.815915840208919e-07,
      "loss": 2.3222,
      "step": 68477
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0803459882736206,
      "learning_rate": 5.814532240516279e-07,
      "loss": 2.5246,
      "step": 68478
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.004446029663086,
      "learning_rate": 5.813148800493562e-07,
      "loss": 2.3383,
      "step": 68479
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.09161376953125,
      "learning_rate": 5.811765520143142e-07,
      "loss": 2.2143,
      "step": 68480
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.132664680480957,
      "learning_rate": 5.810382399467363e-07,
      "loss": 2.3088,
      "step": 68481
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9567316770553589,
      "learning_rate": 5.808999438468566e-07,
      "loss": 2.2206,
      "step": 68482
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1279722452163696,
      "learning_rate": 5.807616637149083e-07,
      "loss": 2.2247,
      "step": 68483
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0237181186676025,
      "learning_rate": 5.80623399551129e-07,
      "loss": 2.109,
      "step": 68484
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9927612543106079,
      "learning_rate": 5.804851513557508e-07,
      "loss": 2.5083,
      "step": 68485
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0759690999984741,
      "learning_rate": 5.80346919129009e-07,
      "loss": 2.118,
      "step": 68486
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1340113878250122,
      "learning_rate": 5.802087028711356e-07,
      "loss": 2.4631,
      "step": 68487
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0180597305297852,
      "learning_rate": 5.800705025823695e-07,
      "loss": 2.4155,
      "step": 68488
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5813251733779907,
      "learning_rate": 5.799323182629412e-07,
      "loss": 2.1551,
      "step": 68489
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1574945449829102,
      "learning_rate": 5.797941499130854e-07,
      "loss": 2.5215,
      "step": 68490
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1724404096603394,
      "learning_rate": 5.796559975330384e-07,
      "loss": 2.2846,
      "step": 68491
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.385970115661621,
      "learning_rate": 5.795178611230335e-07,
      "loss": 2.5257,
      "step": 68492
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0998833179473877,
      "learning_rate": 5.793797406833013e-07,
      "loss": 2.2921,
      "step": 68493
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.327559471130371,
      "learning_rate": 5.792416362140818e-07,
      "loss": 2.4332,
      "step": 68494
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.13021981716156,
      "learning_rate": 5.791035477156048e-07,
      "loss": 2.4026,
      "step": 68495
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1026281118392944,
      "learning_rate": 5.789654751881069e-07,
      "loss": 2.1986,
      "step": 68496
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9707484245300293,
      "learning_rate": 5.788274186318221e-07,
      "loss": 2.3245,
      "step": 68497
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2385538816452026,
      "learning_rate": 5.786893780469816e-07,
      "loss": 2.4891,
      "step": 68498
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0525190830230713,
      "learning_rate": 5.785513534338227e-07,
      "loss": 2.3803,
      "step": 68499
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0724773406982422,
      "learning_rate": 5.784133447925766e-07,
      "loss": 2.1909,
      "step": 68500
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0350269079208374,
      "learning_rate": 5.782753521234808e-07,
      "loss": 2.1187,
      "step": 68501
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1775997877120972,
      "learning_rate": 5.781373754267649e-07,
      "loss": 2.3925,
      "step": 68502
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1768968105316162,
      "learning_rate": 5.779994147026669e-07,
      "loss": 2.3272,
      "step": 68503
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0503559112548828,
      "learning_rate": 5.778614699514196e-07,
      "loss": 2.3715,
      "step": 68504
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0053352117538452,
      "learning_rate": 5.777235411732551e-07,
      "loss": 2.2459,
      "step": 68505
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1953727006912231,
      "learning_rate": 5.775856283684067e-07,
      "loss": 2.4121,
      "step": 68506
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1505601406097412,
      "learning_rate": 5.774477315371119e-07,
      "loss": 2.396,
      "step": 68507
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0741544961929321,
      "learning_rate": 5.773098506796005e-07,
      "loss": 2.3091,
      "step": 68508
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0925860404968262,
      "learning_rate": 5.7717198579611e-07,
      "loss": 2.3281,
      "step": 68509
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0180433988571167,
      "learning_rate": 5.770341368868704e-07,
      "loss": 2.4824,
      "step": 68510
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.38137686252594,
      "learning_rate": 5.76896303952118e-07,
      "loss": 2.3394,
      "step": 68511
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1624594926834106,
      "learning_rate": 5.76758486992085e-07,
      "loss": 2.502,
      "step": 68512
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0313929319381714,
      "learning_rate": 5.766206860070078e-07,
      "loss": 2.1907,
      "step": 68513
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.234268307685852,
      "learning_rate": 5.764829009971151e-07,
      "loss": 2.5357,
      "step": 68514
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.061998724937439,
      "learning_rate": 5.763451319626457e-07,
      "loss": 2.6098,
      "step": 68515
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0589580535888672,
      "learning_rate": 5.762073789038281e-07,
      "loss": 2.4036,
      "step": 68516
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1740648746490479,
      "learning_rate": 5.760696418209011e-07,
      "loss": 2.2746,
      "step": 68517
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0242341756820679,
      "learning_rate": 5.759319207140945e-07,
      "loss": 2.231,
      "step": 68518
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0619298219680786,
      "learning_rate": 5.757942155836438e-07,
      "loss": 2.2543,
      "step": 68519
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1307706832885742,
      "learning_rate": 5.756565264297798e-07,
      "loss": 2.4783,
      "step": 68520
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.068149447441101,
      "learning_rate": 5.755188532527389e-07,
      "loss": 2.2431,
      "step": 68521
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1148566007614136,
      "learning_rate": 5.753811960527511e-07,
      "loss": 2.2782,
      "step": 68522
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2754031419754028,
      "learning_rate": 5.752435548300551e-07,
      "loss": 2.3467,
      "step": 68523
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1450912952423096,
      "learning_rate": 5.751059295848793e-07,
      "loss": 2.1976,
      "step": 68524
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1461879014968872,
      "learning_rate": 5.749683203174583e-07,
      "loss": 2.4658,
      "step": 68525
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0052074193954468,
      "learning_rate": 5.748307270280263e-07,
      "loss": 2.2467,
      "step": 68526
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.160965085029602,
      "learning_rate": 5.746931497168151e-07,
      "loss": 2.3975,
      "step": 68527
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1115670204162598,
      "learning_rate": 5.745555883840603e-07,
      "loss": 2.2295,
      "step": 68528
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0926133394241333,
      "learning_rate": 5.744180430299917e-07,
      "loss": 2.2793,
      "step": 68529
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2713640928268433,
      "learning_rate": 5.742805136548469e-07,
      "loss": 2.2403,
      "step": 68530
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0981539487838745,
      "learning_rate": 5.741430002588566e-07,
      "loss": 2.3371,
      "step": 68531
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1650669574737549,
      "learning_rate": 5.740055028422531e-07,
      "loss": 2.4497,
      "step": 68532
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0522912740707397,
      "learning_rate": 5.738680214052694e-07,
      "loss": 2.4149,
      "step": 68533
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0899463891983032,
      "learning_rate": 5.737305559481409e-07,
      "loss": 2.5398,
      "step": 68534
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0867608785629272,
      "learning_rate": 5.735931064710976e-07,
      "loss": 2.2224,
      "step": 68535
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.126250147819519,
      "learning_rate": 5.734556729743768e-07,
      "loss": 2.0981,
      "step": 68536
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0647673606872559,
      "learning_rate": 5.733182554582062e-07,
      "loss": 2.3181,
      "step": 68537
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0418003797531128,
      "learning_rate": 5.731808539228223e-07,
      "loss": 2.3819,
      "step": 68538
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1963179111480713,
      "learning_rate": 5.730434683684572e-07,
      "loss": 2.3095,
      "step": 68539
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1187371015548706,
      "learning_rate": 5.729060987953439e-07,
      "loss": 2.4647,
      "step": 68540
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2993769645690918,
      "learning_rate": 5.727687452037145e-07,
      "loss": 2.1607,
      "step": 68541
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0633869171142578,
      "learning_rate": 5.726314075938033e-07,
      "loss": 2.3391,
      "step": 68542
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1134841442108154,
      "learning_rate": 5.724940859658412e-07,
      "loss": 2.4508,
      "step": 68543
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.155348777770996,
      "learning_rate": 5.723567803200658e-07,
      "loss": 2.3569,
      "step": 68544
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0316218137741089,
      "learning_rate": 5.722194906567025e-07,
      "loss": 2.4194,
      "step": 68545
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0880869626998901,
      "learning_rate": 5.720822169759888e-07,
      "loss": 2.4266,
      "step": 68546
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1266298294067383,
      "learning_rate": 5.719449592781557e-07,
      "loss": 2.2494,
      "step": 68547
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.077417254447937,
      "learning_rate": 5.718077175634373e-07,
      "loss": 2.3742,
      "step": 68548
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1056522130966187,
      "learning_rate": 5.716704918320637e-07,
      "loss": 2.2521,
      "step": 68549
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1457819938659668,
      "learning_rate": 5.715332820842712e-07,
      "loss": 2.3973,
      "step": 68550
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1703647375106812,
      "learning_rate": 5.713960883202907e-07,
      "loss": 2.3268,
      "step": 68551
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.380016803741455,
      "learning_rate": 5.712589105403532e-07,
      "loss": 2.3703,
      "step": 68552
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.123598575592041,
      "learning_rate": 5.71121748744694e-07,
      "loss": 2.4154,
      "step": 68553
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.069278597831726,
      "learning_rate": 5.70984602933543e-07,
      "loss": 2.4176,
      "step": 68554
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.029903769493103,
      "learning_rate": 5.708474731071356e-07,
      "loss": 2.2031,
      "step": 68555
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.140328288078308,
      "learning_rate": 5.707103592657004e-07,
      "loss": 2.2025,
      "step": 68556
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0671464204788208,
      "learning_rate": 5.705732614094739e-07,
      "loss": 2.1439,
      "step": 68557
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0859241485595703,
      "learning_rate": 5.70436179538687e-07,
      "loss": 2.2332,
      "step": 68558
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0181211233139038,
      "learning_rate": 5.702991136535718e-07,
      "loss": 2.4324,
      "step": 68559
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0247021913528442,
      "learning_rate": 5.701620637543593e-07,
      "loss": 2.2172,
      "step": 68560
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0421533584594727,
      "learning_rate": 5.700250298412846e-07,
      "loss": 2.3214,
      "step": 68561
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0744035243988037,
      "learning_rate": 5.698880119145767e-07,
      "loss": 2.1505,
      "step": 68562
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1470063924789429,
      "learning_rate": 5.697510099744719e-07,
      "loss": 2.2925,
      "step": 68563
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.027951717376709,
      "learning_rate": 5.696140240211978e-07,
      "loss": 2.3071,
      "step": 68564
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9760379791259766,
      "learning_rate": 5.694770540549921e-07,
      "loss": 2.259,
      "step": 68565
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1081560850143433,
      "learning_rate": 5.693401000760812e-07,
      "loss": 2.1439,
      "step": 68566
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.109814167022705,
      "learning_rate": 5.692031620847027e-07,
      "loss": 2.4698,
      "step": 68567
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0825275182724,
      "learning_rate": 5.690662400810843e-07,
      "loss": 2.4121,
      "step": 68568
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9606046080589294,
      "learning_rate": 5.689293340654611e-07,
      "loss": 2.2409,
      "step": 68569
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1187816858291626,
      "learning_rate": 5.687924440380654e-07,
      "loss": 2.2213,
      "step": 68570
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0851757526397705,
      "learning_rate": 5.68655569999127e-07,
      "loss": 2.2109,
      "step": 68571
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1458208560943604,
      "learning_rate": 5.685187119488778e-07,
      "loss": 2.1663,
      "step": 68572
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0936131477355957,
      "learning_rate": 5.683818698875521e-07,
      "loss": 2.3162,
      "step": 68573
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0416512489318848,
      "learning_rate": 5.682450438153797e-07,
      "loss": 2.3691,
      "step": 68574
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0656172037124634,
      "learning_rate": 5.681082337325949e-07,
      "loss": 2.096,
      "step": 68575
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0950632095336914,
      "learning_rate": 5.679714396394265e-07,
      "loss": 2.4436,
      "step": 68576
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0706056356430054,
      "learning_rate": 5.678346615361096e-07,
      "loss": 2.0735,
      "step": 68577
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.086159586906433,
      "learning_rate": 5.676978994228732e-07,
      "loss": 2.41,
      "step": 68578
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9930902123451233,
      "learning_rate": 5.675611532999526e-07,
      "loss": 2.2688,
      "step": 68579
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0236752033233643,
      "learning_rate": 5.674244231675774e-07,
      "loss": 2.2413,
      "step": 68580
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.328161358833313,
      "learning_rate": 5.672877090259788e-07,
      "loss": 2.3938,
      "step": 68581
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1019703149795532,
      "learning_rate": 5.671510108753897e-07,
      "loss": 2.1508,
      "step": 68582
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0903241634368896,
      "learning_rate": 5.670143287160412e-07,
      "loss": 2.3421,
      "step": 68583
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3219890594482422,
      "learning_rate": 5.668776625481665e-07,
      "loss": 2.3742,
      "step": 68584
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9662983417510986,
      "learning_rate": 5.667410123719964e-07,
      "loss": 2.6426,
      "step": 68585
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0290586948394775,
      "learning_rate": 5.666043781877617e-07,
      "loss": 2.3376,
      "step": 68586
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.985232412815094,
      "learning_rate": 5.664677599956936e-07,
      "loss": 2.4628,
      "step": 68587
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0298116207122803,
      "learning_rate": 5.663311577960262e-07,
      "loss": 2.1522,
      "step": 68588
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.041006088256836,
      "learning_rate": 5.661945715889883e-07,
      "loss": 2.2565,
      "step": 68589
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1039650440216064,
      "learning_rate": 5.660580013748141e-07,
      "loss": 2.2439,
      "step": 68590
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1506657600402832,
      "learning_rate": 5.659214471537322e-07,
      "loss": 2.3594,
      "step": 68591
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.198054313659668,
      "learning_rate": 5.65784908925977e-07,
      "loss": 2.2652,
      "step": 68592
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.138139247894287,
      "learning_rate": 5.656483866917772e-07,
      "loss": 2.2425,
      "step": 68593
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2531037330627441,
      "learning_rate": 5.65511880451367e-07,
      "loss": 2.3415,
      "step": 68594
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0668503046035767,
      "learning_rate": 5.653753902049764e-07,
      "loss": 2.2458,
      "step": 68595
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1925716400146484,
      "learning_rate": 5.652389159528371e-07,
      "loss": 2.5439,
      "step": 68596
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0695322751998901,
      "learning_rate": 5.651024576951803e-07,
      "loss": 2.3672,
      "step": 68597
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0404945611953735,
      "learning_rate": 5.649660154322378e-07,
      "loss": 2.3928,
      "step": 68598
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1608004570007324,
      "learning_rate": 5.648295891642396e-07,
      "loss": 2.3938,
      "step": 68599
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.110702633857727,
      "learning_rate": 5.646931788914178e-07,
      "loss": 2.263,
      "step": 68600
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0573537349700928,
      "learning_rate": 5.645567846140032e-07,
      "loss": 2.3978,
      "step": 68601
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1125646829605103,
      "learning_rate": 5.644204063322289e-07,
      "loss": 2.319,
      "step": 68602
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9988996982574463,
      "learning_rate": 5.642840440463226e-07,
      "loss": 2.4422,
      "step": 68603
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0991575717926025,
      "learning_rate": 5.641476977565197e-07,
      "loss": 2.4187,
      "step": 68604
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0437921285629272,
      "learning_rate": 5.640113674630477e-07,
      "loss": 2.2486,
      "step": 68605
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1510484218597412,
      "learning_rate": 5.638750531661396e-07,
      "loss": 2.4733,
      "step": 68606
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2112364768981934,
      "learning_rate": 5.637387548660278e-07,
      "loss": 2.4625,
      "step": 68607
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0657981634140015,
      "learning_rate": 5.636024725629386e-07,
      "loss": 2.225,
      "step": 68608
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0639653205871582,
      "learning_rate": 5.634662062571084e-07,
      "loss": 2.3436,
      "step": 68609
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1100400686264038,
      "learning_rate": 5.633299559487659e-07,
      "loss": 2.4135,
      "step": 68610
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.215172529220581,
      "learning_rate": 5.631937216381411e-07,
      "loss": 2.4766,
      "step": 68611
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.310795545578003,
      "learning_rate": 5.630575033254659e-07,
      "loss": 2.4158,
      "step": 68612
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0650285482406616,
      "learning_rate": 5.629213010109713e-07,
      "loss": 2.2372,
      "step": 68613
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1410813331604004,
      "learning_rate": 5.627851146948892e-07,
      "loss": 2.2705,
      "step": 68614
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.083057165145874,
      "learning_rate": 5.626489443774496e-07,
      "loss": 2.2987,
      "step": 68615
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.036466121673584,
      "learning_rate": 5.625127900588811e-07,
      "loss": 2.1856,
      "step": 68616
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2407371997833252,
      "learning_rate": 5.623766517394181e-07,
      "loss": 2.3252,
      "step": 68617
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.044695496559143,
      "learning_rate": 5.62240529419289e-07,
      "loss": 2.4443,
      "step": 68618
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1069384813308716,
      "learning_rate": 5.621044230987271e-07,
      "loss": 2.3654,
      "step": 68619
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1705999374389648,
      "learning_rate": 5.619683327779591e-07,
      "loss": 2.2924,
      "step": 68620
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1017098426818848,
      "learning_rate": 5.6183225845722e-07,
      "loss": 2.2869,
      "step": 68621
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1686217784881592,
      "learning_rate": 5.616962001367376e-07,
      "loss": 2.3226,
      "step": 68622
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0060513019561768,
      "learning_rate": 5.615601578167463e-07,
      "loss": 2.3299,
      "step": 68623
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2922508716583252,
      "learning_rate": 5.6142413149747e-07,
      "loss": 2.4109,
      "step": 68624
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1335794925689697,
      "learning_rate": 5.612881211791455e-07,
      "loss": 2.5614,
      "step": 68625
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1120026111602783,
      "learning_rate": 5.611521268620001e-07,
      "loss": 2.316,
      "step": 68626
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0570474863052368,
      "learning_rate": 5.610161485462662e-07,
      "loss": 2.4721,
      "step": 68627
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2940797805786133,
      "learning_rate": 5.608801862321733e-07,
      "loss": 2.2975,
      "step": 68628
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.023308515548706,
      "learning_rate": 5.607442399199525e-07,
      "loss": 2.3808,
      "step": 68629
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0691758394241333,
      "learning_rate": 5.606083096098324e-07,
      "loss": 2.4607,
      "step": 68630
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1097898483276367,
      "learning_rate": 5.604723953020463e-07,
      "loss": 2.4334,
      "step": 68631
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0606027841567993,
      "learning_rate": 5.603364969968217e-07,
      "loss": 2.2603,
      "step": 68632
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.053403615951538,
      "learning_rate": 5.602006146943928e-07,
      "loss": 2.3142,
      "step": 68633
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.11927330493927,
      "learning_rate": 5.600647483949861e-07,
      "loss": 2.2529,
      "step": 68634
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2162190675735474,
      "learning_rate": 5.599288980988337e-07,
      "loss": 2.3018,
      "step": 68635
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0721861124038696,
      "learning_rate": 5.597930638061666e-07,
      "loss": 2.2928,
      "step": 68636
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0883029699325562,
      "learning_rate": 5.596572455172144e-07,
      "loss": 2.4554,
      "step": 68637
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2248077392578125,
      "learning_rate": 5.59521443232206e-07,
      "loss": 2.1807,
      "step": 68638
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.117195963859558,
      "learning_rate": 5.593856569513734e-07,
      "loss": 2.3062,
      "step": 68639
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0928680896759033,
      "learning_rate": 5.592498866749452e-07,
      "loss": 2.2265,
      "step": 68640
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2600271701812744,
      "learning_rate": 5.591141324031535e-07,
      "loss": 2.3904,
      "step": 68641
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0432584285736084,
      "learning_rate": 5.589783941362281e-07,
      "loss": 2.1693,
      "step": 68642
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.054594874382019,
      "learning_rate": 5.588426718743967e-07,
      "loss": 1.9763,
      "step": 68643
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1947730779647827,
      "learning_rate": 5.587069656178923e-07,
      "loss": 2.197,
      "step": 68644
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0545800924301147,
      "learning_rate": 5.585712753669425e-07,
      "loss": 2.1394,
      "step": 68645
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9414003491401672,
      "learning_rate": 5.584356011217795e-07,
      "loss": 2.2984,
      "step": 68646
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0854047536849976,
      "learning_rate": 5.582999428826319e-07,
      "loss": 2.2418,
      "step": 68647
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0437227487564087,
      "learning_rate": 5.581643006497317e-07,
      "loss": 2.3811,
      "step": 68648
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0224350690841675,
      "learning_rate": 5.580286744233066e-07,
      "loss": 2.3852,
      "step": 68649
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.11266028881073,
      "learning_rate": 5.578930642035884e-07,
      "loss": 2.3977,
      "step": 68650
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.074729323387146,
      "learning_rate": 5.577574699908039e-07,
      "loss": 2.4367,
      "step": 68651
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.106852412223816,
      "learning_rate": 5.576218917851861e-07,
      "loss": 2.238,
      "step": 68652
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1900659799575806,
      "learning_rate": 5.574863295869626e-07,
      "loss": 2.3961,
      "step": 68653
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0682181119918823,
      "learning_rate": 5.573507833963654e-07,
      "loss": 2.2422,
      "step": 68654
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0443588495254517,
      "learning_rate": 5.572152532136221e-07,
      "loss": 2.112,
      "step": 68655
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0914775133132935,
      "learning_rate": 5.570797390389648e-07,
      "loss": 2.2901,
      "step": 68656
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0622014999389648,
      "learning_rate": 5.569442408726212e-07,
      "loss": 2.4996,
      "step": 68657
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1904412508010864,
      "learning_rate": 5.56808758714823e-07,
      "loss": 2.324,
      "step": 68658
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0635710954666138,
      "learning_rate": 5.566732925657969e-07,
      "loss": 2.256,
      "step": 68659
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1725488901138306,
      "learning_rate": 5.565378424257761e-07,
      "loss": 2.0719,
      "step": 68660
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0241074562072754,
      "learning_rate": 5.564024082949871e-07,
      "loss": 2.2104,
      "step": 68661
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0523635149002075,
      "learning_rate": 5.562669901736617e-07,
      "loss": 2.3886,
      "step": 68662
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1046557426452637,
      "learning_rate": 5.561315880620288e-07,
      "loss": 2.2767,
      "step": 68663
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.04225492477417,
      "learning_rate": 5.559962019603182e-07,
      "loss": 2.1119,
      "step": 68664
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1784483194351196,
      "learning_rate": 5.558608318687575e-07,
      "loss": 2.4737,
      "step": 68665
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0143815279006958,
      "learning_rate": 5.557254777875787e-07,
      "loss": 2.4738,
      "step": 68666
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.273748755455017,
      "learning_rate": 5.555901397170094e-07,
      "loss": 2.4124,
      "step": 68667
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.207867980003357,
      "learning_rate": 5.554548176572805e-07,
      "loss": 2.3325,
      "step": 68668
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0055079460144043,
      "learning_rate": 5.553195116086218e-07,
      "loss": 2.3218,
      "step": 68669
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4474058151245117,
      "learning_rate": 5.551842215712589e-07,
      "loss": 2.5036,
      "step": 68670
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0143473148345947,
      "learning_rate": 5.550489475454268e-07,
      "loss": 2.3033,
      "step": 68671
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0631740093231201,
      "learning_rate": 5.549136895313501e-07,
      "loss": 2.2689,
      "step": 68672
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0068378448486328,
      "learning_rate": 5.547784475292606e-07,
      "loss": 2.4084,
      "step": 68673
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.066566824913025,
      "learning_rate": 5.54643221539386e-07,
      "loss": 2.3997,
      "step": 68674
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.03040611743927,
      "learning_rate": 5.545080115619583e-07,
      "loss": 2.2975,
      "step": 68675
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0524622201919556,
      "learning_rate": 5.543728175972041e-07,
      "loss": 2.1804,
      "step": 68676
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0801751613616943,
      "learning_rate": 5.542376396453542e-07,
      "loss": 2.1397,
      "step": 68677
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0627861022949219,
      "learning_rate": 5.54102477706635e-07,
      "loss": 2.4285,
      "step": 68678
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1580909490585327,
      "learning_rate": 5.539673317812788e-07,
      "loss": 2.369,
      "step": 68679
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1405023336410522,
      "learning_rate": 5.53832201869513e-07,
      "loss": 2.3614,
      "step": 68680
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1400716304779053,
      "learning_rate": 5.536970879715676e-07,
      "loss": 2.129,
      "step": 68681
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.070020318031311,
      "learning_rate": 5.535619900876699e-07,
      "loss": 2.2735,
      "step": 68682
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1303421258926392,
      "learning_rate": 5.534269082180521e-07,
      "loss": 2.4677,
      "step": 68683
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0045084953308105,
      "learning_rate": 5.532918423629397e-07,
      "loss": 2.189,
      "step": 68684
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.020258903503418,
      "learning_rate": 5.531567925225645e-07,
      "loss": 2.1675,
      "step": 68685
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3157670497894287,
      "learning_rate": 5.530217586971531e-07,
      "loss": 2.305,
      "step": 68686
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0513845682144165,
      "learning_rate": 5.528867408869376e-07,
      "loss": 2.1878,
      "step": 68687
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0320428609848022,
      "learning_rate": 5.527517390921422e-07,
      "loss": 2.2473,
      "step": 68688
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0852257013320923,
      "learning_rate": 5.526167533130023e-07,
      "loss": 2.2109,
      "step": 68689
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1985899209976196,
      "learning_rate": 5.524817835497398e-07,
      "loss": 2.2865,
      "step": 68690
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.122936487197876,
      "learning_rate": 5.52346829802588e-07,
      "loss": 2.2641,
      "step": 68691
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0698788166046143,
      "learning_rate": 5.522118920717723e-07,
      "loss": 2.2646,
      "step": 68692
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9695802927017212,
      "learning_rate": 5.520769703575257e-07,
      "loss": 2.1599,
      "step": 68693
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0588457584381104,
      "learning_rate": 5.519420646600726e-07,
      "loss": 2.317,
      "step": 68694
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0741621255874634,
      "learning_rate": 5.51807174979645e-07,
      "loss": 2.2525,
      "step": 68695
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.017557144165039,
      "learning_rate": 5.516723013164693e-07,
      "loss": 2.0015,
      "step": 68696
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0018455982208252,
      "learning_rate": 5.515374436707765e-07,
      "loss": 2.4162,
      "step": 68697
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3534356355667114,
      "learning_rate": 5.514026020427943e-07,
      "loss": 2.1677,
      "step": 68698
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.094112753868103,
      "learning_rate": 5.512677764327489e-07,
      "loss": 2.2398,
      "step": 68699
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.191634178161621,
      "learning_rate": 5.511329668408727e-07,
      "loss": 2.5781,
      "step": 68700
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0189553499221802,
      "learning_rate": 5.509981732673908e-07,
      "loss": 2.1754,
      "step": 68701
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1106122732162476,
      "learning_rate": 5.508633957125353e-07,
      "loss": 2.3316,
      "step": 68702
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1126774549484253,
      "learning_rate": 5.507286341765317e-07,
      "loss": 2.1634,
      "step": 68703
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0591671466827393,
      "learning_rate": 5.505938886596107e-07,
      "loss": 2.413,
      "step": 68704
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2158137559890747,
      "learning_rate": 5.504591591619968e-07,
      "loss": 2.4155,
      "step": 68705
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.011677622795105,
      "learning_rate": 5.50324445683923e-07,
      "loss": 2.3148,
      "step": 68706
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.022420883178711,
      "learning_rate": 5.501897482256147e-07,
      "loss": 2.3875,
      "step": 68707
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.007840871810913,
      "learning_rate": 5.500550667873017e-07,
      "loss": 2.1672,
      "step": 68708
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0784343481063843,
      "learning_rate": 5.499204013692116e-07,
      "loss": 2.2913,
      "step": 68709
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.101159930229187,
      "learning_rate": 5.497857519715743e-07,
      "loss": 2.1645,
      "step": 68710
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0571964979171753,
      "learning_rate": 5.496511185946152e-07,
      "loss": 2.2236,
      "step": 68711
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1158524751663208,
      "learning_rate": 5.495165012385651e-07,
      "loss": 2.2114,
      "step": 68712
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.621291160583496,
      "learning_rate": 5.493818999036504e-07,
      "loss": 2.2286,
      "step": 68713
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1989668607711792,
      "learning_rate": 5.492473145901011e-07,
      "loss": 2.2259,
      "step": 68714
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2165952920913696,
      "learning_rate": 5.491127452981448e-07,
      "loss": 2.4108,
      "step": 68715
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.210547924041748,
      "learning_rate": 5.489781920280091e-07,
      "loss": 2.4194,
      "step": 68716
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0299419164657593,
      "learning_rate": 5.488436547799203e-07,
      "loss": 2.5083,
      "step": 68717
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9528104662895203,
      "learning_rate": 5.487091335541106e-07,
      "loss": 2.2994,
      "step": 68718
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0259284973144531,
      "learning_rate": 5.485746283508042e-07,
      "loss": 2.1066,
      "step": 68719
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0224499702453613,
      "learning_rate": 5.484401391702321e-07,
      "loss": 2.2256,
      "step": 68720
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0499454736709595,
      "learning_rate": 5.483056660126195e-07,
      "loss": 2.2532,
      "step": 68721
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1711982488632202,
      "learning_rate": 5.481712088781977e-07,
      "loss": 2.3641,
      "step": 68722
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.9893646836280823,
      "learning_rate": 5.480367677671905e-07,
      "loss": 2.2705,
      "step": 68723
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0338412523269653,
      "learning_rate": 5.479023426798302e-07,
      "loss": 2.3369,
      "step": 68724
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0554081201553345,
      "learning_rate": 5.477679336163422e-07,
      "loss": 2.4092,
      "step": 68725
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0790060758590698,
      "learning_rate": 5.476335405769539e-07,
      "loss": 2.0657,
      "step": 68726
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0463651418685913,
      "learning_rate": 5.474991635618942e-07,
      "loss": 2.4847,
      "step": 68727
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0723888874053955,
      "learning_rate": 5.473648025713907e-07,
      "loss": 2.1209,
      "step": 68728
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0926159620285034,
      "learning_rate": 5.472304576056719e-07,
      "loss": 2.5187,
      "step": 68729
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.1040778160095215,
      "learning_rate": 5.470961286649645e-07,
      "loss": 2.2792,
      "step": 68730
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.109852910041809,
      "learning_rate": 5.46961815749496e-07,
      "loss": 2.2865,
      "step": 68731
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0312620401382446,
      "learning_rate": 5.468275188594951e-07,
      "loss": 2.3862,
      "step": 68732
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.051163673400879,
      "learning_rate": 5.466932379951895e-07,
      "loss": 2.1453,
      "step": 68733
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2454276084899902,
      "learning_rate": 5.465589731568044e-07,
      "loss": 2.6336,
      "step": 68734
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.2012662887573242,
      "learning_rate": 5.464247243445719e-07,
      "loss": 2.3188,
      "step": 68735
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.0410256385803223,
      "learning_rate": 5.462904915587153e-07,
      "loss": 2.4436,
      "step": 68736
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0296549797058105,
      "learning_rate": 5.461562747994642e-07,
      "loss": 2.2836,
      "step": 68737
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1069453954696655,
      "learning_rate": 5.460220740670452e-07,
      "loss": 2.0893,
      "step": 68738
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0378837585449219,
      "learning_rate": 5.458878893616881e-07,
      "loss": 2.3269,
      "step": 68739
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0669879913330078,
      "learning_rate": 5.457537206836172e-07,
      "loss": 2.3991,
      "step": 68740
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0293248891830444,
      "learning_rate": 5.456195680330623e-07,
      "loss": 2.3383,
      "step": 68741
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.032092571258545,
      "learning_rate": 5.454854314102509e-07,
      "loss": 2.4375,
      "step": 68742
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0989376306533813,
      "learning_rate": 5.453513108154096e-07,
      "loss": 2.2513,
      "step": 68743
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.036496877670288,
      "learning_rate": 5.452172062487638e-07,
      "loss": 2.4746,
      "step": 68744
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1627789735794067,
      "learning_rate": 5.450831177105432e-07,
      "loss": 2.3009,
      "step": 68745
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1089166402816772,
      "learning_rate": 5.449490452009742e-07,
      "loss": 2.3551,
      "step": 68746
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1277097463607788,
      "learning_rate": 5.448149887202858e-07,
      "loss": 2.3261,
      "step": 68747
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0815035104751587,
      "learning_rate": 5.446809482687032e-07,
      "loss": 2.4101,
      "step": 68748
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1340546607971191,
      "learning_rate": 5.445469238464552e-07,
      "loss": 2.3766,
      "step": 68749
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.081483006477356,
      "learning_rate": 5.44412915453767e-07,
      "loss": 2.3068,
      "step": 68750
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1085231304168701,
      "learning_rate": 5.442789230908685e-07,
      "loss": 2.2468,
      "step": 68751
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0312058925628662,
      "learning_rate": 5.441449467579829e-07,
      "loss": 2.3802,
      "step": 68752
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1436994075775146,
      "learning_rate": 5.440109864553422e-07,
      "loss": 2.1781,
      "step": 68753
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1182368993759155,
      "learning_rate": 5.438770421831719e-07,
      "loss": 2.5126,
      "step": 68754
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.060265302658081,
      "learning_rate": 5.437431139416971e-07,
      "loss": 2.163,
      "step": 68755
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0877103805541992,
      "learning_rate": 5.436092017311445e-07,
      "loss": 2.35,
      "step": 68756
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1070735454559326,
      "learning_rate": 5.434753055517439e-07,
      "loss": 2.2918,
      "step": 68757
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.089582085609436,
      "learning_rate": 5.433414254037205e-07,
      "loss": 2.2389,
      "step": 68758
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.988031268119812,
      "learning_rate": 5.432075612873022e-07,
      "loss": 2.2606,
      "step": 68759
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3134312629699707,
      "learning_rate": 5.430737132027165e-07,
      "loss": 2.291,
      "step": 68760
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1078184843063354,
      "learning_rate": 5.429398811501874e-07,
      "loss": 2.3617,
      "step": 68761
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9848754405975342,
      "learning_rate": 5.428060651299449e-07,
      "loss": 2.3582,
      "step": 68762
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1287435293197632,
      "learning_rate": 5.426722651422145e-07,
      "loss": 2.4548,
      "step": 68763
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0946069955825806,
      "learning_rate": 5.425384811872236e-07,
      "loss": 2.2496,
      "step": 68764
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0818028450012207,
      "learning_rate": 5.424047132651966e-07,
      "loss": 2.1251,
      "step": 68765
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0301252603530884,
      "learning_rate": 5.422709613763644e-07,
      "loss": 2.5734,
      "step": 68766
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0474528074264526,
      "learning_rate": 5.4213722552095e-07,
      "loss": 2.2222,
      "step": 68767
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.104010820388794,
      "learning_rate": 5.420035056991846e-07,
      "loss": 2.3137,
      "step": 68768
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.42777681350708,
      "learning_rate": 5.418698019112889e-07,
      "loss": 2.3011,
      "step": 68769
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0127840042114258,
      "learning_rate": 5.41736114157495e-07,
      "loss": 2.1809,
      "step": 68770
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0394790172576904,
      "learning_rate": 5.416024424380251e-07,
      "loss": 2.3739,
      "step": 68771
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0713611841201782,
      "learning_rate": 5.414687867531099e-07,
      "loss": 2.3348,
      "step": 68772
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9958961009979248,
      "learning_rate": 5.413351471029715e-07,
      "loss": 2.2486,
      "step": 68773
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4240473508834839,
      "learning_rate": 5.412015234878409e-07,
      "loss": 2.3392,
      "step": 68774
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0788222551345825,
      "learning_rate": 5.410679159079413e-07,
      "loss": 2.3362,
      "step": 68775
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0379825830459595,
      "learning_rate": 5.409343243635024e-07,
      "loss": 2.5328,
      "step": 68776
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2270493507385254,
      "learning_rate": 5.408007488547473e-07,
      "loss": 2.2358,
      "step": 68777
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.148293375968933,
      "learning_rate": 5.406671893819049e-07,
      "loss": 2.1814,
      "step": 68778
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9987388253211975,
      "learning_rate": 5.405336459452004e-07,
      "loss": 2.3107,
      "step": 68779
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.127925157546997,
      "learning_rate": 5.404001185448605e-07,
      "loss": 2.3318,
      "step": 68780
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.373123049736023,
      "learning_rate": 5.402666071811124e-07,
      "loss": 2.3007,
      "step": 68781
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0277299880981445,
      "learning_rate": 5.40133111854182e-07,
      "loss": 2.5178,
      "step": 68782
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.061820149421692,
      "learning_rate": 5.399996325642931e-07,
      "loss": 2.5685,
      "step": 68783
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.173876166343689,
      "learning_rate": 5.398661693116758e-07,
      "loss": 2.0096,
      "step": 68784
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0409797430038452,
      "learning_rate": 5.397327220965532e-07,
      "loss": 2.3198,
      "step": 68785
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0023865699768066,
      "learning_rate": 5.39599290919155e-07,
      "loss": 2.3338,
      "step": 68786
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1140679121017456,
      "learning_rate": 5.394658757797045e-07,
      "loss": 2.2772,
      "step": 68787
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0826473236083984,
      "learning_rate": 5.39332476678428e-07,
      "loss": 2.1591,
      "step": 68788
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9320483207702637,
      "learning_rate": 5.391990936155533e-07,
      "loss": 2.4347,
      "step": 68789
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1643126010894775,
      "learning_rate": 5.390657265913046e-07,
      "loss": 2.0919,
      "step": 68790
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0896700620651245,
      "learning_rate": 5.389323756059106e-07,
      "loss": 2.225,
      "step": 68791
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1682096719741821,
      "learning_rate": 5.387990406595944e-07,
      "loss": 2.4128,
      "step": 68792
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0624839067459106,
      "learning_rate": 5.386657217525848e-07,
      "loss": 2.1028,
      "step": 68793
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0848556756973267,
      "learning_rate": 5.38532418885106e-07,
      "loss": 2.688,
      "step": 68794
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.087658405303955,
      "learning_rate": 5.383991320573856e-07,
      "loss": 2.3113,
      "step": 68795
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1216473579406738,
      "learning_rate": 5.382658612696457e-07,
      "loss": 2.1927,
      "step": 68796
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1526732444763184,
      "learning_rate": 5.38132606522117e-07,
      "loss": 2.5539,
      "step": 68797
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0704022645950317,
      "learning_rate": 5.379993678150219e-07,
      "loss": 2.4835,
      "step": 68798
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.096962809562683,
      "learning_rate": 5.378661451485889e-07,
      "loss": 2.3234,
      "step": 68799
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0468913316726685,
      "learning_rate": 5.377329385230412e-07,
      "loss": 2.4533,
      "step": 68800
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.129279375076294,
      "learning_rate": 5.375997479386075e-07,
      "loss": 2.4484,
      "step": 68801
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.070374608039856,
      "learning_rate": 5.374665733955109e-07,
      "loss": 2.4502,
      "step": 68802
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1163790225982666,
      "learning_rate": 5.373334148939802e-07,
      "loss": 2.2159,
      "step": 68803
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0878393650054932,
      "learning_rate": 5.372002724342373e-07,
      "loss": 2.3668,
      "step": 68804
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1177667379379272,
      "learning_rate": 5.370671460165122e-07,
      "loss": 2.3899,
      "step": 68805
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1407990455627441,
      "learning_rate": 5.369340356410269e-07,
      "loss": 2.2059,
      "step": 68806
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0420562028884888,
      "learning_rate": 5.3680094130801e-07,
      "loss": 2.253,
      "step": 68807
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0583027601242065,
      "learning_rate": 5.366678630176846e-07,
      "loss": 2.3344,
      "step": 68808
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9848161339759827,
      "learning_rate": 5.365348007702786e-07,
      "loss": 2.2947,
      "step": 68809
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0842230319976807,
      "learning_rate": 5.364017545660139e-07,
      "loss": 2.2796,
      "step": 68810
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0606088638305664,
      "learning_rate": 5.362687244051201e-07,
      "loss": 2.3542,
      "step": 68811
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1160153150558472,
      "learning_rate": 5.361357102878206e-07,
      "loss": 2.3638,
      "step": 68812
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1468770503997803,
      "learning_rate": 5.360027122143418e-07,
      "loss": 2.1356,
      "step": 68813
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0678237676620483,
      "learning_rate": 5.358697301849081e-07,
      "loss": 2.0361,
      "step": 68814
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0990394353866577,
      "learning_rate": 5.357367641997469e-07,
      "loss": 2.186,
      "step": 68815
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0537800788879395,
      "learning_rate": 5.356038142590825e-07,
      "loss": 2.4428,
      "step": 68816
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0211013555526733,
      "learning_rate": 5.354708803631381e-07,
      "loss": 2.283,
      "step": 68817
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0603702068328857,
      "learning_rate": 5.353379625121436e-07,
      "loss": 2.2695,
      "step": 68818
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0740911960601807,
      "learning_rate": 5.352050607063197e-07,
      "loss": 2.1241,
      "step": 68819
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.15274178981781,
      "learning_rate": 5.350721749458942e-07,
      "loss": 2.0284,
      "step": 68820
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.024817705154419,
      "learning_rate": 5.349393052310936e-07,
      "loss": 2.2186,
      "step": 68821
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.092065691947937,
      "learning_rate": 5.34806451562141e-07,
      "loss": 2.4356,
      "step": 68822
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0698562860488892,
      "learning_rate": 5.346736139392606e-07,
      "loss": 2.1266,
      "step": 68823
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0474317073822021,
      "learning_rate": 5.345407923626811e-07,
      "loss": 2.1201,
      "step": 68824
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2022509574890137,
      "learning_rate": 5.344079868326235e-07,
      "loss": 2.5529,
      "step": 68825
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1732863187789917,
      "learning_rate": 5.342751973493177e-07,
      "loss": 2.5468,
      "step": 68826
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0184201002120972,
      "learning_rate": 5.341424239129833e-07,
      "loss": 2.2229,
      "step": 68827
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.113932728767395,
      "learning_rate": 5.340096665238515e-07,
      "loss": 2.2822,
      "step": 68828
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0337104797363281,
      "learning_rate": 5.338769251821419e-07,
      "loss": 2.1977,
      "step": 68829
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.078926682472229,
      "learning_rate": 5.337441998880832e-07,
      "loss": 2.322,
      "step": 68830
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.04586660861969,
      "learning_rate": 5.336114906418977e-07,
      "loss": 2.3505,
      "step": 68831
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1682437658309937,
      "learning_rate": 5.334787974438138e-07,
      "loss": 2.3893,
      "step": 68832
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9944189190864563,
      "learning_rate": 5.333461202940526e-07,
      "loss": 2.1335,
      "step": 68833
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.012654185295105,
      "learning_rate": 5.332134591928439e-07,
      "loss": 2.4004,
      "step": 68834
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5257172584533691,
      "learning_rate": 5.330808141404065e-07,
      "loss": 2.4459,
      "step": 68835
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0735411643981934,
      "learning_rate": 5.329481851369689e-07,
      "loss": 2.3464,
      "step": 68836
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0824401378631592,
      "learning_rate": 5.328155721827555e-07,
      "loss": 2.4031,
      "step": 68837
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0706108808517456,
      "learning_rate": 5.326829752779916e-07,
      "loss": 2.699,
      "step": 68838
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1759142875671387,
      "learning_rate": 5.325503944228993e-07,
      "loss": 2.302,
      "step": 68839
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0079338550567627,
      "learning_rate": 5.324178296177074e-07,
      "loss": 2.1241,
      "step": 68840
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2125219106674194,
      "learning_rate": 5.322852808626378e-07,
      "loss": 2.1433,
      "step": 68841
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2618176937103271,
      "learning_rate": 5.32152748157917e-07,
      "loss": 2.2254,
      "step": 68842
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1512961387634277,
      "learning_rate": 5.320202315037681e-07,
      "loss": 2.2502,
      "step": 68843
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1942647695541382,
      "learning_rate": 5.318877309004156e-07,
      "loss": 2.1336,
      "step": 68844
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9931536912918091,
      "learning_rate": 5.317552463480858e-07,
      "loss": 2.3904,
      "step": 68845
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.105053186416626,
      "learning_rate": 5.316227778470018e-07,
      "loss": 2.1214,
      "step": 68846
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0452935695648193,
      "learning_rate": 5.314903253973902e-07,
      "loss": 2.1252,
      "step": 68847
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0826739072799683,
      "learning_rate": 5.313578889994731e-07,
      "loss": 2.4568,
      "step": 68848
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0685793161392212,
      "learning_rate": 5.312254686534757e-07,
      "loss": 2.3085,
      "step": 68849
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0985822677612305,
      "learning_rate": 5.310930643596235e-07,
      "loss": 2.5336,
      "step": 68850
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0936905145645142,
      "learning_rate": 5.309606761181407e-07,
      "loss": 2.5714,
      "step": 68851
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1331305503845215,
      "learning_rate": 5.308283039292495e-07,
      "loss": 2.4372,
      "step": 68852
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9991878271102905,
      "learning_rate": 5.306959477931772e-07,
      "loss": 2.4332,
      "step": 68853
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0973448753356934,
      "learning_rate": 5.305636077101461e-07,
      "loss": 2.406,
      "step": 68854
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0767576694488525,
      "learning_rate": 5.304312836803838e-07,
      "loss": 2.1989,
      "step": 68855
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6102699041366577,
      "learning_rate": 5.302989757041099e-07,
      "loss": 2.17,
      "step": 68856
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0482691526412964,
      "learning_rate": 5.301666837815534e-07,
      "loss": 2.2343,
      "step": 68857
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1090081930160522,
      "learning_rate": 5.300344079129338e-07,
      "loss": 2.5639,
      "step": 68858
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2030762434005737,
      "learning_rate": 5.2990214809848e-07,
      "loss": 2.4241,
      "step": 68859
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1609506607055664,
      "learning_rate": 5.297699043384141e-07,
      "loss": 2.241,
      "step": 68860
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1944693326950073,
      "learning_rate": 5.296376766329603e-07,
      "loss": 2.3525,
      "step": 68861
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0613173246383667,
      "learning_rate": 5.295054649823417e-07,
      "loss": 2.2061,
      "step": 68862
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9955489039421082,
      "learning_rate": 5.293732693867837e-07,
      "loss": 2.4715,
      "step": 68863
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0790797472000122,
      "learning_rate": 5.292410898465095e-07,
      "loss": 2.2446,
      "step": 68864
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.145036220550537,
      "learning_rate": 5.291089263617455e-07,
      "loss": 2.4759,
      "step": 68865
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0718369483947754,
      "learning_rate": 5.289767789327127e-07,
      "loss": 2.3006,
      "step": 68866
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.069189190864563,
      "learning_rate": 5.288446475596376e-07,
      "loss": 2.1234,
      "step": 68867
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1473517417907715,
      "learning_rate": 5.287125322427422e-07,
      "loss": 2.4416,
      "step": 68868
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.347593903541565,
      "learning_rate": 5.28580432982253e-07,
      "loss": 2.2472,
      "step": 68869
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3266316652297974,
      "learning_rate": 5.284483497783899e-07,
      "loss": 2.3547,
      "step": 68870
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0543960332870483,
      "learning_rate": 5.283162826313815e-07,
      "loss": 2.6543,
      "step": 68871
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2039629220962524,
      "learning_rate": 5.281842315414498e-07,
      "loss": 2.1985,
      "step": 68872
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0752031803131104,
      "learning_rate": 5.280521965088159e-07,
      "loss": 2.3614,
      "step": 68873
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0586504936218262,
      "learning_rate": 5.279201775337084e-07,
      "loss": 2.1211,
      "step": 68874
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.100773811340332,
      "learning_rate": 5.277881746163482e-07,
      "loss": 2.375,
      "step": 68875
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1521127223968506,
      "learning_rate": 5.276561877569586e-07,
      "loss": 2.4093,
      "step": 68876
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1142207384109497,
      "learning_rate": 5.275242169557659e-07,
      "loss": 2.1937,
      "step": 68877
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.96433424949646,
      "learning_rate": 5.273922622129924e-07,
      "loss": 2.4303,
      "step": 68878
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1456704139709473,
      "learning_rate": 5.272603235288598e-07,
      "loss": 2.2609,
      "step": 68879
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0992666482925415,
      "learning_rate": 5.27128400903596e-07,
      "loss": 2.1645,
      "step": 68880
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1890177726745605,
      "learning_rate": 5.269964943374195e-07,
      "loss": 2.233,
      "step": 68881
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3029307126998901,
      "learning_rate": 5.268646038305591e-07,
      "loss": 2.289,
      "step": 68882
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1238363981246948,
      "learning_rate": 5.267327293832348e-07,
      "loss": 2.1324,
      "step": 68883
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0810818672180176,
      "learning_rate": 5.266008709956727e-07,
      "loss": 2.3825,
      "step": 68884
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0635323524475098,
      "learning_rate": 5.26469028668094e-07,
      "loss": 2.4096,
      "step": 68885
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0902444124221802,
      "learning_rate": 5.263372024007241e-07,
      "loss": 2.3297,
      "step": 68886
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0707333087921143,
      "learning_rate": 5.26205392193786e-07,
      "loss": 2.2665,
      "step": 68887
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0638724565505981,
      "learning_rate": 5.260735980475018e-07,
      "loss": 2.3604,
      "step": 68888
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1769616603851318,
      "learning_rate": 5.259418199620958e-07,
      "loss": 2.2406,
      "step": 68889
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.147247552871704,
      "learning_rate": 5.258100579377923e-07,
      "loss": 2.3008,
      "step": 68890
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.278942346572876,
      "learning_rate": 5.256783119748133e-07,
      "loss": 2.2865,
      "step": 68891
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0879242420196533,
      "learning_rate": 5.255465820733829e-07,
      "loss": 2.5445,
      "step": 68892
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1215770244598389,
      "learning_rate": 5.254148682337234e-07,
      "loss": 2.3145,
      "step": 68893
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0522563457489014,
      "learning_rate": 5.252831704560613e-07,
      "loss": 2.2138,
      "step": 68894
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1976672410964966,
      "learning_rate": 5.251514887406151e-07,
      "loss": 2.3618,
      "step": 68895
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.262402057647705,
      "learning_rate": 5.250198230876113e-07,
      "loss": 2.3071,
      "step": 68896
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.187738060951233,
      "learning_rate": 5.248881734972722e-07,
      "loss": 2.5498,
      "step": 68897
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1000444889068604,
      "learning_rate": 5.247565399698217e-07,
      "loss": 2.3636,
      "step": 68898
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1240609884262085,
      "learning_rate": 5.246249225054823e-07,
      "loss": 2.3154,
      "step": 68899
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0781539678573608,
      "learning_rate": 5.244933211044778e-07,
      "loss": 2.4002,
      "step": 68900
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.031366229057312,
      "learning_rate": 5.243617357670283e-07,
      "loss": 2.398,
      "step": 68901
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9474942088127136,
      "learning_rate": 5.242301664933614e-07,
      "loss": 2.2085,
      "step": 68902
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.008954644203186,
      "learning_rate": 5.240986132836967e-07,
      "loss": 2.0949,
      "step": 68903
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0192601680755615,
      "learning_rate": 5.239670761382587e-07,
      "loss": 2.2874,
      "step": 68904
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0884631872177124,
      "learning_rate": 5.238355550572716e-07,
      "loss": 2.1165,
      "step": 68905
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5888454914093018,
      "learning_rate": 5.237040500409541e-07,
      "loss": 2.3942,
      "step": 68906
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1500651836395264,
      "learning_rate": 5.235725610895337e-07,
      "loss": 2.4865,
      "step": 68907
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0071204900741577,
      "learning_rate": 5.234410882032315e-07,
      "loss": 2.4221,
      "step": 68908
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1660959720611572,
      "learning_rate": 5.233096313822706e-07,
      "loss": 2.2141,
      "step": 68909
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1509523391723633,
      "learning_rate": 5.231781906268718e-07,
      "loss": 2.2397,
      "step": 68910
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0014667510986328,
      "learning_rate": 5.230467659372628e-07,
      "loss": 2.2518,
      "step": 68911
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.967720091342926,
      "learning_rate": 5.229153573136614e-07,
      "loss": 2.4132,
      "step": 68912
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1657350063323975,
      "learning_rate": 5.227839647562949e-07,
      "loss": 2.5759,
      "step": 68913
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0557658672332764,
      "learning_rate": 5.22652588265381e-07,
      "loss": 2.3408,
      "step": 68914
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1561415195465088,
      "learning_rate": 5.225212278411463e-07,
      "loss": 2.1692,
      "step": 68915
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2203412055969238,
      "learning_rate": 5.223898834838104e-07,
      "loss": 2.5254,
      "step": 68916
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1634221076965332,
      "learning_rate": 5.222585551935999e-07,
      "loss": 2.3125,
      "step": 68917
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0197908878326416,
      "learning_rate": 5.221272429707336e-07,
      "loss": 2.0651,
      "step": 68918
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9974560141563416,
      "learning_rate": 5.219959468154368e-07,
      "loss": 2.2893,
      "step": 68919
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0804637670516968,
      "learning_rate": 5.218646667279293e-07,
      "loss": 2.349,
      "step": 68920
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1613390445709229,
      "learning_rate": 5.217334027084376e-07,
      "loss": 2.0416,
      "step": 68921
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1717398166656494,
      "learning_rate": 5.216021547571803e-07,
      "loss": 2.3002,
      "step": 68922
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1279000043869019,
      "learning_rate": 5.214709228743831e-07,
      "loss": 2.4001,
      "step": 68923
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0669918060302734,
      "learning_rate": 5.213397070602666e-07,
      "loss": 2.3741,
      "step": 68924
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0639474391937256,
      "learning_rate": 5.212085073150541e-07,
      "loss": 2.4376,
      "step": 68925
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1003214120864868,
      "learning_rate": 5.210773236389677e-07,
      "loss": 2.4482,
      "step": 68926
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0980422496795654,
      "learning_rate": 5.209461560322293e-07,
      "loss": 2.3827,
      "step": 68927
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2019466161727905,
      "learning_rate": 5.208150044950611e-07,
      "loss": 2.2907,
      "step": 68928
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0081071853637695,
      "learning_rate": 5.206838690276861e-07,
      "loss": 2.3655,
      "step": 68929
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1794075965881348,
      "learning_rate": 5.205527496303264e-07,
      "loss": 2.3244,
      "step": 68930
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0440378189086914,
      "learning_rate": 5.204216463032053e-07,
      "loss": 2.5681,
      "step": 68931
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.242553949356079,
      "learning_rate": 5.202905590465423e-07,
      "loss": 2.308,
      "step": 68932
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0148215293884277,
      "learning_rate": 5.201594878605631e-07,
      "loss": 2.4305,
      "step": 68933
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.044532060623169,
      "learning_rate": 5.200284327454885e-07,
      "loss": 2.1342,
      "step": 68934
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1882288455963135,
      "learning_rate": 5.198973937015394e-07,
      "loss": 2.307,
      "step": 68935
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1257280111312866,
      "learning_rate": 5.19766370728939e-07,
      "loss": 2.2828,
      "step": 68936
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1981353759765625,
      "learning_rate": 5.196353638279095e-07,
      "loss": 2.3356,
      "step": 68937
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.147901177406311,
      "learning_rate": 5.195043729986737e-07,
      "loss": 2.3128,
      "step": 68938
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1502326726913452,
      "learning_rate": 5.193733982414528e-07,
      "loss": 2.1079,
      "step": 68939
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.522316813468933,
      "learning_rate": 5.192424395564688e-07,
      "loss": 2.3215,
      "step": 68940
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0582994222640991,
      "learning_rate": 5.191114969439425e-07,
      "loss": 2.3226,
      "step": 68941
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0686986446380615,
      "learning_rate": 5.189805704040984e-07,
      "loss": 2.3574,
      "step": 68942
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1881338357925415,
      "learning_rate": 5.188496599371561e-07,
      "loss": 2.3048,
      "step": 68943
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0733392238616943,
      "learning_rate": 5.187187655433401e-07,
      "loss": 2.1989,
      "step": 68944
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0611523389816284,
      "learning_rate": 5.1858788722287e-07,
      "loss": 2.0958,
      "step": 68945
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1079274415969849,
      "learning_rate": 5.184570249759691e-07,
      "loss": 2.4527,
      "step": 68946
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1459312438964844,
      "learning_rate": 5.183261788028571e-07,
      "loss": 2.2919,
      "step": 68947
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1082420349121094,
      "learning_rate": 5.181953487037595e-07,
      "loss": 2.2108,
      "step": 68948
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1527262926101685,
      "learning_rate": 5.180645346788948e-07,
      "loss": 2.3588,
      "step": 68949
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.04678475856781,
      "learning_rate": 5.179337367284865e-07,
      "loss": 2.283,
      "step": 68950
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.03689444065094,
      "learning_rate": 5.178029548527553e-07,
      "loss": 2.295,
      "step": 68951
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0947020053863525,
      "learning_rate": 5.176721890519243e-07,
      "loss": 2.1329,
      "step": 68952
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0079336166381836,
      "learning_rate": 5.175414393262135e-07,
      "loss": 2.3477,
      "step": 68953
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1511152982711792,
      "learning_rate": 5.17410705675847e-07,
      "loss": 2.165,
      "step": 68954
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1218370199203491,
      "learning_rate": 5.172799881010426e-07,
      "loss": 2.294,
      "step": 68955
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0496866703033447,
      "learning_rate": 5.171492866020255e-07,
      "loss": 2.1776,
      "step": 68956
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.043325424194336,
      "learning_rate": 5.170186011790146e-07,
      "loss": 2.3422,
      "step": 68957
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0124505758285522,
      "learning_rate": 5.168879318322339e-07,
      "loss": 2.246,
      "step": 68958
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0914931297302246,
      "learning_rate": 5.167572785619024e-07,
      "loss": 2.3732,
      "step": 68959
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0927917957305908,
      "learning_rate": 5.166266413682441e-07,
      "loss": 2.2626,
      "step": 68960
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0728185176849365,
      "learning_rate": 5.164960202514802e-07,
      "loss": 2.1837,
      "step": 68961
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9884363412857056,
      "learning_rate": 5.163654152118291e-07,
      "loss": 2.471,
      "step": 68962
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0799211263656616,
      "learning_rate": 5.162348262495154e-07,
      "loss": 2.3323,
      "step": 68963
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.102559208869934,
      "learning_rate": 5.161042533647586e-07,
      "loss": 2.2439,
      "step": 68964
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0699032545089722,
      "learning_rate": 5.159736965577822e-07,
      "loss": 2.3924,
      "step": 68965
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.027596116065979,
      "learning_rate": 5.158431558288057e-07,
      "loss": 2.355,
      "step": 68966
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0777324438095093,
      "learning_rate": 5.157126311780502e-07,
      "loss": 2.398,
      "step": 68967
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0636767148971558,
      "learning_rate": 5.155821226057389e-07,
      "loss": 2.3315,
      "step": 68968
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.091601014137268,
      "learning_rate": 5.154516301120915e-07,
      "loss": 2.4458,
      "step": 68969
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0817594528198242,
      "learning_rate": 5.15321153697329e-07,
      "loss": 2.0452,
      "step": 68970
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2039064168930054,
      "learning_rate": 5.151906933616735e-07,
      "loss": 2.4386,
      "step": 68971
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1193166971206665,
      "learning_rate": 5.150602491053447e-07,
      "loss": 2.2897,
      "step": 68972
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.037286400794983,
      "learning_rate": 5.14929820928567e-07,
      "loss": 2.2414,
      "step": 68973
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0470036268234253,
      "learning_rate": 5.147994088315567e-07,
      "loss": 2.147,
      "step": 68974
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1308096647262573,
      "learning_rate": 5.146690128145404e-07,
      "loss": 2.2456,
      "step": 68975
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9476923942565918,
      "learning_rate": 5.145386328777335e-07,
      "loss": 2.2787,
      "step": 68976
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1929101943969727,
      "learning_rate": 5.144082690213625e-07,
      "loss": 2.3138,
      "step": 68977
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0859395265579224,
      "learning_rate": 5.14277921245644e-07,
      "loss": 2.2064,
      "step": 68978
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1382951736450195,
      "learning_rate": 5.14147589550803e-07,
      "loss": 2.3336,
      "step": 68979
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0152639150619507,
      "learning_rate": 5.140172739370552e-07,
      "loss": 2.3845,
      "step": 68980
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0678322315216064,
      "learning_rate": 5.13886974404626e-07,
      "loss": 2.2414,
      "step": 68981
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.054875135421753,
      "learning_rate": 5.13756690953734e-07,
      "loss": 2.4163,
      "step": 68982
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0807230472564697,
      "learning_rate": 5.136264235846022e-07,
      "loss": 2.1534,
      "step": 68983
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0578162670135498,
      "learning_rate": 5.134961722974486e-07,
      "loss": 2.4555,
      "step": 68984
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2281935214996338,
      "learning_rate": 5.133659370924959e-07,
      "loss": 2.3413,
      "step": 68985
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1381211280822754,
      "learning_rate": 5.132357179699643e-07,
      "loss": 2.304,
      "step": 68986
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0851867198944092,
      "learning_rate": 5.131055149300756e-07,
      "loss": 2.371,
      "step": 68987
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0846798419952393,
      "learning_rate": 5.129753279730488e-07,
      "loss": 2.4421,
      "step": 68988
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1436119079589844,
      "learning_rate": 5.128451570991055e-07,
      "loss": 2.5659,
      "step": 68989
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0626978874206543,
      "learning_rate": 5.12715002308467e-07,
      "loss": 2.3495,
      "step": 68990
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9951263070106506,
      "learning_rate": 5.125848636013531e-07,
      "loss": 2.5833,
      "step": 68991
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.085616946220398,
      "learning_rate": 5.124547409779846e-07,
      "loss": 2.1989,
      "step": 68992
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0819355249404907,
      "learning_rate": 5.123246344385824e-07,
      "loss": 2.3263,
      "step": 68993
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1397030353546143,
      "learning_rate": 5.121945439833653e-07,
      "loss": 2.2306,
      "step": 68994
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0929781198501587,
      "learning_rate": 5.120644696125566e-07,
      "loss": 2.0926,
      "step": 68995
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.066414475440979,
      "learning_rate": 5.119344113263758e-07,
      "loss": 2.3762,
      "step": 68996
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2149585485458374,
      "learning_rate": 5.118043691250418e-07,
      "loss": 2.1464,
      "step": 68997
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0308644771575928,
      "learning_rate": 5.116743430087778e-07,
      "loss": 2.4854,
      "step": 68998
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.044737696647644,
      "learning_rate": 5.115443329778014e-07,
      "loss": 2.4225,
      "step": 68999
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1848479509353638,
      "learning_rate": 5.114143390323356e-07,
      "loss": 2.4758,
      "step": 69000
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0381157398223877,
      "learning_rate": 5.112843611725982e-07,
      "loss": 2.1365,
      "step": 69001
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1022212505340576,
      "learning_rate": 5.111543993988122e-07,
      "loss": 2.4718,
      "step": 69002
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0931586027145386,
      "learning_rate": 5.110244537111953e-07,
      "loss": 2.2224,
      "step": 69003
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2091262340545654,
      "learning_rate": 5.108945241099705e-07,
      "loss": 2.3487,
      "step": 69004
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4134916067123413,
      "learning_rate": 5.107646105953568e-07,
      "loss": 2.3286,
      "step": 69005
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1263500452041626,
      "learning_rate": 5.106347131675738e-07,
      "loss": 2.3242,
      "step": 69006
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0847030878067017,
      "learning_rate": 5.105048318268412e-07,
      "loss": 2.4346,
      "step": 69007
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0153131484985352,
      "learning_rate": 5.103749665733814e-07,
      "loss": 2.277,
      "step": 69008
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0962169170379639,
      "learning_rate": 5.10245117407413e-07,
      "loss": 2.151,
      "step": 69009
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1381702423095703,
      "learning_rate": 5.101152843291567e-07,
      "loss": 2.3358,
      "step": 69010
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1270111799240112,
      "learning_rate": 5.099854673388316e-07,
      "loss": 2.3971,
      "step": 69011
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1088329553604126,
      "learning_rate": 5.098556664366605e-07,
      "loss": 2.4611,
      "step": 69012
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1178853511810303,
      "learning_rate": 5.097258816228589e-07,
      "loss": 2.293,
      "step": 69013
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0832706689834595,
      "learning_rate": 5.095961128976513e-07,
      "loss": 2.3651,
      "step": 69014
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9898291230201721,
      "learning_rate": 5.094663602612548e-07,
      "loss": 2.3716,
      "step": 69015
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0548279285430908,
      "learning_rate": 5.093366237138919e-07,
      "loss": 2.2451,
      "step": 69016
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0161349773406982,
      "learning_rate": 5.0920690325578e-07,
      "loss": 2.4207,
      "step": 69017
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2447220087051392,
      "learning_rate": 5.090771988871402e-07,
      "loss": 2.3415,
      "step": 69018
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0847572088241577,
      "learning_rate": 5.08947510608192e-07,
      "loss": 2.3916,
      "step": 69019
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1785317659378052,
      "learning_rate": 5.088178384191567e-07,
      "loss": 2.4442,
      "step": 69020
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1439545154571533,
      "learning_rate": 5.086881823202516e-07,
      "loss": 2.4121,
      "step": 69021
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.061645746231079,
      "learning_rate": 5.08558542311699e-07,
      "loss": 2.4569,
      "step": 69022
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1573138236999512,
      "learning_rate": 5.084289183937175e-07,
      "loss": 2.4763,
      "step": 69023
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1397709846496582,
      "learning_rate": 5.082993105665245e-07,
      "loss": 2.3147,
      "step": 69024
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0462099313735962,
      "learning_rate": 5.081697188303447e-07,
      "loss": 2.246,
      "step": 69025
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1379716396331787,
      "learning_rate": 5.080401431853931e-07,
      "loss": 2.2486,
      "step": 69026
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0856965780258179,
      "learning_rate": 5.079105836318931e-07,
      "loss": 2.1906,
      "step": 69027
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0931199789047241,
      "learning_rate": 5.077810401700611e-07,
      "loss": 2.2839,
      "step": 69028
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1407233476638794,
      "learning_rate": 5.076515128001202e-07,
      "loss": 2.4496,
      "step": 69029
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0865769386291504,
      "learning_rate": 5.075220015222871e-07,
      "loss": 2.4099,
      "step": 69030
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0829707384109497,
      "learning_rate": 5.073925063367823e-07,
      "loss": 2.3296,
      "step": 69031
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1473020315170288,
      "learning_rate": 5.072630272438262e-07,
      "loss": 2.2221,
      "step": 69032
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0534025430679321,
      "learning_rate": 5.07133564243637e-07,
      "loss": 2.408,
      "step": 69033
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1158355474472046,
      "learning_rate": 5.070041173364337e-07,
      "loss": 2.1807,
      "step": 69034
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.007381558418274,
      "learning_rate": 5.068746865224383e-07,
      "loss": 2.3539,
      "step": 69035
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1386282444000244,
      "learning_rate": 5.067452718018661e-07,
      "loss": 2.4087,
      "step": 69036
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0385075807571411,
      "learning_rate": 5.066158731749416e-07,
      "loss": 2.2566,
      "step": 69037
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.056965947151184,
      "learning_rate": 5.064864906418798e-07,
      "loss": 2.2106,
      "step": 69038
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0566997528076172,
      "learning_rate": 5.063571242029031e-07,
      "loss": 2.2864,
      "step": 69039
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9917898178100586,
      "learning_rate": 5.062277738582277e-07,
      "loss": 2.3115,
      "step": 69040
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0609984397888184,
      "learning_rate": 5.060984396080771e-07,
      "loss": 2.3994,
      "step": 69041
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9327002167701721,
      "learning_rate": 5.059691214526652e-07,
      "loss": 2.2359,
      "step": 69042
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.210806965827942,
      "learning_rate": 5.058398193922165e-07,
      "loss": 2.1147,
      "step": 69043
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1774592399597168,
      "learning_rate": 5.057105334269475e-07,
      "loss": 2.269,
      "step": 69044
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1543004512786865,
      "learning_rate": 5.055812635570789e-07,
      "loss": 2.346,
      "step": 69045
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0381351709365845,
      "learning_rate": 5.054520097828264e-07,
      "loss": 2.5143,
      "step": 69046
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0115413665771484,
      "learning_rate": 5.05322772104413e-07,
      "loss": 2.4228,
      "step": 69047
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.281834363937378,
      "learning_rate": 5.051935505220552e-07,
      "loss": 2.2454,
      "step": 69048
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0264379978179932,
      "learning_rate": 5.050643450359738e-07,
      "loss": 2.2073,
      "step": 69049
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0331993103027344,
      "learning_rate": 5.049351556463866e-07,
      "loss": 2.3238,
      "step": 69050
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0998800992965698,
      "learning_rate": 5.048059823535135e-07,
      "loss": 2.2728,
      "step": 69051
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1464020013809204,
      "learning_rate": 5.04676825157574e-07,
      "loss": 2.4242,
      "step": 69052
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0929722785949707,
      "learning_rate": 5.045476840587837e-07,
      "loss": 2.2803,
      "step": 69053
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.506456971168518,
      "learning_rate": 5.044185590573669e-07,
      "loss": 2.5007,
      "step": 69054
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0165568590164185,
      "learning_rate": 5.042894501535367e-07,
      "loss": 2.2618,
      "step": 69055
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.092049479484558,
      "learning_rate": 5.041603573475173e-07,
      "loss": 2.6226,
      "step": 69056
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1144801378250122,
      "learning_rate": 5.040312806395231e-07,
      "loss": 2.3886,
      "step": 69057
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.099437952041626,
      "learning_rate": 5.039022200297782e-07,
      "loss": 2.2897,
      "step": 69058
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0491688251495361,
      "learning_rate": 5.03773175518495e-07,
      "loss": 2.3049,
      "step": 69059
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0247806310653687,
      "learning_rate": 5.036441471058973e-07,
      "loss": 2.4277,
      "step": 69060
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0471702814102173,
      "learning_rate": 5.035151347922007e-07,
      "loss": 2.2333,
      "step": 69061
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.094853162765503,
      "learning_rate": 5.033861385776262e-07,
      "loss": 2.1921,
      "step": 69062
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1511727571487427,
      "learning_rate": 5.032571584623891e-07,
      "loss": 2.4485,
      "step": 69063
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0349596738815308,
      "learning_rate": 5.031281944467126e-07,
      "loss": 2.3803,
      "step": 69064
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1216291189193726,
      "learning_rate": 5.02999246530812e-07,
      "loss": 2.3085,
      "step": 69065
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.344725251197815,
      "learning_rate": 5.028703147149083e-07,
      "loss": 2.4007,
      "step": 69066
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0418884754180908,
      "learning_rate": 5.027413989992169e-07,
      "loss": 2.2747,
      "step": 69067
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0604108572006226,
      "learning_rate": 5.02612499383961e-07,
      "loss": 2.2006,
      "step": 69068
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0209587812423706,
      "learning_rate": 5.024836158693536e-07,
      "loss": 2.354,
      "step": 69069
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.15030038356781,
      "learning_rate": 5.02354748455618e-07,
      "loss": 2.4425,
      "step": 69070
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0460489988327026,
      "learning_rate": 5.022258971429705e-07,
      "loss": 2.3602,
      "step": 69071
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1483811140060425,
      "learning_rate": 5.020970619316289e-07,
      "loss": 2.414,
      "step": 69072
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0363367795944214,
      "learning_rate": 5.01968242821812e-07,
      "loss": 2.2655,
      "step": 69073
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0903953313827515,
      "learning_rate": 5.018394398137394e-07,
      "loss": 2.2625,
      "step": 69074
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2660696506500244,
      "learning_rate": 5.017106529076277e-07,
      "loss": 2.4284,
      "step": 69075
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1062434911727905,
      "learning_rate": 5.015818821036967e-07,
      "loss": 2.2408,
      "step": 69076
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0955586433410645,
      "learning_rate": 5.014531274021639e-07,
      "loss": 2.5119,
      "step": 69077
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0427117347717285,
      "learning_rate": 5.013243888032481e-07,
      "loss": 2.2972,
      "step": 69078
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.105312705039978,
      "learning_rate": 5.011956663071683e-07,
      "loss": 2.1909,
      "step": 69079
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0889123678207397,
      "learning_rate": 5.010669599141393e-07,
      "loss": 2.2733,
      "step": 69080
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5647575855255127,
      "learning_rate": 5.009382696243836e-07,
      "loss": 2.322,
      "step": 69081
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1162357330322266,
      "learning_rate": 5.008095954381165e-07,
      "loss": 2.2952,
      "step": 69082
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1001561880111694,
      "learning_rate": 5.006809373555577e-07,
      "loss": 2.241,
      "step": 69083
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2739958763122559,
      "learning_rate": 5.005522953769249e-07,
      "loss": 2.2948,
      "step": 69084
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0118951797485352,
      "learning_rate": 5.004236695024345e-07,
      "loss": 2.3695,
      "step": 69085
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0899412631988525,
      "learning_rate": 5.002950597323075e-07,
      "loss": 2.4687,
      "step": 69086
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1552976369857788,
      "learning_rate": 5.001664660667605e-07,
      "loss": 2.1035,
      "step": 69087
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1781541109085083,
      "learning_rate": 5.000378885060109e-07,
      "loss": 2.5038,
      "step": 69088
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0763161182403564,
      "learning_rate": 4.999093270502775e-07,
      "loss": 2.3602,
      "step": 69089
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0998834371566772,
      "learning_rate": 4.997807816997768e-07,
      "loss": 2.3818,
      "step": 69090
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1569430828094482,
      "learning_rate": 4.996522524547298e-07,
      "loss": 2.2147,
      "step": 69091
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0510444641113281,
      "learning_rate": 4.995237393153507e-07,
      "loss": 2.2266,
      "step": 69092
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0333362817764282,
      "learning_rate": 4.993952422818604e-07,
      "loss": 2.0981,
      "step": 69093
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9879071116447449,
      "learning_rate": 4.992667613544743e-07,
      "loss": 2.4772,
      "step": 69094
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.046756386756897,
      "learning_rate": 4.991382965334123e-07,
      "loss": 2.2963,
      "step": 69095
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.093501329421997,
      "learning_rate": 4.990098478188898e-07,
      "loss": 2.2031,
      "step": 69096
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1094882488250732,
      "learning_rate": 4.988814152111287e-07,
      "loss": 2.262,
      "step": 69097
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0262141227722168,
      "learning_rate": 4.987529987103434e-07,
      "loss": 2.3577,
      "step": 69098
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0713366270065308,
      "learning_rate": 4.986245983167515e-07,
      "loss": 2.4144,
      "step": 69099
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0565261840820312,
      "learning_rate": 4.984962140305715e-07,
      "loss": 2.3671,
      "step": 69100
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1416728496551514,
      "learning_rate": 4.983678458520213e-07,
      "loss": 2.3738,
      "step": 69101
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.19438636302948,
      "learning_rate": 4.982394937813173e-07,
      "loss": 2.2617,
      "step": 69102
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0855180025100708,
      "learning_rate": 4.981111578186792e-07,
      "loss": 2.4627,
      "step": 69103
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0031872987747192,
      "learning_rate": 4.979828379643226e-07,
      "loss": 1.9154,
      "step": 69104
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.285400152206421,
      "learning_rate": 4.97854534218466e-07,
      "loss": 2.4319,
      "step": 69105
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.033194899559021,
      "learning_rate": 4.97726246581326e-07,
      "loss": 2.395,
      "step": 69106
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1949753761291504,
      "learning_rate": 4.975979750531224e-07,
      "loss": 2.4505,
      "step": 69107
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0834566354751587,
      "learning_rate": 4.974697196340705e-07,
      "loss": 2.4656,
      "step": 69108
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0313620567321777,
      "learning_rate": 4.97341480324387e-07,
      "loss": 2.3817,
      "step": 69109
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4189867973327637,
      "learning_rate": 4.972132571242927e-07,
      "loss": 1.9835,
      "step": 69110
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.051726222038269,
      "learning_rate": 4.970850500340019e-07,
      "loss": 2.5987,
      "step": 69111
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.07731294631958,
      "learning_rate": 4.969568590537321e-07,
      "loss": 2.4222,
      "step": 69112
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0476808547973633,
      "learning_rate": 4.968286841837022e-07,
      "loss": 2.1181,
      "step": 69113
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0344754457473755,
      "learning_rate": 4.967005254241298e-07,
      "loss": 2.3952,
      "step": 69114
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2788772583007812,
      "learning_rate": 4.965723827752289e-07,
      "loss": 2.3578,
      "step": 69115
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0763617753982544,
      "learning_rate": 4.964442562372196e-07,
      "loss": 2.2667,
      "step": 69116
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9783301949501038,
      "learning_rate": 4.963161458103183e-07,
      "loss": 2.2732,
      "step": 69117
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0412135124206543,
      "learning_rate": 4.961880514947437e-07,
      "loss": 2.0892,
      "step": 69118
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.145673155784607,
      "learning_rate": 4.96059973290709e-07,
      "loss": 2.2348,
      "step": 69119
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1038508415222168,
      "learning_rate": 4.959319111984362e-07,
      "loss": 2.2748,
      "step": 69120
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0871495008468628,
      "learning_rate": 4.958038652181385e-07,
      "loss": 2.4116,
      "step": 69121
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1004791259765625,
      "learning_rate": 4.956758353500357e-07,
      "loss": 2.6036,
      "step": 69122
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0628944635391235,
      "learning_rate": 4.955478215943422e-07,
      "loss": 2.4222,
      "step": 69123
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1031858921051025,
      "learning_rate": 4.954198239512797e-07,
      "loss": 2.4488,
      "step": 69124
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0902386903762817,
      "learning_rate": 4.952918424210595e-07,
      "loss": 2.0303,
      "step": 69125
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0568042993545532,
      "learning_rate": 4.951638770039013e-07,
      "loss": 2.4797,
      "step": 69126
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1947941780090332,
      "learning_rate": 4.950359277000216e-07,
      "loss": 2.111,
      "step": 69127
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0328869819641113,
      "learning_rate": 4.94907994509638e-07,
      "loss": 2.1893,
      "step": 69128
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1444841623306274,
      "learning_rate": 4.94780077432967e-07,
      "loss": 2.3426,
      "step": 69129
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0171294212341309,
      "learning_rate": 4.94652176470225e-07,
      "loss": 2.4504,
      "step": 69130
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0862973928451538,
      "learning_rate": 4.945242916216286e-07,
      "loss": 2.4953,
      "step": 69131
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0523709058761597,
      "learning_rate": 4.943964228873965e-07,
      "loss": 2.288,
      "step": 69132
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0706865787506104,
      "learning_rate": 4.94268570267743e-07,
      "loss": 2.5815,
      "step": 69133
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0811567306518555,
      "learning_rate": 4.941407337628867e-07,
      "loss": 2.2794,
      "step": 69134
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.074669599533081,
      "learning_rate": 4.940129133730442e-07,
      "loss": 2.3654,
      "step": 69135
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0536918640136719,
      "learning_rate": 4.938851090984298e-07,
      "loss": 2.346,
      "step": 69136
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0597058534622192,
      "learning_rate": 4.937573209392644e-07,
      "loss": 2.5783,
      "step": 69137
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0792042016983032,
      "learning_rate": 4.93629548895761e-07,
      "loss": 2.3676,
      "step": 69138
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1233328580856323,
      "learning_rate": 4.935017929681363e-07,
      "loss": 2.2705,
      "step": 69139
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4327186346054077,
      "learning_rate": 4.933740531566101e-07,
      "loss": 2.2071,
      "step": 69140
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0670578479766846,
      "learning_rate": 4.932463294613954e-07,
      "loss": 2.2906,
      "step": 69141
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0573234558105469,
      "learning_rate": 4.9311862188271e-07,
      "loss": 2.401,
      "step": 69142
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2516943216323853,
      "learning_rate": 4.929909304207714e-07,
      "loss": 2.4487,
      "step": 69143
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0896774530410767,
      "learning_rate": 4.928632550757939e-07,
      "loss": 2.5411,
      "step": 69144
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0676686763763428,
      "learning_rate": 4.927355958479973e-07,
      "loss": 2.3086,
      "step": 69145
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1314047574996948,
      "learning_rate": 4.926079527375937e-07,
      "loss": 2.2088,
      "step": 69146
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0861936807632446,
      "learning_rate": 4.92480325744804e-07,
      "loss": 2.3572,
      "step": 69147
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0828272104263306,
      "learning_rate": 4.923527148698404e-07,
      "loss": 2.3669,
      "step": 69148
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0824191570281982,
      "learning_rate": 4.922251201129236e-07,
      "loss": 2.3899,
      "step": 69149
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2079651355743408,
      "learning_rate": 4.920975414742657e-07,
      "loss": 2.1964,
      "step": 69150
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0811816453933716,
      "learning_rate": 4.919699789540866e-07,
      "loss": 2.4379,
      "step": 69151
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0128437280654907,
      "learning_rate": 4.918424325525983e-07,
      "loss": 2.4304,
      "step": 69152
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1470032930374146,
      "learning_rate": 4.917149022700207e-07,
      "loss": 2.0997,
      "step": 69153
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2976306676864624,
      "learning_rate": 4.91587388106568e-07,
      "loss": 2.2748,
      "step": 69154
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0846868753433228,
      "learning_rate": 4.914598900624578e-07,
      "loss": 2.179,
      "step": 69155
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0253218412399292,
      "learning_rate": 4.913324081379045e-07,
      "loss": 2.187,
      "step": 69156
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0791665315628052,
      "learning_rate": 4.912049423331266e-07,
      "loss": 2.2186,
      "step": 69157
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1532455682754517,
      "learning_rate": 4.910774926483386e-07,
      "loss": 2.3175,
      "step": 69158
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0210014581680298,
      "learning_rate": 4.909500590837568e-07,
      "loss": 2.218,
      "step": 69159
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4061505794525146,
      "learning_rate": 4.908226416395967e-07,
      "loss": 2.266,
      "step": 69160
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9808219075202942,
      "learning_rate": 4.906952403160759e-07,
      "loss": 2.1078,
      "step": 69161
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9806885123252869,
      "learning_rate": 4.905678551134074e-07,
      "loss": 2.3953,
      "step": 69162
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0777298212051392,
      "learning_rate": 4.904404860318123e-07,
      "loss": 2.1444,
      "step": 69163
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9746872186660767,
      "learning_rate": 4.903131330715016e-07,
      "loss": 2.2638,
      "step": 69164
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0932098627090454,
      "learning_rate": 4.901857962326928e-07,
      "loss": 2.4181,
      "step": 69165
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1221216917037964,
      "learning_rate": 4.900584755156012e-07,
      "loss": 2.4335,
      "step": 69166
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1847608089447021,
      "learning_rate": 4.899311709204458e-07,
      "loss": 2.2002,
      "step": 69167
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1602492332458496,
      "learning_rate": 4.898038824474372e-07,
      "loss": 2.4366,
      "step": 69168
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.244114875793457,
      "learning_rate": 4.896766100967953e-07,
      "loss": 2.3059,
      "step": 69169
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0974791049957275,
      "learning_rate": 4.895493538687346e-07,
      "loss": 2.4113,
      "step": 69170
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0115063190460205,
      "learning_rate": 4.894221137634703e-07,
      "loss": 2.5577,
      "step": 69171
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.060960054397583,
      "learning_rate": 4.892948897812188e-07,
      "loss": 2.5869,
      "step": 69172
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1902930736541748,
      "learning_rate": 4.891676819221946e-07,
      "loss": 2.4566,
      "step": 69173
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1050260066986084,
      "learning_rate": 4.890404901866164e-07,
      "loss": 2.2953,
      "step": 69174
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0885940790176392,
      "learning_rate": 4.889133145746949e-07,
      "loss": 2.2117,
      "step": 69175
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1363664865493774,
      "learning_rate": 4.887861550866513e-07,
      "loss": 2.4124,
      "step": 69176
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0526313781738281,
      "learning_rate": 4.886590117226975e-07,
      "loss": 2.4691,
      "step": 69177
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9859687089920044,
      "learning_rate": 4.8853188448305e-07,
      "loss": 2.0781,
      "step": 69178
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1306278705596924,
      "learning_rate": 4.884047733679232e-07,
      "loss": 2.1734,
      "step": 69179
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1080580949783325,
      "learning_rate": 4.882776783775345e-07,
      "loss": 2.4919,
      "step": 69180
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1271491050720215,
      "learning_rate": 4.881505995120972e-07,
      "loss": 2.2347,
      "step": 69181
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9930487871170044,
      "learning_rate": 4.8802353677183e-07,
      "loss": 2.4604,
      "step": 69182
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0947250127792358,
      "learning_rate": 4.878964901569439e-07,
      "loss": 2.356,
      "step": 69183
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1852223873138428,
      "learning_rate": 4.877694596676586e-07,
      "loss": 2.4961,
      "step": 69184
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.260830283164978,
      "learning_rate": 4.876424453041862e-07,
      "loss": 2.1534,
      "step": 69185
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1922110319137573,
      "learning_rate": 4.875154470667454e-07,
      "loss": 2.2779,
      "step": 69186
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0538526773452759,
      "learning_rate": 4.873884649555471e-07,
      "loss": 2.4276,
      "step": 69187
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0725046396255493,
      "learning_rate": 4.872614989708113e-07,
      "loss": 2.347,
      "step": 69188
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.017198085784912,
      "learning_rate": 4.871345491127488e-07,
      "loss": 2.3367,
      "step": 69189
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0791821479797363,
      "learning_rate": 4.870076153815795e-07,
      "loss": 2.3499,
      "step": 69190
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3107304573059082,
      "learning_rate": 4.868806977775142e-07,
      "loss": 2.0953,
      "step": 69191
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9934107661247253,
      "learning_rate": 4.867537963007696e-07,
      "loss": 2.2622,
      "step": 69192
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0852813720703125,
      "learning_rate": 4.866269109515609e-07,
      "loss": 2.2283,
      "step": 69193
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1868555545806885,
      "learning_rate": 4.865000417301047e-07,
      "loss": 2.4607,
      "step": 69194
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.059140920639038,
      "learning_rate": 4.86373188636613e-07,
      "loss": 2.1815,
      "step": 69195
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0752464532852173,
      "learning_rate": 4.862463516713045e-07,
      "loss": 2.3173,
      "step": 69196
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1600956916809082,
      "learning_rate": 4.861195308343925e-07,
      "loss": 2.2474,
      "step": 69197
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.05570650100708,
      "learning_rate": 4.859927261260899e-07,
      "loss": 2.3128,
      "step": 69198
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0602576732635498,
      "learning_rate": 4.858659375466146e-07,
      "loss": 2.2711,
      "step": 69199
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.157989740371704,
      "learning_rate": 4.857391650961796e-07,
      "loss": 2.0919,
      "step": 69200
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1274901628494263,
      "learning_rate": 4.856124087750025e-07,
      "loss": 2.5786,
      "step": 69201
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.056321144104004,
      "learning_rate": 4.854856685832943e-07,
      "loss": 2.3489,
      "step": 69202
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2769843339920044,
      "learning_rate": 4.853589445212736e-07,
      "loss": 2.3715,
      "step": 69203
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2131589651107788,
      "learning_rate": 4.852322365891537e-07,
      "loss": 2.2059,
      "step": 69204
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0593361854553223,
      "learning_rate": 4.8510554478715e-07,
      "loss": 2.2423,
      "step": 69205
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0605953931808472,
      "learning_rate": 4.849788691154744e-07,
      "loss": 2.0163,
      "step": 69206
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.004254937171936,
      "learning_rate": 4.848522095743458e-07,
      "loss": 2.4155,
      "step": 69207
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2317311763763428,
      "learning_rate": 4.84725566163975e-07,
      "loss": 2.3708,
      "step": 69208
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0617358684539795,
      "learning_rate": 4.845989388845807e-07,
      "loss": 2.1772,
      "step": 69209
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0534451007843018,
      "learning_rate": 4.844723277363739e-07,
      "loss": 2.3336,
      "step": 69210
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.057003378868103,
      "learning_rate": 4.843457327195722e-07,
      "loss": 2.4067,
      "step": 69211
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0278338193893433,
      "learning_rate": 4.842191538343877e-07,
      "loss": 2.1626,
      "step": 69212
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.093353271484375,
      "learning_rate": 4.840925910810368e-07,
      "loss": 2.4723,
      "step": 69213
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0878905057907104,
      "learning_rate": 4.839660444597328e-07,
      "loss": 2.3884,
      "step": 69214
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0464750528335571,
      "learning_rate": 4.838395139706919e-07,
      "loss": 2.0947,
      "step": 69215
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0197786092758179,
      "learning_rate": 4.837129996141276e-07,
      "loss": 2.1649,
      "step": 69216
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5366698503494263,
      "learning_rate": 4.835865013902553e-07,
      "loss": 2.246,
      "step": 69217
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0673847198486328,
      "learning_rate": 4.834600192992866e-07,
      "loss": 2.4018,
      "step": 69218
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.108641266822815,
      "learning_rate": 4.833335533414385e-07,
      "loss": 2.2027,
      "step": 69219
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.137980341911316,
      "learning_rate": 4.832071035169239e-07,
      "loss": 2.137,
      "step": 69220
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0965756177902222,
      "learning_rate": 4.830806698259593e-07,
      "loss": 2.1904,
      "step": 69221
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1031360626220703,
      "learning_rate": 4.829542522687558e-07,
      "loss": 2.2973,
      "step": 69222
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0960040092468262,
      "learning_rate": 4.82827850845532e-07,
      "loss": 2.4545,
      "step": 69223
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0862685441970825,
      "learning_rate": 4.827014655564977e-07,
      "loss": 2.4069,
      "step": 69224
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1014416217803955,
      "learning_rate": 4.825750964018716e-07,
      "loss": 2.3324,
      "step": 69225
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1579115390777588,
      "learning_rate": 4.824487433818647e-07,
      "loss": 2.4724,
      "step": 69226
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1019047498703003,
      "learning_rate": 4.823224064966914e-07,
      "loss": 2.4154,
      "step": 69227
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2340513467788696,
      "learning_rate": 4.821960857465668e-07,
      "loss": 2.3146,
      "step": 69228
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0887168645858765,
      "learning_rate": 4.820697811317054e-07,
      "loss": 2.1756,
      "step": 69229
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0707975625991821,
      "learning_rate": 4.819434926523203e-07,
      "loss": 2.1039,
      "step": 69230
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0750820636749268,
      "learning_rate": 4.818172203086269e-07,
      "loss": 2.3414,
      "step": 69231
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1718769073486328,
      "learning_rate": 4.816909641008382e-07,
      "loss": 2.2862,
      "step": 69232
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3605527877807617,
      "learning_rate": 4.815647240291665e-07,
      "loss": 2.1747,
      "step": 69233
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.083238959312439,
      "learning_rate": 4.814385000938305e-07,
      "loss": 2.1693,
      "step": 69234
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0890791416168213,
      "learning_rate": 4.813122922950386e-07,
      "loss": 2.4322,
      "step": 69235
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.094061255455017,
      "learning_rate": 4.811861006330099e-07,
      "loss": 2.4299,
      "step": 69236
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0462219715118408,
      "learning_rate": 4.81059925107954e-07,
      "loss": 2.3244,
      "step": 69237
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.018208384513855,
      "learning_rate": 4.809337657200885e-07,
      "loss": 2.331,
      "step": 69238
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.107987642288208,
      "learning_rate": 4.808076224696245e-07,
      "loss": 2.3889,
      "step": 69239
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1138057708740234,
      "learning_rate": 4.806814953567784e-07,
      "loss": 2.4067,
      "step": 69240
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0232211351394653,
      "learning_rate": 4.805553843817612e-07,
      "loss": 2.2369,
      "step": 69241
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3239284753799438,
      "learning_rate": 4.804292895447893e-07,
      "loss": 2.435,
      "step": 69242
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.077002763748169,
      "learning_rate": 4.803032108460748e-07,
      "loss": 2.2358,
      "step": 69243
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0677177906036377,
      "learning_rate": 4.80177148285833e-07,
      "loss": 2.2867,
      "step": 69244
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.104018211364746,
      "learning_rate": 4.800511018642739e-07,
      "loss": 2.4229,
      "step": 69245
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1530888080596924,
      "learning_rate": 4.799250715816162e-07,
      "loss": 2.3809,
      "step": 69246
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1475675106048584,
      "learning_rate": 4.797990574380684e-07,
      "loss": 2.3031,
      "step": 69247
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0738152265548706,
      "learning_rate": 4.796730594338495e-07,
      "loss": 2.366,
      "step": 69248
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2653700113296509,
      "learning_rate": 4.795470775691679e-07,
      "loss": 2.3074,
      "step": 69249
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.092157006263733,
      "learning_rate": 4.794211118442427e-07,
      "loss": 2.2448,
      "step": 69250
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0229600667953491,
      "learning_rate": 4.792951622592812e-07,
      "loss": 2.2303,
      "step": 69251
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0925877094268799,
      "learning_rate": 4.791692288145034e-07,
      "loss": 2.312,
      "step": 69252
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1056002378463745,
      "learning_rate": 4.790433115101178e-07,
      "loss": 2.3577,
      "step": 69253
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0286790132522583,
      "learning_rate": 4.7891741034634e-07,
      "loss": 2.3542,
      "step": 69254
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1135061979293823,
      "learning_rate": 4.787915253233832e-07,
      "loss": 2.2745,
      "step": 69255
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0431605577468872,
      "learning_rate": 4.786656564414616e-07,
      "loss": 2.1926,
      "step": 69256
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1305240392684937,
      "learning_rate": 4.785398037007849e-07,
      "loss": 2.6138,
      "step": 69257
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1326242685317993,
      "learning_rate": 4.78413967101572e-07,
      "loss": 2.3752,
      "step": 69258
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.014707326889038,
      "learning_rate": 4.782881466440325e-07,
      "loss": 2.3317,
      "step": 69259
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1159636974334717,
      "learning_rate": 4.781623423283799e-07,
      "loss": 2.2314,
      "step": 69260
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9844993352890015,
      "learning_rate": 4.780365541548293e-07,
      "loss": 2.1676,
      "step": 69261
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.099460244178772,
      "learning_rate": 4.779107821235918e-07,
      "loss": 2.4002,
      "step": 69262
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0297152996063232,
      "learning_rate": 4.777850262348837e-07,
      "loss": 2.3164,
      "step": 69263
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.142240047454834,
      "learning_rate": 4.77659286488914e-07,
      "loss": 2.2782,
      "step": 69264
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.048755168914795,
      "learning_rate": 4.77533562885899e-07,
      "loss": 1.9881,
      "step": 69265
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.136160135269165,
      "learning_rate": 4.774078554260497e-07,
      "loss": 2.3112,
      "step": 69266
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0622375011444092,
      "learning_rate": 4.772821641095826e-07,
      "loss": 2.2199,
      "step": 69267
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9777225255966187,
      "learning_rate": 4.771564889367064e-07,
      "loss": 2.2669,
      "step": 69268
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1239219903945923,
      "learning_rate": 4.770308299076388e-07,
      "loss": 2.1979,
      "step": 69269
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0666981935501099,
      "learning_rate": 4.769051870225883e-07,
      "loss": 2.4452,
      "step": 69270
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0754749774932861,
      "learning_rate": 4.7677956028177155e-07,
      "loss": 2.2769,
      "step": 69271
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0182479619979858,
      "learning_rate": 4.7665394968539725e-07,
      "loss": 2.5092,
      "step": 69272
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.142845869064331,
      "learning_rate": 4.76528355233683e-07,
      "loss": 2.341,
      "step": 69273
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1824216842651367,
      "learning_rate": 4.7640277692683866e-07,
      "loss": 2.1011,
      "step": 69274
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1794556379318237,
      "learning_rate": 4.762772147650796e-07,
      "loss": 2.5344,
      "step": 69275
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0947712659835815,
      "learning_rate": 4.761516687486156e-07,
      "loss": 2.1713,
      "step": 69276
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9561538100242615,
      "learning_rate": 4.760261388776632e-07,
      "loss": 2.3733,
      "step": 69277
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0451767444610596,
      "learning_rate": 4.7590062515243116e-07,
      "loss": 2.1768,
      "step": 69278
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0382860898971558,
      "learning_rate": 4.7577512757313594e-07,
      "loss": 2.2583,
      "step": 69279
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.157077431678772,
      "learning_rate": 4.756496461399873e-07,
      "loss": 2.3048,
      "step": 69280
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0127873420715332,
      "learning_rate": 4.7552418085319965e-07,
      "loss": 2.3612,
      "step": 69281
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0319385528564453,
      "learning_rate": 4.75398731712986e-07,
      "loss": 2.179,
      "step": 69282
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0049761533737183,
      "learning_rate": 4.752732987195585e-07,
      "loss": 2.3197,
      "step": 69283
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0237219333648682,
      "learning_rate": 4.751478818731292e-07,
      "loss": 2.1775,
      "step": 69284
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0894379615783691,
      "learning_rate": 4.7502248117391124e-07,
      "loss": 2.3669,
      "step": 69285
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1701281070709229,
      "learning_rate": 4.748970966221156e-07,
      "loss": 2.4973,
      "step": 69286
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0191357135772705,
      "learning_rate": 4.747717282179587e-07,
      "loss": 2.4739,
      "step": 69287
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0363054275512695,
      "learning_rate": 4.7464637596165044e-07,
      "loss": 2.2389,
      "step": 69288
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0020018815994263,
      "learning_rate": 4.745210398534017e-07,
      "loss": 2.4961,
      "step": 69289
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.780424952507019,
      "learning_rate": 4.74395719893429e-07,
      "loss": 2.1894,
      "step": 69290
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9910065531730652,
      "learning_rate": 4.7427041608194e-07,
      "loss": 2.4193,
      "step": 69291
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.154556393623352,
      "learning_rate": 4.741451284191523e-07,
      "loss": 2.441,
      "step": 69292
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.032710075378418,
      "learning_rate": 4.740198569052745e-07,
      "loss": 2.4087,
      "step": 69293
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1419283151626587,
      "learning_rate": 4.7389460154052103e-07,
      "loss": 2.4099,
      "step": 69294
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1029921770095825,
      "learning_rate": 4.7376936232510385e-07,
      "loss": 2.128,
      "step": 69295
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1601569652557373,
      "learning_rate": 4.7364413925923505e-07,
      "loss": 2.4823,
      "step": 69296
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9933119416236877,
      "learning_rate": 4.7351893234312554e-07,
      "loss": 2.267,
      "step": 69297
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.048415184020996,
      "learning_rate": 4.733937415769896e-07,
      "loss": 2.0969,
      "step": 69298
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9826256036758423,
      "learning_rate": 4.7326856696103706e-07,
      "loss": 2.1391,
      "step": 69299
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0410075187683105,
      "learning_rate": 4.7314340849548335e-07,
      "loss": 2.4344,
      "step": 69300
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.191891074180603,
      "learning_rate": 4.7301826618053824e-07,
      "loss": 2.5522,
      "step": 69301
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1246893405914307,
      "learning_rate": 4.7289314001641497e-07,
      "loss": 2.3073,
      "step": 69302
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9561552405357361,
      "learning_rate": 4.7276803000332547e-07,
      "loss": 2.2084,
      "step": 69303
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1307368278503418,
      "learning_rate": 4.726429361414819e-07,
      "loss": 2.2476,
      "step": 69304
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9876692891120911,
      "learning_rate": 4.725178584310952e-07,
      "loss": 2.4997,
      "step": 69305
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1450082063674927,
      "learning_rate": 4.723927968723796e-07,
      "loss": 2.311,
      "step": 69306
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1905088424682617,
      "learning_rate": 4.7226775146554384e-07,
      "loss": 2.2896,
      "step": 69307
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2258065938949585,
      "learning_rate": 4.721427222108055e-07,
      "loss": 2.4232,
      "step": 69308
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.057894229888916,
      "learning_rate": 4.7201770910837e-07,
      "loss": 2.4301,
      "step": 69309
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.044144868850708,
      "learning_rate": 4.718927121584538e-07,
      "loss": 2.2637,
      "step": 69310
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9867637157440186,
      "learning_rate": 4.717677313612656e-07,
      "loss": 2.3609,
      "step": 69311
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0736969709396362,
      "learning_rate": 4.7164276671702094e-07,
      "loss": 2.452,
      "step": 69312
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0069330930709839,
      "learning_rate": 4.7151781822592836e-07,
      "loss": 2.0505,
      "step": 69313
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.214692234992981,
      "learning_rate": 4.713928858882011e-07,
      "loss": 2.3017,
      "step": 69314
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.071086049079895,
      "learning_rate": 4.712679697040523e-07,
      "loss": 2.3797,
      "step": 69315
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0440969467163086,
      "learning_rate": 4.711430696736896e-07,
      "loss": 2.3184,
      "step": 69316
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0675634145736694,
      "learning_rate": 4.710181857973295e-07,
      "loss": 1.9295,
      "step": 69317
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0206677913665771,
      "learning_rate": 4.708933180751807e-07,
      "loss": 2.2327,
      "step": 69318
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1990673542022705,
      "learning_rate": 4.7076846650745635e-07,
      "loss": 2.4102,
      "step": 69319
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0043399333953857,
      "learning_rate": 4.7064363109436627e-07,
      "loss": 2.1539,
      "step": 69320
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0095531940460205,
      "learning_rate": 4.705188118361248e-07,
      "loss": 2.4488,
      "step": 69321
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0437122583389282,
      "learning_rate": 4.7039400873294174e-07,
      "loss": 2.4056,
      "step": 69322
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9822613000869751,
      "learning_rate": 4.702692217850302e-07,
      "loss": 2.31,
      "step": 69323
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1903507709503174,
      "learning_rate": 4.701444509925979e-07,
      "loss": 2.4046,
      "step": 69324
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1887110471725464,
      "learning_rate": 4.700196963558612e-07,
      "loss": 2.5614,
      "step": 69325
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9828306436538696,
      "learning_rate": 4.698949578750278e-07,
      "loss": 2.2754,
      "step": 69326
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0766396522521973,
      "learning_rate": 4.6977023555031197e-07,
      "loss": 2.389,
      "step": 69327
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0509306192398071,
      "learning_rate": 4.6964552938192245e-07,
      "loss": 2.279,
      "step": 69328
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0493642091751099,
      "learning_rate": 4.6952083937007453e-07,
      "loss": 2.3757,
      "step": 69329
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.138636827468872,
      "learning_rate": 4.693961655149748e-07,
      "loss": 2.1906,
      "step": 69330
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1320292949676514,
      "learning_rate": 4.6927150781683864e-07,
      "loss": 2.2691,
      "step": 69331
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0060782432556152,
      "learning_rate": 4.691468662758747e-07,
      "loss": 2.1906,
      "step": 69332
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.004629373550415,
      "learning_rate": 4.690222408922973e-07,
      "loss": 2.3239,
      "step": 69333
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0726791620254517,
      "learning_rate": 4.6889763166631297e-07,
      "loss": 2.2705,
      "step": 69334
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1758627891540527,
      "learning_rate": 4.687730385981393e-07,
      "loss": 2.3789,
      "step": 69335
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2978370189666748,
      "learning_rate": 4.6864846168798163e-07,
      "loss": 2.364,
      "step": 69336
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1493276357650757,
      "learning_rate": 4.685239009360543e-07,
      "loss": 2.4654,
      "step": 69337
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.090032696723938,
      "learning_rate": 4.6839935634256705e-07,
      "loss": 2.1876,
      "step": 69338
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0478365421295166,
      "learning_rate": 4.6827482790773205e-07,
      "loss": 2.4593,
      "step": 69339
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.104853868484497,
      "learning_rate": 4.6815031563175904e-07,
      "loss": 2.635,
      "step": 69340
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2477566003799438,
      "learning_rate": 4.6802581951486126e-07,
      "loss": 2.3654,
      "step": 69341
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9956430792808533,
      "learning_rate": 4.6790133955724735e-07,
      "loss": 2.1062,
      "step": 69342
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1266661882400513,
      "learning_rate": 4.6777687575913053e-07,
      "loss": 2.196,
      "step": 69343
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2067228555679321,
      "learning_rate": 4.6765242812072173e-07,
      "loss": 2.4802,
      "step": 69344
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1184202432632446,
      "learning_rate": 4.675279966422286e-07,
      "loss": 2.0724,
      "step": 69345
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.235686182975769,
      "learning_rate": 4.674035813238664e-07,
      "loss": 1.9472,
      "step": 69346
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0589419603347778,
      "learning_rate": 4.672791821658429e-07,
      "loss": 2.4194,
      "step": 69347
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0991411209106445,
      "learning_rate": 4.6715479916837113e-07,
      "loss": 2.3517,
      "step": 69348
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1035785675048828,
      "learning_rate": 4.670304323316621e-07,
      "loss": 2.3778,
      "step": 69349
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1005278825759888,
      "learning_rate": 4.6690608165592454e-07,
      "loss": 2.2382,
      "step": 69350
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2526893615722656,
      "learning_rate": 4.6678174714136936e-07,
      "loss": 2.3449,
      "step": 69351
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.158561110496521,
      "learning_rate": 4.6665742878820975e-07,
      "loss": 2.2113,
      "step": 69352
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.10968816280365,
      "learning_rate": 4.665331265966533e-07,
      "loss": 2.2921,
      "step": 69353
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1415629386901855,
      "learning_rate": 4.6640884056691426e-07,
      "loss": 2.452,
      "step": 69354
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0698277950286865,
      "learning_rate": 4.662845706992003e-07,
      "loss": 2.2537,
      "step": 69355
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0523521900177002,
      "learning_rate": 4.6616031699372457e-07,
      "loss": 2.3419,
      "step": 69356
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.020257830619812,
      "learning_rate": 4.660360794506946e-07,
      "loss": 2.3239,
      "step": 69357
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1119956970214844,
      "learning_rate": 4.659118580703237e-07,
      "loss": 2.1988,
      "step": 69358
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0216338634490967,
      "learning_rate": 4.6578765285282157e-07,
      "loss": 2.4547,
      "step": 69359
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0664184093475342,
      "learning_rate": 4.656634637983992e-07,
      "loss": 2.4435,
      "step": 69360
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0329692363739014,
      "learning_rate": 4.6553929090726757e-07,
      "loss": 2.5439,
      "step": 69361
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0446075201034546,
      "learning_rate": 4.6541513417963644e-07,
      "loss": 2.3216,
      "step": 69362
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9228397607803345,
      "learning_rate": 4.652909936157146e-07,
      "loss": 2.463,
      "step": 69363
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.103139042854309,
      "learning_rate": 4.651668692157152e-07,
      "loss": 2.2772,
      "step": 69364
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.942009449005127,
      "learning_rate": 4.650427609798458e-07,
      "loss": 2.3996,
      "step": 69365
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0776596069335938,
      "learning_rate": 4.649186689083207e-07,
      "loss": 2.4887,
      "step": 69366
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1304705142974854,
      "learning_rate": 4.647945930013464e-07,
      "loss": 2.3805,
      "step": 69367
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1266931295394897,
      "learning_rate": 4.6467053325913614e-07,
      "loss": 2.4531,
      "step": 69368
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.012012243270874,
      "learning_rate": 4.645464896818985e-07,
      "loss": 2.2087,
      "step": 69369
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2186952829360962,
      "learning_rate": 4.6442246226984565e-07,
      "loss": 2.2643,
      "step": 69370
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1329909563064575,
      "learning_rate": 4.6429845102318624e-07,
      "loss": 2.3841,
      "step": 69371
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0660909414291382,
      "learning_rate": 4.6417445594212904e-07,
      "loss": 2.1385,
      "step": 69372
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9836844801902771,
      "learning_rate": 4.640504770268872e-07,
      "loss": 2.3462,
      "step": 69373
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0390667915344238,
      "learning_rate": 4.6392651427767054e-07,
      "loss": 2.3212,
      "step": 69374
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0762966871261597,
      "learning_rate": 4.6380256769468667e-07,
      "loss": 2.161,
      "step": 69375
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0661647319793701,
      "learning_rate": 4.6367863727814986e-07,
      "loss": 2.5052,
      "step": 69376
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1040219068527222,
      "learning_rate": 4.635547230282666e-07,
      "loss": 2.1936,
      "step": 69377
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.18656587600708,
      "learning_rate": 4.6343082494524683e-07,
      "loss": 2.3049,
      "step": 69378
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2793208360671997,
      "learning_rate": 4.633069430293036e-07,
      "loss": 2.4603,
      "step": 69379
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0492860078811646,
      "learning_rate": 4.631830772806434e-07,
      "loss": 2.3024,
      "step": 69380
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9964797496795654,
      "learning_rate": 4.630592276994805e-07,
      "loss": 2.3718,
      "step": 69381
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0157742500305176,
      "learning_rate": 4.6293539428602043e-07,
      "loss": 2.2579,
      "step": 69382
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0106414556503296,
      "learning_rate": 4.628115770404762e-07,
      "loss": 2.254,
      "step": 69383
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.347597360610962,
      "learning_rate": 4.626877759630555e-07,
      "loss": 2.2741,
      "step": 69384
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1487599611282349,
      "learning_rate": 4.625639910539703e-07,
      "loss": 2.617,
      "step": 69385
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1374262571334839,
      "learning_rate": 4.6244022231342836e-07,
      "loss": 2.3411,
      "step": 69386
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0972615480422974,
      "learning_rate": 4.623164697416416e-07,
      "loss": 2.2539,
      "step": 69387
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0605545043945312,
      "learning_rate": 4.6219273333881985e-07,
      "loss": 2.3747,
      "step": 69388
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1563535928726196,
      "learning_rate": 4.620690131051708e-07,
      "loss": 2.2495,
      "step": 69389
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7222321033477783,
      "learning_rate": 4.619453090409043e-07,
      "loss": 2.2328,
      "step": 69390
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0453349351882935,
      "learning_rate": 4.618216211462323e-07,
      "loss": 2.245,
      "step": 69391
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0008879899978638,
      "learning_rate": 4.616979494213625e-07,
      "loss": 2.3291,
      "step": 69392
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.144266963005066,
      "learning_rate": 4.6157429386650574e-07,
      "loss": 2.4964,
      "step": 69393
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0786595344543457,
      "learning_rate": 4.614506544818698e-07,
      "loss": 2.3576,
      "step": 69394
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.156774878501892,
      "learning_rate": 4.6132703126766764e-07,
      "loss": 2.3355,
      "step": 69395
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0261310338974,
      "learning_rate": 4.612034242241048e-07,
      "loss": 2.4189,
      "step": 69396
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1355490684509277,
      "learning_rate": 4.610798333513944e-07,
      "loss": 2.2072,
      "step": 69397
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0287377834320068,
      "learning_rate": 4.6095625864974293e-07,
      "loss": 2.2949,
      "step": 69398
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1773439645767212,
      "learning_rate": 4.6083270011936354e-07,
      "loss": 2.2608,
      "step": 69399
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0581363439559937,
      "learning_rate": 4.6070915776046275e-07,
      "loss": 2.2552,
      "step": 69400
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0943909883499146,
      "learning_rate": 4.605856315732504e-07,
      "loss": 2.2161,
      "step": 69401
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0532963275909424,
      "learning_rate": 4.6046212155793637e-07,
      "loss": 2.2492,
      "step": 69402
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0514845848083496,
      "learning_rate": 4.6033862771473037e-07,
      "loss": 2.3473,
      "step": 69403
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9913947582244873,
      "learning_rate": 4.602151500438401e-07,
      "loss": 2.3843,
      "step": 69404
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0798637866973877,
      "learning_rate": 4.600916885454765e-07,
      "loss": 2.1679,
      "step": 69405
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1916694641113281,
      "learning_rate": 4.5996824321984935e-07,
      "loss": 2.1343,
      "step": 69406
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1217763423919678,
      "learning_rate": 4.598448140671663e-07,
      "loss": 2.395,
      "step": 69407
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.079038381576538,
      "learning_rate": 4.597214010876383e-07,
      "loss": 2.374,
      "step": 69408
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0978182554244995,
      "learning_rate": 4.595980042814718e-07,
      "loss": 2.2871,
      "step": 69409
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1342946290969849,
      "learning_rate": 4.594746236488801e-07,
      "loss": 2.1455,
      "step": 69410
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.192327618598938,
      "learning_rate": 4.5935125919006727e-07,
      "loss": 2.1639,
      "step": 69411
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1693769693374634,
      "learning_rate": 4.592279109052478e-07,
      "loss": 2.2854,
      "step": 69412
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0900981426239014,
      "learning_rate": 4.5910457879462687e-07,
      "loss": 2.4004,
      "step": 69413
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1525673866271973,
      "learning_rate": 4.589812628584167e-07,
      "loss": 2.2837,
      "step": 69414
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1997637748718262,
      "learning_rate": 4.5885796309682263e-07,
      "loss": 2.5076,
      "step": 69415
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0835174322128296,
      "learning_rate": 4.587346795100578e-07,
      "loss": 2.2136,
      "step": 69416
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0632200241088867,
      "learning_rate": 4.586114120983265e-07,
      "loss": 2.4961,
      "step": 69417
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0232347249984741,
      "learning_rate": 4.584881608618419e-07,
      "loss": 2.3965,
      "step": 69418
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.087405800819397,
      "learning_rate": 4.5836492580081047e-07,
      "loss": 2.1864,
      "step": 69419
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.125908613204956,
      "learning_rate": 4.5824170691544213e-07,
      "loss": 2.3974,
      "step": 69420
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1262725591659546,
      "learning_rate": 4.5811850420594553e-07,
      "loss": 2.2746,
      "step": 69421
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0227447748184204,
      "learning_rate": 4.5799531767253045e-07,
      "loss": 2.44,
      "step": 69422
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0365744829177856,
      "learning_rate": 4.5787214731540353e-07,
      "loss": 2.46,
      "step": 69423
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1438807249069214,
      "learning_rate": 4.5774899313477563e-07,
      "loss": 2.4547,
      "step": 69424
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2479733228683472,
      "learning_rate": 4.576258551308543e-07,
      "loss": 2.2274,
      "step": 69425
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9903994798660278,
      "learning_rate": 4.5750273330384954e-07,
      "loss": 2.4167,
      "step": 69426
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2536369562149048,
      "learning_rate": 4.5737962765396995e-07,
      "loss": 2.43,
      "step": 69427
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0490062236785889,
      "learning_rate": 4.5725653818142423e-07,
      "loss": 2.2414,
      "step": 69428
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.17964506149292,
      "learning_rate": 4.5713346488641785e-07,
      "loss": 2.4475,
      "step": 69429
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0342965126037598,
      "learning_rate": 4.570104077691639e-07,
      "loss": 2.4387,
      "step": 69430
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1209906339645386,
      "learning_rate": 4.568873668298679e-07,
      "loss": 2.3506,
      "step": 69431
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1437960863113403,
      "learning_rate": 4.5676434206874064e-07,
      "loss": 2.5876,
      "step": 69432
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0324304103851318,
      "learning_rate": 4.566413334859898e-07,
      "loss": 2.0844,
      "step": 69433
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6839988231658936,
      "learning_rate": 4.5651834108182304e-07,
      "loss": 2.2716,
      "step": 69434
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3042662143707275,
      "learning_rate": 4.563953648564512e-07,
      "loss": 2.3625,
      "step": 69435
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.08625066280365,
      "learning_rate": 4.5627240481007863e-07,
      "loss": 2.336,
      "step": 69436
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1201767921447754,
      "learning_rate": 4.5614946094291847e-07,
      "loss": 2.1677,
      "step": 69437
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.259878396987915,
      "learning_rate": 4.560265332551761e-07,
      "loss": 2.4436,
      "step": 69438
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1147841215133667,
      "learning_rate": 4.5590362174706136e-07,
      "loss": 2.4471,
      "step": 69439
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.166237711906433,
      "learning_rate": 4.55780726418783e-07,
      "loss": 2.1894,
      "step": 69440
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.087890625,
      "learning_rate": 4.5565784727054753e-07,
      "loss": 2.438,
      "step": 69441
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.076700210571289,
      "learning_rate": 4.555349843025636e-07,
      "loss": 2.5041,
      "step": 69442
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9872648119926453,
      "learning_rate": 4.554121375150411e-07,
      "loss": 2.2186,
      "step": 69443
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1209490299224854,
      "learning_rate": 4.552893069081865e-07,
      "loss": 2.2653,
      "step": 69444
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1672179698944092,
      "learning_rate": 4.5516649248220966e-07,
      "loss": 2.3876,
      "step": 69445
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0021966695785522,
      "learning_rate": 4.5504369423731707e-07,
      "loss": 2.2476,
      "step": 69446
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0381864309310913,
      "learning_rate": 4.549209121737186e-07,
      "loss": 2.2958,
      "step": 69447
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0980300903320312,
      "learning_rate": 4.5479814629161954e-07,
      "loss": 2.4368,
      "step": 69448
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0185335874557495,
      "learning_rate": 4.5467539659123315e-07,
      "loss": 2.2722,
      "step": 69449
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.058903694152832,
      "learning_rate": 4.545526630727615e-07,
      "loss": 2.2309,
      "step": 69450
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0721315145492554,
      "learning_rate": 4.5442994573641763e-07,
      "loss": 2.3694,
      "step": 69451
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1357693672180176,
      "learning_rate": 4.5430724458240705e-07,
      "loss": 2.1762,
      "step": 69452
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6113364696502686,
      "learning_rate": 4.5418455961093954e-07,
      "loss": 2.3295,
      "step": 69453
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0332309007644653,
      "learning_rate": 4.540618908222205e-07,
      "loss": 2.3012,
      "step": 69454
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1966643333435059,
      "learning_rate": 4.539392382164598e-07,
      "loss": 2.1791,
      "step": 69455
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1826953887939453,
      "learning_rate": 4.5381660179386276e-07,
      "loss": 2.3673,
      "step": 69456
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1930546760559082,
      "learning_rate": 4.536939815546415e-07,
      "loss": 2.4047,
      "step": 69457
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1303256750106812,
      "learning_rate": 4.5357137749900026e-07,
      "loss": 2.2358,
      "step": 69458
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0800203084945679,
      "learning_rate": 4.534487896271489e-07,
      "loss": 2.3613,
      "step": 69459
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1849768161773682,
      "learning_rate": 4.533262179392939e-07,
      "loss": 2.2473,
      "step": 69460
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0768195390701294,
      "learning_rate": 4.532036624356451e-07,
      "loss": 2.287,
      "step": 69461
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0959502458572388,
      "learning_rate": 4.5308112311640893e-07,
      "loss": 2.2846,
      "step": 69462
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.012407660484314,
      "learning_rate": 4.52958599981792e-07,
      "loss": 2.3024,
      "step": 69463
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0746252536773682,
      "learning_rate": 4.528360930320053e-07,
      "loss": 2.3972,
      "step": 69464
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0636402368545532,
      "learning_rate": 4.527136022672518e-07,
      "loss": 2.366,
      "step": 69465
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.042679786682129,
      "learning_rate": 4.525911276877437e-07,
      "loss": 2.3137,
      "step": 69466
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.05910325050354,
      "learning_rate": 4.524686692936864e-07,
      "loss": 2.4632,
      "step": 69467
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.123997688293457,
      "learning_rate": 4.523462270852874e-07,
      "loss": 2.2624,
      "step": 69468
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0185900926589966,
      "learning_rate": 4.5222380106275443e-07,
      "loss": 2.2231,
      "step": 69469
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1780455112457275,
      "learning_rate": 4.5210139122629506e-07,
      "loss": 2.3484,
      "step": 69470
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0755149126052856,
      "learning_rate": 4.5197899757611683e-07,
      "loss": 2.1886,
      "step": 69471
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0555670261383057,
      "learning_rate": 4.518566201124286e-07,
      "loss": 2.3677,
      "step": 69472
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1290876865386963,
      "learning_rate": 4.5173425883543566e-07,
      "loss": 2.3239,
      "step": 69473
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1414382457733154,
      "learning_rate": 4.5161191374534676e-07,
      "loss": 2.3077,
      "step": 69474
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1066707372665405,
      "learning_rate": 4.514895848423684e-07,
      "loss": 2.2021,
      "step": 69475
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0922517776489258,
      "learning_rate": 4.513672721267093e-07,
      "loss": 2.3233,
      "step": 69476
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0881316661834717,
      "learning_rate": 4.5124497559857484e-07,
      "loss": 2.2226,
      "step": 69477
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0628679990768433,
      "learning_rate": 4.5112269525817486e-07,
      "loss": 2.3492,
      "step": 69478
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.016979455947876,
      "learning_rate": 4.5100043110571477e-07,
      "loss": 2.2731,
      "step": 69479
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.132314920425415,
      "learning_rate": 4.508781831414044e-07,
      "loss": 2.2753,
      "step": 69480
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0678226947784424,
      "learning_rate": 4.5075595136544694e-07,
      "loss": 2.3155,
      "step": 69481
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0704805850982666,
      "learning_rate": 4.506337357780521e-07,
      "loss": 2.2701,
      "step": 69482
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.185833215713501,
      "learning_rate": 4.505115363794255e-07,
      "loss": 2.2027,
      "step": 69483
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.089536190032959,
      "learning_rate": 4.5038935316977784e-07,
      "loss": 2.1636,
      "step": 69484
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1072551012039185,
      "learning_rate": 4.502671861493113e-07,
      "loss": 2.3086,
      "step": 69485
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5790971517562866,
      "learning_rate": 4.5014503531823793e-07,
      "loss": 2.3853,
      "step": 69486
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0514250993728638,
      "learning_rate": 4.500229006767598e-07,
      "loss": 2.3863,
      "step": 69487
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.615805983543396,
      "learning_rate": 4.4990078222508895e-07,
      "loss": 2.4866,
      "step": 69488
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1573941707611084,
      "learning_rate": 4.497786799634296e-07,
      "loss": 2.4229,
      "step": 69489
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0995826721191406,
      "learning_rate": 4.4965659389198837e-07,
      "loss": 2.2018,
      "step": 69490
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1335595846176147,
      "learning_rate": 4.4953452401097387e-07,
      "loss": 2.2825,
      "step": 69491
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1236217021942139,
      "learning_rate": 4.4941247032059044e-07,
      "loss": 2.2942,
      "step": 69492
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9999146461486816,
      "learning_rate": 4.49290432821049e-07,
      "loss": 2.2731,
      "step": 69493
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0095441341400146,
      "learning_rate": 4.491684115125539e-07,
      "loss": 2.5238,
      "step": 69494
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0699279308319092,
      "learning_rate": 4.490464063953126e-07,
      "loss": 2.1979,
      "step": 69495
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1063199043273926,
      "learning_rate": 4.4892441746952953e-07,
      "loss": 2.4309,
      "step": 69496
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1496254205703735,
      "learning_rate": 4.4880244473541557e-07,
      "loss": 2.033,
      "step": 69497
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0623178482055664,
      "learning_rate": 4.486804881931739e-07,
      "loss": 2.3193,
      "step": 69498
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0230798721313477,
      "learning_rate": 4.485585478430143e-07,
      "loss": 2.4167,
      "step": 69499
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1211644411087036,
      "learning_rate": 4.4843662368514117e-07,
      "loss": 2.2532,
      "step": 69500
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0338308811187744,
      "learning_rate": 4.4831471571976315e-07,
      "loss": 2.2308,
      "step": 69501
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0617635250091553,
      "learning_rate": 4.4819282394708455e-07,
      "loss": 2.2517,
      "step": 69502
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.081276535987854,
      "learning_rate": 4.480709483673151e-07,
      "loss": 2.2499,
      "step": 69503
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.9934484362602234,
      "learning_rate": 4.479490889806581e-07,
      "loss": 2.2765,
      "step": 69504
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.147519588470459,
      "learning_rate": 4.478272457873223e-07,
      "loss": 2.3431,
      "step": 69505
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9945650100708008,
      "learning_rate": 4.4770541878751404e-07,
      "loss": 2.3819,
      "step": 69506
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0700219869613647,
      "learning_rate": 4.4758360798143997e-07,
      "loss": 2.2506,
      "step": 69507
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0977140665054321,
      "learning_rate": 4.4746181336930427e-07,
      "loss": 2.2863,
      "step": 69508
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1314914226531982,
      "learning_rate": 4.4734003495131684e-07,
      "loss": 2.1765,
      "step": 69509
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1628938913345337,
      "learning_rate": 4.472182727276808e-07,
      "loss": 2.4837,
      "step": 69510
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.046562910079956,
      "learning_rate": 4.47096526698606e-07,
      "loss": 2.4007,
      "step": 69511
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0248972177505493,
      "learning_rate": 4.4697479686429566e-07,
      "loss": 2.4086,
      "step": 69512
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.099453091621399,
      "learning_rate": 4.468530832249596e-07,
      "loss": 2.3188,
      "step": 69513
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1926010847091675,
      "learning_rate": 4.4673138578079976e-07,
      "loss": 2.2686,
      "step": 69514
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1151117086410522,
      "learning_rate": 4.4660970453202726e-07,
      "loss": 2.4729,
      "step": 69515
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2110240459442139,
      "learning_rate": 4.46488039478844e-07,
      "loss": 2.2413,
      "step": 69516
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9937099814414978,
      "learning_rate": 4.4636639062145884e-07,
      "loss": 2.2837,
      "step": 69517
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0962074995040894,
      "learning_rate": 4.462447579600782e-07,
      "loss": 2.6281,
      "step": 69518
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.057019591331482,
      "learning_rate": 4.461231414949074e-07,
      "loss": 2.4455,
      "step": 69519
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1502331495285034,
      "learning_rate": 4.460015412261509e-07,
      "loss": 2.3863,
      "step": 69520
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.297754168510437,
      "learning_rate": 4.4587995715401846e-07,
      "loss": 2.4195,
      "step": 69521
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0217329263687134,
      "learning_rate": 4.457583892787132e-07,
      "loss": 2.2882,
      "step": 69522
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0875996351242065,
      "learning_rate": 4.4563683760044384e-07,
      "loss": 2.4052,
      "step": 69523
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1020958423614502,
      "learning_rate": 4.4551530211941363e-07,
      "loss": 2.2611,
      "step": 69524
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0895613431930542,
      "learning_rate": 4.4539378283583014e-07,
      "loss": 2.3502,
      "step": 69525
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0189114809036255,
      "learning_rate": 4.4527227974989984e-07,
      "loss": 2.2549,
      "step": 69526
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0499732494354248,
      "learning_rate": 4.4515079286182707e-07,
      "loss": 2.5295,
      "step": 69527
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9791387319564819,
      "learning_rate": 4.450293221718194e-07,
      "loss": 2.2673,
      "step": 69528
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9901677370071411,
      "learning_rate": 4.449078676800811e-07,
      "loss": 2.2894,
      "step": 69529
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0206433534622192,
      "learning_rate": 4.447864293868209e-07,
      "loss": 2.3598,
      "step": 69530
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1822428703308105,
      "learning_rate": 4.4466500729224094e-07,
      "loss": 2.4939,
      "step": 69531
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2027266025543213,
      "learning_rate": 4.4454360139655094e-07,
      "loss": 2.3793,
      "step": 69532
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.098088264465332,
      "learning_rate": 4.444222116999541e-07,
      "loss": 2.368,
      "step": 69533
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1313084363937378,
      "learning_rate": 4.44300838202657e-07,
      "loss": 2.4728,
      "step": 69534
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2176463603973389,
      "learning_rate": 4.441794809048638e-07,
      "loss": 2.4337,
      "step": 69535
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0305942296981812,
      "learning_rate": 4.4405813980678336e-07,
      "loss": 2.3001,
      "step": 69536
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1127657890319824,
      "learning_rate": 4.4393681490861873e-07,
      "loss": 2.2565,
      "step": 69537
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9885847568511963,
      "learning_rate": 4.4381550621057754e-07,
      "loss": 2.3877,
      "step": 69538
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.005507230758667,
      "learning_rate": 4.4369421371286304e-07,
      "loss": 2.5225,
      "step": 69539
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.092387080192566,
      "learning_rate": 4.4357293741568386e-07,
      "loss": 2.3356,
      "step": 69540
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0218909978866577,
      "learning_rate": 4.434516773192421e-07,
      "loss": 2.1588,
      "step": 69541
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0931910276412964,
      "learning_rate": 4.433304334237476e-07,
      "loss": 2.7137,
      "step": 69542
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0692903995513916,
      "learning_rate": 4.432092057294013e-07,
      "loss": 2.4734,
      "step": 69543
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0770047903060913,
      "learning_rate": 4.4308799423641304e-07,
      "loss": 2.3048,
      "step": 69544
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1226463317871094,
      "learning_rate": 4.4296679894498594e-07,
      "loss": 2.4096,
      "step": 69545
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0919245481491089,
      "learning_rate": 4.4284561985532547e-07,
      "loss": 2.1654,
      "step": 69546
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1498525142669678,
      "learning_rate": 4.42724456967637e-07,
      "loss": 2.6212,
      "step": 69547
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0019205808639526,
      "learning_rate": 4.42603310282127e-07,
      "loss": 2.2772,
      "step": 69548
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9614061713218689,
      "learning_rate": 4.4248217979899865e-07,
      "loss": 2.2936,
      "step": 69549
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0901553630828857,
      "learning_rate": 4.423610655184596e-07,
      "loss": 2.1033,
      "step": 69550
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0989844799041748,
      "learning_rate": 4.4223996744071517e-07,
      "loss": 2.1458,
      "step": 69551
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1466506719589233,
      "learning_rate": 4.421188855659686e-07,
      "loss": 2.4074,
      "step": 69552
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0853140354156494,
      "learning_rate": 4.4199781989442745e-07,
      "loss": 2.2471,
      "step": 69553
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2064520120620728,
      "learning_rate": 4.4187677042629497e-07,
      "loss": 2.089,
      "step": 69554
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0032213926315308,
      "learning_rate": 4.417557371617776e-07,
      "loss": 2.2931,
      "step": 69555
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9911432266235352,
      "learning_rate": 4.4163472010107955e-07,
      "loss": 2.251,
      "step": 69556
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2234936952590942,
      "learning_rate": 4.4151371924440853e-07,
      "loss": 2.0846,
      "step": 69557
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0537519454956055,
      "learning_rate": 4.4139273459196663e-07,
      "loss": 2.2481,
      "step": 69558
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.203728199005127,
      "learning_rate": 4.412717661439614e-07,
      "loss": 2.5575,
      "step": 69559
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0717072486877441,
      "learning_rate": 4.411508139005949e-07,
      "loss": 2.2021,
      "step": 69560
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0650049448013306,
      "learning_rate": 4.410298778620747e-07,
      "loss": 2.3273,
      "step": 69561
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4392353296279907,
      "learning_rate": 4.409089580286041e-07,
      "loss": 2.2024,
      "step": 69562
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0425913333892822,
      "learning_rate": 4.407880544003906e-07,
      "loss": 2.1494,
      "step": 69563
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1736012697219849,
      "learning_rate": 4.4066716697763635e-07,
      "loss": 2.4032,
      "step": 69564
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1355208158493042,
      "learning_rate": 4.4054629576054884e-07,
      "loss": 2.3608,
      "step": 69565
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0242096185684204,
      "learning_rate": 4.4042544074933025e-07,
      "loss": 2.2023,
      "step": 69566
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.990146815776825,
      "learning_rate": 4.40304601944187e-07,
      "loss": 2.457,
      "step": 69567
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0511775016784668,
      "learning_rate": 4.4018377934532454e-07,
      "loss": 2.6052,
      "step": 69568
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.044105052947998,
      "learning_rate": 4.400629729529471e-07,
      "loss": 2.2375,
      "step": 69569
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0588237047195435,
      "learning_rate": 4.399421827672579e-07,
      "loss": 2.4095,
      "step": 69570
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2171903848648071,
      "learning_rate": 4.398214087884645e-07,
      "loss": 2.4121,
      "step": 69571
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0409119129180908,
      "learning_rate": 4.397006510167712e-07,
      "loss": 2.4047,
      "step": 69572
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1316486597061157,
      "learning_rate": 4.3957990945238114e-07,
      "loss": 2.2446,
      "step": 69573
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0415557622909546,
      "learning_rate": 4.3945918409549873e-07,
      "loss": 2.4155,
      "step": 69574
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1777006387710571,
      "learning_rate": 4.393384749463303e-07,
      "loss": 2.3707,
      "step": 69575
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0716480016708374,
      "learning_rate": 4.3921778200507917e-07,
      "loss": 2.324,
      "step": 69576
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0890274047851562,
      "learning_rate": 4.390971052719517e-07,
      "loss": 2.5216,
      "step": 69577
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1411651372909546,
      "learning_rate": 4.389764447471501e-07,
      "loss": 2.4668,
      "step": 69578
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0524928569793701,
      "learning_rate": 4.388558004308807e-07,
      "loss": 2.3485,
      "step": 69579
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1437351703643799,
      "learning_rate": 4.3873517232334904e-07,
      "loss": 2.2451,
      "step": 69580
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0541366338729858,
      "learning_rate": 4.386145604247549e-07,
      "loss": 2.2925,
      "step": 69581
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2027093172073364,
      "learning_rate": 4.384939647353081e-07,
      "loss": 2.3023,
      "step": 69582
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1338505744934082,
      "learning_rate": 4.383733852552097e-07,
      "loss": 2.4684,
      "step": 69583
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1119595766067505,
      "learning_rate": 4.3825282198466713e-07,
      "loss": 2.368,
      "step": 69584
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0777724981307983,
      "learning_rate": 4.3813227492388256e-07,
      "loss": 2.2383,
      "step": 69585
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.160908818244934,
      "learning_rate": 4.3801174407306026e-07,
      "loss": 2.4612,
      "step": 69586
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2188260555267334,
      "learning_rate": 4.3789122943240447e-07,
      "loss": 2.2602,
      "step": 69587
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1223536729812622,
      "learning_rate": 4.3777073100212064e-07,
      "loss": 2.3405,
      "step": 69588
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0553462505340576,
      "learning_rate": 4.376502487824119e-07,
      "loss": 2.2147,
      "step": 69589
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.098684310913086,
      "learning_rate": 4.375297827734837e-07,
      "loss": 2.4058,
      "step": 69590
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.08539617061615,
      "learning_rate": 4.3740933297553913e-07,
      "loss": 2.2294,
      "step": 69591
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0654594898223877,
      "learning_rate": 4.372888993887836e-07,
      "loss": 2.3863,
      "step": 69592
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0599650144577026,
      "learning_rate": 4.371684820134192e-07,
      "loss": 2.3681,
      "step": 69593
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.068688988685608,
      "learning_rate": 4.3704808084965354e-07,
      "loss": 2.3546,
      "step": 69594
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1254390478134155,
      "learning_rate": 4.3692769589768646e-07,
      "loss": 2.16,
      "step": 69595
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.154716968536377,
      "learning_rate": 4.368073271577256e-07,
      "loss": 2.3722,
      "step": 69596
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1956746578216553,
      "learning_rate": 4.366869746299729e-07,
      "loss": 2.4235,
      "step": 69597
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1108509302139282,
      "learning_rate": 4.3656663831463497e-07,
      "loss": 2.343,
      "step": 69598
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0455288887023926,
      "learning_rate": 4.364463182119127e-07,
      "loss": 2.2134,
      "step": 69599
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.208540916442871,
      "learning_rate": 4.3632601432201163e-07,
      "loss": 2.2956,
      "step": 69600
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0317680835723877,
      "learning_rate": 4.362057266451347e-07,
      "loss": 2.291,
      "step": 69601
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0528653860092163,
      "learning_rate": 4.3608545518148746e-07,
      "loss": 2.2502,
      "step": 69602
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1347734928131104,
      "learning_rate": 4.3596519993127086e-07,
      "loss": 2.2132,
      "step": 69603
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1398051977157593,
      "learning_rate": 4.3584496089469354e-07,
      "loss": 2.3657,
      "step": 69604
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0412752628326416,
      "learning_rate": 4.3572473807195425e-07,
      "loss": 2.2812,
      "step": 69605
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0537863969802856,
      "learning_rate": 4.3560453146326067e-07,
      "loss": 2.2337,
      "step": 69606
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1324766874313354,
      "learning_rate": 4.3548434106881587e-07,
      "loss": 2.2824,
      "step": 69607
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1383235454559326,
      "learning_rate": 4.3536416688881976e-07,
      "loss": 2.3231,
      "step": 69608
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1004761457443237,
      "learning_rate": 4.352440089234822e-07,
      "loss": 2.2185,
      "step": 69609
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.157757043838501,
      "learning_rate": 4.351238671730018e-07,
      "loss": 2.6173,
      "step": 69610
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0810407400131226,
      "learning_rate": 4.350037416375852e-07,
      "loss": 2.4253,
      "step": 69611
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0289312601089478,
      "learning_rate": 4.348836323174355e-07,
      "loss": 2.279,
      "step": 69612
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0483521223068237,
      "learning_rate": 4.347635392127558e-07,
      "loss": 2.4865,
      "step": 69613
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2164485454559326,
      "learning_rate": 4.346434623237483e-07,
      "loss": 2.486,
      "step": 69614
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9845312237739563,
      "learning_rate": 4.3452340165061946e-07,
      "loss": 2.4579,
      "step": 69615
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1412793397903442,
      "learning_rate": 4.344033571935702e-07,
      "loss": 2.502,
      "step": 69616
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1460363864898682,
      "learning_rate": 4.34283328952807e-07,
      "loss": 2.431,
      "step": 69617
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.232061505317688,
      "learning_rate": 4.341633169285298e-07,
      "loss": 2.3059,
      "step": 69618
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1037663221359253,
      "learning_rate": 4.3404332112094494e-07,
      "loss": 2.3539,
      "step": 69619
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0619850158691406,
      "learning_rate": 4.339233415302535e-07,
      "loss": 2.4848,
      "step": 69620
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1233853101730347,
      "learning_rate": 4.3380337815666087e-07,
      "loss": 2.52,
      "step": 69621
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.127299189567566,
      "learning_rate": 4.3368343100036905e-07,
      "loss": 2.3212,
      "step": 69622
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3768813610076904,
      "learning_rate": 4.3356350006158344e-07,
      "loss": 2.3536,
      "step": 69623
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5956299304962158,
      "learning_rate": 4.33443585340505e-07,
      "loss": 2.1933,
      "step": 69624
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0123991966247559,
      "learning_rate": 4.333236868373392e-07,
      "loss": 2.3977,
      "step": 69625
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2375812530517578,
      "learning_rate": 4.3320380455228684e-07,
      "loss": 2.3147,
      "step": 69626
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0205981731414795,
      "learning_rate": 4.330839384855523e-07,
      "loss": 2.2968,
      "step": 69627
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5847833156585693,
      "learning_rate": 4.3296408863733876e-07,
      "loss": 2.3639,
      "step": 69628
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1243510246276855,
      "learning_rate": 4.3284425500784933e-07,
      "loss": 2.2482,
      "step": 69629
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0454801321029663,
      "learning_rate": 4.327244375972872e-07,
      "loss": 2.252,
      "step": 69630
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0239397287368774,
      "learning_rate": 4.326046364058567e-07,
      "loss": 2.193,
      "step": 69631
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0393348932266235,
      "learning_rate": 4.324848514337576e-07,
      "loss": 2.2488,
      "step": 69632
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2333818674087524,
      "learning_rate": 4.323650826811976e-07,
      "loss": 2.3234,
      "step": 69633
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1140391826629639,
      "learning_rate": 4.322453301483753e-07,
      "loss": 2.4561,
      "step": 69634
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.110270380973816,
      "learning_rate": 4.3212559383549736e-07,
      "loss": 2.2168,
      "step": 69635
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.225381851196289,
      "learning_rate": 4.320058737427657e-07,
      "loss": 2.4801,
      "step": 69636
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3145920038223267,
      "learning_rate": 4.3188616987038026e-07,
      "loss": 2.3408,
      "step": 69637
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.040399193763733,
      "learning_rate": 4.3176648221854853e-07,
      "loss": 2.3131,
      "step": 69638
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.040706992149353,
      "learning_rate": 4.316468107874716e-07,
      "loss": 2.1892,
      "step": 69639
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.058603048324585,
      "learning_rate": 4.3152715557735037e-07,
      "loss": 2.0986,
      "step": 69640
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3180415630340576,
      "learning_rate": 4.314075165883902e-07,
      "loss": 2.4901,
      "step": 69641
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1066257953643799,
      "learning_rate": 4.3128789382079316e-07,
      "loss": 2.3872,
      "step": 69642
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.075060248374939,
      "learning_rate": 4.3116828727476134e-07,
      "loss": 2.4172,
      "step": 69643
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1414958238601685,
      "learning_rate": 4.31048696950499e-07,
      "loss": 2.2257,
      "step": 69644
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1776617765426636,
      "learning_rate": 4.30929122848206e-07,
      "loss": 2.2919,
      "step": 69645
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1093031167984009,
      "learning_rate": 4.308095649680899e-07,
      "loss": 2.5031,
      "step": 69646
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3363142013549805,
      "learning_rate": 4.3069002331034835e-07,
      "loss": 2.1869,
      "step": 69647
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0239931344985962,
      "learning_rate": 4.305704978751868e-07,
      "loss": 2.5534,
      "step": 69648
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1153813600540161,
      "learning_rate": 4.3045098866280723e-07,
      "loss": 2.3933,
      "step": 69649
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1277551651000977,
      "learning_rate": 4.3033149567341283e-07,
      "loss": 2.4323,
      "step": 69650
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1548309326171875,
      "learning_rate": 4.3021201890720564e-07,
      "loss": 2.4487,
      "step": 69651
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0949198007583618,
      "learning_rate": 4.300925583643889e-07,
      "loss": 2.2225,
      "step": 69652
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0535812377929688,
      "learning_rate": 4.299731140451624e-07,
      "loss": 2.1972,
      "step": 69653
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1194721460342407,
      "learning_rate": 4.2985368594973265e-07,
      "loss": 2.2148,
      "step": 69654
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0699127912521362,
      "learning_rate": 4.2973427407829837e-07,
      "loss": 2.1804,
      "step": 69655
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0844731330871582,
      "learning_rate": 4.2961487843106497e-07,
      "loss": 2.3109,
      "step": 69656
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0198003053665161,
      "learning_rate": 4.2949549900823227e-07,
      "loss": 2.5662,
      "step": 69657
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0969700813293457,
      "learning_rate": 4.293761358100057e-07,
      "loss": 2.501,
      "step": 69658
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0729271173477173,
      "learning_rate": 4.292567888365851e-07,
      "loss": 2.6269,
      "step": 69659
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0637166500091553,
      "learning_rate": 4.291374580881735e-07,
      "loss": 2.4121,
      "step": 69660
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0596214532852173,
      "learning_rate": 4.290181435649732e-07,
      "loss": 2.5429,
      "step": 69661
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0344421863555908,
      "learning_rate": 4.2889884526718715e-07,
      "loss": 2.275,
      "step": 69662
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.117782473564148,
      "learning_rate": 4.287795631950176e-07,
      "loss": 2.448,
      "step": 69663
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.15789794921875,
      "learning_rate": 4.2866029734866644e-07,
      "loss": 2.2293,
      "step": 69664
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0610181093215942,
      "learning_rate": 4.285410477283336e-07,
      "loss": 2.4577,
      "step": 69665
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0439807176589966,
      "learning_rate": 4.2842181433422447e-07,
      "loss": 2.3339,
      "step": 69666
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5202012062072754,
      "learning_rate": 4.283025971665389e-07,
      "loss": 2.2281,
      "step": 69667
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0371955633163452,
      "learning_rate": 4.281833962254811e-07,
      "loss": 2.334,
      "step": 69668
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.060380220413208,
      "learning_rate": 4.280642115112532e-07,
      "loss": 2.3752,
      "step": 69669
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.04319429397583,
      "learning_rate": 4.279450430240539e-07,
      "loss": 2.185,
      "step": 69670
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.212136149406433,
      "learning_rate": 4.2782589076408863e-07,
      "loss": 2.3657,
      "step": 69671
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.072345495223999,
      "learning_rate": 4.277067547315572e-07,
      "loss": 2.0975,
      "step": 69672
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0925514698028564,
      "learning_rate": 4.2758763492666386e-07,
      "loss": 2.0777,
      "step": 69673
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.190650224685669,
      "learning_rate": 4.2746853134960853e-07,
      "loss": 2.3935,
      "step": 69674
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0328965187072754,
      "learning_rate": 4.2734944400059543e-07,
      "loss": 2.2974,
      "step": 69675
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.143700361251831,
      "learning_rate": 4.2723037287982325e-07,
      "loss": 2.3973,
      "step": 69676
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2517756223678589,
      "learning_rate": 4.271113179874964e-07,
      "loss": 2.4952,
      "step": 69677
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0653079748153687,
      "learning_rate": 4.269922793238157e-07,
      "loss": 2.1384,
      "step": 69678
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1130192279815674,
      "learning_rate": 4.268732568889833e-07,
      "loss": 2.3052,
      "step": 69679
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.208983302116394,
      "learning_rate": 4.267542506832001e-07,
      "loss": 2.1735,
      "step": 69680
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.075906753540039,
      "learning_rate": 4.266352607066693e-07,
      "loss": 2.2642,
      "step": 69681
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9831754565238953,
      "learning_rate": 4.2651628695959067e-07,
      "loss": 2.2307,
      "step": 69682
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2170222997665405,
      "learning_rate": 4.263973294421675e-07,
      "loss": 2.3231,
      "step": 69683
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1594008207321167,
      "learning_rate": 4.2627838815460067e-07,
      "loss": 2.4771,
      "step": 69684
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1313574314117432,
      "learning_rate": 4.2615946309709333e-07,
      "loss": 2.2949,
      "step": 69685
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.023725986480713,
      "learning_rate": 4.2604055426984423e-07,
      "loss": 2.3155,
      "step": 69686
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.000776767730713,
      "learning_rate": 4.259216616730588e-07,
      "loss": 2.2746,
      "step": 69687
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0459704399108887,
      "learning_rate": 4.258027853069335e-07,
      "loss": 2.2894,
      "step": 69688
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1355805397033691,
      "learning_rate": 4.25683925171676e-07,
      "loss": 2.2741,
      "step": 69689
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0067121982574463,
      "learning_rate": 4.2556508126748164e-07,
      "loss": 2.4223,
      "step": 69690
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.957951009273529,
      "learning_rate": 4.2544625359455916e-07,
      "loss": 2.355,
      "step": 69691
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.022200584411621,
      "learning_rate": 4.2532744215310173e-07,
      "loss": 2.4217,
      "step": 69692
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0619654655456543,
      "learning_rate": 4.25208646943317e-07,
      "loss": 2.2437,
      "step": 69693
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1381670236587524,
      "learning_rate": 4.250898679654014e-07,
      "loss": 2.2838,
      "step": 69694
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.967686653137207,
      "learning_rate": 4.2497110521956154e-07,
      "loss": 2.3696,
      "step": 69695
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0922712087631226,
      "learning_rate": 4.2485235870599495e-07,
      "loss": 2.3923,
      "step": 69696
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0521010160446167,
      "learning_rate": 4.2473362842490486e-07,
      "loss": 2.3309,
      "step": 69697
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0233999490737915,
      "learning_rate": 4.2461491437649214e-07,
      "loss": 2.2903,
      "step": 69698
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0355339050292969,
      "learning_rate": 4.244962165609556e-07,
      "loss": 2.2691,
      "step": 69699
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0949443578720093,
      "learning_rate": 4.2437753497850066e-07,
      "loss": 2.2879,
      "step": 69700
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0088258981704712,
      "learning_rate": 4.2425886962932593e-07,
      "loss": 2.2356,
      "step": 69701
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1070011854171753,
      "learning_rate": 4.2414022051363357e-07,
      "loss": 2.3593,
      "step": 69702
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.15573251247406,
      "learning_rate": 4.2402158763162336e-07,
      "loss": 2.5055,
      "step": 69703
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1468617916107178,
      "learning_rate": 4.2390297098349965e-07,
      "loss": 2.2753,
      "step": 69704
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.097733736038208,
      "learning_rate": 4.2378437056945884e-07,
      "loss": 2.2129,
      "step": 69705
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0865538120269775,
      "learning_rate": 4.2366578638970535e-07,
      "loss": 2.2524,
      "step": 69706
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0411388874053955,
      "learning_rate": 4.235472184444389e-07,
      "loss": 2.332,
      "step": 69707
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0038827657699585,
      "learning_rate": 4.234286667338616e-07,
      "loss": 2.2459,
      "step": 69708
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2177996635437012,
      "learning_rate": 4.233101312581722e-07,
      "loss": 2.2203,
      "step": 69709
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9444480538368225,
      "learning_rate": 4.2319161201757497e-07,
      "loss": 2.4832,
      "step": 69710
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0810819864273071,
      "learning_rate": 4.230731090122664e-07,
      "loss": 2.3171,
      "step": 69711
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1391738653182983,
      "learning_rate": 4.2295462224245185e-07,
      "loss": 2.2239,
      "step": 69712
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0858768224716187,
      "learning_rate": 4.2283615170833014e-07,
      "loss": 2.2746,
      "step": 69713
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1312716007232666,
      "learning_rate": 4.2271769741010214e-07,
      "loss": 2.5114,
      "step": 69714
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.643831729888916,
      "learning_rate": 4.2259925934796775e-07,
      "loss": 2.4014,
      "step": 69715
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1613554954528809,
      "learning_rate": 4.224808375221312e-07,
      "loss": 2.432,
      "step": 69716
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1695983409881592,
      "learning_rate": 4.223624319327901e-07,
      "loss": 2.2265,
      "step": 69717
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0852739810943604,
      "learning_rate": 4.222440425801455e-07,
      "loss": 2.2716,
      "step": 69718
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.099465012550354,
      "learning_rate": 4.2212566946439714e-07,
      "loss": 2.0121,
      "step": 69719
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.123607873916626,
      "learning_rate": 4.220073125857493e-07,
      "loss": 2.185,
      "step": 69720
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0318351984024048,
      "learning_rate": 4.218889719443986e-07,
      "loss": 2.2484,
      "step": 69721
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1130493879318237,
      "learning_rate": 4.217706475405492e-07,
      "loss": 2.2559,
      "step": 69722
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5425944328308105,
      "learning_rate": 4.2165233937439765e-07,
      "loss": 2.1243,
      "step": 69723
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0606707334518433,
      "learning_rate": 4.2153404744614936e-07,
      "loss": 2.2113,
      "step": 69724
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1990537643432617,
      "learning_rate": 4.214157717560019e-07,
      "loss": 2.309,
      "step": 69725
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0927621126174927,
      "learning_rate": 4.2129751230415404e-07,
      "loss": 2.2622,
      "step": 69726
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1333333253860474,
      "learning_rate": 4.211792690908112e-07,
      "loss": 2.3524,
      "step": 69727
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.128332495689392,
      "learning_rate": 4.210610421161687e-07,
      "loss": 2.456,
      "step": 69728
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2903193235397339,
      "learning_rate": 4.209428313804309e-07,
      "loss": 2.5547,
      "step": 69729
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1642060279846191,
      "learning_rate": 4.2082463688379647e-07,
      "loss": 2.3617,
      "step": 69730
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0054749250411987,
      "learning_rate": 4.2070645862646644e-07,
      "loss": 2.2416,
      "step": 69731
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2406160831451416,
      "learning_rate": 4.2058829660863944e-07,
      "loss": 2.3239,
      "step": 69732
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0805692672729492,
      "learning_rate": 4.2047015083051757e-07,
      "loss": 2.1087,
      "step": 69733
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1477915048599243,
      "learning_rate": 4.203520212922996e-07,
      "loss": 2.1129,
      "step": 69734
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0043492317199707,
      "learning_rate": 4.202339079941886e-07,
      "loss": 2.2751,
      "step": 69735
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2147467136383057,
      "learning_rate": 4.201158109363812e-07,
      "loss": 2.4452,
      "step": 69736
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2143867015838623,
      "learning_rate": 4.199977301190805e-07,
      "loss": 2.2408,
      "step": 69737
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0886062383651733,
      "learning_rate": 4.198796655424842e-07,
      "loss": 2.5092,
      "step": 69738
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1383556127548218,
      "learning_rate": 4.197616172067953e-07,
      "loss": 2.4075,
      "step": 69739
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0159860849380493,
      "learning_rate": 4.196435851122116e-07,
      "loss": 2.2835,
      "step": 69740
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0666652917861938,
      "learning_rate": 4.1952556925893397e-07,
      "loss": 2.4721,
      "step": 69741
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1142847537994385,
      "learning_rate": 4.194075696471622e-07,
      "loss": 2.2885,
      "step": 69742
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2042315006256104,
      "learning_rate": 4.192895862770996e-07,
      "loss": 2.2201,
      "step": 69743
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.01948881149292,
      "learning_rate": 4.191716191489403e-07,
      "loss": 2.464,
      "step": 69744
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1890279054641724,
      "learning_rate": 4.1905366826288764e-07,
      "loss": 2.2003,
      "step": 69745
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0659865140914917,
      "learning_rate": 4.189357336191413e-07,
      "loss": 2.2574,
      "step": 69746
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1077574491500854,
      "learning_rate": 4.1881781521790124e-07,
      "loss": 2.3119,
      "step": 69747
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0435891151428223,
      "learning_rate": 4.1869991305936606e-07,
      "loss": 2.0903,
      "step": 69748
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.158587098121643,
      "learning_rate": 4.185820271437391e-07,
      "loss": 2.4578,
      "step": 69749
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0137776136398315,
      "learning_rate": 4.184641574712156e-07,
      "loss": 2.1839,
      "step": 69750
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.219964623451233,
      "learning_rate": 4.1834630404199883e-07,
      "loss": 2.3433,
      "step": 69751
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1613943576812744,
      "learning_rate": 4.1822846685628636e-07,
      "loss": 2.5521,
      "step": 69752
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1522998809814453,
      "learning_rate": 4.181106459142803e-07,
      "loss": 2.0668,
      "step": 69753
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0346952676773071,
      "learning_rate": 4.1799284121617936e-07,
      "loss": 2.3131,
      "step": 69754
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0762814283370972,
      "learning_rate": 4.178750527621822e-07,
      "loss": 2.2613,
      "step": 69755
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.112555742263794,
      "learning_rate": 4.177572805524899e-07,
      "loss": 2.2917,
      "step": 69756
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9967369437217712,
      "learning_rate": 4.1763952458730103e-07,
      "loss": 2.33,
      "step": 69757
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0136430263519287,
      "learning_rate": 4.175217848668156e-07,
      "loss": 2.5104,
      "step": 69758
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5031033754348755,
      "learning_rate": 4.1740406139123333e-07,
      "loss": 2.5001,
      "step": 69759
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1355915069580078,
      "learning_rate": 4.1728635416075416e-07,
      "loss": 2.508,
      "step": 69760
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0906670093536377,
      "learning_rate": 4.171686631755767e-07,
      "loss": 2.3993,
      "step": 69761
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0088878870010376,
      "learning_rate": 4.17050988435902e-07,
      "loss": 2.3981,
      "step": 69762
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.077201247215271,
      "learning_rate": 4.169333299419265e-07,
      "loss": 2.3248,
      "step": 69763
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0631548166275024,
      "learning_rate": 4.168156876938534e-07,
      "loss": 2.3639,
      "step": 69764
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.098963975906372,
      "learning_rate": 4.166980616918803e-07,
      "loss": 2.5618,
      "step": 69765
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0399277210235596,
      "learning_rate": 4.1658045193620713e-07,
      "loss": 2.2372,
      "step": 69766
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1170554161071777,
      "learning_rate": 4.164628584270314e-07,
      "loss": 2.3447,
      "step": 69767
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1637285947799683,
      "learning_rate": 4.163452811645552e-07,
      "loss": 2.2184,
      "step": 69768
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0259108543395996,
      "learning_rate": 4.1622772014897616e-07,
      "loss": 2.3418,
      "step": 69769
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.093895673751831,
      "learning_rate": 4.1611017538049523e-07,
      "loss": 2.4879,
      "step": 69770
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0535871982574463,
      "learning_rate": 4.1599264685930897e-07,
      "loss": 2.1418,
      "step": 69771
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2474579811096191,
      "learning_rate": 4.1587513458561933e-07,
      "loss": 2.4159,
      "step": 69772
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0954440832138062,
      "learning_rate": 4.1575763855962294e-07,
      "loss": 2.1202,
      "step": 69773
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0921785831451416,
      "learning_rate": 4.1564015878152175e-07,
      "loss": 2.4949,
      "step": 69774
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9688221216201782,
      "learning_rate": 4.1552269525151234e-07,
      "loss": 2.3648,
      "step": 69775
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0813782215118408,
      "learning_rate": 4.1540524796979677e-07,
      "loss": 2.2898,
      "step": 69776
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0315823554992676,
      "learning_rate": 4.152878169365704e-07,
      "loss": 2.0546,
      "step": 69777
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1236220598220825,
      "learning_rate": 4.151704021520364e-07,
      "loss": 2.3899,
      "step": 69778
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1507391929626465,
      "learning_rate": 4.150530036163891e-07,
      "loss": 2.1025,
      "step": 69779
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2008405923843384,
      "learning_rate": 4.149356213298328e-07,
      "loss": 2.3319,
      "step": 69780
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0218058824539185,
      "learning_rate": 4.1481825529256394e-07,
      "loss": 2.2072,
      "step": 69781
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0933715105056763,
      "learning_rate": 4.1470090550477904e-07,
      "loss": 2.2015,
      "step": 69782
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2220714092254639,
      "learning_rate": 4.145835719666813e-07,
      "loss": 2.3129,
      "step": 69783
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.143110752105713,
      "learning_rate": 4.1446625467846836e-07,
      "loss": 2.4394,
      "step": 69784
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1779950857162476,
      "learning_rate": 4.1434895364033665e-07,
      "loss": 2.429,
      "step": 69785
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0577564239501953,
      "learning_rate": 4.1423166885248836e-07,
      "loss": 2.2252,
      "step": 69786
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.022369623184204,
      "learning_rate": 4.1411440031512095e-07,
      "loss": 2.1849,
      "step": 69787
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0513635873794556,
      "learning_rate": 4.1399714802843104e-07,
      "loss": 2.4274,
      "step": 69788
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1782567501068115,
      "learning_rate": 4.138799119926218e-07,
      "loss": 2.4352,
      "step": 69789
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1653655767440796,
      "learning_rate": 4.1376269220788745e-07,
      "loss": 2.3878,
      "step": 69790
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1583932638168335,
      "learning_rate": 4.1364548867443124e-07,
      "loss": 2.1653,
      "step": 69791
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2006754875183105,
      "learning_rate": 4.1352830139244736e-07,
      "loss": 2.3576,
      "step": 69792
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2117018699645996,
      "learning_rate": 4.13411130362138e-07,
      "loss": 2.4356,
      "step": 69793
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.153494119644165,
      "learning_rate": 4.1329397558369955e-07,
      "loss": 2.3104,
      "step": 69794
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.105993628501892,
      "learning_rate": 4.13176837057333e-07,
      "loss": 2.2346,
      "step": 69795
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.109472393989563,
      "learning_rate": 4.1305971478323494e-07,
      "loss": 2.3532,
      "step": 69796
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.029444694519043,
      "learning_rate": 4.129426087616051e-07,
      "loss": 2.1167,
      "step": 69797
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2510790824890137,
      "learning_rate": 4.128255189926389e-07,
      "loss": 2.2446,
      "step": 69798
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9874136447906494,
      "learning_rate": 4.1270844547653954e-07,
      "loss": 2.3129,
      "step": 69799
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0323759317398071,
      "learning_rate": 4.125913882135024e-07,
      "loss": 2.1228,
      "step": 69800
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0928932428359985,
      "learning_rate": 4.124743472037274e-07,
      "loss": 2.2479,
      "step": 69801
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4336941242218018,
      "learning_rate": 4.123573224474109e-07,
      "loss": 2.493,
      "step": 69802
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.989493727684021,
      "learning_rate": 4.1224031394475394e-07,
      "loss": 2.5263,
      "step": 69803
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1073217391967773,
      "learning_rate": 4.121233216959519e-07,
      "loss": 2.3676,
      "step": 69804
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0150220394134521,
      "learning_rate": 4.120063457012069e-07,
      "loss": 2.1793,
      "step": 69805
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0947374105453491,
      "learning_rate": 4.1188938596071426e-07,
      "loss": 2.3158,
      "step": 69806
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0168267488479614,
      "learning_rate": 4.1177244247467383e-07,
      "loss": 2.4455,
      "step": 69807
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0582189559936523,
      "learning_rate": 4.1165551524328216e-07,
      "loss": 2.1782,
      "step": 69808
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9954875111579895,
      "learning_rate": 4.115386042667413e-07,
      "loss": 2.1485,
      "step": 69809
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.160762906074524,
      "learning_rate": 4.114217095452433e-07,
      "loss": 2.2557,
      "step": 69810
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0298439264297485,
      "learning_rate": 4.113048310789924e-07,
      "loss": 2.4208,
      "step": 69811
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1926437616348267,
      "learning_rate": 4.111879688681819e-07,
      "loss": 2.2685,
      "step": 69812
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0527740716934204,
      "learning_rate": 4.110711229130138e-07,
      "loss": 2.3467,
      "step": 69813
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.516904354095459,
      "learning_rate": 4.109542932136823e-07,
      "loss": 2.3069,
      "step": 69814
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1105393171310425,
      "learning_rate": 4.1083747977038955e-07,
      "loss": 2.4865,
      "step": 69815
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4660526514053345,
      "learning_rate": 4.107206825833321e-07,
      "loss": 2.314,
      "step": 69816
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1779710054397583,
      "learning_rate": 4.1060390165270527e-07,
      "loss": 2.2132,
      "step": 69817
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0322463512420654,
      "learning_rate": 4.104871369787111e-07,
      "loss": 2.444,
      "step": 69818
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0873783826828003,
      "learning_rate": 4.10370388561544e-07,
      "loss": 2.2655,
      "step": 69819
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0893346071243286,
      "learning_rate": 4.1025365640140593e-07,
      "loss": 2.1728,
      "step": 69820
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1665875911712646,
      "learning_rate": 4.1013694049849007e-07,
      "loss": 2.3004,
      "step": 69821
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1239027976989746,
      "learning_rate": 4.1002024085299853e-07,
      "loss": 2.2885,
      "step": 69822
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1066830158233643,
      "learning_rate": 4.0990355746512667e-07,
      "loss": 2.2266,
      "step": 69823
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2173054218292236,
      "learning_rate": 4.097868903350732e-07,
      "loss": 2.4356,
      "step": 69824
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0462512969970703,
      "learning_rate": 4.0967023946303475e-07,
      "loss": 2.242,
      "step": 69825
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0883290767669678,
      "learning_rate": 4.0955360484921105e-07,
      "loss": 2.5319,
      "step": 69826
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.004394769668579,
      "learning_rate": 4.0943698649379746e-07,
      "loss": 2.343,
      "step": 69827
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0559637546539307,
      "learning_rate": 4.0932038439699394e-07,
      "loss": 2.3973,
      "step": 69828
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0539215803146362,
      "learning_rate": 4.0920379855899584e-07,
      "loss": 2.3564,
      "step": 69829
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1227401494979858,
      "learning_rate": 4.0908722898000295e-07,
      "loss": 2.2801,
      "step": 69830
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1868557929992676,
      "learning_rate": 4.089706756602119e-07,
      "loss": 2.3264,
      "step": 69831
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1945343017578125,
      "learning_rate": 4.0885413859982123e-07,
      "loss": 2.3027,
      "step": 69832
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1882468461990356,
      "learning_rate": 4.0873761779902654e-07,
      "loss": 2.3638,
      "step": 69833
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1909598112106323,
      "learning_rate": 4.0862111325802755e-07,
      "loss": 2.2272,
      "step": 69834
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1463124752044678,
      "learning_rate": 4.085046249770197e-07,
      "loss": 2.3602,
      "step": 69835
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.165242075920105,
      "learning_rate": 4.0838815295620394e-07,
      "loss": 2.361,
      "step": 69836
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0796856880187988,
      "learning_rate": 4.082716971957734e-07,
      "loss": 2.3554,
      "step": 69837
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4238804578781128,
      "learning_rate": 4.08155257695928e-07,
      "loss": 2.1972,
      "step": 69838
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1329323053359985,
      "learning_rate": 4.080388344568631e-07,
      "loss": 2.2845,
      "step": 69839
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0576491355895996,
      "learning_rate": 4.0792242747877963e-07,
      "loss": 2.0586,
      "step": 69840
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.014054775238037,
      "learning_rate": 4.078060367618708e-07,
      "loss": 2.1371,
      "step": 69841
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2255706787109375,
      "learning_rate": 4.076896623063376e-07,
      "loss": 2.591,
      "step": 69842
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1456815004348755,
      "learning_rate": 4.075733041123753e-07,
      "loss": 2.3408,
      "step": 69843
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0507959127426147,
      "learning_rate": 4.074569621801805e-07,
      "loss": 2.5005,
      "step": 69844
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1261423826217651,
      "learning_rate": 4.073406365099519e-07,
      "loss": 2.1622,
      "step": 69845
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0024707317352295,
      "learning_rate": 4.072243271018861e-07,
      "loss": 2.445,
      "step": 69846
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.066268801689148,
      "learning_rate": 4.0710803395618173e-07,
      "loss": 2.1018,
      "step": 69847
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3523880243301392,
      "learning_rate": 4.069917570730331e-07,
      "loss": 2.4297,
      "step": 69848
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0883100032806396,
      "learning_rate": 4.0687549645264114e-07,
      "loss": 2.2821,
      "step": 69849
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0143046379089355,
      "learning_rate": 4.0675925209519794e-07,
      "loss": 2.2364,
      "step": 69850
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.074059247970581,
      "learning_rate": 4.066430240009045e-07,
      "loss": 2.3948,
      "step": 69851
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.084285020828247,
      "learning_rate": 4.0652681216995616e-07,
      "loss": 2.5072,
      "step": 69852
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1277717351913452,
      "learning_rate": 4.0641061660255167e-07,
      "loss": 2.1953,
      "step": 69853
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2397016286849976,
      "learning_rate": 4.062944372988853e-07,
      "loss": 2.186,
      "step": 69854
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.06809663772583,
      "learning_rate": 4.061782742591558e-07,
      "loss": 2.5763,
      "step": 69855
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.066909670829773,
      "learning_rate": 4.060621274835597e-07,
      "loss": 2.1653,
      "step": 69856
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.066593050956726,
      "learning_rate": 4.0594599697229455e-07,
      "loss": 2.4914,
      "step": 69857
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0758329629898071,
      "learning_rate": 4.058298827255558e-07,
      "loss": 2.4445,
      "step": 69858
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9879223108291626,
      "learning_rate": 4.0571378474354216e-07,
      "loss": 2.4452,
      "step": 69859
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0789512395858765,
      "learning_rate": 4.05597703026448e-07,
      "loss": 2.2483,
      "step": 69860
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0283561944961548,
      "learning_rate": 4.05481637574473e-07,
      "loss": 2.2764,
      "step": 69861
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0821590423583984,
      "learning_rate": 4.053655883878127e-07,
      "loss": 2.3041,
      "step": 69862
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0492446422576904,
      "learning_rate": 4.0524955546666357e-07,
      "loss": 2.1937,
      "step": 69863
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0203619003295898,
      "learning_rate": 4.05133538811221e-07,
      "loss": 2.3154,
      "step": 69864
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1108934879302979,
      "learning_rate": 4.050175384216848e-07,
      "loss": 2.3135,
      "step": 69865
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0102894306182861,
      "learning_rate": 4.049015542982471e-07,
      "loss": 2.1516,
      "step": 69866
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0352528095245361,
      "learning_rate": 4.0478558644110987e-07,
      "loss": 2.3627,
      "step": 69867
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9446528553962708,
      "learning_rate": 4.0466963485046526e-07,
      "loss": 2.2152,
      "step": 69868
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.041251301765442,
      "learning_rate": 4.045536995265131e-07,
      "loss": 2.1883,
      "step": 69869
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0102849006652832,
      "learning_rate": 4.044377804694466e-07,
      "loss": 2.4403,
      "step": 69870
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.217251181602478,
      "learning_rate": 4.0432187767946663e-07,
      "loss": 2.3449,
      "step": 69871
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1510376930236816,
      "learning_rate": 4.042059911567664e-07,
      "loss": 2.2566,
      "step": 69872
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.050650715827942,
      "learning_rate": 4.040901209015424e-07,
      "loss": 2.2096,
      "step": 69873
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.040865421295166,
      "learning_rate": 4.0397426691399233e-07,
      "loss": 2.3789,
      "step": 69874
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1564054489135742,
      "learning_rate": 4.038584291943126e-07,
      "loss": 2.3043,
      "step": 69875
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1054465770721436,
      "learning_rate": 4.037426077426987e-07,
      "loss": 2.3001,
      "step": 69876
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0143954753875732,
      "learning_rate": 4.0362680255934706e-07,
      "loss": 2.5839,
      "step": 69877
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0175834894180298,
      "learning_rate": 4.0351101364445534e-07,
      "loss": 2.1198,
      "step": 69878
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1820367574691772,
      "learning_rate": 4.033952409982178e-07,
      "loss": 2.3343,
      "step": 69879
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9539876580238342,
      "learning_rate": 4.032794846208321e-07,
      "loss": 2.2789,
      "step": 69880
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0739970207214355,
      "learning_rate": 4.031637445124936e-07,
      "loss": 2.2691,
      "step": 69881
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1451364755630493,
      "learning_rate": 4.030480206734e-07,
      "loss": 2.2121,
      "step": 69882
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0945597887039185,
      "learning_rate": 4.029323131037455e-07,
      "loss": 2.2652,
      "step": 69883
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0258861780166626,
      "learning_rate": 4.0281662180372773e-07,
      "loss": 2.4378,
      "step": 69884
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1139702796936035,
      "learning_rate": 4.027009467735421e-07,
      "loss": 2.4627,
      "step": 69885
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2808715105056763,
      "learning_rate": 4.0258528801338515e-07,
      "loss": 2.1225,
      "step": 69886
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9685285091400146,
      "learning_rate": 4.0246964552345224e-07,
      "loss": 2.2998,
      "step": 69887
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0985143184661865,
      "learning_rate": 4.023540193039421e-07,
      "loss": 2.4732,
      "step": 69888
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1118288040161133,
      "learning_rate": 4.0223840935504686e-07,
      "loss": 2.4115,
      "step": 69889
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.023637056350708,
      "learning_rate": 4.021228156769641e-07,
      "loss": 2.408,
      "step": 69890
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1875637769699097,
      "learning_rate": 4.020072382698903e-07,
      "loss": 2.284,
      "step": 69891
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.10247004032135,
      "learning_rate": 4.018916771340209e-07,
      "loss": 2.4497,
      "step": 69892
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2688758373260498,
      "learning_rate": 4.017761322695513e-07,
      "loss": 2.1513,
      "step": 69893
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2015293836593628,
      "learning_rate": 4.0166060367667903e-07,
      "loss": 2.2216,
      "step": 69894
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1045771837234497,
      "learning_rate": 4.015450913555974e-07,
      "loss": 2.0476,
      "step": 69895
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.058836579322815,
      "learning_rate": 4.0142959530650615e-07,
      "loss": 2.1658,
      "step": 69896
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0490249395370483,
      "learning_rate": 4.013141155295963e-07,
      "loss": 2.2942,
      "step": 69897
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0868114233016968,
      "learning_rate": 4.011986520250677e-07,
      "loss": 2.3114,
      "step": 69898
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1419832706451416,
      "learning_rate": 4.0108320479311344e-07,
      "loss": 2.1603,
      "step": 69899
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.317278504371643,
      "learning_rate": 4.0096777383393013e-07,
      "loss": 2.4172,
      "step": 69900
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1331183910369873,
      "learning_rate": 4.0085235914771426e-07,
      "loss": 2.579,
      "step": 69901
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.076727032661438,
      "learning_rate": 4.0073696073466117e-07,
      "loss": 2.0797,
      "step": 69902
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.043725609779358,
      "learning_rate": 4.006215785949641e-07,
      "loss": 2.6557,
      "step": 69903
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1324784755706787,
      "learning_rate": 4.0050621272882283e-07,
      "loss": 2.187,
      "step": 69904
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.050595760345459,
      "learning_rate": 4.003908631364295e-07,
      "loss": 2.5801,
      "step": 69905
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0620791912078857,
      "learning_rate": 4.002755298179806e-07,
      "loss": 2.4232,
      "step": 69906
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1515387296676636,
      "learning_rate": 4.0016021277367257e-07,
      "loss": 2.1597,
      "step": 69907
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9910579323768616,
      "learning_rate": 4.000449120036987e-07,
      "loss": 2.0366,
      "step": 69908
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.092578649520874,
      "learning_rate": 3.999296275082587e-07,
      "loss": 2.2991,
      "step": 69909
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0034517049789429,
      "learning_rate": 3.9981435928754255e-07,
      "loss": 2.1813,
      "step": 69910
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0186597108840942,
      "learning_rate": 3.9969910734175e-07,
      "loss": 2.2437,
      "step": 69911
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1185023784637451,
      "learning_rate": 3.995838716710743e-07,
      "loss": 2.2958,
      "step": 69912
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0046496391296387,
      "learning_rate": 3.99468652275713e-07,
      "loss": 2.2838,
      "step": 69913
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1653860807418823,
      "learning_rate": 3.9935344915585703e-07,
      "loss": 2.3831,
      "step": 69914
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1451444625854492,
      "learning_rate": 3.992382623117075e-07,
      "loss": 2.2427,
      "step": 69915
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.096871018409729,
      "learning_rate": 3.991230917434552e-07,
      "loss": 2.4043,
      "step": 69916
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0397461652755737,
      "learning_rate": 3.990079374512967e-07,
      "loss": 2.3431,
      "step": 69917
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3398088216781616,
      "learning_rate": 3.988927994354275e-07,
      "loss": 2.2224,
      "step": 69918
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1291368007659912,
      "learning_rate": 3.9877767769604283e-07,
      "loss": 2.383,
      "step": 69919
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1181432008743286,
      "learning_rate": 3.9866257223333593e-07,
      "loss": 2.2466,
      "step": 69920
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1584060192108154,
      "learning_rate": 3.985474830475056e-07,
      "loss": 2.2316,
      "step": 69921
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.995008111000061,
      "learning_rate": 3.984324101387438e-07,
      "loss": 2.2462,
      "step": 69922
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2129364013671875,
      "learning_rate": 3.983173535072482e-07,
      "loss": 2.2263,
      "step": 69923
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1400690078735352,
      "learning_rate": 3.982023131532109e-07,
      "loss": 2.4241,
      "step": 69924
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0571837425231934,
      "learning_rate": 3.980872890768306e-07,
      "loss": 2.4715,
      "step": 69925
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0789527893066406,
      "learning_rate": 3.9797228127829826e-07,
      "loss": 2.2319,
      "step": 69926
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0546858310699463,
      "learning_rate": 3.978572897578115e-07,
      "loss": 2.1739,
      "step": 69927
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1091893911361694,
      "learning_rate": 3.9774231451556454e-07,
      "loss": 2.5396,
      "step": 69928
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0708298683166504,
      "learning_rate": 3.976273555517529e-07,
      "loss": 2.0507,
      "step": 69929
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1879063844680786,
      "learning_rate": 3.9751241286656974e-07,
      "loss": 2.2064,
      "step": 69930
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0952919721603394,
      "learning_rate": 3.9739748646021257e-07,
      "loss": 2.307,
      "step": 69931
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0678554773330688,
      "learning_rate": 3.972825763328725e-07,
      "loss": 2.3718,
      "step": 69932
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0164885520935059,
      "learning_rate": 3.971676824847481e-07,
      "loss": 2.3377,
      "step": 69933
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.126924753189087,
      "learning_rate": 3.970528049160327e-07,
      "loss": 2.4137,
      "step": 69934
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.05203378200531,
      "learning_rate": 3.969379436269194e-07,
      "loss": 2.5278,
      "step": 69935
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0938109159469604,
      "learning_rate": 3.968230986176058e-07,
      "loss": 2.4179,
      "step": 69936
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1802371740341187,
      "learning_rate": 3.96708269888284e-07,
      "loss": 2.4369,
      "step": 69937
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9314022660255432,
      "learning_rate": 3.9659345743915167e-07,
      "loss": 2.426,
      "step": 69938
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0952272415161133,
      "learning_rate": 3.964786612703997e-07,
      "loss": 2.1443,
      "step": 69939
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1274192333221436,
      "learning_rate": 3.9636388138222683e-07,
      "loss": 2.3102,
      "step": 69940
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.112947702407837,
      "learning_rate": 3.9624911777482513e-07,
      "loss": 2.2179,
      "step": 69941
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1038511991500854,
      "learning_rate": 3.9613437044838887e-07,
      "loss": 2.4907,
      "step": 69942
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0551975965499878,
      "learning_rate": 3.960196394031124e-07,
      "loss": 2.2716,
      "step": 69943
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2617720365524292,
      "learning_rate": 3.9590492463919214e-07,
      "loss": 2.3529,
      "step": 69944
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1460471153259277,
      "learning_rate": 3.9579022615682026e-07,
      "loss": 2.4464,
      "step": 69945
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2044256925582886,
      "learning_rate": 3.9567554395619436e-07,
      "loss": 2.2796,
      "step": 69946
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2253788709640503,
      "learning_rate": 3.955608780375042e-07,
      "loss": 2.1375,
      "step": 69947
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.135022521018982,
      "learning_rate": 3.954462284009497e-07,
      "loss": 2.1485,
      "step": 69948
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0228313207626343,
      "learning_rate": 3.953315950467207e-07,
      "loss": 2.3575,
      "step": 69949
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1319319009780884,
      "learning_rate": 3.952169779750137e-07,
      "loss": 2.4629,
      "step": 69950
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1268314123153687,
      "learning_rate": 3.9510237718602186e-07,
      "loss": 2.2539,
      "step": 69951
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0309703350067139,
      "learning_rate": 3.9498779267994167e-07,
      "loss": 2.1519,
      "step": 69952
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9938918352127075,
      "learning_rate": 3.948732244569642e-07,
      "loss": 2.3681,
      "step": 69953
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0732663869857788,
      "learning_rate": 3.9475867251728807e-07,
      "loss": 2.4492,
      "step": 69954
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0700793266296387,
      "learning_rate": 3.94644136861102e-07,
      "loss": 2.3193,
      "step": 69955
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2154096364974976,
      "learning_rate": 3.945296174886049e-07,
      "loss": 2.2316,
      "step": 69956
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3498347997665405,
      "learning_rate": 3.9441511439998634e-07,
      "loss": 2.2175,
      "step": 69957
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.132224678993225,
      "learning_rate": 3.943006275954453e-07,
      "loss": 2.459,
      "step": 69958
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0017554759979248,
      "learning_rate": 3.9418615707517147e-07,
      "loss": 2.3329,
      "step": 69959
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1876678466796875,
      "learning_rate": 3.940717028393626e-07,
      "loss": 2.4974,
      "step": 69960
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0858525037765503,
      "learning_rate": 3.939572648882117e-07,
      "loss": 2.3134,
      "step": 69961
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0587536096572876,
      "learning_rate": 3.93842843221911e-07,
      "loss": 2.3527,
      "step": 69962
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.105342984199524,
      "learning_rate": 3.9372843784065586e-07,
      "loss": 2.2241,
      "step": 69963
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0131540298461914,
      "learning_rate": 3.936140487446394e-07,
      "loss": 2.3918,
      "step": 69964
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.12692391872406,
      "learning_rate": 3.9349967593405813e-07,
      "loss": 2.2539,
      "step": 69965
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2412528991699219,
      "learning_rate": 3.9338531940910195e-07,
      "loss": 2.4199,
      "step": 69966
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0464746952056885,
      "learning_rate": 3.9327097916996847e-07,
      "loss": 2.3446,
      "step": 69967
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.148130178451538,
      "learning_rate": 3.9315665521684864e-07,
      "loss": 2.4082,
      "step": 69968
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1478327512741089,
      "learning_rate": 3.9304234754993786e-07,
      "loss": 2.2992,
      "step": 69969
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0839482545852661,
      "learning_rate": 3.929280561694293e-07,
      "loss": 2.2476,
      "step": 69970
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0696282386779785,
      "learning_rate": 3.9281378107551726e-07,
      "loss": 2.3739,
      "step": 69971
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.036102056503296,
      "learning_rate": 3.926995222683949e-07,
      "loss": 2.3232,
      "step": 69972
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.033056378364563,
      "learning_rate": 3.9258527974825654e-07,
      "loss": 2.3854,
      "step": 69973
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9915542602539062,
      "learning_rate": 3.9247105351529427e-07,
      "loss": 2.4456,
      "step": 69974
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0455402135849,
      "learning_rate": 3.923568435697045e-07,
      "loss": 2.4039,
      "step": 69975
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0289974212646484,
      "learning_rate": 3.922426499116772e-07,
      "loss": 2.1778,
      "step": 69976
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.069785714149475,
      "learning_rate": 3.9212847254141093e-07,
      "loss": 2.2752,
      "step": 69977
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0209671258926392,
      "learning_rate": 3.9201431145909355e-07,
      "loss": 2.2483,
      "step": 69978
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1300246715545654,
      "learning_rate": 3.919001666649236e-07,
      "loss": 2.2409,
      "step": 69979
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0895048379898071,
      "learning_rate": 3.91786038159091e-07,
      "loss": 2.247,
      "step": 69980
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.063042163848877,
      "learning_rate": 3.916719259417923e-07,
      "loss": 2.407,
      "step": 69981
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1385303735733032,
      "learning_rate": 3.915578300132172e-07,
      "loss": 2.5467,
      "step": 69982
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0011382102966309,
      "learning_rate": 3.914437503735624e-07,
      "loss": 2.1747,
      "step": 69983
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0261002779006958,
      "learning_rate": 3.913296870230199e-07,
      "loss": 2.4183,
      "step": 69984
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0631437301635742,
      "learning_rate": 3.912156399617828e-07,
      "loss": 2.2752,
      "step": 69985
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0553704500198364,
      "learning_rate": 3.9110160919004546e-07,
      "loss": 2.3211,
      "step": 69986
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0103679895401,
      "learning_rate": 3.9098759470799994e-07,
      "loss": 2.2371,
      "step": 69987
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9805592894554138,
      "learning_rate": 3.9087359651584055e-07,
      "loss": 2.2186,
      "step": 69988
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.020538330078125,
      "learning_rate": 3.9075961461376046e-07,
      "loss": 2.3662,
      "step": 69989
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1773459911346436,
      "learning_rate": 3.906456490019539e-07,
      "loss": 2.2842,
      "step": 69990
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0433762073516846,
      "learning_rate": 3.905316996806108e-07,
      "loss": 2.731,
      "step": 69991
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0284860134124756,
      "learning_rate": 3.904177666499276e-07,
      "loss": 2.3784,
      "step": 69992
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0153944492340088,
      "learning_rate": 3.9030384991009416e-07,
      "loss": 2.1744,
      "step": 69993
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0436753034591675,
      "learning_rate": 3.901899494613082e-07,
      "loss": 2.4296,
      "step": 69994
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1070884466171265,
      "learning_rate": 3.900760653037594e-07,
      "loss": 2.4194,
      "step": 69995
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0932351350784302,
      "learning_rate": 3.899621974376422e-07,
      "loss": 2.2063,
      "step": 69996
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2054917812347412,
      "learning_rate": 3.898483458631475e-07,
      "loss": 2.2962,
      "step": 69997
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0434597730636597,
      "learning_rate": 3.897345105804717e-07,
      "loss": 2.3132,
      "step": 69998
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2903141975402832,
      "learning_rate": 3.8962069158980485e-07,
      "loss": 2.3579,
      "step": 69999
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0841764211654663,
      "learning_rate": 3.895068888913411e-07,
      "loss": 2.3365,
      "step": 70000
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.05734121799469,
      "learning_rate": 3.8939310248527375e-07,
      "loss": 2.2119,
      "step": 70001
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.086568832397461,
      "learning_rate": 3.892793323717958e-07,
      "loss": 2.3005,
      "step": 70002
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0732917785644531,
      "learning_rate": 3.8916557855109836e-07,
      "loss": 2.3771,
      "step": 70003
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0413687229156494,
      "learning_rate": 3.890518410233757e-07,
      "loss": 2.2856,
      "step": 70004
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0458409786224365,
      "learning_rate": 3.889381197888209e-07,
      "loss": 2.3857,
      "step": 70005
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1374092102050781,
      "learning_rate": 3.8882441484762723e-07,
      "loss": 2.2746,
      "step": 70006
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9923334121704102,
      "learning_rate": 3.887107261999856e-07,
      "loss": 2.3247,
      "step": 70007
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0909383296966553,
      "learning_rate": 3.885970538460904e-07,
      "loss": 2.4717,
      "step": 70008
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2122688293457031,
      "learning_rate": 3.884833977861324e-07,
      "loss": 2.4235,
      "step": 70009
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0338993072509766,
      "learning_rate": 3.883697580203061e-07,
      "loss": 2.301,
      "step": 70010
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9859126806259155,
      "learning_rate": 3.882561345488034e-07,
      "loss": 2.3117,
      "step": 70011
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9697203040122986,
      "learning_rate": 3.881425273718176e-07,
      "loss": 2.4044,
      "step": 70012
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0919930934906006,
      "learning_rate": 3.8802893648953953e-07,
      "loss": 2.2685,
      "step": 70013
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.327748417854309,
      "learning_rate": 3.8791536190216474e-07,
      "loss": 2.3675,
      "step": 70014
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2896093130111694,
      "learning_rate": 3.878018036098818e-07,
      "loss": 1.9901,
      "step": 70015
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.079954981803894,
      "learning_rate": 3.876882616128874e-07,
      "loss": 2.189,
      "step": 70016
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.023209571838379,
      "learning_rate": 3.875747359113724e-07,
      "loss": 2.1139,
      "step": 70017
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1180473566055298,
      "learning_rate": 3.8746122650552663e-07,
      "loss": 2.3514,
      "step": 70018
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1148122549057007,
      "learning_rate": 3.8734773339554667e-07,
      "loss": 2.1959,
      "step": 70019
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9831477999687195,
      "learning_rate": 3.8723425658162337e-07,
      "loss": 2.3889,
      "step": 70020
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2434360980987549,
      "learning_rate": 3.871207960639467e-07,
      "loss": 2.4073,
      "step": 70021
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9979037642478943,
      "learning_rate": 3.870073518427131e-07,
      "loss": 2.3471,
      "step": 70022
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0454814434051514,
      "learning_rate": 3.8689392391811244e-07,
      "loss": 2.3025,
      "step": 70023
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0905009508132935,
      "learning_rate": 3.8678051229033677e-07,
      "loss": 2.4876,
      "step": 70024
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.063838005065918,
      "learning_rate": 3.8666711695957925e-07,
      "loss": 2.3079,
      "step": 70025
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2506017684936523,
      "learning_rate": 3.8655373792603204e-07,
      "loss": 2.2691,
      "step": 70026
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0053907632827759,
      "learning_rate": 3.864403751898871e-07,
      "loss": 2.3424,
      "step": 70027
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0394715070724487,
      "learning_rate": 3.8632702875133655e-07,
      "loss": 2.2025,
      "step": 70028
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9936745762825012,
      "learning_rate": 3.8621369861057357e-07,
      "loss": 2.0872,
      "step": 70029
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.205525517463684,
      "learning_rate": 3.86100384767788e-07,
      "loss": 2.3311,
      "step": 70030
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1175708770751953,
      "learning_rate": 3.8598708722317523e-07,
      "loss": 2.1391,
      "step": 70031
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.221396803855896,
      "learning_rate": 3.85873805976924e-07,
      "loss": 2.5608,
      "step": 70032
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0776286125183105,
      "learning_rate": 3.8576054102922977e-07,
      "loss": 2.3244,
      "step": 70033
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.057312250137329,
      "learning_rate": 3.856472923802801e-07,
      "loss": 2.3939,
      "step": 70034
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0333775281906128,
      "learning_rate": 3.8553406003027147e-07,
      "loss": 2.1519,
      "step": 70035
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0243860483169556,
      "learning_rate": 3.854208439793927e-07,
      "loss": 2.3198,
      "step": 70036
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1421152353286743,
      "learning_rate": 3.8530764422783696e-07,
      "loss": 2.5078,
      "step": 70037
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4093091487884521,
      "learning_rate": 3.8519446077579626e-07,
      "loss": 2.1698,
      "step": 70038
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0858176946640015,
      "learning_rate": 3.850812936234627e-07,
      "loss": 2.3489,
      "step": 70039
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.098244309425354,
      "learning_rate": 3.849681427710272e-07,
      "loss": 2.2329,
      "step": 70040
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1118842363357544,
      "learning_rate": 3.8485500821868195e-07,
      "loss": 2.2319,
      "step": 70041
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.219854474067688,
      "learning_rate": 3.8474188996661887e-07,
      "loss": 2.522,
      "step": 70042
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0339021682739258,
      "learning_rate": 3.8462878801503014e-07,
      "loss": 2.2176,
      "step": 70043
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1193174123764038,
      "learning_rate": 3.845157023641066e-07,
      "loss": 2.3344,
      "step": 70044
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0789985656738281,
      "learning_rate": 3.844026330140416e-07,
      "loss": 2.3316,
      "step": 70045
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1212800741195679,
      "learning_rate": 3.842895799650259e-07,
      "loss": 2.1273,
      "step": 70046
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2277230024337769,
      "learning_rate": 3.841765432172506e-07,
      "loss": 2.3772,
      "step": 70047
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0899537801742554,
      "learning_rate": 3.8406352277090663e-07,
      "loss": 2.1147,
      "step": 70048
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0459473133087158,
      "learning_rate": 3.839505186261871e-07,
      "loss": 2.3882,
      "step": 70049
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.971435010433197,
      "learning_rate": 3.8383753078328314e-07,
      "loss": 2.0677,
      "step": 70050
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2434085607528687,
      "learning_rate": 3.837245592423866e-07,
      "loss": 2.1439,
      "step": 70051
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1044329404830933,
      "learning_rate": 3.836116040036886e-07,
      "loss": 2.3126,
      "step": 70052
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1074473857879639,
      "learning_rate": 3.8349866506738e-07,
      "loss": 2.3418,
      "step": 70053
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1812613010406494,
      "learning_rate": 3.83385742433654e-07,
      "loss": 2.1942,
      "step": 70054
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0971239805221558,
      "learning_rate": 3.832728361027005e-07,
      "loss": 2.3162,
      "step": 70055
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.986225426197052,
      "learning_rate": 3.831599460747115e-07,
      "loss": 2.037,
      "step": 70056
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1369856595993042,
      "learning_rate": 3.83047072349878e-07,
      "loss": 2.512,
      "step": 70057
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0856953859329224,
      "learning_rate": 3.829342149283921e-07,
      "loss": 2.2336,
      "step": 70058
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.038537859916687,
      "learning_rate": 3.828213738104436e-07,
      "loss": 2.1618,
      "step": 70059
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0884268283843994,
      "learning_rate": 3.827085489962279e-07,
      "loss": 2.247,
      "step": 70060
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.05563223361969,
      "learning_rate": 3.825957404859293e-07,
      "loss": 2.278,
      "step": 70061
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.056589126586914,
      "learning_rate": 3.824829482797454e-07,
      "loss": 2.5472,
      "step": 70062
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0814590454101562,
      "learning_rate": 3.823701723778639e-07,
      "loss": 2.2593,
      "step": 70063
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9666851162910461,
      "learning_rate": 3.822574127804779e-07,
      "loss": 2.2584,
      "step": 70064
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1250972747802734,
      "learning_rate": 3.8214466948777615e-07,
      "loss": 2.3664,
      "step": 70065
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0705519914627075,
      "learning_rate": 3.8203194249995304e-07,
      "loss": 2.4812,
      "step": 70066
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1153630018234253,
      "learning_rate": 3.8191923181719715e-07,
      "loss": 2.4448,
      "step": 70067
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0869269371032715,
      "learning_rate": 3.818065374397018e-07,
      "loss": 2.274,
      "step": 70068
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2724367380142212,
      "learning_rate": 3.816938593676545e-07,
      "loss": 2.3981,
      "step": 70069
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9959769248962402,
      "learning_rate": 3.8158119760124956e-07,
      "loss": 2.345,
      "step": 70070
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9996011257171631,
      "learning_rate": 3.814685521406758e-07,
      "loss": 2.5068,
      "step": 70071
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0589625835418701,
      "learning_rate": 3.8135592298612746e-07,
      "loss": 2.2635,
      "step": 70072
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2066493034362793,
      "learning_rate": 3.812433101377921e-07,
      "loss": 2.459,
      "step": 70073
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1078985929489136,
      "learning_rate": 3.8113071359586197e-07,
      "loss": 2.4021,
      "step": 70074
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.178291916847229,
      "learning_rate": 3.810181333605267e-07,
      "loss": 2.1713,
      "step": 70075
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1998748779296875,
      "learning_rate": 3.8090556943197855e-07,
      "loss": 2.3303,
      "step": 70076
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.832948923110962,
      "learning_rate": 3.807930218104072e-07,
      "loss": 2.1513,
      "step": 70077
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4814571142196655,
      "learning_rate": 3.8068049049600595e-07,
      "loss": 2.5059,
      "step": 70078
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2096304893493652,
      "learning_rate": 3.805679754889635e-07,
      "loss": 2.1794,
      "step": 70079
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.161150336265564,
      "learning_rate": 3.804554767894697e-07,
      "loss": 2.3395,
      "step": 70080
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0845226049423218,
      "learning_rate": 3.803429943977177e-07,
      "loss": 2.0754,
      "step": 70081
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1032692193984985,
      "learning_rate": 3.802305283138963e-07,
      "loss": 2.3559,
      "step": 70082
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9663491249084473,
      "learning_rate": 3.801180785381975e-07,
      "loss": 2.1772,
      "step": 70083
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1903009414672852,
      "learning_rate": 3.8000564507081005e-07,
      "loss": 2.2085,
      "step": 70084
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1625988483428955,
      "learning_rate": 3.7989322791192716e-07,
      "loss": 2.1964,
      "step": 70085
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.220584750175476,
      "learning_rate": 3.797808270617387e-07,
      "loss": 2.1878,
      "step": 70086
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0062823295593262,
      "learning_rate": 3.796684425204333e-07,
      "loss": 2.2712,
      "step": 70087
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2404874563217163,
      "learning_rate": 3.7955607428820205e-07,
      "loss": 2.2684,
      "step": 70088
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0751763582229614,
      "learning_rate": 3.794437223652381e-07,
      "loss": 2.2667,
      "step": 70089
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1581039428710938,
      "learning_rate": 3.7933138675172786e-07,
      "loss": 2.5463,
      "step": 70090
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2096216678619385,
      "learning_rate": 3.7921906744786463e-07,
      "loss": 2.3794,
      "step": 70091
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.186180591583252,
      "learning_rate": 3.791067644538382e-07,
      "loss": 2.5042,
      "step": 70092
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9980593323707581,
      "learning_rate": 3.789944777698396e-07,
      "loss": 2.0775,
      "step": 70093
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0594165325164795,
      "learning_rate": 3.788822073960563e-07,
      "loss": 2.3693,
      "step": 70094
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.119981288909912,
      "learning_rate": 3.7876995333268276e-07,
      "loss": 2.2155,
      "step": 70095
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0775545835494995,
      "learning_rate": 3.786577155799054e-07,
      "loss": 2.4243,
      "step": 70096
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9715816378593445,
      "learning_rate": 3.785454941379174e-07,
      "loss": 2.3213,
      "step": 70097
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0520563125610352,
      "learning_rate": 3.7843328900690755e-07,
      "loss": 2.341,
      "step": 70098
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.06067955493927,
      "learning_rate": 3.78321100187069e-07,
      "loss": 2.1761,
      "step": 70099
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1396740674972534,
      "learning_rate": 3.7820892767858607e-07,
      "loss": 2.3348,
      "step": 70100
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.037951111793518,
      "learning_rate": 3.7809677148165413e-07,
      "loss": 2.209,
      "step": 70101
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1168837547302246,
      "learning_rate": 3.779846315964597e-07,
      "loss": 2.3052,
      "step": 70102
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0189025402069092,
      "learning_rate": 3.7787250802319597e-07,
      "loss": 2.1049,
      "step": 70103
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0826846361160278,
      "learning_rate": 3.7776040076205165e-07,
      "loss": 2.3904,
      "step": 70104
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.164478063583374,
      "learning_rate": 3.7764830981321664e-07,
      "loss": 2.3596,
      "step": 70105
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.052886962890625,
      "learning_rate": 3.775362351768797e-07,
      "loss": 2.5276,
      "step": 70106
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.307524561882019,
      "learning_rate": 3.77424176853235e-07,
      "loss": 2.527,
      "step": 70107
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0024030208587646,
      "learning_rate": 3.7731213484246796e-07,
      "loss": 2.3763,
      "step": 70108
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1392945051193237,
      "learning_rate": 3.772001091447708e-07,
      "loss": 2.3432,
      "step": 70109
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0762276649475098,
      "learning_rate": 3.7708809976033323e-07,
      "loss": 2.4654,
      "step": 70110
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9694808125495911,
      "learning_rate": 3.7697610668934404e-07,
      "loss": 2.4791,
      "step": 70111
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0156372785568237,
      "learning_rate": 3.7686412993199416e-07,
      "loss": 2.135,
      "step": 70112
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.101227045059204,
      "learning_rate": 3.767521694884746e-07,
      "loss": 2.6079,
      "step": 70113
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0158220529556274,
      "learning_rate": 3.766402253589729e-07,
      "loss": 2.3482,
      "step": 70114
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2173943519592285,
      "learning_rate": 3.76528297543679e-07,
      "loss": 2.2535,
      "step": 70115
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1124467849731445,
      "learning_rate": 3.7641638604278384e-07,
      "loss": 2.0662,
      "step": 70116
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2296805381774902,
      "learning_rate": 3.7630449085647613e-07,
      "loss": 2.0912,
      "step": 70117
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9808768033981323,
      "learning_rate": 3.7619261198494683e-07,
      "loss": 2.1873,
      "step": 70118
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0204038619995117,
      "learning_rate": 3.7608074942838356e-07,
      "loss": 2.4375,
      "step": 70119
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.110973834991455,
      "learning_rate": 3.7596890318697844e-07,
      "loss": 2.324,
      "step": 70120
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1655018329620361,
      "learning_rate": 3.75857073260919e-07,
      "loss": 2.3718,
      "step": 70121
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1734893321990967,
      "learning_rate": 3.7574525965039745e-07,
      "loss": 2.3995,
      "step": 70122
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0454692840576172,
      "learning_rate": 3.7563346235559904e-07,
      "loss": 2.3132,
      "step": 70123
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0256093740463257,
      "learning_rate": 3.755216813767182e-07,
      "loss": 2.2967,
      "step": 70124
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0690277814865112,
      "learning_rate": 3.754099167139402e-07,
      "loss": 2.5324,
      "step": 70125
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9894450306892395,
      "learning_rate": 3.7529816836745947e-07,
      "loss": 2.4055,
      "step": 70126
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9943454265594482,
      "learning_rate": 3.751864363374591e-07,
      "loss": 2.351,
      "step": 70127
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0674117803573608,
      "learning_rate": 3.7507472062413343e-07,
      "loss": 2.2565,
      "step": 70128
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0122603178024292,
      "learning_rate": 3.749630212276689e-07,
      "loss": 2.09,
      "step": 70129
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2948819398880005,
      "learning_rate": 3.748513381482577e-07,
      "loss": 2.2476,
      "step": 70130
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0958194732666016,
      "learning_rate": 3.7473967138608625e-07,
      "loss": 2.0304,
      "step": 70131
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2006702423095703,
      "learning_rate": 3.7462802094134666e-07,
      "loss": 2.333,
      "step": 70132
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.110260248184204,
      "learning_rate": 3.745163868142254e-07,
      "loss": 2.0764,
      "step": 70133
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1609807014465332,
      "learning_rate": 3.7440476900491465e-07,
      "loss": 2.1382,
      "step": 70134
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9792866706848145,
      "learning_rate": 3.742931675136008e-07,
      "loss": 2.3784,
      "step": 70135
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1494218111038208,
      "learning_rate": 3.7418158234047485e-07,
      "loss": 2.4477,
      "step": 70136
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1424148082733154,
      "learning_rate": 3.740700134857256e-07,
      "loss": 2.4552,
      "step": 70137
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2097101211547852,
      "learning_rate": 3.739584609495406e-07,
      "loss": 2.3826,
      "step": 70138
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2152509689331055,
      "learning_rate": 3.73846924732113e-07,
      "loss": 2.3359,
      "step": 70139
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1218212842941284,
      "learning_rate": 3.7373540483362835e-07,
      "loss": 2.1971,
      "step": 70140
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0749520063400269,
      "learning_rate": 3.736239012542764e-07,
      "loss": 2.2647,
      "step": 70141
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0205051898956299,
      "learning_rate": 3.735124139942459e-07,
      "loss": 2.4872,
      "step": 70142
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.11245858669281,
      "learning_rate": 3.7340094305372664e-07,
      "loss": 2.2701,
      "step": 70143
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1390751600265503,
      "learning_rate": 3.7328948843290634e-07,
      "loss": 2.4142,
      "step": 70144
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0996034145355225,
      "learning_rate": 3.7317805013197703e-07,
      "loss": 2.1751,
      "step": 70145
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1449694633483887,
      "learning_rate": 3.7306662815112305e-07,
      "loss": 2.3458,
      "step": 70146
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1837321519851685,
      "learning_rate": 3.729552224905386e-07,
      "loss": 2.489,
      "step": 70147
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.151697039604187,
      "learning_rate": 3.7284383315040693e-07,
      "loss": 2.6179,
      "step": 70148
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1042505502700806,
      "learning_rate": 3.7273246013092234e-07,
      "loss": 2.4798,
      "step": 70149
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1000323295593262,
      "learning_rate": 3.72621103432268e-07,
      "loss": 2.23,
      "step": 70150
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0477217435836792,
      "learning_rate": 3.7250976305463816e-07,
      "loss": 2.1914,
      "step": 70151
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1010409593582153,
      "learning_rate": 3.7239843899821935e-07,
      "loss": 2.2188,
      "step": 70152
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1597394943237305,
      "learning_rate": 3.7228713126319926e-07,
      "loss": 2.524,
      "step": 70153
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0966910123825073,
      "learning_rate": 3.7217583984976546e-07,
      "loss": 2.216,
      "step": 70154
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0876433849334717,
      "learning_rate": 3.7206456475811116e-07,
      "loss": 2.6545,
      "step": 70155
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.025666356086731,
      "learning_rate": 3.719533059884206e-07,
      "loss": 2.2006,
      "step": 70156
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0888277292251587,
      "learning_rate": 3.718420635408848e-07,
      "loss": 2.4922,
      "step": 70157
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1136136054992676,
      "learning_rate": 3.717308374156914e-07,
      "loss": 2.1862,
      "step": 70158
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2022203207015991,
      "learning_rate": 3.7161962761302904e-07,
      "loss": 2.159,
      "step": 70159
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9860936999320984,
      "learning_rate": 3.7150843413308656e-07,
      "loss": 2.1741,
      "step": 70160
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0349119901657104,
      "learning_rate": 3.713972569760527e-07,
      "loss": 2.4046,
      "step": 70161
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0849783420562744,
      "learning_rate": 3.71286096142115e-07,
      "loss": 2.2812,
      "step": 70162
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0622717142105103,
      "learning_rate": 3.7117495163146335e-07,
      "loss": 2.454,
      "step": 70163
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9366751909255981,
      "learning_rate": 3.710638234442854e-07,
      "loss": 2.2692,
      "step": 70164
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1839962005615234,
      "learning_rate": 3.7095271158076984e-07,
      "loss": 2.338,
      "step": 70165
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0199540853500366,
      "learning_rate": 3.7084161604110327e-07,
      "loss": 2.274,
      "step": 70166
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0667921304702759,
      "learning_rate": 3.7073053682547656e-07,
      "loss": 2.2991,
      "step": 70167
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0444000959396362,
      "learning_rate": 3.706194739340752e-07,
      "loss": 2.2784,
      "step": 70168
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.075942873954773,
      "learning_rate": 3.7050842736709e-07,
      "loss": 2.3532,
      "step": 70169
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1066300868988037,
      "learning_rate": 3.703973971247099e-07,
      "loss": 2.3669,
      "step": 70170
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.948505699634552,
      "learning_rate": 3.7028638320711906e-07,
      "loss": 2.2479,
      "step": 70171
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0654113292694092,
      "learning_rate": 3.7017538561450963e-07,
      "loss": 2.242,
      "step": 70172
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0335229635238647,
      "learning_rate": 3.7006440434706805e-07,
      "loss": 2.4412,
      "step": 70173
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9625834226608276,
      "learning_rate": 3.6995343940498306e-07,
      "loss": 2.2401,
      "step": 70174
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1866962909698486,
      "learning_rate": 3.6984249078844126e-07,
      "loss": 2.3615,
      "step": 70175
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1470296382904053,
      "learning_rate": 3.6973155849763355e-07,
      "loss": 2.4676,
      "step": 70176
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1247551441192627,
      "learning_rate": 3.6962064253274423e-07,
      "loss": 2.517,
      "step": 70177
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0932248830795288,
      "learning_rate": 3.6950974289396757e-07,
      "loss": 2.3884,
      "step": 70178
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.071940302848816,
      "learning_rate": 3.6939885958148346e-07,
      "loss": 2.2175,
      "step": 70179
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.126425862312317,
      "learning_rate": 3.692879925954862e-07,
      "loss": 2.2347,
      "step": 70180
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1382375955581665,
      "learning_rate": 3.6917714193616006e-07,
      "loss": 2.2341,
      "step": 70181
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.231942057609558,
      "learning_rate": 3.690663076036949e-07,
      "loss": 2.3809,
      "step": 70182
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1945315599441528,
      "learning_rate": 3.689554895982772e-07,
      "loss": 2.3728,
      "step": 70183
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2020986080169678,
      "learning_rate": 3.68844687920098e-07,
      "loss": 2.4768,
      "step": 70184
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0430529117584229,
      "learning_rate": 3.687339025693404e-07,
      "loss": 2.3708,
      "step": 70185
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0758447647094727,
      "learning_rate": 3.686231335461965e-07,
      "loss": 2.2971,
      "step": 70186
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1021407842636108,
      "learning_rate": 3.685123808508506e-07,
      "loss": 2.3404,
      "step": 70187
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0071667432785034,
      "learning_rate": 3.6840164448349257e-07,
      "loss": 2.1681,
      "step": 70188
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0553799867630005,
      "learning_rate": 3.6829092444431e-07,
      "loss": 2.5333,
      "step": 70189
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0685523748397827,
      "learning_rate": 3.6818022073349057e-07,
      "loss": 2.257,
      "step": 70190
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1312505006790161,
      "learning_rate": 3.680695333512219e-07,
      "loss": 2.2389,
      "step": 70191
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1465774774551392,
      "learning_rate": 3.679588622976915e-07,
      "loss": 2.1177,
      "step": 70192
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1467117071151733,
      "learning_rate": 3.678482075730849e-07,
      "loss": 2.192,
      "step": 70193
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.051712989807129,
      "learning_rate": 3.6773756917759306e-07,
      "loss": 2.468,
      "step": 70194
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1471832990646362,
      "learning_rate": 3.6762694711140136e-07,
      "loss": 2.5047,
      "step": 70195
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1157008409500122,
      "learning_rate": 3.675163413746996e-07,
      "loss": 2.269,
      "step": 70196
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0546573400497437,
      "learning_rate": 3.6740575196767323e-07,
      "loss": 2.3062,
      "step": 70197
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0636646747589111,
      "learning_rate": 3.672951788905099e-07,
      "loss": 2.4092,
      "step": 70198
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2527447938919067,
      "learning_rate": 3.6718462214339725e-07,
      "loss": 2.0301,
      "step": 70199
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0524752140045166,
      "learning_rate": 3.670740817265228e-07,
      "loss": 2.4371,
      "step": 70200
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0726555585861206,
      "learning_rate": 3.669635576400743e-07,
      "loss": 2.1431,
      "step": 70201
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.193158745765686,
      "learning_rate": 3.668530498842382e-07,
      "loss": 2.2349,
      "step": 70202
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0971194505691528,
      "learning_rate": 3.667425584592044e-07,
      "loss": 2.4338,
      "step": 70203
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1680225133895874,
      "learning_rate": 3.66632083365156e-07,
      "loss": 2.4693,
      "step": 70204
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0646295547485352,
      "learning_rate": 3.6652162460228626e-07,
      "loss": 2.3639,
      "step": 70205
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1013245582580566,
      "learning_rate": 3.66411182170775e-07,
      "loss": 2.4207,
      "step": 70206
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.12717866897583,
      "learning_rate": 3.663007560708154e-07,
      "loss": 2.3051,
      "step": 70207
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0770384073257446,
      "learning_rate": 3.6619034630259067e-07,
      "loss": 2.3508,
      "step": 70208
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1055281162261963,
      "learning_rate": 3.660799528662917e-07,
      "loss": 2.1577,
      "step": 70209
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2969207763671875,
      "learning_rate": 3.659695757621018e-07,
      "loss": 2.3162,
      "step": 70210
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1339689493179321,
      "learning_rate": 3.658592149902118e-07,
      "loss": 2.5005,
      "step": 70211
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0493457317352295,
      "learning_rate": 3.6574887055080497e-07,
      "loss": 2.3219,
      "step": 70212
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0009433031082153,
      "learning_rate": 3.6563854244407226e-07,
      "loss": 2.4638,
      "step": 70213
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1102334260940552,
      "learning_rate": 3.6552823067019793e-07,
      "loss": 1.9264,
      "step": 70214
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.085699439048767,
      "learning_rate": 3.654179352293707e-07,
      "loss": 2.1716,
      "step": 70215
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0598492622375488,
      "learning_rate": 3.653076561217761e-07,
      "loss": 2.3945,
      "step": 70216
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1756978034973145,
      "learning_rate": 3.651973933476027e-07,
      "loss": 2.2246,
      "step": 70217
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1192706823349,
      "learning_rate": 3.6508714690703604e-07,
      "loss": 2.2394,
      "step": 70218
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0772982835769653,
      "learning_rate": 3.649769168002637e-07,
      "loss": 2.4108,
      "step": 70219
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0712724924087524,
      "learning_rate": 3.648667030274722e-07,
      "loss": 2.3407,
      "step": 70220
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1399567127227783,
      "learning_rate": 3.64756505588848e-07,
      "loss": 2.2814,
      "step": 70221
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9423944354057312,
      "learning_rate": 3.646463244845777e-07,
      "loss": 2.5208,
      "step": 70222
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1763911247253418,
      "learning_rate": 3.645361597148511e-07,
      "loss": 2.2846,
      "step": 70223
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1312599182128906,
      "learning_rate": 3.6442601127985034e-07,
      "loss": 2.2179,
      "step": 70224
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.189884066581726,
      "learning_rate": 3.643158791797663e-07,
      "loss": 2.4168,
      "step": 70225
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.080225944519043,
      "learning_rate": 3.642057634147833e-07,
      "loss": 2.2142,
      "step": 70226
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1921859979629517,
      "learning_rate": 3.640956639850879e-07,
      "loss": 2.5238,
      "step": 70227
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0658842325210571,
      "learning_rate": 3.6398558089086767e-07,
      "loss": 2.3826,
      "step": 70228
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1572887897491455,
      "learning_rate": 3.6387551413230917e-07,
      "loss": 2.4185,
      "step": 70229
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1736068725585938,
      "learning_rate": 3.637654637095989e-07,
      "loss": 2.2014,
      "step": 70230
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.249330759048462,
      "learning_rate": 3.6365542962292444e-07,
      "loss": 2.3934,
      "step": 70231
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0245320796966553,
      "learning_rate": 3.6354541187247017e-07,
      "loss": 2.3592,
      "step": 70232
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.010658860206604,
      "learning_rate": 3.6343541045842255e-07,
      "loss": 2.5151,
      "step": 70233
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.125846028327942,
      "learning_rate": 3.633254253809715e-07,
      "loss": 2.2662,
      "step": 70234
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0872807502746582,
      "learning_rate": 3.63215456640299e-07,
      "loss": 2.1542,
      "step": 70235
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0032544136047363,
      "learning_rate": 3.6310550423659496e-07,
      "loss": 2.4211,
      "step": 70236
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1010249853134155,
      "learning_rate": 3.629955681700426e-07,
      "loss": 2.3668,
      "step": 70237
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0163689851760864,
      "learning_rate": 3.628856484408316e-07,
      "loss": 2.4046,
      "step": 70238
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.041667103767395,
      "learning_rate": 3.627757450491465e-07,
      "loss": 2.6413,
      "step": 70239
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3819257020950317,
      "learning_rate": 3.6266585799517473e-07,
      "loss": 2.3667,
      "step": 70240
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1430174112319946,
      "learning_rate": 3.6255598727909957e-07,
      "loss": 2.305,
      "step": 70241
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0757319927215576,
      "learning_rate": 3.62446132901112e-07,
      "loss": 2.4879,
      "step": 70242
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1428781747817993,
      "learning_rate": 3.6233629486139397e-07,
      "loss": 2.3451,
      "step": 70243
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0686614513397217,
      "learning_rate": 3.622264731601366e-07,
      "loss": 2.5465,
      "step": 70244
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9815295934677124,
      "learning_rate": 3.6211666779751963e-07,
      "loss": 2.196,
      "step": 70245
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1483269929885864,
      "learning_rate": 3.620068787737341e-07,
      "loss": 2.3221,
      "step": 70246
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0579290390014648,
      "learning_rate": 3.618971060889631e-07,
      "loss": 2.1343,
      "step": 70247
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9850478172302246,
      "learning_rate": 3.617873497433955e-07,
      "loss": 2.3545,
      "step": 70248
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0905113220214844,
      "learning_rate": 3.6167760973721545e-07,
      "loss": 2.3832,
      "step": 70249
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.125610113143921,
      "learning_rate": 3.6156788607061066e-07,
      "loss": 2.3234,
      "step": 70250
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0115305185317993,
      "learning_rate": 3.614581787437643e-07,
      "loss": 2.4124,
      "step": 70251
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0577278137207031,
      "learning_rate": 3.6134848775686513e-07,
      "loss": 2.3429,
      "step": 70252
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1905734539031982,
      "learning_rate": 3.6123881311009857e-07,
      "loss": 2.2702,
      "step": 70253
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0000617504119873,
      "learning_rate": 3.611291548036488e-07,
      "loss": 2.4587,
      "step": 70254
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2091647386550903,
      "learning_rate": 3.6101951283770475e-07,
      "loss": 2.2529,
      "step": 70255
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2258338928222656,
      "learning_rate": 3.6090988721244834e-07,
      "loss": 2.365,
      "step": 70256
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0946584939956665,
      "learning_rate": 3.608002779280695e-07,
      "loss": 2.3397,
      "step": 70257
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.080776572227478,
      "learning_rate": 3.606906849847514e-07,
      "loss": 2.2425,
      "step": 70258
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.9977455735206604,
      "learning_rate": 3.6058110838268156e-07,
      "loss": 2.4636,
      "step": 70259
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1275118589401245,
      "learning_rate": 3.604715481220433e-07,
      "loss": 2.4115,
      "step": 70260
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.203711986541748,
      "learning_rate": 3.6036200420302426e-07,
      "loss": 2.3675,
      "step": 70261
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.133772373199463,
      "learning_rate": 3.6025247662580864e-07,
      "loss": 2.3429,
      "step": 70262
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.2126636505126953,
      "learning_rate": 3.6014296539058527e-07,
      "loss": 2.3064,
      "step": 70263
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1046466827392578,
      "learning_rate": 3.600334704975361e-07,
      "loss": 2.2773,
      "step": 70264
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1786468029022217,
      "learning_rate": 3.5992399194685e-07,
      "loss": 2.0126,
      "step": 70265
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0474482774734497,
      "learning_rate": 3.59814529738709e-07,
      "loss": 2.2744,
      "step": 70266
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.12303626537323,
      "learning_rate": 3.597050838733018e-07,
      "loss": 2.3382,
      "step": 70267
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.08942449092865,
      "learning_rate": 3.595956543508117e-07,
      "loss": 2.3195,
      "step": 70268
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.034448266029358,
      "learning_rate": 3.5948624117142726e-07,
      "loss": 2.4888,
      "step": 70269
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.1498163938522339,
      "learning_rate": 3.593768443353296e-07,
      "loss": 2.4394,
      "step": 70270
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.077771544456482,
      "learning_rate": 3.592674638427096e-07,
      "loss": 2.3477,
      "step": 70271
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.0892362594604492,
      "learning_rate": 3.591580996937472e-07,
      "loss": 2.3,
      "step": 70272
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0939340591430664,
      "learning_rate": 3.590487518886321e-07,
      "loss": 2.2985,
      "step": 70273
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0175905227661133,
      "learning_rate": 3.5893942042754536e-07,
      "loss": 2.3497,
      "step": 70274
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1680768728256226,
      "learning_rate": 3.5883010531067686e-07,
      "loss": 2.0872,
      "step": 70275
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0324857234954834,
      "learning_rate": 3.587208065382075e-07,
      "loss": 2.3029,
      "step": 70276
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0894885063171387,
      "learning_rate": 3.586115241103272e-07,
      "loss": 2.2732,
      "step": 70277
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0429590940475464,
      "learning_rate": 3.5850225802721795e-07,
      "loss": 2.3977,
      "step": 70278
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1098532676696777,
      "learning_rate": 3.583930082890663e-07,
      "loss": 2.2629,
      "step": 70279
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1661494970321655,
      "learning_rate": 3.582837748960566e-07,
      "loss": 2.2013,
      "step": 70280
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1033899784088135,
      "learning_rate": 3.581745578483753e-07,
      "loss": 2.3203,
      "step": 70281
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.089721441268921,
      "learning_rate": 3.580653571462067e-07,
      "loss": 2.0426,
      "step": 70282
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0949039459228516,
      "learning_rate": 3.5795617278973513e-07,
      "loss": 1.9353,
      "step": 70283
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2112871408462524,
      "learning_rate": 3.5784700477914715e-07,
      "loss": 2.2059,
      "step": 70284
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0340502262115479,
      "learning_rate": 3.5773785311462804e-07,
      "loss": 2.1867,
      "step": 70285
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0883941650390625,
      "learning_rate": 3.576287177963611e-07,
      "loss": 2.1283,
      "step": 70286
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0803972482681274,
      "learning_rate": 3.575195988245328e-07,
      "loss": 2.3972,
      "step": 70287
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.100282907485962,
      "learning_rate": 3.574104961993274e-07,
      "loss": 2.1892,
      "step": 70288
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.110732078552246,
      "learning_rate": 3.573014099209304e-07,
      "loss": 2.2703,
      "step": 70289
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2838166952133179,
      "learning_rate": 3.5719233998952607e-07,
      "loss": 2.0283,
      "step": 70290
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.148535132408142,
      "learning_rate": 3.570832864052998e-07,
      "loss": 2.4868,
      "step": 70291
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.173410177230835,
      "learning_rate": 3.569742491684358e-07,
      "loss": 2.3836,
      "step": 70292
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9982706308364868,
      "learning_rate": 3.5686522827911964e-07,
      "loss": 2.2327,
      "step": 70293
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0728870630264282,
      "learning_rate": 3.567562237375366e-07,
      "loss": 2.379,
      "step": 70294
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0154368877410889,
      "learning_rate": 3.5664723554386994e-07,
      "loss": 2.1502,
      "step": 70295
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1446930170059204,
      "learning_rate": 3.565382636983061e-07,
      "loss": 2.4597,
      "step": 70296
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1032366752624512,
      "learning_rate": 3.564293082010295e-07,
      "loss": 2.3428,
      "step": 70297
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1420551538467407,
      "learning_rate": 3.563203690522243e-07,
      "loss": 2.332,
      "step": 70298
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0607002973556519,
      "learning_rate": 3.562114462520738e-07,
      "loss": 2.1914,
      "step": 70299
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1640679836273193,
      "learning_rate": 3.561025398007656e-07,
      "loss": 2.2871,
      "step": 70300
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9969477653503418,
      "learning_rate": 3.559936496984817e-07,
      "loss": 2.3762,
      "step": 70301
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0391126871109009,
      "learning_rate": 3.558847759454087e-07,
      "loss": 2.343,
      "step": 70302
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0825920104980469,
      "learning_rate": 3.5577591854172976e-07,
      "loss": 2.2253,
      "step": 70303
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0606342554092407,
      "learning_rate": 3.556670774876303e-07,
      "loss": 2.2852,
      "step": 70304
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1452546119689941,
      "learning_rate": 3.5555825278329345e-07,
      "loss": 2.3265,
      "step": 70305
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9572849273681641,
      "learning_rate": 3.5544944442890696e-07,
      "loss": 2.3573,
      "step": 70306
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0324733257293701,
      "learning_rate": 3.5534065242465053e-07,
      "loss": 2.3611,
      "step": 70307
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0409255027770996,
      "learning_rate": 3.55231876770713e-07,
      "loss": 2.441,
      "step": 70308
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0314608812332153,
      "learning_rate": 3.5512311746727647e-07,
      "loss": 2.339,
      "step": 70309
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1585750579833984,
      "learning_rate": 3.5501437451452623e-07,
      "loss": 2.3371,
      "step": 70310
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0909531116485596,
      "learning_rate": 3.549056479126456e-07,
      "loss": 2.399,
      "step": 70311
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1143990755081177,
      "learning_rate": 3.547969376618199e-07,
      "loss": 2.368,
      "step": 70312
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2071645259857178,
      "learning_rate": 3.546882437622312e-07,
      "loss": 2.4697,
      "step": 70313
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1101795434951782,
      "learning_rate": 3.545795662140683e-07,
      "loss": 2.4528,
      "step": 70314
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0585201978683472,
      "learning_rate": 3.54470905017511e-07,
      "loss": 2.1258,
      "step": 70315
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1028692722320557,
      "learning_rate": 3.543622601727448e-07,
      "loss": 2.247,
      "step": 70316
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9629639983177185,
      "learning_rate": 3.542536316799561e-07,
      "loss": 2.2754,
      "step": 70317
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0502320528030396,
      "learning_rate": 3.541450195393248e-07,
      "loss": 2.2579,
      "step": 70318
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.131605863571167,
      "learning_rate": 3.5403642375103966e-07,
      "loss": 2.3883,
      "step": 70319
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2096964120864868,
      "learning_rate": 3.539278443152816e-07,
      "loss": 2.5161,
      "step": 70320
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.030753493309021,
      "learning_rate": 3.5381928123223607e-07,
      "loss": 2.3563,
      "step": 70321
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0406311750411987,
      "learning_rate": 3.5371073450208516e-07,
      "loss": 2.4369,
      "step": 70322
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0433573722839355,
      "learning_rate": 3.5360220412501755e-07,
      "loss": 2.2351,
      "step": 70323
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1451588869094849,
      "learning_rate": 3.5349369010121204e-07,
      "loss": 2.3988,
      "step": 70324
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0062040090560913,
      "learning_rate": 3.533851924308551e-07,
      "loss": 2.3051,
      "step": 70325
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1399617195129395,
      "learning_rate": 3.5327671111413e-07,
      "loss": 2.3596,
      "step": 70326
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9779073596000671,
      "learning_rate": 3.531682461512209e-07,
      "loss": 2.0884,
      "step": 70327
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.123941421508789,
      "learning_rate": 3.530597975423111e-07,
      "loss": 2.3575,
      "step": 70328
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.126083493232727,
      "learning_rate": 3.5295136528758714e-07,
      "loss": 2.2548,
      "step": 70329
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.131645679473877,
      "learning_rate": 3.528429493872287e-07,
      "loss": 2.2217,
      "step": 70330
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0012682676315308,
      "learning_rate": 3.527345498414225e-07,
      "loss": 2.4014,
      "step": 70331
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1043752431869507,
      "learning_rate": 3.5262616665035054e-07,
      "loss": 2.3408,
      "step": 70332
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0589118003845215,
      "learning_rate": 3.525177998141982e-07,
      "loss": 2.2827,
      "step": 70333
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0729172229766846,
      "learning_rate": 3.5240944933314756e-07,
      "loss": 2.3219,
      "step": 70334
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1833065748214722,
      "learning_rate": 3.5230111520738407e-07,
      "loss": 2.1125,
      "step": 70335
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9482492804527283,
      "learning_rate": 3.5219279743708977e-07,
      "loss": 2.2424,
      "step": 70336
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.017645001411438,
      "learning_rate": 3.520844960224512e-07,
      "loss": 2.3542,
      "step": 70337
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0717037916183472,
      "learning_rate": 3.5197621096364707e-07,
      "loss": 2.3565,
      "step": 70338
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.305477499961853,
      "learning_rate": 3.5186794226086506e-07,
      "loss": 2.5788,
      "step": 70339
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.206076741218567,
      "learning_rate": 3.517596899142861e-07,
      "loss": 2.1891,
      "step": 70340
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0822638273239136,
      "learning_rate": 3.516514539240956e-07,
      "loss": 2.1756,
      "step": 70341
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0905224084854126,
      "learning_rate": 3.5154323429047567e-07,
      "loss": 2.5424,
      "step": 70342
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2218432426452637,
      "learning_rate": 3.5143503101361166e-07,
      "loss": 2.2926,
      "step": 70343
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0691337585449219,
      "learning_rate": 3.5132684409368565e-07,
      "loss": 2.4811,
      "step": 70344
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2022489309310913,
      "learning_rate": 3.512186735308798e-07,
      "loss": 2.3444,
      "step": 70345
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.054460048675537,
      "learning_rate": 3.5111051932538056e-07,
      "loss": 2.3822,
      "step": 70346
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1187447309494019,
      "learning_rate": 3.5100238147736777e-07,
      "loss": 2.3939,
      "step": 70347
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1524181365966797,
      "learning_rate": 3.50894259987028e-07,
      "loss": 2.4212,
      "step": 70348
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1392977237701416,
      "learning_rate": 3.5078615485454216e-07,
      "loss": 2.3018,
      "step": 70349
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.988143801689148,
      "learning_rate": 3.506780660800968e-07,
      "loss": 2.1781,
      "step": 70350
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1660090684890747,
      "learning_rate": 3.5056999366387067e-07,
      "loss": 2.2202,
      "step": 70351
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9775804281234741,
      "learning_rate": 3.504619376060492e-07,
      "loss": 2.5508,
      "step": 70352
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1142098903656006,
      "learning_rate": 3.503538979068155e-07,
      "loss": 2.3932,
      "step": 70353
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1099400520324707,
      "learning_rate": 3.5024587456635393e-07,
      "loss": 2.3752,
      "step": 70354
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1383095979690552,
      "learning_rate": 3.5013786758484436e-07,
      "loss": 2.2805,
      "step": 70355
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.172144889831543,
      "learning_rate": 3.5002987696247437e-07,
      "loss": 2.455,
      "step": 70356
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.144315242767334,
      "learning_rate": 3.499219026994227e-07,
      "loss": 2.4117,
      "step": 70357
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5742239952087402,
      "learning_rate": 3.4981394479587596e-07,
      "loss": 2.4765,
      "step": 70358
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.101731300354004,
      "learning_rate": 3.497060032520139e-07,
      "loss": 2.4683,
      "step": 70359
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0261527299880981,
      "learning_rate": 3.495980780680219e-07,
      "loss": 2.2524,
      "step": 70360
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0329269170761108,
      "learning_rate": 3.494901692440822e-07,
      "loss": 2.357,
      "step": 70361
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0134376287460327,
      "learning_rate": 3.4938227678037783e-07,
      "loss": 2.1834,
      "step": 70362
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1090584993362427,
      "learning_rate": 3.4927440067709207e-07,
      "loss": 2.4639,
      "step": 70363
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1874357461929321,
      "learning_rate": 3.4916654093440693e-07,
      "loss": 2.366,
      "step": 70364
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0707627534866333,
      "learning_rate": 3.4905869755250563e-07,
      "loss": 2.4504,
      "step": 70365
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0873973369598389,
      "learning_rate": 3.489508705315714e-07,
      "loss": 2.6158,
      "step": 70366
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1983927488327026,
      "learning_rate": 3.4884305987178513e-07,
      "loss": 2.3069,
      "step": 70367
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.036456823348999,
      "learning_rate": 3.4873526557333226e-07,
      "loss": 2.4621,
      "step": 70368
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2118356227874756,
      "learning_rate": 3.4862748763639376e-07,
      "loss": 2.371,
      "step": 70369
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.112505316734314,
      "learning_rate": 3.485197260611528e-07,
      "loss": 2.445,
      "step": 70370
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0310980081558228,
      "learning_rate": 3.484119808477937e-07,
      "loss": 2.3608,
      "step": 70371
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.395915150642395,
      "learning_rate": 3.4830425199649634e-07,
      "loss": 2.4389,
      "step": 70372
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.093987226486206,
      "learning_rate": 3.4819653950744494e-07,
      "loss": 2.2985,
      "step": 70373
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0398368835449219,
      "learning_rate": 3.480888433808216e-07,
      "loss": 2.3868,
      "step": 70374
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9909233450889587,
      "learning_rate": 3.4798116361680956e-07,
      "loss": 2.3293,
      "step": 70375
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.128556728363037,
      "learning_rate": 3.4787350021559084e-07,
      "loss": 2.1138,
      "step": 70376
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0407122373580933,
      "learning_rate": 3.477658531773487e-07,
      "loss": 2.2995,
      "step": 70377
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.030164122581482,
      "learning_rate": 3.476582225022629e-07,
      "loss": 2.4101,
      "step": 70378
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.123008131980896,
      "learning_rate": 3.4755060819052e-07,
      "loss": 2.2897,
      "step": 70379
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2347015142440796,
      "learning_rate": 3.4744301024229876e-07,
      "loss": 2.442,
      "step": 70380
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.149770975112915,
      "learning_rate": 3.473354286577846e-07,
      "loss": 2.233,
      "step": 70381
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0185353755950928,
      "learning_rate": 3.472278634371573e-07,
      "loss": 2.2379,
      "step": 70382
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2580639123916626,
      "learning_rate": 3.4712031458060125e-07,
      "loss": 2.5098,
      "step": 70383
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9844958186149597,
      "learning_rate": 3.4701278208829626e-07,
      "loss": 2.2673,
      "step": 70384
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0419745445251465,
      "learning_rate": 3.4690526596042885e-07,
      "loss": 2.335,
      "step": 70385
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0522079467773438,
      "learning_rate": 3.467977661971766e-07,
      "loss": 2.3018,
      "step": 70386
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2776271104812622,
      "learning_rate": 3.466902827987262e-07,
      "loss": 2.5937,
      "step": 70387
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.099053144454956,
      "learning_rate": 3.465828157652551e-07,
      "loss": 2.2498,
      "step": 70388
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0367591381072998,
      "learning_rate": 3.46475365096951e-07,
      "loss": 2.3216,
      "step": 70389
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3168292045593262,
      "learning_rate": 3.463679307939904e-07,
      "loss": 2.3963,
      "step": 70390
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0904840230941772,
      "learning_rate": 3.4626051285655993e-07,
      "loss": 2.4206,
      "step": 70391
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1216918230056763,
      "learning_rate": 3.461531112848382e-07,
      "loss": 2.2032,
      "step": 70392
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9563689827919006,
      "learning_rate": 3.4604572607900955e-07,
      "loss": 2.3831,
      "step": 70393
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0777206420898438,
      "learning_rate": 3.4593835723925495e-07,
      "loss": 2.2117,
      "step": 70394
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.100942611694336,
      "learning_rate": 3.458310047657576e-07,
      "loss": 2.1755,
      "step": 70395
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0056551694869995,
      "learning_rate": 3.457236686586973e-07,
      "loss": 2.4349,
      "step": 70396
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0921131372451782,
      "learning_rate": 3.4561634891825845e-07,
      "loss": 2.2208,
      "step": 70397
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0732126235961914,
      "learning_rate": 3.4550904554462197e-07,
      "loss": 2.257,
      "step": 70398
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1387332677841187,
      "learning_rate": 3.454017585379699e-07,
      "loss": 2.3302,
      "step": 70399
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.042014718055725,
      "learning_rate": 3.452944878984843e-07,
      "loss": 2.3807,
      "step": 70400
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0859637260437012,
      "learning_rate": 3.451872336263462e-07,
      "loss": 2.2649,
      "step": 70401
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1547861099243164,
      "learning_rate": 3.4507999572173766e-07,
      "loss": 2.1973,
      "step": 70402
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0624037981033325,
      "learning_rate": 3.4497277418484186e-07,
      "loss": 2.3026,
      "step": 70403
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.143140196800232,
      "learning_rate": 3.448655690158387e-07,
      "loss": 2.077,
      "step": 70404
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1651310920715332,
      "learning_rate": 3.4475838021491124e-07,
      "loss": 2.3946,
      "step": 70405
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0449626445770264,
      "learning_rate": 3.446512077822406e-07,
      "loss": 2.1226,
      "step": 70406
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3124959468841553,
      "learning_rate": 3.4454405171800767e-07,
      "loss": 2.4108,
      "step": 70407
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0762004852294922,
      "learning_rate": 3.444369120223956e-07,
      "loss": 2.235,
      "step": 70408
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0840257406234741,
      "learning_rate": 3.443297886955843e-07,
      "loss": 2.214,
      "step": 70409
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2021461725234985,
      "learning_rate": 3.442226817377581e-07,
      "loss": 2.1871,
      "step": 70410
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.196987271308899,
      "learning_rate": 3.4411559114909567e-07,
      "loss": 2.1921,
      "step": 70411
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2380939722061157,
      "learning_rate": 3.440085169297802e-07,
      "loss": 2.4808,
      "step": 70412
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.075465440750122,
      "learning_rate": 3.4390145907999273e-07,
      "loss": 2.1878,
      "step": 70413
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9751268625259399,
      "learning_rate": 3.437944175999153e-07,
      "loss": 2.1272,
      "step": 70414
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1777551174163818,
      "learning_rate": 3.436873924897277e-07,
      "loss": 2.5086,
      "step": 70415
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9905545115470886,
      "learning_rate": 3.4358038374961545e-07,
      "loss": 2.3052,
      "step": 70416
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0836536884307861,
      "learning_rate": 3.43473391379755e-07,
      "loss": 2.5037,
      "step": 70417
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0702656507492065,
      "learning_rate": 3.4336641538033066e-07,
      "loss": 2.2513,
      "step": 70418
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0992697477340698,
      "learning_rate": 3.432594557515212e-07,
      "loss": 2.4443,
      "step": 70419
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1042249202728271,
      "learning_rate": 3.43152512493512e-07,
      "loss": 2.2443,
      "step": 70420
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1283135414123535,
      "learning_rate": 3.430455856064796e-07,
      "loss": 2.2997,
      "step": 70421
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0019832849502563,
      "learning_rate": 3.4293867509060943e-07,
      "loss": 2.3809,
      "step": 70422
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0041873455047607,
      "learning_rate": 3.4283178094608017e-07,
      "loss": 2.3395,
      "step": 70423
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1144770383834839,
      "learning_rate": 3.427249031730751e-07,
      "loss": 2.2779,
      "step": 70424
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2419240474700928,
      "learning_rate": 3.426180417717728e-07,
      "loss": 2.2936,
      "step": 70425
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0508263111114502,
      "learning_rate": 3.425111967423567e-07,
      "loss": 2.3311,
      "step": 70426
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0530869960784912,
      "learning_rate": 3.4240436808500755e-07,
      "loss": 2.4623,
      "step": 70427
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1338857412338257,
      "learning_rate": 3.422975557999042e-07,
      "loss": 2.3628,
      "step": 70428
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0849815607070923,
      "learning_rate": 3.4219075988723093e-07,
      "loss": 2.2621,
      "step": 70429
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1421650648117065,
      "learning_rate": 3.420839803471676e-07,
      "loss": 2.2725,
      "step": 70430
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0004054307937622,
      "learning_rate": 3.419772171798941e-07,
      "loss": 2.162,
      "step": 70431
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1535835266113281,
      "learning_rate": 3.418704703855924e-07,
      "loss": 2.189,
      "step": 70432
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1529124975204468,
      "learning_rate": 3.4176373996444465e-07,
      "loss": 2.3998,
      "step": 70433
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0089244842529297,
      "learning_rate": 3.416570259166285e-07,
      "loss": 2.3459,
      "step": 70434
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0801926851272583,
      "learning_rate": 3.4155032824232825e-07,
      "loss": 2.2767,
      "step": 70435
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.047805905342102,
      "learning_rate": 3.414436469417215e-07,
      "loss": 2.4073,
      "step": 70436
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1202906370162964,
      "learning_rate": 3.4133698201499367e-07,
      "loss": 2.4911,
      "step": 70437
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1399093866348267,
      "learning_rate": 3.4123033346232015e-07,
      "loss": 2.3975,
      "step": 70438
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.03255033493042,
      "learning_rate": 3.411237012838864e-07,
      "loss": 2.383,
      "step": 70439
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0225603580474854,
      "learning_rate": 3.4101708547987e-07,
      "loss": 2.4426,
      "step": 70440
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.117164969444275,
      "learning_rate": 3.409104860504553e-07,
      "loss": 2.5339,
      "step": 70441
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0608057975769043,
      "learning_rate": 3.408039029958188e-07,
      "loss": 2.2293,
      "step": 70442
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5927830934524536,
      "learning_rate": 3.406973363161448e-07,
      "loss": 2.0941,
      "step": 70443
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0256558656692505,
      "learning_rate": 3.4059078601160975e-07,
      "loss": 2.1342,
      "step": 70444
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0596373081207275,
      "learning_rate": 3.404842520823981e-07,
      "loss": 2.1019,
      "step": 70445
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9943205118179321,
      "learning_rate": 3.403777345286885e-07,
      "loss": 2.2221,
      "step": 70446
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.045343279838562,
      "learning_rate": 3.4027123335066305e-07,
      "loss": 2.2766,
      "step": 70447
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1876100301742554,
      "learning_rate": 3.4016474854850043e-07,
      "loss": 2.2231,
      "step": 70448
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0891852378845215,
      "learning_rate": 3.40058280122384e-07,
      "loss": 2.3065,
      "step": 70449
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1941157579421997,
      "learning_rate": 3.3995182807249007e-07,
      "loss": 2.5723,
      "step": 70450
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.211820363998413,
      "learning_rate": 3.3984539239900305e-07,
      "loss": 2.1496,
      "step": 70451
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.006411075592041,
      "learning_rate": 3.397389731021006e-07,
      "loss": 2.1451,
      "step": 70452
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.995257556438446,
      "learning_rate": 3.3963257018196474e-07,
      "loss": 2.2858,
      "step": 70453
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9237493276596069,
      "learning_rate": 3.3952618363877533e-07,
      "loss": 2.1009,
      "step": 70454
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1154143810272217,
      "learning_rate": 3.394198134727145e-07,
      "loss": 2.1235,
      "step": 70455
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1133748292922974,
      "learning_rate": 3.3931345968395866e-07,
      "loss": 2.394,
      "step": 70456
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1275748014450073,
      "learning_rate": 3.3920712227269116e-07,
      "loss": 2.1327,
      "step": 70457
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.086713194847107,
      "learning_rate": 3.391008012390906e-07,
      "loss": 2.3769,
      "step": 70458
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.587044358253479,
      "learning_rate": 3.3899449658333915e-07,
      "loss": 2.4985,
      "step": 70459
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1555019617080688,
      "learning_rate": 3.388882083056155e-07,
      "loss": 2.4291,
      "step": 70460
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0882478952407837,
      "learning_rate": 3.387819364061007e-07,
      "loss": 2.4912,
      "step": 70461
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1998605728149414,
      "learning_rate": 3.386756808849745e-07,
      "loss": 2.2242,
      "step": 70462
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.028030514717102,
      "learning_rate": 3.385694417424157e-07,
      "loss": 2.4333,
      "step": 70463
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2178524732589722,
      "learning_rate": 3.3846321897860634e-07,
      "loss": 2.3918,
      "step": 70464
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1683597564697266,
      "learning_rate": 3.3835701259372524e-07,
      "loss": 2.46,
      "step": 70465
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0328960418701172,
      "learning_rate": 3.382508225879544e-07,
      "loss": 2.0984,
      "step": 70466
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9352914094924927,
      "learning_rate": 3.381446489614715e-07,
      "loss": 2.237,
      "step": 70467
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.211544394493103,
      "learning_rate": 3.380384917144597e-07,
      "loss": 2.2713,
      "step": 70468
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.048014760017395,
      "learning_rate": 3.379323508470944e-07,
      "loss": 2.4651,
      "step": 70469
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9801867604255676,
      "learning_rate": 3.3782622635955884e-07,
      "loss": 2.2103,
      "step": 70470
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5112496614456177,
      "learning_rate": 3.377201182520318e-07,
      "loss": 2.1171,
      "step": 70471
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.015749454498291,
      "learning_rate": 3.376140265246941e-07,
      "loss": 2.2356,
      "step": 70472
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0546808242797852,
      "learning_rate": 3.375079511777235e-07,
      "loss": 2.3137,
      "step": 70473
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0090354681015015,
      "learning_rate": 3.3740189221130203e-07,
      "loss": 2.1867,
      "step": 70474
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0666712522506714,
      "learning_rate": 3.3729584962560844e-07,
      "loss": 2.504,
      "step": 70475
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.108345866203308,
      "learning_rate": 3.371898234208237e-07,
      "loss": 2.3402,
      "step": 70476
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9968666434288025,
      "learning_rate": 3.370838135971244e-07,
      "loss": 2.2363,
      "step": 70477
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.133394479751587,
      "learning_rate": 3.369778201546947e-07,
      "loss": 2.3319,
      "step": 70478
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0410218238830566,
      "learning_rate": 3.368718430937101e-07,
      "loss": 2.1152,
      "step": 70479
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.10663902759552,
      "learning_rate": 3.367658824143538e-07,
      "loss": 2.3091,
      "step": 70480
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1369099617004395,
      "learning_rate": 3.366599381168034e-07,
      "loss": 2.3047,
      "step": 70481
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0662815570831299,
      "learning_rate": 3.365540102012399e-07,
      "loss": 2.3107,
      "step": 70482
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.038195252418518,
      "learning_rate": 3.364480986678409e-07,
      "loss": 2.3917,
      "step": 70483
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1991328001022339,
      "learning_rate": 3.3634220351678735e-07,
      "loss": 2.2397,
      "step": 70484
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.000969409942627,
      "learning_rate": 3.3623632474825695e-07,
      "loss": 2.2388,
      "step": 70485
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1532433032989502,
      "learning_rate": 3.3613046236243175e-07,
      "loss": 2.4297,
      "step": 70486
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1589244604110718,
      "learning_rate": 3.3602461635948934e-07,
      "loss": 2.3538,
      "step": 70487
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1786284446716309,
      "learning_rate": 3.3591878673961074e-07,
      "loss": 2.1167,
      "step": 70488
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.124550700187683,
      "learning_rate": 3.358129735029758e-07,
      "loss": 2.5199,
      "step": 70489
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0500977039337158,
      "learning_rate": 3.3570717664975993e-07,
      "loss": 2.2359,
      "step": 70490
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5358344316482544,
      "learning_rate": 3.3560139618014744e-07,
      "loss": 2.2791,
      "step": 70491
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1499817371368408,
      "learning_rate": 3.3549563209431367e-07,
      "loss": 2.1813,
      "step": 70492
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0332844257354736,
      "learning_rate": 3.353898843924408e-07,
      "loss": 2.2401,
      "step": 70493
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0639914274215698,
      "learning_rate": 3.352841530747053e-07,
      "loss": 2.3862,
      "step": 70494
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0264829397201538,
      "learning_rate": 3.3517843814129146e-07,
      "loss": 2.4257,
      "step": 70495
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1061499118804932,
      "learning_rate": 3.350727395923714e-07,
      "loss": 2.4865,
      "step": 70496
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0667084455490112,
      "learning_rate": 3.3496705742813053e-07,
      "loss": 2.5019,
      "step": 70497
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2149934768676758,
      "learning_rate": 3.348613916487431e-07,
      "loss": 2.489,
      "step": 70498
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0705848932266235,
      "learning_rate": 3.3475574225439346e-07,
      "loss": 2.3491,
      "step": 70499
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9648014903068542,
      "learning_rate": 3.3465010924525476e-07,
      "loss": 2.1933,
      "step": 70500
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0404284000396729,
      "learning_rate": 3.3454449262151134e-07,
      "loss": 2.359,
      "step": 70501
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.121741533279419,
      "learning_rate": 3.344388923833397e-07,
      "loss": 2.122,
      "step": 70502
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1515098810195923,
      "learning_rate": 3.343333085309197e-07,
      "loss": 2.2585,
      "step": 70503
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1732218265533447,
      "learning_rate": 3.3422774106442793e-07,
      "loss": 2.3345,
      "step": 70504
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2395563125610352,
      "learning_rate": 3.3412218998404744e-07,
      "loss": 2.3043,
      "step": 70505
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.171284794807434,
      "learning_rate": 3.3401665528995374e-07,
      "loss": 2.0129,
      "step": 70506
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2270379066467285,
      "learning_rate": 3.339111369823278e-07,
      "loss": 2.4602,
      "step": 70507
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0804221630096436,
      "learning_rate": 3.338056350613483e-07,
      "loss": 2.3296,
      "step": 70508
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1049383878707886,
      "learning_rate": 3.337001495271941e-07,
      "loss": 2.2538,
      "step": 70509
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0973674058914185,
      "learning_rate": 3.335946803800416e-07,
      "loss": 2.245,
      "step": 70510
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1449568271636963,
      "learning_rate": 3.3348922762007185e-07,
      "loss": 2.1702,
      "step": 70511
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1522077322006226,
      "learning_rate": 3.3338379124746356e-07,
      "loss": 2.4849,
      "step": 70512
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.053101658821106,
      "learning_rate": 3.332783712623955e-07,
      "loss": 2.4445,
      "step": 70513
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1168869733810425,
      "learning_rate": 3.3317296766504525e-07,
      "loss": 2.3442,
      "step": 70514
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1923017501831055,
      "learning_rate": 3.330675804555927e-07,
      "loss": 2.3644,
      "step": 70515
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.049128532409668,
      "learning_rate": 3.3296220963421555e-07,
      "loss": 2.3868,
      "step": 70516
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.061989426612854,
      "learning_rate": 3.328568552010947e-07,
      "loss": 2.3695,
      "step": 70517
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1050182580947876,
      "learning_rate": 3.3275151715640554e-07,
      "loss": 2.3312,
      "step": 70518
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1499837636947632,
      "learning_rate": 3.3264619550032793e-07,
      "loss": 2.2583,
      "step": 70519
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1602627038955688,
      "learning_rate": 3.3254089023304183e-07,
      "loss": 2.4097,
      "step": 70520
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0524015426635742,
      "learning_rate": 3.3243560135472475e-07,
      "loss": 2.3663,
      "step": 70521
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.275017499923706,
      "learning_rate": 3.3233032886555325e-07,
      "loss": 2.3864,
      "step": 70522
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0579737424850464,
      "learning_rate": 3.322250727657095e-07,
      "loss": 2.1834,
      "step": 70523
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0237982273101807,
      "learning_rate": 3.3211983305536876e-07,
      "loss": 2.355,
      "step": 70524
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0849312543869019,
      "learning_rate": 3.32014609734711e-07,
      "loss": 2.5568,
      "step": 70525
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1920385360717773,
      "learning_rate": 3.319094028039138e-07,
      "loss": 2.2052,
      "step": 70526
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.25261390209198,
      "learning_rate": 3.3180421226315596e-07,
      "loss": 2.3641,
      "step": 70527
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2060599327087402,
      "learning_rate": 3.3169903811261726e-07,
      "loss": 2.0748,
      "step": 70528
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1043390035629272,
      "learning_rate": 3.315938803524721e-07,
      "loss": 2.3104,
      "step": 70529
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9907074570655823,
      "learning_rate": 3.314887389829036e-07,
      "loss": 2.2416,
      "step": 70530
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2150630950927734,
      "learning_rate": 3.31383614004086e-07,
      "loss": 2.3632,
      "step": 70531
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0581870079040527,
      "learning_rate": 3.3127850541620045e-07,
      "loss": 2.2414,
      "step": 70532
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1262646913528442,
      "learning_rate": 3.311734132194222e-07,
      "loss": 2.4475,
      "step": 70533
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.153083324432373,
      "learning_rate": 3.310683374139334e-07,
      "loss": 2.4303,
      "step": 70534
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1630699634552002,
      "learning_rate": 3.3096327799990833e-07,
      "loss": 2.1315,
      "step": 70535
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0784064531326294,
      "learning_rate": 3.308582349775269e-07,
      "loss": 2.2929,
      "step": 70536
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.119458556175232,
      "learning_rate": 3.3075320834696556e-07,
      "loss": 2.2177,
      "step": 70537
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1198792457580566,
      "learning_rate": 3.3064819810840533e-07,
      "loss": 2.2983,
      "step": 70538
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1184792518615723,
      "learning_rate": 3.3054320426202157e-07,
      "loss": 2.3221,
      "step": 70539
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0934691429138184,
      "learning_rate": 3.304382268079942e-07,
      "loss": 2.3784,
      "step": 70540
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9692240357398987,
      "learning_rate": 3.303332657464986e-07,
      "loss": 2.5032,
      "step": 70541
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2095730304718018,
      "learning_rate": 3.3022832107771687e-07,
      "loss": 2.4453,
      "step": 70542
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.101569414138794,
      "learning_rate": 3.3012339280182216e-07,
      "loss": 2.5846,
      "step": 70543
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5579009056091309,
      "learning_rate": 3.3001848091899547e-07,
      "loss": 2.3988,
      "step": 70544
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0924054384231567,
      "learning_rate": 3.299135854294144e-07,
      "loss": 2.396,
      "step": 70545
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1166307926177979,
      "learning_rate": 3.2980870633325445e-07,
      "loss": 2.2468,
      "step": 70546
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.036561369895935,
      "learning_rate": 3.297038436306976e-07,
      "loss": 2.4141,
      "step": 70547
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1179817914962769,
      "learning_rate": 3.2959899732191826e-07,
      "loss": 2.302,
      "step": 70548
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0773682594299316,
      "learning_rate": 3.2949416740709393e-07,
      "loss": 2.2369,
      "step": 70549
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1294853687286377,
      "learning_rate": 3.2938935388640457e-07,
      "loss": 2.2798,
      "step": 70550
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0423096418380737,
      "learning_rate": 3.292845567600267e-07,
      "loss": 2.3712,
      "step": 70551
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0939602851867676,
      "learning_rate": 3.2917977602813676e-07,
      "loss": 2.1295,
      "step": 70552
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1261341571807861,
      "learning_rate": 3.290750116909147e-07,
      "loss": 2.3606,
      "step": 70553
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0962306261062622,
      "learning_rate": 3.289702637485359e-07,
      "loss": 2.3231,
      "step": 70554
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0171834230422974,
      "learning_rate": 3.2886553220118133e-07,
      "loss": 2.3462,
      "step": 70555
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.095762848854065,
      "learning_rate": 3.287608170490242e-07,
      "loss": 2.2484,
      "step": 70556
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9723199605941772,
      "learning_rate": 3.286561182922454e-07,
      "loss": 2.0718,
      "step": 70557
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1043061017990112,
      "learning_rate": 3.285514359310205e-07,
      "loss": 2.3135,
      "step": 70558
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.060588002204895,
      "learning_rate": 3.28446769965528e-07,
      "loss": 2.4069,
      "step": 70559
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1226036548614502,
      "learning_rate": 3.283421203959447e-07,
      "loss": 2.0094,
      "step": 70560
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0282593965530396,
      "learning_rate": 3.282374872224503e-07,
      "loss": 2.5527,
      "step": 70561
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0782185792922974,
      "learning_rate": 3.2813287044521804e-07,
      "loss": 2.2625,
      "step": 70562
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0650806427001953,
      "learning_rate": 3.2802827006442774e-07,
      "loss": 2.5552,
      "step": 70563
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9720776677131653,
      "learning_rate": 3.279236860802559e-07,
      "loss": 2.2313,
      "step": 70564
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.122572422027588,
      "learning_rate": 3.278191184928814e-07,
      "loss": 2.5089,
      "step": 70565
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9653508067131042,
      "learning_rate": 3.277145673024795e-07,
      "loss": 2.2594,
      "step": 70566
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0838539600372314,
      "learning_rate": 3.276100325092291e-07,
      "loss": 2.2547,
      "step": 70567
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0274231433868408,
      "learning_rate": 3.2750551411330546e-07,
      "loss": 2.3055,
      "step": 70568
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1463420391082764,
      "learning_rate": 3.2740101211488853e-07,
      "loss": 2.2125,
      "step": 70569
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.155480980873108,
      "learning_rate": 3.2729652651415257e-07,
      "loss": 2.3835,
      "step": 70570
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1421912908554077,
      "learning_rate": 3.2719205731127747e-07,
      "loss": 2.3469,
      "step": 70571
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0302265882492065,
      "learning_rate": 3.270876045064375e-07,
      "loss": 2.3813,
      "step": 70572
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0096662044525146,
      "learning_rate": 3.2698316809981255e-07,
      "loss": 2.2089,
      "step": 70573
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1847107410430908,
      "learning_rate": 3.2687874809157806e-07,
      "loss": 2.2824,
      "step": 70574
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.930223286151886,
      "learning_rate": 3.267743444819116e-07,
      "loss": 2.2183,
      "step": 70575
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3760632276535034,
      "learning_rate": 3.266699572709886e-07,
      "loss": 2.1534,
      "step": 70576
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1936153173446655,
      "learning_rate": 3.2656558645898785e-07,
      "loss": 2.2582,
      "step": 70577
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9663572311401367,
      "learning_rate": 3.2646123204608473e-07,
      "loss": 2.2142,
      "step": 70578
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.111843466758728,
      "learning_rate": 3.2635689403245907e-07,
      "loss": 2.3415,
      "step": 70579
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.243943691253662,
      "learning_rate": 3.262525724182852e-07,
      "loss": 2.3073,
      "step": 70580
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0752029418945312,
      "learning_rate": 3.261482672037397e-07,
      "loss": 2.4495,
      "step": 70581
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0621256828308105,
      "learning_rate": 3.2604397838900126e-07,
      "loss": 2.1198,
      "step": 70582
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9896908402442932,
      "learning_rate": 3.259397059742442e-07,
      "loss": 2.0679,
      "step": 70583
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9558553099632263,
      "learning_rate": 3.2583544995964833e-07,
      "loss": 2.3791,
      "step": 70584
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1672712564468384,
      "learning_rate": 3.257312103453869e-07,
      "loss": 2.2699,
      "step": 70585
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1296876668930054,
      "learning_rate": 3.2562698713164085e-07,
      "loss": 2.209,
      "step": 70586
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.036007046699524,
      "learning_rate": 3.2552278031858343e-07,
      "loss": 2.2608,
      "step": 70587
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0759507417678833,
      "learning_rate": 3.254185899063933e-07,
      "loss": 2.1787,
      "step": 70588
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0734429359436035,
      "learning_rate": 3.253144158952448e-07,
      "loss": 2.3281,
      "step": 70589
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1676167249679565,
      "learning_rate": 3.2521025828531783e-07,
      "loss": 2.3188,
      "step": 70590
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.17378568649292,
      "learning_rate": 3.251061170767855e-07,
      "loss": 2.2378,
      "step": 70591
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.149282455444336,
      "learning_rate": 3.2500199226982667e-07,
      "loss": 2.231,
      "step": 70592
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.046093463897705,
      "learning_rate": 3.248978838646166e-07,
      "loss": 2.1829,
      "step": 70593
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9956557154655457,
      "learning_rate": 3.2479379186133306e-07,
      "loss": 2.2567,
      "step": 70594
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1351704597473145,
      "learning_rate": 3.2468971626015035e-07,
      "loss": 2.3835,
      "step": 70595
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.988171398639679,
      "learning_rate": 3.245856570612482e-07,
      "loss": 2.3953,
      "step": 70596
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9644749164581299,
      "learning_rate": 3.2448161426479995e-07,
      "loss": 2.3163,
      "step": 70597
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1227107048034668,
      "learning_rate": 3.2437758787098426e-07,
      "loss": 2.4452,
      "step": 70598
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1403858661651611,
      "learning_rate": 3.2427357787997436e-07,
      "loss": 2.4963,
      "step": 70599
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0616304874420166,
      "learning_rate": 3.2416958429195235e-07,
      "loss": 2.3337,
      "step": 70600
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1187114715576172,
      "learning_rate": 3.24065607107088e-07,
      "loss": 2.2939,
      "step": 70601
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0882484912872314,
      "learning_rate": 3.239616463255613e-07,
      "loss": 2.4315,
      "step": 70602
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1670137643814087,
      "learning_rate": 3.238577019475464e-07,
      "loss": 2.3154,
      "step": 70603
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0846763849258423,
      "learning_rate": 3.2375377397322217e-07,
      "loss": 2.5201,
      "step": 70604
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.09592604637146,
      "learning_rate": 3.236498624027617e-07,
      "loss": 2.5109,
      "step": 70605
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2189158201217651,
      "learning_rate": 3.235459672363439e-07,
      "loss": 2.3769,
      "step": 70606
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0707212686538696,
      "learning_rate": 3.2344208847414405e-07,
      "loss": 2.4552,
      "step": 70607
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2542206048965454,
      "learning_rate": 3.233382261163376e-07,
      "loss": 2.2911,
      "step": 70608
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0815240144729614,
      "learning_rate": 3.2323438016310103e-07,
      "loss": 2.456,
      "step": 70609
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0887978076934814,
      "learning_rate": 3.2313055061460985e-07,
      "loss": 2.3812,
      "step": 70610
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0022741556167603,
      "learning_rate": 3.230267374710416e-07,
      "loss": 2.3875,
      "step": 70611
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1224193572998047,
      "learning_rate": 3.229229407325696e-07,
      "loss": 2.4996,
      "step": 70612
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0887432098388672,
      "learning_rate": 3.2281916039937466e-07,
      "loss": 2.4469,
      "step": 70613
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.089516282081604,
      "learning_rate": 3.227153964716268e-07,
      "loss": 2.2941,
      "step": 70614
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3321019411087036,
      "learning_rate": 3.226116489495057e-07,
      "loss": 2.2729,
      "step": 70615
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1870169639587402,
      "learning_rate": 3.225079178331847e-07,
      "loss": 2.5544,
      "step": 70616
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0382972955703735,
      "learning_rate": 3.224042031228436e-07,
      "loss": 2.3864,
      "step": 70617
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1517577171325684,
      "learning_rate": 3.2230050481865337e-07,
      "loss": 2.4476,
      "step": 70618
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.183337688446045,
      "learning_rate": 3.221968229207939e-07,
      "loss": 2.2042,
      "step": 70619
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0390774011611938,
      "learning_rate": 3.2209315742943725e-07,
      "loss": 2.226,
      "step": 70620
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0526217222213745,
      "learning_rate": 3.2198950834476327e-07,
      "loss": 2.3288,
      "step": 70621
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0280660390853882,
      "learning_rate": 3.2188587566694405e-07,
      "loss": 2.444,
      "step": 70622
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1955070495605469,
      "learning_rate": 3.2178225939615837e-07,
      "loss": 2.5275,
      "step": 70623
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1868541240692139,
      "learning_rate": 3.216786595325794e-07,
      "loss": 2.3382,
      "step": 70624
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0651328563690186,
      "learning_rate": 3.215750760763836e-07,
      "loss": 2.4353,
      "step": 70625
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9635800719261169,
      "learning_rate": 3.2147150902774647e-07,
      "loss": 2.333,
      "step": 70626
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1237679719924927,
      "learning_rate": 3.2136795838684675e-07,
      "loss": 2.2742,
      "step": 70627
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.163977861404419,
      "learning_rate": 3.2126442415385317e-07,
      "loss": 2.4123,
      "step": 70628
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0204421281814575,
      "learning_rate": 3.211609063289467e-07,
      "loss": 2.3987,
      "step": 70629
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0746623277664185,
      "learning_rate": 3.2105740491230054e-07,
      "loss": 2.3684,
      "step": 70630
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0092458724975586,
      "learning_rate": 3.209539199040923e-07,
      "loss": 2.119,
      "step": 70631
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0324786901474,
      "learning_rate": 3.208504513044941e-07,
      "loss": 2.1744,
      "step": 70632
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.20197594165802,
      "learning_rate": 3.2074699911368466e-07,
      "loss": 2.4751,
      "step": 70633
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0812824964523315,
      "learning_rate": 3.206435633318372e-07,
      "loss": 2.2094,
      "step": 70634
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0115034580230713,
      "learning_rate": 3.2054014395912825e-07,
      "loss": 2.3151,
      "step": 70635
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0560474395751953,
      "learning_rate": 3.204367409957321e-07,
      "loss": 2.3284,
      "step": 70636
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.046861171722412,
      "learning_rate": 3.2033335444182414e-07,
      "loss": 2.5895,
      "step": 70637
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0942808389663696,
      "learning_rate": 3.20229984297582e-07,
      "loss": 2.4014,
      "step": 70638
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1326779127120972,
      "learning_rate": 3.201266305631767e-07,
      "loss": 2.1343,
      "step": 70639
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1940869092941284,
      "learning_rate": 3.20023293238787e-07,
      "loss": 2.4812,
      "step": 70640
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2280763387680054,
      "learning_rate": 3.199199723245883e-07,
      "loss": 2.2876,
      "step": 70641
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.108857274055481,
      "learning_rate": 3.198166678207526e-07,
      "loss": 2.5672,
      "step": 70642
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1310805082321167,
      "learning_rate": 3.1971337972745654e-07,
      "loss": 2.2629,
      "step": 70643
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.030215859413147,
      "learning_rate": 3.1961010804487656e-07,
      "loss": 2.2326,
      "step": 70644
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.068701148033142,
      "learning_rate": 3.1950685277318485e-07,
      "loss": 2.2126,
      "step": 70645
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.028135061264038,
      "learning_rate": 3.1940361391256004e-07,
      "loss": 2.3734,
      "step": 70646
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.118632197380066,
      "learning_rate": 3.193003914631732e-07,
      "loss": 2.3522,
      "step": 70647
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3403249979019165,
      "learning_rate": 3.1919718542520403e-07,
      "loss": 2.4677,
      "step": 70648
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0002074241638184,
      "learning_rate": 3.190939957988226e-07,
      "loss": 2.1274,
      "step": 70649
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0848926305770874,
      "learning_rate": 3.1899082258420756e-07,
      "loss": 2.2938,
      "step": 70650
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.07391357421875,
      "learning_rate": 3.1888766578153096e-07,
      "loss": 2.311,
      "step": 70651
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0472291707992554,
      "learning_rate": 3.187845253909716e-07,
      "loss": 1.9749,
      "step": 70652
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1104135513305664,
      "learning_rate": 3.1868140141270044e-07,
      "loss": 2.3977,
      "step": 70653
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0228843688964844,
      "learning_rate": 3.1857829384689397e-07,
      "loss": 2.17,
      "step": 70654
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.043436050415039,
      "learning_rate": 3.184752026937254e-07,
      "loss": 2.3615,
      "step": 70655
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.066972255706787,
      "learning_rate": 3.183721279533725e-07,
      "loss": 2.3197,
      "step": 70656
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0565414428710938,
      "learning_rate": 3.182690696260071e-07,
      "loss": 2.3268,
      "step": 70657
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1235072612762451,
      "learning_rate": 3.1816602771180703e-07,
      "loss": 2.3154,
      "step": 70658
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.048989176750183,
      "learning_rate": 3.180630022109432e-07,
      "loss": 2.4328,
      "step": 70659
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0571860074996948,
      "learning_rate": 3.179599931235933e-07,
      "loss": 2.5338,
      "step": 70660
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.047900676727295,
      "learning_rate": 3.178570004499293e-07,
      "loss": 2.2876,
      "step": 70661
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0077499151229858,
      "learning_rate": 3.177540241901289e-07,
      "loss": 2.2151,
      "step": 70662
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0117566585540771,
      "learning_rate": 3.176510643443653e-07,
      "loss": 2.4839,
      "step": 70663
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1178362369537354,
      "learning_rate": 3.175481209128106e-07,
      "loss": 2.2669,
      "step": 70664
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3993420600891113,
      "learning_rate": 3.1744519389564354e-07,
      "loss": 2.3911,
      "step": 70665
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0484024286270142,
      "learning_rate": 3.1734228329303727e-07,
      "loss": 2.306,
      "step": 70666
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0351004600524902,
      "learning_rate": 3.1723938910516397e-07,
      "loss": 2.3027,
      "step": 70667
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0395539999008179,
      "learning_rate": 3.171365113322e-07,
      "loss": 2.2988,
      "step": 70668
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.318942904472351,
      "learning_rate": 3.1703364997431985e-07,
      "loss": 2.0529,
      "step": 70669
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1145875453948975,
      "learning_rate": 3.169308050316966e-07,
      "loss": 2.4105,
      "step": 70670
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.02910578250885,
      "learning_rate": 3.1682797650450683e-07,
      "loss": 2.2667,
      "step": 70671
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1074508428573608,
      "learning_rate": 3.167251643929226e-07,
      "loss": 2.3199,
      "step": 70672
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.005603313446045,
      "learning_rate": 3.166223686971204e-07,
      "loss": 2.3514,
      "step": 70673
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.112987995147705,
      "learning_rate": 3.165195894172712e-07,
      "loss": 2.2395,
      "step": 70674
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.200642466545105,
      "learning_rate": 3.1641682655355276e-07,
      "loss": 2.2902,
      "step": 70675
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0721105337142944,
      "learning_rate": 3.163140801061371e-07,
      "loss": 2.2476,
      "step": 70676
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1494507789611816,
      "learning_rate": 3.1621135007520067e-07,
      "loss": 2.3205,
      "step": 70677
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0854772329330444,
      "learning_rate": 3.1610863646091337e-07,
      "loss": 2.4046,
      "step": 70678
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.072464942932129,
      "learning_rate": 3.160059392634551e-07,
      "loss": 2.4393,
      "step": 70679
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.190285325050354,
      "learning_rate": 3.1590325848299574e-07,
      "loss": 2.1432,
      "step": 70680
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.086632251739502,
      "learning_rate": 3.1580059411971063e-07,
      "loss": 2.4717,
      "step": 70681
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1800676584243774,
      "learning_rate": 3.156979461737719e-07,
      "loss": 2.3734,
      "step": 70682
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3730392456054688,
      "learning_rate": 3.155953146453572e-07,
      "loss": 2.5421,
      "step": 70683
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.145125389099121,
      "learning_rate": 3.154926995346386e-07,
      "loss": 2.442,
      "step": 70684
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0771304368972778,
      "learning_rate": 3.1539010084178924e-07,
      "loss": 2.497,
      "step": 70685
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1573915481567383,
      "learning_rate": 3.152875185669846e-07,
      "loss": 2.4633,
      "step": 70686
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9794030785560608,
      "learning_rate": 3.1518495271039786e-07,
      "loss": 2.3821,
      "step": 70687
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0355056524276733,
      "learning_rate": 3.150824032722022e-07,
      "loss": 2.3931,
      "step": 70688
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0628736019134521,
      "learning_rate": 3.1497987025257414e-07,
      "loss": 2.5728,
      "step": 70689
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.043662190437317,
      "learning_rate": 3.148773536516836e-07,
      "loss": 2.2524,
      "step": 70690
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1248509883880615,
      "learning_rate": 3.147748534697082e-07,
      "loss": 2.3578,
      "step": 70691
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.086604118347168,
      "learning_rate": 3.146723697068188e-07,
      "loss": 2.2091,
      "step": 70692
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2381681203842163,
      "learning_rate": 3.1456990236319095e-07,
      "loss": 2.5294,
      "step": 70693
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.068740963935852,
      "learning_rate": 3.144674514389967e-07,
      "loss": 2.1472,
      "step": 70694
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9788753390312195,
      "learning_rate": 3.1436501693441144e-07,
      "loss": 2.3375,
      "step": 70695
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1485655307769775,
      "learning_rate": 3.142625988496073e-07,
      "loss": 2.2762,
      "step": 70696
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2337684631347656,
      "learning_rate": 3.1416019718475967e-07,
      "loss": 2.4171,
      "step": 70697
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.275615930557251,
      "learning_rate": 3.140578119400417e-07,
      "loss": 2.2889,
      "step": 70698
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0641212463378906,
      "learning_rate": 3.139554431156244e-07,
      "loss": 2.2087,
      "step": 70699
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1877788305282593,
      "learning_rate": 3.1385309071168434e-07,
      "loss": 2.292,
      "step": 70700
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1424074172973633,
      "learning_rate": 3.1375075472839354e-07,
      "loss": 2.1043,
      "step": 70701
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1002380847930908,
      "learning_rate": 3.1364843516592634e-07,
      "loss": 2.4518,
      "step": 70702
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0485059022903442,
      "learning_rate": 3.1354613202445484e-07,
      "loss": 2.5085,
      "step": 70703
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0539114475250244,
      "learning_rate": 3.134438453041555e-07,
      "loss": 2.2523,
      "step": 70704
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2001019716262817,
      "learning_rate": 3.1334157500519826e-07,
      "loss": 2.356,
      "step": 70705
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0144689083099365,
      "learning_rate": 3.1323932112775956e-07,
      "loss": 2.2194,
      "step": 70706
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1527836322784424,
      "learning_rate": 3.1313708367200933e-07,
      "loss": 2.2355,
      "step": 70707
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0906633138656616,
      "learning_rate": 3.13034862638123e-07,
      "loss": 2.2584,
      "step": 70708
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1700555086135864,
      "learning_rate": 3.1293265802627257e-07,
      "loss": 2.3858,
      "step": 70709
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1533349752426147,
      "learning_rate": 3.128304698366347e-07,
      "loss": 2.2359,
      "step": 70710
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9269402623176575,
      "learning_rate": 3.1272829806937797e-07,
      "loss": 2.5235,
      "step": 70711
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1309455633163452,
      "learning_rate": 3.126261427246791e-07,
      "loss": 2.4437,
      "step": 70712
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0603127479553223,
      "learning_rate": 3.125240038027089e-07,
      "loss": 2.2647,
      "step": 70713
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0853602886199951,
      "learning_rate": 3.124218813036428e-07,
      "loss": 2.4843,
      "step": 70714
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9698036909103394,
      "learning_rate": 3.1231977522765076e-07,
      "loss": 2.2245,
      "step": 70715
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.115918755531311,
      "learning_rate": 3.1221768557490926e-07,
      "loss": 2.3225,
      "step": 70716
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1163595914840698,
      "learning_rate": 3.1211561234558927e-07,
      "loss": 2.2077,
      "step": 70717
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0833370685577393,
      "learning_rate": 3.120135555398662e-07,
      "loss": 2.1467,
      "step": 70718
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0338090658187866,
      "learning_rate": 3.1191151515791106e-07,
      "loss": 2.3695,
      "step": 70719
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1512959003448486,
      "learning_rate": 3.118094911998959e-07,
      "loss": 2.3958,
      "step": 70720
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.022241473197937,
      "learning_rate": 3.11707483665995e-07,
      "loss": 2.1874,
      "step": 70721
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9930009841918945,
      "learning_rate": 3.116054925563827e-07,
      "loss": 2.3621,
      "step": 70722
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0537304878234863,
      "learning_rate": 3.115035178712289e-07,
      "loss": 2.2833,
      "step": 70723
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.114274024963379,
      "learning_rate": 3.1140155961070894e-07,
      "loss": 2.2315,
      "step": 70724
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9480800628662109,
      "learning_rate": 3.1129961777499383e-07,
      "loss": 2.3748,
      "step": 70725
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0609040260314941,
      "learning_rate": 3.111976923642579e-07,
      "loss": 2.2903,
      "step": 70726
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3657931089401245,
      "learning_rate": 3.110957833786732e-07,
      "loss": 2.259,
      "step": 70727
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.089511513710022,
      "learning_rate": 3.109938908184118e-07,
      "loss": 2.3605,
      "step": 70728
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0013054609298706,
      "learning_rate": 3.10892014683648e-07,
      "loss": 2.3089,
      "step": 70729
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0820749998092651,
      "learning_rate": 3.10790154974554e-07,
      "loss": 2.1667,
      "step": 70730
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1368889808654785,
      "learning_rate": 3.1068831169130176e-07,
      "loss": 2.3521,
      "step": 70731
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1835479736328125,
      "learning_rate": 3.105864848340656e-07,
      "loss": 2.2951,
      "step": 70732
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.08065664768219,
      "learning_rate": 3.1048467440301655e-07,
      "loss": 2.4354,
      "step": 70733
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.045375108718872,
      "learning_rate": 3.103828803983255e-07,
      "loss": 2.1647,
      "step": 70734
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0915647745132446,
      "learning_rate": 3.102811028201691e-07,
      "loss": 2.4142,
      "step": 70735
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1199021339416504,
      "learning_rate": 3.101793416687171e-07,
      "loss": 2.4433,
      "step": 70736
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1756213903427124,
      "learning_rate": 3.1007759694414384e-07,
      "loss": 2.5463,
      "step": 70737
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.083903431892395,
      "learning_rate": 3.099758686466192e-07,
      "loss": 2.2698,
      "step": 70738
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.072224736213684,
      "learning_rate": 3.098741567763197e-07,
      "loss": 2.3146,
      "step": 70739
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0417132377624512,
      "learning_rate": 3.09772461333413e-07,
      "loss": 2.4482,
      "step": 70740
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0166155099868774,
      "learning_rate": 3.0967078231807445e-07,
      "loss": 2.1554,
      "step": 70741
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0565812587738037,
      "learning_rate": 3.095691197304762e-07,
      "loss": 2.1753,
      "step": 70742
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0839602947235107,
      "learning_rate": 3.0946747357079033e-07,
      "loss": 2.2718,
      "step": 70743
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2076560258865356,
      "learning_rate": 3.0936584383918776e-07,
      "loss": 2.155,
      "step": 70744
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0769540071487427,
      "learning_rate": 3.09264230535844e-07,
      "loss": 2.4602,
      "step": 70745
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1590595245361328,
      "learning_rate": 3.091626336609277e-07,
      "loss": 2.1882,
      "step": 70746
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.152842402458191,
      "learning_rate": 3.090610532146143e-07,
      "loss": 2.2839,
      "step": 70747
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0387295484542847,
      "learning_rate": 3.089594891970726e-07,
      "loss": 2.3607,
      "step": 70748
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.125480055809021,
      "learning_rate": 3.0885794160847805e-07,
      "loss": 2.4034,
      "step": 70749
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0881391763687134,
      "learning_rate": 3.087564104490004e-07,
      "loss": 2.4649,
      "step": 70750
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9942969679832458,
      "learning_rate": 3.0865489571881293e-07,
      "loss": 2.4147,
      "step": 70751
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0805410146713257,
      "learning_rate": 3.0855339741808766e-07,
      "loss": 2.371,
      "step": 70752
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0815644264221191,
      "learning_rate": 3.084519155469967e-07,
      "loss": 2.3159,
      "step": 70753
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0066184997558594,
      "learning_rate": 3.0835045010571217e-07,
      "loss": 2.4205,
      "step": 70754
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2090225219726562,
      "learning_rate": 3.0824900109440505e-07,
      "loss": 2.301,
      "step": 70755
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.169793963432312,
      "learning_rate": 3.081475685132484e-07,
      "loss": 2.0761,
      "step": 70756
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0004050731658936,
      "learning_rate": 3.080461523624134e-07,
      "loss": 2.2691,
      "step": 70757
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1376111507415771,
      "learning_rate": 3.0794475264207424e-07,
      "loss": 1.9087,
      "step": 70758
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.119320034980774,
      "learning_rate": 3.0784336935239967e-07,
      "loss": 2.1652,
      "step": 70759
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0366815328598022,
      "learning_rate": 3.07742002493564e-07,
      "loss": 2.1479,
      "step": 70760
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0863913297653198,
      "learning_rate": 3.076406520657371e-07,
      "loss": 2.4219,
      "step": 70761
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0359981060028076,
      "learning_rate": 3.075393180690922e-07,
      "loss": 2.3126,
      "step": 70762
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0421042442321777,
      "learning_rate": 3.074380005037991e-07,
      "loss": 2.1389,
      "step": 70763
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1198464632034302,
      "learning_rate": 3.073366993700333e-07,
      "loss": 2.4052,
      "step": 70764
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1795414686203003,
      "learning_rate": 3.072354146679624e-07,
      "loss": 2.3117,
      "step": 70765
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0104762315750122,
      "learning_rate": 3.071341463977606e-07,
      "loss": 2.4323,
      "step": 70766
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.092977523803711,
      "learning_rate": 3.0703289455959907e-07,
      "loss": 2.1833,
      "step": 70767
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1744545698165894,
      "learning_rate": 3.0693165915364976e-07,
      "loss": 2.307,
      "step": 70768
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.069200038909912,
      "learning_rate": 3.0683044018008255e-07,
      "loss": 2.2207,
      "step": 70769
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0624229907989502,
      "learning_rate": 3.0672923763907183e-07,
      "loss": 2.2339,
      "step": 70770
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1696091890335083,
      "learning_rate": 3.0662805153078733e-07,
      "loss": 2.474,
      "step": 70771
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0259332656860352,
      "learning_rate": 3.0652688185540237e-07,
      "loss": 2.3758,
      "step": 70772
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0196772813796997,
      "learning_rate": 3.064257286130845e-07,
      "loss": 2.3876,
      "step": 70773
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1082226037979126,
      "learning_rate": 3.063245918040092e-07,
      "loss": 2.3116,
      "step": 70774
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.211092472076416,
      "learning_rate": 3.062234714283452e-07,
      "loss": 2.2252,
      "step": 70775
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2331767082214355,
      "learning_rate": 3.061223674862668e-07,
      "loss": 2.2724,
      "step": 70776
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0573972463607788,
      "learning_rate": 3.0602127997794275e-07,
      "loss": 2.3631,
      "step": 70777
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0593470335006714,
      "learning_rate": 3.0592020890354623e-07,
      "loss": 2.5704,
      "step": 70778
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0772483348846436,
      "learning_rate": 3.0581915426324717e-07,
      "loss": 2.2024,
      "step": 70779
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5571454763412476,
      "learning_rate": 3.0571811605721756e-07,
      "loss": 2.3251,
      "step": 70780
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.108012318611145,
      "learning_rate": 3.0561709428562957e-07,
      "loss": 2.2396,
      "step": 70781
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5151803493499756,
      "learning_rate": 3.055160889486519e-07,
      "loss": 2.3945,
      "step": 70782
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.474888563156128,
      "learning_rate": 3.054151000464589e-07,
      "loss": 2.3767,
      "step": 70783
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.145198941230774,
      "learning_rate": 3.053141275792193e-07,
      "loss": 2.3322,
      "step": 70784
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1472314596176147,
      "learning_rate": 3.052131715471074e-07,
      "loss": 2.1619,
      "step": 70785
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1532834768295288,
      "learning_rate": 3.051122319502908e-07,
      "loss": 2.1753,
      "step": 70786
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0967676639556885,
      "learning_rate": 3.0501130878894283e-07,
      "loss": 2.4127,
      "step": 70787
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.164657711982727,
      "learning_rate": 3.049104020632321e-07,
      "loss": 2.373,
      "step": 70788
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0738539695739746,
      "learning_rate": 3.0480951177333294e-07,
      "loss": 2.5053,
      "step": 70789
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.058907389640808,
      "learning_rate": 3.047086379194131e-07,
      "loss": 2.0626,
      "step": 70790
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1290489435195923,
      "learning_rate": 3.0460778050164676e-07,
      "loss": 2.2954,
      "step": 70791
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1217963695526123,
      "learning_rate": 3.045069395202027e-07,
      "loss": 2.5174,
      "step": 70792
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0454046726226807,
      "learning_rate": 3.044061149752531e-07,
      "loss": 2.2373,
      "step": 70793
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9994209408760071,
      "learning_rate": 3.043053068669677e-07,
      "loss": 2.2589,
      "step": 70794
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0163426399230957,
      "learning_rate": 3.042045151955197e-07,
      "loss": 2.2135,
      "step": 70795
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.137596607208252,
      "learning_rate": 3.0410373996107687e-07,
      "loss": 2.4695,
      "step": 70796
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0974746942520142,
      "learning_rate": 3.0400298116381234e-07,
      "loss": 2.3676,
      "step": 70797
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0668094158172607,
      "learning_rate": 3.0390223880389703e-07,
      "loss": 2.3809,
      "step": 70798
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0996614694595337,
      "learning_rate": 3.038015128814997e-07,
      "loss": 2.0392,
      "step": 70799
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.059370994567871,
      "learning_rate": 3.037008033967914e-07,
      "loss": 2.3852,
      "step": 70800
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0566402673721313,
      "learning_rate": 3.036001103499453e-07,
      "loss": 2.5171,
      "step": 70801
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0365787744522095,
      "learning_rate": 3.034994337411301e-07,
      "loss": 2.3388,
      "step": 70802
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0942180156707764,
      "learning_rate": 3.0339877357051684e-07,
      "loss": 2.4379,
      "step": 70803
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0965149402618408,
      "learning_rate": 3.0329812983827533e-07,
      "loss": 2.1569,
      "step": 70804
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1155213117599487,
      "learning_rate": 3.031975025445777e-07,
      "loss": 2.4034,
      "step": 70805
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6975226402282715,
      "learning_rate": 3.030968916895938e-07,
      "loss": 2.357,
      "step": 70806
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.133245587348938,
      "learning_rate": 3.0299629727349456e-07,
      "loss": 2.3958,
      "step": 70807
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4436087608337402,
      "learning_rate": 3.028957192964499e-07,
      "loss": 2.3417,
      "step": 70808
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0527399778366089,
      "learning_rate": 3.0279515775863187e-07,
      "loss": 2.2019,
      "step": 70809
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1871033906936646,
      "learning_rate": 3.0269461266020926e-07,
      "loss": 2.3395,
      "step": 70810
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0453755855560303,
      "learning_rate": 3.0259408400135303e-07,
      "loss": 2.2357,
      "step": 70811
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9290125370025635,
      "learning_rate": 3.0249357178223306e-07,
      "loss": 2.406,
      "step": 70812
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2055892944335938,
      "learning_rate": 3.0239307600302135e-07,
      "loss": 1.9364,
      "step": 70813
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.147047758102417,
      "learning_rate": 3.0229259666388567e-07,
      "loss": 2.3895,
      "step": 70814
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0985842943191528,
      "learning_rate": 3.021921337649991e-07,
      "loss": 2.4143,
      "step": 70815
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.102651834487915,
      "learning_rate": 3.020916873065305e-07,
      "loss": 2.3443,
      "step": 70816
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0445371866226196,
      "learning_rate": 3.0199125728864964e-07,
      "loss": 2.2844,
      "step": 70817
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1368783712387085,
      "learning_rate": 3.0189084371152864e-07,
      "loss": 2.3666,
      "step": 70818
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0809612274169922,
      "learning_rate": 3.017904465753352e-07,
      "loss": 2.1677,
      "step": 70819
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.055897831916809,
      "learning_rate": 3.0169006588024243e-07,
      "loss": 2.3177,
      "step": 70820
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0512323379516602,
      "learning_rate": 3.01589701626418e-07,
      "loss": 2.3587,
      "step": 70821
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0977952480316162,
      "learning_rate": 3.01489353814034e-07,
      "loss": 2.4254,
      "step": 70822
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2612333297729492,
      "learning_rate": 3.013890224432581e-07,
      "loss": 2.5441,
      "step": 70823
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.321778655052185,
      "learning_rate": 3.0128870751426455e-07,
      "loss": 2.1554,
      "step": 70824
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9978255033493042,
      "learning_rate": 3.011884090272188e-07,
      "loss": 2.2915,
      "step": 70825
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0789732933044434,
      "learning_rate": 3.01088126982293e-07,
      "loss": 2.3124,
      "step": 70826
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1279436349868774,
      "learning_rate": 3.0098786137965686e-07,
      "loss": 2.4187,
      "step": 70827
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.182826280593872,
      "learning_rate": 3.008876122194815e-07,
      "loss": 2.2524,
      "step": 70828
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.052146553993225,
      "learning_rate": 3.0078737950193447e-07,
      "loss": 2.3466,
      "step": 70829
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0862759351730347,
      "learning_rate": 3.0068716322718795e-07,
      "loss": 2.2957,
      "step": 70830
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2939658164978027,
      "learning_rate": 3.005869633954106e-07,
      "loss": 2.1055,
      "step": 70831
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1223337650299072,
      "learning_rate": 3.0048678000677344e-07,
      "loss": 2.2438,
      "step": 70832
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1666810512542725,
      "learning_rate": 3.0038661306144525e-07,
      "loss": 2.5715,
      "step": 70833
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1027411222457886,
      "learning_rate": 3.0028646255959584e-07,
      "loss": 2.2415,
      "step": 70834
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1213189363479614,
      "learning_rate": 3.001863285013951e-07,
      "loss": 2.2827,
      "step": 70835
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2024613618850708,
      "learning_rate": 3.0008621088701396e-07,
      "loss": 2.4119,
      "step": 70836
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2602224349975586,
      "learning_rate": 2.999861097166201e-07,
      "loss": 2.3666,
      "step": 70837
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0700268745422363,
      "learning_rate": 2.9988602499038565e-07,
      "loss": 2.1026,
      "step": 70838
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0368216037750244,
      "learning_rate": 2.997859567084771e-07,
      "loss": 2.3244,
      "step": 70839
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.145147681236267,
      "learning_rate": 2.9968590487106764e-07,
      "loss": 2.285,
      "step": 70840
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0915000438690186,
      "learning_rate": 2.995858694783227e-07,
      "loss": 2.2971,
      "step": 70841
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1737440824508667,
      "learning_rate": 2.9948585053041656e-07,
      "loss": 2.1362,
      "step": 70842
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.986456573009491,
      "learning_rate": 2.9938584802751693e-07,
      "loss": 2.3953,
      "step": 70843
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.063245177268982,
      "learning_rate": 2.9928586196979025e-07,
      "loss": 2.579,
      "step": 70844
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0329560041427612,
      "learning_rate": 2.991858923574109e-07,
      "loss": 2.4286,
      "step": 70845
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.254008412361145,
      "learning_rate": 2.990859391905454e-07,
      "loss": 2.2865,
      "step": 70846
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1475309133529663,
      "learning_rate": 2.9898600246936473e-07,
      "loss": 2.4926,
      "step": 70847
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.044096827507019,
      "learning_rate": 2.9888608219403537e-07,
      "loss": 2.2087,
      "step": 70848
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0409046411514282,
      "learning_rate": 2.987861783647317e-07,
      "loss": 2.4229,
      "step": 70849
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0301249027252197,
      "learning_rate": 2.9868629098161793e-07,
      "loss": 2.3067,
      "step": 70850
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1672022342681885,
      "learning_rate": 2.9858642004486846e-07,
      "loss": 2.3745,
      "step": 70851
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.998776376247406,
      "learning_rate": 2.984865655546476e-07,
      "loss": 2.3939,
      "step": 70852
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9765363931655884,
      "learning_rate": 2.9838672751112853e-07,
      "loss": 2.2223,
      "step": 70853
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0128036737442017,
      "learning_rate": 2.982869059144766e-07,
      "loss": 1.9876,
      "step": 70854
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0797090530395508,
      "learning_rate": 2.981871007648651e-07,
      "loss": 2.3168,
      "step": 70855
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0276538133621216,
      "learning_rate": 2.9808731206246056e-07,
      "loss": 2.2871,
      "step": 70856
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0036611557006836,
      "learning_rate": 2.979875398074339e-07,
      "loss": 2.3352,
      "step": 70857
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9525051116943359,
      "learning_rate": 2.9788778399995165e-07,
      "loss": 2.413,
      "step": 70858
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0561095476150513,
      "learning_rate": 2.9778804464018705e-07,
      "loss": 2.3034,
      "step": 70859
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1204907894134521,
      "learning_rate": 2.976883217283044e-07,
      "loss": 2.3456,
      "step": 70860
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.378253698348999,
      "learning_rate": 2.975886152644769e-07,
      "loss": 2.209,
      "step": 70861
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1321693658828735,
      "learning_rate": 2.9748892524887106e-07,
      "loss": 2.3194,
      "step": 70862
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9580850005149841,
      "learning_rate": 2.973892516816579e-07,
      "loss": 2.2136,
      "step": 70863
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0382676124572754,
      "learning_rate": 2.972895945630039e-07,
      "loss": 2.278,
      "step": 70864
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.992711067199707,
      "learning_rate": 2.9718995389308e-07,
      "loss": 2.2855,
      "step": 70865
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9976436495780945,
      "learning_rate": 2.97090329672054e-07,
      "loss": 2.1724,
      "step": 70866
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1054593324661255,
      "learning_rate": 2.969907219000956e-07,
      "loss": 2.3068,
      "step": 70867
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2273353338241577,
      "learning_rate": 2.968911305773714e-07,
      "loss": 2.1938,
      "step": 70868
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9876049757003784,
      "learning_rate": 2.967915557040546e-07,
      "loss": 2.2511,
      "step": 70869
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.976232647895813,
      "learning_rate": 2.966919972803095e-07,
      "loss": 2.2385,
      "step": 70870
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0728559494018555,
      "learning_rate": 2.965924553063082e-07,
      "loss": 2.4817,
      "step": 70871
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0957136154174805,
      "learning_rate": 2.964929297822172e-07,
      "loss": 2.2767,
      "step": 70872
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.220713496208191,
      "learning_rate": 2.963934207082064e-07,
      "loss": 2.4654,
      "step": 70873
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0875047445297241,
      "learning_rate": 2.9629392808444455e-07,
      "loss": 2.4525,
      "step": 70874
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0218868255615234,
      "learning_rate": 2.961944519110993e-07,
      "loss": 2.217,
      "step": 70875
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9587318301200867,
      "learning_rate": 2.960949921883405e-07,
      "loss": 2.443,
      "step": 70876
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.04814612865448,
      "learning_rate": 2.9599554891633687e-07,
      "loss": 2.3328,
      "step": 70877
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1646829843521118,
      "learning_rate": 2.9589612209525607e-07,
      "loss": 2.3385,
      "step": 70878
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0851956605911255,
      "learning_rate": 2.957967117252658e-07,
      "loss": 2.2327,
      "step": 70879
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9720728993415833,
      "learning_rate": 2.9569731780653587e-07,
      "loss": 2.223,
      "step": 70880
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1000274419784546,
      "learning_rate": 2.9559794033923507e-07,
      "loss": 2.2188,
      "step": 70881
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.11925208568573,
      "learning_rate": 2.954985793235321e-07,
      "loss": 2.1936,
      "step": 70882
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4562971591949463,
      "learning_rate": 2.953992347595935e-07,
      "loss": 2.3709,
      "step": 70883
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0509852170944214,
      "learning_rate": 2.9529990664759034e-07,
      "loss": 2.3563,
      "step": 70884
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1187412738800049,
      "learning_rate": 2.9520059498768794e-07,
      "loss": 2.477,
      "step": 70885
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.034026026725769,
      "learning_rate": 2.951012997800573e-07,
      "loss": 2.5295,
      "step": 70886
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1215908527374268,
      "learning_rate": 2.95002021024865e-07,
      "loss": 2.4051,
      "step": 70887
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.048048496246338,
      "learning_rate": 2.949027587222819e-07,
      "loss": 2.2734,
      "step": 70888
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9970996379852295,
      "learning_rate": 2.948035128724724e-07,
      "loss": 2.389,
      "step": 70889
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.080322027206421,
      "learning_rate": 2.947042834756098e-07,
      "loss": 2.3463,
      "step": 70890
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9918707609176636,
      "learning_rate": 2.9460507053185593e-07,
      "loss": 2.2734,
      "step": 70891
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1242578029632568,
      "learning_rate": 2.945058740413842e-07,
      "loss": 2.2516,
      "step": 70892
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0045666694641113,
      "learning_rate": 2.944066940043611e-07,
      "loss": 2.3964,
      "step": 70893
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9641363620758057,
      "learning_rate": 2.943075304209542e-07,
      "loss": 2.0849,
      "step": 70894
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0151495933532715,
      "learning_rate": 2.9420838329133116e-07,
      "loss": 2.5191,
      "step": 70895
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.047338604927063,
      "learning_rate": 2.9410925261566306e-07,
      "loss": 2.6492,
      "step": 70896
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.143401026725769,
      "learning_rate": 2.9401013839411405e-07,
      "loss": 2.4374,
      "step": 70897
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0227614641189575,
      "learning_rate": 2.939110406268553e-07,
      "loss": 2.3154,
      "step": 70898
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0357892513275146,
      "learning_rate": 2.9381195931405316e-07,
      "loss": 2.4111,
      "step": 70899
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.029329538345337,
      "learning_rate": 2.9371289445587426e-07,
      "loss": 2.1792,
      "step": 70900
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.127036213874817,
      "learning_rate": 2.9361384605249065e-07,
      "loss": 2.473,
      "step": 70901
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0823733806610107,
      "learning_rate": 2.9351481410406555e-07,
      "loss": 2.3545,
      "step": 70902
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.013382911682129,
      "learning_rate": 2.934157986107711e-07,
      "loss": 2.2598,
      "step": 70903
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9805463552474976,
      "learning_rate": 2.933167995727726e-07,
      "loss": 2.5437,
      "step": 70904
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0792901515960693,
      "learning_rate": 2.932178169902378e-07,
      "loss": 2.3904,
      "step": 70905
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1686122417449951,
      "learning_rate": 2.9311885086333534e-07,
      "loss": 2.1919,
      "step": 70906
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0524966716766357,
      "learning_rate": 2.93019901192233e-07,
      "loss": 2.3388,
      "step": 70907
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9919871091842651,
      "learning_rate": 2.9292096797709726e-07,
      "loss": 2.4817,
      "step": 70908
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0804007053375244,
      "learning_rate": 2.9282205121809793e-07,
      "loss": 2.4891,
      "step": 70909
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.164582371711731,
      "learning_rate": 2.927231509154016e-07,
      "loss": 2.4109,
      "step": 70910
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0640438795089722,
      "learning_rate": 2.926242670691759e-07,
      "loss": 2.009,
      "step": 70911
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0417488813400269,
      "learning_rate": 2.9252539967958737e-07,
      "loss": 2.4781,
      "step": 70912
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.107235312461853,
      "learning_rate": 2.92426548746807e-07,
      "loss": 2.3851,
      "step": 70913
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0347809791564941,
      "learning_rate": 2.923277142709979e-07,
      "loss": 2.2798,
      "step": 70914
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0585520267486572,
      "learning_rate": 2.922288962523323e-07,
      "loss": 2.2167,
      "step": 70915
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0489217042922974,
      "learning_rate": 2.921300946909744e-07,
      "loss": 2.4758,
      "step": 70916
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0964661836624146,
      "learning_rate": 2.9203130958709303e-07,
      "loss": 2.3956,
      "step": 70917
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1839289665222168,
      "learning_rate": 2.9193254094085354e-07,
      "loss": 2.1349,
      "step": 70918
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0088045597076416,
      "learning_rate": 2.9183378875242695e-07,
      "loss": 2.3187,
      "step": 70919
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1695460081100464,
      "learning_rate": 2.9173505302197647e-07,
      "loss": 2.19,
      "step": 70920
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.216285228729248,
      "learning_rate": 2.9163633374967416e-07,
      "loss": 2.3812,
      "step": 70921
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.095705509185791,
      "learning_rate": 2.915376309356832e-07,
      "loss": 2.4366,
      "step": 70922
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0687114000320435,
      "learning_rate": 2.9143894458017464e-07,
      "loss": 2.3656,
      "step": 70923
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0441672801971436,
      "learning_rate": 2.913402746833116e-07,
      "loss": 2.1646,
      "step": 70924
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.132355809211731,
      "learning_rate": 2.9124162124526623e-07,
      "loss": 2.401,
      "step": 70925
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.142343282699585,
      "learning_rate": 2.9114298426620056e-07,
      "loss": 2.4067,
      "step": 70926
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.120793104171753,
      "learning_rate": 2.9104436374628673e-07,
      "loss": 2.4351,
      "step": 70927
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0135337114334106,
      "learning_rate": 2.9094575968568907e-07,
      "loss": 2.2301,
      "step": 70928
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0073260068893433,
      "learning_rate": 2.90847172084574e-07,
      "loss": 2.2622,
      "step": 70929
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.228538155555725,
      "learning_rate": 2.907486009431115e-07,
      "loss": 2.4968,
      "step": 70930
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0588104724884033,
      "learning_rate": 2.9065004626146587e-07,
      "loss": 2.2433,
      "step": 70931
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.002665638923645,
      "learning_rate": 2.905515080398058e-07,
      "loss": 2.3861,
      "step": 70932
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0414434671401978,
      "learning_rate": 2.9045298627829896e-07,
      "loss": 2.1844,
      "step": 70933
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0250440835952759,
      "learning_rate": 2.9035448097711085e-07,
      "loss": 2.1892,
      "step": 70934
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2050259113311768,
      "learning_rate": 2.9025599213640785e-07,
      "loss": 2.4786,
      "step": 70935
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1336629390716553,
      "learning_rate": 2.9015751975636e-07,
      "loss": 1.9965,
      "step": 70936
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1663291454315186,
      "learning_rate": 2.900590638371303e-07,
      "loss": 2.2525,
      "step": 70937
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0561057329177856,
      "learning_rate": 2.899606243788888e-07,
      "loss": 2.3591,
      "step": 70938
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.092538595199585,
      "learning_rate": 2.8986220138180086e-07,
      "loss": 2.2835,
      "step": 70939
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.083544373512268,
      "learning_rate": 2.897637948460341e-07,
      "loss": 2.2385,
      "step": 70940
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1086481809616089,
      "learning_rate": 2.89665404771754e-07,
      "loss": 2.1792,
      "step": 70941
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0894575119018555,
      "learning_rate": 2.895670311591292e-07,
      "loss": 2.2859,
      "step": 70942
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0900425910949707,
      "learning_rate": 2.8946867400832635e-07,
      "loss": 2.6784,
      "step": 70943
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0480111837387085,
      "learning_rate": 2.8937033331951083e-07,
      "loss": 2.1477,
      "step": 70944
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2337682247161865,
      "learning_rate": 2.8927200909284914e-07,
      "loss": 2.4219,
      "step": 70945
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0734142065048218,
      "learning_rate": 2.891737013285101e-07,
      "loss": 2.3906,
      "step": 70946
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0846984386444092,
      "learning_rate": 2.8907541002665794e-07,
      "loss": 2.4682,
      "step": 70947
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1003748178482056,
      "learning_rate": 2.889771351874615e-07,
      "loss": 2.1981,
      "step": 70948
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1355115175247192,
      "learning_rate": 2.8887887681108506e-07,
      "loss": 2.2197,
      "step": 70949
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0390479564666748,
      "learning_rate": 2.8878063489769737e-07,
      "loss": 2.242,
      "step": 70950
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0327969789505005,
      "learning_rate": 2.8868240944746275e-07,
      "loss": 2.1879,
      "step": 70951
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1216444969177246,
      "learning_rate": 2.885842004605499e-07,
      "loss": 2.1982,
      "step": 70952
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0893313884735107,
      "learning_rate": 2.8848600793712436e-07,
      "loss": 2.4618,
      "step": 70953
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0253143310546875,
      "learning_rate": 2.8838783187735363e-07,
      "loss": 2.451,
      "step": 70954
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1686803102493286,
      "learning_rate": 2.8828967228140216e-07,
      "loss": 2.4611,
      "step": 70955
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9787481427192688,
      "learning_rate": 2.881915291494375e-07,
      "loss": 2.2218,
      "step": 70956
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.976948618888855,
      "learning_rate": 2.880934024816262e-07,
      "loss": 2.2516,
      "step": 70957
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1975685358047485,
      "learning_rate": 2.879952922781337e-07,
      "loss": 2.2042,
      "step": 70958
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0786958932876587,
      "learning_rate": 2.8789719853912655e-07,
      "loss": 2.4865,
      "step": 70959
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1578871011734009,
      "learning_rate": 2.877991212647724e-07,
      "loss": 2.0282,
      "step": 70960
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2160377502441406,
      "learning_rate": 2.877010604552366e-07,
      "loss": 2.1306,
      "step": 70961
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0492475032806396,
      "learning_rate": 2.8760301611068464e-07,
      "loss": 2.2757,
      "step": 70962
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0573419332504272,
      "learning_rate": 2.8750498823128414e-07,
      "loss": 2.3638,
      "step": 70963
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0816748142242432,
      "learning_rate": 2.8740697681719943e-07,
      "loss": 2.545,
      "step": 70964
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1000908613204956,
      "learning_rate": 2.8730898186859926e-07,
      "loss": 2.249,
      "step": 70965
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0457059144973755,
      "learning_rate": 2.8721100338564677e-07,
      "loss": 2.0894,
      "step": 70966
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2408332824707031,
      "learning_rate": 2.871130413685108e-07,
      "loss": 2.2557,
      "step": 70967
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2864981889724731,
      "learning_rate": 2.8701509581735453e-07,
      "loss": 2.3754,
      "step": 70968
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.025464653968811,
      "learning_rate": 2.8691716673234894e-07,
      "loss": 2.2675,
      "step": 70969
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0410155057907104,
      "learning_rate": 2.8681925411365384e-07,
      "loss": 2.5984,
      "step": 70970
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1007800102233887,
      "learning_rate": 2.8672135796143916e-07,
      "loss": 2.213,
      "step": 70971
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.302025318145752,
      "learning_rate": 2.8662347827586923e-07,
      "loss": 2.0488,
      "step": 70972
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0979024171829224,
      "learning_rate": 2.865256150571116e-07,
      "loss": 2.2288,
      "step": 70973
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1728875637054443,
      "learning_rate": 2.864277683053307e-07,
      "loss": 2.2069,
      "step": 70974
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9974232316017151,
      "learning_rate": 2.86329938020693e-07,
      "loss": 2.1559,
      "step": 70975
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1645475625991821,
      "learning_rate": 2.862321242033639e-07,
      "loss": 2.1219,
      "step": 70976
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1500458717346191,
      "learning_rate": 2.8613432685350995e-07,
      "loss": 2.3883,
      "step": 70977
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1692187786102295,
      "learning_rate": 2.860365459712966e-07,
      "loss": 2.6047,
      "step": 70978
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0106313228607178,
      "learning_rate": 2.859387815568893e-07,
      "loss": 2.3494,
      "step": 70979
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1963529586791992,
      "learning_rate": 2.8584103361045443e-07,
      "loss": 2.3771,
      "step": 70980
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.08003568649292,
      "learning_rate": 2.857433021321576e-07,
      "loss": 2.2382,
      "step": 70981
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0351274013519287,
      "learning_rate": 2.8564558712216417e-07,
      "loss": 2.3969,
      "step": 70982
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.094626545906067,
      "learning_rate": 2.855478885806395e-07,
      "loss": 2.3272,
      "step": 70983
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.024335503578186,
      "learning_rate": 2.8545020650774915e-07,
      "loss": 2.335,
      "step": 70984
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1426595449447632,
      "learning_rate": 2.853525409036595e-07,
      "loss": 2.2404,
      "step": 70985
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.086005449295044,
      "learning_rate": 2.8525489176853607e-07,
      "loss": 2.6091,
      "step": 70986
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0751785039901733,
      "learning_rate": 2.851572591025442e-07,
      "loss": 2.2333,
      "step": 70987
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9984256029129028,
      "learning_rate": 2.8505964290584833e-07,
      "loss": 2.2429,
      "step": 70988
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1143393516540527,
      "learning_rate": 2.84962043178616e-07,
      "loss": 2.3454,
      "step": 70989
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1515976190567017,
      "learning_rate": 2.8486445992101153e-07,
      "loss": 2.3923,
      "step": 70990
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.096742868423462,
      "learning_rate": 2.847668931331993e-07,
      "loss": 2.2433,
      "step": 70991
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1075198650360107,
      "learning_rate": 2.8466934281534684e-07,
      "loss": 2.0514,
      "step": 70992
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0249722003936768,
      "learning_rate": 2.8457180896761746e-07,
      "loss": 2.4329,
      "step": 70993
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0030221939086914,
      "learning_rate": 2.844742915901777e-07,
      "loss": 2.1234,
      "step": 70994
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1490769386291504,
      "learning_rate": 2.843767906831929e-07,
      "loss": 2.2766,
      "step": 70995
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1032276153564453,
      "learning_rate": 2.8427930624682966e-07,
      "loss": 2.2149,
      "step": 70996
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9924576282501221,
      "learning_rate": 2.8418183828125003e-07,
      "loss": 2.2113,
      "step": 70997
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0881949663162231,
      "learning_rate": 2.8408438678662273e-07,
      "loss": 2.267,
      "step": 70998
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2202222347259521,
      "learning_rate": 2.8398695176310886e-07,
      "loss": 2.2637,
      "step": 70999
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1468331813812256,
      "learning_rate": 2.838895332108771e-07,
      "loss": 2.3114,
      "step": 71000
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1144123077392578,
      "learning_rate": 2.8379213113009063e-07,
      "loss": 2.1483,
      "step": 71001
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0941065549850464,
      "learning_rate": 2.8369474552091714e-07,
      "loss": 2.1942,
      "step": 71002
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1379516124725342,
      "learning_rate": 2.835973763835187e-07,
      "loss": 2.3114,
      "step": 71003
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2251900434494019,
      "learning_rate": 2.8350002371806185e-07,
      "loss": 2.2749,
      "step": 71004
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2286487817764282,
      "learning_rate": 2.834026875247109e-07,
      "loss": 2.4104,
      "step": 71005
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0022145509719849,
      "learning_rate": 2.833053678036324e-07,
      "loss": 2.4649,
      "step": 71006
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0126434564590454,
      "learning_rate": 2.832080645549895e-07,
      "loss": 2.289,
      "step": 71007
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1756088733673096,
      "learning_rate": 2.831107777789488e-07,
      "loss": 2.2175,
      "step": 71008
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1432563066482544,
      "learning_rate": 2.830135074756746e-07,
      "loss": 2.4761,
      "step": 71009
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.982798159122467,
      "learning_rate": 2.8291625364533117e-07,
      "loss": 2.2989,
      "step": 71010
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2181272506713867,
      "learning_rate": 2.8281901628808286e-07,
      "loss": 2.2803,
      "step": 71011
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0766102075576782,
      "learning_rate": 2.8272179540409616e-07,
      "loss": 2.3599,
      "step": 71012
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0293378829956055,
      "learning_rate": 2.8262459099353545e-07,
      "loss": 2.2585,
      "step": 71013
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0994755029678345,
      "learning_rate": 2.82527403056565e-07,
      "loss": 2.206,
      "step": 71014
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.137135624885559,
      "learning_rate": 2.8243023159334913e-07,
      "loss": 2.1585,
      "step": 71015
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1221649646759033,
      "learning_rate": 2.823330766040544e-07,
      "loss": 2.2055,
      "step": 71016
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0619951486587524,
      "learning_rate": 2.822359380888451e-07,
      "loss": 2.4416,
      "step": 71017
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0957896709442139,
      "learning_rate": 2.821388160478833e-07,
      "loss": 2.3483,
      "step": 71018
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2408013343811035,
      "learning_rate": 2.8204171048133665e-07,
      "loss": 2.2976,
      "step": 71019
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0990211963653564,
      "learning_rate": 2.819446213893673e-07,
      "loss": 2.4831,
      "step": 71020
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1101688146591187,
      "learning_rate": 2.8184754877214285e-07,
      "loss": 2.3967,
      "step": 71021
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0336374044418335,
      "learning_rate": 2.8175049262982647e-07,
      "loss": 2.252,
      "step": 71022
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9616039395332336,
      "learning_rate": 2.816534529625814e-07,
      "loss": 2.3522,
      "step": 71023
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2623374462127686,
      "learning_rate": 2.815564297705731e-07,
      "loss": 2.3559,
      "step": 71024
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0645184516906738,
      "learning_rate": 2.814594230539669e-07,
      "loss": 2.1674,
      "step": 71025
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1405694484710693,
      "learning_rate": 2.8136243281292606e-07,
      "loss": 2.0967,
      "step": 71026
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0344619750976562,
      "learning_rate": 2.812654590476149e-07,
      "loss": 2.5048,
      "step": 71027
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1989120244979858,
      "learning_rate": 2.8116850175819887e-07,
      "loss": 2.4567,
      "step": 71028
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1237313747406006,
      "learning_rate": 2.8107156094484223e-07,
      "loss": 2.3384,
      "step": 71029
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1378328800201416,
      "learning_rate": 2.8097463660770817e-07,
      "loss": 2.4405,
      "step": 71030
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.168897271156311,
      "learning_rate": 2.8087772874696217e-07,
      "loss": 2.2453,
      "step": 71031
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1791541576385498,
      "learning_rate": 2.807808373627674e-07,
      "loss": 2.5146,
      "step": 71032
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1287436485290527,
      "learning_rate": 2.806839624552904e-07,
      "loss": 2.4599,
      "step": 71033
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0559474229812622,
      "learning_rate": 2.8058710402469215e-07,
      "loss": 2.2447,
      "step": 71034
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.9685385227203369,
      "learning_rate": 2.804902620711403e-07,
      "loss": 2.3947,
      "step": 71035
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0301295518875122,
      "learning_rate": 2.803934365947958e-07,
      "loss": 2.1857,
      "step": 71036
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.1086562871932983,
      "learning_rate": 2.802966275958252e-07,
      "loss": 2.271,
      "step": 71037
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.172581672668457,
      "learning_rate": 2.8019983507439063e-07,
      "loss": 2.3954,
      "step": 71038
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0462627410888672,
      "learning_rate": 2.8010305903065746e-07,
      "loss": 2.4038,
      "step": 71039
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.0868828296661377,
      "learning_rate": 2.8000629946478895e-07,
      "loss": 2.4517,
      "step": 71040
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1745412349700928,
      "learning_rate": 2.7990955637695163e-07,
      "loss": 2.3839,
      "step": 71041
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1564691066741943,
      "learning_rate": 2.798128297673053e-07,
      "loss": 2.2969,
      "step": 71042
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.028104305267334,
      "learning_rate": 2.7971611963601763e-07,
      "loss": 2.2889,
      "step": 71043
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0401041507720947,
      "learning_rate": 2.7961942598325076e-07,
      "loss": 2.4657,
      "step": 71044
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.981336236000061,
      "learning_rate": 2.7952274880916895e-07,
      "loss": 2.3748,
      "step": 71045
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.02277410030365,
      "learning_rate": 2.794260881139366e-07,
      "loss": 2.435,
      "step": 71046
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1360321044921875,
      "learning_rate": 2.7932944389771563e-07,
      "loss": 2.0599,
      "step": 71047
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0450730323791504,
      "learning_rate": 2.7923281616067277e-07,
      "loss": 2.3957,
      "step": 71048
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0542247295379639,
      "learning_rate": 2.791362049029711e-07,
      "loss": 2.2929,
      "step": 71049
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2067970037460327,
      "learning_rate": 2.7903961012477166e-07,
      "loss": 2.4021,
      "step": 71050
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9894179701805115,
      "learning_rate": 2.7894303182624207e-07,
      "loss": 2.4626,
      "step": 71051
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1163407564163208,
      "learning_rate": 2.788464700075433e-07,
      "loss": 2.3603,
      "step": 71052
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0641446113586426,
      "learning_rate": 2.787499246688397e-07,
      "loss": 2.1924,
      "step": 71053
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1105867624282837,
      "learning_rate": 2.786533958102955e-07,
      "loss": 2.322,
      "step": 71054
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1673592329025269,
      "learning_rate": 2.78556883432074e-07,
      "loss": 2.3157,
      "step": 71055
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.199442982673645,
      "learning_rate": 2.7846038753433945e-07,
      "loss": 2.1733,
      "step": 71056
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0406501293182373,
      "learning_rate": 2.78363908117254e-07,
      "loss": 2.2522,
      "step": 71057
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0277085304260254,
      "learning_rate": 2.7826744518098305e-07,
      "loss": 2.5765,
      "step": 71058
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.14258873462677,
      "learning_rate": 2.781709987256875e-07,
      "loss": 2.496,
      "step": 71059
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0915335416793823,
      "learning_rate": 2.7807456875153406e-07,
      "loss": 2.3184,
      "step": 71060
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0514284372329712,
      "learning_rate": 2.7797815525868464e-07,
      "loss": 2.3495,
      "step": 71061
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0860575437545776,
      "learning_rate": 2.778817582473037e-07,
      "loss": 2.3726,
      "step": 71062
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1106663942337036,
      "learning_rate": 2.77785377717551e-07,
      "loss": 2.1506,
      "step": 71063
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1250817775726318,
      "learning_rate": 2.776890136695942e-07,
      "loss": 2.5216,
      "step": 71064
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0514576435089111,
      "learning_rate": 2.775926661035944e-07,
      "loss": 2.2634,
      "step": 71065
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0274536609649658,
      "learning_rate": 2.7749633501971685e-07,
      "loss": 2.2271,
      "step": 71066
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.033859133720398,
      "learning_rate": 2.774000204181226e-07,
      "loss": 2.2771,
      "step": 71067
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1238254308700562,
      "learning_rate": 2.773037222989761e-07,
      "loss": 2.342,
      "step": 71068
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0988517999649048,
      "learning_rate": 2.7720744066244034e-07,
      "loss": 2.2565,
      "step": 71069
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9868316054344177,
      "learning_rate": 2.771111755086786e-07,
      "loss": 2.0356,
      "step": 71070
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0675547122955322,
      "learning_rate": 2.770149268378541e-07,
      "loss": 2.3107,
      "step": 71071
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0228310823440552,
      "learning_rate": 2.7691869465013013e-07,
      "loss": 2.4973,
      "step": 71072
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.18388831615448,
      "learning_rate": 2.7682247894567084e-07,
      "loss": 2.395,
      "step": 71073
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.088263988494873,
      "learning_rate": 2.767262797246362e-07,
      "loss": 2.2361,
      "step": 71074
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.953560471534729,
      "learning_rate": 2.766300969871927e-07,
      "loss": 2.2128,
      "step": 71075
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1527243852615356,
      "learning_rate": 2.7653393073350244e-07,
      "loss": 2.3077,
      "step": 71076
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0831351280212402,
      "learning_rate": 2.764377809637264e-07,
      "loss": 2.3652,
      "step": 71077
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2117223739624023,
      "learning_rate": 2.7634164767803007e-07,
      "loss": 2.3566,
      "step": 71078
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0827277898788452,
      "learning_rate": 2.762455308765766e-07,
      "loss": 2.239,
      "step": 71079
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2813729047775269,
      "learning_rate": 2.761494305595258e-07,
      "loss": 2.3111,
      "step": 71080
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.039025068283081,
      "learning_rate": 2.760533467270443e-07,
      "loss": 2.2324,
      "step": 71081
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1540582180023193,
      "learning_rate": 2.7595727937929195e-07,
      "loss": 2.3837,
      "step": 71082
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9509683847427368,
      "learning_rate": 2.758612285164342e-07,
      "loss": 2.4414,
      "step": 71083
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0095674991607666,
      "learning_rate": 2.757651941386319e-07,
      "loss": 2.145,
      "step": 71084
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0764837265014648,
      "learning_rate": 2.756691762460495e-07,
      "loss": 2.4132,
      "step": 71085
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.087033987045288,
      "learning_rate": 2.7557317483884796e-07,
      "loss": 2.3015,
      "step": 71086
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3550705909729004,
      "learning_rate": 2.754771899171915e-07,
      "loss": 2.4829,
      "step": 71087
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0413193702697754,
      "learning_rate": 2.7538122148124347e-07,
      "loss": 2.2487,
      "step": 71088
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9943328499794006,
      "learning_rate": 2.7528526953116474e-07,
      "loss": 2.1592,
      "step": 71089
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3575204610824585,
      "learning_rate": 2.7518933406711745e-07,
      "loss": 2.2266,
      "step": 71090
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0300753116607666,
      "learning_rate": 2.75093415089267e-07,
      "loss": 2.4591,
      "step": 71091
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0457258224487305,
      "learning_rate": 2.749975125977733e-07,
      "loss": 2.5224,
      "step": 71092
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0424168109893799,
      "learning_rate": 2.7490162659280064e-07,
      "loss": 2.0805,
      "step": 71093
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0750988721847534,
      "learning_rate": 2.748057570745111e-07,
      "loss": 2.1781,
      "step": 71094
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1347012519836426,
      "learning_rate": 2.7470990404306673e-07,
      "loss": 2.1335,
      "step": 71095
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0888276100158691,
      "learning_rate": 2.7461406749862975e-07,
      "loss": 2.195,
      "step": 71096
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2344810962677002,
      "learning_rate": 2.7451824744136546e-07,
      "loss": 2.3495,
      "step": 71097
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.090444803237915,
      "learning_rate": 2.744224438714316e-07,
      "loss": 2.3071,
      "step": 71098
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1239060163497925,
      "learning_rate": 2.743266567889957e-07,
      "loss": 2.2571,
      "step": 71099
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0715410709381104,
      "learning_rate": 2.742308861942167e-07,
      "loss": 2.34,
      "step": 71100
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9962099194526672,
      "learning_rate": 2.741351320872576e-07,
      "loss": 2.205,
      "step": 71101
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0724040269851685,
      "learning_rate": 2.7403939446828064e-07,
      "loss": 2.2763,
      "step": 71102
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1057168245315552,
      "learning_rate": 2.73943673337449e-07,
      "loss": 2.2933,
      "step": 71103
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1261321306228638,
      "learning_rate": 2.738479686949236e-07,
      "loss": 2.3236,
      "step": 71104
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0478310585021973,
      "learning_rate": 2.737522805408677e-07,
      "loss": 2.2747,
      "step": 71105
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.323105812072754,
      "learning_rate": 2.7365660887544334e-07,
      "loss": 2.1816,
      "step": 71106
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0367348194122314,
      "learning_rate": 2.7356095369881265e-07,
      "loss": 2.2351,
      "step": 71107
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0890133380889893,
      "learning_rate": 2.734653150111388e-07,
      "loss": 2.1696,
      "step": 71108
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0308853387832642,
      "learning_rate": 2.7336969281258175e-07,
      "loss": 2.2918,
      "step": 71109
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1335656642913818,
      "learning_rate": 2.7327408710330574e-07,
      "loss": 2.4035,
      "step": 71110
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2331756353378296,
      "learning_rate": 2.731784978834706e-07,
      "loss": 2.257,
      "step": 71111
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0551913976669312,
      "learning_rate": 2.7308292515324073e-07,
      "loss": 2.2546,
      "step": 71112
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0436173677444458,
      "learning_rate": 2.72987368912776e-07,
      "loss": 2.1554,
      "step": 71113
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.010298252105713,
      "learning_rate": 2.7289182916224177e-07,
      "loss": 2.0681,
      "step": 71114
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2425092458724976,
      "learning_rate": 2.7279630590179574e-07,
      "loss": 2.5397,
      "step": 71115
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1382253170013428,
      "learning_rate": 2.7270079913160217e-07,
      "loss": 2.2098,
      "step": 71116
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0314671993255615,
      "learning_rate": 2.726053088518221e-07,
      "loss": 2.4893,
      "step": 71117
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1220431327819824,
      "learning_rate": 2.7250983506261986e-07,
      "loss": 2.4219,
      "step": 71118
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.095711350440979,
      "learning_rate": 2.7241437776415303e-07,
      "loss": 2.4543,
      "step": 71119
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1421912908554077,
      "learning_rate": 2.723189369565882e-07,
      "loss": 2.2792,
      "step": 71120
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1332439184188843,
      "learning_rate": 2.7222351264008297e-07,
      "loss": 2.1899,
      "step": 71121
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.036076307296753,
      "learning_rate": 2.721281048148017e-07,
      "loss": 2.2307,
      "step": 71122
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.067049503326416,
      "learning_rate": 2.720327134809042e-07,
      "loss": 2.0583,
      "step": 71123
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0910003185272217,
      "learning_rate": 2.7193733863855486e-07,
      "loss": 2.4655,
      "step": 71124
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2941349744796753,
      "learning_rate": 2.7184198028791244e-07,
      "loss": 2.1642,
      "step": 71125
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1966112852096558,
      "learning_rate": 2.7174663842914116e-07,
      "loss": 2.3174,
      "step": 71126
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1258034706115723,
      "learning_rate": 2.716513130624021e-07,
      "loss": 2.4749,
      "step": 71127
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0904545783996582,
      "learning_rate": 2.715560041878551e-07,
      "loss": 2.4189,
      "step": 71128
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0727769136428833,
      "learning_rate": 2.714607118056622e-07,
      "loss": 2.2372,
      "step": 71129
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1550414562225342,
      "learning_rate": 2.7136543591598787e-07,
      "loss": 2.255,
      "step": 71130
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.09287428855896,
      "learning_rate": 2.7127017651898844e-07,
      "loss": 2.395,
      "step": 71131
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1168162822723389,
      "learning_rate": 2.7117493361483063e-07,
      "loss": 2.2455,
      "step": 71132
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1007121801376343,
      "learning_rate": 2.7107970720367305e-07,
      "loss": 2.5711,
      "step": 71133
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.488699197769165,
      "learning_rate": 2.7098449728567787e-07,
      "loss": 2.3033,
      "step": 71134
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0216138362884521,
      "learning_rate": 2.708893038610061e-07,
      "loss": 2.013,
      "step": 71135
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0987166166305542,
      "learning_rate": 2.7079412692981866e-07,
      "loss": 2.2908,
      "step": 71136
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.099417805671692,
      "learning_rate": 2.706989664922788e-07,
      "loss": 2.12,
      "step": 71137
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9887779355049133,
      "learning_rate": 2.706038225485452e-07,
      "loss": 2.3568,
      "step": 71138
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1759449243545532,
      "learning_rate": 2.705086950987823e-07,
      "loss": 2.3855,
      "step": 71139
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9632895588874817,
      "learning_rate": 2.7041358414314764e-07,
      "loss": 2.5366,
      "step": 71140
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0530294179916382,
      "learning_rate": 2.7031848968180676e-07,
      "loss": 2.3819,
      "step": 71141
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2323431968688965,
      "learning_rate": 2.702234117149172e-07,
      "loss": 2.336,
      "step": 71142
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.971229076385498,
      "learning_rate": 2.7012835024264216e-07,
      "loss": 2.3633,
      "step": 71143
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0452834367752075,
      "learning_rate": 2.700333052651416e-07,
      "loss": 2.2756,
      "step": 71144
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2198655605316162,
      "learning_rate": 2.6993827678257754e-07,
      "loss": 2.4973,
      "step": 71145
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1422309875488281,
      "learning_rate": 2.698432647951099e-07,
      "loss": 2.1427,
      "step": 71146
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.130342721939087,
      "learning_rate": 2.6974826930290185e-07,
      "loss": 2.4166,
      "step": 71147
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1175546646118164,
      "learning_rate": 2.696532903061122e-07,
      "loss": 2.3495,
      "step": 71148
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1076985597610474,
      "learning_rate": 2.695583278049041e-07,
      "loss": 2.3455,
      "step": 71149
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1266989707946777,
      "learning_rate": 2.694633817994352e-07,
      "loss": 2.137,
      "step": 71150
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.982391893863678,
      "learning_rate": 2.6936845228987096e-07,
      "loss": 2.6337,
      "step": 71151
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0450628995895386,
      "learning_rate": 2.69273539276369e-07,
      "loss": 2.1823,
      "step": 71152
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.387782096862793,
      "learning_rate": 2.691786427590914e-07,
      "loss": 2.3632,
      "step": 71153
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.095883846282959,
      "learning_rate": 2.6908376273819923e-07,
      "loss": 2.1495,
      "step": 71154
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9971667528152466,
      "learning_rate": 2.6898889921385343e-07,
      "loss": 2.4499,
      "step": 71155
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.206737995147705,
      "learning_rate": 2.688940521862127e-07,
      "loss": 2.4119,
      "step": 71156
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1528897285461426,
      "learning_rate": 2.6879922165544024e-07,
      "loss": 2.1588,
      "step": 71157
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2413129806518555,
      "learning_rate": 2.6870440762169605e-07,
      "loss": 2.6436,
      "step": 71158
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1693462133407593,
      "learning_rate": 2.68609610085141e-07,
      "loss": 2.3762,
      "step": 71159
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0611865520477295,
      "learning_rate": 2.68514829045935e-07,
      "loss": 2.3006,
      "step": 71160
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1866555213928223,
      "learning_rate": 2.684200645042401e-07,
      "loss": 2.0246,
      "step": 71161
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1420915126800537,
      "learning_rate": 2.683253164602162e-07,
      "loss": 2.3338,
      "step": 71162
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0721983909606934,
      "learning_rate": 2.682305849140243e-07,
      "loss": 2.494,
      "step": 71163
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.096506953239441,
      "learning_rate": 2.681358698658243e-07,
      "loss": 2.3717,
      "step": 71164
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1694221496582031,
      "learning_rate": 2.68041171315776e-07,
      "loss": 2.3397,
      "step": 71165
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1288799047470093,
      "learning_rate": 2.6794648926404265e-07,
      "loss": 2.3404,
      "step": 71166
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0801881551742554,
      "learning_rate": 2.67851823710783e-07,
      "loss": 2.2643,
      "step": 71167
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0699986219406128,
      "learning_rate": 2.677571746561569e-07,
      "loss": 2.2795,
      "step": 71168
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0608608722686768,
      "learning_rate": 2.676625421003276e-07,
      "loss": 2.3062,
      "step": 71169
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1499141454696655,
      "learning_rate": 2.675679260434516e-07,
      "loss": 2.3856,
      "step": 71170
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0638713836669922,
      "learning_rate": 2.6747332648569213e-07,
      "loss": 2.3981,
      "step": 71171
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.100135087966919,
      "learning_rate": 2.6737874342720794e-07,
      "loss": 2.3642,
      "step": 71172
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0699341297149658,
      "learning_rate": 2.6728417686816e-07,
      "loss": 2.2624,
      "step": 71173
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9931873679161072,
      "learning_rate": 2.671896268087104e-07,
      "loss": 2.4695,
      "step": 71174
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1357223987579346,
      "learning_rate": 2.670950932490157e-07,
      "loss": 2.593,
      "step": 71175
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.045021891593933,
      "learning_rate": 2.6700057618924026e-07,
      "loss": 2.2161,
      "step": 71176
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1484546661376953,
      "learning_rate": 2.669060756295405e-07,
      "loss": 2.4195,
      "step": 71177
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1244261264801025,
      "learning_rate": 2.6681159157007863e-07,
      "loss": 2.3219,
      "step": 71178
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1673004627227783,
      "learning_rate": 2.6671712401101555e-07,
      "loss": 2.1054,
      "step": 71179
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0638298988342285,
      "learning_rate": 2.666226729525112e-07,
      "loss": 2.2283,
      "step": 71180
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0888184309005737,
      "learning_rate": 2.665282383947232e-07,
      "loss": 2.4605,
      "step": 71181
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2546945810317993,
      "learning_rate": 2.664338203378147e-07,
      "loss": 2.2949,
      "step": 71182
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0633138418197632,
      "learning_rate": 2.663394187819424e-07,
      "loss": 2.4273,
      "step": 71183
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0701113939285278,
      "learning_rate": 2.662450337272704e-07,
      "loss": 2.3343,
      "step": 71184
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.209559440612793,
      "learning_rate": 2.661506651739554e-07,
      "loss": 2.2692,
      "step": 71185
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0571800470352173,
      "learning_rate": 2.6605631312216053e-07,
      "loss": 2.1924,
      "step": 71186
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1181470155715942,
      "learning_rate": 2.6596197757204233e-07,
      "loss": 2.3684,
      "step": 71187
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0256403684616089,
      "learning_rate": 2.6586765852376294e-07,
      "loss": 2.4051,
      "step": 71188
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9995633363723755,
      "learning_rate": 2.6577335597748113e-07,
      "loss": 2.387,
      "step": 71189
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1452745199203491,
      "learning_rate": 2.6567906993335777e-07,
      "loss": 2.1945,
      "step": 71190
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1074327230453491,
      "learning_rate": 2.6558480039155286e-07,
      "loss": 2.3559,
      "step": 71191
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0561922788619995,
      "learning_rate": 2.65490547352224e-07,
      "loss": 2.3538,
      "step": 71192
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.133386492729187,
      "learning_rate": 2.653963108155333e-07,
      "loss": 2.3632,
      "step": 71193
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1099191904067993,
      "learning_rate": 2.653020907816406e-07,
      "loss": 2.218,
      "step": 71194
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1391876935958862,
      "learning_rate": 2.6520788725070247e-07,
      "loss": 2.5222,
      "step": 71195
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0552761554718018,
      "learning_rate": 2.6511370022288317e-07,
      "loss": 2.2802,
      "step": 71196
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0473344326019287,
      "learning_rate": 2.6501952969833934e-07,
      "loss": 2.1841,
      "step": 71197
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2463465929031372,
      "learning_rate": 2.649253756772308e-07,
      "loss": 2.2823,
      "step": 71198
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0813065767288208,
      "learning_rate": 2.6483123815971845e-07,
      "loss": 2.389,
      "step": 71199
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.065490961074829,
      "learning_rate": 2.647371171459612e-07,
      "loss": 2.268,
      "step": 71200
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.001849889755249,
      "learning_rate": 2.646430126361188e-07,
      "loss": 2.2134,
      "step": 71201
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.088005781173706,
      "learning_rate": 2.64548924630349e-07,
      "loss": 2.4555,
      "step": 71202
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0571168661117554,
      "learning_rate": 2.644548531288149e-07,
      "loss": 2.4971,
      "step": 71203
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1103882789611816,
      "learning_rate": 2.64360798131672e-07,
      "loss": 2.2814,
      "step": 71204
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.123047947883606,
      "learning_rate": 2.6426675963908357e-07,
      "loss": 2.2181,
      "step": 71205
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0839763879776,
      "learning_rate": 2.6417273765120713e-07,
      "loss": 2.2211,
      "step": 71206
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.150772213935852,
      "learning_rate": 2.6407873216820145e-07,
      "loss": 2.3051,
      "step": 71207
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1230231523513794,
      "learning_rate": 2.639847431902254e-07,
      "loss": 2.5108,
      "step": 71208
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0437344312667847,
      "learning_rate": 2.6389077071744093e-07,
      "loss": 2.1905,
      "step": 71209
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0648378133773804,
      "learning_rate": 2.6379681475000586e-07,
      "loss": 2.4278,
      "step": 71210
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1999610662460327,
      "learning_rate": 2.6370287528807883e-07,
      "loss": 1.9477,
      "step": 71211
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0878913402557373,
      "learning_rate": 2.6360895233181973e-07,
      "loss": 2.2681,
      "step": 71212
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1358811855316162,
      "learning_rate": 2.6351504588138844e-07,
      "loss": 2.2329,
      "step": 71213
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0467232465744019,
      "learning_rate": 2.634211559369426e-07,
      "loss": 2.3389,
      "step": 71214
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0793383121490479,
      "learning_rate": 2.633272824986444e-07,
      "loss": 2.0467,
      "step": 71215
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0278708934783936,
      "learning_rate": 2.63233425566648e-07,
      "loss": 2.5111,
      "step": 71216
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.035728096961975,
      "learning_rate": 2.631395851411178e-07,
      "loss": 2.2848,
      "step": 71217
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1045352220535278,
      "learning_rate": 2.6304576122221035e-07,
      "loss": 2.4665,
      "step": 71218
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0564568042755127,
      "learning_rate": 2.629519538100833e-07,
      "loss": 2.4811,
      "step": 71219
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1586825847625732,
      "learning_rate": 2.6285816290489876e-07,
      "loss": 2.3581,
      "step": 71220
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1550849676132202,
      "learning_rate": 2.627643885068132e-07,
      "loss": 2.3292,
      "step": 71221
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2028943300247192,
      "learning_rate": 2.626706306159865e-07,
      "loss": 2.2218,
      "step": 71222
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.996222972869873,
      "learning_rate": 2.6257688923257863e-07,
      "loss": 2.3142,
      "step": 71223
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0908057689666748,
      "learning_rate": 2.6248316435674714e-07,
      "loss": 2.3096,
      "step": 71224
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.951130211353302,
      "learning_rate": 2.6238945598865086e-07,
      "loss": 2.2273,
      "step": 71225
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0748459100723267,
      "learning_rate": 2.622957641284507e-07,
      "loss": 2.3793,
      "step": 71226
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.187380313873291,
      "learning_rate": 2.6220208877630216e-07,
      "loss": 2.349,
      "step": 71227
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0727874040603638,
      "learning_rate": 2.621084299323673e-07,
      "loss": 2.1605,
      "step": 71228
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.151884913444519,
      "learning_rate": 2.620147875968015e-07,
      "loss": 2.2699,
      "step": 71229
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0739853382110596,
      "learning_rate": 2.61921161769767e-07,
      "loss": 2.4528,
      "step": 71230
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1260892152786255,
      "learning_rate": 2.618275524514202e-07,
      "loss": 2.2851,
      "step": 71231
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0290056467056274,
      "learning_rate": 2.617339596419211e-07,
      "loss": 2.2391,
      "step": 71232
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.355026364326477,
      "learning_rate": 2.6164038334142825e-07,
      "loss": 2.2427,
      "step": 71233
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1249983310699463,
      "learning_rate": 2.6154682355009954e-07,
      "loss": 2.2197,
      "step": 71234
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.02474045753479,
      "learning_rate": 2.614532802680925e-07,
      "loss": 2.3463,
      "step": 71235
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0749260187149048,
      "learning_rate": 2.613597534955692e-07,
      "loss": 2.2299,
      "step": 71236
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.071314811706543,
      "learning_rate": 2.61266243232684e-07,
      "loss": 2.3371,
      "step": 71237
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1281065940856934,
      "learning_rate": 2.61172749479599e-07,
      "loss": 2.2112,
      "step": 71238
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1771693229675293,
      "learning_rate": 2.610792722364708e-07,
      "loss": 2.2548,
      "step": 71239
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0030235052108765,
      "learning_rate": 2.60985811503458e-07,
      "loss": 2.4627,
      "step": 71240
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.01863694190979,
      "learning_rate": 2.6089236728071954e-07,
      "loss": 2.4819,
      "step": 71241
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.119426965713501,
      "learning_rate": 2.607989395684141e-07,
      "loss": 2.3417,
      "step": 71242
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.040113091468811,
      "learning_rate": 2.607055283666982e-07,
      "loss": 2.3373,
      "step": 71243
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3657804727554321,
      "learning_rate": 2.6061213367573277e-07,
      "loss": 2.5358,
      "step": 71244
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.193132996559143,
      "learning_rate": 2.605187554956745e-07,
      "loss": 2.3732,
      "step": 71245
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2322132587432861,
      "learning_rate": 2.604253938266843e-07,
      "loss": 2.4025,
      "step": 71246
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0566983222961426,
      "learning_rate": 2.603320486689154e-07,
      "loss": 2.5532,
      "step": 71247
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1063387393951416,
      "learning_rate": 2.6023872002252983e-07,
      "loss": 2.248,
      "step": 71248
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1353517770767212,
      "learning_rate": 2.6014540788768526e-07,
      "loss": 2.3815,
      "step": 71249
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1180565357208252,
      "learning_rate": 2.600521122645394e-07,
      "loss": 2.3147,
      "step": 71250
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1005827188491821,
      "learning_rate": 2.5995883315324987e-07,
      "loss": 2.1997,
      "step": 71251
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1105566024780273,
      "learning_rate": 2.598655705539765e-07,
      "loss": 2.2102,
      "step": 71252
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.019709587097168,
      "learning_rate": 2.5977232446687595e-07,
      "loss": 2.454,
      "step": 71253
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1382869482040405,
      "learning_rate": 2.5967909489210685e-07,
      "loss": 2.329,
      "step": 71254
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0449576377868652,
      "learning_rate": 2.595858818298269e-07,
      "loss": 2.3111,
      "step": 71255
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1374863386154175,
      "learning_rate": 2.594926852801938e-07,
      "loss": 2.2534,
      "step": 71256
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0937864780426025,
      "learning_rate": 2.5939950524336735e-07,
      "loss": 2.1459,
      "step": 71257
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.104120135307312,
      "learning_rate": 2.5930634171950295e-07,
      "loss": 2.4579,
      "step": 71258
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1336846351623535,
      "learning_rate": 2.592131947087617e-07,
      "loss": 2.2734,
      "step": 71259
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2654253244400024,
      "learning_rate": 2.5912006421129785e-07,
      "loss": 2.2929,
      "step": 71260
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.145939588546753,
      "learning_rate": 2.590269502272724e-07,
      "loss": 2.2003,
      "step": 71261
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3041709661483765,
      "learning_rate": 2.5893385275683966e-07,
      "loss": 2.2354,
      "step": 71262
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0412671566009521,
      "learning_rate": 2.5884077180016174e-07,
      "loss": 2.3269,
      "step": 71263
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0886894464492798,
      "learning_rate": 2.5874770735739296e-07,
      "loss": 2.251,
      "step": 71264
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1351016759872437,
      "learning_rate": 2.586546594286943e-07,
      "loss": 2.3326,
      "step": 71265
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.167691707611084,
      "learning_rate": 2.5856162801422e-07,
      "loss": 2.2402,
      "step": 71266
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0558178424835205,
      "learning_rate": 2.584686131141312e-07,
      "loss": 2.5247,
      "step": 71267
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1233874559402466,
      "learning_rate": 2.583756147285821e-07,
      "loss": 2.2166,
      "step": 71268
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1551707983016968,
      "learning_rate": 2.582826328577326e-07,
      "loss": 2.4105,
      "step": 71269
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1586915254592896,
      "learning_rate": 2.5818966750174036e-07,
      "loss": 2.1078,
      "step": 71270
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0753625631332397,
      "learning_rate": 2.580967186607619e-07,
      "loss": 2.3036,
      "step": 71271
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0070463418960571,
      "learning_rate": 2.5800378633495603e-07,
      "loss": 2.4504,
      "step": 71272
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1135215759277344,
      "learning_rate": 2.579108705244804e-07,
      "loss": 2.3877,
      "step": 71273
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1369155645370483,
      "learning_rate": 2.5781797122949036e-07,
      "loss": 2.2824,
      "step": 71274
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0783812999725342,
      "learning_rate": 2.5772508845014474e-07,
      "loss": 2.2079,
      "step": 71275
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0272915363311768,
      "learning_rate": 2.576322221866012e-07,
      "loss": 2.2117,
      "step": 71276
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9973939061164856,
      "learning_rate": 2.575393724390174e-07,
      "loss": 2.3668,
      "step": 71277
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1459425687789917,
      "learning_rate": 2.574465392075498e-07,
      "loss": 2.3239,
      "step": 71278
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9718167185783386,
      "learning_rate": 2.5735372249235724e-07,
      "loss": 2.4408,
      "step": 71279
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0265297889709473,
      "learning_rate": 2.572609222935951e-07,
      "loss": 2.3058,
      "step": 71280
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0928678512573242,
      "learning_rate": 2.571681386114233e-07,
      "loss": 2.3539,
      "step": 71281
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1277353763580322,
      "learning_rate": 2.5707537144599727e-07,
      "loss": 2.2787,
      "step": 71282
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1855463981628418,
      "learning_rate": 2.569826207974735e-07,
      "loss": 2.162,
      "step": 71283
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.106270670890808,
      "learning_rate": 2.5688988666601076e-07,
      "loss": 2.3138,
      "step": 71284
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1970301866531372,
      "learning_rate": 2.5679716905176565e-07,
      "loss": 2.5216,
      "step": 71285
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0890507698059082,
      "learning_rate": 2.567044679548969e-07,
      "loss": 2.4369,
      "step": 71286
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1348168849945068,
      "learning_rate": 2.5661178337555993e-07,
      "loss": 2.2804,
      "step": 71287
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1587932109832764,
      "learning_rate": 2.5651911531391125e-07,
      "loss": 2.3576,
      "step": 71288
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0346249341964722,
      "learning_rate": 2.564264637701097e-07,
      "loss": 2.2551,
      "step": 71289
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0663890838623047,
      "learning_rate": 2.5633382874431176e-07,
      "loss": 2.1789,
      "step": 71290
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.095683217048645,
      "learning_rate": 2.5624121023667294e-07,
      "loss": 2.5706,
      "step": 71291
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1011135578155518,
      "learning_rate": 2.56148608247353e-07,
      "loss": 2.4306,
      "step": 71292
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0592495203018188,
      "learning_rate": 2.5605602277650634e-07,
      "loss": 2.2854,
      "step": 71293
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1829596757888794,
      "learning_rate": 2.5596345382429276e-07,
      "loss": 2.3506,
      "step": 71294
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0688059329986572,
      "learning_rate": 2.558709013908667e-07,
      "loss": 2.189,
      "step": 71295
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0782114267349243,
      "learning_rate": 2.557783654763857e-07,
      "loss": 2.2525,
      "step": 71296
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1722794771194458,
      "learning_rate": 2.5568584608100743e-07,
      "loss": 2.28,
      "step": 71297
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9747678637504578,
      "learning_rate": 2.555933432048874e-07,
      "loss": 2.4405,
      "step": 71298
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2034281492233276,
      "learning_rate": 2.555008568481843e-07,
      "loss": 2.3365,
      "step": 71299
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1315739154815674,
      "learning_rate": 2.554083870110535e-07,
      "loss": 2.0495,
      "step": 71300
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1157078742980957,
      "learning_rate": 2.553159336936517e-07,
      "loss": 2.4364,
      "step": 71301
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1272720098495483,
      "learning_rate": 2.5522349689613646e-07,
      "loss": 2.2738,
      "step": 71302
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0843619108200073,
      "learning_rate": 2.551310766186632e-07,
      "loss": 2.2902,
      "step": 71303
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0998400449752808,
      "learning_rate": 2.550386728613907e-07,
      "loss": 2.2335,
      "step": 71304
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.075846791267395,
      "learning_rate": 2.549462856244722e-07,
      "loss": 2.2858,
      "step": 71305
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0779608488082886,
      "learning_rate": 2.5485391490806865e-07,
      "loss": 2.3812,
      "step": 71306
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.16038978099823,
      "learning_rate": 2.5476156071233325e-07,
      "loss": 2.1703,
      "step": 71307
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9714754223823547,
      "learning_rate": 2.546692230374248e-07,
      "loss": 2.4783,
      "step": 71308
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2101881504058838,
      "learning_rate": 2.545769018834987e-07,
      "loss": 2.3407,
      "step": 71309
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0468312501907349,
      "learning_rate": 2.544845972507104e-07,
      "loss": 2.452,
      "step": 71310
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0018848180770874,
      "learning_rate": 2.5439230913921865e-07,
      "loss": 2.3773,
      "step": 71311
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0208221673965454,
      "learning_rate": 2.5430003754917886e-07,
      "loss": 2.2102,
      "step": 71312
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0752428770065308,
      "learning_rate": 2.542077824807465e-07,
      "loss": 2.5065,
      "step": 71313
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1241620779037476,
      "learning_rate": 2.541155439340792e-07,
      "loss": 2.2126,
      "step": 71314
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1909477710723877,
      "learning_rate": 2.5402332190933357e-07,
      "loss": 2.2982,
      "step": 71315
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.085561752319336,
      "learning_rate": 2.539311164066649e-07,
      "loss": 2.4343,
      "step": 71316
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0249662399291992,
      "learning_rate": 2.5383892742622986e-07,
      "loss": 2.3296,
      "step": 71317
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0253592729568481,
      "learning_rate": 2.537467549681849e-07,
      "loss": 2.2831,
      "step": 71318
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1213752031326294,
      "learning_rate": 2.5365459903268664e-07,
      "loss": 2.3174,
      "step": 71319
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9729071259498596,
      "learning_rate": 2.5356245961988934e-07,
      "loss": 2.1886,
      "step": 71320
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1348891258239746,
      "learning_rate": 2.5347033672995294e-07,
      "loss": 2.3877,
      "step": 71321
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.012995719909668,
      "learning_rate": 2.533782303630294e-07,
      "loss": 2.2712,
      "step": 71322
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.09437894821167,
      "learning_rate": 2.532861405192788e-07,
      "loss": 2.3965,
      "step": 71323
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1787073612213135,
      "learning_rate": 2.531940671988542e-07,
      "loss": 2.7535,
      "step": 71324
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0030477046966553,
      "learning_rate": 2.531020104019144e-07,
      "loss": 2.1183,
      "step": 71325
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.103209376335144,
      "learning_rate": 2.530099701286126e-07,
      "loss": 2.4561,
      "step": 71326
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0865697860717773,
      "learning_rate": 2.529179463791065e-07,
      "loss": 2.2912,
      "step": 71327
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.216300368309021,
      "learning_rate": 2.5282593915355146e-07,
      "loss": 2.2833,
      "step": 71328
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.006103754043579,
      "learning_rate": 2.527339484521041e-07,
      "loss": 2.1668,
      "step": 71329
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0503959655761719,
      "learning_rate": 2.5264197427491866e-07,
      "loss": 2.0801,
      "step": 71330
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0749247074127197,
      "learning_rate": 2.5255001662215393e-07,
      "loss": 2.2478,
      "step": 71331
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0918936729431152,
      "learning_rate": 2.524580754939632e-07,
      "loss": 2.292,
      "step": 71332
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0730783939361572,
      "learning_rate": 2.523661508905051e-07,
      "loss": 2.5096,
      "step": 71333
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0217491388320923,
      "learning_rate": 2.522742428119318e-07,
      "loss": 2.1618,
      "step": 71334
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.112024188041687,
      "learning_rate": 2.5218235125840315e-07,
      "loss": 2.3021,
      "step": 71335
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1823399066925049,
      "learning_rate": 2.520904762300724e-07,
      "loss": 2.3625,
      "step": 71336
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0748354196548462,
      "learning_rate": 2.5199861772709387e-07,
      "loss": 2.1981,
      "step": 71337
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.297227382659912,
      "learning_rate": 2.519067757496274e-07,
      "loss": 2.4461,
      "step": 71338
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0413577556610107,
      "learning_rate": 2.518149502978262e-07,
      "loss": 2.1884,
      "step": 71339
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1574422121047974,
      "learning_rate": 2.517231413718446e-07,
      "loss": 2.3318,
      "step": 71340
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0859894752502441,
      "learning_rate": 2.516313489718414e-07,
      "loss": 2.1378,
      "step": 71341
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0673317909240723,
      "learning_rate": 2.515395730979697e-07,
      "loss": 2.2218,
      "step": 71342
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.258675456047058,
      "learning_rate": 2.514478137503873e-07,
      "loss": 2.462,
      "step": 71343
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.032187581062317,
      "learning_rate": 2.5135607092924843e-07,
      "loss": 2.2233,
      "step": 71344
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0819296836853027,
      "learning_rate": 2.512643446347074e-07,
      "loss": 2.3015,
      "step": 71345
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0208781957626343,
      "learning_rate": 2.511726348669219e-07,
      "loss": 2.2602,
      "step": 71346
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1967276334762573,
      "learning_rate": 2.5108094162604625e-07,
      "loss": 2.2917,
      "step": 71347
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0336830615997314,
      "learning_rate": 2.5098926491223694e-07,
      "loss": 2.4482,
      "step": 71348
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.081824779510498,
      "learning_rate": 2.508976047256473e-07,
      "loss": 2.29,
      "step": 71349
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1983271837234497,
      "learning_rate": 2.508059610664348e-07,
      "loss": 2.3255,
      "step": 71350
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1511160135269165,
      "learning_rate": 2.5071433393475506e-07,
      "loss": 2.4669,
      "step": 71351
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0979381799697876,
      "learning_rate": 2.506227233307612e-07,
      "loss": 2.4171,
      "step": 71352
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1583307981491089,
      "learning_rate": 2.5053112925460977e-07,
      "loss": 2.3188,
      "step": 71353
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.990356981754303,
      "learning_rate": 2.5043955170645615e-07,
      "loss": 2.1776,
      "step": 71354
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0833985805511475,
      "learning_rate": 2.5034799068645475e-07,
      "loss": 2.5696,
      "step": 71355
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0216349363327026,
      "learning_rate": 2.502564461947621e-07,
      "loss": 2.3441,
      "step": 71356
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0119428634643555,
      "learning_rate": 2.5016491823153244e-07,
      "loss": 2.3623,
      "step": 71357
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0397011041641235,
      "learning_rate": 2.5007340679692235e-07,
      "loss": 2.3734,
      "step": 71358
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0846312046051025,
      "learning_rate": 2.49981911891084e-07,
      "loss": 2.2151,
      "step": 71359
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0451273918151855,
      "learning_rate": 2.4989043351417606e-07,
      "loss": 2.2265,
      "step": 71360
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2355279922485352,
      "learning_rate": 2.497989716663507e-07,
      "loss": 2.5064,
      "step": 71361
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1802054643630981,
      "learning_rate": 2.497075263477655e-07,
      "loss": 2.4795,
      "step": 71362
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9715401530265808,
      "learning_rate": 2.4961609755857266e-07,
      "loss": 2.6089,
      "step": 71363
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1498830318450928,
      "learning_rate": 2.495246852989308e-07,
      "loss": 2.548,
      "step": 71364
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2518949508666992,
      "learning_rate": 2.4943328956899104e-07,
      "loss": 2.3652,
      "step": 71365
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1223299503326416,
      "learning_rate": 2.49341910368911e-07,
      "loss": 2.5327,
      "step": 71366
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.061055302619934,
      "learning_rate": 2.492505476988438e-07,
      "loss": 2.4659,
      "step": 71367
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9807233214378357,
      "learning_rate": 2.4915920155894614e-07,
      "loss": 2.1876,
      "step": 71368
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1164487600326538,
      "learning_rate": 2.490678719493711e-07,
      "loss": 2.2987,
      "step": 71369
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.04618239402771,
      "learning_rate": 2.4897655887027527e-07,
      "loss": 2.2838,
      "step": 71370
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.058575987815857,
      "learning_rate": 2.4888526232181187e-07,
      "loss": 2.3932,
      "step": 71371
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1887315511703491,
      "learning_rate": 2.487939823041363e-07,
      "loss": 2.3788,
      "step": 71372
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0586878061294556,
      "learning_rate": 2.4870271881740406e-07,
      "loss": 2.1572,
      "step": 71373
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0497217178344727,
      "learning_rate": 2.4861147186176713e-07,
      "loss": 2.1704,
      "step": 71374
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0591856241226196,
      "learning_rate": 2.485202414373844e-07,
      "loss": 2.2321,
      "step": 71375
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0795316696166992,
      "learning_rate": 2.484290275444068e-07,
      "loss": 2.5289,
      "step": 71376
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0786073207855225,
      "learning_rate": 2.4833783018299197e-07,
      "loss": 2.3901,
      "step": 71377
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0425610542297363,
      "learning_rate": 2.48246649353292e-07,
      "loss": 2.0679,
      "step": 71378
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0252506732940674,
      "learning_rate": 2.481554850554635e-07,
      "loss": 2.3969,
      "step": 71379
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.117133378982544,
      "learning_rate": 2.480643372896585e-07,
      "loss": 2.4796,
      "step": 71380
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0831929445266724,
      "learning_rate": 2.479732060560336e-07,
      "loss": 2.2318,
      "step": 71381
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0888009071350098,
      "learning_rate": 2.4788209135474304e-07,
      "loss": 2.2435,
      "step": 71382
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3928107023239136,
      "learning_rate": 2.4779099318594123e-07,
      "loss": 2.4157,
      "step": 71383
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0128923654556274,
      "learning_rate": 2.4769991154978137e-07,
      "loss": 2.3913,
      "step": 71384
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0231760740280151,
      "learning_rate": 2.476088464464199e-07,
      "loss": 2.3018,
      "step": 71385
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0602309703826904,
      "learning_rate": 2.4751779787600903e-07,
      "loss": 2.2414,
      "step": 71386
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1977771520614624,
      "learning_rate": 2.474267658387053e-07,
      "loss": 2.2047,
      "step": 71387
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2110340595245361,
      "learning_rate": 2.4733575033466074e-07,
      "loss": 2.3538,
      "step": 71388
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1916025876998901,
      "learning_rate": 2.472447513640319e-07,
      "loss": 2.2858,
      "step": 71389
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1142863035202026,
      "learning_rate": 2.4715376892697094e-07,
      "loss": 2.088,
      "step": 71390
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1182481050491333,
      "learning_rate": 2.470628030236355e-07,
      "loss": 2.3496,
      "step": 71391
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0884813070297241,
      "learning_rate": 2.469718536541754e-07,
      "loss": 2.2719,
      "step": 71392
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0950145721435547,
      "learning_rate": 2.4688092081874835e-07,
      "loss": 2.4907,
      "step": 71393
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0159872770309448,
      "learning_rate": 2.4679000451750533e-07,
      "loss": 2.0653,
      "step": 71394
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0270590782165527,
      "learning_rate": 2.46699104750604e-07,
      "loss": 2.2044,
      "step": 71395
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0513228178024292,
      "learning_rate": 2.466082215181942e-07,
      "loss": 2.304,
      "step": 71396
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0854464769363403,
      "learning_rate": 2.4651735482043473e-07,
      "loss": 2.3078,
      "step": 71397
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1105422973632812,
      "learning_rate": 2.4642650465747654e-07,
      "loss": 2.2451,
      "step": 71398
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1195698976516724,
      "learning_rate": 2.46335671029474e-07,
      "loss": 2.2388,
      "step": 71399
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0718982219696045,
      "learning_rate": 2.462448539365825e-07,
      "loss": 2.4034,
      "step": 71400
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9770113229751587,
      "learning_rate": 2.461540533789541e-07,
      "loss": 2.2908,
      "step": 71401
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.087360143661499,
      "learning_rate": 2.460632693567455e-07,
      "loss": 2.3306,
      "step": 71402
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1536319255828857,
      "learning_rate": 2.459725018701065e-07,
      "loss": 2.5464,
      "step": 71403
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.096309781074524,
      "learning_rate": 2.458817509191946e-07,
      "loss": 2.5244,
      "step": 71404
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0772626399993896,
      "learning_rate": 2.457910165041622e-07,
      "loss": 2.5016,
      "step": 71405
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9763280749320984,
      "learning_rate": 2.457002986251633e-07,
      "loss": 2.2125,
      "step": 71406
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0896025896072388,
      "learning_rate": 2.4560959728235135e-07,
      "loss": 2.3061,
      "step": 71407
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3643778562545776,
      "learning_rate": 2.4551891247588054e-07,
      "loss": 2.2834,
      "step": 71408
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1082676649093628,
      "learning_rate": 2.4542824420590416e-07,
      "loss": 2.4416,
      "step": 71409
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0778290033340454,
      "learning_rate": 2.453375924725765e-07,
      "loss": 2.586,
      "step": 71410
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.123672604560852,
      "learning_rate": 2.4524695727604965e-07,
      "loss": 2.2635,
      "step": 71411
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0175771713256836,
      "learning_rate": 2.451563386164801e-07,
      "loss": 2.1249,
      "step": 71412
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9867252707481384,
      "learning_rate": 2.45065736494019e-07,
      "loss": 2.3118,
      "step": 71413
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1108319759368896,
      "learning_rate": 2.449751509088216e-07,
      "loss": 2.3889,
      "step": 71414
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1116595268249512,
      "learning_rate": 2.448845818610401e-07,
      "loss": 2.3312,
      "step": 71415
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9962034225463867,
      "learning_rate": 2.4479402935082994e-07,
      "loss": 2.151,
      "step": 71416
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2464449405670166,
      "learning_rate": 2.447034933783432e-07,
      "loss": 2.4678,
      "step": 71417
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0441508293151855,
      "learning_rate": 2.446129739437331e-07,
      "loss": 2.1882,
      "step": 71418
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1932538747787476,
      "learning_rate": 2.445224710471528e-07,
      "loss": 2.1707,
      "step": 71419
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2267413139343262,
      "learning_rate": 2.444319846887577e-07,
      "loss": 2.3336,
      "step": 71420
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.054170846939087,
      "learning_rate": 2.4434151486869895e-07,
      "loss": 2.0578,
      "step": 71421
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0134247541427612,
      "learning_rate": 2.4425106158713075e-07,
      "loss": 2.4929,
      "step": 71422
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.082553505897522,
      "learning_rate": 2.4416062484420743e-07,
      "loss": 2.104,
      "step": 71423
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0052260160446167,
      "learning_rate": 2.4407020464008113e-07,
      "loss": 2.2179,
      "step": 71424
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0070158243179321,
      "learning_rate": 2.43979800974905e-07,
      "loss": 2.3661,
      "step": 71425
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1276050806045532,
      "learning_rate": 2.4388941384883346e-07,
      "loss": 2.6624,
      "step": 71426
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0605463981628418,
      "learning_rate": 2.4379904326201966e-07,
      "loss": 2.1856,
      "step": 71427
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0170934200286865,
      "learning_rate": 2.4370868921461566e-07,
      "loss": 2.2487,
      "step": 71428
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0945513248443604,
      "learning_rate": 2.4361835170677473e-07,
      "loss": 2.3062,
      "step": 71429
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.288846492767334,
      "learning_rate": 2.435280307386512e-07,
      "loss": 2.4296,
      "step": 71430
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1472175121307373,
      "learning_rate": 2.434377263103971e-07,
      "loss": 2.1841,
      "step": 71431
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0473121404647827,
      "learning_rate": 2.4334743842216677e-07,
      "loss": 2.0471,
      "step": 71432
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9917241930961609,
      "learning_rate": 2.4325716707411127e-07,
      "loss": 2.1542,
      "step": 71433
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1079611778259277,
      "learning_rate": 2.4316691226638487e-07,
      "loss": 2.298,
      "step": 71434
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0245606899261475,
      "learning_rate": 2.430766739991408e-07,
      "loss": 2.2278,
      "step": 71435
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0463062524795532,
      "learning_rate": 2.4298645227253113e-07,
      "loss": 2.1552,
      "step": 71436
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0269886255264282,
      "learning_rate": 2.428962470867102e-07,
      "loss": 2.386,
      "step": 71437
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1224066019058228,
      "learning_rate": 2.4280605844183016e-07,
      "loss": 2.2833,
      "step": 71438
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0231854915618896,
      "learning_rate": 2.42715886338043e-07,
      "loss": 2.3944,
      "step": 71439
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.02541184425354,
      "learning_rate": 2.426257307755031e-07,
      "loss": 2.2227,
      "step": 71440
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0427006483078003,
      "learning_rate": 2.4253559175436256e-07,
      "loss": 2.1089,
      "step": 71441
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0076687335968018,
      "learning_rate": 2.4244546927477355e-07,
      "loss": 2.2767,
      "step": 71442
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0609183311462402,
      "learning_rate": 2.4235536333689134e-07,
      "loss": 2.2797,
      "step": 71443
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2157645225524902,
      "learning_rate": 2.4226527394086595e-07,
      "loss": 2.1452,
      "step": 71444
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9746962785720825,
      "learning_rate": 2.4217520108685166e-07,
      "loss": 2.2329,
      "step": 71445
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0505895614624023,
      "learning_rate": 2.4208514477499945e-07,
      "loss": 2.3995,
      "step": 71446
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0244016647338867,
      "learning_rate": 2.4199510500546363e-07,
      "loss": 2.3182,
      "step": 71447
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1069254875183105,
      "learning_rate": 2.4190508177839636e-07,
      "loss": 2.2692,
      "step": 71448
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0913279056549072,
      "learning_rate": 2.418150750939496e-07,
      "loss": 2.3942,
      "step": 71449
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4139275550842285,
      "learning_rate": 2.417250849522768e-07,
      "loss": 2.2857,
      "step": 71450
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9513769745826721,
      "learning_rate": 2.41635111353532e-07,
      "loss": 2.1222,
      "step": 71451
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1201146841049194,
      "learning_rate": 2.415451542978642e-07,
      "loss": 2.3094,
      "step": 71452
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1544967889785767,
      "learning_rate": 2.414552137854276e-07,
      "loss": 2.0914,
      "step": 71453
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1074126958847046,
      "learning_rate": 2.413652898163765e-07,
      "loss": 2.2524,
      "step": 71454
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1881352663040161,
      "learning_rate": 2.412753823908598e-07,
      "loss": 2.2291,
      "step": 71455
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1060562133789062,
      "learning_rate": 2.411854915090328e-07,
      "loss": 2.1815,
      "step": 71456
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.232845664024353,
      "learning_rate": 2.4109561717104765e-07,
      "loss": 2.2253,
      "step": 71457
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2106951475143433,
      "learning_rate": 2.4100575937705426e-07,
      "loss": 2.3063,
      "step": 71458
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0019786357879639,
      "learning_rate": 2.4091591812720696e-07,
      "loss": 2.0593,
      "step": 71459
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0292026996612549,
      "learning_rate": 2.4082609342165775e-07,
      "loss": 2.2577,
      "step": 71460
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0568897724151611,
      "learning_rate": 2.4073628526055993e-07,
      "loss": 2.2657,
      "step": 71461
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9990673065185547,
      "learning_rate": 2.406464936440633e-07,
      "loss": 2.2318,
      "step": 71462
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9867403507232666,
      "learning_rate": 2.405567185723223e-07,
      "loss": 2.164,
      "step": 71463
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.168827772140503,
      "learning_rate": 2.4046696004548786e-07,
      "loss": 2.4033,
      "step": 71464
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1504695415496826,
      "learning_rate": 2.403772180637121e-07,
      "loss": 2.3259,
      "step": 71465
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1111478805541992,
      "learning_rate": 2.4028749262714923e-07,
      "loss": 2.2023,
      "step": 71466
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2128227949142456,
      "learning_rate": 2.401977837359482e-07,
      "loss": 2.2597,
      "step": 71467
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9873881340026855,
      "learning_rate": 2.4010809139026313e-07,
      "loss": 2.0764,
      "step": 71468
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2862452268600464,
      "learning_rate": 2.400184155902452e-07,
      "loss": 2.4398,
      "step": 71469
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9908087253570557,
      "learning_rate": 2.399287563360486e-07,
      "loss": 2.4687,
      "step": 71470
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.055701494216919,
      "learning_rate": 2.398391136278222e-07,
      "loss": 2.1755,
      "step": 71471
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1413673162460327,
      "learning_rate": 2.3974948746571915e-07,
      "loss": 2.3627,
      "step": 71472
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.026429533958435,
      "learning_rate": 2.396598778498915e-07,
      "loss": 2.2228,
      "step": 71473
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0229175090789795,
      "learning_rate": 2.395702847804915e-07,
      "loss": 2.0963,
      "step": 71474
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0292248725891113,
      "learning_rate": 2.3948070825767113e-07,
      "loss": 2.2909,
      "step": 71475
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1576100587844849,
      "learning_rate": 2.393911482815814e-07,
      "loss": 2.3802,
      "step": 71476
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1102573871612549,
      "learning_rate": 2.393016048523744e-07,
      "loss": 2.1756,
      "step": 71477
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1767197847366333,
      "learning_rate": 2.392120779702034e-07,
      "loss": 2.453,
      "step": 71478
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1710249185562134,
      "learning_rate": 2.3912256763521715e-07,
      "loss": 2.4607,
      "step": 71479
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2146042585372925,
      "learning_rate": 2.3903307384756993e-07,
      "loss": 2.3491,
      "step": 71480
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0150830745697021,
      "learning_rate": 2.3894359660741274e-07,
      "loss": 2.4868,
      "step": 71481
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.184794306755066,
      "learning_rate": 2.388541359148977e-07,
      "loss": 2.365,
      "step": 71482
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1124787330627441,
      "learning_rate": 2.387646917701758e-07,
      "loss": 2.533,
      "step": 71483
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.989264965057373,
      "learning_rate": 2.386752641733991e-07,
      "loss": 2.176,
      "step": 71484
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0762420892715454,
      "learning_rate": 2.385858531247176e-07,
      "loss": 2.241,
      "step": 71485
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0944173336029053,
      "learning_rate": 2.384964586242866e-07,
      "loss": 2.3163,
      "step": 71486
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0502526760101318,
      "learning_rate": 2.384070806722527e-07,
      "loss": 2.326,
      "step": 71487
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1103363037109375,
      "learning_rate": 2.3831771926877135e-07,
      "loss": 2.5921,
      "step": 71488
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1589394807815552,
      "learning_rate": 2.3822837441399348e-07,
      "loss": 2.2435,
      "step": 71489
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4804723262786865,
      "learning_rate": 2.3813904610806792e-07,
      "loss": 2.3067,
      "step": 71490
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0878171920776367,
      "learning_rate": 2.3804973435114898e-07,
      "loss": 2.152,
      "step": 71491
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2921360731124878,
      "learning_rate": 2.379604391433865e-07,
      "loss": 2.4255,
      "step": 71492
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0002833604812622,
      "learning_rate": 2.3787116048493375e-07,
      "loss": 2.482,
      "step": 71493
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1419591903686523,
      "learning_rate": 2.3778189837593946e-07,
      "loss": 2.1603,
      "step": 71494
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.141687273979187,
      "learning_rate": 2.3769265281655685e-07,
      "loss": 2.2709,
      "step": 71495
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2096850872039795,
      "learning_rate": 2.376034238069369e-07,
      "loss": 2.5139,
      "step": 71496
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0672813653945923,
      "learning_rate": 2.3751421134723064e-07,
      "loss": 2.3236,
      "step": 71497
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0986835956573486,
      "learning_rate": 2.3742501543758789e-07,
      "loss": 2.4594,
      "step": 71498
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0477184057235718,
      "learning_rate": 2.373358360781619e-07,
      "loss": 2.2873,
      "step": 71499
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.081102967262268,
      "learning_rate": 2.3724667326910256e-07,
      "loss": 2.3782,
      "step": 71500
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0552319288253784,
      "learning_rate": 2.3715752701056305e-07,
      "loss": 2.4659,
      "step": 71501
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1758900880813599,
      "learning_rate": 2.3706839730269215e-07,
      "loss": 2.3342,
      "step": 71502
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0841975212097168,
      "learning_rate": 2.36979284145642e-07,
      "loss": 2.2796,
      "step": 71503
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0480722188949585,
      "learning_rate": 2.368901875395635e-07,
      "loss": 2.2236,
      "step": 71504
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0419400930404663,
      "learning_rate": 2.3680110748460771e-07,
      "loss": 2.362,
      "step": 71505
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0096839666366577,
      "learning_rate": 2.3671204398092562e-07,
      "loss": 2.307,
      "step": 71506
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1023719310760498,
      "learning_rate": 2.3662299702866932e-07,
      "loss": 2.195,
      "step": 71507
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0372200012207031,
      "learning_rate": 2.3653396662798756e-07,
      "loss": 2.3688,
      "step": 71508
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0797829627990723,
      "learning_rate": 2.3644495277903246e-07,
      "loss": 2.2808,
      "step": 71509
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0458769798278809,
      "learning_rate": 2.363559554819561e-07,
      "loss": 2.3804,
      "step": 71510
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.037839651107788,
      "learning_rate": 2.362669747369084e-07,
      "loss": 2.3664,
      "step": 71511
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1233140230178833,
      "learning_rate": 2.3617801054403811e-07,
      "loss": 2.3494,
      "step": 71512
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.157008171081543,
      "learning_rate": 2.3608906290349953e-07,
      "loss": 2.1791,
      "step": 71513
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1180826425552368,
      "learning_rate": 2.3600013181544036e-07,
      "loss": 2.1697,
      "step": 71514
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.085947871208191,
      "learning_rate": 2.3591121728001376e-07,
      "loss": 2.486,
      "step": 71515
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0638773441314697,
      "learning_rate": 2.3582231929736854e-07,
      "loss": 2.5141,
      "step": 71516
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.039547085762024,
      "learning_rate": 2.357334378676579e-07,
      "loss": 2.2923,
      "step": 71517
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1409467458724976,
      "learning_rate": 2.356445729910306e-07,
      "loss": 2.2036,
      "step": 71518
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0268036127090454,
      "learning_rate": 2.3555572466763653e-07,
      "loss": 2.3493,
      "step": 71519
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0966769456863403,
      "learning_rate": 2.3546689289762893e-07,
      "loss": 2.3223,
      "step": 71520
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.085085391998291,
      "learning_rate": 2.3537807768115538e-07,
      "loss": 2.2831,
      "step": 71521
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.038153886795044,
      "learning_rate": 2.3528927901837028e-07,
      "loss": 2.4316,
      "step": 71522
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0745002031326294,
      "learning_rate": 2.3520049690942125e-07,
      "loss": 2.3709,
      "step": 71523
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0285722017288208,
      "learning_rate": 2.351117313544593e-07,
      "loss": 2.168,
      "step": 71524
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0419929027557373,
      "learning_rate": 2.350229823536332e-07,
      "loss": 2.2604,
      "step": 71525
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.024880290031433,
      "learning_rate": 2.3493424990709723e-07,
      "loss": 2.2875,
      "step": 71526
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3406063318252563,
      "learning_rate": 2.348455340149991e-07,
      "loss": 2.19,
      "step": 71527
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2381314039230347,
      "learning_rate": 2.3475683467749088e-07,
      "loss": 2.5776,
      "step": 71528
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0155762434005737,
      "learning_rate": 2.3466815189472025e-07,
      "loss": 2.075,
      "step": 71529
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1219818592071533,
      "learning_rate": 2.3457948566684152e-07,
      "loss": 2.1862,
      "step": 71530
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0443823337554932,
      "learning_rate": 2.3449083599400125e-07,
      "loss": 2.367,
      "step": 71531
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.070120096206665,
      "learning_rate": 2.344022028763515e-07,
      "loss": 2.2271,
      "step": 71532
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4166818857192993,
      "learning_rate": 2.3431358631404223e-07,
      "loss": 2.1759,
      "step": 71533
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0039469003677368,
      "learning_rate": 2.3422498630722435e-07,
      "loss": 2.3975,
      "step": 71534
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1724644899368286,
      "learning_rate": 2.3413640285604665e-07,
      "loss": 2.2321,
      "step": 71535
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.069427728652954,
      "learning_rate": 2.3404783596066238e-07,
      "loss": 2.2873,
      "step": 71536
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1230844259262085,
      "learning_rate": 2.3395928562121694e-07,
      "loss": 2.5229,
      "step": 71537
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0962857007980347,
      "learning_rate": 2.3387075183786468e-07,
      "loss": 2.5445,
      "step": 71538
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1043885946273804,
      "learning_rate": 2.3378223461075212e-07,
      "loss": 2.3829,
      "step": 71539
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.152855634689331,
      "learning_rate": 2.3369373394003247e-07,
      "loss": 2.2245,
      "step": 71540
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0785149335861206,
      "learning_rate": 2.3360524982585343e-07,
      "loss": 2.3049,
      "step": 71541
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1341429948806763,
      "learning_rate": 2.3351678226836705e-07,
      "loss": 2.2751,
      "step": 71542
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.087127923965454,
      "learning_rate": 2.3342833126772213e-07,
      "loss": 2.4172,
      "step": 71543
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0630384683609009,
      "learning_rate": 2.3333989682406855e-07,
      "loss": 2.4927,
      "step": 71544
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9529294967651367,
      "learning_rate": 2.3325147893755617e-07,
      "loss": 2.2909,
      "step": 71545
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1543164253234863,
      "learning_rate": 2.331630776083349e-07,
      "loss": 2.4034,
      "step": 71546
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0793505907058716,
      "learning_rate": 2.3307469283655571e-07,
      "loss": 2.3383,
      "step": 71547
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0606889724731445,
      "learning_rate": 2.3298632462236625e-07,
      "loss": 2.2338,
      "step": 71548
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0356318950653076,
      "learning_rate": 2.3289797296591977e-07,
      "loss": 2.3966,
      "step": 71549
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0084890127182007,
      "learning_rate": 2.3280963786736278e-07,
      "loss": 2.323,
      "step": 71550
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0242204666137695,
      "learning_rate": 2.3272131932684627e-07,
      "loss": 2.2219,
      "step": 71551
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0132410526275635,
      "learning_rate": 2.3263301734451903e-07,
      "loss": 2.3365,
      "step": 71552
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0379148721694946,
      "learning_rate": 2.3254473192053317e-07,
      "loss": 2.4036,
      "step": 71553
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.021384358406067,
      "learning_rate": 2.324564630550352e-07,
      "loss": 2.3817,
      "step": 71554
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0602937936782837,
      "learning_rate": 2.3236821074817727e-07,
      "loss": 1.9844,
      "step": 71555
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0708242654800415,
      "learning_rate": 2.3227997500010702e-07,
      "loss": 2.1341,
      "step": 71556
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.100468397140503,
      "learning_rate": 2.3219175581097653e-07,
      "loss": 2.3255,
      "step": 71557
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0846741199493408,
      "learning_rate": 2.3210355318093348e-07,
      "loss": 2.3056,
      "step": 71558
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1434009075164795,
      "learning_rate": 2.3201536711012774e-07,
      "loss": 2.3367,
      "step": 71559
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1152156591415405,
      "learning_rate": 2.3192719759870807e-07,
      "loss": 2.3004,
      "step": 71560
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.184362530708313,
      "learning_rate": 2.318390446468255e-07,
      "loss": 2.3284,
      "step": 71561
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0164661407470703,
      "learning_rate": 2.3175090825462986e-07,
      "loss": 2.6314,
      "step": 71562
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.252744436264038,
      "learning_rate": 2.3166278842226885e-07,
      "loss": 2.1393,
      "step": 71563
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2023502588272095,
      "learning_rate": 2.3157468514989124e-07,
      "loss": 2.2804,
      "step": 71564
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.717798948287964,
      "learning_rate": 2.314865984376491e-07,
      "loss": 2.2413,
      "step": 71565
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0515118837356567,
      "learning_rate": 2.3139852828568788e-07,
      "loss": 2.2576,
      "step": 71566
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0515508651733398,
      "learning_rate": 2.313104746941619e-07,
      "loss": 2.254,
      "step": 71567
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1060410737991333,
      "learning_rate": 2.312224376632166e-07,
      "loss": 2.2297,
      "step": 71568
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.18869149684906,
      "learning_rate": 2.31134417193003e-07,
      "loss": 2.3109,
      "step": 71569
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0406544208526611,
      "learning_rate": 2.3104641328366872e-07,
      "loss": 2.2539,
      "step": 71570
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.197265863418579,
      "learning_rate": 2.3095842593536477e-07,
      "loss": 2.4012,
      "step": 71571
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1578923463821411,
      "learning_rate": 2.308704551482399e-07,
      "loss": 2.2391,
      "step": 71572
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1572997570037842,
      "learning_rate": 2.307825009224418e-07,
      "loss": 2.341,
      "step": 71573
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.135206937789917,
      "learning_rate": 2.3069456325812145e-07,
      "loss": 2.3634,
      "step": 71574
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0158588886260986,
      "learning_rate": 2.306066421554276e-07,
      "loss": 2.4106,
      "step": 71575
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0978143215179443,
      "learning_rate": 2.3051873761450684e-07,
      "loss": 2.3729,
      "step": 71576
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.050577998161316,
      "learning_rate": 2.3043084963551232e-07,
      "loss": 2.4177,
      "step": 71577
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.052630066871643,
      "learning_rate": 2.3034297821858952e-07,
      "loss": 2.2666,
      "step": 71578
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0536116361618042,
      "learning_rate": 2.3025512336388945e-07,
      "loss": 2.5439,
      "step": 71579
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.120267391204834,
      "learning_rate": 2.3016728507156084e-07,
      "loss": 2.4726,
      "step": 71580
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0614609718322754,
      "learning_rate": 2.3007946334175134e-07,
      "loss": 2.1987,
      "step": 71581
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0151398181915283,
      "learning_rate": 2.2999165817461088e-07,
      "loss": 2.3112,
      "step": 71582
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.097360372543335,
      "learning_rate": 2.299038695702871e-07,
      "loss": 2.2161,
      "step": 71583
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1455470323562622,
      "learning_rate": 2.2981609752893097e-07,
      "loss": 2.3408,
      "step": 71584
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3006964921951294,
      "learning_rate": 2.2972834205068905e-07,
      "loss": 2.3971,
      "step": 71585
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2023621797561646,
      "learning_rate": 2.2964060313571236e-07,
      "loss": 2.3208,
      "step": 71586
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1387865543365479,
      "learning_rate": 2.295528807841474e-07,
      "loss": 2.2266,
      "step": 71587
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.036834716796875,
      "learning_rate": 2.2946517499614518e-07,
      "loss": 2.2341,
      "step": 71588
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2116458415985107,
      "learning_rate": 2.2937748577185336e-07,
      "loss": 2.4606,
      "step": 71589
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.485916256904602,
      "learning_rate": 2.2928981311141963e-07,
      "loss": 2.2739,
      "step": 71590
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9868713617324829,
      "learning_rate": 2.292021570149927e-07,
      "loss": 2.1175,
      "step": 71591
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0699249505996704,
      "learning_rate": 2.291145174827225e-07,
      "loss": 2.205,
      "step": 71592
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1050184965133667,
      "learning_rate": 2.2902689451475557e-07,
      "loss": 2.4107,
      "step": 71593
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1961793899536133,
      "learning_rate": 2.28939288111244e-07,
      "loss": 2.6182,
      "step": 71594
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.103390097618103,
      "learning_rate": 2.288516982723321e-07,
      "loss": 2.4355,
      "step": 71595
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0757544040679932,
      "learning_rate": 2.2876412499817202e-07,
      "loss": 2.2487,
      "step": 71596
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1576865911483765,
      "learning_rate": 2.2867656828890917e-07,
      "loss": 2.058,
      "step": 71597
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1843547821044922,
      "learning_rate": 2.2858902814469451e-07,
      "loss": 2.3582,
      "step": 71598
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0863182544708252,
      "learning_rate": 2.2850150456567467e-07,
      "loss": 2.0311,
      "step": 71599
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.027134895324707,
      "learning_rate": 2.2841399755199945e-07,
      "loss": 2.2832,
      "step": 71600
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0907024145126343,
      "learning_rate": 2.2832650710381653e-07,
      "loss": 2.2748,
      "step": 71601
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4558806419372559,
      "learning_rate": 2.2823903322127362e-07,
      "loss": 2.3638,
      "step": 71602
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0423685312271118,
      "learning_rate": 2.281515759045183e-07,
      "loss": 2.3384,
      "step": 71603
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.062320590019226,
      "learning_rate": 2.2806413515370163e-07,
      "loss": 2.4842,
      "step": 71604
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0777993202209473,
      "learning_rate": 2.2797671096897013e-07,
      "loss": 2.3434,
      "step": 71605
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0379948616027832,
      "learning_rate": 2.2788930335047145e-07,
      "loss": 2.3401,
      "step": 71606
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1412711143493652,
      "learning_rate": 2.2780191229835546e-07,
      "loss": 2.4045,
      "step": 71607
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0327866077423096,
      "learning_rate": 2.2771453781276876e-07,
      "loss": 2.3452,
      "step": 71608
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0931984186172485,
      "learning_rate": 2.2762717989386007e-07,
      "loss": 2.4195,
      "step": 71609
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0642576217651367,
      "learning_rate": 2.2753983854177708e-07,
      "loss": 2.4079,
      "step": 71610
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8616863489151,
      "learning_rate": 2.2745251375666856e-07,
      "loss": 2.1483,
      "step": 71611
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0497732162475586,
      "learning_rate": 2.2736520553868214e-07,
      "loss": 2.2199,
      "step": 71612
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1103934049606323,
      "learning_rate": 2.2727791388796662e-07,
      "loss": 2.2161,
      "step": 71613
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4655520915985107,
      "learning_rate": 2.271906388046674e-07,
      "loss": 2.6498,
      "step": 71614
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0954058170318604,
      "learning_rate": 2.2710338028893664e-07,
      "loss": 2.3261,
      "step": 71615
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0265988111495972,
      "learning_rate": 2.2701613834091864e-07,
      "loss": 2.3049,
      "step": 71616
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1996817588806152,
      "learning_rate": 2.2692891296076325e-07,
      "loss": 2.4166,
      "step": 71617
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0860563516616821,
      "learning_rate": 2.2684170414861706e-07,
      "loss": 2.2663,
      "step": 71618
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0261549949645996,
      "learning_rate": 2.2675451190462883e-07,
      "loss": 2.4192,
      "step": 71619
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0979963541030884,
      "learning_rate": 2.266673362289451e-07,
      "loss": 2.2849,
      "step": 71620
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.136682152748108,
      "learning_rate": 2.2658017712171576e-07,
      "loss": 2.4268,
      "step": 71621
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1239123344421387,
      "learning_rate": 2.2649303458308735e-07,
      "loss": 2.3408,
      "step": 71622
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0452048778533936,
      "learning_rate": 2.2640590861320755e-07,
      "loss": 2.059,
      "step": 71623
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1700620651245117,
      "learning_rate": 2.26318799212224e-07,
      "loss": 2.2457,
      "step": 71624
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1730983257293701,
      "learning_rate": 2.2623170638028436e-07,
      "loss": 2.3575,
      "step": 71625
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0907340049743652,
      "learning_rate": 2.261446301175363e-07,
      "loss": 2.2041,
      "step": 71626
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1129902601242065,
      "learning_rate": 2.2605757042412856e-07,
      "loss": 2.4095,
      "step": 71627
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1122069358825684,
      "learning_rate": 2.2597052730020884e-07,
      "loss": 2.3687,
      "step": 71628
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2058894634246826,
      "learning_rate": 2.2588350074592257e-07,
      "loss": 2.3515,
      "step": 71629
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.161454677581787,
      "learning_rate": 2.2579649076141742e-07,
      "loss": 2.4815,
      "step": 71630
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1281412839889526,
      "learning_rate": 2.257094973468432e-07,
      "loss": 2.344,
      "step": 71631
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0710890293121338,
      "learning_rate": 2.2562252050234434e-07,
      "loss": 2.5239,
      "step": 71632
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.18892502784729,
      "learning_rate": 2.2553556022807178e-07,
      "loss": 2.3397,
      "step": 71633
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0750278234481812,
      "learning_rate": 2.2544861652416983e-07,
      "loss": 2.5038,
      "step": 71634
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2707626819610596,
      "learning_rate": 2.253616893907884e-07,
      "loss": 2.328,
      "step": 71635
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2476462125778198,
      "learning_rate": 2.252747788280729e-07,
      "loss": 2.2145,
      "step": 71636
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0617578029632568,
      "learning_rate": 2.2518788483617104e-07,
      "loss": 2.2546,
      "step": 71637
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0651758909225464,
      "learning_rate": 2.2510100741523156e-07,
      "loss": 2.3285,
      "step": 71638
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0139226913452148,
      "learning_rate": 2.2501414656539987e-07,
      "loss": 2.1863,
      "step": 71639
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0783308744430542,
      "learning_rate": 2.249273022868248e-07,
      "loss": 2.6519,
      "step": 71640
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3647741079330444,
      "learning_rate": 2.2484047457965286e-07,
      "loss": 2.334,
      "step": 71641
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1197741031646729,
      "learning_rate": 2.247536634440306e-07,
      "loss": 2.4889,
      "step": 71642
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0783846378326416,
      "learning_rate": 2.246668688801057e-07,
      "loss": 2.2027,
      "step": 71643
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1355295181274414,
      "learning_rate": 2.245800908880258e-07,
      "loss": 2.403,
      "step": 71644
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0382839441299438,
      "learning_rate": 2.2449332946793745e-07,
      "loss": 2.3067,
      "step": 71645
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0370911359786987,
      "learning_rate": 2.244065846199883e-07,
      "loss": 2.3027,
      "step": 71646
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1558241844177246,
      "learning_rate": 2.2431985634432386e-07,
      "loss": 2.3614,
      "step": 71647
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1073369979858398,
      "learning_rate": 2.242331446410928e-07,
      "loss": 2.1957,
      "step": 71648
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1980621814727783,
      "learning_rate": 2.2414644951044174e-07,
      "loss": 2.143,
      "step": 71649
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1681662797927856,
      "learning_rate": 2.240597709525183e-07,
      "loss": 2.4678,
      "step": 71650
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.043113112449646,
      "learning_rate": 2.2397310896746794e-07,
      "loss": 2.5154,
      "step": 71651
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1453211307525635,
      "learning_rate": 2.2388646355543832e-07,
      "loss": 2.5959,
      "step": 71652
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.226765751838684,
      "learning_rate": 2.2379983471657596e-07,
      "loss": 2.556,
      "step": 71653
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1470950841903687,
      "learning_rate": 2.2371322245102855e-07,
      "loss": 2.1745,
      "step": 71654
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0881551504135132,
      "learning_rate": 2.2362662675894265e-07,
      "loss": 2.5908,
      "step": 71655
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.00233793258667,
      "learning_rate": 2.2354004764046479e-07,
      "loss": 2.4287,
      "step": 71656
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0972415208816528,
      "learning_rate": 2.234534850957415e-07,
      "loss": 2.4763,
      "step": 71657
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3156834840774536,
      "learning_rate": 2.2336693912492048e-07,
      "loss": 2.2151,
      "step": 71658
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.167902946472168,
      "learning_rate": 2.2328040972814602e-07,
      "loss": 2.4412,
      "step": 71659
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0841307640075684,
      "learning_rate": 2.2319389690556803e-07,
      "loss": 2.2219,
      "step": 71660
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0256321430206299,
      "learning_rate": 2.2310740065733193e-07,
      "loss": 2.2168,
      "step": 71661
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9656191468238831,
      "learning_rate": 2.2302092098358318e-07,
      "loss": 2.4447,
      "step": 71662
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0041362047195435,
      "learning_rate": 2.2293445788447055e-07,
      "loss": 2.4019,
      "step": 71663
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0509086847305298,
      "learning_rate": 2.2284801136013834e-07,
      "loss": 2.3854,
      "step": 71664
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1295479536056519,
      "learning_rate": 2.2276158141073534e-07,
      "loss": 2.3149,
      "step": 71665
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0815895795822144,
      "learning_rate": 2.2267516803640476e-07,
      "loss": 2.3312,
      "step": 71666
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9652872085571289,
      "learning_rate": 2.225887712372976e-07,
      "loss": 2.322,
      "step": 71667
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0767983198165894,
      "learning_rate": 2.2250239101355708e-07,
      "loss": 2.2946,
      "step": 71668
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.541399598121643,
      "learning_rate": 2.2241602736533086e-07,
      "loss": 2.3005,
      "step": 71669
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0870448350906372,
      "learning_rate": 2.2232968029276435e-07,
      "loss": 2.3158,
      "step": 71670
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.246770977973938,
      "learning_rate": 2.2224334979600526e-07,
      "loss": 2.2308,
      "step": 71671
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2384334802627563,
      "learning_rate": 2.2215703587519898e-07,
      "loss": 2.0072,
      "step": 71672
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1479077339172363,
      "learning_rate": 2.220707385304921e-07,
      "loss": 2.1815,
      "step": 71673
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.038099765777588,
      "learning_rate": 2.2198445776203115e-07,
      "loss": 2.538,
      "step": 71674
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0881708860397339,
      "learning_rate": 2.2189819356996157e-07,
      "loss": 2.0871,
      "step": 71675
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.064764142036438,
      "learning_rate": 2.21811945954431e-07,
      "loss": 2.3988,
      "step": 71676
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1503078937530518,
      "learning_rate": 2.2172571491558492e-07,
      "loss": 2.3906,
      "step": 71677
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3585705757141113,
      "learning_rate": 2.2163950045356876e-07,
      "loss": 2.4455,
      "step": 71678
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2405248880386353,
      "learning_rate": 2.2155330256853014e-07,
      "loss": 2.5441,
      "step": 71679
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1044563055038452,
      "learning_rate": 2.2146712126061343e-07,
      "loss": 2.2622,
      "step": 71680
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3618615865707397,
      "learning_rate": 2.213809565299685e-07,
      "loss": 2.3258,
      "step": 71681
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3943766355514526,
      "learning_rate": 2.2129480837673523e-07,
      "loss": 2.4887,
      "step": 71682
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.114913821220398,
      "learning_rate": 2.2120867680106573e-07,
      "loss": 2.2552,
      "step": 71683
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0993866920471191,
      "learning_rate": 2.211225618031021e-07,
      "loss": 2.3148,
      "step": 71684
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1956759691238403,
      "learning_rate": 2.2103646338299202e-07,
      "loss": 2.2431,
      "step": 71685
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1162327527999878,
      "learning_rate": 2.2095038154087978e-07,
      "loss": 2.5219,
      "step": 71686
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0668448209762573,
      "learning_rate": 2.2086431627691417e-07,
      "loss": 2.3441,
      "step": 71687
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1718757152557373,
      "learning_rate": 2.2077826759123843e-07,
      "loss": 2.3432,
      "step": 71688
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0599403381347656,
      "learning_rate": 2.206922354840013e-07,
      "loss": 2.4049,
      "step": 71689
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1760567426681519,
      "learning_rate": 2.2060621995534603e-07,
      "loss": 2.2751,
      "step": 71690
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1071605682373047,
      "learning_rate": 2.2052022100541805e-07,
      "loss": 2.4367,
      "step": 71691
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.101632833480835,
      "learning_rate": 2.2043423863436607e-07,
      "loss": 2.3212,
      "step": 71692
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.078216314315796,
      "learning_rate": 2.203482728423323e-07,
      "loss": 2.1868,
      "step": 71693
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1475574970245361,
      "learning_rate": 2.202623236294654e-07,
      "loss": 2.4738,
      "step": 71694
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0713810920715332,
      "learning_rate": 2.2017639099591094e-07,
      "loss": 2.3698,
      "step": 71695
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9841586351394653,
      "learning_rate": 2.2009047494181202e-07,
      "loss": 2.2348,
      "step": 71696
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0562372207641602,
      "learning_rate": 2.2000457546731746e-07,
      "loss": 2.422,
      "step": 71697
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1709327697753906,
      "learning_rate": 2.1991869257257047e-07,
      "loss": 2.5318,
      "step": 71698
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0477335453033447,
      "learning_rate": 2.198328262577165e-07,
      "loss": 2.296,
      "step": 71699
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2258045673370361,
      "learning_rate": 2.1974697652290434e-07,
      "loss": 2.1267,
      "step": 71700
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0230551958084106,
      "learning_rate": 2.1966114336827493e-07,
      "loss": 2.2869,
      "step": 71701
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1838812828063965,
      "learning_rate": 2.195753267939782e-07,
      "loss": 2.3152,
      "step": 71702
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9959160685539246,
      "learning_rate": 2.1948952680015623e-07,
      "loss": 2.4639,
      "step": 71703
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1322965621948242,
      "learning_rate": 2.194037433869567e-07,
      "loss": 2.2724,
      "step": 71704
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0731509923934937,
      "learning_rate": 2.193179765545239e-07,
      "loss": 2.2821,
      "step": 71705
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.055486798286438,
      "learning_rate": 2.1923222630300334e-07,
      "loss": 2.0756,
      "step": 71706
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.062889575958252,
      "learning_rate": 2.191464926325415e-07,
      "loss": 2.1353,
      "step": 71707
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1113016605377197,
      "learning_rate": 2.1906077554328276e-07,
      "loss": 2.2741,
      "step": 71708
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0333486795425415,
      "learning_rate": 2.189750750353714e-07,
      "loss": 2.4879,
      "step": 71709
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.223360538482666,
      "learning_rate": 2.1888939110895513e-07,
      "loss": 2.181,
      "step": 71710
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0809320211410522,
      "learning_rate": 2.1880372376417603e-07,
      "loss": 2.2758,
      "step": 71711
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.104516863822937,
      "learning_rate": 2.1871807300118287e-07,
      "loss": 2.2119,
      "step": 71712
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1491388082504272,
      "learning_rate": 2.1863243882011776e-07,
      "loss": 2.2394,
      "step": 71713
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0586601495742798,
      "learning_rate": 2.1854682122112837e-07,
      "loss": 2.3246,
      "step": 71714
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9888407588005066,
      "learning_rate": 2.1846122020435677e-07,
      "loss": 2.5213,
      "step": 71715
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.008402943611145,
      "learning_rate": 2.183756357699518e-07,
      "loss": 2.2833,
      "step": 71716
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.133320689201355,
      "learning_rate": 2.1829006791805662e-07,
      "loss": 2.2183,
      "step": 71717
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0800341367721558,
      "learning_rate": 2.182045166488167e-07,
      "loss": 2.4113,
      "step": 71718
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.043595552444458,
      "learning_rate": 2.1811898196237634e-07,
      "loss": 2.251,
      "step": 71719
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0359621047973633,
      "learning_rate": 2.1803346385888213e-07,
      "loss": 2.216,
      "step": 71720
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0617674589157104,
      "learning_rate": 2.1794796233847615e-07,
      "loss": 2.2872,
      "step": 71721
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.077621340751648,
      "learning_rate": 2.1786247740130606e-07,
      "loss": 2.4573,
      "step": 71722
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.077116847038269,
      "learning_rate": 2.177770090475151e-07,
      "loss": 2.2823,
      "step": 71723
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2011942863464355,
      "learning_rate": 2.176915572772498e-07,
      "loss": 2.4097,
      "step": 71724
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4231101274490356,
      "learning_rate": 2.1760612209065446e-07,
      "loss": 2.2624,
      "step": 71725
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0805474519729614,
      "learning_rate": 2.1752070348787236e-07,
      "loss": 2.3741,
      "step": 71726
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1819789409637451,
      "learning_rate": 2.174353014690511e-07,
      "loss": 2.4285,
      "step": 71727
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1519770622253418,
      "learning_rate": 2.1734991603433176e-07,
      "loss": 2.2292,
      "step": 71728
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9682862162590027,
      "learning_rate": 2.1726454718386304e-07,
      "loss": 2.366,
      "step": 71729
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0520540475845337,
      "learning_rate": 2.1717919491778704e-07,
      "loss": 2.3356,
      "step": 71730
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0003373622894287,
      "learning_rate": 2.1709385923625037e-07,
      "loss": 2.2481,
      "step": 71731
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.29279625415802,
      "learning_rate": 2.170085401393951e-07,
      "loss": 2.0477,
      "step": 71732
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.110640287399292,
      "learning_rate": 2.1692323762736888e-07,
      "loss": 2.4374,
      "step": 71733
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0798882246017456,
      "learning_rate": 2.1683795170031385e-07,
      "loss": 2.1618,
      "step": 71734
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.054936170578003,
      "learning_rate": 2.1675268235837654e-07,
      "loss": 2.2846,
      "step": 71735
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9778533577919006,
      "learning_rate": 2.1666742960169908e-07,
      "loss": 2.279,
      "step": 71736
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0896133184432983,
      "learning_rate": 2.1658219343042796e-07,
      "loss": 2.3154,
      "step": 71737
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0241702795028687,
      "learning_rate": 2.1649697384470757e-07,
      "loss": 2.4318,
      "step": 71738
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1286468505859375,
      "learning_rate": 2.1641177084468222e-07,
      "loss": 2.2355,
      "step": 71739
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0051902532577515,
      "learning_rate": 2.1632658443049515e-07,
      "loss": 2.3456,
      "step": 71740
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0417394638061523,
      "learning_rate": 2.1624141460229175e-07,
      "loss": 2.1218,
      "step": 71741
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.055849552154541,
      "learning_rate": 2.161562613602164e-07,
      "loss": 2.3979,
      "step": 71742
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1879496574401855,
      "learning_rate": 2.160711247044145e-07,
      "loss": 2.2392,
      "step": 71743
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.176310420036316,
      "learning_rate": 2.1598600463502817e-07,
      "loss": 2.4603,
      "step": 71744
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1464824676513672,
      "learning_rate": 2.1590090115220397e-07,
      "loss": 2.2681,
      "step": 71745
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.161566972732544,
      "learning_rate": 2.158158142560851e-07,
      "loss": 2.2377,
      "step": 71746
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9985021352767944,
      "learning_rate": 2.1573074394681481e-07,
      "loss": 2.4374,
      "step": 71747
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1263117790222168,
      "learning_rate": 2.1564569022453852e-07,
      "loss": 2.2106,
      "step": 71748
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0751512050628662,
      "learning_rate": 2.1556065308940056e-07,
      "loss": 2.516,
      "step": 71749
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0388225317001343,
      "learning_rate": 2.1547563254154414e-07,
      "loss": 2.3209,
      "step": 71750
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0685557126998901,
      "learning_rate": 2.153906285811147e-07,
      "loss": 2.2852,
      "step": 71751
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1195483207702637,
      "learning_rate": 2.1530564120825436e-07,
      "loss": 2.4488,
      "step": 71752
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9714424014091492,
      "learning_rate": 2.1522067042310967e-07,
      "loss": 2.1882,
      "step": 71753
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0550936460494995,
      "learning_rate": 2.1513571622582275e-07,
      "loss": 2.1127,
      "step": 71754
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0744067430496216,
      "learning_rate": 2.15050778616539e-07,
      "loss": 2.3537,
      "step": 71755
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.098587989807129,
      "learning_rate": 2.1496585759540167e-07,
      "loss": 2.3554,
      "step": 71756
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0688596963882446,
      "learning_rate": 2.1488095316255398e-07,
      "loss": 2.3594,
      "step": 71757
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1763358116149902,
      "learning_rate": 2.1479606531814135e-07,
      "loss": 2.1936,
      "step": 71758
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1290898323059082,
      "learning_rate": 2.147111940623059e-07,
      "loss": 2.119,
      "step": 71759
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0498979091644287,
      "learning_rate": 2.1462633939519528e-07,
      "loss": 2.0609,
      "step": 71760
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0905706882476807,
      "learning_rate": 2.1454150131694827e-07,
      "loss": 2.2513,
      "step": 71761
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1760441064834595,
      "learning_rate": 2.1445667982771258e-07,
      "loss": 2.3743,
      "step": 71762
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.073777675628662,
      "learning_rate": 2.143718749276291e-07,
      "loss": 2.3093,
      "step": 71763
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.067931890487671,
      "learning_rate": 2.142870866168434e-07,
      "loss": 2.3081,
      "step": 71764
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0780469179153442,
      "learning_rate": 2.1420231489549859e-07,
      "loss": 2.2949,
      "step": 71765
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1023024320602417,
      "learning_rate": 2.1411755976374015e-07,
      "loss": 2.1038,
      "step": 71766
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.348014235496521,
      "learning_rate": 2.14032821221708e-07,
      "loss": 2.2386,
      "step": 71767
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.151877999305725,
      "learning_rate": 2.1394809926954973e-07,
      "loss": 2.3844,
      "step": 71768
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.097808599472046,
      "learning_rate": 2.138633939074064e-07,
      "loss": 2.4523,
      "step": 71769
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1734877824783325,
      "learning_rate": 2.1377870513542343e-07,
      "loss": 2.333,
      "step": 71770
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.060767412185669,
      "learning_rate": 2.1369403295374292e-07,
      "loss": 2.2282,
      "step": 71771
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9559401869773865,
      "learning_rate": 2.136093773625092e-07,
      "loss": 2.0933,
      "step": 71772
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0622984170913696,
      "learning_rate": 2.135247383618655e-07,
      "loss": 2.2808,
      "step": 71773
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1598602533340454,
      "learning_rate": 2.1344011595195614e-07,
      "loss": 2.3396,
      "step": 71774
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3298251628875732,
      "learning_rate": 2.1335551013292212e-07,
      "loss": 2.3936,
      "step": 71775
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9922935962677002,
      "learning_rate": 2.1327092090490997e-07,
      "loss": 2.3572,
      "step": 71776
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.058430552482605,
      "learning_rate": 2.1318634826806073e-07,
      "loss": 2.3349,
      "step": 71777
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0150092840194702,
      "learning_rate": 2.131017922225187e-07,
      "loss": 2.4325,
      "step": 71778
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1413509845733643,
      "learning_rate": 2.1301725276842712e-07,
      "loss": 2.3669,
      "step": 71779
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0926576852798462,
      "learning_rate": 2.1293272990593029e-07,
      "loss": 2.5354,
      "step": 71780
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9838132858276367,
      "learning_rate": 2.1284822363517033e-07,
      "loss": 2.0591,
      "step": 71781
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.074906349182129,
      "learning_rate": 2.1276373395628934e-07,
      "loss": 2.4878,
      "step": 71782
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.067965030670166,
      "learning_rate": 2.126792608694339e-07,
      "loss": 2.4103,
      "step": 71783
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6837972402572632,
      "learning_rate": 2.125948043747439e-07,
      "loss": 2.0557,
      "step": 71784
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0927197933197021,
      "learning_rate": 2.1251036447236472e-07,
      "loss": 2.3448,
      "step": 71785
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0100033283233643,
      "learning_rate": 2.1242594116243963e-07,
      "loss": 2.4681,
      "step": 71786
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9718130826950073,
      "learning_rate": 2.1234153444510963e-07,
      "loss": 2.4448,
      "step": 71787
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1124099493026733,
      "learning_rate": 2.1225714432051903e-07,
      "loss": 2.2866,
      "step": 71788
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9628309607505798,
      "learning_rate": 2.1217277078881104e-07,
      "loss": 2.0766,
      "step": 71789
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0654000043869019,
      "learning_rate": 2.120884138501289e-07,
      "loss": 2.204,
      "step": 71790
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.104449987411499,
      "learning_rate": 2.1200407350461471e-07,
      "loss": 2.4618,
      "step": 71791
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1038086414337158,
      "learning_rate": 2.119197497524117e-07,
      "loss": 2.18,
      "step": 71792
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0871162414550781,
      "learning_rate": 2.1183544259366417e-07,
      "loss": 2.5062,
      "step": 71793
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1530534029006958,
      "learning_rate": 2.1175115202851316e-07,
      "loss": 2.2398,
      "step": 71794
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2336596250534058,
      "learning_rate": 2.1166687805710296e-07,
      "loss": 2.311,
      "step": 71795
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0854394435882568,
      "learning_rate": 2.1158262067957458e-07,
      "loss": 2.4771,
      "step": 71796
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0326284170150757,
      "learning_rate": 2.1149837989607346e-07,
      "loss": 2.2166,
      "step": 71797
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0344345569610596,
      "learning_rate": 2.1141415570674062e-07,
      "loss": 2.3004,
      "step": 71798
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1352497339248657,
      "learning_rate": 2.1132994811172036e-07,
      "loss": 2.534,
      "step": 71799
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.9910380244255066,
      "learning_rate": 2.112457571111537e-07,
      "loss": 2.2801,
      "step": 71800
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.031563639640808,
      "learning_rate": 2.1116158270518382e-07,
      "loss": 2.3464,
      "step": 71801
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1476186513900757,
      "learning_rate": 2.11077424893954e-07,
      "loss": 2.2284,
      "step": 71802
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1586683988571167,
      "learning_rate": 2.109932836776063e-07,
      "loss": 2.4805,
      "step": 71803
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.144762396812439,
      "learning_rate": 2.1090915905628285e-07,
      "loss": 2.3639,
      "step": 71804
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0755555629730225,
      "learning_rate": 2.1082505103012908e-07,
      "loss": 2.4306,
      "step": 71805
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1732652187347412,
      "learning_rate": 2.1074095959928376e-07,
      "loss": 2.5686,
      "step": 71806
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.192681908607483,
      "learning_rate": 2.1065688476389236e-07,
      "loss": 2.4336,
      "step": 71807
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.1196019649505615,
      "learning_rate": 2.1057282652409583e-07,
      "loss": 2.3199,
      "step": 71808
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0770646333694458,
      "learning_rate": 2.1048878488003633e-07,
      "loss": 2.3327,
      "step": 71809
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9618151783943176,
      "learning_rate": 2.1040475983185815e-07,
      "loss": 2.1088,
      "step": 71810
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.144486904144287,
      "learning_rate": 2.103207513797023e-07,
      "loss": 2.3702,
      "step": 71811
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.053439736366272,
      "learning_rate": 2.1023675952371203e-07,
      "loss": 2.2446,
      "step": 71812
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0208581686019897,
      "learning_rate": 2.101527842640294e-07,
      "loss": 2.3525,
      "step": 71813
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2077122926712036,
      "learning_rate": 2.1006882560079655e-07,
      "loss": 2.2916,
      "step": 71814
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2315901517868042,
      "learning_rate": 2.0998488353415558e-07,
      "loss": 2.616,
      "step": 71815
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.03764808177948,
      "learning_rate": 2.099009580642497e-07,
      "loss": 2.1702,
      "step": 71816
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.019158124923706,
      "learning_rate": 2.0981704919121994e-07,
      "loss": 2.3355,
      "step": 71817
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.294333577156067,
      "learning_rate": 2.0973315691521058e-07,
      "loss": 2.3365,
      "step": 71818
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1090935468673706,
      "learning_rate": 2.096492812363604e-07,
      "loss": 2.3385,
      "step": 71819
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.174481987953186,
      "learning_rate": 2.095654221548149e-07,
      "loss": 2.3778,
      "step": 71820
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1490979194641113,
      "learning_rate": 2.0948157967071502e-07,
      "loss": 2.4092,
      "step": 71821
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.122402310371399,
      "learning_rate": 2.093977537842029e-07,
      "loss": 2.3586,
      "step": 71822
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9830554723739624,
      "learning_rate": 2.0931394449542065e-07,
      "loss": 2.2117,
      "step": 71823
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1495352983474731,
      "learning_rate": 2.0923015180451035e-07,
      "loss": 2.1807,
      "step": 71824
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.093902587890625,
      "learning_rate": 2.091463757116141e-07,
      "loss": 2.3595,
      "step": 71825
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0839824676513672,
      "learning_rate": 2.0906261621687517e-07,
      "loss": 2.4089,
      "step": 71826
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0636067390441895,
      "learning_rate": 2.0897887332043232e-07,
      "loss": 2.4171,
      "step": 71827
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1391161680221558,
      "learning_rate": 2.0889514702243096e-07,
      "loss": 2.1701,
      "step": 71828
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.174268126487732,
      "learning_rate": 2.088114373230099e-07,
      "loss": 2.0981,
      "step": 71829
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1302663087844849,
      "learning_rate": 2.0872774422231456e-07,
      "loss": 2.4889,
      "step": 71830
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0464576482772827,
      "learning_rate": 2.0864406772048374e-07,
      "loss": 2.0642,
      "step": 71831
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.034545660018921,
      "learning_rate": 2.0856040781766063e-07,
      "loss": 2.4622,
      "step": 71832
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0175079107284546,
      "learning_rate": 2.0847676451398736e-07,
      "loss": 2.1163,
      "step": 71833
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0784778594970703,
      "learning_rate": 2.0839313780960492e-07,
      "loss": 2.2857,
      "step": 71834
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0616451501846313,
      "learning_rate": 2.083095277046554e-07,
      "loss": 2.2117,
      "step": 71835
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.099768042564392,
      "learning_rate": 2.0822593419928094e-07,
      "loss": 2.077,
      "step": 71836
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1056162118911743,
      "learning_rate": 2.0814235729362364e-07,
      "loss": 2.1741,
      "step": 71837
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0171465873718262,
      "learning_rate": 2.0805879698782337e-07,
      "loss": 2.1195,
      "step": 71838
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1954286098480225,
      "learning_rate": 2.079752532820234e-07,
      "loss": 2.0957,
      "step": 71839
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2363168001174927,
      "learning_rate": 2.0789172617636465e-07,
      "loss": 2.3964,
      "step": 71840
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0472092628479004,
      "learning_rate": 2.078082156709893e-07,
      "loss": 2.4033,
      "step": 71841
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.040033221244812,
      "learning_rate": 2.0772472176603832e-07,
      "loss": 2.337,
      "step": 71842
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1350287199020386,
      "learning_rate": 2.076412444616538e-07,
      "loss": 2.1667,
      "step": 71843
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1157079935073853,
      "learning_rate": 2.0755778375797563e-07,
      "loss": 2.2678,
      "step": 71844
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0733466148376465,
      "learning_rate": 2.0747433965514818e-07,
      "loss": 2.1796,
      "step": 71845
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1447443962097168,
      "learning_rate": 2.073909121533102e-07,
      "loss": 2.4458,
      "step": 71846
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1007227897644043,
      "learning_rate": 2.073075012526049e-07,
      "loss": 2.331,
      "step": 71847
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0442872047424316,
      "learning_rate": 2.072241069531722e-07,
      "loss": 2.2471,
      "step": 71848
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1470612287521362,
      "learning_rate": 2.0714072925515527e-07,
      "loss": 2.2333,
      "step": 71849
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3464100360870361,
      "learning_rate": 2.0705736815869404e-07,
      "loss": 2.3714,
      "step": 71850
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0020006895065308,
      "learning_rate": 2.069740236639306e-07,
      "loss": 2.3076,
      "step": 71851
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0183006525039673,
      "learning_rate": 2.0689069577100706e-07,
      "loss": 2.3453,
      "step": 71852
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2732726335525513,
      "learning_rate": 2.0680738448006222e-07,
      "loss": 2.2394,
      "step": 71853
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.271375060081482,
      "learning_rate": 2.0672408979123814e-07,
      "loss": 2.3105,
      "step": 71854
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0593854188919067,
      "learning_rate": 2.0664081170467808e-07,
      "loss": 2.3303,
      "step": 71855
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3061943054199219,
      "learning_rate": 2.065575502205197e-07,
      "loss": 2.366,
      "step": 71856
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1345504522323608,
      "learning_rate": 2.0647430533890733e-07,
      "loss": 2.4117,
      "step": 71857
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1663721799850464,
      "learning_rate": 2.0639107705998084e-07,
      "loss": 2.3637,
      "step": 71858
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1261858940124512,
      "learning_rate": 2.0630786538388124e-07,
      "loss": 2.4312,
      "step": 71859
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1033284664154053,
      "learning_rate": 2.0622467031074955e-07,
      "loss": 2.3488,
      "step": 71860
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2942109107971191,
      "learning_rate": 2.0614149184072785e-07,
      "loss": 2.3113,
      "step": 71861
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.047856092453003,
      "learning_rate": 2.060583299739549e-07,
      "loss": 2.4631,
      "step": 71862
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9906298518180847,
      "learning_rate": 2.0597518471057398e-07,
      "loss": 2.0105,
      "step": 71863
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.081431269645691,
      "learning_rate": 2.0589205605072604e-07,
      "loss": 2.3493,
      "step": 71864
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.084877371788025,
      "learning_rate": 2.0580894399454987e-07,
      "loss": 2.3923,
      "step": 71865
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1913748979568481,
      "learning_rate": 2.0572584854218757e-07,
      "loss": 2.2739,
      "step": 71866
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2526798248291016,
      "learning_rate": 2.0564276969378128e-07,
      "loss": 2.3297,
      "step": 71867
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0647741556167603,
      "learning_rate": 2.0555970744946863e-07,
      "loss": 2.2638,
      "step": 71868
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0526331663131714,
      "learning_rate": 2.0547666180939397e-07,
      "loss": 2.2296,
      "step": 71869
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.105569839477539,
      "learning_rate": 2.0539363277369605e-07,
      "loss": 2.3666,
      "step": 71870
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.156444787979126,
      "learning_rate": 2.0531062034251593e-07,
      "loss": 2.0766,
      "step": 71871
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.072286605834961,
      "learning_rate": 2.0522762451599453e-07,
      "loss": 2.229,
      "step": 71872
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0206422805786133,
      "learning_rate": 2.0514464529427292e-07,
      "loss": 2.4686,
      "step": 71873
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9317715167999268,
      "learning_rate": 2.0506168267749094e-07,
      "loss": 2.6174,
      "step": 71874
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0351184606552124,
      "learning_rate": 2.049787366657896e-07,
      "loss": 2.2097,
      "step": 71875
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4689651727676392,
      "learning_rate": 2.048958072593099e-07,
      "loss": 2.4086,
      "step": 71876
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0485999584197998,
      "learning_rate": 2.0481289445819173e-07,
      "loss": 2.1852,
      "step": 71877
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.085790753364563,
      "learning_rate": 2.0472999826257722e-07,
      "loss": 2.2556,
      "step": 71878
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1657612323760986,
      "learning_rate": 2.0464711867260512e-07,
      "loss": 2.3182,
      "step": 71879
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.360641598701477,
      "learning_rate": 2.0456425568841642e-07,
      "loss": 2.2992,
      "step": 71880
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2455500364303589,
      "learning_rate": 2.0448140931015103e-07,
      "loss": 2.3543,
      "step": 71881
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2432143688201904,
      "learning_rate": 2.0439857953795106e-07,
      "loss": 2.4581,
      "step": 71882
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0560460090637207,
      "learning_rate": 2.0431576637195526e-07,
      "loss": 2.4314,
      "step": 71883
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1690948009490967,
      "learning_rate": 2.0423296981230467e-07,
      "loss": 2.1709,
      "step": 71884
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0458976030349731,
      "learning_rate": 2.0415018985914026e-07,
      "loss": 2.3633,
      "step": 71885
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2895989418029785,
      "learning_rate": 2.0406742651260081e-07,
      "loss": 2.235,
      "step": 71886
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.017406940460205,
      "learning_rate": 2.0398467977282842e-07,
      "loss": 2.3761,
      "step": 71887
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2223478555679321,
      "learning_rate": 2.03901949639963e-07,
      "loss": 2.3581,
      "step": 71888
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0261584520339966,
      "learning_rate": 2.0381923611414334e-07,
      "loss": 2.4497,
      "step": 71889
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0236034393310547,
      "learning_rate": 2.037365391955115e-07,
      "loss": 2.425,
      "step": 71890
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0905990600585938,
      "learning_rate": 2.036538588842052e-07,
      "loss": 2.4041,
      "step": 71891
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0230212211608887,
      "learning_rate": 2.0357119518036983e-07,
      "loss": 2.3854,
      "step": 71892
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9922226667404175,
      "learning_rate": 2.0348854808413865e-07,
      "loss": 2.3406,
      "step": 71893
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0599979162216187,
      "learning_rate": 2.0340591759565597e-07,
      "loss": 2.5385,
      "step": 71894
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0543510913848877,
      "learning_rate": 2.033233037150617e-07,
      "loss": 2.482,
      "step": 71895
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.221165418624878,
      "learning_rate": 2.032407064424946e-07,
      "loss": 2.5253,
      "step": 71896
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2141001224517822,
      "learning_rate": 2.0315812577809458e-07,
      "loss": 2.323,
      "step": 71897
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2985084056854248,
      "learning_rate": 2.0307556172200372e-07,
      "loss": 2.5177,
      "step": 71898
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0488781929016113,
      "learning_rate": 2.0299301427435968e-07,
      "loss": 2.2326,
      "step": 71899
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1046357154846191,
      "learning_rate": 2.029104834353035e-07,
      "loss": 2.3218,
      "step": 71900
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.073742389678955,
      "learning_rate": 2.0282796920497505e-07,
      "loss": 2.3941,
      "step": 71901
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0541739463806152,
      "learning_rate": 2.027454715835131e-07,
      "loss": 2.2477,
      "step": 71902
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0155560970306396,
      "learning_rate": 2.0266299057105975e-07,
      "loss": 2.5287,
      "step": 71903
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0673704147338867,
      "learning_rate": 2.025805261677527e-07,
      "loss": 2.3077,
      "step": 71904
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0649387836456299,
      "learning_rate": 2.0249807837373403e-07,
      "loss": 2.552,
      "step": 71905
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5626895427703857,
      "learning_rate": 2.0241564718914142e-07,
      "loss": 2.2603,
      "step": 71906
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.071387767791748,
      "learning_rate": 2.0233323261411475e-07,
      "loss": 2.381,
      "step": 71907
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1977866888046265,
      "learning_rate": 2.0225083464879392e-07,
      "loss": 2.5247,
      "step": 71908
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1059818267822266,
      "learning_rate": 2.021684532933199e-07,
      "loss": 2.3952,
      "step": 71909
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.108203649520874,
      "learning_rate": 2.020860885478304e-07,
      "loss": 2.2631,
      "step": 71910
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0475666522979736,
      "learning_rate": 2.020037404124675e-07,
      "loss": 2.3586,
      "step": 71911
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0362309217453003,
      "learning_rate": 2.0192140888736778e-07,
      "loss": 2.2008,
      "step": 71912
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.075527310371399,
      "learning_rate": 2.0183909397267332e-07,
      "loss": 2.3018,
      "step": 71913
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.086466908454895,
      "learning_rate": 2.017567956685218e-07,
      "loss": 2.3179,
      "step": 71914
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9834617376327515,
      "learning_rate": 2.0167451397505532e-07,
      "loss": 2.2452,
      "step": 71915
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.035707712173462,
      "learning_rate": 2.0159224889240937e-07,
      "loss": 2.3118,
      "step": 71916
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0784926414489746,
      "learning_rate": 2.015100004207282e-07,
      "loss": 2.3875,
      "step": 71917
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1793105602264404,
      "learning_rate": 2.0142776856014845e-07,
      "loss": 2.2506,
      "step": 71918
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.341727375984192,
      "learning_rate": 2.0134555331080884e-07,
      "loss": 2.5769,
      "step": 71919
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.00856614112854,
      "learning_rate": 2.012633546728504e-07,
      "loss": 2.2472,
      "step": 71920
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1378331184387207,
      "learning_rate": 2.011811726464119e-07,
      "loss": 2.3771,
      "step": 71921
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.977826714515686,
      "learning_rate": 2.010990072316321e-07,
      "loss": 2.4556,
      "step": 71922
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.100927472114563,
      "learning_rate": 2.0101685842865092e-07,
      "loss": 2.2885,
      "step": 71923
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0022248029708862,
      "learning_rate": 2.0093472623760712e-07,
      "loss": 2.3737,
      "step": 71924
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0711026191711426,
      "learning_rate": 2.008526106586417e-07,
      "loss": 2.1687,
      "step": 71925
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.112419605255127,
      "learning_rate": 2.0077051169189232e-07,
      "loss": 2.1709,
      "step": 71926
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1020210981369019,
      "learning_rate": 2.0068842933749776e-07,
      "loss": 2.3954,
      "step": 71927
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.113404631614685,
      "learning_rate": 2.0060636359559792e-07,
      "loss": 2.1919,
      "step": 71928
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1612969636917114,
      "learning_rate": 2.0052431446633158e-07,
      "loss": 2.2972,
      "step": 71929
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1220721006393433,
      "learning_rate": 2.004422819498386e-07,
      "loss": 2.1034,
      "step": 71930
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0994032621383667,
      "learning_rate": 2.003602660462567e-07,
      "loss": 2.5107,
      "step": 71931
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1561073064804077,
      "learning_rate": 2.002782667557257e-07,
      "loss": 2.3866,
      "step": 71932
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0139968395233154,
      "learning_rate": 2.0019628407838554e-07,
      "loss": 2.0707,
      "step": 71933
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9632288813591003,
      "learning_rate": 2.0011431801437386e-07,
      "loss": 2.249,
      "step": 71934
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1725184917449951,
      "learning_rate": 2.0003236856382945e-07,
      "loss": 2.5183,
      "step": 71935
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0673967599868774,
      "learning_rate": 1.999504357268922e-07,
      "loss": 2.2545,
      "step": 71936
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0442354679107666,
      "learning_rate": 1.998685195036998e-07,
      "loss": 2.2756,
      "step": 71937
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1460760831832886,
      "learning_rate": 1.997866198943932e-07,
      "loss": 2.6497,
      "step": 71938
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.071107268333435,
      "learning_rate": 1.9970473689910895e-07,
      "loss": 2.2575,
      "step": 71939
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0742565393447876,
      "learning_rate": 1.9962287051798811e-07,
      "loss": 2.2977,
      "step": 71940
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.045145034790039,
      "learning_rate": 1.995410207511672e-07,
      "loss": 2.2842,
      "step": 71941
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.142146110534668,
      "learning_rate": 1.9945918759878723e-07,
      "loss": 2.1419,
      "step": 71942
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1475614309310913,
      "learning_rate": 1.9937737106098364e-07,
      "loss": 2.2242,
      "step": 71943
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.119493007659912,
      "learning_rate": 1.9929557113789966e-07,
      "loss": 2.4765,
      "step": 71944
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0040373802185059,
      "learning_rate": 1.9921378782967073e-07,
      "loss": 2.2016,
      "step": 71945
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1483045816421509,
      "learning_rate": 1.991320211364356e-07,
      "loss": 2.487,
      "step": 71946
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0939644575119019,
      "learning_rate": 1.9905027105833418e-07,
      "loss": 2.2477,
      "step": 71947
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1527903079986572,
      "learning_rate": 1.9896853759550416e-07,
      "loss": 2.5974,
      "step": 71948
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1037174463272095,
      "learning_rate": 1.988868207480843e-07,
      "loss": 2.4013,
      "step": 71949
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5971585512161255,
      "learning_rate": 1.9880512051621337e-07,
      "loss": 2.3157,
      "step": 71950
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1454280614852905,
      "learning_rate": 1.9872343690002904e-07,
      "loss": 2.0403,
      "step": 71951
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1115801334381104,
      "learning_rate": 1.9864176989967122e-07,
      "loss": 2.5612,
      "step": 71952
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.152754783630371,
      "learning_rate": 1.9856011951527755e-07,
      "loss": 2.2136,
      "step": 71953
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1540002822875977,
      "learning_rate": 1.9847848574698681e-07,
      "loss": 2.3445,
      "step": 71954
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0374850034713745,
      "learning_rate": 1.9839686859493667e-07,
      "loss": 2.4606,
      "step": 71955
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.036075472831726,
      "learning_rate": 1.9831526805926592e-07,
      "loss": 2.2388,
      "step": 71956
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.181311011314392,
      "learning_rate": 1.9823368414011335e-07,
      "loss": 2.3428,
      "step": 71957
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0282726287841797,
      "learning_rate": 1.981521168376166e-07,
      "loss": 2.376,
      "step": 71958
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0450154542922974,
      "learning_rate": 1.9807056615191445e-07,
      "loss": 2.3982,
      "step": 71959
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0371965169906616,
      "learning_rate": 1.9798903208314457e-07,
      "loss": 2.3308,
      "step": 71960
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1800662279129028,
      "learning_rate": 1.9790751463144576e-07,
      "loss": 2.2769,
      "step": 71961
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0493911504745483,
      "learning_rate": 1.9782601379695454e-07,
      "loss": 2.2662,
      "step": 71962
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1033053398132324,
      "learning_rate": 1.9774452957981193e-07,
      "loss": 2.2392,
      "step": 71963
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1800743341445923,
      "learning_rate": 1.9766306198015338e-07,
      "loss": 2.3957,
      "step": 71964
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1345890760421753,
      "learning_rate": 1.975816109981199e-07,
      "loss": 2.3631,
      "step": 71965
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4495240449905396,
      "learning_rate": 1.9750017663384578e-07,
      "loss": 2.6952,
      "step": 71966
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.14130699634552,
      "learning_rate": 1.9741875888747318e-07,
      "loss": 2.416,
      "step": 71967
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0128722190856934,
      "learning_rate": 1.9733735775913643e-07,
      "loss": 2.3302,
      "step": 71968
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2082200050354004,
      "learning_rate": 1.972559732489765e-07,
      "loss": 2.3188,
      "step": 71969
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0858653783798218,
      "learning_rate": 1.9717460535712885e-07,
      "loss": 2.3031,
      "step": 71970
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0166049003601074,
      "learning_rate": 1.9709325408373447e-07,
      "loss": 2.2019,
      "step": 71971
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.013708233833313,
      "learning_rate": 1.9701191942892884e-07,
      "loss": 2.4549,
      "step": 71972
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.102105736732483,
      "learning_rate": 1.9693060139284958e-07,
      "loss": 2.4335,
      "step": 71973
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.009509801864624,
      "learning_rate": 1.9684929997563552e-07,
      "loss": 2.3147,
      "step": 71974
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.135513186454773,
      "learning_rate": 1.9676801517742538e-07,
      "loss": 2.1799,
      "step": 71975
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.063058853149414,
      "learning_rate": 1.9668674699835467e-07,
      "loss": 2.2454,
      "step": 71976
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0740008354187012,
      "learning_rate": 1.9660549543856322e-07,
      "loss": 2.3932,
      "step": 71977
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1335300207138062,
      "learning_rate": 1.9652426049818763e-07,
      "loss": 2.3462,
      "step": 71978
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0946860313415527,
      "learning_rate": 1.9644304217736666e-07,
      "loss": 2.0482,
      "step": 71979
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0629445314407349,
      "learning_rate": 1.9636184047623686e-07,
      "loss": 2.3188,
      "step": 71980
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1880652904510498,
      "learning_rate": 1.96280655394937e-07,
      "loss": 2.3492,
      "step": 71981
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1736844778060913,
      "learning_rate": 1.961994869336037e-07,
      "loss": 2.1177,
      "step": 71982
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1637217998504639,
      "learning_rate": 1.9611833509237454e-07,
      "loss": 2.1457,
      "step": 71983
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1017707586288452,
      "learning_rate": 1.9603719987138835e-07,
      "loss": 2.5197,
      "step": 71984
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0849577188491821,
      "learning_rate": 1.959560812707817e-07,
      "loss": 2.4447,
      "step": 71985
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1615407466888428,
      "learning_rate": 1.958749792906911e-07,
      "loss": 2.1118,
      "step": 71986
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.111548900604248,
      "learning_rate": 1.9579389393125647e-07,
      "loss": 2.2241,
      "step": 71987
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1061855554580688,
      "learning_rate": 1.9571282519261325e-07,
      "loss": 2.4921,
      "step": 71988
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.083997368812561,
      "learning_rate": 1.9563177307490022e-07,
      "loss": 2.1372,
      "step": 71989
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1454907655715942,
      "learning_rate": 1.9555073757825394e-07,
      "loss": 2.4108,
      "step": 71990
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0224218368530273,
      "learning_rate": 1.9546971870281095e-07,
      "loss": 2.5346,
      "step": 71991
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1045680046081543,
      "learning_rate": 1.9538871644871116e-07,
      "loss": 2.2013,
      "step": 71992
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1060031652450562,
      "learning_rate": 1.953077308160889e-07,
      "loss": 2.2574,
      "step": 71993
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0947322845458984,
      "learning_rate": 1.9522676180508405e-07,
      "loss": 2.0372,
      "step": 71994
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0768393278121948,
      "learning_rate": 1.9514580941583205e-07,
      "loss": 2.4064,
      "step": 71995
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2478504180908203,
      "learning_rate": 1.9506487364847172e-07,
      "loss": 2.2123,
      "step": 71996
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.285991907119751,
      "learning_rate": 1.9498395450313957e-07,
      "loss": 2.4578,
      "step": 71997
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0495665073394775,
      "learning_rate": 1.9490305197997216e-07,
      "loss": 2.1529,
      "step": 71998
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2082741260528564,
      "learning_rate": 1.948221660791072e-07,
      "loss": 2.4224,
      "step": 71999
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.115429162979126,
      "learning_rate": 1.947412968006812e-07,
      "loss": 2.1745,
      "step": 72000
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1372922658920288,
      "learning_rate": 1.9466044414483188e-07,
      "loss": 2.359,
      "step": 72001
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0989453792572021,
      "learning_rate": 1.9457960811169685e-07,
      "loss": 2.2473,
      "step": 72002
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1280090808868408,
      "learning_rate": 1.944987887014116e-07,
      "loss": 2.1253,
      "step": 72003
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1763745546340942,
      "learning_rate": 1.9441798591411487e-07,
      "loss": 2.329,
      "step": 72004
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9420665502548218,
      "learning_rate": 1.9433719974994215e-07,
      "loss": 2.2889,
      "step": 72005
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0176753997802734,
      "learning_rate": 1.9425643020903107e-07,
      "loss": 2.2636,
      "step": 72006
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0199596881866455,
      "learning_rate": 1.941756772915193e-07,
      "loss": 2.1961,
      "step": 72007
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4630248546600342,
      "learning_rate": 1.9409494099754232e-07,
      "loss": 2.3568,
      "step": 72008
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1415951251983643,
      "learning_rate": 1.940142213272378e-07,
      "loss": 2.4045,
      "step": 72009
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1914652585983276,
      "learning_rate": 1.9393351828074337e-07,
      "loss": 2.4851,
      "step": 72010
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2283594608306885,
      "learning_rate": 1.9385283185819338e-07,
      "loss": 2.3792,
      "step": 72011
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0707042217254639,
      "learning_rate": 1.937721620597277e-07,
      "loss": 2.3114,
      "step": 72012
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1606603860855103,
      "learning_rate": 1.9369150888547961e-07,
      "loss": 2.38,
      "step": 72013
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0741902589797974,
      "learning_rate": 1.9361087233559007e-07,
      "loss": 2.311,
      "step": 72014
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0669691562652588,
      "learning_rate": 1.935302524101912e-07,
      "loss": 2.2673,
      "step": 72015
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.205283522605896,
      "learning_rate": 1.9344964910942398e-07,
      "loss": 2.1302,
      "step": 72016
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1336679458618164,
      "learning_rate": 1.9336906243342168e-07,
      "loss": 2.1347,
      "step": 72017
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1235275268554688,
      "learning_rate": 1.9328849238232194e-07,
      "loss": 2.1833,
      "step": 72018
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.070704460144043,
      "learning_rate": 1.9320793895626354e-07,
      "loss": 2.3532,
      "step": 72019
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2736293077468872,
      "learning_rate": 1.9312740215537973e-07,
      "loss": 2.1565,
      "step": 72020
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2720484733581543,
      "learning_rate": 1.9304688197980815e-07,
      "loss": 2.4974,
      "step": 72021
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0069243907928467,
      "learning_rate": 1.9296637842968645e-07,
      "loss": 2.2762,
      "step": 72022
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1018837690353394,
      "learning_rate": 1.9288589150515015e-07,
      "loss": 2.3228,
      "step": 72023
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6029456853866577,
      "learning_rate": 1.9280542120633572e-07,
      "loss": 2.4166,
      "step": 72024
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1903363466262817,
      "learning_rate": 1.9272496753338087e-07,
      "loss": 2.3248,
      "step": 72025
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1008342504501343,
      "learning_rate": 1.9264453048641884e-07,
      "loss": 2.478,
      "step": 72026
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1633810997009277,
      "learning_rate": 1.925641100655895e-07,
      "loss": 2.4153,
      "step": 72027
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0603605508804321,
      "learning_rate": 1.924837062710261e-07,
      "loss": 2.4035,
      "step": 72028
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7398284673690796,
      "learning_rate": 1.9240331910286847e-07,
      "loss": 2.3387,
      "step": 72029
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.102091908454895,
      "learning_rate": 1.9232294856124879e-07,
      "loss": 2.2582,
      "step": 72030
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1160128116607666,
      "learning_rate": 1.922425946463069e-07,
      "loss": 2.4542,
      "step": 72031
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0825409889221191,
      "learning_rate": 1.9216225735817718e-07,
      "loss": 2.2177,
      "step": 72032
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1160671710968018,
      "learning_rate": 1.9208193669699614e-07,
      "loss": 2.3113,
      "step": 72033
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0931236743927002,
      "learning_rate": 1.9200163266289928e-07,
      "loss": 2.2847,
      "step": 72034
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2224621772766113,
      "learning_rate": 1.9192134525602534e-07,
      "loss": 2.3208,
      "step": 72035
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1042436361312866,
      "learning_rate": 1.9184107447650647e-07,
      "loss": 2.3082,
      "step": 72036
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.133234977722168,
      "learning_rate": 1.9176082032448363e-07,
      "loss": 2.2121,
      "step": 72037
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.070652961730957,
      "learning_rate": 1.9168058280008784e-07,
      "loss": 2.1562,
      "step": 72038
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1034115552902222,
      "learning_rate": 1.9160036190345787e-07,
      "loss": 2.4577,
      "step": 72039
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1882855892181396,
      "learning_rate": 1.9152015763472808e-07,
      "loss": 2.2467,
      "step": 72040
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0527522563934326,
      "learning_rate": 1.9143996999403725e-07,
      "loss": 2.3866,
      "step": 72041
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1779905557632446,
      "learning_rate": 1.9135979898151856e-07,
      "loss": 2.4494,
      "step": 72042
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0920230150222778,
      "learning_rate": 1.9127964459730975e-07,
      "loss": 2.4143,
      "step": 72043
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0521695613861084,
      "learning_rate": 1.911995068415462e-07,
      "loss": 2.2729,
      "step": 72044
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1287211179733276,
      "learning_rate": 1.9111938571436228e-07,
      "loss": 2.4079,
      "step": 72045
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1131497621536255,
      "learning_rate": 1.9103928121589677e-07,
      "loss": 2.1823,
      "step": 72046
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1628391742706299,
      "learning_rate": 1.9095919334628178e-07,
      "loss": 2.4693,
      "step": 72047
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.050218939781189,
      "learning_rate": 1.9087912210565607e-07,
      "loss": 2.1587,
      "step": 72048
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0991677045822144,
      "learning_rate": 1.90799067494154e-07,
      "loss": 2.2821,
      "step": 72049
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.110970139503479,
      "learning_rate": 1.9071902951191323e-07,
      "loss": 2.2169,
      "step": 72050
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0859618186950684,
      "learning_rate": 1.9063900815906698e-07,
      "loss": 2.4394,
      "step": 72051
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1348381042480469,
      "learning_rate": 1.9055900343575185e-07,
      "loss": 2.4145,
      "step": 72052
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1247762441635132,
      "learning_rate": 1.9047901534210212e-07,
      "loss": 2.5023,
      "step": 72053
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0778353214263916,
      "learning_rate": 1.9039904387825546e-07,
      "loss": 2.2227,
      "step": 72054
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1313056945800781,
      "learning_rate": 1.9031908904434625e-07,
      "loss": 2.3968,
      "step": 72055
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0533685684204102,
      "learning_rate": 1.9023915084051215e-07,
      "loss": 2.2635,
      "step": 72056
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2360881567001343,
      "learning_rate": 1.9015922926688524e-07,
      "loss": 2.5012,
      "step": 72057
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2541545629501343,
      "learning_rate": 1.9007932432360322e-07,
      "loss": 2.2024,
      "step": 72058
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2095260620117188,
      "learning_rate": 1.899994360108015e-07,
      "loss": 2.4397,
      "step": 72059
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2576545476913452,
      "learning_rate": 1.8991956432861448e-07,
      "loss": 2.4421,
      "step": 72060
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9691148400306702,
      "learning_rate": 1.8983970927717866e-07,
      "loss": 2.3823,
      "step": 72061
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.113184928894043,
      "learning_rate": 1.8975987085662838e-07,
      "loss": 2.1899,
      "step": 72062
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0081121921539307,
      "learning_rate": 1.8968004906710024e-07,
      "loss": 2.1895,
      "step": 72063
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0052133798599243,
      "learning_rate": 1.8960024390872967e-07,
      "loss": 2.288,
      "step": 72064
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0818997621536255,
      "learning_rate": 1.8952045538164875e-07,
      "loss": 2.347,
      "step": 72065
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1370048522949219,
      "learning_rate": 1.8944068348599743e-07,
      "loss": 2.4079,
      "step": 72066
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.165365219116211,
      "learning_rate": 1.8936092822190667e-07,
      "loss": 2.4515,
      "step": 72067
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.267273187637329,
      "learning_rate": 1.8928118958951525e-07,
      "loss": 2.3008,
      "step": 72068
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1910465955734253,
      "learning_rate": 1.892014675889564e-07,
      "loss": 2.4269,
      "step": 72069
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0179920196533203,
      "learning_rate": 1.891217622203656e-07,
      "loss": 2.2825,
      "step": 72070
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9541807174682617,
      "learning_rate": 1.8904207348387716e-07,
      "loss": 2.2949,
      "step": 72071
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.161026954650879,
      "learning_rate": 1.8896240137962761e-07,
      "loss": 2.3248,
      "step": 72072
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1059294939041138,
      "learning_rate": 1.8888274590775246e-07,
      "loss": 2.3536,
      "step": 72073
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0591742992401123,
      "learning_rate": 1.888031070683838e-07,
      "loss": 2.1961,
      "step": 72074
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0877013206481934,
      "learning_rate": 1.8872348486166036e-07,
      "loss": 2.4404,
      "step": 72075
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2233935594558716,
      "learning_rate": 1.8864387928771543e-07,
      "loss": 2.3714,
      "step": 72076
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1704539060592651,
      "learning_rate": 1.8856429034668223e-07,
      "loss": 2.2395,
      "step": 72077
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0967241525650024,
      "learning_rate": 1.884847180386984e-07,
      "loss": 2.1481,
      "step": 72078
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.092948317527771,
      "learning_rate": 1.8840516236389716e-07,
      "loss": 2.5033,
      "step": 72079
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.017229676246643,
      "learning_rate": 1.88325623322414e-07,
      "loss": 2.5185,
      "step": 72080
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.121409296989441,
      "learning_rate": 1.8824610091438434e-07,
      "loss": 2.2102,
      "step": 72081
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0627655982971191,
      "learning_rate": 1.881665951399414e-07,
      "loss": 2.3047,
      "step": 72082
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.100821614265442,
      "learning_rate": 1.8808710599922176e-07,
      "loss": 2.3197,
      "step": 72083
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1294642686843872,
      "learning_rate": 1.8800763349235862e-07,
      "loss": 2.1996,
      "step": 72084
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9365432262420654,
      "learning_rate": 1.8792817761948855e-07,
      "loss": 2.2228,
      "step": 72085
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1151028871536255,
      "learning_rate": 1.878487383807437e-07,
      "loss": 2.2023,
      "step": 72086
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0008046627044678,
      "learning_rate": 1.8776931577626167e-07,
      "loss": 2.0698,
      "step": 72087
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.103991985321045,
      "learning_rate": 1.8768990980617464e-07,
      "loss": 2.4099,
      "step": 72088
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2282475233078003,
      "learning_rate": 1.87610520470618e-07,
      "loss": 2.0722,
      "step": 72089
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1164053678512573,
      "learning_rate": 1.8753114776972726e-07,
      "loss": 2.342,
      "step": 72090
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0814330577850342,
      "learning_rate": 1.874517917036367e-07,
      "loss": 2.0934,
      "step": 72091
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1317856311798096,
      "learning_rate": 1.8737245227247958e-07,
      "loss": 2.12,
      "step": 72092
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0849369764328003,
      "learning_rate": 1.8729312947639134e-07,
      "loss": 2.5606,
      "step": 72093
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0918940305709839,
      "learning_rate": 1.8721382331550632e-07,
      "loss": 2.3331,
      "step": 72094
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.102410078048706,
      "learning_rate": 1.8713453378995882e-07,
      "loss": 2.5585,
      "step": 72095
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9896889925003052,
      "learning_rate": 1.8705526089988324e-07,
      "loss": 2.3449,
      "step": 72096
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1794354915618896,
      "learning_rate": 1.86976004645415e-07,
      "loss": 2.3206,
      "step": 72097
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1043174266815186,
      "learning_rate": 1.868967650266862e-07,
      "loss": 2.2278,
      "step": 72098
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.076407551765442,
      "learning_rate": 1.8681754204383452e-07,
      "loss": 2.24,
      "step": 72099
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0380311012268066,
      "learning_rate": 1.8673833569699206e-07,
      "loss": 2.2831,
      "step": 72100
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0879167318344116,
      "learning_rate": 1.866591459862921e-07,
      "loss": 2.5303,
      "step": 72101
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0313091278076172,
      "learning_rate": 1.8657997291187112e-07,
      "loss": 2.2348,
      "step": 72102
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.086102843284607,
      "learning_rate": 1.865008164738613e-07,
      "loss": 2.2832,
      "step": 72103
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0404322147369385,
      "learning_rate": 1.864216766723992e-07,
      "loss": 2.4378,
      "step": 72104
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1077382564544678,
      "learning_rate": 1.8634255350761688e-07,
      "loss": 2.5314,
      "step": 72105
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.024268627166748,
      "learning_rate": 1.8626344697964872e-07,
      "loss": 2.107,
      "step": 72106
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.042145013809204,
      "learning_rate": 1.8618435708863124e-07,
      "loss": 2.3287,
      "step": 72107
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.050748348236084,
      "learning_rate": 1.861052838346955e-07,
      "loss": 2.261,
      "step": 72108
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.256782054901123,
      "learning_rate": 1.860262272179758e-07,
      "loss": 2.4328,
      "step": 72109
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.053297758102417,
      "learning_rate": 1.859471872386087e-07,
      "loss": 2.2579,
      "step": 72110
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0721114873886108,
      "learning_rate": 1.8586816389672524e-07,
      "loss": 2.0375,
      "step": 72111
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.119372010231018,
      "learning_rate": 1.8578915719246194e-07,
      "loss": 2.5256,
      "step": 72112
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0380892753601074,
      "learning_rate": 1.8571016712595092e-07,
      "loss": 2.4198,
      "step": 72113
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0103009939193726,
      "learning_rate": 1.856311936973265e-07,
      "loss": 2.2148,
      "step": 72114
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0573185682296753,
      "learning_rate": 1.8555223690672307e-07,
      "loss": 2.3173,
      "step": 72115
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.7433648109436035,
      "learning_rate": 1.8547329675427495e-07,
      "loss": 2.2514,
      "step": 72116
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1302788257598877,
      "learning_rate": 1.8539437324011423e-07,
      "loss": 2.348,
      "step": 72117
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9759961366653442,
      "learning_rate": 1.8531546636437635e-07,
      "loss": 2.3395,
      "step": 72118
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0500692129135132,
      "learning_rate": 1.852365761271935e-07,
      "loss": 2.3156,
      "step": 72119
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.055436134338379,
      "learning_rate": 1.8515770252870103e-07,
      "loss": 2.645,
      "step": 72120
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0272998809814453,
      "learning_rate": 1.850788455690311e-07,
      "loss": 2.2077,
      "step": 72121
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.022863745689392,
      "learning_rate": 1.850000052483192e-07,
      "loss": 2.2659,
      "step": 72122
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2770804166793823,
      "learning_rate": 1.8492118156669626e-07,
      "loss": 2.2896,
      "step": 72123
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0629526376724243,
      "learning_rate": 1.8484237452429888e-07,
      "loss": 2.3464,
      "step": 72124
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0766127109527588,
      "learning_rate": 1.8476358412125917e-07,
      "loss": 2.172,
      "step": 72125
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.985053300857544,
      "learning_rate": 1.8468481035771145e-07,
      "loss": 2.1437,
      "step": 72126
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1280843019485474,
      "learning_rate": 1.84606053233789e-07,
      "loss": 2.2667,
      "step": 72127
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0816587209701538,
      "learning_rate": 1.84527312749625e-07,
      "loss": 2.1657,
      "step": 72128
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9816084504127502,
      "learning_rate": 1.8444858890535267e-07,
      "loss": 2.2712,
      "step": 72129
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0629444122314453,
      "learning_rate": 1.8436988170110637e-07,
      "loss": 2.2112,
      "step": 72130
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0774219036102295,
      "learning_rate": 1.8429119113701822e-07,
      "loss": 2.3095,
      "step": 72131
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1330783367156982,
      "learning_rate": 1.8421251721322252e-07,
      "loss": 2.3919,
      "step": 72132
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.120234489440918,
      "learning_rate": 1.8413385992985256e-07,
      "loss": 2.225,
      "step": 72133
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1078109741210938,
      "learning_rate": 1.8405521928704262e-07,
      "loss": 2.3491,
      "step": 72134
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.039170503616333,
      "learning_rate": 1.8397659528492372e-07,
      "loss": 2.336,
      "step": 72135
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2267827987670898,
      "learning_rate": 1.8389798792363133e-07,
      "loss": 2.2736,
      "step": 72136
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0303988456726074,
      "learning_rate": 1.8381939720329757e-07,
      "loss": 2.3801,
      "step": 72137
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0469624996185303,
      "learning_rate": 1.8374082312405562e-07,
      "loss": 2.4516,
      "step": 72138
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9809591770172119,
      "learning_rate": 1.8366226568603983e-07,
      "loss": 2.2756,
      "step": 72139
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.096709132194519,
      "learning_rate": 1.8358372488938125e-07,
      "loss": 2.2949,
      "step": 72140
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1845539808273315,
      "learning_rate": 1.835052007342164e-07,
      "loss": 2.3649,
      "step": 72141
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0801438093185425,
      "learning_rate": 1.8342669322067518e-07,
      "loss": 2.1479,
      "step": 72142
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1569621562957764,
      "learning_rate": 1.8334820234889194e-07,
      "loss": 2.5049,
      "step": 72143
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0775588750839233,
      "learning_rate": 1.832697281189988e-07,
      "loss": 2.3167,
      "step": 72144
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1983282566070557,
      "learning_rate": 1.8319127053113118e-07,
      "loss": 2.5616,
      "step": 72145
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2017779350280762,
      "learning_rate": 1.831128295854201e-07,
      "loss": 2.513,
      "step": 72146
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1386806964874268,
      "learning_rate": 1.8303440528199877e-07,
      "loss": 2.2863,
      "step": 72147
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0950721502304077,
      "learning_rate": 1.8295599762099935e-07,
      "loss": 2.3675,
      "step": 72148
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0062589645385742,
      "learning_rate": 1.8287760660255727e-07,
      "loss": 2.2173,
      "step": 72149
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2087280750274658,
      "learning_rate": 1.8279923222680352e-07,
      "loss": 2.1868,
      "step": 72150
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0756237506866455,
      "learning_rate": 1.8272087449387133e-07,
      "loss": 2.259,
      "step": 72151
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2491111755371094,
      "learning_rate": 1.8264253340389281e-07,
      "loss": 2.4101,
      "step": 72152
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0464707612991333,
      "learning_rate": 1.8256420895700345e-07,
      "loss": 2.2861,
      "step": 72153
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1654716730117798,
      "learning_rate": 1.82485901153332e-07,
      "loss": 2.2138,
      "step": 72154
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0185837745666504,
      "learning_rate": 1.824076099930161e-07,
      "loss": 2.2513,
      "step": 72155
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.073946237564087,
      "learning_rate": 1.823293354761835e-07,
      "loss": 2.3761,
      "step": 72156
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0421319007873535,
      "learning_rate": 1.8225107760296956e-07,
      "loss": 2.3485,
      "step": 72157
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.00261652469635,
      "learning_rate": 1.8217283637350647e-07,
      "loss": 2.2356,
      "step": 72158
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9341902136802673,
      "learning_rate": 1.8209461178792742e-07,
      "loss": 2.411,
      "step": 72159
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0879584550857544,
      "learning_rate": 1.8201640384636343e-07,
      "loss": 2.1381,
      "step": 72160
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0033771991729736,
      "learning_rate": 1.8193821254894884e-07,
      "loss": 2.3923,
      "step": 72161
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.088824987411499,
      "learning_rate": 1.8186003789581574e-07,
      "loss": 2.4143,
      "step": 72162
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0990288257598877,
      "learning_rate": 1.8178187988709627e-07,
      "loss": 2.3771,
      "step": 72163
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9315767884254456,
      "learning_rate": 1.8170373852292368e-07,
      "loss": 2.409,
      "step": 72164
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0357648134231567,
      "learning_rate": 1.8162561380342892e-07,
      "loss": 2.268,
      "step": 72165
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0578895807266235,
      "learning_rate": 1.8154750572874526e-07,
      "loss": 2.2188,
      "step": 72166
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0100736618041992,
      "learning_rate": 1.8146941429900587e-07,
      "loss": 2.2022,
      "step": 72167
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.033942461013794,
      "learning_rate": 1.8139133951434297e-07,
      "loss": 2.3968,
      "step": 72168
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9794139266014099,
      "learning_rate": 1.8131328137488746e-07,
      "loss": 2.2696,
      "step": 72169
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1888347864151,
      "learning_rate": 1.8123523988077375e-07,
      "loss": 2.3517,
      "step": 72170
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0529340505599976,
      "learning_rate": 1.811572150321317e-07,
      "loss": 2.102,
      "step": 72171
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.212071180343628,
      "learning_rate": 1.8107920682909563e-07,
      "loss": 2.2551,
      "step": 72172
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1254310607910156,
      "learning_rate": 1.810012152717966e-07,
      "loss": 2.4277,
      "step": 72173
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0344728231430054,
      "learning_rate": 1.809232403603678e-07,
      "loss": 2.6555,
      "step": 72174
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9764434695243835,
      "learning_rate": 1.8084528209494024e-07,
      "loss": 2.1669,
      "step": 72175
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0957393646240234,
      "learning_rate": 1.8076734047564826e-07,
      "loss": 2.3971,
      "step": 72176
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1843544244766235,
      "learning_rate": 1.8068941550262065e-07,
      "loss": 2.2633,
      "step": 72177
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0402542352676392,
      "learning_rate": 1.8061150717599284e-07,
      "loss": 2.4348,
      "step": 72178
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9676640629768372,
      "learning_rate": 1.8053361549589478e-07,
      "loss": 2.4375,
      "step": 72179
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.404520034790039,
      "learning_rate": 1.8045574046245963e-07,
      "loss": 2.1518,
      "step": 72180
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1139354705810547,
      "learning_rate": 1.8037788207581842e-07,
      "loss": 2.418,
      "step": 72181
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1647900342941284,
      "learning_rate": 1.8030004033610437e-07,
      "loss": 2.3124,
      "step": 72182
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9644331932067871,
      "learning_rate": 1.8022221524344852e-07,
      "loss": 2.2712,
      "step": 72183
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1381709575653076,
      "learning_rate": 1.8014440679798296e-07,
      "loss": 2.1884,
      "step": 72184
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0688273906707764,
      "learning_rate": 1.800666149998387e-07,
      "loss": 2.3282,
      "step": 72185
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0277353525161743,
      "learning_rate": 1.7998883984915006e-07,
      "loss": 2.7013,
      "step": 72186
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0803967714309692,
      "learning_rate": 1.7991108134604584e-07,
      "loss": 2.405,
      "step": 72187
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1447041034698486,
      "learning_rate": 1.798333394906604e-07,
      "loss": 2.1283,
      "step": 72188
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1235451698303223,
      "learning_rate": 1.7975561428312472e-07,
      "loss": 2.5526,
      "step": 72189
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0816550254821777,
      "learning_rate": 1.796779057235709e-07,
      "loss": 2.1348,
      "step": 72190
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0959360599517822,
      "learning_rate": 1.796002138121289e-07,
      "loss": 2.4743,
      "step": 72191
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0530192852020264,
      "learning_rate": 1.7952253854893188e-07,
      "loss": 2.6043,
      "step": 72192
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1809903383255005,
      "learning_rate": 1.7944487993411197e-07,
      "loss": 2.2567,
      "step": 72193
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.061343789100647,
      "learning_rate": 1.793672379678002e-07,
      "loss": 2.4469,
      "step": 72194
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0275763273239136,
      "learning_rate": 1.7928961265012869e-07,
      "loss": 2.1629,
      "step": 72195
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1841963529586792,
      "learning_rate": 1.792120039812284e-07,
      "loss": 2.3817,
      "step": 72196
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0941176414489746,
      "learning_rate": 1.7913441196123038e-07,
      "loss": 2.4765,
      "step": 72197
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.149085521697998,
      "learning_rate": 1.7905683659026674e-07,
      "loss": 2.3702,
      "step": 72198
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1283372640609741,
      "learning_rate": 1.7897927786846958e-07,
      "loss": 2.0376,
      "step": 72199
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0799064636230469,
      "learning_rate": 1.789017357959688e-07,
      "loss": 2.2665,
      "step": 72200
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0840624570846558,
      "learning_rate": 1.7882421037289877e-07,
      "loss": 2.3721,
      "step": 72201
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0557734966278076,
      "learning_rate": 1.7874670159938712e-07,
      "loss": 2.4352,
      "step": 72202
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.025101661682129,
      "learning_rate": 1.786692094755693e-07,
      "loss": 2.2363,
      "step": 72203
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.031436800956726,
      "learning_rate": 1.7859173400157305e-07,
      "loss": 2.3053,
      "step": 72204
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1699875593185425,
      "learning_rate": 1.785142751775315e-07,
      "loss": 2.0794,
      "step": 72205
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.01761794090271,
      "learning_rate": 1.7843683300357572e-07,
      "loss": 2.2382,
      "step": 72206
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3673211336135864,
      "learning_rate": 1.7835940747983782e-07,
      "loss": 2.1801,
      "step": 72207
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0506826639175415,
      "learning_rate": 1.782819986064488e-07,
      "loss": 2.3346,
      "step": 72208
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0817259550094604,
      "learning_rate": 1.7820460638353853e-07,
      "loss": 2.3556,
      "step": 72209
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0978444814682007,
      "learning_rate": 1.7812723081123805e-07,
      "loss": 2.3168,
      "step": 72210
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.030465841293335,
      "learning_rate": 1.780498718896806e-07,
      "loss": 2.3602,
      "step": 72211
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0135663747787476,
      "learning_rate": 1.779725296189949e-07,
      "loss": 2.3251,
      "step": 72212
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.102817177772522,
      "learning_rate": 1.7789520399931538e-07,
      "loss": 2.3398,
      "step": 72213
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0997952222824097,
      "learning_rate": 1.7781789503076964e-07,
      "loss": 2.4237,
      "step": 72214
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9871495962142944,
      "learning_rate": 1.777406027134909e-07,
      "loss": 2.4471,
      "step": 72215
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.03951096534729,
      "learning_rate": 1.7766332704760803e-07,
      "loss": 2.2268,
      "step": 72216
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0008713006973267,
      "learning_rate": 1.7758606803325528e-07,
      "loss": 2.3679,
      "step": 72217
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1283633708953857,
      "learning_rate": 1.775088256705626e-07,
      "loss": 2.4258,
      "step": 72218
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0513705015182495,
      "learning_rate": 1.7743159995965765e-07,
      "loss": 2.2145,
      "step": 72219
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1702600717544556,
      "learning_rate": 1.7735439090067586e-07,
      "loss": 2.3417,
      "step": 72220
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0738474130630493,
      "learning_rate": 1.7727719849374602e-07,
      "loss": 2.1578,
      "step": 72221
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1830130815505981,
      "learning_rate": 1.7720002273899805e-07,
      "loss": 2.4979,
      "step": 72222
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0394269227981567,
      "learning_rate": 1.7712286363656516e-07,
      "loss": 2.29,
      "step": 72223
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1188104152679443,
      "learning_rate": 1.7704572118657614e-07,
      "loss": 2.486,
      "step": 72224
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.004727840423584,
      "learning_rate": 1.769685953891631e-07,
      "loss": 2.426,
      "step": 72225
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1378496885299683,
      "learning_rate": 1.7689148624445597e-07,
      "loss": 2.2566,
      "step": 72226
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.041593074798584,
      "learning_rate": 1.7681439375258457e-07,
      "loss": 2.2535,
      "step": 72227
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.163238286972046,
      "learning_rate": 1.767373179136822e-07,
      "loss": 2.5562,
      "step": 72228
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.079938292503357,
      "learning_rate": 1.766602587278765e-07,
      "loss": 2.0736,
      "step": 72229
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.060195803642273,
      "learning_rate": 1.7658321619530073e-07,
      "loss": 2.4783,
      "step": 72230
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.148719072341919,
      "learning_rate": 1.7650619031608472e-07,
      "loss": 2.3586,
      "step": 72231
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1068181991577148,
      "learning_rate": 1.7642918109035845e-07,
      "loss": 2.344,
      "step": 72232
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0838981866836548,
      "learning_rate": 1.7635218851825175e-07,
      "loss": 2.119,
      "step": 72233
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.078739881515503,
      "learning_rate": 1.7627521259989789e-07,
      "loss": 2.294,
      "step": 72234
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2871861457824707,
      "learning_rate": 1.761982533354245e-07,
      "loss": 2.0759,
      "step": 72235
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0620999336242676,
      "learning_rate": 1.7612131072496375e-07,
      "loss": 2.3252,
      "step": 72236
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1124074459075928,
      "learning_rate": 1.760443847686444e-07,
      "loss": 2.159,
      "step": 72237
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.236751914024353,
      "learning_rate": 1.7596747546659965e-07,
      "loss": 2.4152,
      "step": 72238
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9526696801185608,
      "learning_rate": 1.758905828189572e-07,
      "loss": 2.3386,
      "step": 72239
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1848338842391968,
      "learning_rate": 1.7581370682584808e-07,
      "loss": 2.106,
      "step": 72240
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.155832052230835,
      "learning_rate": 1.7573684748740328e-07,
      "loss": 2.2497,
      "step": 72241
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.089302659034729,
      "learning_rate": 1.7566000480375267e-07,
      "loss": 2.1619,
      "step": 72242
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0630604028701782,
      "learning_rate": 1.7558317877502618e-07,
      "loss": 2.1149,
      "step": 72243
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.041961908340454,
      "learning_rate": 1.755063694013559e-07,
      "loss": 2.3098,
      "step": 72244
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1327803134918213,
      "learning_rate": 1.7542957668286952e-07,
      "loss": 2.4012,
      "step": 72245
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0671182870864868,
      "learning_rate": 1.7535280061969916e-07,
      "loss": 2.3357,
      "step": 72246
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.032068133354187,
      "learning_rate": 1.7527604121197363e-07,
      "loss": 2.2428,
      "step": 72247
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0794568061828613,
      "learning_rate": 1.7519929845982385e-07,
      "loss": 2.5461,
      "step": 72248
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0082366466522217,
      "learning_rate": 1.751225723633787e-07,
      "loss": 2.3657,
      "step": 72249
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.028469443321228,
      "learning_rate": 1.7504586292277026e-07,
      "loss": 2.2652,
      "step": 72250
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0473142862319946,
      "learning_rate": 1.749691701381262e-07,
      "loss": 2.338,
      "step": 72251
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.086323618888855,
      "learning_rate": 1.7489249400957975e-07,
      "loss": 2.2572,
      "step": 72252
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.061055302619934,
      "learning_rate": 1.748158345372575e-07,
      "loss": 2.1685,
      "step": 72253
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9919502139091492,
      "learning_rate": 1.747391917212915e-07,
      "loss": 2.3575,
      "step": 72254
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.304073452949524,
      "learning_rate": 1.746625655618106e-07,
      "loss": 2.4794,
      "step": 72255
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0762118101119995,
      "learning_rate": 1.7458595605894468e-07,
      "loss": 2.3445,
      "step": 72256
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9809338450431824,
      "learning_rate": 1.7450936321282585e-07,
      "loss": 2.4714,
      "step": 72257
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2176997661590576,
      "learning_rate": 1.744327870235807e-07,
      "loss": 2.3345,
      "step": 72258
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2000852823257446,
      "learning_rate": 1.743562274913413e-07,
      "loss": 2.4246,
      "step": 72259
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0397130250930786,
      "learning_rate": 1.7427968461623647e-07,
      "loss": 2.3951,
      "step": 72260
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1380735635757446,
      "learning_rate": 1.742031583983972e-07,
      "loss": 2.0931,
      "step": 72261
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1672258377075195,
      "learning_rate": 1.7412664883795117e-07,
      "loss": 2.372,
      "step": 72262
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9997168779373169,
      "learning_rate": 1.7405015593502938e-07,
      "loss": 2.344,
      "step": 72263
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0980172157287598,
      "learning_rate": 1.7397367968976065e-07,
      "loss": 2.4463,
      "step": 72264
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1339328289031982,
      "learning_rate": 1.7389722010227594e-07,
      "loss": 2.4085,
      "step": 72265
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0753799676895142,
      "learning_rate": 1.7382077717270297e-07,
      "loss": 2.2562,
      "step": 72266
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0041252374649048,
      "learning_rate": 1.737443509011738e-07,
      "loss": 2.3295,
      "step": 72267
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.058411717414856,
      "learning_rate": 1.7366794128781617e-07,
      "loss": 2.3697,
      "step": 72268
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1340768337249756,
      "learning_rate": 1.7359154833276103e-07,
      "loss": 2.2187,
      "step": 72269
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0682029724121094,
      "learning_rate": 1.735151720361361e-07,
      "loss": 2.4598,
      "step": 72270
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0504298210144043,
      "learning_rate": 1.7343881239807237e-07,
      "loss": 2.1486,
      "step": 72271
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0431307554244995,
      "learning_rate": 1.733624694186986e-07,
      "loss": 2.3749,
      "step": 72272
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1114591360092163,
      "learning_rate": 1.732861430981436e-07,
      "loss": 2.153,
      "step": 72273
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.087448000907898,
      "learning_rate": 1.7320983343653842e-07,
      "loss": 2.3072,
      "step": 72274
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0723083019256592,
      "learning_rate": 1.7313354043401176e-07,
      "loss": 2.279,
      "step": 72275
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.142077922821045,
      "learning_rate": 1.7305726409069244e-07,
      "loss": 2.4228,
      "step": 72276
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.070341944694519,
      "learning_rate": 1.729810044067093e-07,
      "loss": 2.1715,
      "step": 72277
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.134739637374878,
      "learning_rate": 1.7290476138219215e-07,
      "loss": 2.2575,
      "step": 72278
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2976655960083008,
      "learning_rate": 1.7282853501727203e-07,
      "loss": 2.4644,
      "step": 72279
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.211333990097046,
      "learning_rate": 1.727523253120744e-07,
      "loss": 2.1669,
      "step": 72280
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0393320322036743,
      "learning_rate": 1.7267613226673253e-07,
      "loss": 2.3869,
      "step": 72281
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0043597221374512,
      "learning_rate": 1.725999558813729e-07,
      "loss": 2.3383,
      "step": 72282
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9927679300308228,
      "learning_rate": 1.7252379615612547e-07,
      "loss": 2.2652,
      "step": 72283
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0460939407348633,
      "learning_rate": 1.7244765309111898e-07,
      "loss": 2.3568,
      "step": 72284
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4231741428375244,
      "learning_rate": 1.723715266864834e-07,
      "loss": 2.4216,
      "step": 72285
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0798120498657227,
      "learning_rate": 1.7229541694234742e-07,
      "loss": 2.4944,
      "step": 72286
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1776206493377686,
      "learning_rate": 1.722193238588399e-07,
      "loss": 2.504,
      "step": 72287
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.113275170326233,
      "learning_rate": 1.7214324743608957e-07,
      "loss": 2.4048,
      "step": 72288
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0378414392471313,
      "learning_rate": 1.7206718767422414e-07,
      "loss": 2.331,
      "step": 72289
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1093223094940186,
      "learning_rate": 1.7199114457337573e-07,
      "loss": 2.3363,
      "step": 72290
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.277297854423523,
      "learning_rate": 1.719151181336709e-07,
      "loss": 2.4226,
      "step": 72291
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5622040033340454,
      "learning_rate": 1.7183910835524064e-07,
      "loss": 2.1912,
      "step": 72292
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1386018991470337,
      "learning_rate": 1.7176311523821044e-07,
      "loss": 2.2844,
      "step": 72293
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1397058963775635,
      "learning_rate": 1.7168713878271238e-07,
      "loss": 2.2539,
      "step": 72294
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.107014775276184,
      "learning_rate": 1.7161117898887303e-07,
      "loss": 2.3279,
      "step": 72295
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0634472370147705,
      "learning_rate": 1.715352358568223e-07,
      "loss": 2.2433,
      "step": 72296
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.23037588596344,
      "learning_rate": 1.7145930938668897e-07,
      "loss": 2.2668,
      "step": 72297
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1327784061431885,
      "learning_rate": 1.7138339957860184e-07,
      "loss": 2.3323,
      "step": 72298
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1994647979736328,
      "learning_rate": 1.7130750643268857e-07,
      "loss": 2.3434,
      "step": 72299
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1194101572036743,
      "learning_rate": 1.7123162994908015e-07,
      "loss": 2.2433,
      "step": 72300
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0913137197494507,
      "learning_rate": 1.7115577012790096e-07,
      "loss": 2.3418,
      "step": 72301
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1172853708267212,
      "learning_rate": 1.7107992696928422e-07,
      "loss": 2.2966,
      "step": 72302
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.158497929573059,
      "learning_rate": 1.7100410047335537e-07,
      "loss": 2.3794,
      "step": 72303
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0937964916229248,
      "learning_rate": 1.7092829064024542e-07,
      "loss": 1.9818,
      "step": 72304
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2115399837493896,
      "learning_rate": 1.708524974700798e-07,
      "loss": 2.382,
      "step": 72305
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2339493036270142,
      "learning_rate": 1.7077672096298957e-07,
      "loss": 2.505,
      "step": 72306
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1436623334884644,
      "learning_rate": 1.707009611191024e-07,
      "loss": 2.4049,
      "step": 72307
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.264596700668335,
      "learning_rate": 1.7062521793854702e-07,
      "loss": 2.2669,
      "step": 72308
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0572967529296875,
      "learning_rate": 1.7054949142145115e-07,
      "loss": 2.233,
      "step": 72309
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1111705303192139,
      "learning_rate": 1.7047378156794358e-07,
      "loss": 2.1767,
      "step": 72310
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1288686990737915,
      "learning_rate": 1.7039808837815308e-07,
      "loss": 2.203,
      "step": 72311
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0229673385620117,
      "learning_rate": 1.7032241185220622e-07,
      "loss": 2.4233,
      "step": 72312
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0976024866104126,
      "learning_rate": 1.70246751990234e-07,
      "loss": 2.3689,
      "step": 72313
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0661158561706543,
      "learning_rate": 1.7017110879236297e-07,
      "loss": 2.2447,
      "step": 72314
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1313581466674805,
      "learning_rate": 1.7009548225872197e-07,
      "loss": 2.2664,
      "step": 72315
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0905593633651733,
      "learning_rate": 1.700198723894375e-07,
      "loss": 2.2382,
      "step": 72316
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.080424189567566,
      "learning_rate": 1.6994427918464062e-07,
      "loss": 2.25,
      "step": 72317
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0736569166183472,
      "learning_rate": 1.6986870264445676e-07,
      "loss": 2.2945,
      "step": 72318
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9687307476997375,
      "learning_rate": 1.6979314276901582e-07,
      "loss": 2.3563,
      "step": 72319
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0781899690628052,
      "learning_rate": 1.6971759955844547e-07,
      "loss": 2.3926,
      "step": 72320
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1015039682388306,
      "learning_rate": 1.696420730128734e-07,
      "loss": 2.171,
      "step": 72321
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2278907299041748,
      "learning_rate": 1.6956656313242725e-07,
      "loss": 2.1024,
      "step": 72322
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0509060621261597,
      "learning_rate": 1.6949106991723695e-07,
      "loss": 2.3435,
      "step": 72323
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0301127433776855,
      "learning_rate": 1.6941559336742907e-07,
      "loss": 2.0373,
      "step": 72324
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9984490871429443,
      "learning_rate": 1.6934013348313127e-07,
      "loss": 2.3242,
      "step": 72325
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1713082790374756,
      "learning_rate": 1.6926469026447123e-07,
      "loss": 2.389,
      "step": 72326
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0818793773651123,
      "learning_rate": 1.6918926371157996e-07,
      "loss": 2.1221,
      "step": 72327
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0567753314971924,
      "learning_rate": 1.6911385382458068e-07,
      "loss": 2.1677,
      "step": 72328
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0079365968704224,
      "learning_rate": 1.690384606036044e-07,
      "loss": 2.1385,
      "step": 72329
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0412507057189941,
      "learning_rate": 1.689630840487766e-07,
      "loss": 2.2462,
      "step": 72330
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0916749238967896,
      "learning_rate": 1.6888772416022825e-07,
      "loss": 2.6106,
      "step": 72331
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1005208492279053,
      "learning_rate": 1.6881238093808371e-07,
      "loss": 2.3355,
      "step": 72332
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.259831428527832,
      "learning_rate": 1.6873705438247402e-07,
      "loss": 2.3363,
      "step": 72333
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0938917398452759,
      "learning_rate": 1.6866174449352346e-07,
      "loss": 2.1479,
      "step": 72334
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2960724830627441,
      "learning_rate": 1.685864512713631e-07,
      "loss": 2.1176,
      "step": 72335
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0312556028366089,
      "learning_rate": 1.6851117471611833e-07,
      "loss": 2.2816,
      "step": 72336
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9607681632041931,
      "learning_rate": 1.6843591482791578e-07,
      "loss": 2.397,
      "step": 72337
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3445167541503906,
      "learning_rate": 1.683606716068864e-07,
      "loss": 2.2532,
      "step": 72338
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3264397382736206,
      "learning_rate": 1.6828544505315457e-07,
      "loss": 2.4365,
      "step": 72339
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.264663577079773,
      "learning_rate": 1.682102351668502e-07,
      "loss": 2.3685,
      "step": 72340
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1441848278045654,
      "learning_rate": 1.681350419480998e-07,
      "loss": 2.1667,
      "step": 72341
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0035079717636108,
      "learning_rate": 1.6805986539703002e-07,
      "loss": 2.3771,
      "step": 72342
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1642483472824097,
      "learning_rate": 1.6798470551376955e-07,
      "loss": 2.3225,
      "step": 72343
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.218595027923584,
      "learning_rate": 1.6790956229844613e-07,
      "loss": 2.2039,
      "step": 72344
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2284554243087769,
      "learning_rate": 1.678344357511852e-07,
      "loss": 2.2221,
      "step": 72345
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9864925146102905,
      "learning_rate": 1.6775932587211553e-07,
      "loss": 2.077,
      "step": 72346
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9852638840675354,
      "learning_rate": 1.676842326613637e-07,
      "loss": 2.1063,
      "step": 72347
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9895035028457642,
      "learning_rate": 1.676091561190585e-07,
      "loss": 2.363,
      "step": 72348
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0861328840255737,
      "learning_rate": 1.6753409624532536e-07,
      "loss": 2.1845,
      "step": 72349
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6043399572372437,
      "learning_rate": 1.674590530402931e-07,
      "loss": 2.3979,
      "step": 72350
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1280136108398438,
      "learning_rate": 1.6738402650408826e-07,
      "loss": 2.5058,
      "step": 72351
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0289039611816406,
      "learning_rate": 1.6730901663683742e-07,
      "loss": 2.4028,
      "step": 72352
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0099332332611084,
      "learning_rate": 1.6723402343866934e-07,
      "loss": 2.4743,
      "step": 72353
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1204829216003418,
      "learning_rate": 1.6715904690970952e-07,
      "loss": 2.2975,
      "step": 72354
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0247596502304077,
      "learning_rate": 1.670840870500856e-07,
      "loss": 2.2933,
      "step": 72355
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0571939945220947,
      "learning_rate": 1.6700914385992418e-07,
      "loss": 2.275,
      "step": 72356
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.065686583518982,
      "learning_rate": 1.669342173393529e-07,
      "loss": 2.2743,
      "step": 72357
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2118518352508545,
      "learning_rate": 1.6685930748849944e-07,
      "loss": 2.3988,
      "step": 72358
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0250686407089233,
      "learning_rate": 1.6678441430749036e-07,
      "loss": 2.2796,
      "step": 72359
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0934659242630005,
      "learning_rate": 1.6670953779645227e-07,
      "loss": 2.2752,
      "step": 72360
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.048442006111145,
      "learning_rate": 1.6663467795551058e-07,
      "loss": 2.1148,
      "step": 72361
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.043968677520752,
      "learning_rate": 1.665598347847952e-07,
      "loss": 2.3372,
      "step": 72362
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1167186498641968,
      "learning_rate": 1.664850082844316e-07,
      "loss": 2.4891,
      "step": 72363
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2834408283233643,
      "learning_rate": 1.664101984545463e-07,
      "loss": 2.2746,
      "step": 72364
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1198182106018066,
      "learning_rate": 1.6633540529526703e-07,
      "loss": 2.4458,
      "step": 72365
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0264897346496582,
      "learning_rate": 1.6626062880672033e-07,
      "loss": 2.1763,
      "step": 72366
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.220687985420227,
      "learning_rate": 1.6618586898903167e-07,
      "loss": 2.6347,
      "step": 72367
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1050344705581665,
      "learning_rate": 1.661111258423298e-07,
      "loss": 2.3993,
      "step": 72368
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1174803972244263,
      "learning_rate": 1.660363993667391e-07,
      "loss": 2.432,
      "step": 72369
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1388238668441772,
      "learning_rate": 1.6596168956238834e-07,
      "loss": 2.1682,
      "step": 72370
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0576379299163818,
      "learning_rate": 1.658869964294041e-07,
      "loss": 2.5547,
      "step": 72371
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9957577586174011,
      "learning_rate": 1.6581231996791069e-07,
      "loss": 2.2936,
      "step": 72372
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.084108829498291,
      "learning_rate": 1.6573766017803805e-07,
      "loss": 2.2129,
      "step": 72373
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1289386749267578,
      "learning_rate": 1.656630170599094e-07,
      "loss": 2.1678,
      "step": 72374
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0791176557540894,
      "learning_rate": 1.655883906136535e-07,
      "loss": 2.2107,
      "step": 72375
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0116586685180664,
      "learning_rate": 1.6551378083939695e-07,
      "loss": 2.3398,
      "step": 72376
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.051963210105896,
      "learning_rate": 1.654391877372652e-07,
      "loss": 2.1417,
      "step": 72377
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0193867683410645,
      "learning_rate": 1.653646113073837e-07,
      "loss": 2.3941,
      "step": 72378
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2918519973754883,
      "learning_rate": 1.6529005154988343e-07,
      "loss": 2.3241,
      "step": 72379
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5577658414840698,
      "learning_rate": 1.6521550846488432e-07,
      "loss": 2.1831,
      "step": 72380
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0733286142349243,
      "learning_rate": 1.651409820525185e-07,
      "loss": 2.523,
      "step": 72381
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0491931438446045,
      "learning_rate": 1.6506647231290807e-07,
      "loss": 2.3027,
      "step": 72382
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0881400108337402,
      "learning_rate": 1.6499197924618182e-07,
      "loss": 2.3645,
      "step": 72383
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.161550521850586,
      "learning_rate": 1.649175028524641e-07,
      "loss": 2.3564,
      "step": 72384
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1074926853179932,
      "learning_rate": 1.648430431318837e-07,
      "loss": 2.393,
      "step": 72385
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0023843050003052,
      "learning_rate": 1.6476860008456498e-07,
      "loss": 2.5397,
      "step": 72386
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1888619661331177,
      "learning_rate": 1.6469417371063557e-07,
      "loss": 2.1895,
      "step": 72387
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0172778367996216,
      "learning_rate": 1.6461976401021984e-07,
      "loss": 2.4425,
      "step": 72388
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.108764886856079,
      "learning_rate": 1.6454537098344547e-07,
      "loss": 2.1782,
      "step": 72389
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.028527021408081,
      "learning_rate": 1.644709946304368e-07,
      "loss": 2.3778,
      "step": 72390
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0087933540344238,
      "learning_rate": 1.6439663495132264e-07,
      "loss": 2.4068,
      "step": 72391
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.419654369354248,
      "learning_rate": 1.6432229194622729e-07,
      "loss": 2.2026,
      "step": 72392
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2517430782318115,
      "learning_rate": 1.6424796561527622e-07,
      "loss": 2.2609,
      "step": 72393
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0692284107208252,
      "learning_rate": 1.6417365595859713e-07,
      "loss": 2.1852,
      "step": 72394
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0479656457901,
      "learning_rate": 1.6409936297631435e-07,
      "loss": 2.1812,
      "step": 72395
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.087389349937439,
      "learning_rate": 1.6402508666855443e-07,
      "loss": 2.6382,
      "step": 72396
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0606868267059326,
      "learning_rate": 1.6395082703544395e-07,
      "loss": 2.2686,
      "step": 72397
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.072102665901184,
      "learning_rate": 1.6387658407710727e-07,
      "loss": 2.2737,
      "step": 72398
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0676435232162476,
      "learning_rate": 1.6380235779367204e-07,
      "loss": 2.3597,
      "step": 72399
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1307299137115479,
      "learning_rate": 1.6372814818526374e-07,
      "loss": 2.0606,
      "step": 72400
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.067939043045044,
      "learning_rate": 1.6365395525200666e-07,
      "loss": 2.3971,
      "step": 72401
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0246901512145996,
      "learning_rate": 1.6357977899402856e-07,
      "loss": 2.3705,
      "step": 72402
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2367247343063354,
      "learning_rate": 1.635056194114526e-07,
      "loss": 2.2362,
      "step": 72403
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9780797958374023,
      "learning_rate": 1.634314765044087e-07,
      "loss": 2.4299,
      "step": 72404
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2323222160339355,
      "learning_rate": 1.633573502730179e-07,
      "loss": 2.1194,
      "step": 72405
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1333307027816772,
      "learning_rate": 1.6328324071741008e-07,
      "loss": 2.4846,
      "step": 72406
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0477391481399536,
      "learning_rate": 1.6320914783770626e-07,
      "loss": 2.1064,
      "step": 72407
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1639693975448608,
      "learning_rate": 1.631350716340363e-07,
      "loss": 2.3564,
      "step": 72408
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1485850811004639,
      "learning_rate": 1.6306101210652347e-07,
      "loss": 2.3634,
      "step": 72409
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1210287809371948,
      "learning_rate": 1.6298696925529432e-07,
      "loss": 2.3014,
      "step": 72410
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0546640157699585,
      "learning_rate": 1.6291294308047324e-07,
      "loss": 2.2815,
      "step": 72411
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.944646954536438,
      "learning_rate": 1.6283893358218672e-07,
      "loss": 2.2923,
      "step": 72412
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2300039529800415,
      "learning_rate": 1.6276494076056026e-07,
      "loss": 2.2661,
      "step": 72413
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.095080852508545,
      "learning_rate": 1.6269096461571821e-07,
      "loss": 2.0877,
      "step": 72414
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1080822944641113,
      "learning_rate": 1.626170051477871e-07,
      "loss": 2.3755,
      "step": 72415
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.232252836227417,
      "learning_rate": 1.6254306235689242e-07,
      "loss": 2.2893,
      "step": 72416
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1958611011505127,
      "learning_rate": 1.6246913624315853e-07,
      "loss": 2.4072,
      "step": 72417
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9737704992294312,
      "learning_rate": 1.6239522680671195e-07,
      "loss": 2.3015,
      "step": 72418
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1544204950332642,
      "learning_rate": 1.6232133404767703e-07,
      "loss": 2.3824,
      "step": 72419
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1075677871704102,
      "learning_rate": 1.6224745796618036e-07,
      "loss": 2.457,
      "step": 72420
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.098873496055603,
      "learning_rate": 1.6217359856234404e-07,
      "loss": 2.1247,
      "step": 72421
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3016982078552246,
      "learning_rate": 1.620997558362969e-07,
      "loss": 2.2309,
      "step": 72422
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.044622540473938,
      "learning_rate": 1.6202592978816212e-07,
      "loss": 2.3211,
      "step": 72423
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2376850843429565,
      "learning_rate": 1.6195212041806519e-07,
      "loss": 2.4361,
      "step": 72424
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9976085424423218,
      "learning_rate": 1.6187832772613155e-07,
      "loss": 2.1504,
      "step": 72425
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0285941362380981,
      "learning_rate": 1.6180455171248555e-07,
      "loss": 2.3755,
      "step": 72426
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0086238384246826,
      "learning_rate": 1.6173079237725375e-07,
      "loss": 2.2458,
      "step": 72427
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0749801397323608,
      "learning_rate": 1.616570497205594e-07,
      "loss": 2.3381,
      "step": 72428
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1577609777450562,
      "learning_rate": 1.6158332374253016e-07,
      "loss": 2.4675,
      "step": 72429
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1636674404144287,
      "learning_rate": 1.6150961444328707e-07,
      "loss": 2.3462,
      "step": 72430
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1532701253890991,
      "learning_rate": 1.6143592182295886e-07,
      "loss": 2.3414,
      "step": 72431
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0842586755752563,
      "learning_rate": 1.6136224588166881e-07,
      "loss": 2.225,
      "step": 72432
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0084015130996704,
      "learning_rate": 1.6128858661954127e-07,
      "loss": 2.0471,
      "step": 72433
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0640108585357666,
      "learning_rate": 1.6121494403670169e-07,
      "loss": 2.1812,
      "step": 72434
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0313411951065063,
      "learning_rate": 1.611413181332755e-07,
      "loss": 2.3222,
      "step": 72435
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0683510303497314,
      "learning_rate": 1.6106770890938595e-07,
      "loss": 2.2409,
      "step": 72436
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2099733352661133,
      "learning_rate": 1.609941163651596e-07,
      "loss": 2.4637,
      "step": 72437
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1408624649047852,
      "learning_rate": 1.609205405007197e-07,
      "loss": 2.588,
      "step": 72438
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1721422672271729,
      "learning_rate": 1.6084698131619281e-07,
      "loss": 2.4366,
      "step": 72439
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9904240965843201,
      "learning_rate": 1.6077343881170215e-07,
      "loss": 2.2288,
      "step": 72440
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.009840965270996,
      "learning_rate": 1.606999129873732e-07,
      "loss": 2.0933,
      "step": 72441
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.166291356086731,
      "learning_rate": 1.6062640384332917e-07,
      "loss": 2.4114,
      "step": 72442
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0622457265853882,
      "learning_rate": 1.6055291137969664e-07,
      "loss": 2.3923,
      "step": 72443
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0877282619476318,
      "learning_rate": 1.6047943559659885e-07,
      "loss": 2.3887,
      "step": 72444
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0683170557022095,
      "learning_rate": 1.6040597649416234e-07,
      "loss": 2.2964,
      "step": 72445
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0721832513809204,
      "learning_rate": 1.6033253407250815e-07,
      "loss": 2.3943,
      "step": 72446
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0712640285491943,
      "learning_rate": 1.6025910833176285e-07,
      "loss": 2.2482,
      "step": 72447
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3691879510879517,
      "learning_rate": 1.6018569927205075e-07,
      "loss": 2.1134,
      "step": 72448
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1885887384414673,
      "learning_rate": 1.6011230689349733e-07,
      "loss": 2.4118,
      "step": 72449
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0326969623565674,
      "learning_rate": 1.6003893119622583e-07,
      "loss": 2.2818,
      "step": 72450
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.000877857208252,
      "learning_rate": 1.5996557218036058e-07,
      "loss": 2.2596,
      "step": 72451
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0339802503585815,
      "learning_rate": 1.5989222984602593e-07,
      "loss": 2.1814,
      "step": 72452
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4986540079116821,
      "learning_rate": 1.5981890419334623e-07,
      "loss": 2.269,
      "step": 72453
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0628666877746582,
      "learning_rate": 1.597455952224469e-07,
      "loss": 2.1672,
      "step": 72454
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0889256000518799,
      "learning_rate": 1.5967230293345126e-07,
      "loss": 2.6075,
      "step": 72455
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.073099970817566,
      "learning_rate": 1.5959902732648357e-07,
      "loss": 2.1782,
      "step": 72456
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1686186790466309,
      "learning_rate": 1.595257684016671e-07,
      "loss": 2.3127,
      "step": 72457
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0389097929000854,
      "learning_rate": 1.5945252615912842e-07,
      "loss": 2.2027,
      "step": 72458
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.115181565284729,
      "learning_rate": 1.5937930059899075e-07,
      "loss": 2.1792,
      "step": 72459
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3235150575637817,
      "learning_rate": 1.5930609172137624e-07,
      "loss": 2.247,
      "step": 72460
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1242594718933105,
      "learning_rate": 1.592328995264114e-07,
      "loss": 2.3785,
      "step": 72461
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0877835750579834,
      "learning_rate": 1.5915972401422063e-07,
      "loss": 2.3029,
      "step": 72462
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0442426204681396,
      "learning_rate": 1.5908656518492493e-07,
      "loss": 2.3241,
      "step": 72463
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1041443347930908,
      "learning_rate": 1.5901342303865085e-07,
      "loss": 2.6137,
      "step": 72464
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0734741687774658,
      "learning_rate": 1.5894029757552166e-07,
      "loss": 2.468,
      "step": 72465
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1552538871765137,
      "learning_rate": 1.5886718879566276e-07,
      "loss": 2.4654,
      "step": 72466
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0393705368041992,
      "learning_rate": 1.5879409669919522e-07,
      "loss": 2.4058,
      "step": 72467
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2477713823318481,
      "learning_rate": 1.5872102128624555e-07,
      "loss": 2.3925,
      "step": 72468
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.184996485710144,
      "learning_rate": 1.5864796255693594e-07,
      "loss": 2.3361,
      "step": 72469
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.190735101699829,
      "learning_rate": 1.5857492051139067e-07,
      "loss": 2.0573,
      "step": 72470
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0124449729919434,
      "learning_rate": 1.5850189514973413e-07,
      "loss": 2.2208,
      "step": 72471
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0069245100021362,
      "learning_rate": 1.5842888647209064e-07,
      "loss": 2.2938,
      "step": 72472
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.081344485282898,
      "learning_rate": 1.5835589447858235e-07,
      "loss": 2.3732,
      "step": 72473
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1092538833618164,
      "learning_rate": 1.5828291916933468e-07,
      "loss": 2.1033,
      "step": 72474
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.013572335243225,
      "learning_rate": 1.582099605444687e-07,
      "loss": 2.4491,
      "step": 72475
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.012062668800354,
      "learning_rate": 1.581370186041109e-07,
      "loss": 2.2707,
      "step": 72476
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1494014263153076,
      "learning_rate": 1.5806409334838346e-07,
      "loss": 2.3788,
      "step": 72477
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.050779938697815,
      "learning_rate": 1.579911847774107e-07,
      "loss": 2.3522,
      "step": 72478
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2124991416931152,
      "learning_rate": 1.5791829289131588e-07,
      "loss": 2.604,
      "step": 72479
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1576592922210693,
      "learning_rate": 1.5784541769022332e-07,
      "loss": 2.4128,
      "step": 72480
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.180676817893982,
      "learning_rate": 1.5777255917425403e-07,
      "loss": 2.3952,
      "step": 72481
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0873314142227173,
      "learning_rate": 1.5769971734353462e-07,
      "loss": 2.4773,
      "step": 72482
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0961980819702148,
      "learning_rate": 1.5762689219818828e-07,
      "loss": 2.3389,
      "step": 72483
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1363699436187744,
      "learning_rate": 1.5755408373833603e-07,
      "loss": 2.2354,
      "step": 72484
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1287524700164795,
      "learning_rate": 1.5748129196410333e-07,
      "loss": 2.4116,
      "step": 72485
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2070033550262451,
      "learning_rate": 1.5740851687561342e-07,
      "loss": 2.2333,
      "step": 72486
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.587649941444397,
      "learning_rate": 1.5733575847298843e-07,
      "loss": 2.2482,
      "step": 72487
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.170759916305542,
      "learning_rate": 1.572630167563527e-07,
      "loss": 2.524,
      "step": 72488
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2141870260238647,
      "learning_rate": 1.5719029172583057e-07,
      "loss": 2.2737,
      "step": 72489
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0358275175094604,
      "learning_rate": 1.5711758338154193e-07,
      "loss": 2.2919,
      "step": 72490
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.062049150466919,
      "learning_rate": 1.5704489172361447e-07,
      "loss": 2.3737,
      "step": 72491
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.114364504814148,
      "learning_rate": 1.5697221675216812e-07,
      "loss": 2.4498,
      "step": 72492
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.064253807067871,
      "learning_rate": 1.5689955846732718e-07,
      "loss": 2.389,
      "step": 72493
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9599820971488953,
      "learning_rate": 1.568269168692149e-07,
      "loss": 2.3116,
      "step": 72494
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0597643852233887,
      "learning_rate": 1.5675429195795456e-07,
      "loss": 2.3135,
      "step": 72495
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4149998426437378,
      "learning_rate": 1.5668168373366822e-07,
      "loss": 2.333,
      "step": 72496
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1048882007598877,
      "learning_rate": 1.5660909219648135e-07,
      "loss": 2.4114,
      "step": 72497
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2460485696792603,
      "learning_rate": 1.5653651734651497e-07,
      "loss": 2.4284,
      "step": 72498
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.097138524055481,
      "learning_rate": 1.5646395918389345e-07,
      "loss": 2.49,
      "step": 72499
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0884413719177246,
      "learning_rate": 1.5639141770873668e-07,
      "loss": 2.3242,
      "step": 72500
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0627663135528564,
      "learning_rate": 1.563188929211723e-07,
      "loss": 2.2407,
      "step": 72501
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.666218876838684,
      "learning_rate": 1.5624638482131914e-07,
      "loss": 2.1628,
      "step": 72502
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2079694271087646,
      "learning_rate": 1.5617389340930267e-07,
      "loss": 2.0786,
      "step": 72503
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0411707162857056,
      "learning_rate": 1.5610141868524498e-07,
      "loss": 2.4401,
      "step": 72504
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0266309976577759,
      "learning_rate": 1.5602896064926932e-07,
      "loss": 2.3593,
      "step": 72505
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0586967468261719,
      "learning_rate": 1.559565193014967e-07,
      "loss": 2.1934,
      "step": 72506
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.006996750831604,
      "learning_rate": 1.5588409464205255e-07,
      "loss": 2.3843,
      "step": 72507
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.247557282447815,
      "learning_rate": 1.5581168667105795e-07,
      "loss": 2.3719,
      "step": 72508
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5705311298370361,
      "learning_rate": 1.557392953886372e-07,
      "loss": 2.2169,
      "step": 72509
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1137464046478271,
      "learning_rate": 1.556669207949124e-07,
      "loss": 2.3974,
      "step": 72510
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9567384123802185,
      "learning_rate": 1.5559456289000463e-07,
      "loss": 2.1493,
      "step": 72511
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1053342819213867,
      "learning_rate": 1.5552222167403818e-07,
      "loss": 2.6286,
      "step": 72512
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3659669160842896,
      "learning_rate": 1.554498971471352e-07,
      "loss": 2.2688,
      "step": 72513
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.106985330581665,
      "learning_rate": 1.5537758930941782e-07,
      "loss": 2.4286,
      "step": 72514
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0454882383346558,
      "learning_rate": 1.5530529816101037e-07,
      "loss": 2.231,
      "step": 72515
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0525270700454712,
      "learning_rate": 1.5523302370203274e-07,
      "loss": 2.4383,
      "step": 72516
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0739831924438477,
      "learning_rate": 1.5516076593261042e-07,
      "loss": 2.0524,
      "step": 72517
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4541356563568115,
      "learning_rate": 1.550885248528633e-07,
      "loss": 2.3781,
      "step": 72518
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1234616041183472,
      "learning_rate": 1.550163004629157e-07,
      "loss": 2.3025,
      "step": 72519
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1253457069396973,
      "learning_rate": 1.549440927628887e-07,
      "loss": 2.4314,
      "step": 72520
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.089848518371582,
      "learning_rate": 1.5487190175290546e-07,
      "loss": 2.4746,
      "step": 72521
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1045397520065308,
      "learning_rate": 1.5479972743308924e-07,
      "loss": 2.5673,
      "step": 72522
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1946353912353516,
      "learning_rate": 1.5472756980355997e-07,
      "loss": 2.4137,
      "step": 72523
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0434738397598267,
      "learning_rate": 1.5465542886444307e-07,
      "loss": 2.2474,
      "step": 72524
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.012953758239746,
      "learning_rate": 1.5458330461585735e-07,
      "loss": 2.4603,
      "step": 72525
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.149295449256897,
      "learning_rate": 1.5451119705792827e-07,
      "loss": 2.2635,
      "step": 72526
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0627031326293945,
      "learning_rate": 1.5443910619077573e-07,
      "loss": 2.4462,
      "step": 72527
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1336959600448608,
      "learning_rate": 1.5436703201452408e-07,
      "loss": 2.4266,
      "step": 72528
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2583014965057373,
      "learning_rate": 1.542949745292932e-07,
      "loss": 2.2657,
      "step": 72529
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0152573585510254,
      "learning_rate": 1.5422293373520746e-07,
      "loss": 2.484,
      "step": 72530
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.099747657775879,
      "learning_rate": 1.5415090963238678e-07,
      "loss": 2.1463,
      "step": 72531
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1061421632766724,
      "learning_rate": 1.5407890222095434e-07,
      "loss": 2.297,
      "step": 72532
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0772314071655273,
      "learning_rate": 1.5400691150103341e-07,
      "loss": 2.1597,
      "step": 72533
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0849885940551758,
      "learning_rate": 1.539349374727439e-07,
      "loss": 2.4601,
      "step": 72534
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3250408172607422,
      "learning_rate": 1.5386298013620905e-07,
      "loss": 2.3764,
      "step": 72535
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0639116764068604,
      "learning_rate": 1.5379103949155094e-07,
      "loss": 2.365,
      "step": 72536
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0545238256454468,
      "learning_rate": 1.5371911553889064e-07,
      "loss": 2.3116,
      "step": 72537
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.074127435684204,
      "learning_rate": 1.5364720827835246e-07,
      "loss": 2.4383,
      "step": 72538
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0507632493972778,
      "learning_rate": 1.5357531771005518e-07,
      "loss": 2.1767,
      "step": 72539
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0838119983673096,
      "learning_rate": 1.535034438341221e-07,
      "loss": 2.2657,
      "step": 72540
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2042884826660156,
      "learning_rate": 1.5343158665067414e-07,
      "loss": 2.2548,
      "step": 72541
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0933597087860107,
      "learning_rate": 1.533597461598346e-07,
      "loss": 2.2601,
      "step": 72542
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1219120025634766,
      "learning_rate": 1.5328792236172452e-07,
      "loss": 2.3623,
      "step": 72543
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0527215003967285,
      "learning_rate": 1.5321611525646597e-07,
      "loss": 2.328,
      "step": 72544
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.061684489250183,
      "learning_rate": 1.5314432484418106e-07,
      "loss": 2.3499,
      "step": 72545
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2261269092559814,
      "learning_rate": 1.5307255112498977e-07,
      "loss": 2.435,
      "step": 72546
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0874336957931519,
      "learning_rate": 1.5300079409901524e-07,
      "loss": 2.3421,
      "step": 72547
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0725005865097046,
      "learning_rate": 1.5292905376637856e-07,
      "loss": 2.2399,
      "step": 72548
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.13046133518219,
      "learning_rate": 1.5285733012720183e-07,
      "loss": 2.2598,
      "step": 72549
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0861414670944214,
      "learning_rate": 1.5278562318160605e-07,
      "loss": 2.3604,
      "step": 72550
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1288628578186035,
      "learning_rate": 1.5271393292971447e-07,
      "loss": 2.206,
      "step": 72551
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0865490436553955,
      "learning_rate": 1.526422593716459e-07,
      "loss": 2.3583,
      "step": 72552
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.038835048675537,
      "learning_rate": 1.5257060250752465e-07,
      "loss": 2.1583,
      "step": 72553
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1753909587860107,
      "learning_rate": 1.5249896233746842e-07,
      "loss": 2.111,
      "step": 72554
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.040945053100586,
      "learning_rate": 1.5242733886160266e-07,
      "loss": 2.351,
      "step": 72555
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0663515329360962,
      "learning_rate": 1.5235573208004617e-07,
      "loss": 2.4989,
      "step": 72556
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0812596082687378,
      "learning_rate": 1.522841419929222e-07,
      "loss": 2.1563,
      "step": 72557
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.052777886390686,
      "learning_rate": 1.522125686003506e-07,
      "loss": 2.3887,
      "step": 72558
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.039746880531311,
      "learning_rate": 1.5214101190245357e-07,
      "loss": 2.2105,
      "step": 72559
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1163157224655151,
      "learning_rate": 1.5206947189935205e-07,
      "loss": 2.3228,
      "step": 72560
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0756797790527344,
      "learning_rate": 1.5199794859116823e-07,
      "loss": 2.5606,
      "step": 72561
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.2823481559753418,
      "learning_rate": 1.5192644197802087e-07,
      "loss": 2.1627,
      "step": 72562
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.34260892868042,
      "learning_rate": 1.5185495206003432e-07,
      "loss": 2.1856,
      "step": 72563
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.9556310772895813,
      "learning_rate": 1.5178347883732848e-07,
      "loss": 2.3459,
      "step": 72564
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1219326257705688,
      "learning_rate": 1.5171202231002325e-07,
      "loss": 2.4387,
      "step": 72565
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0761758089065552,
      "learning_rate": 1.5164058247824186e-07,
      "loss": 2.0244,
      "step": 72566
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3688240051269531,
      "learning_rate": 1.5156915934210427e-07,
      "loss": 2.205,
      "step": 72567
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1791895627975464,
      "learning_rate": 1.5149775290173142e-07,
      "loss": 2.3118,
      "step": 72568
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0675077438354492,
      "learning_rate": 1.5142636315724435e-07,
      "loss": 2.2105,
      "step": 72569
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.113232970237732,
      "learning_rate": 1.513549901087652e-07,
      "loss": 2.4752,
      "step": 72570
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1116504669189453,
      "learning_rate": 1.5128363375641385e-07,
      "loss": 2.3154,
      "step": 72571
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0775213241577148,
      "learning_rate": 1.5121229410031136e-07,
      "loss": 2.3125,
      "step": 72572
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.1751519441604614,
      "learning_rate": 1.511409711405798e-07,
      "loss": 2.3291,
      "step": 72573
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0903571844100952,
      "learning_rate": 1.5106966487733797e-07,
      "loss": 2.3421,
      "step": 72574
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.0673056840896606,
      "learning_rate": 1.5099837531070915e-07,
      "loss": 2.2028,
      "step": 72575
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4538055658340454,
      "learning_rate": 1.509271024408121e-07,
      "loss": 2.3668,
      "step": 72576
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.087268352508545,
      "learning_rate": 1.5085584626777005e-07,
      "loss": 2.2167,
      "step": 72577
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.073043704032898,
      "learning_rate": 1.5078460679170072e-07,
      "loss": 2.2355,
      "step": 72578
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.070021390914917,
      "learning_rate": 1.5071338401272728e-07,
      "loss": 2.3588,
      "step": 72579
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1161231994628906,
      "learning_rate": 1.506421779309697e-07,
      "loss": 2.1252,
      "step": 72580
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0172789096832275,
      "learning_rate": 1.5057098854654784e-07,
      "loss": 2.2199,
      "step": 72581
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.191752314567566,
      "learning_rate": 1.5049981585958385e-07,
      "loss": 2.2061,
      "step": 72582
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9955494403839111,
      "learning_rate": 1.5042865987019761e-07,
      "loss": 2.5518,
      "step": 72583
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9962947368621826,
      "learning_rate": 1.5035752057851017e-07,
      "loss": 2.512,
      "step": 72584
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0648655891418457,
      "learning_rate": 1.502863979846414e-07,
      "loss": 2.1334,
      "step": 72585
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1257072687149048,
      "learning_rate": 1.5021529208871344e-07,
      "loss": 2.4387,
      "step": 72586
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0408109426498413,
      "learning_rate": 1.5014420289084397e-07,
      "loss": 2.0954,
      "step": 72587
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0711278915405273,
      "learning_rate": 1.5007313039115735e-07,
      "loss": 2.7457,
      "step": 72588
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1388741731643677,
      "learning_rate": 1.5000207458977011e-07,
      "loss": 2.2409,
      "step": 72589
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9675978422164917,
      "learning_rate": 1.4993103548680666e-07,
      "loss": 2.3101,
      "step": 72590
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1135748624801636,
      "learning_rate": 1.4986001308238352e-07,
      "loss": 2.3046,
      "step": 72591
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0525277853012085,
      "learning_rate": 1.4978900737662284e-07,
      "loss": 2.1997,
      "step": 72592
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0995073318481445,
      "learning_rate": 1.497180183696456e-07,
      "loss": 2.1862,
      "step": 72593
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0158436298370361,
      "learning_rate": 1.4964704606157178e-07,
      "loss": 2.2614,
      "step": 72594
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.081624150276184,
      "learning_rate": 1.4957609045252118e-07,
      "loss": 2.3285,
      "step": 72595
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.142975091934204,
      "learning_rate": 1.4950515154261492e-07,
      "loss": 2.2799,
      "step": 72596
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1444587707519531,
      "learning_rate": 1.4943422933197283e-07,
      "loss": 2.3288,
      "step": 72597
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2418029308319092,
      "learning_rate": 1.4936332382071483e-07,
      "loss": 2.42,
      "step": 72598
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1061761379241943,
      "learning_rate": 1.4929243500896084e-07,
      "loss": 2.4246,
      "step": 72599
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0398228168487549,
      "learning_rate": 1.4922156289683188e-07,
      "loss": 2.4088,
      "step": 72600
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.968126654624939,
      "learning_rate": 1.4915070748444893e-07,
      "loss": 2.5317,
      "step": 72601
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0755553245544434,
      "learning_rate": 1.490798687719297e-07,
      "loss": 2.2012,
      "step": 72602
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1096022129058838,
      "learning_rate": 1.4900904675939631e-07,
      "loss": 2.4453,
      "step": 72603
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1186467409133911,
      "learning_rate": 1.4893824144696757e-07,
      "loss": 2.4921,
      "step": 72604
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1208295822143555,
      "learning_rate": 1.4886745283476445e-07,
      "loss": 2.2877,
      "step": 72605
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1596732139587402,
      "learning_rate": 1.4879668092290689e-07,
      "loss": 2.3485,
      "step": 72606
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.226706624031067,
      "learning_rate": 1.4872592571151368e-07,
      "loss": 2.3122,
      "step": 72607
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0788244009017944,
      "learning_rate": 1.4865518720070583e-07,
      "loss": 2.3373,
      "step": 72608
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0551773309707642,
      "learning_rate": 1.4858446539060323e-07,
      "loss": 2.3763,
      "step": 72609
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0339771509170532,
      "learning_rate": 1.485137602813247e-07,
      "loss": 2.1231,
      "step": 72610
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.117700457572937,
      "learning_rate": 1.4844307187299233e-07,
      "loss": 2.3618,
      "step": 72611
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.188369870185852,
      "learning_rate": 1.4837240016572273e-07,
      "loss": 2.3387,
      "step": 72612
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1097731590270996,
      "learning_rate": 1.4830174515963914e-07,
      "loss": 2.5349,
      "step": 72613
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9947302937507629,
      "learning_rate": 1.482311068548592e-07,
      "loss": 2.3973,
      "step": 72614
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0454049110412598,
      "learning_rate": 1.4816048525150394e-07,
      "loss": 2.3056,
      "step": 72615
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.197515845298767,
      "learning_rate": 1.4808988034969108e-07,
      "loss": 2.1545,
      "step": 72616
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1750404834747314,
      "learning_rate": 1.480192921495438e-07,
      "loss": 2.341,
      "step": 72617
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1005905866622925,
      "learning_rate": 1.479487206511776e-07,
      "loss": 2.0579,
      "step": 72618
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0337398052215576,
      "learning_rate": 1.478781658547146e-07,
      "loss": 2.4351,
      "step": 72619
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0669009685516357,
      "learning_rate": 1.4780762776027358e-07,
      "loss": 2.3439,
      "step": 72620
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9858585000038147,
      "learning_rate": 1.4773710636797555e-07,
      "loss": 2.3302,
      "step": 72621
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1205629110336304,
      "learning_rate": 1.4766660167793823e-07,
      "loss": 2.2895,
      "step": 72622
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0909876823425293,
      "learning_rate": 1.4759611369028149e-07,
      "loss": 2.4128,
      "step": 72623
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.119972825050354,
      "learning_rate": 1.4752564240512524e-07,
      "loss": 2.2634,
      "step": 72624
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.040254831314087,
      "learning_rate": 1.4745518782258939e-07,
      "loss": 2.4505,
      "step": 72625
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.054395318031311,
      "learning_rate": 1.4738474994279273e-07,
      "loss": 2.1127,
      "step": 72626
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.137451171875,
      "learning_rate": 1.4731432876585515e-07,
      "loss": 2.2743,
      "step": 72627
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9496382474899292,
      "learning_rate": 1.4724392429189661e-07,
      "loss": 2.2858,
      "step": 72628
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0776599645614624,
      "learning_rate": 1.471735365210336e-07,
      "loss": 2.2829,
      "step": 72629
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0183384418487549,
      "learning_rate": 1.4710316545338831e-07,
      "loss": 2.4829,
      "step": 72630
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0986266136169434,
      "learning_rate": 1.470328110890795e-07,
      "loss": 2.4071,
      "step": 72631
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1245304346084595,
      "learning_rate": 1.4696247342822601e-07,
      "loss": 2.4543,
      "step": 72632
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2067184448242188,
      "learning_rate": 1.4689215247094657e-07,
      "loss": 2.1389,
      "step": 72633
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0043578147888184,
      "learning_rate": 1.4682184821736112e-07,
      "loss": 2.1893,
      "step": 72634
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.313204288482666,
      "learning_rate": 1.4675156066758955e-07,
      "loss": 2.2893,
      "step": 72635
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0690560340881348,
      "learning_rate": 1.4668128982175066e-07,
      "loss": 2.2219,
      "step": 72636
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1017662286758423,
      "learning_rate": 1.4661103567996105e-07,
      "loss": 2.21,
      "step": 72637
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0602731704711914,
      "learning_rate": 1.465407982423439e-07,
      "loss": 2.3577,
      "step": 72638
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2097570896148682,
      "learning_rate": 1.4647057750901472e-07,
      "loss": 2.416,
      "step": 72639
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.12602961063385,
      "learning_rate": 1.464003734800945e-07,
      "loss": 2.2818,
      "step": 72640
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.074507713317871,
      "learning_rate": 1.4633018615570205e-07,
      "loss": 2.3032,
      "step": 72641
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0720359086990356,
      "learning_rate": 1.4626001553595614e-07,
      "loss": 2.1213,
      "step": 72642
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.026626706123352,
      "learning_rate": 1.461898616209756e-07,
      "loss": 2.2152,
      "step": 72643
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1931909322738647,
      "learning_rate": 1.461197244108803e-07,
      "loss": 2.4728,
      "step": 72644
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1375162601470947,
      "learning_rate": 1.4604960390578792e-07,
      "loss": 2.3028,
      "step": 72645
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5314209461212158,
      "learning_rate": 1.4597950010581729e-07,
      "loss": 2.4685,
      "step": 72646
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0135271549224854,
      "learning_rate": 1.459094130110872e-07,
      "loss": 2.2937,
      "step": 72647
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.060347080230713,
      "learning_rate": 1.4583934262171862e-07,
      "loss": 2.3727,
      "step": 72648
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0957167148590088,
      "learning_rate": 1.4576928893782704e-07,
      "loss": 2.4436,
      "step": 72649
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1335102319717407,
      "learning_rate": 1.4569925195953462e-07,
      "loss": 2.244,
      "step": 72650
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1578714847564697,
      "learning_rate": 1.4562923168695675e-07,
      "loss": 2.3488,
      "step": 72651
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1440343856811523,
      "learning_rate": 1.455592281202145e-07,
      "loss": 2.2647,
      "step": 72652
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0559033155441284,
      "learning_rate": 1.4548924125942553e-07,
      "loss": 2.4417,
      "step": 72653
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.017575979232788,
      "learning_rate": 1.4541927110470867e-07,
      "loss": 2.361,
      "step": 72654
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1836901903152466,
      "learning_rate": 1.4534931765618266e-07,
      "loss": 2.2163,
      "step": 72655
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.097678303718567,
      "learning_rate": 1.4527938091396744e-07,
      "loss": 2.3704,
      "step": 72656
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0598070621490479,
      "learning_rate": 1.4520946087817844e-07,
      "loss": 2.3482,
      "step": 72657
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1035608053207397,
      "learning_rate": 1.4513955754893672e-07,
      "loss": 2.2252,
      "step": 72658
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1169049739837646,
      "learning_rate": 1.4506967092635992e-07,
      "loss": 2.3031,
      "step": 72659
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0883104801177979,
      "learning_rate": 1.4499980101056576e-07,
      "loss": 2.106,
      "step": 72660
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.400774359703064,
      "learning_rate": 1.4492994780167413e-07,
      "loss": 2.4315,
      "step": 72661
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2393217086791992,
      "learning_rate": 1.448601112998038e-07,
      "loss": 2.3083,
      "step": 72662
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9977408647537231,
      "learning_rate": 1.447902915050714e-07,
      "loss": 2.1807,
      "step": 72663
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1073062419891357,
      "learning_rate": 1.4472048841759567e-07,
      "loss": 2.3447,
      "step": 72664
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1327251195907593,
      "learning_rate": 1.4465070203749542e-07,
      "loss": 1.94,
      "step": 72665
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.193585991859436,
      "learning_rate": 1.4458093236488835e-07,
      "loss": 2.3887,
      "step": 72666
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1008834838867188,
      "learning_rate": 1.4451117939989545e-07,
      "loss": 2.3602,
      "step": 72667
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0439820289611816,
      "learning_rate": 1.4444144314262998e-07,
      "loss": 2.3654,
      "step": 72668
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.116328477859497,
      "learning_rate": 1.4437172359321626e-07,
      "loss": 2.4608,
      "step": 72669
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0349947214126587,
      "learning_rate": 1.4430202075176647e-07,
      "loss": 2.1475,
      "step": 72670
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0816396474838257,
      "learning_rate": 1.442323346184027e-07,
      "loss": 2.2564,
      "step": 72671
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1561709642410278,
      "learning_rate": 1.441626651932415e-07,
      "loss": 2.4535,
      "step": 72672
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.103075385093689,
      "learning_rate": 1.4409301247640173e-07,
      "loss": 2.1594,
      "step": 72673
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0640926361083984,
      "learning_rate": 1.44023376468001e-07,
      "loss": 2.4355,
      "step": 72674
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0503381490707397,
      "learning_rate": 1.4395375716815818e-07,
      "loss": 2.2781,
      "step": 72675
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0788078308105469,
      "learning_rate": 1.4388415457698868e-07,
      "loss": 2.2153,
      "step": 72676
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1722851991653442,
      "learning_rate": 1.4381456869461462e-07,
      "loss": 2.5969,
      "step": 72677
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0488544702529907,
      "learning_rate": 1.437449995211504e-07,
      "loss": 2.3054,
      "step": 72678
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0135371685028076,
      "learning_rate": 1.4367544705671587e-07,
      "loss": 2.4219,
      "step": 72679
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1031888723373413,
      "learning_rate": 1.4360591130142765e-07,
      "loss": 2.3744,
      "step": 72680
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.101070761680603,
      "learning_rate": 1.4353639225540562e-07,
      "loss": 2.3503,
      "step": 72681
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.002569556236267,
      "learning_rate": 1.4346688991876522e-07,
      "loss": 2.2864,
      "step": 72682
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2139135599136353,
      "learning_rate": 1.4339740429162752e-07,
      "loss": 2.2789,
      "step": 72683
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.014923095703125,
      "learning_rate": 1.433279353741057e-07,
      "loss": 2.3847,
      "step": 72684
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0194554328918457,
      "learning_rate": 1.4325848316632196e-07,
      "loss": 2.0854,
      "step": 72685
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0517578125,
      "learning_rate": 1.4318904766839059e-07,
      "loss": 2.3867,
      "step": 72686
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0405820608139038,
      "learning_rate": 1.431196288804315e-07,
      "loss": 2.0415,
      "step": 72687
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.148921251296997,
      "learning_rate": 1.430502268025613e-07,
      "loss": 2.4403,
      "step": 72688
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0895822048187256,
      "learning_rate": 1.4298084143489878e-07,
      "loss": 2.3205,
      "step": 72689
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.045109748840332,
      "learning_rate": 1.4291147277756157e-07,
      "loss": 2.2706,
      "step": 72690
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0259051322937012,
      "learning_rate": 1.428421208306652e-07,
      "loss": 2.3475,
      "step": 72691
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0568901300430298,
      "learning_rate": 1.427727855943295e-07,
      "loss": 2.4002,
      "step": 72692
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0924092531204224,
      "learning_rate": 1.4270346706867e-07,
      "loss": 2.2853,
      "step": 72693
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0276075601577759,
      "learning_rate": 1.4263416525380548e-07,
      "loss": 2.1761,
      "step": 72694
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.122646450996399,
      "learning_rate": 1.425648801498536e-07,
      "loss": 2.4385,
      "step": 72695
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0917906761169434,
      "learning_rate": 1.4249561175693204e-07,
      "loss": 2.2503,
      "step": 72696
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1383358240127563,
      "learning_rate": 1.424263600751574e-07,
      "loss": 2.2869,
      "step": 72697
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0857421159744263,
      "learning_rate": 1.4235712510464739e-07,
      "loss": 2.4102,
      "step": 72698
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0381474494934082,
      "learning_rate": 1.4228790684551963e-07,
      "loss": 2.1907,
      "step": 72699
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1623786687850952,
      "learning_rate": 1.4221870529789071e-07,
      "loss": 2.1656,
      "step": 72700
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0419386625289917,
      "learning_rate": 1.4214952046187835e-07,
      "loss": 2.3722,
      "step": 72701
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0353527069091797,
      "learning_rate": 1.420803523376002e-07,
      "loss": 2.1292,
      "step": 72702
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1491999626159668,
      "learning_rate": 1.4201120092517285e-07,
      "loss": 2.2935,
      "step": 72703
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.115435004234314,
      "learning_rate": 1.4194206622471397e-07,
      "loss": 2.352,
      "step": 72704
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0102070569992065,
      "learning_rate": 1.4187294823634012e-07,
      "loss": 2.2177,
      "step": 72705
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0693359375,
      "learning_rate": 1.4180384696017012e-07,
      "loss": 2.3952,
      "step": 72706
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1339417695999146,
      "learning_rate": 1.4173476239631944e-07,
      "loss": 2.323,
      "step": 72707
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1148220300674438,
      "learning_rate": 1.4166569454490685e-07,
      "loss": 2.1585,
      "step": 72708
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2032114267349243,
      "learning_rate": 1.4159664340604784e-07,
      "loss": 2.4148,
      "step": 72709
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1074720621109009,
      "learning_rate": 1.4152760897986008e-07,
      "loss": 2.3937,
      "step": 72710
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0868548154830933,
      "learning_rate": 1.4145859126646012e-07,
      "loss": 2.4316,
      "step": 72711
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0125586986541748,
      "learning_rate": 1.4138959026596566e-07,
      "loss": 2.2276,
      "step": 72712
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0767974853515625,
      "learning_rate": 1.4132060597849216e-07,
      "loss": 2.3282,
      "step": 72713
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0861319303512573,
      "learning_rate": 1.4125163840415956e-07,
      "loss": 2.0034,
      "step": 72714
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0888203382492065,
      "learning_rate": 1.4118268754308217e-07,
      "loss": 2.3122,
      "step": 72715
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3646409511566162,
      "learning_rate": 1.411137533953788e-07,
      "loss": 2.5557,
      "step": 72716
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0254987478256226,
      "learning_rate": 1.410448359611638e-07,
      "loss": 2.462,
      "step": 72717
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9770398736000061,
      "learning_rate": 1.4097593524055596e-07,
      "loss": 2.3392,
      "step": 72718
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.959714949131012,
      "learning_rate": 1.4090705123367188e-07,
      "loss": 2.2971,
      "step": 72719
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1068888902664185,
      "learning_rate": 1.4083818394062809e-07,
      "loss": 2.3552,
      "step": 72720
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.073032021522522,
      "learning_rate": 1.407693333615412e-07,
      "loss": 2.3678,
      "step": 72721
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0024309158325195,
      "learning_rate": 1.4070049949652774e-07,
      "loss": 2.3055,
      "step": 72722
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0389546155929565,
      "learning_rate": 1.4063168234570434e-07,
      "loss": 2.4013,
      "step": 72723
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1869114637374878,
      "learning_rate": 1.4056288190918976e-07,
      "loss": 2.4684,
      "step": 72724
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3574947118759155,
      "learning_rate": 1.4049409818709724e-07,
      "loss": 2.2995,
      "step": 72725
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0562775135040283,
      "learning_rate": 1.404253311795456e-07,
      "loss": 2.3505,
      "step": 72726
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6281700134277344,
      "learning_rate": 1.4035658088665027e-07,
      "loss": 2.2903,
      "step": 72727
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0867798328399658,
      "learning_rate": 1.4028784730852896e-07,
      "loss": 2.47,
      "step": 72728
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.054893136024475,
      "learning_rate": 1.4021913044529712e-07,
      "loss": 2.3689,
      "step": 72729
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0682693719863892,
      "learning_rate": 1.4015043029707242e-07,
      "loss": 2.2832,
      "step": 72730
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1797913312911987,
      "learning_rate": 1.4008174686397035e-07,
      "loss": 2.4273,
      "step": 72731
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1875444650650024,
      "learning_rate": 1.400130801461075e-07,
      "loss": 2.3314,
      "step": 72732
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0241588354110718,
      "learning_rate": 1.3994443014360038e-07,
      "loss": 2.2874,
      "step": 72733
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.115869402885437,
      "learning_rate": 1.398757968565645e-07,
      "loss": 2.4017,
      "step": 72734
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9937173128128052,
      "learning_rate": 1.3980718028511975e-07,
      "loss": 2.151,
      "step": 72735
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0140403509140015,
      "learning_rate": 1.3973858042937716e-07,
      "loss": 2.4036,
      "step": 72736
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0861150026321411,
      "learning_rate": 1.3966999728945773e-07,
      "loss": 2.6099,
      "step": 72737
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0912985801696777,
      "learning_rate": 1.3960143086547363e-07,
      "loss": 2.5372,
      "step": 72738
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0620684623718262,
      "learning_rate": 1.3953288115754471e-07,
      "loss": 2.3215,
      "step": 72739
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0326586961746216,
      "learning_rate": 1.3946434816578537e-07,
      "loss": 2.5149,
      "step": 72740
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.104655146598816,
      "learning_rate": 1.3939583189031213e-07,
      "loss": 2.3329,
      "step": 72741
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2047802209854126,
      "learning_rate": 1.393273323312394e-07,
      "loss": 2.6215,
      "step": 72742
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3028451204299927,
      "learning_rate": 1.3925884948868705e-07,
      "loss": 2.3441,
      "step": 72743
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0208680629730225,
      "learning_rate": 1.3919038336276725e-07,
      "loss": 2.3163,
      "step": 72744
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.150488018989563,
      "learning_rate": 1.3912193395359984e-07,
      "loss": 2.22,
      "step": 72745
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.206834316253662,
      "learning_rate": 1.3905350126129814e-07,
      "loss": 2.3029,
      "step": 72746
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.126837968826294,
      "learning_rate": 1.3898508528597864e-07,
      "loss": 2.0979,
      "step": 72747
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0253244638442993,
      "learning_rate": 1.3891668602775798e-07,
      "loss": 2.3247,
      "step": 72748
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0454686880111694,
      "learning_rate": 1.388483034867527e-07,
      "loss": 2.2243,
      "step": 72749
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0022648572921753,
      "learning_rate": 1.3877993766307606e-07,
      "loss": 2.1677,
      "step": 72750
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1532820463180542,
      "learning_rate": 1.3871158855684686e-07,
      "loss": 2.1911,
      "step": 72751
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.130029320716858,
      "learning_rate": 1.3864325616817832e-07,
      "loss": 2.2014,
      "step": 72752
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0260324478149414,
      "learning_rate": 1.3857494049718923e-07,
      "loss": 2.3261,
      "step": 72753
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1228054761886597,
      "learning_rate": 1.3850664154399395e-07,
      "loss": 2.1453,
      "step": 72754
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.061342477798462,
      "learning_rate": 1.3843835930870687e-07,
      "loss": 2.3147,
      "step": 72755
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2901469469070435,
      "learning_rate": 1.3837009379144674e-07,
      "loss": 2.4376,
      "step": 72756
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.026252269744873,
      "learning_rate": 1.383018449923268e-07,
      "loss": 2.2733,
      "step": 72757
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.091098427772522,
      "learning_rate": 1.3823361291146363e-07,
      "loss": 2.1127,
      "step": 72758
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.144550085067749,
      "learning_rate": 1.3816539754897272e-07,
      "loss": 2.468,
      "step": 72759
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1162503957748413,
      "learning_rate": 1.3809719890497064e-07,
      "loss": 2.3822,
      "step": 72760
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1503300666809082,
      "learning_rate": 1.3802901697957172e-07,
      "loss": 2.4123,
      "step": 72761
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1801031827926636,
      "learning_rate": 1.3796085177289365e-07,
      "loss": 2.3355,
      "step": 72762
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1119256019592285,
      "learning_rate": 1.3789270328504857e-07,
      "loss": 2.3228,
      "step": 72763
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2159618139266968,
      "learning_rate": 1.3782457151615415e-07,
      "loss": 2.2251,
      "step": 72764
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0418002605438232,
      "learning_rate": 1.3775645646632586e-07,
      "loss": 2.4118,
      "step": 72765
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3593957424163818,
      "learning_rate": 1.3768835813567915e-07,
      "loss": 2.4079,
      "step": 72766
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1604053974151611,
      "learning_rate": 1.376202765243284e-07,
      "loss": 2.5182,
      "step": 72767
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0123192071914673,
      "learning_rate": 1.375522116323913e-07,
      "loss": 2.2696,
      "step": 72768
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.099862813949585,
      "learning_rate": 1.3748416345997994e-07,
      "loss": 2.3266,
      "step": 72769
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1454485654830933,
      "learning_rate": 1.3741613200721317e-07,
      "loss": 2.3968,
      "step": 72770
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.087422251701355,
      "learning_rate": 1.3734811727420417e-07,
      "loss": 2.3403,
      "step": 72771
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.17933988571167,
      "learning_rate": 1.3728011926106842e-07,
      "loss": 2.4281,
      "step": 72772
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0763102769851685,
      "learning_rate": 1.3721213796792143e-07,
      "loss": 2.2953,
      "step": 72773
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0613620281219482,
      "learning_rate": 1.3714417339487974e-07,
      "loss": 2.3431,
      "step": 72774
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0500990152359009,
      "learning_rate": 1.3707622554205656e-07,
      "loss": 2.3683,
      "step": 72775
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0182666778564453,
      "learning_rate": 1.3700829440956853e-07,
      "loss": 2.3163,
      "step": 72776
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1254959106445312,
      "learning_rate": 1.3694037999752884e-07,
      "loss": 2.2604,
      "step": 72777
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.048551321029663,
      "learning_rate": 1.3687248230605522e-07,
      "loss": 2.3058,
      "step": 72778
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1484352350234985,
      "learning_rate": 1.3680460133526085e-07,
      "loss": 2.2366,
      "step": 72779
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.26345694065094,
      "learning_rate": 1.3673673708526235e-07,
      "loss": 2.2243,
      "step": 72780
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3172147274017334,
      "learning_rate": 1.3666888955617296e-07,
      "loss": 2.164,
      "step": 72781
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.080712080001831,
      "learning_rate": 1.3660105874810926e-07,
      "loss": 2.3085,
      "step": 72782
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0780428647994995,
      "learning_rate": 1.3653324466118556e-07,
      "loss": 2.4514,
      "step": 72783
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0752300024032593,
      "learning_rate": 1.3646544729551625e-07,
      "loss": 2.2295,
      "step": 72784
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1363236904144287,
      "learning_rate": 1.3639766665121678e-07,
      "loss": 2.2631,
      "step": 72785
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0501501560211182,
      "learning_rate": 1.3632990272840264e-07,
      "loss": 2.3801,
      "step": 72786
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1385711431503296,
      "learning_rate": 1.3626215552718924e-07,
      "loss": 2.1374,
      "step": 72787
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1457626819610596,
      "learning_rate": 1.3619442504768988e-07,
      "loss": 2.462,
      "step": 72788
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9883436560630798,
      "learning_rate": 1.3612671129001997e-07,
      "loss": 2.3768,
      "step": 72789
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.05934476852417,
      "learning_rate": 1.360590142542928e-07,
      "loss": 2.1846,
      "step": 72790
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9543216824531555,
      "learning_rate": 1.3599133394062602e-07,
      "loss": 2.4587,
      "step": 72791
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0273773670196533,
      "learning_rate": 1.3592367034913289e-07,
      "loss": 2.3302,
      "step": 72792
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.083395004272461,
      "learning_rate": 1.3585602347992776e-07,
      "loss": 2.1129,
      "step": 72793
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0390336513519287,
      "learning_rate": 1.35788393333125e-07,
      "loss": 2.1198,
      "step": 72794
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.11228346824646,
      "learning_rate": 1.3572077990884226e-07,
      "loss": 2.26,
      "step": 72795
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1315605640411377,
      "learning_rate": 1.3565318320718945e-07,
      "loss": 2.2028,
      "step": 72796
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.007228970527649,
      "learning_rate": 1.355856032282854e-07,
      "loss": 2.3122,
      "step": 72797
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2261667251586914,
      "learning_rate": 1.355180399722411e-07,
      "loss": 2.4427,
      "step": 72798
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0986454486846924,
      "learning_rate": 1.3545049343917427e-07,
      "loss": 2.4681,
      "step": 72799
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1176652908325195,
      "learning_rate": 1.353829636291981e-07,
      "loss": 2.2514,
      "step": 72800
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.155411958694458,
      "learning_rate": 1.3531545054242702e-07,
      "loss": 2.5262,
      "step": 72801
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0710989236831665,
      "learning_rate": 1.352479541789753e-07,
      "loss": 2.481,
      "step": 72802
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1270899772644043,
      "learning_rate": 1.3518047453895732e-07,
      "loss": 2.5388,
      "step": 72803
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.094285488128662,
      "learning_rate": 1.3511301162248746e-07,
      "loss": 2.1472,
      "step": 72804
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9810779094696045,
      "learning_rate": 1.3504556542968117e-07,
      "loss": 2.3566,
      "step": 72805
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1548231840133667,
      "learning_rate": 1.3497813596065058e-07,
      "loss": 2.3031,
      "step": 72806
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0228477716445923,
      "learning_rate": 1.3491072321551224e-07,
      "loss": 2.431,
      "step": 72807
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0336037874221802,
      "learning_rate": 1.3484332719437943e-07,
      "loss": 2.2824,
      "step": 72808
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0709211826324463,
      "learning_rate": 1.3477594789736649e-07,
      "loss": 2.4872,
      "step": 72809
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.198740005493164,
      "learning_rate": 1.3470858532458776e-07,
      "loss": 2.1431,
      "step": 72810
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9940110445022583,
      "learning_rate": 1.346412394761565e-07,
      "loss": 2.2519,
      "step": 72811
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0617716312408447,
      "learning_rate": 1.3457391035218925e-07,
      "loss": 2.292,
      "step": 72812
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1366372108459473,
      "learning_rate": 1.3450659795279708e-07,
      "loss": 2.4656,
      "step": 72813
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.05045485496521,
      "learning_rate": 1.3443930227809766e-07,
      "loss": 2.315,
      "step": 72814
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0390044450759888,
      "learning_rate": 1.3437202332820198e-07,
      "loss": 2.4234,
      "step": 72815
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1164870262145996,
      "learning_rate": 1.3430476110322554e-07,
      "loss": 2.3932,
      "step": 72816
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0652344226837158,
      "learning_rate": 1.3423751560328048e-07,
      "loss": 2.2371,
      "step": 72817
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.151806354522705,
      "learning_rate": 1.3417028682848332e-07,
      "loss": 2.372,
      "step": 72818
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1938424110412598,
      "learning_rate": 1.3410307477894735e-07,
      "loss": 2.3079,
      "step": 72819
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0772178173065186,
      "learning_rate": 1.340358794547858e-07,
      "loss": 2.3287,
      "step": 72820
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0633982419967651,
      "learning_rate": 1.3396870085611303e-07,
      "loss": 2.1795,
      "step": 72821
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3629610538482666,
      "learning_rate": 1.3390153898304338e-07,
      "loss": 2.2185,
      "step": 72822
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1259299516677856,
      "learning_rate": 1.3383439383569007e-07,
      "loss": 2.3165,
      "step": 72823
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1042511463165283,
      "learning_rate": 1.337672654141664e-07,
      "loss": 2.3139,
      "step": 72824
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1281709671020508,
      "learning_rate": 1.3370015371858669e-07,
      "loss": 2.4222,
      "step": 72825
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0004427433013916,
      "learning_rate": 1.336330587490664e-07,
      "loss": 2.231,
      "step": 72826
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1388742923736572,
      "learning_rate": 1.3356598050571657e-07,
      "loss": 2.5016,
      "step": 72827
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2780042886734009,
      "learning_rate": 1.3349891898865375e-07,
      "loss": 2.2263,
      "step": 72828
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.074242115020752,
      "learning_rate": 1.3343187419798786e-07,
      "loss": 2.1915,
      "step": 72829
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2047874927520752,
      "learning_rate": 1.333648461338355e-07,
      "loss": 2.3408,
      "step": 72830
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0242877006530762,
      "learning_rate": 1.3329783479630876e-07,
      "loss": 2.2737,
      "step": 72831
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0191197395324707,
      "learning_rate": 1.3323084018552312e-07,
      "loss": 2.4616,
      "step": 72832
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.160255789756775,
      "learning_rate": 1.3316386230158962e-07,
      "loss": 2.3449,
      "step": 72833
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1926695108413696,
      "learning_rate": 1.3309690114462482e-07,
      "loss": 2.353,
      "step": 72834
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0469613075256348,
      "learning_rate": 1.3302995671473973e-07,
      "loss": 2.3794,
      "step": 72835
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.122457504272461,
      "learning_rate": 1.3296302901204872e-07,
      "loss": 2.3319,
      "step": 72836
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1235429048538208,
      "learning_rate": 1.3289611803666502e-07,
      "loss": 2.1586,
      "step": 72837
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9981262683868408,
      "learning_rate": 1.3282922378870188e-07,
      "loss": 2.1412,
      "step": 72838
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0669623613357544,
      "learning_rate": 1.3276234626827478e-07,
      "loss": 2.453,
      "step": 72839
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2290599346160889,
      "learning_rate": 1.326954854754936e-07,
      "loss": 2.255,
      "step": 72840
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1215741634368896,
      "learning_rate": 1.3262864141047493e-07,
      "loss": 2.2685,
      "step": 72841
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9904330968856812,
      "learning_rate": 1.325618140733298e-07,
      "loss": 2.1428,
      "step": 72842
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0423715114593506,
      "learning_rate": 1.3249500346417255e-07,
      "loss": 2.3051,
      "step": 72843
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1809908151626587,
      "learning_rate": 1.3242820958311642e-07,
      "loss": 2.1757,
      "step": 72844
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1056312322616577,
      "learning_rate": 1.3236143243027356e-07,
      "loss": 2.2723,
      "step": 72845
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0893440246582031,
      "learning_rate": 1.322946720057594e-07,
      "loss": 2.4999,
      "step": 72846
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.099214792251587,
      "learning_rate": 1.32227928309685e-07,
      "loss": 2.332,
      "step": 72847
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1029901504516602,
      "learning_rate": 1.321612013421636e-07,
      "loss": 2.1619,
      "step": 72848
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1015151739120483,
      "learning_rate": 1.3209449110331062e-07,
      "loss": 2.3357,
      "step": 72849
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.13187575340271,
      "learning_rate": 1.3202779759323602e-07,
      "loss": 2.1351,
      "step": 72850
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.022891640663147,
      "learning_rate": 1.3196112081205637e-07,
      "loss": 2.3452,
      "step": 72851
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1955713033676147,
      "learning_rate": 1.3189446075988043e-07,
      "loss": 2.2622,
      "step": 72852
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2247501611709595,
      "learning_rate": 1.3182781743682482e-07,
      "loss": 2.1144,
      "step": 72853
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2599446773529053,
      "learning_rate": 1.3176119084300166e-07,
      "loss": 2.3799,
      "step": 72854
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0631685256958008,
      "learning_rate": 1.3169458097852306e-07,
      "loss": 2.1259,
      "step": 72855
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0401910543441772,
      "learning_rate": 1.3162798784350227e-07,
      "loss": 2.1582,
      "step": 72856
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1253191232681274,
      "learning_rate": 1.3156141143805257e-07,
      "loss": 2.1255,
      "step": 72857
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.090991497039795,
      "learning_rate": 1.3149485176228605e-07,
      "loss": 2.417,
      "step": 72858
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1852154731750488,
      "learning_rate": 1.3142830881631597e-07,
      "loss": 2.3308,
      "step": 72859
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.149686336517334,
      "learning_rate": 1.3136178260025446e-07,
      "loss": 2.1467,
      "step": 72860
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0029423236846924,
      "learning_rate": 1.3129527311421585e-07,
      "loss": 2.3843,
      "step": 72861
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9961366653442383,
      "learning_rate": 1.3122878035831231e-07,
      "loss": 2.2952,
      "step": 72862
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1668422222137451,
      "learning_rate": 1.3116230433265598e-07,
      "loss": 2.3589,
      "step": 72863
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0793505907058716,
      "learning_rate": 1.3109584503736005e-07,
      "loss": 2.478,
      "step": 72864
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1201218366622925,
      "learning_rate": 1.310294024725367e-07,
      "loss": 2.508,
      "step": 72865
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2493298053741455,
      "learning_rate": 1.3096297663829915e-07,
      "loss": 2.3989,
      "step": 72866
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.010240077972412,
      "learning_rate": 1.3089656753476065e-07,
      "loss": 2.314,
      "step": 72867
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0971064567565918,
      "learning_rate": 1.3083017516203112e-07,
      "loss": 2.4098,
      "step": 72868
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1189792156219482,
      "learning_rate": 1.30763799520226e-07,
      "loss": 2.3555,
      "step": 72869
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2160226106643677,
      "learning_rate": 1.3069744060945633e-07,
      "loss": 2.4102,
      "step": 72870
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1304957866668701,
      "learning_rate": 1.3063109842983535e-07,
      "loss": 2.3728,
      "step": 72871
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0484446287155151,
      "learning_rate": 1.3056477298147518e-07,
      "loss": 2.2812,
      "step": 72872
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0912681818008423,
      "learning_rate": 1.3049846426448686e-07,
      "loss": 2.4877,
      "step": 72873
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1223868131637573,
      "learning_rate": 1.3043217227898587e-07,
      "loss": 2.3601,
      "step": 72874
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1193410158157349,
      "learning_rate": 1.3036589702508095e-07,
      "loss": 2.4524,
      "step": 72875
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0022293329238892,
      "learning_rate": 1.3029963850288873e-07,
      "loss": 2.4427,
      "step": 72876
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9835088849067688,
      "learning_rate": 1.302333967125169e-07,
      "loss": 2.1289,
      "step": 72877
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9595136046409607,
      "learning_rate": 1.3016717165408198e-07,
      "loss": 2.4303,
      "step": 72878
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.988618791103363,
      "learning_rate": 1.3010096332769283e-07,
      "loss": 2.224,
      "step": 72879
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.029532790184021,
      "learning_rate": 1.3003477173346492e-07,
      "loss": 2.446,
      "step": 72880
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2034589052200317,
      "learning_rate": 1.29968596871507e-07,
      "loss": 2.4219,
      "step": 72881
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0132553577423096,
      "learning_rate": 1.2990243874193452e-07,
      "loss": 2.3874,
      "step": 72882
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0358011722564697,
      "learning_rate": 1.2983629734485635e-07,
      "loss": 2.5148,
      "step": 72883
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1247800588607788,
      "learning_rate": 1.2977017268038794e-07,
      "loss": 2.1974,
      "step": 72884
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1099313497543335,
      "learning_rate": 1.2970406474863917e-07,
      "loss": 2.4568,
      "step": 72885
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0328562259674072,
      "learning_rate": 1.2963797354972218e-07,
      "loss": 2.4003,
      "step": 72886
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1311832666397095,
      "learning_rate": 1.2957189908375022e-07,
      "loss": 2.354,
      "step": 72887
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1057883501052856,
      "learning_rate": 1.2950584135083432e-07,
      "loss": 2.2237,
      "step": 72888
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0576814413070679,
      "learning_rate": 1.294398003510866e-07,
      "loss": 2.3123,
      "step": 72889
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1011234521865845,
      "learning_rate": 1.2937377608462032e-07,
      "loss": 2.4147,
      "step": 72890
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1025325059890747,
      "learning_rate": 1.2930776855154536e-07,
      "loss": 2.5254,
      "step": 72891
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1497479677200317,
      "learning_rate": 1.29241777751975e-07,
      "loss": 2.3085,
      "step": 72892
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.256543755531311,
      "learning_rate": 1.2917580368602023e-07,
      "loss": 2.4352,
      "step": 72893
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2722331285476685,
      "learning_rate": 1.291098463537943e-07,
      "loss": 2.4382,
      "step": 72894
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0964921712875366,
      "learning_rate": 1.2904390575540714e-07,
      "loss": 2.2254,
      "step": 72895
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.09683358669281,
      "learning_rate": 1.28977981890972e-07,
      "loss": 2.3192,
      "step": 72896
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0420985221862793,
      "learning_rate": 1.2891207476059987e-07,
      "loss": 2.3374,
      "step": 72897
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1540502309799194,
      "learning_rate": 1.2884618436440288e-07,
      "loss": 2.2484,
      "step": 72898
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1240220069885254,
      "learning_rate": 1.2878031070249207e-07,
      "loss": 2.4086,
      "step": 72899
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0019551515579224,
      "learning_rate": 1.2871445377497959e-07,
      "loss": 2.2096,
      "step": 72900
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1101183891296387,
      "learning_rate": 1.2864861358197866e-07,
      "loss": 2.4391,
      "step": 72901
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1669180393218994,
      "learning_rate": 1.2858279012359697e-07,
      "loss": 2.4323,
      "step": 72902
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1946789026260376,
      "learning_rate": 1.2851698339995e-07,
      "loss": 2.2802,
      "step": 72903
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0646785497665405,
      "learning_rate": 1.2845119341114765e-07,
      "loss": 2.4917,
      "step": 72904
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.016777753829956,
      "learning_rate": 1.2838542015730205e-07,
      "loss": 2.2039,
      "step": 72905
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0603864192962646,
      "learning_rate": 1.283196636385231e-07,
      "loss": 2.3788,
      "step": 72906
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0431677103042603,
      "learning_rate": 1.282539238549252e-07,
      "loss": 2.3395,
      "step": 72907
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1511224508285522,
      "learning_rate": 1.2818820080661597e-07,
      "loss": 2.4516,
      "step": 72908
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2179033756256104,
      "learning_rate": 1.2812249449370984e-07,
      "loss": 2.2588,
      "step": 72909
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0225542783737183,
      "learning_rate": 1.2805680491631667e-07,
      "loss": 2.2812,
      "step": 72910
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1435996294021606,
      "learning_rate": 1.2799113207454973e-07,
      "loss": 2.2456,
      "step": 72911
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0106945037841797,
      "learning_rate": 1.2792547596851778e-07,
      "loss": 2.2585,
      "step": 72912
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0680040121078491,
      "learning_rate": 1.2785983659833412e-07,
      "loss": 2.3867,
      "step": 72913
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0557920932769775,
      "learning_rate": 1.2779421396410861e-07,
      "loss": 2.3441,
      "step": 72914
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0363987684249878,
      "learning_rate": 1.2772860806595345e-07,
      "loss": 2.2674,
      "step": 72915
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1177010536193848,
      "learning_rate": 1.276630189039796e-07,
      "loss": 2.3607,
      "step": 72916
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9995557069778442,
      "learning_rate": 1.2759744647829807e-07,
      "loss": 2.4856,
      "step": 72917
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1162097454071045,
      "learning_rate": 1.2753189078901996e-07,
      "loss": 2.1959,
      "step": 72918
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0353606939315796,
      "learning_rate": 1.2746635183625733e-07,
      "loss": 2.396,
      "step": 72919
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1754794120788574,
      "learning_rate": 1.2740082962012013e-07,
      "loss": 2.3357,
      "step": 72920
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1533974409103394,
      "learning_rate": 1.2733532414072047e-07,
      "loss": 2.4989,
      "step": 72921
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0821186304092407,
      "learning_rate": 1.2726983539816828e-07,
      "loss": 2.3801,
      "step": 72922
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.990956723690033,
      "learning_rate": 1.2720436339257568e-07,
      "loss": 2.1781,
      "step": 72923
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1471079587936401,
      "learning_rate": 1.271389081240515e-07,
      "loss": 2.5846,
      "step": 72924
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1351182460784912,
      "learning_rate": 1.2707346959271006e-07,
      "loss": 2.4868,
      "step": 72925
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0418939590454102,
      "learning_rate": 1.2700804779866017e-07,
      "loss": 2.2904,
      "step": 72926
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0600184202194214,
      "learning_rate": 1.2694264274201285e-07,
      "loss": 2.2021,
      "step": 72927
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0391772985458374,
      "learning_rate": 1.2687725442287913e-07,
      "loss": 2.0203,
      "step": 72928
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1372641324996948,
      "learning_rate": 1.2681188284137004e-07,
      "loss": 2.2183,
      "step": 72929
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.280259370803833,
      "learning_rate": 1.2674652799759657e-07,
      "loss": 2.3318,
      "step": 72930
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.260473370552063,
      "learning_rate": 1.2668118989166866e-07,
      "loss": 2.0508,
      "step": 72931
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0678757429122925,
      "learning_rate": 1.2661586852369845e-07,
      "loss": 2.3736,
      "step": 72932
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0686137676239014,
      "learning_rate": 1.2655056389379582e-07,
      "loss": 2.3665,
      "step": 72933
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.121932029724121,
      "learning_rate": 1.264852760020707e-07,
      "loss": 2.1126,
      "step": 72934
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.038974642753601,
      "learning_rate": 1.2642000484863525e-07,
      "loss": 2.2169,
      "step": 72935
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1271966695785522,
      "learning_rate": 1.2635475043359936e-07,
      "loss": 2.343,
      "step": 72936
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0927799940109253,
      "learning_rate": 1.2628951275707403e-07,
      "loss": 2.1249,
      "step": 72937
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.038691759109497,
      "learning_rate": 1.2622429181917028e-07,
      "loss": 2.4864,
      "step": 72938
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0510145425796509,
      "learning_rate": 1.2615908761999695e-07,
      "loss": 2.1967,
      "step": 72939
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0685114860534668,
      "learning_rate": 1.2609390015966616e-07,
      "loss": 2.4251,
      "step": 72940
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0544737577438354,
      "learning_rate": 1.260287294382867e-07,
      "loss": 2.2373,
      "step": 72941
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0396724939346313,
      "learning_rate": 1.259635754559718e-07,
      "loss": 2.2454,
      "step": 72942
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0332838296890259,
      "learning_rate": 1.2589843821283032e-07,
      "loss": 2.1925,
      "step": 72943
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0983259677886963,
      "learning_rate": 1.258333177089721e-07,
      "loss": 2.16,
      "step": 72944
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1022772789001465,
      "learning_rate": 1.2576821394450822e-07,
      "loss": 2.3516,
      "step": 72945
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0815045833587646,
      "learning_rate": 1.2570312691955078e-07,
      "loss": 2.1861,
      "step": 72946
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1747071743011475,
      "learning_rate": 1.2563805663420636e-07,
      "loss": 2.3603,
      "step": 72947
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.140364646911621,
      "learning_rate": 1.2557300308858823e-07,
      "loss": 2.3323,
      "step": 72948
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4037984609603882,
      "learning_rate": 1.2550796628280405e-07,
      "loss": 2.3858,
      "step": 72949
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0501128435134888,
      "learning_rate": 1.254429462169682e-07,
      "loss": 2.4464,
      "step": 72950
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9863718748092651,
      "learning_rate": 1.253779428911861e-07,
      "loss": 2.3283,
      "step": 72951
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1113746166229248,
      "learning_rate": 1.2531295630557217e-07,
      "loss": 2.5731,
      "step": 72952
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1033964157104492,
      "learning_rate": 1.2524798646023296e-07,
      "loss": 2.4857,
      "step": 72953
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1306101083755493,
      "learning_rate": 1.251830333552817e-07,
      "loss": 2.1599,
      "step": 72954
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0356606245040894,
      "learning_rate": 1.2511809699082722e-07,
      "loss": 2.3099,
      "step": 72955
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9959491491317749,
      "learning_rate": 1.2505317736697941e-07,
      "loss": 2.3192,
      "step": 72956
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1249794960021973,
      "learning_rate": 1.2498827448384822e-07,
      "loss": 2.2804,
      "step": 72957
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1388295888900757,
      "learning_rate": 1.249233883415435e-07,
      "loss": 2.1157,
      "step": 72958
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1406028270721436,
      "learning_rate": 1.248585189401763e-07,
      "loss": 2.2982,
      "step": 72959
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9943795800209045,
      "learning_rate": 1.2479366627985657e-07,
      "loss": 2.393,
      "step": 72960
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.165351390838623,
      "learning_rate": 1.2472883036069304e-07,
      "loss": 2.2832,
      "step": 72961
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.125711441040039,
      "learning_rate": 1.246640111827957e-07,
      "loss": 2.2533,
      "step": 72962
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0567744970321655,
      "learning_rate": 1.2459920874627552e-07,
      "loss": 2.3669,
      "step": 72963
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2369896173477173,
      "learning_rate": 1.245344230512413e-07,
      "loss": 2.187,
      "step": 72964
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3247969150543213,
      "learning_rate": 1.24469654097803e-07,
      "loss": 2.4293,
      "step": 72965
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.020010232925415,
      "learning_rate": 1.2440490188607158e-07,
      "loss": 2.356,
      "step": 72966
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1414602994918823,
      "learning_rate": 1.2434016641615587e-07,
      "loss": 2.5318,
      "step": 72967
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0690590143203735,
      "learning_rate": 1.242754476881658e-07,
      "loss": 2.5628,
      "step": 72968
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1049820184707642,
      "learning_rate": 1.2421074570221125e-07,
      "loss": 2.3872,
      "step": 72969
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2017468214035034,
      "learning_rate": 1.2414606045840106e-07,
      "loss": 2.3747,
      "step": 72970
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0803042650222778,
      "learning_rate": 1.2408139195684622e-07,
      "loss": 2.1456,
      "step": 72971
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1888169050216675,
      "learning_rate": 1.2401674019765554e-07,
      "loss": 2.2619,
      "step": 72972
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.982974112033844,
      "learning_rate": 1.2395210518093892e-07,
      "loss": 2.4294,
      "step": 72973
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.172822117805481,
      "learning_rate": 1.238874869068052e-07,
      "loss": 2.3978,
      "step": 72974
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0914555788040161,
      "learning_rate": 1.2382288537536537e-07,
      "loss": 2.6967,
      "step": 72975
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1106282472610474,
      "learning_rate": 1.2375830058672711e-07,
      "loss": 2.2693,
      "step": 72976
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1368532180786133,
      "learning_rate": 1.2369373254100148e-07,
      "loss": 2.3237,
      "step": 72977
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.061140537261963,
      "learning_rate": 1.2362918123829726e-07,
      "loss": 2.3155,
      "step": 72978
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1476770639419556,
      "learning_rate": 1.2356464667872326e-07,
      "loss": 2.383,
      "step": 72979
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.193377137184143,
      "learning_rate": 1.2350012886239048e-07,
      "loss": 2.2956,
      "step": 72980
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1335415840148926,
      "learning_rate": 1.2343562778940775e-07,
      "loss": 2.4236,
      "step": 72981
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9962584376335144,
      "learning_rate": 1.2337114345988276e-07,
      "loss": 2.2922,
      "step": 72982
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1170246601104736,
      "learning_rate": 1.2330667587392653e-07,
      "loss": 2.5396,
      "step": 72983
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.047282338142395,
      "learning_rate": 1.2324222503164894e-07,
      "loss": 2.1053,
      "step": 72984
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1100131273269653,
      "learning_rate": 1.2317779093315775e-07,
      "loss": 2.0688,
      "step": 72985
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1940181255340576,
      "learning_rate": 1.2311337357856279e-07,
      "loss": 2.3098,
      "step": 72986
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.200222134590149,
      "learning_rate": 1.230489729679729e-07,
      "loss": 2.3306,
      "step": 72987
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1473702192306519,
      "learning_rate": 1.2298458910149692e-07,
      "loss": 2.3702,
      "step": 72988
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.123501181602478,
      "learning_rate": 1.229202219792458e-07,
      "loss": 2.5051,
      "step": 72989
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3627969026565552,
      "learning_rate": 1.2285587160132728e-07,
      "loss": 2.1061,
      "step": 72990
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2086864709854126,
      "learning_rate": 1.2279153796785015e-07,
      "loss": 2.1072,
      "step": 72991
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0590285062789917,
      "learning_rate": 1.2272722107892433e-07,
      "loss": 2.3695,
      "step": 72992
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0893648862838745,
      "learning_rate": 1.226629209346575e-07,
      "loss": 2.2636,
      "step": 72993
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1848477125167847,
      "learning_rate": 1.2259863753516066e-07,
      "loss": 2.1862,
      "step": 72994
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.125535011291504,
      "learning_rate": 1.2253437088054154e-07,
      "loss": 2.2772,
      "step": 72995
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1044495105743408,
      "learning_rate": 1.2247012097091006e-07,
      "loss": 2.2279,
      "step": 72996
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2112855911254883,
      "learning_rate": 1.2240588780637386e-07,
      "loss": 2.4894,
      "step": 72997
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4079713821411133,
      "learning_rate": 1.2234167138704177e-07,
      "loss": 2.3706,
      "step": 72998
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2606079578399658,
      "learning_rate": 1.2227747171302484e-07,
      "loss": 2.1769,
      "step": 72999
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0784223079681396,
      "learning_rate": 1.222132887844296e-07,
      "loss": 2.2102,
      "step": 73000
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0205062627792358,
      "learning_rate": 1.2214912260136492e-07,
      "loss": 2.2193,
      "step": 73001
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1566474437713623,
      "learning_rate": 1.2208497316394064e-07,
      "loss": 2.4173,
      "step": 73002
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0577712059020996,
      "learning_rate": 1.2202084047226558e-07,
      "loss": 2.2915,
      "step": 73003
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1902079582214355,
      "learning_rate": 1.2195672452644746e-07,
      "loss": 2.3583,
      "step": 73004
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0751678943634033,
      "learning_rate": 1.2189262532659508e-07,
      "loss": 2.4532,
      "step": 73005
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2268368005752563,
      "learning_rate": 1.218285428728183e-07,
      "loss": 2.4119,
      "step": 73006
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1447566747665405,
      "learning_rate": 1.2176447716522488e-07,
      "loss": 2.2923,
      "step": 73007
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0438666343688965,
      "learning_rate": 1.2170042820392358e-07,
      "loss": 2.4624,
      "step": 73008
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.098227858543396,
      "learning_rate": 1.2163639598902322e-07,
      "loss": 2.4268,
      "step": 73009
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1412298679351807,
      "learning_rate": 1.2157238052063259e-07,
      "loss": 2.3002,
      "step": 73010
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1944692134857178,
      "learning_rate": 1.2150838179885828e-07,
      "loss": 2.3698,
      "step": 73011
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1625016927719116,
      "learning_rate": 1.2144439982381128e-07,
      "loss": 2.5328,
      "step": 73012
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0982104539871216,
      "learning_rate": 1.2138043459559824e-07,
      "loss": 2.4151,
      "step": 73013
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0728800296783447,
      "learning_rate": 1.2131648611433011e-07,
      "loss": 2.552,
      "step": 73014
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2794886827468872,
      "learning_rate": 1.212525543801113e-07,
      "loss": 2.2748,
      "step": 73015
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0953441858291626,
      "learning_rate": 1.2118863939305391e-07,
      "loss": 2.2229,
      "step": 73016
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9745432138442993,
      "learning_rate": 1.2112474115326456e-07,
      "loss": 2.3242,
      "step": 73017
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0767806768417358,
      "learning_rate": 1.2106085966085092e-07,
      "loss": 2.027,
      "step": 73018
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.109459400177002,
      "learning_rate": 1.2099699491592286e-07,
      "loss": 2.3681,
      "step": 73019
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1918059587478638,
      "learning_rate": 1.2093314691858816e-07,
      "loss": 2.323,
      "step": 73020
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1148802042007446,
      "learning_rate": 1.2086931566895443e-07,
      "loss": 2.3659,
      "step": 73021
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4081132411956787,
      "learning_rate": 1.2080550116713053e-07,
      "loss": 2.2101,
      "step": 73022
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2952500581741333,
      "learning_rate": 1.2074170341322523e-07,
      "loss": 2.1813,
      "step": 73023
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0983076095581055,
      "learning_rate": 1.206779224073451e-07,
      "loss": 2.4021,
      "step": 73024
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0746922492980957,
      "learning_rate": 1.206141581496001e-07,
      "loss": 2.2878,
      "step": 73025
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0401331186294556,
      "learning_rate": 1.2055041064009566e-07,
      "loss": 2.2165,
      "step": 73026
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0295326709747314,
      "learning_rate": 1.204866798789428e-07,
      "loss": 2.3503,
      "step": 73027
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0417358875274658,
      "learning_rate": 1.2042296586624701e-07,
      "loss": 2.3269,
      "step": 73028
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9914091229438782,
      "learning_rate": 1.203592686021182e-07,
      "loss": 2.2126,
      "step": 73029
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0795093774795532,
      "learning_rate": 1.202955880866641e-07,
      "loss": 2.5311,
      "step": 73030
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1770840883255005,
      "learning_rate": 1.2023192431999232e-07,
      "loss": 2.4369,
      "step": 73031
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.105921983718872,
      "learning_rate": 1.201682773022095e-07,
      "loss": 2.1966,
      "step": 73032
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9602017998695374,
      "learning_rate": 1.201046470334255e-07,
      "loss": 2.0619,
      "step": 73033
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0287561416625977,
      "learning_rate": 1.2004103351374808e-07,
      "loss": 2.2897,
      "step": 73034
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1486148834228516,
      "learning_rate": 1.1997743674328378e-07,
      "loss": 2.1788,
      "step": 73035
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0068730115890503,
      "learning_rate": 1.1991385672214138e-07,
      "loss": 2.1809,
      "step": 73036
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1481448411941528,
      "learning_rate": 1.1985029345042865e-07,
      "loss": 2.1043,
      "step": 73037
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1200637817382812,
      "learning_rate": 1.197867469282532e-07,
      "loss": 2.1409,
      "step": 73038
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.112236499786377,
      "learning_rate": 1.1972321715572167e-07,
      "loss": 2.4674,
      "step": 73039
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.162448763847351,
      "learning_rate": 1.196597041329428e-07,
      "loss": 2.1993,
      "step": 73040
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1629506349563599,
      "learning_rate": 1.1959620786002547e-07,
      "loss": 2.2883,
      "step": 73041
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.143937587738037,
      "learning_rate": 1.1953272833707508e-07,
      "loss": 2.3287,
      "step": 73042
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.077907681465149,
      "learning_rate": 1.1946926556420047e-07,
      "loss": 2.3544,
      "step": 73043
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.076939582824707,
      "learning_rate": 1.194058195415093e-07,
      "loss": 2.4074,
      "step": 73044
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1445571184158325,
      "learning_rate": 1.1934239026910821e-07,
      "loss": 2.3315,
      "step": 73045
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.103381872177124,
      "learning_rate": 1.1927897774710483e-07,
      "loss": 2.4848,
      "step": 73046
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1721197366714478,
      "learning_rate": 1.19215581975608e-07,
      "loss": 2.3386,
      "step": 73047
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0162935256958008,
      "learning_rate": 1.1915220295472429e-07,
      "loss": 2.2776,
      "step": 73048
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0833197832107544,
      "learning_rate": 1.1908884068456028e-07,
      "loss": 2.4906,
      "step": 73049
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2066432237625122,
      "learning_rate": 1.1902549516522588e-07,
      "loss": 2.4898,
      "step": 73050
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0974419116973877,
      "learning_rate": 1.1896216639682546e-07,
      "loss": 2.3801,
      "step": 73051
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3547897338867188,
      "learning_rate": 1.1889885437947002e-07,
      "loss": 2.3469,
      "step": 73052
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1590015888214111,
      "learning_rate": 1.1883555911326172e-07,
      "loss": 2.3015,
      "step": 73053
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.139971137046814,
      "learning_rate": 1.1877228059831269e-07,
      "loss": 2.2854,
      "step": 73054
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.457550048828125,
      "learning_rate": 1.1870901883472841e-07,
      "loss": 2.5414,
      "step": 73055
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.180666208267212,
      "learning_rate": 1.1864577382261544e-07,
      "loss": 2.367,
      "step": 73056
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1222302913665771,
      "learning_rate": 1.1858254556208148e-07,
      "loss": 2.4136,
      "step": 73057
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.032301902770996,
      "learning_rate": 1.1851933405323423e-07,
      "loss": 2.1509,
      "step": 73058
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.172598958015442,
      "learning_rate": 1.1845613929618028e-07,
      "loss": 2.4317,
      "step": 73059
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0542429685592651,
      "learning_rate": 1.1839296129102728e-07,
      "loss": 2.3788,
      "step": 73060
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9795867800712585,
      "learning_rate": 1.1832980003788075e-07,
      "loss": 2.2955,
      "step": 73061
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1242167949676514,
      "learning_rate": 1.1826665553685057e-07,
      "loss": 2.4061,
      "step": 73062
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9840586185455322,
      "learning_rate": 1.1820352778804112e-07,
      "loss": 2.2504,
      "step": 73063
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0724002122879028,
      "learning_rate": 1.1814041679156229e-07,
      "loss": 2.5654,
      "step": 73064
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1013209819793701,
      "learning_rate": 1.1807732254751847e-07,
      "loss": 2.3005,
      "step": 73065
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5540963411331177,
      "learning_rate": 1.1801424505601734e-07,
      "loss": 2.1316,
      "step": 73066
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1441377401351929,
      "learning_rate": 1.1795118431716545e-07,
      "loss": 2.2733,
      "step": 73067
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1006981134414673,
      "learning_rate": 1.1788814033107054e-07,
      "loss": 2.1943,
      "step": 73068
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.206199049949646,
      "learning_rate": 1.1782511309783917e-07,
      "loss": 2.3002,
      "step": 73069
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0870035886764526,
      "learning_rate": 1.1776210261757903e-07,
      "loss": 2.4937,
      "step": 73070
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.166243553161621,
      "learning_rate": 1.1769910889039449e-07,
      "loss": 2.2476,
      "step": 73071
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0585479736328125,
      "learning_rate": 1.1763613191639545e-07,
      "loss": 2.5458,
      "step": 73072
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.995806097984314,
      "learning_rate": 1.1757317169568738e-07,
      "loss": 2.4764,
      "step": 73073
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1443288326263428,
      "learning_rate": 1.1751022822837466e-07,
      "loss": 2.4148,
      "step": 73074
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2503588199615479,
      "learning_rate": 1.1744730151456829e-07,
      "loss": 2.3373,
      "step": 73075
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1684969663619995,
      "learning_rate": 1.1738439155437154e-07,
      "loss": 2.4432,
      "step": 73076
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.961585283279419,
      "learning_rate": 1.1732149834789319e-07,
      "loss": 2.0264,
      "step": 73077
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9928761720657349,
      "learning_rate": 1.1725862189523874e-07,
      "loss": 2.1884,
      "step": 73078
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1314553022384644,
      "learning_rate": 1.1719576219651585e-07,
      "loss": 2.2413,
      "step": 73079
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.028727650642395,
      "learning_rate": 1.1713291925182891e-07,
      "loss": 2.1911,
      "step": 73080
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2075588703155518,
      "learning_rate": 1.170700930612867e-07,
      "loss": 2.4523,
      "step": 73081
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2393056154251099,
      "learning_rate": 1.1700728362499359e-07,
      "loss": 2.1495,
      "step": 73082
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1245249509811401,
      "learning_rate": 1.1694449094305838e-07,
      "loss": 2.3988,
      "step": 73083
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0411487817764282,
      "learning_rate": 1.1688171501558543e-07,
      "loss": 2.302,
      "step": 73084
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4809973239898682,
      "learning_rate": 1.1681895584268354e-07,
      "loss": 2.1254,
      "step": 73085
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1256701946258545,
      "learning_rate": 1.1675621342445709e-07,
      "loss": 2.3782,
      "step": 73086
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1302518844604492,
      "learning_rate": 1.1669348776101375e-07,
      "loss": 2.3794,
      "step": 73087
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0947542190551758,
      "learning_rate": 1.1663077885245788e-07,
      "loss": 2.46,
      "step": 73088
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0303053855895996,
      "learning_rate": 1.165680866988983e-07,
      "loss": 2.3555,
      "step": 73089
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.061415672302246,
      "learning_rate": 1.1650541130043935e-07,
      "loss": 2.4093,
      "step": 73090
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0946332216262817,
      "learning_rate": 1.1644275265718985e-07,
      "loss": 2.4602,
      "step": 73091
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2010380029678345,
      "learning_rate": 1.1638011076925193e-07,
      "loss": 2.0102,
      "step": 73092
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1301507949829102,
      "learning_rate": 1.163174856367355e-07,
      "loss": 2.1764,
      "step": 73093
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3435039520263672,
      "learning_rate": 1.1625487725974494e-07,
      "loss": 2.3282,
      "step": 73094
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1740957498550415,
      "learning_rate": 1.161922856383868e-07,
      "loss": 2.5288,
      "step": 73095
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.093802809715271,
      "learning_rate": 1.1612971077276658e-07,
      "loss": 2.208,
      "step": 73096
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1109519004821777,
      "learning_rate": 1.1606715266299084e-07,
      "loss": 2.2967,
      "step": 73097
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0495309829711914,
      "learning_rate": 1.1600461130916618e-07,
      "loss": 2.3884,
      "step": 73098
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.120041012763977,
      "learning_rate": 1.1594208671139807e-07,
      "loss": 2.7971,
      "step": 73099
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1016888618469238,
      "learning_rate": 1.1587957886979306e-07,
      "loss": 2.3356,
      "step": 73100
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1106094121932983,
      "learning_rate": 1.1581708778445555e-07,
      "loss": 2.2777,
      "step": 73101
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1300098896026611,
      "learning_rate": 1.1575461345549433e-07,
      "loss": 2.3191,
      "step": 73102
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.053269624710083,
      "learning_rate": 1.1569215588301153e-07,
      "loss": 2.3688,
      "step": 73103
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1229019165039062,
      "learning_rate": 1.1562971506711706e-07,
      "loss": 2.4636,
      "step": 73104
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0715452432632446,
      "learning_rate": 1.155672910079142e-07,
      "loss": 2.3468,
      "step": 73105
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.170554280281067,
      "learning_rate": 1.1550488370550838e-07,
      "loss": 2.4925,
      "step": 73106
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0767347812652588,
      "learning_rate": 1.1544249316000733e-07,
      "loss": 2.4182,
      "step": 73107
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1465873718261719,
      "learning_rate": 1.1538011937151539e-07,
      "loss": 2.1519,
      "step": 73108
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0924125909805298,
      "learning_rate": 1.1531776234013803e-07,
      "loss": 2.2171,
      "step": 73109
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2135930061340332,
      "learning_rate": 1.1525542206598294e-07,
      "loss": 2.3381,
      "step": 73110
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0155075788497925,
      "learning_rate": 1.151930985491545e-07,
      "loss": 2.4044,
      "step": 73111
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2152494192123413,
      "learning_rate": 1.1513079178975816e-07,
      "loss": 1.9483,
      "step": 73112
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.948179304599762,
      "learning_rate": 1.150685017878994e-07,
      "loss": 2.2634,
      "step": 73113
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.175049901008606,
      "learning_rate": 1.1500622854368481e-07,
      "loss": 2.5102,
      "step": 73114
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1111458539962769,
      "learning_rate": 1.1494397205721875e-07,
      "loss": 2.2858,
      "step": 73115
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0673178434371948,
      "learning_rate": 1.1488173232860777e-07,
      "loss": 2.1015,
      "step": 73116
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1211738586425781,
      "learning_rate": 1.1481950935795738e-07,
      "loss": 2.1902,
      "step": 73117
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0461041927337646,
      "learning_rate": 1.1475730314537304e-07,
      "loss": 2.2175,
      "step": 73118
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1692568063735962,
      "learning_rate": 1.1469511369095909e-07,
      "loss": 2.1814,
      "step": 73119
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1456655263900757,
      "learning_rate": 1.1463294099482214e-07,
      "loss": 2.281,
      "step": 73120
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0715081691741943,
      "learning_rate": 1.1457078505706653e-07,
      "loss": 2.249,
      "step": 73121
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.118369221687317,
      "learning_rate": 1.1450864587779886e-07,
      "loss": 2.3132,
      "step": 73122
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9968793988227844,
      "learning_rate": 1.1444652345712348e-07,
      "loss": 2.4072,
      "step": 73123
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1583231687545776,
      "learning_rate": 1.1438441779514586e-07,
      "loss": 2.1818,
      "step": 73124
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0562512874603271,
      "learning_rate": 1.1432232889197259e-07,
      "loss": 2.344,
      "step": 73125
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.046951174736023,
      "learning_rate": 1.1426025674770691e-07,
      "loss": 2.4183,
      "step": 73126
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0852049589157104,
      "learning_rate": 1.141982013624554e-07,
      "loss": 2.2633,
      "step": 73127
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.589113712310791,
      "learning_rate": 1.1413616273632355e-07,
      "loss": 2.1901,
      "step": 73128
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0552177429199219,
      "learning_rate": 1.1407414086941571e-07,
      "loss": 2.2773,
      "step": 73129
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0725458860397339,
      "learning_rate": 1.1401213576183623e-07,
      "loss": 2.2687,
      "step": 73130
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1037064790725708,
      "learning_rate": 1.139501474136917e-07,
      "loss": 2.3297,
      "step": 73131
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1859407424926758,
      "learning_rate": 1.1388817582508649e-07,
      "loss": 2.1611,
      "step": 73132
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1424050331115723,
      "learning_rate": 1.1382622099612605e-07,
      "loss": 2.4007,
      "step": 73133
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9677356481552124,
      "learning_rate": 1.1376428292691589e-07,
      "loss": 2.4985,
      "step": 73134
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2126764059066772,
      "learning_rate": 1.1370236161755921e-07,
      "loss": 2.1683,
      "step": 73135
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0984899997711182,
      "learning_rate": 1.1364045706816263e-07,
      "loss": 2.3817,
      "step": 73136
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0663607120513916,
      "learning_rate": 1.1357856927883049e-07,
      "loss": 2.2434,
      "step": 73137
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0569252967834473,
      "learning_rate": 1.1351669824966716e-07,
      "loss": 2.5251,
      "step": 73138
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1876277923583984,
      "learning_rate": 1.1345484398077922e-07,
      "loss": 2.142,
      "step": 73139
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.091979742050171,
      "learning_rate": 1.1339300647226991e-07,
      "loss": 2.1302,
      "step": 73140
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0737310647964478,
      "learning_rate": 1.1333118572424473e-07,
      "loss": 2.3308,
      "step": 73141
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0612481832504272,
      "learning_rate": 1.1326938173680802e-07,
      "loss": 2.0632,
      "step": 73142
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0271679162979126,
      "learning_rate": 1.1320759451006524e-07,
      "loss": 2.3305,
      "step": 73143
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1849119663238525,
      "learning_rate": 1.1314582404412189e-07,
      "loss": 2.3645,
      "step": 73144
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9806239604949951,
      "learning_rate": 1.1308407033908008e-07,
      "loss": 2.3572,
      "step": 73145
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.121752381324768,
      "learning_rate": 1.1302233339504642e-07,
      "loss": 2.185,
      "step": 73146
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0550601482391357,
      "learning_rate": 1.1296061321212525e-07,
      "loss": 2.2651,
      "step": 73147
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1649197340011597,
      "learning_rate": 1.1289890979042096e-07,
      "loss": 2.1438,
      "step": 73148
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0930067300796509,
      "learning_rate": 1.1283722313003898e-07,
      "loss": 2.3097,
      "step": 73149
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1264609098434448,
      "learning_rate": 1.1277555323108258e-07,
      "loss": 2.2033,
      "step": 73150
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1889351606369019,
      "learning_rate": 1.1271390009365723e-07,
      "loss": 2.3902,
      "step": 73151
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1388875246047974,
      "learning_rate": 1.1265226371786619e-07,
      "loss": 2.408,
      "step": 73152
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1230064630508423,
      "learning_rate": 1.1259064410381604e-07,
      "loss": 2.5352,
      "step": 73153
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0595725774765015,
      "learning_rate": 1.1252904125161002e-07,
      "loss": 2.4616,
      "step": 73154
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0545496940612793,
      "learning_rate": 1.1246745516135249e-07,
      "loss": 2.2294,
      "step": 73155
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.235039472579956,
      "learning_rate": 1.1240588583314893e-07,
      "loss": 2.2752,
      "step": 73156
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1980608701705933,
      "learning_rate": 1.1234433326710148e-07,
      "loss": 2.2743,
      "step": 73157
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1238900423049927,
      "learning_rate": 1.1228279746331672e-07,
      "loss": 2.291,
      "step": 73158
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.136074185371399,
      "learning_rate": 1.122212784218979e-07,
      "loss": 2.3455,
      "step": 73159
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0357913970947266,
      "learning_rate": 1.1215977614294826e-07,
      "loss": 2.3808,
      "step": 73160
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0313234329223633,
      "learning_rate": 1.1209829062657441e-07,
      "loss": 2.3916,
      "step": 73161
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0551084280014038,
      "learning_rate": 1.1203682187287956e-07,
      "loss": 2.2113,
      "step": 73162
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.107269525527954,
      "learning_rate": 1.119753698819681e-07,
      "loss": 2.4563,
      "step": 73163
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0967241525650024,
      "learning_rate": 1.119139346539433e-07,
      "loss": 2.4216,
      "step": 73164
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.065548300743103,
      "learning_rate": 1.1185251618890946e-07,
      "loss": 2.3399,
      "step": 73165
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.986007571220398,
      "learning_rate": 1.1179111448697211e-07,
      "loss": 2.2871,
      "step": 73166
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0784106254577637,
      "learning_rate": 1.1172972954823447e-07,
      "loss": 2.3868,
      "step": 73167
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0664031505584717,
      "learning_rate": 1.1166836137280091e-07,
      "loss": 2.4111,
      "step": 73168
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0573776960372925,
      "learning_rate": 1.1160700996077356e-07,
      "loss": 2.4152,
      "step": 73169
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.072432518005371,
      "learning_rate": 1.1154567531226012e-07,
      "loss": 2.2747,
      "step": 73170
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.182898998260498,
      "learning_rate": 1.1148435742736053e-07,
      "loss": 2.2953,
      "step": 73171
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0040874481201172,
      "learning_rate": 1.1142305630618134e-07,
      "loss": 2.2175,
      "step": 73172
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0999544858932495,
      "learning_rate": 1.113617719488258e-07,
      "loss": 2.173,
      "step": 73173
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.118044376373291,
      "learning_rate": 1.113005043553983e-07,
      "loss": 2.4249,
      "step": 73174
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0821053981781006,
      "learning_rate": 1.1123925352600096e-07,
      "loss": 2.3512,
      "step": 73175
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1707416772842407,
      "learning_rate": 1.1117801946073925e-07,
      "loss": 2.2355,
      "step": 73176
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0746372938156128,
      "learning_rate": 1.1111680215971642e-07,
      "loss": 2.3873,
      "step": 73177
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.052925705909729,
      "learning_rate": 1.1105560162303685e-07,
      "loss": 2.2414,
      "step": 73178
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9797142148017883,
      "learning_rate": 1.1099441785080378e-07,
      "loss": 2.3002,
      "step": 73179
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.141554832458496,
      "learning_rate": 1.1093325084312156e-07,
      "loss": 2.211,
      "step": 73180
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0979217290878296,
      "learning_rate": 1.1087210060009234e-07,
      "loss": 2.4647,
      "step": 73181
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0184922218322754,
      "learning_rate": 1.108109671218216e-07,
      "loss": 2.4366,
      "step": 73182
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.150691270828247,
      "learning_rate": 1.1074985040841146e-07,
      "loss": 2.3798,
      "step": 73183
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2473846673965454,
      "learning_rate": 1.106887504599663e-07,
      "loss": 2.1893,
      "step": 73184
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0195213556289673,
      "learning_rate": 1.1062766727658936e-07,
      "loss": 2.3741,
      "step": 73185
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2850576639175415,
      "learning_rate": 1.10566600858385e-07,
      "loss": 2.0742,
      "step": 73186
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.02949857711792,
      "learning_rate": 1.1050555120545648e-07,
      "loss": 2.0916,
      "step": 73187
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9729459881782532,
      "learning_rate": 1.1044451831790592e-07,
      "loss": 2.1064,
      "step": 73188
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1883620023727417,
      "learning_rate": 1.1038350219583882e-07,
      "loss": 2.3222,
      "step": 73189
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1281174421310425,
      "learning_rate": 1.103225028393573e-07,
      "loss": 2.1466,
      "step": 73190
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1168925762176514,
      "learning_rate": 1.1026152024856574e-07,
      "loss": 2.224,
      "step": 73191
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1483052968978882,
      "learning_rate": 1.1020055442356626e-07,
      "loss": 2.4343,
      "step": 73192
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0164552927017212,
      "learning_rate": 1.101396053644621e-07,
      "loss": 2.3974,
      "step": 73193
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1716610193252563,
      "learning_rate": 1.1007867307135877e-07,
      "loss": 2.1011,
      "step": 73194
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2098652124404907,
      "learning_rate": 1.1001775754435728e-07,
      "loss": 2.5033,
      "step": 73195
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0040984153747559,
      "learning_rate": 1.0995685878356199e-07,
      "loss": 2.2824,
      "step": 73196
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.12765371799469,
      "learning_rate": 1.0989597678907726e-07,
      "loss": 2.4008,
      "step": 73197
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0734277963638306,
      "learning_rate": 1.0983511156100302e-07,
      "loss": 2.5016,
      "step": 73198
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0126985311508179,
      "learning_rate": 1.0977426309944584e-07,
      "loss": 2.4959,
      "step": 73199
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1897376775741577,
      "learning_rate": 1.0971343140450563e-07,
      "loss": 2.1975,
      "step": 73200
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0232418775558472,
      "learning_rate": 1.09652616476289e-07,
      "loss": 2.2775,
      "step": 73201
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0818443298339844,
      "learning_rate": 1.0959181831489585e-07,
      "loss": 2.3846,
      "step": 73202
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0402476787567139,
      "learning_rate": 1.0953103692043276e-07,
      "loss": 2.4099,
      "step": 73203
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.99591064453125,
      "learning_rate": 1.0947027229299856e-07,
      "loss": 2.2209,
      "step": 73204
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0695414543151855,
      "learning_rate": 1.0940952443269981e-07,
      "loss": 2.1568,
      "step": 73205
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.067825436592102,
      "learning_rate": 1.0934879333963755e-07,
      "loss": 2.3987,
      "step": 73206
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1347142457962036,
      "learning_rate": 1.0928807901391614e-07,
      "loss": 2.3161,
      "step": 73207
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9720140099525452,
      "learning_rate": 1.092273814556366e-07,
      "loss": 2.2942,
      "step": 73208
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0896861553192139,
      "learning_rate": 1.0916670066490442e-07,
      "loss": 2.329,
      "step": 73209
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9955707788467407,
      "learning_rate": 1.0910603664182062e-07,
      "loss": 2.2751,
      "step": 73210
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1542532444000244,
      "learning_rate": 1.0904538938648846e-07,
      "loss": 2.4436,
      "step": 73211
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.044870138168335,
      "learning_rate": 1.0898475889901005e-07,
      "loss": 2.2791,
      "step": 73212
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0792194604873657,
      "learning_rate": 1.0892414517948868e-07,
      "loss": 2.0903,
      "step": 73213
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0432894229888916,
      "learning_rate": 1.0886354822802759e-07,
      "loss": 2.304,
      "step": 73214
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0168867111206055,
      "learning_rate": 1.0880296804473001e-07,
      "loss": 2.554,
      "step": 73215
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1996906995773315,
      "learning_rate": 1.0874240462969588e-07,
      "loss": 2.2081,
      "step": 73216
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.994961678981781,
      "learning_rate": 1.0868185798303177e-07,
      "loss": 2.3603,
      "step": 73217
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.019276738166809,
      "learning_rate": 1.0862132810483761e-07,
      "loss": 2.3038,
      "step": 73218
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0136399269104004,
      "learning_rate": 1.0856081499521665e-07,
      "loss": 2.2862,
      "step": 73219
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.062099814414978,
      "learning_rate": 1.0850031865427213e-07,
      "loss": 2.4383,
      "step": 73220
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0563335418701172,
      "learning_rate": 1.084398390821051e-07,
      "loss": 2.4474,
      "step": 73221
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.195090413093567,
      "learning_rate": 1.083793762788199e-07,
      "loss": 2.3846,
      "step": 73222
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0295640230178833,
      "learning_rate": 1.0831893024451756e-07,
      "loss": 2.1164,
      "step": 73223
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1176687479019165,
      "learning_rate": 1.0825850097930135e-07,
      "loss": 2.4037,
      "step": 73224
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.068963885307312,
      "learning_rate": 1.081980884832734e-07,
      "loss": 2.4376,
      "step": 73225
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1096893548965454,
      "learning_rate": 1.0813769275653696e-07,
      "loss": 2.4778,
      "step": 73226
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0970853567123413,
      "learning_rate": 1.0807731379919306e-07,
      "loss": 2.3772,
      "step": 73227
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1492347717285156,
      "learning_rate": 1.0801695161134496e-07,
      "loss": 2.4077,
      "step": 73228
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1139194965362549,
      "learning_rate": 1.0795660619309478e-07,
      "loss": 2.3717,
      "step": 73229
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0918911695480347,
      "learning_rate": 1.0789627754454468e-07,
      "loss": 2.1644,
      "step": 73230
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1053509712219238,
      "learning_rate": 1.078359656657968e-07,
      "loss": 2.4111,
      "step": 73231
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0481843948364258,
      "learning_rate": 1.0777567055695437e-07,
      "loss": 2.4243,
      "step": 73232
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0658739805221558,
      "learning_rate": 1.0771539221811844e-07,
      "loss": 2.1665,
      "step": 73233
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0253686904907227,
      "learning_rate": 1.0765513064939226e-07,
      "loss": 2.4037,
      "step": 73234
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1287730932235718,
      "learning_rate": 1.0759488585087574e-07,
      "loss": 2.1358,
      "step": 73235
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0675898790359497,
      "learning_rate": 1.0753465782267436e-07,
      "loss": 2.2136,
      "step": 73236
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1411877870559692,
      "learning_rate": 1.0747444656488804e-07,
      "loss": 2.1516,
      "step": 73237
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1192920207977295,
      "learning_rate": 1.0741425207761891e-07,
      "loss": 2.0666,
      "step": 73238
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.171242356300354,
      "learning_rate": 1.0735407436096911e-07,
      "loss": 2.2823,
      "step": 73239
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3097792863845825,
      "learning_rate": 1.0729391341504192e-07,
      "loss": 2.3711,
      "step": 73240
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0322649478912354,
      "learning_rate": 1.0723376923993722e-07,
      "loss": 2.1125,
      "step": 73241
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9716355800628662,
      "learning_rate": 1.071736418357594e-07,
      "loss": 2.2291,
      "step": 73242
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9713141918182373,
      "learning_rate": 1.0711353120260726e-07,
      "loss": 2.4922,
      "step": 73243
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.045270562171936,
      "learning_rate": 1.0705343734058627e-07,
      "loss": 2.3761,
      "step": 73244
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1217639446258545,
      "learning_rate": 1.0699336024979634e-07,
      "loss": 2.3542,
      "step": 73245
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9848394393920898,
      "learning_rate": 1.0693329993033851e-07,
      "loss": 2.5023,
      "step": 73246
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.077202558517456,
      "learning_rate": 1.0687325638231716e-07,
      "loss": 2.282,
      "step": 73247
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.001823902130127,
      "learning_rate": 1.0681322960583107e-07,
      "loss": 2.2263,
      "step": 73248
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.977189838886261,
      "learning_rate": 1.0675321960098461e-07,
      "loss": 2.1753,
      "step": 73249
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0878105163574219,
      "learning_rate": 1.0669322636787882e-07,
      "loss": 2.2616,
      "step": 73250
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2246037721633911,
      "learning_rate": 1.0663324990661362e-07,
      "loss": 2.2937,
      "step": 73251
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0522884130477905,
      "learning_rate": 1.0657329021729335e-07,
      "loss": 2.331,
      "step": 73252
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.168807864189148,
      "learning_rate": 1.0651334730001794e-07,
      "loss": 2.2908,
      "step": 73253
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9376295208930969,
      "learning_rate": 1.0645342115488843e-07,
      "loss": 2.3119,
      "step": 73254
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0790904760360718,
      "learning_rate": 1.0639351178200919e-07,
      "loss": 2.2167,
      "step": 73255
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.067362904548645,
      "learning_rate": 1.0633361918147789e-07,
      "loss": 2.3105,
      "step": 73256
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0664843320846558,
      "learning_rate": 1.0627374335340002e-07,
      "loss": 2.6082,
      "step": 73257
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.148525357246399,
      "learning_rate": 1.0621388429787439e-07,
      "loss": 2.4387,
      "step": 73258
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0400563478469849,
      "learning_rate": 1.0615404201500423e-07,
      "loss": 2.2815,
      "step": 73259
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1321239471435547,
      "learning_rate": 1.0609421650488839e-07,
      "loss": 2.1131,
      "step": 73260
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9843754768371582,
      "learning_rate": 1.0603440776763118e-07,
      "loss": 1.965,
      "step": 73261
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0936394929885864,
      "learning_rate": 1.0597461580333257e-07,
      "loss": 2.2866,
      "step": 73262
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0993727445602417,
      "learning_rate": 1.0591484061209467e-07,
      "loss": 2.5372,
      "step": 73263
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0610716342926025,
      "learning_rate": 1.058550821940174e-07,
      "loss": 2.2177,
      "step": 73264
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1719388961791992,
      "learning_rate": 1.0579534054920404e-07,
      "loss": 2.2251,
      "step": 73265
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1405586004257202,
      "learning_rate": 1.0573561567775448e-07,
      "loss": 2.2752,
      "step": 73266
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1123157739639282,
      "learning_rate": 1.0567590757976976e-07,
      "loss": 2.059,
      "step": 73267
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.027060866355896,
      "learning_rate": 1.05616216255352e-07,
      "loss": 2.3087,
      "step": 73268
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1005891561508179,
      "learning_rate": 1.0555654170460228e-07,
      "loss": 2.0668,
      "step": 73269
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1066845655441284,
      "learning_rate": 1.0549688392762048e-07,
      "loss": 2.2858,
      "step": 73270
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1555864810943604,
      "learning_rate": 1.0543724292450986e-07,
      "loss": 2.2963,
      "step": 73271
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.14760422706604,
      "learning_rate": 1.0537761869536922e-07,
      "loss": 2.3219,
      "step": 73272
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1515710353851318,
      "learning_rate": 1.0531801124030295e-07,
      "loss": 2.2682,
      "step": 73273
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1187844276428223,
      "learning_rate": 1.0525842055940871e-07,
      "loss": 2.3593,
      "step": 73274
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0398629903793335,
      "learning_rate": 1.0519884665278868e-07,
      "loss": 2.2351,
      "step": 73275
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1138248443603516,
      "learning_rate": 1.0513928952054386e-07,
      "loss": 2.4087,
      "step": 73276
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.103189468383789,
      "learning_rate": 1.0507974916277641e-07,
      "loss": 2.2297,
      "step": 73277
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.104307770729065,
      "learning_rate": 1.0502022557958513e-07,
      "loss": 2.5143,
      "step": 73278
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.122991681098938,
      "learning_rate": 1.0496071877107216e-07,
      "loss": 2.4148,
      "step": 73279
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9742617011070251,
      "learning_rate": 1.0490122873733854e-07,
      "loss": 2.3344,
      "step": 73280
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0780141353607178,
      "learning_rate": 1.0484175547848529e-07,
      "loss": 2.5097,
      "step": 73281
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0610748529434204,
      "learning_rate": 1.0478229899461235e-07,
      "loss": 2.2476,
      "step": 73282
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.233980655670166,
      "learning_rate": 1.047228592858196e-07,
      "loss": 2.0625,
      "step": 73283
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0831784009933472,
      "learning_rate": 1.0466343635221032e-07,
      "loss": 2.4647,
      "step": 73284
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.049618124961853,
      "learning_rate": 1.0460403019388443e-07,
      "loss": 2.2344,
      "step": 73285
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0184258222579956,
      "learning_rate": 1.0454464081094184e-07,
      "loss": 2.5069,
      "step": 73286
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0223315954208374,
      "learning_rate": 1.0448526820348359e-07,
      "loss": 2.4464,
      "step": 73287
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.051218032836914,
      "learning_rate": 1.044259123716107e-07,
      "loss": 2.2598,
      "step": 73288
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0157746076583862,
      "learning_rate": 1.043665733154231e-07,
      "loss": 2.3297,
      "step": 73289
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0740015506744385,
      "learning_rate": 1.0430725103502181e-07,
      "loss": 2.2112,
      "step": 73290
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.065794825553894,
      "learning_rate": 1.0424794553050787e-07,
      "loss": 2.5304,
      "step": 73291
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9906671047210693,
      "learning_rate": 1.041886568019812e-07,
      "loss": 2.2977,
      "step": 73292
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1519365310668945,
      "learning_rate": 1.0412938484954171e-07,
      "loss": 2.5718,
      "step": 73293
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0991406440734863,
      "learning_rate": 1.0407012967329045e-07,
      "loss": 2.2122,
      "step": 73294
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.076942801475525,
      "learning_rate": 1.0401089127332842e-07,
      "loss": 2.3396,
      "step": 73295
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1102725267410278,
      "learning_rate": 1.0395166964975666e-07,
      "loss": 2.2849,
      "step": 73296
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3408622741699219,
      "learning_rate": 1.038924648026729e-07,
      "loss": 2.1335,
      "step": 73297
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.070043683052063,
      "learning_rate": 1.0383327673218036e-07,
      "loss": 2.3029,
      "step": 73298
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2212343215942383,
      "learning_rate": 1.0377410543837785e-07,
      "loss": 2.3591,
      "step": 73299
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1271165609359741,
      "learning_rate": 1.0371495092136529e-07,
      "loss": 2.2771,
      "step": 73300
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0507636070251465,
      "learning_rate": 1.0365581318124485e-07,
      "loss": 2.2803,
      "step": 73301
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.096756935119629,
      "learning_rate": 1.035966922181153e-07,
      "loss": 2.0349,
      "step": 73302
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1946344375610352,
      "learning_rate": 1.0353758803207658e-07,
      "loss": 2.4382,
      "step": 73303
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0330164432525635,
      "learning_rate": 1.0347850062323084e-07,
      "loss": 2.3002,
      "step": 73304
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.996363639831543,
      "learning_rate": 1.0341942999167576e-07,
      "loss": 2.3381,
      "step": 73305
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2954659461975098,
      "learning_rate": 1.0336037613751238e-07,
      "loss": 2.3803,
      "step": 73306
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0534517765045166,
      "learning_rate": 1.0330133906084172e-07,
      "loss": 2.3707,
      "step": 73307
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1043174266815186,
      "learning_rate": 1.0324231876176261e-07,
      "loss": 2.3175,
      "step": 73308
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.119526743888855,
      "learning_rate": 1.0318331524037606e-07,
      "loss": 2.3911,
      "step": 73309
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.177917242050171,
      "learning_rate": 1.03124328496782e-07,
      "loss": 2.0352,
      "step": 73310
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1022578477859497,
      "learning_rate": 1.0306535853108035e-07,
      "loss": 2.2476,
      "step": 73311
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3069630861282349,
      "learning_rate": 1.0300640534336991e-07,
      "loss": 2.3409,
      "step": 73312
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.053180456161499,
      "learning_rate": 1.0294746893375174e-07,
      "loss": 2.2183,
      "step": 73313
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1793686151504517,
      "learning_rate": 1.0288854930232572e-07,
      "loss": 2.1689,
      "step": 73314
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.053393840789795,
      "learning_rate": 1.0282964644919292e-07,
      "loss": 2.3816,
      "step": 73315
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1312296390533447,
      "learning_rate": 1.02770760374451e-07,
      "loss": 2.2367,
      "step": 73316
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1693236827850342,
      "learning_rate": 1.027118910781999e-07,
      "loss": 2.2909,
      "step": 73317
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0445706844329834,
      "learning_rate": 1.0265303856054065e-07,
      "loss": 2.17,
      "step": 73318
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2052640914916992,
      "learning_rate": 1.0259420282157318e-07,
      "loss": 2.4859,
      "step": 73319
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1442055702209473,
      "learning_rate": 1.0253538386139628e-07,
      "loss": 2.2122,
      "step": 73320
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.090901255607605,
      "learning_rate": 1.0247658168010987e-07,
      "loss": 2.4077,
      "step": 73321
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0687987804412842,
      "learning_rate": 1.024177962778139e-07,
      "loss": 2.6127,
      "step": 73322
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.181057095527649,
      "learning_rate": 1.0235902765460825e-07,
      "loss": 2.3699,
      "step": 73323
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1134592294692993,
      "learning_rate": 1.0230027581059176e-07,
      "loss": 2.3481,
      "step": 73324
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1620880365371704,
      "learning_rate": 1.0224154074586435e-07,
      "loss": 2.1861,
      "step": 73325
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0914151668548584,
      "learning_rate": 1.0218282246052591e-07,
      "loss": 2.4007,
      "step": 73326
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2014384269714355,
      "learning_rate": 1.0212412095467639e-07,
      "loss": 2.3495,
      "step": 73327
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0347511768341064,
      "learning_rate": 1.0206543622841347e-07,
      "loss": 2.4603,
      "step": 73328
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1225218772888184,
      "learning_rate": 1.0200676828183931e-07,
      "loss": 2.3717,
      "step": 73329
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.10797917842865,
      "learning_rate": 1.0194811711505048e-07,
      "loss": 2.2139,
      "step": 73330
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0601187944412231,
      "learning_rate": 1.0188948272814914e-07,
      "loss": 2.253,
      "step": 73331
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0508266687393188,
      "learning_rate": 1.0183086512123297e-07,
      "loss": 2.449,
      "step": 73332
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0604844093322754,
      "learning_rate": 1.017722642944019e-07,
      "loss": 2.4099,
      "step": 73333
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0710939168930054,
      "learning_rate": 1.0171368024775474e-07,
      "loss": 2.2222,
      "step": 73334
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2037599086761475,
      "learning_rate": 1.016551129813914e-07,
      "loss": 2.3774,
      "step": 73335
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0394481420516968,
      "learning_rate": 1.0159656249541072e-07,
      "loss": 2.253,
      "step": 73336
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.022667646408081,
      "learning_rate": 1.015380287899126e-07,
      "loss": 2.2125,
      "step": 73337
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.0782196521759033,
      "learning_rate": 1.0147951186499582e-07,
      "loss": 2.3225,
      "step": 73338
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.234881043434143,
      "learning_rate": 1.0142101172075924e-07,
      "loss": 2.2233,
      "step": 73339
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.133420467376709,
      "learning_rate": 1.0136252835730387e-07,
      "loss": 2.47,
      "step": 73340
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.1012558937072754,
      "learning_rate": 1.013040617747263e-07,
      "loss": 2.2513,
      "step": 73341
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.9902939796447754,
      "learning_rate": 1.0124561197312755e-07,
      "loss": 2.1804,
      "step": 73342
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.126979947090149,
      "learning_rate": 1.0118717895260532e-07,
      "loss": 2.4617,
      "step": 73343
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.041489839553833,
      "learning_rate": 1.0112876271325955e-07,
      "loss": 2.2577,
      "step": 73344
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2638014554977417,
      "learning_rate": 1.0107036325518904e-07,
      "loss": 2.3637,
      "step": 73345
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.088218092918396,
      "learning_rate": 1.010119805784926e-07,
      "loss": 2.3141,
      "step": 73346
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.17750084400177,
      "learning_rate": 1.0095361468326903e-07,
      "loss": 2.3151,
      "step": 73347
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0820294618606567,
      "learning_rate": 1.0089526556961826e-07,
      "loss": 2.3565,
      "step": 73348
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0416545867919922,
      "learning_rate": 1.0083693323763799e-07,
      "loss": 2.3354,
      "step": 73349
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.096570372581482,
      "learning_rate": 1.0077861768742814e-07,
      "loss": 2.493,
      "step": 73350
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.066178798675537,
      "learning_rate": 1.0072031891908641e-07,
      "loss": 2.2716,
      "step": 73351
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0179417133331299,
      "learning_rate": 1.0066203693271381e-07,
      "loss": 2.3452,
      "step": 73352
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.045590877532959,
      "learning_rate": 1.0060377172840585e-07,
      "loss": 2.5268,
      "step": 73353
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.048331618309021,
      "learning_rate": 1.0054552330626466e-07,
      "loss": 2.5039,
      "step": 73354
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9889081716537476,
      "learning_rate": 1.0048729166638683e-07,
      "loss": 2.3252,
      "step": 73355
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1445677280426025,
      "learning_rate": 1.0042907680887226e-07,
      "loss": 2.2525,
      "step": 73356
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0911552906036377,
      "learning_rate": 1.0037087873381868e-07,
      "loss": 2.5452,
      "step": 73357
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.072296142578125,
      "learning_rate": 1.0031269744132488e-07,
      "loss": 2.4346,
      "step": 73358
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0708296298980713,
      "learning_rate": 1.0025453293148968e-07,
      "loss": 2.4449,
      "step": 73359
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.115207552909851,
      "learning_rate": 1.0019638520441188e-07,
      "loss": 2.4398,
      "step": 73360
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0204145908355713,
      "learning_rate": 1.001382542601903e-07,
      "loss": 2.3137,
      "step": 73361
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1018459796905518,
      "learning_rate": 1.0008014009892263e-07,
      "loss": 2.2663,
      "step": 73362
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9891292452812195,
      "learning_rate": 1.0002204272070881e-07,
      "loss": 2.1921,
      "step": 73363
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.044047236442566,
      "learning_rate": 9.99639621256454e-08,
      "loss": 2.2954,
      "step": 73364
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9669185876846313,
      "learning_rate": 9.990589831383346e-08,
      "loss": 2.314,
      "step": 73365
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2487307786941528,
      "learning_rate": 9.984785128536844e-08,
      "loss": 2.3102,
      "step": 73366
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0193870067596436,
      "learning_rate": 9.978982104035029e-08,
      "loss": 2.397,
      "step": 73367
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.075659990310669,
      "learning_rate": 9.97318075788778e-08,
      "loss": 2.3425,
      "step": 73368
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0641002655029297,
      "learning_rate": 9.967381090104755e-08,
      "loss": 2.2285,
      "step": 73369
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1277450323104858,
      "learning_rate": 9.961583100696059e-08,
      "loss": 2.2639,
      "step": 73370
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5759515762329102,
      "learning_rate": 9.955786789671351e-08,
      "loss": 2.1182,
      "step": 73371
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.17942476272583,
      "learning_rate": 9.9499921570404e-08,
      "loss": 2.2924,
      "step": 73372
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0929824113845825,
      "learning_rate": 9.944199202813198e-08,
      "loss": 2.0208,
      "step": 73373
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0043011903762817,
      "learning_rate": 9.938407926999294e-08,
      "loss": 2.2629,
      "step": 73374
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0821034908294678,
      "learning_rate": 9.932618329608901e-08,
      "loss": 2.15,
      "step": 73375
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.057990550994873,
      "learning_rate": 9.926830410651456e-08,
      "loss": 2.2974,
      "step": 73376
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4819297790527344,
      "learning_rate": 9.921044170137062e-08,
      "loss": 2.2836,
      "step": 73377
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0685052871704102,
      "learning_rate": 9.915259608075267e-08,
      "loss": 2.317,
      "step": 73378
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0608218908309937,
      "learning_rate": 9.909476724476174e-08,
      "loss": 2.2922,
      "step": 73379
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.122139573097229,
      "learning_rate": 9.903695519349221e-08,
      "loss": 2.3365,
      "step": 73380
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1715384721755981,
      "learning_rate": 9.89791599270462e-08,
      "loss": 2.2334,
      "step": 73381
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.175641655921936,
      "learning_rate": 9.892138144551811e-08,
      "loss": 2.3178,
      "step": 73382
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1011815071105957,
      "learning_rate": 9.886361974900893e-08,
      "loss": 2.2906,
      "step": 73383
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1050976514816284,
      "learning_rate": 9.880587483761305e-08,
      "loss": 2.2365,
      "step": 73384
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1160143613815308,
      "learning_rate": 9.874814671143151e-08,
      "loss": 2.4797,
      "step": 73385
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0957087278366089,
      "learning_rate": 9.869043537056088e-08,
      "loss": 2.2616,
      "step": 73386
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1455057859420776,
      "learning_rate": 9.863274081509888e-08,
      "loss": 2.4746,
      "step": 73387
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0100845098495483,
      "learning_rate": 9.857506304514319e-08,
      "loss": 2.5061,
      "step": 73388
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0991719961166382,
      "learning_rate": 9.851740206079264e-08,
      "loss": 2.4263,
      "step": 73389
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0472800731658936,
      "learning_rate": 9.845975786214379e-08,
      "loss": 2.251,
      "step": 73390
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2144007682800293,
      "learning_rate": 9.840213044929548e-08,
      "loss": 2.3238,
      "step": 73391
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5270297527313232,
      "learning_rate": 9.834451982234539e-08,
      "loss": 2.2525,
      "step": 73392
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1275811195373535,
      "learning_rate": 9.8286925981389e-08,
      "loss": 2.4274,
      "step": 73393
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0926851034164429,
      "learning_rate": 9.822934892652625e-08,
      "loss": 2.266,
      "step": 73394
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1428635120391846,
      "learning_rate": 9.817178865785482e-08,
      "loss": 2.4777,
      "step": 73395
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1695098876953125,
      "learning_rate": 9.81142451754702e-08,
      "loss": 2.1952,
      "step": 73396
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.062807321548462,
      "learning_rate": 9.805671847947229e-08,
      "loss": 2.3829,
      "step": 73397
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9898760318756104,
      "learning_rate": 9.79992085699577e-08,
      "loss": 2.3102,
      "step": 73398
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1194968223571777,
      "learning_rate": 9.794171544702192e-08,
      "loss": 2.3661,
      "step": 73399
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1184190511703491,
      "learning_rate": 9.788423911076594e-08,
      "loss": 2.4463,
      "step": 73400
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9577704071998596,
      "learning_rate": 9.782677956128528e-08,
      "loss": 2.42,
      "step": 73401
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1129175424575806,
      "learning_rate": 9.776933679867762e-08,
      "loss": 2.2281,
      "step": 73402
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1738437414169312,
      "learning_rate": 9.771191082303955e-08,
      "loss": 2.1941,
      "step": 73403
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1499801874160767,
      "learning_rate": 9.765450163446988e-08,
      "loss": 2.271,
      "step": 73404
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0654469728469849,
      "learning_rate": 9.75971092330652e-08,
      "loss": 2.4066,
      "step": 73405
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0757392644882202,
      "learning_rate": 9.753973361892321e-08,
      "loss": 2.2609,
      "step": 73406
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1946607828140259,
      "learning_rate": 9.74823747921405e-08,
      "loss": 2.2224,
      "step": 73407
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0887960195541382,
      "learning_rate": 9.74250327528159e-08,
      "loss": 2.2578,
      "step": 73408
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1716047525405884,
      "learning_rate": 9.736770750104374e-08,
      "loss": 2.3854,
      "step": 73409
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1476150751113892,
      "learning_rate": 9.731039903692285e-08,
      "loss": 2.3375,
      "step": 73410
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6404826641082764,
      "learning_rate": 9.725310736055094e-08,
      "loss": 2.2363,
      "step": 73411
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.153730034828186,
      "learning_rate": 9.719583247202457e-08,
      "loss": 2.4009,
      "step": 73412
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.325316309928894,
      "learning_rate": 9.713857437144037e-08,
      "loss": 2.4516,
      "step": 73413
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.141716480255127,
      "learning_rate": 9.708133305889711e-08,
      "loss": 2.3299,
      "step": 73414
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0218284130096436,
      "learning_rate": 9.702410853448918e-08,
      "loss": 2.0856,
      "step": 73415
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0005955696105957,
      "learning_rate": 9.696690079831539e-08,
      "loss": 2.1038,
      "step": 73416
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9978483319282532,
      "learning_rate": 9.690970985047233e-08,
      "loss": 2.3303,
      "step": 73417
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1174870729446411,
      "learning_rate": 9.685253569105658e-08,
      "loss": 2.3793,
      "step": 73418
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0863922834396362,
      "learning_rate": 9.679537832016584e-08,
      "loss": 2.3567,
      "step": 73419
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1920417547225952,
      "learning_rate": 9.67382377378967e-08,
      "loss": 2.0958,
      "step": 73420
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1276512145996094,
      "learning_rate": 9.668111394434687e-08,
      "loss": 2.3327,
      "step": 73421
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9751764535903931,
      "learning_rate": 9.662400693961182e-08,
      "loss": 2.0515,
      "step": 73422
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0565441846847534,
      "learning_rate": 9.656691672378704e-08,
      "loss": 2.5842,
      "step": 73423
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0990759134292603,
      "learning_rate": 9.650984329697354e-08,
      "loss": 2.3163,
      "step": 73424
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0834096670150757,
      "learning_rate": 9.645278665926349e-08,
      "loss": 2.3663,
      "step": 73425
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.08577299118042,
      "learning_rate": 9.63957468107568e-08,
      "loss": 2.2444,
      "step": 73426
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1330610513687134,
      "learning_rate": 9.633872375154896e-08,
      "loss": 2.5257,
      "step": 73427
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0386704206466675,
      "learning_rate": 9.628171748173654e-08,
      "loss": 2.1613,
      "step": 73428
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0667287111282349,
      "learning_rate": 9.622472800141724e-08,
      "loss": 2.2481,
      "step": 73429
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.091081976890564,
      "learning_rate": 9.616775531068545e-08,
      "loss": 2.2559,
      "step": 73430
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9913907051086426,
      "learning_rate": 9.611079940964107e-08,
      "loss": 2.4536,
      "step": 73431
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1324429512023926,
      "learning_rate": 9.605386029837627e-08,
      "loss": 2.3318,
      "step": 73432
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1126327514648438,
      "learning_rate": 9.599693797699205e-08,
      "loss": 2.5548,
      "step": 73433
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1676827669143677,
      "learning_rate": 9.594003244558281e-08,
      "loss": 2.1642,
      "step": 73434
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1408870220184326,
      "learning_rate": 9.588314370424512e-08,
      "loss": 2.293,
      "step": 73435
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1361157894134521,
      "learning_rate": 9.582627175307446e-08,
      "loss": 2.24,
      "step": 73436
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.072866678237915,
      "learning_rate": 9.576941659216854e-08,
      "loss": 2.2695,
      "step": 73437
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0402041673660278,
      "learning_rate": 9.571257822162283e-08,
      "loss": 2.2169,
      "step": 73438
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1150832176208496,
      "learning_rate": 9.565575664153503e-08,
      "loss": 2.383,
      "step": 73439
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0472819805145264,
      "learning_rate": 9.559895185200063e-08,
      "loss": 2.3603,
      "step": 73440
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1463786363601685,
      "learning_rate": 9.554216385311621e-08,
      "loss": 2.2871,
      "step": 73441
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0787872076034546,
      "learning_rate": 9.548539264497725e-08,
      "loss": 2.2972,
      "step": 73442
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2230957746505737,
      "learning_rate": 9.542863822768145e-08,
      "loss": 2.3797,
      "step": 73443
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0689325332641602,
      "learning_rate": 9.537190060132318e-08,
      "loss": 2.2819,
      "step": 73444
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1308680772781372,
      "learning_rate": 9.531517976600013e-08,
      "loss": 2.2559,
      "step": 73445
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9340372085571289,
      "learning_rate": 9.52584757218078e-08,
      "loss": 2.3383,
      "step": 73446
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0795154571533203,
      "learning_rate": 9.520178846884387e-08,
      "loss": 2.2486,
      "step": 73447
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0562063455581665,
      "learning_rate": 9.51451180072005e-08,
      "loss": 2.2415,
      "step": 73448
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.153083324432373,
      "learning_rate": 9.50884643369776e-08,
      "loss": 2.4921,
      "step": 73449
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0138393640518188,
      "learning_rate": 9.503182745826956e-08,
      "loss": 2.3688,
      "step": 73450
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9850016236305237,
      "learning_rate": 9.497520737117295e-08,
      "loss": 2.2036,
      "step": 73451
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2623757123947144,
      "learning_rate": 9.491860407578324e-08,
      "loss": 2.4148,
      "step": 73452
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2116450071334839,
      "learning_rate": 9.486201757219815e-08,
      "loss": 2.251,
      "step": 73453
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.038885235786438,
      "learning_rate": 9.480544786051094e-08,
      "loss": 2.2674,
      "step": 73454
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0089951753616333,
      "learning_rate": 9.474889494081818e-08,
      "loss": 2.38,
      "step": 73455
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0580480098724365,
      "learning_rate": 9.469235881321648e-08,
      "loss": 2.5241,
      "step": 73456
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0884686708450317,
      "learning_rate": 9.46358394778013e-08,
      "loss": 2.2641,
      "step": 73457
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0967415571212769,
      "learning_rate": 9.457933693467037e-08,
      "loss": 2.3355,
      "step": 73458
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.030943512916565,
      "learning_rate": 9.45228511839158e-08,
      "loss": 2.1756,
      "step": 73459
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.069571852684021,
      "learning_rate": 9.446638222563642e-08,
      "loss": 2.3703,
      "step": 73460
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2078019380569458,
      "learning_rate": 9.440993005992661e-08,
      "loss": 2.4396,
      "step": 73461
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.04031240940094,
      "learning_rate": 9.435349468688182e-08,
      "loss": 2.3338,
      "step": 73462
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0914523601531982,
      "learning_rate": 9.429707610659866e-08,
      "loss": 2.1527,
      "step": 73463
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0291805267333984,
      "learning_rate": 9.42406743191715e-08,
      "loss": 2.2466,
      "step": 73464
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0910980701446533,
      "learning_rate": 9.418428932469692e-08,
      "loss": 2.2213,
      "step": 73465
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1587566137313843,
      "learning_rate": 9.412792112327151e-08,
      "loss": 2.4087,
      "step": 73466
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1549581289291382,
      "learning_rate": 9.407156971498854e-08,
      "loss": 2.4313,
      "step": 73467
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0490678548812866,
      "learning_rate": 9.40152350999457e-08,
      "loss": 2.4475,
      "step": 73468
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.114758014678955,
      "learning_rate": 9.395891727823625e-08,
      "loss": 2.4306,
      "step": 73469
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.03959059715271,
      "learning_rate": 9.390261624995789e-08,
      "loss": 2.1161,
      "step": 73470
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0424772500991821,
      "learning_rate": 9.384633201520388e-08,
      "loss": 2.4974,
      "step": 73471
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2267487049102783,
      "learning_rate": 9.379006457407191e-08,
      "loss": 2.3165,
      "step": 73472
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0135244131088257,
      "learning_rate": 9.373381392665637e-08,
      "loss": 2.1574,
      "step": 73473
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1060287952423096,
      "learning_rate": 9.367758007305161e-08,
      "loss": 2.1667,
      "step": 73474
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9623725414276123,
      "learning_rate": 9.362136301335422e-08,
      "loss": 2.5955,
      "step": 73475
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0734468698501587,
      "learning_rate": 9.35651627476597e-08,
      "loss": 2.3372,
      "step": 73476
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0633070468902588,
      "learning_rate": 9.35089792760624e-08,
      "loss": 2.2898,
      "step": 73477
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0740872621536255,
      "learning_rate": 9.34528125986578e-08,
      "loss": 2.3446,
      "step": 73478
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0782675743103027,
      "learning_rate": 9.339666271554137e-08,
      "loss": 2.1286,
      "step": 73479
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0505032539367676,
      "learning_rate": 9.334052962680862e-08,
      "loss": 2.4285,
      "step": 73480
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0588772296905518,
      "learning_rate": 9.328441333255278e-08,
      "loss": 2.2898,
      "step": 73481
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1543316841125488,
      "learning_rate": 9.322831383287157e-08,
      "loss": 2.5638,
      "step": 73482
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0674713850021362,
      "learning_rate": 9.317223112785933e-08,
      "loss": 2.3375,
      "step": 73483
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0657107830047607,
      "learning_rate": 9.311616521761047e-08,
      "loss": 2.5121,
      "step": 73484
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0417829751968384,
      "learning_rate": 9.306011610222044e-08,
      "loss": 2.1846,
      "step": 73485
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0686579942703247,
      "learning_rate": 9.30040837817836e-08,
      "loss": 2.2923,
      "step": 73486
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1480672359466553,
      "learning_rate": 9.294806825639657e-08,
      "loss": 2.3563,
      "step": 73487
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.041244387626648,
      "learning_rate": 9.28920695261537e-08,
      "loss": 2.2785,
      "step": 73488
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9954004287719727,
      "learning_rate": 9.283608759114826e-08,
      "loss": 2.2834,
      "step": 73489
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1251128911972046,
      "learning_rate": 9.278012245147683e-08,
      "loss": 2.4486,
      "step": 73490
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0028237104415894,
      "learning_rate": 9.272417410723378e-08,
      "loss": 2.4662,
      "step": 73491
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2384006977081299,
      "learning_rate": 9.266824255851347e-08,
      "loss": 2.5926,
      "step": 73492
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0539658069610596,
      "learning_rate": 9.261232780541252e-08,
      "loss": 2.3694,
      "step": 73493
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1322896480560303,
      "learning_rate": 9.255642984802304e-08,
      "loss": 2.306,
      "step": 73494
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.271591067314148,
      "learning_rate": 9.250054868644276e-08,
      "loss": 2.4414,
      "step": 73495
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.126273512840271,
      "learning_rate": 9.244468432076382e-08,
      "loss": 2.3815,
      "step": 73496
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0427677631378174,
      "learning_rate": 9.238883675108278e-08,
      "loss": 2.2597,
      "step": 73497
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1255478858947754,
      "learning_rate": 9.233300597749184e-08,
      "loss": 2.3596,
      "step": 73498
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0294153690338135,
      "learning_rate": 9.227719200008867e-08,
      "loss": 2.3405,
      "step": 73499
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9970412850379944,
      "learning_rate": 9.222139481896764e-08,
      "loss": 2.3514,
      "step": 73500
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1067581176757812,
      "learning_rate": 9.21656144342209e-08,
      "loss": 2.4767,
      "step": 73501
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0044556856155396,
      "learning_rate": 9.210985084594392e-08,
      "loss": 2.3169,
      "step": 73502
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1586987972259521,
      "learning_rate": 9.205410405423331e-08,
      "loss": 2.3103,
      "step": 73503
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0007834434509277,
      "learning_rate": 9.199837405918011e-08,
      "loss": 2.2705,
      "step": 73504
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1572767496109009,
      "learning_rate": 9.194266086088199e-08,
      "loss": 2.043,
      "step": 73505
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0000568628311157,
      "learning_rate": 9.188696445943113e-08,
      "loss": 2.3479,
      "step": 73506
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0628762245178223,
      "learning_rate": 9.183128485492299e-08,
      "loss": 2.6164,
      "step": 73507
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0800890922546387,
      "learning_rate": 9.177562204745194e-08,
      "loss": 2.2652,
      "step": 73508
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9608399271965027,
      "learning_rate": 9.171997603711347e-08,
      "loss": 2.3917,
      "step": 73509
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0875792503356934,
      "learning_rate": 9.166434682399972e-08,
      "loss": 2.4994,
      "step": 73510
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3062163591384888,
      "learning_rate": 9.160873440820505e-08,
      "loss": 2.2795,
      "step": 73511
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2175617218017578,
      "learning_rate": 9.155313878982607e-08,
      "loss": 2.1469,
      "step": 73512
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1606611013412476,
      "learning_rate": 9.149755996895604e-08,
      "loss": 2.2729,
      "step": 73513
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1717541217803955,
      "learning_rate": 9.14419979456871e-08,
      "loss": 2.1758,
      "step": 73514
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.038278579711914,
      "learning_rate": 9.138645272011581e-08,
      "loss": 2.1295,
      "step": 73515
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0692816972732544,
      "learning_rate": 9.133092429233548e-08,
      "loss": 2.4416,
      "step": 73516
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0716173648834229,
      "learning_rate": 9.127541266244045e-08,
      "loss": 2.3346,
      "step": 73517
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1549530029296875,
      "learning_rate": 9.121991783052508e-08,
      "loss": 2.2421,
      "step": 73518
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1281325817108154,
      "learning_rate": 9.116443979668266e-08,
      "loss": 2.3558,
      "step": 73519
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1334452629089355,
      "learning_rate": 9.110897856100865e-08,
      "loss": 2.3685,
      "step": 73520
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1123378276824951,
      "learning_rate": 9.105353412359519e-08,
      "loss": 2.3991,
      "step": 73521
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1626262664794922,
      "learning_rate": 9.099810648453778e-08,
      "loss": 2.3101,
      "step": 73522
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.209993839263916,
      "learning_rate": 9.094269564392854e-08,
      "loss": 2.3845,
      "step": 73523
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.029538631439209,
      "learning_rate": 9.08873016018641e-08,
      "loss": 2.051,
      "step": 73524
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0519744157791138,
      "learning_rate": 9.083192435843657e-08,
      "loss": 2.3382,
      "step": 73525
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1613705158233643,
      "learning_rate": 9.077656391374146e-08,
      "loss": 2.3878,
      "step": 73526
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0982584953308105,
      "learning_rate": 9.072122026786978e-08,
      "loss": 2.1924,
      "step": 73527
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.146926999092102,
      "learning_rate": 9.066589342091703e-08,
      "loss": 2.3142,
      "step": 73528
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0549895763397217,
      "learning_rate": 9.061058337297645e-08,
      "loss": 2.3183,
      "step": 73529
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.18217933177948,
      "learning_rate": 9.055529012414354e-08,
      "loss": 2.3952,
      "step": 73530
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2241774797439575,
      "learning_rate": 9.050001367451045e-08,
      "loss": 2.4915,
      "step": 73531
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1526440382003784,
      "learning_rate": 9.044475402417152e-08,
      "loss": 2.4256,
      "step": 73532
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0788912773132324,
      "learning_rate": 9.038951117321893e-08,
      "loss": 2.3764,
      "step": 73533
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.157641887664795,
      "learning_rate": 9.033428512174813e-08,
      "loss": 2.3484,
      "step": 73534
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1390578746795654,
      "learning_rate": 9.02790758698524e-08,
      "loss": 2.2938,
      "step": 73535
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1174616813659668,
      "learning_rate": 9.0223883417625e-08,
      "loss": 2.4874,
      "step": 73536
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0692722797393799,
      "learning_rate": 9.016870776515918e-08,
      "loss": 2.4814,
      "step": 73537
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0579584836959839,
      "learning_rate": 9.011354891254931e-08,
      "loss": 2.3156,
      "step": 73538
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0293678045272827,
      "learning_rate": 9.005840685988865e-08,
      "loss": 2.4059,
      "step": 73539
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0141583681106567,
      "learning_rate": 9.000328160727045e-08,
      "loss": 2.247,
      "step": 73540
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0345090627670288,
      "learning_rate": 8.994817315478799e-08,
      "loss": 2.4454,
      "step": 73541
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0924148559570312,
      "learning_rate": 8.989308150253562e-08,
      "loss": 2.5365,
      "step": 73542
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1981574296951294,
      "learning_rate": 8.983800665060439e-08,
      "loss": 2.0999,
      "step": 73543
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1738886833190918,
      "learning_rate": 8.978294859909087e-08,
      "loss": 2.4075,
      "step": 73544
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1305207014083862,
      "learning_rate": 8.972790734808611e-08,
      "loss": 2.3191,
      "step": 73545
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1266906261444092,
      "learning_rate": 8.96728828976845e-08,
      "loss": 2.2848,
      "step": 73546
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0347321033477783,
      "learning_rate": 8.961787524797816e-08,
      "loss": 2.3308,
      "step": 73547
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0016244649887085,
      "learning_rate": 8.956288439906146e-08,
      "loss": 2.3869,
      "step": 73548
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1517378091812134,
      "learning_rate": 8.950791035102769e-08,
      "loss": 2.2325,
      "step": 73549
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.036536455154419,
      "learning_rate": 8.945295310396896e-08,
      "loss": 2.3132,
      "step": 73550
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1623938083648682,
      "learning_rate": 8.939801265798076e-08,
      "loss": 2.2644,
      "step": 73551
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0760433673858643,
      "learning_rate": 8.934308901315303e-08,
      "loss": 2.4213,
      "step": 73552
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1904562711715698,
      "learning_rate": 8.928818216958123e-08,
      "loss": 2.4919,
      "step": 73553
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.436354160308838,
      "learning_rate": 8.923329212735753e-08,
      "loss": 2.1295,
      "step": 73554
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1254271268844604,
      "learning_rate": 8.917841888657519e-08,
      "loss": 2.0549,
      "step": 73555
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.057963252067566,
      "learning_rate": 8.912356244732634e-08,
      "loss": 2.2738,
      "step": 73556
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0454193353652954,
      "learning_rate": 8.906872280970536e-08,
      "loss": 2.1075,
      "step": 73557
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0810438394546509,
      "learning_rate": 8.901389997380549e-08,
      "loss": 2.3087,
      "step": 73558
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9801498055458069,
      "learning_rate": 8.89590939397178e-08,
      "loss": 2.1629,
      "step": 73559
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4281656742095947,
      "learning_rate": 8.890430470753664e-08,
      "loss": 2.2233,
      "step": 73560
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1179382801055908,
      "learning_rate": 8.884953227735415e-08,
      "loss": 2.0857,
      "step": 73561
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9693081974983215,
      "learning_rate": 8.879477664926361e-08,
      "loss": 2.5925,
      "step": 73562
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1963715553283691,
      "learning_rate": 8.874003782335827e-08,
      "loss": 2.0832,
      "step": 73563
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9925047755241394,
      "learning_rate": 8.868531579973028e-08,
      "loss": 2.4028,
      "step": 73564
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.619857907295227,
      "learning_rate": 8.863061057847178e-08,
      "loss": 2.4477,
      "step": 73565
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0372512340545654,
      "learning_rate": 8.857592215967714e-08,
      "loss": 2.343,
      "step": 73566
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.060692310333252,
      "learning_rate": 8.852125054343852e-08,
      "loss": 2.2531,
      "step": 73567
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0399023294448853,
      "learning_rate": 8.846659572984806e-08,
      "loss": 2.1108,
      "step": 73568
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0178152322769165,
      "learning_rate": 8.841195771899791e-08,
      "loss": 2.4693,
      "step": 73569
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1278430223464966,
      "learning_rate": 8.835733651098132e-08,
      "loss": 2.3743,
      "step": 73570
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0507335662841797,
      "learning_rate": 8.830273210589268e-08,
      "loss": 2.2553,
      "step": 73571
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0313284397125244,
      "learning_rate": 8.824814450382191e-08,
      "loss": 2.0979,
      "step": 73572
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.036799669265747,
      "learning_rate": 8.819357370486225e-08,
      "loss": 2.3994,
      "step": 73573
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0948759317398071,
      "learning_rate": 8.813901970910698e-08,
      "loss": 2.2418,
      "step": 73574
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1462922096252441,
      "learning_rate": 8.808448251664714e-08,
      "loss": 2.5019,
      "step": 73575
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0917388200759888,
      "learning_rate": 8.802996212757708e-08,
      "loss": 2.3648,
      "step": 73576
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1009905338287354,
      "learning_rate": 8.797545854198786e-08,
      "loss": 2.291,
      "step": 73577
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1167151927947998,
      "learning_rate": 8.792097175997271e-08,
      "loss": 2.1983,
      "step": 73578
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1170654296875,
      "learning_rate": 8.78665017816227e-08,
      "loss": 2.3632,
      "step": 73579
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0523309707641602,
      "learning_rate": 8.781204860703219e-08,
      "loss": 2.2577,
      "step": 73580
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0892833471298218,
      "learning_rate": 8.77576122362911e-08,
      "loss": 2.2489,
      "step": 73581
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1019315719604492,
      "learning_rate": 8.770319266949489e-08,
      "loss": 2.4149,
      "step": 73582
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0656481981277466,
      "learning_rate": 8.764878990673242e-08,
      "loss": 2.1517,
      "step": 73583
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0658931732177734,
      "learning_rate": 8.759440394809804e-08,
      "loss": 2.2413,
      "step": 73584
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2094171047210693,
      "learning_rate": 8.754003479368278e-08,
      "loss": 2.3132,
      "step": 73585
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0320446491241455,
      "learning_rate": 8.748568244357991e-08,
      "loss": 2.0989,
      "step": 73586
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0967293977737427,
      "learning_rate": 8.743134689788046e-08,
      "loss": 2.4308,
      "step": 73587
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9762330651283264,
      "learning_rate": 8.73770281566766e-08,
      "loss": 2.3781,
      "step": 73588
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.159582495689392,
      "learning_rate": 8.732272622006155e-08,
      "loss": 2.6184,
      "step": 73589
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1598899364471436,
      "learning_rate": 8.726844108812749e-08,
      "loss": 2.2248,
      "step": 73590
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1714646816253662,
      "learning_rate": 8.721417276096433e-08,
      "loss": 2.1307,
      "step": 73591
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1067070960998535,
      "learning_rate": 8.715992123866757e-08,
      "loss": 2.1387,
      "step": 73592
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1582785844802856,
      "learning_rate": 8.710568652132489e-08,
      "loss": 2.1717,
      "step": 73593
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0607208013534546,
      "learning_rate": 8.70514686090318e-08,
      "loss": 2.247,
      "step": 73594
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0641337633132935,
      "learning_rate": 8.699726750187709e-08,
      "loss": 2.3217,
      "step": 73595
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0533779859542847,
      "learning_rate": 8.694308319995515e-08,
      "loss": 2.1442,
      "step": 73596
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1587579250335693,
      "learning_rate": 8.6888915703357e-08,
      "loss": 2.297,
      "step": 73597
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9646874666213989,
      "learning_rate": 8.68347650121748e-08,
      "loss": 2.2291,
      "step": 73598
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0546427965164185,
      "learning_rate": 8.67806311265007e-08,
      "loss": 2.4901,
      "step": 73599
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0792567729949951,
      "learning_rate": 8.672651404642462e-08,
      "loss": 2.298,
      "step": 73600
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0313833951950073,
      "learning_rate": 8.667241377204094e-08,
      "loss": 2.1881,
      "step": 73601
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0557585954666138,
      "learning_rate": 8.661833030343847e-08,
      "loss": 2.1294,
      "step": 73602
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.136373519897461,
      "learning_rate": 8.656426364071047e-08,
      "loss": 2.3415,
      "step": 73603
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.022252082824707,
      "learning_rate": 8.651021378394909e-08,
      "loss": 2.3143,
      "step": 73604
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0618045330047607,
      "learning_rate": 8.645618073324535e-08,
      "loss": 2.3812,
      "step": 73605
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1482545137405396,
      "learning_rate": 8.640216448869032e-08,
      "loss": 2.2486,
      "step": 73606
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.022524356842041,
      "learning_rate": 8.634816505037725e-08,
      "loss": 2.4101,
      "step": 73607
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0732707977294922,
      "learning_rate": 8.629418241839493e-08,
      "loss": 2.2274,
      "step": 73608
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.227660894393921,
      "learning_rate": 8.624021659283776e-08,
      "loss": 2.0558,
      "step": 73609
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2029951810836792,
      "learning_rate": 8.618626757379567e-08,
      "loss": 2.3499,
      "step": 73610
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9716100096702576,
      "learning_rate": 8.613233536136079e-08,
      "loss": 2.3208,
      "step": 73611
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0617711544036865,
      "learning_rate": 8.607841995562304e-08,
      "loss": 2.4552,
      "step": 73612
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0187430381774902,
      "learning_rate": 8.602452135667571e-08,
      "loss": 2.2324,
      "step": 73613
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0542702674865723,
      "learning_rate": 8.597063956460872e-08,
      "loss": 2.3299,
      "step": 73614
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0605223178863525,
      "learning_rate": 8.59167745795153e-08,
      "loss": 2.4712,
      "step": 73615
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0710517168045044,
      "learning_rate": 8.58629264014843e-08,
      "loss": 2.2519,
      "step": 73616
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0401066541671753,
      "learning_rate": 8.580909503060896e-08,
      "loss": 2.2848,
      "step": 73617
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.151904582977295,
      "learning_rate": 8.575528046698034e-08,
      "loss": 2.2698,
      "step": 73618
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0509164333343506,
      "learning_rate": 8.570148271068945e-08,
      "loss": 2.3404,
      "step": 73619
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1472432613372803,
      "learning_rate": 8.564770176182624e-08,
      "loss": 2.4486,
      "step": 73620
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1415386199951172,
      "learning_rate": 8.559393762048284e-08,
      "loss": 2.3045,
      "step": 73621
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0355297327041626,
      "learning_rate": 8.554019028675031e-08,
      "loss": 2.3171,
      "step": 73622
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1955835819244385,
      "learning_rate": 8.548645976072079e-08,
      "loss": 2.3312,
      "step": 73623
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.083243727684021,
      "learning_rate": 8.543274604248308e-08,
      "loss": 2.3953,
      "step": 73624
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0344209671020508,
      "learning_rate": 8.537904913213157e-08,
      "loss": 2.3436,
      "step": 73625
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0599766969680786,
      "learning_rate": 8.532536902975397e-08,
      "loss": 2.1468,
      "step": 73626
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0684760808944702,
      "learning_rate": 8.527170573544242e-08,
      "loss": 2.5075,
      "step": 73627
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0892858505249023,
      "learning_rate": 8.521805924928905e-08,
      "loss": 2.3403,
      "step": 73628
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1392359733581543,
      "learning_rate": 8.516442957138382e-08,
      "loss": 2.385,
      "step": 73629
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0397039651870728,
      "learning_rate": 8.511081670181776e-08,
      "loss": 2.5659,
      "step": 73630
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0075546503067017,
      "learning_rate": 8.505722064068079e-08,
      "loss": 2.3037,
      "step": 73631
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9775329232215881,
      "learning_rate": 8.500364138806505e-08,
      "loss": 2.3216,
      "step": 73632
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1414577960968018,
      "learning_rate": 8.49500789440616e-08,
      "loss": 2.5274,
      "step": 73633
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.048541784286499,
      "learning_rate": 8.489653330876035e-08,
      "loss": 2.4042,
      "step": 73634
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0902743339538574,
      "learning_rate": 8.484300448225236e-08,
      "loss": 2.1899,
      "step": 73635
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1299965381622314,
      "learning_rate": 8.478949246462864e-08,
      "loss": 2.3034,
      "step": 73636
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.067172884941101,
      "learning_rate": 8.473599725597914e-08,
      "loss": 2.3791,
      "step": 73637
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5334625244140625,
      "learning_rate": 8.4682518856396e-08,
      "loss": 2.3442,
      "step": 73638
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3388960361480713,
      "learning_rate": 8.462905726596805e-08,
      "loss": 2.4376,
      "step": 73639
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0817391872406006,
      "learning_rate": 8.45756124847863e-08,
      "loss": 2.3423,
      "step": 73640
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1300050020217896,
      "learning_rate": 8.452218451294292e-08,
      "loss": 2.3793,
      "step": 73641
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1307637691497803,
      "learning_rate": 8.446877335052783e-08,
      "loss": 2.0876,
      "step": 73642
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.130179524421692,
      "learning_rate": 8.441537899762986e-08,
      "loss": 2.2793,
      "step": 73643
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.13435697555542,
      "learning_rate": 8.436200145434114e-08,
      "loss": 2.4499,
      "step": 73644
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0346015691757202,
      "learning_rate": 8.430864072075273e-08,
      "loss": 2.3272,
      "step": 73645
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0521169900894165,
      "learning_rate": 8.425529679695454e-08,
      "loss": 2.355,
      "step": 73646
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.023503303527832,
      "learning_rate": 8.42019696830354e-08,
      "loss": 2.3075,
      "step": 73647
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0123512744903564,
      "learning_rate": 8.414865937908745e-08,
      "loss": 2.1888,
      "step": 73648
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1017861366271973,
      "learning_rate": 8.409536588520062e-08,
      "loss": 2.2523,
      "step": 73649
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3773725032806396,
      "learning_rate": 8.404208920146595e-08,
      "loss": 2.2768,
      "step": 73650
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.117640495300293,
      "learning_rate": 8.398882932797226e-08,
      "loss": 2.0436,
      "step": 73651
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2406532764434814,
      "learning_rate": 8.393558626481058e-08,
      "loss": 2.2102,
      "step": 73652
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.028946876525879,
      "learning_rate": 8.388236001207195e-08,
      "loss": 2.1764,
      "step": 73653
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0756778717041016,
      "learning_rate": 8.382915056984519e-08,
      "loss": 2.1735,
      "step": 73654
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0789282321929932,
      "learning_rate": 8.377595793822135e-08,
      "loss": 2.5185,
      "step": 73655
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9691846370697021,
      "learning_rate": 8.372278211729035e-08,
      "loss": 2.5043,
      "step": 73656
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0870238542556763,
      "learning_rate": 8.366962310714321e-08,
      "loss": 2.0406,
      "step": 73657
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0746694803237915,
      "learning_rate": 8.361648090786989e-08,
      "loss": 2.5134,
      "step": 73658
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2392921447753906,
      "learning_rate": 8.356335551955808e-08,
      "loss": 1.9978,
      "step": 73659
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0444960594177246,
      "learning_rate": 8.351024694230104e-08,
      "loss": 2.1761,
      "step": 73660
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2287541627883911,
      "learning_rate": 8.345715517618646e-08,
      "loss": 2.5918,
      "step": 73661
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1855095624923706,
      "learning_rate": 8.340408022130542e-08,
      "loss": 2.4236,
      "step": 73662
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2054402828216553,
      "learning_rate": 8.335102207774781e-08,
      "loss": 2.5528,
      "step": 73663
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1729522943496704,
      "learning_rate": 8.329798074560357e-08,
      "loss": 2.384,
      "step": 73664
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1362265348434448,
      "learning_rate": 8.324495622496376e-08,
      "loss": 2.2108,
      "step": 73665
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1111656427383423,
      "learning_rate": 8.319194851591495e-08,
      "loss": 2.4203,
      "step": 73666
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1121554374694824,
      "learning_rate": 8.313895761855039e-08,
      "loss": 2.421,
      "step": 73667
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9949231743812561,
      "learning_rate": 8.308598353295894e-08,
      "loss": 2.3288,
      "step": 73668
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1786702871322632,
      "learning_rate": 8.303302625922938e-08,
      "loss": 2.3264,
      "step": 73669
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1081700325012207,
      "learning_rate": 8.298008579745276e-08,
      "loss": 2.2384,
      "step": 73670
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0078392028808594,
      "learning_rate": 8.2927162147719e-08,
      "loss": 2.1667,
      "step": 73671
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.159254789352417,
      "learning_rate": 8.287425531011584e-08,
      "loss": 2.3697,
      "step": 73672
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0042930841445923,
      "learning_rate": 8.282136528473539e-08,
      "loss": 2.471,
      "step": 73673
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0547749996185303,
      "learning_rate": 8.276849207166537e-08,
      "loss": 2.3388,
      "step": 73674
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1439898014068604,
      "learning_rate": 8.271563567099683e-08,
      "loss": 2.2944,
      "step": 73675
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1468818187713623,
      "learning_rate": 8.266279608281746e-08,
      "loss": 2.4029,
      "step": 73676
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.047075629234314,
      "learning_rate": 8.260997330721942e-08,
      "loss": 2.6072,
      "step": 73677
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0728471279144287,
      "learning_rate": 8.255716734429042e-08,
      "loss": 2.1973,
      "step": 73678
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0716965198516846,
      "learning_rate": 8.250437819412149e-08,
      "loss": 2.3195,
      "step": 73679
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.988297700881958,
      "learning_rate": 8.245160585680035e-08,
      "loss": 2.3049,
      "step": 73680
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.079108476638794,
      "learning_rate": 8.239885033241801e-08,
      "loss": 2.3208,
      "step": 73681
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.177890658378601,
      "learning_rate": 8.23461116210622e-08,
      "loss": 2.2129,
      "step": 73682
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.031067967414856,
      "learning_rate": 8.229338972282397e-08,
      "loss": 2.4782,
      "step": 73683
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.528381109237671,
      "learning_rate": 8.224068463779211e-08,
      "loss": 2.2136,
      "step": 73684
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0471892356872559,
      "learning_rate": 8.218799636605546e-08,
      "loss": 2.2951,
      "step": 73685
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0164835453033447,
      "learning_rate": 8.213532490770393e-08,
      "loss": 2.2128,
      "step": 73686
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.335653305053711,
      "learning_rate": 8.208267026282746e-08,
      "loss": 2.2647,
      "step": 73687
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.014182448387146,
      "learning_rate": 8.203003243151375e-08,
      "loss": 2.2384,
      "step": 73688
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.079923391342163,
      "learning_rate": 8.197741141385274e-08,
      "loss": 2.1699,
      "step": 73689
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1736081838607788,
      "learning_rate": 8.192480720993434e-08,
      "loss": 2.4494,
      "step": 73690
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0400358438491821,
      "learning_rate": 8.187221981984628e-08,
      "loss": 2.5636,
      "step": 73691
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0953752994537354,
      "learning_rate": 8.181964924367957e-08,
      "loss": 2.3892,
      "step": 73692
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1178109645843506,
      "learning_rate": 8.176709548152085e-08,
      "loss": 2.3518,
      "step": 73693
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1267361640930176,
      "learning_rate": 8.171455853346222e-08,
      "loss": 2.3458,
      "step": 73694
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1425174474716187,
      "learning_rate": 8.166203839959031e-08,
      "loss": 2.3351,
      "step": 73695
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1645839214324951,
      "learning_rate": 8.160953507999503e-08,
      "loss": 2.2842,
      "step": 73696
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0761334896087646,
      "learning_rate": 8.155704857476521e-08,
      "loss": 2.334,
      "step": 73697
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0217454433441162,
      "learning_rate": 8.150457888399077e-08,
      "loss": 2.3264,
      "step": 73698
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1184215545654297,
      "learning_rate": 8.145212600775942e-08,
      "loss": 2.3949,
      "step": 73699
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1015340089797974,
      "learning_rate": 8.139968994616109e-08,
      "loss": 2.305,
      "step": 73700
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.002761960029602,
      "learning_rate": 8.13472706992835e-08,
      "loss": 2.2694,
      "step": 73701
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1168522834777832,
      "learning_rate": 8.129486826721655e-08,
      "loss": 2.4271,
      "step": 73702
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0249227285385132,
      "learning_rate": 8.124248265004797e-08,
      "loss": 2.4798,
      "step": 73703
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.138346552848816,
      "learning_rate": 8.119011384786769e-08,
      "loss": 2.1504,
      "step": 73704
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2316160202026367,
      "learning_rate": 8.113776186076339e-08,
      "loss": 2.3512,
      "step": 73705
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4824351072311401,
      "learning_rate": 8.108542668882613e-08,
      "loss": 2.4989,
      "step": 73706
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1378337144851685,
      "learning_rate": 8.10331083321414e-08,
      "loss": 2.3878,
      "step": 73707
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1722276210784912,
      "learning_rate": 8.098080679080023e-08,
      "loss": 2.5029,
      "step": 73708
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1105670928955078,
      "learning_rate": 8.092852206489033e-08,
      "loss": 2.142,
      "step": 73709
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.146315097808838,
      "learning_rate": 8.087625415450163e-08,
      "loss": 2.3184,
      "step": 73710
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0419362783432007,
      "learning_rate": 8.082400305972071e-08,
      "loss": 2.2945,
      "step": 73711
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2914482355117798,
      "learning_rate": 8.077176878063752e-08,
      "loss": 2.3185,
      "step": 73712
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0861239433288574,
      "learning_rate": 8.071955131733978e-08,
      "loss": 2.2143,
      "step": 73713
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1375107765197754,
      "learning_rate": 8.066735066991627e-08,
      "loss": 2.4024,
      "step": 73714
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3143436908721924,
      "learning_rate": 8.061516683845583e-08,
      "loss": 2.2861,
      "step": 73715
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3973114490509033,
      "learning_rate": 8.056299982304728e-08,
      "loss": 2.5267,
      "step": 73716
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1302241086959839,
      "learning_rate": 8.051084962377831e-08,
      "loss": 2.4761,
      "step": 73717
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0577797889709473,
      "learning_rate": 8.045871624073776e-08,
      "loss": 2.5885,
      "step": 73718
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1134636402130127,
      "learning_rate": 8.040659967401331e-08,
      "loss": 2.1615,
      "step": 73719
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1764531135559082,
      "learning_rate": 8.035449992369381e-08,
      "loss": 2.2161,
      "step": 73720
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1214946508407593,
      "learning_rate": 8.030241698986807e-08,
      "loss": 2.2161,
      "step": 73721
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1294926404953003,
      "learning_rate": 8.025035087262268e-08,
      "loss": 2.2076,
      "step": 73722
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.133114218711853,
      "learning_rate": 8.019830157204866e-08,
      "loss": 2.2291,
      "step": 73723
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0977654457092285,
      "learning_rate": 8.014626908823154e-08,
      "loss": 2.335,
      "step": 73724
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0711193084716797,
      "learning_rate": 8.00942534212612e-08,
      "loss": 2.0944,
      "step": 73725
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0574877262115479,
      "learning_rate": 8.004225457122539e-08,
      "loss": 2.1534,
      "step": 73726
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0580813884735107,
      "learning_rate": 7.999027253821179e-08,
      "loss": 2.3261,
      "step": 73727
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.048538327217102,
      "learning_rate": 7.993830732230923e-08,
      "loss": 2.3113,
      "step": 73728
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2112648487091064,
      "learning_rate": 7.988635892360541e-08,
      "loss": 2.2521,
      "step": 73729
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1009905338287354,
      "learning_rate": 7.983442734218805e-08,
      "loss": 2.2906,
      "step": 73730
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0671954154968262,
      "learning_rate": 7.978251257814706e-08,
      "loss": 2.2737,
      "step": 73731
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1013215780258179,
      "learning_rate": 7.973061463156684e-08,
      "loss": 2.1491,
      "step": 73732
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1079273223876953,
      "learning_rate": 7.967873350253952e-08,
      "loss": 2.3562,
      "step": 73733
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.015367865562439,
      "learning_rate": 7.962686919114948e-08,
      "loss": 2.3549,
      "step": 73734
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.070487380027771,
      "learning_rate": 7.957502169748776e-08,
      "loss": 2.5031,
      "step": 73735
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.983688473701477,
      "learning_rate": 7.952319102163986e-08,
      "loss": 2.1037,
      "step": 73736
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1275241374969482,
      "learning_rate": 7.947137716369457e-08,
      "loss": 2.3956,
      "step": 73737
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2397446632385254,
      "learning_rate": 7.941958012373962e-08,
      "loss": 2.2298,
      "step": 73738
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1255347728729248,
      "learning_rate": 7.936779990186382e-08,
      "loss": 2.1078,
      "step": 73739
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0549943447113037,
      "learning_rate": 7.931603649815267e-08,
      "loss": 2.3115,
      "step": 73740
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0866104364395142,
      "learning_rate": 7.926428991269608e-08,
      "loss": 2.3837,
      "step": 73741
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.075430989265442,
      "learning_rate": 7.921256014557954e-08,
      "loss": 2.3092,
      "step": 73742
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0040415525436401,
      "learning_rate": 7.91608471968941e-08,
      "loss": 2.3986,
      "step": 73743
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1599539518356323,
      "learning_rate": 7.910915106672412e-08,
      "loss": 2.3503,
      "step": 73744
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1130142211914062,
      "learning_rate": 7.905747175515954e-08,
      "loss": 2.2742,
      "step": 73745
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1556884050369263,
      "learning_rate": 7.900580926228696e-08,
      "loss": 2.4387,
      "step": 73746
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0667825937271118,
      "learning_rate": 7.895416358819297e-08,
      "loss": 2.3699,
      "step": 73747
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.14756441116333,
      "learning_rate": 7.89025347329675e-08,
      "loss": 2.1968,
      "step": 73748
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0526841878890991,
      "learning_rate": 7.885092269669602e-08,
      "loss": 2.2303,
      "step": 73749
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1243436336517334,
      "learning_rate": 7.879932747946739e-08,
      "loss": 2.4449,
      "step": 73750
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0238922834396362,
      "learning_rate": 7.874774908136817e-08,
      "loss": 2.2003,
      "step": 73751
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1471015214920044,
      "learning_rate": 7.869618750248609e-08,
      "loss": 2.2258,
      "step": 73752
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1920415163040161,
      "learning_rate": 7.864464274290773e-08,
      "loss": 2.4236,
      "step": 73753
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9660910964012146,
      "learning_rate": 7.859311480272191e-08,
      "loss": 2.2415,
      "step": 73754
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1445733308792114,
      "learning_rate": 7.854160368201524e-08,
      "loss": 1.9849,
      "step": 73755
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1070054769515991,
      "learning_rate": 7.849010938087543e-08,
      "loss": 2.2334,
      "step": 73756
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0090314149856567,
      "learning_rate": 7.843863189939016e-08,
      "loss": 2.3177,
      "step": 73757
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1101953983306885,
      "learning_rate": 7.838717123764494e-08,
      "loss": 2.3141,
      "step": 73758
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0401691198349,
      "learning_rate": 7.833572739572859e-08,
      "loss": 2.3636,
      "step": 73759
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1601248979568481,
      "learning_rate": 7.82843003737277e-08,
      "loss": 2.1952,
      "step": 73760
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1753658056259155,
      "learning_rate": 7.823289017172997e-08,
      "loss": 2.3504,
      "step": 73761
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1066162586212158,
      "learning_rate": 7.8181496789822e-08,
      "loss": 2.2643,
      "step": 73762
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.203203797340393,
      "learning_rate": 7.813012022809152e-08,
      "loss": 2.2527,
      "step": 73763
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.053653597831726,
      "learning_rate": 7.807876048662511e-08,
      "loss": 2.466,
      "step": 73764
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1246246099472046,
      "learning_rate": 7.802741756550935e-08,
      "loss": 2.2874,
      "step": 73765
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0315569639205933,
      "learning_rate": 7.797609146483198e-08,
      "loss": 2.1859,
      "step": 73766
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0533567667007446,
      "learning_rate": 7.792478218468069e-08,
      "loss": 2.3264,
      "step": 73767
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1697462797164917,
      "learning_rate": 7.787348972514097e-08,
      "loss": 2.4,
      "step": 73768
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0398304462432861,
      "learning_rate": 7.782221408630052e-08,
      "loss": 2.2676,
      "step": 73769
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2527042627334595,
      "learning_rate": 7.777095526824707e-08,
      "loss": 2.4165,
      "step": 73770
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0458029508590698,
      "learning_rate": 7.771971327106498e-08,
      "loss": 2.4049,
      "step": 73771
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5643043518066406,
      "learning_rate": 7.766848809484417e-08,
      "loss": 2.485,
      "step": 73772
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9716228246688843,
      "learning_rate": 7.761727973967015e-08,
      "loss": 2.4045,
      "step": 73773
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0967611074447632,
      "learning_rate": 7.75660882056295e-08,
      "loss": 2.4586,
      "step": 73774
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.073563814163208,
      "learning_rate": 7.751491349280993e-08,
      "loss": 2.3372,
      "step": 73775
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9817962050437927,
      "learning_rate": 7.746375560129582e-08,
      "loss": 2.288,
      "step": 73776
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0698177814483643,
      "learning_rate": 7.74126145311771e-08,
      "loss": 2.3192,
      "step": 73777
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2308783531188965,
      "learning_rate": 7.736149028253814e-08,
      "loss": 2.3632,
      "step": 73778
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2532715797424316,
      "learning_rate": 7.731038285546666e-08,
      "loss": 2.3761,
      "step": 73779
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1294199228286743,
      "learning_rate": 7.725929225004924e-08,
      "loss": 2.2702,
      "step": 73780
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0621092319488525,
      "learning_rate": 7.72082184663725e-08,
      "loss": 2.2582,
      "step": 73781
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0760060548782349,
      "learning_rate": 7.715716150452191e-08,
      "loss": 2.307,
      "step": 73782
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1188745498657227,
      "learning_rate": 7.710612136458518e-08,
      "loss": 2.2454,
      "step": 73783
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1118342876434326,
      "learning_rate": 7.70550980466489e-08,
      "loss": 2.3521,
      "step": 73784
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0883804559707642,
      "learning_rate": 7.700409155079968e-08,
      "loss": 2.3877,
      "step": 73785
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0545053482055664,
      "learning_rate": 7.695310187712191e-08,
      "loss": 2.223,
      "step": 73786
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2165193557739258,
      "learning_rate": 7.690212902570548e-08,
      "loss": 2.2264,
      "step": 73787
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0284267663955688,
      "learning_rate": 7.685117299663369e-08,
      "loss": 2.242,
      "step": 73788
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5244715213775635,
      "learning_rate": 7.680023378999535e-08,
      "loss": 2.2759,
      "step": 73789
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0270015001296997,
      "learning_rate": 7.674931140587594e-08,
      "loss": 2.4872,
      "step": 73790
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.096612572669983,
      "learning_rate": 7.669840584436095e-08,
      "loss": 2.2966,
      "step": 73791
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.198028326034546,
      "learning_rate": 7.664751710553698e-08,
      "loss": 2.3676,
      "step": 73792
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0660334825515747,
      "learning_rate": 7.659664518949173e-08,
      "loss": 2.2277,
      "step": 73793
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.054697036743164,
      "learning_rate": 7.654579009630848e-08,
      "loss": 2.269,
      "step": 73794
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0355926752090454,
      "learning_rate": 7.649495182607713e-08,
      "loss": 2.2974,
      "step": 73795
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1429729461669922,
      "learning_rate": 7.644413037888099e-08,
      "loss": 2.3756,
      "step": 73796
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0698339939117432,
      "learning_rate": 7.639332575480774e-08,
      "loss": 2.018,
      "step": 73797
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.015126347541809,
      "learning_rate": 7.634253795394175e-08,
      "loss": 2.0311,
      "step": 73798
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.213625431060791,
      "learning_rate": 7.629176697637186e-08,
      "loss": 2.512,
      "step": 73799
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.101428747177124,
      "learning_rate": 7.624101282218243e-08,
      "loss": 2.3227,
      "step": 73800
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1665894985198975,
      "learning_rate": 7.619027549145896e-08,
      "loss": 2.1824,
      "step": 73801
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0591129064559937,
      "learning_rate": 7.613955498428916e-08,
      "loss": 2.3581,
      "step": 73802
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0579434633255005,
      "learning_rate": 7.608885130075849e-08,
      "loss": 2.3325,
      "step": 73803
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.144630789756775,
      "learning_rate": 7.603816444095136e-08,
      "loss": 2.3698,
      "step": 73804
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0173975229263306,
      "learning_rate": 7.598749440495545e-08,
      "loss": 2.2982,
      "step": 73805
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.031350016593933,
      "learning_rate": 7.593684119285515e-08,
      "loss": 2.3808,
      "step": 73806
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2751952409744263,
      "learning_rate": 7.588620480473818e-08,
      "loss": 2.1987,
      "step": 73807
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1415363550186157,
      "learning_rate": 7.583558524069002e-08,
      "loss": 2.4219,
      "step": 73808
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0099267959594727,
      "learning_rate": 7.578498250079502e-08,
      "loss": 2.252,
      "step": 73809
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0730860233306885,
      "learning_rate": 7.573439658513981e-08,
      "loss": 2.192,
      "step": 73810
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.041271448135376,
      "learning_rate": 7.568382749380987e-08,
      "loss": 2.3012,
      "step": 73811
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1198986768722534,
      "learning_rate": 7.56332752268918e-08,
      "loss": 2.332,
      "step": 73812
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2210787534713745,
      "learning_rate": 7.558273978447106e-08,
      "loss": 2.3967,
      "step": 73813
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1246269941329956,
      "learning_rate": 7.553222116663205e-08,
      "loss": 2.3624,
      "step": 73814
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0459977388381958,
      "learning_rate": 7.548171937346249e-08,
      "loss": 2.2974,
      "step": 73815
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0625454187393188,
      "learning_rate": 7.543123440504673e-08,
      "loss": 2.2536,
      "step": 73816
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.146105408668518,
      "learning_rate": 7.538076626147029e-08,
      "loss": 2.3913,
      "step": 73817
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0742701292037964,
      "learning_rate": 7.533031494281861e-08,
      "loss": 2.3522,
      "step": 73818
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1527642011642456,
      "learning_rate": 7.527988044917723e-08,
      "loss": 2.2323,
      "step": 73819
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1319609880447388,
      "learning_rate": 7.52294627806327e-08,
      "loss": 2.3051,
      "step": 73820
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0756635665893555,
      "learning_rate": 7.517906193726943e-08,
      "loss": 2.3262,
      "step": 73821
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3354209661483765,
      "learning_rate": 7.51286779191729e-08,
      "loss": 2.2,
      "step": 73822
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0499597787857056,
      "learning_rate": 7.507831072642969e-08,
      "loss": 2.1446,
      "step": 73823
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.296518325805664,
      "learning_rate": 7.502796035912418e-08,
      "loss": 2.2981,
      "step": 73824
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2465767860412598,
      "learning_rate": 7.497762681734078e-08,
      "loss": 2.318,
      "step": 73825
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1247141361236572,
      "learning_rate": 7.492731010116716e-08,
      "loss": 2.3807,
      "step": 73826
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1446281671524048,
      "learning_rate": 7.487701021068661e-08,
      "loss": 2.2038,
      "step": 73827
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.014035940170288,
      "learning_rate": 7.482672714598461e-08,
      "loss": 2.1215,
      "step": 73828
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0814170837402344,
      "learning_rate": 7.477646090714886e-08,
      "loss": 2.2041,
      "step": 73829
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.020007610321045,
      "learning_rate": 7.472621149426151e-08,
      "loss": 2.297,
      "step": 73830
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0890527963638306,
      "learning_rate": 7.467597890740808e-08,
      "loss": 2.1038,
      "step": 73831
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1026612520217896,
      "learning_rate": 7.462576314667625e-08,
      "loss": 2.2054,
      "step": 73832
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9858248829841614,
      "learning_rate": 7.457556421214818e-08,
      "loss": 2.3686,
      "step": 73833
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0933302640914917,
      "learning_rate": 7.452538210391046e-08,
      "loss": 2.3444,
      "step": 73834
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0305814743041992,
      "learning_rate": 7.44752168220475e-08,
      "loss": 2.2003,
      "step": 73835
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0975940227508545,
      "learning_rate": 7.442506836664588e-08,
      "loss": 2.2896,
      "step": 73836
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.121212124824524,
      "learning_rate": 7.437493673778884e-08,
      "loss": 2.199,
      "step": 73837
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0620664358139038,
      "learning_rate": 7.432482193556189e-08,
      "loss": 2.3427,
      "step": 73838
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1008244752883911,
      "learning_rate": 7.427472396005164e-08,
      "loss": 2.4209,
      "step": 73839
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1074657440185547,
      "learning_rate": 7.422464281134023e-08,
      "loss": 2.4467,
      "step": 73840
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0791354179382324,
      "learning_rate": 7.417457848951314e-08,
      "loss": 2.1306,
      "step": 73841
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0291293859481812,
      "learning_rate": 7.4124530994657e-08,
      "loss": 2.2472,
      "step": 73842
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1560002565383911,
      "learning_rate": 7.407450032685615e-08,
      "loss": 2.1684,
      "step": 73843
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9445040225982666,
      "learning_rate": 7.402448648619387e-08,
      "loss": 2.3142,
      "step": 73844
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1876096725463867,
      "learning_rate": 7.397448947275676e-08,
      "loss": 2.184,
      "step": 73845
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.080177903175354,
      "learning_rate": 7.392450928662809e-08,
      "loss": 2.2984,
      "step": 73846
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.04965078830719,
      "learning_rate": 7.387454592789334e-08,
      "loss": 2.3152,
      "step": 73847
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.023491382598877,
      "learning_rate": 7.382459939663689e-08,
      "loss": 2.2647,
      "step": 73848
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0636545419692993,
      "learning_rate": 7.377466969294422e-08,
      "loss": 2.1419,
      "step": 73849
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1583906412124634,
      "learning_rate": 7.372475681689861e-08,
      "loss": 2.3853,
      "step": 73850
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0922844409942627,
      "learning_rate": 7.367486076858666e-08,
      "loss": 2.5167,
      "step": 73851
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.132563591003418,
      "learning_rate": 7.36249815480905e-08,
      "loss": 2.1798,
      "step": 73852
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0345053672790527,
      "learning_rate": 7.357511915549675e-08,
      "loss": 2.4328,
      "step": 73853
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.045366644859314,
      "learning_rate": 7.352527359088868e-08,
      "loss": 2.3032,
      "step": 73854
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1954811811447144,
      "learning_rate": 7.347544485435176e-08,
      "loss": 2.2551,
      "step": 73855
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0902684926986694,
      "learning_rate": 7.342563294597038e-08,
      "loss": 2.3615,
      "step": 73856
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1129809617996216,
      "learning_rate": 7.33758378658278e-08,
      "loss": 2.4036,
      "step": 73857
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0357749462127686,
      "learning_rate": 7.33260596140084e-08,
      "loss": 2.2287,
      "step": 73858
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0148890018463135,
      "learning_rate": 7.327629819059878e-08,
      "loss": 2.1935,
      "step": 73859
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.04849374294281,
      "learning_rate": 7.32265535956811e-08,
      "loss": 2.3046,
      "step": 73860
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9899156093597412,
      "learning_rate": 7.317682582934193e-08,
      "loss": 2.2987,
      "step": 73861
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1045022010803223,
      "learning_rate": 7.312711489166235e-08,
      "loss": 2.1786,
      "step": 73862
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.228840708732605,
      "learning_rate": 7.307742078272895e-08,
      "loss": 2.1827,
      "step": 73863
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0328551530838013,
      "learning_rate": 7.302774350262609e-08,
      "loss": 2.3942,
      "step": 73864
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1394352912902832,
      "learning_rate": 7.297808305143706e-08,
      "loss": 2.0768,
      "step": 73865
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.074984073638916,
      "learning_rate": 7.292843942924621e-08,
      "loss": 2.151,
      "step": 73866
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.044969081878662,
      "learning_rate": 7.287881263613682e-08,
      "loss": 2.3764,
      "step": 73867
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0384581089019775,
      "learning_rate": 7.282920267219551e-08,
      "loss": 2.5051,
      "step": 73868
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1397113800048828,
      "learning_rate": 7.27796095375044e-08,
      "loss": 2.4915,
      "step": 73869
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1250619888305664,
      "learning_rate": 7.273003323214789e-08,
      "loss": 2.4148,
      "step": 73870
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0990873575210571,
      "learning_rate": 7.268047375621035e-08,
      "loss": 2.5067,
      "step": 73871
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2544854879379272,
      "learning_rate": 7.263093110977614e-08,
      "loss": 2.5525,
      "step": 73872
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.196735143661499,
      "learning_rate": 7.258140529292745e-08,
      "loss": 2.4103,
      "step": 73873
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0506784915924072,
      "learning_rate": 7.253189630574975e-08,
      "loss": 2.4136,
      "step": 73874
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1517962217330933,
      "learning_rate": 7.248240414832742e-08,
      "loss": 2.4602,
      "step": 73875
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.075985074043274,
      "learning_rate": 7.24329288207437e-08,
      "loss": 2.2139,
      "step": 73876
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0796757936477661,
      "learning_rate": 7.23834703230819e-08,
      "loss": 2.3754,
      "step": 73877
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0716115236282349,
      "learning_rate": 7.233402865542638e-08,
      "loss": 2.1579,
      "step": 73878
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9691575765609741,
      "learning_rate": 7.228460381786151e-08,
      "loss": 2.4818,
      "step": 73879
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.261098861694336,
      "learning_rate": 7.223519581047056e-08,
      "loss": 2.354,
      "step": 73880
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1396435499191284,
      "learning_rate": 7.21858046333368e-08,
      "loss": 2.3249,
      "step": 73881
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1600971221923828,
      "learning_rate": 7.213643028654572e-08,
      "loss": 2.1106,
      "step": 73882
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1496305465698242,
      "learning_rate": 7.208707277017835e-08,
      "loss": 2.281,
      "step": 73883
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0934288501739502,
      "learning_rate": 7.203773208432019e-08,
      "loss": 2.3068,
      "step": 73884
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1487427949905396,
      "learning_rate": 7.198840822905451e-08,
      "loss": 2.4163,
      "step": 73885
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0977638959884644,
      "learning_rate": 7.193910120446568e-08,
      "loss": 2.1688,
      "step": 73886
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9930999279022217,
      "learning_rate": 7.188981101063474e-08,
      "loss": 2.4794,
      "step": 73887
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0428991317749023,
      "learning_rate": 7.184053764764832e-08,
      "loss": 2.2183,
      "step": 73888
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4487645626068115,
      "learning_rate": 7.179128111558854e-08,
      "loss": 2.5425,
      "step": 73889
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1800124645233154,
      "learning_rate": 7.174204141453866e-08,
      "loss": 2.3823,
      "step": 73890
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1257575750350952,
      "learning_rate": 7.169281854458199e-08,
      "loss": 2.4237,
      "step": 73891
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1323444843292236,
      "learning_rate": 7.164361250580399e-08,
      "loss": 2.3775,
      "step": 73892
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0803520679473877,
      "learning_rate": 7.15944232982857e-08,
      "loss": 2.2852,
      "step": 73893
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.099933385848999,
      "learning_rate": 7.15452509221115e-08,
      "loss": 2.2644,
      "step": 73894
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0530704259872437,
      "learning_rate": 7.149609537736469e-08,
      "loss": 2.188,
      "step": 73895
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1724077463150024,
      "learning_rate": 7.144695666412848e-08,
      "loss": 2.2091,
      "step": 73896
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1842106580734253,
      "learning_rate": 7.139783478248619e-08,
      "loss": 2.0464,
      "step": 73897
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.037572979927063,
      "learning_rate": 7.134872973252105e-08,
      "loss": 2.0524,
      "step": 73898
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2602708339691162,
      "learning_rate": 7.129964151431635e-08,
      "loss": 2.497,
      "step": 73899
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6308718919754028,
      "learning_rate": 7.125057012795533e-08,
      "loss": 2.1589,
      "step": 73900
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1188417673110962,
      "learning_rate": 7.120151557352129e-08,
      "loss": 2.2661,
      "step": 73901
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.035697102546692,
      "learning_rate": 7.115247785109747e-08,
      "loss": 2.3487,
      "step": 73902
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0679261684417725,
      "learning_rate": 7.110345696076714e-08,
      "loss": 2.3295,
      "step": 73903
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9987154006958008,
      "learning_rate": 7.105445290261137e-08,
      "loss": 2.1636,
      "step": 73904
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0909733772277832,
      "learning_rate": 7.100546567671674e-08,
      "loss": 2.3378,
      "step": 73905
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0168976783752441,
      "learning_rate": 7.095649528316428e-08,
      "loss": 2.4075,
      "step": 73906
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1239951848983765,
      "learning_rate": 7.090754172203729e-08,
      "loss": 2.2693,
      "step": 73907
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0316122770309448,
      "learning_rate": 7.08586049934179e-08,
      "loss": 2.4977,
      "step": 73908
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3007303476333618,
      "learning_rate": 7.080968509739162e-08,
      "loss": 2.2416,
      "step": 73909
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0707675218582153,
      "learning_rate": 7.076078203403725e-08,
      "loss": 2.5017,
      "step": 73910
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0694096088409424,
      "learning_rate": 7.071189580344251e-08,
      "loss": 2.4317,
      "step": 73911
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1174570322036743,
      "learning_rate": 7.066302640568624e-08,
      "loss": 2.2328,
      "step": 73912
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0354621410369873,
      "learning_rate": 7.061417384085279e-08,
      "loss": 2.2526,
      "step": 73913
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1489449739456177,
      "learning_rate": 7.056533810902544e-08,
      "loss": 2.2954,
      "step": 73914
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1084963083267212,
      "learning_rate": 7.051651921028745e-08,
      "loss": 2.1807,
      "step": 73915
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0411760807037354,
      "learning_rate": 7.046771714471989e-08,
      "loss": 2.1463,
      "step": 73916
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1273809671401978,
      "learning_rate": 7.041893191240711e-08,
      "loss": 2.1806,
      "step": 73917
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0636377334594727,
      "learning_rate": 7.037016351343018e-08,
      "loss": 2.343,
      "step": 73918
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9639847874641418,
      "learning_rate": 7.032141194787346e-08,
      "loss": 2.2407,
      "step": 73919
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.052909255027771,
      "learning_rate": 7.027267721581799e-08,
      "loss": 2.2727,
      "step": 73920
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1715162992477417,
      "learning_rate": 7.022395931734815e-08,
      "loss": 2.2627,
      "step": 73921
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1063239574432373,
      "learning_rate": 7.0175258252545e-08,
      "loss": 2.382,
      "step": 73922
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9846723675727844,
      "learning_rate": 7.012657402149181e-08,
      "loss": 2.348,
      "step": 73923
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1435233354568481,
      "learning_rate": 7.007790662427072e-08,
      "loss": 2.1748,
      "step": 73924
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1577742099761963,
      "learning_rate": 7.0029256060965e-08,
      "loss": 2.3851,
      "step": 73925
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.086557149887085,
      "learning_rate": 6.998062233165681e-08,
      "loss": 2.3084,
      "step": 73926
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1689125299453735,
      "learning_rate": 6.993200543642831e-08,
      "loss": 2.3556,
      "step": 73927
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1567814350128174,
      "learning_rate": 6.988340537536165e-08,
      "loss": 2.2437,
      "step": 73928
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1559009552001953,
      "learning_rate": 6.983482214853899e-08,
      "loss": 2.4274,
      "step": 73929
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1375319957733154,
      "learning_rate": 6.97862557560447e-08,
      "loss": 2.4298,
      "step": 73930
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9988412857055664,
      "learning_rate": 6.973770619795872e-08,
      "loss": 2.4639,
      "step": 73931
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3186622858047485,
      "learning_rate": 6.968917347436544e-08,
      "loss": 2.2149,
      "step": 73932
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0843945741653442,
      "learning_rate": 6.964065758534477e-08,
      "loss": 2.1294,
      "step": 73933
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1840664148330688,
      "learning_rate": 6.959215853098111e-08,
      "loss": 2.4494,
      "step": 73934
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0691708326339722,
      "learning_rate": 6.95436763113555e-08,
      "loss": 2.426,
      "step": 73935
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0526851415634155,
      "learning_rate": 6.949521092655009e-08,
      "loss": 2.4469,
      "step": 73936
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1782485246658325,
      "learning_rate": 6.944676237664815e-08,
      "loss": 2.2913,
      "step": 73937
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1388479471206665,
      "learning_rate": 6.939833066173074e-08,
      "loss": 2.4343,
      "step": 73938
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1354421377182007,
      "learning_rate": 6.934991578187889e-08,
      "loss": 2.2898,
      "step": 73939
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0381077527999878,
      "learning_rate": 6.930151773717808e-08,
      "loss": 2.0854,
      "step": 73940
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0521643161773682,
      "learning_rate": 6.925313652770716e-08,
      "loss": 2.357,
      "step": 73941
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0681999921798706,
      "learning_rate": 6.920477215354938e-08,
      "loss": 2.0276,
      "step": 73942
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0788811445236206,
      "learning_rate": 6.915642461478689e-08,
      "loss": 2.4392,
      "step": 73943
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.07085382938385,
      "learning_rate": 6.910809391150186e-08,
      "loss": 2.123,
      "step": 73944
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2197469472885132,
      "learning_rate": 6.905978004377533e-08,
      "loss": 2.3153,
      "step": 73945
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1264081001281738,
      "learning_rate": 6.901148301169058e-08,
      "loss": 2.3636,
      "step": 73946
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0709506273269653,
      "learning_rate": 6.896320281532754e-08,
      "loss": 2.3639,
      "step": 73947
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1965370178222656,
      "learning_rate": 6.891493945476946e-08,
      "loss": 2.5303,
      "step": 73948
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1632646322250366,
      "learning_rate": 6.88666929300974e-08,
      "loss": 2.3531,
      "step": 73949
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1116193532943726,
      "learning_rate": 6.881846324139462e-08,
      "loss": 2.142,
      "step": 73950
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.037317156791687,
      "learning_rate": 6.877025038874108e-08,
      "loss": 2.2096,
      "step": 73951
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0423829555511475,
      "learning_rate": 6.872205437221891e-08,
      "loss": 2.2762,
      "step": 73952
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0569040775299072,
      "learning_rate": 6.867387519191026e-08,
      "loss": 2.2965,
      "step": 73953
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.129508376121521,
      "learning_rate": 6.862571284789731e-08,
      "loss": 2.222,
      "step": 73954
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1428661346435547,
      "learning_rate": 6.857756734026111e-08,
      "loss": 2.4178,
      "step": 73955
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0013784170150757,
      "learning_rate": 6.852943866908269e-08,
      "loss": 2.4021,
      "step": 73956
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0003892183303833,
      "learning_rate": 6.848132683444419e-08,
      "loss": 2.4361,
      "step": 73957
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.037204384803772,
      "learning_rate": 6.843323183642781e-08,
      "loss": 2.2466,
      "step": 73958
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0358097553253174,
      "learning_rate": 6.838515367511456e-08,
      "loss": 2.0852,
      "step": 73959
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5958741903305054,
      "learning_rate": 6.833709235058438e-08,
      "loss": 2.4633,
      "step": 73960
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0184013843536377,
      "learning_rate": 6.828904786292279e-08,
      "loss": 2.1483,
      "step": 73961
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1607625484466553,
      "learning_rate": 6.824102021220636e-08,
      "loss": 2.1581,
      "step": 73962
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0983500480651855,
      "learning_rate": 6.819300939852058e-08,
      "loss": 2.2234,
      "step": 73963
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0016978979110718,
      "learning_rate": 6.814501542194318e-08,
      "loss": 2.4589,
      "step": 73964
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0823273658752441,
      "learning_rate": 6.809703828255854e-08,
      "loss": 2.3404,
      "step": 73965
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0861232280731201,
      "learning_rate": 6.804907798044657e-08,
      "loss": 2.3449,
      "step": 73966
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1059765815734863,
      "learning_rate": 6.800113451568946e-08,
      "loss": 2.1822,
      "step": 73967
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.150917410850525,
      "learning_rate": 6.795320788836712e-08,
      "loss": 2.4464,
      "step": 73968
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1258537769317627,
      "learning_rate": 6.790529809856284e-08,
      "loss": 2.3261,
      "step": 73969
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0300877094268799,
      "learning_rate": 6.785740514635542e-08,
      "loss": 2.3996,
      "step": 73970
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0694206953048706,
      "learning_rate": 6.780952903182702e-08,
      "loss": 2.3152,
      "step": 73971
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9462008476257324,
      "learning_rate": 6.776166975505982e-08,
      "loss": 2.3176,
      "step": 73972
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0093151330947876,
      "learning_rate": 6.771382731613374e-08,
      "loss": 2.4257,
      "step": 73973
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1287293434143066,
      "learning_rate": 6.766600171513093e-08,
      "loss": 2.6013,
      "step": 73974
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0715097188949585,
      "learning_rate": 6.761819295213135e-08,
      "loss": 2.3754,
      "step": 73975
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.100394606590271,
      "learning_rate": 6.757040102721713e-08,
      "loss": 2.4214,
      "step": 73976
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1106842756271362,
      "learning_rate": 6.752262594046822e-08,
      "loss": 2.4283,
      "step": 73977
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0436713695526123,
      "learning_rate": 6.747486769196565e-08,
      "loss": 2.2564,
      "step": 73978
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0239477157592773,
      "learning_rate": 6.74271262817916e-08,
      "loss": 2.1368,
      "step": 73979
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0345052480697632,
      "learning_rate": 6.737940171002489e-08,
      "loss": 2.3989,
      "step": 73980
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0089420080184937,
      "learning_rate": 6.733169397674877e-08,
      "loss": 2.1407,
      "step": 73981
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0639047622680664,
      "learning_rate": 6.72840030820432e-08,
      "loss": 2.4558,
      "step": 73982
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.191298246383667,
      "learning_rate": 6.72363290259892e-08,
      "loss": 2.6151,
      "step": 73983
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1383252143859863,
      "learning_rate": 6.718867180866673e-08,
      "loss": 2.4271,
      "step": 73984
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.994012713432312,
      "learning_rate": 6.714103143015683e-08,
      "loss": 2.312,
      "step": 73985
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1444352865219116,
      "learning_rate": 6.709340789054164e-08,
      "loss": 2.2316,
      "step": 73986
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0421521663665771,
      "learning_rate": 6.70458011899e-08,
      "loss": 2.4329,
      "step": 73987
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1267828941345215,
      "learning_rate": 6.699821132831407e-08,
      "loss": 2.1485,
      "step": 73988
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0435004234313965,
      "learning_rate": 6.69506383058638e-08,
      "loss": 2.5371,
      "step": 73989
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1267547607421875,
      "learning_rate": 6.69030821226302e-08,
      "loss": 2.2222,
      "step": 73990
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9332053065299988,
      "learning_rate": 6.685554277869211e-08,
      "loss": 2.14,
      "step": 73991
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1614667177200317,
      "learning_rate": 6.680802027413391e-08,
      "loss": 2.3085,
      "step": 73992
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0613682270050049,
      "learning_rate": 6.676051460903221e-08,
      "loss": 2.3198,
      "step": 73993
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1171865463256836,
      "learning_rate": 6.671302578347028e-08,
      "loss": 2.3439,
      "step": 73994
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1086505651474,
      "learning_rate": 6.666555379752693e-08,
      "loss": 2.2906,
      "step": 73995
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0952587127685547,
      "learning_rate": 6.661809865128433e-08,
      "loss": 2.2705,
      "step": 73996
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.106492519378662,
      "learning_rate": 6.657066034482018e-08,
      "loss": 2.1362,
      "step": 73997
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0459202527999878,
      "learning_rate": 6.652323887821777e-08,
      "loss": 2.4583,
      "step": 73998
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0048091411590576,
      "learning_rate": 6.647583425155702e-08,
      "loss": 2.3783,
      "step": 73999
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0569286346435547,
      "learning_rate": 6.642844646491675e-08,
      "loss": 2.5348,
      "step": 74000
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2953323125839233,
      "learning_rate": 6.638107551837802e-08,
      "loss": 2.3554,
      "step": 74001
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1535520553588867,
      "learning_rate": 6.633372141202188e-08,
      "loss": 2.18,
      "step": 74002
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1006910800933838,
      "learning_rate": 6.628638414592824e-08,
      "loss": 2.3389,
      "step": 74003
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1388543844223022,
      "learning_rate": 6.623906372017708e-08,
      "loss": 2.4723,
      "step": 74004
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0995512008666992,
      "learning_rate": 6.619176013484719e-08,
      "loss": 2.4539,
      "step": 74005
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0245699882507324,
      "learning_rate": 6.614447339002183e-08,
      "loss": 2.2575,
      "step": 74006
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.12193763256073,
      "learning_rate": 6.609720348577875e-08,
      "loss": 2.3423,
      "step": 74007
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1114922761917114,
      "learning_rate": 6.604995042220008e-08,
      "loss": 2.1677,
      "step": 74008
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0519226789474487,
      "learning_rate": 6.600271419936354e-08,
      "loss": 2.1517,
      "step": 74009
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1117539405822754,
      "learning_rate": 6.59554948173513e-08,
      "loss": 2.1776,
      "step": 74010
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.08540940284729,
      "learning_rate": 6.590829227624218e-08,
      "loss": 2.3806,
      "step": 74011
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1065510511398315,
      "learning_rate": 6.586110657611611e-08,
      "loss": 2.2692,
      "step": 74012
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0352013111114502,
      "learning_rate": 6.581393771705413e-08,
      "loss": 2.2162,
      "step": 74013
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0785043239593506,
      "learning_rate": 6.576678569913509e-08,
      "loss": 2.1445,
      "step": 74014
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1306105852127075,
      "learning_rate": 6.571965052244e-08,
      "loss": 2.3122,
      "step": 74015
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0651713609695435,
      "learning_rate": 6.567253218704772e-08,
      "loss": 2.2618,
      "step": 74016
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1010730266571045,
      "learning_rate": 6.562543069303928e-08,
      "loss": 2.2594,
      "step": 74017
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1235231161117554,
      "learning_rate": 6.557834604049351e-08,
      "loss": 2.2171,
      "step": 74018
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1138442754745483,
      "learning_rate": 6.553127822949146e-08,
      "loss": 2.3096,
      "step": 74019
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1098806858062744,
      "learning_rate": 6.548422726011083e-08,
      "loss": 2.7276,
      "step": 74020
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0752686262130737,
      "learning_rate": 6.543719313243269e-08,
      "loss": 2.2097,
      "step": 74021
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1185885667800903,
      "learning_rate": 6.539017584653695e-08,
      "loss": 2.5018,
      "step": 74022
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2012265920639038,
      "learning_rate": 6.534317540250356e-08,
      "loss": 2.3674,
      "step": 74023
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0360321998596191,
      "learning_rate": 6.529619180041025e-08,
      "loss": 2.4446,
      "step": 74024
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9994147419929504,
      "learning_rate": 6.524922504033915e-08,
      "loss": 2.2126,
      "step": 74025
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1230783462524414,
      "learning_rate": 6.52022751223691e-08,
      "loss": 2.2293,
      "step": 74026
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.419621229171753,
      "learning_rate": 6.515534204658002e-08,
      "loss": 2.1821,
      "step": 74027
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.046639323234558,
      "learning_rate": 6.510842581304966e-08,
      "loss": 2.2608,
      "step": 74028
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0855793952941895,
      "learning_rate": 6.506152642185903e-08,
      "loss": 2.4773,
      "step": 74029
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.01841139793396,
      "learning_rate": 6.501464387308698e-08,
      "loss": 2.047,
      "step": 74030
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0429390668869019,
      "learning_rate": 6.496777816681454e-08,
      "loss": 2.211,
      "step": 74031
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.175298810005188,
      "learning_rate": 6.492092930311945e-08,
      "loss": 2.1649,
      "step": 74032
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0660555362701416,
      "learning_rate": 6.487409728208161e-08,
      "loss": 2.4023,
      "step": 74033
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1737823486328125,
      "learning_rate": 6.482728210378097e-08,
      "loss": 2.3631,
      "step": 74034
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0996772050857544,
      "learning_rate": 6.478048376829637e-08,
      "loss": 2.4516,
      "step": 74035
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0734790563583374,
      "learning_rate": 6.473370227570664e-08,
      "loss": 2.2752,
      "step": 74036
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1697202920913696,
      "learning_rate": 6.468693762609168e-08,
      "loss": 2.2379,
      "step": 74037
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1189905405044556,
      "learning_rate": 6.464018981953147e-08,
      "loss": 2.349,
      "step": 74038
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1482166051864624,
      "learning_rate": 6.459345885610369e-08,
      "loss": 2.238,
      "step": 74039
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1040340662002563,
      "learning_rate": 6.45467447358894e-08,
      "loss": 2.1054,
      "step": 74040
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0906167030334473,
      "learning_rate": 6.450004745896632e-08,
      "loss": 2.2644,
      "step": 74041
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0936901569366455,
      "learning_rate": 6.445336702541439e-08,
      "loss": 2.2128,
      "step": 74042
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1196750402450562,
      "learning_rate": 6.440670343531241e-08,
      "loss": 2.2987,
      "step": 74043
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1597552299499512,
      "learning_rate": 6.436005668873924e-08,
      "loss": 2.2244,
      "step": 74044
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0236470699310303,
      "learning_rate": 6.431342678577368e-08,
      "loss": 2.34,
      "step": 74045
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0265841484069824,
      "learning_rate": 6.42668137264968e-08,
      "loss": 2.2839,
      "step": 74046
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1250673532485962,
      "learning_rate": 6.422021751098517e-08,
      "loss": 2.3834,
      "step": 74047
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0396925210952759,
      "learning_rate": 6.417363813931876e-08,
      "loss": 2.463,
      "step": 74048
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1312133073806763,
      "learning_rate": 6.412707561157638e-08,
      "loss": 2.2073,
      "step": 74049
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.985751748085022,
      "learning_rate": 6.408052992783797e-08,
      "loss": 2.2139,
      "step": 74050
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6076539754867554,
      "learning_rate": 6.403400108818126e-08,
      "loss": 2.1659,
      "step": 74051
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1395337581634521,
      "learning_rate": 6.398748909268503e-08,
      "loss": 2.2676,
      "step": 74052
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0499958992004395,
      "learning_rate": 6.394099394142928e-08,
      "loss": 2.3907,
      "step": 74053
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1251130104064941,
      "learning_rate": 6.389451563449167e-08,
      "loss": 2.2238,
      "step": 74054
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.083003282546997,
      "learning_rate": 6.384805417195216e-08,
      "loss": 2.4195,
      "step": 74055
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0597641468048096,
      "learning_rate": 6.380160955388848e-08,
      "loss": 2.2346,
      "step": 74056
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.013493299484253,
      "learning_rate": 6.375518178037942e-08,
      "loss": 2.3675,
      "step": 74057
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1102099418640137,
      "learning_rate": 6.370877085150384e-08,
      "loss": 2.4994,
      "step": 74058
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2013943195343018,
      "learning_rate": 6.366237676734055e-08,
      "loss": 2.2793,
      "step": 74059
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0352091789245605,
      "learning_rate": 6.361599952796837e-08,
      "loss": 2.4776,
      "step": 74060
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9755171537399292,
      "learning_rate": 6.356963913346614e-08,
      "loss": 2.2519,
      "step": 74061
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.030419945716858,
      "learning_rate": 6.352329558391158e-08,
      "loss": 2.157,
      "step": 74062
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.093742847442627,
      "learning_rate": 6.347696887938349e-08,
      "loss": 2.2969,
      "step": 74063
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0634186267852783,
      "learning_rate": 6.343065901996181e-08,
      "loss": 2.2859,
      "step": 74064
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0397752523422241,
      "learning_rate": 6.338436600572317e-08,
      "loss": 2.3002,
      "step": 74065
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1442840099334717,
      "learning_rate": 6.333808983674638e-08,
      "loss": 2.2419,
      "step": 74066
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1399884223937988,
      "learning_rate": 6.329183051311138e-08,
      "loss": 2.3062,
      "step": 74067
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0707200765609741,
      "learning_rate": 6.324558803489478e-08,
      "loss": 2.1449,
      "step": 74068
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1691926717758179,
      "learning_rate": 6.319936240217538e-08,
      "loss": 2.3565,
      "step": 74069
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1747976541519165,
      "learning_rate": 6.315315361503316e-08,
      "loss": 2.0088,
      "step": 74070
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.049024224281311,
      "learning_rate": 6.310696167354358e-08,
      "loss": 2.3359,
      "step": 74071
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1941227912902832,
      "learning_rate": 6.306078657778769e-08,
      "loss": 2.287,
      "step": 74072
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1810081005096436,
      "learning_rate": 6.30146283278421e-08,
      "loss": 2.4289,
      "step": 74073
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0381031036376953,
      "learning_rate": 6.296848692378565e-08,
      "loss": 2.3114,
      "step": 74074
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0999791622161865,
      "learning_rate": 6.292236236569605e-08,
      "loss": 2.4246,
      "step": 74075
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9228191375732422,
      "learning_rate": 6.28762546536521e-08,
      "loss": 2.4145,
      "step": 74076
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0705243349075317,
      "learning_rate": 6.283016378773155e-08,
      "loss": 2.3179,
      "step": 74077
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0176674127578735,
      "learning_rate": 6.27840897680132e-08,
      "loss": 2.4725,
      "step": 74078
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0340265035629272,
      "learning_rate": 6.273803259457478e-08,
      "loss": 2.3801,
      "step": 74079
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0179572105407715,
      "learning_rate": 6.269199226749512e-08,
      "loss": 2.5265,
      "step": 74080
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.133516788482666,
      "learning_rate": 6.264596878685081e-08,
      "loss": 2.2287,
      "step": 74081
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0654116868972778,
      "learning_rate": 6.259996215271958e-08,
      "loss": 2.3901,
      "step": 74082
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2006698846817017,
      "learning_rate": 6.255397236518135e-08,
      "loss": 2.112,
      "step": 74083
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0657849311828613,
      "learning_rate": 6.250799942431274e-08,
      "loss": 2.4088,
      "step": 74084
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0458943843841553,
      "learning_rate": 6.246204333019257e-08,
      "loss": 2.2141,
      "step": 74085
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.1536588668823242,
      "learning_rate": 6.241610408289745e-08,
      "loss": 2.3471,
      "step": 74086
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0130023956298828,
      "learning_rate": 6.23701816825062e-08,
      "loss": 2.2823,
      "step": 74087
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9597394466400146,
      "learning_rate": 6.232427612909653e-08,
      "loss": 2.2696,
      "step": 74088
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0021638870239258,
      "learning_rate": 6.227838742274727e-08,
      "loss": 2.2354,
      "step": 74089
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4536151885986328,
      "learning_rate": 6.223251556353393e-08,
      "loss": 2.393,
      "step": 74090
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0328571796417236,
      "learning_rate": 6.218666055153643e-08,
      "loss": 2.3555,
      "step": 74091
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0397390127182007,
      "learning_rate": 6.214082238683028e-08,
      "loss": 2.3989,
      "step": 74092
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0974338054656982,
      "learning_rate": 6.20950010694965e-08,
      "loss": 2.1186,
      "step": 74093
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0078792572021484,
      "learning_rate": 6.204919659960951e-08,
      "loss": 2.3649,
      "step": 74094
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.360771894454956,
      "learning_rate": 6.20034089772481e-08,
      "loss": 2.224,
      "step": 74095
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0090433359146118,
      "learning_rate": 6.195763820249002e-08,
      "loss": 2.4909,
      "step": 74096
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0119401216506958,
      "learning_rate": 6.191188427541405e-08,
      "loss": 2.3672,
      "step": 74097
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0657607316970825,
      "learning_rate": 6.186614719609462e-08,
      "loss": 2.2182,
      "step": 74098
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0459308624267578,
      "learning_rate": 6.182042696461277e-08,
      "loss": 2.3798,
      "step": 74099
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0222779512405396,
      "learning_rate": 6.177472358104508e-08,
      "loss": 2.2256,
      "step": 74100
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0294983386993408,
      "learning_rate": 6.172903704546707e-08,
      "loss": 2.229,
      "step": 74101
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9652695655822754,
      "learning_rate": 6.168336735795754e-08,
      "loss": 2.3071,
      "step": 74102
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2217121124267578,
      "learning_rate": 6.163771451859424e-08,
      "loss": 2.3161,
      "step": 74103
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.967117428779602,
      "learning_rate": 6.159207852745486e-08,
      "loss": 2.3677,
      "step": 74104
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0956127643585205,
      "learning_rate": 6.15464593846149e-08,
      "loss": 2.1524,
      "step": 74105
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.022217869758606,
      "learning_rate": 6.150085709015318e-08,
      "loss": 2.313,
      "step": 74106
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0535544157028198,
      "learning_rate": 6.145527164414744e-08,
      "loss": 2.2154,
      "step": 74107
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.335245966911316,
      "learning_rate": 6.140970304667427e-08,
      "loss": 2.4071,
      "step": 74108
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0805026292800903,
      "learning_rate": 6.136415129781137e-08,
      "loss": 2.3818,
      "step": 74109
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2929511070251465,
      "learning_rate": 6.131861639763426e-08,
      "loss": 2.0602,
      "step": 74110
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.989906907081604,
      "learning_rate": 6.127309834622175e-08,
      "loss": 2.2969,
      "step": 74111
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0462702512741089,
      "learning_rate": 6.122759714365156e-08,
      "loss": 2.3459,
      "step": 74112
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0750157833099365,
      "learning_rate": 6.11821127899992e-08,
      "loss": 2.0501,
      "step": 74113
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1402121782302856,
      "learning_rate": 6.113664528534235e-08,
      "loss": 2.4053,
      "step": 74114
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.101404070854187,
      "learning_rate": 6.109119462975766e-08,
      "loss": 2.4239,
      "step": 74115
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1715829372406006,
      "learning_rate": 6.104576082332392e-08,
      "loss": 2.2214,
      "step": 74116
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1973495483398438,
      "learning_rate": 6.100034386611664e-08,
      "loss": 2.3838,
      "step": 74117
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0665831565856934,
      "learning_rate": 6.095494375821353e-08,
      "loss": 2.4587,
      "step": 74118
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1509571075439453,
      "learning_rate": 6.090956049969122e-08,
      "loss": 2.4117,
      "step": 74119
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6844302415847778,
      "learning_rate": 6.086419409062628e-08,
      "loss": 2.2938,
      "step": 74120
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0897939205169678,
      "learning_rate": 6.081884453109532e-08,
      "loss": 2.291,
      "step": 74121
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0159205198287964,
      "learning_rate": 6.077351182117719e-08,
      "loss": 2.2357,
      "step": 74122
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9708865284919739,
      "learning_rate": 6.072819596094626e-08,
      "loss": 2.183,
      "step": 74123
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0042937994003296,
      "learning_rate": 6.068289695048135e-08,
      "loss": 2.1683,
      "step": 74124
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0802717208862305,
      "learning_rate": 6.063761478985796e-08,
      "loss": 2.1383,
      "step": 74125
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0026752948760986,
      "learning_rate": 6.059234947915271e-08,
      "loss": 2.137,
      "step": 74126
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0767052173614502,
      "learning_rate": 6.05471010184433e-08,
      "loss": 2.4221,
      "step": 74127
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1469039916992188,
      "learning_rate": 6.050186940780745e-08,
      "loss": 2.4082,
      "step": 74128
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2071917057037354,
      "learning_rate": 6.045665464731953e-08,
      "loss": 2.2067,
      "step": 74129
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.173240303993225,
      "learning_rate": 6.041145673705617e-08,
      "loss": 2.1258,
      "step": 74130
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1065263748168945,
      "learning_rate": 6.036627567709619e-08,
      "loss": 2.3086,
      "step": 74131
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.014634132385254,
      "learning_rate": 6.032111146751506e-08,
      "loss": 2.1559,
      "step": 74132
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0288466215133667,
      "learning_rate": 6.027596410838831e-08,
      "loss": 2.2809,
      "step": 74133
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0154781341552734,
      "learning_rate": 6.023083359979365e-08,
      "loss": 2.2978,
      "step": 74134
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0172348022460938,
      "learning_rate": 6.018571994180767e-08,
      "loss": 2.3445,
      "step": 74135
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2470073699951172,
      "learning_rate": 6.014062313450586e-08,
      "loss": 2.4866,
      "step": 74136
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1214500665664673,
      "learning_rate": 6.009554317796596e-08,
      "loss": 2.3528,
      "step": 74137
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1216521263122559,
      "learning_rate": 6.005048007226344e-08,
      "loss": 2.1274,
      "step": 74138
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1391756534576416,
      "learning_rate": 6.000543381747492e-08,
      "loss": 2.3518,
      "step": 74139
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0638386011123657,
      "learning_rate": 5.996040441367701e-08,
      "loss": 2.407,
      "step": 74140
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.095026969909668,
      "learning_rate": 5.99153918609463e-08,
      "loss": 2.4627,
      "step": 74141
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0712155103683472,
      "learning_rate": 5.987039615935719e-08,
      "loss": 2.3336,
      "step": 74142
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1095787286758423,
      "learning_rate": 5.982541730898961e-08,
      "loss": 2.0944,
      "step": 74143
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1018052101135254,
      "learning_rate": 5.978045530991572e-08,
      "loss": 2.1024,
      "step": 74144
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2251830101013184,
      "learning_rate": 5.973551016221546e-08,
      "loss": 2.332,
      "step": 74145
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4913763999938965,
      "learning_rate": 5.96905818659621e-08,
      "loss": 2.5254,
      "step": 74146
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1359667778015137,
      "learning_rate": 5.964567042123337e-08,
      "loss": 2.1356,
      "step": 74147
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0350242853164673,
      "learning_rate": 5.960077582810475e-08,
      "loss": 2.2274,
      "step": 74148
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1444091796875,
      "learning_rate": 5.9555898086652855e-08,
      "loss": 2.4624,
      "step": 74149
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.117323398590088,
      "learning_rate": 5.951103719695317e-08,
      "loss": 2.4624,
      "step": 74150
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0901206731796265,
      "learning_rate": 5.9466193159082307e-08,
      "loss": 2.3389,
      "step": 74151
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9836958050727844,
      "learning_rate": 5.942136597311576e-08,
      "loss": 2.1727,
      "step": 74152
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0711376667022705,
      "learning_rate": 5.937655563913014e-08,
      "loss": 2.1509,
      "step": 74153
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0176076889038086,
      "learning_rate": 5.933176215720093e-08,
      "loss": 2.1698,
      "step": 74154
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.190985918045044,
      "learning_rate": 5.928698552740475e-08,
      "loss": 2.42,
      "step": 74155
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.323917269706726,
      "learning_rate": 5.924222574981708e-08,
      "loss": 2.2053,
      "step": 74156
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.105644941329956,
      "learning_rate": 5.919748282451232e-08,
      "loss": 2.0357,
      "step": 74157
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1164138317108154,
      "learning_rate": 5.9152756751569285e-08,
      "loss": 2.2504,
      "step": 74158
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0110865831375122,
      "learning_rate": 5.910804753106125e-08,
      "loss": 2.3176,
      "step": 74159
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0192768573760986,
      "learning_rate": 5.906335516306483e-08,
      "loss": 2.1869,
      "step": 74160
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1470009088516235,
      "learning_rate": 5.901867964765662e-08,
      "loss": 2.1155,
      "step": 74161
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0644272565841675,
      "learning_rate": 5.897402098491212e-08,
      "loss": 2.3841,
      "step": 74162
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0097846984863281,
      "learning_rate": 5.892937917490571e-08,
      "loss": 2.4243,
      "step": 74163
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.040940761566162,
      "learning_rate": 5.8884754217714e-08,
      "loss": 2.2803,
      "step": 74164
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0355473756790161,
      "learning_rate": 5.884014611341249e-08,
      "loss": 2.5482,
      "step": 74165
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2078181505203247,
      "learning_rate": 5.879555486207778e-08,
      "loss": 2.228,
      "step": 74166
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9864888787269592,
      "learning_rate": 5.875098046378314e-08,
      "loss": 2.4282,
      "step": 74167
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1037040948867798,
      "learning_rate": 5.8706422918607395e-08,
      "loss": 2.2488,
      "step": 74168
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9720019102096558,
      "learning_rate": 5.866188222662273e-08,
      "loss": 2.2548,
      "step": 74169
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3343660831451416,
      "learning_rate": 5.861735838790683e-08,
      "loss": 2.3132,
      "step": 74170
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.037865161895752,
      "learning_rate": 5.8572851402534104e-08,
      "loss": 2.4756,
      "step": 74171
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.219094157218933,
      "learning_rate": 5.852836127058226e-08,
      "loss": 2.3346,
      "step": 74172
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1117652654647827,
      "learning_rate": 5.8483887992123454e-08,
      "loss": 2.5557,
      "step": 74173
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0890611410140991,
      "learning_rate": 5.843943156723431e-08,
      "loss": 2.0869,
      "step": 74174
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1493972539901733,
      "learning_rate": 5.8394991995990303e-08,
      "loss": 2.2198,
      "step": 74175
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1179662942886353,
      "learning_rate": 5.835056927846805e-08,
      "loss": 2.2556,
      "step": 74176
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9981034994125366,
      "learning_rate": 5.8306163414740824e-08,
      "loss": 2.4125,
      "step": 74177
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2006256580352783,
      "learning_rate": 5.826177440488523e-08,
      "loss": 2.2188,
      "step": 74178
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1867680549621582,
      "learning_rate": 5.821740224897454e-08,
      "loss": 2.4108,
      "step": 74179
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2100657224655151,
      "learning_rate": 5.8173046947087585e-08,
      "loss": 2.2623,
      "step": 74180
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1454187631607056,
      "learning_rate": 5.8128708499296525e-08,
      "loss": 2.3633,
      "step": 74181
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0314230918884277,
      "learning_rate": 5.808438690567686e-08,
      "loss": 2.2218,
      "step": 74182
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.129369854927063,
      "learning_rate": 5.804008216630519e-08,
      "loss": 2.2946,
      "step": 74183
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0964497327804565,
      "learning_rate": 5.79957942812559e-08,
      "loss": 2.5136,
      "step": 74184
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0235713720321655,
      "learning_rate": 5.795152325060449e-08,
      "loss": 2.351,
      "step": 74185
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.004396677017212,
      "learning_rate": 5.790726907442423e-08,
      "loss": 2.4957,
      "step": 74186
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1268770694732666,
      "learning_rate": 5.786303175279173e-08,
      "loss": 2.3311,
      "step": 74187
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0089246034622192,
      "learning_rate": 5.7818811285782483e-08,
      "loss": 2.2351,
      "step": 74188
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0364407300949097,
      "learning_rate": 5.7774607673468654e-08,
      "loss": 2.4064,
      "step": 74189
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0721617937088013,
      "learning_rate": 5.773042091592906e-08,
      "loss": 2.3368,
      "step": 74190
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0612988471984863,
      "learning_rate": 5.7686251013235884e-08,
      "loss": 2.0397,
      "step": 74191
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.083404302597046,
      "learning_rate": 5.7642097965464606e-08,
      "loss": 2.3177,
      "step": 74192
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.086207389831543,
      "learning_rate": 5.7597961772690726e-08,
      "loss": 2.1728,
      "step": 74193
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.121819019317627,
      "learning_rate": 5.7553842434988625e-08,
      "loss": 2.3099,
      "step": 74194
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2998096942901611,
      "learning_rate": 5.7509739952432694e-08,
      "loss": 2.2478,
      "step": 74195
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1156131029129028,
      "learning_rate": 5.746565432509843e-08,
      "loss": 2.3195,
      "step": 74196
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.152207374572754,
      "learning_rate": 5.742158555306021e-08,
      "loss": 2.1574,
      "step": 74197
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0103975534439087,
      "learning_rate": 5.7377533636393534e-08,
      "loss": 2.1514,
      "step": 74198
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1439340114593506,
      "learning_rate": 5.7333498575171676e-08,
      "loss": 2.1073,
      "step": 74199
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.182503342628479,
      "learning_rate": 5.728948036946902e-08,
      "loss": 2.3772,
      "step": 74200
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0927609205245972,
      "learning_rate": 5.724547901936217e-08,
      "loss": 2.2664,
      "step": 74201
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0919036865234375,
      "learning_rate": 5.7201494524924405e-08,
      "loss": 2.305,
      "step": 74202
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.091759204864502,
      "learning_rate": 5.715752688623011e-08,
      "loss": 2.5451,
      "step": 74203
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4887433052062988,
      "learning_rate": 5.711357610335477e-08,
      "loss": 2.1965,
      "step": 74204
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1250617504119873,
      "learning_rate": 5.7069642176371674e-08,
      "loss": 2.3542,
      "step": 74205
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1069557666778564,
      "learning_rate": 5.702572510535631e-08,
      "loss": 2.3514,
      "step": 74206
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0786749124526978,
      "learning_rate": 5.6981824890383065e-08,
      "loss": 2.2293,
      "step": 74207
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0739850997924805,
      "learning_rate": 5.693794153152632e-08,
      "loss": 2.0027,
      "step": 74208
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2353464365005493,
      "learning_rate": 5.6894075028860464e-08,
      "loss": 2.2049,
      "step": 74209
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0166040658950806,
      "learning_rate": 5.685022538245877e-08,
      "loss": 2.2501,
      "step": 74210
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2101316452026367,
      "learning_rate": 5.680639259239673e-08,
      "loss": 2.4935,
      "step": 74211
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.087604284286499,
      "learning_rate": 5.6762576658748734e-08,
      "loss": 2.1283,
      "step": 74212
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.112769603729248,
      "learning_rate": 5.671877758158917e-08,
      "loss": 2.4518,
      "step": 74213
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1042863130569458,
      "learning_rate": 5.6674995360991305e-08,
      "loss": 2.1339,
      "step": 74214
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1041806936264038,
      "learning_rate": 5.663122999702952e-08,
      "loss": 2.36,
      "step": 74215
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3905538320541382,
      "learning_rate": 5.658748148977822e-08,
      "loss": 2.386,
      "step": 74216
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0638771057128906,
      "learning_rate": 5.654374983931288e-08,
      "loss": 2.3213,
      "step": 74217
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0172680616378784,
      "learning_rate": 5.650003504570567e-08,
      "loss": 2.4426,
      "step": 74218
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0004565715789795,
      "learning_rate": 5.645633710903098e-08,
      "loss": 2.4101,
      "step": 74219
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0433392524719238,
      "learning_rate": 5.6412656029364296e-08,
      "loss": 2.1263,
      "step": 74220
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.099611520767212,
      "learning_rate": 5.6368991806778905e-08,
      "loss": 2.3564,
      "step": 74221
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9897242188453674,
      "learning_rate": 5.632534444134807e-08,
      "loss": 2.3733,
      "step": 74222
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2924325466156006,
      "learning_rate": 5.6281713933146186e-08,
      "loss": 2.4695,
      "step": 74223
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0675874948501587,
      "learning_rate": 5.6238100282248745e-08,
      "loss": 2.3182,
      "step": 74224
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0307378768920898,
      "learning_rate": 5.6194503488727904e-08,
      "loss": 2.1898,
      "step": 74225
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1850717067718506,
      "learning_rate": 5.615092355265805e-08,
      "loss": 2.1985,
      "step": 74226
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0808770656585693,
      "learning_rate": 5.6107360474112474e-08,
      "loss": 2.3271,
      "step": 74227
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1240921020507812,
      "learning_rate": 5.6063814253166645e-08,
      "loss": 2.2854,
      "step": 74228
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0217994451522827,
      "learning_rate": 5.6020284889892754e-08,
      "loss": 2.395,
      "step": 74229
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.099608302116394,
      "learning_rate": 5.597677238436516e-08,
      "loss": 2.2807,
      "step": 74230
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1513656377792358,
      "learning_rate": 5.593327673665827e-08,
      "loss": 2.4153,
      "step": 74231
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1079339981079102,
      "learning_rate": 5.588979794684424e-08,
      "loss": 2.3455,
      "step": 74232
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.110537052154541,
      "learning_rate": 5.584633601499856e-08,
      "loss": 2.2803,
      "step": 74233
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.09187912940979,
      "learning_rate": 5.580289094119451e-08,
      "loss": 2.4441,
      "step": 74234
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0054683685302734,
      "learning_rate": 5.575946272550425e-08,
      "loss": 2.213,
      "step": 74235
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0844273567199707,
      "learning_rate": 5.5716051368003285e-08,
      "loss": 2.2029,
      "step": 74236
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1046252250671387,
      "learning_rate": 5.567265686876378e-08,
      "loss": 2.4042,
      "step": 74237
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.189218282699585,
      "learning_rate": 5.562927922786121e-08,
      "loss": 2.3091,
      "step": 74238
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0501843690872192,
      "learning_rate": 5.558591844536665e-08,
      "loss": 2.3212,
      "step": 74239
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2176299095153809,
      "learning_rate": 5.554257452135559e-08,
      "loss": 2.243,
      "step": 74240
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0543489456176758,
      "learning_rate": 5.5499247455900186e-08,
      "loss": 2.2522,
      "step": 74241
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0357482433319092,
      "learning_rate": 5.545593724907483e-08,
      "loss": 2.2839,
      "step": 74242
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.109728455543518,
      "learning_rate": 5.54126439009528e-08,
      "loss": 2.4086,
      "step": 74243
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2467180490493774,
      "learning_rate": 5.5369367411606256e-08,
      "loss": 2.2948,
      "step": 74244
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.093941330909729,
      "learning_rate": 5.5326107781110695e-08,
      "loss": 2.5847,
      "step": 74245
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1556628942489624,
      "learning_rate": 5.528286500953717e-08,
      "loss": 2.2676,
      "step": 74246
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.036388874053955,
      "learning_rate": 5.5239639096961176e-08,
      "loss": 2.2287,
      "step": 74247
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.257708191871643,
      "learning_rate": 5.519643004345487e-08,
      "loss": 2.4015,
      "step": 74248
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0398588180541992,
      "learning_rate": 5.5153237849091545e-08,
      "loss": 2.2515,
      "step": 74249
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0790560245513916,
      "learning_rate": 5.511006251394335e-08,
      "loss": 2.2101,
      "step": 74250
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0610289573669434,
      "learning_rate": 5.5066904038085786e-08,
      "loss": 2.2871,
      "step": 74251
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1658111810684204,
      "learning_rate": 5.502376242159102e-08,
      "loss": 2.1577,
      "step": 74252
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0754506587982178,
      "learning_rate": 5.498063766453121e-08,
      "loss": 2.1572,
      "step": 74253
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0564730167388916,
      "learning_rate": 5.493752976698075e-08,
      "loss": 2.1348,
      "step": 74254
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.212777853012085,
      "learning_rate": 5.489443872901179e-08,
      "loss": 2.4257,
      "step": 74255
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0324077606201172,
      "learning_rate": 5.485136455069762e-08,
      "loss": 2.2833,
      "step": 74256
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0298576354980469,
      "learning_rate": 5.480830723211261e-08,
      "loss": 2.4174,
      "step": 74257
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.063305377960205,
      "learning_rate": 5.476526677332672e-08,
      "loss": 2.3273,
      "step": 74258
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1925779581069946,
      "learning_rate": 5.4722243174416544e-08,
      "loss": 2.0721,
      "step": 74259
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.02357017993927,
      "learning_rate": 5.4679236435452033e-08,
      "loss": 2.3749,
      "step": 74260
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0287015438079834,
      "learning_rate": 5.463624655650868e-08,
      "loss": 2.3771,
      "step": 74261
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0628695487976074,
      "learning_rate": 5.459327353765642e-08,
      "loss": 2.4099,
      "step": 74262
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9609700441360474,
      "learning_rate": 5.455031737897076e-08,
      "loss": 2.4652,
      "step": 74263
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0795396566390991,
      "learning_rate": 5.450737808052386e-08,
      "loss": 2.4128,
      "step": 74264
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1599267721176147,
      "learning_rate": 5.446445564238678e-08,
      "loss": 2.2296,
      "step": 74265
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0681707859039307,
      "learning_rate": 5.442155006463501e-08,
      "loss": 2.3801,
      "step": 74266
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0915738344192505,
      "learning_rate": 5.437866134733849e-08,
      "loss": 2.1405,
      "step": 74267
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1433154344558716,
      "learning_rate": 5.433578949057161e-08,
      "loss": 2.1518,
      "step": 74268
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.986011266708374,
      "learning_rate": 5.4292934494407646e-08,
      "loss": 2.3712,
      "step": 74269
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0987799167633057,
      "learning_rate": 5.425009635891765e-08,
      "loss": 2.3407,
      "step": 74270
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0290321111679077,
      "learning_rate": 5.4207275084176005e-08,
      "loss": 2.1905,
      "step": 74271
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5328035354614258,
      "learning_rate": 5.416447067025266e-08,
      "loss": 2.4704,
      "step": 74272
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0928350687026978,
      "learning_rate": 5.41216831172231e-08,
      "loss": 2.3307,
      "step": 74273
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.201029896736145,
      "learning_rate": 5.407891242515839e-08,
      "loss": 2.5342,
      "step": 74274
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1293226480484009,
      "learning_rate": 5.403615859413069e-08,
      "loss": 2.3848,
      "step": 74275
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1184897422790527,
      "learning_rate": 5.3993421624213285e-08,
      "loss": 2.5512,
      "step": 74276
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1784813404083252,
      "learning_rate": 5.395070151547832e-08,
      "loss": 2.3392,
      "step": 74277
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0800707340240479,
      "learning_rate": 5.390799826799798e-08,
      "loss": 2.4078,
      "step": 74278
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1082725524902344,
      "learning_rate": 5.3865311881845514e-08,
      "loss": 2.5183,
      "step": 74279
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4808900356292725,
      "learning_rate": 5.3822642357092e-08,
      "loss": 2.2581,
      "step": 74280
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.133786678314209,
      "learning_rate": 5.377998969381071e-08,
      "loss": 2.3839,
      "step": 74281
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1516380310058594,
      "learning_rate": 5.3737353892073795e-08,
      "loss": 2.5819,
      "step": 74282
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0842719078063965,
      "learning_rate": 5.369473495195232e-08,
      "loss": 2.1312,
      "step": 74283
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0357915163040161,
      "learning_rate": 5.365213287352178e-08,
      "loss": 2.3007,
      "step": 74284
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1188024282455444,
      "learning_rate": 5.360954765684989e-08,
      "loss": 2.3245,
      "step": 74285
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.106862187385559,
      "learning_rate": 5.3566979302013266e-08,
      "loss": 2.2848,
      "step": 74286
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.178451657295227,
      "learning_rate": 5.352442780908074e-08,
      "loss": 2.3241,
      "step": 74287
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.147768259048462,
      "learning_rate": 5.3481893178126685e-08,
      "loss": 2.4438,
      "step": 74288
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0510307550430298,
      "learning_rate": 5.343937540922106e-08,
      "loss": 2.3663,
      "step": 74289
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1157151460647583,
      "learning_rate": 5.3396874502438244e-08,
      "loss": 2.2778,
      "step": 74290
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0178754329681396,
      "learning_rate": 5.3354390457849294e-08,
      "loss": 2.2708,
      "step": 74291
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.078822374343872,
      "learning_rate": 5.331192327552526e-08,
      "loss": 2.1265,
      "step": 74292
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0617029666900635,
      "learning_rate": 5.326947295553941e-08,
      "loss": 2.1434,
      "step": 74293
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.052353858947754,
      "learning_rate": 5.3227039497963926e-08,
      "loss": 2.203,
      "step": 74294
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0327906608581543,
      "learning_rate": 5.3184622902868745e-08,
      "loss": 2.285,
      "step": 74295
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1585564613342285,
      "learning_rate": 5.314222317032824e-08,
      "loss": 2.4201,
      "step": 74296
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9882001876831055,
      "learning_rate": 5.309984030041237e-08,
      "loss": 2.433,
      "step": 74297
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1397817134857178,
      "learning_rate": 5.305747429319441e-08,
      "loss": 2.4014,
      "step": 74298
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0767606496810913,
      "learning_rate": 5.30151251487454e-08,
      "loss": 2.2568,
      "step": 74299
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.008780837059021,
      "learning_rate": 5.2972792867137526e-08,
      "loss": 2.3715,
      "step": 74300
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1320263147354126,
      "learning_rate": 5.293047744844182e-08,
      "loss": 2.2078,
      "step": 74301
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0857784748077393,
      "learning_rate": 5.2888178892730455e-08,
      "loss": 2.3384,
      "step": 74302
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4881565570831299,
      "learning_rate": 5.284589720007671e-08,
      "loss": 2.5437,
      "step": 74303
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1909663677215576,
      "learning_rate": 5.2803632370549415e-08,
      "loss": 2.3671,
      "step": 74304
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.018890380859375,
      "learning_rate": 5.276138440422185e-08,
      "loss": 2.0425,
      "step": 74305
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2013736963272095,
      "learning_rate": 5.271915330116506e-08,
      "loss": 2.299,
      "step": 74306
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.186930537223816,
      "learning_rate": 5.2676939061451214e-08,
      "loss": 2.3206,
      "step": 74307
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0063533782958984,
      "learning_rate": 5.263474168515248e-08,
      "loss": 2.2306,
      "step": 74308
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1393040418624878,
      "learning_rate": 5.2592561172338795e-08,
      "loss": 2.2263,
      "step": 74309
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.067017912864685,
      "learning_rate": 5.255039752308233e-08,
      "loss": 2.1854,
      "step": 74310
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.219218373298645,
      "learning_rate": 5.250825073745414e-08,
      "loss": 2.3872,
      "step": 74311
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1267237663269043,
      "learning_rate": 5.246612081552638e-08,
      "loss": 2.2296,
      "step": 74312
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.065529704093933,
      "learning_rate": 5.242400775737122e-08,
      "loss": 2.2121,
      "step": 74313
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2979252338409424,
      "learning_rate": 5.2381911563057496e-08,
      "loss": 2.1753,
      "step": 74314
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.120708703994751,
      "learning_rate": 5.233983223265959e-08,
      "loss": 2.3977,
      "step": 74315
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0794036388397217,
      "learning_rate": 5.2297769766246345e-08,
      "loss": 2.3263,
      "step": 74316
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.06455659866333,
      "learning_rate": 5.2255724163892127e-08,
      "loss": 2.358,
      "step": 74317
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0249742269515991,
      "learning_rate": 5.221369542566468e-08,
      "loss": 2.1733,
      "step": 74318
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0936684608459473,
      "learning_rate": 5.217168355163726e-08,
      "loss": 2.2616,
      "step": 74319
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9776116013526917,
      "learning_rate": 5.212968854187983e-08,
      "loss": 2.3132,
      "step": 74320
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0543783903121948,
      "learning_rate": 5.208771039646454e-08,
      "loss": 2.1461,
      "step": 74321
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.014839768409729,
      "learning_rate": 5.2045749115462454e-08,
      "loss": 2.3128,
      "step": 74322
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0332763195037842,
      "learning_rate": 5.2003804698945725e-08,
      "loss": 2.4012,
      "step": 74323
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0773189067840576,
      "learning_rate": 5.1961877146983195e-08,
      "loss": 2.3301,
      "step": 74324
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.117421269416809,
      "learning_rate": 5.191996645964703e-08,
      "loss": 2.474,
      "step": 74325
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2184417247772217,
      "learning_rate": 5.1878072637009394e-08,
      "loss": 2.5134,
      "step": 74326
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2478337287902832,
      "learning_rate": 5.1836195679140225e-08,
      "loss": 2.3021,
      "step": 74327
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0841734409332275,
      "learning_rate": 5.179433558610947e-08,
      "loss": 2.5402,
      "step": 74328
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1634527444839478,
      "learning_rate": 5.1752492357990406e-08,
      "loss": 2.2956,
      "step": 74329
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0824949741363525,
      "learning_rate": 5.171066599485186e-08,
      "loss": 2.4785,
      "step": 74330
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.169339656829834,
      "learning_rate": 5.166885649676601e-08,
      "loss": 2.2997,
      "step": 74331
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0708273649215698,
      "learning_rate": 5.1627063863803895e-08,
      "loss": 2.2904,
      "step": 74332
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1722303628921509,
      "learning_rate": 5.158528809603547e-08,
      "loss": 2.2945,
      "step": 74333
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0624462366104126,
      "learning_rate": 5.154352919353178e-08,
      "loss": 2.3293,
      "step": 74334
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0996817350387573,
      "learning_rate": 5.150178715636389e-08,
      "loss": 2.2728,
      "step": 74335
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.031701683998108,
      "learning_rate": 5.146006198460174e-08,
      "loss": 2.2452,
      "step": 74336
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0681514739990234,
      "learning_rate": 5.141835367831749e-08,
      "loss": 2.4004,
      "step": 74337
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1192679405212402,
      "learning_rate": 5.137666223758109e-08,
      "loss": 2.1319,
      "step": 74338
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0715793371200562,
      "learning_rate": 5.133498766246359e-08,
      "loss": 2.4865,
      "step": 74339
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.119856357574463,
      "learning_rate": 5.129332995303493e-08,
      "loss": 2.3695,
      "step": 74340
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0486021041870117,
      "learning_rate": 5.125168910936618e-08,
      "loss": 2.4607,
      "step": 74341
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.7221975326538086,
      "learning_rate": 5.121006513152727e-08,
      "loss": 2.3725,
      "step": 74342
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1779686212539673,
      "learning_rate": 5.116845801959036e-08,
      "loss": 2.3918,
      "step": 74343
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0556437969207764,
      "learning_rate": 5.11268677736243e-08,
      "loss": 2.1114,
      "step": 74344
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0835084915161133,
      "learning_rate": 5.108529439370014e-08,
      "loss": 2.2716,
      "step": 74345
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0262271165847778,
      "learning_rate": 5.104373787988892e-08,
      "loss": 2.1441,
      "step": 74346
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0112202167510986,
      "learning_rate": 5.10021982322606e-08,
      "loss": 2.1313,
      "step": 74347
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0868672132492065,
      "learning_rate": 5.0960675450886233e-08,
      "loss": 2.3116,
      "step": 74348
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.102245569229126,
      "learning_rate": 5.091916953583464e-08,
      "loss": 2.2059,
      "step": 74349
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1050516366958618,
      "learning_rate": 5.0877680487178005e-08,
      "loss": 2.3207,
      "step": 74350
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1261907815933228,
      "learning_rate": 5.0836208304985144e-08,
      "loss": 2.2438,
      "step": 74351
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.178110122680664,
      "learning_rate": 5.0794752989327123e-08,
      "loss": 2.3302,
      "step": 74352
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.021857500076294,
      "learning_rate": 5.0753314540273876e-08,
      "loss": 2.3599,
      "step": 74353
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0599695444107056,
      "learning_rate": 5.071189295789758e-08,
      "loss": 2.3822,
      "step": 74354
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0511505603790283,
      "learning_rate": 5.0670488242264836e-08,
      "loss": 2.2564,
      "step": 74355
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9735904335975647,
      "learning_rate": 5.062910039344893e-08,
      "loss": 2.1625,
      "step": 74356
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2295783758163452,
      "learning_rate": 5.058772941151868e-08,
      "loss": 2.2726,
      "step": 74357
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0928442478179932,
      "learning_rate": 5.054637529654516e-08,
      "loss": 2.2924,
      "step": 74358
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1424728631973267,
      "learning_rate": 5.050503804859608e-08,
      "loss": 2.6432,
      "step": 74359
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2393680810928345,
      "learning_rate": 5.046371766774471e-08,
      "loss": 2.43,
      "step": 74360
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.278780221939087,
      "learning_rate": 5.0422414154058795e-08,
      "loss": 2.2348,
      "step": 74361
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0293059349060059,
      "learning_rate": 5.038112750760937e-08,
      "loss": 2.3487,
      "step": 74362
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.086500883102417,
      "learning_rate": 5.033985772846639e-08,
      "loss": 2.2521,
      "step": 74363
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2100656032562256,
      "learning_rate": 5.029860481669979e-08,
      "loss": 2.1242,
      "step": 74364
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0422587394714355,
      "learning_rate": 5.025736877237952e-08,
      "loss": 2.3236,
      "step": 74365
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1353617906570435,
      "learning_rate": 5.021614959557442e-08,
      "loss": 2.3488,
      "step": 74366
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1428134441375732,
      "learning_rate": 5.0174947286355526e-08,
      "loss": 2.3384,
      "step": 74367
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0362319946289062,
      "learning_rate": 5.0133761844792794e-08,
      "loss": 2.2567,
      "step": 74368
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0917580127716064,
      "learning_rate": 5.0092593270955057e-08,
      "loss": 2.3111,
      "step": 74369
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1158195734024048,
      "learning_rate": 5.0051441564914484e-08,
      "loss": 2.425,
      "step": 74370
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0756280422210693,
      "learning_rate": 5.001030672673768e-08,
      "loss": 2.4174,
      "step": 74371
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0162583589553833,
      "learning_rate": 4.99691887564957e-08,
      "loss": 2.2741,
      "step": 74372
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9636766910552979,
      "learning_rate": 4.992808765425849e-08,
      "loss": 2.1735,
      "step": 74373
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1689280271530151,
      "learning_rate": 4.988700342009489e-08,
      "loss": 2.1509,
      "step": 74374
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.059398889541626,
      "learning_rate": 4.984593605407706e-08,
      "loss": 2.3141,
      "step": 74375
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9994312524795532,
      "learning_rate": 4.98048855562705e-08,
      "loss": 2.3276,
      "step": 74376
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0691187381744385,
      "learning_rate": 4.97638519267496e-08,
      "loss": 2.4074,
      "step": 74377
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2466676235198975,
      "learning_rate": 4.972283516557985e-08,
      "loss": 2.356,
      "step": 74378
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9582690000534058,
      "learning_rate": 4.968183527283232e-08,
      "loss": 2.1946,
      "step": 74379
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0788716077804565,
      "learning_rate": 4.964085224857695e-08,
      "loss": 2.4481,
      "step": 74380
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1193181276321411,
      "learning_rate": 4.959988609288257e-08,
      "loss": 2.3374,
      "step": 74381
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9629390835762024,
      "learning_rate": 4.9558936805819137e-08,
      "loss": 2.2422,
      "step": 74382
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0256794691085815,
      "learning_rate": 4.9518004387455464e-08,
      "loss": 2.5706,
      "step": 74383
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0743167400360107,
      "learning_rate": 4.947708883786151e-08,
      "loss": 2.4155,
      "step": 74384
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1542565822601318,
      "learning_rate": 4.943619015710721e-08,
      "loss": 2.3051,
      "step": 74385
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1449135541915894,
      "learning_rate": 4.9395308345259185e-08,
      "loss": 2.1974,
      "step": 74386
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0102580785751343,
      "learning_rate": 4.93544434023907e-08,
      "loss": 2.1424,
      "step": 74387
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0021835565567017,
      "learning_rate": 4.931359532856728e-08,
      "loss": 2.2133,
      "step": 74388
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.117425560951233,
      "learning_rate": 4.9272764123861064e-08,
      "loss": 2.4289,
      "step": 74389
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0964534282684326,
      "learning_rate": 4.9231949788339785e-08,
      "loss": 2.3193,
      "step": 74390
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1292654275894165,
      "learning_rate": 4.919115232207339e-08,
      "loss": 2.3733,
      "step": 74391
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1196664571762085,
      "learning_rate": 4.9150371725129596e-08,
      "loss": 2.2695,
      "step": 74392
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2233874797821045,
      "learning_rate": 4.9109607997579466e-08,
      "loss": 2.2724,
      "step": 74393
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0168964862823486,
      "learning_rate": 4.906886113949072e-08,
      "loss": 2.3071,
      "step": 74394
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.118506908416748,
      "learning_rate": 4.902813115093219e-08,
      "loss": 2.0717,
      "step": 74395
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2052861452102661,
      "learning_rate": 4.898741803197493e-08,
      "loss": 2.3793,
      "step": 74396
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2141814231872559,
      "learning_rate": 4.894672178268556e-08,
      "loss": 2.2997,
      "step": 74397
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0859824419021606,
      "learning_rate": 4.890604240313512e-08,
      "loss": 2.2823,
      "step": 74398
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2712286710739136,
      "learning_rate": 4.886537989339135e-08,
      "loss": 2.3814,
      "step": 74399
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0549204349517822,
      "learning_rate": 4.882473425352308e-08,
      "loss": 2.1794,
      "step": 74400
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.140187382698059,
      "learning_rate": 4.878410548360024e-08,
      "loss": 2.3975,
      "step": 74401
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1046125888824463,
      "learning_rate": 4.874349358369057e-08,
      "loss": 2.2156,
      "step": 74402
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4906588792800903,
      "learning_rate": 4.8702898553862896e-08,
      "loss": 2.3459,
      "step": 74403
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0304737091064453,
      "learning_rate": 4.866232039418717e-08,
      "loss": 2.4363,
      "step": 74404
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1128965616226196,
      "learning_rate": 4.86217591047311e-08,
      "loss": 2.2253,
      "step": 74405
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1225743293762207,
      "learning_rate": 4.858121468556465e-08,
      "loss": 2.2489,
      "step": 74406
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0210403203964233,
      "learning_rate": 4.854068713675553e-08,
      "loss": 2.3627,
      "step": 74407
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9873902797698975,
      "learning_rate": 4.8500176458372575e-08,
      "loss": 2.0885,
      "step": 74408
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3273870944976807,
      "learning_rate": 4.845968265048462e-08,
      "loss": 2.3793,
      "step": 74409
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.178415298461914,
      "learning_rate": 4.84192057131605e-08,
      "loss": 2.4649,
      "step": 74410
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3280829191207886,
      "learning_rate": 4.837874564646905e-08,
      "loss": 2.4635,
      "step": 74411
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.205241084098816,
      "learning_rate": 4.833830245047799e-08,
      "loss": 2.4331,
      "step": 74412
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.124664545059204,
      "learning_rate": 4.829787612525505e-08,
      "loss": 2.4446,
      "step": 74413
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0428963899612427,
      "learning_rate": 4.825746667087239e-08,
      "loss": 2.17,
      "step": 74414
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9737781882286072,
      "learning_rate": 4.82170740873944e-08,
      "loss": 2.2665,
      "step": 74415
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2698206901550293,
      "learning_rate": 4.817669837489214e-08,
      "loss": 2.3225,
      "step": 74416
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0856292247772217,
      "learning_rate": 4.813633953343333e-08,
      "loss": 2.3659,
      "step": 74417
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.255433201789856,
      "learning_rate": 4.80959975630857e-08,
      "loss": 2.3369,
      "step": 74418
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9566062688827515,
      "learning_rate": 4.805567246391807e-08,
      "loss": 2.1726,
      "step": 74419
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1291671991348267,
      "learning_rate": 4.801536423600039e-08,
      "loss": 2.0723,
      "step": 74420
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.074635624885559,
      "learning_rate": 4.7975072879398176e-08,
      "loss": 2.3209,
      "step": 74421
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1669834852218628,
      "learning_rate": 4.7934798394182466e-08,
      "loss": 2.1958,
      "step": 74422
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1357084512710571,
      "learning_rate": 4.7894540780418774e-08,
      "loss": 2.3209,
      "step": 74423
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0673260688781738,
      "learning_rate": 4.7854300038178147e-08,
      "loss": 2.2048,
      "step": 74424
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0774344205856323,
      "learning_rate": 4.781407616752609e-08,
      "loss": 2.3022,
      "step": 74425
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.101359486579895,
      "learning_rate": 4.777386916853366e-08,
      "loss": 2.2149,
      "step": 74426
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9584101438522339,
      "learning_rate": 4.7733679041266356e-08,
      "loss": 2.3519,
      "step": 74427
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.079047441482544,
      "learning_rate": 4.769350578579413e-08,
      "loss": 2.3786,
      "step": 74428
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0580681562423706,
      "learning_rate": 4.765334940218358e-08,
      "loss": 2.4325,
      "step": 74429
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0311548709869385,
      "learning_rate": 4.7613209890504665e-08,
      "loss": 2.2918,
      "step": 74430
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.050755262374878,
      "learning_rate": 4.757308725082399e-08,
      "loss": 2.6314,
      "step": 74431
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1370035409927368,
      "learning_rate": 4.7532981483209284e-08,
      "loss": 2.2549,
      "step": 74432
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.097048282623291,
      "learning_rate": 4.749289258772938e-08,
      "loss": 2.3916,
      "step": 74433
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.079664707183838,
      "learning_rate": 4.7452820564453105e-08,
      "loss": 2.2594,
      "step": 74434
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1137521266937256,
      "learning_rate": 4.741276541344708e-08,
      "loss": 2.4282,
      "step": 74435
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0864061117172241,
      "learning_rate": 4.737272713477903e-08,
      "loss": 2.6435,
      "step": 74436
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0566017627716064,
      "learning_rate": 4.733270572851778e-08,
      "loss": 2.3644,
      "step": 74437
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.983618974685669,
      "learning_rate": 4.729270119472995e-08,
      "loss": 2.0339,
      "step": 74438
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0940359830856323,
      "learning_rate": 4.7252713533484376e-08,
      "loss": 2.1756,
      "step": 74439
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.157443642616272,
      "learning_rate": 4.721274274484877e-08,
      "loss": 2.2885,
      "step": 74440
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2089359760284424,
      "learning_rate": 4.717278882889087e-08,
      "loss": 2.2826,
      "step": 74441
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0926506519317627,
      "learning_rate": 4.713285178567839e-08,
      "loss": 2.2773,
      "step": 74442
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.926425039768219,
      "learning_rate": 4.709293161527795e-08,
      "loss": 2.316,
      "step": 74443
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0805689096450806,
      "learning_rate": 4.7053028317758375e-08,
      "loss": 2.3149,
      "step": 74444
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9574568271636963,
      "learning_rate": 4.701314189318851e-08,
      "loss": 2.1946,
      "step": 74445
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9830176830291748,
      "learning_rate": 4.6973272341632734e-08,
      "loss": 2.2649,
      "step": 74446
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0729695558547974,
      "learning_rate": 4.693341966316212e-08,
      "loss": 2.2248,
      "step": 74447
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.079485535621643,
      "learning_rate": 4.689358385784104e-08,
      "loss": 2.2504,
      "step": 74448
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1094741821289062,
      "learning_rate": 4.6853764925739455e-08,
      "loss": 2.3345,
      "step": 74449
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9679129719734192,
      "learning_rate": 4.681396286692397e-08,
      "loss": 2.4683,
      "step": 74450
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1253682374954224,
      "learning_rate": 4.6774177681462305e-08,
      "loss": 2.485,
      "step": 74451
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0781347751617432,
      "learning_rate": 4.6734409369421086e-08,
      "loss": 2.2155,
      "step": 74452
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0926512479782104,
      "learning_rate": 4.669465793086803e-08,
      "loss": 2.4309,
      "step": 74453
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.016035795211792,
      "learning_rate": 4.665492336587196e-08,
      "loss": 2.3038,
      "step": 74454
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1012061834335327,
      "learning_rate": 4.66152056744984e-08,
      "loss": 2.2142,
      "step": 74455
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.128197431564331,
      "learning_rate": 4.657550485681506e-08,
      "loss": 2.4496,
      "step": 74456
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0584664344787598,
      "learning_rate": 4.653582091288966e-08,
      "loss": 2.4942,
      "step": 74457
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0509403944015503,
      "learning_rate": 4.6496153842789935e-08,
      "loss": 2.1563,
      "step": 74458
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1042475700378418,
      "learning_rate": 4.6456503646581383e-08,
      "loss": 2.3198,
      "step": 74459
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0382393598556519,
      "learning_rate": 4.6416870324333954e-08,
      "loss": 2.2761,
      "step": 74460
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1231433153152466,
      "learning_rate": 4.637725387611203e-08,
      "loss": 2.2336,
      "step": 74461
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9950655102729797,
      "learning_rate": 4.633765430198556e-08,
      "loss": 2.2043,
      "step": 74462
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9861111640930176,
      "learning_rate": 4.6298071602018935e-08,
      "loss": 2.3476,
      "step": 74463
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.987402617931366,
      "learning_rate": 4.6258505776279883e-08,
      "loss": 2.3043,
      "step": 74464
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.104498028755188,
      "learning_rate": 4.6218956824837234e-08,
      "loss": 2.3032,
      "step": 74465
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.00114107131958,
      "learning_rate": 4.61794247477565e-08,
      "loss": 2.2587,
      "step": 74466
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.135231614112854,
      "learning_rate": 4.613990954510428e-08,
      "loss": 2.2302,
      "step": 74467
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0639793872833252,
      "learning_rate": 4.6100411216949415e-08,
      "loss": 2.1519,
      "step": 74468
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0719456672668457,
      "learning_rate": 4.6060929763357406e-08,
      "loss": 2.1887,
      "step": 74469
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0410361289978027,
      "learning_rate": 4.602146518439599e-08,
      "loss": 2.2702,
      "step": 74470
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1152993440628052,
      "learning_rate": 4.598201748013065e-08,
      "loss": 2.4085,
      "step": 74471
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1749364137649536,
      "learning_rate": 4.594258665063023e-08,
      "loss": 2.2757,
      "step": 74472
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1020395755767822,
      "learning_rate": 4.5903172695960227e-08,
      "loss": 2.1064,
      "step": 74473
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0561960935592651,
      "learning_rate": 4.5863775616188375e-08,
      "loss": 2.5416,
      "step": 74474
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0052005052566528,
      "learning_rate": 4.5824395411380176e-08,
      "loss": 2.3264,
      "step": 74475
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1018348932266235,
      "learning_rate": 4.578503208160334e-08,
      "loss": 2.3225,
      "step": 74476
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0962588787078857,
      "learning_rate": 4.5745685626924495e-08,
      "loss": 2.1953,
      "step": 74477
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.059390902519226,
      "learning_rate": 4.570635604741025e-08,
      "loss": 2.2918,
      "step": 74478
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.075597882270813,
      "learning_rate": 4.5667043343127215e-08,
      "loss": 2.4862,
      "step": 74479
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0482540130615234,
      "learning_rate": 4.5627747514143115e-08,
      "loss": 2.2252,
      "step": 74480
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1279252767562866,
      "learning_rate": 4.558846856052235e-08,
      "loss": 2.3646,
      "step": 74481
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1661889553070068,
      "learning_rate": 4.554920648233263e-08,
      "loss": 2.1874,
      "step": 74482
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.177810549736023,
      "learning_rate": 4.550996127964058e-08,
      "loss": 2.2607,
      "step": 74483
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0829411745071411,
      "learning_rate": 4.547073295251392e-08,
      "loss": 2.3598,
      "step": 74484
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1394970417022705,
      "learning_rate": 4.543152150101704e-08,
      "loss": 2.4079,
      "step": 74485
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.100942850112915,
      "learning_rate": 4.5392326925216555e-08,
      "loss": 2.5686,
      "step": 74486
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1025419235229492,
      "learning_rate": 4.53531492251813e-08,
      "loss": 2.3134,
      "step": 74487
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1420766115188599,
      "learning_rate": 4.531398840097567e-08,
      "loss": 2.138,
      "step": 74488
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1335352659225464,
      "learning_rate": 4.5274844452666275e-08,
      "loss": 2.1145,
      "step": 74489
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0649243593215942,
      "learning_rate": 4.5235717380319734e-08,
      "loss": 2.365,
      "step": 74490
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1412947177886963,
      "learning_rate": 4.519660718400265e-08,
      "loss": 2.3619,
      "step": 74491
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9699026942253113,
      "learning_rate": 4.5157513863780536e-08,
      "loss": 2.2552,
      "step": 74492
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0556622743606567,
      "learning_rate": 4.511843741972e-08,
      "loss": 2.4254,
      "step": 74493
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0740187168121338,
      "learning_rate": 4.507937785188765e-08,
      "loss": 2.4944,
      "step": 74494
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0348912477493286,
      "learning_rate": 4.5040335160349004e-08,
      "loss": 2.2528,
      "step": 74495
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.065605640411377,
      "learning_rate": 4.500130934517066e-08,
      "loss": 2.5007,
      "step": 74496
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9917078018188477,
      "learning_rate": 4.4962300406420357e-08,
      "loss": 2.1294,
      "step": 74497
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1141844987869263,
      "learning_rate": 4.492330834416136e-08,
      "loss": 2.5252,
      "step": 74498
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9841437935829163,
      "learning_rate": 4.488433315846141e-08,
      "loss": 2.3361,
      "step": 74499
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.307984709739685,
      "learning_rate": 4.4845374849386e-08,
      "loss": 2.3819,
      "step": 74500
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0739223957061768,
      "learning_rate": 4.480643341700175e-08,
      "loss": 2.2807,
      "step": 74501
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.148423433303833,
      "learning_rate": 4.476750886137526e-08,
      "loss": 2.3175,
      "step": 74502
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.170284628868103,
      "learning_rate": 4.472860118257094e-08,
      "loss": 2.1292,
      "step": 74503
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0105762481689453,
      "learning_rate": 4.4689710380655394e-08,
      "loss": 2.4267,
      "step": 74504
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9753671884536743,
      "learning_rate": 4.4650836455694125e-08,
      "loss": 2.1682,
      "step": 74505
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.088784098625183,
      "learning_rate": 4.4611979407753746e-08,
      "loss": 2.3313,
      "step": 74506
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1635546684265137,
      "learning_rate": 4.4573139236900874e-08,
      "loss": 2.3305,
      "step": 74507
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1320475339889526,
      "learning_rate": 4.453431594319879e-08,
      "loss": 2.2969,
      "step": 74508
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1206895112991333,
      "learning_rate": 4.449550952671633e-08,
      "loss": 2.1925,
      "step": 74509
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1700705289840698,
      "learning_rate": 4.4456719987516775e-08,
      "loss": 2.2733,
      "step": 74510
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2116124629974365,
      "learning_rate": 4.4417947325667844e-08,
      "loss": 2.255,
      "step": 74511
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9047796726226807,
      "learning_rate": 4.437919154123394e-08,
      "loss": 2.1835,
      "step": 74512
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1543813943862915,
      "learning_rate": 4.434045263428166e-08,
      "loss": 2.4052,
      "step": 74513
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1144336462020874,
      "learning_rate": 4.4301730604875415e-08,
      "loss": 2.4774,
      "step": 74514
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2184029817581177,
      "learning_rate": 4.426302545308292e-08,
      "loss": 2.4084,
      "step": 74515
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2202895879745483,
      "learning_rate": 4.422433717896746e-08,
      "loss": 2.4332,
      "step": 74516
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0123300552368164,
      "learning_rate": 4.418566578259564e-08,
      "loss": 2.2328,
      "step": 74517
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0820733308792114,
      "learning_rate": 4.414701126403409e-08,
      "loss": 2.3444,
      "step": 74518
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0789952278137207,
      "learning_rate": 4.410837362334608e-08,
      "loss": 2.4408,
      "step": 74519
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0584276914596558,
      "learning_rate": 4.4069752860599335e-08,
      "loss": 2.3161,
      "step": 74520
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0910418033599854,
      "learning_rate": 4.403114897585714e-08,
      "loss": 2.4539,
      "step": 74521
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0772771835327148,
      "learning_rate": 4.399256196918722e-08,
      "loss": 2.237,
      "step": 74522
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0605300664901733,
      "learning_rate": 4.395399184065397e-08,
      "loss": 2.4698,
      "step": 74523
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1404411792755127,
      "learning_rate": 4.391543859032288e-08,
      "loss": 2.3194,
      "step": 74524
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.073311686515808,
      "learning_rate": 4.387690221825835e-08,
      "loss": 2.4433,
      "step": 74525
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0576061010360718,
      "learning_rate": 4.3838382724527003e-08,
      "loss": 2.2632,
      "step": 74526
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2404860258102417,
      "learning_rate": 4.3799880109193224e-08,
      "loss": 2.4894,
      "step": 74527
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9972796440124512,
      "learning_rate": 4.3761394372323626e-08,
      "loss": 2.3974,
      "step": 74528
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0559968948364258,
      "learning_rate": 4.37229255139815e-08,
      "loss": 2.3445,
      "step": 74529
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9465417265892029,
      "learning_rate": 4.368447353423344e-08,
      "loss": 2.1161,
      "step": 74530
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0791199207305908,
      "learning_rate": 4.3646038433144967e-08,
      "loss": 2.1839,
      "step": 74531
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2599619626998901,
      "learning_rate": 4.360762021077936e-08,
      "loss": 2.4082,
      "step": 74532
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0011463165283203,
      "learning_rate": 4.3569218867203225e-08,
      "loss": 2.4364,
      "step": 74533
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1195106506347656,
      "learning_rate": 4.353083440248207e-08,
      "loss": 2.4027,
      "step": 74534
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0899745225906372,
      "learning_rate": 4.349246681668029e-08,
      "loss": 2.4375,
      "step": 74535
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0859086513519287,
      "learning_rate": 4.3454116109862275e-08,
      "loss": 2.2084,
      "step": 74536
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0929101705551147,
      "learning_rate": 4.341578228209464e-08,
      "loss": 2.4821,
      "step": 74537
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0793699026107788,
      "learning_rate": 4.337746533344067e-08,
      "loss": 2.2908,
      "step": 74538
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9971398711204529,
      "learning_rate": 4.3339165263965865e-08,
      "loss": 2.3107,
      "step": 74539
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0645020008087158,
      "learning_rate": 4.330088207373573e-08,
      "loss": 2.3077,
      "step": 74540
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0630478858947754,
      "learning_rate": 4.3262615762815765e-08,
      "loss": 2.4996,
      "step": 74541
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.077209711074829,
      "learning_rate": 4.322436633126814e-08,
      "loss": 2.2398,
      "step": 74542
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0342992544174194,
      "learning_rate": 4.318613377916059e-08,
      "loss": 2.2789,
      "step": 74543
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.022756814956665,
      "learning_rate": 4.3147918106556385e-08,
      "loss": 2.2116,
      "step": 74544
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0785335302352905,
      "learning_rate": 4.310971931352104e-08,
      "loss": 2.3304,
      "step": 74545
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2056313753128052,
      "learning_rate": 4.307153740011782e-08,
      "loss": 2.4213,
      "step": 74546
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.385254979133606,
      "learning_rate": 4.3033372366414474e-08,
      "loss": 2.5129,
      "step": 74547
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0555311441421509,
      "learning_rate": 4.299522421247204e-08,
      "loss": 2.2752,
      "step": 74548
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0789202451705933,
      "learning_rate": 4.295709293835826e-08,
      "loss": 2.3672,
      "step": 74549
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.200465202331543,
      "learning_rate": 4.291897854413529e-08,
      "loss": 2.4863,
      "step": 74550
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.094430923461914,
      "learning_rate": 4.288088102986976e-08,
      "loss": 2.2859,
      "step": 74551
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.076369285583496,
      "learning_rate": 4.284280039562605e-08,
      "loss": 2.3618,
      "step": 74552
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9907716512680054,
      "learning_rate": 4.280473664146745e-08,
      "loss": 2.2473,
      "step": 74553
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1614625453948975,
      "learning_rate": 4.2766689767459455e-08,
      "loss": 2.4345,
      "step": 74554
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1037518978118896,
      "learning_rate": 4.272865977366536e-08,
      "loss": 2.4667,
      "step": 74555
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0519888401031494,
      "learning_rate": 4.269064666015177e-08,
      "loss": 2.3917,
      "step": 74556
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9820995926856995,
      "learning_rate": 4.265265042698086e-08,
      "loss": 2.436,
      "step": 74557
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1370939016342163,
      "learning_rate": 4.261467107421813e-08,
      "loss": 2.0526,
      "step": 74558
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2220379114151,
      "learning_rate": 4.2576708601929086e-08,
      "loss": 2.48,
      "step": 74559
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0653542280197144,
      "learning_rate": 4.25387630101759e-08,
      "loss": 2.5259,
      "step": 74560
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0529835224151611,
      "learning_rate": 4.2500834299024075e-08,
      "loss": 2.0978,
      "step": 74561
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0195063352584839,
      "learning_rate": 4.2462922468538005e-08,
      "loss": 2.368,
      "step": 74562
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0515263080596924,
      "learning_rate": 4.242502751878097e-08,
      "loss": 2.3046,
      "step": 74563
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1055964231491089,
      "learning_rate": 4.2387149449818473e-08,
      "loss": 2.3375,
      "step": 74564
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1013902425765991,
      "learning_rate": 4.234928826171491e-08,
      "loss": 2.427,
      "step": 74565
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.093496322631836,
      "learning_rate": 4.231144395453357e-08,
      "loss": 2.2411,
      "step": 74566
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.994363009929657,
      "learning_rate": 4.227361652833772e-08,
      "loss": 2.3766,
      "step": 74567
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0360664129257202,
      "learning_rate": 4.223580598319288e-08,
      "loss": 2.2538,
      "step": 74568
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.046975016593933,
      "learning_rate": 4.2198012319163426e-08,
      "loss": 2.232,
      "step": 74569
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1816518306732178,
      "learning_rate": 4.2160235536312655e-08,
      "loss": 2.3873,
      "step": 74570
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.100220799446106,
      "learning_rate": 4.212247563470495e-08,
      "loss": 2.217,
      "step": 74571
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.001908302307129,
      "learning_rate": 4.2084732614403603e-08,
      "loss": 2.4766,
      "step": 74572
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1354942321777344,
      "learning_rate": 4.2047006475474104e-08,
      "loss": 2.3017,
      "step": 74573
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.211289405822754,
      "learning_rate": 4.200929721797864e-08,
      "loss": 2.4262,
      "step": 74574
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0950859785079956,
      "learning_rate": 4.197160484198159e-08,
      "loss": 2.1917,
      "step": 74575
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1386005878448486,
      "learning_rate": 4.1933929347548475e-08,
      "loss": 2.4714,
      "step": 74576
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1442091464996338,
      "learning_rate": 4.189627073474145e-08,
      "loss": 2.3117,
      "step": 74577
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0467160940170288,
      "learning_rate": 4.185862900362492e-08,
      "loss": 2.2042,
      "step": 74578
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1409438848495483,
      "learning_rate": 4.182100415426216e-08,
      "loss": 2.3648,
      "step": 74579
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1478593349456787,
      "learning_rate": 4.178339618671756e-08,
      "loss": 2.4869,
      "step": 74580
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0474592447280884,
      "learning_rate": 4.1745805101054415e-08,
      "loss": 2.3879,
      "step": 74581
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.062240719795227,
      "learning_rate": 4.170823089733711e-08,
      "loss": 2.1679,
      "step": 74582
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1196985244750977,
      "learning_rate": 4.167067357562893e-08,
      "loss": 2.5038,
      "step": 74583
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2020186185836792,
      "learning_rate": 4.163313313599426e-08,
      "loss": 2.3397,
      "step": 74584
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0992413759231567,
      "learning_rate": 4.159560957849529e-08,
      "loss": 2.3168,
      "step": 74585
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1476621627807617,
      "learning_rate": 4.1558102903196393e-08,
      "loss": 2.142,
      "step": 74586
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0586498975753784,
      "learning_rate": 4.152061311016198e-08,
      "loss": 2.2372,
      "step": 74587
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0116771459579468,
      "learning_rate": 4.148314019945421e-08,
      "loss": 2.336,
      "step": 74588
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0770195722579956,
      "learning_rate": 4.144568417113637e-08,
      "loss": 2.5276,
      "step": 74589
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0212913751602173,
      "learning_rate": 4.1408245025273966e-08,
      "loss": 2.291,
      "step": 74590
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.013398289680481,
      "learning_rate": 4.137082276192916e-08,
      "loss": 2.2002,
      "step": 74591
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1020957231521606,
      "learning_rate": 4.133341738116525e-08,
      "loss": 2.3049,
      "step": 74592
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0742485523223877,
      "learning_rate": 4.129602888304662e-08,
      "loss": 2.1248,
      "step": 74593
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1011059284210205,
      "learning_rate": 4.125865726763545e-08,
      "loss": 2.3699,
      "step": 74594
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9549750685691833,
      "learning_rate": 4.122130253499501e-08,
      "loss": 2.2505,
      "step": 74595
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.981048583984375,
      "learning_rate": 4.1183964685190814e-08,
      "loss": 2.2376,
      "step": 74596
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1111565828323364,
      "learning_rate": 4.114664371828281e-08,
      "loss": 2.2539,
      "step": 74597
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0370564460754395,
      "learning_rate": 4.110933963433761e-08,
      "loss": 2.3107,
      "step": 74598
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0920517444610596,
      "learning_rate": 4.107205243341517e-08,
      "loss": 2.2152,
      "step": 74599
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.170255184173584,
      "learning_rate": 4.103478211558209e-08,
      "loss": 2.1714,
      "step": 74600
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0836204290390015,
      "learning_rate": 4.0997528680899455e-08,
      "loss": 2.2035,
      "step": 74601
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0075010061264038,
      "learning_rate": 4.0960292129429425e-08,
      "loss": 2.1986,
      "step": 74602
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0547767877578735,
      "learning_rate": 4.092307246123861e-08,
      "loss": 2.2498,
      "step": 74603
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2279114723205566,
      "learning_rate": 4.088586967638586e-08,
      "loss": 2.2711,
      "step": 74604
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.065097689628601,
      "learning_rate": 4.084868377493778e-08,
      "loss": 2.3521,
      "step": 74605
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0812950134277344,
      "learning_rate": 4.081151475695544e-08,
      "loss": 2.336,
      "step": 74606
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0125545263290405,
      "learning_rate": 4.077436262250434e-08,
      "loss": 2.2346,
      "step": 74607
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9938712120056152,
      "learning_rate": 4.0737227371643315e-08,
      "loss": 2.2467,
      "step": 74608
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.036942958831787,
      "learning_rate": 4.0700109004438994e-08,
      "loss": 2.3666,
      "step": 74609
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1906577348709106,
      "learning_rate": 4.066300752095131e-08,
      "loss": 2.4132,
      "step": 74610
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0252883434295654,
      "learning_rate": 4.0625922921246895e-08,
      "loss": 2.3038,
      "step": 74611
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.050525426864624,
      "learning_rate": 4.058885520538458e-08,
      "loss": 2.288,
      "step": 74612
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.032967448234558,
      "learning_rate": 4.055180437342987e-08,
      "loss": 2.2788,
      "step": 74613
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0223767757415771,
      "learning_rate": 4.051477042544494e-08,
      "loss": 2.353,
      "step": 74614
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1145167350769043,
      "learning_rate": 4.0477753361491954e-08,
      "loss": 2.2297,
      "step": 74615
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1139183044433594,
      "learning_rate": 4.044075318163421e-08,
      "loss": 2.3955,
      "step": 74616
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0085948705673218,
      "learning_rate": 4.040376988593497e-08,
      "loss": 2.102,
      "step": 74617
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0556391477584839,
      "learning_rate": 4.036680347445532e-08,
      "loss": 2.3143,
      "step": 74618
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0651782751083374,
      "learning_rate": 4.032985394725964e-08,
      "loss": 2.2727,
      "step": 74619
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0850228071212769,
      "learning_rate": 4.0292921304410096e-08,
      "loss": 2.3862,
      "step": 74620
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1433496475219727,
      "learning_rate": 4.025600554596887e-08,
      "loss": 2.2931,
      "step": 74621
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0646629333496094,
      "learning_rate": 4.0219106671999245e-08,
      "loss": 2.2281,
      "step": 74622
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0570712089538574,
      "learning_rate": 4.018222468256228e-08,
      "loss": 2.2799,
      "step": 74623
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0344761610031128,
      "learning_rate": 4.014535957772237e-08,
      "loss": 2.1983,
      "step": 74624
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1358290910720825,
      "learning_rate": 4.0108511357540566e-08,
      "loss": 2.1286,
      "step": 74625
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.060966968536377,
      "learning_rate": 4.0071680022079066e-08,
      "loss": 2.3008,
      "step": 74626
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0890270471572876,
      "learning_rate": 4.003486557140224e-08,
      "loss": 2.3438,
      "step": 74627
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1241010427474976,
      "learning_rate": 3.9998068005572266e-08,
      "loss": 2.3627,
      "step": 74628
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0485570430755615,
      "learning_rate": 3.99612873246491e-08,
      "loss": 2.1119,
      "step": 74629
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.7293050289154053,
      "learning_rate": 3.992452352869713e-08,
      "loss": 2.3027,
      "step": 74630
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0263679027557373,
      "learning_rate": 3.988777661777854e-08,
      "loss": 2.2221,
      "step": 74631
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1237983703613281,
      "learning_rate": 3.985104659195549e-08,
      "loss": 2.1045,
      "step": 74632
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1322849988937378,
      "learning_rate": 3.981433345128904e-08,
      "loss": 2.4445,
      "step": 74633
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0263049602508545,
      "learning_rate": 3.97776371958436e-08,
      "loss": 2.2595,
      "step": 74634
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.289543867111206,
      "learning_rate": 3.97409578256791e-08,
      "loss": 2.3567,
      "step": 74635
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2778284549713135,
      "learning_rate": 3.9704295340859955e-08,
      "loss": 2.3665,
      "step": 74636
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2200342416763306,
      "learning_rate": 3.96676497414461e-08,
      "loss": 2.3962,
      "step": 74637
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1597365140914917,
      "learning_rate": 3.963102102750194e-08,
      "loss": 2.1224,
      "step": 74638
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0062644481658936,
      "learning_rate": 3.9594409199087416e-08,
      "loss": 2.3131,
      "step": 74639
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0080000162124634,
      "learning_rate": 3.955781425626581e-08,
      "loss": 2.2486,
      "step": 74640
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9721762537956238,
      "learning_rate": 3.952123619909931e-08,
      "loss": 2.3498,
      "step": 74641
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.053247332572937,
      "learning_rate": 3.948467502764897e-08,
      "loss": 2.3897,
      "step": 74642
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9879451990127563,
      "learning_rate": 3.944813074197695e-08,
      "loss": 2.2307,
      "step": 74643
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0470222234725952,
      "learning_rate": 3.9411603342146556e-08,
      "loss": 2.2557,
      "step": 74644
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1947448253631592,
      "learning_rate": 3.937509282821883e-08,
      "loss": 2.3495,
      "step": 74645
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1310259103775024,
      "learning_rate": 3.933859920025485e-08,
      "loss": 2.2507,
      "step": 74646
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0522940158843994,
      "learning_rate": 3.9302122458317884e-08,
      "loss": 2.1963,
      "step": 74647
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0758939981460571,
      "learning_rate": 3.9265662602469e-08,
      "loss": 2.3828,
      "step": 74648
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.085842251777649,
      "learning_rate": 3.922921963277038e-08,
      "loss": 2.4077,
      "step": 74649
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.226736068725586,
      "learning_rate": 3.919279354928307e-08,
      "loss": 2.4922,
      "step": 74650
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9859380125999451,
      "learning_rate": 3.915638435206925e-08,
      "loss": 2.3621,
      "step": 74651
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.02364182472229,
      "learning_rate": 3.91199920411911e-08,
      "loss": 2.2499,
      "step": 74652
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1602513790130615,
      "learning_rate": 3.908361661670967e-08,
      "loss": 2.43,
      "step": 74653
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1006091833114624,
      "learning_rate": 3.9047258078686034e-08,
      "loss": 2.418,
      "step": 74654
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1834336519241333,
      "learning_rate": 3.901091642718346e-08,
      "loss": 2.2596,
      "step": 74655
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.012580156326294,
      "learning_rate": 3.897459166226303e-08,
      "loss": 2.2533,
      "step": 74656
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0218058824539185,
      "learning_rate": 3.8938283783985785e-08,
      "loss": 2.3081,
      "step": 74657
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1111862659454346,
      "learning_rate": 3.890199279241391e-08,
      "loss": 2.309,
      "step": 74658
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0203789472579956,
      "learning_rate": 3.886571868760736e-08,
      "loss": 2.2191,
      "step": 74659
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.033202052116394,
      "learning_rate": 3.882946146963052e-08,
      "loss": 2.2998,
      "step": 74660
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9898828268051147,
      "learning_rate": 3.8793221138541115e-08,
      "loss": 2.1519,
      "step": 74661
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0822035074234009,
      "learning_rate": 3.875699769440466e-08,
      "loss": 2.3253,
      "step": 74662
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9866319894790649,
      "learning_rate": 3.872079113728e-08,
      "loss": 2.0909,
      "step": 74663
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0671323537826538,
      "learning_rate": 3.868460146722819e-08,
      "loss": 2.4538,
      "step": 74664
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9804776310920715,
      "learning_rate": 3.86484286843114e-08,
      "loss": 2.3889,
      "step": 74665
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1183074712753296,
      "learning_rate": 3.861227278859181e-08,
      "loss": 2.1152,
      "step": 74666
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0922127962112427,
      "learning_rate": 3.857613378012937e-08,
      "loss": 2.5492,
      "step": 74667
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2548606395721436,
      "learning_rate": 3.854001165898624e-08,
      "loss": 2.4838,
      "step": 74668
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0284249782562256,
      "learning_rate": 3.8503906425223505e-08,
      "loss": 2.2396,
      "step": 74669
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1861666440963745,
      "learning_rate": 3.84678180789011e-08,
      "loss": 2.504,
      "step": 74670
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1768693923950195,
      "learning_rate": 3.843174662008231e-08,
      "loss": 2.4575,
      "step": 74671
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.161866307258606,
      "learning_rate": 3.8395692048825986e-08,
      "loss": 2.3,
      "step": 74672
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1108647584915161,
      "learning_rate": 3.835965436519651e-08,
      "loss": 2.3101,
      "step": 74673
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0125616788864136,
      "learning_rate": 3.8323633569251614e-08,
      "loss": 2.2302,
      "step": 74674
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0527857542037964,
      "learning_rate": 3.8287629661053486e-08,
      "loss": 2.448,
      "step": 74675
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1556434631347656,
      "learning_rate": 3.825164264066428e-08,
      "loss": 2.1927,
      "step": 74676
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0024906396865845,
      "learning_rate": 3.821567250814395e-08,
      "loss": 2.2637,
      "step": 74677
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1200509071350098,
      "learning_rate": 3.8179719263552463e-08,
      "loss": 2.3663,
      "step": 74678
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0948505401611328,
      "learning_rate": 3.814378290695309e-08,
      "loss": 2.2181,
      "step": 74679
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1555949449539185,
      "learning_rate": 3.8107863438405776e-08,
      "loss": 2.3016,
      "step": 74680
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0988850593566895,
      "learning_rate": 3.807196085797049e-08,
      "loss": 2.2849,
      "step": 74681
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0656788349151611,
      "learning_rate": 3.8036075165709395e-08,
      "loss": 2.333,
      "step": 74682
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1078150272369385,
      "learning_rate": 3.800020636168245e-08,
      "loss": 2.4402,
      "step": 74683
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0009520053863525,
      "learning_rate": 3.79643544459507e-08,
      "loss": 2.4592,
      "step": 74684
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0547453165054321,
      "learning_rate": 3.7928519418575225e-08,
      "loss": 2.323,
      "step": 74685
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0421017408370972,
      "learning_rate": 3.7892701279617085e-08,
      "loss": 2.4905,
      "step": 74686
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2043125629425049,
      "learning_rate": 3.785690002913622e-08,
      "loss": 2.3571,
      "step": 74687
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0187939405441284,
      "learning_rate": 3.78211156671926e-08,
      "loss": 2.1309,
      "step": 74688
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2231411933898926,
      "learning_rate": 3.7785348193849494e-08,
      "loss": 2.3616,
      "step": 74689
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0516870021820068,
      "learning_rate": 3.774959760916463e-08,
      "loss": 2.2929,
      "step": 74690
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1505860090255737,
      "learning_rate": 3.77138639132002e-08,
      "loss": 2.4355,
      "step": 74691
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3427242040634155,
      "learning_rate": 3.767814710601725e-08,
      "loss": 2.479,
      "step": 74692
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0118889808654785,
      "learning_rate": 3.764244718767462e-08,
      "loss": 2.1006,
      "step": 74693
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0923222303390503,
      "learning_rate": 3.76067641582345e-08,
      "loss": 2.3836,
      "step": 74694
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9934177398681641,
      "learning_rate": 3.7571098017755714e-08,
      "loss": 2.1595,
      "step": 74695
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1697081327438354,
      "learning_rate": 3.753544876630044e-08,
      "loss": 2.5161,
      "step": 74696
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0289208889007568,
      "learning_rate": 3.7499816403927526e-08,
      "loss": 2.1537,
      "step": 74697
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2175873517990112,
      "learning_rate": 3.746420093069914e-08,
      "loss": 2.2321,
      "step": 74698
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0030639171600342,
      "learning_rate": 3.742860234667412e-08,
      "loss": 2.1398,
      "step": 74699
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.155392050743103,
      "learning_rate": 3.7393020651914635e-08,
      "loss": 2.4092,
      "step": 74700
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0079538822174072,
      "learning_rate": 3.735745584647843e-08,
      "loss": 2.4468,
      "step": 74701
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0319504737854004,
      "learning_rate": 3.732190793042767e-08,
      "loss": 2.371,
      "step": 74702
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9675900936126709,
      "learning_rate": 3.7286376903822306e-08,
      "loss": 2.0209,
      "step": 74703
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9813932180404663,
      "learning_rate": 3.7250862766721184e-08,
      "loss": 2.1889,
      "step": 74704
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0713626146316528,
      "learning_rate": 3.7215365519186476e-08,
      "loss": 2.0741,
      "step": 74705
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0670987367630005,
      "learning_rate": 3.717988516127813e-08,
      "loss": 2.2895,
      "step": 74706
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0760434865951538,
      "learning_rate": 3.714442169305499e-08,
      "loss": 2.3128,
      "step": 74707
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0669816732406616,
      "learning_rate": 3.7108975114579226e-08,
      "loss": 2.4563,
      "step": 74708
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1288141012191772,
      "learning_rate": 3.7073545425907465e-08,
      "loss": 2.2264,
      "step": 74709
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.020828366279602,
      "learning_rate": 3.70381326271041e-08,
      "loss": 2.1325,
      "step": 74710
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.065171718597412,
      "learning_rate": 3.700273671822574e-08,
      "loss": 2.3581,
      "step": 74711
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1422052383422852,
      "learning_rate": 3.696735769933457e-08,
      "loss": 2.5207,
      "step": 74712
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1049022674560547,
      "learning_rate": 3.693199557048832e-08,
      "loss": 2.2326,
      "step": 74713
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.033536672592163,
      "learning_rate": 3.689665033174916e-08,
      "loss": 2.2616,
      "step": 74714
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9985101819038391,
      "learning_rate": 3.686132198317593e-08,
      "loss": 2.3501,
      "step": 74715
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0256359577178955,
      "learning_rate": 3.6826010524828594e-08,
      "loss": 2.3201,
      "step": 74716
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1681146621704102,
      "learning_rate": 3.6790715956767085e-08,
      "loss": 2.1924,
      "step": 74717
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0298540592193604,
      "learning_rate": 3.675543827905137e-08,
      "loss": 2.4202,
      "step": 74718
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1836864948272705,
      "learning_rate": 3.672017749174139e-08,
      "loss": 2.285,
      "step": 74719
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0570580959320068,
      "learning_rate": 3.668493359489711e-08,
      "loss": 2.4539,
      "step": 74720
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0953432321548462,
      "learning_rate": 3.664970658857736e-08,
      "loss": 2.3461,
      "step": 74721
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.049026608467102,
      "learning_rate": 3.66144964728421e-08,
      "loss": 2.0554,
      "step": 74722
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9929760098457336,
      "learning_rate": 3.6579303247751274e-08,
      "loss": 2.2094,
      "step": 74723
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1297435760498047,
      "learning_rate": 3.654412691336484e-08,
      "loss": 2.4322,
      "step": 74724
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0645440816879272,
      "learning_rate": 3.650896746974164e-08,
      "loss": 2.3422,
      "step": 74725
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1692270040512085,
      "learning_rate": 3.6473824916942735e-08,
      "loss": 2.253,
      "step": 74726
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.028043270111084,
      "learning_rate": 3.643869925502697e-08,
      "loss": 2.3692,
      "step": 74727
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0219535827636719,
      "learning_rate": 3.640359048405207e-08,
      "loss": 2.2087,
      "step": 74728
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0182095766067505,
      "learning_rate": 3.6368498604080206e-08,
      "loss": 2.4325,
      "step": 74729
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0627598762512207,
      "learning_rate": 3.633342361517023e-08,
      "loss": 2.2223,
      "step": 74730
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9822095036506653,
      "learning_rate": 3.629836551738097e-08,
      "loss": 2.2154,
      "step": 74731
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1084650754928589,
      "learning_rate": 3.626332431077129e-08,
      "loss": 2.1979,
      "step": 74732
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.09881591796875,
      "learning_rate": 3.622829999540223e-08,
      "loss": 2.3181,
      "step": 74733
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2650727033615112,
      "learning_rate": 3.619329257133153e-08,
      "loss": 2.2696,
      "step": 74734
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1415411233901978,
      "learning_rate": 3.615830203861914e-08,
      "loss": 2.5672,
      "step": 74735
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.046380877494812,
      "learning_rate": 3.612332839732502e-08,
      "loss": 2.3059,
      "step": 74736
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2016066312789917,
      "learning_rate": 3.608837164750911e-08,
      "loss": 2.373,
      "step": 74737
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9890868663787842,
      "learning_rate": 3.6053431789228044e-08,
      "loss": 2.1722,
      "step": 74738
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1220186948776245,
      "learning_rate": 3.601850882254398e-08,
      "loss": 2.0952,
      "step": 74739
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2985752820968628,
      "learning_rate": 3.5983602747513556e-08,
      "loss": 2.3292,
      "step": 74740
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0308661460876465,
      "learning_rate": 3.5948713564197824e-08,
      "loss": 2.2242,
      "step": 74741
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0962485074996948,
      "learning_rate": 3.591384127265452e-08,
      "loss": 2.3238,
      "step": 74742
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0973149538040161,
      "learning_rate": 3.5878985872943584e-08,
      "loss": 2.3773,
      "step": 74743
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0588397979736328,
      "learning_rate": 3.5844147365124984e-08,
      "loss": 2.4336,
      "step": 74744
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0746943950653076,
      "learning_rate": 3.580932574925533e-08,
      "loss": 2.1704,
      "step": 74745
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9379447102546692,
      "learning_rate": 3.5774521025396804e-08,
      "loss": 2.3455,
      "step": 74746
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3120300769805908,
      "learning_rate": 3.573973319360491e-08,
      "loss": 2.3006,
      "step": 74747
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.013008713722229,
      "learning_rate": 3.570496225394182e-08,
      "loss": 2.2342,
      "step": 74748
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.019246220588684,
      "learning_rate": 3.567020820646416e-08,
      "loss": 2.3602,
      "step": 74749
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0600332021713257,
      "learning_rate": 3.5635471051231884e-08,
      "loss": 2.4969,
      "step": 74750
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0234116315841675,
      "learning_rate": 3.560075078830383e-08,
      "loss": 2.4832,
      "step": 74751
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0898690223693848,
      "learning_rate": 3.556604741773995e-08,
      "loss": 2.2832,
      "step": 74752
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.091762661933899,
      "learning_rate": 3.553136093959686e-08,
      "loss": 2.4963,
      "step": 74753
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2811949253082275,
      "learning_rate": 3.549669135393452e-08,
      "loss": 2.4963,
      "step": 74754
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.071456789970398,
      "learning_rate": 3.546203866081066e-08,
      "loss": 2.1886,
      "step": 74755
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0078296661376953,
      "learning_rate": 3.542740286028634e-08,
      "loss": 2.3355,
      "step": 74756
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2169229984283447,
      "learning_rate": 3.5392783952418184e-08,
      "loss": 2.3143,
      "step": 74757
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1006698608398438,
      "learning_rate": 3.535818193726614e-08,
      "loss": 2.1218,
      "step": 74758
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.092824101448059,
      "learning_rate": 3.532359681488795e-08,
      "loss": 2.5019,
      "step": 74759
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0297667980194092,
      "learning_rate": 3.5289028585342444e-08,
      "loss": 2.3451,
      "step": 74760
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0589221715927124,
      "learning_rate": 3.525447724868958e-08,
      "loss": 2.3454,
      "step": 74761
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1277116537094116,
      "learning_rate": 3.5219942804985975e-08,
      "loss": 2.3286,
      "step": 74762
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1016820669174194,
      "learning_rate": 3.5185425254290474e-08,
      "loss": 2.263,
      "step": 74763
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0802028179168701,
      "learning_rate": 3.515092459666303e-08,
      "loss": 2.4105,
      "step": 74764
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1060254573822021,
      "learning_rate": 3.5116440832160256e-08,
      "loss": 2.3845,
      "step": 74765
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0896646976470947,
      "learning_rate": 3.508197396084212e-08,
      "loss": 2.173,
      "step": 74766
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1298832893371582,
      "learning_rate": 3.504752398276634e-08,
      "loss": 2.542,
      "step": 74767
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0957103967666626,
      "learning_rate": 3.5013090897991766e-08,
      "loss": 2.3182,
      "step": 74768
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1745859384536743,
      "learning_rate": 3.497867470657612e-08,
      "loss": 2.3419,
      "step": 74769
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.035122275352478,
      "learning_rate": 3.494427540857826e-08,
      "loss": 2.3193,
      "step": 74770
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0498133897781372,
      "learning_rate": 3.4909893004055893e-08,
      "loss": 2.1591,
      "step": 74771
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4422682523727417,
      "learning_rate": 3.487552749306788e-08,
      "loss": 2.1632,
      "step": 74772
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1003824472427368,
      "learning_rate": 3.4841178875671956e-08,
      "loss": 2.3816,
      "step": 74773
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0811437368392944,
      "learning_rate": 3.480684715192806e-08,
      "loss": 2.2918,
      "step": 74774
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1463183164596558,
      "learning_rate": 3.4772532321891703e-08,
      "loss": 2.5359,
      "step": 74775
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9647504687309265,
      "learning_rate": 3.473823438562285e-08,
      "loss": 2.3675,
      "step": 74776
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.103493094444275,
      "learning_rate": 3.470395334317922e-08,
      "loss": 2.201,
      "step": 74777
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.005226492881775,
      "learning_rate": 3.466968919461966e-08,
      "loss": 2.1941,
      "step": 74778
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0386768579483032,
      "learning_rate": 3.463544194000079e-08,
      "loss": 2.2188,
      "step": 74779
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1048738956451416,
      "learning_rate": 3.460121157938145e-08,
      "loss": 2.1663,
      "step": 74780
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0975220203399658,
      "learning_rate": 3.4566998112820494e-08,
      "loss": 2.3222,
      "step": 74781
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1561392545700073,
      "learning_rate": 3.453280154037342e-08,
      "loss": 2.4051,
      "step": 74782
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0992804765701294,
      "learning_rate": 3.449862186210129e-08,
      "loss": 2.4388,
      "step": 74783
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1044727563858032,
      "learning_rate": 3.446445907805962e-08,
      "loss": 2.3883,
      "step": 74784
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0054816007614136,
      "learning_rate": 3.443031318830725e-08,
      "loss": 2.2235,
      "step": 74785
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0978806018829346,
      "learning_rate": 3.4396184192903024e-08,
      "loss": 2.4645,
      "step": 74786
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.048716425895691,
      "learning_rate": 3.436207209190356e-08,
      "loss": 2.3811,
      "step": 74787
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.155998706817627,
      "learning_rate": 3.43279768853666e-08,
      "loss": 2.4054,
      "step": 74788
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9468234181404114,
      "learning_rate": 3.429389857334986e-08,
      "loss": 2.409,
      "step": 74789
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1286028623580933,
      "learning_rate": 3.425983715591219e-08,
      "loss": 2.5366,
      "step": 74790
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.022965908050537,
      "learning_rate": 3.422579263311132e-08,
      "loss": 2.2738,
      "step": 74791
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0824991464614868,
      "learning_rate": 3.4191765005003874e-08,
      "loss": 2.1918,
      "step": 74792
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3083410263061523,
      "learning_rate": 3.415775427164869e-08,
      "loss": 2.4409,
      "step": 74793
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0735169649124146,
      "learning_rate": 3.412376043310239e-08,
      "loss": 2.6789,
      "step": 74794
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0613224506378174,
      "learning_rate": 3.4089783489422714e-08,
      "loss": 2.3201,
      "step": 74795
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0301564931869507,
      "learning_rate": 3.405582344066738e-08,
      "loss": 2.2208,
      "step": 74796
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.126636028289795,
      "learning_rate": 3.4021880286895234e-08,
      "loss": 2.3357,
      "step": 74797
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9893749356269836,
      "learning_rate": 3.39879540281618e-08,
      "loss": 2.1662,
      "step": 74798
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.162206768989563,
      "learning_rate": 3.3954044664525896e-08,
      "loss": 2.2454,
      "step": 74799
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0317133665084839,
      "learning_rate": 3.3920152196044165e-08,
      "loss": 2.1812,
      "step": 74800
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0804975032806396,
      "learning_rate": 3.388627662277433e-08,
      "loss": 2.3436,
      "step": 74801
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.150437593460083,
      "learning_rate": 3.385241794477523e-08,
      "loss": 2.367,
      "step": 74802
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0497701168060303,
      "learning_rate": 3.381857616210127e-08,
      "loss": 2.3449,
      "step": 74803
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1538488864898682,
      "learning_rate": 3.378475127481351e-08,
      "loss": 2.2642,
      "step": 74804
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2365385293960571,
      "learning_rate": 3.375094328296635e-08,
      "loss": 2.3681,
      "step": 74805
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1569613218307495,
      "learning_rate": 3.371715218661753e-08,
      "loss": 2.1239,
      "step": 74806
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1432511806488037,
      "learning_rate": 3.368337798582588e-08,
      "loss": 2.2627,
      "step": 74807
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.250025987625122,
      "learning_rate": 3.3649620680648034e-08,
      "loss": 2.1242,
      "step": 74808
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1470388174057007,
      "learning_rate": 3.36158802711406e-08,
      "loss": 2.2233,
      "step": 74809
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2424638271331787,
      "learning_rate": 3.3582156757361314e-08,
      "loss": 2.3187,
      "step": 74810
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0603387355804443,
      "learning_rate": 3.3548450139365694e-08,
      "loss": 2.2776,
      "step": 74811
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.056581974029541,
      "learning_rate": 3.351476041721369e-08,
      "loss": 2.657,
      "step": 74812
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.118517279624939,
      "learning_rate": 3.3481087590960804e-08,
      "loss": 2.3814,
      "step": 74813
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.380950927734375,
      "learning_rate": 3.3447431660663666e-08,
      "loss": 2.4858,
      "step": 74814
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1179295778274536,
      "learning_rate": 3.341379262638111e-08,
      "loss": 2.3335,
      "step": 74815
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.091108798980713,
      "learning_rate": 3.338017048816866e-08,
      "loss": 2.5235,
      "step": 74816
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.115258812904358,
      "learning_rate": 3.334656524608293e-08,
      "loss": 2.5272,
      "step": 74817
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.117168664932251,
      "learning_rate": 3.331297690018276e-08,
      "loss": 2.2892,
      "step": 74818
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1357338428497314,
      "learning_rate": 3.327940545052366e-08,
      "loss": 2.7496,
      "step": 74819
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0050667524337769,
      "learning_rate": 3.324585089716226e-08,
      "loss": 2.2649,
      "step": 74820
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9717680215835571,
      "learning_rate": 3.321231324015628e-08,
      "loss": 2.2672,
      "step": 74821
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1296120882034302,
      "learning_rate": 3.317879247956346e-08,
      "loss": 2.2832,
      "step": 74822
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.065348744392395,
      "learning_rate": 3.31452886154382e-08,
      "loss": 2.5518,
      "step": 74823
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1298155784606934,
      "learning_rate": 3.311180164783934e-08,
      "loss": 2.3023,
      "step": 74824
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2259165048599243,
      "learning_rate": 3.3078331576822386e-08,
      "loss": 2.6484,
      "step": 74825
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0901333093643188,
      "learning_rate": 3.304487840244619e-08,
      "loss": 2.2451,
      "step": 74826
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.114542007446289,
      "learning_rate": 3.3011442124764034e-08,
      "loss": 1.9515,
      "step": 74827
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1722407341003418,
      "learning_rate": 3.297802274383588e-08,
      "loss": 2.5515,
      "step": 74828
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1019647121429443,
      "learning_rate": 3.2944620259716123e-08,
      "loss": 2.1835,
      "step": 74829
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.011144995689392,
      "learning_rate": 3.2911234672462485e-08,
      "loss": 2.1919,
      "step": 74830
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0566883087158203,
      "learning_rate": 3.287786598213161e-08,
      "loss": 2.2925,
      "step": 74831
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0306673049926758,
      "learning_rate": 3.2844514188778985e-08,
      "loss": 2.3507,
      "step": 74832
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.146193504333496,
      "learning_rate": 3.281117929246236e-08,
      "loss": 2.3515,
      "step": 74833
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1996811628341675,
      "learning_rate": 3.277786129323834e-08,
      "loss": 2.5237,
      "step": 74834
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.158877968788147,
      "learning_rate": 3.274456019116246e-08,
      "loss": 2.3688,
      "step": 74835
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0660459995269775,
      "learning_rate": 3.271127598629131e-08,
      "loss": 2.3633,
      "step": 74836
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0703190565109253,
      "learning_rate": 3.267800867868265e-08,
      "loss": 2.2438,
      "step": 74837
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.047237753868103,
      "learning_rate": 3.264475826839086e-08,
      "loss": 2.3698,
      "step": 74838
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0785309076309204,
      "learning_rate": 3.261152475547369e-08,
      "loss": 2.4112,
      "step": 74839
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0128519535064697,
      "learning_rate": 3.2578308139986635e-08,
      "loss": 2.2155,
      "step": 74840
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1337862014770508,
      "learning_rate": 3.254510842198633e-08,
      "loss": 2.3938,
      "step": 74841
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.116987943649292,
      "learning_rate": 3.251192560153049e-08,
      "loss": 2.3816,
      "step": 74842
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1995630264282227,
      "learning_rate": 3.2478759678672425e-08,
      "loss": 2.4667,
      "step": 74843
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.098490834236145,
      "learning_rate": 3.244561065347207e-08,
      "loss": 2.5127,
      "step": 74844
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0487860441207886,
      "learning_rate": 3.241247852598162e-08,
      "loss": 2.1546,
      "step": 74845
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1013840436935425,
      "learning_rate": 3.23793632962599e-08,
      "loss": 2.4514,
      "step": 74846
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1477919816970825,
      "learning_rate": 3.234626496436244e-08,
      "loss": 2.3544,
      "step": 74847
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0982345342636108,
      "learning_rate": 3.231318353034474e-08,
      "loss": 2.4416,
      "step": 74848
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.032166600227356,
      "learning_rate": 3.228011899426453e-08,
      "loss": 2.1496,
      "step": 74849
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.121832251548767,
      "learning_rate": 3.224707135617511e-08,
      "loss": 2.2332,
      "step": 74850
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9614355564117432,
      "learning_rate": 3.221404061613531e-08,
      "loss": 2.5218,
      "step": 74851
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2868889570236206,
      "learning_rate": 3.218102677419954e-08,
      "loss": 2.3092,
      "step": 74852
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0062215328216553,
      "learning_rate": 3.214802983042442e-08,
      "loss": 2.2172,
      "step": 74853
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0016087293624878,
      "learning_rate": 3.211504978486546e-08,
      "loss": 2.2459,
      "step": 74854
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9648090600967407,
      "learning_rate": 3.208208663757817e-08,
      "loss": 2.084,
      "step": 74855
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1680046319961548,
      "learning_rate": 3.204914038861917e-08,
      "loss": 2.1934,
      "step": 74856
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1761890649795532,
      "learning_rate": 3.2016211038045085e-08,
      "loss": 2.3006,
      "step": 74857
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1003278493881226,
      "learning_rate": 3.198329858591032e-08,
      "loss": 2.2344,
      "step": 74858
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.215700626373291,
      "learning_rate": 3.195040303227148e-08,
      "loss": 2.2526,
      "step": 74859
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0773470401763916,
      "learning_rate": 3.1917524377182986e-08,
      "loss": 2.1884,
      "step": 74860
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0508453845977783,
      "learning_rate": 3.1884662620702556e-08,
      "loss": 2.1867,
      "step": 74861
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0551948547363281,
      "learning_rate": 3.1851817762883484e-08,
      "loss": 2.4737,
      "step": 74862
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1358628273010254,
      "learning_rate": 3.181898980378462e-08,
      "loss": 2.3833,
      "step": 74863
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1703977584838867,
      "learning_rate": 3.178617874345924e-08,
      "loss": 2.2705,
      "step": 74864
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1872236728668213,
      "learning_rate": 3.1753384581962864e-08,
      "loss": 2.2959,
      "step": 74865
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0373260974884033,
      "learning_rate": 3.1720607319353226e-08,
      "loss": 2.4257,
      "step": 74866
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9958287477493286,
      "learning_rate": 3.168784695568361e-08,
      "loss": 2.2127,
      "step": 74867
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2477434873580933,
      "learning_rate": 3.1655103491010644e-08,
      "loss": 2.3136,
      "step": 74868
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1277728080749512,
      "learning_rate": 3.162237692538872e-08,
      "loss": 2.5128,
      "step": 74869
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.019782304763794,
      "learning_rate": 3.158966725887558e-08,
      "loss": 2.4174,
      "step": 74870
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1145169734954834,
      "learning_rate": 3.1556974491524505e-08,
      "loss": 2.1635,
      "step": 74871
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0942087173461914,
      "learning_rate": 3.1524298623393234e-08,
      "loss": 2.3817,
      "step": 74872
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1147449016571045,
      "learning_rate": 3.1491639654533946e-08,
      "loss": 2.5101,
      "step": 74873
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1086325645446777,
      "learning_rate": 3.145899758500437e-08,
      "loss": 2.3779,
      "step": 74874
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0212140083312988,
      "learning_rate": 3.142637241485891e-08,
      "loss": 2.3492,
      "step": 74875
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.106379747390747,
      "learning_rate": 3.139376414415418e-08,
      "loss": 2.3504,
      "step": 74876
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9826644659042358,
      "learning_rate": 3.1361172772943486e-08,
      "loss": 2.1009,
      "step": 74877
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0680218935012817,
      "learning_rate": 3.132859830128343e-08,
      "loss": 2.1387,
      "step": 74878
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.0368207693099976,
      "learning_rate": 3.129604072922843e-08,
      "loss": 2.3916,
      "step": 74879
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.9829803109169006,
      "learning_rate": 3.1263500056833986e-08,
      "loss": 2.3439,
      "step": 74880
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0939732789993286,
      "learning_rate": 3.1230976284155614e-08,
      "loss": 2.3027,
      "step": 74881
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1810880899429321,
      "learning_rate": 3.119846941124882e-08,
      "loss": 2.2889,
      "step": 74882
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1733129024505615,
      "learning_rate": 3.116597943816801e-08,
      "loss": 2.3527,
      "step": 74883
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0155439376831055,
      "learning_rate": 3.113350636496759e-08,
      "loss": 2.4987,
      "step": 74884
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0751999616622925,
      "learning_rate": 3.1101050191704174e-08,
      "loss": 2.5663,
      "step": 74885
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.017063856124878,
      "learning_rate": 3.106861091843216e-08,
      "loss": 2.2449,
      "step": 74886
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1244642734527588,
      "learning_rate": 3.103618854520485e-08,
      "loss": 2.2703,
      "step": 74887
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0915815830230713,
      "learning_rate": 3.100378307207996e-08,
      "loss": 2.6412,
      "step": 74888
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0582528114318848,
      "learning_rate": 3.0971394499110796e-08,
      "loss": 2.4214,
      "step": 74889
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0894527435302734,
      "learning_rate": 3.093902282635397e-08,
      "loss": 2.3883,
      "step": 74890
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0762137174606323,
      "learning_rate": 3.090666805386167e-08,
      "loss": 2.2397,
      "step": 74891
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0124341249465942,
      "learning_rate": 3.08743301816905e-08,
      "loss": 2.3449,
      "step": 74892
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2086549997329712,
      "learning_rate": 3.084200920989488e-08,
      "loss": 2.3126,
      "step": 74893
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0529801845550537,
      "learning_rate": 3.080970513852921e-08,
      "loss": 1.9789,
      "step": 74894
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1299052238464355,
      "learning_rate": 3.077741796764899e-08,
      "loss": 2.1995,
      "step": 74895
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0997153520584106,
      "learning_rate": 3.074514769730863e-08,
      "loss": 2.496,
      "step": 74896
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2546876668930054,
      "learning_rate": 3.071289432756363e-08,
      "loss": 2.377,
      "step": 74897
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.11826491355896,
      "learning_rate": 3.06806578584673e-08,
      "loss": 2.2563,
      "step": 74898
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1898850202560425,
      "learning_rate": 3.064843829007513e-08,
      "loss": 2.3676,
      "step": 74899
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1495765447616577,
      "learning_rate": 3.061623562244043e-08,
      "loss": 2.2664,
      "step": 74900
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2892725467681885,
      "learning_rate": 3.05840498556198e-08,
      "loss": 2.3252,
      "step": 74901
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2273162603378296,
      "learning_rate": 3.055188098966655e-08,
      "loss": 2.2383,
      "step": 74902
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.079209327697754,
      "learning_rate": 3.051972902463618e-08,
      "loss": 2.2808,
      "step": 74903
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1334846019744873,
      "learning_rate": 3.048759396058198e-08,
      "loss": 2.3583,
      "step": 74904
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0234730243682861,
      "learning_rate": 3.045547579755837e-08,
      "loss": 2.2895,
      "step": 74905
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0055888891220093,
      "learning_rate": 3.042337453562083e-08,
      "loss": 2.2615,
      "step": 74906
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1149696111679077,
      "learning_rate": 3.0391290174823786e-08,
      "loss": 2.0984,
      "step": 74907
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1268703937530518,
      "learning_rate": 3.035922271522162e-08,
      "loss": 2.345,
      "step": 74908
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.114896535873413,
      "learning_rate": 3.032717215686764e-08,
      "loss": 2.2811,
      "step": 74909
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1266045570373535,
      "learning_rate": 3.029513849981736e-08,
      "loss": 2.1515,
      "step": 74910
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.012040138244629,
      "learning_rate": 3.026312174412405e-08,
      "loss": 2.1469,
      "step": 74911
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3478885889053345,
      "learning_rate": 3.023112188984212e-08,
      "loss": 2.1771,
      "step": 74912
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0720055103302002,
      "learning_rate": 3.0199138937027083e-08,
      "loss": 2.2522,
      "step": 74913
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1351033449172974,
      "learning_rate": 3.016717288573112e-08,
      "loss": 2.3402,
      "step": 74914
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1153512001037598,
      "learning_rate": 3.0135223736010856e-08,
      "loss": 2.2293,
      "step": 74915
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9774708151817322,
      "learning_rate": 3.0103291487918465e-08,
      "loss": 2.1301,
      "step": 74916
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.009298324584961,
      "learning_rate": 3.007137614150835e-08,
      "loss": 2.3278,
      "step": 74917
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9765578508377075,
      "learning_rate": 3.003947769683602e-08,
      "loss": 2.1475,
      "step": 74918
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0325323343276978,
      "learning_rate": 3.0007596153953655e-08,
      "loss": 2.499,
      "step": 74919
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.017221450805664,
      "learning_rate": 2.997573151291677e-08,
      "loss": 2.4413,
      "step": 74920
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1793218851089478,
      "learning_rate": 2.994388377377866e-08,
      "loss": 2.4913,
      "step": 74921
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.023239254951477,
      "learning_rate": 2.991205293659372e-08,
      "loss": 2.27,
      "step": 74922
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1379395723342896,
      "learning_rate": 2.9880239001416345e-08,
      "loss": 2.2033,
      "step": 74923
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0229188203811646,
      "learning_rate": 2.9848441968298725e-08,
      "loss": 2.3002,
      "step": 74924
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1408616304397583,
      "learning_rate": 2.9816661837296366e-08,
      "loss": 2.0873,
      "step": 74925
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2217286825180054,
      "learning_rate": 2.9784898608462563e-08,
      "loss": 2.4296,
      "step": 74926
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.255238652229309,
      "learning_rate": 2.9753152281850604e-08,
      "loss": 2.3102,
      "step": 74927
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0197397470474243,
      "learning_rate": 2.9721422857516003e-08,
      "loss": 2.2576,
      "step": 74928
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0457197427749634,
      "learning_rate": 2.9689710335510935e-08,
      "loss": 2.1728,
      "step": 74929
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1218150854110718,
      "learning_rate": 2.9658014715889805e-08,
      "loss": 2.3575,
      "step": 74930
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0056551694869995,
      "learning_rate": 2.96263359987059e-08,
      "loss": 2.3252,
      "step": 74931
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0348023176193237,
      "learning_rate": 2.9594674184013626e-08,
      "loss": 2.268,
      "step": 74932
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0157692432403564,
      "learning_rate": 2.9563029271866273e-08,
      "loss": 2.304,
      "step": 74933
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.21285879611969,
      "learning_rate": 2.9531401262317128e-08,
      "loss": 2.3155,
      "step": 74934
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7770094871520996,
      "learning_rate": 2.9499790155420592e-08,
      "loss": 2.285,
      "step": 74935
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2170110940933228,
      "learning_rate": 2.946819595122996e-08,
      "loss": 2.2881,
      "step": 74936
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0673424005508423,
      "learning_rate": 2.943661864979852e-08,
      "loss": 2.2478,
      "step": 74937
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2343705892562866,
      "learning_rate": 2.9405058251179563e-08,
      "loss": 2.1463,
      "step": 74938
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0319995880126953,
      "learning_rate": 2.937351475542749e-08,
      "loss": 2.4061,
      "step": 74939
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1722455024719238,
      "learning_rate": 2.9341988162595593e-08,
      "loss": 2.4566,
      "step": 74940
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.062690258026123,
      "learning_rate": 2.931047847273605e-08,
      "loss": 2.3597,
      "step": 74941
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0673854351043701,
      "learning_rate": 2.927898568590437e-08,
      "loss": 2.1869,
      "step": 74942
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.059823751449585,
      "learning_rate": 2.924750980215163e-08,
      "loss": 2.414,
      "step": 74943
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0551141500473022,
      "learning_rate": 2.9216050821533336e-08,
      "loss": 2.3074,
      "step": 74944
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.06432044506073,
      "learning_rate": 2.9184608744101673e-08,
      "loss": 2.35,
      "step": 74945
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9923461675643921,
      "learning_rate": 2.9153183569909927e-08,
      "loss": 2.2478,
      "step": 74946
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1757826805114746,
      "learning_rate": 2.912177529901139e-08,
      "loss": 2.2763,
      "step": 74947
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1182092428207397,
      "learning_rate": 2.9090383931460464e-08,
      "loss": 2.249,
      "step": 74948
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0293989181518555,
      "learning_rate": 2.905900946730933e-08,
      "loss": 2.3118,
      "step": 74949
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0923572778701782,
      "learning_rate": 2.9027651906610166e-08,
      "loss": 2.5058,
      "step": 74950
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1016474962234497,
      "learning_rate": 2.8996311249417375e-08,
      "loss": 2.4053,
      "step": 74951
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9982686638832092,
      "learning_rate": 2.8964987495784247e-08,
      "loss": 2.3879,
      "step": 74952
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1960340738296509,
      "learning_rate": 2.8933680645764072e-08,
      "loss": 1.993,
      "step": 74953
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.028925895690918,
      "learning_rate": 2.8902390699409033e-08,
      "loss": 2.2798,
      "step": 74954
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6020411252975464,
      "learning_rate": 2.887111765677242e-08,
      "loss": 2.2949,
      "step": 74955
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.174807071685791,
      "learning_rate": 2.883986151790752e-08,
      "loss": 2.2732,
      "step": 74956
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.115034580230713,
      "learning_rate": 2.8808622282866516e-08,
      "loss": 2.2095,
      "step": 74957
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.034670352935791,
      "learning_rate": 2.877739995170381e-08,
      "loss": 2.085,
      "step": 74958
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.7458832263946533,
      "learning_rate": 2.874619452447158e-08,
      "loss": 2.3292,
      "step": 74959
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0284432172775269,
      "learning_rate": 2.8715006001222012e-08,
      "loss": 2.3594,
      "step": 74960
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2487280368804932,
      "learning_rate": 2.86838343820095e-08,
      "loss": 2.2254,
      "step": 74961
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1233083009719849,
      "learning_rate": 2.865267966688512e-08,
      "loss": 2.5323,
      "step": 74962
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.075365662574768,
      "learning_rate": 2.862154185590438e-08,
      "loss": 2.2348,
      "step": 74963
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0487431287765503,
      "learning_rate": 2.8590420949116125e-08,
      "loss": 2.2476,
      "step": 74964
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0904518365859985,
      "learning_rate": 2.8559316946576986e-08,
      "loss": 2.283,
      "step": 74965
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0775243043899536,
      "learning_rate": 2.852822984833692e-08,
      "loss": 2.2126,
      "step": 74966
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1010993719100952,
      "learning_rate": 2.8497159654450323e-08,
      "loss": 2.2625,
      "step": 74967
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0938767194747925,
      "learning_rate": 2.846610636496938e-08,
      "loss": 2.3535,
      "step": 74968
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.210890769958496,
      "learning_rate": 2.8435069979946272e-08,
      "loss": 2.2183,
      "step": 74969
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1345363855361938,
      "learning_rate": 2.8404050499433178e-08,
      "loss": 2.3248,
      "step": 74970
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9993183016777039,
      "learning_rate": 2.8373047923484497e-08,
      "loss": 2.1683,
      "step": 74971
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1767760515213013,
      "learning_rate": 2.8342062252151302e-08,
      "loss": 2.5124,
      "step": 74972
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0791593790054321,
      "learning_rate": 2.8311093485486884e-08,
      "loss": 2.3016,
      "step": 74973
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0292929410934448,
      "learning_rate": 2.828014162354342e-08,
      "loss": 2.0404,
      "step": 74974
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9809314012527466,
      "learning_rate": 2.8249206666373098e-08,
      "loss": 2.0696,
      "step": 74975
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.98427414894104,
      "learning_rate": 2.82182886140292e-08,
      "loss": 2.1183,
      "step": 74976
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1890740394592285,
      "learning_rate": 2.818738746656391e-08,
      "loss": 2.2772,
      "step": 74977
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0277589559555054,
      "learning_rate": 2.81565032240283e-08,
      "loss": 2.3035,
      "step": 74978
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.135591745376587,
      "learning_rate": 2.8125635886476766e-08,
      "loss": 2.3209,
      "step": 74979
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0548373460769653,
      "learning_rate": 2.8094785453959273e-08,
      "loss": 2.3588,
      "step": 74980
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1762211322784424,
      "learning_rate": 2.806395192653022e-08,
      "loss": 2.3402,
      "step": 74981
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9785916805267334,
      "learning_rate": 2.8033135304241788e-08,
      "loss": 2.4379,
      "step": 74982
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1527376174926758,
      "learning_rate": 2.8002335587143935e-08,
      "loss": 2.4091,
      "step": 74983
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0596550703048706,
      "learning_rate": 2.7971552775292178e-08,
      "loss": 2.3439,
      "step": 74984
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.989715576171875,
      "learning_rate": 2.794078686873536e-08,
      "loss": 2.1537,
      "step": 74985
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1058247089385986,
      "learning_rate": 2.7910037867528996e-08,
      "loss": 2.3035,
      "step": 74986
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.133614182472229,
      "learning_rate": 2.7879305771721933e-08,
      "loss": 2.5004,
      "step": 74987
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1255972385406494,
      "learning_rate": 2.7848590581368573e-08,
      "loss": 2.4292,
      "step": 74988
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0470235347747803,
      "learning_rate": 2.7817892296519987e-08,
      "loss": 2.6263,
      "step": 74989
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3139487504959106,
      "learning_rate": 2.778721091722947e-08,
      "loss": 2.3913,
      "step": 74990
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0678614377975464,
      "learning_rate": 2.7756546443546972e-08,
      "loss": 2.4722,
      "step": 74991
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0515310764312744,
      "learning_rate": 2.7725898875525792e-08,
      "loss": 2.3209,
      "step": 74992
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0865201950073242,
      "learning_rate": 2.7695268213218108e-08,
      "loss": 2.1974,
      "step": 74993
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0367573499679565,
      "learning_rate": 2.766465445667499e-08,
      "loss": 2.2331,
      "step": 74994
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1257075071334839,
      "learning_rate": 2.7634057605948616e-08,
      "loss": 2.3887,
      "step": 74995
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0713999271392822,
      "learning_rate": 2.7603477661091172e-08,
      "loss": 2.1649,
      "step": 74996
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1567902565002441,
      "learning_rate": 2.7572914622153724e-08,
      "loss": 2.3804,
      "step": 74997
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1105952262878418,
      "learning_rate": 2.7542368489189563e-08,
      "loss": 2.5171,
      "step": 74998
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1388911008834839,
      "learning_rate": 2.7511839262248653e-08,
      "loss": 2.3883,
      "step": 74999
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0940300226211548,
      "learning_rate": 2.748132694138428e-08,
      "loss": 2.1061,
      "step": 75000
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9985148906707764,
      "learning_rate": 2.7450831526647514e-08,
      "loss": 2.594,
      "step": 75001
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.10585618019104,
      "learning_rate": 2.742035301809054e-08,
      "loss": 2.2606,
      "step": 75002
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0983974933624268,
      "learning_rate": 2.7389891415763314e-08,
      "loss": 2.141,
      "step": 75003
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1020179986953735,
      "learning_rate": 2.735944671972024e-08,
      "loss": 2.2788,
      "step": 75004
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2712836265563965,
      "learning_rate": 2.7329018930010166e-08,
      "loss": 2.0536,
      "step": 75005
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1834595203399658,
      "learning_rate": 2.729860804668638e-08,
      "loss": 2.3026,
      "step": 75006
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1577444076538086,
      "learning_rate": 2.726821406979996e-08,
      "loss": 2.3556,
      "step": 75007
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1778383255004883,
      "learning_rate": 2.7237836999403076e-08,
      "loss": 2.3836,
      "step": 75008
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9994767308235168,
      "learning_rate": 2.7207476835545698e-08,
      "loss": 2.3638,
      "step": 75009
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.480207920074463,
      "learning_rate": 2.717713357828111e-08,
      "loss": 2.3058,
      "step": 75010
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1371625661849976,
      "learning_rate": 2.7146807227659278e-08,
      "loss": 2.2501,
      "step": 75011
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0617462396621704,
      "learning_rate": 2.7116497783732378e-08,
      "loss": 2.2466,
      "step": 75012
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0461198091506958,
      "learning_rate": 2.708620524655148e-08,
      "loss": 2.3152,
      "step": 75013
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0515433549880981,
      "learning_rate": 2.7055929616167654e-08,
      "loss": 2.2888,
      "step": 75014
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2774794101715088,
      "learning_rate": 2.7025670892633082e-08,
      "loss": 2.3725,
      "step": 75015
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0492700338363647,
      "learning_rate": 2.6995429075998836e-08,
      "loss": 2.3489,
      "step": 75016
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1994476318359375,
      "learning_rate": 2.6965204166315984e-08,
      "loss": 2.1846,
      "step": 75017
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.036097764968872,
      "learning_rate": 2.6934996163635597e-08,
      "loss": 2.3073,
      "step": 75018
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0219926834106445,
      "learning_rate": 2.6904805068009853e-08,
      "loss": 2.3804,
      "step": 75019
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0691763162612915,
      "learning_rate": 2.6874630879487606e-08,
      "loss": 2.165,
      "step": 75020
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0737262964248657,
      "learning_rate": 2.6844473598122145e-08,
      "loss": 2.2721,
      "step": 75021
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.076585054397583,
      "learning_rate": 2.681433322396454e-08,
      "loss": 2.2791,
      "step": 75022
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0532360076904297,
      "learning_rate": 2.678420975706475e-08,
      "loss": 2.1746,
      "step": 75023
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0090845823287964,
      "learning_rate": 2.6754103197473846e-08,
      "loss": 2.3454,
      "step": 75024
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1613622903823853,
      "learning_rate": 2.672401354524512e-08,
      "loss": 2.2164,
      "step": 75025
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0579187870025635,
      "learning_rate": 2.669394080042631e-08,
      "loss": 2.4103,
      "step": 75026
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1371629238128662,
      "learning_rate": 2.6663884963070706e-08,
      "loss": 2.4369,
      "step": 75027
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1256439685821533,
      "learning_rate": 2.663384603322827e-08,
      "loss": 2.4041,
      "step": 75028
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1467220783233643,
      "learning_rate": 2.6603824010950076e-08,
      "loss": 2.5706,
      "step": 75029
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0425076484680176,
      "learning_rate": 2.6573818896287184e-08,
      "loss": 2.358,
      "step": 75030
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.106458306312561,
      "learning_rate": 2.654383068929067e-08,
      "loss": 2.5289,
      "step": 75031
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9934728145599365,
      "learning_rate": 2.651385939001161e-08,
      "loss": 2.3088,
      "step": 75032
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1350070238113403,
      "learning_rate": 2.6483904998499955e-08,
      "loss": 2.357,
      "step": 75033
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.073560118675232,
      "learning_rate": 2.6453967514805667e-08,
      "loss": 2.335,
      "step": 75034
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.08810555934906,
      "learning_rate": 2.642404693898204e-08,
      "loss": 2.3314,
      "step": 75035
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0665313005447388,
      "learning_rate": 2.639414327107792e-08,
      "loss": 2.2176,
      "step": 75036
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0659133195877075,
      "learning_rate": 2.636425651114438e-08,
      "loss": 2.3215,
      "step": 75037
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.136527419090271,
      "learning_rate": 2.6334386659233603e-08,
      "loss": 2.3444,
      "step": 75038
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1214792728424072,
      "learning_rate": 2.630453371539332e-08,
      "loss": 2.2652,
      "step": 75039
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1234737634658813,
      "learning_rate": 2.627469767967572e-08,
      "loss": 2.1671,
      "step": 75040
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.072621464729309,
      "learning_rate": 2.624487855213187e-08,
      "loss": 2.4092,
      "step": 75041
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2246898412704468,
      "learning_rate": 2.621507633281173e-08,
      "loss": 2.3356,
      "step": 75042
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1564630270004272,
      "learning_rate": 2.6185291021766367e-08,
      "loss": 2.3469,
      "step": 75043
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3739553689956665,
      "learning_rate": 2.615552261904464e-08,
      "loss": 2.3326,
      "step": 75044
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0799379348754883,
      "learning_rate": 2.6125771124698716e-08,
      "loss": 2.203,
      "step": 75045
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0620698928833008,
      "learning_rate": 2.609603653877857e-08,
      "loss": 2.4331,
      "step": 75046
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0302265882492065,
      "learning_rate": 2.606631886133415e-08,
      "loss": 2.2258,
      "step": 75047
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0134868621826172,
      "learning_rate": 2.6036618092416533e-08,
      "loss": 2.0918,
      "step": 75048
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0872091054916382,
      "learning_rate": 2.6006934232075677e-08,
      "loss": 2.2182,
      "step": 75049
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0066415071487427,
      "learning_rate": 2.597726728036265e-08,
      "loss": 2.4192,
      "step": 75050
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1191099882125854,
      "learning_rate": 2.5947617237326306e-08,
      "loss": 2.2962,
      "step": 75051
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3338669538497925,
      "learning_rate": 2.591798410301771e-08,
      "loss": 2.2693,
      "step": 75052
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1253018379211426,
      "learning_rate": 2.588836787748683e-08,
      "loss": 2.1806,
      "step": 75053
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1985266208648682,
      "learning_rate": 2.585876856078473e-08,
      "loss": 2.1685,
      "step": 75054
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.221243143081665,
      "learning_rate": 2.5829186152960263e-08,
      "loss": 2.4923,
      "step": 75055
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.083565592765808,
      "learning_rate": 2.5799620654064496e-08,
      "loss": 2.4309,
      "step": 75056
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0018677711486816,
      "learning_rate": 2.5770072064147388e-08,
      "loss": 2.3704,
      "step": 75057
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9828217625617981,
      "learning_rate": 2.5740540383258904e-08,
      "loss": 2.4031,
      "step": 75058
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9614041447639465,
      "learning_rate": 2.5711025611450115e-08,
      "loss": 2.3583,
      "step": 75059
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0509883165359497,
      "learning_rate": 2.5681527748768754e-08,
      "loss": 2.1623,
      "step": 75060
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9805105328559875,
      "learning_rate": 2.5652046795267004e-08,
      "loss": 2.3203,
      "step": 75061
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1275653839111328,
      "learning_rate": 2.5622582750993718e-08,
      "loss": 2.2769,
      "step": 75062
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9633618593215942,
      "learning_rate": 2.5593135615998854e-08,
      "loss": 2.2663,
      "step": 75063
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1372252702713013,
      "learning_rate": 2.5563705390333483e-08,
      "loss": 2.471,
      "step": 75064
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.202314853668213,
      "learning_rate": 2.553429207404534e-08,
      "loss": 2.2975,
      "step": 75065
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0461716651916504,
      "learning_rate": 2.5504895667186613e-08,
      "loss": 2.0982,
      "step": 75066
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0358963012695312,
      "learning_rate": 2.5475516169806148e-08,
      "loss": 2.3667,
      "step": 75067
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1067842245101929,
      "learning_rate": 2.5446153581953905e-08,
      "loss": 2.1229,
      "step": 75068
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1002129316329956,
      "learning_rate": 2.5416807903678732e-08,
      "loss": 2.2754,
      "step": 75069
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0804311037063599,
      "learning_rate": 2.53874791350317e-08,
      "loss": 2.1541,
      "step": 75070
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0080695152282715,
      "learning_rate": 2.5358167276061664e-08,
      "loss": 2.2454,
      "step": 75071
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2299156188964844,
      "learning_rate": 2.5328872326818577e-08,
      "loss": 2.0974,
      "step": 75072
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9455556869506836,
      "learning_rate": 2.52995942873524e-08,
      "loss": 2.2066,
      "step": 75073
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0781211853027344,
      "learning_rate": 2.527033315771199e-08,
      "loss": 2.2942,
      "step": 75074
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1123710870742798,
      "learning_rate": 2.524108893794841e-08,
      "loss": 2.2759,
      "step": 75075
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0977697372436523,
      "learning_rate": 2.52118616281094e-08,
      "loss": 2.4405,
      "step": 75076
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1620622873306274,
      "learning_rate": 2.5182651228246037e-08,
      "loss": 2.3942,
      "step": 75077
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1988369226455688,
      "learning_rate": 2.515345773840605e-08,
      "loss": 2.2874,
      "step": 75078
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1380141973495483,
      "learning_rate": 2.512428115864163e-08,
      "loss": 1.8792,
      "step": 75079
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0489574670791626,
      "learning_rate": 2.5095121488999396e-08,
      "loss": 2.3917,
      "step": 75080
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.984358549118042,
      "learning_rate": 2.5065978729531538e-08,
      "loss": 2.3781,
      "step": 75081
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0720385313034058,
      "learning_rate": 2.503685288028468e-08,
      "loss": 2.3039,
      "step": 75082
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0599573850631714,
      "learning_rate": 2.5007743941309894e-08,
      "loss": 2.2593,
      "step": 75083
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.089599370956421,
      "learning_rate": 2.4978651912656027e-08,
      "loss": 2.3357,
      "step": 75084
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3060016632080078,
      "learning_rate": 2.4949576794373044e-08,
      "loss": 2.3771,
      "step": 75085
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1446815729141235,
      "learning_rate": 2.492051858650979e-08,
      "loss": 2.3211,
      "step": 75086
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9961891174316406,
      "learning_rate": 2.4891477289115116e-08,
      "loss": 2.2376,
      "step": 75087
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1174204349517822,
      "learning_rate": 2.4862452902237876e-08,
      "loss": 2.3538,
      "step": 75088
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9693281054496765,
      "learning_rate": 2.4833445425929135e-08,
      "loss": 2.2938,
      "step": 75089
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.014299750328064,
      "learning_rate": 2.4804454860235528e-08,
      "loss": 2.2328,
      "step": 75090
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.036193609237671,
      "learning_rate": 2.477548120520923e-08,
      "loss": 2.3732,
      "step": 75091
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1237174272537231,
      "learning_rate": 2.4746524460896872e-08,
      "loss": 2.189,
      "step": 75092
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0836212635040283,
      "learning_rate": 2.471758462734841e-08,
      "loss": 2.3266,
      "step": 75093
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0598753690719604,
      "learning_rate": 2.4688661704612703e-08,
      "loss": 2.177,
      "step": 75094
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9867347478866577,
      "learning_rate": 2.4659755692739707e-08,
      "loss": 2.21,
      "step": 75095
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.095855474472046,
      "learning_rate": 2.4630866591777157e-08,
      "loss": 2.4657,
      "step": 75096
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1066433191299438,
      "learning_rate": 2.460199440177502e-08,
      "loss": 2.1128,
      "step": 75097
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.056479573249817,
      "learning_rate": 2.457313912278103e-08,
      "loss": 2.3659,
      "step": 75098
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1948187351226807,
      "learning_rate": 2.454430075484515e-08,
      "loss": 2.1481,
      "step": 75099
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0166665315628052,
      "learning_rate": 2.4515479298016233e-08,
      "loss": 2.3646,
      "step": 75100
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.173230767250061,
      "learning_rate": 2.448667475234312e-08,
      "loss": 2.3628,
      "step": 75101
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1779053211212158,
      "learning_rate": 2.4457887117873558e-08,
      "loss": 2.2897,
      "step": 75102
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.375321865081787,
      "learning_rate": 2.4429116394657502e-08,
      "loss": 2.1893,
      "step": 75103
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1512242555618286,
      "learning_rate": 2.4400362582743808e-08,
      "loss": 2.393,
      "step": 75104
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2276064157485962,
      "learning_rate": 2.437162568218021e-08,
      "loss": 2.2515,
      "step": 75105
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.003841757774353,
      "learning_rate": 2.4342905693016673e-08,
      "loss": 2.2018,
      "step": 75106
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9755300879478455,
      "learning_rate": 2.431420261530093e-08,
      "loss": 1.975,
      "step": 75107
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0553128719329834,
      "learning_rate": 2.4285516449082947e-08,
      "loss": 2.322,
      "step": 75108
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1051042079925537,
      "learning_rate": 2.425684719440935e-08,
      "loss": 2.0811,
      "step": 75109
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0997090339660645,
      "learning_rate": 2.42281948513301e-08,
      "loss": 2.3355,
      "step": 75110
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0218950510025024,
      "learning_rate": 2.4199559419892936e-08,
      "loss": 2.3778,
      "step": 75111
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2606607675552368,
      "learning_rate": 2.417094090014671e-08,
      "loss": 2.3575,
      "step": 75112
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.083860993385315,
      "learning_rate": 2.414233929214027e-08,
      "loss": 2.4089,
      "step": 75113
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2510184049606323,
      "learning_rate": 2.4113754595922467e-08,
      "loss": 2.1555,
      "step": 75114
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0998613834381104,
      "learning_rate": 2.408518681154104e-08,
      "loss": 2.2479,
      "step": 75115
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0374687910079956,
      "learning_rate": 2.4056635939044837e-08,
      "loss": 2.0434,
      "step": 75116
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.166633129119873,
      "learning_rate": 2.402810197848271e-08,
      "loss": 2.5037,
      "step": 75117
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3445945978164673,
      "learning_rate": 2.3999584929901287e-08,
      "loss": 2.224,
      "step": 75118
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.989987850189209,
      "learning_rate": 2.397108479335053e-08,
      "loss": 2.3792,
      "step": 75119
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1753685474395752,
      "learning_rate": 2.3942601568878177e-08,
      "loss": 2.2256,
      "step": 75120
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0581858158111572,
      "learning_rate": 2.3914135256533078e-08,
      "loss": 2.5186,
      "step": 75121
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0710901021957397,
      "learning_rate": 2.3885685856361862e-08,
      "loss": 2.3146,
      "step": 75122
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0794528722763062,
      "learning_rate": 2.385725336841449e-08,
      "loss": 2.2379,
      "step": 75123
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0947046279907227,
      "learning_rate": 2.38288377927387e-08,
      "loss": 2.4236,
      "step": 75124
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0652302503585815,
      "learning_rate": 2.3800439129382237e-08,
      "loss": 2.3854,
      "step": 75125
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0963773727416992,
      "learning_rate": 2.377205737839394e-08,
      "loss": 2.3067,
      "step": 75126
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1621887683868408,
      "learning_rate": 2.3743692539820452e-08,
      "loss": 2.166,
      "step": 75127
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1238213777542114,
      "learning_rate": 2.371534461371172e-08,
      "loss": 2.2618,
      "step": 75128
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9798485040664673,
      "learning_rate": 2.3687013600115494e-08,
      "loss": 2.3364,
      "step": 75129
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.086035132408142,
      "learning_rate": 2.3658699499078397e-08,
      "loss": 2.1966,
      "step": 75130
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.965747594833374,
      "learning_rate": 2.363040231064928e-08,
      "loss": 2.2281,
      "step": 75131
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1538816690444946,
      "learning_rate": 2.3602122034876996e-08,
      "loss": 2.2694,
      "step": 75132
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.049432396888733,
      "learning_rate": 2.357385867180817e-08,
      "loss": 2.2037,
      "step": 75133
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1867945194244385,
      "learning_rate": 2.3545612221491654e-08,
      "loss": 2.5613,
      "step": 75134
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0887089967727661,
      "learning_rate": 2.3517382683974076e-08,
      "loss": 2.383,
      "step": 75135
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0483170747756958,
      "learning_rate": 2.34891700593054e-08,
      "loss": 2.3319,
      "step": 75136
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1231942176818848,
      "learning_rate": 2.346097434753114e-08,
      "loss": 2.4329,
      "step": 75137
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0030580759048462,
      "learning_rate": 2.3432795548700148e-08,
      "loss": 2.3194,
      "step": 75138
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0496078729629517,
      "learning_rate": 2.3404633662860167e-08,
      "loss": 2.2467,
      "step": 75139
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.280850887298584,
      "learning_rate": 2.337648869005893e-08,
      "loss": 2.3521,
      "step": 75140
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0904701948165894,
      "learning_rate": 2.334836063034418e-08,
      "loss": 2.2432,
      "step": 75141
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0718976259231567,
      "learning_rate": 2.3320249483763657e-08,
      "loss": 2.4795,
      "step": 75142
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1187975406646729,
      "learning_rate": 2.32921552503651e-08,
      "loss": 2.159,
      "step": 75143
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.290679693222046,
      "learning_rate": 2.326407793019625e-08,
      "loss": 2.3591,
      "step": 75144
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.206058144569397,
      "learning_rate": 2.3236017523303734e-08,
      "loss": 2.2438,
      "step": 75145
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2007403373718262,
      "learning_rate": 2.3207974029736402e-08,
      "loss": 2.2169,
      "step": 75146
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0649148225784302,
      "learning_rate": 2.3179947449541994e-08,
      "loss": 2.2342,
      "step": 75147
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.033997893333435,
      "learning_rate": 2.315193778276603e-08,
      "loss": 2.5144,
      "step": 75148
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1055935621261597,
      "learning_rate": 2.3123945029457362e-08,
      "loss": 2.3978,
      "step": 75149
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.086683988571167,
      "learning_rate": 2.3095969189662615e-08,
      "loss": 2.2332,
      "step": 75150
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0937200784683228,
      "learning_rate": 2.306801026343064e-08,
      "loss": 2.2976,
      "step": 75151
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0976771116256714,
      "learning_rate": 2.304006825080807e-08,
      "loss": 2.3628,
      "step": 75152
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1260857582092285,
      "learning_rate": 2.3012143151842637e-08,
      "loss": 2.2464,
      "step": 75153
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.216829538345337,
      "learning_rate": 2.2984234966580975e-08,
      "loss": 2.2496,
      "step": 75154
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0897873640060425,
      "learning_rate": 2.2956343695070827e-08,
      "loss": 2.196,
      "step": 75155
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1685924530029297,
      "learning_rate": 2.2928469337358816e-08,
      "loss": 2.2439,
      "step": 75156
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9907302856445312,
      "learning_rate": 2.2900611893493796e-08,
      "loss": 2.2435,
      "step": 75157
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0309369564056396,
      "learning_rate": 2.2872771363521284e-08,
      "loss": 2.3872,
      "step": 75158
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.07236909866333,
      "learning_rate": 2.284494774748902e-08,
      "loss": 2.2809,
      "step": 75159
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0552247762680054,
      "learning_rate": 2.2817141045444747e-08,
      "loss": 2.3239,
      "step": 75160
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1732730865478516,
      "learning_rate": 2.2789351257436198e-08,
      "loss": 2.2415,
      "step": 75161
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1125119924545288,
      "learning_rate": 2.276157838350779e-08,
      "loss": 2.3453,
      "step": 75162
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0759224891662598,
      "learning_rate": 2.2733822423709475e-08,
      "loss": 2.3953,
      "step": 75163
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.189910650253296,
      "learning_rate": 2.2706083378085665e-08,
      "loss": 2.4331,
      "step": 75164
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.015270709991455,
      "learning_rate": 2.267836124668632e-08,
      "loss": 2.1634,
      "step": 75165
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0823696851730347,
      "learning_rate": 2.265065602955585e-08,
      "loss": 2.1879,
      "step": 75166
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0246938467025757,
      "learning_rate": 2.2622967726743105e-08,
      "loss": 2.4342,
      "step": 75167
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9834064245223999,
      "learning_rate": 2.2595296338293603e-08,
      "loss": 2.4127,
      "step": 75168
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0675959587097168,
      "learning_rate": 2.2567641864255085e-08,
      "loss": 2.2477,
      "step": 75169
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.225826621055603,
      "learning_rate": 2.254000430467529e-08,
      "loss": 2.1345,
      "step": 75170
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.209888219833374,
      "learning_rate": 2.2512383659598624e-08,
      "loss": 2.2316,
      "step": 75171
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2493574619293213,
      "learning_rate": 2.248477992907394e-08,
      "loss": 2.3791,
      "step": 75172
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1400706768035889,
      "learning_rate": 2.2457193113147868e-08,
      "loss": 1.9832,
      "step": 75173
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0071617364883423,
      "learning_rate": 2.2429623211865924e-08,
      "loss": 2.4536,
      "step": 75174
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0012699365615845,
      "learning_rate": 2.240207022527585e-08,
      "loss": 2.1805,
      "step": 75175
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.10201895236969,
      "learning_rate": 2.2374534153424276e-08,
      "loss": 2.0373,
      "step": 75176
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1332213878631592,
      "learning_rate": 2.234701499635783e-08,
      "loss": 2.2758,
      "step": 75177
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1686644554138184,
      "learning_rate": 2.231951275412314e-08,
      "loss": 2.2021,
      "step": 75178
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0253175497055054,
      "learning_rate": 2.229202742676573e-08,
      "loss": 2.4078,
      "step": 75179
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0626533031463623,
      "learning_rate": 2.226455901433444e-08,
      "loss": 2.2008,
      "step": 75180
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0888702869415283,
      "learning_rate": 2.22371075168748e-08,
      "loss": 2.1577,
      "step": 75181
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0436017513275146,
      "learning_rate": 2.2209672934432324e-08,
      "loss": 2.313,
      "step": 75182
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1047816276550293,
      "learning_rate": 2.2182255267054753e-08,
      "loss": 2.2268,
      "step": 75183
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0155993700027466,
      "learning_rate": 2.2154854514787604e-08,
      "loss": 2.06,
      "step": 75184
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9879624247550964,
      "learning_rate": 2.212747067767862e-08,
      "loss": 2.3524,
      "step": 75185
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1492486000061035,
      "learning_rate": 2.2100103755773316e-08,
      "loss": 2.4552,
      "step": 75186
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0885334014892578,
      "learning_rate": 2.2072753749119435e-08,
      "loss": 2.2392,
      "step": 75187
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1772021055221558,
      "learning_rate": 2.2045420657760273e-08,
      "loss": 2.2985,
      "step": 75188
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9887896180152893,
      "learning_rate": 2.2018104481745796e-08,
      "loss": 2.5048,
      "step": 75189
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2715387344360352,
      "learning_rate": 2.1990805221119293e-08,
      "loss": 2.168,
      "step": 75190
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0955804586410522,
      "learning_rate": 2.1963522875929622e-08,
      "loss": 2.4171,
      "step": 75191
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0382903814315796,
      "learning_rate": 2.1936257446220078e-08,
      "loss": 2.309,
      "step": 75192
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.047522783279419,
      "learning_rate": 2.190900893203951e-08,
      "loss": 2.3034,
      "step": 75193
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0777974128723145,
      "learning_rate": 2.188177733343344e-08,
      "loss": 1.9885,
      "step": 75194
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1113073825836182,
      "learning_rate": 2.1854562650447385e-08,
      "loss": 2.5549,
      "step": 75195
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.035872220993042,
      "learning_rate": 2.1827364883127977e-08,
      "loss": 2.426,
      "step": 75196
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1486809253692627,
      "learning_rate": 2.1800184031521844e-08,
      "loss": 2.3125,
      "step": 75197
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.119098424911499,
      "learning_rate": 2.1773020095673393e-08,
      "loss": 2.3796,
      "step": 75198
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1693204641342163,
      "learning_rate": 2.1745873075630365e-08,
      "loss": 2.3454,
      "step": 75199
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2702192068099976,
      "learning_rate": 2.171874297143828e-08,
      "loss": 2.2316,
      "step": 75200
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0560927391052246,
      "learning_rate": 2.1691629783142652e-08,
      "loss": 2.2314,
      "step": 75201
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0992050170898438,
      "learning_rate": 2.166453351079012e-08,
      "loss": 2.3553,
      "step": 75202
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9927327632904053,
      "learning_rate": 2.1637454154426196e-08,
      "loss": 2.2462,
      "step": 75203
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9998220801353455,
      "learning_rate": 2.1610391714096402e-08,
      "loss": 2.1124,
      "step": 75204
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1483638286590576,
      "learning_rate": 2.1583346189847364e-08,
      "loss": 2.1048,
      "step": 75205
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0199775695800781,
      "learning_rate": 2.1556317581724606e-08,
      "loss": 2.3947,
      "step": 75206
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1351014375686646,
      "learning_rate": 2.1529305889773643e-08,
      "loss": 2.3821,
      "step": 75207
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1949570178985596,
      "learning_rate": 2.1502311114041107e-08,
      "loss": 2.2659,
      "step": 75208
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1921982765197754,
      "learning_rate": 2.1475333254572515e-08,
      "loss": 2.2492,
      "step": 75209
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0654250383377075,
      "learning_rate": 2.144837231141228e-08,
      "loss": 2.2428,
      "step": 75210
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.904778003692627,
      "learning_rate": 2.1421428284608137e-08,
      "loss": 2.1503,
      "step": 75211
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0854194164276123,
      "learning_rate": 2.1394501174204496e-08,
      "loss": 2.2013,
      "step": 75212
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.091162919998169,
      "learning_rate": 2.1367590980247988e-08,
      "loss": 2.4424,
      "step": 75213
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0410196781158447,
      "learning_rate": 2.134069770278302e-08,
      "loss": 2.401,
      "step": 75214
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0463653802871704,
      "learning_rate": 2.131382134185622e-08,
      "loss": 2.2347,
      "step": 75215
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.020194411277771,
      "learning_rate": 2.1286961897512003e-08,
      "loss": 2.3269,
      "step": 75216
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.048453688621521,
      "learning_rate": 2.1260119369795885e-08,
      "loss": 2.5299,
      "step": 75217
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0200600624084473,
      "learning_rate": 2.1233293758755603e-08,
      "loss": 2.3547,
      "step": 75218
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1228539943695068,
      "learning_rate": 2.1206485064434455e-08,
      "loss": 2.3738,
      "step": 75219
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1439143419265747,
      "learning_rate": 2.1179693286877966e-08,
      "loss": 2.142,
      "step": 75220
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1489582061767578,
      "learning_rate": 2.115291842613276e-08,
      "loss": 2.2421,
      "step": 75221
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9458716511726379,
      "learning_rate": 2.112616048224325e-08,
      "loss": 2.2927,
      "step": 75222
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0802899599075317,
      "learning_rate": 2.1099419455256066e-08,
      "loss": 2.2582,
      "step": 75223
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0538023710250854,
      "learning_rate": 2.10726953452145e-08,
      "loss": 2.3598,
      "step": 75224
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.169715404510498,
      "learning_rate": 2.1045988152165186e-08,
      "loss": 2.3827,
      "step": 75225
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1712758541107178,
      "learning_rate": 2.1019297876153642e-08,
      "loss": 2.1239,
      "step": 75226
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9810081720352173,
      "learning_rate": 2.0992624517224282e-08,
      "loss": 2.2663,
      "step": 75227
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1066782474517822,
      "learning_rate": 2.0965968075422616e-08,
      "loss": 2.4715,
      "step": 75228
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0750384330749512,
      "learning_rate": 2.093932855079417e-08,
      "loss": 2.4351,
      "step": 75229
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1248948574066162,
      "learning_rate": 2.091270594338335e-08,
      "loss": 2.3697,
      "step": 75230
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0718555450439453,
      "learning_rate": 2.0886100253235675e-08,
      "loss": 2.1785,
      "step": 75231
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9837300181388855,
      "learning_rate": 2.0859511480396667e-08,
      "loss": 2.2867,
      "step": 75232
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.068469524383545,
      "learning_rate": 2.083293962491184e-08,
      "loss": 2.3012,
      "step": 75233
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2666893005371094,
      "learning_rate": 2.0806384686824498e-08,
      "loss": 2.2868,
      "step": 75234
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9865599274635315,
      "learning_rate": 2.077984666618127e-08,
      "loss": 2.4159,
      "step": 75235
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0794310569763184,
      "learning_rate": 2.0753325563025452e-08,
      "loss": 2.1616,
      "step": 75236
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1282821893692017,
      "learning_rate": 2.0726821377403673e-08,
      "loss": 2.4081,
      "step": 75237
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.984245240688324,
      "learning_rate": 2.0700334109361453e-08,
      "loss": 2.3313,
      "step": 75238
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1873222589492798,
      "learning_rate": 2.067386375894098e-08,
      "loss": 2.2902,
      "step": 75239
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0735546350479126,
      "learning_rate": 2.064741032618889e-08,
      "loss": 2.3155,
      "step": 75240
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1167128086090088,
      "learning_rate": 2.062097381115069e-08,
      "loss": 2.067,
      "step": 75241
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.079372763633728,
      "learning_rate": 2.0594554213869688e-08,
      "loss": 2.3528,
      "step": 75242
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0762008428573608,
      "learning_rate": 2.05681515343914e-08,
      "loss": 2.4155,
      "step": 75243
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0158683061599731,
      "learning_rate": 2.0541765772760235e-08,
      "loss": 2.2899,
      "step": 75244
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.375434398651123,
      "learning_rate": 2.0515396929020604e-08,
      "loss": 2.1296,
      "step": 75245
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.081997275352478,
      "learning_rate": 2.0489045003219132e-08,
      "loss": 2.1934,
      "step": 75246
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1165621280670166,
      "learning_rate": 2.046270999539801e-08,
      "loss": 2.5829,
      "step": 75247
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.079701542854309,
      "learning_rate": 2.043639190560387e-08,
      "loss": 2.2671,
      "step": 75248
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0557082891464233,
      "learning_rate": 2.0410090733880007e-08,
      "loss": 2.2981,
      "step": 75249
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2090911865234375,
      "learning_rate": 2.0383806480270828e-08,
      "loss": 2.2369,
      "step": 75250
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0244581699371338,
      "learning_rate": 2.0357539144822968e-08,
      "loss": 2.1836,
      "step": 75251
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9597806930541992,
      "learning_rate": 2.0331288727577503e-08,
      "loss": 2.1778,
      "step": 75252
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1097004413604736,
      "learning_rate": 2.0305055228583282e-08,
      "loss": 2.2662,
      "step": 75253
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.992676854133606,
      "learning_rate": 2.0278838647881383e-08,
      "loss": 2.4041,
      "step": 75254
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.010408639907837,
      "learning_rate": 2.0252638985517325e-08,
      "loss": 2.4087,
      "step": 75255
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1699857711791992,
      "learning_rate": 2.0226456241534408e-08,
      "loss": 2.2772,
      "step": 75256
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0990287065505981,
      "learning_rate": 2.0200290415979262e-08,
      "loss": 2.4095,
      "step": 75257
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1130748987197876,
      "learning_rate": 2.0174141508895186e-08,
      "loss": 2.4742,
      "step": 75258
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.017448902130127,
      "learning_rate": 2.0148009520326585e-08,
      "loss": 2.2232,
      "step": 75259
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.043068528175354,
      "learning_rate": 2.012189445031676e-08,
      "loss": 2.2326,
      "step": 75260
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1115223169326782,
      "learning_rate": 2.009579629891123e-08,
      "loss": 2.3481,
      "step": 75261
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1852264404296875,
      "learning_rate": 2.0069715066153295e-08,
      "loss": 2.0783,
      "step": 75262
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0272653102874756,
      "learning_rate": 2.0043650752088473e-08,
      "loss": 2.2514,
      "step": 75263
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0652261972427368,
      "learning_rate": 2.0017603356760064e-08,
      "loss": 2.1059,
      "step": 75264
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1383134126663208,
      "learning_rate": 1.9991572880212472e-08,
      "loss": 2.3332,
      "step": 75265
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.041691541671753,
      "learning_rate": 1.9965559322490113e-08,
      "loss": 2.3588,
      "step": 75266
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0906717777252197,
      "learning_rate": 1.9939562683636283e-08,
      "loss": 2.3061,
      "step": 75267
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0804942846298218,
      "learning_rate": 1.9913582963694277e-08,
      "loss": 2.3479,
      "step": 75268
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0761102437973022,
      "learning_rate": 1.988762016271073e-08,
      "loss": 2.0366,
      "step": 75269
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0549347400665283,
      "learning_rate": 1.9861674280727826e-08,
      "loss": 2.2483,
      "step": 75270
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1644678115844727,
      "learning_rate": 1.9835745317789978e-08,
      "loss": 2.2859,
      "step": 75271
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0277310609817505,
      "learning_rate": 1.980983327394048e-08,
      "loss": 2.2017,
      "step": 75272
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.183852195739746,
      "learning_rate": 1.978393814922486e-08,
      "loss": 2.0246,
      "step": 75273
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0170491933822632,
      "learning_rate": 1.9758059943685292e-08,
      "loss": 2.255,
      "step": 75274
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1412006616592407,
      "learning_rate": 1.9732198657366198e-08,
      "loss": 2.4917,
      "step": 75275
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1632112264633179,
      "learning_rate": 1.970635429031309e-08,
      "loss": 2.2552,
      "step": 75276
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0892753601074219,
      "learning_rate": 1.968052684256705e-08,
      "loss": 2.4455,
      "step": 75277
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0065531730651855,
      "learning_rate": 1.9654716314173594e-08,
      "loss": 2.2779,
      "step": 75278
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0802894830703735,
      "learning_rate": 1.9628922705176023e-08,
      "loss": 2.2557,
      "step": 75279
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0753079652786255,
      "learning_rate": 1.9603146015617637e-08,
      "loss": 2.0543,
      "step": 75280
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1248373985290527,
      "learning_rate": 1.957738624554284e-08,
      "loss": 2.3032,
      "step": 75281
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1901133060455322,
      "learning_rate": 1.9551643394994936e-08,
      "loss": 2.3632,
      "step": 75282
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1167213916778564,
      "learning_rate": 1.952591746401833e-08,
      "loss": 2.2179,
      "step": 75283
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0622392892837524,
      "learning_rate": 1.950020845265521e-08,
      "loss": 2.0983,
      "step": 75284
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0678749084472656,
      "learning_rate": 1.947451636094999e-08,
      "loss": 2.5586,
      "step": 75285
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1251723766326904,
      "learning_rate": 1.9448841188945965e-08,
      "loss": 2.1436,
      "step": 75286
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1797679662704468,
      "learning_rate": 1.9423182936687546e-08,
      "loss": 2.2314,
      "step": 75287
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9877411127090454,
      "learning_rate": 1.939754160421692e-08,
      "loss": 2.3707,
      "step": 75288
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1665599346160889,
      "learning_rate": 1.9371917191578494e-08,
      "loss": 2.2924,
      "step": 75289
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9501938223838806,
      "learning_rate": 1.934630969881446e-08,
      "loss": 2.3974,
      "step": 75290
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1632559299468994,
      "learning_rate": 1.9320719125970334e-08,
      "loss": 2.3267,
      "step": 75291
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.265812873840332,
      "learning_rate": 1.9295145473088304e-08,
      "loss": 2.3927,
      "step": 75292
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.096512794494629,
      "learning_rate": 1.9269588740210566e-08,
      "loss": 2.4548,
      "step": 75293
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9940767884254456,
      "learning_rate": 1.924404892738263e-08,
      "loss": 2.2443,
      "step": 75294
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2426942586898804,
      "learning_rate": 1.921852603464669e-08,
      "loss": 2.3455,
      "step": 75295
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0770076513290405,
      "learning_rate": 1.919302006204493e-08,
      "loss": 2.4763,
      "step": 75296
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0061780214309692,
      "learning_rate": 1.9167531009622876e-08,
      "loss": 2.3823,
      "step": 75297
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.073786735534668,
      "learning_rate": 1.9142058877422708e-08,
      "loss": 2.4099,
      "step": 75298
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1229764223098755,
      "learning_rate": 1.9116603665486623e-08,
      "loss": 2.064,
      "step": 75299
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2491916418075562,
      "learning_rate": 1.909116537386013e-08,
      "loss": 2.3371,
      "step": 75300
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.154464602470398,
      "learning_rate": 1.906574400258321e-08,
      "loss": 2.2951,
      "step": 75301
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2211552858352661,
      "learning_rate": 1.904033955170137e-08,
      "loss": 2.4586,
      "step": 75302
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7612695693969727,
      "learning_rate": 1.9014952021257916e-08,
      "loss": 2.4845,
      "step": 75303
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0580570697784424,
      "learning_rate": 1.898958141129281e-08,
      "loss": 2.2953,
      "step": 75304
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1593859195709229,
      "learning_rate": 1.896422772185269e-08,
      "loss": 2.3685,
      "step": 75305
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1156100034713745,
      "learning_rate": 1.8938890952978627e-08,
      "loss": 2.2215,
      "step": 75306
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0835152864456177,
      "learning_rate": 1.8913571104713924e-08,
      "loss": 2.3122,
      "step": 75307
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9520717859268188,
      "learning_rate": 1.8888268177101877e-08,
      "loss": 2.4065,
      "step": 75308
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1068083047866821,
      "learning_rate": 1.8862982170185783e-08,
      "loss": 2.1223,
      "step": 75309
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.111291766166687,
      "learning_rate": 1.8837713084005615e-08,
      "loss": 2.3585,
      "step": 75310
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0951509475708008,
      "learning_rate": 1.8812460918608e-08,
      "loss": 2.303,
      "step": 75311
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4070497751235962,
      "learning_rate": 1.8787225674034015e-08,
      "loss": 2.1558,
      "step": 75312
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4085909128189087,
      "learning_rate": 1.876200735032585e-08,
      "loss": 2.2467,
      "step": 75313
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.079736351966858,
      "learning_rate": 1.8736805947526804e-08,
      "loss": 2.2323,
      "step": 75314
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9963950514793396,
      "learning_rate": 1.8711621465681285e-08,
      "loss": 2.2261,
      "step": 75315
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1627211570739746,
      "learning_rate": 1.868645390482926e-08,
      "loss": 2.2693,
      "step": 75316
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0816766023635864,
      "learning_rate": 1.866130326501403e-08,
      "loss": 2.3517,
      "step": 75317
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1499449014663696,
      "learning_rate": 1.8636169546280003e-08,
      "loss": 2.2994,
      "step": 75318
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1128143072128296,
      "learning_rate": 1.8611052748668256e-08,
      "loss": 2.4718,
      "step": 75319
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0423425436019897,
      "learning_rate": 1.858595287222098e-08,
      "loss": 2.3277,
      "step": 75320
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2911301851272583,
      "learning_rate": 1.8560869916982584e-08,
      "loss": 2.4496,
      "step": 75321
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1135601997375488,
      "learning_rate": 1.853580388299303e-08,
      "loss": 2.3551,
      "step": 75322
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0889142751693726,
      "learning_rate": 1.8510754770297844e-08,
      "loss": 2.213,
      "step": 75323
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0748519897460938,
      "learning_rate": 1.8485722578936992e-08,
      "loss": 2.3738,
      "step": 75324
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3940821886062622,
      "learning_rate": 1.8460707308953774e-08,
      "loss": 2.3789,
      "step": 75325
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.047102689743042,
      "learning_rate": 1.8435708960390374e-08,
      "loss": 2.3628,
      "step": 75326
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3034799098968506,
      "learning_rate": 1.8410727533288986e-08,
      "loss": 2.4355,
      "step": 75327
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0662431716918945,
      "learning_rate": 1.8385763027692906e-08,
      "loss": 2.28,
      "step": 75328
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0866856575012207,
      "learning_rate": 1.836081544364432e-08,
      "loss": 2.2074,
      "step": 75329
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1186199188232422,
      "learning_rate": 1.833588478118542e-08,
      "loss": 2.4517,
      "step": 75330
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2277710437774658,
      "learning_rate": 1.8310971040357283e-08,
      "loss": 2.5466,
      "step": 75331
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0952528715133667,
      "learning_rate": 1.828607422120321e-08,
      "loss": 2.2122,
      "step": 75332
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.121264100074768,
      "learning_rate": 1.8261194323765387e-08,
      "loss": 2.2125,
      "step": 75333
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1021878719329834,
      "learning_rate": 1.8236331348086e-08,
      "loss": 2.3956,
      "step": 75334
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0695654153823853,
      "learning_rate": 1.8211485294207242e-08,
      "loss": 2.3146,
      "step": 75335
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0534902811050415,
      "learning_rate": 1.8186656162171302e-08,
      "loss": 2.3288,
      "step": 75336
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1559544801712036,
      "learning_rate": 1.8161843952019254e-08,
      "loss": 2.1234,
      "step": 75337
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1394232511520386,
      "learning_rate": 1.8137048663793288e-08,
      "loss": 2.1788,
      "step": 75338
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.083986520767212,
      "learning_rate": 1.8112270297536705e-08,
      "loss": 2.1901,
      "step": 75339
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1611660718917847,
      "learning_rate": 1.8087508853291692e-08,
      "loss": 2.2007,
      "step": 75340
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0993715524673462,
      "learning_rate": 1.8062764331098216e-08,
      "loss": 2.2629,
      "step": 75341
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2795519828796387,
      "learning_rate": 1.8038036730999575e-08,
      "loss": 2.1567,
      "step": 75342
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.073238730430603,
      "learning_rate": 1.801332605303796e-08,
      "loss": 2.4239,
      "step": 75343
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0057122707366943,
      "learning_rate": 1.798863229725556e-08,
      "loss": 2.5279,
      "step": 75344
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9795231819152832,
      "learning_rate": 1.7963955463692338e-08,
      "loss": 2.2429,
      "step": 75345
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1255778074264526,
      "learning_rate": 1.79392955523916e-08,
      "loss": 2.3588,
      "step": 75346
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1107597351074219,
      "learning_rate": 1.791465256339442e-08,
      "loss": 2.4137,
      "step": 75347
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0356601476669312,
      "learning_rate": 1.7890026496744095e-08,
      "loss": 2.5598,
      "step": 75348
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.134013295173645,
      "learning_rate": 1.7865417352480597e-08,
      "loss": 2.243,
      "step": 75349
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0262964963912964,
      "learning_rate": 1.784082513064611e-08,
      "loss": 2.3509,
      "step": 75350
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2145187854766846,
      "learning_rate": 1.7816249831282828e-08,
      "loss": 2.3738,
      "step": 75351
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0862244367599487,
      "learning_rate": 1.7791691454431825e-08,
      "loss": 2.2925,
      "step": 75352
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1264220476150513,
      "learning_rate": 1.776715000013529e-08,
      "loss": 2.306,
      "step": 75353
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0471391677856445,
      "learning_rate": 1.7742625468435413e-08,
      "loss": 2.3271,
      "step": 75354
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.038697361946106,
      "learning_rate": 1.7718117859371052e-08,
      "loss": 2.2072,
      "step": 75355
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0795774459838867,
      "learning_rate": 1.7693627172987725e-08,
      "loss": 2.3044,
      "step": 75356
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1094027757644653,
      "learning_rate": 1.766915340932318e-08,
      "loss": 2.1778,
      "step": 75357
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1454828977584839,
      "learning_rate": 1.7644696568421827e-08,
      "loss": 2.2484,
      "step": 75358
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0907089710235596,
      "learning_rate": 1.762025665032252e-08,
      "loss": 2.3549,
      "step": 75359
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0269116163253784,
      "learning_rate": 1.7595833655069672e-08,
      "loss": 2.1585,
      "step": 75360
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1443389654159546,
      "learning_rate": 1.7571427582702138e-08,
      "loss": 2.2115,
      "step": 75361
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0593740940093994,
      "learning_rate": 1.7547038433262108e-08,
      "loss": 2.3426,
      "step": 75362
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.156716227531433,
      "learning_rate": 1.752266620679066e-08,
      "loss": 2.2969,
      "step": 75363
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0614778995513916,
      "learning_rate": 1.7498310903329985e-08,
      "loss": 2.5066,
      "step": 75364
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.055877447128296,
      "learning_rate": 1.7473972522921156e-08,
      "loss": 2.2997,
      "step": 75365
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1172585487365723,
      "learning_rate": 1.7449651065605255e-08,
      "loss": 2.5046,
      "step": 75366
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0421373844146729,
      "learning_rate": 1.742534653142336e-08,
      "loss": 2.3841,
      "step": 75367
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0211156606674194,
      "learning_rate": 1.7401058920416547e-08,
      "loss": 2.1726,
      "step": 75368
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.092349886894226,
      "learning_rate": 1.7376788232625895e-08,
      "loss": 2.5024,
      "step": 75369
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9807953834533691,
      "learning_rate": 1.7352534468093594e-08,
      "loss": 2.4424,
      "step": 75370
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0613163709640503,
      "learning_rate": 1.7328297626859614e-08,
      "loss": 2.4801,
      "step": 75371
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0793362855911255,
      "learning_rate": 1.7304077708965027e-08,
      "loss": 2.4939,
      "step": 75372
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1396794319152832,
      "learning_rate": 1.7279874714452028e-08,
      "loss": 2.1108,
      "step": 75373
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.08417546749115,
      "learning_rate": 1.725568864335947e-08,
      "loss": 2.2489,
      "step": 75374
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9534785151481628,
      "learning_rate": 1.7231519495730652e-08,
      "loss": 2.2954,
      "step": 75375
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0879926681518555,
      "learning_rate": 1.7207367271606658e-08,
      "loss": 2.1929,
      "step": 75376
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0794316530227661,
      "learning_rate": 1.7183231971026338e-08,
      "loss": 2.3687,
      "step": 75377
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1924810409545898,
      "learning_rate": 1.7159113594031883e-08,
      "loss": 2.3823,
      "step": 75378
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1272246837615967,
      "learning_rate": 1.7135012140664374e-08,
      "loss": 2.3603,
      "step": 75379
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1370457410812378,
      "learning_rate": 1.711092761096378e-08,
      "loss": 2.2612,
      "step": 75380
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0773241519927979,
      "learning_rate": 1.7086860004972283e-08,
      "loss": 2.4626,
      "step": 75381
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3060672283172607,
      "learning_rate": 1.7062809322728745e-08,
      "loss": 2.5333,
      "step": 75382
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1777912378311157,
      "learning_rate": 1.7038775564276467e-08,
      "loss": 2.3384,
      "step": 75383
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1305261850357056,
      "learning_rate": 1.7014758729653192e-08,
      "loss": 2.4147,
      "step": 75384
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.023924708366394,
      "learning_rate": 1.699075881890333e-08,
      "loss": 2.3221,
      "step": 75385
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0126428604125977,
      "learning_rate": 1.696677583206352e-08,
      "loss": 2.5086,
      "step": 75386
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1116923093795776,
      "learning_rate": 1.694280976917706e-08,
      "loss": 2.3011,
      "step": 75387
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0281951427459717,
      "learning_rate": 1.691886063028392e-08,
      "loss": 2.2751,
      "step": 75388
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2543964385986328,
      "learning_rate": 1.6894928415425172e-08,
      "loss": 2.2685,
      "step": 75389
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1854362487792969,
      "learning_rate": 1.687101312464079e-08,
      "loss": 2.3167,
      "step": 75390
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1245638132095337,
      "learning_rate": 1.684711475797074e-08,
      "loss": 2.152,
      "step": 75391
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0479594469070435,
      "learning_rate": 1.682323331545721e-08,
      "loss": 2.1967,
      "step": 75392
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.069553017616272,
      "learning_rate": 1.679936879713795e-08,
      "loss": 2.4032,
      "step": 75393
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1491140127182007,
      "learning_rate": 1.6775521203056254e-08,
      "loss": 2.2999,
      "step": 75394
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9936851859092712,
      "learning_rate": 1.6751690533252097e-08,
      "loss": 2.4106,
      "step": 75395
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0795456171035767,
      "learning_rate": 1.672787678776433e-08,
      "loss": 2.2201,
      "step": 75396
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0565680265426636,
      "learning_rate": 1.6704079966634036e-08,
      "loss": 2.1413,
      "step": 75397
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0634934902191162,
      "learning_rate": 1.66803000699034e-08,
      "loss": 2.3828,
      "step": 75398
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0235997438430786,
      "learning_rate": 1.665653709760906e-08,
      "loss": 2.3037,
      "step": 75399
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.119409203529358,
      "learning_rate": 1.6632791049794318e-08,
      "loss": 2.155,
      "step": 75400
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2911847829818726,
      "learning_rate": 1.6609061926498028e-08,
      "loss": 2.3695,
      "step": 75401
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0304352045059204,
      "learning_rate": 1.658534972776127e-08,
      "loss": 2.3127,
      "step": 75402
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0606164932250977,
      "learning_rate": 1.656165445362401e-08,
      "loss": 2.4876,
      "step": 75403
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.171082615852356,
      "learning_rate": 1.6537976104126218e-08,
      "loss": 2.3239,
      "step": 75404
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0651652812957764,
      "learning_rate": 1.6514314679306754e-08,
      "loss": 2.5124,
      "step": 75405
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1363263130187988,
      "learning_rate": 1.649067017920891e-08,
      "loss": 2.6094,
      "step": 75406
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.112731695175171,
      "learning_rate": 1.646704260386933e-08,
      "loss": 2.4829,
      "step": 75407
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5945461988449097,
      "learning_rate": 1.64434319533302e-08,
      "loss": 2.1851,
      "step": 75408
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.188637137413025,
      "learning_rate": 1.641983822763038e-08,
      "loss": 2.2682,
      "step": 75409
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0171756744384766,
      "learning_rate": 1.6396261426810946e-08,
      "loss": 2.2651,
      "step": 75410
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1204664707183838,
      "learning_rate": 1.637270155091186e-08,
      "loss": 2.2887,
      "step": 75411
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1770752668380737,
      "learning_rate": 1.6349158599971992e-08,
      "loss": 2.2429,
      "step": 75412
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0525556802749634,
      "learning_rate": 1.6325632574032414e-08,
      "loss": 2.0359,
      "step": 75413
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1584815979003906,
      "learning_rate": 1.630212347313198e-08,
      "loss": 2.3335,
      "step": 75414
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1313610076904297,
      "learning_rate": 1.6278631297310667e-08,
      "loss": 2.3905,
      "step": 75415
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0583890676498413,
      "learning_rate": 1.6255156046608434e-08,
      "loss": 2.4024,
      "step": 75416
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1233452558517456,
      "learning_rate": 1.6231697721065253e-08,
      "loss": 2.337,
      "step": 75417
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1443541049957275,
      "learning_rate": 1.6208256320722204e-08,
      "loss": 2.3142,
      "step": 75418
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0854541063308716,
      "learning_rate": 1.6184831845615922e-08,
      "loss": 2.3016,
      "step": 75419
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0091993808746338,
      "learning_rate": 1.6161424295789706e-08,
      "loss": 2.1174,
      "step": 75420
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0068299770355225,
      "learning_rate": 1.6138033671280194e-08,
      "loss": 2.3636,
      "step": 75421
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9781725406646729,
      "learning_rate": 1.6114659972128467e-08,
      "loss": 2.2536,
      "step": 75422
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0815021991729736,
      "learning_rate": 1.6091303198374487e-08,
      "loss": 2.5174,
      "step": 75423
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.134932279586792,
      "learning_rate": 1.6067963350057115e-08,
      "loss": 2.2059,
      "step": 75424
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2006243467330933,
      "learning_rate": 1.6044640427216317e-08,
      "loss": 2.0165,
      "step": 75425
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0638861656188965,
      "learning_rate": 1.6021334429890955e-08,
      "loss": 2.2366,
      "step": 75426
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0989429950714111,
      "learning_rate": 1.5998045358120994e-08,
      "loss": 2.038,
      "step": 75427
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2307260036468506,
      "learning_rate": 1.5974773211946403e-08,
      "loss": 2.3453,
      "step": 75428
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1071274280548096,
      "learning_rate": 1.595151799140604e-08,
      "loss": 2.297,
      "step": 75429
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0410784482955933,
      "learning_rate": 1.592827969653987e-08,
      "loss": 2.1103,
      "step": 75430
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.047587275505066,
      "learning_rate": 1.590505832738676e-08,
      "loss": 2.4796,
      "step": 75431
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1891796588897705,
      "learning_rate": 1.5881853883985553e-08,
      "loss": 2.4398,
      "step": 75432
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9937164783477783,
      "learning_rate": 1.585866636637734e-08,
      "loss": 2.4933,
      "step": 75433
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1593868732452393,
      "learning_rate": 1.5835495774598754e-08,
      "loss": 2.3453,
      "step": 75434
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0516806840896606,
      "learning_rate": 1.5812342108691982e-08,
      "loss": 2.2669,
      "step": 75435
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0639468431472778,
      "learning_rate": 1.578920536869477e-08,
      "loss": 2.4706,
      "step": 75436
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0941710472106934,
      "learning_rate": 1.5766085554645983e-08,
      "loss": 2.4237,
      "step": 75437
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1740660667419434,
      "learning_rate": 1.5742982666585583e-08,
      "loss": 2.2834,
      "step": 75438
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0645136833190918,
      "learning_rate": 1.571989670455354e-08,
      "loss": 2.1934,
      "step": 75439
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2809628248214722,
      "learning_rate": 1.569682766858649e-08,
      "loss": 2.4357,
      "step": 75440
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0488284826278687,
      "learning_rate": 1.5673775558725514e-08,
      "loss": 2.3357,
      "step": 75441
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.123978614807129,
      "learning_rate": 1.5650740375009466e-08,
      "loss": 2.3923,
      "step": 75442
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0406913757324219,
      "learning_rate": 1.5627722117477206e-08,
      "loss": 2.5014,
      "step": 75443
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9571418762207031,
      "learning_rate": 1.5604720786167595e-08,
      "loss": 2.3938,
      "step": 75444
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0189381837844849,
      "learning_rate": 1.5581736381119482e-08,
      "loss": 2.3729,
      "step": 75445
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.156629204750061,
      "learning_rate": 1.5558768902371733e-08,
      "loss": 2.267,
      "step": 75446
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.083240270614624,
      "learning_rate": 1.5535818349964317e-08,
      "loss": 2.378,
      "step": 75447
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0396831035614014,
      "learning_rate": 1.5512884723934973e-08,
      "loss": 2.2433,
      "step": 75448
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9443361163139343,
      "learning_rate": 1.5489968024322566e-08,
      "loss": 2.3949,
      "step": 75449
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.14772367477417,
      "learning_rate": 1.5467068251168172e-08,
      "loss": 2.3889,
      "step": 75450
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1543346643447876,
      "learning_rate": 1.5444185404507316e-08,
      "loss": 2.2618,
      "step": 75451
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0799312591552734,
      "learning_rate": 1.542131948438108e-08,
      "loss": 2.3278,
      "step": 75452
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0932596921920776,
      "learning_rate": 1.539847049082721e-08,
      "loss": 2.2678,
      "step": 75453
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0374306440353394,
      "learning_rate": 1.5375638423884564e-08,
      "loss": 2.3584,
      "step": 75454
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0774991512298584,
      "learning_rate": 1.5352823283592e-08,
      "loss": 2.2315,
      "step": 75455
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.162191390991211,
      "learning_rate": 1.5330025069988374e-08,
      "loss": 2.5281,
      "step": 75456
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1616911888122559,
      "learning_rate": 1.5307243783111438e-08,
      "loss": 2.4985,
      "step": 75457
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9610501527786255,
      "learning_rate": 1.5284479423001154e-08,
      "loss": 2.2715,
      "step": 75458
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0593136548995972,
      "learning_rate": 1.5261731989695274e-08,
      "loss": 2.3144,
      "step": 75459
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.097285270690918,
      "learning_rate": 1.5239001483232652e-08,
      "loss": 2.2538,
      "step": 75460
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0791274309158325,
      "learning_rate": 1.521628790365215e-08,
      "loss": 2.0745,
      "step": 75461
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0753098726272583,
      "learning_rate": 1.519359125099151e-08,
      "loss": 2.2974,
      "step": 75462
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0374327898025513,
      "learning_rate": 1.51709115252896e-08,
      "loss": 2.3372,
      "step": 75463
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0902488231658936,
      "learning_rate": 1.514824872658527e-08,
      "loss": 2.3199,
      "step": 75464
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1583137512207031,
      "learning_rate": 1.5125602854916264e-08,
      "loss": 2.1991,
      "step": 75465
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.062641978263855,
      "learning_rate": 1.510297391032145e-08,
      "loss": 2.4021,
      "step": 75466
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1606662273406982,
      "learning_rate": 1.5080361892837458e-08,
      "loss": 2.3163,
      "step": 75467
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0536644458770752,
      "learning_rate": 1.5057766802505368e-08,
      "loss": 2.3677,
      "step": 75468
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1868816614151,
      "learning_rate": 1.503518863936293e-08,
      "loss": 2.4589,
      "step": 75469
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9897751212120056,
      "learning_rate": 1.501262740344678e-08,
      "loss": 2.1201,
      "step": 75470
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.175161361694336,
      "learning_rate": 1.4990083094795772e-08,
      "loss": 2.4325,
      "step": 75471
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.075780987739563,
      "learning_rate": 1.4967555713448767e-08,
      "loss": 2.2647,
      "step": 75472
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0419542789459229,
      "learning_rate": 1.4945045259443512e-08,
      "loss": 2.3452,
      "step": 75473
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0526071786880493,
      "learning_rate": 1.492255173281887e-08,
      "loss": 2.3226,
      "step": 75474
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1386432647705078,
      "learning_rate": 1.4900075133612579e-08,
      "loss": 2.29,
      "step": 75475
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0767598152160645,
      "learning_rate": 1.4877615461862394e-08,
      "loss": 2.1276,
      "step": 75476
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.026631474494934,
      "learning_rate": 1.485517271760606e-08,
      "loss": 2.316,
      "step": 75477
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1629449129104614,
      "learning_rate": 1.4832746900882433e-08,
      "loss": 2.3672,
      "step": 75478
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0473659038543701,
      "learning_rate": 1.4810338011729264e-08,
      "loss": 2.4566,
      "step": 75479
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1085923910140991,
      "learning_rate": 1.4787946050184299e-08,
      "loss": 2.4024,
      "step": 75480
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2325087785720825,
      "learning_rate": 1.4765571016286396e-08,
      "loss": 2.3539,
      "step": 75481
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3159830570220947,
      "learning_rate": 1.4743212910072191e-08,
      "loss": 2.2673,
      "step": 75482
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.096712589263916,
      "learning_rate": 1.4720871731580544e-08,
      "loss": 2.4323,
      "step": 75483
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2662910223007202,
      "learning_rate": 1.4698547480848092e-08,
      "loss": 2.1983,
      "step": 75484
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1986826658248901,
      "learning_rate": 1.4676240157914801e-08,
      "loss": 2.1027,
      "step": 75485
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0699052810668945,
      "learning_rate": 1.46539497628162e-08,
      "loss": 2.3758,
      "step": 75486
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0189168453216553,
      "learning_rate": 1.4631676295592257e-08,
      "loss": 2.3724,
      "step": 75487
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0520669221878052,
      "learning_rate": 1.4609419756278499e-08,
      "loss": 2.2785,
      "step": 75488
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2118251323699951,
      "learning_rate": 1.4587180144913781e-08,
      "loss": 2.4695,
      "step": 75489
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9466729760169983,
      "learning_rate": 1.4564957461536966e-08,
      "loss": 2.3244,
      "step": 75490
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0967835187911987,
      "learning_rate": 1.4542751706183577e-08,
      "loss": 2.209,
      "step": 75491
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0799450874328613,
      "learning_rate": 1.4520562878892474e-08,
      "loss": 2.4854,
      "step": 75492
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.067399024963379,
      "learning_rate": 1.4498390979701404e-08,
      "loss": 2.2929,
      "step": 75493
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1176952123641968,
      "learning_rate": 1.4476236008647005e-08,
      "loss": 2.1883,
      "step": 75494
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.192521333694458,
      "learning_rate": 1.4454097965768133e-08,
      "loss": 2.1994,
      "step": 75495
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9982508420944214,
      "learning_rate": 1.4431976851100316e-08,
      "loss": 2.2972,
      "step": 75496
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.269374966621399,
      "learning_rate": 1.4409872664683523e-08,
      "loss": 2.3186,
      "step": 75497
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6033605337142944,
      "learning_rate": 1.4387785406553278e-08,
      "loss": 2.1333,
      "step": 75498
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.16288423538208,
      "learning_rate": 1.4365715076748444e-08,
      "loss": 2.4811,
      "step": 75499
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0929815769195557,
      "learning_rate": 1.4343661675305653e-08,
      "loss": 2.1472,
      "step": 75500
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1445367336273193,
      "learning_rate": 1.4321625202262657e-08,
      "loss": 2.2632,
      "step": 75501
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1075371503829956,
      "learning_rate": 1.429960565765609e-08,
      "loss": 2.5199,
      "step": 75502
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.041827917098999,
      "learning_rate": 1.4277603041523702e-08,
      "loss": 2.2796,
      "step": 75503
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1389681100845337,
      "learning_rate": 1.4255617353903239e-08,
      "loss": 2.311,
      "step": 75504
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7475452423095703,
      "learning_rate": 1.423364859483134e-08,
      "loss": 2.2274,
      "step": 75505
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9696748852729797,
      "learning_rate": 1.421169676434575e-08,
      "loss": 2.2388,
      "step": 75506
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0224754810333252,
      "learning_rate": 1.418976186248311e-08,
      "loss": 2.1986,
      "step": 75507
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0873069763183594,
      "learning_rate": 1.4167843889281163e-08,
      "loss": 2.4805,
      "step": 75508
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.012506127357483,
      "learning_rate": 1.414594284477655e-08,
      "loss": 2.3019,
      "step": 75509
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0545518398284912,
      "learning_rate": 1.4124058729007017e-08,
      "loss": 2.104,
      "step": 75510
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0551214218139648,
      "learning_rate": 1.4102191542009202e-08,
      "loss": 2.4133,
      "step": 75511
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0929465293884277,
      "learning_rate": 1.4080341283819742e-08,
      "loss": 2.2914,
      "step": 75512
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.084129810333252,
      "learning_rate": 1.4058507954477495e-08,
      "loss": 2.5357,
      "step": 75513
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.070530891418457,
      "learning_rate": 1.4036691554016879e-08,
      "loss": 2.1912,
      "step": 75514
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2039610147476196,
      "learning_rate": 1.401489208247675e-08,
      "loss": 2.3288,
      "step": 75515
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1404645442962646,
      "learning_rate": 1.3993109539894857e-08,
      "loss": 2.2953,
      "step": 75516
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0755424499511719,
      "learning_rate": 1.3971343926305614e-08,
      "loss": 2.4992,
      "step": 75517
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1122722625732422,
      "learning_rate": 1.3949595241746772e-08,
      "loss": 2.4899,
      "step": 75518
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0524793863296509,
      "learning_rate": 1.3927863486256077e-08,
      "loss": 2.1714,
      "step": 75519
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0211375951766968,
      "learning_rate": 1.3906148659869056e-08,
      "loss": 2.1534,
      "step": 75520
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1091065406799316,
      "learning_rate": 1.3884450762624569e-08,
      "loss": 2.2698,
      "step": 75521
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.162861943244934,
      "learning_rate": 1.386276979455703e-08,
      "loss": 2.3488,
      "step": 75522
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0495539903640747,
      "learning_rate": 1.3841105755704187e-08,
      "loss": 2.4106,
      "step": 75523
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9447028636932373,
      "learning_rate": 1.381945864610379e-08,
      "loss": 2.3465,
      "step": 75524
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0285934209823608,
      "learning_rate": 1.3797828465791362e-08,
      "loss": 2.1961,
      "step": 75525
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0415126085281372,
      "learning_rate": 1.3776215214803545e-08,
      "loss": 2.1653,
      "step": 75526
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.050402283668518,
      "learning_rate": 1.3754618893176975e-08,
      "loss": 2.4122,
      "step": 75527
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1457569599151611,
      "learning_rate": 1.3733039500949397e-08,
      "loss": 2.0666,
      "step": 75528
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1681077480316162,
      "learning_rate": 1.371147703815634e-08,
      "loss": 2.4537,
      "step": 75529
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0896165370941162,
      "learning_rate": 1.3689931504834442e-08,
      "loss": 2.4955,
      "step": 75530
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1147922277450562,
      "learning_rate": 1.3668402901020339e-08,
      "loss": 2.2583,
      "step": 75531
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.040287971496582,
      "learning_rate": 1.364689122675067e-08,
      "loss": 2.2091,
      "step": 75532
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1276072263717651,
      "learning_rate": 1.3625396482060959e-08,
      "loss": 2.3796,
      "step": 75533
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0333331823349,
      "learning_rate": 1.3603918666990068e-08,
      "loss": 2.1467,
      "step": 75534
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1372894048690796,
      "learning_rate": 1.3582457781571302e-08,
      "loss": 2.3695,
      "step": 75535
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9869274497032166,
      "learning_rate": 1.3561013825843517e-08,
      "loss": 2.2368,
      "step": 75536
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9398603439331055,
      "learning_rate": 1.3539586799841131e-08,
      "loss": 2.3134,
      "step": 75537
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0696245431900024,
      "learning_rate": 1.3518176703601893e-08,
      "loss": 2.2927,
      "step": 75538
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1449092626571655,
      "learning_rate": 1.349678353716244e-08,
      "loss": 2.3306,
      "step": 75539
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1480064392089844,
      "learning_rate": 1.3475407300557186e-08,
      "loss": 2.2612,
      "step": 75540
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.059208869934082,
      "learning_rate": 1.3454047993823882e-08,
      "loss": 2.2336,
      "step": 75541
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0624873638153076,
      "learning_rate": 1.3432705616998054e-08,
      "loss": 2.283,
      "step": 75542
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0523784160614014,
      "learning_rate": 1.3411380170116339e-08,
      "loss": 2.2619,
      "step": 75543
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0701345205307007,
      "learning_rate": 1.3390071653214265e-08,
      "loss": 2.4,
      "step": 75544
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.027750849723816,
      "learning_rate": 1.3368780066328468e-08,
      "loss": 2.3994,
      "step": 75545
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0481257438659668,
      "learning_rate": 1.3347505409495588e-08,
      "loss": 2.324,
      "step": 75546
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0283154249191284,
      "learning_rate": 1.3326247682750038e-08,
      "loss": 2.1924,
      "step": 75547
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2091223001480103,
      "learning_rate": 1.3305006886129567e-08,
      "loss": 2.283,
      "step": 75548
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0067849159240723,
      "learning_rate": 1.3283783019668595e-08,
      "loss": 2.1312,
      "step": 75549
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.07771635055542,
      "learning_rate": 1.3262576083404866e-08,
      "loss": 2.2378,
      "step": 75550
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0693641901016235,
      "learning_rate": 1.3241386077372797e-08,
      "loss": 2.4499,
      "step": 75551
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1180647611618042,
      "learning_rate": 1.3220213001609029e-08,
      "loss": 2.1137,
      "step": 75552
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9972736835479736,
      "learning_rate": 1.3199056856150194e-08,
      "loss": 2.3846,
      "step": 75553
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1063494682312012,
      "learning_rate": 1.3177917641030714e-08,
      "loss": 2.3715,
      "step": 75554
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1902847290039062,
      "learning_rate": 1.3156795356287221e-08,
      "loss": 2.3667,
      "step": 75555
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.977840006351471,
      "learning_rate": 1.3135690001955248e-08,
      "loss": 2.276,
      "step": 75556
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0000112056732178,
      "learning_rate": 1.3114601578070318e-08,
      "loss": 2.4206,
      "step": 75557
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.137041687965393,
      "learning_rate": 1.309353008466907e-08,
      "loss": 2.3515,
      "step": 75558
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1513357162475586,
      "learning_rate": 1.3072475521785921e-08,
      "loss": 2.1518,
      "step": 75559
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.029853343963623,
      "learning_rate": 1.3051437889458618e-08,
      "loss": 2.3546,
      "step": 75560
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.01317298412323,
      "learning_rate": 1.3030417187720468e-08,
      "loss": 2.2406,
      "step": 75561
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0286343097686768,
      "learning_rate": 1.3009413416608108e-08,
      "loss": 2.2138,
      "step": 75562
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0941646099090576,
      "learning_rate": 1.2988426576158175e-08,
      "loss": 2.0723,
      "step": 75563
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2588242292404175,
      "learning_rate": 1.2967456666405086e-08,
      "loss": 2.2472,
      "step": 75564
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.077096700668335,
      "learning_rate": 1.2946503687384371e-08,
      "loss": 2.1421,
      "step": 75565
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1133240461349487,
      "learning_rate": 1.2925567639131553e-08,
      "loss": 2.5195,
      "step": 75566
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.145599603652954,
      "learning_rate": 1.2904648521682162e-08,
      "loss": 2.221,
      "step": 75567
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1373966932296753,
      "learning_rate": 1.2883746335071723e-08,
      "loss": 2.2304,
      "step": 75568
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0577702522277832,
      "learning_rate": 1.2862861079336874e-08,
      "loss": 2.499,
      "step": 75569
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1307804584503174,
      "learning_rate": 1.2841992754510924e-08,
      "loss": 2.4274,
      "step": 75570
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0276312828063965,
      "learning_rate": 1.2821141360630507e-08,
      "loss": 2.318,
      "step": 75571
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1549782752990723,
      "learning_rate": 1.2800306897731152e-08,
      "loss": 2.1125,
      "step": 75572
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.057334303855896,
      "learning_rate": 1.2779489365847275e-08,
      "loss": 2.1515,
      "step": 75573
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.981636106967926,
      "learning_rate": 1.2758688765015515e-08,
      "loss": 2.1032,
      "step": 75574
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0851824283599854,
      "learning_rate": 1.2737905095269176e-08,
      "loss": 2.3853,
      "step": 75575
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9607742428779602,
      "learning_rate": 1.2717138356646008e-08,
      "loss": 2.1904,
      "step": 75576
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9651523232460022,
      "learning_rate": 1.2696388549178207e-08,
      "loss": 2.1893,
      "step": 75577
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0903810262680054,
      "learning_rate": 1.267565567290352e-08,
      "loss": 2.2177,
      "step": 75578
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0851646661758423,
      "learning_rate": 1.2654939727855253e-08,
      "loss": 2.3,
      "step": 75579
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0383415222167969,
      "learning_rate": 1.2634240714070046e-08,
      "loss": 2.2507,
      "step": 75580
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0525414943695068,
      "learning_rate": 1.2613558631582313e-08,
      "loss": 2.2921,
      "step": 75581
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9793609976768494,
      "learning_rate": 1.2592893480427581e-08,
      "loss": 2.4068,
      "step": 75582
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9856300950050354,
      "learning_rate": 1.257224526063916e-08,
      "loss": 2.2781,
      "step": 75583
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1709165573120117,
      "learning_rate": 1.2551613972254795e-08,
      "loss": 2.3114,
      "step": 75584
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1044903993606567,
      "learning_rate": 1.2530999615306682e-08,
      "loss": 2.4614,
      "step": 75585
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0529532432556152,
      "learning_rate": 1.251040218983146e-08,
      "loss": 2.4045,
      "step": 75586
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1351124048233032,
      "learning_rate": 1.2489821695864657e-08,
      "loss": 2.4145,
      "step": 75587
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.11292564868927,
      "learning_rate": 1.2469258133439577e-08,
      "loss": 2.2106,
      "step": 75588
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3726403713226318,
      "learning_rate": 1.2448711502590637e-08,
      "loss": 2.2447,
      "step": 75589
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1788110733032227,
      "learning_rate": 1.2428181803354478e-08,
      "loss": 2.0953,
      "step": 75590
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1246633529663086,
      "learning_rate": 1.2407669035765513e-08,
      "loss": 2.1653,
      "step": 75591
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2825634479522705,
      "learning_rate": 1.238717319985816e-08,
      "loss": 2.1034,
      "step": 75592
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0824382305145264,
      "learning_rate": 1.2366694295666837e-08,
      "loss": 2.3258,
      "step": 75593
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0457993745803833,
      "learning_rate": 1.234623232322596e-08,
      "loss": 2.4381,
      "step": 75594
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0731468200683594,
      "learning_rate": 1.2325787282572165e-08,
      "loss": 2.3517,
      "step": 75595
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0501883029937744,
      "learning_rate": 1.2305359173737653e-08,
      "loss": 2.3464,
      "step": 75596
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1366909742355347,
      "learning_rate": 1.2284947996759056e-08,
      "loss": 2.291,
      "step": 75597
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.111727237701416,
      "learning_rate": 1.2264553751669684e-08,
      "loss": 2.355,
      "step": 75598
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0668811798095703,
      "learning_rate": 1.2244176438503953e-08,
      "loss": 2.3084,
      "step": 75599
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0121915340423584,
      "learning_rate": 1.2223816057298498e-08,
      "loss": 2.234,
      "step": 75600
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.192514181137085,
      "learning_rate": 1.220347260808441e-08,
      "loss": 2.396,
      "step": 75601
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0791670083999634,
      "learning_rate": 1.2183146090899434e-08,
      "loss": 2.1347,
      "step": 75602
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4040014743804932,
      "learning_rate": 1.2162836505775766e-08,
      "loss": 2.3952,
      "step": 75603
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0844080448150635,
      "learning_rate": 1.2142543852750044e-08,
      "loss": 2.1613,
      "step": 75604
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1516274213790894,
      "learning_rate": 1.2122268131854464e-08,
      "loss": 2.1982,
      "step": 75605
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0385974645614624,
      "learning_rate": 1.2102009343124554e-08,
      "loss": 2.1641,
      "step": 75606
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.125228762626648,
      "learning_rate": 1.2081767486593621e-08,
      "loss": 2.3653,
      "step": 75607
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1078087091445923,
      "learning_rate": 1.2061542562297191e-08,
      "loss": 2.3135,
      "step": 75608
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1547203063964844,
      "learning_rate": 1.204133457026968e-08,
      "loss": 2.3341,
      "step": 75609
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9894458055496216,
      "learning_rate": 1.2021143510544397e-08,
      "loss": 2.2607,
      "step": 75610
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0992757081985474,
      "learning_rate": 1.2000969383156869e-08,
      "loss": 2.5179,
      "step": 75611
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.171068549156189,
      "learning_rate": 1.198081218813929e-08,
      "loss": 2.3675,
      "step": 75612
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2878284454345703,
      "learning_rate": 1.1960671925527189e-08,
      "loss": 2.3289,
      "step": 75613
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1357777118682861,
      "learning_rate": 1.1940548595354983e-08,
      "loss": 2.299,
      "step": 75614
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0599086284637451,
      "learning_rate": 1.1920442197654869e-08,
      "loss": 2.1928,
      "step": 75615
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0686278343200684,
      "learning_rate": 1.1900352732463483e-08,
      "loss": 2.2474,
      "step": 75616
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.002779245376587,
      "learning_rate": 1.1880280199814132e-08,
      "loss": 2.3486,
      "step": 75617
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.007124662399292,
      "learning_rate": 1.1860224599739012e-08,
      "loss": 2.2237,
      "step": 75618
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.100304126739502,
      "learning_rate": 1.1840185932274761e-08,
      "loss": 2.3534,
      "step": 75619
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0655205249786377,
      "learning_rate": 1.1820164197453576e-08,
      "loss": 2.3527,
      "step": 75620
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1276487112045288,
      "learning_rate": 1.1800159395309874e-08,
      "loss": 2.4985,
      "step": 75621
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1591598987579346,
      "learning_rate": 1.178017152587807e-08,
      "loss": 2.3426,
      "step": 75622
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1101540327072144,
      "learning_rate": 1.1760200589192583e-08,
      "loss": 2.334,
      "step": 75623
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0097053050994873,
      "learning_rate": 1.1740246585284499e-08,
      "loss": 2.4429,
      "step": 75624
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.126079797744751,
      "learning_rate": 1.1720309514190453e-08,
      "loss": 2.2837,
      "step": 75625
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0458827018737793,
      "learning_rate": 1.1700389375942645e-08,
      "loss": 2.0812,
      "step": 75626
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.127725601196289,
      "learning_rate": 1.1680486170576599e-08,
      "loss": 2.37,
      "step": 75627
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0334242582321167,
      "learning_rate": 1.1660599898123404e-08,
      "loss": 2.4982,
      "step": 75628
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0457186698913574,
      "learning_rate": 1.1640730558619694e-08,
      "loss": 2.2898,
      "step": 75629
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2730977535247803,
      "learning_rate": 1.1620878152096559e-08,
      "loss": 2.2843,
      "step": 75630
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0950738191604614,
      "learning_rate": 1.1601042678589525e-08,
      "loss": 2.2521,
      "step": 75631
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.078144907951355,
      "learning_rate": 1.1581224138131896e-08,
      "loss": 2.2464,
      "step": 75632
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9564996361732483,
      "learning_rate": 1.1561422530755873e-08,
      "loss": 2.2415,
      "step": 75633
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2093043327331543,
      "learning_rate": 1.1541637856496979e-08,
      "loss": 2.1766,
      "step": 75634
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1136866807937622,
      "learning_rate": 1.1521870115387413e-08,
      "loss": 2.3897,
      "step": 75635
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0950169563293457,
      "learning_rate": 1.150211930746159e-08,
      "loss": 2.3698,
      "step": 75636
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1372807025909424,
      "learning_rate": 1.1482385432751709e-08,
      "loss": 2.4542,
      "step": 75637
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0690100193023682,
      "learning_rate": 1.1462668491292184e-08,
      "loss": 2.154,
      "step": 75638
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.153961181640625,
      "learning_rate": 1.1442968483116324e-08,
      "loss": 2.2419,
      "step": 75639
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0885809659957886,
      "learning_rate": 1.1423285408257434e-08,
      "loss": 2.4188,
      "step": 75640
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1791718006134033,
      "learning_rate": 1.1403619266749932e-08,
      "loss": 2.2983,
      "step": 75641
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4438960552215576,
      "learning_rate": 1.1383970058624905e-08,
      "loss": 2.3225,
      "step": 75642
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0939348936080933,
      "learning_rate": 1.1364337783917879e-08,
      "loss": 2.2461,
      "step": 75643
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.03122079372406,
      "learning_rate": 1.1344722442659938e-08,
      "loss": 2.2638,
      "step": 75644
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.9848464727401733,
      "learning_rate": 1.1325124034886614e-08,
      "loss": 2.3652,
      "step": 75645
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1687241792678833,
      "learning_rate": 1.13055425606301e-08,
      "loss": 2.4738,
      "step": 75646
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.1808892488479614,
      "learning_rate": 1.1285978019922594e-08,
      "loss": 2.2553,
      "step": 75647
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.072683572769165,
      "learning_rate": 1.1266430412799622e-08,
      "loss": 2.382,
      "step": 75648
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.104361653327942,
      "learning_rate": 1.1246899739292271e-08,
      "loss": 2.1615,
      "step": 75649
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0717661380767822,
      "learning_rate": 1.1227385999434959e-08,
      "loss": 2.0014,
      "step": 75650
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0483678579330444,
      "learning_rate": 1.120788919325877e-08,
      "loss": 2.4347,
      "step": 75651
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.148857593536377,
      "learning_rate": 1.1188409320799232e-08,
      "loss": 2.3809,
      "step": 75652
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0224902629852295,
      "learning_rate": 1.116894638208743e-08,
      "loss": 2.5943,
      "step": 75653
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2680684328079224,
      "learning_rate": 1.1149500377157785e-08,
      "loss": 2.371,
      "step": 75654
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0340393781661987,
      "learning_rate": 1.1130071306042489e-08,
      "loss": 2.248,
      "step": 75655
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9993541836738586,
      "learning_rate": 1.1110659168774852e-08,
      "loss": 2.2326,
      "step": 75656
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.044524908065796,
      "learning_rate": 1.1091263965388177e-08,
      "loss": 2.3881,
      "step": 75657
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1065138578414917,
      "learning_rate": 1.1071885695913554e-08,
      "loss": 2.3585,
      "step": 75658
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0258992910385132,
      "learning_rate": 1.1052524360385398e-08,
      "loss": 2.2984,
      "step": 75659
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.044481873512268,
      "learning_rate": 1.1033179958837014e-08,
      "loss": 2.4832,
      "step": 75660
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.066137671470642,
      "learning_rate": 1.1013852491299492e-08,
      "loss": 2.3298,
      "step": 75661
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1511048078536987,
      "learning_rate": 1.0994541957806137e-08,
      "loss": 2.562,
      "step": 75662
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.21146821975708,
      "learning_rate": 1.0975248358391366e-08,
      "loss": 2.1738,
      "step": 75663
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1716374158859253,
      "learning_rate": 1.0955971693085154e-08,
      "loss": 2.5876,
      "step": 75664
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.104841709136963,
      "learning_rate": 1.093671196192192e-08,
      "loss": 2.2997,
      "step": 75665
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0136021375656128,
      "learning_rate": 1.0917469164934969e-08,
      "loss": 2.3937,
      "step": 75666
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0858526229858398,
      "learning_rate": 1.0898243302154277e-08,
      "loss": 2.1939,
      "step": 75667
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.128866195678711,
      "learning_rate": 1.087903437361537e-08,
      "loss": 2.3886,
      "step": 75668
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0691508054733276,
      "learning_rate": 1.0859842379349339e-08,
      "loss": 2.3645,
      "step": 75669
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1178377866744995,
      "learning_rate": 1.0840667319388376e-08,
      "loss": 2.3879,
      "step": 75670
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.207157850265503,
      "learning_rate": 1.082150919376579e-08,
      "loss": 2.1924,
      "step": 75671
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0802669525146484,
      "learning_rate": 1.0802368002512665e-08,
      "loss": 2.114,
      "step": 75672
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1090950965881348,
      "learning_rate": 1.0783243745664528e-08,
      "loss": 2.3416,
      "step": 75673
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.124810814857483,
      "learning_rate": 1.0764136423250249e-08,
      "loss": 2.193,
      "step": 75674
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.206583023071289,
      "learning_rate": 1.074504603530424e-08,
      "loss": 2.5458,
      "step": 75675
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.066867470741272,
      "learning_rate": 1.07259725818587e-08,
      "loss": 2.3927,
      "step": 75676
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0498884916305542,
      "learning_rate": 1.0706916062945827e-08,
      "loss": 2.3084,
      "step": 75677
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0132441520690918,
      "learning_rate": 1.0687876478597813e-08,
      "loss": 2.199,
      "step": 75678
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0916433334350586,
      "learning_rate": 1.066885382884686e-08,
      "loss": 2.4448,
      "step": 75679
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0602256059646606,
      "learning_rate": 1.0649848113724049e-08,
      "loss": 2.2793,
      "step": 75680
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9915621280670166,
      "learning_rate": 1.0630859333263799e-08,
      "loss": 2.354,
      "step": 75681
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0371448993682861,
      "learning_rate": 1.0611887487497196e-08,
      "loss": 2.16,
      "step": 75682
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0744832754135132,
      "learning_rate": 1.0592932576457549e-08,
      "loss": 2.1502,
      "step": 75683
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0464285612106323,
      "learning_rate": 1.057399460017483e-08,
      "loss": 2.5256,
      "step": 75684
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0542712211608887,
      "learning_rate": 1.055507355868235e-08,
      "loss": 2.2661,
      "step": 75685
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0792961120605469,
      "learning_rate": 1.0536169452012302e-08,
      "loss": 2.4437,
      "step": 75686
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1207492351531982,
      "learning_rate": 1.0517282280196884e-08,
      "loss": 2.3189,
      "step": 75687
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1439136266708374,
      "learning_rate": 1.049841204326718e-08,
      "loss": 2.0755,
      "step": 75688
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0563982725143433,
      "learning_rate": 1.0479558741256502e-08,
      "loss": 2.3341,
      "step": 75689
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0914995670318604,
      "learning_rate": 1.0460722374194821e-08,
      "loss": 2.2645,
      "step": 75690
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.009881854057312,
      "learning_rate": 1.0441902942116555e-08,
      "loss": 2.1625,
      "step": 75691
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0508874654769897,
      "learning_rate": 1.0423100445051681e-08,
      "loss": 2.1659,
      "step": 75692
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.051054835319519,
      "learning_rate": 1.0404314883033506e-08,
      "loss": 2.1796,
      "step": 75693
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1111241579055786,
      "learning_rate": 1.0385546256092004e-08,
      "loss": 2.5063,
      "step": 75694
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1342676877975464,
      "learning_rate": 1.0366794564261596e-08,
      "loss": 2.3619,
      "step": 75695
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0769320726394653,
      "learning_rate": 1.0348059807571143e-08,
      "loss": 2.4291,
      "step": 75696
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0758650302886963,
      "learning_rate": 1.0329341986055064e-08,
      "loss": 2.2799,
      "step": 75697
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0924652814865112,
      "learning_rate": 1.0310641099743334e-08,
      "loss": 2.3849,
      "step": 75698
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0189299583435059,
      "learning_rate": 1.0291957148668152e-08,
      "loss": 2.493,
      "step": 75699
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0081461668014526,
      "learning_rate": 1.0273290132861712e-08,
      "loss": 2.2934,
      "step": 75700
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9961529970169067,
      "learning_rate": 1.0254640052355102e-08,
      "loss": 2.2828,
      "step": 75701
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.073634386062622,
      "learning_rate": 1.0236006907179407e-08,
      "loss": 2.2195,
      "step": 75702
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0679781436920166,
      "learning_rate": 1.0217390697367935e-08,
      "loss": 2.3414,
      "step": 75703
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0564701557159424,
      "learning_rate": 1.019879142295066e-08,
      "loss": 2.2605,
      "step": 75704
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0519734621047974,
      "learning_rate": 1.018020908395978e-08,
      "loss": 2.5484,
      "step": 75705
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3965648412704468,
      "learning_rate": 1.0161643680426381e-08,
      "loss": 2.3751,
      "step": 75706
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9711037278175354,
      "learning_rate": 1.0143095212381548e-08,
      "loss": 2.2673,
      "step": 75707
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0707266330718994,
      "learning_rate": 1.012456367985859e-08,
      "loss": 2.1664,
      "step": 75708
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9194262623786926,
      "learning_rate": 1.0106049082887481e-08,
      "loss": 2.337,
      "step": 75709
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0859941244125366,
      "learning_rate": 1.008755142149931e-08,
      "loss": 2.2525,
      "step": 75710
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0143113136291504,
      "learning_rate": 1.0069070695726269e-08,
      "loss": 2.2297,
      "step": 75711
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2665926218032837,
      "learning_rate": 1.0050606905599448e-08,
      "loss": 2.2048,
      "step": 75712
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1704431772232056,
      "learning_rate": 1.003216005114993e-08,
      "loss": 2.2847,
      "step": 75713
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.156376838684082,
      "learning_rate": 1.0013730132408805e-08,
      "loss": 2.3074,
      "step": 75714
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.154043436050415,
      "learning_rate": 9.995317149408269e-09,
      "loss": 2.5641,
      "step": 75715
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0978615283966064,
      "learning_rate": 9.976921102178294e-09,
      "loss": 2.292,
      "step": 75716
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9894061088562012,
      "learning_rate": 9.95854199075108e-09,
      "loss": 2.4281,
      "step": 75717
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0721886157989502,
      "learning_rate": 9.940179815157714e-09,
      "loss": 2.5409,
      "step": 75718
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0665500164031982,
      "learning_rate": 9.92183457542928e-09,
      "loss": 2.3896,
      "step": 75719
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0848031044006348,
      "learning_rate": 9.903506271595753e-09,
      "loss": 2.4382,
      "step": 75720
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2266780138015747,
      "learning_rate": 9.885194903689333e-09,
      "loss": 2.1484,
      "step": 75721
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.143580436706543,
      "learning_rate": 9.866900471739993e-09,
      "loss": 2.1881,
      "step": 75722
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0614862442016602,
      "learning_rate": 9.848622975781042e-09,
      "loss": 2.2639,
      "step": 75723
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9569582939147949,
      "learning_rate": 9.830362415841343e-09,
      "loss": 2.3079,
      "step": 75724
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.044537901878357,
      "learning_rate": 9.812118791951986e-09,
      "loss": 2.4032,
      "step": 75725
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2755540609359741,
      "learning_rate": 9.793892104145164e-09,
      "loss": 2.2277,
      "step": 75726
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1127723455429077,
      "learning_rate": 9.775682352450855e-09,
      "loss": 2.4584,
      "step": 75727
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.117335319519043,
      "learning_rate": 9.757489536899035e-09,
      "loss": 2.1811,
      "step": 75728
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1240379810333252,
      "learning_rate": 9.739313657523008e-09,
      "loss": 2.5945,
      "step": 75729
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9951139092445374,
      "learning_rate": 9.721154714350534e-09,
      "loss": 2.3274,
      "step": 75730
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0603302717208862,
      "learning_rate": 9.703012707416027e-09,
      "loss": 2.1803,
      "step": 75731
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0045527219772339,
      "learning_rate": 9.684887636746131e-09,
      "loss": 2.3376,
      "step": 75732
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.157699465751648,
      "learning_rate": 9.666779502375268e-09,
      "loss": 2.1996,
      "step": 75733
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0857758522033691,
      "learning_rate": 9.648688304332298e-09,
      "loss": 2.3647,
      "step": 75734
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0971301794052124,
      "learning_rate": 9.630614042647202e-09,
      "loss": 2.486,
      "step": 75735
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0617883205413818,
      "learning_rate": 9.612556717352172e-09,
      "loss": 2.4359,
      "step": 75736
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1398171186447144,
      "learning_rate": 9.594516328478299e-09,
      "loss": 2.2936,
      "step": 75737
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9198857545852661,
      "learning_rate": 9.576492876053334e-09,
      "loss": 2.2288,
      "step": 75738
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1393033266067505,
      "learning_rate": 9.558486360110586e-09,
      "loss": 2.3634,
      "step": 75739
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0677769184112549,
      "learning_rate": 9.540496780678921e-09,
      "loss": 2.3196,
      "step": 75740
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1940306425094604,
      "learning_rate": 9.522524137790535e-09,
      "loss": 2.2031,
      "step": 75741
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1472768783569336,
      "learning_rate": 9.504568431474293e-09,
      "loss": 2.48,
      "step": 75742
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0641758441925049,
      "learning_rate": 9.486629661761282e-09,
      "loss": 2.3703,
      "step": 75743
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5651670694351196,
      "learning_rate": 9.468707828681478e-09,
      "loss": 2.1745,
      "step": 75744
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3924638032913208,
      "learning_rate": 9.450802932265968e-09,
      "loss": 2.3273,
      "step": 75745
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2204668521881104,
      "learning_rate": 9.432914972543616e-09,
      "loss": 2.4598,
      "step": 75746
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0906413793563843,
      "learning_rate": 9.415043949547731e-09,
      "loss": 2.3331,
      "step": 75747
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2044389247894287,
      "learning_rate": 9.397189863304957e-09,
      "loss": 2.2251,
      "step": 75748
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0127378702163696,
      "learning_rate": 9.379352713848599e-09,
      "loss": 2.2658,
      "step": 75749
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0786502361297607,
      "learning_rate": 9.361532501207526e-09,
      "loss": 2.2878,
      "step": 75750
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1515202522277832,
      "learning_rate": 9.343729225411714e-09,
      "loss": 2.1315,
      "step": 75751
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1632695198059082,
      "learning_rate": 9.325942886491135e-09,
      "loss": 2.3381,
      "step": 75752
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0377005338668823,
      "learning_rate": 9.30817348447799e-09,
      "loss": 2.3489,
      "step": 75753
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0461266040802002,
      "learning_rate": 9.290421019400032e-09,
      "loss": 2.1886,
      "step": 75754
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0183037519454956,
      "learning_rate": 9.272685491288348e-09,
      "loss": 2.1978,
      "step": 75755
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2297565937042236,
      "learning_rate": 9.254966900172912e-09,
      "loss": 2.2036,
      "step": 75756
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2174655199050903,
      "learning_rate": 9.237265246083705e-09,
      "loss": 2.201,
      "step": 75757
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0606993436813354,
      "learning_rate": 9.219580529050698e-09,
      "loss": 2.2551,
      "step": 75758
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.011461615562439,
      "learning_rate": 9.20191274910387e-09,
      "loss": 2.4257,
      "step": 75759
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1103166341781616,
      "learning_rate": 9.184261906273196e-09,
      "loss": 2.4095,
      "step": 75760
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1215423345565796,
      "learning_rate": 9.16662800058976e-09,
      "loss": 2.2534,
      "step": 75761
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.103096842765808,
      "learning_rate": 9.149011032081323e-09,
      "loss": 2.184,
      "step": 75762
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1456362009048462,
      "learning_rate": 9.131411000778967e-09,
      "loss": 2.2733,
      "step": 75763
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0150572061538696,
      "learning_rate": 9.11382790671267e-09,
      "loss": 2.1397,
      "step": 75764
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4084395170211792,
      "learning_rate": 9.096261749911296e-09,
      "loss": 2.3416,
      "step": 75765
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9921263456344604,
      "learning_rate": 9.078712530405931e-09,
      "loss": 2.2904,
      "step": 75766
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1076136827468872,
      "learning_rate": 9.061180248225442e-09,
      "loss": 2.4714,
      "step": 75767
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0835589170455933,
      "learning_rate": 9.043664903399807e-09,
      "loss": 2.3569,
      "step": 75768
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9729591012001038,
      "learning_rate": 9.026166495957889e-09,
      "loss": 2.2646,
      "step": 75769
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0506560802459717,
      "learning_rate": 9.008685025930774e-09,
      "loss": 2.2174,
      "step": 75770
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0248956680297852,
      "learning_rate": 8.99122049334733e-09,
      "loss": 2.1451,
      "step": 75771
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.050840139389038,
      "learning_rate": 8.973772898237531e-09,
      "loss": 2.2279,
      "step": 75772
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9426885843276978,
      "learning_rate": 8.956342240630244e-09,
      "loss": 2.246,
      "step": 75773
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1280434131622314,
      "learning_rate": 8.938928520556555e-09,
      "loss": 2.2548,
      "step": 75774
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.995551586151123,
      "learning_rate": 8.92153173804422e-09,
      "loss": 2.3821,
      "step": 75775
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1365967988967896,
      "learning_rate": 8.904151893123214e-09,
      "loss": 2.3317,
      "step": 75776
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1857739686965942,
      "learning_rate": 8.886788985823513e-09,
      "loss": 2.1756,
      "step": 75777
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1621240377426147,
      "learning_rate": 8.869443016173984e-09,
      "loss": 2.2877,
      "step": 75778
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1018911600112915,
      "learning_rate": 8.852113984204603e-09,
      "loss": 2.3091,
      "step": 75779
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0325770378112793,
      "learning_rate": 8.834801889944234e-09,
      "loss": 2.2975,
      "step": 75780
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2190016508102417,
      "learning_rate": 8.817506733421743e-09,
      "loss": 2.3122,
      "step": 75781
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1077935695648193,
      "learning_rate": 8.800228514668218e-09,
      "loss": 2.619,
      "step": 75782
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.183061122894287,
      "learning_rate": 8.782967233711414e-09,
      "loss": 2.286,
      "step": 75783
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1567065715789795,
      "learning_rate": 8.765722890581308e-09,
      "loss": 2.2818,
      "step": 75784
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0422523021697998,
      "learning_rate": 8.748495485306763e-09,
      "loss": 2.2815,
      "step": 75785
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1797337532043457,
      "learning_rate": 8.731285017916646e-09,
      "loss": 2.3857,
      "step": 75786
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0852992534637451,
      "learning_rate": 8.714091488442044e-09,
      "loss": 2.2466,
      "step": 75787
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1223877668380737,
      "learning_rate": 8.696914896909603e-09,
      "loss": 2.1832,
      "step": 75788
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1865299940109253,
      "learning_rate": 8.679755243349297e-09,
      "loss": 2.2565,
      "step": 75789
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.147552490234375,
      "learning_rate": 8.662612527789993e-09,
      "loss": 2.3656,
      "step": 75790
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2917876243591309,
      "learning_rate": 8.645486750262777e-09,
      "loss": 2.2774,
      "step": 75791
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.006430745124817,
      "learning_rate": 8.628377910793184e-09,
      "loss": 2.2122,
      "step": 75792
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1533774137496948,
      "learning_rate": 8.611286009413412e-09,
      "loss": 2.4689,
      "step": 75793
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0820283889770508,
      "learning_rate": 8.594211046151212e-09,
      "loss": 2.1994,
      "step": 75794
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0246256589889526,
      "learning_rate": 8.577153021035457e-09,
      "loss": 2.5113,
      "step": 75795
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1957041025161743,
      "learning_rate": 8.560111934093896e-09,
      "loss": 2.5658,
      "step": 75796
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0673344135284424,
      "learning_rate": 8.543087785357618e-09,
      "loss": 2.363,
      "step": 75797
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3617595434188843,
      "learning_rate": 8.526080574854378e-09,
      "loss": 2.1356,
      "step": 75798
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.427383303642273,
      "learning_rate": 8.509090302613043e-09,
      "loss": 2.157,
      "step": 75799
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0121111869812012,
      "learning_rate": 8.492116968661368e-09,
      "loss": 2.0697,
      "step": 75800
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0218684673309326,
      "learning_rate": 8.47516057303044e-09,
      "loss": 2.5461,
      "step": 75801
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0643610954284668,
      "learning_rate": 8.458221115748011e-09,
      "loss": 2.182,
      "step": 75802
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2269355058670044,
      "learning_rate": 8.441298596841841e-09,
      "loss": 2.23,
      "step": 75803
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2587933540344238,
      "learning_rate": 8.424393016340794e-09,
      "loss": 2.2485,
      "step": 75804
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0260204076766968,
      "learning_rate": 8.407504374274844e-09,
      "loss": 2.5113,
      "step": 75805
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0980018377304077,
      "learning_rate": 8.39063267067175e-09,
      "loss": 2.4464,
      "step": 75806
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3476289510726929,
      "learning_rate": 8.373777905560376e-09,
      "loss": 2.0831,
      "step": 75807
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9586803913116455,
      "learning_rate": 8.356940078969589e-09,
      "loss": 2.1693,
      "step": 75808
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0422147512435913,
      "learning_rate": 8.340119190927143e-09,
      "loss": 2.1732,
      "step": 75809
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1497443914413452,
      "learning_rate": 8.323315241461904e-09,
      "loss": 2.4621,
      "step": 75810
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1175163984298706,
      "learning_rate": 8.306528230601629e-09,
      "loss": 2.2558,
      "step": 75811
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0455164909362793,
      "learning_rate": 8.289758158376294e-09,
      "loss": 2.4935,
      "step": 75812
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.044188380241394,
      "learning_rate": 8.273005024812542e-09,
      "loss": 2.1568,
      "step": 75813
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9960221648216248,
      "learning_rate": 8.25626882994035e-09,
      "loss": 2.1894,
      "step": 75814
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2224677801132202,
      "learning_rate": 8.239549573787475e-09,
      "loss": 2.2511,
      "step": 75815
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1226097345352173,
      "learning_rate": 8.222847256382782e-09,
      "loss": 2.402,
      "step": 75816
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0736843347549438,
      "learning_rate": 8.206161877754026e-09,
      "loss": 2.324,
      "step": 75817
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1690478324890137,
      "learning_rate": 8.189493437928964e-09,
      "loss": 2.2855,
      "step": 75818
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0976959466934204,
      "learning_rate": 8.17284193693646e-09,
      "loss": 2.2236,
      "step": 75819
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1185674667358398,
      "learning_rate": 8.156207374805381e-09,
      "loss": 2.265,
      "step": 75820
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.118619680404663,
      "learning_rate": 8.139589751562372e-09,
      "loss": 2.1635,
      "step": 75821
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1129800081253052,
      "learning_rate": 8.122989067237408e-09,
      "loss": 2.3395,
      "step": 75822
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0194910764694214,
      "learning_rate": 8.106405321857136e-09,
      "loss": 2.4669,
      "step": 75823
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0769113302230835,
      "learning_rate": 8.089838515451532e-09,
      "loss": 2.2156,
      "step": 75824
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0715322494506836,
      "learning_rate": 8.073288648046129e-09,
      "loss": 2.2341,
      "step": 75825
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1549654006958008,
      "learning_rate": 8.056755719669795e-09,
      "loss": 2.1718,
      "step": 75826
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0698225498199463,
      "learning_rate": 8.040239730352506e-09,
      "loss": 2.3686,
      "step": 75827
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.150578498840332,
      "learning_rate": 8.023740680119796e-09,
      "loss": 2.454,
      "step": 75828
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1263096332550049,
      "learning_rate": 8.007258569000531e-09,
      "loss": 2.1053,
      "step": 75829
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1357300281524658,
      "learning_rate": 7.990793397023578e-09,
      "loss": 2.3175,
      "step": 75830
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0803190469741821,
      "learning_rate": 7.97434516421558e-09,
      "loss": 2.3994,
      "step": 75831
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1453443765640259,
      "learning_rate": 7.957913870604294e-09,
      "loss": 2.3085,
      "step": 75832
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0888686180114746,
      "learning_rate": 7.941499516218587e-09,
      "loss": 2.543,
      "step": 75833
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0187543630599976,
      "learning_rate": 7.925102101086213e-09,
      "loss": 2.2794,
      "step": 75834
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1294344663619995,
      "learning_rate": 7.908721625234927e-09,
      "loss": 2.308,
      "step": 75835
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.016687273979187,
      "learning_rate": 7.892358088691376e-09,
      "loss": 2.179,
      "step": 75836
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9851059317588806,
      "learning_rate": 7.876011491484426e-09,
      "loss": 2.2688,
      "step": 75837
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.167928695678711,
      "learning_rate": 7.85968183364072e-09,
      "loss": 2.299,
      "step": 75838
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.180871844291687,
      "learning_rate": 7.843369115190235e-09,
      "loss": 2.3392,
      "step": 75839
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2234548330307007,
      "learning_rate": 7.827073336157398e-09,
      "loss": 2.5363,
      "step": 75840
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.117314100265503,
      "learning_rate": 7.810794496572182e-09,
      "loss": 2.3165,
      "step": 75841
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.199262022972107,
      "learning_rate": 7.794532596461235e-09,
      "loss": 2.4915,
      "step": 75842
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0795589685440063,
      "learning_rate": 7.778287635853422e-09,
      "loss": 2.3166,
      "step": 75843
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.153734564781189,
      "learning_rate": 7.762059614774276e-09,
      "loss": 2.238,
      "step": 75844
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1431188583374023,
      "learning_rate": 7.745848533252664e-09,
      "loss": 2.2803,
      "step": 75845
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1377357244491577,
      "learning_rate": 7.729654391316343e-09,
      "loss": 2.2978,
      "step": 75846
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.10052490234375,
      "learning_rate": 7.713477188990847e-09,
      "loss": 2.3794,
      "step": 75847
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9663291573524475,
      "learning_rate": 7.697316926305043e-09,
      "loss": 2.2168,
      "step": 75848
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.014858603477478,
      "learning_rate": 7.681173603286685e-09,
      "loss": 2.3205,
      "step": 75849
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.126893162727356,
      "learning_rate": 7.665047219962419e-09,
      "loss": 2.4426,
      "step": 75850
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1151344776153564,
      "learning_rate": 7.64893777636e-09,
      "loss": 2.2571,
      "step": 75851
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1654658317565918,
      "learning_rate": 7.632845272506073e-09,
      "loss": 2.2288,
      "step": 75852
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.028889536857605,
      "learning_rate": 7.616769708429505e-09,
      "loss": 2.3037,
      "step": 75853
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0646909475326538,
      "learning_rate": 7.600711084154722e-09,
      "loss": 2.2627,
      "step": 75854
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0276793241500854,
      "learning_rate": 7.584669399711696e-09,
      "loss": 2.1367,
      "step": 75855
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.05552339553833,
      "learning_rate": 7.568644655127077e-09,
      "loss": 2.4615,
      "step": 75856
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9555436968803406,
      "learning_rate": 7.552636850426397e-09,
      "loss": 2.4172,
      "step": 75857
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1785163879394531,
      "learning_rate": 7.536645985638525e-09,
      "loss": 2.4072,
      "step": 75858
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1895840167999268,
      "learning_rate": 7.520672060790102e-09,
      "loss": 2.3075,
      "step": 75859
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1158281564712524,
      "learning_rate": 7.504715075906666e-09,
      "loss": 2.22,
      "step": 75860
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0339024066925049,
      "learning_rate": 7.488775031018192e-09,
      "loss": 2.3359,
      "step": 75861
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0725278854370117,
      "learning_rate": 7.472851926149106e-09,
      "loss": 2.1963,
      "step": 75862
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.228746771812439,
      "learning_rate": 7.456945761328272e-09,
      "loss": 2.26,
      "step": 75863
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1285074949264526,
      "learning_rate": 7.4410565365823365e-09,
      "loss": 2.2365,
      "step": 75864
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1679226160049438,
      "learning_rate": 7.425184251936835e-09,
      "loss": 2.3973,
      "step": 75865
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.095725655555725,
      "learning_rate": 7.4093289074195216e-09,
      "loss": 2.5363,
      "step": 75866
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0687689781188965,
      "learning_rate": 7.393490503057043e-09,
      "loss": 2.294,
      "step": 75867
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2666083574295044,
      "learning_rate": 7.377669038877156e-09,
      "loss": 2.3477,
      "step": 75868
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9933705925941467,
      "learning_rate": 7.3618645149065025e-09,
      "loss": 2.2233,
      "step": 75869
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0841307640075684,
      "learning_rate": 7.34607693117062e-09,
      "loss": 2.2775,
      "step": 75870
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9988868236541748,
      "learning_rate": 7.330306287697264e-09,
      "loss": 2.2217,
      "step": 75871
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0606460571289062,
      "learning_rate": 7.31455258451419e-09,
      "loss": 2.4609,
      "step": 75872
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0624138116836548,
      "learning_rate": 7.298815821645822e-09,
      "loss": 2.2577,
      "step": 75873
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1549468040466309,
      "learning_rate": 7.283095999119916e-09,
      "loss": 2.1158,
      "step": 75874
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.073447346687317,
      "learning_rate": 7.267393116963117e-09,
      "loss": 2.2954,
      "step": 75875
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.144191026687622,
      "learning_rate": 7.251707175202072e-09,
      "loss": 2.0671,
      "step": 75876
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1665964126586914,
      "learning_rate": 7.2360381738634245e-09,
      "loss": 2.4282,
      "step": 75877
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2212985754013062,
      "learning_rate": 7.220386112973821e-09,
      "loss": 2.2495,
      "step": 75878
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0926777124404907,
      "learning_rate": 7.204750992559906e-09,
      "loss": 2.2643,
      "step": 75879
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1510419845581055,
      "learning_rate": 7.189132812648325e-09,
      "loss": 2.0739,
      "step": 75880
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.036027431488037,
      "learning_rate": 7.173531573263503e-09,
      "loss": 2.1627,
      "step": 75881
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0987685918807983,
      "learning_rate": 7.1579472744354174e-09,
      "loss": 2.1044,
      "step": 75882
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0547337532043457,
      "learning_rate": 7.142379916187381e-09,
      "loss": 2.1981,
      "step": 75883
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1075644493103027,
      "learning_rate": 7.12682949854715e-09,
      "loss": 2.1921,
      "step": 75884
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0471867322921753,
      "learning_rate": 7.11129602154137e-09,
      "loss": 2.2562,
      "step": 75885
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0782442092895508,
      "learning_rate": 7.095779485196685e-09,
      "loss": 2.3746,
      "step": 75886
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0360585451126099,
      "learning_rate": 7.080279889537522e-09,
      "loss": 2.4363,
      "step": 75887
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0538021326065063,
      "learning_rate": 7.064797234591636e-09,
      "loss": 2.2338,
      "step": 75888
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3214764595031738,
      "learning_rate": 7.0493315203845615e-09,
      "loss": 2.4582,
      "step": 75889
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0329654216766357,
      "learning_rate": 7.033882746944054e-09,
      "loss": 2.2542,
      "step": 75890
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0784099102020264,
      "learning_rate": 7.018450914294539e-09,
      "loss": 2.2429,
      "step": 75891
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0886409282684326,
      "learning_rate": 7.003036022462662e-09,
      "loss": 2.5342,
      "step": 75892
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0638720989227295,
      "learning_rate": 6.987638071475067e-09,
      "loss": 2.2288,
      "step": 75893
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3734581470489502,
      "learning_rate": 6.97225706135729e-09,
      "loss": 2.1454,
      "step": 75894
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0944381952285767,
      "learning_rate": 6.956892992134867e-09,
      "loss": 2.2988,
      "step": 75895
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.211188554763794,
      "learning_rate": 6.941545863835553e-09,
      "loss": 2.2824,
      "step": 75896
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0192452669143677,
      "learning_rate": 6.926215676483772e-09,
      "loss": 2.4409,
      "step": 75897
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0223896503448486,
      "learning_rate": 6.91090243010728e-09,
      "loss": 2.2304,
      "step": 75898
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.077405333518982,
      "learning_rate": 6.895606124730503e-09,
      "loss": 2.3648,
      "step": 75899
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1269633769989014,
      "learning_rate": 6.8803267603789745e-09,
      "loss": 2.4087,
      "step": 75900
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.146925926208496,
      "learning_rate": 6.865064337079342e-09,
      "loss": 2.3941,
      "step": 75901
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0394669771194458,
      "learning_rate": 6.849818854858248e-09,
      "loss": 2.2753,
      "step": 75902
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0786494016647339,
      "learning_rate": 6.834590313740119e-09,
      "loss": 2.187,
      "step": 75903
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0371816158294678,
      "learning_rate": 6.819378713751601e-09,
      "loss": 2.1863,
      "step": 75904
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.106374740600586,
      "learning_rate": 6.804184054919338e-09,
      "loss": 2.307,
      "step": 75905
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1480478048324585,
      "learning_rate": 6.789006337266646e-09,
      "loss": 2.4312,
      "step": 75906
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0639744997024536,
      "learning_rate": 6.77384556082239e-09,
      "loss": 2.3978,
      "step": 75907
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2308670282363892,
      "learning_rate": 6.758701725608774e-09,
      "loss": 2.3962,
      "step": 75908
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0285156965255737,
      "learning_rate": 6.743574831654664e-09,
      "loss": 2.22,
      "step": 75909
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1021173000335693,
      "learning_rate": 6.728464878983376e-09,
      "loss": 2.2287,
      "step": 75910
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1113686561584473,
      "learning_rate": 6.713371867622664e-09,
      "loss": 2.1805,
      "step": 75911
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1433149576187134,
      "learning_rate": 6.698295797595844e-09,
      "loss": 2.3212,
      "step": 75912
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.027432918548584,
      "learning_rate": 6.6832366689295605e-09,
      "loss": 2.0415,
      "step": 75913
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0816915035247803,
      "learning_rate": 6.668194481650459e-09,
      "loss": 2.1609,
      "step": 75914
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.026788353919983,
      "learning_rate": 6.653169235781853e-09,
      "loss": 2.1522,
      "step": 75915
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0938007831573486,
      "learning_rate": 6.6381609313503905e-09,
      "loss": 2.3847,
      "step": 75916
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0710409879684448,
      "learning_rate": 6.6231695683816045e-09,
      "loss": 2.2961,
      "step": 75917
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.037081003189087,
      "learning_rate": 6.60819514689992e-09,
      "loss": 2.3328,
      "step": 75918
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.05656898021698,
      "learning_rate": 6.593237666933095e-09,
      "loss": 2.2226,
      "step": 75919
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.106439232826233,
      "learning_rate": 6.57829712850444e-09,
      "loss": 2.478,
      "step": 75920
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1105172634124756,
      "learning_rate": 6.563373531638384e-09,
      "loss": 2.4195,
      "step": 75921
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0289039611816406,
      "learning_rate": 6.5484668763626805e-09,
      "loss": 2.0804,
      "step": 75922
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1569104194641113,
      "learning_rate": 6.5335771627017545e-09,
      "loss": 2.1448,
      "step": 75923
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0491644144058228,
      "learning_rate": 6.518704390680031e-09,
      "loss": 2.438,
      "step": 75924
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0460468530654907,
      "learning_rate": 6.503848560324155e-09,
      "loss": 2.1924,
      "step": 75925
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0632975101470947,
      "learning_rate": 6.489009671657442e-09,
      "loss": 2.2219,
      "step": 75926
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1963608264923096,
      "learning_rate": 6.4741877247065375e-09,
      "loss": 2.2667,
      "step": 75927
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1104880571365356,
      "learning_rate": 6.459382719495866e-09,
      "loss": 2.3847,
      "step": 75928
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0505393743515015,
      "learning_rate": 6.444594656050962e-09,
      "loss": 2.515,
      "step": 75929
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1237614154815674,
      "learning_rate": 6.429823534396251e-09,
      "loss": 2.2396,
      "step": 75930
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1130214929580688,
      "learning_rate": 6.415069354558379e-09,
      "loss": 2.3937,
      "step": 75931
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0475152730941772,
      "learning_rate": 6.400332116559549e-09,
      "loss": 2.4151,
      "step": 75932
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0886383056640625,
      "learning_rate": 6.385611820427518e-09,
      "loss": 2.4936,
      "step": 75933
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.185218095779419,
      "learning_rate": 6.370908466185599e-09,
      "loss": 2.1552,
      "step": 75934
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0797314643859863,
      "learning_rate": 6.356222053859329e-09,
      "loss": 2.1117,
      "step": 75935
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0150588750839233,
      "learning_rate": 6.341552583474242e-09,
      "loss": 2.5841,
      "step": 75936
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0019181966781616,
      "learning_rate": 6.326900055053653e-09,
      "loss": 2.5312,
      "step": 75937
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.082701563835144,
      "learning_rate": 6.312264468623097e-09,
      "loss": 2.5359,
      "step": 75938
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.041399359703064,
      "learning_rate": 6.29764582420811e-09,
      "loss": 2.277,
      "step": 75939
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3041961193084717,
      "learning_rate": 6.2830441218320045e-09,
      "loss": 2.4071,
      "step": 75940
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1458170413970947,
      "learning_rate": 6.2684593615214285e-09,
      "loss": 2.252,
      "step": 75941
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.065027117729187,
      "learning_rate": 6.253891543299695e-09,
      "loss": 2.363,
      "step": 75942
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0831451416015625,
      "learning_rate": 6.239340667191229e-09,
      "loss": 2.4811,
      "step": 75943
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0233094692230225,
      "learning_rate": 6.224806733221567e-09,
      "loss": 2.4122,
      "step": 75944
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0520470142364502,
      "learning_rate": 6.210289741416242e-09,
      "loss": 2.2558,
      "step": 75945
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1514723300933838,
      "learning_rate": 6.19578969179746e-09,
      "loss": 2.2805,
      "step": 75946
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0494898557662964,
      "learning_rate": 6.181306584391866e-09,
      "loss": 2.2112,
      "step": 75947
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1538997888565063,
      "learning_rate": 6.1668404192227745e-09,
      "loss": 2.3115,
      "step": 75948
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.022567629814148,
      "learning_rate": 6.152391196315721e-09,
      "loss": 2.4357,
      "step": 75949
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0305980443954468,
      "learning_rate": 6.13795891569402e-09,
      "loss": 2.2814,
      "step": 75950
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0939735174179077,
      "learning_rate": 6.123543577383206e-09,
      "loss": 2.2552,
      "step": 75951
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.101868748664856,
      "learning_rate": 6.109145181406595e-09,
      "loss": 2.2638,
      "step": 75952
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0299034118652344,
      "learning_rate": 6.094763727789721e-09,
      "loss": 2.346,
      "step": 75953
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9816443920135498,
      "learning_rate": 6.080399216557009e-09,
      "loss": 2.2495,
      "step": 75954
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0674757957458496,
      "learning_rate": 6.066051647731774e-09,
      "loss": 2.5184,
      "step": 75955
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9686233997344971,
      "learning_rate": 6.0517210213395514e-09,
      "loss": 2.2877,
      "step": 75956
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0857160091400146,
      "learning_rate": 6.037407337403655e-09,
      "loss": 2.3126,
      "step": 75957
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.225319266319275,
      "learning_rate": 6.0231105959474016e-09,
      "loss": 2.3692,
      "step": 75958
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0326420068740845,
      "learning_rate": 6.008830796998544e-09,
      "loss": 2.2356,
      "step": 75959
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1115055084228516,
      "learning_rate": 5.9945679405770675e-09,
      "loss": 2.427,
      "step": 75960
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.160058856010437,
      "learning_rate": 5.980322026710728e-09,
      "loss": 2.3153,
      "step": 75961
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1597199440002441,
      "learning_rate": 5.966093055420619e-09,
      "loss": 2.2148,
      "step": 75962
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9890137314796448,
      "learning_rate": 5.951881026733386e-09,
      "loss": 2.254,
      "step": 75963
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1219936609268188,
      "learning_rate": 5.937685940671234e-09,
      "loss": 2.1853,
      "step": 75964
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0359503030776978,
      "learning_rate": 5.923507797259698e-09,
      "loss": 2.2699,
      "step": 75965
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.00180184841156,
      "learning_rate": 5.909346596522092e-09,
      "loss": 2.2468,
      "step": 75966
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2465646266937256,
      "learning_rate": 5.895202338482842e-09,
      "loss": 2.1595,
      "step": 75967
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.130534052848816,
      "learning_rate": 5.8810750231652615e-09,
      "loss": 2.242,
      "step": 75968
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.122496485710144,
      "learning_rate": 5.866964650593776e-09,
      "loss": 2.2688,
      "step": 75969
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0974690914154053,
      "learning_rate": 5.852871220791701e-09,
      "loss": 2.334,
      "step": 75970
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1651020050048828,
      "learning_rate": 5.838794733783459e-09,
      "loss": 2.4026,
      "step": 75971
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1956708431243896,
      "learning_rate": 5.824735189592367e-09,
      "loss": 2.4321,
      "step": 75972
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0817210674285889,
      "learning_rate": 5.8106925882439606e-09,
      "loss": 2.4664,
      "step": 75973
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2037385702133179,
      "learning_rate": 5.796666929760442e-09,
      "loss": 2.3806,
      "step": 75974
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0513988733291626,
      "learning_rate": 5.782658214165127e-09,
      "loss": 2.2857,
      "step": 75975
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0168442726135254,
      "learning_rate": 5.76866644148355e-09,
      "loss": 2.291,
      "step": 75976
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1800771951675415,
      "learning_rate": 5.754691611737917e-09,
      "loss": 2.3425,
      "step": 75977
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0607222318649292,
      "learning_rate": 5.740733724952652e-09,
      "loss": 2.209,
      "step": 75978
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1336997747421265,
      "learning_rate": 5.726792781151069e-09,
      "loss": 2.346,
      "step": 75979
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0440092086791992,
      "learning_rate": 5.712868780357595e-09,
      "loss": 2.1082,
      "step": 75980
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.144422173500061,
      "learning_rate": 5.6989617225944315e-09,
      "loss": 2.3618,
      "step": 75981
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.13474702835083,
      "learning_rate": 5.685071607887116e-09,
      "loss": 2.1689,
      "step": 75982
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1527953147888184,
      "learning_rate": 5.671198436256742e-09,
      "loss": 2.5658,
      "step": 75983
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1147204637527466,
      "learning_rate": 5.6573422077288445e-09,
      "loss": 2.4588,
      "step": 75984
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.003110647201538,
      "learning_rate": 5.643502922325627e-09,
      "loss": 2.3713,
      "step": 75985
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0923696756362915,
      "learning_rate": 5.629680580071517e-09,
      "loss": 2.3023,
      "step": 75986
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2110395431518555,
      "learning_rate": 5.615875180989827e-09,
      "loss": 2.2062,
      "step": 75987
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0938018560409546,
      "learning_rate": 5.6020867251027625e-09,
      "loss": 2.6329,
      "step": 75988
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0147148370742798,
      "learning_rate": 5.588315212434747e-09,
      "loss": 2.3501,
      "step": 75989
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1611191034317017,
      "learning_rate": 5.574560643010207e-09,
      "loss": 2.375,
      "step": 75990
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9527381658554077,
      "learning_rate": 5.5608230168502365e-09,
      "loss": 2.3754,
      "step": 75991
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1857606172561646,
      "learning_rate": 5.54710233397926e-09,
      "loss": 2.3936,
      "step": 75992
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1192688941955566,
      "learning_rate": 5.533398594420592e-09,
      "loss": 2.4926,
      "step": 75993
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0347228050231934,
      "learning_rate": 5.519711798196437e-09,
      "loss": 2.1933,
      "step": 75994
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0653146505355835,
      "learning_rate": 5.50604194533122e-09,
      "loss": 2.5543,
      "step": 75995
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1339309215545654,
      "learning_rate": 5.492389035848255e-09,
      "loss": 2.3646,
      "step": 75996
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0328963994979858,
      "learning_rate": 5.478753069769749e-09,
      "loss": 2.375,
      "step": 75997
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0513712167739868,
      "learning_rate": 5.4651340471201246e-09,
      "loss": 2.4551,
      "step": 75998
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9838902354240417,
      "learning_rate": 5.451531967920476e-09,
      "loss": 2.3105,
      "step": 75999
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.000274419784546,
      "learning_rate": 5.4379468321952285e-09,
      "loss": 2.3353,
      "step": 76000
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9811325073242188,
      "learning_rate": 5.424378639966587e-09,
      "loss": 2.3495,
      "step": 76001
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1470956802368164,
      "learning_rate": 5.410827391258977e-09,
      "loss": 2.1201,
      "step": 76002
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0980418920516968,
      "learning_rate": 5.397293086094602e-09,
      "loss": 2.2492,
      "step": 76003
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0382888317108154,
      "learning_rate": 5.383775724495666e-09,
      "loss": 2.3125,
      "step": 76004
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.076102375984192,
      "learning_rate": 5.370275306486594e-09,
      "loss": 2.2704,
      "step": 76005
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.204306721687317,
      "learning_rate": 5.356791832088482e-09,
      "loss": 2.618,
      "step": 76006
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1507664918899536,
      "learning_rate": 5.343325301325753e-09,
      "loss": 2.2311,
      "step": 76007
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1304980516433716,
      "learning_rate": 5.329875714220612e-09,
      "loss": 2.2179,
      "step": 76008
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3304831981658936,
      "learning_rate": 5.316443070795263e-09,
      "loss": 2.4387,
      "step": 76009
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1023184061050415,
      "learning_rate": 5.303027371073022e-09,
      "loss": 2.2311,
      "step": 76010
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1855806112289429,
      "learning_rate": 5.289628615077203e-09,
      "loss": 2.4557,
      "step": 76011
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1026767492294312,
      "learning_rate": 5.2762468028300095e-09,
      "loss": 2.2266,
      "step": 76012
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1521522998809814,
      "learning_rate": 5.262881934353647e-09,
      "loss": 2.1127,
      "step": 76013
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1694068908691406,
      "learning_rate": 5.249534009671431e-09,
      "loss": 2.3077,
      "step": 76014
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1305454969406128,
      "learning_rate": 5.236203028806675e-09,
      "loss": 2.2897,
      "step": 76015
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0935112237930298,
      "learning_rate": 5.2228889917804726e-09,
      "loss": 2.1412,
      "step": 76016
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.01226007938385,
      "learning_rate": 5.20959189861503e-09,
      "loss": 2.3956,
      "step": 76017
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0386801958084106,
      "learning_rate": 5.1963117493347706e-09,
      "loss": 2.3668,
      "step": 76018
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2231584787368774,
      "learning_rate": 5.1830485439619e-09,
      "loss": 2.4804,
      "step": 76019
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9761089086532593,
      "learning_rate": 5.169802282517511e-09,
      "loss": 2.3405,
      "step": 76020
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0426149368286133,
      "learning_rate": 5.15657296502492e-09,
      "loss": 2.2219,
      "step": 76021
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0987831354141235,
      "learning_rate": 5.14336059150633e-09,
      "loss": 2.2045,
      "step": 76022
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9338406920433044,
      "learning_rate": 5.130165161985057e-09,
      "loss": 2.2381,
      "step": 76023
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0558722019195557,
      "learning_rate": 5.116986676482194e-09,
      "loss": 2.5399,
      "step": 76024
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0805155038833618,
      "learning_rate": 5.103825135019946e-09,
      "loss": 2.5379,
      "step": 76025
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.021447777748108,
      "learning_rate": 5.090680537622738e-09,
      "loss": 2.4485,
      "step": 76026
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.009498119354248,
      "learning_rate": 5.077552884310555e-09,
      "loss": 2.3108,
      "step": 76027
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0448349714279175,
      "learning_rate": 5.064442175106709e-09,
      "loss": 2.2482,
      "step": 76028
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0391887426376343,
      "learning_rate": 5.051348410032297e-09,
      "loss": 2.2774,
      "step": 76029
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0393290519714355,
      "learning_rate": 5.038271589111743e-09,
      "loss": 2.2188,
      "step": 76030
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3922635316848755,
      "learning_rate": 5.02521171236614e-09,
      "loss": 2.1173,
      "step": 76031
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1464735269546509,
      "learning_rate": 5.012168779816584e-09,
      "loss": 2.4605,
      "step": 76032
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.054814100265503,
      "learning_rate": 4.999142791485279e-09,
      "loss": 2.4756,
      "step": 76033
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.029891848564148,
      "learning_rate": 4.986133747396649e-09,
      "loss": 2.2047,
      "step": 76034
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1676791906356812,
      "learning_rate": 4.9731416475695686e-09,
      "loss": 2.3796,
      "step": 76035
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0592492818832397,
      "learning_rate": 4.960166492028462e-09,
      "loss": 2.1867,
      "step": 76036
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2058525085449219,
      "learning_rate": 4.947208280794424e-09,
      "loss": 2.2451,
      "step": 76037
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1276600360870361,
      "learning_rate": 4.93426701388966e-09,
      "loss": 2.3967,
      "step": 76038
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9735316634178162,
      "learning_rate": 4.921342691336372e-09,
      "loss": 2.4058,
      "step": 76039
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0584766864776611,
      "learning_rate": 4.908435313155657e-09,
      "loss": 2.1996,
      "step": 76040
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0447629690170288,
      "learning_rate": 4.895544879368608e-09,
      "loss": 2.2695,
      "step": 76041
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1008799076080322,
      "learning_rate": 4.8826713899996495e-09,
      "loss": 2.2806,
      "step": 76042
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1409924030303955,
      "learning_rate": 4.869814845068766e-09,
      "loss": 2.1978,
      "step": 76043
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0582231283187866,
      "learning_rate": 4.856975244598161e-09,
      "loss": 2.381,
      "step": 76044
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1861636638641357,
      "learning_rate": 4.844152588610041e-09,
      "loss": 2.2873,
      "step": 76045
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0074913501739502,
      "learning_rate": 4.831346877125498e-09,
      "loss": 2.4381,
      "step": 76046
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0303815603256226,
      "learning_rate": 4.818558110165628e-09,
      "loss": 2.3156,
      "step": 76047
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2142242193222046,
      "learning_rate": 4.805786287753744e-09,
      "loss": 2.2097,
      "step": 76048
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2401562929153442,
      "learning_rate": 4.793031409910942e-09,
      "loss": 2.446,
      "step": 76049
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0637341737747192,
      "learning_rate": 4.780293476658315e-09,
      "loss": 2.2384,
      "step": 76050
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0078586339950562,
      "learning_rate": 4.767572488016958e-09,
      "loss": 2.1685,
      "step": 76051
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0109004974365234,
      "learning_rate": 4.7548684440101855e-09,
      "loss": 2.3257,
      "step": 76052
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2747515439987183,
      "learning_rate": 4.742181344657981e-09,
      "loss": 2.3543,
      "step": 76053
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.039463758468628,
      "learning_rate": 4.72951118998366e-09,
      "loss": 2.3167,
      "step": 76054
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1404309272766113,
      "learning_rate": 4.716857980006095e-09,
      "loss": 2.4527,
      "step": 76055
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0703938007354736,
      "learning_rate": 4.704221714748603e-09,
      "loss": 2.3431,
      "step": 76056
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0439971685409546,
      "learning_rate": 4.691602394232275e-09,
      "loss": 2.3052,
      "step": 76057
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0760560035705566,
      "learning_rate": 4.679000018479318e-09,
      "loss": 2.1999,
      "step": 76058
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0351921319961548,
      "learning_rate": 4.666414587508605e-09,
      "loss": 2.2559,
      "step": 76059
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.73140025138855,
      "learning_rate": 4.653846101343451e-09,
      "loss": 2.0875,
      "step": 76060
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.151538610458374,
      "learning_rate": 4.64129456000495e-09,
      "loss": 2.2421,
      "step": 76061
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9948747754096985,
      "learning_rate": 4.628759963514196e-09,
      "loss": 2.3662,
      "step": 76062
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0452520847320557,
      "learning_rate": 4.616242311892283e-09,
      "loss": 2.3864,
      "step": 76063
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.233640432357788,
      "learning_rate": 4.603741605160306e-09,
      "loss": 2.3848,
      "step": 76064
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0452300310134888,
      "learning_rate": 4.591257843340469e-09,
      "loss": 2.3975,
      "step": 76065
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1620088815689087,
      "learning_rate": 4.578791026452756e-09,
      "loss": 2.3903,
      "step": 76066
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1540672779083252,
      "learning_rate": 4.566341154518261e-09,
      "loss": 2.3961,
      "step": 76067
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0508533716201782,
      "learning_rate": 4.553908227559189e-09,
      "loss": 2.464,
      "step": 76068
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.051370620727539,
      "learning_rate": 4.541492245595524e-09,
      "loss": 2.3351,
      "step": 76069
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3299548625946045,
      "learning_rate": 4.529093208649471e-09,
      "loss": 2.4135,
      "step": 76070
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1509981155395508,
      "learning_rate": 4.516711116739903e-09,
      "loss": 2.1891,
      "step": 76071
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0339847803115845,
      "learning_rate": 4.504345969890134e-09,
      "loss": 2.1142,
      "step": 76072
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3023191690444946,
      "learning_rate": 4.491997768120149e-09,
      "loss": 2.3275,
      "step": 76073
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0720351934432983,
      "learning_rate": 4.4796665114510415e-09,
      "loss": 2.1598,
      "step": 76074
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2061114311218262,
      "learning_rate": 4.4673521999039074e-09,
      "loss": 2.1353,
      "step": 76075
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2521287202835083,
      "learning_rate": 4.455054833498729e-09,
      "loss": 2.4915,
      "step": 76076
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.153530478477478,
      "learning_rate": 4.442774412257711e-09,
      "loss": 2.3096,
      "step": 76077
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1027004718780518,
      "learning_rate": 4.430510936200838e-09,
      "loss": 2.4606,
      "step": 76078
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.159632921218872,
      "learning_rate": 4.418264405349204e-09,
      "loss": 2.428,
      "step": 76079
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.079897403717041,
      "learning_rate": 4.4060348197227935e-09,
      "loss": 2.4874,
      "step": 76080
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0051705837249756,
      "learning_rate": 4.393822179342699e-09,
      "loss": 2.1985,
      "step": 76081
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0491538047790527,
      "learning_rate": 4.3816264842300175e-09,
      "loss": 2.377,
      "step": 76082
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0913031101226807,
      "learning_rate": 4.36944773440584e-09,
      "loss": 2.0922,
      "step": 76083
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.056898832321167,
      "learning_rate": 4.357285929889044e-09,
      "loss": 2.2558,
      "step": 76084
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1258833408355713,
      "learning_rate": 4.345141070702941e-09,
      "loss": 2.3523,
      "step": 76085
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1138678789138794,
      "learning_rate": 4.333013156865295e-09,
      "loss": 2.3017,
      "step": 76086
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.082550048828125,
      "learning_rate": 4.320902188399423e-09,
      "loss": 2.2984,
      "step": 76087
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0881463289260864,
      "learning_rate": 4.3088081653230865e-09,
      "loss": 2.3513,
      "step": 76088
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0229140520095825,
      "learning_rate": 4.29673108765849e-09,
      "loss": 2.144,
      "step": 76089
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.131041169166565,
      "learning_rate": 4.284670955425618e-09,
      "loss": 2.4586,
      "step": 76090
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1975480318069458,
      "learning_rate": 4.272627768645565e-09,
      "loss": 2.3182,
      "step": 76091
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0664340257644653,
      "learning_rate": 4.260601527337205e-09,
      "loss": 2.4708,
      "step": 76092
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1859467029571533,
      "learning_rate": 4.248592231522741e-09,
      "loss": 2.5445,
      "step": 76093
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.11851167678833,
      "learning_rate": 4.236599881222159e-09,
      "loss": 2.3792,
      "step": 76094
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0252060890197754,
      "learning_rate": 4.224624476454331e-09,
      "loss": 2.2244,
      "step": 76095
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.045840859413147,
      "learning_rate": 4.212666017241462e-09,
      "loss": 2.4771,
      "step": 76096
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0205122232437134,
      "learning_rate": 4.200724503602427e-09,
      "loss": 2.4627,
      "step": 76097
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.042462944984436,
      "learning_rate": 4.1887999355583184e-09,
      "loss": 2.4071,
      "step": 76098
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.048703670501709,
      "learning_rate": 4.176892313128012e-09,
      "loss": 2.3772,
      "step": 76099
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2674007415771484,
      "learning_rate": 4.16500163633371e-09,
      "loss": 2.2093,
      "step": 76100
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.076127529144287,
      "learning_rate": 4.1531279051942875e-09,
      "loss": 2.3308,
      "step": 76101
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.049420714378357,
      "learning_rate": 4.1412711197308385e-09,
      "loss": 2.1449,
      "step": 76102
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0664466619491577,
      "learning_rate": 4.129431279961127e-09,
      "loss": 2.2612,
      "step": 76103
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1335556507110596,
      "learning_rate": 4.117608385908467e-09,
      "loss": 2.2712,
      "step": 76104
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3373252153396606,
      "learning_rate": 4.105802437590623e-09,
      "loss": 2.4764,
      "step": 76105
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1400896310806274,
      "learning_rate": 4.094013435028687e-09,
      "loss": 2.4192,
      "step": 76106
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0944222211837769,
      "learning_rate": 4.082241378241536e-09,
      "loss": 2.3961,
      "step": 76107
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0898098945617676,
      "learning_rate": 4.0704862672502624e-09,
      "loss": 2.3815,
      "step": 76108
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0430713891983032,
      "learning_rate": 4.058748102073739e-09,
      "loss": 2.1906,
      "step": 76109
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1255090236663818,
      "learning_rate": 4.047026882734173e-09,
      "loss": 2.4276,
      "step": 76110
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0691494941711426,
      "learning_rate": 4.0353226092482156e-09,
      "loss": 2.2613,
      "step": 76111
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1160870790481567,
      "learning_rate": 4.023635281638072e-09,
      "loss": 2.338,
      "step": 76112
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0530831813812256,
      "learning_rate": 4.011964899921506e-09,
      "loss": 2.3709,
      "step": 76113
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0203475952148438,
      "learning_rate": 4.000311464120721e-09,
      "loss": 2.3054,
      "step": 76114
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0456095933914185,
      "learning_rate": 3.9886749742545915e-09,
      "loss": 2.34,
      "step": 76115
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0544027090072632,
      "learning_rate": 3.977055430341992e-09,
      "loss": 2.2928,
      "step": 76116
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0707587003707886,
      "learning_rate": 3.965452832402905e-09,
      "loss": 2.1965,
      "step": 76117
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0434499979019165,
      "learning_rate": 3.953867180457316e-09,
      "loss": 2.3058,
      "step": 76118
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9893978834152222,
      "learning_rate": 3.942298474524098e-09,
      "loss": 2.1831,
      "step": 76119
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0961017608642578,
      "learning_rate": 3.930746714625455e-09,
      "loss": 2.1982,
      "step": 76120
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.204503059387207,
      "learning_rate": 3.919211900778042e-09,
      "loss": 2.2989,
      "step": 76121
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0953677892684937,
      "learning_rate": 3.907694033001841e-09,
      "loss": 2.2372,
      "step": 76122
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9927732944488525,
      "learning_rate": 3.896193111317948e-09,
      "loss": 2.2033,
      "step": 76123
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1422898769378662,
      "learning_rate": 3.884709135744125e-09,
      "loss": 2.0861,
      "step": 76124
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0803959369659424,
      "learning_rate": 3.873242106301467e-09,
      "loss": 2.1654,
      "step": 76125
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.013609528541565,
      "learning_rate": 3.8617920230077375e-09,
      "loss": 2.4862,
      "step": 76126
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0993975400924683,
      "learning_rate": 3.850358885884031e-09,
      "loss": 2.4982,
      "step": 76127
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0141639709472656,
      "learning_rate": 3.8389426949492215e-09,
      "loss": 2.315,
      "step": 76128
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.069688081741333,
      "learning_rate": 3.827543450221072e-09,
      "loss": 2.2805,
      "step": 76129
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0255213975906372,
      "learning_rate": 3.8161611517206764e-09,
      "loss": 2.3174,
      "step": 76130
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.010597825050354,
      "learning_rate": 3.80479579946691e-09,
      "loss": 2.2781,
      "step": 76131
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1265532970428467,
      "learning_rate": 3.7934473934786445e-09,
      "loss": 2.5668,
      "step": 76132
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.079474687576294,
      "learning_rate": 3.782115933775865e-09,
      "loss": 2.2662,
      "step": 76133
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.14531672000885,
      "learning_rate": 3.770801420377445e-09,
      "loss": 2.5127,
      "step": 76134
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1520113945007324,
      "learning_rate": 3.75950385330226e-09,
      "loss": 2.3698,
      "step": 76135
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.02703857421875,
      "learning_rate": 3.748223232569181e-09,
      "loss": 2.4975,
      "step": 76136
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1136969327926636,
      "learning_rate": 3.736959558198195e-09,
      "loss": 2.388,
      "step": 76137
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3601943254470825,
      "learning_rate": 3.725712830208173e-09,
      "loss": 2.2473,
      "step": 76138
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.813931941986084,
      "learning_rate": 3.7144830486179896e-09,
      "loss": 2.2919,
      "step": 76139
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1264193058013916,
      "learning_rate": 3.7032702134465192e-09,
      "loss": 2.5384,
      "step": 76140
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.102331280708313,
      "learning_rate": 3.6920743247126356e-09,
      "loss": 2.2989,
      "step": 76141
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1651250123977661,
      "learning_rate": 3.6808953824363224e-09,
      "loss": 2.4369,
      "step": 76142
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0622202157974243,
      "learning_rate": 3.6697333866353435e-09,
      "loss": 2.3874,
      "step": 76143
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9938828349113464,
      "learning_rate": 3.6585883373285726e-09,
      "loss": 2.3189,
      "step": 76144
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.095173716545105,
      "learning_rate": 3.6474602345371036e-09,
      "loss": 2.3072,
      "step": 76145
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.089130163192749,
      "learning_rate": 3.63634907827648e-09,
      "loss": 2.243,
      "step": 76146
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3335610628128052,
      "learning_rate": 3.625254868568906e-09,
      "loss": 2.4802,
      "step": 76147
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0429569482803345,
      "learning_rate": 3.614177605429925e-09,
      "loss": 2.3193,
      "step": 76148
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0773242712020874,
      "learning_rate": 3.603117288880631e-09,
      "loss": 2.3182,
      "step": 76149
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0246630907058716,
      "learning_rate": 3.5920739189387877e-09,
      "loss": 2.3128,
      "step": 76150
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0850415229797363,
      "learning_rate": 3.581047495624379e-09,
      "loss": 2.3419,
      "step": 76151
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1210283041000366,
      "learning_rate": 3.5700380189540583e-09,
      "loss": 2.2434,
      "step": 76152
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0326749086380005,
      "learning_rate": 3.5590454889478097e-09,
      "loss": 2.2314,
      "step": 76153
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0595252513885498,
      "learning_rate": 3.548069905623397e-09,
      "loss": 2.1903,
      "step": 76154
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2082728147506714,
      "learning_rate": 3.5371112690008035e-09,
      "loss": 2.5154,
      "step": 76155
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.121660590171814,
      "learning_rate": 3.5261695790977935e-09,
      "loss": 2.3256,
      "step": 76156
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0847318172454834,
      "learning_rate": 3.51524483593213e-09,
      "loss": 2.4376,
      "step": 76157
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0057934522628784,
      "learning_rate": 3.5043370395237975e-09,
      "loss": 2.3438,
      "step": 76158
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0894389152526855,
      "learning_rate": 3.493446189890559e-09,
      "loss": 2.3335,
      "step": 76159
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.131920337677002,
      "learning_rate": 3.4825722870501788e-09,
      "loss": 2.2503,
      "step": 76160
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0923439264297485,
      "learning_rate": 3.4717153310226402e-09,
      "loss": 2.4135,
      "step": 76161
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9799133539199829,
      "learning_rate": 3.4608753218257075e-09,
      "loss": 2.153,
      "step": 76162
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1095246076583862,
      "learning_rate": 3.4500522594771435e-09,
      "loss": 2.4113,
      "step": 76163
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9888079166412354,
      "learning_rate": 3.439246143996933e-09,
      "loss": 2.3048,
      "step": 76164
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.03379487991333,
      "learning_rate": 3.428456975400618e-09,
      "loss": 2.3524,
      "step": 76165
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9887431263923645,
      "learning_rate": 3.4176847537092938e-09,
      "loss": 2.0996,
      "step": 76166
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0590025186538696,
      "learning_rate": 3.4069294789396133e-09,
      "loss": 1.9548,
      "step": 76167
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1243630647659302,
      "learning_rate": 3.3961911511093406e-09,
      "loss": 2.303,
      "step": 76168
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.230422019958496,
      "learning_rate": 3.3854697702384587e-09,
      "loss": 2.3095,
      "step": 76169
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1210006475448608,
      "learning_rate": 3.3747653363447317e-09,
      "loss": 2.3631,
      "step": 76170
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0888510942459106,
      "learning_rate": 3.3640778494448133e-09,
      "loss": 2.1182,
      "step": 76171
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0807946920394897,
      "learning_rate": 3.3534073095586873e-09,
      "loss": 2.3686,
      "step": 76172
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1279886960983276,
      "learning_rate": 3.3427537167030064e-09,
      "loss": 2.3519,
      "step": 76173
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0632362365722656,
      "learning_rate": 3.3321170708966454e-09,
      "loss": 2.5428,
      "step": 76174
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0945738554000854,
      "learning_rate": 3.3214973721573675e-09,
      "loss": 2.2935,
      "step": 76175
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1336488723754883,
      "learning_rate": 3.3108946205029356e-09,
      "loss": 2.2216,
      "step": 76176
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.128332257270813,
      "learning_rate": 3.300308815951114e-09,
      "loss": 2.129,
      "step": 76177
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0150800943374634,
      "learning_rate": 3.289739958521887e-09,
      "loss": 2.1973,
      "step": 76178
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1293140649795532,
      "learning_rate": 3.279188048230797e-09,
      "loss": 2.2535,
      "step": 76179
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0520597696304321,
      "learning_rate": 3.268653085095608e-09,
      "loss": 2.1177,
      "step": 76180
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.044723391532898,
      "learning_rate": 3.2581350691363033e-09,
      "loss": 2.4768,
      "step": 76181
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1485475301742554,
      "learning_rate": 3.247634000369537e-09,
      "loss": 2.2866,
      "step": 76182
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0946112871170044,
      "learning_rate": 3.2371498788130728e-09,
      "loss": 2.3877,
      "step": 76183
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0870696306228638,
      "learning_rate": 3.2266827044846737e-09,
      "loss": 2.3184,
      "step": 76184
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0288400650024414,
      "learning_rate": 3.2162324774021037e-09,
      "loss": 2.2622,
      "step": 76185
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0071741342544556,
      "learning_rate": 3.205799197582016e-09,
      "loss": 2.1906,
      "step": 76186
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1110419034957886,
      "learning_rate": 3.195382865044394e-09,
      "loss": 2.3628,
      "step": 76187
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2517566680908203,
      "learning_rate": 3.1849834798058922e-09,
      "loss": 2.3128,
      "step": 76188
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0312520265579224,
      "learning_rate": 3.174601041883163e-09,
      "loss": 2.0864,
      "step": 76189
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0921130180358887,
      "learning_rate": 3.1642355512950808e-09,
      "loss": 2.4861,
      "step": 76190
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0880318880081177,
      "learning_rate": 3.1538870080582984e-09,
      "loss": 2.3618,
      "step": 76191
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.081862211227417,
      "learning_rate": 3.14355541219169e-09,
      "loss": 2.2927,
      "step": 76192
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0679559707641602,
      "learning_rate": 3.1332407637107987e-09,
      "loss": 2.3198,
      "step": 76193
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1343483924865723,
      "learning_rate": 3.1229430626344982e-09,
      "loss": 2.3438,
      "step": 76194
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1784132719039917,
      "learning_rate": 3.112662308980552e-09,
      "loss": 2.4281,
      "step": 76195
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2228095531463623,
      "learning_rate": 3.1023985027645033e-09,
      "loss": 2.3425,
      "step": 76196
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.071012258529663,
      "learning_rate": 3.0921516440063357e-09,
      "loss": 2.542,
      "step": 76197
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0102165937423706,
      "learning_rate": 3.0819217327215935e-09,
      "loss": 2.2302,
      "step": 76198
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0123796463012695,
      "learning_rate": 3.071708768928039e-09,
      "loss": 2.2218,
      "step": 76199
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.028517484664917,
      "learning_rate": 3.061512752642326e-09,
      "loss": 2.3332,
      "step": 76200
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2039523124694824,
      "learning_rate": 3.0513336838833283e-09,
      "loss": 2.2225,
      "step": 76201
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.063201665878296,
      "learning_rate": 3.0411715626676997e-09,
      "loss": 2.1848,
      "step": 76202
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1012468338012695,
      "learning_rate": 3.0310263890120927e-09,
      "loss": 2.3109,
      "step": 76203
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0703136920928955,
      "learning_rate": 3.0208981629342717e-09,
      "loss": 2.4023,
      "step": 76204
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0296708345413208,
      "learning_rate": 3.010786884450889e-09,
      "loss": 2.3854,
      "step": 76205
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9750305414199829,
      "learning_rate": 3.000692553579709e-09,
      "loss": 2.2849,
      "step": 76206
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1651073694229126,
      "learning_rate": 2.9906151703362753e-09,
      "loss": 2.146,
      "step": 76207
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.068084716796875,
      "learning_rate": 2.9805547347405706e-09,
      "loss": 2.4123,
      "step": 76208
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0771844387054443,
      "learning_rate": 2.9705112468070285e-09,
      "loss": 2.2454,
      "step": 76209
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0295689105987549,
      "learning_rate": 2.9604847065534124e-09,
      "loss": 2.2664,
      "step": 76210
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0316500663757324,
      "learning_rate": 2.9504751139974862e-09,
      "loss": 2.1315,
      "step": 76211
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.061193585395813,
      "learning_rate": 2.940482469155903e-09,
      "loss": 2.4057,
      "step": 76212
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0855025053024292,
      "learning_rate": 2.930506772044206e-09,
      "loss": 2.2204,
      "step": 76213
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.123721718788147,
      "learning_rate": 2.920548022681269e-09,
      "loss": 2.2973,
      "step": 76214
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1092348098754883,
      "learning_rate": 2.910606221083745e-09,
      "loss": 2.4233,
      "step": 76215
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.185910940170288,
      "learning_rate": 2.9006813672671773e-09,
      "loss": 2.3176,
      "step": 76216
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1233859062194824,
      "learning_rate": 2.89077346124933e-09,
      "loss": 2.2691,
      "step": 76217
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0963060855865479,
      "learning_rate": 2.8808825030468557e-09,
      "loss": 2.5124,
      "step": 76218
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9306777715682983,
      "learning_rate": 2.8710084926764083e-09,
      "loss": 2.1036,
      "step": 76219
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2467586994171143,
      "learning_rate": 2.8611514301546407e-09,
      "loss": 2.1905,
      "step": 76220
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1126219034194946,
      "learning_rate": 2.8513113154982062e-09,
      "loss": 2.2056,
      "step": 76221
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0334105491638184,
      "learning_rate": 2.841488148724869e-09,
      "loss": 2.1167,
      "step": 76222
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2983710765838623,
      "learning_rate": 2.8316819298490615e-09,
      "loss": 2.3222,
      "step": 76223
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3391172885894775,
      "learning_rate": 2.8218926588896576e-09,
      "loss": 2.2565,
      "step": 76224
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.113842487335205,
      "learning_rate": 2.8121203358622005e-09,
      "loss": 2.1478,
      "step": 76225
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1829957962036133,
      "learning_rate": 2.8023649607833438e-09,
      "loss": 2.3077,
      "step": 76226
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0594775676727295,
      "learning_rate": 2.7926265336697402e-09,
      "loss": 2.2633,
      "step": 76227
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0719130039215088,
      "learning_rate": 2.782905054538043e-09,
      "loss": 2.1769,
      "step": 76228
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1258658170700073,
      "learning_rate": 2.7732005234037963e-09,
      "loss": 2.2499,
      "step": 76229
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.093679666519165,
      "learning_rate": 2.7635129402847627e-09,
      "loss": 2.1874,
      "step": 76230
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.159606695175171,
      "learning_rate": 2.753842305196486e-09,
      "loss": 2.4638,
      "step": 76231
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2293123006820679,
      "learning_rate": 2.7441886181567288e-09,
      "loss": 2.4191,
      "step": 76232
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0489782094955444,
      "learning_rate": 2.7345518791799252e-09,
      "loss": 1.9356,
      "step": 76233
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1662747859954834,
      "learning_rate": 2.724932088282728e-09,
      "loss": 2.1551,
      "step": 76234
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0376243591308594,
      "learning_rate": 2.7153292454829004e-09,
      "loss": 2.1649,
      "step": 76235
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0893967151641846,
      "learning_rate": 2.705743350794876e-09,
      "loss": 2.4006,
      "step": 76236
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3789312839508057,
      "learning_rate": 2.696174404236418e-09,
      "loss": 2.3936,
      "step": 76237
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0386064052581787,
      "learning_rate": 2.68662240582418e-09,
      "loss": 2.1494,
      "step": 76238
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3402113914489746,
      "learning_rate": 2.677087355571484e-09,
      "loss": 2.257,
      "step": 76239
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9774781465530396,
      "learning_rate": 2.667569253497204e-09,
      "loss": 2.327,
      "step": 76240
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1642481088638306,
      "learning_rate": 2.6580680996168838e-09,
      "loss": 2.1529,
      "step": 76241
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0685940980911255,
      "learning_rate": 2.648583893946066e-09,
      "loss": 2.1738,
      "step": 76242
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9922581315040588,
      "learning_rate": 2.6391166365014042e-09,
      "loss": 2.235,
      "step": 76243
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1637976169586182,
      "learning_rate": 2.6296663272984414e-09,
      "loss": 2.2607,
      "step": 76244
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1404188871383667,
      "learning_rate": 2.620232966353831e-09,
      "loss": 2.3736,
      "step": 76245
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0796345472335815,
      "learning_rate": 2.610816553683115e-09,
      "loss": 2.237,
      "step": 76246
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9566043615341187,
      "learning_rate": 2.601417089301839e-09,
      "loss": 2.2761,
      "step": 76247
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0458532571792603,
      "learning_rate": 2.592034573226654e-09,
      "loss": 2.3896,
      "step": 76248
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0249135494232178,
      "learning_rate": 2.582669005473104e-09,
      "loss": 2.4044,
      "step": 76249
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1965970993041992,
      "learning_rate": 2.5733203860578427e-09,
      "loss": 2.2057,
      "step": 76250
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1677137613296509,
      "learning_rate": 2.5639887149953026e-09,
      "loss": 2.2687,
      "step": 76251
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.03400719165802,
      "learning_rate": 2.5546739923021367e-09,
      "loss": 2.3961,
      "step": 76252
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.080920696258545,
      "learning_rate": 2.545376217993889e-09,
      "loss": 2.3754,
      "step": 76253
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1652464866638184,
      "learning_rate": 2.5360953920872123e-09,
      "loss": 2.2139,
      "step": 76254
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.186283826828003,
      "learning_rate": 2.5268315145965392e-09,
      "loss": 2.322,
      "step": 76255
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.253365397453308,
      "learning_rate": 2.517584585538524e-09,
      "loss": 2.2183,
      "step": 76256
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1517399549484253,
      "learning_rate": 2.5083546049287087e-09,
      "loss": 2.3414,
      "step": 76257
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1436752080917358,
      "learning_rate": 2.4991415727826375e-09,
      "loss": 2.3808,
      "step": 76258
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0793782472610474,
      "learning_rate": 2.4899454891147422e-09,
      "loss": 2.4274,
      "step": 76259
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.024648904800415,
      "learning_rate": 2.4807663539427874e-09,
      "loss": 2.3952,
      "step": 76260
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.016914963722229,
      "learning_rate": 2.471604167280095e-09,
      "loss": 2.082,
      "step": 76261
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1614700555801392,
      "learning_rate": 2.462458929144429e-09,
      "loss": 2.2429,
      "step": 76262
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.059624433517456,
      "learning_rate": 2.453330639549112e-09,
      "loss": 2.4061,
      "step": 76263
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1131407022476196,
      "learning_rate": 2.4442192985119073e-09,
      "loss": 2.3086,
      "step": 76264
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0141578912734985,
      "learning_rate": 2.435124906046138e-09,
      "loss": 2.2301,
      "step": 76265
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.121226191520691,
      "learning_rate": 2.426047462168457e-09,
      "loss": 2.2949,
      "step": 76266
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0848267078399658,
      "learning_rate": 2.4169869668932977e-09,
      "loss": 2.3818,
      "step": 76267
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.107556700706482,
      "learning_rate": 2.407943420237313e-09,
      "loss": 2.3143,
      "step": 76268
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.101893424987793,
      "learning_rate": 2.3989168222160464e-09,
      "loss": 2.0808,
      "step": 76269
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3354127407073975,
      "learning_rate": 2.38990717284282e-09,
      "loss": 2.2536,
      "step": 76270
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0985002517700195,
      "learning_rate": 2.380914472134288e-09,
      "loss": 2.2347,
      "step": 76271
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0659301280975342,
      "learning_rate": 2.371938720104883e-09,
      "loss": 2.42,
      "step": 76272
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1338990926742554,
      "learning_rate": 2.3629799167712573e-09,
      "loss": 2.2982,
      "step": 76273
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0711265802383423,
      "learning_rate": 2.354038062146735e-09,
      "loss": 2.2861,
      "step": 76274
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0329458713531494,
      "learning_rate": 2.345113156247969e-09,
      "loss": 2.3582,
      "step": 76275
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.136683464050293,
      "learning_rate": 2.336205199089392e-09,
      "loss": 2.29,
      "step": 76276
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0789523124694824,
      "learning_rate": 2.3273141906865472e-09,
      "loss": 2.1657,
      "step": 76277
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9999243021011353,
      "learning_rate": 2.3184401310538675e-09,
      "loss": 2.2118,
      "step": 76278
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1073002815246582,
      "learning_rate": 2.3095830202068957e-09,
      "loss": 2.398,
      "step": 76279
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3863765001296997,
      "learning_rate": 2.300742858161176e-09,
      "loss": 2.2942,
      "step": 76280
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1318047046661377,
      "learning_rate": 2.2919196449300297e-09,
      "loss": 2.2851,
      "step": 76281
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.199812889099121,
      "learning_rate": 2.283113380530111e-09,
      "loss": 2.4692,
      "step": 76282
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1385667324066162,
      "learning_rate": 2.2743240649758524e-09,
      "loss": 2.4457,
      "step": 76283
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.138363242149353,
      "learning_rate": 2.2655516982816874e-09,
      "loss": 2.3303,
      "step": 76284
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1825913190841675,
      "learning_rate": 2.256796280462048e-09,
      "loss": 2.3975,
      "step": 76285
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1847590208053589,
      "learning_rate": 2.2480578115335885e-09,
      "loss": 2.2652,
      "step": 76286
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3399733304977417,
      "learning_rate": 2.2393362915085202e-09,
      "loss": 2.1832,
      "step": 76287
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0860285758972168,
      "learning_rate": 2.230631720404608e-09,
      "loss": 2.3672,
      "step": 76288
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2472078800201416,
      "learning_rate": 2.2219440982340636e-09,
      "loss": 2.3828,
      "step": 76289
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0609469413757324,
      "learning_rate": 2.21327342501354e-09,
      "loss": 2.3027,
      "step": 76290
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2494208812713623,
      "learning_rate": 2.204619700756361e-09,
      "loss": 2.0795,
      "step": 76291
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0548690557479858,
      "learning_rate": 2.1959829254780684e-09,
      "loss": 2.5474,
      "step": 76292
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0276230573654175,
      "learning_rate": 2.187363099193096e-09,
      "loss": 2.2548,
      "step": 76293
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0435036420822144,
      "learning_rate": 2.1787602219158766e-09,
      "loss": 2.3922,
      "step": 76294
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1411439180374146,
      "learning_rate": 2.170174293660843e-09,
      "loss": 2.5076,
      "step": 76295
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0550897121429443,
      "learning_rate": 2.1616053144424276e-09,
      "loss": 2.2212,
      "step": 76296
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.232417345046997,
      "learning_rate": 2.153053284276174e-09,
      "loss": 2.4092,
      "step": 76297
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1110533475875854,
      "learning_rate": 2.144518203175405e-09,
      "loss": 2.2238,
      "step": 76298
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.035097599029541,
      "learning_rate": 2.1360000711545536e-09,
      "loss": 2.4643,
      "step": 76299
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9993315935134888,
      "learning_rate": 2.1274988882291624e-09,
      "loss": 2.1322,
      "step": 76300
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.186065435409546,
      "learning_rate": 2.1190146544125544e-09,
      "loss": 2.1755,
      "step": 76301
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3090591430664062,
      "learning_rate": 2.1105473697202727e-09,
      "loss": 2.2665,
      "step": 76302
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0778462886810303,
      "learning_rate": 2.10209703416564e-09,
      "loss": 2.4515,
      "step": 76303
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.006230115890503,
      "learning_rate": 2.0936636477641994e-09,
      "loss": 2.3003,
      "step": 76304
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.062595248222351,
      "learning_rate": 2.085247210528163e-09,
      "loss": 2.3648,
      "step": 76305
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1161867380142212,
      "learning_rate": 2.0768477224730745e-09,
      "loss": 2.3318,
      "step": 76306
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0841375589370728,
      "learning_rate": 2.0684651836144766e-09,
      "loss": 2.3765,
      "step": 76307
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0326658487319946,
      "learning_rate": 2.0600995939634715e-09,
      "loss": 2.4105,
      "step": 76308
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0988069772720337,
      "learning_rate": 2.0517509535378233e-09,
      "loss": 2.4926,
      "step": 76309
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0545175075531006,
      "learning_rate": 2.0434192623486337e-09,
      "loss": 2.3097,
      "step": 76310
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0936129093170166,
      "learning_rate": 2.0351045204114463e-09,
      "loss": 2.2716,
      "step": 76311
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0906414985656738,
      "learning_rate": 2.026806727739583e-09,
      "loss": 2.4866,
      "step": 76312
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0604009628295898,
      "learning_rate": 2.0185258843485876e-09,
      "loss": 2.3007,
      "step": 76313
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.109479308128357,
      "learning_rate": 2.0102619902517826e-09,
      "loss": 2.0887,
      "step": 76314
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1283848285675049,
      "learning_rate": 2.002015045462491e-09,
      "loss": 2.3612,
      "step": 76315
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1101082563400269,
      "learning_rate": 1.993785049995145e-09,
      "loss": 2.1585,
      "step": 76316
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1010425090789795,
      "learning_rate": 1.985572003864178e-09,
      "loss": 2.2613,
      "step": 76317
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1721405982971191,
      "learning_rate": 1.9773759070829122e-09,
      "loss": 2.3804,
      "step": 76318
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.150790810585022,
      "learning_rate": 1.969196759664671e-09,
      "loss": 2.516,
      "step": 76319
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1074742078781128,
      "learning_rate": 1.961034561624997e-09,
      "loss": 2.238,
      "step": 76320
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1510474681854248,
      "learning_rate": 1.9528893129761027e-09,
      "loss": 2.4846,
      "step": 76321
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1414191722869873,
      "learning_rate": 1.9447610137335314e-09,
      "loss": 2.2725,
      "step": 76322
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7725822925567627,
      "learning_rate": 1.9366496639094955e-09,
      "loss": 2.1804,
      "step": 76323
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1825916767120361,
      "learning_rate": 1.928555263518428e-09,
      "loss": 2.5185,
      "step": 76324
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0669506788253784,
      "learning_rate": 1.9204778125736512e-09,
      "loss": 2.3093,
      "step": 76325
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1213948726654053,
      "learning_rate": 1.912417311088488e-09,
      "loss": 2.0164,
      "step": 76326
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0876271724700928,
      "learning_rate": 1.904373759078482e-09,
      "loss": 2.0871,
      "step": 76327
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.088732123374939,
      "learning_rate": 1.896347156554734e-09,
      "loss": 2.2848,
      "step": 76328
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1221537590026855,
      "learning_rate": 1.888337503532789e-09,
      "loss": 2.2356,
      "step": 76329
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1070791482925415,
      "learning_rate": 1.880344800025968e-09,
      "loss": 2.3567,
      "step": 76330
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1169538497924805,
      "learning_rate": 1.872369046046485e-09,
      "loss": 2.2925,
      "step": 76331
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6605702638626099,
      "learning_rate": 1.864410241609882e-09,
      "loss": 2.1974,
      "step": 76332
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1486856937408447,
      "learning_rate": 1.8564683867272615e-09,
      "loss": 2.5279,
      "step": 76333
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1747251749038696,
      "learning_rate": 1.8485434814141667e-09,
      "loss": 2.0299,
      "step": 76334
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.072210431098938,
      "learning_rate": 1.8406355256839204e-09,
      "loss": 2.138,
      "step": 76335
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0512675046920776,
      "learning_rate": 1.8327445195487348e-09,
      "loss": 2.4739,
      "step": 76336
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0146056413650513,
      "learning_rate": 1.8248704630219326e-09,
      "loss": 2.3016,
      "step": 76337
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1059846878051758,
      "learning_rate": 1.817013356117947e-09,
      "loss": 2.5698,
      "step": 76338
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1070505380630493,
      "learning_rate": 1.8091731988489902e-09,
      "loss": 2.5605,
      "step": 76339
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.114290714263916,
      "learning_rate": 1.8013499912294952e-09,
      "loss": 2.2456,
      "step": 76340
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0755099058151245,
      "learning_rate": 1.7935437332716743e-09,
      "loss": 2.4009,
      "step": 76341
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0351181030273438,
      "learning_rate": 1.7857544249899606e-09,
      "loss": 2.452,
      "step": 76342
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0694949626922607,
      "learning_rate": 1.7779820663965663e-09,
      "loss": 2.3427,
      "step": 76343
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1178677082061768,
      "learning_rate": 1.7702266575048144e-09,
      "loss": 2.2298,
      "step": 76344
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1393318176269531,
      "learning_rate": 1.7624881983291376e-09,
      "loss": 2.5009,
      "step": 76345
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1558738946914673,
      "learning_rate": 1.754766688880638e-09,
      "loss": 2.4487,
      "step": 76346
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9844544529914856,
      "learning_rate": 1.7470621291726386e-09,
      "loss": 1.8875,
      "step": 76347
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1382226943969727,
      "learning_rate": 1.739374519219572e-09,
      "loss": 2.4223,
      "step": 76348
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2841432094573975,
      "learning_rate": 1.731703859033651e-09,
      "loss": 2.3849,
      "step": 76349
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2255876064300537,
      "learning_rate": 1.724050148628198e-09,
      "loss": 2.3247,
      "step": 76350
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1292589902877808,
      "learning_rate": 1.7164133880154255e-09,
      "loss": 2.4569,
      "step": 76351
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0781230926513672,
      "learning_rate": 1.7087935772097664e-09,
      "loss": 2.4477,
      "step": 76352
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.066592812538147,
      "learning_rate": 1.701190716222323e-09,
      "loss": 2.1833,
      "step": 76353
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1566909551620483,
      "learning_rate": 1.693604805067528e-09,
      "loss": 2.4458,
      "step": 76354
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2135894298553467,
      "learning_rate": 1.686035843756484e-09,
      "loss": 2.3909,
      "step": 76355
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1965301036834717,
      "learning_rate": 1.6784838323047337e-09,
      "loss": 2.2703,
      "step": 76356
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1505169868469238,
      "learning_rate": 1.6709487707222693e-09,
      "loss": 2.2715,
      "step": 76357
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.143205165863037,
      "learning_rate": 1.6634306590235238e-09,
      "loss": 2.2275,
      "step": 76358
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9877804517745972,
      "learning_rate": 1.6559294972207097e-09,
      "loss": 2.3999,
      "step": 76359
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0430281162261963,
      "learning_rate": 1.648445285326039e-09,
      "loss": 2.482,
      "step": 76360
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0244204998016357,
      "learning_rate": 1.6409780233528349e-09,
      "loss": 2.0566,
      "step": 76361
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0623189210891724,
      "learning_rate": 1.63352771131442e-09,
      "loss": 2.2792,
      "step": 76362
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0199531316757202,
      "learning_rate": 1.6260943492230064e-09,
      "loss": 2.4854,
      "step": 76363
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.173280954360962,
      "learning_rate": 1.6186779370896965e-09,
      "loss": 2.4284,
      "step": 76364
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2669568061828613,
      "learning_rate": 1.6112784749289234e-09,
      "loss": 2.251,
      "step": 76365
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0321364402770996,
      "learning_rate": 1.6038959627528995e-09,
      "loss": 2.3296,
      "step": 76366
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0886812210083008,
      "learning_rate": 1.5965304005727266e-09,
      "loss": 2.2312,
      "step": 76367
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0235786437988281,
      "learning_rate": 1.5891817884028383e-09,
      "loss": 2.4253,
      "step": 76368
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.036821722984314,
      "learning_rate": 1.5818501262543362e-09,
      "loss": 2.2199,
      "step": 76369
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0320165157318115,
      "learning_rate": 1.5745354141405434e-09,
      "loss": 2.3772,
      "step": 76370
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9625861048698425,
      "learning_rate": 1.567237652073672e-09,
      "loss": 2.3745,
      "step": 76371
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.132384181022644,
      "learning_rate": 1.5599568400648245e-09,
      "loss": 2.2503,
      "step": 76372
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1123836040496826,
      "learning_rate": 1.552692978128434e-09,
      "loss": 2.1979,
      "step": 76373
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0112292766571045,
      "learning_rate": 1.545446066275602e-09,
      "loss": 2.1904,
      "step": 76374
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1512466669082642,
      "learning_rate": 1.5382161045185418e-09,
      "loss": 2.3183,
      "step": 76375
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1353826522827148,
      "learning_rate": 1.5310030928694652e-09,
      "loss": 2.2663,
      "step": 76376
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1097731590270996,
      "learning_rate": 1.523807031340585e-09,
      "loss": 2.2166,
      "step": 76377
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1591156721115112,
      "learning_rate": 1.5166279199452239e-09,
      "loss": 2.2207,
      "step": 76378
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0440794229507446,
      "learning_rate": 1.5094657586944839e-09,
      "loss": 2.3114,
      "step": 76379
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.101880669593811,
      "learning_rate": 1.5023205476005776e-09,
      "loss": 2.4236,
      "step": 76380
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.275420904159546,
      "learning_rate": 1.4951922866768277e-09,
      "loss": 2.2373,
      "step": 76381
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.054267406463623,
      "learning_rate": 1.488080975933226e-09,
      "loss": 2.3728,
      "step": 76382
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.095460057258606,
      "learning_rate": 1.4809866153830955e-09,
      "loss": 2.4007,
      "step": 76383
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.041983723640442,
      "learning_rate": 1.473909205037538e-09,
      "loss": 2.1451,
      "step": 76384
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.158825159072876,
      "learning_rate": 1.4668487449098767e-09,
      "loss": 2.4912,
      "step": 76385
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0188161134719849,
      "learning_rate": 1.4598052350123237e-09,
      "loss": 2.3544,
      "step": 76386
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.266021728515625,
      "learning_rate": 1.4527786753548713e-09,
      "loss": 2.4583,
      "step": 76387
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0847312211990356,
      "learning_rate": 1.4457690659508416e-09,
      "loss": 2.2076,
      "step": 76388
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0277912616729736,
      "learning_rate": 1.4387764068124477e-09,
      "loss": 2.6605,
      "step": 76389
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1156755685806274,
      "learning_rate": 1.4318006979496812e-09,
      "loss": 2.1561,
      "step": 76390
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1370584964752197,
      "learning_rate": 1.4248419393758651e-09,
      "loss": 2.3354,
      "step": 76391
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0306413173675537,
      "learning_rate": 1.4179001311032114e-09,
      "loss": 2.3371,
      "step": 76392
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9840637445449829,
      "learning_rate": 1.4109752731428227e-09,
      "loss": 1.9407,
      "step": 76393
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.069406270980835,
      "learning_rate": 1.4040673655058013e-09,
      "loss": 2.3115,
      "step": 76394
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.124448537826538,
      "learning_rate": 1.3971764082043593e-09,
      "loss": 2.279,
      "step": 76395
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0719833374023438,
      "learning_rate": 1.3903024012507093e-09,
      "loss": 2.2284,
      "step": 76396
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1412392854690552,
      "learning_rate": 1.3834453446559537e-09,
      "loss": 2.3698,
      "step": 76397
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0800645351409912,
      "learning_rate": 1.3766052384323047e-09,
      "loss": 2.3839,
      "step": 76398
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0433329343795776,
      "learning_rate": 1.3697820825897546e-09,
      "loss": 2.3033,
      "step": 76399
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0641610622406006,
      "learning_rate": 1.362975877142736e-09,
      "loss": 2.463,
      "step": 76400
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1740524768829346,
      "learning_rate": 1.3561866220990205e-09,
      "loss": 2.2847,
      "step": 76401
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.018878698348999,
      "learning_rate": 1.3494143174741515e-09,
      "loss": 2.3887,
      "step": 76402
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9749289751052856,
      "learning_rate": 1.3426589632759002e-09,
      "loss": 2.2829,
      "step": 76403
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1792082786560059,
      "learning_rate": 1.3359205595186997e-09,
      "loss": 2.1456,
      "step": 76404
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0285382270812988,
      "learning_rate": 1.329199106212542e-09,
      "loss": 2.4771,
      "step": 76405
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0482282638549805,
      "learning_rate": 1.3224946033685293e-09,
      "loss": 2.3297,
      "step": 76406
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0491812229156494,
      "learning_rate": 1.3158070509977638e-09,
      "loss": 2.3782,
      "step": 76407
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9993994832038879,
      "learning_rate": 1.3091364491135682e-09,
      "loss": 2.2954,
      "step": 76408
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0437934398651123,
      "learning_rate": 1.3024827977259346e-09,
      "loss": 2.3641,
      "step": 76409
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.034639835357666,
      "learning_rate": 1.2958460968459651e-09,
      "loss": 2.2518,
      "step": 76410
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1053634881973267,
      "learning_rate": 1.2892263464847622e-09,
      "loss": 2.009,
      "step": 76411
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.113520622253418,
      "learning_rate": 1.2826235466545379e-09,
      "loss": 2.5649,
      "step": 76412
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.021028995513916,
      "learning_rate": 1.2760376973652843e-09,
      "loss": 2.3613,
      "step": 76413
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.0567530393600464,
      "learning_rate": 1.2694687986292143e-09,
      "loss": 2.2243,
      "step": 76414
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.1051846742630005,
      "learning_rate": 1.2629168504563194e-09,
      "loss": 2.4924,
      "step": 76415
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9819419980049133,
      "learning_rate": 1.2563818528599225e-09,
      "loss": 2.3537,
      "step": 76416
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0810965299606323,
      "learning_rate": 1.2498638058477951e-09,
      "loss": 2.4734,
      "step": 76417
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.095839500427246,
      "learning_rate": 1.2433627094343704e-09,
      "loss": 2.1556,
      "step": 76418
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1243430376052856,
      "learning_rate": 1.2368785636274194e-09,
      "loss": 2.2487,
      "step": 76419
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.108579397201538,
      "learning_rate": 1.2304113684402651e-09,
      "loss": 2.2896,
      "step": 76420
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0201754570007324,
      "learning_rate": 1.2239611238840098e-09,
      "loss": 2.4715,
      "step": 76421
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1206989288330078,
      "learning_rate": 1.2175278299675353e-09,
      "loss": 2.5232,
      "step": 76422
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0890254974365234,
      "learning_rate": 1.211111486704164e-09,
      "loss": 2.2778,
      "step": 76423
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2171399593353271,
      "learning_rate": 1.2047120941027779e-09,
      "loss": 2.3415,
      "step": 76424
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1395771503448486,
      "learning_rate": 1.1983296521755895e-09,
      "loss": 2.3004,
      "step": 76425
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9974733591079712,
      "learning_rate": 1.1919641609325905e-09,
      "loss": 2.2616,
      "step": 76426
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1739999055862427,
      "learning_rate": 1.1856156203848834e-09,
      "loss": 2.1172,
      "step": 76427
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0147674083709717,
      "learning_rate": 1.1792840305424601e-09,
      "loss": 2.3489,
      "step": 76428
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0530216693878174,
      "learning_rate": 1.1729693914175333e-09,
      "loss": 2.1827,
      "step": 76429
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0954432487487793,
      "learning_rate": 1.1666717030200947e-09,
      "loss": 2.2524,
      "step": 76430
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1334419250488281,
      "learning_rate": 1.1603909653612465e-09,
      "loss": 2.3715,
      "step": 76431
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.107466697692871,
      "learning_rate": 1.1541271784498708e-09,
      "loss": 2.2535,
      "step": 76432
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0722341537475586,
      "learning_rate": 1.1478803422992902e-09,
      "loss": 2.1861,
      "step": 76433
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1328125,
      "learning_rate": 1.1416504569183863e-09,
      "loss": 2.3198,
      "step": 76434
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1220195293426514,
      "learning_rate": 1.135437522317151e-09,
      "loss": 2.3787,
      "step": 76435
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1522729396820068,
      "learning_rate": 1.1292415385077971e-09,
      "loss": 2.3205,
      "step": 76436
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.107027292251587,
      "learning_rate": 1.1230625054992061e-09,
      "loss": 2.3299,
      "step": 76437
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1310698986053467,
      "learning_rate": 1.1169004233035908e-09,
      "loss": 2.1805,
      "step": 76438
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0848090648651123,
      "learning_rate": 1.1107552919309428e-09,
      "loss": 2.2422,
      "step": 76439
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.03688645362854,
      "learning_rate": 1.1046271113901441e-09,
      "loss": 2.4579,
      "step": 76440
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1087274551391602,
      "learning_rate": 1.0985158816922969e-09,
      "loss": 2.3729,
      "step": 76441
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0622925758361816,
      "learning_rate": 1.0924216028496138e-09,
      "loss": 2.2953,
      "step": 76442
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3048672676086426,
      "learning_rate": 1.086344274869866e-09,
      "loss": 2.3093,
      "step": 76443
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0860704183578491,
      "learning_rate": 1.0802838977641562e-09,
      "loss": 2.4293,
      "step": 76444
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0297991037368774,
      "learning_rate": 1.0742404715446963e-09,
      "loss": 2.3589,
      "step": 76445
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1341992616653442,
      "learning_rate": 1.0682139962181482e-09,
      "loss": 2.3412,
      "step": 76446
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1488854885101318,
      "learning_rate": 1.0622044717978342e-09,
      "loss": 2.2652,
      "step": 76447
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0936272144317627,
      "learning_rate": 1.0562118982926361e-09,
      "loss": 2.2941,
      "step": 76448
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0269496440887451,
      "learning_rate": 1.0502362757125461e-09,
      "loss": 2.3921,
      "step": 76449
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1478012800216675,
      "learning_rate": 1.0442776040675561e-09,
      "loss": 2.1205,
      "step": 76450
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1472194194793701,
      "learning_rate": 1.0383358833687685e-09,
      "loss": 2.401,
      "step": 76451
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9885151982307434,
      "learning_rate": 1.0324111136261749e-09,
      "loss": 2.3241,
      "step": 76452
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0652341842651367,
      "learning_rate": 1.0265032948486576e-09,
      "loss": 2.4884,
      "step": 76453
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0671913623809814,
      "learning_rate": 1.020612427046208e-09,
      "loss": 2.3108,
      "step": 76454
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.079769253730774,
      "learning_rate": 1.0147385102310391e-09,
      "loss": 2.2835,
      "step": 76455
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1299434900283813,
      "learning_rate": 1.0088815444109224e-09,
      "loss": 2.2979,
      "step": 76456
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3445833921432495,
      "learning_rate": 1.0030415295958496e-09,
      "loss": 2.358,
      "step": 76457
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.001858115196228,
      "learning_rate": 9.97218465796923e-10,
      "loss": 2.2096,
      "step": 76458
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.07182776927948,
      "learning_rate": 9.914123530230247e-10,
      "loss": 2.3628,
      "step": 76459
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0213475227355957,
      "learning_rate": 9.856231912852566e-10,
      "loss": 2.4161,
      "step": 76460
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0459905862808228,
      "learning_rate": 9.798509805913902e-10,
      "loss": 2.3989,
      "step": 76461
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1425023078918457,
      "learning_rate": 9.740957209536383e-10,
      "loss": 2.3067,
      "step": 76462
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0664490461349487,
      "learning_rate": 9.683574123797724e-10,
      "loss": 2.0772,
      "step": 76463
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0801249742507935,
      "learning_rate": 9.626360548797842e-10,
      "loss": 2.1883,
      "step": 76464
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0277642011642456,
      "learning_rate": 9.569316484647762e-10,
      "loss": 2.3343,
      "step": 76465
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.044978141784668,
      "learning_rate": 9.5124419314252e-10,
      "loss": 2.071,
      "step": 76466
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0309488773345947,
      "learning_rate": 9.455736889241174e-10,
      "loss": 2.3559,
      "step": 76467
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0800014734268188,
      "learning_rate": 9.399201358184506e-10,
      "loss": 2.438,
      "step": 76468
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0977030992507935,
      "learning_rate": 9.342835338355116e-10,
      "loss": 2.3552,
      "step": 76469
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0092192888259888,
      "learning_rate": 9.286638829852923e-10,
      "loss": 2.344,
      "step": 76470
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1372675895690918,
      "learning_rate": 9.230611832755643e-10,
      "loss": 2.1551,
      "step": 76471
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3581401109695435,
      "learning_rate": 9.174754347174297e-10,
      "loss": 2.3446,
      "step": 76472
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1588138341903687,
      "learning_rate": 9.119066373197704e-10,
      "loss": 2.3523,
      "step": 76473
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2905097007751465,
      "learning_rate": 9.063547910925785e-10,
      "loss": 2.3027,
      "step": 76474
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9923364520072937,
      "learning_rate": 9.008198960447357e-10,
      "loss": 2.4674,
      "step": 76475
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1283762454986572,
      "learning_rate": 8.953019521851236e-10,
      "loss": 2.3926,
      "step": 76476
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.031540870666504,
      "learning_rate": 8.898009595237344e-10,
      "loss": 2.3004,
      "step": 76477
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0370657444000244,
      "learning_rate": 8.843169180705602e-10,
      "loss": 2.1648,
      "step": 76478
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.009937047958374,
      "learning_rate": 8.788498278344826e-10,
      "loss": 2.4146,
      "step": 76479
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0345807075500488,
      "learning_rate": 8.733996888232732e-10,
      "loss": 2.2758,
      "step": 76480
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0086548328399658,
      "learning_rate": 8.679665010480343e-10,
      "loss": 2.3813,
      "step": 76481
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0923622846603394,
      "learning_rate": 8.625502645176475e-10,
      "loss": 2.3468,
      "step": 76482
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0107671022415161,
      "learning_rate": 8.571509792398846e-10,
      "loss": 2.4277,
      "step": 76483
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0054017305374146,
      "learning_rate": 8.517686452258478e-10,
      "loss": 2.3934,
      "step": 76484
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0586291551589966,
      "learning_rate": 8.464032624833085e-10,
      "loss": 2.3768,
      "step": 76485
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0237500667572021,
      "learning_rate": 8.410548310222589e-10,
      "loss": 2.3961,
      "step": 76486
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.998742938041687,
      "learning_rate": 8.357233508515805e-10,
      "loss": 2.1715,
      "step": 76487
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0107886791229248,
      "learning_rate": 8.304088219801554e-10,
      "loss": 2.3371,
      "step": 76488
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1008085012435913,
      "learning_rate": 8.25111244415755e-10,
      "loss": 2.3851,
      "step": 76489
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1391420364379883,
      "learning_rate": 8.198306181694815e-10,
      "loss": 2.3072,
      "step": 76490
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0904011726379395,
      "learning_rate": 8.145669432491065e-10,
      "loss": 2.2061,
      "step": 76491
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0729460716247559,
      "learning_rate": 8.093202196646221e-10,
      "loss": 2.3878,
      "step": 76492
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0404876470565796,
      "learning_rate": 8.040904474226896e-10,
      "loss": 2.2801,
      "step": 76493
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.29061758518219,
      "learning_rate": 7.988776265344111e-10,
      "loss": 2.3345,
      "step": 76494
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1035279035568237,
      "learning_rate": 7.936817570075583e-10,
      "loss": 2.0565,
      "step": 76495
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9813978672027588,
      "learning_rate": 7.88502838851013e-10,
      "loss": 2.3365,
      "step": 76496
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0592211484909058,
      "learning_rate": 7.833408720736569e-10,
      "loss": 2.3654,
      "step": 76497
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2683418989181519,
      "learning_rate": 7.781958566843717e-10,
      "loss": 2.5076,
      "step": 76498
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.087570071220398,
      "learning_rate": 7.730677926920393e-10,
      "loss": 2.3571,
      "step": 76499
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.057113766670227,
      "learning_rate": 7.679566801055416e-10,
      "loss": 2.206,
      "step": 76500
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9921295642852783,
      "learning_rate": 7.628625189315397e-10,
      "loss": 2.3786,
      "step": 76501
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2874958515167236,
      "learning_rate": 7.577853091822463e-10,
      "loss": 2.125,
      "step": 76502
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.086442232131958,
      "learning_rate": 7.527250508632122e-10,
      "loss": 2.2438,
      "step": 76503
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0435066223144531,
      "learning_rate": 7.476817439833195e-10,
      "loss": 2.2641,
      "step": 76504
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1273523569107056,
      "learning_rate": 7.4265538855256e-10,
      "loss": 2.3331,
      "step": 76505
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1218043565750122,
      "learning_rate": 7.376459845798157e-10,
      "loss": 2.2737,
      "step": 76506
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2495384216308594,
      "learning_rate": 7.326535320706373e-10,
      "loss": 2.256,
      "step": 76507
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1287922859191895,
      "learning_rate": 7.276780310372378e-10,
      "loss": 2.1764,
      "step": 76508
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.145603895187378,
      "learning_rate": 7.227194814851679e-10,
      "loss": 2.3559,
      "step": 76509
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9782229065895081,
      "learning_rate": 7.177778834233096e-10,
      "loss": 2.1313,
      "step": 76510
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0593478679656982,
      "learning_rate": 7.128532368616548e-10,
      "loss": 2.5515,
      "step": 76511
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0988340377807617,
      "learning_rate": 7.079455418068648e-10,
      "loss": 2.1419,
      "step": 76512
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1615796089172363,
      "learning_rate": 7.030547982678215e-10,
      "loss": 2.2219,
      "step": 76513
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2519532442092896,
      "learning_rate": 6.981810062534067e-10,
      "loss": 2.3335,
      "step": 76514
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1227275133132935,
      "learning_rate": 6.933241657713918e-10,
      "loss": 2.2113,
      "step": 76515
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0269265174865723,
      "learning_rate": 6.884842768295486e-10,
      "loss": 2.0661,
      "step": 76516
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2284635305404663,
      "learning_rate": 6.836613394367586e-10,
      "loss": 2.4255,
      "step": 76517
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0976301431655884,
      "learning_rate": 6.788553536007936e-10,
      "loss": 2.2837,
      "step": 76518
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0823562145233154,
      "learning_rate": 6.74066319329425e-10,
      "loss": 2.4794,
      "step": 76519
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0299572944641113,
      "learning_rate": 6.692942366315347e-10,
      "loss": 2.3653,
      "step": 76520
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.091634750366211,
      "learning_rate": 6.645391055148942e-10,
      "loss": 2.383,
      "step": 76521
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.07606840133667,
      "learning_rate": 6.598009259883853e-10,
      "loss": 2.3563,
      "step": 76522
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1662662029266357,
      "learning_rate": 6.550796980586693e-10,
      "loss": 2.2455,
      "step": 76523
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.105418086051941,
      "learning_rate": 6.503754217346281e-10,
      "loss": 2.2806,
      "step": 76524
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1015398502349854,
      "learning_rate": 6.456880970240332e-10,
      "loss": 2.3656,
      "step": 76525
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0396249294281006,
      "learning_rate": 6.41017723934656e-10,
      "loss": 2.331,
      "step": 76526
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0607918500900269,
      "learning_rate": 6.363643024753785e-10,
      "loss": 2.4667,
      "step": 76527
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9691629409790039,
      "learning_rate": 6.317278326517517e-10,
      "loss": 2.4319,
      "step": 76528
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2523179054260254,
      "learning_rate": 6.271083144737677e-10,
      "loss": 2.46,
      "step": 76529
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.14478600025177,
      "learning_rate": 6.22505747949198e-10,
      "loss": 2.1995,
      "step": 76530
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0415000915527344,
      "learning_rate": 6.17920133084704e-10,
      "loss": 2.1206,
      "step": 76531
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0277601480484009,
      "learning_rate": 6.133514698891674e-10,
      "loss": 2.1861,
      "step": 76532
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0404704809188843,
      "learning_rate": 6.087997583692496e-10,
      "loss": 2.4668,
      "step": 76533
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5498318672180176,
      "learning_rate": 6.04264998532722e-10,
      "loss": 2.2502,
      "step": 76534
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0994091033935547,
      "learning_rate": 5.997471903884667e-10,
      "loss": 2.6007,
      "step": 76535
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0098168849945068,
      "learning_rate": 5.952463339431447e-10,
      "loss": 2.4073,
      "step": 76536
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.130554437637329,
      "learning_rate": 5.907624292045277e-10,
      "loss": 2.212,
      "step": 76537
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2632168531417847,
      "learning_rate": 5.862954761814977e-10,
      "loss": 2.2519,
      "step": 76538
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.140789270401001,
      "learning_rate": 5.818454748796054e-10,
      "loss": 2.2648,
      "step": 76539
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0128300189971924,
      "learning_rate": 5.774124253077329e-10,
      "loss": 2.3382,
      "step": 76540
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0165480375289917,
      "learning_rate": 5.729963274725414e-10,
      "loss": 2.5813,
      "step": 76541
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1583974361419678,
      "learning_rate": 5.685971813818026e-10,
      "loss": 2.2832,
      "step": 76542
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0050849914550781,
      "learning_rate": 5.642149870432878e-10,
      "loss": 2.2457,
      "step": 76543
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1699961423873901,
      "learning_rate": 5.598497444636586e-10,
      "loss": 2.3938,
      "step": 76544
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.087390661239624,
      "learning_rate": 5.555014536517967e-10,
      "loss": 2.2369,
      "step": 76545
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.174901008605957,
      "learning_rate": 5.511701146143633e-10,
      "loss": 2.3681,
      "step": 76546
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1514908075332642,
      "learning_rate": 5.4685572735802e-10,
      "loss": 2.183,
      "step": 76547
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.033531665802002,
      "learning_rate": 5.425582918905381e-10,
      "loss": 2.106,
      "step": 76548
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9941168427467346,
      "learning_rate": 5.382778082185791e-10,
      "loss": 2.3043,
      "step": 76549
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1552315950393677,
      "learning_rate": 5.340142763510248e-10,
      "loss": 2.3611,
      "step": 76550
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1805334091186523,
      "learning_rate": 5.297676962934261e-10,
      "loss": 2.2943,
      "step": 76551
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1397795677185059,
      "learning_rate": 5.255380680546651e-10,
      "loss": 2.2635,
      "step": 76552
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0818060636520386,
      "learning_rate": 5.213253916402927e-10,
      "loss": 2.1585,
      "step": 76553
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1492581367492676,
      "learning_rate": 5.171296670580806e-10,
      "loss": 2.4591,
      "step": 76554
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0417027473449707,
      "learning_rate": 5.129508943158002e-10,
      "loss": 2.1143,
      "step": 76555
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2564270496368408,
      "learning_rate": 5.087890734190026e-10,
      "loss": 2.3468,
      "step": 76556
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9990526437759399,
      "learning_rate": 5.046442043754596e-10,
      "loss": 2.2357,
      "step": 76557
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0642508268356323,
      "learning_rate": 5.005162871929425e-10,
      "loss": 2.3591,
      "step": 76558
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1932599544525146,
      "learning_rate": 4.964053218781128e-10,
      "loss": 2.1991,
      "step": 76559
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1374032497406006,
      "learning_rate": 4.923113084376319e-10,
      "loss": 2.3525,
      "step": 76560
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0346028804779053,
      "learning_rate": 4.882342468781609e-10,
      "loss": 2.0608,
      "step": 76561
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0653358697891235,
      "learning_rate": 4.841741372074716e-10,
      "loss": 2.2863,
      "step": 76562
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1647589206695557,
      "learning_rate": 4.801309794311149e-10,
      "loss": 2.3555,
      "step": 76563
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1831402778625488,
      "learning_rate": 4.761047735568625e-10,
      "loss": 2.3238,
      "step": 76564
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.223320484161377,
      "learning_rate": 4.720955195924859e-10,
      "loss": 2.1274,
      "step": 76565
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1491062641143799,
      "learning_rate": 4.681032175424261e-10,
      "loss": 2.3406,
      "step": 76566
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.117664098739624,
      "learning_rate": 4.641278674155647e-10,
      "loss": 2.2853,
      "step": 76567
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.190794825553894,
      "learning_rate": 4.6016946921745296e-10,
      "loss": 2.3963,
      "step": 76568
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1347935199737549,
      "learning_rate": 4.5622802295586246e-10,
      "loss": 2.4129,
      "step": 76569
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1163281202316284,
      "learning_rate": 4.523035286363442e-10,
      "loss": 2.2723,
      "step": 76570
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1828373670578003,
      "learning_rate": 4.483959862655596e-10,
      "loss": 2.2902,
      "step": 76571
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0825530290603638,
      "learning_rate": 4.4450539585128017e-10,
      "loss": 2.3647,
      "step": 76572
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0547524690628052,
      "learning_rate": 4.4063175739905706e-10,
      "loss": 2.2008,
      "step": 76573
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0800729990005493,
      "learning_rate": 4.3677507091666183e-10,
      "loss": 2.5291,
      "step": 76574
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0549540519714355,
      "learning_rate": 4.3293533640853536e-10,
      "loss": 2.2384,
      "step": 76575
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2367535829544067,
      "learning_rate": 4.2911255388355943e-10,
      "loss": 2.3661,
      "step": 76576
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0705039501190186,
      "learning_rate": 4.2530672334617494e-10,
      "loss": 2.32,
      "step": 76577
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0899680852890015,
      "learning_rate": 4.215178448041535e-10,
      "loss": 2.1793,
      "step": 76578
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1695904731750488,
      "learning_rate": 4.1774591826304614e-10,
      "loss": 2.2804,
      "step": 76579
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0283504724502563,
      "learning_rate": 4.139909437306244e-10,
      "loss": 2.2534,
      "step": 76580
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1937777996063232,
      "learning_rate": 4.102529212113293e-10,
      "loss": 2.2121,
      "step": 76581
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0980054140090942,
      "learning_rate": 4.0653185071293235e-10,
      "loss": 2.4266,
      "step": 76582
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1461535692214966,
      "learning_rate": 4.028277322409846e-10,
      "loss": 2.4092,
      "step": 76583
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0232067108154297,
      "learning_rate": 3.991405658032577e-10,
      "loss": 2.3211,
      "step": 76584
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.089025855064392,
      "learning_rate": 3.954703514030822e-10,
      "loss": 2.3848,
      "step": 76585
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1204644441604614,
      "learning_rate": 3.9181708904934e-10,
      "loss": 2.2599,
      "step": 76586
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0018153190612793,
      "learning_rate": 3.881807787475822e-10,
      "loss": 2.0239,
      "step": 76587
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1107269525527954,
      "learning_rate": 3.8456142050335986e-10,
      "loss": 2.3185,
      "step": 76588
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1622960567474365,
      "learning_rate": 3.8095901432333436e-10,
      "loss": 2.3468,
      "step": 76589
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1482760906219482,
      "learning_rate": 3.7737356021416704e-10,
      "loss": 2.3965,
      "step": 76590
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0353991985321045,
      "learning_rate": 3.7380505818029875e-10,
      "loss": 2.4811,
      "step": 76591
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0861984491348267,
      "learning_rate": 3.702535082283909e-10,
      "loss": 2.2717,
      "step": 76592
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.082275390625,
      "learning_rate": 3.6671891036510476e-10,
      "loss": 2.1589,
      "step": 76593
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1040748357772827,
      "learning_rate": 3.632012645971017e-10,
      "loss": 2.3013,
      "step": 76594
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.154414176940918,
      "learning_rate": 3.597005709277124e-10,
      "loss": 2.4297,
      "step": 76595
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1155868768692017,
      "learning_rate": 3.562168293658186e-10,
      "loss": 2.2608,
      "step": 76596
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9869635701179504,
      "learning_rate": 3.52750039914751e-10,
      "loss": 2.3906,
      "step": 76597
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3522332906723022,
      "learning_rate": 3.4930020258228115e-10,
      "loss": 2.2366,
      "step": 76598
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.143330454826355,
      "learning_rate": 3.4586731737396017e-10,
      "loss": 2.2512,
      "step": 76599
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1203744411468506,
      "learning_rate": 3.424513842953392e-10,
      "loss": 2.3803,
      "step": 76600
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0619786977767944,
      "learning_rate": 3.3905240335196934e-10,
      "loss": 2.2756,
      "step": 76601
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1517937183380127,
      "learning_rate": 3.3567037454940164e-10,
      "loss": 2.1399,
      "step": 76602
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1175967454910278,
      "learning_rate": 3.3230529789318733e-10,
      "loss": 2.3264,
      "step": 76603
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1234488487243652,
      "learning_rate": 3.289571733910979e-10,
      "loss": 2.5199,
      "step": 76604
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0144962072372437,
      "learning_rate": 3.2562600104646403e-10,
      "loss": 2.3906,
      "step": 76605
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0191447734832764,
      "learning_rate": 3.2231178086594707e-10,
      "loss": 2.4195,
      "step": 76606
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.661340594291687,
      "learning_rate": 3.1901451285398787e-10,
      "loss": 2.2631,
      "step": 76607
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.042240858078003,
      "learning_rate": 3.1573419701835805e-10,
      "loss": 2.2838,
      "step": 76608
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.094093680381775,
      "learning_rate": 3.124708333634985e-10,
      "loss": 2.3269,
      "step": 76609
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0232510566711426,
      "learning_rate": 3.0922442189385006e-10,
      "loss": 2.2982,
      "step": 76610
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0753933191299438,
      "learning_rate": 3.059949626171843e-10,
      "loss": 2.3559,
      "step": 76611
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2295079231262207,
      "learning_rate": 3.0278245553683194e-10,
      "loss": 2.2089,
      "step": 76612
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0859249830245972,
      "learning_rate": 2.995869006594543e-10,
      "loss": 2.4165,
      "step": 76613
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1756577491760254,
      "learning_rate": 2.9640829799060245e-10,
      "loss": 2.4734,
      "step": 76614
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.065285086631775,
      "learning_rate": 2.9324664753471734e-10,
      "loss": 2.2788,
      "step": 76615
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0530948638916016,
      "learning_rate": 2.9010194929735005e-10,
      "loss": 2.5106,
      "step": 76616
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0740206241607666,
      "learning_rate": 2.8697420328405167e-10,
      "loss": 2.4986,
      "step": 76617
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1812530755996704,
      "learning_rate": 2.838634095014836e-10,
      "loss": 2.3604,
      "step": 76618
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1586132049560547,
      "learning_rate": 2.807695679529765e-10,
      "loss": 2.2496,
      "step": 76619
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0933586359024048,
      "learning_rate": 2.7769267864408145e-10,
      "loss": 2.3033,
      "step": 76620
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1404749155044556,
      "learning_rate": 2.7463274158034957e-10,
      "loss": 2.2982,
      "step": 76621
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9935334324836731,
      "learning_rate": 2.71589756767332e-10,
      "loss": 2.0513,
      "step": 76622
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3704860210418701,
      "learning_rate": 2.6856372420946964e-10,
      "loss": 2.393,
      "step": 76623
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9703958034515381,
      "learning_rate": 2.6555464391342376e-10,
      "loss": 2.2994,
      "step": 76624
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.151482105255127,
      "learning_rate": 2.625625158814149e-10,
      "loss": 2.3283,
      "step": 76625
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.989546000957489,
      "learning_rate": 2.5958734012121455e-10,
      "loss": 2.4386,
      "step": 76626
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1551986932754517,
      "learning_rate": 2.566291166372636e-10,
      "loss": 2.3294,
      "step": 76627
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1497029066085815,
      "learning_rate": 2.5368784543400305e-10,
      "loss": 2.457,
      "step": 76628
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0331438779830933,
      "learning_rate": 2.5076352651587367e-10,
      "loss": 2.348,
      "step": 76629
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1733343601226807,
      "learning_rate": 2.478561598895368e-10,
      "loss": 2.1642,
      "step": 76630
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0454463958740234,
      "learning_rate": 2.449657455583232e-10,
      "loss": 2.6637,
      "step": 76631
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1737074851989746,
      "learning_rate": 2.4209228352889416e-10,
      "loss": 2.278,
      "step": 76632
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.010613203048706,
      "learning_rate": 2.3923577380347005e-10,
      "loss": 2.2274,
      "step": 76633
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2694412469863892,
      "learning_rate": 2.363962163898226e-10,
      "loss": 2.1717,
      "step": 76634
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2113627195358276,
      "learning_rate": 2.3357361129017206e-10,
      "loss": 2.277,
      "step": 76635
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.106006145477295,
      "learning_rate": 2.3076795851117996e-10,
      "loss": 2.4183,
      "step": 76636
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1448514461517334,
      "learning_rate": 2.2797925805728704e-10,
      "loss": 2.2126,
      "step": 76637
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1105183362960815,
      "learning_rate": 2.2520750993182405e-10,
      "loss": 2.2188,
      "step": 76638
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0852065086364746,
      "learning_rate": 2.224527141414523e-10,
      "loss": 2.0995,
      "step": 76639
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0636378526687622,
      "learning_rate": 2.197148706895025e-10,
      "loss": 2.2921,
      "step": 76640
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1170802116394043,
      "learning_rate": 2.1699397958152567e-10,
      "loss": 2.577,
      "step": 76641
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0903983116149902,
      "learning_rate": 2.142900408208526e-10,
      "loss": 2.2504,
      "step": 76642
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1302655935287476,
      "learning_rate": 2.116030544130343e-10,
      "loss": 2.3207,
      "step": 76643
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.018136739730835,
      "learning_rate": 2.0893302036362196e-10,
      "loss": 2.1863,
      "step": 76644
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.084535837173462,
      "learning_rate": 2.0627993867483598e-10,
      "loss": 2.3504,
      "step": 76645
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0574188232421875,
      "learning_rate": 2.036438093522275e-10,
      "loss": 2.3153,
      "step": 76646
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.198786973953247,
      "learning_rate": 2.0102463240134763e-10,
      "loss": 2.2565,
      "step": 76647
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1280608177185059,
      "learning_rate": 1.984224078244168e-10,
      "loss": 2.4159,
      "step": 76648
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9921071529388428,
      "learning_rate": 1.9583713562809636e-10,
      "loss": 2.1996,
      "step": 76649
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1188019514083862,
      "learning_rate": 1.9326881581460676e-10,
      "loss": 2.4614,
      "step": 76650
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.103364109992981,
      "learning_rate": 1.9071744839060936e-10,
      "loss": 2.2994,
      "step": 76651
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.072399616241455,
      "learning_rate": 1.8818303335832456e-10,
      "loss": 2.1763,
      "step": 76652
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.11346435546875,
      "learning_rate": 1.8566557072330348e-10,
      "loss": 2.2685,
      "step": 76653
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1088840961456299,
      "learning_rate": 1.8316506048998704e-10,
      "loss": 2.0787,
      "step": 76654
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0526540279388428,
      "learning_rate": 1.8068150266170591e-10,
      "loss": 2.2272,
      "step": 76655
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1404316425323486,
      "learning_rate": 1.7821489724290097e-10,
      "loss": 2.4336,
      "step": 76656
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0887702703475952,
      "learning_rate": 1.757652442380131e-10,
      "loss": 2.4934,
      "step": 76657
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3481563329696655,
      "learning_rate": 1.7333254365148322e-10,
      "loss": 2.267,
      "step": 76658
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1101477146148682,
      "learning_rate": 1.7091679548664197e-10,
      "loss": 2.3517,
      "step": 76659
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2047712802886963,
      "learning_rate": 1.6851799974793027e-10,
      "loss": 2.2476,
      "step": 76660
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9522128105163574,
      "learning_rate": 1.6613615643978896e-10,
      "loss": 2.3479,
      "step": 76661
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9851568937301636,
      "learning_rate": 1.63771265566659e-10,
      "loss": 2.2816,
      "step": 76662
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9522566199302673,
      "learning_rate": 1.614233271307608e-10,
      "loss": 2.3315,
      "step": 76663
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.039445400238037,
      "learning_rate": 1.5909234113764548e-10,
      "loss": 2.1895,
      "step": 76664
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.214900016784668,
      "learning_rate": 1.5677830759064373e-10,
      "loss": 2.2254,
      "step": 76665
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0309638977050781,
      "learning_rate": 1.5448122649308618e-10,
      "loss": 2.3815,
      "step": 76666
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1507757902145386,
      "learning_rate": 1.5220109785052394e-10,
      "loss": 2.1428,
      "step": 76667
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1123381853103638,
      "learning_rate": 1.4993792166628774e-10,
      "loss": 2.451,
      "step": 76668
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0738613605499268,
      "learning_rate": 1.4769169794370818e-10,
      "loss": 2.3498,
      "step": 76669
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1028025150299072,
      "learning_rate": 1.45462426686116e-10,
      "loss": 2.3213,
      "step": 76670
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9923548698425293,
      "learning_rate": 1.4325010789795203e-10,
      "loss": 2.3299,
      "step": 76671
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.164559006690979,
      "learning_rate": 1.4105474158254695e-10,
      "loss": 2.3031,
      "step": 76672
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1295051574707031,
      "learning_rate": 1.3887632774434168e-10,
      "loss": 2.32,
      "step": 76673
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.038292407989502,
      "learning_rate": 1.3671486638666686e-10,
      "loss": 2.4156,
      "step": 76674
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.063645601272583,
      "learning_rate": 1.345703575139634e-10,
      "loss": 2.0956,
      "step": 76675
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.267331838607788,
      "learning_rate": 1.3244280112845176e-10,
      "loss": 2.3907,
      "step": 76676
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1731294393539429,
      "learning_rate": 1.303321972345728e-10,
      "loss": 2.2531,
      "step": 76677
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1143772602081299,
      "learning_rate": 1.2823854583454699e-10,
      "loss": 2.2983,
      "step": 76678
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2173737287521362,
      "learning_rate": 1.2616184693503563e-10,
      "loss": 2.0499,
      "step": 76679
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1860426664352417,
      "learning_rate": 1.2410210053603877e-10,
      "loss": 2.1642,
      "step": 76680
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0154268741607666,
      "learning_rate": 1.220593066431075e-10,
      "loss": 2.2072,
      "step": 76681
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.203345775604248,
      "learning_rate": 1.2003346525957248e-10,
      "loss": 2.3979,
      "step": 76682
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0061290264129639,
      "learning_rate": 1.180245763876542e-10,
      "loss": 2.1131,
      "step": 76683
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3139656782150269,
      "learning_rate": 1.160326400329037e-10,
      "loss": 2.1824,
      "step": 76684
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0385947227478027,
      "learning_rate": 1.1405765619643127e-10,
      "loss": 2.4492,
      "step": 76685
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0774524211883545,
      "learning_rate": 1.1209962488267778e-10,
      "loss": 2.3227,
      "step": 76686
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1467925310134888,
      "learning_rate": 1.1015854609497389e-10,
      "loss": 2.2227,
      "step": 76687
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0864827632904053,
      "learning_rate": 1.0823441983665029e-10,
      "loss": 2.3231,
      "step": 76688
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1104282140731812,
      "learning_rate": 1.0632724611103762e-10,
      "loss": 2.3162,
      "step": 76689
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.027957558631897,
      "learning_rate": 1.0443702492035635e-10,
      "loss": 2.279,
      "step": 76690
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1201711893081665,
      "learning_rate": 1.0256375626904736e-10,
      "loss": 2.2,
      "step": 76691
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0623794794082642,
      "learning_rate": 1.0070744015933109e-10,
      "loss": 2.3489,
      "step": 76692
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9881128668785095,
      "learning_rate": 9.886807659564846e-11,
      "loss": 2.3828,
      "step": 76693
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.046345591545105,
      "learning_rate": 9.704566558021988e-11,
      "loss": 2.3108,
      "step": 76694
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0141334533691406,
      "learning_rate": 9.524020711526583e-11,
      "loss": 2.1913,
      "step": 76695
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0531808137893677,
      "learning_rate": 9.34517012063374e-11,
      "loss": 2.5123,
      "step": 76696
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0439457893371582,
      "learning_rate": 9.16801478534346e-11,
      "loss": 2.2986,
      "step": 76697
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.011501669883728,
      "learning_rate": 8.992554706210855e-11,
      "loss": 2.2506,
      "step": 76698
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1367162466049194,
      "learning_rate": 8.818789883457967e-11,
      "loss": 2.0816,
      "step": 76699
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5444601774215698,
      "learning_rate": 8.646720317195823e-11,
      "loss": 2.3337,
      "step": 76700
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0607839822769165,
      "learning_rate": 8.47634600797953e-11,
      "loss": 2.3534,
      "step": 76701
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0986427068710327,
      "learning_rate": 8.307666956031135e-11,
      "loss": 2.2405,
      "step": 76702
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.992275059223175,
      "learning_rate": 8.14068316146166e-11,
      "loss": 2.2391,
      "step": 76703
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.044495701789856,
      "learning_rate": 7.975394624826216e-11,
      "loss": 2.3348,
      "step": 76704
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0345022678375244,
      "learning_rate": 7.811801346235826e-11,
      "loss": 2.349,
      "step": 76705
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1105976104736328,
      "learning_rate": 7.649903325912534e-11,
      "loss": 2.4105,
      "step": 76706
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1604325771331787,
      "learning_rate": 7.489700564300428e-11,
      "loss": 2.2283,
      "step": 76707
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0530290603637695,
      "learning_rate": 7.331193061510533e-11,
      "loss": 2.2806,
      "step": 76708
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1649545431137085,
      "learning_rate": 7.174380817986937e-11,
      "loss": 2.2517,
      "step": 76709
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0818370580673218,
      "learning_rate": 7.019263833840661e-11,
      "loss": 2.2763,
      "step": 76710
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0161442756652832,
      "learning_rate": 6.865842109404775e-11,
      "loss": 2.4558,
      "step": 76711
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.036572813987732,
      "learning_rate": 6.71411564490132e-11,
      "loss": 2.1274,
      "step": 76712
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9536886215209961,
      "learning_rate": 6.564084440663365e-11,
      "loss": 2.2254,
      "step": 76713
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0350005626678467,
      "learning_rate": 6.415748496801933e-11,
      "loss": 2.2299,
      "step": 76714
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.119599461555481,
      "learning_rate": 6.26910781376111e-11,
      "loss": 2.3108,
      "step": 76715
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2993431091308594,
      "learning_rate": 6.124162391651922e-11,
      "loss": 2.3036,
      "step": 76716
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.026795506477356,
      "learning_rate": 5.980912230696412e-11,
      "loss": 2.0857,
      "step": 76717
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9771738052368164,
      "learning_rate": 5.839357331338669e-11,
      "loss": 2.091,
      "step": 76718
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.06830894947052,
      "learning_rate": 5.6994976935786925e-11,
      "loss": 2.2935,
      "step": 76719
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5622767210006714,
      "learning_rate": 5.561333317749551e-11,
      "loss": 2.0861,
      "step": 76720
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1423035860061646,
      "learning_rate": 5.424864204073288e-11,
      "loss": 2.0975,
      "step": 76721
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0587704181671143,
      "learning_rate": 5.290090352882971e-11,
      "loss": 2.3436,
      "step": 76722
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0771540403366089,
      "learning_rate": 5.157011764289621e-11,
      "loss": 2.3404,
      "step": 76723
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0842981338500977,
      "learning_rate": 5.025628438515284e-11,
      "loss": 2.1254,
      "step": 76724
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0557990074157715,
      "learning_rate": 4.895940375782005e-11,
      "loss": 2.4948,
      "step": 76725
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2889167070388794,
      "learning_rate": 4.767947576422849e-11,
      "loss": 2.2546,
      "step": 76726
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2209646701812744,
      "learning_rate": 4.64165004054884e-11,
      "loss": 2.3726,
      "step": 76727
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0476469993591309,
      "learning_rate": 4.517047768382021e-11,
      "loss": 2.1389,
      "step": 76728
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0870904922485352,
      "learning_rate": 4.3941407601444384e-11,
      "loss": 2.2066,
      "step": 76729
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.14757239818573,
      "learning_rate": 4.272929016169158e-11,
      "loss": 2.3255,
      "step": 76730
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9879988431930542,
      "learning_rate": 4.1534125364561806e-11,
      "loss": 2.0931,
      "step": 76731
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0588098764419556,
      "learning_rate": 4.03559132122755e-11,
      "loss": 2.4978,
      "step": 76732
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.10672128200531,
      "learning_rate": 3.919465370816333e-11,
      "loss": 2.3029,
      "step": 76733
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0226389169692993,
      "learning_rate": 3.805034685444575e-11,
      "loss": 2.3082,
      "step": 76734
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1769245862960815,
      "learning_rate": 3.6922992651122756e-11,
      "loss": 2.4807,
      "step": 76735
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0791287422180176,
      "learning_rate": 3.581259110152502e-11,
      "loss": 2.377,
      "step": 76736
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0741338729858398,
      "learning_rate": 3.471914220676276e-11,
      "loss": 2.1953,
      "step": 76737
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1379077434539795,
      "learning_rate": 3.364264597016664e-11,
      "loss": 2.3719,
      "step": 76738
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0086725950241089,
      "learning_rate": 3.258310239173668e-11,
      "loss": 2.257,
      "step": 76739
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.01902437210083,
      "learning_rate": 3.15405114736933e-11,
      "loss": 2.2726,
      "step": 76740
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.186545491218567,
      "learning_rate": 3.051487321825697e-11,
      "loss": 2.3174,
      "step": 76741
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0240520238876343,
      "learning_rate": 2.950618762764812e-11,
      "loss": 2.4313,
      "step": 76742
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0036942958831787,
      "learning_rate": 2.8514454702976978e-11,
      "loss": 2.3325,
      "step": 76743
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0174294710159302,
      "learning_rate": 2.753967444535377e-11,
      "loss": 2.2398,
      "step": 76744
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1189864873886108,
      "learning_rate": 2.658184685699894e-11,
      "loss": 2.2106,
      "step": 76745
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0907032489776611,
      "learning_rate": 2.564097194013293e-11,
      "loss": 2.4395,
      "step": 76746
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0200380086898804,
      "learning_rate": 2.471704969586597e-11,
      "loss": 2.0588,
      "step": 76747
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9902372360229492,
      "learning_rate": 2.3810080125308277e-11,
      "loss": 2.3439,
      "step": 76748
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2644034624099731,
      "learning_rate": 2.2920063229570076e-11,
      "loss": 2.5054,
      "step": 76749
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0409005880355835,
      "learning_rate": 2.204699901198204e-11,
      "loss": 2.4073,
      "step": 76750
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1093857288360596,
      "learning_rate": 2.1190887472544165e-11,
      "loss": 2.1213,
      "step": 76751
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1552138328552246,
      "learning_rate": 2.0351728613476894e-11,
      "loss": 2.4333,
      "step": 76752
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1970833539962769,
      "learning_rate": 1.9529522435890457e-11,
      "loss": 2.2332,
      "step": 76753
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1103748083114624,
      "learning_rate": 1.8724268940895073e-11,
      "loss": 2.1504,
      "step": 76754
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1415542364120483,
      "learning_rate": 1.7935968129600966e-11,
      "loss": 2.2133,
      "step": 76755
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1075435876846313,
      "learning_rate": 1.7164620004228583e-11,
      "loss": 2.4374,
      "step": 76756
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1709526777267456,
      "learning_rate": 1.6410224565888143e-11,
      "loss": 2.4947,
      "step": 76757
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0455111265182495,
      "learning_rate": 1.5672781815689874e-11,
      "loss": 2.0566,
      "step": 76758
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.037575125694275,
      "learning_rate": 1.495229175585422e-11,
      "loss": 2.3548,
      "step": 76759
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0525251626968384,
      "learning_rate": 1.4248754385270958e-11,
      "loss": 2.4003,
      "step": 76760
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0819097757339478,
      "learning_rate": 1.3562169707270756e-11,
      "loss": 2.291,
      "step": 76761
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.161690592765808,
      "learning_rate": 1.2892537721853616e-11,
      "loss": 2.4498,
      "step": 76762
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1582450866699219,
      "learning_rate": 1.2239858430129758e-11,
      "loss": 2.302,
      "step": 76763
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2006293535232544,
      "learning_rate": 1.1604131834319631e-11,
      "loss": 2.3404,
      "step": 76764
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1917262077331543,
      "learning_rate": 1.0985357934423235e-11,
      "loss": 2.2038,
      "step": 76765
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9614351987838745,
      "learning_rate": 1.0383536731550792e-11,
      "loss": 2.3763,
      "step": 76766
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0159813165664673,
      "learning_rate": 9.798668227922748e-12,
      "loss": 2.36,
      "step": 76767
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.029888391494751,
      "learning_rate": 9.230752423539103e-12,
      "loss": 2.3167,
      "step": 76768
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9917356967926025,
      "learning_rate": 8.679789318399856e-12,
      "loss": 2.4184,
      "step": 76769
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1053962707519531,
      "learning_rate": 8.145778915835678e-12,
      "loss": 2.4082,
      "step": 76770
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0106223821640015,
      "learning_rate": 7.628721214736346e-12,
      "loss": 2.3618,
      "step": 76771
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0705705881118774,
      "learning_rate": 7.128616216212081e-12,
      "loss": 2.387,
      "step": 76772
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.055714726448059,
      "learning_rate": 6.645463922483331e-12,
      "loss": 2.3641,
      "step": 76773
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.038086175918579,
      "learning_rate": 6.179264333550094e-12,
      "loss": 2.5025,
      "step": 76774
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9811164736747742,
      "learning_rate": 5.7300174494123726e-12,
      "loss": 2.2551,
      "step": 76775
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1850043535232544,
      "learning_rate": 5.297723271180388e-12,
      "loss": 2.3884,
      "step": 76776
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0540602207183838,
      "learning_rate": 4.882381801074587e-12,
      "loss": 2.4074,
      "step": 76777
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1575654745101929,
      "learning_rate": 4.483993037984746e-12,
      "loss": 2.5091,
      "step": 76778
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0611586570739746,
      "learning_rate": 4.102556983021089e-12,
      "loss": 2.1229,
      "step": 76779
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0650278329849243,
      "learning_rate": 3.738073637293838e-12,
      "loss": 2.2978,
      "step": 76780
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.089078664779663,
      "learning_rate": 3.3905430019132158e-12,
      "loss": 2.441,
      "step": 76781
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2247488498687744,
      "learning_rate": 3.0599650757690004e-12,
      "loss": 2.3076,
      "step": 76782
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.112471580505371,
      "learning_rate": 2.7463398610816374e-12,
      "loss": 2.4535,
      "step": 76783
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9923936724662781,
      "learning_rate": 2.4496673567409033e-12,
      "loss": 2.2553,
      "step": 76784
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0175894498825073,
      "learning_rate": 2.1699475649672454e-12,
      "loss": 2.5119,
      "step": 76785
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.031387448310852,
      "learning_rate": 1.9071804857606624e-12,
      "loss": 2.1522,
      "step": 76786
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9390716552734375,
      "learning_rate": 1.6613661191211549e-12,
      "loss": 2.4136,
      "step": 76787
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0870667695999146,
      "learning_rate": 1.432504465048723e-12,
      "loss": 2.3543,
      "step": 76788
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0413271188735962,
      "learning_rate": 1.2205955246535894e-12,
      "loss": 2.3239,
      "step": 76789
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0349475145339966,
      "learning_rate": 1.0256392979357543e-12,
      "loss": 2.3286,
      "step": 76790
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.088958740234375,
      "learning_rate": 8.476357860054407e-13,
      "loss": 2.1795,
      "step": 76791
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1936891078948975,
      "learning_rate": 6.865849888626485e-13,
      "loss": 2.2199,
      "step": 76792
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0163642168045044,
      "learning_rate": 5.424869065073778e-13,
      "loss": 2.4427,
      "step": 76793
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1698077917099,
      "learning_rate": 4.1534153782940565e-13,
      "loss": 2.2506,
      "step": 76794
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1868271827697754,
      "learning_rate": 3.0514888615940097e-13,
      "loss": 2.3621,
      "step": 76795
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2270678281784058,
      "learning_rate": 2.1190894927691775e-13,
      "loss": 2.2109,
      "step": 76796
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0388243198394775,
      "learning_rate": 1.3562172718195599e-13,
      "loss": 2.2517,
      "step": 76797
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2389556169509888,
      "learning_rate": 7.628722209496175e-14,
      "loss": 2.4783,
      "step": 76798
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1168971061706543,
      "learning_rate": 3.3905431795488996e-14,
      "loss": 2.1256,
      "step": 76799
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0295658111572266,
      "learning_rate": 8.476358503983762e-15,
      "loss": 2.384,
      "step": 76800
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3721376657485962,
      "learning_rate": 0.0,
      "loss": 2.4015,
      "step": 76801
    },
    {
      "epoch": 1.0,
      "step": 76801,
      "total_flos": 1.4151972183657677e+17,
      "train_loss": 2.4245699297610215,
      "train_runtime": 11508.8146,
      "train_samples_per_second": 427.084,
      "train_steps_per_second": 6.673
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 76801,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 5000,
  "total_flos": 1.4151972183657677e+17,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}
